Example #1
0
	def predict_proba(self, X, clean=False):
		"""Predict class probabilities for X with the most recently written model.

		The test matrix is written to ``test.data.x`` inside ``self.loc_temp``,
		the external executable ``self.loc_exec`` is run in ``predict`` mode
		against the lexicographically last model file matching ``self.prefix``,
		and the raw scores it writes to ``predictions.txt`` are mapped to
		probabilities with the logistic sigmoid.

		Parameters
		----------
		X : array-like
			Test samples, written out with ``np.savetxt``.
		clean : bool
			When true, delete prediction, model, train and test files from
			``self.loc_temp`` after predicting.

		Returns
		-------
		numpy.ndarray of shape (n_samples, 2)
			Columns are ``[1 - p, p]`` where ``p`` is the sigmoid of the score.

		Raises
		------
		IndexError
			If no model file matching ``self.prefix`` exists in ``self.loc_temp``.
		"""
		# Use the instance's temp directory throughout so the data files live
		# next to the model files that are globbed (and cleaned) below.
		test_x_loc = os.path.join(self.loc_temp, "test.data.x")
		pred_loc = os.path.join(self.loc_temp, "predictions.txt")

		#Store the test set into RGF format
		np.savetxt(test_x_loc, X, delimiter=' ', fmt="%s")

		#Find latest model location
		model_glob = self.loc_temp + os.sep + self.prefix + "*"
		latest_model_loc = sorted(glob(model_glob), reverse=True)[0]

		#Format test command
		params = [
			"test_x_fn=%s" % test_x_loc,
			"prediction_fn=%s" % pred_loc,
			"model_fn=%s" % latest_model_loc,
		]
		# Pass an argument list without a shell: with shell=True a list's extra
		# items go to the shell itself on POSIX, so the executable would have
		# been started with no arguments. stderr is merged into stdout in place
		# of the former "2>&1" redirection.
		cmd = [self.loc_exec, "predict", ",".join(params)]
		subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()

		# atleast_1d: a one-row prediction file loads as a 0-d array.
		scores = np.atleast_1d(np.loadtxt(pred_loc))
		# The sigmoid (logistic CDF) turns a score into a probability; the
		# logistic PDF never exceeds 0.25 and is not a probability.
		p_pos = logistic.cdf(scores)
		y_pred = np.column_stack((1 - p_pos, p_pos))

		#Clean temp directory
		if clean:
			model_glob = self.loc_temp + os.sep + "*"

			for fn in glob(model_glob):
				if "predictions.txt" in fn or "model-" in fn or "train.data." in fn or "test.data." in fn:
					os.remove(fn)

		return y_pred
Example #2
0
    def plot_fit_I(self, nbins=20):
        """
        Plots a comparison of the results of the fit to the input distribution

        The selected magnitudes (``self.cat['mag_auto'][self.mask]``) are split
        into ``nbins`` equal-population percentile bins. Four panels are drawn:
        standardized histograms of log10(I) per bin with a standard normal and
        a unit-variance logistic density overlaid, then the per-bin mean,
        standard deviation and kurtosis of log10(I) against the bin centre,
        with ``self._I_mu`` / ``self._I_std`` overlaid on the mean and
        standard-deviation panels.

        Parameters
        ----------
        nbins : int
            Number of percentile bins in magnitude (default 20, i.e. 5 % each).
        """
        me = np.zeros(nbins)
        st = np.zeros(nbins)
        m = np.zeros(nbins)
        k = np.zeros(nbins)

        # Extracts variables of interest
        mag = self.cat['mag_auto'][self.mask]
        I = np.log10(self.cat['sersicfit'][self.mask, 0])

        plt.figure(figsize=(20, 5))
        plt.subplot(141)
        for i in range(nbins):
            # Bin edges scale with nbins so the bins always tile 0-100 %;
            # a fixed 5 % step was only right for nbins == 20.
            m_min = np.percentile(mag, 100.0 * i / nbins)
            m_max = np.percentile(mag, 100.0 * (i + 1) / nbins)
            ind = (mag > m_min) * (mag < m_max)
            m[i] = 0.5 * (m_min + m_max)
            me[i] = np.mean(I[ind])
            st[i] = np.std(I[ind])
            k[i] = kurtosis(I[ind])
            # `density` replaces the `normed` keyword removed from Matplotlib.
            plt.hist((I[ind] - me[i]) / st[i],
                     30,
                     range=[-5, 5],
                     alpha=0.2,
                     density=True)
        y = np.linspace(-5, 5)

        plt.plot(y, norm.pdf(y), 'b', label='Gaussian')
        # scale = sqrt(3)/pi gives the logistic distribution unit variance
        plt.plot(y,
                 logistic.pdf(y, scale=np.sqrt(3) / np.pi),
                 'r',
                 label='Logistic')
        plt.legend()
        plt.title(r'Standardized $\log_{10}(I)$ in magnitude bins')

        plt.subplot(142)
        plt.plot(m, me, '+-')
        plt.plot(m, self._I_mu(m), 'r--')
        plt.title(r'Mean $\log_{10}(I)$ ')
        plt.axvspan(self.mag_range[0], self.mag_range[1], alpha=0.2, color='k')

        plt.subplot(143)
        plt.plot(m, st, '+-')
        plt.plot(m, self._I_std(m), 'r--')
        plt.axvspan(self.mag_range[0], self.mag_range[1], alpha=0.2, color='k')
        plt.title(r'Standard deviation of $\log_{10}(I)$ ')

        plt.subplot(144)
        plt.plot(m, k, '+-')
        # NOTE(review): 1.2 is the excess kurtosis of a logistic distribution;
        # presumably `kurtosis` is scipy.stats.kurtosis (excess by default).
        plt.axhline(1.2, color='r')
        plt.axvspan(self.mag_range[0], self.mag_range[1], alpha=0.2, color='k')
        plt.title('Kurtosis')
 def __likemaker__(self,x,b):
     """Accumulate the logistic log-likelihood and its first two derivatives.

     Iterates over the first ``self.n`` rows of ``x``; for each row the
     linear index ``x_i' b`` is formed and the contributions to the
     log-likelihood, its gradient and its Hessian are added up using the
     responses in ``self.y``.

     Parameters
     ----------
     x : 2-D array, one observation per row.
     b : coefficient vector (a column vector keeps the outputs 2-D).

     Returns
     -------
     (logL, dlogL, ddlogL) : log-likelihood, gradient and Hessian sums;
     all three are the integer 0 when ``self.n`` is 0.
     """
     (logL,dlogL,ddlogL) = (0,0,0)
     for i in range(self.n):
         xcur = x[i,:].reshape(-1,1)
         inner = xcur.T.dot(b)
         Fx = logistic.cdf(inner)
         # logcdf/logsf stay finite when Fx rounds to 0 or 1; log(Fx) and
         # log(1-Fx) would give -inf there and 0 * -inf poisons the sum with nan.
         logL += self.y[i]*logistic.logcdf(inner)+(1-self.y[i])*logistic.logsf(inner)
         dlogL += (self.y[i]-Fx)*xcur
         ddlogL -= logistic.pdf(inner)*(xcur.dot(xcur.T))
     return(logL,dlogL,ddlogL)
def radical_est_logist_val(mu, lmbd, delta, sample):
    """Evaluate a delta-dependent criterion of a logistic model on a sample.

    For ``delta == 0`` the result is the product of the logistic densities
    (location ``mu``, scale ``lmbd``) of the sample points, i.e. the
    likelihood. Otherwise it is
    ``lmbd ** (delta**2 / (1 + delta)) / delta`` times the sum of the
    densities raised to the power ``delta``.

    An empty sample yields ``1`` when ``delta == 0`` and a zero sum otherwise.
    """
    # Lazily evaluated densities, visited in index order by whichever branch runs.
    densities = (logistic.pdf(sample[pos], mu, lmbd)
                 for pos in range(len(sample)))

    if delta == 0:
        likelihood = 1
        for density in densities:
            likelihood = likelihood * density
        return likelihood

    exponent = pow(delta, 2.0) / (1.0 + delta)
    prefactor = pow(lmbd, exponent) / delta

    power_sum = 0
    for density in densities:
        power_sum = power_sum + pow(density, delta)

    return prefactor * power_sum
Example #5
0
def TVD(q):
    """
    Approximate the total variation distance between the standard logistic
    density and an approximating density.

    q : callable returning the approximate pdf evaluated on an array of points

    The integral is taken as a Riemann sum over 10000 equally spaced points
    on the symmetric interval [logit(eps/2), -logit(eps/2)], where eps is the
    double-precision machine epsilon.
    """
    half_eps = np.finfo(float).eps / 2
    edge = logit(half_eps)
    grid, spacing = np.linspace(edge, -edge, 10000, retstep=1)

    # 0.5 * sum |p - q| * dx
    exact = logistic.pdf(grid)
    approx = q(grid)
    return 0.5 * np.linalg.norm(exact - approx, ord=1) * spacing
def figure_logistic_vs_normal():
    """Draw the standard logistic next to a normal with standard deviation 1.8138.

    The left panel shows the two density functions and the right panel the
    two cumulative distribution functions on [-5, 5]. The figure is
    returned wrapped in ``xmle.Show``.
    """
    from scipy.stats import logistic, norm

    grid = np.linspace(-5, 5, 100)
    fig, (pdf_ax, cdf_ax) = plt.subplots(1, 2, figsize=(7, 3), squeeze=True)

    # (axis, distribution method to call, panel title)
    panels = (
        (pdf_ax, "pdf", "Probability Density Functions\n(same mean and variance)"),
        (cdf_ax, "cdf", "Cumulative Density Functions\n(same mean and variance)"),
    )
    for axis, method, heading in panels:
        axis.plot(grid, getattr(logistic, method)(grid), label='Logistic')
        axis.plot(grid, getattr(norm, method)(grid, 0, 1.8138), label='Normal')
        axis.set_title(heading)
        axis.legend()

    return xmle.Show(fig)
Example #7
0
def pathway_prediction(landa, a_init, mu, gamma, eta, tau, observed_weight_vector, pathway_dict,
                       record_samples=True):
    """Sample pathway activities given observed weights and report the active ones.

    Builds a model (``pm`` is presumably PyMC3 -- confirm) with a Beta(1, 1)
    prior ``landa_value``, a Bernoulli vector ``a`` with one entry per row of
    ``eta``, and an observed Bernoulli vector ``w`` whose success probability
    is derived from ``a``, ``eta``, ``mu``, ``gamma`` and ``tau``. It draws
    1000 samples with a Metropolis step for ``landa_value`` and a
    BinaryGibbsMetropolis step for ``a``, and discards the first 100 values
    when reading the trace.

    Parameters
    ----------
    landa : starting value for ``landa_value``.
    a_init : starting value for ``a`` (cast to int32).
    mu, gamma, eta, tau : model constants used in the likelihood expressions;
        ``eta`` has one row per pathway and one column per metabolite.
    observed_weight_vector : observed data for ``w``.
    pathway_dict : mapping whose ``"pathway"`` entry is indexed by pathway
        position to obtain pathway IDs (also used as the output header).
    record_samples : when true, write the mean pathway activity to
        ``pathway_prediction_output.xlsx`` in the directory named by the
        ``PUMA_OUTPUT_DATA`` environment variable.

    Returns
    -------
    The post-burn-in samples of the second trace variable (``a``, assuming
    the trace lists variables in the order noted below).
    """
    number_of_pathways = np.size(eta, 0)
    number_of_metabolites = np.size(eta, 1)  # computed but not used below
    myModel = pm.Model()
    with myModel:

        landa_value = pm.Beta('landa_value', alpha=1, beta=1)
        # define prior
        a = pm.Bernoulli('a', p=landa_value, shape=number_of_pathways)  # 1 x p
        # define posterior:  p (w|a)
        l = pm.math.dot(a, eta)  # 1xf: number of pathways that can generate each metabolite f
        phi = 1 - tt.exp(tt.log(1 - mu) * l)  # 1xf: p(m_j = 1| a)
        psi = 1 - tt.exp(tt.dot(tt.log(1 - (gamma * phi)), tau))  # 1xk: p(w_k=1 | a)
        w = pm.Bernoulli('w', p=psi, observed=observed_weight_vector, shape=observed_weight_vector.shape)

        start_point = {'landa_value': landa, 'a': a_init.astype(np.int32)}
        step1 = pm.Metropolis([landa_value])
        step2 = pm.BinaryGibbsMetropolis([a])
        trace = pm.sample(draws=1000, step=[step1, step2], start=start_point, random_seed=42)

    # NOTE(review): relies on trace.varnames[0] being the (transformed)
    # landa_value and trace.varnames[1] being `a` -- confirm the ordering.
    landa_value_samples_logodds = trace.get_values(trace.varnames[0], burn=100)
    # NOTE(review): this value is never used below; if the samples really are
    # log-odds, the inverse transform would be logistic.cdf, not logistic.pdf.
    landa_value_samples = logistic.pdf(landa_value_samples_logodds)
    pathways_samples = trace.get_values(trace.varnames[1],  burn=100)

    # Posterior mean activity per pathway (fraction of samples in which it is 1)
    mean_pathways_activity = np.mean(pathways_samples, axis=0)
    if record_samples:
        outdata_dir = os.environ['PUMA_OUTPUT_DATA']
        pathway_prediction_output = os.path.join(outdata_dir, 'pathway_prediction_output.xlsx')
        mean_pathways_activity_in_samples = np.squeeze(mean_pathways_activity).reshape(1, -1)
        write_data(mean_pathways_activity_in_samples, pathway_prediction_output, sheetname="samples",
                   header=pathway_dict["pathway"])

    print("mean_pathways_activity_PUMA_detected:", list(mean_pathways_activity))
    # A pathway counts as active when its mean activity is at least 0.5
    n_active_pathways = len(
        [pathway_activity for pathway_activity in np.mean(pathways_samples, axis=0) if pathway_activity >= 0.5])
    print("number_active_pathways [PUMA detected]:", n_active_pathways)
    active_pathways_indices = np.nonzero(mean_pathways_activity >= 0.5)[0]
    active_pathways_ID = [pathway_dict["pathway"][index] for index in active_pathways_indices]
    print("active_pathways_PUMA_detected:", active_pathways_ID)
    not_active_pathways_indices = np.nonzero(mean_pathways_activity < 0.5)[0]
    not_active_pathways_ID = [pathway_dict["pathway"][index] for index in not_active_pathways_indices]
    print("not_active_pathways_PUMA_detected:", not_active_pathways_ID)
    return pathways_samples
    def logistic(shape, scale):
        """
        Standard logistic noise multiplied by `scale`

        Parameters
        ----------
        shape : tuple
            Shape of noise.
        scale : float
            Scale of noise.

        Returns
        -------
        The object produced by calling ``randomization`` with the joint
        density, CDF, PDF, log-density derivative, negative log-density
        gradient and sampler defined below.
        """
        # NOTE(review): inside this body the name ``logistic`` is looked up at
        # module level (presumably scipy.stats.logistic), not this function.
        # from http://docs.scipy.org/doc/numpy/reference/generated/numpy.random.logistic.html
        # np.prod replaces np.product, which was removed in NumPy 2.0.
        density = lambda x: (np.prod(
            np.exp(-x / scale) /
            (1 + np.exp(-x / scale))**2) / scale**(np.prod(x.shape)))
        cdf = lambda x: logistic.cdf(x, loc=0., scale=scale)
        pdf = lambda x: logistic.pdf(x, loc=0., scale=scale)
        # d/dx log f(x) = (e^{-x/s} - 1) / (s * (e^{-x/s} + 1)); the whole
        # sum in the denominator is multiplied by the scale, so this is the
        # exact negation of grad_negative_log_density below.
        derivative_log_density = lambda x: (np.exp(-x / scale) - 1) / (
            scale * (np.exp(-x / scale) + 1))
        # negative log density is (with \mu=0)
        # x/s + log(s) + 2 \log (1 + e(-x/s))
        grad_negative_log_density = lambda x: (1 - np.exp(-x / scale)) / (
            (1 + np.exp(-x / scale)) * scale)
        sampler = lambda size: np.random.logistic(
            loc=0, scale=scale, size=shape + size)

        # Normalising term of the joint log density: -n * log(scale)
        constant = -np.prod(shape) * np.log(scale)
        return randomization(
            shape,
            density,
            cdf,
            pdf,
            derivative_log_density,
            grad_negative_log_density,
            sampler,
            lipschitz=.25 / scale**2,
            log_density=lambda x: -np.atleast_2d(x).sum(1) / scale - 2 * np.
            log(1 + np.exp(-np.atleast_2d(x) / scale)).sum(1) + constant)
Example #9
0
                                    bounds=(0, [counts / 2, counts, 1]))
    y_fit = f(x, a_, c_, r_)
    print r_
    fig, ax1 = plt.subplots(1, 1, figsize=(6, 4))
    inflection_points[thetastar] = math.log(a_) / r_
    print inflection_points[thetastar]
    ax1.plot(x,
             y_fit,
             '--k',
             label='r = {}, inflection_point = {}'.format(
                 r_, (math.log(a_) / r_)))
    ax1.plot(x, y, 'o')
    plt.legend()
    ax2 = ax1.twinx()
    ax2.plot(x,
             logistic.pdf(x, loc=(math.log(a_) / r_), scale=10),
             color='orange')
    plt.xlabel('Step')
    ax1.set_ylabel('Size of the largest connected component')
    ax2.set_ylabel('Probability density')
    plt.title(
        'Logistic regression for the largest connected component over time for thetastar of {}'
        .format(thetastar))
    plt.show()

with open('inflection_points.json', 'w') as f:
    json.dump(inflection_points.items(), f, sort_keys=True)
with open('one_agent_in_large_system.json', 'r') as fp:
    j = json.load(fp)
j_dict = {}
for i in range(0, len(j)):
Example #10
0
# Per-row list of the column indices that were zeroed out in `raw`
# (an empty list for rows that were skipped).
mask = []
# Number of rows skipped because they had fewer than 50 non-zero entries.
fails = 0

#----------- Mask entries row by row (one row of `truth` per cell) -----------#

for cell in range(truth.shape[0]):
    # Column indices and values of the non-zero entries in this row
    nonZeroIdx = np.nonzero(truth[cell, :])[0]
    nonZeroVals = truth[cell, nonZeroIdx]

    # Too few non-zero entries to sample from: record an empty mask and move on
    if len(nonZeroVals) < 50:
        fails += 1
        print("Cannot mask values for only {} cells".format(len(nonZeroVals)))
        mask.append([])
        continue

    # Sampling weight of each non-zero entry: logistic density of its log value.
    # `params` is defined outside this view (presumably fitted loc/scale -- confirm).
    probs = logistic.pdf(np.log(nonZeroVals), *params)

    # Draw N_MASKED_PER_CELL distinct columns with probability proportional to probs
    mask_c = np.random.choice(nonZeroIdx,
                              N_MASKED_PER_CELL,
                              p=probs / sum(probs),
                              replace=False)

    # Zero the chosen entries in `raw` (modified in place)
    raw[cell, mask_c] = 0

    mask.append(mask_c)

print("Counting masked values..")

# Frequency of the original values at every position where raw now differs from truth
print(Counter(truth[(raw != truth)]))
print(fails)
Example #11
0
 def density(self, x):
     """Return the logistic pdf at ``x`` with location ``self.mu`` and scale ``self.sigma``."""
     location, spread = self.mu, self.sigma
     return logistic.pdf(x, loc=location, scale=spread)
# gamma: plot pdf and cdf between the 1st and 99th percentiles.
# `a` (the shape parameter) and the axes ax1..ax5 are defined outside this view.
mean, var, skew, kurt = gamma.stats(a, moments = 'mvsk')
x = np.linspace(gamma.ppf(0.01, a),
                 gamma.ppf(0.99, a), 100)
ax1.plot(x, gamma.pdf(x, a),
       'r-', lw=5, alpha=0.6, label='gamma pdf')
ax1.set_title('gamma pdf')
ax2.plot(x, gamma.cdf(x, a),
       'r-', lw=5, alpha=0.6, label='gamma cdf')
ax2.set_title('gamma cdf')

# logistic (b is passed positionally, i.e. as the location parameter)
b = 0.5
mean, var, skew, kurt = logistic.stats(b, moments = 'mvsk')
x = np.linspace(logistic.ppf(0.01, b),
                 logistic.ppf(0.99, b), 100)
# Labels name the distribution actually drawn (they used to say "gamma").
ax3.plot(x, logistic.pdf(x, b),
       'g-', lw=5, alpha=0.6, label='logistic pdf')
ax3.set_title('logistic pdf')
ax4.plot(x, logistic.cdf(x, b),
       'g-', lw=5, alpha=0.6, label='logistic cdf')
ax4.set_title('logistic cdf')


# exponential (a is passed positionally, i.e. as the location parameter)
a = 1.99
mean, var, skew, kurt = expon.stats(a, moments = 'mvsk')
x = np.linspace(expon.ppf(0.01, a),
                 expon.ppf(0.99, a), 100)
ax5.plot(x, expon.pdf(x, a),
       'b-', lw=5, alpha=0.6, label='exponential pdf')
ax5.set_title('exponential pdf')
Example #13
0
from scipy.stats import uniform
# Evaluate the uniform density on 100 points spanning [0, 12] and plot it.
x = np.linspace(0,12,100)
# loc=1, scale=10: scipy's uniform is supported on [loc, loc + scale].
y = uniform.pdf(x, loc=1, scale=1+9)
plt.plot(x,y)
pass  # presumably a notebook idiom so the cell does not echo the plot object

### ロジスティック分布(Logistic Distribution)

ロジスティック分布のモジュール名は`logistic`。
```
logistic.pdf(x, loc=0, scale=1)
logistic.cdf(x, loc=0, scale=1)
logistic.ppf(a, loc=0, scale=1)
logistic.rvs(loc=0, scale=1, size=1)
```
* `loc`:平均値
* `scale`:分散に影響する値
```
logistic.pdf(x,loc,scale) = logistic.pdf(z), z=(x-loc)/scale
```

`scipy.stats`の`logistic`を読み込み、確率密度関数の図を描く。

from scipy.stats import logistic
# Plot the standard logistic density (default loc and scale) on 100 points over [-5, 5].
x = np.linspace(-5,5,100)
y = logistic.pdf(x)
plt.plot(x,y)
pass  # presumably a notebook idiom so the cell does not echo the plot object

Example #14
0
# python3
import math
import random
import numpy as np
from scipy.stats import logistic

from matplotlib import pyplot as plt


def inv_logistic_cdf(u):
    """Return the standard logistic quantile of ``u``: the log of the odds u / (1 - u)."""
    odds = u / (1 - u)
    return math.log(odds)

# Fixed seed so the histogram is reproducible
random.seed(1001)

# random samples from Unif(0, 1)
random_numbers = [random.random() for _ in range(12000)]

# random samples of Logistic Dist. through the inverse transform
random_numbers_from_logistic = [inv_logistic_cdf(random_number)
                                for random_number in random_numbers]

# Grid between the 0.1 % and 99.9 % quantiles of the standard logistic
x = np.linspace(logistic.ppf(0.001), logistic.ppf(0.999), 100)

# `density=True` normalises the histogram to unit area so it is comparable
# with the pdf; it replaces the `normed` keyword removed from Matplotlib.
plt.hist(random_numbers_from_logistic, 60, facecolor='green', density=True,
         alpha=0.6, label='random numbers')
plt.plot(x, logistic.pdf(x), lw=2, alpha=0.7, label='logistic pdf')
plt.legend(loc='best')

Example #15
0
File: util.py Project: jjestra/ugd
def neg_weight_f(value):
    """Return logistic pdf / (1 - logistic cdf) at ``value`` after ``trim_value``."""
    trimmed = trim_value(value)
    upper_tail = 1 - logistic.cdf(trimmed)
    return logistic.pdf(trimmed) / upper_tail
Example #16
0
File: util.py Project: jjestra/ugd
def pos_weight_f(value):
    """Return logistic pdf / logistic cdf at ``value`` after ``trim_value``."""
    trimmed = trim_value(value)
    lower_tail = logistic.cdf(trimmed)
    return logistic.pdf(trimmed) / lower_tail
Example #17
0
 def logistic_choice(self, total, sample_size, replace=False):
     """Draw ``sample_size`` indices from ``range(total)`` with logistic-shaped weights.

     Index ``i`` is weighted by the logistic density at ``i`` with location 0
     and scale ``total / 5``, normalised to sum to one, so small indices are
     the most likely.
     """
     positions = np.arange(0, total)
     weights = logistic.pdf(positions, loc=0, scale=total/5.0)
     weights = weights / np.sum(weights)
     return np.random.choice(total, size=sample_size, replace=replace, p=weights)
import numpy as np
from scipy.stats import norm, logistic
import matplotlib.pyplot as plt

get_ipython().run_line_magic('matplotlib', 'inline')

# Kullback-Leibler divergence
def kl_divergence(p, q):
    """Return the discrete Kullback-Leibler divergence D(p || q) in bits.

    Computes ``sum(p * log2(p / q))`` over all entries, treating entries
    where ``p`` is zero as contributing nothing.

    Parameters
    ----------
    p, q : array-like of equal shape holding the two distributions.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    # np.where evaluates both branches, so silence the warnings raised by
    # log2(0) on the entries that are discarded anyway.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Weight by the argument p, not by whatever global `x` happens to be.
        terms = np.where(p != 0, p * np.log2(p / q), 0)
    return np.sum(terms)

# define the grid on which the probability density functions (PDFs) are evaluated
x_range = np.arange(-10, 10, 0.0001)

# define the PDFs of the corresponding random variables
x = norm.pdf(x_range, loc=0, scale=1)
y1 = logistic.pdf(x_range, loc=0, scale=1)
y2 = norm.pdf(x_range, loc=0.5, scale=1)
y3 = norm.pdf(x_range, loc=-0.5, scale=1)

# draw all PDFs on the same plot
plt.figure()
plt.title('PDF of all random variables')
plt.plot(x_range, x, label = "N(0,1)")
plt.plot(x_range, y1, label = "Logistic(0,1)")
plt.plot(x_range, y2, label = "N(0.5,1)")
plt.plot(x_range, y3, label = "N(-0.5,1)")
plt.legend(loc = "best")
plt.show()


# In[2]:
def plot_logistic_fit(w, tf, loc, scale, xlabel_count=4, legend_loc='upper right', out_file=None, ax=None):
    """
    Plot time series for given word and the 
    best-fit logistic distribution.
    
    Parameters:
    -----------
    w : str
        Word; used as the plot title.
    tf : pandas.Series
        Observed series; its sorted index supplies the x tick labels.
    loc : float
        Location of the logistic density, in units of position along ``tf``.
    scale : float
        Scale of the logistic density.
    xlabel_count : int
        Controls how sparsely x ticks are drawn.
    legend_loc : str
        Legend location passed to ``ax.legend``.
    out_file : str
        If given, the figure is saved to this path.
    ax : matplotlib.axes.Axes
        Axes to draw on; a new 5x5 figure is created when omitted, and only
        then are the axis labels added.
    """
    # Local import: NumPy used to be reached through pandas' removed ``pd.np`` alias.
    import numpy as np

    label_font = 18
    title_font = 24
    tick_size = 14
    legend_size = 14
    N = len(tf)
    X = np.arange(N)
    xlabels = sorted(tf.index)
    xlabel_interval = int(ceil(N / (xlabel_count))) + 1
    # zip() is an iterator on Python 3, so materialise it before slicing.
    xticks, xlabels = zip(*list(zip(X, xlabels))[::xlabel_interval])
    xlabel = 'Date'
    ylabel = 'log(f)'
    logistic_y = logistic.pdf(X, loc=loc, scale=scale)
    # rescale logistic y to match tf: 
    # y_logistic_rescaled = y_logistic * y_sum + y_offset
    y_offset = tf.min()
    tf_rescaled = tf - y_offset
    logistic_y_rescaled = logistic_y * tf_rescaled.sum() + y_offset
    series_color = 'r'
    fit_color = 'b'
    series_linestyle = '-'
    fit_linestyle = '--'
    single_axis = ax is None
    if single_axis:
        plt.figure(figsize=(5,5))
        ax = plt.subplot(111)
    l1, = ax.plot(X, tf, color=series_color, linestyle=series_linestyle)
    l2, = ax.plot(X, logistic_y_rescaled, color=fit_color, linestyle=fit_linestyle)
    # add legend
    lines = [l1, l2]
    labels = ['observed', 'logistic fit']
    ax.legend(lines, labels, fontsize=legend_size, loc=legend_loc)
    # set ticks
    ax.set_xticks(xticks)
    ax.set_xticklabels(xlabels, fontsize=tick_size)
    yticks = ax.get_yticks()
    # A real list: on Python 3 map() would hand Matplotlib a one-shot iterator.
    ylabels = ['%.2f'%(t) for t in yticks]
    ax.set_yticks(yticks)
    ax.set_yticklabels(ylabels, fontsize=tick_size)
    ax.set_title(w, fontsize=title_font)
    # if single axis, add x and labels
    if single_axis:
        ax.set_xlabel(xlabel, fontsize=label_font)
        ax.set_ylabel(ylabel, fontsize=label_font)
    if out_file is not None:
        plt.tight_layout()
        plt.savefig(out_file)
# Walk-through of the scipy.stats.logistic API: moments, pdf, frozen
# distribution, cdf/ppf round trip and random variates.
# NOTE(review): `np` is used below but not imported in the visible lines.
from scipy.stats import logistic
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

mean, var, skew, kurt = logistic.stats(moments='mvsk')

# Display the probability density function (``pdf``):

# Grid between the 1st and 99th percentiles of the standard logistic
x = np.linspace(logistic.ppf(0.01), logistic.ppf(0.99), 100)
ax.plot(x, logistic.pdf(x), 'r-', lw=5, alpha=0.6, label='logistic pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = logistic()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = logistic.ppf([0.001, 0.5, 0.999])
# The result of this check is discarded when run as a script; it is only
# echoed in an interactive session.
np.allclose([0.001, 0.5, 0.999], logistic.cdf(vals))
# True

# Generate random numbers:

r = logistic.rvs(size=1000)
Example #21
0
	def make_logistic_plot(self):
		"""Plot the standard logistic pdf of the first 100 entries of ``self.probs``.

		The values are drawn against x = 1..100 as a red line labelled
		"logistic", and the line handle is stored in ``self.line_logistic``.
		"""
		#x = np.linspace(logistic.ppf(0.01), logistic.ppf(0.99), 100)
		x = np.linspace(1, 100, 100)
		# NOTE(review): assumes self.probs has at least 100 entries; a shorter
		# sequence would not match the 100 x positions -- confirm.
		pdf = logistic.pdf(self.probs[:100])
		self.line_logistic, = plt.plot(x, pdf, linewidth=2, label="logistic", color="r")