Example #1
def sample_exposure_to_death():
    exposure_to_onset = lognorm.rvs(s=eto_shape, loc=eto_loc, scale=eto_scale)
    onset_to_death = 1000
    truncate = 40
    while onset_to_death > truncate:
        onset_to_death = lognorm.rvs(s=otd_shape, loc=otd_loc, scale=otd_scale)
    return exposure_to_onset + onset_to_death
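A minimal driver sketch for the sampler above, assuming illustrative values for the module-level lognormal parameters (eto_shape/eto_loc/eto_scale and otd_shape/otd_loc/otd_scale are not defined in the snippet itself):

import numpy as np
from scipy.stats import lognorm

# Hypothetical parameter values; the originals come from fitted delay
# distributions elsewhere in the source module.
eto_shape, eto_loc, eto_scale = 0.5, 0.0, 5.0    # exposure-to-onset, days
otd_shape, otd_loc, otd_scale = 0.45, 0.0, 15.0  # onset-to-death, days

samples = [sample_exposure_to_death() for _ in range(1000)]
print(np.mean(samples), np.percentile(samples, 95))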
Example #2
def test_PDF_lognormal_distance():
    '''
    Test the lognormal width based distance measure.
    '''

    from scipy.stats import lognorm
    from numpy.random import seed

    seed(13493099)

    data1 = lognorm.rvs(0.4, loc=0.0, scale=1.0, size=5000)
    data2 = lognorm.rvs(0.5, loc=0.0, scale=1.0, size=5000)

    test_dist = \
        PDF_Distance(data1,
                     data2,
                     do_fit=True,
                     normalization_type='normalize_by_mean')
    test_dist.distance_metric()

    # Based on the samples, these are the expected stderrs.
    actual_dist = (0.5 - 0.4) / np.sqrt(0.004**2 + 0.005**2)

    # The distance value can scatter by a couple of units due to small sample
    # variations; with the seed set, this assertion should always hold.
    assert np.abs(test_dist.lognormal_distance - actual_dist) < 2.
Example #3
class TestQDM(NumpyTestCase.NumpyTestCase):
    badinput = 0.5
    nanarray = np.array([1, 2, 3, 4, np.nan])

    obsdist = lognorm.rvs(0.57, size=100)
    obsp = lognorm.fit(obsdist)
    refdist = lognorm.rvs(0.45, size=100)
    refp = lognorm.fit(refdist)
    futdist = lognorm.rvs(0.55, size=100)
    futp = lognorm.fit(futdist)
    x = np.linspace(0, 1, 101)
    qobs = np.quantile(obsdist, x)
    qref = np.quantile(refdist, x)
    qfut = np.quantile(futdist, x)

    def testQDMInput(self):
        """Test input is array-like"""
        self.assertRaises(TypeError, qdm, 0.5, 0.5, 0.5)

    def testQDMNanInput(self):
        """Test input array has no nan values"""
        self.assertRaises(ValueError, qdm, self.nanarray, self.nanarray,
                          self.nanarray)

    def testRefInput(self):
        """Test using reference data as future returns obs dist params"""
        testqfut = qdm(self.obsdist, self.refdist, self.refdist)
        testp = lognorm.fit(testqfut)
        self.assertAlmostEqual(self.obsp[0], testp[0], places=2)
        self.assertAlmostEqual(self.obsp[1], testp[1], places=2)
        self.assertAlmostEqual(self.obsp[2], testp[2], places=2)
Example #5
def estimating_val_with_log(mu, theta_2):
    try:
        Value = lognorm.rvs(s=theta_2**0.5, scale=np.exp(mu))
    except ValueError:
        try:
            Value = lognorm.rvs(s=10**-9, scale=np.exp(mu))
        except ValueError:
            Value = 0.0
    return Value
Example #6
def main(mean = 0.5, sd = 1.2):
	for x in np.linspace(1, 100000, num=16):
			max_sizes = [0.00001, 5, 10, 20, 50, 100, 250, 10**10]
			titles = ['0-4', '5-9', '10-19', '20-49', '50-99', '100-249', '250+']

			binned_sample_exp = {titles[i]: lognorm.cdf(max_sizes[i + 1], sd, scale=np.exp(mean)) - lognorm.cdf(max_sizes[i], sd, scale=np.exp(mean)) for i in range(len(max_sizes) - 1)}
			binned_sample_gen = analysis.sort_sample(lognorm.rvs(sd, scale=np.exp(mean), size=int(x)))
			binned_sample_gen = {s: v / int(x) for s, v in binned_sample_gen.items()}
			print(binned_sample_gen, binned_sample_exp)
	with Pool() as p:
		data = p.starmap(simulation_one_parameter_set.parameter_expectation, [(int(x), mean, sd) for x in np.linspace(0, 100000, num=16)])

	mean_with = []
	sd_with = []
	mean_without = []
	sd_without = []

	for d in data:
		mean_with.append(d[0] - mean)
		sd_with.append(d[1] - sd)
		mean_without.append(d[2])
		sd_without.append(d[3])

	plt.plot(np.linspace(0, 100000, num=16), mean_with)
	plt.plot(np.linspace(0, 100000, num=16), sd_with)

	plt.show()
def Generate_d_rs_out_sample(mus, cov_matrix, cv,
                             out_sample_size) -> List[List[float]]:
    n = len(mus)
    mus, cov_matrix = np.asarray(mus), np.asarray(cov_matrix)
    stds = np.sqrt([cov_matrix[i, i] for i in range(n)])

    component = 4
    component_size = int(out_sample_size / component)
    # 1: two_points
    d_rs_1 = np.asarray([
        mus - stds + np.random.randint(0, 2) * stds * 2
        for _ in range(component_size)
    ])
    # 2: independent normal
    d_rs_2 = [np.random.normal(mus, stds) for _ in range(component_size)]
    # 3: uniform
    d_rs_3 = np.asarray([
        np.random.uniform(low=mus - np.sqrt(3) * stds,
                          high=mus + np.sqrt(3) * stds)
        for _ in range(component_size)
    ])
    # 4: log normal
    d_rs_4 = np.asarray([
        lognorm.rvs(s=0.31 * (cv / 0.33), scale=mu, size=component_size)
        for mu in mus
    ]).T
    d_rs = np.concatenate([d_rs_1, d_rs_2, d_rs_3, d_rs_4])
    # clip
    d_rs[d_rs <= 0] = 0
    d_rs = d_rs.tolist()
    return d_rs
def lognorm_rvs(ln_params, size):
    # Parameters
    # [loop_prob, ln_trunc_fit, ln_trueloop_fit, nll]

    num_falseLoop = int(round(ln_params[0] * size))
    num_trueLoop = int((1 - ln_params[0]) * size)

    falseEntries = np.array([])
    trueEntries = np.array([])

    if ~np.isnan(ln_params[1][1]) and (ln_params[1][1] != 0.0):
        falseEntries = np.zeros((0, ))
        falseEntries = trunclognormprior_rvs(ln_params[1][0],
                                             ln_params[1][1],
                                             ln_params[1][2],
                                             size=num_falseLoop)

    if ~np.isnan(ln_params[2][0]) and (ln_params[2][0] != 0.0):
        trueEntries = lognorm.rvs(ln_params[2][0],
                                  ln_params[2][1],
                                  ln_params[2][2],
                                  size=num_trueLoop)

    if (~np.isnan(ln_params[1][1])) and (~np.isnan(ln_params[2][0])):
        entries = np.concatenate((falseEntries, -trueEntries), axis=0)
    elif ~np.isnan(ln_params[1][1]):
        entries = falseEntries
    elif ~np.isnan(ln_params[2][0]):
        entries = -trueEntries
    else:
        entries = np.array([1.0] * size)

    print(entries.shape[0])
    return entries
Example #9
def Probability_establishments_within_cluster(naics, establishment, df):
    values = {'Sector': 2,
                'Subsector': 3,
                'Industry Group': 4,
                'NAICS Industry': 5}
    df_interest = df.loc[df['NAICS code'] == naics]
    if df_interest.empty:
        PAU_class = PAU_DB(2008)
        df['NAICS structure'] = df['NAICS code'].apply(lambda x: PAU_class._searching_naics(x, naics))
        df['NAICS structure'] = df['NAICS structure'].map(values)
        Max =  df['NAICS structure'].max()
        df_interest = df[df['NAICS structure'] == Max]
    mean = df_interest['Mean value of shipments ($1,000)'].iloc[0]
    sd = df_interest['SD value of shipments ($1,000)'].iloc[0]
    # measure-of-size (MOS) (e.g., value of shipments, number of employees, etc.),
    # which was highly correlated with pollution abatement operating costs
    # Method of moments
    mu = np.log(mean**2/(sd**2 + mean**2)**0.5)
    theta_2 = np.log(sd**2/mean**2 + 1)
    MOS = lognorm.rvs(s = theta_2**0.5,
                   scale = np.exp(mu),
                   size = int(establishment))
    Best = max(MOS)
    Worst = min(MOS) - 10**(np.log10(min(MOS)) - 2) # For avoiding 0 probability
    MOS.sort()
    # High values of MOS represent a possible high value of PAA. Establishments with high values of PAOC and PACE had a probability of 1 of being selected
    MOS_std = {str(idx + 1):[(val - Worst)/(Best - Worst), val*10**3] for idx, val in enumerate(MOS)}
    return MOS_std
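The method-of-moments conversion used above (mu = ln(m²/√(sd² + m²)), θ² = ln(1 + sd²/m²)) can be sanity-checked in isolation; a quick sketch with illustrative moments:

import numpy as np
from scipy.stats import lognorm

mean, sd = 1200.0, 800.0   # illustrative value-of-shipments moments
mu = np.log(mean**2 / (sd**2 + mean**2)**0.5)
theta_2 = np.log(sd**2 / mean**2 + 1)

draws = lognorm.rvs(s=theta_2**0.5, scale=np.exp(mu), size=200_000)
print(draws.mean(), draws.std())   # should land close to (1200, 800)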
def create_random_variables(no_calls):

    #generate random times in an hour for each call to start
    random_call_start_times = np.random.random_sample(size=no_calls) * 3600

    # get random call lengths - comment out either the lognormal block or the exponential block below to choose the distribution
    #lognormal distribution
    random_call_length2 = lognorm.rvs(s=skewness,
                                      loc=average_no_calls,
                                      size=no_calls)

    #decaying exponential distribution
    # random_call_length2 = np.random.random_sample(size = no_calls)
    # random_call_length2=[math.log(1-random_call_length2[c])/ -(1/900) for c in range(0,len(random_call_length2))]

    ###############
    # If using the decaying exponential distribution, comment out this normalisation block
    #normalise data between 0-3600 seconds
    np.seterr(all='raise')
    try:
        random_call_length2 = random_call_length2 - min(random_call_length2)
        random_call_length2 = random_call_length2 / max(random_call_length2)
        random_call_length2 = random_call_length2 * maxValue
    except FloatingPointError:
        print("Invalid value caught and programme continues")
    ###############

    #assign call length to call start time
    call_dict = {}
    call_dict = {
        int(random_call_start_times[p]): int(random_call_length2[p])
        for p in range(0, no_calls)
    }

    return call_dict
Example #11
def lognormal(dim, loc, mean, sigma, seed=5007):
    """

    """
    # ==========================================================
    # Set the random number seed
    # ==========================================================
    np.random.seed(seed)

    # ==========================================================
    # Determine the number of cells that need to be filled
    # ==========================================================
    numCells = len(loc[0])

    # ==========================================================
    # Determine the <scale> parameter: <mean = scale exp(s^2/2)>
    # ==========================================================
    scale = mean * np.exp(-0.5 * sigma**2)

    # ==========================================================
    # Sample values
    # ==========================================================
    values = lognorm.rvs(sigma, scale=scale, size=numCells)

    # ==========================================================
    # Make cube and fill
    # ==========================================================
    cube = makeCube(dim)
    cube[loc] = values

    # ==========================================================
    # Return the cube
    # ==========================================================
    return (cube)
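The scale chosen above follows from E[X] = scale·exp(sigma²/2) for a loc-0 lognormal, so scale = mean·exp(−sigma²/2) makes the filled cells average to `mean`; a quick check with illustrative values:

import numpy as np
from scipy.stats import lognorm

mean, sigma = 2.5, 0.8
scale = mean * np.exp(-0.5 * sigma**2)

print(lognorm.mean(sigma, scale=scale))                      # exactly 2.5 (analytic mean)
print(lognorm.rvs(sigma, scale=scale, size=500_000).mean())  # ~2.5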
    def __init__(self, syn_strength, n_synapses=100, chan_density=1.0):
        # average per-synapse strength
        self.syn_strength = syn_strength

        # number of synapses to model
        self.n_synapses = n_synapses

        # Light power is unevenly distributed across synapses
        self.stim_power_scale = np.random.uniform(0.5, 1.0, size=n_synapses)

        # fraction of open channels per synapse needed for 50% release probability
        self.release_threshold = lognorm.rvs(
            0.3, size=n_synapses) * 0.5 / chan_density

        # per-synapse strength scaling
        self.synapse_strength_scale = lognorm.rvs(0.5, size=n_synapses)
Example #13
def biased_regresser(size, ty, beta=1.0):
    #
    value = []
    #
    if dataset_name == "SS":
        ## Best fit for citation dataset
        value = lognorm.rvs(1.604389429520587,
                            48.91174576443938,
                            77.36426476362374,
                            size=size)
    elif dataset_name == "JEE":
        # Best fit for JEE scoress
        if ty == 0:
            value = johnsonsu.rvs(-1.3358254338685507,
                                  1.228621987785165,
                                  -16.10471198333935,
                                  25.658144591068066,
                                  size=size)  ## Men
        if ty == 1:
            value = johnsonsu.rvs(-1.1504808824385124,
                                  1.3649066883190795,
                                  -12.879957294149737,
                                  27.482272133428403,
                                  size=size)  ## Women
    else:
        print("Unknown dataset_name=%s" % dataset_name)
        exit()
    #
    if ty == 1: value *= (beta + 1e-4)
    #
    return [{'val': val, 'real_type': ty} for val in value]
def generate(max_time, n_sequences, filename='stationary_renewal'):
    times, nll = [], []

    for _ in range(n_sequences):
        s = np.sqrt(np.log(6*6+1))
        mu = -s*s/2
        tau = lognorm.rvs(s=s, scale=np.exp(mu), size=1000)

        lpdf = lognorm.logpdf(tau, s=s, scale=np.exp(mu))
        T = tau.cumsum()

        T = T[T < max_time]
        lpdf = lpdf[:len(T)]

        score = -np.sum(lpdf)

        times.append(T)
        nll.append(score)

    if filename is not None:
        mean_number_items = sum(len(t) for t in times) / len(times)
        nll = [n/mean_number_items for n in nll]
        np.savez(f'{dataset_dir}/{filename}.npz', arrival_times=times, nll=nll, t_max=max_time, mean_number_items=mean_number_items)
    else:
        return times
Example #15
 def draw_new_params(self, param_names, heterogeneity):
     for param in param_names:
         mean = self.parameters[param] 
         std = mean*(heterogeneity/100.)
         sigma, scale = lognorm_params(mean, std)
         sample = log_pdf.rvs(sigma, 0, scale, size = 1)
         self.parameters[param] = sample
Example #16
def plot_bias():
    for sampl in np.arange(10, 45, 5):
        errs = []
        ests = []
        real_val = lognorm.ppf(0.5, 1, 0)
        for _ in range(100000):
            x = lognorm.rvs(1, 0, size=sampl)
            #est_val = estimate_median(x)
            est_val = np.median(x)
            err = (real_val - est_val) / real_val
            errs.append(err)
            ests.append(est_val)

        print(np.mean(errs))

        plt.hist(ests, bins=np.arange(0, 4, .1))
        plt.axvline(real_val, label="actual median", color="black")
        plt.axvline(np.mean(ests),
                    label="avg estimated value of median on sample size: " +
                    str(sampl),
                    color="purple")
        plt.axvline(np.median(ests),
                    label="median estimated value of median on sample size: " +
                    str(sampl),
                    color="orange")
        plt.legend()
        plt.title("Sample size = " + str(sampl))
        plt.savefig('plots/sample_' + str(sampl) + '.png')
        plt.close()
        print('processed sample size ' + str(sampl))
Example #17
def FUNC_norm_gen(p1, p2, num):
    """
    FUNC_norm_gen
    Generate S1 events using the effective marginalized pdf in S1.
    inputs: shape (p1), scale (p2), number of events (num)
    """
    return lognorm.rvs(p1, loc=0, scale=p2, size=num)
Example #18
    def _make_dark_frame(self,
                         temperature,
                         alpha=0.0488 / u.Kelvin,
                         beta=-12.772,
                         shape=0.4,
                         seed=None):
        """
        Function to create a dark current 'image' in electrons per second per pixel given
        an image sensor temperature and a set of coefficients for a simple dark current model.
        Modal dark current for the image sensor as a whole is modelled as D.C. = 10**(alpha * T + beta) where
        T is the temperature in Kelvin.  Individual pixel dark currents are random uncorrelated values from
        a log normal distribution so that there is a semi-realistic tail of 'hot pixels'.
        For reproducible dark frames the random number generator seed can optionally be specified.
        """
        temperature = temperature.to(
            u.Kelvin, equivalencies=u.equivalencies.temperature())
        mode = 10**(alpha * temperature + beta) * u.electron / (u.second)
        scale = mode * np.exp(shape**2)
        if seed:
            np.random.seed(seed)
        dark_frame = lognorm.rvs(shape,
                                 scale=scale,
                                 size=(self.wcs._naxis2, self.wcs._naxis1))

        return mode, dark_frame
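Stripped of the astropy units and WCS bookkeeping, the core of the model is choosing the lognormal scale so the distribution's mode lands on the modelled dark current (a loc-0 lognormal with shape s and scale k has its mode at k·exp(−s²)); a rough standalone sketch with an illustrative temperature:

import numpy as np
from scipy.stats import lognorm

alpha, beta, shape = 0.0488, -12.772, 0.4
temperature = 263.0                       # Kelvin, illustrative
mode = 10**(alpha * temperature + beta)   # modal dark current, e-/s/pixel
scale = mode * np.exp(shape**2)           # puts the lognormal mode at `mode`

dark_frame = lognorm.rvs(shape, scale=scale, size=(512, 512))
print(mode, np.median(dark_frame))        # median (= scale) sits above the mode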
Example #19
 def PoS_portfolio(self, PoS, size, scale=1):
     PoS_list = [PoS for i in range(size)]
     
     self.success_rates = [np.random.random() <= PoS for PoS in PoS_list]
     self.portfolio = [lognorm.rvs(sigma, scale=scale_factor) for prod in self.success_rates if prod == True]
     
     return(sum(self.portfolio), sum(self.portfolio)/size)
Example #20
def generate_random_data_from_dist(param, shape, nrows, ncols):

    if shape == 'normal':
        data = norm.rvs(0, param, size=(nrows, ncols))

    # link the two sliders and make the param for t dfs (yolked to sample size in other slider)
    # elif shape=='t':
    #     data = t.rvs(df=ncols-1)

    elif shape == 'lognormal':
        data = lognorm.rvs(param, size=(nrows, ncols))

    elif shape == 'contaminated chi-squared':

        # data = chi2.rvs(4, 0, param, size=size)
        data = chi2.rvs(4, size=(nrows, ncols))
        contam_inds = np.random.randint(ncols, size=int(param * ncols))
        data[:, contam_inds] *= 10

    elif shape == 'contaminated normal':

        sub_size = round(param * ncols)
        norm_size = int(ncols - sub_size)
        standard_norm_values = norm.rvs(0, 1, size=(nrows, norm_size))
        contam_values = norm.rvs(0, 10, size=(nrows, sub_size))
        #print(standard_norm_values.shape)
        #print(contam_values.shape)
        data = np.concatenate([standard_norm_values, contam_values], axis=1)
        #print(data.shape)

    elif shape == 'exponential':
        data = expon.rvs(0, param, size=(nrows, ncols))

    return data
Example #21
def get_gini(size):
    y = lognorm.rvs(s=1, size=size)

    # comparison
    gini_ineqpy = ineqpy.gini(income=y)
    gini_pysal = Gini(y).g
    gini_diff = abs(gini_ineqpy - gini_pysal)
    return size, gini_ineqpy, gini_pysal, gini_diff
Example #22
    def sample(self, E):
        """
        Sample a reco/true energy given a true/reco energy.
        """

        mu, sigma = self._get_lognormal_params(E)

        return lognorm.rvs(sigma, loc=0, scale=mu)
Example #23
    def _boots(self, df, newx, shape, scale, dist=lognorm):
        xr = lognorm.rvs(size=len(df['Prediction']),
                         s=shape,
                         loc=0,
                         scale=scale)
        this_shape, this_loc, this_scale = lognorm.fit(xr, floc=0)
        this_fit = dist.cdf(newx, s=this_shape, loc=0, scale=this_scale)

        return list(this_fit)
Example #24
def random_doc2vec(min_count_scale=10, **kwargs):
    return ('doc2vec', {
        'vector_size': int(np.floor(beta.rvs(loc=2, a=2, b=3, scale=100, size=1)).item()),  # Dimensionality of the feature vectors.
        'window': max(1, int(norm.rvs(loc=5, scale=1, size=1).item())),  # the maximum distance between the current and predicted word within a sentence.
        'min_count': int(np.floor(beta.rvs(loc=2, a=2, b=2.5, scale=min_count_scale, size=1)).item()),  # Ignores all words with total frequency lower than this.
        'max_vocab_size': int(np.floor(np.exp(lognorm.rvs(loc=9, s=0.1, scale=4, size=1)))),  # Limits the vocabulary; if there are more unique words than this, then prune the infrequent ones
        'sample': uniform.rvs(loc=0, scale=1e-5, size=1).item(),  # The threshold for configuring which higher-frequency words are randomly downsampled, useful range is (0, 1e-5).
        'dm_mean': bernoulli.rvs(0.4, size=1).item(),  # whether to use the sum of the context word vectors instead of the mean
        'dm_concat': bernoulli.rvs(0.05, size=1).item(),  # whether to use concatenation of context vectors rather than sum/average
    })
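A usage sketch; the keys are chosen to line up with gensim's Doc2Vec constructor arguments, though gensim as the consumer is only an assumption here:

name, params = random_doc2vec(min_count_scale=20)
print(name)          # 'doc2vec'
for key, value in sorted(params.items()):
    print(key, value)

# If gensim is the intended consumer, the dict can be splatted directly:
# from gensim.models import Doc2Vec
# model = Doc2Vec(**params)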
Example #25
 def gen_lognorm_params(self, pname, std, n = 20):
     """
     generate lognormally distributed parameters for given SD
     and parameter name
     """
     mean = self.parameters[pname]
     sigma, scale = lognorm_params(mean, std)
     sample = log_pdf.rvs(sigma, 0, scale, size = n)
     
     return sample
Example #26
    def sample_topic_lambda_sigma(self):

        while True:
            _lambda = lognorm.rvs(s=self.lambda_0_sigma,
                                  loc=0,
                                  scale=self.lambda_0_scale,
                                  size=1)[0]

            if _lambda > 0.05 and _lambda < 0.10:
                break
        while True:
            _sigma = lognorm.rvs(s=self.sigma_siama,
                                 loc=0,
                                 scale=self.sigma_scale,
                                 size=1)[0]
            if _sigma > 0.7 and _sigma < 1.1:
                break

        return _lambda, _sigma
Example #27
    def get_value(self):
        """
        Get a random value following the distribution

        Returns
        -----------
        value
            Value obtained following the distribution
        """
        from scipy.stats import lognorm

        return lognorm.rvs(self.s, self.loc, self.scale)
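For reference across these examples: scipy's lognorm(s, loc, scale) uses s for the underlying normal's sigma and scale = exp(mu), with loc simply shifting the support. A quick sketch of that correspondence:

import numpy as np
from scipy.stats import lognorm, norm

mu, sigma = 1.2, 0.7
x = lognorm.rvs(s=sigma, loc=0, scale=np.exp(mu), size=500_000)

# With loc=0, log(x) is normal with the stated mu and sigma.
print(np.log(x).mean(), np.log(x).std())   # ~1.2, ~0.7
print(norm.fit(np.log(x)))                 # fitted (mu, sigma)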
Example #28
def sample(n_samples, std=6):
    """Draw samples from the distribution.

    Args:
        n_samples: Number of samples to generate.
        std: Standard deviation of f*(t).

    """
    s = np.sqrt(np.log(std**2 + 1))
    mu = -0.5 * s * s
    inter_times = lognorm.rvs(s=s, scale=np.exp(mu), size=n_samples)
    arrival_times = inter_times.cumsum()
    return arrival_times
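With s = sqrt(log(std² + 1)) and scale = exp(−s²/2), the inter-event times have unit mean and standard deviation `std`; a quick check:

import numpy as np
from scipy.stats import lognorm

std = 6
s = np.sqrt(np.log(std**2 + 1))
mu = -0.5 * s * s
inter_times = lognorm.rvs(s=s, scale=np.exp(mu), size=1_000_000)
print(inter_times.mean(), inter_times.std())   # ~1.0, ~6.0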
Example #29
    def sample(self, samples, random_seed=None):
        """

        :param samples: int
        :param random_seed: int
        :return: np.array(samples, self.dimension)
        """
        if random_seed is not None:
            np.random.seed(random_seed)

        points = lognorm.rvs(s=self.scale, scale=self.param, size=samples)
        points = points.reshape([samples, self.dimension])
        return points**2
Example #30
    def knowledge_gain_coef(self, topics, model):

        if model == 'ST':
            # lambda_list = self._topic_lambda_dis['ST'][0]
            # lambda_probs = self._topic_lambda_dis['ST'][1]
            # return np.random.choice(lambda_list,size=len(topics),p=lambda_probs,replace=True)
            _scale, _sigma = self._topic_lambda_dis['ST']

            return lognorm.rvs(s=_sigma, loc=0, scale=_scale, size=len(topics))

        elif model == 'MT':
            ## compute for each topic and its positions
            t_num = defaultdict(list)
            for i, t in enumerate(topics):
                t_num[t].append(i)

            ## for each topic
            lambdas = []
            indexes = []
            for t in t_num.keys():
                ins = t_num[t]
                # lambda_list = self._topic_lambda_dis[t][0]
                # lambda_probs = self._topic_lambda_dis[t][1]
                # t_ls = np.random.choice(lambda_list,size=len(ins),p=lambda_probs,replace=True)
                _scale, _sigma = self._topic_lambda_dis[t]
                t_ls = lognorm.rvs(s=_sigma,
                                   loc=0,
                                   scale=_scale,
                                   size=len(ins))

                lambdas.extend(t_ls)
                indexes.extend(ins)

            ## sort lambdas back into the original index order
            return [
                lambdas[i]
                for i in sorted(range(len(indexes)), key=lambda x: indexes[x])
            ]
Example #31
def sampledist(DistributionName, Mean, Std):
    # This function is used to generate a random variable (loss or downtime) given the distribution of the variable
    if DistributionName == 'Normal':
        temp = norm.rvs(loc=Mean, scale=Std, size=1, random_state=None)
        return temp[0]
#     else: return np.exp(norm.rvs(loc=np.log(Mean), scale=Std, size=1, random_state=None))
    elif DistributionName == 'LogNormal':
        p = np.poly1d([1, -1, 0, 0, -(Std / Mean)**2])
        r = p.roots
        sol = r[(r.imag == 0) & (r.real > 0)].real
        shape = np.sqrt(np.log(sol))
        scale = Mean * sol

        return lognorm.rvs(shape, 0, scale, size=1)[0]
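A minimal usage sketch (the moment-matching above is the original author's; the Mean/Std values below are purely illustrative):

import numpy as np

losses = np.array([sampledist('LogNormal', Mean=100.0, Std=40.0)
                   for _ in range(10_000)])
print(losses.mean(), losses.std())

downtime = sampledist('Normal', Mean=30.0, Std=5.0)
print(downtime)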
Example #32
def test_PDF_fitting():
    '''
    Test distribution fitting for PDFs

    By default, we use the lognormal distribution, and only test it here.
    '''

    from scipy.stats import lognorm
    from numpy.random import seed

    seed(13493099)

    data1 = lognorm.rvs(0.4, loc=0.0, scale=1.0, size=50000)

    test = PDF(data1).run()

    npt.assert_almost_equal(0.40, test.model_params[0], decimal=2)
    npt.assert_almost_equal(1.0, test.model_params[1], decimal=1)
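The same recovery can be checked with scipy alone, independent of the PDF class; a quick sketch:

import numpy as np
from scipy.stats import lognorm

np.random.seed(13493099)
data = lognorm.rvs(0.4, loc=0.0, scale=1.0, size=50000)

shape, loc, scale = lognorm.fit(data, floc=0)   # hold loc at 0, as generated
print(shape, scale)   # ~0.40, ~1.0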
Example #33
def make_csd(shape, scale, npart, show_plot=False):
    """Create cell size distribution and save it to file."""
    if shape == 0:
        rads = [scale + 0 * x for x in range(npart)]
    else:
        rads = lognorm.rvs(shape, scale=scale, size=npart)
    with open('diameters.txt', 'w') as fout:
        for rad in rads:
            fout.write('{0}\n'.format(rad))
    if shape == 0:
        xpos = linspace(scale / 2, scale * 2, 100)
    else:
        xpos = linspace(lognorm.ppf(0.01, shape, scale=scale),
                        lognorm.ppf(0.99, shape, scale=scale), 100)
    plt.plot(xpos, lognorm.pdf(xpos, shape, scale=scale))
    plt.hist(rads, density=True)
    plt.savefig('packing_histogram.png')
    plt.savefig('packing_histogram.pdf')
    if show_plot:
        plt.show()
Example #34
def hpml(xs, ys, l0=1, noise=0.001, K=K_SE):
    xs = asarray(xs); ys = ascolumn(ys)
    def nll(l): # negative log likelihood
        #if l < 0.001: return 1e10
        Kxx = K(xs, l=l)
        Kxx += (noise**2) * eye_like(Kxx)
        res = (ys.T).dot(pinvh(Kxx)).dot(ys) + slogdet(Kxx)[1]
        res = squeeze(res)
        #print l,res
        return res
    def nll_prime(l):
        Kxx,Kps = K(xs, l=l, deriv=True)
        Kxx += (noise**2) * eye_like(Kxx)
        KxxI = pinvh(Kxx)
        a = KxxI.dot(ys)
        aaT = outer(a,a) # a . a.T
        KI_aaT = KxxI - aaT # K^-1 - aaT
        res = []
        for Kp in Kps:
            grad = trace_prod(KI_aaT, Kp)
            res.append(grad)
        return asarray(res)
    #l = fmin_cg(nll, l0, maxiter=10, disp=False, epsilon=.001)
    #l = fmin_cg(nll, l0, disp=False, epsilon=.001)
    l = fmin_cg(nll, l0, fprime=nll_prime, disp=False)#, maxiter=10, disp=False)
    best_nll = nll(l)
    nlls = set([int(best_nll/noise)])
    for i in range(20):
        cur_l0 = lognorm.rvs(1, size=size(l0))
        cur_l = fmin_cg(nll, cur_l0, fprime=nll_prime, disp=False)
        cur_nll = nll(cur_l)
        nlls.add(int(cur_nll/noise))
        if cur_nll < best_nll:
            #print 'LL up by', best_nll - cur_nll
            best_nll = cur_nll
            l = cur_l
    #print len(nlls), 'suff. uniq. LL optima:', sorted([x*noise for x in nlls])
    return absolute(l), len(nlls)
def mcprices(S0, K, T, r, sigma, N=5000):
    """
    Call and put option prices using log-normal Monte-Carlo method

    Parameters
    ----------
    S0 :
        Current price of the underlying stock
    K :
        Strike price of the option
    T :
        Time to maturity of the option
    r :
        Risk-free rate of return (continuously-compounded)
    sigma :
        Stock price volatility
    N :
        Number of stock prices to simulate

    Returns
    -------
    c :
        Call option price
    p :
        Put option price

    Notes
    -----
    r, T, and sigma must be expressed in consistent units of time        
    """
    scale = S0*exp((r-sigma**2/2)*T)
    shape = sigma*sqrt(T)
    ST_sim = lognorm.rvs(shape,scale=scale, size=N)
    call_pay_off = maximum(ST_sim - K, 0)
    put_pay_off = maximum(K - ST_sim, 0)
    discount = exp(-r*T)
    return (call_pay_off.mean()*discount,
            put_pay_off.mean()*discount)
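A usage sketch; since both prices come from the same simulated terminal prices, the results should approximately satisfy put-call parity, c − p ≈ S0 − K·exp(−rT) (parameter values are illustrative):

from numpy import exp

S0, K, T, r, sigma = 100.0, 105.0, 0.5, 0.03, 0.25
c, p = mcprices(S0, K, T, r, sigma, N=200_000)
print(c, p)
print(c - p, S0 - K * exp(-r * T))   # these two should be close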
Example #36
 def draw(self):
     return lognorm.rvs(self.shape, self.location, self.scale) * self.multiplier
Example #37
log_a = (log_a-np.mean (log_a))/np.std (log_a)
log_b = (log_b-np.mean (log_b))/np.std (log_b)
log_c = (log_c-np.mean (log_c))/np.std (log_c)

print(kstest(log_a, 'norm'))
print(kstest(log_b, 'norm'))
print(kstest(log_c, 'norm'))

plb.hist (b)
plb.hist (log_b, bins=20)
plb.hist (a, bins=100)
plb.hist (log_a, bins=10)

shape, loc, scale = lognorm.fit(a)
rnd_a = lognorm.rvs(shape, scale=scale, loc=loc, size=len(a))
plb.hist(rnd_a, bins=20, alpha=0.5)
plb.hist(a, bins=20, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(c)
rnd_c = lognorm.rvs(shape, scale=scale, loc=loc, size=len(c))
plb.hist(rnd_c, bins=30, alpha=0.5)
plb.hist(c, bins=30, color='r', alpha=0.5)

shape, loc, scale = lognorm.fit(b)
rnd_b = lognorm.rvs(shape, scale=scale, loc=loc, size=len(b))
plb.hist(rnd_b, bins=20, alpha=0.5)
plb.hist(b, bins=20, color='r', alpha=0.5)

np.mean (b)
shape = np.std (b)
Example #38
import numpy as np
from scipy.stats import uniform, lognorm
import pystan

# Data
np.random.seed(1056)                 # set seed to replicate example
nobs= 5000                           # number of obs in model 
x1 = uniform.rvs(size=nobs)          # random uniform variable

beta0 = 2.0                          # intercept
beta1 = 3.0                          # linear predictor
sigma = 1.0                          # dispersion
xb = beta0 + beta1 * x1              # linear predictor, xb
exb = np.exp(xb)

y = lognorm.rvs(sigma, scale=exb, size=nobs)    # create y as adjusted
                                                # random normal variate  
# Fit
mydata = {}
mydata['N'] = nobs
mydata['x1'] = x1
mydata['y'] = y


stan_lognormal = """
data{
    int<lower=0> N;
    vector[N] x1;
    vector[N] y;
}
parameters{ 
import powerlaw
# 5.22.1 Continuous distributions p681 scipy manual
from scipy.stats import lognorm
from numpy import rint
sdln=lognorm.rvs(1.3,loc=0,scale=10,size=10)
print("lognormal float data", sdln[1:5])
lnresults = powerlaw.distribution_fit(sdln, distribution='lognormal', discrete=False)
print(lnresults)

sdlnint=rint(sdln).astype(int)
print("lognormal int data", sdlnint[1:5])
lnintresults = powerlaw.distribution_fit(sdlnint, distribution='lognormal')
#lnintresults = powerlaw.distribution_fit(sdlnint, distribution='lognormal', discrete=True)
print(lnintresults)

Example #40
def simple_packing(shape, scale, number_of_cells):
    "Simple and fast algorithm for packing"
    Rad = lognorm.rvs(shape, scale=scale, size=number_of_cells)
    print(Rad)
    Rad /= 2
    Rads1 = list(range(number_of_cells))
    t = 0
    for i in range(number_of_cells):
        c = abs(Rad[t])
        Rads1[t] = float(c)
        t = t + 1
    Rads1 = sorted(Rads1)
    v = 0.00
    for i in range(number_of_cells):
        v = v + ((2.00 * Rads1[i])**3.00)
    centers = [[0 for i in range(3)] for j in range(number_of_cells)]
    v = v * 1.40
    lc = v**(1.00 / 3.00)
    K = 0
    while K == 0:
        j = -1
        h = 0
        timeout = time.time() + 10
        while number_of_cells >= j and h == 0:
            if time.time() > timeout:
                h = 1
                break
            j = j + 1
            if j == number_of_cells:
                K = 1
                break
            PickCenterX, PickCenterY, PickCenterZ =\
                lc * random.random(),\
                lc * random.random(),\
                lc * random.random()
            while (lc - Rads1[j] >= PickCenterX
                    and lc - Rads1[j] >= PickCenterY
                    and lc - Rads1[j] >= PickCenterZ and Rads1[j] < PickCenterX
                    and Rads1[j] < PickCenterY and Rads1[j] < PickCenterZ):
                PickCenterX, PickCenterY, PickCenterZ =\
                    lc * random.random(),\
                    lc * random.random(),\
                    lc * random.random()
            centers[j][0], centers[j][1], centers[j][2] =\
                PickCenterX, PickCenterY, PickCenterZ
            KeepCentreX, KeepCentreY, KeepCentreZ, KeepR =\
                PickCenterX, PickCenterY, PickCenterZ, Rads1[j]
            if j > 0:
                for t in range(0, j):
                    if ((((((KeepCentreX - centers[t][0])**2.00) +
                            ((KeepCentreY - centers[t][1])**2.00) +
                            ((KeepCentreZ - centers[t][2])**2.00))**0.50) -
                            (KeepR + Rads1[t])) < 0.000) and t != j:
                        centers[j][0], centers[j][0], centers[j][0] = 0, 0, 0
                        j = j - 1
                        break
    data = list(zip(*centers))
    data.append(Rads1)
    data = np.array(list(zip(*data)))
    data[:, 3] = 2 * data[:, 3]
    return data
counter = 0
for sig in sig_grid:

    # Compute optimal redistribution scheme of the government
    policy_grid.append(fsolve(
        lambda policies: foc(policies, psi, sig, start=0, end=10),
        x0=x0
    ))
    opt_tax.append(policy_grid[counter][0])
    opt_trans.append(policy_grid[counter][1])

    ## Simulate distribution of wages and compute the distribution of
    ## consumption and hours worked given the optimal redistribution scheme
    ## calculated above
    wage_grid.append(
        lognorm.rvs(s=sig, scale=np.exp(- sig**2 / 2),
                                     size=n_obs)
    )
    print("Mean wage:", np.mean(wage_grid[counter]), "(should be approximately 1)")

    hours_grid.append(hours(wage_grid[counter], opt_tax[counter], psi))
    cons_grid.append(cons(wage_grid[counter], opt_tax[counter], psi,
                          opt_trans[counter]))

    counter += 1


## Plot distributions of optimal wages, consumption and hours worked

fig = plt.figure()
plt.subplot(3, 3, 1)
plt.hist(wage_grid[0], bins=100)
Example #42
# coding:utf-8
import numpy as np
from scipy.stats import lognorm
import matplotlib.pyplot as plt

r = lognorm.rvs(1, loc=10, scale=1, size=10000)

plt.subplot(211)
plt.hist(r, bins=100)
plt.subplot(212)
plt.xscale("log")
plt.hist(np.log(r), bins=100)

plt.show()
Example #43
def main():
    usage = 'usage: %prog [options] <gtf> <fasta>'
    parser = OptionParser(usage)
    parser.add_option('-b', dest='bam_length', help='Obtain read length via sampling a distribution from a BAM file [Default: %default]')
    parser.add_option('-e', dest='error_rate', type='float', default=0, help='Error rate (uniform on reads) [Default: %default]')
    parser.add_option('-f', dest='fpkm_file', help='Cufflinks .fpkm_tracking file to use for FPKMs [Default: %default]')
    parser.add_option('-l', dest='read_length', type='int', default=30, help='Read length [Default: %default]')
    parser.add_option('-n', dest='num_reads', type='int', default=100000, help='Number of reads [Default: %default]')
    parser.add_option('-o', dest='output_prefix', default='reads', help='Output files prefix [Default: %default]')
    (options,args) = parser.parse_args()

    if len(args) != 2:
        parser.error('Must provide GTF file and fasta file')
    else:
        gtf_file = args[0]
        fasta_file = args[1]

    if options.bam_length:
        read_length_distribution = bam_length_distribution(options.bam_length)
    else:
        read_length_distribution = {options.read_length:1}

    # read GTF gene_id to transcript_id's mapping
    g2t = gff.g2t(gtf_file)

    # get transcript lengths
    transcript_lengths = {}
    for line in open(gtf_file):
        a = line.split('\t')
        if a[2] == 'exon':
            transcript_id = gff.gtf_kv(a[8])['transcript_id']
            transcript_lengths[transcript_id] = transcript_lengths.get(transcript_id,0) + int(a[4])-int(a[3])+1

    if options.fpkm_file:
        transcript_copies = {}
        fpkm_in = open(options.fpkm_file)
        line = fpkm_in.readline()
        for line in fpkm_in:
            a = line.split('\t')
            transcript_copies[a[0]] = float(a[9])
        fpkm_in.close()

        if sum(transcript_copies.values()) == 0:
            print('FPKM file shows no expression. Exiting.', file=sys.stderr)
            exit(1)
    else:
        # sample gene copies
        gene_copies_raw = lognorm.rvs(1,size=len(g2t))
        gene_copies_raw_sum = sum(gene_copies_raw)
        gene_copies = dict(zip(g2t.keys(), [gcr/gene_copies_raw_sum for gcr in gene_copies_raw]))

        # sample transcript copies
        transcript_copies = {}
        for gene_id in g2t:
            relative_copies = dict(zip(g2t[gene_id], lognorm.rvs(1,size=len(g2t[gene_id]))))
            relative_sum = sum(relative_copies.values())
            for transcript_id in g2t[gene_id]:
                transcript_copies[transcript_id] = gene_copies[gene_id]*relative_copies[transcript_id]/relative_sum

    # determine transcript probabilities as a function of copy and length
    transcript_weights = {}
    for transcript_id in transcript_copies:
        if transcript_lengths[transcript_id] >= min(read_length_distribution.keys()):
            weight = 0
            for read_length in read_length_distribution:
                weight += read_length_distribution[read_length]*transcript_copies[transcript_id]*(transcript_lengths[transcript_id]-read_length+1)

            if weight > 0:
                transcript_weights[transcript_id] = weight
    weights_sum = sum(transcript_weights.values())
    transcript_probs = dict([(tid,transcript_weights[tid]/weights_sum) for tid in transcript_weights])

    # open fasta file
    fasta = pysam.Fastafile(fasta_file)

    # open output files
    fastq_out = open('%s.fastq' % options.output_prefix, 'w')
    gff_out = open('%s_txome.gff' % options.output_prefix, 'w')

    # for each transcript
    read_index = 1
    for transcript_id in transcript_probs:
        expected_reads = transcript_probs[transcript_id]*options.num_reads
        if expected_reads == 0:
            sampled_reads = 0
        else:
            sampled_reads = poisson.rvs(expected_reads)

        for s in range(sampled_reads):
            read_length = sample_read_length(read_length_distribution)
            if transcript_lengths[transcript_id] > read_length:
                pos = random.randint(0, transcript_lengths[transcript_id]-read_length)
                seq = fasta.fetch(transcript_id, pos, pos+read_length).upper()
                if seq:
                    eseq = inject_errors(seq, options.error_rate)

                    print('@read%d\n%s\n+\n%s' % (read_index, eseq, 'I'*read_length), file=fastq_out)
                    print('\t'.join([transcript_id, 'sim', 'read', str(pos+1), str(pos+read_length), '.', '+', '.', 'read%d' % read_index]), file=gff_out)

                    read_index += 1
                else:
                    print('Missing fasta sequence %s:%d-%d' % (transcript_id, pos, pos+read_length), file=sys.stderr)

    fastq_out.close()
    gff_out.close()

    # map back to genome
    subprocess.call('tgff_cgff.py -c %s %s_txome.gff > %s_genome.gff' % (gtf_file, options.output_prefix, options.output_prefix), shell=True)
###############################################################
###############################################################
#                                                             #
#            Build some                                       #
#                        Random Dataframes                    #
#                                     For mass building       #
#                                                             #
###############################################################
###############################################################

failSeries = expon.rvs(scale=20, size=100)
# Calculate lognorm parameters
muLog = np.log(15/np.sqrt(1+(10/15**2)))
sigLog = np.sqrt(np.log(1 + 10/15**2))
recoverSeries = np.exp(lognorm.rvs(sigLog, loc=muLog, size=100))
failPerf = 0.1 * uniform.rvs(size=100)
recoveryPerf = 0.9 + 0.2 * uniform.rvs(size=100)


paramArray = pd.DataFrame({'FailTime': failSeries,
                           'RecoverTime': failSeries + recoverSeries,
                           'FailPerformance': failPerf,
                           'RecoveryPerformance': recoveryPerf})

paramArray2 = pd.DataFrame({'FailTime': 15,
                            'RecoverTime': 15 + recoverSeries,
                            'FailPerformance': failPerf,
                            'RecoveryPerformance': recoveryPerf})

from scipy import stats
from scipy.stats import lognorm
rrr=lognorm.rvs(10,loc=0,scale=2,size=1000)
print(rrr[1:10])
print("log normal fit", lognorm.fit(rrr, 5, loc=0, scale=3))
rrr[1:10]
from numpy import rint
from numpy import around
ppp = around(rrr)
print(ppp[1:10])
print(lognorm.fit(ppp, 5, loc=0, scale=3))

# Trying out different broad distributions with linear and logarithmic PDFs:

n_points = 100000

# power law:
# slope = -2!
one_over_rands = 1/np.random.rand(n_points)
# http://en.wikipedia.org/wiki/Power_law

# exponential distribution
exps = expon.rvs(size=1000)
# http://en.wikipedia.org/wiki/Exponential_distribution

# lognormal (looks like a normal distribution in a log-log scale!)
lognorms = lognorm.rvs(1.0, size=1000)
# http://en.wikipedia.org/wiki/Log-normal_distribution

fig = plt.figure(figsize=(15,15))
fig.suptitle('Different broad distribution PDFs in lin-lin, log-log, and lin-log axes')
n_bins = 30

for i, (rands, name) in enumerate(zip([one_over_rands, exps, lognorms],
                                      ["power law", "exponential", "lognormal"])):
    # linear-linear scale
    ax = fig.add_subplot(4, 3, i+1)
    ax.hist(rands, n_bins, density=True)
    ax.text(0.5,0.9, "PDF, lin-lin: " + name, transform=ax.transAxes)
    # log-log scale
    ax = fig.add_subplot(4, 3, i+4)
    bins = np.logspace(np.log10(np.min(rands)), np.log10(np.max(rands)), num=n_bins)
Example #47
    def __init__(self, a, b, n, name, pa=0.1, pb=0.9, lognormal=False, Plot=True):

        mscale.register_scale(ProbitScale)

        if Plot:
            fig = plt.figure(facecolor="white")
            ax1 = fig.add_subplot(121, axisbelow=True)
            ax2 = fig.add_subplot(122, axisbelow=True)
            ax1.set_xlabel(name)
            ax1.set_ylabel("ECDF and Best Fit CDF")
            prop = matplotlib.font_manager.FontProperties(size=8)

        if lognormal:

            sigma = (log(b) - log(a)) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
            mu = log(a) - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
            cdf = arange(0.001, 1.000, 0.001)
            ppf = list(map(lambda v: lognorm.ppf(v, sigma, scale=exp(mu)), cdf))

            x = lognorm.rvs(sigma, scale=exp(mu), size=n)
            x.sort()

            print("generating lognormal %s, p50 %0.3f, size %s" % (name, exp(mu), n))
            x_s, ecdf_x = ecdf(x)

            best_fit = lognorm.cdf(x, sigma, scale=exp(mu))
            if Plot:
                ax1.set_xscale("log")
                ax2.set_xscale("log")
            hist_y = lognorm.pdf(x_s, std(log(x)), scale=exp(mu))

        else:

            sigma = (b - a) / ((erfinv(2 * pb - 1) - erfinv(2 * pa - 1)) * (2 ** 0.5))
            mu = a - erfinv(2 * pa - 1) * sigma * (2 ** 0.5)
            cdf = arange(0.001, 1.000, 0.001)
            ppf = list(map(lambda v: norm.ppf(v, mu, scale=sigma), cdf))

            print("generating normal %s, p50 %0.3f, size %s" % (name, mu, n))
            x = norm.rvs(mu, scale=sigma, size=n)
            x.sort()
            x_s, ecdf_x = ecdf(x)
            best_fit = norm.cdf((x - mean(x)) / std(x))
            hist_y = norm.pdf(x_s, loc=mean(x), scale=std(x))
            pass

        if Plot:
            ax1.plot(ppf, cdf, "r-", linewidth=2)
            ax1.set_yscale("probit")
            ax1.plot(x_s, ecdf_x, "o")

            ax1.plot(x, best_fit, "r--", linewidth=2)

            n, bins, patches = ax2.hist(x, density=True, facecolor="green", alpha=0.75)
            bincenters = 0.5 * (bins[1:] + bins[:-1])
            ax2.plot(x_s, hist_y, "r--", linewidth=2)
            ax2.set_xlabel(name)
            ax2.set_ylabel("Histogram and Best Fit PDF")
            ax1.grid(b=True, which="both", color="black", linestyle="-", linewidth=1)
            # ax1.grid(b=True, which='major', color='black', linestyle='--')
            ax2.grid(True)

        return