Example #1
import numpy as np
from numpy import logical_or, logical_not, multiply, empty
from scipy.stats import norm, gamma


def q2qnbinom(counts, input_mean, output_mean, dispersion):
    """Quantile-to-quantile mapping of counts from a negative binomial with
    `input_mean` to one with `output_mean`, approximated by averaging a
    normal and a gamma quantile map.
    """
    zero = logical_or(input_mean < 1e-14, output_mean < 1e-14)
    input_mean[zero] = input_mean[zero] + 0.25
    output_mean[zero] = output_mean[zero] + 0.25
    ri = 1 + multiply(np.matrix(dispersion).T, input_mean)
    vi = multiply(input_mean, ri)
    rO = 1 + multiply(np.matrix(dispersion).T, output_mean)
    vO = multiply(output_mean, rO)
    i = counts >= input_mean
    low = logical_not(i)
    p1 = empty(counts.shape, dtype=np.float64)
    p2 = p1.copy()
    q1, q2 = p1.copy(), p1.copy()
    if i.any():
        p1[i] = norm.logsf(counts[i], loc=input_mean[i], scale=np.sqrt(vi[i]))[0, :]
        p2[i] = gamma.logsf(counts[i], (input_mean / ri)[i], scale=ri[i])[0, :]
        q1[i] = norm.ppf(1 - np.exp(p1[i]), output_mean[i], np.sqrt(vO[i]))[0, :]
        q2[i] = gamma.ppf(1 - np.exp(p2[i]), np.divide(output_mean[i], rO[i]), scale=rO[i])[0, :]

    if low.any():
        p1[low] = norm.logcdf(counts[low], loc=input_mean[low], scale=np.sqrt(vi[low]))[0, :]
        p2[low] = gamma.logcdf(counts[low], input_mean[low] / ri[low], scale=ri[low])[0, :]
        q1[low] = norm.ppf(np.exp(p1[low]), loc=output_mean[low], scale=np.sqrt(vO[low]))[0, :]
        q2[low] = gamma.ppf(np.exp(p2[low]), output_mean[low] / rO[low], scale=rO[low])[0, :]
    return (q1 + q2) / 2
	def genGammaTable(self, randTable):
		"""
		This function will generate the gamma table, given that it knows about
		elicited parameters and the randomTable.
		The following line of code:
			myRows = trunc(gammaRows * R[0] / total_w)
		decides how many rows to allot to a given expert's opinion, in proportion
		to the normalized weight assigned to that expert's parametrization.
		"""
		gammaRows = len(randTable)
		numExperts = len(self.elicited)
		numParams = len(self.elicited[0])
		randRow = 0
		# First, normalize the weights.
		total_w = sum([R[0] for R in self.elicited])
		for R in self.elicited:
			myRows = trunc(gammaRows * R[0] / total_w)
			for r in range(int(myRows)):
					l = []
					for n in range(1, numParams):
						prob = randTable[randRow][n-1]
						alpha = R[n]
						l.append(gamma.ppf(prob, alpha))
					l = norm_log(l)
					self.gammaTable.append(l)
					randRow += 1	
		self.gammaTable = array(self.gammaTable)
    def get_max_firing_rate(self):
        """
        Return the maximum firing rate of the neuron. Where the maximum firing rate is defined
        as the rate at which the CDF =0.99

        :return: maximum firing rate of the neuron.
        """
        return gamma.ppf(0.99, self.a, scale=self.b, loc=0)
    def __get_object_preference(self, cdf_loc):
        """
        Use the inverse cdf to get a random firing rate modifier, its normalized firing rate.

        :rtype : Firing rate modifier
        """
        obj_pref = gamma.ppf(cdf_loc, self.a, scale=self.b, loc=0)

        return obj_pref / self.get_max_firing_rate()
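For reference, the same quantile calls outside the class; a minimal sketch with hypothetical shape/scale values standing in for self.a and self.b:

from scipy.stats import gamma

a, b = 4.0, 2.0                                       # hypothetical shape and scale
max_rate = gamma.ppf(0.99, a, scale=b, loc=0)         # rate at which the CDF reaches 0.99
pref = gamma.ppf(0.5, a, scale=b, loc=0) / max_rate   # normalized modifier at the median
print(max_rate, pref)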
Example #5
    def ppf(self, U):
        '''
        Evaluates the percentile function (inverse c.d.f.) for a given array of quantiles.

        :param U: Percentiles for which the ppf will be computed.
        :type U: numpy.array
        :returns: A Data object containing the values of the ppf.
        :rtype: natter.DataModule.Data
        '''
        return Data(gamma.ppf(U,self.param['u'],scale=self.param['s'])**(1/self.param['p']))
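A standalone numeric sketch of the same transform (a gamma quantile raised to the power 1/p), using hypothetical parameter values in place of self.param and omitting the natter Data wrapper:

import numpy as np
from scipy.stats import gamma

u_shape, s, p = 2.0, 1.0, 1.5              # hypothetical 'u', 's' and 'p' parameters
U = np.array([0.1, 0.5, 0.9])
x = gamma.ppf(U, u_shape, scale=s) ** (1.0 / p)
print(x)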
Example #6
    def quantile_match(self):
        
        mask_indices = np.where(self.nc_patches["masks"] == 1)
        obj_values = self.nc_patches["obj_values"][mask_indices]
        obj_values = np.array(obj_values)
        percentiles = np.linspace(0.1, 99.9, 100)
        
        try:
            filename = self.size_distribution_training_path + '{0}_{1}_Size_Distribution.csv'.format(self.ensemble_name,
                                                                                                    self.watershed_obj)

            train_period_obj_per_vals = pd.read_csv(filename)
            train_period_obj_per_vals = train_period_obj_per_vals.loc[:,"Values"].values
            per_func = interp1d(train_period_obj_per_vals, percentiles / 100.0, 
                                bounds_error=False, fill_value=(0.1, 99.9))
        except Exception:  # fall back to the current run's objects if the training file cannot be used
            obj_per_vals = np.percentile(obj_values, percentiles)
            per_func = interp1d(obj_per_vals, percentiles / 100.0, bounds_error=False, fill_value=(0.1, 99.9))

        obj_percentiles = np.zeros(self.nc_patches["masks"].shape)
        obj_percentiles[mask_indices] = per_func(obj_values)
        obj_hail_sizes = np.zeros(obj_percentiles.shape)
        model_name = self.model_name.replace(" ", "-")
        self.units = "mm"
        self.data = np.zeros((self.forecast_hours.size,
                              self.mapping_data["lon"].shape[0],
                              self.mapping_data["lon"].shape[1]), dtype=np.float32)
        sh = self.forecast_hours.min()
        for p in range(obj_hail_sizes.shape[0]):
            if self.hail_forecast_table.loc[p, self.condition_model_name.replace(" ", "-") + "_conditionthresh"] > 0.5:
                patch_mask = np.where(self.nc_patches["masks"][p] == 1)
                obj_hail_sizes[p,
                               patch_mask[0],
                               patch_mask[1]] = gamma.ppf(obj_percentiles[p,
                                                                          patch_mask[0],
                                                                          patch_mask[1]],
                                                          self.hail_forecast_table.loc[p,
                                                                                       model_name + "_shape"],
                                                          self.hail_forecast_table.loc[p,
                                                                                       model_name + "_location"],
                                                          self.hail_forecast_table.loc[p,
                                                                                       model_name + "_scale"])
                self.data[self.nc_patches["forecast_hour"][p] - sh,
                          self.nc_patches["i"][p, patch_mask[0], patch_mask[1]],
                          self.nc_patches["j"][p, patch_mask[0], patch_mask[1]]] = obj_hail_sizes[p, patch_mask[0], patch_mask[1]]
        return
Example #7
import numpy as np
from scipy.stats import gamma


def hsic_gam(X, Y, alph=0.5):
	"""
	X, Y are numpy vectors with row - sample, col - dim
	alph is the significance level
	auto choose median to be the kernel width
	"""
	n = X.shape[0]

	# ----- width of X -----
	Xmed = X

	G = np.sum(Xmed*Xmed, 1).reshape(n,1)
	Q = np.tile(G, (1, n) )
	R = np.tile(G.T, (n, 1) )

	dists = Q + R - 2* np.dot(Xmed, Xmed.T)
	dists = dists - np.tril(dists)
	dists = dists.reshape(n**2, 1)

	width_x = np.sqrt( 0.5 * np.median(dists[dists>0]) )
	# ----- -----

	# ----- width of Y -----
	Ymed = Y

	G = np.sum(Ymed*Ymed, 1).reshape(n,1)
	Q = np.tile(G, (1, n) )
	R = np.tile(G.T, (n, 1) )

	dists = Q + R - 2* np.dot(Ymed, Ymed.T)
	dists = dists - np.tril(dists)
	dists = dists.reshape(n**2, 1)

	width_y = np.sqrt( 0.5 * np.median(dists[dists>0]) )
	# ----- -----

	bone = np.ones((n, 1), dtype = float)
	H = np.identity(n) - np.ones((n,n), dtype = float) / n

	K = rbf_dot(X, X, width_x)
	L = rbf_dot(Y, Y, width_y)

	Kc = np.dot(np.dot(H, K), H)
	Lc = np.dot(np.dot(H, L), H)

	testStat = np.sum(Kc.T * Lc) / n

	varHSIC = (Kc * Lc / 6)**2

	varHSIC = ( np.sum(varHSIC) - np.trace(varHSIC) ) / n / (n-1)

	varHSIC = varHSIC * 72 * (n-4) * (n-5) / n / (n-1) / (n-2) / (n-3)

	K = K - np.diag(np.diag(K))
	L = L - np.diag(np.diag(L))

	muX = np.dot(np.dot(bone.T, K), bone) / n / (n-1)
	muY = np.dot(np.dot(bone.T, L), bone) / n / (n-1)

	mHSIC = (1 + muX * muY - muX - muY) / n

	al = mHSIC**2 / varHSIC
	bet = varHSIC*n / mHSIC

	thresh = gamma.ppf(1-alph, al, scale=bet)[0][0]

	return (testStat, thresh)
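A hedged usage sketch: hsic_gam relies on an rbf_dot helper that is not shown in this snippet, so a hypothetical stand-in computing a Gaussian (RBF) kernel matrix is included purely to make the call runnable.

import numpy as np


def rbf_dot(pattern1, pattern2, width):
    # hypothetical stand-in for the missing helper: RBF kernel matrix between row-sample arrays
    sq1 = np.sum(pattern1 * pattern1, 1).reshape(-1, 1)
    sq2 = np.sum(pattern2 * pattern2, 1).reshape(-1, 1)
    dists = sq1 + sq2.T - 2 * np.dot(pattern1, pattern2.T)
    return np.exp(-dists / (2 * width ** 2))


rng = np.random.default_rng(0)
X = rng.normal(size=(100, 1))
Y = 0.5 * X + 0.1 * rng.normal(size=(100, 1))    # clearly dependent data
test_stat, thresh = hsic_gam(X, Y, alph=0.05)
print(test_stat > thresh)                        # True suggests dependence at the 5% level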
Example #8
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm, gamma


def generate_data(Nsims, plot=True):

    # Get z1, ... , z10 where zi = (zi(1), zi(2)) and zi(1), zi(2) ~ U(0,1)
    z1 = np.random.uniform(size=10)
    z2 = np.random.uniform(size=10)
    z = list(zip(z1, z2))

    # Get theta in (0, 0.4, 5) with probabilities (0.05, 0.6, 0.35)
    x = np.random.uniform(0, 1, size=Nsims)
    thetas = np.zeros(Nsims)

    thetas[np.where(x <= 0.05)] = 0
    thetas[np.where((0.05 < x) & (x <= 0.65))] = 0.4
    thetas[np.where(x > 0.65)] = 5

    # Generate realizations of (L1, ..., L10) using Gaussian copulas
    x = np.zeros((Nsims, 10))
    theta_labels = {5: [], 0.4: [], 0: []}
    for i in range(Nsims):
        theta = thetas[i]
        # Get the correlation
        # rho_ij = exp{-theta_i * ||zi - zj||} where ||.|| denotes the Euclidean distance
        Omega = np.zeros((10, 10))
        for j in range(10):
            for k in range(10):
                Omega[j, k] = np.exp(
                    -theta * np.linalg.norm(np.array(z[j]) - np.array(z[k])))

        # Create samples from a correlated multivariate normal
        x0 = np.random.multivariate_normal(mean=np.zeros(10), cov=Omega)
        x[i, :] = x0
        theta_labels[theta].append(i)

    # Get uniform marginals
    u = norm.cdf(x)

    # Marginal distributions: Li = 25 + Gamma(shape=5, scale=0.2*(i+1)), i.e. loc=25 shifted gammas
    L = np.zeros((Nsims, 10))
    x_axis = np.linspace(25, 50, 200)
    means = np.zeros(10)
    for i in range(10):
        L_i = gamma.ppf(u[:, i], a=5, loc=25, scale=0.2 * (i + 1))
        L[:, i] = L_i

        means[i] = np.mean(L_i)

        # Gamma distribution plot
        if plot:
            y_i = gamma.pdf(x_axis, a=5, loc=25, scale=0.2 * (i + 1))
            plt.plot(x_axis, y_i, label=f"scale={0.2*(i+1)}")
    if plot:
        plt.legend()
        plt.savefig('Plots/ex/data_marginal_dist.pdf', format='pdf')
        plt.show()

    max_mean = np.max(means)
    min_mean = np.min(means)
    marker_sizes = (means - min_mean) / (max_mean - min_mean) * 150

    # Location plot
    if plot:
        plt.scatter(z1, z2, marker='o', color='black', s=marker_sizes)
        plt.savefig('Plots/ex/data_location_by_mean.pdf', format='pdf')
        plt.show()

    # Define the data and get the bandwidths, density and CDF
    data = {"y": np.sum(L, axis=1), "x": L}

    return data, theta_labels
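A brief usage sketch (plotting is disabled so no Plots/ex/ directory is needed; the numbers are random):

data, theta_labels = generate_data(Nsims=500, plot=False)
print(data["y"].shape, data["x"].shape)                   # (500,), (500, 10)
print({theta: len(idx) for theta, idx in theta_labels.items()})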
Example #9
import numpy as np
import pandas as pd
from scipy.stats import gamma


def relSDM(obs, mod, sce, cdf_threshold=0.9999999, lower_limit=0.1):
    '''Relative scaled distribution mapping, assuming a gamma-distributed
    parameter with lower limit zero; rewritten from pyCAT for 1D data.

    obs :: observed variable time series
    mod :: modelled variable for the same period as obs
    sce :: modelled time series to be bias-corrected
    cdf_threshold :: upper limit applied to CDF values
    lower_limit :: lower limit of the data signal (values below are treated as dry)

    Returns the corrected time series; tested with pandas Series.
    '''

    obs_r = obs[obs >= lower_limit]
    mod_r = mod[mod >= lower_limit]
    sce_r = sce[sce >= lower_limit]

    obs_fr = 1. * len(obs_r) / len(obs)
    mod_fr = 1. * len(mod_r) / len(mod)
    sce_fr = 1. * len(sce_r) / len(sce)
    sce_argsort = np.argsort(sce)

    obs_gamma = gamma.fit(obs_r, floc=0)
    mod_gamma = gamma.fit(mod_r, floc=0)
    sce_gamma = gamma.fit(sce_r, floc=0)

    obs_cdf = gamma.cdf(np.sort(obs_r), *obs_gamma)
    mod_cdf = gamma.cdf(np.sort(mod_r), *mod_gamma)
    obs_cdf[obs_cdf > cdf_threshold] = cdf_threshold
    mod_cdf[mod_cdf > cdf_threshold] = cdf_threshold

    expected_sce_raindays = min(
        int(np.round(len(sce) * obs_fr * sce_fr / mod_fr)), len(sce))
    sce_cdf = gamma.cdf(np.sort(sce_r), *sce_gamma)
    sce_cdf[sce_cdf > cdf_threshold] = cdf_threshold

    # interpolate cdf-values for obs and mod to the length of the scenario
    obs_cdf_intpol = np.interp(np.linspace(1, len(obs_r), len(sce_r)),
                               np.linspace(1, len(obs_r), len(obs_r)), obs_cdf)
    mod_cdf_intpol = np.interp(np.linspace(1, len(mod_r), len(sce_r)),
                               np.linspace(1, len(mod_r), len(mod_r)), mod_cdf)

    # adapt the observation cdfs
    obs_inverse = 1. / (1 - obs_cdf_intpol)
    mod_inverse = 1. / (1 - mod_cdf_intpol)
    sce_inverse = 1. / (1 - sce_cdf)
    adapted_cdf = 1 - 1. / (obs_inverse * sce_inverse / mod_inverse)
    adapted_cdf[adapted_cdf < 0.] = 0.

    # correct by adapted observation cdf-values
    xvals = gamma.ppf(np.sort(adapted_cdf), *obs_gamma) * gamma.ppf(
        sce_cdf, *sce_gamma) / gamma.ppf(sce_cdf, *mod_gamma)

    # interpolate to the expected length of future raindays
    correction = np.zeros(len(sce))
    if len(sce_r) > expected_sce_raindays:
        xvals = np.interp(np.linspace(1, len(sce_r), expected_sce_raindays),
                          np.linspace(1, len(sce_r), len(sce_r)), xvals)
    else:
        xvals = np.hstack(
            (np.zeros(expected_sce_raindays - len(sce_r)), xvals))

    correction[sce_argsort[-expected_sce_raindays:]] = xvals

    return pd.Series(correction, index=sce.index)
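A hedged usage sketch with synthetic daily "precipitation" series; the index, lengths and gamma parameters below are made up purely for illustration:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
idx_hist = pd.date_range("2000-01-01", periods=3650, freq="D")
idx_fut = pd.date_range("2050-01-01", periods=3650, freq="D")

obs = pd.Series(rng.gamma(0.6, 4.0, idx_hist.size), index=idx_hist)
mod = pd.Series(rng.gamma(0.5, 5.0, idx_hist.size), index=idx_hist)
sce = pd.Series(rng.gamma(0.5, 6.0, idx_fut.size), index=idx_fut)

corrected = relSDM(obs, mod, sce)
print(corrected.describe())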
Example #10
       % (np.float(afb), np.float(bfb)))

# Plot Max firing rates based on Lehky (non-optimal stimuli set) & the full (optimal stimuli set)
n = 1000
shape_param_dist_f = gamma.rvs(afa, scale=bfa, loc=0, size=n)
scale_param_dist_f = gamma.rvs(afb, scale=bfb, loc=0, size=n)

shape_param_dist_l = gamma.rvs(ala, scale=bla, loc=0, size=n)
scale_param_dist_l = gamma.rvs(alb, scale=blb, loc=0, size=n)

max_rates_f = []
max_rates_l = []

for index in np.arange(n):
    max_rates_f.append(
        gamma.ppf(0.99, shape_param_dist_f[index], loc=0, scale=scale_param_dist_f[index]))

    max_rates_l.append(
        gamma.ppf(0.99, shape_param_dist_l[index], loc=0, scale=scale_param_dist_l[index]))

plt.figure("Max Fire Rate Distributions")
plt.subplot(211)
plt.hist(max_rates_f)
plt.title('Histogram of full (unscaled) max spike rates')
plt.subplot(212)
plt.hist(max_rates_l, label='method1')
plt.title('Histogram of scaled (Lehky) max spike rates')

# Method 2 of getting Lehky distribution from full spike rates
# noinspection PyArgumentList
scale_factors = np.random.rand(n)
Example #11
# print(np.mean(deaths_5), np.mean(deaths_6), np.mean(deaths_7), np.mean(deaths_8))

deaths = [deaths_5, deaths_6, deaths_7, deaths_8]
deaths
# deaths_alt = deaths_5+deaths_6+deaths_7+deaths_8
# print(deaths_alt)

plt.figure(figsize=(16, 8))
death_sum = 0
i = 0

for d_i in deaths:
    death_sum += sum(d_i)
    alpha = death_sum + 1
    b = (i + 1) * 7 + (1 / beta)
    x = np.linspace(gamma.ppf(0.01, alpha, scale=1 / b),
                    gamma.ppf(0.99, alpha, scale=1 / b), 100)
    plt.title("Posterior Gamma distributions")
    label = "Week-" + str(i + 5) + " MAP(mean): " + str(alpha / b)
    plt.plot(x, gamma.pdf(x, alpha, scale=1 / b), label=label)
    plt.xlabel("Deaths")
    plt.ylabel("PDF of Gamma distribution")
    plt.legend()
    i += 1

plt.show()

# ### Observations:
#
# - From the above graphs, we can see that as the weeks progress, the MAP (mean) estimate decreases, indicating a decline in the number of deaths.
# - We can also infer that, if the trend continues at a similar rate, the number of deaths may saturate over time.
Example #12
def relative_sdm(obs_cube, mod_cube, sce_cubes, *args, **kwargs):
    """
    apply relative scaled distribution mapping to all scenario cubes
    assuming a gamma distributed parameter (with lower limit zero)

    if one of obs, mod or sce has fewer than min_samplesize valid
    values, the correction will NOT be performed and the original data
    is passed through unchanged

    Args:

    * obs_cube (:class:`iris.cube.Cube`):
        the observational data

    * mod_cube (:class:`iris.cube.Cube`):
        the model data at the reference period

    * sce_cubes (:class:`iris.cube.CubeList`):
        the scenario data that shall be corrected

    Kwargs:

    * lower_limit (float):
        assume values below lower_limit to be zero (default: 0.1)

    * cdf_threshold (float):
        limit of the cdf-values (default: .99999999)

    * min_samplesize (int):
        minimal number of samples (e.g. wet days) for the gamma fit
        (default: 10)
    """
    from scipy.stats import gamma

    lower_limit = kwargs.get('lower_limit', 0.1)
    cdf_threshold = kwargs.get('cdf_threshold', .99999999)
    min_samplesize = kwargs.get('min_samplesize', 10)

    obs_cube_mask = np.ma.getmask(obs_cube.data)
    cell_iterator = np.nditer(obs_cube.data[0], flags=['multi_index'])
    while not cell_iterator.finished:
        index_list = list(cell_iterator.multi_index)
        cell_iterator.iternext()

        index_list.insert(0, 0)
        index = tuple(index_list)

        # consider only cells with valid observational data
        if obs_cube_mask and obs_cube_mask[index]:
            continue

        index_list[0] = slice(0, None, 1)
        index = tuple(index_list)

        obs_data = obs_cube.data[index]
        mod_data = mod_cube.data[index]
        obs_raindays = obs_data[obs_data >= lower_limit]
        mod_raindays = mod_data[mod_data >= lower_limit]

        if obs_raindays.size < min_samplesize \
           or mod_raindays.size < min_samplesize:
            continue

        obs_frequency = 1. * obs_raindays.shape[0] / obs_data.shape[0]
        mod_frequency = 1. * mod_raindays.shape[0] / mod_data.shape[0]
        obs_gamma = gamma.fit(obs_raindays, floc=0)
        mod_gamma = gamma.fit(mod_raindays, floc=0)

        obs_cdf = gamma.cdf(np.sort(obs_raindays), *obs_gamma)
        mod_cdf = gamma.cdf(np.sort(mod_raindays), *mod_gamma)
        obs_cdf[obs_cdf > cdf_threshold] = cdf_threshold
        mod_cdf[mod_cdf > cdf_threshold] = cdf_threshold

        for sce_cube in sce_cubes:
            sce_data = sce_cube[index].data
            sce_raindays = sce_data[sce_data >= lower_limit]

            if sce_raindays.size < min_samplesize:
                continue

            sce_frequency = 1. * sce_raindays.shape[0] / sce_data.shape[0]
            sce_argsort = np.argsort(sce_data)
            sce_gamma = gamma.fit(sce_raindays, floc=0)

            expected_sce_raindays = int(min(
                np.round(
                    len(sce_data) * obs_frequency * sce_frequency /
                    mod_frequency), len(sce_data)))

            sce_cdf = gamma.cdf(np.sort(sce_raindays), *sce_gamma)
            sce_cdf[sce_cdf > cdf_threshold] = cdf_threshold

            # interpolate cdf-values for obs and mod to the length of the
            # scenario
            obs_cdf_intpol = np.interp(
                np.linspace(1, len(obs_raindays), len(sce_raindays)),
                np.linspace(1, len(obs_raindays), len(obs_raindays)), obs_cdf)
            mod_cdf_intpol = np.interp(
                np.linspace(1, len(mod_raindays), len(sce_raindays)),
                np.linspace(1, len(mod_raindays), len(mod_raindays)), mod_cdf)

            # adapt the observation cdfs
            obs_inverse = 1. / (1 - obs_cdf_intpol)
            mod_inverse = 1. / (1 - mod_cdf_intpol)
            sce_inverse = 1. / (1 - sce_cdf)
            adapted_cdf = 1 - 1. / (obs_inverse * sce_inverse / mod_inverse)
            adapted_cdf[adapted_cdf < 0.] = 0.

            # correct by adapted observation cdf-values
            xvals = gamma.ppf(np.sort(adapted_cdf), *obs_gamma) *\
                gamma.ppf(sce_cdf, *sce_gamma) /\
                gamma.ppf(sce_cdf, *mod_gamma)

            # interpolate to the expected length of future raindays
            correction = np.zeros(len(sce_data))
            if len(sce_raindays) > expected_sce_raindays:
                xvals = np.interp(
                    np.linspace(1, len(sce_raindays), expected_sce_raindays),
                    np.linspace(1, len(sce_raindays), len(sce_raindays)),
                    xvals)
            else:
                xvals = np.hstack(
                    (np.zeros(expected_sce_raindays - len(sce_raindays)),
                     xvals))

            correction[sce_argsort[-expected_sce_raindays:]] = xvals
            sce_cube.data[index] = correction
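A hedged usage sketch (the NetCDF file names are hypothetical and the iris package is assumed to be installed):

import iris

obs_cube = iris.load_cube("obs_precip.nc")         # hypothetical observation file
mod_cube = iris.load_cube("mod_precip_hist.nc")    # hypothetical reference-period model file
sce_cubes = iris.cube.CubeList([iris.load_cube("mod_precip_scenario.nc")])

relative_sdm(obs_cube, mod_cube, sce_cubes, lower_limit=0.1, min_samplesize=10)
# sce_cubes[0].data now holds the bias-corrected scenario values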
Example #13
def getDelta(myA, myAlpha):
    q = gamma.ppf(myAlpha, myA, 0, 1 / myA)
    return (q - 1) * (myA + np.divide(1 - myA, q))
Example #14
def posterior_ess(Y,
                  M,
                  Sigma,
                  A,
                  B,
                  C,
                  Beta=None,
                  lam_gridsize=100,
                  nburn=1000,
                  nsamples=1000,
                  nthin=1,
                  nthreads=1,
                  print_freq=100):
    # Filter out the unknown Y values
    Present = Y >= 0

    if Beta is None:
        # Initialize beta to the approximate MLE where data is not missing
        # and the prior where data is missing
        Beta = M * (1 - Present) + Present * (
            (Y - C[:, None]) / A[:, None] * B[:, None]).clip(1e-6, 1 - 1e-6)

    # Use a grid approximation for lambda integral
    Lam_grid, Lam_weights = [], []
    for a, b, c in zip(A, B, C):
        grid = np.linspace(gamma.ppf(1e-3, a, scale=b),
                           gamma.ppf(1 - 1e-3, a, scale=b),
                           lam_gridsize)[np.newaxis, :]
        weights = gamma.pdf(grid, a, scale=b)
        weights /= weights.sum()
        Lam_grid.append(grid)
        Lam_weights.append(weights)
    Lam_grid = np.array(Lam_grid)
    Lam_weights = np.array(Lam_weights)

    # Create the results arrays
    Cur_log_likelihood = np.zeros(M.shape[0])
    chol = np.linalg.cholesky(Sigma)
    Beta_samples = np.zeros((nsamples, Beta.shape[0], Beta.shape[1]))
    Loglikelihood_samples = np.zeros(nsamples)

    if nthreads == 1:
        ### Create a log-likelihood function for the ES sampler ###
        def log_likelihood_fn(proposal_beta, idx):
            if np.any(proposal_beta[:-1] > proposal_beta[1:]):
                return -np.inf
            present = Present[idx]
            y = Y[idx][present][:, np.newaxis]
            tau = ilogit(proposal_beta)[present][:, np.newaxis]
            grid = Lam_grid[idx]
            weights = Lam_weights[idx]
            c = C[idx]
            return np.log((poisson.pmf(y, grid * tau + c) * weights).clip(
                1e-10, np.inf).sum(axis=1)).sum()

        # Run the MCMC sampler on a single thread
        for step in range(nburn + nsamples * nthin):
            if print_freq and step % print_freq == 0:
                if step > 0:
                    sys.stdout.write("\033[F")  # Cursor up one line
                print('MCMC step {}'.format(step))

            # Elliptical slice sample for each beta
            for idx, beta in enumerate(Beta):
                cur_ll = None if step == 0 else Cur_log_likelihood[idx]
                Beta[idx], Cur_log_likelihood[idx] = elliptical_slice(
                    beta,
                    chol,
                    log_likelihood_fn,
                    cur_log_like=cur_ll,
                    ll_args=idx,
                    mu=M[idx])

            # Save this sample after burn-in and markov chain thinning
            if step < nburn or ((step - nburn) % nthin) != 0:
                continue

            # Save the samples
            sample_idx = (step - nburn) // nthin
            Beta_samples[sample_idx] = Beta
            Loglikelihood_samples[sample_idx] = Cur_log_likelihood.sum()
    else:
        from multiprocessing import Pool
        jobs = [(Y[idx][Present[idx]][:, np.newaxis], Present[idx],
                 Lam_grid[idx], Lam_weights[idx], C[idx], M[idx], Beta[idx],
                 chol, nburn, nsamples, nthin) for idx in range(Beta.shape[0])]

        # Calculate the posteriors in parallel
        with Pool(nthreads) as pool:
            results = pool.map(posterior_ess_helper, jobs)

            # Aggregate the results
            for idx in range(Beta.shape[0]):
                Beta_samples[:, idx] = results[idx][0]
                Loglikelihood_samples += results[idx][1]

    return Beta_samples, Loglikelihood_samples
Example #15
def getRK(gBar, myA, myW, myP, myAlpha):
    q = gamma.ppf(myAlpha, myA, 0, 1 / myA)
    return gBar * myP * (1 - myW + myW * q)
Example #16
def getK(gBar, myA, myW, myP, myAlpha):
    q = gamma.ppf(myAlpha, myA, 0, 1 / myA)
    return gBar * myP * myW * (q - 1)
Example #17
def getW(myP, myA, myRho, myAlpha):
    num = th.computeP(myP, myRho, norm.ppf(1 - myAlpha)) - myP
    den = myP * (gamma.ppf(myAlpha, myA, 0, 1 / myA) - 1)
    return np.divide(num, den)
import numpy as np
from scipy.stats import gamma
import nibabel as nib


#probably redundant and have to find a better way to do this
img = nib.load('/Users/nanditharajamani/Desktop/IIT_delhi_stuff/Assignment_IIT_delhi/fsl_preprocessed/smoothed_img/sub-MSC01_ses-func01_task-motor_run-01_bold_mcf_filt_st_smooth.nii.gz')
img_data = img.get_fdata()
num_vols = img_data.shape[3]
header = img.header
find_pix = header['pixdim']
TR = find_pix[4]
TR = 2.2  # hard-coded override of the header-derived TR (seconds)
t_list = np.arange(1,img_data.shape[3],TR)
# define the HRF model as a difference of gamma densities
h = gamma.pdf(t_list, 6) - 0.5 * gamma.pdf(t_list, 10)
h = h / max(h)
#read from the covariates file and determine the duration of each task
dur_of_each_task = 15.4
TRperStim = TR*dur_of_each_task
nREPS = 2 #number of times each stimulus is repeated. This can also be obtained from the
#number of rows in the covariates file, for each stimulus
nTRs = int(TRperStim * nREPS + len(h))
design_matrix = np.zeros((1, nTRs))
# now, set each entry at a time point where the stimulus was on to 1; the rest stay zero
# left hand stimulus
left_hand_stim = design_matrix.copy()
left_hand_stim[0, ::int(TRperStim)] = 1
Example #19
elif (EARTH_QUESTION):
	print('EARTH_QUESTION')
	Lambda = np.arange(0.01,1,0.01)

	prior_alpha = 1
	prior_beta = 30
	prior = Gamma(Lambda, prior_alpha, prior_beta)
	
	data = [16,8,114,60,4,23,30,105]
	
	likelihood = Exponential(Lambda, data)

	posterior_alpha = prior_alpha + len(data)
	posterior_beta = prior_beta + sum(data)

	PPTLT = GammaDist.ppf(0.95,posterior_alpha,scale=1.0/posterior_beta)
	print(PPTLT)

	posterior = Gamma(Lambda, posterior_alpha, posterior_beta)

	# posterior predictive density given the data f(newData | oldData)
	# range of new data
	Y = np.arange(0,121,1)
	#notice that now we are considering the above posterior to be our
	#new prior
	#and we are calculating the new posterior for each possible new data
	predictive_posterior = []
	for y in Y:
		predictive_posterior.append(max(Gamma(Lambda, posterior_alpha+1, posterior_beta+y)))

	plt.plot(Y,predictive_posterior)
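For reference, with an exponential likelihood and a Gamma(posterior_alpha, posterior_beta) posterior on Lambda, the posterior predictive density also has a closed Lomax form; a sketch (not part of the original script) that can be compared against the grid-based curve above:

import numpy as np

def lomax_predictive(y, alpha, beta):
    # integral of Exp(y; lam) * Gamma(lam; alpha, beta) d lam = alpha * beta**alpha / (beta + y)**(alpha + 1)
    return alpha * beta ** alpha / (beta + y) ** (alpha + 1)

closed_form = lomax_predictive(np.arange(0, 121, 1), posterior_alpha, posterior_beta)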
                                                 function_of_gamma=f_gamma,
                                                 range_gamma=range_gamma,
                                                 k_opt_fcn=k_opt,
                                                 range_of_k=range_of_k,
                                                 beta=beta,
                                                 tol=tol)
            R[rep, i] = dist
            c = limiting_dist_EQOPP(X=data,
                                    a=data_sensitive,
                                    y=data_label,
                                    beta=beta,
                                    marginals=marginals_rand)

            k = 1 / 2
            theta = 2 * c
            if N * dist > gamma.ppf(.95, a=k, scale=theta):
                print('Reject')
                test_res[rep, i] = 1
            else:
                print('Fail to reject')
                test_res[rep, i] = 0
            end = time.time()
            time_elapsed = (end - start) * (replications - rep - 1)
            conversion = datetime.timedelta(seconds=time_elapsed)
            print('Replication====>' + str(rep) + '/' + str(replications) +
                  ', Time remaining : ' + str(conversion))

    np.savetxt(
        'results_' + str(N_range) + '_iterations_' + str(replications) +
        '.out', R)
    np.savetxt(
def SMC2(td,
         beta_softmax=1.,
         numberOfStateSamples=200,
         numberOfThetaSamples=200,
         numberOfBetaSamples=20,
         coefficient=.5,
         latin_hyp_sampling=True):

    print('\n')
    print('Forward Constant Volatility Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    #Start timer
    start_time_multi = time.time()

    # uniform distribution
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('sobol sampling')

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(stimuli)  # Number of Trials

    # verification
    if K == 2:
        if latin_hyp_sampling == False:
            raise ValueError(
                'Why did you change the latin_hyp_sampling? By default, it is True and has no influence when K=2.'
            )

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    tauPrior = np.array([1, 1])
    gammaPrior = np.ones(K)  # Prior on Gamma, the Dirichlet parameter
    log_proba = 0.
    log_proba_ = 0.
    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaLog = np.zeros(numberOfBetaSamples)
    logbetaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probabilities of every action, updated at every time step -> used to make the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros(
        [numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)
    dirichletParamCandidates = np.zeros(K)

    # SMC particles initialisation
    muSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples]
    )  #np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    tauSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])

    if K == 24:
        try:
            latin_hyp_samples = pickle.load(
                open('../../utils/sobol_200_26.pkl', 'rb'))
        except:
            latin_hyp_samples = pickle.load(
                open('../../models/utils/sobol_200_26.pkl', 'rb'))
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0,
                                              size=numberOfThetaSamples,
                                              dims=K + 2)
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0],
                                              betaPrior[0], betaPrior[1])
            tauSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 1],
                                               tauPrior[0], tauPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 2:],
                                                  gammaPrior)
            gammaSamples[beta_idx] = np.transpose(
                gammaSamples[beta_idx].T /
                np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                   [numberOfBetaSamples, numberOfThetaSamples])
        tauSamples = np.random.beta(
            tauPrior[0], tauPrior[1],
            [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(
            gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    ancestorSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    weightsList = np.ones([numberOfThetaSamples, numberOfStateSamples
                           ]) / numberOfStateSamples

    log_proba_corr = 0.
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)

    # Guided SMC variables
    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    tauSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples])

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0: print('\n')

        for beta_idx in range(numberOfBetaSamples):

            ances = betaAncestors[beta_idx]

            smc_c.bootstrapUpdateStep_c(currentSamples[beta_idx], logThetaWeights[beta_idx], gammaSamples[ances], muSamples[ances]/2. + 1./2, tauSamples[ances]/2., T, ancestorSamples[ances], weightsList, \
                                            np.ascontiguousarray(mapping), stimuli[T-1], actions[T-1], rewards[T-1], ancestorsIndexes, gammaAdaptedProba, likelihoods, positiveStates, 0)

            # Move step
            normalisedThetaWeights[
                beta_idx] = useful_functions.to_normalized_weights(
                    logThetaWeights[beta_idx])
            ess = 1. / np.sum(normalisedThetaWeights[beta_idx]**2)

            if (ess < coefficient * numberOfThetaSamples):
                acceptanceProba = 0.
                tauMu = np.sum(normalisedThetaWeights[beta_idx] *
                               tauSamples[ances])
                tauVar = np.sum(normalisedThetaWeights[beta_idx] *
                                (tauSamples[ances] - tauMu)**2)
                tauAlpha = ((1 - tauMu) / tauVar - 1 / tauMu) * tauMu**2
                tauBeta = tauAlpha * (1 / tauMu - 1)
                assert (tauAlpha > 0)
                assert (tauBeta > 0)
                betaMu = np.sum(normalisedThetaWeights[beta_idx] *
                                muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] *
                                 (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)
                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] *
                                        gammaSamples[ances].T,
                                        axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] *
                                      (gammaSamples[ances]**2).T,
                                      axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2
                                            ) / (np.sum(dirichletVar)) - 1
                dirichletParamCandidates[:] = np.maximum(
                    dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())

                if K == 2:
                    tauSamplesNew[beta_idx] = np.random.beta(
                        tauAlpha, tauBeta, numberOfThetaSamples)
                    muSamplesNew[beta_idx] = np.random.beta(
                        betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(
                        dirichletParamCandidates, numberOfThetaSamples)
                elif K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(
                            dist=d0, size=numberOfThetaSamples, dims=K + 2)
                    muSamplesNew[beta_idx] = betalib.ppf(
                        latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    tauSamplesNew[beta_idx] = betalib.ppf(
                        latin_hyp_samples[:, 1], tauAlpha, tauBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(
                        latin_hyp_samples[:, 2:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(
                        gammaSamplesNew[beta_idx].T /
                        np.sum(gammaSamplesNew[beta_idx], axis=1))

                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples
            else:
                tauSamplesNew[beta_idx] = tauSamples[ances]
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # task set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentSamples == ts_idx), axis=2),
                                              axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]

        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute action likelihood
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(
                np.log(
                    np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] ==
                                         action_idx],
                           axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik)
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]

        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)

        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        tauSamples[:] = tauSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorSamples[:] = currentSamples

    elapsed_time = time.time() - start_time_multi

    return log_proba_, filt_actionLkd
Example #22
def posterior_ess_Sigma(Y,
                        M,
                        A,
                        B,
                        C,
                        Sigma=None,
                        nu=None,
                        Psi=None,
                        Beta=None,
                        lam_gridsize=100,
                        nburn=500,
                        nsamples=1000,
                        nthin=1,
                        print_freq=100):
    if nu is None:
        # Default degrees of freedom
        nu = M.shape[1] + 1

    if Psi is None:
        # # Default squared exponential kernel prior
        # bandwidth, kernel_scale, noise_var = 2., 1., 0.5
        # Psi = np.array([kernel_scale*(np.exp(-0.5*(i - np.arange(M.shape[1]))**2 / bandwidth**2)) for i in np.arange(M.shape[1])]) + noise_var*np.eye(M.shape[1])
        Psi = np.eye(M.shape[1])
        Psi *= nu - M.shape[1] + 1

    if Sigma is None:
        # Sample from the prior to initialize Sigma
        Sigma = invwishart.rvs(nu, Psi)

    if Beta is None:
        Beta = np.copy(M)

    # Filter out the unknown Y values
    Present = Y >= 0

    # Use a grid approximation for lambda integral
    Lam_grid, Lam_weights = [], []
    for a, b, c in zip(A, B, C):
        grid = np.linspace(gamma.ppf(1e-3, a, scale=b),
                           gamma.ppf(1 - 1e-3, a, scale=b),
                           lam_gridsize)[np.newaxis, :]
        weights = gamma.pdf(grid, a, scale=b)
        weights /= weights.sum()
        Lam_grid.append(grid)
        Lam_weights.append(weights)
    Lam_grid = np.array(Lam_grid)
    Lam_weights = np.array(Lam_weights)

    ### Create a log-likelihood function for the ES sampler ###
    def log_likelihood_fn(proposal_beta, idx):
        if np.any(proposal_beta[:-1] > proposal_beta[1:] + 1e-6):
            return -np.inf
        present = Present[idx]
        y = Y[idx][present][:, np.newaxis]
        tau = ilogit(proposal_beta)[present][:, np.newaxis]
        grid = Lam_grid[idx]
        weights = Lam_weights[idx]
        c = C[idx]
        return np.log((poisson.pmf(y, grid * tau + c) * weights).clip(
            1e-10, np.inf).sum(axis=1)).sum()

    # Initialize betas with draws from the prior
    Cur_log_likelihood = np.zeros(M.shape[0])
    chol = np.linalg.cholesky(Sigma)

    # Create the results arrays
    Beta_samples = np.zeros((nsamples, Beta.shape[0], Beta.shape[1]))
    Sigma_samples = np.zeros((nsamples, Sigma.shape[0], Sigma.shape[1]))
    Loglikelihood_samples = np.zeros(nsamples)

    # Run the MCMC sampler
    for step in range(nburn + nsamples * nthin):
        if print_freq and step % print_freq == 0:
            if step > 0:
                sys.stdout.write("\033[F")  # Cursor up one line
            print('MCMC step {}'.format(step))

        # Elliptical slice sample for each beta
        for idx, beta in enumerate(Beta):
            Beta[idx], Cur_log_likelihood[idx] = elliptical_slice(
                beta, chol, log_likelihood_fn, ll_args=idx, mu=M[idx])
            # Cur_log_likelihood[idx] += mvn.logpdf(Beta[idx], M[idx], Sigma)

        # Conjugate prior update for Sigma
        Sigma = invwishart.rvs(nu + M.shape[0],
                               Psi + (Beta - M).T.dot(Beta - M))

        # Cholesky representation
        chol = np.linalg.cholesky(Sigma)

        # Save this sample after burn-in and markov chain thinning
        if step < nburn or ((step - nburn) % nthin) != 0:
            continue

        # Save the samples
        sample_idx = (step - nburn) // nthin
        Beta_samples[sample_idx] = Beta
        Sigma_samples[sample_idx] = Sigma
        Loglikelihood_samples[sample_idx] = Cur_log_likelihood.sum()

    return Beta_samples, Sigma_samples, Loglikelihood_samples
Example #23
    def noiselevel(self):

        if len(self.img.shape) < 3:
            self.img = np.expand_dims(self.img, 2)

        nlevel = np.ndarray(self.img.shape[2])
        th = np.ndarray(self.img.shape[2])
        num = np.ndarray(self.img.shape[2])

        kh = np.expand_dims(np.expand_dims(np.array([-0.5, 0, 0.5]), 0),2)
        imgh = correlate(self.img, kh, mode='nearest')
        imgh = imgh[:, 1: imgh.shape[1] - 1, :]
        imgh = imgh * imgh

        kv = np.expand_dims(np.vstack(np.array([-0.5, 0, 0.5])), 2)
        imgv = correlate(self.img, kv, mode='nearest')
        imgv = imgv[1: imgv.shape[0] - 1, :, :]
        imgv = imgv * imgv

        Dh = np.matrix(self.convmtx2(np.squeeze(kh,2), self.patchsize, self.patchsize))
        Dv = np.matrix(self.convmtx2(np.squeeze(kv,2), self.patchsize, self.patchsize))

        DD = Dh.getH() * Dh + Dv.getH() * Dv

        r = np.double(np.linalg.matrix_rank(DD))
        Dtr = np.trace(DD)

        tau0 = gamma.ppf(self.conf, r / 2, scale=(2 * Dtr / r))

        for cha in range(self.img.shape[2]):
            X = view_as_windows(self.img[:, :, cha], (self.patchsize, self.patchsize))
            X = X.reshape(int(X.size / self.patchsize ** 2), self.patchsize ** 2, order='F').transpose()

            Xh = view_as_windows(imgh[:, :, cha], (self.patchsize, self.patchsize - 2))
            Xh = Xh.reshape(int(Xh.size / ((self.patchsize - 2) * self.patchsize)),
                            ((self.patchsize - 2) * self.patchsize), order='F').transpose()

            Xv = view_as_windows(imgv[:, :, cha], (self.patchsize - 2, self.patchsize))
            Xv = Xv.reshape(int(Xv.size / ((self.patchsize - 2) * self.patchsize)),
                            ((self.patchsize - 2) * self.patchsize), order='F').transpose()

            Xtr = np.expand_dims(np.sum(np.concatenate((Xh, Xv), axis=0), axis=0), 0)

            if self.decim > 0:
                XtrX = np.transpose(np.concatenate((Xtr, X), axis=0))
                XtrX = np.transpose(XtrX[XtrX[:, 0].argsort(),])
                p = np.floor(XtrX.shape[1] / (self.decim + 1))
                p = np.expand_dims(np.arange(0, p) * (self.decim + 1), 0)
                Xtr = XtrX[0, p.astype('int')]
                X = np.squeeze(XtrX[1:XtrX.shape[1], p.astype('int')])

            # noise level estimation
            tau = np.inf

            if X.shape[1] < X.shape[0]:
                sig2 = 0
            else:
                cov = (np.asmatrix(X) @ np.asmatrix(X).getH()) / (X.shape[1] - 1)
                d = np.flip(np.linalg.eig(cov)[0], axis=0)
                sig2 = d[0]

            for i in range(1, self.itr):
                # weak texture selection
                tau = sig2 * tau0
                p = Xtr < tau
                Xtr = Xtr[p]
                X = X[:, np.squeeze(p)]

                # noise level estimation
                if X.shape[1] < X.shape[0]:
                    break

                cov = (np.asmatrix(X) @ np.asmatrix(X).getH()) / (X.shape[1] - 1)
                d = np.flip(np.linalg.eig(cov)[0], axis=0)
                sig2 = d[0]

            nlevel[cha] = np.sqrt(sig2)
            th[cha] = tau
            num[cha] = X.shape[1]

        # clean up
        self.img = np.squeeze(self.img)

        return nlevel, th, num
Example #24
def hsic_gam(X, Y, alph=0.5):
    """
	X, Y are numpy vectors with row - sample, col - dim
	alph is the significance level
	auto choose median to be the kernel width
	"""
    n = X.shape[0]

    # ----- width of X -----
    Xmed = X

    G = np.sum(Xmed * Xmed, 1).reshape(n, 1)
    Q = np.tile(G, (1, n))
    R = np.tile(G.T, (n, 1))

    dists = Q + R - 2 * np.dot(Xmed, Xmed.T)
    dists = dists - np.tril(dists)
    dists = dists.reshape(n**2, 1)

    width_x = np.sqrt(0.5 * np.median(dists[dists > 0]))
    # ----- -----

    # ----- width of Y -----
    Ymed = Y

    G = np.sum(Ymed * Ymed, 1).reshape(n, 1)
    Q = np.tile(G, (1, n))
    R = np.tile(G.T, (n, 1))

    dists = Q + R - 2 * np.dot(Ymed, Ymed.T)
    dists = dists - np.tril(dists)
    dists = dists.reshape(n**2, 1)

    width_y = np.sqrt(0.5 * np.median(dists[dists > 0]))
    # ----- -----

    bone = np.ones((n, 1), dtype=float)
    H = np.identity(n) - np.ones((n, n), dtype=float) / n

    K = rbf_dot(X, X, width_x)
    L = rbf_dot(Y, Y, width_y)

    Kc = np.dot(np.dot(H, K), H)
    Lc = np.dot(np.dot(H, L), H)

    testStat = np.sum(Kc.T * Lc) / n

    varHSIC = (Kc * Lc / 6)**2

    varHSIC = (np.sum(varHSIC) - np.trace(varHSIC)) / n / (n - 1)

    varHSIC = varHSIC * 72 * (n - 4) * (n - 5) / n / (n - 1) / (n - 2) / (n -
                                                                          3)

    K = K - np.diag(np.diag(K))
    L = L - np.diag(np.diag(L))

    muX = np.dot(np.dot(bone.T, K), bone) / n / (n - 1)
    muY = np.dot(np.dot(bone.T, L), bone) / n / (n - 1)

    mHSIC = (1 + muX * muY - muX - muY) / n

    al = mHSIC**2 / varHSIC
    bet = varHSIC * n / mHSIC

    thresh = gamma.ppf(1 - alph, al, scale=bet)[0][0]

    return (testStat, thresh)
Example #25
    def qGamma(p: float, location: np.ndarray, scale: np.ndarray):
        """Quantile function.

        """
        q = gamma.ppf(p, a=1 / scale**2, scale=location * scale**2)
        return q
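A quick check of this parametrization (a sketch with illustrative values): with shape a = 1/scale**2 and gamma scale = location*scale**2, the distribution's mean equals location and its coefficient of variation equals scale.

import numpy as np
from scipy.stats import gamma

location, scale = np.array([10.0]), np.array([0.3])    # illustrative mean and CV
mean = gamma.mean(a=1 / scale**2, scale=location * scale**2)
cv = gamma.std(a=1 / scale**2, scale=location * scale**2) / mean
print(mean, cv)    # ~[10.] and ~[0.3]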
Example #26
#Create frequency distribution with numpy
freq, counts = FreqDist(testdata, x)

ax.plot(x, freq, 'r.')
ax.legend(loc='best', frameon=False)
plt.xscale('log')
plt.yscale('log')
plt.ylim(0.001, 1)
plt.show()

#%%
fig, ax = plt.subplots(1, 1)
a = 1.99
mean, var, skew, kurt = gamma.stats(a, moments='mvsk')

x = np.linspace(gamma.ppf(0.01, a), gamma.ppf(0.99, a), 100)
ax.plot(x, gamma.pdf(x, a), 'r-', lw=5, alpha=0.6, label='gamma pdf')

rv = gamma(a)
ax.plot(x, rv.pdf(x), 'k-', lw=3, label='best-fit distribution')

#%%
testdata = WDists["15_set_10"]
x = np.logspace(np.log10(0.01), np.log10(10))


def FreqDist(data, bins):
    counts = np.zeros(len(bins))
    for i in range(len(bins)):
        if i == 0:
            lower = 0
# X ~ Gamma(k, theta)   but in scipy, theta = 1
# f(x;k, theta) = x**(k-1) * exp(-x / theta) / theta ** k / gamma_function(k)
#############################
from scipy.stats import gamma
k = 1
x = 1

pdf_value = gamma.pdf(x, k)
print(f"When X ~ Gamma({k}, 1),\t pdf(X = {x}) = {pdf_value}")

cdf_value = gamma.cdf(x, k)
print(f"When X ~ Gamma({k}, 1),\t cdf(X <= {x}) = {cdf_value}")

# ppf: percentage point function (inverse function of cdf)
p = 0.25
ppf_value = gamma.ppf(p, k)
print(f"When X ~ Gamma({k}, 1),\t ppf(p = {p}) = {ppf_value}")
print(f"When X ~ Gamma({k}, 1),\t IQR = [{gamma.ppf(0.25, k)}, {gamma.ppf(0.75, k)}]")

# rvs : random variates
sample_size = 10
print(f"Random Variates (size :{sample_size}) from X ~ Gamma({k}, 1)\n", gamma.rvs(k, size=sample_size))
print()
#%%
#############################
# Exponential Distribution
# X ~ Exp(lambd)  ... (X ~ Gamma(1, 1 / lambda))
# f(x;lambda) = lambda * exp(-x * lambda)
# in scipy, scale = 1 / lambda
#############################
from scipy.stats import expon
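# A short sketch of the analogous exponential calls (illustrative values;
# in scipy, scale = 1 / lambda):
lambd = 2
x = 1
print(f"When X ~ Exp({lambd}),\t pdf(X = {x}) = {expon.pdf(x, scale=1 / lambd)}")
print(f"When X ~ Exp({lambd}),\t cdf(X <= {x}) = {expon.cdf(x, scale=1 / lambd)}")
print(f"When X ~ Exp({lambd}),\t ppf(p = 0.25) = {expon.ppf(0.25, scale=1 / lambd)}")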
Example #28
def dip_threshold(n, p_value):
    k = 21.642
    theta = 1.84157e-2 / numpy.sqrt(n)
    return gamma.ppf(1. - p_value, a=k, scale=theta)
 for i in range(2,len(TotalCases)):
     new_cases=float(TotalCases[i]-TotalCases[i-1])
     old_new_cases=float(TotalCases[i-1]-TotalCases[i-2])
     
     # This uses a conjugate prior as a Gamma distribution for b_t, with parameters alpha and beta
     alpha =alpha+new_cases
     beta=beta +old_new_cases
     valpha.append(alpha)
     vbeta.append(beta)
     
     mean = gamma.stats(a=alpha, scale=1/beta, moments='m')
     
     RRest=1.+infperiod*ln(mean)
     if (RRest<0.): RRest=0.
     predR.append(RRest)
     testRRM=1.+infperiod*ln( gamma.ppf(0.99, a=alpha, scale=1./beta) )# these are the boundaries of the 99% confidence interval  for new cases
     if (testRRM <0.): testRRM=0.
     pstRRM.append(testRRM)
     testRRm=1.+infperiod*ln( gamma.ppf(0.01, a=alpha, scale=1./beta) )
     if (testRRm <0.): testRRm=0.
     pstRRm.append(testRRm)
     
     #print('estimated RR=',RRest,testRRm,testRRM) # to see the numbers for the evolution of Rt
     
     if (new_cases==0. or old_new_cases==0.):
         pred.append(0.)
         pstdM.append(10.)
         pstdm.append(0.)
         NewCases.append(0.)
     
     if (new_cases>0. and old_new_cases>0.):
Example #30
def ppf(p, a, b):

    q = gamma.ppf(p, a, loc=0, scale=b)
    return q
Example #31
    def u_to_x(self, u):
        return gamma.ppf(norm.cdf(u, 0, 1), a=self.k, scale=self.th)
Example #32
def zradius(ndim, siglevel=6):
    q = 1 - 2.0 * norm.cdf(-siglevel)
    xx = gamma.ppf(q, ndim * 0.5)
    zz = np.sqrt(2 * xx)
    return zz
import numpy as np
from scipy.stats import gamma


def Draw_samples_from_dist(num_samples, dist, **params):
    uniform_samps = Draw_samples_from_Uni(0, 1, num_samples)

    ## Using inverse transform sampling and the Box-Muller transform
    if dist == "Normal":

        if "mean" in params:
            mean = params["mean"]
        else:
            print("Please specify 'mean' parameter.")
            return
        if "std" in params:
            std = params["std"]
        else:
            print("Please specify 'std' parameter.")
            return

        uniform_samps_pairs = Draw_samples_from_Uni(0, 1, num_samples)
        z = np.zeros(num_samples)
        for i in range(num_samples):
            z0 = np.sqrt(-2 * np.log(uniform_samps[i])) * np.cos(
                2 * np.pi * uniform_samps_pairs[i])
            # z1 = np.sqrt(-2*np.log(uniform_samps[i]))*np.sin(2*np.pi*uniform_samps_pairs[i]) ## however we can just use z0, we don't need pairs
            z[i] = z0 * std + mean
        return z

    elif dist == "Exponential":

        if "lamb" in params:
            lamb = params["lamb"]
        else:
            print("Please specify 'lamb' parameter.")
            return

        x = np.zeros(num_samples)
        for i in range(num_samples):
            x[i] = -(1 / lamb) * (
                np.log(1 - uniform_samps[i])
            )  ## take the inverse Exponential CDF and apply inverse transform sampling
        return x

    elif dist == "Gamma":

        if "shape" in params:
            shape = params["shape"]
        else:
            print("Please specify 'shape' parameter.")
            return
        if "loc" in params:
            loc = params["loc"]
        else:
            print("Please specify 'loc' parameter.")
            return
        if "scale" in params:
            scale = params["scale"]
        else:
            print("Please specify 'scale' parameter.")
            return

        g = np.zeros(num_samples)
        for i in range(num_samples):
            g[i] = gamma.ppf(uniform_samps[i], shape, loc, scale)
        return g
    else:
        print(
            "Please input a correct distribution. Type either 'Normal', 'Exponential' or 'Gamma'"
        )
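Example calls (a sketch; Draw_samples_from_Uni is assumed to be the uniform sampler defined elsewhere in this source, and the parameter values are illustrative):

normal_samples = Draw_samples_from_dist(1000, "Normal", mean=0.0, std=1.0)
exp_samples = Draw_samples_from_dist(1000, "Exponential", lamb=2.0)
gamma_samples = Draw_samples_from_dist(1000, "Gamma", shape=2.0, loc=0.0, scale=1.5)
print(normal_samples.mean(), exp_samples.mean(), gamma_samples.mean())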
Example #34
import matplotlib.pyplot as plt
from scipy.stats import gamma
import numpy as np

plt.style.use('seaborn-paper')
fig, ax = plt.subplots(1, 1)

# Param. for Gamma distribution
alpha, beta = 6, 6

# x-axis covering cumulative probability from 1% to 99%
x = np.linspace(gamma.ppf(.01, alpha, scale=1 / beta),
                gamma.ppf(.99, alpha, scale=1 / beta), 100)

# Plot (beta is treated as a rate parameter, so scale = 1/beta)
ax.plot(x,
        gamma.pdf(x, alpha, scale=1 / beta),
        label='Gam({0}, {1})'.format(alpha, beta))

plt.title('PDF of the Gamma Distribution')
plt.legend(loc='best')
plt.tight_layout()
plt.show()
Example #35
def run_platformqc(data_path, output_path, *, suffix=None, b_width=1000):
    if not suffix:
        suffix = ""
    else:
        suffix = "_" + suffix
    log_path = os.path.join(output_path, "log",
                            "log_sequel_platformqc" + suffix + ".txt")
    fig_path = os.path.join(output_path, "fig",
                            "fig_sequel_platformqc_length" + suffix + ".png")
    fig_path_bar = os.path.join(
        output_path, "fig", "fig_sequel_platformqc_adapter" + suffix + ".png")
    json_path = os.path.join(output_path, "QC_vals_sequel" + suffix + ".json")
    # json
    tobe_json = {}

    # output_path will be made too.
    if not os.path.isdir(os.path.join(output_path, "log")):
        os.makedirs(os.path.join(output_path, "log"), exist_ok=True)

    if not os.path.isdir(os.path.join(output_path, "fig")):
        os.makedirs(os.path.join(output_path, "fig"), exist_ok=True)

    ### logging conf ###
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(log_path, 'w')
    sh = logging.StreamHandler()

    formatter = logging.Formatter(
        '%(module)s:%(asctime)s:%(lineno)d:%(levelname)s:%(message)s')
    fh.setFormatter(formatter)
    sh.setFormatter(formatter)

    logger.addHandler(sh)
    logger.addHandler(fh)
    #####################

    logger.info("Started sequel platform QC for %s" % data_path)

    # sequel
    xml_file = get_sts_xml_path(data_path, logger)

    if not xml_file:
        logger.warning("sts.xml is missing. Productivity won't be shown")
        [p0, p1, p2] = [None] * 3
    else:
        [p0, p1, p2] = parse_sts_xml(
            xml_file,
            ns="http://pacificbiosciences.com/PacBioBaseDataModel.xsd")
        logger.info("Parsed sts.xml")

    [subr_bam_p, scrap_bam_p] = get_bam_path(data_path, logger)
    if subr_bam_p and scrap_bam_p:
        scrap_bam = pysam.AlignmentFile(scrap_bam_p, 'rb', check_sq=False)
        subr_bam = pysam.AlignmentFile(subr_bam_p, 'rb', check_sq=False)
    else:
        logger.ERROR("Platform QC failed due to missing bam files")
        return 1

    bam_reads = {}
    snr = [[], [], [], []]
    hr_fraction = []
    tot_lengths = []
    hr_lengths = []
    ad_num_stat = {}
    control_throughput = 0

    if get_readtype(scrap_bam.header) == 'SCRAP':
        logger.info("Started to load scraps.bam...")
        control_throughput = set_scrap(bam_reads, scrap_bam, snr)
    else:
        logger.ERROR("the given scrap file has incorrect header.")

    logger.info("Scrap reads were loaded.")

    if get_readtype(subr_bam.header) == 'SUBREAD':
        logger.info("Started to load subreads.bam...")
        set_subreads(bam_reads, subr_bam, snr)
    else:
        logger.ERROR("the given subread file has incorrect header.")

    logger.info("Subreads were loaded.")

    for k, v in bam_reads.items():
        #print(k)
        l = construct_polread(v)

        #print(l)
        if l[4]:
            hr_fraction.append(l[2] / l[3])
            tot_lengths.append(l[3])
            hr_lengths.append(l[2])
            if l[5] in ad_num_stat:
                ad_num_stat[l[5]] += 1
            else:
                ad_num_stat[l[5]] = 1

    max_adnum = max(ad_num_stat.keys())
    min_adnum = min(ad_num_stat.keys())

    left = []
    height = []
    for i in range(min_adnum, max_adnum + 1):
        left.append(i)
        if i in ad_num_stat:
            height.append(ad_num_stat[i])
        else:
            height.append(0)

    plt.bar(left, height)
    plt.savefig(fig_path_bar, bbox_inches="tight")
    plt.close()
    logger.info("Plotted bar plot for adpter occurence")

    (a, b) = lq_gamma.estimate_gamma_dist_scipy(hr_lengths)
    logger.info("Fitting by Gamma dist finished.")

    _max = np.array(hr_lengths).max()
    _mean = np.array(hr_lengths).mean()
    _n50 = get_N50(hr_lengths)
    _n90 = get_NXX(hr_lengths, 90)
    throughput = np.sum(hr_lengths)
    longest = np.max(hr_lengths)
    fracs = np.mean(hr_fraction)

    tobe_json["Productivity"] = {"P0": p0, "P1": p1, "P2": p2}
    tobe_json["Throughput"] = int(throughput)
    tobe_json["Throughput(Control)"] = int(control_throughput)
    tobe_json["Longest_read"] = int(_max)
    tobe_json["Num_of_reads"] = len(hr_lengths)
    tobe_json["polread_gamma_params"] = [float(a), float(b)]
    tobe_json["Mean_polread_length"] = float(_mean)
    tobe_json["N50_polread_length"] = float(_n50)
    tobe_json["Mean_HQ_fraction"] = float(np.mean(fracs))
    tobe_json["Adapter_observation"] = ad_num_stat

    with open(json_path, "w") as f:
        logger.info("Quality measurements were written into a JSON file: %s" %
                    json_path)
        json.dump(tobe_json, f, indent=4)

    x = np.linspace(0, gamma.ppf(0.99, a, 0, b))
    est_dist = gamma(a, 0, b)
    plt.plot(x, est_dist.pdf(x), c=rgb(214, 39, 40))
    plt.grid(True)
    plt.hist(hr_lengths,
             histtype='step',
             bins=np.arange(min(hr_lengths), _max + b_width, b_width),
             color=rgb(214, 39, 40),
             alpha=0.7,
             density=True)
    plt.xlabel('Read length')
    plt.ylabel('Probability density')

    if _mean >= 10000:  # pol read mean is expected >= 10k and <= 15k, but omit the <= 15k condition.
        plt.axvline(x=_mean,
                    linestyle='dashed',
                    linewidth=2,
                    color=rgb(44, 160, 44),
                    alpha=0.8)
    else:
        plt.axvline(x=_mean,
                    linestyle='dashed',
                    linewidth=2,
                    color=rgb(188, 189, 34),
                    alpha=0.8)

    if _n50 >= 20000:
        plt.axvline(x=_n50, linewidth=2, color=rgb(44, 160, 44), alpha=0.8)
    else:
        plt.axvline(x=_n50, linewidth=2, color=rgb(188, 189, 34), alpha=0.8)

    plt.hist(tot_lengths,
             histtype='step',
             bins=np.arange(min(tot_lengths),
                            max(tot_lengths) + b_width, b_width),
             color=rgb(31, 119, 180),
             alpha=0.7,
             density=True)

    ymin, ymax = plt.gca().get_ylim()
    xmin, xmax = plt.gca().get_xlim()
    plt.text(xmax * 0.6, ymax * 0.72, r'$\alpha=%.3f,\ \beta=%.3f$' % (a, b))
    plt.text(xmax * 0.6, ymax * 0.77, r'Gamma dist params:')

    plt.text(xmax * 0.6, ymax * 0.85, r'sample mean: %.3f' % (_mean, ))
    plt.text(xmax * 0.6, ymax * 0.9, r'N50: %.3f' % (_n50, ))
    plt.text(xmax * 0.6, ymax * 0.95, r'N90: %.3f' % (_n90, ))

    plt.text(_mean, ymax * 0.85, r'Mean')
    plt.text(_n50, ymax * 0.9, r'N50')

    plt.savefig(fig_path, bbox_inches="tight")
    plt.close()
    #plt.show()

    logger.info("Figs were generated.")
    logger.info("Finished all processes.")
Beispiel #36
0
    def get_rate_percentile(self, percentile):
        return gamma.ppf(percentile, self.alpha, scale=1/float(self.beta))
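Passing scale=1/beta treats (alpha, beta) as a shape-rate pair, as in a conjugate Gamma posterior over a rate parameter; a short usage sketch under that assumption (the numbers are illustrative):

from scipy.stats import gamma

alpha, beta = 12.0, 3.0   # hypothetical posterior shape and rate
lower = gamma.ppf(0.025, alpha, scale=1/beta)
upper = gamma.ppf(0.975, alpha, scale=1/beta)
print("95% interval: [%.3f, %.3f]" % (lower, upper))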
Beispiel #37
0
#    plt.hist(abc.x,bins=20,label="$\epsilon$="+str(abc.epsilon),density=True,alpha=0.5)
    #pmc sequence
    for eps in abc.epsilon_list[1:]:
        abc.run()
        abc.check()

    tend = time.time()

    print(tend-tstart,"sec")
    
    #plotting...
    fig=plt.figure(figsize=(10,5))
    ax=fig.add_subplot(211)
    ax.hist(abc.x,bins=30,label=r"$\epsilon$="+str(abc.epsilon),density=True,alpha=0.5)
    ax.hist(abc.xres(),bins=30,label="resampled",density=True,alpha=0.2)

    alpha=alpha0+abc.nsample
    beta=beta0+Ysum
    xl = np.linspace(gammafunc.ppf(0.0001, alpha,scale=1.0/beta),gammafunc.ppf(0.9999, alpha,scale=1.0/beta), 100)
    ax.plot(xl, gammafunc.pdf(xl, alpha, scale=1.0/beta),label="analytic")
    plt.xlabel("$\lambda$")
    plt.ylabel("$\pi_\mathrm{ABC}$")
    plt.legend()
    ax=fig.add_subplot(212)
    ax.plot(abc.x,abc.w,".")
    plt.xlabel("$\lambda$")
    plt.ylabel("$weight$")
    plt.savefig("abcpmc.png")
    plt.show()
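The "analytic" curve overlaid above relies on conjugacy: the update alpha = alpha0 + n, beta = beta0 + sum(Y) is the posterior of a Gamma(alpha0, beta0) prior on a rate lambda under an exponential likelihood (equivalently, a Gamma likelihood with known shape). A minimal standalone sketch of that posterior, with illustrative data in place of the ABC run:

import numpy as np
from scipy.stats import gamma as gammafunc

rng = np.random.default_rng(0)
alpha0, beta0 = 0.1, 0.1                     # hypothetical prior hyperparameters
y = rng.exponential(scale=1/2.0, size=500)   # data generated with rate lambda = 2

alpha = alpha0 + len(y)   # conjugate update for the rate
beta = beta0 + y.sum()
print(gammafunc.ppf([0.05, 0.5, 0.95], alpha, scale=1.0/beta))  # posterior quantiles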

    def compile(alphabet, words, nonwords):
        print('  Generating all possible transitions...')
        from itertools import product
        all = []
        for state_size in range(args.max_state_size + 1):
            all += product(product(alphabet, repeat = state_size), [*alphabet, None])

        def of(string):
            for i in range(len(string)):
                yield string[max(0, i - args.max_state_size):i], string[i]
            yield string[max(0, len(string) - args.max_state_size):], None

        from collections import Counter
        counts = Counter()
        for word in tqdm(words, '  Counting transitions', leave = True):
            for state, symbol in of(word):
                counts[state, symbol] += 1
        state_counts = Counter()
        for state, symbol in tqdm(counts, '  Counting states', leave = True):
            state_counts[state] += counts[state, symbol]

        import numpy as np
        logprobs = np.empty(len(all))
        for i, (state, symbol) in enumerate(tqdm(all, '  Computing conditional transition probabilities', leave = True)):
            try:
                logprobs[i] = np.log(state_counts[state] / counts[state, symbol])
            except ZeroDivisionError:
                logprobs[i] = np.inf

        print('  Fitting flattening distribution...')
        from scipy.stats import gamma
        params = gamma.fit(logprobs[logprobs != np.inf])

        print('  Flattening...')
        logprobs = gamma.cdf(logprobs, *params)
        lower_bound = np.min(logprobs)
        upper_bound = np.max(logprobs[logprobs != 1])
        new_logprobs = np.empty(len(logprobs), int)
        for i, logprob in enumerate(tqdm(logprobs, '  Discretizing', leave = True)):
            if logprob == 1:
                new_logprobs[i] = 2 ** args.transition_bits - 1
            else:
                new_logprobs[i] = round((logprob - lower_bound) * ((2 ** args.transition_bits - 2) / (upper_bound - lower_bound)))
        logprobs = new_logprobs

        data = bytearray()

        bit_buffer = 0
        bit_buffer_size = 0
        for logprob in tqdm(logprobs, '  Packing', leave = True):
            bit_buffer = bit_buffer << args.transition_bits | int(logprob)
            bit_buffer_size += args.transition_bits
            if bit_buffer_size % 8 == 0:
                data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')
                bit_buffer = 0
                bit_buffer_size = 0
        while bit_buffer_size % 8 != 0:
            bit_buffer = bit_buffer << args.transition_bits
            bit_buffer_size += args.transition_bits
        data += bit_buffer.to_bytes(bit_buffer_size // 8, 'big')

        old_logprobs = np.empty(len(logprobs))
        for i, logprob in enumerate(tqdm(logprobs, '  Undiscretizing...', leave = True)):
            if logprob == 2 ** args.transition_bits - 1:
                old_logprobs[i] = 1
            else:
                old_logprobs[i] = lower_bound + logprob * ((upper_bound - lower_bound) / (2 ** args.transition_bits - 2))
        print('  Unflattening...')
        old_logprobs = gamma.ppf(old_logprobs, *params)
        old_logprobs = dict(zip(all, old_logprobs))

        def params_of(strings):
            strings_logprobs = np.empty(len(strings))
            for i, string in enumerate(strings):
                strings_logprobs[i] = sum(old_logprobs[state, symbol] for state, symbol in of(string))
            strings_params = gamma.fit(strings_logprobs[strings_logprobs != np.inf])
            _, bins, _ = plt.hist(strings_logprobs[strings_logprobs != np.inf], 500, histtype = 'step', density = True)
            plt.plot(bins, gamma.pdf(bins, *strings_params))
            return strings_params

        print('  Fitting words distribution...')
        words_params = params_of(words)

        print('  Fitting nonwords distribution...')
        nonwords_params = params_of(nonwords)

        def minify(code):
            if args.minify:
                import subprocess
                p = subprocess.run([str(Path(__file__).parent / 'node_modules/uglify-js/bin/uglifyjs'),
                    '--screw-ie8',
                    '--mangle', 'sort,toplevel',
                    '--compress',
                    '--bare-returns',
                ], input = code.encode(),
                   stdout = subprocess.PIPE,
                   stderr = subprocess.PIPE)
                if p.returncode != 0:
                    import sys
                    sys.stderr.buffer.write(p.stderr)
                    p.check_returncode()
                code = p.stdout.decode()
            return code

        print('  Generating JS code...')
        code = minify(r'''
            exports.init = function(buffer) {
                exports.test = (new Function('buffer', buffer.utf8Slice(''' + str(len(data)) + r''')))(buffer);
            };
        ''').encode()
        data += minify(r'''
            var abs = Math.abs;
            var min = Math.min;
            var max = Math.max;

            var alphabet = [
                ''' + r'''
                '''.join('"' + symbol + '",' for symbol in alphabet) + r'''
            ];

            var of; (function() {
                function fold(string) {
                    string = Array.from(string);
                    for (var i = alphabet.length - 1; alphabet[i].length > 1; --i) {
                        for (var j = 0; j <= string.length - alphabet[i].length; ++j) {
                            if (string.slice(j, j + alphabet[i].length).join('') == alphabet[i]) {
                                string.splice(j, alphabet[i].length, alphabet[i]);
                            }
                        }
                    }
                    return string;
                }

                of = function(string) {
                    string = fold(string);
                    var ofString = [];
                    for (var i = 0; i < string.length; ++i) {
                        ofString.push([string.slice(max(0, i - ''' + str(args.max_state_size) + r'''), i), string[i]]);
                    }
                    ofString.push([string.slice(max(0, string.length - ''' + str(args.max_state_size) + r''')), null]);
                    return ofString;
                };
            })();

            var all; (function() {
                function product(xs, ys) {
                    var result = [];
                    for (var i = 0; i < xs.length; ++i) {
                        for (var j = 0; j < ys.length; ++j) {
                            result.push([xs[i], ys[j]]);
                        }    
                    }
                    return result;
                }

                function power(a, k) {
                    if (k == 0) {
                        return [[]];    
                    }
                    var result = [];
                    for (var i = 0; i < a.length; ++i) {
                        var b = power(a, k - 1);
                        for (var j = 0; j < b.length; ++j) {
                            result.push([a[i]].concat(b[j]));
                        }    
                    }
                    return result;
                }

                all = [];
                for (var stateSize = 0; stateSize <= ''' + str(args.max_state_size) + r'''; ++stateSize) {
                    all = all.concat(product(power(alphabet, stateSize), alphabet.concat([null])));
                }
            })();

            var gammaPdf, gammaPpf; (function() {
                var pow = Math.pow;
                var exp = Math.exp;
                var log = Math.log;
                var sqrt = Math.sqrt;

                var cof = [
                    76.18009172947146,
                    -86.50532032941677,
                    24.01409824083091,
                    -1.231739572450155,
                    0.1208650973866179e-2,
                    -0.5395239384953e-5,
                ];

                function ln(x) {
                    var j = 0;
                    var ser = 1.000000000190015;
                    var xx, y, tmp;

                    tmp = (y = xx = x) + 5.5;
                    tmp -= (xx + 0.5) * log(tmp);
                    for (; j < 6; j++)
                        ser += cof[j] / ++y;
                    return log(2.5066282746310005 * ser / xx) - tmp;
                }

                gammaPdf = function(x, a) {
                    if (x < 0)
                        return 0;
                    if (x === 0 && a === 1)
                        return 1;
                    return exp((a - 1) * log(x) - x - ln(a));
                };

                function lowReg(a, x) {
                    var aln = ln(a);
                    var ap = a;
                    var sum = 1 / a;
                    var del = sum;
                    var b = x + 1 - a;
                    var c = 1 / 1.0e-30;
                    var d = 1 / b;
                    var h = d;
                    var i = 1;
                    var ITMAX = -~(log((a >= 1) ? a : 1 / a) * 8.5 + a * 0.4 + 17);
                    var an, endval;

                    if (x < 0 || a <= 0) {
                        return NaN;
                    } else if (x < a + 1) {
                        for (; i <= ITMAX; i++) {
                            sum += del *= x / ++ap;
                        }
                        return sum * exp(-x + a * log(x) - aln);
                    }

                    for (; i <= ITMAX; i++) {
                        an = -i * (i - a);
                        b += 2;
                        d = an * d + b;
                        c = b + an / c;
                        d = 1 / d;
                        h *= d * c;
                    }

                    return 1 - h * exp(-x + a * log(x) - aln);
                }

                gammaPpf = function(p, a) {
                    var j = 0;
                    var a1 = a - 1;
                    var EPS = 1e-8;
                    var gln = ln(a);
                    var x, err, t, u, pp, lna1, afac;

                    if (p > 1)
                        return NaN;
                    if (p == 1)
                        return Infinity;
                    if (p < 0)
                        return NaN;
                    if (p == 0)
                        return 0;
                    if (a > 1) {
                        lna1 = log(a1);
                        afac = exp(a1 * (lna1 - 1) - gln);
                        pp = (p < 0.5) ? p : 1 - p;
                        t = sqrt(-2 * log(pp));
                        x = (2.30753 + t * 0.27061) / (1 + t * (0.99229 + t * 0.04481)) - t;
                        if (p < 0.5)
                            x = -x;
                        x = max(1e-3, a * pow(1 - 1 / (9 * a) - x / (3 * sqrt(a)), 3));
                    } else {
                        t = 1 - a * (0.253 + a * 0.12);
                        if (p < t)
                            x = pow(p / t, 1 / a);
                        else
                            x = 1 - log(1 - (p - t) / (1 - t));
                    }

                    for(; j < 12; j++) {
                        if (x <= 0)
                            return 0;
                        err = lowReg(a, x) - p;
                        if (a > 1)
                            t = afac * exp(-(x - a1) + a1 * (log(x) - lna1));
                        else
                            t = exp(-x + a1 * log(x) - gln);
                        u = err / t;
                        x -= (t = u / (1 - 0.5 * min(1, u * ((a - 1) / x - 1))));
                        if (x <= 0)
                            x = 0.5 * (x + t);
                        if (abs(t) < EPS * x)
                            break;
                    }

                    return x; 
                };
            })();

            var logprobs = {};
            var bitBuffer = 0, bitBufferSize = 0;
            var bufferOffset = 0;
            for (var i = 0; i < all.length; ++i) {
                while (bitBufferSize < ''' + str(args.transition_bits) + r''') {
                    bitBuffer = bitBuffer << 8 | buffer.readUInt8(bufferOffset++); bitBufferSize += 8;
                }

                var logprob = bitBuffer >> (bitBufferSize - ''' + str(args.transition_bits) + r''') & ''' + hex(2 ** args.transition_bits - 1) + r'''; bitBufferSize -= ''' + str(args.transition_bits) + r''';

                if (logprob == ''' + str(2 ** args.transition_bits - 1) + r''') {
                    logprob = 1;
                } else {
                    logprob = ''' + str(lower_bound) + r''' + logprob * ''' + str((upper_bound - lower_bound) / (2 ** args.transition_bits - 2)) + r''';
                }
                logprob = ''' + str(params[1]) + r''' + gammaPpf(logprob, ''' + str(params[0]) + r''') * ''' + str(params[2]) + r''';
            
                logprobs[all[i]] = logprob;
            }

            return function(string) {
                var stringLogprob = 0;
                var ofString = of(string);
                for (var i = 0; i < ofString.length; ++i) {
                    stringLogprob += logprobs[ofString[i]];    
                }
                if (stringLogprob == Infinity) {
                    return false;    
                }
                var wordsDensity = gammaPdf((stringLogprob - ''' + str(words_params[1]) + r''') / ''' + str(words_params[2]) + r''', ''' + str(words_params[0]) + r''') / ''' + str(words_params[2]) + r''';
                var nonwordsDensity = gammaPdf((stringLogprob - ''' + str(nonwords_params[1]) + r''') / ''' + str(nonwords_params[2]) + r''', ''' + str(nonwords_params[0]) + r''') / ''' + str(nonwords_params[2]) + r''';
                if (wordsDensity > nonwordsDensity) {
                    return true;
                }
                if (wordsDensity < nonwordsDensity) {
                    return false;
                }
                return Math.random() >= 0.5;
            };
        ''').encode()

        data, is_gzipped = bytes(data), False

        if args.gzip:
            import gzip
            print('  Gzipping...')
            gzipped_data = gzip.compress(data)
            if len(gzipped_data) < len(data):
                data, is_gzipped = gzipped_data, True

        return code, data, is_gzipped
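The flatten/unflatten trick in compile() pushes each transition's surprisal through the CDF of a fitted Gamma distribution so that the values become roughly uniform on [0, 1], quantizes them to args.transition_bits bits for packing, and later recovers approximate values with gamma.ppf (both in Python and in the generated JS). A minimal round-trip sketch of that idea, with an illustrative bit width and synthetic values (it skips the special top code the original reserves for cdf == 1):

import numpy as np
from scipy.stats import gamma

rng = np.random.default_rng(0)
values = rng.gamma(3.0, 1.2, size=10_000)   # stand-in surprisal values
params = gamma.fit(values)                  # (shape, loc, scale)

bits = 6
flat = gamma.cdf(values, *params)           # roughly uniform on [0, 1]
lo, hi = flat.min(), flat.max()
codes = np.round((flat - lo) * ((2**bits - 1) / (hi - lo))).astype(int)

recovered = gamma.ppf(lo + codes * ((hi - lo) / (2**bits - 1)), *params)
print(np.abs(recovered - values).max())     # error due to quantization only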
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma, norm
#from statsmodels.distributions.empirical_distribution import ECDF
from scipy.integrate import quad
from scipy.optimize import fsolve

a = 10  # shape
b = 2  # scale
n = 1000  # size

# task 1
samples_numpy = np.random.gamma(a, b, n)
samples_scipy = gamma.rvs(a=a, scale=b, size=n)

# task 2

print(gamma.ppf(0.01, a, scale=b))
print(gamma.ppf(0.99, a, scale=b))
x = np.linspace(gamma.ppf(0.000001, a, scale=b), gamma.ppf(0.99999999999, a, scale=b), n)
plt.plot(x, gamma.pdf(x, a, loc=0, scale=b))
plt.hist(samples_scipy, density=True)
plt.show()

loc = 30
scale = 3

samples_numpy = np.random.normal(loc, scale, n)
samples_scipy = norm.rvs(loc=loc, scale=scale, size=n)

print(norm.ppf(0.01, loc=loc, scale=scale))
print(norm.ppf(0.99, loc=loc, scale=scale))
x = np.linspace(norm.ppf(0.00000001, loc=loc, scale=scale),
                norm.ppf(0.99999999, loc=loc, scale=scale), n)
def pearscdf(X, mu, sigma, skew, kurt, method, k, output):

# pearscdf
#   [p,type,coefs] = pearscdf(X,mu,sigma,skew,kurt)
#
#   Returns the cumulative probability of the Pearson distribution
#   with mean `mu`, standard deviation `sigma`, skewness `skew` and
#   kurtosis `kurt`, evaluated over the interval X.
#
#   Some combinations of moments are not valid for any random variable, and in
#   particular, the kurtosis must be greater than the square of the skewness
#   plus 1.  The kurtosis of the normal distribution is defined to be 3.
#
#   The seven distribution types in the Pearson system correspond to the
#   following distributions:
#
#      Type 0: Normal distribution
#      Type 1: Four-parameter beta
#      Type 2: Symmetric four-parameter beta
#      Type 3: Three-parameter gamma
#      Type 4: Not related to any standard distribution.  Density proportional
#              to (1+((x-a)/b)^2)^(-c) * exp(-d*arctan((x-a)/b)).
#      Type 5: Inverse gamma location-scale
#      Type 6: F location-scale
#      Type 7: Student's t location-scale
#
#   Examples
#
#   See also
#       pearspdf pearsrnd mean std skewness kurtosis
#


#   References:
#      [1] Johnson, N.L., S. Kotz, and N. Balakrishnan (1994) Continuous
#          Univariate Distributions, Volume 1,  Wiley-Interscience.
#      [2] Devroye, L. (1986) Non-Uniform Random Variate Generation,
#          Springer-Verlag.

    otpt = len(output)
#    outClass = superiorfloat(mu, sigma, skew, kurt)

    if X[1] == inf:
        cdist = 1
        limstate = X[0]
    elif X[0] == -inf:
        cdist = 2
        limstate = X[1]
    else:
        cdist = 3
        limstate = X

    if sigma == 0:
        print "Warning: The standard deviation of output distribution",k,"is zero. No distribution or correlation can be calculated for it."
        if mu>=X[0] and mu<=X[1]:   #mean is in the limits
            return 1, None, inf, None, None, None, None, None, None, None, None
        else:                       #mean is outside the limits
            return 0, None, inf, None, None, None, None, None, None, None, None

    X = (X - mu) / sigma    # Z-score

    if method == 'MCS':
        beta1 = 0
        beta2 = 3
        beta3 = sigma ** 2
    else:
        beta1 = skew ** 2
        beta2 = kurt
        beta3 = sigma ** 2

    # Return NaN for illegal parameter values.
    if (sigma < 0) or (beta2 <= beta1 + 1):
        p = zeros(otpt)+nan
        #p = zeros(sizeout)+nan
        dtype = nan
        coefs = zeros((1,3))+nan
        print('Illegal parameter values passed to pearscdf! (sigma:', sigma, '  beta1:', beta1, '  beta2:', beta2, ')')
        return

    #% Classify the distribution and find the roots of c0 + c1*x + c2*x^2
    c0 = (4 * beta2 - 3 * beta1)# ./ (10*beta2 - 12*beta1 - 18);
    c1 = skew * (beta2 + 3)# ./ (10*beta2 - 12*beta1 - 18);
    c2 = (2 * beta2 - 3 * beta1 - 6)# ./ (10*beta2 - 12*beta1 - 18);

    if c1 == 0:    # symmetric dist'ns
        if beta2 == 3:
            dtype = 0
            a1 = 0
            a2 = 0
        else:
            if beta2 < 3:
                dtype = 2
            elif beta2 > 3:
                dtype = 7
            a1 = -sqrt(abs(c0 / c2))
            a2 = -a1        # symmetric roots
    elif c2 == 0:    # kurt = 3 + 1.5*skew^2
        dtype = 3
        a1 = -c0 / c1    # single root
        a2 = a1
    else:
        kappa = c1 ** 2 / (4 * c0 * c2)
        if kappa < 0:
            dtype = 1
        elif kappa < 1 - finfo(float64).eps:
            dtype = 4
        elif kappa <= 1 + finfo(float64).eps:
            dtype = 5
        else:
            dtype = 6
        # Solve the quadratic for general roots a1 and a2 and sort by their real parts
        csq=c1 ** 2 - 4 * c0 * c2
        if c1 ** 2 - 4 * c0 * c2 < 0:
            tmp = -(c1 + sign(c1) * cmath.sqrt(c1 ** 2 - 4 * c0 * c2)) / 2
        else:
            tmp = -(c1 + sign(c1) * sqrt(c1 ** 2 - 4 * c0 * c2)) / 2
        a1 = tmp / c2
        a2 = c0 / tmp
        if (real(a1) > real(a2)):
            tmp = a1;
            a1 = a2;
            a2 = tmp;

    denom = (10 * beta2 - 12 * beta1 - 18)

    if abs(denom) > sqrt(finfo(double).tiny):
        c0 = c0 / denom
        c1 = c1 / denom
        c2 = c2 / denom
        coefs = [c0, c1, c2]
    else:
        dtype = 1    # this should have happened already anyway
        # beta2 = 1.8 + 1.2*beta1, and c0, c1, and c2 -> Inf.  But a1 and a2 are
        # still finite.
        coefs = zeros((1,3))+inf

    if method == 'MCS':
        dtype = 8

    #% Generate standard (zero mean, unit variance) values
    if dtype == 0:
        # normal: standard support (-Inf,Inf)
        #     m1 = zeros(outClass);
        #     m2 = ones(outClass);
        m1 = 0
        m2 = 1
        p = norm.cdf(X[1], m1, m2) - norm.cdf(X[0], m1, m2)
        lo= norm.ppf( 3.39767E-06, mu,sigma ); 
        hi= norm.ppf( 0.999996602, mu,sigma );
        Inv1 = norm.ppf(p, 0, 1)
        #     Inv1=norm.ppf( normcdf(X[0],m1,m2), 0,1 );
        #Inv2 = norm.ppf(normcdf(X[1], m1, m2), 0, 1)

    elif dtype == 1:
        # four-parameter beta: standard support (a1,a2)
        if abs(denom) > sqrt(finfo(double).tiny):
            m1 = (c1 + a1) / (c2 * (a2 - a1))
            m2 = -(c1 + a2) / (c2 * (a2 - a1))
        else:
            # c1 and c2 -> Inf, but c1/c2 has finite limit
            m1 = c1 / (c2 * (a2 - a1))
            m2 = -c1 / (c2 * (a2 - a1))
        #     r = a1 + (a2 - a1) .* betarnd(m1+1,m2+1,sizeOut);
        X = (X - a1) / (a2 - a1)    # Transform to 0-1 interval
        #     lambda = -(a2-a1)*(m1+1)./(m1+m1+2)-a1;
        #     X = (X - lambda - a1)./(a2-a1);

        alph=m1+1
        beta=m2+1
        if alph < 1.001 and beta < 1.001:
            alph=1.001
            beta=1.001

        mode=(alph-1)/(alph+beta-2)

        if mode < 0.1: 
            if alph > beta:
                alph = max(2.0,alph)
                beta = (alph-1)/0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0,beta)
                alph = (0.1*(beta -2) +1)/(1 - 0.1)
        elif mode > 0.9:
            if alph > beta:
                alph = max(2.0,alph)
                beta =(alph-1)/0.9 - alph + 2
            elif beta > alph:
                beta = max(2.0,beta);
                alph = (0.1*(beta -2) +1)/(1 - 0.1)

        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo=a1*sigma+mu;
        hi=a2*sigma+mu;
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( beta.cdf(X[0],m1+1,m2+1), 0,1 );
        #Inv2 = norm.ppf(beta.cdf(X[1], m1 + 1, m2 + 1), 0, 1)

        #     X = X*(a2-a1) + a1;         % Undo interval tranformation
        #     r = r + (0 - a1 - (a2-a1).*(m1+1)./(m1+m2+2));
    elif dtype == 2:
        # symmetric four-parameter beta: standard support (-a1,a1)
        m = (c1 + a1) / (c2 * 2 * abs(a1))
        m1 = m
        m2 = m
        X = (X - a1) / (2 * abs(a1))
        #     r = a1 + 2*abs(a1) .* betapdf(X,m+1,m+1);

        alph=m+1;
        beta=m+1;
        if alph < 1.01: 
            alph=1.01
            beta=1.01

        p = stats.beta.cdf(X[1], alph, beta) - stats.beta.cdf(X[0], alph, beta)
        lo=a1*sigma+mu;
        hi=a2*sigma+mu;
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( beta.cdf(X[0],m+1,m+1), 0,1 );
        #Inv2 = norm.ppf(beta.cdf(X[1], m + 1, m + 1), 0, 1)

        #     X = a1 + 2*abs(a1).*X;
    elif dtype == 3:
        # three-parameter gamma: standard support (a1,Inf) or (-Inf,a1)
        m = (c0 / c1 - c1) / c1
        m1 = m
        m2 = m
        X = (X - a1) / c1
        #     r = c1 .* gampdf(X,m+1,1,sizeOut) + a1;

        p = gamma.cdf(X[1], m + 1, 1) - gamma.cdf(X[0], m + 1, 1)
        lo=(gamma.ppf( 3.39767E-06, m+1, scale=1 )*c1+a1)*sigma+mu; 
        hi=(gamma.ppf( 0.999996602, m+1, scale=1 )*c1+a1)*sigma+mu;
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( gamcdf(X[0],m+1,1), 0,1 );
        #Inv2 = norm.ppf(gamcdf(X[1], m + 1, 1), 0, 1)

        #     X = c1 .* X + a1;
    elif dtype == 4:
        # Pearson IV is not a transformation of a standard distribution: density
        # proportional to (1+((x-lambda)/a)^2)^(-m) * exp(-nu*arctan((x-lambda)/a)),
        # standard support (-Inf,Inf)
        X = X * sigma + mu
        r = 6 * (beta2 - beta1 - 1) / (2 * beta2 - 3 * beta1 - 6)
        m = 1 + r / 2
        nu = -r * (r - 2) * skew / sqrt(16 * (r - 1) - beta1 * (r - 2) ** 2)
        a = sqrt(beta3 * (16 * (r - 1) - beta1 * (r - 2) ** 2)) / 4
        _lambda = mu - ((r - 2) * skew * sigma) / 4    # gives zero mean
        m1 = m
        m2 = nu
        #     X = (X - lambda)./a;
        if cdist == 1:
            p = 1 - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        elif cdist == 2:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma)
        elif cdist == 3:
            p = pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma) - pearson4cdf(X[0], m, nu, a, _lambda, mu, sigma)
        lo=norm.ppf( 3.39767E-06, mu,sigma );   
        hi=norm.ppf( 0.999996602, mu,sigma );
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( pearson4cdf(X[0],m,nu,a,lambda,mu,sigma), 0,1 );
        #Inv2 = norm.ppf(pearson4cdf(X[1], m, nu, a, _lambda, mu, sigma), 0, 1)

        #     C = X.*a + lambda;
        #     C = diff(C);
        #     C= C(1);
        #     p = p./(sum(p)*C);
    elif dtype == 5:
        # inverse gamma location-scale: standard support (-C1,Inf) or
        # (-Inf,-C1)
        C1 = c1 / (2 * c2)
        #     r = -((c1 - C1) ./ c2) ./ gampdf(X,1./c2 - 1,1) - C1;
        X = -((c1 - C1) / c2) / (X + C1)
        m1 = c2
        m2 = 0
        p = gamma.cdf(X[1], 1. / c2 - 1, scale=1) - gamma.cdf(X[0], 1. / c2 - 1, scale=1)
        lo=(-((c1-C1)/c2)/gamma.ppf( 3.39767E-06, 1/c2 - 1, scale=1 )-C1)*sigma+mu; 
        hi=(-((c1-C1)/c2)/gamma.ppf( 0.999996602, 1/c2 - 1, scale=1 )-C1)*sigma+mu; 
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( gamcdf(X[0],1./c2 - 1,1), 0,1 );
        #Inv2 = norm.ppf(gamcdf(X[1], 1. / c2 - 1, 1), 0, 1)

        #     X = -((c1-C1)./c2)./X-C1;
    elif dtype == 6:
        # F location-scale: standard support (a2,Inf) or (-Inf,a1)
        m1 = (a1 + c1) / (c2 * (a2 - a1))
        m2 = -(a2 + c1) / (c2 * (a2 - a1))
        # a1 and a2 have the same sign, and they've been sorted so a1 < a2
        if a2 < 0:
            nu1 = 2 * (m2 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a2) / (a2 - a1) * (nu2 / nu1)
            #         r = a2 + (a2 - a1) .* (nu1./nu2) .* fpdf(X,nu1,nu2);

            p = f.cdf(X[1], nu1, nu2) - f.cdf(X[0], nu1, nu2)
            lo=(f.ppf( 3.39767E-06, nu1,nu2)+a2)*sigma+mu
            hi=(f.ppf( 0.999996602, nu1,nu2)+a2)*sigma+mu
            Inv1 = norm.ppf(p, 0, 1)
            #             Inv1=norm.ppf( fcdf(X[0],nu1,nu2), 0,1 );
            #Inv2 = norm.ppf(fcdf(X[1], nu1, nu2), 0, 1)

            #         X = a2 + (a2-a1).*(nu1./nu2).*X
        else:        # 0 < a1
            nu1 = 2 * (m1 + 1)
            nu2 = -2 * (m1 + m2 + 1)
            X = (X - a1) / (a1 - a2) * (nu2 / nu1)
            #         r = a1 + (a1 - a2) .* (nu1./nu2) .* fpdf(X,nu1,nu2);

            p = -f.cdf(X[1], nu1, nu2) + f.cdf(X[0], nu1, nu2)
            hi=(-f.ppf( 3.39767E-06, nu1,nu2)+a1)*sigma+mu;
            lo=(-f.ppf( 0.999996602, nu1,nu2)+a1)*sigma+mu; 
            Inv1 = norm.ppf(p, 0, 1)
            #             Inv1=norm.ppf( fcdf(X[0],nu1,nu2), 0,1 );
            #Inv2 = norm.ppf(fcdf(X[1], nu1, nu2), 0, 1)

            #         X = a1 + (a1-a2).*(nu1./nu2).*X;
    elif dtype == 7:
        # t location-scale: standard support (-Inf,Inf)

        nu = 1. / c2 - 1
        X = X / sqrt(c0 / (1 - c2))
        m1 = nu
        m2 = 0
        p = t.cdf(X[1], nu) - t.cdf(X[0], nu)
        lo=t.ppf( 3.39767E-06, nu )*sqrt(c0/(1-c2))*sigma+mu
        hi=t.ppf( 0.999996602, nu )*sqrt(c0/(1-c2))*sigma+mu
        Inv1 = norm.ppf(p, 0, 1)
        #         Inv1=norm.ppf( tcdf(X[0],nu), 0,1 );
        #Inv2 = norm.ppf(tcdf(X[1], nu), 0, 1)

        #     p = sqrt(c0./(1-c2)).*tpdf(X,nu);
        #     X = sqrt(c0./(1-c2)).*X;
    else:
        print "ERROR: Unknown data type!"
#    elif dtype == 8:
        #Monte Carlo Simulation Histogram
#        out = kurt
#        p = skew
#        m1 = 0
#        m2 = 0

    # scale and shift
    # X = X.*sigma + mu; % Undo z-score

    if dtype != 1 and dtype != 2:
        mu_s=(mu-lo)/(hi-lo);
        sigma_s=sigma ** 2/(hi-lo) ** 2;
        alph = ((1-mu_s)/sigma_s -1/mu_s)*mu_s ** 2;
        beta = alph*(1/mu_s - 1);

    if alph >70 or beta>70:
        alph=70;
        beta=70;
        lo=mu-11.87434*sigma
        hi=2*mu-lo

    return p, dtype, Inv1, m1, m2, a1, a2, alph, beta, lo, hi
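The branching in pearscdf is driven by the quadratic c0 + c1*x + c2*x**2 built from the skewness and kurtosis; in particular kappa = c1**2 / (4*c0*c2) separates types I, IV, V and VI. A small self-contained sketch of just that classification step (the helper name is illustrative, not part of the original module):

import numpy as np

def pearson_type(skew, kurt):
    # Classify a (skewness, kurtosis) pair into a Pearson type (0-7),
    # mirroring the branching used in pearscdf above.
    beta1, beta2 = skew**2, kurt
    c0 = 4*beta2 - 3*beta1
    c1 = skew * (beta2 + 3)
    c2 = 2*beta2 - 3*beta1 - 6
    eps = np.finfo(float).eps
    if c1 == 0:
        return 0 if beta2 == 3 else (2 if beta2 < 3 else 7)
    if c2 == 0:
        return 3                      # kurt = 3 + 1.5*skew**2
    kappa = c1**2 / (4*c0*c2)
    if kappa < 0:
        return 1
    if kappa < 1 - eps:
        return 4
    if kappa <= 1 + eps:
        return 5
    return 6

print(pearson_type(0.0, 3.0))   # 0: normal
print(pearson_type(1.0, 5.0))   # 4: Pearson IV for this skew/kurtosis pair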
Beispiel #41
0
            line = file.readline()
        for _ in range(2):
            line = file.readline()
    tree2 = skbio.read(StringIO(line), 'newick', skbio.TreeNode)
    tree = get_tree(tree1, tree2)

    # Load rate categories
    with open(f'../asr_indel/out/{OGid}.iqtree') as file:
        line = file.readline()
        while not line.startswith('Model of rate heterogeneity:'):
            line = file.readline()
        num_categories = int(line.rstrip().split(' Gamma with ')[1][0])
        alpha = float(file.readline().rstrip().split(': ')[1])
    igfs = []  # Incomplete gamma function evaluations
    for i in range(num_categories + 1):
        x = gamma.ppf(i / num_categories, a=alpha, scale=1 / alpha)
        igfs.append(gammainc(alpha + 1, alpha * x))
    rates = []  # Normalized rates
    for i in range(num_categories):
        rate = num_categories * (igfs[i + 1] - igfs[i])
        rates.append((rate, 1 / num_categories))
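    # The loop above implements the mean-rate form of the discrete Gamma model of
    # rate variation (Yang 1994): category boundaries are the i/k quantiles of a
    # Gamma(alpha, scale=1/alpha) distribution (mean 1), and each category's rate is
    # the mean of the distribution between consecutive boundaries,
    # k * (I(alpha+1, alpha*x_{i+1}) - I(alpha+1, alpha*x_i)), with I the regularized
    # lower incomplete gamma function (scipy's gammainc), so the k rates average to 1.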

    # Load sequence and convert to vectors at tips of tree
    mca = read_fasta(f'../asr_indel/out/{OGid}.mfa')
    tips = {tip.name: tip for tip in tree.tips()}
    for header, seq in mca:
        tip = tips[header[1:5]]
        conditional = np.zeros((2, len(seq)))
        for j, sym in enumerate(seq):
            conditional[int(sym), j] = 1
        tip.conditional = conditional