Example #1
def generate_train_test_phenotypes(betas, train_snps, test_snps, h2=0.01):
    """
    Generate genotypes given betas and SNPs
    """
    (m, n) = train_snps.shape
    (test_m, test_n) = test_snps.shape
    assert len(betas) == m == test_m, 'Number of betas must match the number of SNPs in both sets'
    
    #Training phenotypes
    phen_noise = stats.norm.rvs(0, sp.sqrt(1.0 - h2), size=n) 
    phen_noise = sp.sqrt((1.0 - h2) / sp.var(phen_noise)) * phen_noise
    genetic_part = sp.dot(train_snps.T, betas)
    genetic_part = sp.sqrt(h2 / sp.var(genetic_part)) * genetic_part
    train_phen = genetic_part + phen_noise
#        print 'Herit:', sp.var(genetic_part) / sp.var(train_phen)        
    ret_dict = {}
    ret_dict['phen'] = train_phen
    betas_marg = (1. / n) * sp.dot(train_phen, train_snps.T)
    ret_dict['betas_marg'] = betas_marg
    
    #Testing phenotypes
    phen_noise = stats.norm.rvs(0, sp.sqrt(1.0 - h2), size=test_n) 
    phen_noise = sp.sqrt((1.0 - h2) / sp.var(phen_noise)) * phen_noise
    genetic_part = sp.dot(test_snps.T, betas)
    genetic_part = sp.sqrt(h2 / sp.var(genetic_part)) * genetic_part
    test_phen = genetic_part + phen_noise
    ret_dict['test_phen'] = test_phen
    return ret_dict
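A minimal usage sketch for the function above, assuming the snippet's sp/stats imports (scipy and scipy.stats). The shapes follow from the (m, n) unpacking and the train_snps.T dot products; all sizes and values here are illustrative:

import scipy as sp
from scipy import stats

m, n_train, n_test = 100, 500, 200
betas = stats.norm.rvs(0, sp.sqrt(0.01 / m), size=m)   # m per-SNP effects
train_snps = stats.norm.rvs(size=(m, n_train))         # SNPs x individuals
test_snps = stats.norm.rvs(size=(m, n_test))
d = generate_train_test_phenotypes(betas, train_snps, test_snps, h2=0.01)
print(sp.var(d['phen']), sp.var(d['test_phen']))       # both roughly 1 by construction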
Example #2
def generate_test_data_w_sum_stats(h2=0.5, n=100000, n_sample=100, m=50000, model='gaussian', 
                                         p=1.0, conseq_r2=0, m_ld_chunk_size=100):
    """
    Generate 
    """
    #Get LD sample matrix
    D_sample = genotypes.get_sample_D(200,conseq_r2=conseq_r2,m=m_ld_chunk_size)
    
    #Simulate beta_hats
    ret_dict = simulate_beta_hats(h2=h2, n=n, n_sample=n_sample, m=m, model=model, p=p, 
                                    conseq_r2=conseq_r2, m_ld_chunk_size=m_ld_chunk_size, D_sample=D_sample)
    
    #Simulate test genotypes
    test_snps = genotypes.simulate_genotypes_w_ld(n_sample=n_sample, m=m, conseq_r2=conseq_r2, 
                                                  m_ld_chunk_size=m_ld_chunk_size)
    ret_dict['test_snps'] = test_snps
    
    #Simulate test phenotypes
    phen_noise = stats.norm.rvs(0, sp.sqrt(1.0 - h2), size=n_sample) 
    phen_noise = sp.sqrt((1.0 - h2) / sp.var(phen_noise)) * phen_noise
    genetic_part = sp.dot(test_snps.T, ret_dict['betas'])
    genetic_part = sp.sqrt(h2 / sp.var(genetic_part)) * genetic_part
    test_phen = genetic_part + phen_noise
    ret_dict['test_phen'] = test_phen
    return ret_dict
Example #3
 def infer_diag_post(self,X_ii,D_i):
     
     X_i = dc(X_ii)
     ns = len(D_i)
     
     X_i.resize([ns,self.D])
     [m,V] = self.infer_diag(X_i,D_i)
     if sp.amin(V)<=-0.:
         class MJMError(Exception):
             pass
         print "negative/eq variance"
         print [m,V,X_i,D_i]
         print "_______________"
         #self.printc()
         raise(MJMError)
     if sp.amin(sp.var(m,axis=0))<-0.:
         class MJMError(Exception):
             pass
         print "negativevar of mean"
         print X_i.shape
         print [m,V,sp.var(m,axis=0),X_i,D_i]
         print "_______________"
         #self.printc()
         raise(MJMError)
     
     return [sp.mean(m,axis=0).reshape([1,ns]),(sp.mean(V,axis=0)+sp.var(m,axis=0)).reshape([1,ns])]
Example #4
 def _sqr_transform(self,  method='standard'):
     a = sp.array(self.values)
     if method == 'standard':
         vals = ((a - min(a)) + 0.1 * sp.var(a)) * ((a - min(a)) + 0.1 * sp.var(a))
     else:
         vals = a * a
     self._perform_transform(vals,"sqr")
     return True
Example #5
def prnt(filename, type, duration, run):
    sptp = EL.G.sig_per_turtle_p  # Sig per turtle based on percent of total significance
    sptn = EL.G.sig_per_turtle_n  # Sig per turtle based on number of significant patches visited
    open(filename, 'a').write(str(type) + ',' + str(EL.G.NUM_TURTLES[type]) + \
        ',' + str(duration) + ',' + str(run) + ',' + \
        str(EL.G.epprog) + ',' + str(EL.G.total_prog) + ',' + str(EL.G.percent_progress) + \
        ',' + str(EL.G.agents_peak) + ',' + str(EL.G.agents_hill) + ',' + str(EL.G.wasted_effort) + \
        ',,' + str(min(sptp)) + ',' + str(max(sptp)) + ',' + str(mean(sptp)) + ',' + str(median(sptp)) + \
        ',' + str(var(sptp)) + ',' + str(skew(sptp)) + ',,' + str(min(sptn)) + ',' + str(max(sptn)) + \
        ',' + str(mean(sptn)) + ',' + str(median(sptn)) + ',' + str(var(sptn)) + ',' + str(skew(sptn)) + '\n')
Example #6
def ftest(X, Y):
    ''' F-test to test variance equality.
    :param X: data 1
    :param Y: data 2
    :return: f and p-value of F-test
    '''
    F = scipy.var(X, ddof=1) / scipy.var(Y, ddof=1)  # ratio of sample variances
    df1, df2 = len(X) - 1, len(Y) - 1  # degrees of freedom are n - 1, not n
    pval = stats.f.cdf(F, df1, df2)  # one-sided (lower-tail) p-value
    return (F, pval)
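A hedged usage sketch, assuming scipy and scipy.stats are imported as in the snippet; the two samples are illustrative, and the returned p-value is the lower-tail probability from stats.f.cdf:

import scipy
from scipy import stats

X = stats.norm.rvs(0, 1.0, size=50)   # sample with variance ~1
Y = stats.norm.rvs(0, 2.0, size=50)   # sample with variance ~4
F, pval = ftest(X, Y)
print(F, pval)                        # F well below 1 for these inputs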
Example #7
    def power(self, currentSource, active, inactive, histories, discMesh):
        """
        power is the main method for the power method

        currentSource: Initial source for power method
        active: Number of active iterations
        inactive: Number of inactive iterations
        histories: Number histories per iteration
        discMesh: Mesh for discretization of FissionSource
        """
        self.k = 1

        self.eigEstI = []   # Estimate of eigenvalue from inactive iterations
        self.meanEigI = []  # Mean of the eigenvalues from inactive iterations
        self.varEigI = []   # Variance of the eigenvalues from inactive iterations
        self.eigEst = []    # Estimate of eigenvalue from active iterations
        self.meanEig = []   # Mean of the eigenvalues from active iterations
        self.varEig = []    # Variance of the eigenvalues from active iterations
        
        self.eigVector = [] # Eigenvector estimate for active iterations

        start = time.time()
        for i in xrange(inactive):
            nextSource = self.Markov_Transport(currentSource, histories)
            self.k = self.k*(len(currentSource)/float(histories))

            self.eigEstI.append(self.k)
            self.meanEigI.append(scipy.mean(self.eigEstI))    # Mean of inactive eigenvalue estimates
            self.varEigI.append(scipy.var(self.eigEstI))  # Variance of inactive eigenvalue estimates

            print "I: %5i, eigenvalue = %8.6f," %(i, self.k),
            print " time: %8.3f sec" %(time.time() - start)

            currentSource = nextSource

        print "------------------ACTIVE ITERATIONS------------------"
        for self.i in xrange(active):
            nextSource = self.Markov_Transport(currentSource, histories)
            self.k = self.k*(len(currentSource)/float(histories))

            self.eigEst.append(self.k)
            self.meanEig.append(scipy.mean(self.eigEst))    # Mean eigenvalue
            self.varEig.append(scipy.var(self.eigEst))  # Variance of the eigenvalue estimates

            print "A: %5i, eigenvalue = %8.6f," %(self.i, self.k),
            print " mean = %6.4f, std.dev = %6.4f, time: %8.3f sec" %(
                    self.meanEig[-1], math.sqrt(self.varEig[-1]),
                    (time.time() - start))

            # Discretized fissionSource
            discSource = nextSource.discretized(discMesh)
            discSource = discSource/sum(discSource)
            self.eigVector.append(discSource)

            currentSource = nextSource
Example #8
def variance_explained(spikes, means=None, noise=None):
    """ Returns the fraction of variance in each channel that is explained
    by the means.

    Values below 0 or above 1 for large data sizes indicate
    that some assumptions were incorrect (e.g. about channel noise) and
    the results should not be trusted.

    :param dict spikes: Dictionary, indexed by unit, of
        :class:`neo.core.SpikeTrain` objects (where the ``waveforms``
        member includes the spike waveforms) or lists of
        :class:`neo.core.Spike` objects.
    :param dict means: Dictionary, indexed by unit, of lists of
        spike waveforms as :class:`neo.core.Spike` objects or numpy arrays.
        Means for units that are not in this dictionary will be estimated
        using the spikes.
        Default: None - means will be estimated from given spikes.
    :type noise: Quantity 1D
    :param noise: The known noise levels (as variance) per channel of the
        original data. This should be estimated from the signal periods
        that do not contain spikes, otherwise the explained variance
        could be overestimated. If None, the estimate of explained variance
        is done without regard for noise.
        Default: None
    :return dict: A dictionary of arrays, both indexed by unit. If ``noise``
        is ``None``, the  dictionary contains
        the fraction of explained variance per channel without taking noise
        into account. If ``noise`` is given, it contains the fraction of
        variance per channel explained by the means and given noise level
        together.
    """
    ret = {}
    if means is None:
        means = {}
    for u, spks in spikes.iteritems():
        train = spks
        if not isinstance(train, neo.SpikeTrain):
            train = spikes_to_spike_train(spks)
        if u in means and means[u].waveform.shape[0] == train.waveforms.shape[1]:
            spike = means[u]
        else:
            spike = neo.Spike(0)
            spike.waveform = sp.mean(train.waveforms, axis=0)

        orig = sp.mean(sp.var(train.waveforms, axis=1), axis=0)
        waves = train.waveforms - spike.waveform
        new = sp.mean(sp.var(waves, axis=1), axis=0)

        if noise is not None:
            ret[u] = sp.asarray(1 - (new - noise) / orig)
        else:
            ret[u] = sp.asarray(1 - new / orig)

    return ret
Example #9
 def _measureColorEntryMonitor(self, colorentry, n=5):
     xyY_list = self.calibmonitor.measureGratingStimColor(
             colorentry.patch_stim_value, n)
     colorentry.monitor_xyY = (
             scipy.mean([xyY[0] for xyY in xyY_list]),
             scipy.mean([xyY[1] for xyY in xyY_list]),
             scipy.mean([xyY[2] for xyY in xyY_list]))
     colorentry.monitor_xyY_sd = (
             math.sqrt(scipy.var([xyY[0] for xyY in xyY_list])),
             math.sqrt(scipy.var([xyY[1] for xyY in xyY_list])),
             math.sqrt(scipy.var([xyY[2] for xyY in xyY_list])))
Example #10
def regress_erp(y, test_idx, predictor, events,  ns):
    event_types = events['uniqueLabel']
    labels = events['label']
    latencies = events['latencyInFrame']

    train_idx = ~test_idx
    ytrn = matrix(y[train_idx].tolist()).T

    #There is a specific test_set to use
    if (len(np.where(test_idx)[0])!=0):
        tst_start_idx = min(np.where(test_idx)[0])
        tst_end_idx = max(np.where(test_idx)[0])

    #Test on all the data
    else:
        tst_start_idx = min(np.where(~test_idx)[0])
        tst_end_idx = max(np.where(~test_idx)[0])

    train_idx_list= np.where(train_idx==1)[0]
    train_idx_list = array(train_idx_list, dtype=np.int).tolist()

    #Solve the system of equations y = Ax
    P = predictor[train_idx_list,:].T*predictor[train_idx_list,:]
    q = -predictor[train_idx_list, :].T*ytrn
    rerp_vec = solvers.coneqp(P, q)['x']

    yestimate = array(predictor*rerp_vec)
    y_temp = matrix(y.tolist()).T
    noise = y_temp-yestimate


    events_to_test = np.where((array(latencies)<tst_end_idx) & (array(latencies)>tst_start_idx))[0]
    gc.disable()
    #Compute performance stats
    stats = np.empty((len(event_types),2))
    for i, this_type in enumerate(event_types):
        this_stat = np.empty((0,2))
        for j, event_idx in enumerate(events_to_test):
            this_event=labels[event_idx]
            if this_event==this_type:
                start_idx = latencies[event_idx];
                end_idx = np.minimum(tst_end_idx, start_idx+ns)

                yblock = y[start_idx:end_idx]
                noiseblock = noise[start_idx:end_idx]
                this_stat = np.append(this_stat, array([[sp.var(yblock)], [sp.var(noiseblock)]]).T, axis=0)

        rov_raw = this_stat[:,0]-this_stat[:,1]
        rov_nor = rov_raw/this_stat[:,0]
        rov = array([sp.mean(rov_raw), sp.mean(rov_nor)])
        stats[i,:] =  rov

    gc.enable()
    return stats, np.reshape(array(rerp_vec),(-1, ns)).T
Example #11
def MLE_iteration_constrain(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
	psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
	iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
	beta_0=sum(psi1)/len(psi1);
	beta_1=sum(psi2)/len(psi2);
	var1=10*scipy.var(numpy.array(psi1)-beta_0);
	var2=10*scipy.var(numpy.array(psi2)-beta_1);
	if var1<=0.01:
		var1=0.01;
	if var2<=0.01:
		var2=0.01;
	print('var1');print(var1);print('var2');print(var2);
	while((iter_cutoff>0.01)&(count<=iter_maxrun)):
		count+=1;
		#iteration of beta
		beta_0=sum(psi1)/len(psi1);
		beta_1=sum(psi2)/len(psi2);
		print('var1');print(var1);print('var2');print(var2);
		#if abs(sum(psi1)/len(psi1)-sum(psi2)/len(psi2))>cutoff:
		if (sum(psi1)/len(psi1))>(sum(psi2)/len(psi2)):#minize psi2 if this is the case
			xopt = fmin_l_bfgs_b(myfunc_1,[sum(psi2)/len(psi2)],myfunc_der_1,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1)
			theta2 = max(min(float(xopt[0]),1-cutoff),0);theta1=theta2+cutoff;
		else:#minize psi1 if this is the case
			xopt = fmin_l_bfgs_b(myfunc_2,[sum(psi1)/len(psi1)],myfunc_der_2,args=[psi1,psi2,var1,var2],bounds=[[0.001,0.999-cutoff]],iprint=-1)
			theta1 = max(min(float(xopt[0]),1-cutoff),0);theta2=theta1+cutoff;
		print('constrain_1xopt');print('theta');print(theta1);print(theta2);print(xopt);
		#else:
		#	theta1=sum(psi1)/len(psi1);theta2=sum(psi2)/len(psi2);
		beta_0=theta1;beta_1=theta2;
		#iteration of psi
		new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0;
		print('constrain_2xopt');
		for i in range(len(psi1)):
			xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
			new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
			#likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
		for i in range(len(psi2)):
			xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
			new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
			#likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
		print('new_psi[0]');print(new_psi1[0]);print(new_psi2[0]);
		psi1=new_psi1;psi2=new_psi2;
		print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
		if count>1:
			iter_cutoff=abs(previous_sum-current_sum);
		previous_sum=current_sum;
	#print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
	#print(xopt);
	return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
Example #12
 def _box_cox_transform(self, verbose=False, method='standard'):
     """
     Performs the Box-Cox transformation, over different ranges, picking the optimal one w. respect to normality.
     """
     from scipy import stats
     a = sp.array(self.values)
     if method == 'standard':
         vals = (a - min(a)) + 0.1 * sp.var(a)
     else:
         vals = a
     sw_pvals = []
     lambdas = sp.arange(-2.0, 2.1, 0.1)
     for l in lambdas:
         if l == 0:
             vs = sp.log(vals)
         else:
             vs = ((vals ** l) - 1) / l
         r = stats.shapiro(vs)
         if sp.isfinite(r[0]):
             pval = r[1]
         else:
             pval = 0.0
         sw_pvals.append(pval)
     i = sp.argmax(sw_pvals)
     l = lambdas[i]
     if l == 0:
         vs = sp.log(vals)
     else:
         vs = ((vals ** l) - 1) / l
     self._perform_transform(vs,"box_cox")
     log.debug('optimal lambda was %0.1f' % l)
     return True
Example #13
def simulate_betas(num_traits=1000,
                   p=0.1,
                   m=100,
                   h2=0.5,
                   effect_prior='gaussian',
                   verbose=False):
    betas_list = []
    for i in range(num_traits):
        if effect_prior == 'gaussian':
            if p == 1.0:
                betas = stats.norm.rvs(0, sp.sqrt(h2 / m), size=m)
            else:
                M = int(round(m * p))
                betas = sp.concatenate(
                    (stats.norm.rvs(0, sp.sqrt(h2 / M),
                                    size=M), sp.zeros(m - M, dtype=float)))
        elif effect_prior == 'laplace':
            if p == 1.0:
                betas = stats.laplace.rvs(scale=sp.sqrt(h2 / (2 * m)), size=m)
            else:
                M = int(round(m * p))
                betas = sp.concatenate(
                    (stats.laplace.rvs(scale=sp.sqrt(h2 / (2 * M)),
                                       size=M), sp.zeros(m - M, dtype=float)))

        betas_var = sp.var(betas)
        beta_scalar = sp.sqrt(h2 / (m * betas_var))
        betas = betas * beta_scalar
        sp.random.shuffle(betas)
        betas_list.append(betas)
    return sp.array(betas_list)
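An illustrative call, assuming the snippet's sp/stats imports. Each row of the result is one simulated trait whose effects are rescaled so that sp.var(betas) * m equals h2 exactly:

betas = simulate_betas(num_traits=10, p=0.1, m=100, h2=0.5, effect_prior='laplace')
print(betas.shape)             # (10, 100)
print(sp.var(betas[0]) * 100)  # 0.5, i.e. h2, by the rescaling step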
Example #14
def _exp_transform(values, standard=True):
    a = sp.array(values)
    if standard:
        vals = sp.exp((a - min(a)) + 0.1 * sp.var(a))
    else:
        vals = sp.exp(a)
    return vals
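A small sketch of the standalone helper above; with standard=True the values are shifted to start at 0.1 * sp.var(a) before exponentiation, so negative inputs are fine:

import scipy as sp
vals = _exp_transform([0.5, -1.2, 3.3, 0.0], standard=True)
print(vals.min() >= 1.0)   # True: exp of a strictly positive shifted array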
Example #15
def main():
    args = parse_args()
    dt = 1.32e-14
    with h5py.File(args.filename) as h5:
        no_scans, no_steps, no_angles, no_pulses = h5["raw_quadratures"].shape
        if args.scans == "all":
            scans = range(no_scans)
        else:
            scans = args.scans
        t = linspace(0, no_steps * dt, no_steps)
        dphi = h5["corrected_angles"][0, 0, 1] - h5["corrected_angles"][0, 0,
                                                                        0]
        phi = linspace(0.5 * pi, 2.5 * pi, int(round(2. * pi / dphi)))  # num must be an int
        av_mean = scipy.zeros((len(phi), len(t)), dtype=scipy.float32)
        for i_scan in scans:
            for i_step in xrange(no_steps):
                print i_scan, i_step
                ip = interp1d(
                    h5["corrected_angles"][i_scan, i_step],
                    var(h5["standardized_quadratures"][i_scan, i_step],
                        axis=1))
                av_mean[:, i_step] += ip(phi)
        from matplotlib import pyplot
        pyplot.imshow(av_mean)
        pyplot.show()
Example #16
    def infer_diag_post(self,X_,D_i):
        X_i = copy.copy(X_)
        ns = len(D_i)

        X_i.resize([ns,self.d])
        [m,V] = self.infer_diag(X_i,D_i)
        if sp.amin(V)<=-0.:
            print( "negative/eq variance")
            print( "_______________")
            raise(flow_Error)
        if sp.amin(sp.var(m,axis=0))<-0.:
            print( "negativevar of mean")
            print( "_______________")
            raise(flow_Error)

        return [sp.mean(m,axis=0).reshape([1,ns]),(sp.mean(V,axis=0)+sp.var(m,axis=0)).reshape([1,ns])]
Example #17
def _box_cox_transform(values, standard=True):
    """
    Performs the Box-Cox transformation, over different ranges, picking the optimal one w. respect to normality.
    """
    a = sp.array(values)
    if standard:
        vals = (a - min(a)) + 0.1 * sp.var(a)
    else:
        vals = a
    sw_pvals = []
    lambdas = sp.arange(-2.0, 2.1, 0.1)
    for l in lambdas:
        if l == 0:
            vs = sp.log(vals)
        else:
            vs = ((vals**l) - 1) / l
        r = stats.shapiro(vs)
        if sp.isfinite(r[0]):
            pval = r[1]
        else:
            pval = 0.0
        sw_pvals.append(pval)
    i = sp.argmax(sw_pvals)
    l = lambdas[i]
    if l == 0:
        vs = sp.log(vals)
    else:
        vs = ((vals**l) - 1) / l
    return vs
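A hedged example for the standalone Box-Cox helper above, assuming sp and stats are imported as in the snippet; exponentially distributed data is a classic case where a log-like lambda near 0 wins the Shapiro-Wilk scan:

skewed = stats.expon.rvs(size=200)           # right-skewed input
transformed = _box_cox_transform(skewed, standard=True)
print(stats.shapiro(transformed))            # normality should improve vs. the raw data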
Example #18
def mergelines(x, y):
    minx = max([min(i) for i in x])
    maxx = min([max(i) for i in x])
    fs = []
    for i in xrange(len(x)):
        #print [x[i].shape,y[i].shape]

        fs.append(interp1d(x[i], y[i]))
    X = [
        i for i in sorted(sp.hstack([sp.array(j) for j in x]))
        if i <= maxx and i >= minx
    ]
    np = len(X)
    X = sp.array(X)
    Y = sp.empty(np)
    ub = sp.empty(np)
    lb = sp.empty(np)
    for i in xrange(np):
        q = [j(X[i]) for j in fs]
        Y[i] = sp.mean(q)
        v = sp.var(q)
        ub[i] = Y[i] + 2. * sp.sqrt(v)
        lb[i] = Y[i] - 2. * sp.sqrt(v)

    return X, Y, lb, ub
Example #19
def ar1fit(ts):
    '''
    Fits an AR(1) model to the time series data ts.  AR(1) is a
    linear model of the form
       x_t = beta * x_{t-1} + c + e_{t-1}
    where beta is the coefficient of the term x_{t-1}, c is a constant
    and e_{t-1} is an i.i.d. noise term.  Here we assume that e_{t-1}
    is normally distributed.
    Returns the tuple (beta, c, sigma).
    '''
    # Fitting AR(1) entails finding beta, c, and the noise term.
    # Beta is well approximated by the coefficient of OLS regression
    # on the lag of the data with itself.  Since the noise term is
    # assumed to be i.i.d. and normal, we must only estimate sigma,
    # the standard deviation.

    # Estimate beta
    x = ts[0:-1]
    y = ts[1:]
    p = sp.polyfit(x, y, 1)
    beta = p[0]

    # Estimate c
    c = sp.mean(ts) * (1 - beta)

    # Estimate the variance from the residuals of the OLS regression.
    yhat = sp.polyval(p, x)
    variance = sp.var(y - yhat)
    sigma = sp.sqrt(variance)

    return beta, c, sigma
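A quick self-check sketch: simulate an AR(1) series with known parameters and confirm ar1fit approximately recovers them (sp as scipy; values illustrative):

import scipy as sp

beta_true, c_true, sigma_true = 0.8, 0.5, 0.1
ts = sp.zeros(2000)
for t in range(1, len(ts)):
    ts[t] = beta_true * ts[t - 1] + c_true + sigma_true * sp.randn()
print(ar1fit(ts))   # roughly (0.8, 0.5, 0.1)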
Example #20
    def GRDRun(self, chains):
        """This is a implementation of the Gelman Rubin diagnostic"""
        mean_chain = []
        var_chain = []

        if len(chains) == 1:
            lchain = len(chains[0]) // 2
            chains = [chains[0][:lchain], chains[0][lchain:]]
        else:
            clen = [len(chain) for chain in chains]
            if len(set(clen)) == 1:
                lchain = clen[0]
            else:
                #print('take same # steps', clen)
                lchain = min(clen)

        try:
            for chain in chains:
                mean_chain.append(sp.mean(chain[-lchain:], axis=0))
                var_chain.append(sp.var(chain[-lchain:], axis=0))
        except:
            return 1

        M = sp.mean(mean_chain, axis=0)
        W = sp.mean(var_chain, axis=0)

        B = sum([(b - M)**2 for b in mean_chain])
        B = lchain / (len(chains) - 1.) * B
        R = (1. - 1. / lchain) * W + B / lchain

        result = sp.array(sp.absolute(1 - sp.sqrt(R / W)))
        return result
Example #21
def univariate_gelman_rubin(chains):
    """
	http://www.stat.columbia.edu/~gelman/research/published/brooksgelman2.pdf
	dim 0: nchains
	dim 1: nsteps
	"""
    nchains = len(chains)
    mean = scipy.asarray([scipy.mean(chain, axis=0) for chain in chains])
    variance = scipy.asarray(
        [scipy.var(chain, ddof=1, axis=0) for chain in chains])
    nsteps = scipy.asarray([len(chain) for chain in chains])
    Wn1 = scipy.mean(variance)
    Wn = scipy.mean((nsteps - 1.) / nsteps * variance)
    B = scipy.var(mean, ddof=1)
    V = Wn + (nchains + 1.) / nchains * B
    return scipy.sqrt(V / Wn1)
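An illustrative check: two independent, well-mixed chains drawn from the same distribution should give a statistic close to 1 (scipy imported as in the snippet):

chains = [scipy.randn(5000), scipy.randn(5000)]
print(univariate_gelman_rubin(chains))   # ~1.0 for converged chains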
Example #22
    def fish_many():
        # For inspecting variable contents
        def variable_confirm(c1, c2):
            line_f = list_format(c2)
            line = "---------------------------"
            print(c1.head())
            print(line)
            print(line_f.format())
            print(line)
            print("c2の平均 >> " + str(f'{c2.mean():.3f}'))

        # Sample data of 10,000 fish
        data = pd.read_csv("/root/app/sts4_csv.csv")["length"]
        # Randomly sample 10 fish
        rmdata10 = np.random.choice(data, size=10, replace=False)

        # Prepare the population distribution
        base_mean = data.mean()
        base_std = sp.std(data, ddof=0)  # population standard deviation
        base_var = sp.var(data, ddof=0)  # population variance

        # Draw the graph
        def sigma_graph(list, op1, op2, op3):
            title = "fish_population_graph"
            plt.title(title)
            graph = sns.distplot(list, kde=False, color='black')
            # Display
            canvas = f.image_graph(graph, title)
            canvas.view_option(op1, op2, op3)

        # run function
        # variable_confirm(data, rmdata10)
        sigma_graph(data, base_mean, base_std, base_var)
Example #23
    def calc_opt_rho(self):
        from limix_core.covar import FreeFormCov
        from limix_core.gp import GP2KronSumLR

        _covs = sp.concatenate([self.F, self.W, self.x], 1)
        xoE = self.x * self.Env
        gp = GP2KronSumLR(Y=self.y,
                          F=_covs,
                          A=sp.eye(1),
                          Cn=FreeFormCov(1),
                          G=xoE)
        gp.covar.Cr.setCovariance(1e-4 * sp.ones((1, 1)))
        gp.covar.Cn.setCovariance(0.02 * sp.ones((1, 1)))
        gp.optimize(verbose=False)

        # var_xEEx = sp.tr(xEEx P)/(n-1) = sp.tr(PW (PW)^T)/(n-1) = (PW**2).sum()/(n-1)
        # W = xE

        # variance heterogeneity
        var_xEEx = ((xoE - xoE.mean(0))**2).sum()
        var_xEEx /= float(self.y.shape[0] - 1)
        v_het = gp.covar.Cr.K()[0, 0] * var_xEEx

        # variance of the persistent effect
        v_comm = sp.var(gp.b()[-1] * self.x)

        rho = v_het / (v_comm + v_het)

        return rho
Example #24
 def _box_cox_transform(self, verbose=False, method='standard'):
     """
     Performs the Box-Cox transformation, over different ranges, picking the optimal one w. respect to normality.
     """
     from scipy import stats
     a = sp.array(self.values)
     if method == 'standard':
         vals = (a - min(a)) + 0.1 * sp.var(a)
     else:
         vals = a
     sw_pvals = []
     lambdas = sp.arange(-2.0, 2.1, 0.1)
     for l in lambdas:
         if l == 0:
             vs = sp.log(vals)
         else:
             vs = ((vals**l) - 1) / l
         r = stats.shapiro(vs)
         if sp.isfinite(r[0]):
             pval = r[1]
         else:
             pval = 0.0
         sw_pvals.append(pval)
     i = sp.argmax(sw_pvals)
     l = lambdas[i]
     if l == 0:
         vs = sp.log(vals)
     else:
         vs = ((vals**l) - 1) / l
     self._perform_transform(vs, "box_cox")
     log.debug('optimal lambda was %0.1f' % l)
     return True
Example #25
def DataArrayStatisticsReport(parent, titleString, tempdata):
    scrolledText = tk_stxt.ScrolledText(parent, width=textboxWidth, height=textboxHeight, wrap=tk.NONE)
    scrolledText.insert(tk.END, titleString + '\n\n')
    
    # must at least have max and min
    minData = min(tempdata)
    maxData = max(tempdata)
    
    if maxData == minData:
        scrolledText.insert(tk.END, 'All data has the same value,\n')
        scrolledText.insert(tk.END, "value = %-.16E\n" % (minData))
        scrolledText.insert(tk.END, 'statistics cannot be calculated.')
    else:
        scrolledText.insert(tk.END, "max = %-.16E\n" % (maxData))
        scrolledText.insert(tk.END, "min = %-.16E\n" % (minData))
        
        try:
            temp = scipy.mean(tempdata)
            scrolledText.insert(tk.END, "mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "mean gave error in calculation\n")

        try:
            temp = scipy.stats.sem(tempdata)
            scrolledText.insert(tk.END, "standard error of mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "standard error of mean gave error in calculation\n")

        try:
            temp = scipy.median(tempdata)
            scrolledText.insert(tk.END, "median = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "median gave error in calculation\n")

        try:
            temp = scipy.var(tempdata)
            scrolledText.insert(tk.END, "variance = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "variance gave error in calculation\n")

        try:
            temp = scipy.std(tempdata)
            scrolledText.insert(tk.END, "std. deviation = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "std. deviation gave error in calculation\n")

        try:
            temp = scipy.stats.skew(tempdata)
            scrolledText.insert(tk.END, "skew = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "skew gave error in calculation\n")

        try:
            temp = scipy.stats.kurtosis(tempdata)
            scrolledText.insert(tk.END, "kurtosis = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "kurtosis gave error in calculation\n")
            
    return scrolledText
Example #26
 def dumpSeries(self):
     for series in self.series:
         print "name:",series.getFullName()
         
         for index,value in enumerate(series):
             print value
             #print "index=",index, " , value=",value
         print "avg=",scipy.average(series)," , variance=",scipy.var(series), " , stddev=",scipy.std(series)
Example #27
def indof_constfeatures(X, axis=0):
    '''
    Assumes features are columns (by default, but can do rows), and checks to see if all features are simply constants,
    such that it is equivalent to a bias and nothing else
    '''
    featvar = sp.var(X, axis=axis)
    badind = sp.nonzero(featvar == 0)[0]
    return badind
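A tiny sketch: the middle column below is constant, so its index is returned:

import scipy as sp
X = sp.array([[1., 5., 2.],
              [3., 5., 4.],
              [2., 5., 9.]])
print(indof_constfeatures(X))   # -> [1]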
Example #28
 def _exp_transform(self, method='standard'):
     a = sp.array(self.values)
     if method == 'standard':
         vals = sp.exp((a - min(a)) + 0.1 * sp.var(a))
     else:
         vals = sp.exp(a)
     self._perform_transform(vals, "exp")
     return True
Example #29
 def test_basic(self):
     time_stream, ra, dec, az, el, time, mask_inds = \
             self.Maker.preprocess_data(self.Blocks)
     self.assertTrue(sp.allclose(sp.mean(time_stream, 1), 0, atol=0.2))
     self.assertTrue(sp.allclose(sp.var(time_stream, 1), self.norms[0,:], 
                                 rtol=0.4))
     self.assertTrue(sp.allclose(self.Maker.channel_vars,
                                 self.norms[0,:], rtol=0.4))
Example #30
def findAccessAnomalies(data):
	# breaks down minute-long intervals from data
	intervalDict = {}
	for access in data:
		# breaks to 10-second intervals
		seconds = int(access[3])
		seconds = seconds - (seconds%10)

		key = (int(access[1]), int(access[2]), seconds)
		if key in intervalDict:
			intervalDict[key].append(access)
		else:
			intervalDict[key] = [access]

	totAccess = [len(intervalDict[key]) for key in intervalDict]
	totAccessMean = sc.mean(totAccess)
	totAccessVar = sc.var(totAccess)
	# print totAccessMean
	# print totAccessVar

	clientAccess = []
	clientDict = {}
	for key in intervalDict:
		count = Counter([access[10] for access in intervalDict[key]])
		for ckey in count:
			clientAccess.append(count[ckey])
			clientDict[(key[0], key[1], key[2], ckey)] = count[ckey]

	clientAccessMean = sc.mean(clientAccess)
	clientAccessVar = sc.var(clientAccess)
	# print clientAccessMean
	# print clientAccessVar

	clientAttackProb = {}
	for key in clientDict:
		totProb = totAccessVar/pow((totAccessMean-len(intervalDict[(key[0],key[1],key[2])])),2)
		clientProb = clientAccessVar/pow((clientAccessMean-clientDict[key]),2)
		prob = (totProb + clientProb)/2
		clientAttackProb[key] = prob

	arr = []
	for i in range(10):
		minKey = min(clientAttackProb, key=clientAttackProb.get)
		arr.append((minKey, clientAttackProb[minKey]))
		clientAttackProb.pop(minKey, None)
	return arr
Example #31
 def test_basic(self):
     time_stream, ra, dec, az, el, time, mask_inds = \
             self.Maker.preprocess_data(self.Blocks)
     self.assertTrue(sp.allclose(sp.mean(time_stream, 1), 0, atol=0.2))
     self.assertTrue(
         sp.allclose(sp.var(time_stream, 1), self.norms[0, :], rtol=0.4))
     self.assertTrue(
         sp.allclose(self.Maker.channel_vars, self.norms[0, :], rtol=0.4))
Example #32
    def eval(self, f=lambda x: 1):
        """"
        evaluate function on the values
        and take weighted average.
        """

        if self.weights is None:
            vec = [f(v) for v in self.values]
            result = mean(vec)
            variance = var(vec)
        else:
            v = self.values
            vec = [f(v[i]) * w for i, w in enumerate(self.weights)]
            result = sum(vec)
            variance = var(vec)

        return result, variance
Example #33
 def _exp_transform(self, method='standard'):
     a = sp.array(self.values)
     if method == 'standard':
         vals = sp.exp((a - min(a)) + 0.1 * sp.var(a))
     else:
         vals = sp.exp(a)
     self._perform_transform(vals,"exp")
     return True
Example #34
def indof_constfeatures(X,axis=0):
    '''
    Assumes features are columns (by default, but can do rows), and checks to see if all features are simply constants,
    such that it is equivalent to a bias and nothing else
    '''
    featvar=sp.var(X,axis=axis)
    badind = sp.nonzero(featvar==0)[0]
    return badind
Example #35
def DataArrayStatistics(inArray):
    returnString = '' # build this as we progress
    
    # must at least have max and min
    minData = min(inArray)
    maxData = max(inArray)
    
    if maxData == minData:
        returnString += 'All data has the same value,\n'
        returnString += "value = %-.16E\n" % (minData)
        returnString += 'statistics cannot be calculated.'
    else:
        returnString += "max = %-.16E\n" % (maxData)
        returnString += "min = %-.16E\n" % (minData)
        
        try:
            temp = scipy.mean(inArray)
            returnString += "mean = %-.16E\n" % (temp)
        except:
            returnString += "mean gave error in calculation\n"

        try:
            temp = scipy.stats.sem(inArray)
            returnString += "standard error of mean = %-.16E\n" % (temp)
        except:
            returnString += "standard error of mean gave error in calculation\n"

        try:
            temp = scipy.median(inArray)
            returnString += "median = %-.16E\n" % (temp)
        except:
            returnString += "median gave error in calculation\n"

        try:
            temp = scipy.var(inArray)
            returnString += "variance = %-.16E\n" % (temp)
        except:
            returnString += "variance gave error in calculation\n"

        try:
            temp = scipy.std(inArray)
            returnString += "std. deviation = %-.16E\n" % (temp)
        except:
            returnString += "std. deviation gave error in calculation\n"

        try:
            temp = scipy.stats.skew(inArray)
            returnString += "skew = %-.16E\n" % (temp)
        except:
            returnString += "skew gave error in calculation\n"

        try:
            temp = scipy.stats.kurtosis(inArray)
            returnString += "kurtosis = %-.16E\n" % (temp)
        except:
            returnString += "kurtosis gave error in calculation\n"
    
    return returnString
Example #36
def DataArrayStatistics(inArray):
    returnString = ''  # build this as we progress

    # must at least have max and min
    minData = min(inArray)
    maxData = max(inArray)

    if maxData == minData:
        returnString += 'All data has the same value,\n'
        returnString += "value = %-.16E\n" % (minData)
        returnString += 'statistics cannot be calculated.'
    else:
        returnString += "max = %-.16E\n" % (maxData)
        returnString += "min = %-.16E\n" % (minData)

        try:
            temp = scipy.mean(inArray)
            returnString += "mean = %-.16E\n" % (temp)
        except:
            returnString += "mean gave error in calculation\n"

        try:
            temp = scipy.stats.sem(inArray)
            returnString += "standard error of mean = %-.16E\n" % (temp)
        except:
            returnString += "standard error of mean gave error in calculation\n"

        try:
            temp = scipy.median(inArray)
            returnString += "median = %-.16E\n" % (temp)
        except:
            returnString += "median gave error in calculation\n"

        try:
            temp = scipy.var(inArray)
            returnString += "variance = %-.16E\n" % (temp)
        except:
            returnString += "variance gave error in calculation\n"

        try:
            temp = scipy.std(inArray)
            returnString += "std. deviation = %-.16E\n" % (temp)
        except:
            returnString += "std. deviation gave error in calculation\n"

        try:
            temp = scipy.stats.skew(inArray)
            returnString += "skew = %-.16E\n" % (temp)
        except:
            returnString += "skew gave error in calculation\n"

        try:
            temp = scipy.stats.kurtosis(inArray)
            returnString += "kurtosis = %-.16E\n" % (temp)
        except:
            returnString += "kurtosis gave error in calculation\n"

    return returnString
Example #37
def detect_skew(img, min_angle=-20, max_angle=20, quality='low'):
    img = sp.atleast_2d(img)
    rows, cols = img.shape
    min_min_angle = min_angle
    max_max_angle = max_angle

    if quality == 'low':
        resolution = sp.arctan2(2.0, cols) * 180.0 / sp.pi
        min_target_size = 100
        resize_order = 1
    elif quality == 'high':
        resolution = sp.arctan2(1.0, cols) * 180.0 / sp.pi
        min_target_size = 300
        resize_order = 3
    else:
        resolution = sp.arctan2(1.0, cols) * 180.0 / sp.pi
        min_target_size = 200
        resize_order = 2

    # resize the image so it's faster to work with
    min_size = min(rows, cols)
    target_size = min_target_size if min_size > min_target_size else min_size
    resize_ratio = float(target_size) / min_size
    img = imresize(img, resize_ratio)
    rows, cols = img.shape

    # pad the image and invert the colors
    img *= -1
    img += 255
    padded_img = sp.zeros((rows*2, cols*2))
    padded_img[rows//2:rows//2+rows, cols//2:cols//2+cols] = img
    img = padded_img

    # keep dividing the interval in half to achieve O(log(n))
    while True:
        current_resolution = (max_angle - min_angle) / 30.0
        best_angle = None
        best_variance = 0.0

        # rotate the image, sum the pixel values in each row for each rotation
        # then find the variance of all the sums, pick the highest variance
        for i in xrange(31):
            angle = min_angle + i * current_resolution
            rotated_img = rotate(img, angle, reshape=False, order=resize_order)
            num_black_pixels = sp.sum(rotated_img, axis=1)
            variance = sp.var(num_black_pixels)
            if variance > best_variance:
                best_angle = angle
                best_variance = variance

        if current_resolution < resolution:
            break

        # update the angle range
        min_angle = max(best_angle - current_resolution, min_min_angle)
        max_angle = min(best_angle + current_resolution, max_max_angle)

    return best_angle
Example #38
def aggregate_raw_data(y):
    '''
    Compute means and per-variable variances of the raw data y
    :param y: array(n_periods x n_individuals x n_vars)
    :return: tuple (means, variances), each of shape (n_individuals, n_vars)
    '''
    m = sp.mean(y, axis=0)
    c = sp.var(y, axis=0)
    return m, c
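An illustrative call matching the documented shape (n_periods x n_individuals x n_vars), with sp as scipy:

import scipy as sp
y = sp.randn(50, 10, 3)        # 50 periods, 10 individuals, 3 variables
m, c = aggregate_raw_data(y)
print(m.shape, c.shape)        # (10, 3) and (10, 3)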
Example #39
 def _fit(self, type, vc=False):
     #2. init
     if type == 'null':
         self.gp[type].covar.Cn.setCovariance(self.covY)
     elif type == 'full':
         Cn0_K = self.gp['null'].covar.Cn.K()
         #self.gp[type].covar.Cr.setCovariance(1e-4*sp.ones(self.covY.shape)+1e-4*sp.eye(self.covY.shape[0]))
         self.gp[type].covar.Cr.setCovariance(0.5 * Cn0_K)
         self.gp[type].covar.Cn.setCovariance(0.5 * Cn0_K)
     elif type == 'block':
         Crf_K = self.gp['full'].covar.Cr.K()
         Cnf_K = self.gp['full'].covar.Cn.K()
         self.gp[type].covar.Cr.scale = sp.mean(Crf_K)
         self.gp[type].covar.Cn.setCovariance(Cnf_K)
     elif type == 'rank1':
         Crf_K = self.gp['full'].covar.Cr.K()
         Cnf_K = self.gp['full'].covar.Cn.K()
         self.gp[type].covar.Cr.setCovariance(Crf_K)
         self.gp[type].covar.Cn.setCovariance(Cnf_K)
     else:
         print('poppo')
     self.gp[type].optimize(factr=self.factr, verbose=False)
     RV = {
         'Cr': self.gp[type].covar.Cr.K(),
         'Cn': self.gp[type].covar.Cn.K(),
         'B': self.gp[type].mean.B[0],
         'LML': sp.array([self.gp[type].LML()]),
         'LMLgrad':
         sp.array([sp.mean((self.gp[type].LML_grad()['covar'])**2)])
     }
     if vc:
         # tr(P CoR) = tr(C)tr(R) - tr(Ones C) tr(Ones R) / float(NP)
         #           = tr(C)tr(R) - C.sum() * R.sum() / float(NP)
         trRr = (self.Xr**2).sum()
         var_r = sp.trace(RV['Cr']) * trRr / float(self.Y.size - 1)
         var_c = sp.var(sp.dot(self.F, RV['B']))
         var_n = sp.trace(RV['Cn']) * self.Y.shape[0]
         var_n -= RV['Cn'].sum() / float(RV['Cn'].shape[0])
         var_n /= float(self.Y.size - 1)
         RV['var'] = sp.array([var_r, var_c, var_n])
         if 0 and self.Y.size < 5000:
             pdb.set_trace()
             Kr = sp.kron(RV['Cr'], sp.dot(self.Xr, self.Xr.T))
             Kn = sp.kron(RV['Cn'], sp.eye(self.Y.shape[0]))
             _var_r = sp.trace(Kr - Kr.mean(0)) / float(self.Y.size - 1)
             _var_n = sp.trace(Kn - Kn.mean(0)) / float(self.Y.size - 1)
             _var = sp.array([_var_r, var_c, _var_n])
             print(((_var - RV['var'])**2).mean())
         if type == 'full':
             # calculate within region vcs
             Cr_block = sp.mean(RV['Cr']) * sp.ones(RV['Cr'].shape)
             Cr_rank1 = lowrank_approx(RV['Cr'], rank=1)
             var_block = sp.trace(Cr_block) * trRr / float(self.Y.size - 1)
             var_rank1 = sp.trace(Cr_rank1) * trRr / float(self.Y.size - 1)
             RV['var_r'] = sp.array(
                 [var_block, var_rank1 - var_block, var_r - var_rank1])
     return RV
Example #40
 def _measureColorEntryTubes(self, colorentry, n=5):
     vol_col_spec_list = self.calibtubes.measureVoltages(
             [colorentry.voltages,],
             imi=0.5, each=n)
     colorentry.tubes_xyY = (
             scipy.mean([vol_col_spec[1][0] for vol_col_spec in
                 vol_col_spec_list]),
             scipy.mean([vol_col_spec[1][1] for vol_col_spec in
                 vol_col_spec_list]),
             scipy.mean([vol_col_spec[1][2] for vol_col_spec in
                 vol_col_spec_list]))
     colorentry.tubes_xyY_sd = (
             math.sqrt(scipy.var([vol_col_spec[1][0] for vol_col_spec in
                 vol_col_spec_list])),
             math.sqrt(scipy.var([vol_col_spec[1][1] for vol_col_spec in
                 vol_col_spec_list])),
             math.sqrt(scipy.var([vol_col_spec[1][2] for vol_col_spec in
                 vol_col_spec_list])))
Example #41
    def dumpSeries(self):
        for series in self.series:
            print "name:", series.getFullName()

            for index, value in enumerate(series):
                print value
                #print "index=",index, " , value=",value
            print "avg=", scipy.average(series), " , variance=", scipy.var(
                series), " , stddev=", scipy.std(series)
Example #42
def calc_variance(filename, key):
    """
    Calculates the Variance of the Fileinput of the given Key.
    """

    a = []
    for item in items(filename):
        a.append(item[key])
    return scipy.var(a)
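An illustrative call; items() is the snippet's own file iterator (not shown here), assumed to yield dict-like records, and both the filename and key below are hypothetical:

v = calc_variance('records.json', 'response_time')   # hypothetical file and key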
Example #43
def dist_parameters(lens):
    #compute Negative binomial distribution parameters

    m = scipy.mean(lens)
    v = scipy.var(lens)
    p = (v-m)/v
    r = m*(1-p)/p
    
    return m, v, p, r
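A hedged self-check: draw from a negative binomial and recover the method-of-moments parameters; the formulas require overdispersion (variance > mean), and the import is an assumption:

from scipy import stats
lens = stats.nbinom.rvs(5, 0.4, size=10000)   # r=5, success probability 0.4
m, v, p, r = dist_parameters(lens)
print(p, r)                                    # p ~ 0.6 and r ~ 5 under this snippet's convention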
Example #44
def test_serial1(guys):
    arr = sp.array(list(to_ints(guys)),dtype="float")
    n = len(guys)
    mu = sp.mean(arr)
    v = sp.var(arr)

    front = arr[1:]
    back = arr[:-1]

    return 1-abs((1./n * sp.sum( (front-mu)*(back-mu) ))/(v+1e-10))
Example #45
 def __extract_conditions(self, dmap):
     conditions = dict()
     for v in self.conditioned_vars:
         values = dmap[v]
         if sp.var(values) > 0:
             raise ValueError(
                 "Expected conditioning variable {0} to be constant at inference time"
                 .format(v))
         conditions[v] = values[0]
     return conditions
Example #46
def MLE_iteration(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
	psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
	iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
	beta_0=sum(psi1)/len(psi1);
	beta_1=sum(psi2)/len(psi2);
	var1=10*scipy.var(numpy.array(psi1)-beta_0);
	var2=10*scipy.var(numpy.array(psi2)-beta_1);
	if var1<=0.01:
		var1=0.01;
	if var2<=0.01:
		var2=0.01;
	#print('var1');print(var1);print('var2');print(var2);
	while((iter_cutoff>0.01)&(count<=iter_maxrun)):
		count+=1;
		#iteration of beta
		beta_0=sum(psi1)/len(psi1);
		beta_1=sum(psi2)/len(psi2);
		xopt=fmin_l_bfgs_b(myfunc_multivar,[beta_0,beta_1],myfunc_multivar_der,args=[psi1,psi2,var1,var2],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1);
		beta_0=float(xopt[0][0]);
		beta_1=float(xopt[0][1]);
		#print('unconstrain_1xopt');print(xopt);
		#print('theta');print(beta_0);print(beta_1);print('theta_end');
		#iteration of psi
		new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0;
		for i in range(len(psi1)):
			xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
			new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);#print(xopt);
			#likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
		for i in range(len(psi2)):
			xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
			new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);#print(xopt);
			#likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
		#print('new_psi[0]');print(new_psi1[0]);#print(new_psi2[0]);
		psi1=new_psi1;psi2=new_psi2;#print
		#print('count');print(count);('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
		if count>1:
			iter_cutoff=abs(previous_sum-current_sum);
		previous_sum=current_sum;
	#print('unconstrain');print(beta_0);print(beta_0+beta_1);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
	#print(xopt);
	if count>iter_maxrun:
		return([current_sum,[psi1,psi2,0,0,var1,var2]]);
	return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
Example #47
def MLE_iteration(i1,i2,s1,s2,effective_inclusion_length,effective_skipping_length):
	psi1=vec2psi(i1,s1,effective_inclusion_length,effective_skipping_length);psi2=vec2psi(i2,s2,effective_inclusion_length,effective_skipping_length);
	iter_cutoff=1;iter_maxrun=100;count=0;previous_sum=0;
	beta_0=sum(psi1)/len(psi1);
	beta_1=sum(psi2)/len(psi2);
	var1=10*scipy.var(numpy.array(psi1)-beta_0);
	var2=10*scipy.var(numpy.array(psi2)-beta_1);
	if var1<=0.01:
		var1=0.01;
	if var2<=0.01:
		var2=0.01;
	print('var1');print(var1);print('var2');print(var2);
	while((iter_cutoff>0.01)&(count<=iter_maxrun)):
		count+=1;
		#iteration of beta
		beta_0=sum(psi1)/len(psi1);
		beta_1=sum(psi2)/len(psi2);
		xopt=fmin_l_bfgs_b(myfunc_multivar,[beta_0,beta_1],myfunc_multivar_der,args=[psi1,psi2,var1,var2],bounds=[[0.01,0.99],[0.01,0.99]],iprint=-1);
		beta_0=float(xopt[0][0]);
		beta_1=float(xopt[0][1]);
		print('unconstrain_1xopt');print(xopt);
		print('theta');print(beta_0);print(beta_1);print('theta_end');
		#iteration of psi
		new_psi1=[];new_psi2=[];current_sum=0;likelihood_sum=0;
		for i in range(len(psi1)):
			xopt = fmin_l_bfgs_b(myfunc_individual,[psi1[i]],myfunc_individual_der,args=[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
			new_psi1.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
			#likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
		for i in range(len(psi2)):
			xopt = fmin_l_bfgs_b(myfunc_individual,[psi2[i]],myfunc_individual_der,args=[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length],bounds=[[0.01,0.99]],iprint=-1);
			new_psi2.append(float(xopt[0]));current_sum+=float(xopt[1]);print(xopt);
			#likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
		print('new_psi[0]');print(new_psi1[0]);print(new_psi2[0]);
		psi1=new_psi1;psi2=new_psi2;print
		print('count');print(count);print('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
		if count>1:
			iter_cutoff=abs(previous_sum-current_sum);
		previous_sum=current_sum;
	#print('unconstrain');print(beta_0);print(beta_0+beta_1);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
	#print(xopt);
	if count>iter_maxrun:
		return([current_sum,[psi1,psi2,0,0,var1,var2]]);
	return([current_sum,[psi1,psi2,beta_0,beta_1,var1,var2]]);
Example #48
def generate_test_data_w_sum_stats(h2=0.5,
                                   n=100000,
                                   n_sample=100,
                                   m=50000,
                                   model='gaussian',
                                   p=1.0,
                                   conseq_r2=0,
                                   m_ld_chunk_size=100):
    """
    Generate 
    """
    #Get LD sample matrix
    D_sample = genotypes.get_sample_D(200,
                                      conseq_r2=conseq_r2,
                                      m=m_ld_chunk_size)

    #Simulate beta_hats
    ret_dict = simulate_beta_hats(h2=h2,
                                  n=n,
                                  n_sample=n_sample,
                                  m=m,
                                  model=model,
                                  p=p,
                                  conseq_r2=conseq_r2,
                                  m_ld_chunk_size=m_ld_chunk_size,
                                  D_sample=D_sample)

    #Simulate test genotypes
    test_snps = genotypes.simulate_genotypes_w_ld(
        n_sample=n_sample,
        m=m,
        conseq_r2=conseq_r2,
        m_ld_chunk_size=m_ld_chunk_size)
    ret_dict['test_snps'] = test_snps

    #Simulate test phenotypes
    phen_noise = stats.norm.rvs(0, sp.sqrt(1.0 - h2), size=n_sample)
    phen_noise = sp.sqrt((1.0 - h2) / sp.var(phen_noise)) * phen_noise
    genetic_part = sp.dot(test_snps.T, ret_dict['betas'])
    genetic_part = sp.sqrt(h2 / sp.var(genetic_part)) * genetic_part
    test_phen = genetic_part + phen_noise
    ret_dict['test_phen'] = test_phen
    return ret_dict
Example #49
def diagPlots(s, qs):
    axs[0].plot(ts, qs, '-')
    Rs = (qs[2:] - 2*qs[1:-1] + qs[:-2])/tau**2 + pot.dV(qs[1:-1]) + gamma * (qs[2:]-qs[:-2])/(2*tau)
    print scipy.mean(Rs) * (beta*tau)/gamma, scipy.var(Rs) * (beta*tau) / gamma
    # axs[1].plot(ts[1:-1], Rs)
    # axs[1].plot(s, scipy.var(Rs) * (beta*tau) / gamma, '.')
    mean = scipy.mean(Rs)
    std = scipy.std(Rs)
    axs[1].plot(scipy.array((s,s)), scipy.array((mean-std, mean+std)) * (beta*tau) / gamma, '-')
    axs[1].plot(s, mean * (beta*tau) / gamma, 'o')
Example #50
def descriptive_statistics():

    my_data = sp.randn(100)  # 100 random numbers
    print len(my_data)  # 100

    #print my_data
    # [ -9.90322017e-01   1.15233159e-01  -2.93076899e-02  -2.17625707e-01
    #   -1.27680249e-02   5.14887346e-01   1.89355659e-01   1.52055706e+00...]

    ### NumPy - some basic functions from numpy and scipy overlap

    print("Mean: {0:8.6f}".format(np.mean(my_data)))
    # Mean: 0.094097

    print("Minimum: {0:8.6f}".format(np.min(my_data)))
    # Minimum: -2.437701

    print("Maximum: {0:8.6f}".format(np.max(my_data)))
    # Maximum: 2.333469

    print("Median: {0:8.6f}".format(np.median(my_data)))
    # Median: 0.084608

    ### SciPy

    print("Variance with N in denominator: {0:8.6f}".format(sp.var(my_data)))
    # Variance with N in denominator: 1.011191

    print("Variance with N-1 in denominator: {0:8.6f}".format(
        sp.var(my_data, ddof=1)))
    # Variance with N-1 in denominator: 1.021405

    print("Std. Deviation: {0:8.6f}".format(sp.std(my_data)))
    # Std. Deviation: 1.005580

    print("Skew: {0:8.6f}".format(stats.skew(my_data)))
    # Skew: -0.085338

    print("Kurtosis: {0:8.6f}".format(stats.kurtosis(my_data)))
    # Kurtosis: -0.511248

    print("Describe: "), stats.describe(my_data)
Example #51
def trustvariance( K, d ):
    """
    This function evaluate the trust variance on more than one datasets.
    If you evaluate twice the same thing, the evaluate 
    function be able to remember it (if you call it with evolutionmap).
    
    Parameters:
    K = network
    d = date
    """
    return (d,float(scipy.var(K.weights_list())))
Example #52
def diagPlots(s, qs):
    # axs[0].plot(ts, qs, '-')
    Rs = (qs[2:] - 2*qs[1:-1] + qs[:-2])/tau**2 + pot.dV(qs[1:-1]) + gamma * (qs[2:]-qs[:-2])/(2*tau)
    # print (qs[2:] - 2*qs[1:-1] + qs[:-2])/tau**2
    # print pot.dV(qs[1:-1])
    # print gamma * (qs[2:]-qs[:-2])/(2*tau)
    print scipy.mean(Rs) * (beta*tau)/gamma, scipy.var(Rs) * (beta*tau) / gamma
    # axs[1].plot(ts[1:-1], Rs)
    # axs[1].plot(s, scipy.var(Rs) * (beta*tau) / gamma, '.')
    mean = scipy.mean(Rs)
    std = scipy.std(Rs)
Example #53
def fnormal(X):
    mu = np.array(sc.mean(X, 0))  #get the mean of the data
    var = np.array(sc.var(X, 0))  #get the variance of the data
    var = var**0.5  #calculate the standard deviation of the data
    mu = np.ones([np.size(X, 0), np.size(X, 1)]) * mu  #build the mean matrix
    var = np.ones([np.size(X, 0), np.size(X, 1)
                   ]) * var  #build the standard deviation matrix
    X = np.subtract(X, mu)  #subtract the mean from the given data
    X = np.divide(
        X, var)  #divide the data with its corresponding standard deviation
    return X, mu, var  #return the normalised data along with mean and standard deviation
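A quick sketch of the normaliser above: columns of the result should have mean ~0 and variance ~1 (note the returned var is actually the standard deviation matrix after the **0.5 step). The np/sc imports mirror what the snippet assumes:

import numpy as np
import scipy as sc
X = sc.randn(200, 3)
Xn, mu, sd = fnormal(X)
print(sc.mean(Xn, 0), sc.var(Xn, 0))   # ~[0 0 0] and ~[1 1 1]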
Example #54
def descriptive_statistics():

    my_data = sp.randn(100)  # 100 random numbers
    print len(my_data)  # 100

    # print my_data
    # [ -9.90322017e-01   1.15233159e-01  -2.93076899e-02  -2.17625707e-01
    #   -1.27680249e-02   5.14887346e-01   1.89355659e-01   1.52055706e+00...]

    ### NumPy - some basic functions from numpy and scipy overlap

    print ("Mean: {0:8.6f}".format(np.mean(my_data)))
    # Mean: 0.094097

    print ("Minimum: {0:8.6f}".format(np.min(my_data)))
    # Minimum: -2.437701

    print ("Maximum: {0:8.6f}".format(np.max(my_data)))
    # Maximum: 2.333469

    print ("Median: {0:8.6f}".format(np.median(my_data)))
    # Median: 0.084608

    ### SciPy

    print ("Variance with N in denominator: {0:8.6f}".format(sp.var(my_data)))
    # Variance with N in denominator: 1.011191

    print ("Variance with N-1 in denominator: {0:8.6f}".format(sp.var(my_data, ddof=1)))
    # Variance with N-1 in denominator: 1.021405

    print ("Std. Deviation: {0:8.6f}".format(sp.std(my_data)))
    # Std. Deviation: 1.005580

    print ("Skew: {0:8.6f}".format(stats.skew(my_data)))
    # Skew: -0.085338

    print ("Kurtosis: {0:8.6f}".format(stats.kurtosis(my_data)))
    # Kurtosis: -0.511248

    print ("Describe: "), stats.describe(my_data)
Example #55
 def extend_x(arr, additions=True, extension=True):
     if extension:
         x.extend(arr)
     if additions:
         x.append(scipy.std(arr))
         x.append(scipy.var(arr))
         x.append(sum(arr) / len(arr))
         x.append(sum(np.abs(arr)) / len(arr))
         x.append(min(arr))
         x.append(max(arr))
         x.append(scipy.mean(arr))
         x.append(scipy.median(arr))
Example #56
def format_results(kernel, times):
    '''
    Convenience function to convert the results of the timeit function
    into a dictionary.
    '''
    res = dict()
    res["kernel"] = kernel
    res["avg"] = scipy.mean(times)
    res["var"] = scipy.var(times)
    res["max"] = max(times)
    res["min"] = min(times)
    return res
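An illustrative call; the times list would typically come from timeit, as the docstring suggests, and the statement being timed is hypothetical:

import timeit
times = timeit.repeat("sum(range(1000))", number=1000, repeat=5)
print(format_results("sum_range", times))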
Example #57
 def stats(self, startdate, enddate, mktbasket, avdate, output=False, mappingoverride=None):
     """
     Calculates statistics for a fund over a period.
     
     Parameters
     ----------
     startdate : datetime
         beginning of statistic period
     enddate : datetime
         end of statistic period
     mktbasket : dict
         dictionary of market streams
     output : bool
         if True, output results to db
     mappingoverride : None or mapping dictionary
          whether to override the db mapping
     
     Returns
     -------
     stats : dict
         dictionary of statistics
     """
     actualstream, projstream = self.project(mktbasket, mappingoverride)
     if actualstream[startdate:enddate] is None: return None
     if projstream[startdate:enddate] is None: return None 
     actual = actualstream[startdate:enddate].returns
     projected = projstream[startdate:enddate].returns
     diff = actual - projected
     outdata = {
              'TE'     : scipy.std(diff) * 100.0 * 100.0,
              'BETA'   : scipy.cov(projected, actual, bias=1)[1, 0] / scipy.var(projected),
              'ALPHA'  : (scipy.product(diff + 1.0)) ** (1.0 / diff.size) - 1.0,
              'VOL'    : scipy.std(actual) * scipy.sqrt(252.0),
              'PROJ'   : scipy.product(1.0 + projected) - 1.0,
              'ACT'    : scipy.product(1.0 + actual) - 1.0,
              'R2'     : 0.0 if scipy.all(actual == 0.0) else scipy.corrcoef(projected, actual)[1, 0] ** 2.0,
              'AV'     : self.av(avdate),
              'DELTA'  : self.deltaestimate(avdate)
             }
     outdata['DIFF'] = outdata['ACT'] - outdata['PROJ']
     outdata['PL'] = outdata['DELTA'] * outdata['DIFF'] * 100.0 
     if output:
         cnxn = pyodbc.connect(ORACLESTRING)
         cursor = cnxn.cursor()
         sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
         sql = sql.format(self.fundcode, outdata['PROJ'], outdata['ACT'], outdata['DIFF'],
                    outdata['DELTA'], outdata['PL'], oracledatebuilder(startdate),
                    oracledatebuilder(enddate), outdata['TE'], outdata['R2'], outdata['BETA'],
                    outdata['ALPHA'], outdata['VOL'], outdata['AV'])
         cursor.execute(sql)
         cnxn.commit()
         cnxn.close()
     return outdata
Example #58
            self.mapping[indexes[i]] = finalbeta[i]
        return self.mapping

    def stats(self, startdate, enddate, mktbasket, output = False):
        """
        Calculates statistics for a fund over a period.
        
        Parameters
        ----------
        startdate : datetime
            beginning of statistic period
        enddate : datetime
            end of statistic period
        mktbasket : dict
            dictionary of market streams
        output : bool
            if True, output results to db
        
        Returns
        -------
        stats : dict
            dictionary of statistics
        """
        inputmatrix, fundreturns, indexes, daterange = self.align(startdate, enddate, mktbasket)
        if self.mapping and not(inputmatrix is None):
            weights = scipy.array([self.mapping[mykey] if mykey in self.mapping else 0.0 for mykey in mktbasket.keys()])
            projected = scipy.dot(inputmatrix,weights.reshape(len(indexes),1)).flatten()
            actual = fundreturns.flatten()
            diff = actual-projected
            outdata = {
                     'TE'     : scipy.std(diff)*100.0*100.0,
                     'BETA'   : scipy.cov(projected,actual)[1,0]/scipy.var(projected),
                     'ALPHA'  : (scipy.product(diff+1.0))**(1.0/diff.size)-1.0,
                     'VOL'    : scipy.std(actual)*scipy.sqrt(252.0),
                     'PROJ'   : scipy.product(1.0+projected)-1.0,
                     'ACT'    : scipy.product(1.0+actual)-1.0,
                     'R2'     : 0.0 if scipy.all(actual==0.0) else scipy.corrcoef(projected,actual)[1,0]**2.0,
                     'AV'     : self.av(startdate),
                     'DELTA'  : self.deltaestimate(startdate)
                    }
            outdata['DIFF'] = outdata['ACT']-outdata['PROJ']
            outdata['PL'] = outdata['DELTA']*outdata['DIFF']*100.0 
            if output:
                cnxn = pyodbc.connect(ORACLESTRING)
                cursor = cnxn.cursor()
                sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
                sql = sql.format(self.fundcode,outdata['PROJ'],outdata['ACT'],outdata['DIFF'],
                           outdata['DELTA'],outdata['PL'],oracledatebuilder(startdate),
                           oracledatebuilder(enddate),outdata['TE'],outdata['R2'],outdata['BETA'],
                           outdata['ALPHA'],outdata['VOL'],outdata['AV'])
                cursor.execute(sql)
                cnxn.commit()            
                cnxn.close()