def get_correlations(y, yhat, yhat_sim, cbool, kernels=np.power(2, range(1, 10))):
    if yhat.ndim == 1:
        yhat = yhat[np.newaxis]
    if yhat_sim.ndim == 1:
        yhat_sim = yhat_sim[:, np.newaxis]
    spt = neo.SpikeTrain(np.where(y)[0] * pq.ms, sampling_rate=pq.kHz,
                         t_stop=y.shape[0] * pq.ms)
    rate = [elephant.statistics.instantaneous_rate(
                spt, sampling_period=pq.ms,
                kernel=elephant.kernels.GaussianKernel(x * pq.ms))
            for x in kernels]
    R = {}
    R['yhat'] = [scipy.corrcoef(x.magnitude.ravel()[cbool], yhat.ravel()[cbool])[0, 1]
                 for x in rate]
    sim_rate = np.mean(yhat_sim, axis=1)
    R['yhat_sim'] = [scipy.corrcoef(x.magnitude.ravel()[cbool], sim_rate.ravel()[cbool])[0, 1]
                     for x in rate]
    return (R, kernels)
def alignAndCompareMotifs(motif1, motif2, reportAll=False, tryAllAlignments=True,
                          reverseComp=True, quitThreshold=None, normalizeRows=True,
                          fillValue=.25):
    """
    Compare the PWM's for two motifs by calculating their correlation coefficient.
    By default, all possible alignments and orientations will be tried and the top
    coefficient will be reported.

    fillValue may be a number, or a 4-element array with nuc frequencies

    Returns (corrCoef, motif2_relative_posn, motif2_orientation) for the best
    alignment, or the entire list if reportAll=True.
    """
    pwm1, pwm2 = motif1.matrix, motif2.matrix
    if normalizeRows:
        # make sum in each row = 1
        pwm1, pwm2 = map(normalizePwmRows, [pwm1, pwm2])
    alignsToTry = xrange(-len(motif2) + 1, len(motif1) - 1) if tryAllAlignments else [0]  # all possible shifts or no shifting
    results = []
    for curOffset in alignsToTry:
        curPwm1, curPwm2 = map(scipy.array, extendPWMs(pwm1, pwm2, curOffset, fillValue))
        # flatten arrays and take 1-dimensional correlation between them
        corrCoef = scipy.corrcoef(curPwm1.ravel(), curPwm2.ravel())[0, 1]  # top-right is correlation between matrices
        results.append([corrCoef, curOffset, 1])
        if quitThreshold is not None and corrCoef > quitThreshold:
            # return immediately if quit threshold has been passed
            break
        if reverseComp:
            curPwm2 = scipy.array(reverseComplement(curPwm2))
            corrCoef = scipy.corrcoef(curPwm1.ravel(), curPwm2.ravel())[0, 1]  # top-right is correlation between matrices
            results.append([corrCoef, curOffset, -1])
            if quitThreshold is not None and corrCoef > quitThreshold:
                # return immediately if quit threshold has been passed
                break
    if not reportAll:
        results = scipy.array(results)
        best = results[results[:, 0].argmax(), :]  # choose the result (row) with the best corrCoef
        return best
    else:
        return results
def Corr(GDP, I, C):
    m = sp.shape(GDP)[1]
    GDPIcorr = []
    GDPCcorr = []
    for i in range(0, m):
        gdp = GDP[:, i]
        inv = I[:, i]
        con = C[:, i]
        # Correlation between output and investment for each series
        gdpi = sp.corrcoef(gdp, inv)
        GDPIcorr.append(gdpi[0, 1])
        # Correlation between output and consumption for each series
        gdpc = sp.corrcoef(gdp, con)
        GDPCcorr.append(gdpc[0, 1])
    # Mean and standard deviation of correlation between GDP and
    # Investment and Consumption over total number of simulations
    GDPICORR = sp.array(GDPIcorr)
    gdpimean = sp.mean(GDPICORR)
    gdpistdev = sp.std(GDPICORR)
    GDPCCORR = sp.array(GDPCcorr)
    gdpcmean = sp.mean(GDPCCORR)
    gdpcstdev = sp.std(GDPCCORR)
    sp.savetxt('GDPICORR.csv', GDPICORR)
    sp.savetxt('GDPCCORR.csv', GDPCCORR)
    print "The mean and standard deviation between GDP and"
    print "Investment and GDP and Consumption followed by"
    print "The lists of each correlation coefficient for"
    print "each series are saved in csv files"
    return gdpimean, gdpistdev, gdpcmean, gdpcstdev
def word_party_correlations(folder='model'):
    stopwords = codecs.open("stopwords.txt", "r", "utf-8").readlines()[5:]
    stops = map(lambda x: x.lower().strip(), stopwords)
    # using now stopwords and filtering out digits
    bow = TfidfVectorizer(min_df=2)
    datafn = folder + '/textdata/rawtext.pickle'
    data = cPickle.load(open(datafn))
    bow = bow.fit(chain.from_iterable(data.values()))
    # create numerical labels
    Y = hstack(map((lambda x: ones(len(data[data.keys()[x]])) * x), range(len(data))))
    # create data matrix
    for key in data.keys():
        data[key] = bow.transform(data[key])
    X = vstack(data.values())
    # map sentiment vector to bow space
    words = load_sentiment()
    sentiment_vec = zeros(X.shape[1])
    for key in words.keys():
        if bow.vocabulary_.has_key(key):
            sentiment_vec[bow.vocabulary_[key]] = words[key]
    # do sentiment analysis
    sentiments = X.dot(sentiment_vec)
    # compute label-BoW-tfidf-feature correlation
    lb = LabelBinarizer()
    partylabels = zscore(lb.fit_transform(Y), axis=0)
    # sentiment vs party correlation
    sentVsParty = corrcoef(partylabels.T, sentiments)[-1, :-1]
    fn = folder + '/sentiment_vs_party.json'
    for key in range(len(data.keys())):
        print "Sentiment vs Party %s: %0.2f" % (data.keys()[key], sentVsParty[key])
    json.dump(dict(zip(data.keys(), sentVsParty)), open(fn, 'wb'))
    wordidx2word = dict(zip(bow.vocabulary_.values(), bow.vocabulary_.keys()))
    allcors = dict(zip(data.keys(), [[]] * len(data.keys())))
    # this is extremely cumbersome and slow, ...
    # but computing the correlations naively on the matrices
    # requires densifying the matrix X, which is memory intense
    for partyidx in range(len(data.keys())):
        cors_words = []
        print 'Computing correlations for %s' % data.keys()[partyidx]
        for wordidx in range(X.shape[-1]):
            cors = corrcoef(X[:, wordidx].todense().flatten(), partylabels[:, partyidx])[1, 0]
            if abs(cors) > .01:
                cors_words.append((wordidx2word[wordidx], cors))
        allcors[data.keys()[partyidx]] = dict(cors_words)
    fn = folder + '/words_correlations.json'
    json.dump(dict(allcors), open(fn, 'wb'))
def weighted_average_aligned_runs(self, sources, mixing):
    '''
    Averages one aligned ICA run and calculates the reproducibility for each
    component. This version does not add only super-threshold CCs to the
    reproducibility index, and it uses a weighted average to form the average
    components. The weights are defined as w_i = sum_{j neq i} SCC(i,j).
    '''
    rep = np.triu(np.abs(corrcoef(sources)), 1).sum() / (0.5 * self.K * (self.K - 1))
    rWeights = np.asarray([(np.abs(corrcoef(sources)[j, :]).sum() - 1.0) / (sources.shape[0] - 1)
                           for j in range(0, sources.shape[0])])[:, np.newaxis]
    return (((rWeights * sources).sum(axis=0)) / (rWeights.sum()),
            ((mixing * rWeights.T).sum(axis=1)) / (rWeights.sum()),
            rep)
def selective_average_aligned_runs(self, sources, mixing):
    '''
    Averages one aligned ICA run and calculates a reproducibility index.
    This version uses the original definition in Yang et al.
    '''
    # threshold for inclusion
    thresh = 0.7
    corrsToSum = np.triu(np.abs(corrcoef(sources)), 1).flatten()
    rep = (corrsToSum[np.nonzero(corrsToSum > thresh)].sum()) / (0.5 * self.K * (self.K - 1))
    # now only add a component to the average if there is at least one correlation with the other RCs > threshold
    # the > 1 statement is because the diagonal elements are always 1.0, so there will always be at least one
    # cross-correlation (namely the self-correlation) which exceeds the threshold
    toInclude = ((np.abs(corrcoef(sources)) > thresh).sum(axis=0) > 1)
    return sources[toInclude, :].mean(axis=0), mixing[:, toInclude].mean(axis=1), rep
def multi_glasso_train(beta1_current, chrom_betas1, chrom_ld_dict, ld_radius,
                       n_indV, tune_idx, Y, X, lambda1, lambda2, chr_list,
                       num_iter=60):
    print "Starting training with lambda1 = %.5f and lambda2 = %.5f" % (lambda1, lambda2)
    n_tune = len(tune_idx)
    predicted0 = np.zeros(n_tune)
    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            predicted0 += sp.dot(beta1_current[chrom_str][:, 0], X[chrom_str][:, tune_idx])
    tune_cor_old = sp.corrcoef(Y[tune_idx], predicted0)[0, 1]
    print "Tuning COR of initial: %.3f" % tune_cor_old
    for k in range(num_iter):
        predicted1 = np.zeros(n_tune)
        for chrom_str in chromosomes_list:
            if chrom_str in chr_list:
                beta1_current[chrom_str] = inner_iter(
                    beta_hats1=chrom_betas1[chrom_str],
                    n_indV=n_indV,
                    lambda1=lambda1,
                    lambda2=lambda2,
                    start_betas1=beta1_current[chrom_str],
                    ld_radius=ld_radius,
                    ld_dict1=chrom_ld_dict[chrom_str],
                )
                predicted1 += sp.dot(beta1_current[chrom_str][:, 0], X[chrom_str][:, tune_idx])
        #tune_err_new = np.mean((y1[tune_idx] - predicted)**2)
        tune_cor_new = sp.corrcoef(Y[tune_idx], predicted1)[0, 1]
        if np.isnan(tune_cor_new):
            break
        print "Tuning COR at step %d: %.3f" % (k, tune_cor_new)
        if tune_cor_new <= tune_cor_old:
            break
        else:
            tune_cor_old = tune_cor_new
    return beta1_current, tune_cor_new
def pearson(X, Y=None):
    """Compute the Pearson correlation between `X` and `Y`.

    Parameters
    ----------
    X : array_like or sparse matrix
        with shape (`n_fprints_X`, `n_bits`).
    Y : array_like or sparse matrix, optional
        with shape (`n_fprints_Y`, `n_bits`).

    Returns
    -------
    pearson : array of shape (`n_fprints_X`, `n_fprints_Y`)

    See Also
    --------
    soergel: Soergel similarity for non-binary data
    cosine, dice, tanimoto
    """
    X, Y = _check_array_pair(X, Y)
    Xlen = X.shape[0]
    if issparse(X):
        X = vstack((X, Y), format="csr")
        X = X - X.mean(axis=1)
        cov = (X * X.T) / (X.shape[1] - 1.0)
        d = np.sqrt(np.diag(cov))
        with np.errstate(divide="ignore"):  # handle 0 in denominator
            pearson = cov / np.outer(d, d)
    else:
        with np.errstate(divide="ignore"):  # handle 0 in denominator
            pearson = scipy.corrcoef(X, Y)
    return np.asarray(np.nan_to_num(pearson[:Xlen, Xlen:]))
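# Illustrative usage sketch (not from the original project): the dense branch of
# pearson() above delegates to corrcoef on the stacked fingerprints and then
# slices out the cross block between the X rows and the Y rows. The tiny
# fingerprints below are made-up data; np.corrcoef stands in for the
# module-level scipy.corrcoef import assumed by the function.
import numpy as np

X_demo = np.array([[1., 0., 1., 1.],
                   [0., 1., 1., 0.]])                  # two fingerprints
Y_demo = np.array([[1., 0., 0., 1.],
                   [1., 1., 1., 0.],
                   [0., 0., 1., 1.]])                  # three fingerprints
full = np.corrcoef(np.vstack((X_demo, Y_demo)))        # (2+3) x (2+3) correlation matrix
cross = np.nan_to_num(full[:X_demo.shape[0], X_demo.shape[0]:])  # 2 x 3 X-vs-Y block
print(cross.shape)                                     # (2, 3)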
def calculate_stock_correlation(data):
    """
    This function should take a list containing two lists of the form returned
    by get_yahoo_data (list of date, adj. close tuples) and return the
    correlation of the daily returns as defined above.
    """
    apple_returns = []
    google_returns = []
    apple_data = data[0]
    google_data = data[1]
    cm = apple_data[0][1]
    for i in range(1, len(apple_data)):
        cn = apple_data[i][1]
        daily_return = (cn - cm) / cm
        apple_returns.append(daily_return)
        cm = cn
    cm = google_data[0][1]
    for i in range(1, len(google_data)):
        cn = google_data[i][1]
        daily_return = (cn - cm) / cm
        google_returns.append(daily_return)
        cm = cn
    corr_matrix = scipy.corrcoef(google_returns, apple_returns)
    corr_value = corr_matrix[0][1]
    return corr_value
def get_correlations(self, pids=None):
    """
    Returns correlation matrix between traits.

    All traits are used if pids is left empty.
    """
    import bisect
    if not pids:
        pids = sorted(self.phen_dict.keys())
    num_traits = len(pids)
    corr_mat = sp.ones((num_traits, num_traits))
    for i, pid1 in enumerate(pids):
        pd = self.get_avg_value_dict(pid1)
        ets1 = pd['ecotypes']
        pvs1 = pd['values']
        for j, pid2 in enumerate(pids[:i]):
            pd = self.get_avg_value_dict(pid2)
            ets2 = pd['ecotypes']
            pvs2 = pd['values']
            common_ets = set(ets1).intersection(set(ets2))
            ets_ix1 = map(ets1.index, common_ets)
            ets_ix2 = map(ets2.index, common_ets)
            vs1 = [pvs1[et_i] for et_i in ets_ix1]
            vs2 = [pvs2[et_i] for et_i in ets_ix2]
            corr_mat[i, j] = sp.corrcoef(vs1, vs2)[0, 1]
            corr_mat[j, i] = corr_mat[i, j]
    return corr_mat, pids
def corr(X, Y):
    """Compare two histories event by event and give a similarity score.

    Warning
    -------
    Note the asymmetry of X and Y; the latter is inferred and can therefore
    contain ties. We add an additional variable to denote the time of birth
    of an edge.

    Parameters
    ----------
    X : list of tuples
        Reference history vector; tuples represent edges. Position corresponds to time.
    Y : list of pairs
        Inferred history vector with ranking information.
        The first entry of the pair contains an edge (pair).
        The second entry contains the rank of the edge (float).

    Return
    ------
    score : float
        Correlation of generated and inferred history.
    """
    # Augment reference history with arrival times
    X = [(_, t) for t, _ in enumerate(X)]
    # Sort based on edges
    X = sorted(X, key=lambda x: x[0])
    Y = sorted(Y, key=lambda x: x[0])
    corr = sp.corrcoef([x[1] for x in X], [y[1] for y in Y])[0, 1]
    return corr
def cal_coff(array, indicator):
    axis = indicator == 0
    if axis:
        length = array.shape[1]
    else:
        length = array.shape[0]
    for x in xrange(0, length):
        for y in xrange(0, length):
            if x != y:
                if axis:
                    yield sp.corrcoef(array[:, x], array[:, y])
                else:
                    yield sp.corrcoef(array[x, :], array[y, :])
def run_STM_CV(Xc, yc, cbool_bin, yhat):
    num_components = 3
    num_features = 24
    k = 20
    KF = sklearn.model_selection.KFold(k, shuffle=True)
    yhat_model = np.zeros(yc.shape[0])
    MODELS = []
    count = 0
    for train_index, test_index in KF.split(Xc):
        count += 1
        print('\t{} of {} crossvalidations'.format(count, k))
        model = cmt.models.STM(Xc.shape[1], 0,
                               num_components,
                               num_features,
                               cmt.nonlinear.ExponentialFunction,
                               cmt.models.Poisson)
        retval = model.train(Xc[train_index, :].T, yc[train_index, :].T, parameters=get_params())
        if not retval:
            print('Max_iter ({:.0f}) reached'.format(get_params()['max_iter']))
        MODELS.append(model)
        yhat_model[test_index] = model.predict(Xc[test_index].T)
    yhat[cbool_bin] = yhat_model
    yhat[yhat > binsize] = binsize
    r = scipy.corrcoef(yhat[cbool_bin].ravel(), yc.ravel())[0, 1]
    print('\t\t corrcoef = {}'.format(r))
    return r
def pcor(X, Y, Z):
    """
    computes the correlation matrix of X and Y conditioning on Z
    """
    if X.ndim == 1:
        X = X[:, SP.newaxis]
    if Y.ndim == 1:
        Y = Y[:, SP.newaxis]
    if Z is None:
        return STATS.pearsonr(X, Y)
    if Z.ndim == 1:
        Z = Z[:, SP.newaxis]
    nSamples = X.shape[0]
    betaX, _, _, _ = LA.lstsq(Z, X)
    betaY, _, _, _ = LA.lstsq(Z, Y)
    Xres = X - SP.dot(Z, betaX)
    Yres = Y - SP.dot(Z, betaY)
    corr_cond = SP.corrcoef(Xres[:, 0], Yres[:, 0])[0, 1]
    dz = Z.shape[1]  # dimension of conditioning variable
    df = max(nSamples - dz - 2, 0)  # degrees of freedom
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        tstat = corr_cond / SP.sqrt(1.0 - corr_cond ** 2)  # calculate t statistic
        tstat = math.sqrt(df) * tstat
        pv_cond = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)  # calculate p value
    return corr_cond, pv_cond
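# Illustrative sketch (synthetic data, not part of the original module) of the
# residualization idea behind pcor() above: regress x and y on the conditioning
# variable z, then correlate the residuals. All names and values here are made up.
import numpy as np

rng = np.random.RandomState(0)
z = rng.randn(500, 1)                          # shared driver
x = 2.0 * z[:, 0] + rng.randn(500)
y = -1.5 * z[:, 0] + rng.randn(500)
bx = np.linalg.lstsq(z, x[:, None])[0]         # regression coefficients of x on z
by = np.linalg.lstsq(z, y[:, None])[0]
x_res = x - np.dot(z, bx)[:, 0]
y_res = y - np.dot(z, by)[:, 0]
print(np.corrcoef(x, y)[0, 1])                 # raw correlation, dominated by z
print(np.corrcoef(x_res, y_res)[0, 1])         # conditional correlation, near zero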
def selectTraits(self, phenoMAF=None, corrMin=None, nUnique=False):
    """
    use only a subset of traits

    filter out all individuals that have missing values for the selected ones
    """
    self.idx_samples = SP.ones(self.n_s, dtype=bool)
    # filter out nan samples
    self.idx_samples[SP.isnan(self.Y[:, self.idx_traits]).any(1)] = False
    # filter out phenotypes that are not diverse enough
    if phenoMAF is not None:
        expr_mean = self.Y[self.idx_samples].mean(0)
        expr_std = self.Y[self.idx_samples].std(0)
        z_scores = SP.absolute(self.Y[self.idx_samples] - expr_mean) / SP.sqrt(expr_std)
        self.idx_traits[(z_scores > 1.5).mean(0) < phenoMAF] = False
    # use only correlated phenotypes
    if corrMin is not None and self.Y.shape[1] > 1:
        corr = SP.corrcoef(self.Y[self.idx_samples].T)
        corr -= SP.eye(corr.shape[0])
        self.idx_traits[SP.absolute(corr).max(0) < corrMin] = False
    # filter out binary phenotypes
    if nUnique and self.Y.shape[1] > 1:
        for i in range(self.Y.shape[1]):
            if len(SP.unique(self.Y[self.idx_samples][:, i])) <= nUnique:
                self.idx_traits[i] = False
    LG.debug('number of traits(before filtering): %d' % self.n_t)
    LG.debug('number of traits(after filtering): %d' % self.idx_traits.sum())
    LG.debug('number of samples(before filtering): %d' % self.n_s)
    LG.debug('number of samples(after filtering): %d' % self.idx_samples.sum())
def simple_supervised_demo():
    print "Simple demo of supervised factor inference"
    # simple object using default simulated dataset; see simple_unsupervised_demo for how it is constructed
    model = get_simple_model_object(expr_file='data/expression_sparse.csv')
    # and prior for which factor regulates which gene. This matrix has entries between 0 and 1.
    # The (g,k) entry represents the probability that gene g is affected by factor k.
    prior = SP.loadtxt("data/prior_sparse.csv", delimiter=",")
    model.setSparsityPrior(prior)  # prior on which factors affect which genes
    model.update()
    for i in range(prior.shape[1]):
        print "Correlation between factor", i, "prior and weight", SP.corrcoef(model.getW()[:, i], prior[:, i])[0, 1], "sum prior", sum(prior[:, i])
def portfolio_var(R, w):
    cor = sp.corrcoef(R.T)
    std_dev = sp.std(R, axis=0)
    n = len(w)  # number of assets
    var = 0.0
    for i in xrange(n):
        for j in xrange(n):
            var += w[i] * w[j] * std_dev[i] * std_dev[j] * cor[i, j]
    return var
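# Hedged check (synthetic returns, illustrative only): the nested loop over
# standard deviations and correlations in portfolio_var() is algebraically the
# same as w' * Cov(R) * w, so the two numbers printed below should agree
# (bias=True matches the ddof=0 default of sp.std used above). Names below are made up.
import numpy as np

rng = np.random.RandomState(1)
R_demo = rng.randn(250, 3) * 0.01              # 250 days of returns for 3 assets
w_demo = np.array([0.5, 0.3, 0.2])             # portfolio weights
cor = np.corrcoef(R_demo.T)
sd = np.std(R_demo, axis=0)
var_loop = sum(w_demo[i] * w_demo[j] * sd[i] * sd[j] * cor[i, j]
               for i in range(3) for j in range(3))
var_matrix = w_demo.dot(np.cov(R_demo.T, bias=True)).dot(w_demo)
print(var_loop, var_matrix)                    # identical up to floating point error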
def calc_vif(data_enc):
    cc = sp.corrcoef(data_enc.values, rowvar=False)
    if cc.ndim < 2:
        return []
    VIF = np.round(np.linalg.inv(cc).diagonal(), 6)
    return sorted(zip(data_enc.columns, VIF), key=lambda item: item[1], reverse=True)
def pearsCorrRavel(Y1, Y2):
    """
    Calculate the Pearson correlation between vec(Y1) and vec(Y2).
    """
    y1 = Y1.ravel()
    y2 = Y2.ravel()
    rv = SP.corrcoef(y1, y2)[0, 1]
    return rv
def correlationMatrix(mdata, linit, lend, nstep):
    lstep = (lend - linit) / nstep
    corr = np.zeros((mdata.shape[0], mdata.shape[0]))
    for length in range(linit, lend, lstep):
        corrs = corrcoef(mdata[:, length:length + lstep])
        corr += corrs
    corr /= nstep
    return corr
def summarize_accuracy(prs_files):
    true_phens = []
    prs_phens = []
    ldpred_phens = []
    tp_prs_rs = []
    tp_ldpred_rs = []
    for prsf in prs_files:
        if os.path.isfile(prsf):
            rt = pd.read_csv(prsf, skipinitialspace=True, index_col=False)
            true_phens.extend(rt['true_phens'])
            prs_phens.extend(rt['raw_effects_prs'])
            ldpred_phens.extend(rt['pval_derived_effects_prs'])
            tp_prs_rs.append(sp.corrcoef(rt['true_phens'], rt['raw_effects_prs'])[0, 1])
            tp_ldpred_rs.append(sp.corrcoef(rt['true_phens'], rt['pval_derived_effects_prs'])[0, 1])
    return (sp.mean(tp_prs_rs), sp.mean(tp_ldpred_rs))
def pca(dat, npca=None, verbose=False):
    if isinstance(dat, sp.ndarray):
        dat = pd.DataFrame(dat)
        names = []
        for i in range(dat.shape[1]):
            names.append("x" + str(i + 1))
        dat.columns = names
    names = list(dat.columns)
    nr = dat.shape[0]
    nc = dat.shape[1]
    r = sp.corrcoef(dat, rowvar=False)
    heikin = dat.mean(axis=0)
    bunsan = dat.var(axis=0, ddof=1)
    sd = sp.sqrt(bunsan)
    eval, evec = linalg.eig(r)
    eval = sp.real(eval)
    rank = rankdata(eval, method="ordinal")
    rank = nc + 1 - rank
    eval2 = eval.copy()
    evec2 = evec.copy()
    for i in range(nc):
        j = sp.where(rank == i + 1)[0][0]
        eval[i] = eval2[j]
        evec[:, i] = evec2[:, j]
    contr = eval / nc * 100
    cum_contr = sp.cumsum(contr)
    fl = (sp.sqrt(eval) * evec)
    for i in range(nc):
        dat.ix[:, i] = (dat.ix[:, i] - heikin[i]) / sd[i]
    fs = sp.dot(dat, evec * sp.sqrt(nr / (nr - 1)))
    if npca is None:
        npca = sp.sum(eval >= 1)
    eval = eval[0:npca]
    cont = eval / nc
    cumc = sp.cumsum(cont)
    fl = fl[:, 0:npca]
    rcum = sp.sum((fl ** 2), axis=1)
    if verbose:
        print(" ", end="")
        for j in range(npca):
            print("{0:>8s}".format("PC" + str(j + 1)), end="")
        print(" Contribution")
        for i in range(nc):
            print("{0:>12s}".format(names[i]), end="")
            for j in range(npca):
                print(" {0:7.3f}".format(fl[i, j]), end="")
            print(" {0:7.3f}".format(rcum[i]))
        print(" Eigenvalue", end="")
        for j in range(npca):
            print(" {0:7.3f}".format(eval[j]), end="")
        print("\nContribution", end="")
        for j in range(npca):
            print(" {0:7.3f}".format(cont[j]), end="")
        print("\nCum.contrib.", end="")
        for j in range(npca):
            print(" {0:7.3f}".format(cumc[j]), end="")
        print()
    return {"r": r, "fl": fl, "eval": eval, "fs": fs[:, 0:npca]}
def coefficients_VIF(self):
    #eps = 1e-20
    x = self._model.model.exog[:, 1:].copy()
    inv_corr = np.linalg.inv(sp.corrcoef(x, rowvar=False))
    diag = list(inv_corr.diagonal())
    if self.include_constant:
        diag = [np.nan] + diag
    return pd.Series(diag, index=self._params_idx)
def generate_vif(csv):
    """Return the Variance Inflation Factors (VIF) for the columns of a given csv."""
    # Create pandas dataframe from csv.
    data = pd.read_csv(csv)
    # Calculate VIF from dataframe.
    cc = sp.corrcoef(data.values, rowvar=False)
    vif = np.linalg.inv(cc)
    v = vif.diagonal()
    return str(v)
def pearsCorrMean(Y1, Y2):
    """
    Calculate the average Pearson correlation between corresponding columns of Y1 and Y2.
    """
    rv = 0
    for ic in range(Y1.shape[1]):
        rv += SP.corrcoef(Y1[:, ic], Y2[:, ic])[0, 1]
    rv /= float(Y1.shape[1])
    return rv
def supervised_prior_comparison_demo():
    print "Supervised factor inference demo, comparing different error rates in prior specification"
    prior = SP.loadtxt("data/prior_sparse.csv", delimiter=",")
    # compare outcomes of inference depending on uncertainty in prior
    for error in (0, 0.01, 0.1, 0.2):
        print "Prior error=", error
        # simple object using default simulated dataset; see simple_unsupervised_demo for how it is constructed
        model = get_simple_model_object(expr_file='data/expression_sparse.csv')
        p = prior
        p[p > 0.5] = (1 - error)
        p[p < 0.5] = error
        model.setSparsityPrior(p)  # prior on which factors affect which genes
        model.update()
        for i in range(prior.shape[1]):
            if SP.isnan(model.getW()).any():
                pdb.set_trace()
            elif SP.isnan(SP.corrcoef(model.getW()[:, i], prior[:, i])[0, 1]):
                pdb.set_trace()
            print "Correlation between factor", i, "prior and weight", SP.corrcoef(model.getW()[:, i], prior[:, i])[0, 1], "sum prior", sum(prior[:, i])
def repeats_test(rsType, num_wheels=1, num_spins_per_wheel=1):
    wheel_segments = numpy.random.randint(low=0, high=100, size=100)
    hits = [0] * len(wheel_segments)
    for i in xrange(num_wheels):
        rs = rsType(wheel_segments)
        for j in xrange(num_spins_per_wheel):
            hits[rs.spin(random.random())] += 1
    corr = scipy.corrcoef(wheel_segments, hits)[0, 1]
    print corr
    return corr
def centrality_correlation(G=None):
    import scipy, pylab, cPickle
    if G == None:
        npr.seed(3)
        random.seed(3)
        des = centralityTreeDesign()
        #des = cdaDesign()  #gives high correlation
        des.setParam('b', 15.)
        des.setParam('q', 0.7)  #gives low correlation
        #des.setParam('b', 2.0)
        #des.setParam('q', 3.0)
        des.fixedParams['nn'] = 300
        G = des.buildNet()
    results = []
    centralities = nx.centrality.brandes_betweenness_centrality(G)
    for node in G:
        if G.degree(node) == 1:
            continue
        #jitter for visualization:
        #results.append((G.degree(node)+npr.rand()*0.4, centralities[node]))
        results.append((G.degree(node), centralities[node]))
    results = np.array(results)
    pylab.rc('text', usetex=True)
    pylab.plot(results[:, 0], results[:, 1], '.')
    #pylab.title(r'lambda=%2.1f'%(rat,))
    pylab.xlabel('Degree')
    pylab.ylabel('Betweenness')
    #fixme: statistics question - what are all the other coefficients?
    corr = scipy.corrcoef(results[:, 0], results[:, 1])[0, 1]
    pylab.figtext(0.2, 0.8, 'Correlation=%f' % corr)
    filename = 'output/correlation_motion.vs.betweenness.pkl'
    outputFile = open(filename, 'wb')
    report = {'results': results, 'correlation': corr}
    cPickle.dump(report, outputFile)
    outputFile.close()
    print
    print 'Pickle: ' + filename + ' written!'
    results = np.array(results)
    print 'Correlation: %.4f' % corr
    try:
        pylab.savefig('output/correlation_motion.vs.betweenness_lambda=' + str(rat) + '_results.eps')
    except:
        print 'Unable to save figure...'
def plot_phen_relatedness(self, k, k_accessions, plot_file_prefix, pids=None):
    import kinship
    import pylab
    import scipy as sp
    from scipy import linalg
    if not pids:
        pids = self.get_pids()
    self.convert_to_averages(pids)
    self.filter_ecotypes_2(k_accessions, pids)
    for pid in pids:
        ets = self.get_ecotypes(pid)
        vals = self.get_values(pid)
        k_m = kinship.prepare_k(k, k_accessions, ets)
        c = sp.sum((sp.eye(len(k_m)) - (1.0 / len(k_m)) * sp.ones(k_m.shape)) * sp.array(k_m))
        k_scaled = (len(k) - 1) * k / c
        p_her = self.get_pseudo_heritability(pid, k_m)
        x_list = []
        y_list = []
        for i in range(len(ets)):
            for j in range(i):
                x_list.append(k_m[i, j])
                y_list.append(vals[i] - vals[j])
        ys = sp.array(y_list)
        ys = ys * ys
        xs = sp.array(x_list)
        phen_name = self.get_name(pid)
        phen_name = phen_name.replace("<i>", "")
        phen_name = phen_name.replace("</i>", "")
        phen_name = phen_name.replace("+", "_plus_")
        phen_name = phen_name.replace("/", "_div_")
        file_name = plot_file_prefix + "_%d_%s.png" % (pid, phen_name)
        pylab.figure()
        pylab.plot(xs, ys, "k.", alpha=0.2)
        pylab.xlabel("Relatedness")
        pylab.ylabel("Squared phenotypic difference")
        # Plot regression line
        Y_mat = sp.mat(ys).T
        X_mat = sp.hstack((sp.mat(sp.ones(len(xs))).T, sp.mat(xs).T))
        (betas, residues, rank, s) = linalg.lstsq(X_mat, Y_mat)
        x_min, x_max = pylab.xlim()
        pylab.plot([x_min, x_max], [betas[0] + x_min * betas[1], betas[0] + x_max * betas[1]])
        corr = sp.corrcoef(xs, ys)[0, 1]
        y_min, y_max = pylab.ylim()
        x_range = x_max - x_min
        y_range = y_max - y_min
        pylab.axis([x_min - 0.025 * x_range, x_max + 0.025 * x_range,
                    y_min - 0.025 * y_range, y_max + 0.15 * y_range])
        pylab.text(x_min + 0.1 * x_range, y_max + 0.03 * y_range, "Correlation: %0.4f" % (corr))
        pylab.text(x_min + 0.5 * x_range, y_max + 0.03 * y_range, "Pseudo-heritability: %0.4f" % (p_her))
        pylab.savefig(file_name)
        del k_m
        del k_scaled
def bowtie_polynom(modis_img, cs, folder):
    print 'Determine overlap pattern... '
    sw = 10000 / cs  # stripwidth
    overlaplist = []  # define list to store number of overlapped lines
    # divide in parts with a width of 40 pixel
    for i in sp.arange(0, modis_img.shape[1] - 40, 40):
        part = modis_img[:, i:i + 39]
        # search in every scanning strip
        samples = []
        for j in sp.arange(sw - 2, part.shape[0] - sw, sw):
            target = part[j - 1:j + 1, :]  # cut out a target, whose overlapped counter-part shall be found
            searchwindow = part[j + 2:j + sw + 2]  # cut out the window where the overlapped counter-part might be located
            # start the search
            c = []  # calculate correlation coefficients of every given offset from 3 to 11
            for offset in sp.arange(3, sw / 2 + 1):
                imgpart = searchwindow[offset - 3:offset - 1]  # cut out the image which has to be compared with the target
                c.append(sp.corrcoef(imgpart.flatten(), target.flatten())[0, 1])  # calculate correlation coefficient
            c = sp.array(c)
            overl = sp.ndimage.measurements.maximum_position(c)[0] + 3  # find the overlap with the highest correlation coefficient
            samples.append([overl, c.max()])  # attach overlap and correlation coefficient to the sample list
        samples = sp.array(samples)
        #print i, samples[:,1].mean()
        if samples[:, 1].mean() > 0.9:  # check the mean correlation coefficient
            #print('Bowtie Correlation high - removing effect')
            overlaplist.append([i + 20, samples[:, 0].mean()])  # save result, if correlation coefficient is high
            #print(overlaplist)
            o = sp.array(overlaplist)
            X = o[:, 0]
            overlap = o[:, 1]
            # Calculate a second order polynom to describe the overlap
            p = sp.polyfit(X, overlap, 2)
            #print 'done, Overlap polynom: '+str(p)
        else:
            #print('low Bowtie correlation')
            p = [1., 1., 1.]
            #overlaplist.append([i+20,1])
            #os.system('rm -r '+folder)
            #print('scene deleted')
    return p
    self.mapping[indexes[i]] = finalbeta[i]
    return self.mapping

def stats(self, startdate, enddate, mktbasket, output=False):
    """
    Calculates statistics for a fund over a period.

    Parameters
    ----------
    startdate : datetime
        beginning of statistic period
    enddate : datetime
        end of statistic period
    mktbasket : dict
        dictionary of market streams
    output : bool
        if True, output results to db

    Returns
    -------
    stats : dict
        dictionary of statistics
    """
    inputmatrix, fundreturns, indexes, daterange = self.align(startdate, enddate, mktbasket)
    if self.mapping and not (inputmatrix is None):
        weights = scipy.array([self.mapping[mykey] if mykey in self.mapping else 0.0
                               for mykey in mktbasket.keys()])
        projected = scipy.dot(inputmatrix, weights.reshape(len(indexes), 1)).flatten()
        actual = fundreturns.flatten()
        diff = actual - projected
        outdata = {
            'TE': scipy.std(diff) * 100.0 * 100.0,
            'BETA': scipy.cov(projected, actual)[1, 0] / scipy.var(projected),
            'ALPHA': (scipy.product(diff + 1.0)) ** (1.0 / diff.size) - 1.0,
            'VOL': scipy.std(actual) * scipy.sqrt(252.0),
            'PROJ': scipy.product(1.0 + projected) - 1.0,
            'ACT': scipy.product(1.0 + actual) - 1.0,
            'R2': 0.0 if scipy.all(actual == 0.0) else scipy.corrcoef(projected, actual)[1, 0] ** 2.0,
            'AV': self.av(startdate),
            'DELTA': self.deltaestimate(startdate)
        }
        outdata['DIFF'] = outdata['ACT'] - outdata['PROJ']
        outdata['PL'] = outdata['DELTA'] * outdata['DIFF'] * 100.0
        if output:
            cnxn = pyodbc.connect(ORACLESTRING)
            cursor = cnxn.cursor()
            sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
            sql = sql.format(self.fundcode, outdata['PROJ'], outdata['ACT'], outdata['DIFF'],
                             outdata['DELTA'], outdata['PL'], oracledatebuilder(startdate),
                             oracledatebuilder(enddate), outdata['TE'], outdata['R2'], outdata['BETA'],
                             outdata['ALPHA'], outdata['VOL'], outdata['AV'])
            cursor.execute(sql)
            cnxn.commit()
            cnxn.close()
def stats(self, startdate, enddate, mktbasket, avdate, output=False, mappingoverride=None):
    """
    Calculates statistics for a fund over a period.

    Parameters
    ----------
    startdate : datetime
        beginning of statistic period
    enddate : datetime
        end of statistic period
    mktbasket : dict
        dictionary of market streams
    output : bool
        if True, output results to db
    mappingoverride : None or mapping dictionary
        whether to override the db mapping

    Returns
    -------
    stats : dict
        dictionary of statistics
    """
    actualstream, projstream = self.project(mktbasket, mappingoverride)
    if actualstream[startdate:enddate] is None:
        return None
    if projstream[startdate:enddate] is None:
        return None
    actual = actualstream[startdate:enddate].returns
    projected = projstream[startdate:enddate].returns
    diff = actual - projected
    outdata = {
        'TE': scipy.std(diff) * 100.0 * 100.0,
        'BETA': scipy.cov(projected, actual, bias=1)[1, 0] / scipy.var(projected),
        'ALPHA': (scipy.product(diff + 1.0)) ** (1.0 / diff.size) - 1.0,
        'VOL': scipy.std(actual) * scipy.sqrt(252.0),
        'PROJ': scipy.product(1.0 + projected) - 1.0,
        'ACT': scipy.product(1.0 + actual) - 1.0,
        'R2': 0.0 if scipy.all(actual == 0.0) else scipy.corrcoef(projected, actual)[1, 0] ** 2.0,
        'AV': self.av(avdate),
        'DELTA': self.deltaestimate(avdate)
    }
    outdata['DIFF'] = outdata['ACT'] - outdata['PROJ']
    outdata['PL'] = outdata['DELTA'] * outdata['DIFF'] * 100.0
    if output:
        cnxn = pyodbc.connect(ORACLESTRING)
        cursor = cnxn.cursor()
        sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
        sql = sql.format(self.fundcode, outdata['PROJ'], outdata['ACT'], outdata['DIFF'],
                         outdata['DELTA'], outdata['PL'], oracledatebuilder(startdate),
                         oracledatebuilder(enddate), outdata['TE'], outdata['R2'], outdata['BETA'],
                         outdata['ALPHA'], outdata['VOL'], outdata['AV'])
        cursor.execute(sql)
        cnxn.commit()
        cnxn.close()
    return outdata
def Autocorr(GDP, I, C):
    m = sp.shape(GDP)[1]
    GDPauto = []
    Iauto = []
    Cauto = []
    for i in range(0, m):
        # GDP autocorrelation coefficients for each series appended
        # to the empty GDPauto list
        gdp = GDP[:, i]
        gauto = sp.corrcoef(gdp[0:-1], gdp[1:])
        GDPauto.append(gauto[0, 1])
        # Investment autocorrelation coefficients for each series
        # appended to the empty Iauto list
        invest = I[:, i]
        iauto = sp.corrcoef(invest[0:-1], invest[1:])
        Iauto.append(iauto[0, 1])
        # Consumption autocorrelation coefficients for each series
        # appended to the empty Cauto list
        consum = C[:, i]
        cauto = sp.corrcoef(consum[0:-1], consum[1:])
        Cauto.append(cauto[0, 1])
    # Calculate the mean and standard deviation of these moments
    # across the total number of simulations
    GDPAUTO = sp.array(GDPauto)
    gdpsimmean = sp.mean(GDPAUTO)
    gdpsimstdev = sp.std(GDPAUTO)
    IAUTO = sp.array(Iauto)
    isimmean = sp.mean(IAUTO)
    isimstdev = sp.std(IAUTO)
    CAUTO = sp.array(Cauto)
    csimmean = sp.mean(CAUTO)
    csimstdev = sp.std(CAUTO)
    sp.savetxt('GDPAUTO.csv', GDPAUTO)
    sp.savetxt('IAUTO.csv', IAUTO)
    sp.savetxt('CAUTO.csv', CAUTO)
    print "GDP/Investment/Consumption Simulations Mean/Standard Deviation"
    print "of Autocorrelation. The Autocorrelation Coefficients"
    print "of GDP,Investment,Consumption for each series have been saved"
    print "separately in csv files"
    return gdpsimmean, gdpsimstdev, isimmean, isimstdev, csimmean, csimstdev
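# Minimal sketch (made-up AR(1) series, not from the original model) of the
# lag-1 autocorrelation pattern sp.corrcoef(x[0:-1], x[1:])[0, 1] used
# repeatedly in Autocorr() above: the estimate should land close to the true
# persistence parameter 0.8.
import numpy as np

rng = np.random.RandomState(2)
x = np.zeros(2000)
for ti in range(1, 2000):
    x[ti] = 0.8 * x[ti - 1] + rng.randn()      # simulate an AR(1) process
print(np.corrcoef(x[0:-1], x[1:])[0, 1])       # roughly 0.8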
def MonteCarlo(P, Q, R, S, N, X0, Z, Xbar, alpha, delta, reps, T):
    C = sp.zeros((T, reps))
    L = sp.zeros((T, reps))
    Lauto = []
    Cauto = []
    Wauto = []
    LWcorr = []
    for i in range(reps):
        if i % 500 == 0:
            print 'Simulation #: ', i
        X = []
        X = Xgen(X0, Z[:, i], P, Q, Xbar)
        # find C,L for each time period
        consumption, labor, wage = CLgen(X, Z[:, i], alpha, delta)
        # Consumption autocorrelation coefficients for each series
        # appended to the empty Cauto list
        #cauto = sp.corrcoef(consumption[0:-1],consumption[1:])
        cauto = sp.mean(consumption)
        Cauto.append(cauto)
        # Labor autocorrelation coefficients for each series
        # appended to the empty Lauto list
        #lauto = sp.corrcoef(labor[0:-1],labor[1:])
        lauto = sp.mean(labor)
        Lauto.append(lauto)
        # Wage autocorrelation for each series
        # appended to the empty Wauto list
        #wauto = sp.corrcoef(wage[0:-1],wage[1:])[0,1]
        wauto = sp.mean(wage)
        Wauto.append(wauto)
        # Wage and Labor correlation coefficient for each series
        # appended to the empty LWcorr list
        lwcorr = sp.corrcoef(labor, wage)
        LWcorr.append(lwcorr)
    # What we will return, arrays of the auto and correlation coefficients
    CAUTO = sp.array(Cauto)
    LAUTO = sp.array(Lauto)
    WAUTO = sp.array(Wauto)
    LWCORR = sp.array(LWcorr)
    return CAUTO, LAUTO, WAUTO, LWCORR
def midparent_predictions(Y, parent1, parent2, Itrain=None, Itest=None):
    if Itrain is None:
        Itrain = sp.ones(Y.shape[0], dtype=bool)
        Itest = sp.ones(Y.shape[0], dtype=bool)
    modelmatrix = patsy.dmatrix("0 + parent1 + parent2")
    Rsquared = sp.zeros((Y.shape[1]))
    pred = sp.zeros_like(Y[Itest, :])
    for j in range(Y.shape[1]):
        lm = linear_model.LinearRegression()
        lm.fit(modelmatrix[Itrain, :], Y[Itrain, j])
        pred[:, j] = lm.predict(modelmatrix[Itest, :])
        Rsquared[j] = sp.corrcoef(pred[:, j], Y[Itest, j])[0, 1] ** 2
    return np.row_stack((Rsquared)), pred
def calculate_stock_correlation(data):
    """
    This function should take a list containing two lists of the form returned
    by get_yahoo_data (list of date, adj. close tuples) and return the
    correlation of the daily returns as defined above.
    """
    one = []
    two = []
    for i in xrange(1, len(data[0])):
        one.append((data[0][i][1] - data[0][i - 1][1]) / data[0][i - 1][1])
        two.append((data[1][i][1] - data[1][i - 1][1]) / data[1][i - 1][1])
    return scipy.corrcoef(one, two)[0][1]
def canonicalize_signs(self, sources, mixing):
    '''
    Accepts a set of sources and corresponding mixing matrices from an ICA
    component (should be a realization component, as this operation makes no
    sense for regular ICA realizations) and fixes the signs of the
    realizations, using the sign of the inter-source cross correlations.
    Specifically, the 0th source is arbitrarily deemed to have the canonical
    sign; components which correlate positively with this component keep the
    same signs, and those which negatively correlate have their signs
    reversed.

    This WILL NOT ensure all source-source correlations are positive, but
    will tend to cause the 'well matched' components to have the same sign.
    '''
    compSigns = np.sign(corrcoef(sources)[0, :])
    for i in range(1, sources.shape[0]):
        sources[i, :] = compSigns[i] * sources[i, :]
        mixing[:, i] = compSigns[i] * mixing[:, i]
    return sources, mixing
def correlationMatrix(mdata, linit, lend, nstep):
    lstep = (lend - linit) / nstep
    corr = np.zeros((mdata.shape[0], mdata.shape[0]))
    liter = [linit + (i * lstep) for i in range(nstep)]
    #print liter, len(liter), lend
    zz = 0
    for length in liter:
        corrs = corrcoef(mdata[:, length:length + lstep])
        corr += corrs
        zz += 1
        print '.',
    print
    corr /= nstep
    return corr
def portfolioVariance(R, w):
    # find the correlation coefficients; R is transposed so each row holds one stock's returns
    corr = sp.corrcoef(R.T)
    # find the standard deviation along the column axis
    standarDeviation = sp.std(R, axis=0)
    var = 0.0
    n = len(w)
    # with the weights and standard deviations in hand, pair every two stocks,
    # compute each pairwise variance term and sum them up
    for i in range(n):
        for j in range(n):
            var += w[i] * w[j] * standarDeviation[i] * standarDeviation[j] * corr[i, j]
    return var
def centrality_correlation(G):
    results = []
    centralities = nx.centrality.brandes_betweenness_centrality(G, normalized=False)
    for node in G:
        #if G.degree(node) == 1:  #if there is just one root node and the rest are leaves, get just one point
        #    continue
        #jitter for visualization:
        #results.append((G.degree(node)+npr.rand()*0.4, centralities[node]))
        results.append((G.degree(node), centralities[node]))
    results = np.array(results)
    corr = scipy.corrcoef(results[:, 0], results[:, 1])[0, 1]
    return corr
def main(events):
    data = get_event_data(events)
    matrix = event_data_to_matrix(data, events)
    print "Correlation coefficients"
    for event1, event2 in list_to_pairs(events):
        data1 = matrix[:, events.index(event1)]
        data2 = matrix[:, events.index(event2)]
        coeff = corrcoef(data1, data2)[0][1]
        print "%s\tx\t%s:\t%f" % (event1, event2, coeff)
def pred_accuracy(y_true, y_pred):
    y_true = sp.copy(y_true)
    if len(sp.unique(y_true)) == 2:
        print 'dichotomous trait, calculating AUC'
        y_min = y_true.min()
        y_max = y_true.max()
        if y_min != 0 or y_max != 1:
            y_true[y_true == y_min] = 0
            y_true[y_true == y_max] = 1
        fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
        auc = metrics.auc(fpr, tpr)
        return auc
    else:
        print 'continuous trait, calculating COR'
        cor = sp.corrcoef(y_true, y_pred)[0, 1]
        return cor
def corrParallelSym(Y, df=None):
    """
    computes the correlation matrix of Y
    """
    nSamples = Y.shape[1]
    corr = SP.corrcoef(Y)
    if df is None:
        df = max(nSamples - 2, 0)  # degrees of freedom
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        tstat = corr / SP.sqrt(1.0 - corr ** 2)  # calculate t statistic
        tstat = math.sqrt(df) * tstat
        pv = 2 * t.cdf(-abs(tstat), df, loc=0, scale=1)  # calculate p value
    return corr, pv
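# Hedged cross-check (synthetic data, illustrative only): for a single pair of
# rows, the t-transform applied to corrcoef in corrParallelSym() above should
# reproduce the two-sided p-value returned by scipy.stats.pearsonr.
import numpy as np
from scipy import stats

rng = np.random.RandomState(3)
Y_demo = rng.randn(2, 50)                      # two variables, 50 samples each
r = np.corrcoef(Y_demo)[0, 1]
dof = Y_demo.shape[1] - 2                      # degrees of freedom
tstat = np.sqrt(dof) * r / np.sqrt(1.0 - r ** 2)
p_manual = 2 * stats.t.cdf(-abs(tstat), dof)
print(p_manual, stats.pearsonr(Y_demo[0], Y_demo[1])[1])   # should match closely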
def get_r2(Y1, Y2):
    """
    return list of squared correlation coefficients (one per task)
    """
    if Y1.ndim == 1:
        Y1 = SP.reshape(Y1, (Y1.shape[0], 1))
    if Y2.ndim == 1:
        Y2 = SP.reshape(Y2, (Y2.shape[0], 1))
    t = Y1.shape[1]
    r2 = []
    for i in range(t):
        _r2 = SP.corrcoef(Y1[:, i], Y2[:, i])[0, 1] ** 2
        r2.append(_r2)
    r2 = SP.array(r2)
    return r2