def CalcCorrelation(percentage, N, index): CreateTempResFile(percentage, N) getTrecEval(measure, index) x = [res.std for Qnr, res in QueriesRes.iteritems()] y = [res.trecScore for Qnr, res in QueriesRes.iteritems()] std_p = pearsonr(x, y)[0] std_s = spearmanr(x, y)[0] x = [res.std / math.sqrt(len(Qterms[Qnr].split())) for Qnr, res in QueriesRes.iteritems()] std_n_p = pearsonr(x, y)[0] std_n_s = spearmanr(x, y)[0] x = [res.MAD for Qnr, res in QueriesRes.iteritems()] mad_p = pearsonr(x, y)[0] mad_s = spearmanr(x, y)[0] x = [res.MAD / math.sqrt(len(Qterms[Qnr].split())) for Qnr, res in QueriesRes.iteritems()] mad_n_p = pearsonr(x, y)[0] mad_n_s = spearmanr(x, y)[0] if debug: print "N", N, "----", "Percentage", percentage print "std pearson ", std_p print "std spearman ", std_s print "std norm pearson ", std_n_p print "std norm spearman", std_n_s print "MAD pearson ", mad_p print "MAD spearman ", mad_s print "MAD norm pearson ", mad_n_p print "MAD norm spearman", mad_n_s return (std_p, std_s, std_n_p, std_n_s, mad_p,mad_s, mad_n_p, mad_n_s)
def plot(x, y, lx, ly, order): lp = np.poly1d(np.polyfit(lx, ly, 1)) xx = np.linspace(min(lx), max(lx), num=100) if args.title: title = args.title else: title = "Probablity for texts %s\ncategories:%s" % ( ",".join(map(str, args.text_numbers)), ",".join(map(lambda c: category.category_name(c)["category"], categories)) ) alpha = args.alpha plt.ylabel("logit %d-gram model Probability" % order) plt.xlabel("logit Cloze Empirical Probability") plt.title(title) plt.plot(xx, lp(xx), "r-") plt.scatter(lx, ly, alpha=alpha) plt.savefig("plots/%slogit_%d.png" % (args.prefix, order)) plt.close() print "correlation %d-gram vs. cloze:" % order, pearsonr(x, y)[0] print "correlation %d-gram vs. cloze:" % order, pearsonr(lx, ly)[0] p = np.poly1d(np.polyfit(x, y, 1)) xx = np.linspace(min(x), max(x), num=100) plt.ylabel("%d-gram model Probability" % order) plt.xlabel("Cloze Empirical Probability") plt.title(title) plt.plot(xx, p(xx), "r-") plt.scatter(x, y, alpha=alpha) plt.savefig("plots/%s%d.png" % (args.prefix, order)) plt.close()
def print4():
    """Write Pearson correlation tables to ``<name>.*.senti.data`` files.

    For every key in ``k3set`` a file ``<name>.R<r>.senti.data`` is written.
    Each of its lines holds the ``R`` value, an ``ALPHA`` value from ``k2set``
    and then one correlation per ``A`` key in ``k1set``, separated by
    ``delim``. Afterwards one file ``<name>.<BASE>.senti.data`` is written
    per entry of ``kbases`` containing that baseline's correlation.

    Files are opened with ``with`` so they are closed even if a lookup or
    ``pearsonr`` raises, and the fields are joined with ``delim`` instead of
    appending it after every field and chopping one character off the end,
    which was only right for a single-character delimiter.
    """
    for r_key in sorted(k3set):
        r_val = float(r_key.replace('R', ''))
        k3 = 'R' + str(r_val)
        with open(name + '.R' + str(r_val) + '.senti.data', 'w') as fil:
            for alpha_key in sorted(k2set):
                alpha_val = float(alpha_key.replace('ALPHA', ''))
                k2 = 'ALPHA' + str(alpha_val)
                fields = [str(r_val), str(alpha_val)]
                for a_key in sorted(k1set):
                    # Keys of the data dicts use the integer form of A.
                    k1 = 'A' + str(int(float(a_key.replace('A', ''))))
                    fields.append(
                        str(pearsonr(dataX[k1][k2][k3], dataY[k1][k2][k3])[0]))
                fil.write(delim.join(fields) + "\n")

    for base in sorted(kbases):
        with open(name + '.' + base + '.senti.data', 'w') as fil:
            fil.write(str(pearsonr(dataX[base], dataY[base])[0]) + '\n')
def get_regression_results():
    """Fit every regression model on every data/feature selection and tabulate.

    For ``dataSelect`` in ``0, 2, 4`` and both feature-regression settings,
    one train/test split is drawn and shared by the three models so that
    their results are comparable. For each fitted model the Pearson
    correlation on the training and test predictions and the selected
    feature names are recorded.

    Returns:
        pandas.DataFrame: one row per fitted model, indexed by the model
        name, with columns ``Corr-Train``, ``Corr-Test``, ``FeatNo`` and
        ``Features Selected``.
    """
    import warnings
    warnings.filterwarnings('ignore')

    model_names = ['linear', 'Ridge', 'Lasso']
    Result = {}
    stats_rows = []
    row_labels = {}

    for dataSelect in range(0, 5, 2):
        lab, feat, final_keys = get_data_whole(dataSelect)
        print("\n")
        print('Java-sID->' + str(test['sids'][0][dataSelect]))
        per_dataset = {'final_keys': final_keys}
        Result[dataSelect] = per_dataset

        for presentFeatRegress in range(0, 2):
            per_feature = {}
            per_dataset[presentFeatRegress] = per_feature
            # One split for all models of this configuration.
            X_train, y_train, X_test, y_test, indices = get_data_allModel(
                lab, feat, presentFeatRegress)

            for modelNo, model_name in enumerate(model_names):
                fitted = apply_regression_model(
                    X_train, y_train, X_test, y_test, indices, modelNo)
                per_feature[modelNo] = fitted

                corr_train = pearsonr(fitted['y_train'].values,
                                      fitted['predictions_train'])[0]
                corr_test = pearsonr(fitted['y_test'].values,
                                     fitted['predictions'])[0]
                chosen = [str(key[0]) for key in final_keys[fitted['indices']]]

                # NOTE(review): the coefficient is indexed once more below, so
                # pearsonr presumably returns an array here (column-shaped
                # inputs) - confirm against the data shapes.
                row_labels[len(stats_rows)] = model_name
                stats_rows.append(
                    [corr_train[0], corr_test[0], len(chosen), chosen])

    A = pd.DataFrame(stats_rows)
    A.rename(columns={0: 'Corr-Train', 1: 'Corr-Test', 2: 'FeatNo',
                      3: 'Features Selected'}, inplace=True)
    A.rename(index=row_labels, inplace=True)
    return A
def svm_experiment(train_data, test_data):
    """Grid-search one SVR per emotion and score it on the test slice.

    ``train_data`` and ``test_data`` are indexed as ``[emotion, row, column]``;
    the last column is used as the target and the remaining columns as the
    features.

    Returns:
        tuple: ``(maes, rmses, pearsons, all_pearson, all_preds)`` - three
        dicts keyed by emotion, the Pearson coefficient over all emotions'
        predictions concatenated, and those concatenated predictions.
    """
    param_grid = {'C': np.logspace(-2, 2, 5),
                  'epsilon': np.logspace(-3, 1, 5),
                  'gamma': np.logspace(-3, 1, 5)}
    maes, rmses, pearsons = {}, {}, {}
    # Seeded with an empty float array, like the incremental concatenation.
    label_chunks = [np.array([])]
    pred_chunks = [np.array([])]

    for emo_id, emo in enumerate(EMOS):
        features_train = train_data[emo_id, :, :-1]
        target_train = train_data[emo_id, :, -1]
        features_test = test_data[emo_id, :, :-1]
        target_test = test_data[emo_id, :, -1]

        search = GridSearchCV(SVR(), param_grid)
        search.fit(features_train, target_train)
        predicted = search.predict(features_test)

        maes[emo] = MAE(predicted, target_test)
        rmses[emo] = math.sqrt(MSE(predicted, target_test))
        pearsons[emo] = pearsonr(predicted, target_test)[0]
        label_chunks.append(target_test)
        pred_chunks.append(predicted)

    all_labels = np.concatenate(label_chunks)
    all_preds = np.concatenate(pred_chunks)
    all_pearson = pearsonr(all_preds, all_labels)[0]
    return maes, rmses, pearsons, all_pearson, all_preds
def test_similarity_2(model, vocab):
    """Test the model for similarity.

    Method: get correlation between model similarity and similarity of items
    in the test set. This method is using data from Ruts et al. (2004).

    Args:
        model: object exposing ``similarity(word_a, word_b)``.
        vocab: set of words known to the model; pairs containing any word
            outside it are skipped and recorded.

    Returns:
        dict: one entry per category plus ``"overall"``. Each category entry
        holds ``skipped`` (set of unknown words), ``pairs_tested``,
        ``pearsonr`` and ``spearmanr``; ``"overall"`` holds the last three
        computed over all categories together.
    """
    d = ruts_etal_similarity.get_similarity_dict()
    results = {category: {"skipped": set()} for category in d}
    pred_overall = []
    actual_overall = []

    for category, pairs in d.items():
        predicted_values = []
        actual_values = []
        for pair, score in pairs.items():
            if set(pair).issubset(vocab):
                predicted_values.append(model.similarity(*pair))
                actual_values.append(score)
            else:
                results[category]["skipped"].update(set(pair) - vocab)
        pred_overall += predicted_values
        actual_overall += actual_values
        results[category]["pairs_tested"] = len(predicted_values)
        results[category]["pearsonr"] = pearsonr(predicted_values, actual_values)
        results[category]["spearmanr"] = spearmanr(predicted_values, actual_values)

    results["overall"] = {
        # Count every tested pair, not just those of the last category.
        "pairs_tested": len(pred_overall),
        "pearsonr": pearsonr(pred_overall, actual_overall),
        "spearmanr": spearmanr(pred_overall, actual_overall),
    }
    return results
def calculate_correlation(): NG = [data[5] for data in import_text(join(locpath, celloutfile), '\t')][1:] Qusar = [data[6] for data in import_text(join(locpath, celloutfile), '\t')][1:] NG = [int(x) for x in NG] Qusar = [int(x) for x in Qusar] print "Correlation coefficient:", pearsonr(NG, Qusar)[0] print "p-value: ", pearsonr(NG, Qusar)[1]
def calculateDifferenceInRealGraphWeights(fileName):
    """Compare the weights of edges inside and outside a dual-graph vertex cover.

    Loads the weighted graph from ``fileName``, builds its dual, computes a
    minimum weighted vertex cover of the dual and writes both the dual and
    the cover using ``inputGraphFile``. Edges whose id is in the cover are
    collected as "weak", all others as "strong". Prints the mean weight of
    each class and the Pearson result of equally sized random samples of the
    two classes.

    The unused edge-id/weight list that was built before the loop has been
    removed and the sample size is computed with ``min``.
    """
    graph = loadGraphWithWeight(fileName)
    dualGraph = getGraphDual(graph)
    writeDualGraph(dualGraph, inputGraphFile)
    loadGraph()
    vertexCover = min_weighted_vertex_cover(dualGraph)
    writeVertexCover(vertexCover, inputGraphFile)

    weakEdges = []
    strongEdges = []
    for u, v, attrs in graph.edges_iter(data=True):
        bucket = weakEdges if getEdgeId((u, v)) in vertexCover else strongEdges
        bucket.append(attrs['w'])

    # Both samples must have the same length for pearsonr.
    numElements = min(len(weakEdges), len(strongEdges))
    print(np.mean(weakEdges))
    print(np.mean(strongEdges))
    print(pearsonr(random.sample(weakEdges, numElements),
                   random.sample(strongEdges, numElements)))
def get_correlation_between_mean_score_and_error(self):
    """Compute the correlation between scores and error counts.

    Two correlations are computed:

    * genuine scores and false reject count
    * impostor scores and false acceptance count

    The false reject and false acceptance counts are obtained per user with
    a single global threshold, the one giving the EER. The correlation is
    the Pearson coefficient.

    Returns:
        tuple: ``(genuine_correlation, impostor_correlation, eer)``.
    """
    # The EER threshold is shared by all users.
    eer, thr = self.get_eer_and_threshold()

    # Per-user false rejects on genuine presentations.
    reject_jobs = (
        delayed(_parallel_false_reject_helper)(
            self.get_genuine_presentations_of_user(userid), thr, self._type)
        for userid in self._users_id
    )
    fr = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1)(reject_jobs))

    # Per-user false accepts on impostor presentations.
    accept_jobs = (
        delayed(_parallel_false_accept_helper)(
            self.get_impostor_presentations_of_user(userid), thr, self._type)
        for userid in self._users_id
    )
    fa = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1)(accept_jobs))

    genuine_corr = pearsonr(fr, self._genuine_scores)[0]
    impostor_corr = pearsonr(fa, self._impostor_scores)[0]
    return genuine_corr, impostor_corr, eer
def knnPredictor(df):
    """Choose k for a KNN regressor by test-set correlation and plot the fit.

    Splits ``df`` with ``sample``, fits a ``KNeighborsRegressor`` for each
    candidate ``k`` and keeps the ``k`` whose test predictions have the
    highest Pearson coefficient with the measured values. The best ``k`` and
    its coefficient are printed, the model is refitted and a scatter plot of
    measured versus predicted values is shown.

    The sweep used to run up to k=199 unconditionally; it now stops at the
    number of training samples, because a neighbour count above that cannot
    be satisfied.
    """
    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)

    largest_k = min(199, len(dataTrainX))
    corelationCoefficiantDictionary = {}
    for k in range(1, largest_k + 1):
        knnModel = KNeighborsRegressor(n_neighbors=k)
        knnModel.fit(dataTrainX, dataTrainY)
        knnpredicted = knnModel.predict(dataTestX)
        corelationCoefficiantDictionary[k] = pearsonr(dataTestY, knnpredicted)[0]

    bestK = max(corelationCoefficiantDictionary,
                key=corelationCoefficiantDictionary.get)
    knnModelBest = KNeighborsRegressor(n_neighbors=bestK)
    knnModelBest.fit(dataTrainX, dataTrainY)
    print("K = ")
    print(bestK)
    print("Corelation Coeff:")
    print(corelationCoefficiantDictionary[bestK])

    knnpredictedBest = knnModelBest.predict(dataTestX)
    fig, ax = plotter.subplots()
    corelationCoefficient = pearsonr(dataTestY, knnpredictedBest)
    print(corelationCoefficient[0])
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, knnpredictedBest)
    ax.set_xlabel('Measured')
    plotter.show()
def correlateFeatures(self):
    """Print the Pearson result of feature columns 1-9 against the labels.

    Stores ``self.xTrain`` as ``self.standardizedTrainingData`` and, for each
    column index from 1 to 9, prints ``pearsonr(column, self.yTrain)`` and
    calls ``self.visualizeFeatures`` with that index.
    """
    self.standardizedTrainingData = self.xTrain
    targets = self.yTrain
    for column in range(1, 10):
        # Re-read the attribute each time, as the visualisation is opaque here.
        values = self.standardizedTrainingData[:, column]
        print(pearsonr(values, targets))
        self.visualizeFeatures(column)
def collect_group_stats(groups,thresh = 1e5):
    """Print summary statistics of peak groups and plot intensity vs. vote.

    Args:
        groups: sequence of objects with ``members`` (a list of 3-tuples whose
            first item has an ``intensity``) and a ``vote``.
        thresh: intensity below which a peak counts as "under threshold".

    Prints the number of groups and peaks, the share of singleton groups, the
    share of peaks under the threshold and the number of groups whose most
    intense peak is under the threshold. Then draws two figures (log
    intensity of under-threshold peaks vs. vote, log maximum group intensity
    vs. vote) and prints the Pearson result for each.

    Ratios whose denominator is zero (for example when no peak is under the
    threshold) are reported as 0 instead of raising ``ZeroDivisionError``.
    """
    from scipy.stats import pearsonr

    def _ratio(numerator, denominator, scale=1.0):
        # Safe division used for every percentage/average printed below.
        return scale * numerator / denominator if denominator else 0.0

    n_peaks = 0
    n_singletons = 0
    n_singletons_under_thresh = 0
    non_singletons_under_thresh = 0
    tiny_groups = 0
    tiny_size = 0
    tiny_vote = 0
    votes = []
    intensities = []
    maxivotes = []
    maxi = []

    for group in groups:
        n_peaks += len(group.members)
        if len(group.members) == 1:
            n_singletons += 1
            only_intensity = group.members[0][0].intensity
            if only_intensity < thresh:
                n_singletons_under_thresh += 1
                intensities.append(only_intensity)
                votes.append(group.vote)
            maxi.append(only_intensity)
            maxivotes.append(group.vote)
        else:
            mi = 0
            for peak, _, _ in group.members:
                if peak.intensity < thresh:
                    non_singletons_under_thresh += 1
                    intensities.append(peak.intensity)
                    votes.append(group.vote)
                if peak.intensity > mi:
                    mi = peak.intensity
            if mi < thresh:
                tiny_groups += 1
                tiny_size += len(group.members)
                tiny_vote += group.vote
            maxi.append(mi)
            maxivotes.append(group.vote)

    under_thresh = n_singletons_under_thresh + non_singletons_under_thresh
    print("{} groups, consisting of {} peaks".format(len(groups),n_peaks))
    print("{} singleton groups ({:.0f}% of groups, {:.0f}% of peaks)".format(
        n_singletons,
        _ratio(n_singletons, len(groups), 100.0),
        _ratio(n_singletons, n_peaks, 100.0)))
    print("{} peaks under threshold ({:.0e}), {:.0f}% of peaks".format(
        under_thresh, thresh, _ratio(under_thresh, n_peaks, 100.0)))
    print("\t{} of which are singletons ({:.0f}%)".format(
        n_singletons_under_thresh,
        _ratio(n_singletons_under_thresh, under_thresh, 100.0)))
    print("{} peaks below the threshold in groups of size > 1".format(
        non_singletons_under_thresh))
    print("{} groups where the most intense peak is below the threshold (avg size = {:.2f} avg vote = {:.2f})".format(
        tiny_groups,
        _ratio(tiny_size, tiny_groups),
        _ratio(tiny_vote, tiny_groups)))

    plt.figure()
    plt.plot(np.log(intensities),votes,'k.')
    r,p = pearsonr(np.log(intensities),votes)
    print("Test between intensity and vote for all peaks: corr coef = {}, p-value = {}".format(r,p))
    plt.xlabel('Log intensity')
    plt.ylabel('Vote of enclosing group')

    plt.figure()
    plt.plot(np.log(maxi),maxivotes,'r.')
    r,p = pearsonr(np.log(maxi),maxivotes)
    print("Test between maximum group intensity and vote: corr coef = {}, p-value = {}".format(r,p))
    plt.xlabel('Log maximum group intensity')
    plt.ylabel('Vote of group')
def mantel_test(presence_absence = 'C:/Users/MariaIzabel/Desktop/MASTER/PHD/nchain/presence_absence_matrix_indexes_1s.csv'):
    """Print the Pearson result for every ordered pair of columns of a CSV.

    Args:
        presence_absence: path of a comma-separated file with a header row;
            its first column is used as the index.

    The first rows of the table are printed, followed by one ``pearsonr``
    result per ordered column pair (each column is also paired with itself).
    """
    pres_abs_matrix = pd.read_csv(presence_absence, sep=',', header=0, index_col=0)
    print(pres_abs_matrix.head())

    # Columns are genes; walk all ordered pairs.
    genes = list(pres_abs_matrix)
    for gene1 in genes:
        first = pres_abs_matrix[gene1]
        for gene2 in genes:
            print(pearsonr(first, pres_abs_matrix[gene2]))
def produce_correlations(w):
    """Return LaTeX macros holding the pairwise correlations of e, m and z.

    Args:
        w: mapping with ``"silence"`` and ``"speech"`` entries, each a
            sequence of dicts carrying the keys ``"e"``, ``"m"`` and ``"z"``.

    Returns:
        str: three ``\\newcommand`` lines defining ``\\correlationem``,
        ``\\correlationez`` and ``\\correlationmz`` with four decimals.
    """
    def _column(key):
        # Silence samples first, then speech samples.
        return [d[key] for section in ("silence", "speech") for d in w[section]]

    e = _column("e")
    m = _column("m")
    z = _column("z")

    template = (
        "\\newcommand{\\correlationem}{%.4f}\n"
        "\\newcommand{\\correlationez}{%.4f}\n"
        "\\newcommand{\\correlationmz}{%.4f}\n"
    )
    r_em = pearsonr(e, m)[0]
    r_ez = pearsonr(e, z)[0]
    r_mz = pearsonr(m, z)[0]
    return template % (r_em, r_ez, r_mz)
def acorr(array):
    """Print the autocorrelation (Pearson r) of ``array`` for each lag.

    NaN entries are dropped first. For lag ``k`` the series without its last
    ``k`` values is correlated with the series without its first ``k``
    values, and the coefficient is printed on its own line.

    Lags stop while both slices still hold two samples; the former final lag
    compared one-element slices, for which a correlation is undefined.
    """
    values = array[~np.isnan(array)]
    n = len(values)
    for lag in range(max(n - 1, 0)):
        print(pearsonr(values[:n - lag], values[lag:])[0])
def OptimizeCor(E1,E2):
    """Print the Pearson result of two series and of each leave-one-out pair.

    The first line printed is ``pearsonr(E1, E2)``. Then, for every index
    ``i``, the element at ``i`` is left out of both series and a line with
    the remaining length and the ``pearsonr`` result is printed.

    The leave-one-out series are built by slicing instead of deep-copying
    both inputs on every iteration, and the unused p-value lookup is gone.
    """
    print(pearsonr(E1, E2))
    for i in range(len(E1)):
        E1_tmp = list(E1[:i]) + list(E1[i + 1:])
        E2_tmp = list(E2[:i]) + list(E2[i + 1:])
        print("{} {}".format(len(E1_tmp), pearsonr(E1_tmp, E2_tmp)))
def icm_gp_experiment(train_data, test_data, model, rank):
    """Fit a multi-output (ICM) GP over all emotions and score it per emotion.

    ``train_data`` and ``test_data`` are indexed as ``[emotion, row, column]``
    with the target in the last column. One coregionalised GP with an RBF
    kernel is trained on all emotions jointly.

    Args:
        model: ``"combined"`` and ``"combined+"`` fix ``ICM.B.W`` to 1.0;
            ``"combined"`` additionally ties the ``ICM.B.kappa`` values.
        rank: ``W_rank`` of the ICM kernel.

    Returns:
        tuple: ``(maes, rmses, pearsons, all_pearson, all_preds, B)`` where
        ``B`` is ``W.dot(W.T) + diag(kappa)``.

    The number of outputs is taken from the data instead of the literal 6,
    and the per-emotion slice length uses floor division so it stays an
    integer on Python 3.
    """
    maes = {}
    rmses = {}
    pearsons = {}
    X_train_list = []
    Y_train_list = []
    X_test_list = []
    Y_test_list = []
    for emo_id, emo in enumerate(EMOS):
        X_train_list.append(train_data[emo_id, :, :-1])
        Y_train_list.append(train_data[emo_id, :, -1:])
        X_test_list.append(test_data[emo_id, :, :-1])
        Y_test_list.append(test_data[emo_id, :, -1:])

    x_train, y_train, y_index = GPy.util.multioutput.build_XY(X_train_list, Y_train_list)
    n_outputs = len(X_train_list)
    # The last column of x_train is the output index added by build_XY.
    input_dim = x_train.shape[1] - 1
    k = GPy.util.multioutput.ICM(input_dim=input_dim, num_outputs=n_outputs,
                                 kernel=GPy.kern.RBF(input_dim), W_rank=rank)
    m = GPy.models.GPRegression(x_train, y_train, kernel=k,
                                Y_metadata={'output_index': y_index})
    if model == "combined" or model == "combined+":
        m['ICM.B.W'].constrain_fixed(1.0)
    if model == "combined":
        m['ICM.B.kappa'].tie_together()
    print(m)
    m.optimize_restarts(messages=False, max_iters=100, robust=True)
    print(m)

    W = m['ICM.B.W']
    kappa = m['ICM.B.kappa']
    B = W.dot(W.T) + np.diag(kappa)

    x_test, y_test, _ = GPy.util.multioutput.build_XY(X_test_list, Y_test_list)
    # NOTE(review): the training output index is passed for the test inputs -
    # confirm this is intended when train and test sizes differ.
    preds = m.predict(x_test, Y_metadata={'output_index': y_index})[0]
    factor = preds.shape[0] // n_outputs

    all_labels = np.array([])
    all_preds = np.array([])
    for emo_id, emo in enumerate(EMOS):
        start, stop = emo_id * factor, (emo_id + 1) * factor
        emo_preds = preds[start:stop]
        emo_labels = y_test[start:stop]
        maes[emo] = MAE(emo_preds, emo_labels)
        rmses[emo] = math.sqrt(MSE(emo_preds, emo_labels))
        pearsons[emo] = pearsonr(emo_preds, emo_labels)[0]
        all_labels = np.concatenate((all_labels, emo_labels.flatten()))
        all_preds = np.concatenate((all_preds, emo_preds.flatten()))
    all_pearson = pearsonr(all_preds, all_labels)[0]
    return maes, rmses, pearsons, all_pearson, all_preds, B
def main(): renewfile = '/Users/Leon/Documents/Research/Data/USPCMainAssg1981_2006Drug' Renewdictionary = load_renewfile(renewfile) datefile = '/Users/Leon/Documents/Research/Data/grantDate' Date = load_dateFile(datefile) directory = './USclass' filenames = [x for x in os.listdir(directory) if 'Trend' in x] for filename in filenames: cluster_number = ''.join([x for x in filename if x.isdigit()]) print filename Trend = load_dictionary(directory+'/'+filename) Trend = filter_By_Year(Trend,Date,1981,1998) Trend = filter_By_Number(Trend,100) print 'number of qualified trend ',len(Trend) # for label,patentlist in Trend.items(): # if len(patentlist)<=10: # del Trend[label] # print 'delete trend %s because it is smaller than 10'%(label) positionfile = './data_analysis/US/'+cluster_number+'trends_position_distribution.csv' # 0: ['4855911', '5656428', '6027445'...] positionFeature = position_feature_for_trend(Trend,Date) matrix,percentage = trend_distribution(positionFeature,0.1,Renewdictionary) # calculate the average average0 = nth_renew_average(matrix,0) average1 = nth_renew_average(matrix,1) average2 = nth_renew_average(matrix,2) average3 = nth_renew_average(matrix,3) percentage.append('correlation') head = percentage[1:] write_distribution(head,positionfile) write_distribution(['renew 0,1,2,3 ratio in different range'],positionfile) a = pearsonr(average0,head[0:-1]) average0.extend(a) write_distribution(average0,positionfile) a = pearsonr(average1,head[0:-1]) average1.extend(a) write_distribution(average1,positionfile) a = pearsonr(average2,head[0:-1]) average2.extend(a) write_distribution(average2,positionfile) a = pearsonr(average3,head[0:-1]) average3.extend(a) write_distribution(average3,positionfile) write_distribution('\n',positionfile) label = 0 for trend in matrix: write_distribution(['trend '+str(label)],positionfile) for renew in trend: line = renew.extend(pearsonr(renew,head[0:-1])) write_distribution(renew,positionfile) write_distribution('\n',positionfile) 
label += 1
def getPearson():
    """Print the Pearson result of user reputation against post score.

    Only users present in both ``getUserRep()`` and ``getPostUserScore()``
    contribute a data point.
    """
    userRep = getUserRep()
    userScore = getPostUserScore()
    shared_ids = [userID for userID in userRep if userID in userScore]
    reputations = [userRep[userID] for userID in shared_ids]
    scores = [userScore[userID] for userID in shared_ids]
    print(pearsonr(reputations, scores))
def correl_matrix(assets, tickers):
    """Return the matrix of pairwise Pearson coefficients between assets.

    Args:
        assets: table with a ``columns`` attribute, indexable by ticker.
        tickers: column labels; entry ``[i][j]`` of the result correlates
            ``assets[tickers[i]]`` with ``assets[tickers[j]]``.

    Returns:
        numpy.ndarray: square, symmetric matrix with one row and column per
        entry of ``assets.columns``.

    Each pair is evaluated once and mirrored; previously every coefficient
    was computed four times.
    """
    size = len(assets.columns)
    corr_matrix = np.zeros((size, size))
    for i in range(size):
        for j in range(i, size):
            coefficient = pearsonr(assets[tickers[i]], assets[tickers[j]])[0]
            corr_matrix[i][j] = coefficient
            corr_matrix[j][i] = coefficient
    return corr_matrix
def plotvsprice(g, ds, titles, xys, item, logscale=False, save=False, savename = 'output.png'):
    """Plot one data set against the price data set and print their correlation.

    Args:
        g: plotter object; it is called with a command string and provides
            ``plot`` and ``hardcopy``.
        ds: data sets, in the same order as ``titles``.
        titles: names of the data sets; must contain ``item`` and ``'price'``.
        xys: per data set a sequence of ``(x, y)`` pairs, same order as
            ``titles``.
        item: name of the data set to compare with the price.
        logscale: send ``'set logscale y'`` to the plotter first.
        save: write the plot to ``savename`` as PNG.
    """
    if logscale:
        g('set logscale y')

    item_idx = titles.index(item)
    price_idx = titles.index('price')
    g.plot(ds[item_idx], ds[price_idx])
    if save:
        g.hardcopy(savename, terminal='png')

    # Only the y components take part in the correlation.
    prices = [y for _, y in xys[price_idx]]
    values = [y for _, y in xys[item_idx]]
    print(pearsonr(prices, values))
def main(nh):
    # Generator-style mission: approaches the 'grapes/board' target, asks the
    # forward camera for 'grapes/empty_cell' objects and assigns each detected
    # object to a cell of a 3x3 grid (centre excluded).
    # NOTE(review): the yields suggest this runs under a coroutine decorator
    # that is not visible here - confirm.
    sub = yield sub_scripting.get_sub(nh)
    # NOTE(review): this move is not yielded, unlike the approach below - confirm
    # that not waiting for it is intended.
    sub.move.go(linear=[0.25, 0, 0])
    yield sub.visual_approach('forward', 'grapes/board', size_estimate=math.sqrt(2)*3*12*.0254, desired_distance=2)
    goal_mgr = sub._camera_2d_action_clients['forward'].send_goal(legacy_vision_msg.FindGoal(
        object_names=['grapes/empty_cell'],
    ))
    feedback = yield goal_mgr.get_feedback()
    # Each object result is a JSON document; sort ascending by its 'redness'.
    res = map(json.loads, feedback.targetreses[0].object_results)
    res.sort(key=lambda x: float(x['redness']))
    # The eight (X, Y) offsets around the centre cell.
    all_possible_coords=set((X,Y) for X in [-1, 0, 1] for Y in [-1, 0, 1] if X != 0 or Y != 0)
    # Choose the assignment of grid coordinates to detections that maximises
    # the smaller of the two pearsonr results (grid X vs. image centre x,
    # grid Y vs. image centre y). The results are compared as returned by
    # pearsonr, i.e. coefficient first.
    coords = max(itertools.permutations(all_possible_coords, len(res)), key=lambda positions: min(
        pearsonr(*zip(*((pos[0], float(obj['center'][0])) for pos, obj in zip(positions, res)))),
        pearsonr(*zip(*((pos[1], float(obj['center'][1])) for pos, obj in zip(positions, res)))),
    ) )
    # The four least red detections are taken as empty, the next four as filled.
    empty = res[:4]
    filled = res[4:8]
    for x in res:
        print x['redness'], x['center']
    # The string below is a disabled alternative (distance-based assignment);
    # it is evaluated as an expression and discarded.
    '''empty_coords = min(itertools.permutations(all_possible_coords, 4), key=lambda positions: sum(math.sqrt( (float(obj['center'][0]) - .12*pos[0])**2 + (float(obj['center'][1]) - .12*pos[1])**2 ) for pos, obj in zip(positions, empty)), )'''
    empty_coords = coords[:4]
    print empty_coords
    empty_coords = set(empty_coords)
    filled_coords = all_possible_coords - empty_coords
    print empty_coords, filled_coords
    def gen_paths(unmoved_peg_coords, empty_coords):
        # Yield every sequence of (from, to) moves that moves each peg in
        # unmoved_peg_coords once into a currently empty cell; a vacated cell
        # becomes available to the following moves.
        if not unmoved_peg_coords:
            yield []
            return
        for a in unmoved_peg_coords:
            for b in empty_coords:
                for rest in gen_paths(unmoved_peg_coords - {a}, (empty_coords | {a}) - {b}):
                    yield [(a, b)] + rest
    def dist((ax, ay), (bx, by)):
        # Euclidean distance between two grid coordinates.
        return math.sqrt((ax-bx)**2 + (ay-by)**2)
def lenDistTtest(sample2len2freq, group2samples, uniq, targetGroup):
    """t-test within-group against between-group length-distribution correlations.

    Args:
        sample2len2freq: per sample, its length distribution.
        group2samples: per group, the list of its sample names.
        uniq: passed to ``getFreqVec`` together with a length distribution to
            build the frequency vector of a sample.
        targetGroup: if truthy, only pairs inside this group count as
            "same" pairs; otherwise pairs inside every group do.

    Returns:
        tuple: ``(tval, pval, mean_same, std_same, mean_diff, std_diff)``.

    Raises:
        KeyError: a sample of a group is missing from ``sample2len2freq``.
        ValueError: there are no same-group or no different-group pairs.

    Samples whose frequency vector sums to zero are skipped. The ValueError
    message now reports the two pair counts; it used to join numeric vectors
    with ``str.join``, which failed with a different exception. Correlations
    that would have been discarded (groups other than the target) are no
    longer computed.
    """
    samePairs = []
    diffPairs = []

    # Same pairs: all sample pairs inside one group.
    for g, gsamples in group2samples.items():
        issame = not (targetGroup and g != targetGroup)
        for i in range(len(gsamples) - 1):
            if gsamples[i] not in sample2len2freq:
                raise KeyError("Could not found sample %s in sample2len2freq\n" % gsamples[i])
            vec1 = getFreqVec(sample2len2freq[gsamples[i]], uniq)
            if sum(vec1) == 0:
                continue
            for j in range(i + 1, len(gsamples)):
                if gsamples[j] not in sample2len2freq:
                    raise KeyError("Could not found sample %s in the sample2len2freq\n" % gsamples[j])
                vec2 = getFreqVec(sample2len2freq[gsamples[j]], uniq)
                if sum(vec2) == 0:
                    continue
                if issame:
                    corr, pval = pearsonr(vec1, vec2)
                    samePairs.append(corr)

    # Diff pairs: every sample of a group against every sample of a later group.
    groups = list(group2samples.keys())
    for i in range(len(groups) - 1):
        for gs1 in group2samples[groups[i]]:
            vec1 = getFreqVec(sample2len2freq[gs1], uniq)
            if sum(vec1) == 0:  # skip empty sample
                continue
            for j in range(i + 1, len(groups)):
                for gs2 in group2samples[groups[j]]:
                    vec2 = getFreqVec(sample2len2freq[gs2], uniq)
                    if sum(vec2) == 0:
                        continue
                    corr, pval = pearsonr(vec1, vec2)
                    diffPairs.append(corr)

    # t-test
    if len(samePairs) == 0 or len(diffPairs) == 0:
        raise ValueError(
            'lenDistTtest, one of the vector has zero length.\n'
            'samePairs: %d.\ndiffPairs: %d\n' % (len(samePairs), len(diffPairs)))
    tval, pval = ttest_ind(samePairs, diffPairs)
    return tval, pval, np.mean(samePairs), np.std(samePairs), np.mean(diffPairs), np.std(diffPairs)
def nCorrelation(x, y, n=None, pValue=False):
    """Pearson correlation of two series, overall or over a rolling window.

    Args:
        x, y: sequences of equal length that support slicing.
        n: window length; ``None`` correlates the complete series.
        pValue: return the full ``pearsonr`` result instead of only the
            coefficient.

    Returns:
        A single value when ``n`` is ``None``; otherwise a list with one
        entry per window ``[k - n, k)`` for ``k`` from ``n`` to ``len(x)``.
    """
    def _correlate(xs, ys):
        result = pearsonr(xs, ys)
        return result if pValue else result[0]

    if n is None:
        return _correlate(x, y)
    return [_correlate(x[end - n:end], y[end - n:end])
            for end in range(n, len(x) + 1)]
def rs_by_day_and_time():
    """Print how total route time correlates with weekends and rush hours.

    The dependent variable is the sum of each trajectory in ``trajs``; the
    independent variable is a 0/1 flag per entry of ``start_times``: first
    "starts on a Saturday or Sunday", then "starts in hours 7-9 or 17-19 on
    a weekday".

    Previously recorded results (b65, downtown-bound):
    -0.135908180745 for the weekend flag and 0.20212506141277539 for the
    weekday rush-hour flag.

    The trajectory sums are computed once and shared by both correlations.
    """
    weekend_days = (5, 6)
    rush_hours = (7, 8, 9, 17, 18, 19)

    durations = [sum(traj) for traj in trajs]  # dependent

    # TODO: how much variation is there weekend to weekday?
    is_weekend = [int(start_time.weekday() in weekend_days)
                  for start_time in start_times]  # independent
    print("weekend/day", pearsonr(is_weekend, durations)[0])

    is_rush_hour = [int(start_time.hour in rush_hours
                        and start_time.weekday() not in weekend_days)
                    for start_time in start_times]  # independent
    print("rush hour (weekdays)", pearsonr(is_rush_hour, durations)[0])
def make_graph():
    """Correlate and plot hop count, round-trip time and distance per route.

    Reads ``trace_results.txt`` (tab separated: ip, hops, rtt) and
    ``geo_results.txt`` (tab separated: ip, distance), prints Pearson's r for
    hops/distance, rtt/distance and hops/rtt, and shows two figures:
    hops against distance and rtt against distance.

    Both files are now closed via ``with``. The report is emitted by a single
    ``print`` call: the former trailing commas after ``print(...)`` only
    suppress the newline on Python 2, so the layout differed on Python 3.
    """
    ips = []
    rtt = []
    hops = []
    dist = []

    with open('trace_results.txt', "r") as trace_data:
        for line in trace_data:
            # File is tab delineated
            split_data = line.split('\t')
            ips.append(split_data[0])
            hops.append(int(split_data[1]))
            # Each route is newline separated, so remove the \n
            rtt.append(float(split_data[2].strip()))

    known_ips = set(ips)
    with open('geo_results.txt', "r") as geo_data:
        # NOTE(review): distances are collected in geo-file order, so this
        # assumes both files list the same IPs in the same order - confirm.
        for line in geo_data:
            split_data = line.split('\t')
            if split_data[0] in known_ips:
                dist.append(float(split_data[1].strip()))

    # Calculate Pearson's r values between each set
    r_hops_dist, _ = pearsonr(hops, dist)
    r_rtt_dist, _ = pearsonr(rtt, dist)
    r_hops_rtt, _ = pearsonr(hops, rtt)
    print("Pearson's r for:\n\t"
          "Hops v Distance:%f\n\t"
          "RTT v Distance:\t%f\n\t"
          "Hops v RTT:\t%f" % (r_hops_dist, r_rtt_dist, r_hops_rtt))

    # Plot hops v distance as red circles, adjust and label axes
    mpl.figure(1)
    mpl.plot(hops, dist, 'ro')
    mpl.grid(color='b', linestyle='-', linewidth=1)
    mpl.ylabel('Distance(km)')
    mpl.xlabel('Hops(#)')
    xmin, xmax = mpl.xlim()
    ymin, ymax = mpl.ylim()
    mpl.xlim((xmin - 1, xmax + 1))
    mpl.ylim((ymin - 100, ymax + 100))

    # Plot rtt v distance as red circles, label axes, and show both figures
    mpl.figure(2)
    mpl.plot(rtt, dist, 'ro')
    mpl.grid(color='b', linestyle='-', linewidth=1)
    mpl.ylabel('Distance(km)')
    mpl.xlabel('RTT(ms)')
    xmin, xmax = mpl.xlim()
    ymin, ymax = mpl.ylim()
    mpl.xlim((xmin - 5, xmax + 5))
    mpl.ylim((ymin - 100, ymax + 100))
    mpl.show()
def outputResults(out1_epsilon, out2_epsilon, kernel, train_lt, test_lt):
    """Write the epsilon-SVR predictions to file and print their correlations.

    Args:
        out1_epsilon: predictions on the training set.
        out2_epsilon: predictions on the validation set.
        kernel: not used by this function.
        train_lt: training labels.
        test_lt: validation labels.
    """
    # Output the results to the appropriate output files
    writeFloatList(out1_epsilon, TRAINPREDICTIONSEPSILONFILENAME)
    writeFloatList(out2_epsilon, VALIDATIONPREDICTIONSEPSILONFILENAME)

    reports = (
        ("Pearson correlation between training labels and predictions, epsilon SVR:",
         pearsonr, train_lt, out1_epsilon),
        ("Spearman correlation between training labels and predictions, epsilon SVR:",
         spearmanr, train_lt, out1_epsilon),
        ("Pearson correlation between validation labels and predictions, epsilon SVR:",
         pearsonr, test_lt, out2_epsilon),
        ("Spearman correlation between validation labels and predictions, epsilon SVR:",
         spearmanr, test_lt, out2_epsilon),
    )
    for heading, correlate, labels, predictions in reports:
        print(heading)
        print(correlate(labels, predictions))
def snow_depth_accidents_corr():
    """Return the correlation between snow depth and the accident rate.

    Loads the accident counts per snow depth and the underlying distribution
    of snow depths (both tab separated with one header row), drops
    distribution rows whose depth never occurs in the accident table, divides
    the accident counts by the remaining distribution counts and correlates
    that rate with the depth. The coefficient is printed and returned.

    The coefficient is computed once instead of separately for the message
    and the return value.
    """
    snow_depth_accidents = np.loadtxt('output/accidents_by_snow_depth.tsv', delimiter='\t', skiprows=1)
    snow_depth_distribution = np.loadtxt('../weather/output/snow_depth_distribution.tsv', delimiter='\t', skiprows=1)

    # remove rows with snow depth values in underlying distribution that don't appear in accidents
    seen_values = set(snow_depth_accidents[:, 0])
    rows_to_remove = [i for i, value in enumerate(snow_depth_distribution[:, 0])
                      if value not in seen_values]
    snow_depth_distribution = np.delete(snow_depth_distribution, rows_to_remove, 0)

    rate = snow_depth_accidents[:, 1] / snow_depth_distribution[:, 1]
    corr = pearsonr(snow_depth_accidents[:, 0], rate)[0]
    print("Correlation between snow depth and accidents: " + str(corr) + '\n')
    return corr
def precip_1hr_accidents_corr():
    """Return the correlation between 1-hour precipitation and the accident rate.

    Loads the accident counts per precipitation value and the underlying
    distribution of precipitation values (both tab separated with one header
    row), drops distribution rows whose value never occurs in the accident
    table, divides the accident counts by the remaining distribution counts
    and correlates that rate with the precipitation value. The coefficient
    is printed and returned.

    The coefficient is computed once instead of separately for the message
    and the return value.
    """
    precip_1hr_accidents = np.loadtxt('output/accidents_by_precip1hr.tsv', delimiter='\t', skiprows=1)
    precip_distribution = np.loadtxt('../weather/output/precip_1hr_distribution.tsv', delimiter='\t', skiprows=1)

    # remove rows with precipitation values in underlying distribution that don't appear in accidents
    seen_values = set(precip_1hr_accidents[:, 0])
    rows_to_remove = [i for i, value in enumerate(precip_distribution[:, 0])
                      if value not in seen_values]
    precip_distribution = np.delete(precip_distribution, rows_to_remove, 0)

    rate = precip_1hr_accidents[:, 1] / precip_distribution[:, 1]
    corr = pearsonr(precip_1hr_accidents[:, 0], rate)[0]
    print("Correlation between 1 hr precip and accidents: " + str(corr) + '\n')
    return corr
def cc_accidents_corr():
    """Return the correlation between cloud ceiling and the accident rate.

    Loads the accident counts per cloud-ceiling value and the underlying
    distribution of cloud-ceiling values (both tab separated with one header
    row), drops distribution rows whose value never occurs in the accident
    table, divides the accident counts by the remaining distribution counts
    and correlates that rate with the cloud ceiling. The coefficient is
    printed and returned.

    The coefficient is computed once instead of separately for the message
    and the return value.
    """
    cc_accidents = np.loadtxt('output/accidents_by_cloud_ceiling.tsv', delimiter='\t', skiprows=1)
    cc_distribution = np.loadtxt('../weather/output/cloud_ceiling_distribution.tsv', delimiter='\t', skiprows=1)

    # remove rows with cloud ceiling values in underlying distribution that don't appear in accidents
    seen_values = set(cc_accidents[:, 0])
    rows_to_remove = [i for i, value in enumerate(cc_distribution[:, 0])
                      if value not in seen_values]
    cc_distribution = np.delete(cc_distribution, rows_to_remove, 0)

    rate = cc_accidents[:, 1] / cc_distribution[:, 1]
    corr = pearsonr(cc_accidents[:, 0], rate)[0]
    print("Correlation between cloud ceiling and accidents: " + str(corr) + '\n')
    return corr
import json
import pandas as pd
import collections
from scipy.stats.stats import pearsonr

# Expression data per gene (nested lists) and the list of gene names.
with open('allen_data/dev_mouse/raw_dictionary_no_days.txt') as data_file:
    data = json.load(data_file)
with open('allen_data/dev_human/list_of_genes.txt') as data_file:
    genes = json.load(data_file)

# Flatten every gene's list of lists into one list of values.
for key, value in data.items():
    data[key] = [item for sublist in value for item in sublist]

# The data dictionary is looked up by the capitalised form of each gene name.
capitalised = [gene.capitalize() for gene in genes]

# Squared Pearson coefficient for every ordered pair of genes.
matrix = []
for row_gene in capitalised:
    print(row_gene)
    matrix.append([pearsonr(data[row_gene], data[col_gene])[0] ** 2
                   for col_gene in capitalised])

print(len(matrix))
df = pd.DataFrame(matrix, columns=genes)
print(df.shape)
df.to_csv("allen_data/dev_mouse/mouse_raw_corr_pearson_matrix.csv", sep=',', encoding='utf-8')
sell_scale = scale(sell_variable)
print("SALE")
print(sell_scale)

tax_variable = df['Taxes'].values
tax_scale = scale(tax_variable)
print("TAXES")
print(tax_scale)

# 2
import numpy as np
from scipy.stats.stats import pearsonr

# Candidate transformations of the tax values, keyed by the label printed
# next to the resulting correlation.
transformations = {
    'x': lambda x: x,
    '1/x': lambda x: 1 / x,
    'x**2': lambda x: x**2,
    'x**3': lambda x: x**3,
    'log(x)': lambda x: np.log(x)
}

a = df['Sell'].values
b = df['Taxes'].values

# Correlate the selling price with each transformed version of the taxes.
for transformation, transform in transformations.items():
    b_transformed = transform(b)
    pearsonr_coef, pearsonr_p = pearsonr(a, b_transformed)
    print(
        f'Transformation: {transformation} \t Pearson\'s r: {pearsonr_coef:.3f}'
    )
if prob == [1]: G.add_edge(i, j) Gc = max(nx.connected_component_subgraphs(G), key=len) dia = nx.average_clustering(Gc) dens = nx.transitivity(G) alcc.append(dia) gcc.append(dens) print("alcc =", alcc) print("gcc =", gcc) from scipy.stats.stats import pearsonr print(pearsonr(alcc, gcc)) # Different values of probability alcc = [] gcc = [] import matplotlib.pyplot as plt import networkx as nx import random from random import * G = nx.Graph() for i in range(7): for j in range(i + 1, 7):
print("Ystd=", data['Y'].std())
print("==================================")
xmean = data['X'].mean()
ymean = data['Y'].mean()
xstd = data['X'].std()
ystd = data['Y'].std()
Sratio = xstd / ystd
print("Sratio=", Sratio)

# SciPy (pronounced "Sigh Pie") is a free and open-source Python library used
# for scientific computing and technical computing.
from scipy.stats import pearsonr

ColA = data['X'].values
ColB = data['Y'].values
r, _ = pearsonr(ColA, ColB)
print("Correlation r=", r)

# Regression equation: y = a + b*x with b = r * (Sy / Sx) and
# a = Ymean - b * Xmean, where r is the correlation.
# The slope used to be computed from Sx / Sy and both coefficients were then
# shifted by hard-coded constants; they are now derived from the formula above.
print("==================================")
b = r * (ystd / xstd)
a = ymean - b * xmean
# w and q are kept as the names of the reported slope and intercept.
w = b
q = a
print("b=", b)
print("a=", a)
print("==================================")
print("MODEL GENERATED...round Off to 2 D.P.")
print("Round Off: round(var,D.P.)")
print("y=", round(q, 1), "+", round(w, 1), "x")
def compare_pearson (test: np.ndarray, pattern: np.ndarray):
    """Return the ``pearsonr`` result of two arrays compared element-wise.

    Both arrays are flattened before the comparison, so they must hold the
    same number of elements. Arrays of any dimensionality are accepted;
    previously only exactly two-dimensional arrays could be flattened.

    Args:
        test: array under test.
        pattern: reference array.

    Returns:
        The value returned by ``pearsonr`` (coefficient first, p-value
        second).
    """
    return pearsonr(np.ravel(test), np.ravel(pattern))
def main(kdts_path):
    """Plot kernel distances per slice index as box plots with a scatter overlay.

    Loads the pickled data at *kdts_path*, prints Pearson and Spearman
    correlations between the hard-coded fractions and the kernel distances,
    and saves the figure to ``mini_mcb_example.png``.
    """
    # Load the pickled per-slice data.
    with open(kdts_path, "rb") as infile:
        slice_idx_to_data = pkl.load(infile)

    kernel = ('wlst', 'logical_time', 5)
    idx_to_distances = {}
    for slice_idx, slice_data in slice_idx_to_data.items():
        idx_to_distances[slice_idx] = flatten_distance_matrix(
            slice_data["kernel_distance"][kernel])

    # Points for the scatter overlay.
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points(idx_to_distances)

    # One box per slice index, in sorted order.
    ordered = sorted(idx_to_distances.items())
    bp_positions = [idx for idx, _ in ordered]
    bp_data = [distances for _, distances in ordered]

    # Box appearance.
    box_width = 0.5
    flierprops = {"marker": "+", "markersize": 4}
    boxprops = {"alpha": 0.25}

    # Scatter marker appearance.
    marker_size = 6

    # Figure geometry: base size chosen by aspect ratio, then scaled.
    aspect_ratio = "widescreen"
    figure_scale = 1.5
    base_width, base_height = (16, 9) if aspect_ratio == "widescreen" else (4, 3)
    figure_size = (figure_scale * base_width, figure_scale * base_height)
    fig, ax = plt.subplots(figsize=figure_size)

    # Box plots first, then the raw points on the same axis.
    bp = ax.boxplot(bp_data,
                    widths=box_width,
                    positions=bp_positions,
                    patch_artist=True,
                    showfliers=False,
                    boxprops=boxprops,
                    flierprops=flierprops)
    ax.scatter(scatter_x_vals, scatter_y_vals, s=marker_size)

    # Correlation coefficients: pair every distance with the fraction that
    # belongs to its slice index.
    nd_fractions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    nd_fraction_seq = []
    dist_seq = []
    for slice_idx, fraction in enumerate(nd_fractions):
        for distance in idx_to_distances[slice_idx]:
            nd_fraction_seq.append(fraction)
            dist_seq.append(distance)

    pearson_r, pearson_p = pearsonr(nd_fraction_seq, dist_seq)
    spearman_r, spearman_p = spearmanr(nd_fraction_seq, dist_seq)
    pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(
        np.round(pearson_r, 2), pearson_p)
    spearman_correlation_txt = "Spearman's rho = {}, p = {}\n".format(
        np.round(spearman_r, 2), spearman_p)
    print(pearson_correlation_txt)
    print(spearman_correlation_txt)

    # Annotation text is assembled but currently not drawn on the axes.
    annotation_txt = "".join([
        "Kernel Distance vs. % Wildcard Receives: Correlation Coefficients\n",
        pearson_correlation_txt,
        spearman_correlation_txt,
    ])
    annotation_font_size = 18

    # Tick labels.
    tick_label_fontdict = {"fontsize": 12}
    x_tick_labels = [str(percent) for percent in range(0, 101, 10)]
    ax.set_xticks(list(range(len(x_tick_labels))))
    ax.set_xticklabels(x_tick_labels,
                       rotation=0,
                       fontdict=tick_label_fontdict)
    y_ticks = list(range(0, 41, 5))
    ax.set_yticks(y_ticks)
    ax.set_yticklabels([str(tick) for tick in y_ticks],
                       rotation=0,
                       fontdict=tick_label_fontdict)

    # Axis labels.
    axis_label_fontdict = {"fontsize": 18}
    ax.set_xlabel(
        "Percentage of Wildcard Receives (i.e., using MPI_ANY_SOURCE)",
        fontdict=axis_label_fontdict)
    ax.set_ylabel("Kernel Distance (Higher == Runs Less Similar)",
                  fontdict=axis_label_fontdict)

    # Title and output.
    plot_title = "Percentage of Non-Deterministic Sub-Iterations vs. Kernel Distance - Communication Pattern: miniMCB"
    plt.title(plot_title, fontdict={"fontsize": 20})
    plt.savefig("mini_mcb_example.png", bbox_inches="tight", pad_inches=0.25)
np.int64).values if len(current_SANS) != 0: pop.loc[pop.subjectid == s, "SAPS"] = current_SAPS.sum() print(current_SAPS.sum()) if len(current_SAPS) != 0: pop.loc[pop.subjectid == s, "SANS"] = current_SANS.sum() #investigate distribution of SAPS and SANS scores across SCZ population SAPS_scores = pop[pop.dx_num == 1].SAPS.astype(np.float).values SANS_scores = pop[pop.dx_num == 1].SANS.astype(np.float).values scores_PCA_path = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/results/pcatv/5_folds_NUDAST/results/0/struct_pca_0.1_0.5_0.8/X_train_transform.npz" scores_comp = np.load(scores_PCA_path)['arr_0'] #Pearson correlation pearsonr(scores_comp[:, 0], SAPS_scores) pearsonr(scores_comp[:, 0], SANS_scores) pearsonr(scores_comp[:, 1], SAPS_scores) pearsonr(scores_comp[:, 1], SANS_scores) pearsonr(scores_comp[:, 2], SAPS_scores) pearsonr(scores_comp[:, 2], SANS_scores) pearsonr(scores_comp[:, 3], SAPS_scores) pearsonr(scores_comp[:, 3], SANS_scores) pearsonr(scores_comp[:, 4], SAPS_scores) pearsonr(scores_comp[:, 4], SANS_scores) #COMPONENT 1
def _scatter_normalized_peaks(ax, results, column, probe, label, color):
    """Scatter column *column* of *results*, divided by *probe*, at x = -1, 0, 1.

    Only the first point carries *label*, so the legend shows one entry per
    condition.
    """
    for row, offset in enumerate((-1, 0, 1)):
        ax.scatter(offset,
                   results[row, column] / probe,
                   label=label if row == 0 else '',
                   color=color)


def main():
    """Run the three-tone experiments and plot correlations and peak responses.

    For each of the three modes the model is simulated three times (control,
    ``pv_opto = .01`` and ``som_opto = .2``) and the peak responses during the
    first and second tone are collected into a 3x6 ``results`` array. Two more
    simulations (``pv_opto = -.2`` and control, both in mode 2) are used to
    print the correlation between the ``ia2`` trace and ``u2``.
    """
    fname = 'xpp/cols3_fs.ode'
    pars = read_pars_values_from_file(fname)
    inits = read_init_values_from_file(fname)

    # Repeat the simulation for different frequencies.
    # pars['mode'] = '1' makes the first tone appear in the first column,
    # pars['mode'] = '2' makes the first tone appear in the second column, etc.
    # Columns of results: control1, control2, pv1, pv2, som1, som2.
    results = np.zeros((3, 6))
    for i in range(3):
        pars['mode'] = str(i + 1)

        pars['pv_opto'] = 0
        pars['som_opto'] = 0
        # run_experiment(..., return_all=True) results are indexed by key
        # below ('t', 'tonelist', 'u1'..'u3', 'sv', 'vn', 'parameters').
        control = run_experiment(fname, pars, inits, return_all=True)

        pars['pv_opto'] = .01
        pv_off = run_experiment(fname, pars, inits, return_all=True)

        pars['pv_opto'] = 0
        pars['som_opto'] = .2
        som_off = run_experiment(fname, pars, inits, return_all=True)

        # Start/end sample indices of the first and second tone, shrunk by one
        # sample on each side.
        tone1On, tone1Off = control['tonelist'][0]
        tone2On, tone2Off = control['tonelist'][1]
        idx1_start = np.argmin(np.abs(control['t'] - tone1On)) + 1
        idx1_end = np.argmin(np.abs(control['t'] - tone1Off)) - 1
        idx2_start = np.argmin(np.abs(control['t'] - tone2On)) + 1
        idx2_end = np.argmin(np.abs(control['t'] - tone2Off)) - 1

        # First tone: the responding unit varies with i.
        first_unit = 'u' + str(i + 1)
        control1 = get_max_FR(control[first_unit], idx1_start, idx1_end)
        pv1 = get_max_FR(pv_off[first_unit], idx1_start, idx1_end)
        som1 = get_max_FR(som_off[first_unit], idx1_start, idx1_end)

        # Second tone: always u2.
        control2 = get_max_FR(control['u2'], idx2_start, idx2_end)
        pv2 = get_max_FR(pv_off['u2'], idx2_start, idx2_end)
        som2 = get_max_FR(som_off['u2'], idx2_start, idx2_end)

        results[i, :] = [control1, control2, pv1, pv2, som1, som2]

        # Debug plot, deliberately disabled.
        if (i == 1) and False:
            fig = plt.figure()
            ax11 = fig.add_subplot(111)
            ax11.plot(control['u2'])
            ax11.plot(som_off['u2'])
            plt.show()
    # end 3 tone loop

    # PV activation run for the correlation calculation. The mode is passed
    # as a string, matching what the loop above passes.
    pars['pv_opto'] = -.2
    pars['som_opto'] = 0.
    pars['mode'] = '2'
    pv_on = run_experiment(fname, pars, inits, return_all=True)

    # Matching control run for the correlation calculation.
    pars['pv_opto'] = 0
    pars['som_opto'] = 0.
    pars['mode'] = '2'
    pv_control = run_experiment(fname, pars, inits, return_all=True)

    print(results)

    # Correlation figure: control on the left, PV activation on the right.
    fig3 = plt.figure(figsize=(8, 3))
    ax1 = fig3.add_subplot(121)
    ax2 = fig3.add_subplot(122)

    time = pv_on['t']
    early = time < 20  # only the first 20 time units are analysed
    input_trace = pv_on['sv'][:, pv_on['vn'].index('ia2')]
    time_short = time[early]
    input_trace_short = input_trace[early]

    time_ctrl = pv_control['t']
    early_ctrl = time_ctrl < 20
    input_trace_ctrl = pv_control['sv'][:, pv_control['vn'].index('ia2')]
    time_short_ctrl = time_ctrl[early_ctrl]
    input_trace_short_ctrl = input_trace_ctrl[early_ctrl]

    ax1b = ax1.twinx()
    ax1b.plot(time_short_ctrl * 10, input_trace_short_ctrl, color='tab:red')
    ax1.plot(time_short_ctrl * 10, pv_control['u2'][early_ctrl])
    ax1.set_title('Pyr Control FR Rate')
    ax1.set_ylabel('Firing Rate')
    ax1b.set_ylabel('Thalamus', color='tab:red')

    print("PV act. corr = " +
          str(pearsonr(input_trace_short, pv_on['u2'][early])))

    ax2b = ax2.twinx()
    ax2b.plot(time_short * 10, input_trace_short, color='tab:red')
    ax2.plot(time_short * 10, pv_on['u2'][early])
    # This title belongs to the right-hand (PV activation) panel; setting it
    # on ax1 would overwrite the control panel's title.
    ax2.set_title('Pyr FR Rate with PV Activation')
    ax2.set_ylabel('Firing Rate')
    ax2b.set_ylabel('Thalamus', color='tab:red')

    print("PV control corr = " +
          str(pearsonr(input_trace_short_ctrl, pv_control['u2'][early_ctrl])))

    ax1.set_xlabel('t')
    ax2.set_xlabel('t')
    plt.tight_layout()

    # Relative firing rates: second-tone peaks normalised by the mode-2
    # first-tone peak of the same condition.
    fig = plt.figure()
    ax11 = fig.add_subplot(121)
    ax12 = fig.add_subplot(122)

    control_probe = results[1, 0]
    pv_probe = results[1, 2]
    som_probe = results[1, 4]

    ax11.set_title('Normalized Peak Response (2nd Tone)')
    _scatter_normalized_peaks(ax11, results, 1, control_probe,
                              'Control 2nd Tone', 'black')
    _scatter_normalized_peaks(ax11, results, 3, pv_probe,
                              'PV Off 2nd Tone', pv_color)

    ax12.set_title('Normalized Peak Response (2nd Tone)')
    _scatter_normalized_peaks(ax12, results, 1, control_probe,
                              'Control 2nd Tone', 'black')
    _scatter_normalized_peaks(ax12, results, 5, som_probe,
                              'SOM Off 2nd Tone', som_color)

    ax11.set_xlabel('Distance from Preferred Frequency')
    ax12.set_xlabel('Distance from Preferred Frequency')
    ax11.legend()
    ax12.legend()
    plt.tight_layout()

    # Synapse plot, deliberately disabled. Uses the control run of the last
    # loop iteration.
    if False:
        sv = control['sv']
        vn = control['vn']
        aie2 = float(control['parameters']['aie2'])  # som to pn
        asom2pv = float(control['parameters']['asom2pv'])  # som to pv
        ws2p = sv[:, vn.index('ws2p')]  # som to pn
        ws2v = sv[:, vn.index('ws2v')]  # som to pv
        fig2 = plt.figure()
        ax2 = fig2.add_subplot(111)
        ax2.plot(control['t'], aie2 * ws2p, label='som to pn')
        ax2.plot(control['t'], asom2pv * ws2v, label='som to pv')

    # NOTE(review): the original layout was lost; show() is placed at function
    # level on the assumption that the figures above are meant to be displayed.
    plt.show()
from sklearn.metrics import mean_squared_error
from math import sqrt
rmse3 = sqrt(mean_squared_error(wcat.AT, pred3))

# -------------------------------------------------------------
# exp polynomial model ---- x = Waist*Waist, y = log(AT) ----
# -------------------------------------------------------------
Waist_Sq = wcat.Waist * wcat.Waist
model4 = smf.ols("np.log(AT) ~ Waist+Waist_Sq", data=wcat).fit()
model4.params
model4.summary()
# Confidence intervals of the model fitted just above (model4).
model4.conf_int(0.05)

# The model is fitted on log(AT), so its predictions are on the log scale;
# transform back before comparing them with the raw AT values.
pred4_log = model4.predict(wcat.Waist)
pred4 = np.exp(pred4_log)

from pydoc import help
# The scipy.stats.stats namespace is deprecated; pearsonr lives in scipy.stats.
from scipy.stats import pearsonr
help(pearsonr)
# >>> Help on function pearsonr in module scipy.stats.stats:
pearsonr(wcat.AT, pred4)

from sklearn.metrics import mean_squared_error
from math import sqrt
rmse4 = sqrt(mean_squared_error(wcat.AT, pred4))
rmse4
# -----------------------------------------------------------------
def Pearson_corr(x, y):
    """Return ``(r, p)``: the first two values ``pearsonr`` yields for x and y."""
    outcome = pearsonr(x, y)
    coefficient, p_value = outcome
    return coefficient, p_value
def train(self, epochs, batch_size):
    """Train discriminator and generator, checkpointing on validation Pearson r.

    Each epoch runs ``int(sample_num / batch_size)`` minibatches; every
    minibatch trains the discriminator ``self.n_critic`` times and the
    generator once. After each epoch the generator is scored by Pearson r on
    the train and validation sets. Whenever the validation score improves,
    prediction tracks, a ``meta.txt`` summary (including the test-set
    Pearson r) and both models are written out.

    Loss and Pearson histories are saved as ``.npy`` files in ``log_dir``
    once training finishes.

    Args:
        epochs: Number of epochs to run.
        batch_size: Number of samples drawn per training step.
    """
    d_loss_history, g_loss_history = [], []
    pearson_train_history, pearson_val_history = [], []
    max_pearson = -1.0

    # Target vectors handed to train_on_batch.
    positive_y = np.ones(
        (batch_size, 1), dtype=np.float32) * (1 - smooth_rate)
    negative_y = -positive_y
    dummy_y = np.zeros((batch_size, 1), dtype=np.float32)

    # NOTE(review): the sampling below reads the module-level ``y_train`` and
    # ``sample_num`` while the evaluation uses ``self.y_train`` - confirm they
    # refer to the same data.
    for epoch in range(epochs):
        # Per-epoch losses for discriminator and generator.
        d_losses, g_losses = [], []

        for _minibatch_idx in range(int(sample_num / batch_size)):
            for _ in range(self.n_critic):
                dis_idx = np.random.randint(0, y_train.shape[0], batch_size)
                discriminator_minibatches = y_train[dis_idx]
                noise = self.X_train[dis_idx].astype(np.float32)
                d_loss = self.discriminator_model.train_on_batch(
                    [discriminator_minibatches, noise],
                    [positive_y, negative_y, dummy_y])
                d_losses.append(d_loss)

            gen_idx = np.random.randint(0, y_train.shape[0], batch_size)
            noise = self.X_train[gen_idx].astype(np.float32)
            g_losses.append(
                self.generator_model.train_on_batch(
                    noise, [positive_y, y_train[gen_idx]]))

        # Convert the histories into numpy arrays to get means.
        d_losses = np.array(d_losses)
        g_losses = np.array(g_losses)

        # Overall Pearson on the train set.
        predictions = self.generator.predict(self.X_train).flatten()
        avg_pearson = pearsonr(predictions, self.y_train.flatten())[0]
        print("Pearson R on Train set: {}".format(avg_pearson))

        # Overall Pearson on the validation set.
        val_predictions = self.generator.predict(self.X_val).flatten()
        avg_val_pearson = pearsonr(val_predictions,
                                   self.y_val.flatten())[0]
        print("Pearson R on Val set: {}".format(avg_val_pearson))

        # New best validation score: dump tracks, metadata and models.
        if max_pearson < avg_val_pearson:
            print("Pearson on val improved from {} to {}".format(
                max_pearson, avg_val_pearson))
            _write_1D_deeplift_track(
                predictions.reshape(self.X_train.shape[0], self.window_size),
                normalized_train_intervals,
                os.path.join(self.srv_dir, 'train'))
            _write_1D_deeplift_track(
                val_predictions.reshape(self.X_val.shape[0],
                                        self.window_size),
                normalized_val_intervals, os.path.join(self.srv_dir, 'val'))
            max_pearson = avg_val_pearson

            # Overall Pearson on the test set; take the coefficient only, as
            # for the train and validation scores.
            test_predictions = self.generator.predict(self.X_test).flatten()
            avg_test_pearson = pearsonr(test_predictions,
                                        self.y_test.flatten())[0]
            print("Pearson R on Test set: {}".format(avg_test_pearson))

            # Text mode plus a context manager: str is being written, and the
            # handle is released even if a write fails.
            with open(os.path.join(self.srv_dir, 'meta.txt'), 'w') as f:
                f.write(
                    str(epoch) + " " + str(avg_pearson) + " " +
                    str(avg_val_pearson) + "\n")
                f.write("Test Pearson: " + str(avg_test_pearson))

            _write_1D_deeplift_track(
                test_predictions.reshape(self.X_test.shape[0],
                                         self.window_size),
                normalized_test_intervals,
                os.path.join(self.srv_dir, 'test'))
            self.generator.save(
                os.path.join(self.model_dir, 'best_generator.h5'))
            self.discriminator.save(
                os.path.join(self.model_dir, 'best_discriminator.h5'))

        # Save the progress.
        d_loss_history.append(d_losses)
        g_loss_history.append(g_losses)
        pearson_train_history.append(avg_pearson)
        pearson_val_history.append(avg_val_pearson)

        # Print the progress. No accuracy is reported: nothing in the loop
        # above collects one, so its mean would always be nan.
        print("%d [D loss: %f] [G loss: %f]" %
              (epoch, d_losses.mean(), g_losses.mean()))

    assert (len(d_loss_history) == len(g_loss_history) ==
            len(pearson_train_history) == len(pearson_val_history))

    print("Saving the loss and pearson logs...")
    np.save(os.path.join(log_dir, 'd_loss_history.npy'), d_loss_history)
    np.save(os.path.join(log_dir, 'g_loss_history.npy'), g_loss_history)
    np.save(os.path.join(log_dir, 'pearson_train_history.npy'),
            pearson_train_history)
    np.save(os.path.join(log_dir, 'pearson_val_history.npy'),
            pearson_val_history)
    print("Train Complete!")
'/home/n_athan/Desktop/diploma/code/stable_voxels/st_vox' + str(parts) + '.pkl', 'wb') #print(fmri_data_for_trial[tempo[0,:],0]) stab_score = np.zeros((length)) for x in range(0, length): #voxel sum_vox = 0 for y in range(0, 58): #noun vox[x, 0, y] = fmri_data_for_trial[tempo[y, 0], x] vox[x, 1, y] = fmri_data_for_trial[tempo[y, 1], x] vox[x, 2, y] = fmri_data_for_trial[tempo[y, 2], x] vox[x, 3, y] = fmri_data_for_trial[tempo[y, 3], x] vox[x, 4, y] = fmri_data_for_trial[tempo[y, 4], x] vox[x, 5, y] = fmri_data_for_trial[tempo[y, 5], x] # compute the correlation for z in combs: sum_vox += pearsonr(vox[x, z[0], :], vox[x, z[1], :])[0] stab_score[x] = sum_vox / 15 #no of possible correlations #stab_vox=nlargest(500,range(len(stab_score)),stab_score.take) stab_vox = np.argsort(stab_score)[::-1][:stable_voxels] np.savetxt('./stable_voxels/st_vox' + str(parts) + '/' + noun[test_words[0]] + '_' + noun[test_words[1]] + '.txt', stab_vox, fmt='%d') else: stab_vox = np.loadtxt('../stable_voxels/st_vox' + str(parts) + '_' + str(stable_voxels) + '.txt', dtype=int) print('I loaded the voxels NOT calculated them!') #print('Voxel Selection ends...') #################################################################
import numpy as np
# The scipy.stats.stats namespace is deprecated; pearsonr lives in scipy.stats.
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

rootdir = "/home/banua/xprmt/xprmt-icacsis16/"
dataset = 'zoo'
fname = rootdir + dataset + "/" + dataset + "_table_True.csv"

# Read columns 1 and 2 of the tab-separated table as text, then convert.
data = np.loadtxt(fname, delimiter="\t", dtype=str, usecols=(1, 2))
# The builtin float replaces the np.float alias, which newer NumPy no longer has.
data = np.array(data).astype(float)
x = data[:, 0]
y = data[:, 1]

# Despite its name, pval holds the 2x2 correlation-coefficient matrix.
pval = np.corrcoef(x, y)
r_row, p_value = pearsonr(x, y)

plt.scatter(x, y)
plt.show()

print(pval)
print(r_row)
print(p_value)
"review": corrected_review, "compound": compound_rate, "positive": pos, "neutral": neu, "negative": neg }, ignore_index=True) if count % 1000 == 0: print(count) sentiments = sentiments.set_index(reviews.index) result = pd.concat([reviews, sentiments], axis=1, join_axes=[reviews.index], join='outer') result.to_csv('sentiments_passage_corrected5000.csv', sep=',') result.to_pickle('sentiments_ratings_corrected_passage.pkl') # evaluation comp = result['compound'] stars = result['stars'] # sentences - 0.28 # comma separated - 0.428 # passage - 0.55 # statistical significance pearsonr(comp, stars)
# Leave-one-out anomalies: for each year, subtract the nan-mean of all the
# other years, so the anomaly is independent of its own climatology.
for y in range(len(years)):
    tmp_clim = np.nanmean(np.delete(chp_rfe, y, axis=0), axis=0)
    chp_anom[y, :, :] = chp_rfe[y, :, :] - tmp_clim
    tmp_clim = np.nanmean(np.delete(ens_mean, y, axis=0), axis=0)
    c3s_anom[y, :, :] = ens_mean[y, :, :] - tmp_clim

# Anomaly correlation coefficient per grid box. pearsonr only takes 1-D
# input, so every (lat, lon) series is handled separately; both values it
# returns are stored along the first axis of c3s_acc.
for i in range(len(c3s_lat)):
    for j in range(len(c3s_lon)):
        n_missing = np.count_nonzero(np.isnan(c3s_anom[:, i, j]))
        if n_missing < len(years):  # skip grid boxes that are entirely nan
            c3s_acc[:, i, j] = pearsonr(chp_anom[:, i, j], c3s_anom[:, i, j])

# Colour scale: levels from cmin to cmax in steps of cspc.
cols = 'RdYlBu'
cmin, cmax, cspc = 0, 1, 0.1
clevs = np.arange(cmin, cmax + cspc, cspc)
norm = BoundaryNorm(boundaries=clevs, ncolors=256)

# Map covering exactly the extent of the c3s grid.
fig = plt.figure(figsize=(4, 3))
mymap = Basemap(projection='cyl',
                resolution='l',
                llcrnrlat=np.min(c3s_lat),
                urcrnrlat=np.max(c3s_lat),
                llcrnrlon=np.min(c3s_lon),
                urcrnrlon=np.max(c3s_lon))
mymap.drawparallels(np.arange(-90, 90, 2),
                    labels=[1, 0, 0, 0],
                    labelstyle='+/-')
import csv
import json
import re

import matplotlib.pyplot as plt
import pandas as pd
import seaborn
# The scipy.stats.stats namespace is deprecated; pearsonr lives in scipy.stats.
from scipy.stats import pearsonr

# Pass 1: one JSON review per input line -> one CSV row with the star rating
# and the number of question marks and exclamation points in the text.
count = 0
with open("yelp_AZ_2018.json", encoding="utf8") as json_file, \
        open('Shorts_11.csv', mode='w', newline='') as fout:
    writer = csv.writer(fout, lineterminator='\n')
    writer.writerow(['stars', 'question_marks', 'exclamation_points'])
    for review in json_file:
        count = count + 1
        yelp_review = json.loads(review)
        text = yelp_review['text']
        # Counting a literal character needs no regular expression.
        num_questions = text.count('?')
        num_exclamations = text.count('!')
        writer.writerow(
            [yelp_review['stars'], num_questions, num_exclamations])

# Pass 2: correlate punctuation counts with the star rating and plot them.
yelp = pd.read_csv('Shorts_11.csv', sep=',')

correlation_question = pearsonr(yelp.stars, yelp.question_marks)
correlation_exclamation = pearsonr(yelp.stars, yelp.exclamation_points)

seaborn.lmplot(x="question_marks", y="stars", data=yelp, fit_reg=True)
plt.show()
seaborn.lmplot(x="exclamation_points", y="stars", data=yelp, fit_reg=True)
plt.show()
import os

import matplotlib.pyplot as plt
import nibabel as nib
import numpy as np
# The scipy.stats.stats namespace is deprecated; pearsonr lives in scipy.stats.
from scipy.stats import pearsonr
from sklearn.decomposition import PCA

dataPath = '/home/despoB/kaihwang/TRSE/TDSigEI/'
# Full subject list, kept for reference:
# ['503', '505', '508', '509', '510', '512', '513', '516', '517', '518',
#  '519', '523', '527', '528', '529', '530', '531', '532', '534']
subjects = ['503']

# Load the masks
tFEF_mask = nib.load(dataPath + 'ROIs/T_FEF.nii.gz').get_data()  # 777 voxels
dFEF_mask = nib.load(dataPath + 'ROIs/D_FEF.nii.gz').get_data()  # 838 voxels


def corrFunc(a, c, d):
    """Return pearsonr(a, d) minus pearsonr(c, d), element-wise as an array."""
    return np.array(pearsonr(a, d)) - np.array(pearsonr(c, d))


conditions = ['FH', 'Fo', 'Fp', 'HF', 'Ho', 'Hp']

for subj in subjects:
    # Load the functional data and apply the target and distractor FEF masks.
    print('Load the functional data and apply the masks')
    ffa = nib.load(dataPath + subj + '/FFA_indiv_ROI.nii.gz').get_data()
    # NOTE(review): ppa is loaded from the FFA ROI file as well - confirm
    # whether a separate PPA ROI file was intended here.
    ppa = nib.load(dataPath + subj + '/FFA_indiv_ROI.nii.gz').get_data()

    # File names are prefixed with the subject id, so derive the prefix from
    # the loop variable rather than hard-coding one subject.
    FH = nib.load(dataPath + subj + '/' + subj +
                  '_nusiance_FH_errts.nii.gz').get_data()
    FH_t = FH[tFEF_mask != 0]
    FH_d = FH[dFEF_mask != 0]
    FH_ffa, FH_ppa = FH[ffa != 0], FH[ppa != 0]

    Fo = nib.load(dataPath + subj + '/' + subj +
                  '_nusiance_Fo_errts.nii.gz').get_data()
    Fo_t = Fo[tFEF_mask != 0]
    Fo_d = Fo[dFEF_mask != 0]
def determine_features_to_use(dataset, threshold=0.001):
    """Select the feature columns that correlate with the target column.

    The last column of *dataset* is treated as the target; every other column
    is a candidate feature. A feature is kept when the absolute value of its
    Pearson correlation with the target exceeds *threshold*. The correlation
    result of every feature is also printed.

    Args:
        dataset: 2-D sequence of numeric rows (instances x columns).
        threshold: Minimum absolute Pearson r for a feature to be kept.

    Returns:
        List of the column indices of the selected features, in ascending
        order.
    """
    # Labels for the first eleven feature columns; any further columns fall
    # back to a generic label.
    feature_names = (
        "Fixed Acidity",
        "Volatile Acidity",
        "Citric Acid",
        "Residual Sugar",
        "Chlorides",
        "Free Sulfur Dioxide",
        "Total Sulfur Dioxide",
        "Density",
        "pH",
        "Sulphates",
        "Alcohol",
    )

    table = np.asarray(dataset)
    _, columns_in_data_set = table.shape
    n_features = columns_in_data_set - 1
    target = table[:, n_features]

    # Compute each correlation once and reuse it for selection and printing.
    correlations = [
        pearsonr(table[:, i], target) for i in range(n_features)
    ]
    features_to_use = [
        i for i, result in enumerate(correlations)
        if abs(result[0]) > threshold
    ]

    print("Pearson correlation coefficients")
    for i, result in enumerate(correlations):
        if i < len(feature_names):
            name = feature_names[i]
        else:
            name = "Feature {}".format(i)
        print(name + ": ", result)

    return features_to_use
def evaluate_prediction(y_true, y_pred):
    """Score predictions against the ground truth.

    Returns a dict with the keys ``'mse'``, ``'mae'``, ``'r2'`` and ``'corr'``
    (the first value returned by ``pearsonr``).
    """
    metrics = {}
    metrics['mse'] = mean_squared_error(y_true, y_pred)
    metrics['mae'] = mean_absolute_error(y_true, y_pred)
    metrics['r2'] = r2_score(y_true, y_pred)
    correlation = pearsonr(y_true, y_pred)
    metrics['corr'] = correlation[0]
    return metrics
t = 5 while (t != 0): temp_array.remove('') t -= 1 # temp_array=temp_array[14:] line_array.append(map(float, temp_array)) line_array = np.array(line_array) count = 0 print("Row Count: ", len(line_array)) print("Correlation") tup_duplicates = [] for i in range(len(line_array)): for j in range(i + 1, len(line_array)): v1 = line_array[i][9:] v2 = line_array[j][9:] first_corr = pearsonr(v1, v2)[0] if (first_corr >= corrthresh and abs(line_array[i][3] - line_array[j][3]) <= mzdiff and abs(line_array[i][4] - line_array[j][4]) <= rtdiff): count += 1 print(i, j) # tup_duplicates.append((i,j)) print("Number of duplicates: "), print(count) # print(len(tup_duplicates)) print("Percentage duplicates: "), print(float(count) / float(len(line_array))) # print("cvs-writing") # table_labels.remove('label')
def get_correlation(X, y):
    """Return the absolute Pearson r between every column of X and y.

    The result is a 1-D float array with one entry per column of X.
    """
    n_columns = X.shape[1]
    per_column = [
        np.abs(pearsonr(X[:, column], y)[0]) for column in range(n_columns)
    ]
    return np.array(per_column, dtype=float)
def infer_abundances(self, norm_b=False):
    """
    Solve for the abundance of each edit proposal with non-negative least squares.

    Stores the squared Pearson r between the predicted and observed signal in
    ``self.results.r_squared`` and writes ``x_abs`` / ``x_rel`` onto every
    entry of ``self.proposals``.

    :param norm_b: if true, fit against ``self.normalize_observed_trace()``
        instead of ``self.output_vec``
    :return: None
    """
    A = self.coefficient_matrix
    b = self.normalize_observed_trace() if norm_b else self.output_vec

    if self.verbose:
        print("")
        print('NNLS input shapes')
        print("--------------------------------------------------------")
        print('A', A.shape)
        # The shape reported is always that of the raw output vector.
        print('b', self.output_vec.shape)

    # Solves argmin_x || Ax - b ||_2 for x >= 0
    #   A: one column per indel possibility,
    #      one row per base call (4 * inference_length) + 1
    #   x: abundances of the possibilities
    #   b: observed base calls
    # xvals holds the inferred abundances, rnorm the residual || Ax - b ||_2.
    try:
        xvals, rnorm = nnls(A, b)
        # signal predicted by the fitted abundances
        predicted = np.dot(A, xvals)
    except Exception as e:
        # re-raise the same exception type with both shapes appended
        raise type(e)(str(e) + ' A: ' + str(A.shape) + ' B: ' + str(b.shape))

    # Pearson's r between prediction and observation
    fit_r, p_val_2_tailed = pearsonr(predicted, b)
    r_squared = fit_r**2
    self.results.r_squared = r_squared
    print("R_SQUARED {}".format(self.results.r_squared))

    # Normalise the abundance of each proposal by the total abundance.
    xtotal = xvals.sum()
    for n, x_val in enumerate(xvals):
        proposal = self.proposals[n]
        proposal.x_abs = x_val
        relative = x_val / (1.0 * xtotal)
        if self.r_squared_correction:
            # NOTE(review): the no-edit proposal (n == 0) and the edited ones
            # get the same scaling here; nothing adds the missing variance
            # (1 - r_squared) to the no-edit case - confirm that is intended.
            relative = relative * self.results.r_squared
        proposal.x_rel = relative
# The scipy.stats.stats namespace is deprecated; pearsonr lives in scipy.stats.
from scipy.stats import pearsonr

# Template: fill both lists with equally long numeric sequences before
# running the script.
a = [
    # First array of the correlation
]
b = [
    # Second array of the correlation
]

# Prints the value returned by pearsonr for the two arrays.
print(pearsonr(a, b))
def evaluate_autoencoder(y_pred, y_test):
    """Score autoencoder output against the reference data.

    Returns a dict with the keys ``'mse'``, ``'r2_score'`` and
    ``'correlation'``; the correlation is computed on the flattened arrays.
    """
    squared_error = mean_squared_error(y_pred, y_test)
    determination = r2_score(y_test, y_pred)
    flat_correlation = pearsonr(y_pred.flatten(), y_test.flatten())[0]
    return dict(mse=squared_error,
                r2_score=determination,
                correlation=flat_correlation)
def pearson_correlation(x, y):
    """Return the result of ``pearsonr(x, y)`` unchanged."""
    outcome = pearsonr(x, y)
    return outcome
'ep11': [], 'ep13': [], 'ep15': [] } for ep in epochlist: for f in fixed_pfc_linfields: rvals = [] pvals = [] rvals_shuf = [] pvals_shuf = [] t = f['Tetrode'] c = f['Cell'] e = f['Epoch'] if f['Epoch'] == ep: r, p = pearsonr(f['inleft'], f['inright']) #get rid of center arm rvals.append(r) pvals.append(p) r, p = pearsonr(f['inleft'], f['outleft']) rvals.append(r) pvals.append(p) r, p = pearsonr(f['inleft'], f['outright']) rvals.append(r) pvals.append(p) r, p = pearsonr(f['inright'], f['outleft']) rvals.append(r) pvals.append(p) r, p = pearsonr(f['inright'], f['outright']) rvals.append(r) pvals.append(p)
""" Test dcca_loss """
# sys and numpy are used below, so they are imported explicitly here.
import sys

import numpy as np
import tensorflow as tf
# The scipy.stats.stats namespace is deprecated; pearsonr lives in scipy.stats.
from scipy.stats import pearsonr
from sklearn.cross_decomposition import CCA

sys.path.append('../src/')
from networks import dcca_loss

# Two independent random views: 600 samples x 3 features each.
U = np.random.random_sample(1800).reshape(600, 3)
V = np.random.random_sample(1800).reshape(600, 3)

# Baseline: summed column-wise correlation of the raw data.
result = 0.0
for i in range(3):
    result += pearsonr(U[:, i], V[:, i])[0]
print("Raw data results: ", result)

# Reference: summed correlation of the sklearn CCA projections.
cca = CCA(n_components=3)
U_c, V_c = cca.fit_transform(U, V)
result = 0.0
for i in range(3):
    result += pearsonr(U_c[:, i], V_c[:, i])[0]
print("Sklearn results: ", result)

# Value under test: dcca_loss on the same data. Its sign is flipped for
# printing so it can be read next to the sums above.
X1 = tf.placeholder(tf.float32, shape=[None, 3])
X2 = tf.placeholder(tf.float32, shape=[None, 3])
corr = dcca_loss(X1, X2, K=3, rcov1=1e-4, rcov2=1e-4)
with tf.Session() as sess:
    correlation = sess.run(corr, feed_dict={X1: U, X2: V})
    print("dcca results:", -correlation)
features_names = ('ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 'precipitation_amt_mm', 'reanalysis_air_temp_k', 'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2', 'reanalysis_relative_humidity_percent', 'reanalysis_sat_precip_amt_mm', 'reanalysis_specific_humidity_g_per_kg', 'reanalysis_tdtr_k', 'station_avg_temp_c', 'station_diur_temp_rng_c', 'station_max_temp_c', 'station_min_temp_c', 'station_precip_mm') corr = [] for elm in features_names: corr.append(pearsonr(data[elm], data['total_cases'])[0]) y_pos = np.arange(len(features_names)) plt.bar(y_pos, corr, align='center', alpha=0.5) plt.xticks(y_pos, features_names, rotation='vertical') plt.ylabel('Correlation') plt.title('Correlation features vs total cases - Iquitos') plt.subplots_adjust(top=0.95, bottom=0.45) plt.show() print("\nCorrelation between features and total cases:") for i in range(0, len(features_names)): print("\t{0:38s} ==> {1:8f}".format(features_names[i], corr[i])) #Density Plots data_density = data.drop(
# Start from all-ones coefficients: one offset plus one weight per entry of a
# row of P.
N = len(P[0])
K0 = np.ones(N + 1)
print(rho(K0))

# Minimise rho and report the optimum and the value reached there.
Kopt = sop.minimize(rho, K0)['x']
print(Kopt)
print(rho(Kopt))

# Column-wise mean of P.
pm = np.mean(P, axis=0)


def V(p, K):
    """Return K[0] plus the K[1:]-weighted sum of |p - pm| ** gamma."""
    deviation = abs(p - pm)**gamma
    weighted = deviation @ K[1:]
    return weighted + K[0]


# Evaluate V at the optimum for the first 100 rows of P.
V_vals = [V(P[sim], Kopt) for sim in range(100)]

plt.scatter(E, V_vals)
# Identity line for visual comparison.
xx = np.linspace(min(V_vals), max(V_vals), 1000)
plt.plot(xx, xx, '--k')

# look into whether this is the right kind of correlation coefficient!
print('Correlation Coefficient =', pearsonr(E, V_vals)[0])

ax = plt.gca()
ax.set_aspect('equal')
plt.show()
if DoEMGFit == True: [outEMG, successG] = EMFit.EMGFit(xW[EMGinds], AvgW[EMGinds], samplewd, minx0, 20, solver='trust-constr') if successG == False: DoEMGFit = False else: DoEMGFit = True yEMG=EMFit.EMG(xW[EMGinds], outEMG['a'], outEMG['x0'], \ outEMG['xsc'], outEMG['sigma'], outEMG['b']) corvalueEMG = pearsonr(AvgW[EMGinds], yEMG) for ipar in np.arange(nEMG): outdata[ipar + xmax - xmin + 1, 2 * iwdbin] = outEMG[EMGPars[ipar]].value outdata[ipar + xmax - xmin + 1, 2 * iwdbin + 1] = outEMG[EMGPars[ipar]].brute_step outdata[xmax - xmin + 1 + nEMG, 2 * iwdbin] = corvalueEMG[0] outdata[xmax - xmin + 1 + nEMG, 2 * iwdbin+1]=\ np.sqrt(np.sum((yEMG-AvgW[EMGinds])**2))/np.nanmean(AvgW[EMGinds]) if DoEMAFit == True: if sDom == 'True': [outEMA, successA] = EMFit.EMAFit(xW[EMGinds], AvgW[EMGinds], samplewd, minx0, nsample, \ sameSource=True, sDom=True)