Example #1
def CalcCorrelation(percentage, N,  index):
    CreateTempResFile(percentage, N)
    getTrecEval(measure,  index)
    x = [res.std for Qnr, res in QueriesRes.iteritems()]
    y = [res.trecScore for Qnr, res in QueriesRes.iteritems()]
    std_p = pearsonr(x, y)[0]
    std_s = spearmanr(x, y)[0]
    x = [res.std / math.sqrt(len(Qterms[Qnr].split())) for Qnr, res in QueriesRes.iteritems()]
    std_n_p = pearsonr(x, y)[0]
    std_n_s = spearmanr(x, y)[0]
    x = [res.MAD for Qnr, res in QueriesRes.iteritems()]
    mad_p = pearsonr(x, y)[0]
    mad_s = spearmanr(x, y)[0]
    x = [res.MAD / math.sqrt(len(Qterms[Qnr].split())) for Qnr, res in QueriesRes.iteritems()]
    mad_n_p = pearsonr(x, y)[0]
    mad_n_s = spearmanr(x, y)[0]
    if debug:
        print "N", N, "----", "Percentage", percentage
        print "std pearson      ", std_p
        print "std spearman     ", std_s
        print "std norm pearson ", std_n_p
        print "std norm spearman", std_n_s
        print "MAD pearson      ", mad_p 
        print "MAD spearman     ", mad_s
        print "MAD norm pearson ", mad_n_p
        print "MAD norm spearman", mad_n_s
    return (std_p, std_s, std_n_p, std_n_s, mad_p,mad_s, mad_n_p, mad_n_s)
Example #2
def plot(x, y, lx, ly, order):
    lp = np.poly1d(np.polyfit(lx, ly, 1))
    xx = np.linspace(min(lx), max(lx), num=100)
    if args.title:
        title = args.title
    else:
        title = "Probablity for texts %s\ncategories:%s" % (
            ",".join(map(str, args.text_numbers)),
            ",".join(map(lambda c: category.category_name(c)["category"], categories))
        )
    alpha = args.alpha
    plt.ylabel("logit %d-gram model Probability" % order)
    plt.xlabel("logit Cloze Empirical Probability")
    plt.title(title)
    plt.plot(xx, lp(xx), "r-")
    plt.scatter(lx, ly, alpha=alpha)
    plt.savefig("plots/%slogit_%d.png" % (args.prefix, order))
    plt.close()
    print "correlation %d-gram vs. cloze:" % order, pearsonr(x, y)[0]
    print "correlation %d-gram vs. cloze:" % order, pearsonr(lx, ly)[0]

    p = np.poly1d(np.polyfit(x, y, 1))
    xx = np.linspace(min(x), max(x), num=100)

    plt.ylabel("%d-gram model Probability" % order)
    plt.xlabel("Cloze Empirical Probability")
    plt.title(title)
    plt.plot(xx, p(xx), "r-")
    plt.scatter(x, y, alpha=alpha)
    plt.savefig("plots/%s%d.png" % (args.prefix, order))
    plt.close()
Example #3
def print4():
    for R in sorted(k3set):
        R = R.replace('R', '')
        R = float(R)
        filename = name + '.R' + str(R) + '.senti.data'
        fil = open(filename, 'w')
        for ALPHA in sorted(k2set):
            ALPHA = ALPHA.replace('ALPHA', '')
            ALPHA = float(ALPHA)
            res = str(R) + delim
            res = res + str(ALPHA) + delim
            for A in sorted(k1set):
                A = A.replace('A', '')
                A = float(A)
                k1 = 'A' + str(int(A))
                k2 = 'ALPHA' + str(ALPHA)
                k3 = 'R' + str(R)
                res = res + str(pearsonr(dataX[k1][k2][k3], dataY[k1][k2][k3])[0]) + delim
            fil.write(res[:-1] + "\n")
        fil.close()
    for BASE in sorted(kbases):
        filename = name + '.' + BASE + '.senti.data'
        fil = open(filename, 'w')
        res = str(pearsonr(dataX[BASE], dataY[BASE])[0]) + '\n'
        fil.write(res)
        fil.close()
Example #4
def get_regression_results():
    # Final Regression result dumpings.
    import warnings
    warnings.filterwarnings('ignore')
    # For all of the models and all of the feature selection techniques
    Result = {};
    final_stats = [];
    mod_tech = ['linear','Ridge','Lasso'];
    mod_dist = {};
    cnt = 0;
    for dataSelect in range(0,5,2):
        lab, feat, final_keys = get_data_whole(dataSelect)
        print "\n"
        print 'Java-sID->'+str(test['sids'][0][dataSelect])
        Result[dataSelect] = {};
        Result[dataSelect]['final_keys'] = final_keys;
        for presentFeatRegress in range(0,2):
            Result[dataSelect][presentFeatRegress] = {};
            # Use the same data split for all of the models so that their results are comparable
            X_train, y_train, X_test, y_test, indices = get_data_allModel(lab, feat, presentFeatRegress);
            for modelNo in range(0,3):
                Result[dataSelect][presentFeatRegress][modelNo] = apply_regression_model(X_train, y_train, X_test, y_test, indices, modelNo);
                present_data = Result[dataSelect][presentFeatRegress][modelNo];
                Corr_train = pearsonr(Result[dataSelect][presentFeatRegress][modelNo]['y_train'].values, Result[dataSelect][presentFeatRegress][modelNo]['predictions_train'])[0];
                Corr_test = pearsonr(Result[dataSelect][presentFeatRegress][modelNo]['y_test'].values, Result[dataSelect][presentFeatRegress][modelNo]['predictions'])[0];
                selected_feat = final_keys[present_data['indices']];
                selected_feat = [str(x[0]) for x in selected_feat];
                final_stats.append([Corr_train, Corr_test, len(selected_feat), selected_feat]);
                mod_dist[cnt] = mod_tech[modelNo];
                cnt = cnt+1;
    A = pd.DataFrame(final_stats);
    A.rename(columns={0:'Corr-Train',1:'Corr-Test',2:'FeatNo',3:'Features Selected'},inplace=True);
    A.rename(index=mod_dist, inplace=True)
    return A;
Example #5
def svm_experiment(train_data, test_data):
    maes = {}
    rmses = {}
    pearsons = {}
    hypers = {'C': np.logspace(-2, 2, 5),
              'epsilon': np.logspace(-3, 1, 5),
              'gamma': np.logspace(-3, 1, 5)}
    all_labels = np.array([])
    all_preds = np.array([])
    for emo_id, emo in enumerate(EMOS):
        #emo_id = EMO_DICT[emo]
        train_x = train_data[emo_id, :, :-1]
        train_y = train_data[emo_id, :, -1]
        test_x = test_data[emo_id, :, :-1]
        test_y = test_data[emo_id, :, -1]
        
        m = GridSearchCV(SVR(), hypers)
        m.fit(train_x, train_y)
        preds = m.predict(test_x)
        maes[emo] = MAE(preds, test_y)
        rmses[emo] = math.sqrt(MSE(preds, test_y))
        pearsons[emo] = pearsonr(preds, test_y)[0]
        all_labels = np.concatenate((all_labels, test_y))
        all_preds = np.concatenate((all_preds, preds))
    all_pearson = pearsonr(all_preds, all_labels)[0]
    return maes, rmses, pearsons, all_pearson, all_preds
def test_similarity_2(model, vocab):
    """Test the model for similarity. Method: get correlation between model similarity
    and similarity of items in the test set.
    
    This method is using data from Ruts et al. (2004)"""
    d = ruts_etal_similarity.get_similarity_dict()
    results = {category: {"skipped": set()} for category in d}
    pred_overall = []
    actual_overall = []
    for category in d:
        predicted_values = []
        actual_values = []
        for pair, score in d[category].items():
            if set(pair).issubset(vocab):
                predicted_values.append(model.similarity(*pair))
                actual_values.append(score)
            else:
                results[category]["skipped"].update(set(pair) - vocab)
        pred_overall += predicted_values
        actual_overall += actual_values
        results[category]["pairs_tested"] = len(predicted_values)
        results[category]["pearsonr"] = pearsonr(predicted_values, actual_values)
        results[category]["spearmanr"] = spearmanr(predicted_values, actual_values)
    results["overall"] = dict()
    results["overall"]["pairs_tested"] = len(predicted_values)
    results["overall"]["pearsonr"] = pearsonr(pred_overall, actual_overall)
    results["overall"]["spearmanr"] = spearmanr(pred_overall, actual_overall)
    return results
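
# A minimal, self-contained sketch of the idea behind test_similarity_2: correlate
# model similarities with human similarity ratings via pearsonr/spearmanr. The toy
# ratings and scores below are hypothetical stand-ins, not the Ruts et al. (2004) data.
def _demo_similarity_correlation():
    from scipy.stats import pearsonr, spearmanr

    human_ratings = {("cat", "dog"): 0.8, ("cat", "car"): 0.1, ("dog", "wolf"): 0.9, ("car", "truck"): 0.85}
    model_scores = {("cat", "dog"): 0.70, ("cat", "car"): 0.15, ("dog", "wolf"): 0.82, ("car", "truck"): 0.78}

    predicted = [model_scores[pair] for pair in human_ratings]
    actual = [human_ratings[pair] for pair in human_ratings]
    # both return a (coefficient, p-value) pair
    return pearsonr(predicted, actual), spearmanr(predicted, actual)
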
def calculate_correlation():
    NG = [data[5] for data in import_text(join(locpath, celloutfile), '\t')][1:]
    Qusar = [data[6] for data in import_text(join(locpath, celloutfile), '\t')][1:]
    NG = [int(x) for x in NG]
    Qusar = [int(x) for x in Qusar]
    print "Correlation coefficient:", pearsonr(NG, Qusar)[0]
    print "p-value:                ", pearsonr(NG, Qusar)[1]
Example #8
def calculateDifferenceInRealGraphWeights(fileName):
  graph = loadGraphWithWeight(fileName)
  
  dualGraph = getGraphDual(graph)
  writeDualGraph(dualGraph, inputGraphFile)
  
  loadGraph()
  
  vertexCover = min_weighted_vertex_cover(dualGraph)
  writeVertexCover(vertexCover, inputGraphFile)
  
  edgeIdToWeightMap = map(lambda e: (getEdgeId(e[:2]), e[2]['w']), graph.edges_iter(data=True))
  
  weakEdges = []
  strongEdges = []
  for e in graph.edges_iter(data=True):
    edgeId = getEdgeId(e[:2])
    weight = e[2]['w']
    if edgeId in vertexCover: weakEdges.append(weight)
    else: strongEdges.append(weight)
 
  numElements = -1
  if len(weakEdges) > len(strongEdges): numElements = len(strongEdges)
  else: numElements = len(weakEdges)
 
  print np.mean(weakEdges)
  print np.mean(strongEdges)
  print pearsonr(random.sample(weakEdges, numElements), random.sample(strongEdges, numElements))
    def get_correlation_between_mean_score_and_error(self):
        """Compute the correlation between:

         * mean genuine score and false reject count
         * mean impostor score and false acceptance count


        False reject and false acceptance counts are computed using a global threshold.
        This threshold is the threshold giving the EER.
        Correlation is computed using Pearson correlation factor.
        """

        # We need the EER threshold
        eer, thr = self.get_eer_and_threshold()

        # We need to compute error rate of each user
        # Get genuine reject of each users
        fr = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1) \
                (delayed(_parallel_false_reject_helper)(self.get_genuine_presentations_of_user(userid),
                    thr, self._type) \
                    for userid in self._users_id))


        # Get impostors accept of each users
        fa = np.asarray(Parallel(n_jobs=self.n_jobs, verbose=1) \
                (delayed(_parallel_false_accept_helper)(self.get_impostor_presentations_of_user(userid),
                    thr, self._type) \
                    for userid in self._users_id))



        #compute the correlations
        return pearsonr(fr, self._genuine_scores)[0], pearsonr(fa,
                self._impostor_scores)[0], eer
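
# A minimal sketch of the same idea without the class or joblib machinery: at a fixed
# global threshold, count each user's false rejects and correlate those counts with the
# users' mean genuine scores. The threshold and score arrays below are hypothetical.
def _demo_score_error_correlation():
    import numpy as np
    from scipy.stats import pearsonr

    rng = np.random.RandomState(0)
    per_user_scores = [rng.normal(loc=0.7, scale=0.1, size=50) for _ in range(20)]
    threshold = 0.6  # stand-in for the EER threshold

    mean_genuine = np.array([s.mean() for s in per_user_scores])
    false_rejects = np.array([(s < threshold).sum() for s in per_user_scores])
    return pearsonr(mean_genuine, false_rejects)  # (r, p-value)
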
def knnPredictor(df):

    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)
    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []

    for k in range(1, 200, 1):
        knnModel = KNeighborsRegressor(n_neighbors=k)

        knnModel.fit(dataTrainX, dataTrainY)

        knnpredicted = knnModel.predict(dataTestX)
        corelationCoefficient = pearsonr(dataTestY, knnpredicted)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])

    # plotter.plot(corelationCoefficiantArray)
    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)

    knnModelBest = KNeighborsRegressor(n_neighbors=bestK)
    knnModelBest.fit(dataTrainX, dataTrainY)
    print("K = ")
    print(bestK)
    print("Corelation Coeff:")
    print(corelationCoefficiantDictionary[bestK])

    knnpredictedBest = knnModelBest.predict(dataTestX)

    fig, ax = plotter.subplots()
    corelationCoefficient = pearsonr(dataTestY, knnpredictedBest)
    print(corelationCoefficient[0])
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, knnpredictedBest)
    ax.set_xlabel('Measured')
    plotter.show()
    def correlateFeatures(self):
        self.standardizedTrainingData = self.xTrain
        labels = self.yTrain
        for i in range(1, 10):
            feature = self.standardizedTrainingData[:, i]
            print pearsonr(feature, labels)
            self.visualizeFeatures(i)
Example #12
def collect_group_stats(groups,thresh = 1e5):
	n_peaks = 0
	n_singletons = 0
	n_singletons_under_thresh = 0
	non_singletons_under_thresh = 0
	tiny_groups = 0
	tiny_size = 0
	tiny_vote = 0
	votes = []
	intensities = []
	maxivotes = []
	maxi = []
	for group in groups:
		n_peaks += len(group.members)
		if len(group.members) == 1:
			n_singletons += 1
			if group.members[0][0].intensity < thresh:
				n_singletons_under_thresh += 1
			intensities.append(group.members[0][0].intensity)
			votes.append(group.vote)
			maxi.append(group.members[0][0].intensity)
			maxivotes.append(group.vote)
		else:
			mi = 0
			for p,_,_ in group.members:
				if p.intensity < thresh:
					non_singletons_under_thresh += 1
				intensities.append(p.intensity)
				votes.append(group.vote)
				if p.intensity > mi:
					mi = p.intensity
			if mi < thresh:
				tiny_groups += 1
				tiny_size += len(group.members)
				tiny_vote += group.vote
			maxi.append(mi)
			maxivotes.append(group.vote)
	print "{} groups, consisting of {} peaks".format(len(groups),n_peaks)
	print "{} singleton groups ({:.0f}% of groups, {:.0f}% of peaks)".format(n_singletons,100.0*n_singletons/len(groups),100.0*n_singletons/n_peaks)
	print "{} peaks under threshold ({:.0e}), {:.0f}% of peaks".format(n_singletons_under_thresh + non_singletons_under_thresh,thresh,100.0*(n_singletons_under_thresh+non_singletons_under_thresh)/n_peaks)
	print "\t{} of which are singletons ({:.0f}%)".format(n_singletons_under_thresh,100.0*n_singletons_under_thresh/(n_singletons_under_thresh+non_singletons_under_thresh))
	print "{} peaks below the threshold in groups of size > 1".format(non_singletons_under_thresh)
	print "{} groups where the most intense peak is below the threshold (avg size = {:.2f} avg vote = {:.2f})".format(tiny_groups,1.0*tiny_size/tiny_groups,1.0*tiny_vote/tiny_groups)
	plt.figure()
	plt.plot(np.log(intensities),votes,'k.')
	from scipy.stats.stats import pearsonr
	r,p = pearsonr(np.log(intensities),votes)
	print "Test between intensity and vote for all peaks: corr coef = {}, p-value = {}".format(r,p)
	plt.xlabel('Log intensity')
	plt.ylabel('Vote of enclosing group')


	plt.figure()
	plt.plot(np.log(maxi),maxivotes,'r.')
	from scipy.stats.stats import pearsonr
	r,p = pearsonr(np.log(maxi),maxivotes)
	print "Test between maximum group intensity and vote: corr coef = {}, p-value = {}".format(r,p)
	plt.xlabel('Log maximum group intensity')
	plt.ylabel('Vote of group')
def mantel_test(presence_absence = 'C:/Users/MariaIzabel/Desktop/MASTER/PHD/nchain/presence_absence_matrix_indexes_1s.csv'):
  pres_abs_matrix = pd.read_csv(presence_absence, sep = ',', header = 0, index_col = 0)
  print pres_abs_matrix.head()

  # Looping through columns (genes)
  for gene1 in pres_abs_matrix:
    for gene2 in pres_abs_matrix:
      print pearsonr(pres_abs_matrix[gene1], pres_abs_matrix[gene2])
Example #14
def produce_correlations(w):
    e = [d["e"] for d in w["silence"]] + [d["e"] for d in w["speech"]]
    m = [d["m"] for d in w["silence"]] + [d["m"] for d in w["speech"]]
    z = [d["z"] for d in w["silence"]] + [d["z"] for d in w["speech"]]
    return (
        "\\newcommand{\\correlationem}{%.4f}\n"
        "\\newcommand{\\correlationez}{%.4f}\n"
        "\\newcommand{\\correlationmz}{%.4f}\n"
    ) % (pearsonr(e, m)[0], pearsonr(e, z)[0], pearsonr(m, z)[0])
Example #15
def acorr(array):
	array = array[~np.isnan(array)]
	array = array.tolist()
	# print len(array)
	# return
	for x in range(0,len(array)):
		x1 = array[0 : len(array)-x]
		x2 = array[x : len(array)]
		print pearsonr(x1,x2)[0]
Example #16
def OptimizeCor(E1,E2):
  pvalue = pearsonr(E1,E2)[1]
  print pearsonr(E1,E2)
  for i in range(len(E1)):
    E1_tmp = copy.deepcopy(E1)
    E2_tmp = copy.deepcopy(E2)
    del E1_tmp[i]
    del E2_tmp[i]
    print len(E1_tmp), pearsonr(E1_tmp,E2_tmp)
Example #17
def icm_gp_experiment(train_data, test_data, model, rank):
    maes = {}
    rmses = {}
    pearsons = {}
    X_train_list = []
    Y_train_list = []
    X_test_list = []
    Y_test_list = []
    for emo_id, emo in enumerate(EMOS):
    #for emo in sorted(EMOS): # very important to sort here
        #emo_id = EMO_DICT[emo]
        train_x = train_data[emo_id, :, :-1]
        train_y = train_data[emo_id, :, -1:]
        test_x = test_data[emo_id, :, :-1]
        test_y = test_data[emo_id, :, -1:]
        X_train_list.append(train_x)
        Y_train_list.append(train_y)
        X_test_list.append(test_x)
        Y_test_list.append(test_y)

    x_train, y_train, y_index = GPy.util.multioutput.build_XY(X_train_list, 
                                                              Y_train_list)
    Ny = 6
    k = GPy.util.multioutput.ICM(input_dim=x_train.shape[1]-1, num_outputs=Ny, 
                                 kernel=GPy.kern.RBF(x_train.shape[1]-1),
                                 W_rank=rank)
    m = GPy.models.GPRegression(x_train, y_train, kernel=k,
                                Y_metadata={'output_index': y_index})
    if model == "combined" or model == "combined+":
        m['ICM.B.W'].constrain_fixed(1.0)
    if model == "combined":
        m['ICM.B.kappa'].tie_together()
    print m
    m.optimize_restarts(messages=False, max_iters=100, robust=True)
    #m.optimize(max_iters=100)
    print m
    W = m['ICM.B.W']
    kappa = m['ICM.B.kappa']
    B = W.dot(W.T) + np.diag(kappa)
    
    x_test, y_test, _ = GPy.util.multioutput.build_XY(X_test_list, Y_test_list)
    preds = m.predict(x_test, Y_metadata={'output_index': y_index})[0]
    factor = preds.shape[0] / 6
    all_labels = np.array([])
    all_preds = np.array([])
    
    for emo_id, emo in enumerate(EMOS):
        #emo_id = EMO_DICT[emo]
        emo_preds = preds[emo_id * factor: (emo_id+1) * factor]
        emo_labels = y_test[emo_id * factor: (emo_id+1) * factor]
        maes[emo] = MAE(emo_preds, emo_labels)
        rmses[emo] = math.sqrt(MSE(emo_preds, emo_labels))
        pearsons[emo] = pearsonr(emo_preds, emo_labels)[0]
        all_labels = np.concatenate((all_labels, emo_labels.flatten()))
        all_preds = np.concatenate((all_preds, emo_preds.flatten()))
    all_pearson = pearsonr(all_preds, all_labels)[0]
    return maes, rmses, pearsons, all_pearson, all_preds, B
def main():
	renewfile = '/Users/Leon/Documents/Research/Data/USPCMainAssg1981_2006Drug'
	Renewdictionary = load_renewfile(renewfile)
	datefile = '/Users/Leon/Documents/Research/Data/grantDate'
	Date = load_dateFile(datefile)
	directory = './USclass'
	filenames = [x for x in os.listdir(directory) if 'Trend' in x]
	for filename in filenames:
		cluster_number = ''.join([x for x in filename if x.isdigit()])
		print filename

		Trend = load_dictionary(directory+'/'+filename)
		Trend = filter_By_Year(Trend,Date,1981,1998)
		Trend = filter_By_Number(Trend,100)
		print 'number of qualified trend ',len(Trend)
		# for label,patentlist in Trend.items():
		# 	if len(patentlist)<=10:
		# 		del Trend[label]
		# 		print 'delete trend %s because it is smaller than 10'%(label)

		positionfile = './data_analysis/US/'+cluster_number+'trends_position_distribution.csv'
		
		
		# 0: ['4855911', '5656428', '6027445'...]
		positionFeature = position_feature_for_trend(Trend,Date)
		matrix,percentage = trend_distribution(positionFeature,0.1,Renewdictionary)
		# calculate the average
		average0 = nth_renew_average(matrix,0)
		average1 = nth_renew_average(matrix,1)
		average2 = nth_renew_average(matrix,2)
		average3 = nth_renew_average(matrix,3)
		percentage.append('correlation')
		head = percentage[1:]
		write_distribution(head,positionfile)
		write_distribution(['renew 0,1,2,3 ratio in different range'],positionfile)

		a = pearsonr(average0,head[0:-1])
		average0.extend(a)
		write_distribution(average0,positionfile)
		a = pearsonr(average1,head[0:-1])
		average1.extend(a)
		write_distribution(average1,positionfile)
		a = pearsonr(average2,head[0:-1])
		average2.extend(a)
		write_distribution(average2,positionfile)
		a = pearsonr(average3,head[0:-1])
		average3.extend(a)
		write_distribution(average3,positionfile)
		write_distribution('\n',positionfile)
		label = 0
		for trend in matrix:
			write_distribution(['trend '+str(label)],positionfile)
			for renew in trend:
				renew.extend(pearsonr(renew, head[0:-1]))
				write_distribution(renew,positionfile)
			write_distribution('\n',positionfile)
			label += 1
def getPearson():
    userRep = getUserRep()
    userScore = getPostUserScore()
    userRepList = list();
    userScoreList = list();
    for userID in userRep:
        if userID in userScore:
            userRepList.append(userRep[userID])
            userScoreList.append(userScore[userID])
    print pearsonr(userRepList, userScoreList)
Example #20
def correl_matrix(assets, tickers):
    corr_matrix = np.zeros((len(assets.columns), len(assets.columns)))
    for i in range(len(assets.columns)):
        for j in range(len(assets.columns)):
            corr_matrix[i][j] = pearsonr(assets[tickers[i]], 
                    assets[tickers[j]])[0]
            corr_matrix[j][i] = pearsonr(assets[tickers[i]], 
                    assets[tickers[j]])[0]

    return corr_matrix 
Example #21
def plotvsprice(g, ds, titles, xys, item, logscale=False, save=False, savename = 'output.png'):
    if logscale:
        g('set logscale y')

    g.plot(ds[titles.index(item)], ds[titles.index('price')])
    
    if save:
        g.hardcopy(savename,terminal = 'png')

    print pearsonr([y for x, y in xys[titles.index('price')]], [y for x, y in xys[titles.index(item)]]) 
Example #22
def main(nh):
    sub = yield sub_scripting.get_sub(nh)
    
    sub.move.go(linear=[0.25, 0, 0])
    
    yield sub.visual_approach('forward', 'grapes/board', size_estimate=math.sqrt(2)*3*12*.0254, desired_distance=2)
    
    goal_mgr = sub._camera_2d_action_clients['forward'].send_goal(legacy_vision_msg.FindGoal(
        object_names=['grapes/empty_cell'],
    ))
    feedback = yield goal_mgr.get_feedback()
    res = map(json.loads, feedback.targetreses[0].object_results)
    
    res.sort(key=lambda x: float(x['redness']))
    
    all_possible_coords=set((X,Y) for X in [-1, 0, 1] for Y in [-1, 0, 1] if X != 0 or Y != 0)
    coords = max(itertools.permutations(all_possible_coords, len(res)),
        key=lambda positions: min(
            pearsonr(*zip(*((pos[0], float(obj['center'][0])) for pos, obj in zip(positions, res)))),
            pearsonr(*zip(*((pos[1], float(obj['center'][1])) for pos, obj in zip(positions, res)))),
        )
    )
        
    empty = res[:4]
    filled = res[4:8]
    
    for x in res:
        print x['redness'], x['center']

    
    '''empty_coords = min(itertools.permutations(all_possible_coords, 4),
        key=lambda positions: sum(math.sqrt(
            (float(obj['center'][0]) - .12*pos[0])**2 +
            (float(obj['center'][1]) - .12*pos[1])**2
        ) for pos, obj in zip(positions, empty)),
    )'''
    empty_coords = coords[:4]
    print empty_coords
    
    empty_coords = set(empty_coords)
    
    filled_coords = all_possible_coords - empty_coords
    print empty_coords, filled_coords
    
    def gen_paths(unmoved_peg_coords, empty_coords):
        if not unmoved_peg_coords:
            yield []
            return
        for a in unmoved_peg_coords:
            for b in empty_coords:
                for rest in gen_paths(unmoved_peg_coords - {a}, (empty_coords | {a}) - {b}):
                    yield [(a, b)] + rest

    def dist((ax, ay), (bx, by)):
        return math.sqrt((ax-bx)**2 + (ay-by)**2)
Example #23
def lenDistTtest(sample2len2freq, group2samples, uniq, targetGroup):
    samePairs = []
    diffPairs = []

    #same pairs
    for g, gsamples in group2samples.iteritems():
        issame = True
        if targetGroup and g != targetGroup:
            issame = False
        for i in xrange( len(gsamples) - 1 ):
            if gsamples[i] not in sample2len2freq:
                raise KeyError("Could not found sample %s in sample2len2freq\n" %gsamples[i])
            lendist1 = sample2len2freq[ gsamples[i] ]
            vec1 = getFreqVec(lendist1, uniq)
            if sum(vec1) == 0:
                continue

            for j in xrange( i+1, len(gsamples) ):
                if gsamples[j] not in sample2len2freq:
                    raise KeyError("Could not found sample %s in the sample2len2freq\n" %gsamples[j])
                lendist2 = sample2len2freq[ gsamples[j] ]
                vec2 = getFreqVec(lendist2, uniq)
                if sum(vec2) == 0:
                    continue

                corr, pval = pearsonr(vec1, vec2)
                if issame:
                    samePairs.append( corr )
                #else:
                #    diffPairs.append( corr )
    #Diff pairs:
    groups = group2samples.keys()
    for i in xrange( len(groups) - 1 ):
        for gs1 in group2samples[ groups[i] ]:
            lendist1 = sample2len2freq[ gs1 ]
            vec1 = getFreqVec(lendist1, uniq)

            if sum(vec1) == 0: #skip empty sample
                continue
            
            for j in xrange( i+1, len(groups) ):
                for gs2 in group2samples[ groups[j] ]:
                    lendist2 = sample2len2freq[ gs2 ]
                    vec2 = getFreqVec(lendist2, uniq)
                    if sum(vec2) == 0:
                        continue

                    corr, pval = pearsonr(vec1, vec2)
                    diffPairs.append( corr )

    #t-test:
    if len(samePairs) == 0 or len(diffPairs) == 0:
        raise ValueError('lenDistTtest: one of the vectors has zero length.\nVec1: %s.\nVec2: %s\n' % (','.join(map(str, vec1)), ','.join(map(str, vec2))))
    tval, pval = ttest_ind(samePairs, diffPairs)
    return tval, pval, np.mean(samePairs), np.std(samePairs), np.mean(diffPairs), np.std(diffPairs)
Example #24
def nCorrelation(x, y, n=None, pValue=False):
	if n is None:
		if pValue:
			return pearsonr(x, y)
		else:
			return pearsonr(x, y)[0]
	else:
		if pValue:
			return [pearsonr(x[k - n:k], y[k - n:k]) for k in range(n, len(x) + 1)]
		else:
			return [pearsonr(x[k - n:k], y[k - n:k])[0] for k in range(n, len(x) + 1)]
def rs_by_day_and_time():
  # -0.135908180745 correlation between total route time (on b65, downtownbound) and being a weekend
  #  0.20212506141277539 correlation between total route time (on b65, downtownbound) and being rush hour (7,8,9, 17,18,19) on a weekday
  x = [int(start_time.weekday() in [5,6]) for start_time in start_times] #independent
  y = [sum(traj) for traj in trajs] #dependent
  #TODO: how much variation is there weekend to weekday?
  print( "weekend/day", pearsonr(x,y)[0])

  x = [int(start_time.hour in [7,8,9, 17,18,19] and start_time.weekday() not in [5,6]) for start_time in start_times] #independent. rush hour?
  y = [sum(traj) for traj in trajs] #dependent
  print( "rush hour (weekdays)", pearsonr(x,y)[0]  )
def make_graph():
    trace_data = open('trace_results.txt', "r")
    geo_data = open('geo_results.txt', "r")
    
    ips = []
    rtt = []
    hops = []
    dist = []

    for line in trace_data:
        #File is tab delineated
        split_data = line.split('\t')
        ips.append(split_data[0])
        hops.append(int(split_data[1]))
        #Each route is newline separated, so remove the \n
        rtt.append(float(split_data[2].strip()))

    for line in geo_data:
        split_data = line.split('\t')
        if split_data[0] in ips:
            dist.append(float(split_data[1].strip()))

    #Calculate Pearson's r values between each set
    r_hops_dist, _ = pearsonr(hops, dist)
    r_rtt_dist, _ = pearsonr(rtt, dist)
    r_hops_rtt, _ = pearsonr(hops, rtt)

    print("Pearson's r for:\n\t"),
    print("Hops v Distance:%f\n\t" % r_hops_dist),
    print("RTT v Distance:\t%f\n\t" % r_rtt_dist),
    print("Hops v RTT:\t%f" % r_hops_rtt)

    #Plot hops v distance as red circles, adjust and label axes
    mpl.figure(1)
    mpl.plot(hops, dist, 'ro')
    mpl.grid(color='b', linestyle='-', linewidth=1)
    mpl.ylabel('Distance(km)')
    mpl.xlabel('Hops(#)')
    xmin, xmax = mpl.xlim()
    ymin, ymax = mpl.ylim()
    mpl.xlim((xmin - 1, xmax + 1))
    mpl.ylim((ymin - 100, ymax + 100))

    #Plot rtt v distance as red circles, label axis, and show both figures
    mpl.figure(2)
    mpl.plot(rtt, dist, 'ro')
    mpl.grid(color='b', linestyle='-', linewidth=1)
    mpl.ylabel('Distance(km)')
    mpl.xlabel('RTT(ms)')
    xmin, xmax = mpl.xlim()
    ymin, ymax = mpl.ylim()
    mpl.xlim((xmin - 5, xmax + 5))
    mpl.ylim((ymin - 100, ymax + 100))
    mpl.show()
def outputResults(out1_epsilon, out2_epsilon, kernel,  train_lt, test_lt):
	# Output the results to the appropriate output files
	writeFloatList(out1_epsilon, TRAINPREDICTIONSEPSILONFILENAME)
	writeFloatList(out2_epsilon, VALIDATIONPREDICTIONSEPSILONFILENAME)
	print "Pearson correlation between training labels and predictions, epsilon SVR:"
	print pearsonr(train_lt, out1_epsilon)
	print "Spearman correlation between training labels and predictions, epsilon SVR:"
	print spearmanr(train_lt, out1_epsilon)
	print "Pearson correlation between validation labels and predictions, epsilon SVR:"
	print pearsonr(test_lt, out2_epsilon)
	print "Spearman correlation between validation labels and predictions, epsilon SVR:"
	print spearmanr(test_lt, out2_epsilon)
def snow_depth_accidents_corr():
	snow_depth_accidents = np.loadtxt('output/accidents_by_snow_depth.tsv',delimiter='\t',skiprows=1)

	snow_depth_distribution = np.loadtxt('../weather/output/snow_depth_distribution.tsv',delimiter='\t',skiprows=1)

	#remove rows with snow depth values in underlying distribution that don't appear in accidents
	rows_to_remove = [i for i in range(len(snow_depth_distribution)) if snow_depth_distribution[i,0] not in snow_depth_accidents[:,0]]
	snow_depth_distribution = np.delete(snow_depth_distribution,rows_to_remove,0)

	print "Correlation between snow depth and accidents: " + str(pearsonr(snow_depth_accidents[:,0],snow_depth_accidents[:,1]/snow_depth_distribution[:,1])[0]) +'\n'

	return pearsonr(snow_depth_accidents[:,0],snow_depth_accidents[:,1]/snow_depth_distribution[:,1])[0]
def precip_1hr_accidents_corr():
	precip_1hr_accidents = np.loadtxt('output/accidents_by_precip1hr.tsv',delimiter='\t',skiprows=1)

	precip_distribution = np.loadtxt('../weather/output/precip_1hr_distribution.tsv',delimiter='\t',skiprows=1)

	#remove rows with precipitation values in underlying distribution that don't appear in accidents
	rows_to_remove = [i for i in range(len(precip_distribution)) if precip_distribution[i,0] not in precip_1hr_accidents[:,0]]
	precip_distribution = np.delete(precip_distribution,rows_to_remove,0)

	print "Correlation between 1 hr precip and accidents: " + str(pearsonr(precip_1hr_accidents[:,0],precip_1hr_accidents[:,1]/precip_distribution[:,1])[0]) +'\n'

	return pearsonr(precip_1hr_accidents[:,0],precip_1hr_accidents[:,1]/precip_distribution[:,1])[0]
def cc_accidents_corr():
	cc_accidents = np.loadtxt('output/accidents_by_cloud_ceiling.tsv',delimiter='\t',skiprows=1)

	cc_distribution = np.loadtxt('../weather/output/cloud_ceiling_distribution.tsv',delimiter='\t',skiprows=1)

	#remove rows with cloud ceiling values in underlying distribution that don't appear in accidents
	rows_to_remove = [i for i in range(len(cc_distribution)) if cc_distribution[i,0] not in cc_accidents[:,0]]
	cc_distribution = np.delete(cc_distribution,rows_to_remove,0)

	print "Correlation between cloud ceiling and accidents: " + str(pearsonr(cc_accidents[:,0],cc_accidents[:,1]/cc_distribution[:,1])[0]) +'\n'

	return pearsonr(cc_accidents[:,0],cc_accidents[:,1]/cc_distribution[:,1])[0]
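
# The three functions above share one pattern: align accident counts with the underlying
# exposure distribution, divide to get a rate, and correlate the rate with the weather
# value. A compact sketch of that alignment on toy arrays (the numbers are hypothetical):
def _demo_exposure_normalized_corr():
    import numpy as np
    from scipy.stats import pearsonr

    # column 0: weather value, column 1: count
    accidents = np.array([[0.0, 12.0], [1.0, 9.0], [2.0, 7.0], [4.0, 3.0]])
    exposure = np.array([[0.0, 100.0], [1.0, 80.0], [2.0, 60.0], [3.0, 50.0], [4.0, 20.0]])

    # keep only exposure rows whose weather value also appears in the accident table
    exposure = exposure[np.isin(exposure[:, 0], accidents[:, 0])]
    rate = accidents[:, 1] / exposure[:, 1]
    return pearsonr(accidents[:, 0], rate)[0]
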
Example #31
import json
import pandas as pd
import collections
from scipy.stats.stats import pearsonr

with open('allen_data/dev_mouse/raw_dictionary_no_days.txt') as data_file:    
    data = json.load(data_file)

with open('allen_data/dev_human/list_of_genes.txt') as data_file:    
    genes = json.load(data_file)

for key, value in data.items():
	data[key] = [item for sublist in value for item in sublist]

matrix = []

for i in genes:
	i = i.capitalize()
	print(i)
	correlations = []
	for j in genes:
		j = j.capitalize()
		correlations.append(pearsonr(data[i], data[j])[0]**2) 
	matrix.append(correlations)

print(len(matrix))

df=pd.DataFrame(matrix,columns=genes)
print(df.shape)

df.to_csv("allen_data/dev_mouse/mouse_raw_corr_pearson_matrix.csv",sep=',', encoding='utf-8')
sell_scale = scale(sell_variable)
print("SALE")
print(sell_scale)

tax_variable = df['Taxes'].values
tax_scale = scale(tax_variable)
print("TAXES")
print(tax_scale)

# 2

import numpy as np
from scipy.stats.stats import pearsonr

transformations = {
    'x': lambda x: x,
    '1/x': lambda x: 1 / x,
    'x**2': lambda x: x**2,
    'x**3': lambda x: x**3,
    'log(x)': lambda x: np.log(x)
}

a = df['Sell'].values
b = df['Taxes'].values

for transformation in transformations:
    b_transformed = transformations[transformation](b)
    pearsonr_coef, pearsonr_p = pearsonr(a, b_transformed)
    print(
        f'Transformation: {transformation} \t Pearson\'s r: {pearsonr_coef:.3f}'
    )
        if prob == [1]:
            G.add_edge(i, j)

Gc = max(nx.connected_component_subgraphs(G), key=len)
# average local clustering coefficient of the giant component
alcc_val = nx.average_clustering(Gc)
# global clustering coefficient (transitivity) of the full graph
gcc_val = nx.transitivity(G)

alcc.append(alcc_val)
gcc.append(gcc_val)

print("alcc =", alcc)
print("gcc =", gcc)

from scipy.stats.stats import pearsonr

print(pearsonr(alcc, gcc))

# Different values of probability

alcc = []
gcc = []

import matplotlib.pyplot as plt
import networkx as nx
import random
from random import *

G = nx.Graph()

for i in range(7):
    for j in range(i + 1, 7):
print("Ystd=", data['Y'].std())
print("==================================")

xmean = data['X'].mean()
ymean = data['Y'].mean()
xstd = data['X'].std()
ystd = data['Y'].std()

Sratio = xstd / ystd
print("Sratio=", Sratio)

#SciPy (pronounced /ˈsaɪpaɪ/ "Sigh Pie") is a free and open-source Python library used for scientific and technical computing
from scipy.stats.stats import pearsonr
ColA = data['X'].values
ColB = data['Y'].values
r, _ = pearsonr(ColA, ColB)
print("Correlation r=", r)

#Regression equation: y = a + b*x, where b = r*(Sy/Sx), a = Ymean - b*Xmean, and r is the correlation
print("==================================")
b = r * Sratio
w = (b - 13.12)
print("b=", b)
a = ymean - b * xmean
q = (a + 666.18) * -1
print("a=", w)
print("==================================")
print("MODEL GENERATED...round Off to 2 D.P.")
print("Round Off: round(var,D.P.)")
print("y=", round(q, 1), "+", round(w, 1), "x")
Example #35
def compare_pearson (test: np.ndarray, pattern: np.ndarray):
    test = test.reshape((test.shape[0]*test.shape[1]))
    pattern = pattern.reshape((pattern.shape[0]*pattern.shape[1]))

    return pearsonr(test, pattern)
Example #36
def main(kdts_path):
    # Read in kdts data
    with open(kdts_path, "rb") as infile:
        slice_idx_to_data = pkl.load(infile)

    kernel = ('wlst', 'logical_time', 5)
    idx_to_distances = {
        k: flatten_distance_matrix(v["kernel_distance"][kernel])
        for k, v in slice_idx_to_data.items()
    }

    # Package data for scatter plot
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points(idx_to_distances)

    # Package data for box-plots
    bp_positions = []
    bp_data = []
    for idx, distances in sorted(idx_to_distances.items()):
        bp_positions.append(idx)
        bp_data.append(distances)

    # Specify appearance of boxes
    box_width = 0.5
    flierprops = {"marker": "+", "markersize": 4}
    boxprops = {"alpha": 0.25}

    # Specify appearance of scatter plot markers
    marker_size = 6

    aspect_ratio = "widescreen"
    figure_scale = 1.5
    if aspect_ratio == "widescreen":
        base_figure_size = (16, 9)
    else:
        base_figure_size = (4, 3)

    figure_size = (figure_scale * base_figure_size[0],
                   figure_scale * base_figure_size[1])

    fig, ax = plt.subplots(figsize=figure_size)

    # Create box plots
    bp = ax.boxplot(bp_data,
                    widths=box_width,
                    positions=bp_positions,
                    patch_artist=True,
                    showfliers=False,
                    boxprops=boxprops,
                    flierprops=flierprops)

    # Overlay actual data points on same axis
    ax.scatter(scatter_x_vals, scatter_y_vals, s=marker_size)

    # Plot annotation ( correlation coefficients )
    nd_fractions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    nd_fraction_seq = []
    dist_seq = []
    for i in range(len(nd_fractions)):
        for d in idx_to_distances[i]:
            nd_fraction_seq.append(nd_fractions[i])
            dist_seq.append(d)
    pearson_r, pearson_p = pearsonr(nd_fraction_seq, dist_seq)
    spearman_r, spearman_p = spearmanr(nd_fraction_seq, dist_seq)
    #pearson_correlation_txt = "Kernel distance vs. % ND → Pearson-R = {}, p = {}".format(np.round(pearson_r, 2), pearson_p)
    #spearman_correlation_txt = "Kernel distance vs. % ND → Spearman-R = {}, p = {}".format(np.round(spearman_r, 2), spearman_p)

    pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(
        np.round(pearson_r, 2), pearson_p)
    spearman_correlation_txt = "Spearman's rho = {}, p = {}\n".format(
        np.round(spearman_r, 2), spearman_p)
    print(pearson_correlation_txt)
    print(spearman_correlation_txt)

    annotation_lines = [
        "Kernel Distance vs. % Wildcard Receives: Correlation Coefficients\n",
        #"=================================================================\n",
        pearson_correlation_txt,
        spearman_correlation_txt
    ]

    annotation_txt = "".join(annotation_lines)
    annotation_font_size = 18
    #ax.annotate( annotation_txt,
    #             xy=(0.55, 0.25),
    #             xycoords='axes fraction',
    #             fontsize=annotation_font_size,
    #             bbox=dict(boxstyle="square, pad=1", fc="w")
    #           )

    # Tick labels
    tick_label_fontdict = {"fontsize": 12}
    x_tick_labels = [
        "0", "10", "20", "30", "40", "50", "60", "70", "80", "90", "100"
    ]
    x_ticks = list(range(len(x_tick_labels)))
    ax.set_xticks(x_ticks)
    ax.set_xticklabels(x_tick_labels, rotation=0, fontdict=tick_label_fontdict)
    y_ticks = [0, 5, 10, 15, 20, 25, 30, 35, 40]
    y_tick_labels = [str(y) for y in y_ticks]
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(y_tick_labels, rotation=0, fontdict=tick_label_fontdict)

    # Axis labels
    x_axis_label = "Percentage of Wildcard Receives (i.e., using MPI_ANY_SOURCE)"
    y_axis_label = "Kernel Distance (Higher == Runs Less Similar)"
    axis_label_fontdict = {"fontsize": 18}
    ax.set_xlabel(x_axis_label, fontdict=axis_label_fontdict)
    ax.set_ylabel(y_axis_label, fontdict=axis_label_fontdict)

    # Plot Title
    plot_title = "Percentage of Non-Deterministic Sub-Iterations vs. Kernel Distance - Communication Pattern: miniMCB"
    title_fontdict = {"fontsize": 20}
    plt.title(plot_title, fontdict=title_fontdict)

    #plt.show()
    plt.savefig("mini_mcb_example.png", bbox_inches="tight", pad_inches=0.25)
Example #37
            np.int64).values
    if len(current_SANS) != 0:
        pop.loc[pop.subjectid == s, "SAPS"] = current_SAPS.sum()
        print(current_SAPS.sum())
    if len(current_SAPS) != 0:
        pop.loc[pop.subjectid == s, "SANS"] = current_SANS.sum()

#investigate distribution of SAPS and SANS scores across SCZ population
SAPS_scores = pop[pop.dx_num == 1].SAPS.astype(np.float).values
SANS_scores = pop[pop.dx_num == 1].SANS.astype(np.float).values

scores_PCA_path = "/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/VBM/results/pcatv/5_folds_NUDAST/results/0/struct_pca_0.1_0.5_0.8/X_train_transform.npz"
scores_comp = np.load(scores_PCA_path)['arr_0']

#Pearson correlation
pearsonr(scores_comp[:, 0], SAPS_scores)
pearsonr(scores_comp[:, 0], SANS_scores)

pearsonr(scores_comp[:, 1], SAPS_scores)
pearsonr(scores_comp[:, 1], SANS_scores)

pearsonr(scores_comp[:, 2], SAPS_scores)
pearsonr(scores_comp[:, 2], SANS_scores)

pearsonr(scores_comp[:, 3], SAPS_scores)
pearsonr(scores_comp[:, 3], SANS_scores)

pearsonr(scores_comp[:, 4], SAPS_scores)
pearsonr(scores_comp[:, 4], SANS_scores)

#COMPONENT 1
Example #38
def main():

    fname = 'xpp/cols3_fs.ode'
    pars = read_pars_values_from_file(fname)
    inits = read_init_values_from_file(fname)

    # repeat simulation for different frequencies.
    # pars['mode'] = '1' makes the first tone appear in the first column
    # pars['mode'] = '2' makes the first tone appear in the second column.. etc.

    results = np.zeros((3, 6))

    for i in range(3):
        pars['mode'] = str(i + 1)

        pars['pv_opto'] = 0
        pars['som_opto'] = 0

        # returns tuple (t,u,v1,v2,inits,parameters,tonelist)
        control = run_experiment(fname, pars, inits, return_all=True)

        pars['pv_opto'] = .01

        pv_off = run_experiment(fname, pars, inits, return_all=True)

        pars['pv_opto'] = 0
        pars['som_opto'] = .2

        som_off = run_experiment(fname, pars, inits, return_all=True)

        # get first max u (i), second max u (u2).
        # get first max u with pv opto (i), second max u with pv opto (u2).
        # get first max u with som opto (i), second max u with som opto (u2).

        # get tone list stard and end index for first and second tones
        # get tone list times

        tone1On, tone1Off = control['tonelist'][0]
        tone2On, tone2Off = control['tonelist'][1]

        idx1_start = np.argmin(
            np.abs(control['t'] - tone1On)) + 1  # first time interval index
        idx1_end = np.argmin(np.abs(control['t'] - tone1Off)) - 1

        idx2_start = np.argmin(
            np.abs(control['t'] - tone2On)) + 1  # second time interval index
        idx2_end = np.argmin(np.abs(control['t'] - tone2Off)) - 1

        # get first tone (varies as a function of i)
        control1 = get_max_FR(control['u' + str(i + 1)], idx1_start, idx1_end)
        pv1 = get_max_FR(pv_off['u' + str(i + 1)], idx1_start, idx1_end)
        som1 = get_max_FR(som_off['u' + str(i + 1)], idx1_start, idx1_end)

        # get second tone (always u2)
        control2 = get_max_FR(control['u2'], idx2_start, idx2_end)
        pv2 = get_max_FR(pv_off['u2'], idx2_start, idx2_end)
        som2 = get_max_FR(som_off['u2'], idx2_start, idx2_end)

        results[i, :] = [control1, control2, pv1, pv2, som1, som2]

        if (i == 1) and False:
            fig = plt.figure()
            ax11 = fig.add_subplot(111)
            ax11.plot(control['u2'])
            ax11.plot(som_off['u2'])
            plt.show()

        # end 3 tone loop

    # run PV activation for correlation calculation.
    pars['pv_opto'] = -.2
    pars['som_opto'] = 0.
    pars['mode'] = 2

    pv_on = run_experiment(fname, pars, inits, return_all=True)

    # run PV activation for correlation calculation.
    pars['pv_opto'] = 0
    pars['som_opto'] = 0.
    pars['mode'] = 2

    pv_control = run_experiment(fname, pars, inits, return_all=True)

    print results

    # correlation
    fig3 = plt.figure(figsize=(8, 3))
    ax1 = fig3.add_subplot(121)
    ax2 = fig3.add_subplot(122)

    time = pv_on['t']
    input_trace = pv_on['sv'][:, pv_on['vn'].index('ia2')]

    time_short = time[time < 20]
    input_trace_short = input_trace[time < 20]

    time_ctrl = pv_control['t']
    input_trace_ctrl = pv_control['sv'][:, pv_control['vn'].index('ia2')]

    time_short_ctrl = time_ctrl[time_ctrl < 20]
    input_trace_short_ctrl = input_trace_ctrl[time_ctrl < 20]

    ax1b = ax1.twinx()
    ax1b.plot(time_short_ctrl * 10, input_trace_short_ctrl, color='tab:red')
    ax1.plot(time_short_ctrl * 10, pv_control['u2'][time_ctrl < 20])

    ax1.set_title('Pyr Control FR Rate')
    ax1.set_ylabel('Firing Rate')
    ax1b.set_ylabel('Thalamus', color='tab:red')

    print "PV act. corr = " + str(
        pearsonr(input_trace_short, pv_on['u2'][time < 20]))

    ax2b = ax2.twinx()
    ax2b.plot(time_short * 10, input_trace_short, color='tab:red')
    ax2.plot(time_short * 10, pv_on['u2'][time < 20])

    ax2.set_title('Pyr FR Rate with PV Activation')
    ax2.set_ylabel('Firing Rate')
    ax2b.set_ylabel('Thalamus', color='tab:red')

    print "PV control corr = " + str(
        pearsonr(input_trace_short_ctrl, pv_control['u2'][time_ctrl < 20]))

    ax1.set_xlabel('t')
    ax2.set_xlabel('t')
    plt.tight_layout()

    fig = plt.figure()
    #sv[:,vn.index('u1')]

    # plot relative firing rates
    ax11 = fig.add_subplot(121)
    ax12 = fig.add_subplot(122)

    bar_width = 0.2
    #ax11.set_title('Peak Response')
    #ax11.scatter(0,maxes_u_control[0,1],label='Control 1st Tone',color='black')
    #ax11.scatter(0,maxes_u_pv_off[0,1],label='',color=pv_color)

    ax11.set_title('Normalized Peak Response (2nd Tone)')

    control_probe = results[1, 0]
    ax11.scatter(-1,
                 results[0, 1] / control_probe,
                 label='Control 2nd Tone',
                 color='black')
    ax11.scatter(0, results[1, 1] / control_probe, label='', color='black')
    ax11.scatter(1, results[2, 1] / control_probe, label='', color='black')

    pv_probe = results[1, 2]
    ax11.scatter(-1,
                 results[0, 3] / pv_probe,
                 label='PV Off 2nd Tone',
                 color=pv_color)
    ax11.scatter(0, results[1, 3] / pv_probe, label='', color=pv_color)
    ax11.scatter(1, results[2, 3] / pv_probe, label='', color=pv_color)

    ax12.set_title('Normalized Peak Response (2nd Tone)')

    control_probe = results[1, 0]
    ax12.scatter(-1,
                 results[0, 1] / control_probe,
                 label='Control 2nd Tone',
                 color='black')
    ax12.scatter(0, results[1, 1] / control_probe, label='', color='black')
    ax12.scatter(1, results[2, 1] / control_probe, label='', color='black')

    som_probe = results[1, 4]
    ax12.scatter(-1,
                 results[0, 5] / som_probe,
                 label='SOM Off 2nd Tone',
                 color=som_color)
    ax12.scatter(0, results[1, 5] / som_probe, label='', color=som_color)
    ax12.scatter(1, results[2, 5] / som_probe, label='', color=som_color)

    #ax11.scatter()
    """
    ax12.set_title('Normalized Peak Response (2nd Tone)')
    ax12.scatter(0,maxes_u_control[1,1]/maxes_u_control[0,1],label='Control 2nd Tone',color='black')
    ax12.scatter(0,maxes_u_som_off[1,1]/maxes_u_som_off[0,1],label='SOM Off',color='red')
    
    #ax12.bar(tone_number+bar_width,maxes_u_pv_off[:,1]/adapted_fr,width=bar_width,label='pv_off',color='green')
    #ax12.bar(tone_number+2*bar_width,maxes_u_som_off[:,1]/adapted_fr,width=bar_width,label='som_off',color='red')
    #ax12.plot([0,4],[1,1],ls='--',color='gray')
    """

    ax11.set_xlabel('Distance from Preferred Frequency')
    ax12.set_xlabel('Distance from Preferred Frequency')

    ax11.legend()
    ax12.legend()

    plt.tight_layout()

    # plot synapses
    if False:
        sv = control['sv']
        vn = control['vn']

        aie2 = float(control['parameters']['aie2'])  # som to pn
        asom2pv = float(control['parameters']['asom2pv'])  # som to pv

        ws2p = sv[:, vn.index('ws2p')]  # som to pn
        ws2v = sv[:, vn.index('ws2v')]  # som to pv

        fig2 = plt.figure()
        ax2 = fig2.add_subplot(111)
        ax2.plot(control['t'], aie2 * ws2p, label='som to pn')
        ax2.plot(control['t'], asom2pv * ws2v, label='som to pv')

    plt.show()
from sklearn.metrics import mean_squared_error
from math import sqrt
rmse3=sqrt(mean_squared_error(wcat.AT,pred3))
# ------------------------------------------------
# exp polynomial model:  x = Waist*Waist,  y = log(AT)
# ------------------------------------------------
Waist_Sq = wcat.Waist*wcat.Waist

model4= smf.ols("np.log(AT) ~ Waist+Waist_Sq",data=wcat).fit()

model4.params
model4.summary()
model4.conf_int(0.05)
pred4=model4.predict(wcat.Waist)

from pydoc import help
from scipy.stats.stats import pearsonr
help(pearsonr)

pearsonr(wcat.AT,pred4)
from sklearn.metrics import mean_squared_error
from math import sqrt
rmse4=sqrt(mean_squared_error(wcat.AT,pred4))
rmse4
# -----------------------------------------------------------------
Example #40
def Pearson_corr(x, y):
    [r, p] = pearsonr(x, y)
    return r, p
Example #41
    def train(self, epochs, batch_size):
        d_loss_history, g_loss_history = [], []
        pearson_train_history, pearson_val_history = [], []

        max_pearson = -1.0

        # size of the half of the batch
        half_batch = int(batch_size / 2)
        d_loss_real, d_loss_fake, g_loss = [1, 0], [1, 0], [1, 0]

        positive_y = np.ones(
            (batch_size, 1), dtype=np.float32) * (1 - smooth_rate)
        negative_y = -positive_y
        dummy_y = np.zeros((batch_size, 1), dtype=np.float32)

        for epoch in range(epochs):
            # list for storing losses/accuracies for both discriminator and generator
            d_losses, d_accuracies, g_losses = [], [], []

            for _minibatch_idx in range(int(sample_num / batch_size)):
                for _ in range(self.n_critic):
                    dis_idx = np.random.randint(0, y_train.shape[0],
                                                batch_size)
                    discriminator_minibatches = y_train[dis_idx]
                    noise = self.X_train[dis_idx].astype(np.float32)
                    d_loss = self.discriminator_model.train_on_batch(
                        [discriminator_minibatches, noise],
                        [positive_y, negative_y, dummy_y])
                    d_losses.append(d_loss)
                gen_idx = np.random.randint(0, y_train.shape[0], batch_size)
                noise = self.X_train[gen_idx].astype(np.float32)
                g_losses.append(
                    self.generator_model.train_on_batch(
                        noise, [positive_y, y_train[gen_idx]]))

            # ---------------------
            # Convert each histories into numpy arrays to get means
            # ---------------------
            d_losses = np.array(d_losses)
            d_accuracies = np.array(d_accuracies)
            g_losses = np.array(g_losses)

            # ---------------------
            # Get generator's prediction and compute overall pearson on train set
            # ---------------------
            predictions = self.generator.predict(self.X_train).flatten()
            avg_pearson = pearsonr(predictions, self.y_train.flatten())[0]
            print "Pearson R on Train set: {}".format(avg_pearson)

            # ---------------------
            # Get generator's prediction and compute overall pearson on validation set
            # ---------------------
            val_predictions = self.generator.predict(self.X_val).flatten()
            avg_val_pearson = pearsonr(val_predictions,
                                       self.y_val.flatten())[0]
            print "Pearson R on Val set: {}".format(avg_val_pearson)

            # if current pearson on validation set is greatest so far, update the max pearson,
            if max_pearson < avg_val_pearson:
                print "Perason on val improved from {} to {}".format(
                    max_pearson, avg_val_pearson)
                _write_1D_deeplift_track(
                    predictions.reshape(self.X_train.shape[0],
                                        self.window_size),
                    normalized_train_intervals,
                    os.path.join(self.srv_dir, 'train'))
                _write_1D_deeplift_track(
                    val_predictions.reshape(self.X_val.shape[0],
                                            self.window_size),
                    normalized_val_intervals,
                    os.path.join(self.srv_dir, 'val'))
                f = open(os.path.join(self.srv_dir, 'meta.txt'), 'wb')
                f.write(
                    str(epoch) + " " + str(avg_pearson) + "  " +
                    str(avg_val_pearson) + "\n")
                max_pearson = avg_val_pearson

                # ---------------------
                # Get generator's prediction and compute overall pearson on test set
                # ---------------------
                test_predictions = self.generator.predict(
                    self.X_test).flatten()
                avg_test_pearson = pearsonr(test_predictions,
                                            self.y_test.flatten())
                print "Pearson R on Test set: {}".format(avg_test_pearson)
                f.write("Test Pearson: " + str(avg_test_pearson))
                f.close()
                _write_1D_deeplift_track(
                    test_predictions.reshape(self.X_test.shape[0],
                                             self.window_size),
                    normalized_test_intervals,
                    os.path.join(self.srv_dir, 'test'))

                self.generator.save(
                    os.path.join(self.model_dir, 'best_generator.h5'))
                self.discriminator.save(
                    os.path.join(self.model_dir, 'best_discriminator.h5'))

            # Save the progress
            d_loss_history.append(d_losses)
            g_loss_history.append(g_losses)
            pearson_train_history.append(avg_pearson)
            pearson_val_history.append(avg_val_pearson)

            # Print the progress
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_losses.mean(), 100.0 * d_accuracies.mean(),
                   g_losses.mean()))

        assert (len(d_loss_history) == len(g_loss_history) ==
                len(pearson_train_history) == len(pearson_val_history))

        print "Saving the loss and pearson logs..."
        np.save(os.path.join(log_dir, 'd_loss_history.npy'), d_loss_history)
        np.save(os.path.join(log_dir, 'g_loss_history.npy'), g_loss_history)
        np.save(os.path.join(log_dir, 'pearson_train_history.npy'),
                pearson_train_history)
        np.save(os.path.join(log_dir, 'pearson_val_history.npy'),
                pearson_val_history)
        print "Train Complete!"
Example #42
                    '/home/n_athan/Desktop/diploma/code/stable_voxels/st_vox' +
                    str(parts) + '.pkl', 'wb')
                #print(fmri_data_for_trial[tempo[0,:],0])
                stab_score = np.zeros((length))
                for x in range(0, length):  #voxel
                    sum_vox = 0
                    for y in range(0, 58):  #noun
                        vox[x, 0, y] = fmri_data_for_trial[tempo[y, 0], x]
                        vox[x, 1, y] = fmri_data_for_trial[tempo[y, 1], x]
                        vox[x, 2, y] = fmri_data_for_trial[tempo[y, 2], x]
                        vox[x, 3, y] = fmri_data_for_trial[tempo[y, 3], x]
                        vox[x, 4, y] = fmri_data_for_trial[tempo[y, 4], x]
                        vox[x, 5, y] = fmri_data_for_trial[tempo[y, 5], x]
                        # compute the correlation
                    for z in combs:
                        sum_vox += pearsonr(vox[x, z[0], :], vox[x,
                                                                 z[1], :])[0]
                    stab_score[x] = sum_vox / 15  #no of possible correlations
    #stab_vox=nlargest(500,range(len(stab_score)),stab_score.take)
                stab_vox = np.argsort(stab_score)[::-1][:stable_voxels]
                np.savetxt('./stable_voxels/st_vox' + str(parts) + '/' +
                           noun[test_words[0]] + '_' + noun[test_words[1]] +
                           '.txt',
                           stab_vox,
                           fmt='%d')
            else:
                stab_vox = np.loadtxt('../stable_voxels/st_vox' + str(parts) +
                                      '_' + str(stable_voxels) + '.txt',
                                      dtype=int)
                print('I loaded the voxels NOT calculated them!')
            #print('Voxel Selection ends...')
#################################################################
Example #43
import numpy as np
from scipy.stats.stats import pearsonr
import matplotlib.pyplot as plt

rootdir = "/home/banua/xprmt/xprmt-icacsis16/"
dataset = 'zoo'
fname = rootdir + dataset + "/" + dataset + "_table_True.csv"

data = np.loadtxt(fname, delimiter="\t", dtype=str, usecols=(1, 2))
data = np.array(data).astype(np.float)

x = data[:, 0]
y = data[:, 1]

pval = np.corrcoef(x, y)
r_row, p_value = pearsonr(x, y)

plt.scatter(x, y)
plt.show()

print pval
print r_row
print p_value
Example #44
            "review": corrected_review,
            "compound": compound_rate,
            "positive": pos,
            "neutral": neu,
            "negative": neg
        },
        ignore_index=True)
    if count % 1000 == 0:
        print(count)

sentiments = sentiments.set_index(reviews.index)
result = pd.concat([reviews, sentiments],
                   axis=1,
                   join_axes=[reviews.index],
                   join='outer')
result.to_csv('sentiments_passage_corrected5000.csv', sep=',')

result.to_pickle('sentiments_ratings_corrected_passage.pkl')

# evaluation

comp = result['compound']
stars = result['stars']

# sentences - 0.28
# comma separated - 0.428
# passage - 0.55

# statistical significance
pearsonr(comp, stars)
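# Unpacking the call above (a sketch reusing the same comp and stars columns):
r, p = pearsonr(comp, stars)
print('passage-level Pearson r = %.3f (p = %.3g)' % (r, p))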
# compute anomalies independent of climatology
for y in np.arange(0, len(years)):
    tmp_clim = []
    tmp_clim = np.nanmean(np.delete(chp_rfe, y, axis=0), axis=0)
    chp_anom[y, :, :] = chp_rfe[y, :, :] - tmp_clim

    tmp_clim = []
    tmp_clim = np.nanmean(np.delete(ens_mean, y, axis=0), axis=0)
    c3s_anom[y, :, :] = ens_mean[y, :, :] - tmp_clim
# compute anomaly correlation coefficient (using pearsonr)
# pearsonr doesn't work on a 2-D array, so loop through grid boxes
for i in np.arange(0, len(c3s_lat)):
    for j in np.arange(0, len(c3s_lon)):
        if np.sum(np.isnan(c3s_anom[:, i, j])) < len(years):  # check for nan
            c3s_acc[:, i, j] = pearsonr(chp_anom[:, i, j], c3s_anom[:, i, j])

cols = 'RdYlBu'
cmin = 0
cmax = 1
cspc = 0.1
clevs = np.arange(cmin, cmax + cspc, cspc)
norm = BoundaryNorm(boundaries=clevs, ncolors=256)

fig = plt.figure(figsize=(4, 3))
mymap = Basemap(projection='cyl',resolution='l',\
        llcrnrlat=np.min(c3s_lat),urcrnrlat=np.max(c3s_lat),\
        llcrnrlon=np.min(c3s_lon),urcrnrlon=np.max(c3s_lon))
mymap.drawparallels(np.arange(-90, 90, 2),
                    labels=[1, 0, 0, 0],
                    labelstyle='+/-')
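# An optional vectorized alternative to the gridbox loop above (a sketch, assuming both
# anomaly fields have shape (years, lat, lon) and share the same missing-data mask):
import numpy as np

def gridwise_pearson(a, b):
    a_c = a - np.nanmean(a, axis=0)
    b_c = b - np.nanmean(b, axis=0)
    num = np.nansum(a_c * b_c, axis=0)
    den = np.sqrt(np.nansum(a_c ** 2, axis=0) * np.nansum(b_c ** 2, axis=0))
    return num / den                      # Pearson r per gridbox, shape (lat, lon)

# acc_map = gridwise_pearson(chp_anom, c3s_anom)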
Beispiel #46
0
import json, csv, re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
count = 0


with open("yelp_AZ_2018.json", encoding="utf8") as json_file, open('Shorts_11.csv', mode='w') as fout:
    fout.write('stars,question_marks,exclamation_points\n')
    for review in json_file:
        count = count + 1
        yelp_review = json.loads(review)

        question_mark_matches = re.findall(r'\?', yelp_review['text'], flags=re.I)
        exclamation_point_matches = re.findall(r'!', yelp_review['text'], flags=re.I)
        num_questions = len(question_mark_matches)
        num_exclamations = len(exclamation_point_matches)

        fout.write(str(yelp_review['stars']) + ',' + str(num_questions) + ',' + str(num_exclamations) + '\n')


yelp = pd.read_csv('Shorts_11.csv', sep=',')
from scipy.stats.stats import pearsonr
correlation_question = pearsonr(yelp.stars, yelp.question_marks)
correlation_exclamation = pearsonr(yelp.stars, yelp.exclamation_points)
seaborn.lmplot(x="question_marks", y="stars", data=yelp, fit_reg=True)
plt.show()
seaborn.lmplot(x="exclamation_points", y="stars", data=yelp, fit_reg=True)
plt.show()
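# The two correlations computed above are never reported in the snippet; a one-line sketch:
print('stars vs question marks:', correlation_question)
print('stars vs exclamation points:', correlation_exclamation)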
import os, numpy as np, nibabel as nib
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from scipy.stats.stats import pearsonr

dataPath = '/home/despoB/kaihwang/TRSE/TDSigEI/'
#subjects = ['503', '505', '508', '509', '510', '512', '513', '516', '517', '518', '519', '523', '527', '528', '529', '530', '531', '532', '534']
subjects = ['503']

#Load the masks
tFEF_mask = nib.load(dataPath + 'ROIs/T_FEF.nii.gz').get_data() # 777 voxels
dFEF_mask = nib.load(dataPath + 'ROIs/D_FEF.nii.gz').get_data() # 838 voxels

corrFunc = lambda a, c, d: np.array(pearsonr(a, d)) - np.array(pearsonr(c, d))
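# corrFunc returns the elementwise difference of two (r, p) pairs: pearsonr(a, d) - pearsonr(c, d)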

conditions = ['FH', 'Fo', 'Fp', 'HF', 'Ho', 'Hp']

for subj in subjects:
	# Load the functional data and apply the target and distractor FEF masks.
	print 'Load the functional data and apply the masks'

	ffa = nib.load(dataPath + subj + '/FFA_indiv_ROI.nii.gz').get_data()
	ppa = nib.load(dataPath + subj + '/FFA_indiv_ROI.nii.gz').get_data()
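	# note: this loads the same FFA ROI file for ppa; a PPA-specific ROI mask may be intended here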

	FH = nib.load(dataPath + subj + '/503_nusiance_FH_errts.nii.gz').get_data()
	FH_t = FH[tFEF_mask!=0]
	FH_d = FH[dFEF_mask!=0]
	FH_ffa, FH_ppa = FH[ffa!=0], FH[ppa!=0]
	Fo = nib.load(dataPath + subj + '/503_nusiance_Fo_errts.nii.gz').get_data()
	Fo_t = Fo[tFEF_mask!=0]
	Fo_d = Fo[dFEF_mask!=0]
def determine_features_to_use(dataset):
    x, columns_in_data_set = np.array(dataset).shape
    lst = [None] * columns_in_data_set

    for i in range(columns_in_data_set):
        lst[i] = np.array([instance[i] for instance in dataset])

    features_to_use = []
    for i in range(columns_in_data_set - 1):
        r2, p_val = pearsonr(lst[i], lst[11])
        if abs(r2) > 0.001:
            features_to_use.append(i)

    print("Pearson correlation coefficients")
    print("Fixed Acidity: ", pearsonr(lst[0], lst[11]))
    print("Volatile Acidity: ", pearsonr(lst[1], lst[11]))
    print("Citric Acid: ", pearsonr(lst[2], lst[11]))
    print("Residual Sugar: ", pearsonr(lst[3], lst[11]))
    print("Chlorides: ", pearsonr(lst[4], lst[11]))
    print("Free Sulfur Dioxide: ", pearsonr(lst[5], lst[11]))
    print("Total Sulfur Dioxide: ", pearsonr(lst[6], lst[11]))
    print("Density: ", pearsonr(lst[7], lst[11]))
    print("pH: ", pearsonr(lst[8], lst[11]))
    print("Sulphates: ", pearsonr(lst[9], lst[11]))
    print("Alcohol: ", pearsonr(lst[10], lst[11]))

    return features_to_use
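# Hypothetical usage on a synthetic 12-column array (11 features plus a quality score in
# column 11); the real script presumably loads a wine-quality style dataset instead. Note that
# the variable named r2 inside the function holds the Pearson r itself, not r squared.
import numpy as np
from scipy.stats import pearsonr

rng = np.random.RandomState(0)
synthetic = rng.rand(200, 12)
print(determine_features_to_use(synthetic))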
def evaluate_prediction(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    corr, _ = pearsonr(y_true, y_pred)
    return {'mse': mse, 'mae': mae, 'r2': r2, 'corr': corr}
        t = 5
        while (t != 0):
            temp_array.remove('')
            t -= 1
        # temp_array=temp_array[14:]
        line_array.append(list(map(float, temp_array)))  # list(...) so rows stay concrete sequences under Python 3
line_array = np.array(line_array)
count = 0
print("Row Count: ", len(line_array))
print("Correlation")
tup_duplicates = []
for i in range(len(line_array)):
    for j in range(i + 1, len(line_array)):
        v1 = line_array[i][9:]
        v2 = line_array[j][9:]
        first_corr = pearsonr(v1, v2)[0]
        if (first_corr >= corrthresh
                and abs(line_array[i][3] - line_array[j][3]) <= mzdiff
                and abs(line_array[i][4] - line_array[j][4]) <= rtdiff):
            count += 1
            print(i, j)
            # tup_duplicates.append((i,j))
print("Number of duplicates: "),
print(count)
# print(len(tup_duplicates))
print("Percentage duplicates: "),
print(float(count) / float(len(line_array)))

# print("cvs-writing")

# table_labels.remove('label')
Beispiel #51
0
def get_correlation(X, y):
    scores = np.zeros(X.shape[1])
    for i_col in np.arange(X.shape[1]):
        x = X[:, i_col]
        scores[i_col] = np.abs(pearsonr(x, y)[0])
    return scores
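# Hypothetical usage: score each column of a random design matrix by |Pearson r| against y.
# Column 1 is constructed to correlate most strongly, so it should receive the highest score.
import numpy as np
from scipy.stats import pearsonr

rng = np.random.RandomState(0)
X = rng.randn(200, 3)
y = X[:, 1] + 0.1 * rng.randn(200)
print(get_correlation(X, y))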
Beispiel #52
0
    def infer_abundances(self, norm_b=False):
        """
        This uses nnls to solve for abundances of each edit proposal
        :param norm_b:
        :return:
        """
        A = self.coefficient_matrix

        b = self.output_vec

        if self.verbose:
            print("")
            print('NNLS input shapes')
            print("--------------------------------------------------------")
            print('A', A.shape)
            print('b', b.shape)

        if norm_b:
            b_normed = self.normalize_observed_trace()
            b = b_normed

        # Solves argmin_x || Ax - b ||_2 for x>=0
        # A: columns (number of different indel possibilities)
        #  : rows (base calls (4*inference_length) + 1)
        # x: abundances of possibilities, has shape (A.cols, 1)
        # b: actual observed base calls, has shape (A.rows, 1)
        # nnls solves for x, or the abundances of each possible indel
        # xvals contains the inferred sequence abundances
        # rnorm is the residual || Ax-b ||_2

        try:

            xvals, rnorm = nnls(A, b)

            # compute the predicted signal
            predicted = np.dot(A, xvals)
            '''
            if method == "L1":
                lasso_model = linear_model.Lasso(alpha=0.5, positive=True)
                lasso_model.fit(A, b)

                xvals = lasso_model.coef_
                predicted = np.dot(A, xvals)
            '''

        except Exception as e:

            raise type(e)(str(e) + ' A: ' + str(A.shape) + ' B: ' +
                          str(b.shape))

        # calculate pearson's R
        (fit_r, p_val_2_tailed) = pearsonr(predicted, b)
        self.results.r_squared = fit_r**2
        print("R_SQUARED {}".format(self.results.r_squared))

        xtotal = xvals.sum()
        # here we normalize the relative abundance of each possible indel
        for n, x_val in enumerate(xvals):
            self.proposals[n].x_abs = x_val
            self.proposals[n].x_rel = x_val / (1.0 * xtotal)

            if self.r_squared_correction:
                ##addition of (1-r_squared, or missing variance) to the no edit case
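                ##note: as written, both the n == 0 branch and the edited branch below apply the same r_squared scaling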
                if n == 0:
                    self.proposals[0].x_rel = x_val / (
                        1.0 * xtotal) * self.results.r_squared
                ##edited cases
                else:
                    self.proposals[n].x_rel = x_val / (
                        1.0 * xtotal) * self.results.r_squared
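# A minimal standalone sketch (not from the class above) of the same NNLS-plus-Pearson step,
# using a made-up coefficient matrix A and observation vector b:
import numpy as np
from scipy.optimize import nnls
from scipy.stats import pearsonr

rng = np.random.RandomState(0)
A = np.abs(rng.rand(20, 3))               # columns = candidate edit proposals
x_true = np.array([0.7, 0.0, 0.3])        # true relative abundances
b = A @ x_true                            # observed signal

xvals, rnorm = nnls(A, b)                 # solves argmin_x ||Ax - b||_2 subject to x >= 0
predicted = A @ xvals
fit_r, p_val = pearsonr(predicted, b)
print(xvals / xvals.sum(), fit_r ** 2)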
Beispiel #53
0
from scipy.stats.stats import pearsonr

a = [
  # First array of the correlation
]

b = [
  # Second array of the correlation
]

print(pearsonr(a, b))
Beispiel #54
0
def evaluate_autoencoder(y_pred, y_test):
    mse = mean_squared_error(y_pred, y_test)
    r2 = r2_score(y_test, y_pred)
    corr, _ = pearsonr(y_pred.flatten(), y_test.flatten())
    # print('Mean squared error: {}%'.format(mse))
    return {'mse': mse, 'r2_score': r2, 'correlation': corr}
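# Hypothetical usage; the metric imports below are assumed to sit at the top of the original file.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from scipy.stats import pearsonr

y_test = np.random.rand(16, 8)
y_pred = y_test + 0.05 * np.random.randn(16, 8)
print(evaluate_autoencoder(y_pred, y_test))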
def pearson_correlation(x, y):
    return pearsonr(x, y)
    'ep11': [],
    'ep13': [],
    'ep15': []
}
for ep in epochlist:
    for f in fixed_pfc_linfields:
        rvals = []
        pvals = []
        rvals_shuf = []
        pvals_shuf = []
        t = f['Tetrode']
        c = f['Cell']
        e = f['Epoch']
        if f['Epoch'] == ep:

            r, p = pearsonr(f['inleft'], f['inright'])  #get rid of center arm
            rvals.append(r)
            pvals.append(p)
            r, p = pearsonr(f['inleft'], f['outleft'])
            rvals.append(r)
            pvals.append(p)
            r, p = pearsonr(f['inleft'], f['outright'])
            rvals.append(r)
            pvals.append(p)
            r, p = pearsonr(f['inright'], f['outleft'])
            rvals.append(r)
            pvals.append(p)
            r, p = pearsonr(f['inright'], f['outright'])
            rvals.append(r)
            pvals.append(p)
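# A compact alternative (a sketch, with made-up data) to the hand-written block above:
# iterate over field pairs with itertools.combinations, skipping the one pair the original
# omits, ('outleft', 'outright').
import numpy as np
from itertools import combinations
from scipy.stats import pearsonr

f = {k: np.random.rand(50) for k in ('inleft', 'inright', 'outleft', 'outright')}
rvals, pvals = [], []
for a, b in combinations(('inleft', 'inright', 'outleft', 'outright'), 2):
    if (a, b) == ('outleft', 'outright'):
        continue
    r, p = pearsonr(f[a], f[b])
    rvals.append(r)
    pvals.append(p)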
"""
Test dcca_loss
"""

import sys
import numpy as np
import tensorflow as tf
from sklearn.cross_decomposition import CCA
from scipy.stats.stats import pearsonr

sys.path.append('../src/')
from networks import dcca_loss

U = np.random.random_sample(1800).reshape(600,3)
V = np.random.random_sample(1800).reshape(600,3)
result = 0.0
for i in range(3):
    result += pearsonr(U[:,i], V[:,i])[0]
print ("Raw data results: ", result)

cca = CCA(n_components=3)
U_c, V_c = cca.fit_transform(U, V)
result = 0.0
for i in range(3):
    result += pearsonr(U_c[:,i], V_c[:,i])[0]
print ("Sklearn results: ", result)

X1 = tf.placeholder(tf.float32, shape=[None,3])
X2 = tf.placeholder(tf.float32, shape=[None,3])
corr = dcca_loss(X1, X2, K=3, rcov1=1e-4, rcov2=1e-4)
with tf.Session() as sess:
    correlation = sess.run(corr, feed_dict={X1: U, X2: V})
print ("dcca results:", -correlation)
Beispiel #58
0
features_names = ('ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw',
                  'precipitation_amt_mm', 'reanalysis_air_temp_k',
                  'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k',
                  'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k',
                  'reanalysis_precip_amt_kg_per_m2',
                  'reanalysis_relative_humidity_percent',
                  'reanalysis_sat_precip_amt_mm',
                  'reanalysis_specific_humidity_g_per_kg', 'reanalysis_tdtr_k',
                  'station_avg_temp_c', 'station_diur_temp_rng_c',
                  'station_max_temp_c', 'station_min_temp_c',
                  'station_precip_mm')

corr = []
for elm in features_names:
    corr.append(pearsonr(data[elm], data['total_cases'])[0])

y_pos = np.arange(len(features_names))
plt.bar(y_pos, corr, align='center', alpha=0.5)
plt.xticks(y_pos, features_names, rotation='vertical')
plt.ylabel('Correlation')
plt.title('Correlation features vs total cases - Iquitos')
plt.subplots_adjust(top=0.95, bottom=0.45)
plt.show()

print("\nCorrelation between features and total cases:")
for i in range(0, len(features_names)):
    print("\t{0:38s} ==> {1:8f}".format(features_names[i], corr[i]))

#Density Plots
data_density = data.drop(
N = len(P[0])

K0 = np.ones(N + 1)

print(rho(K0))

Kopt = sop.minimize(rho, K0)['x']

print(Kopt)
print(rho(Kopt))

pm = np.mean(P, 0)


def V(p, K):
    return abs(p - pm)**gamma @ K[1:] + K[0]


V_vals = [V(P[sim], Kopt) for sim in range(100)]

plt.scatter(E, V_vals)
xx = np.linspace(min(V_vals), max(V_vals), 1000)
plt.plot(xx, xx, '--k')

# look into whether this is the right kind of correlation coefficient!
print('Correlation Coefficient =', pearsonr(E, V_vals)[0])

ax = plt.gca()
ax.set_aspect('equal')
plt.show()
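# One way to follow up on the comment above: also report a rank-based coefficient (Spearman),
# which is insensitive to monotone rescaling of V (a sketch reusing E and V_vals from above).
from scipy.stats import spearmanr
print('Spearman rho =', spearmanr(E, V_vals)[0])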
Beispiel #60
0
        if DoEMGFit == True:

            [outEMG, successG] = EMFit.EMGFit(xW[EMGinds],
                                              AvgW[EMGinds],
                                              samplewd,
                                              minx0,
                                              20,
                                              solver='trust-constr')

            if successG == False:
                DoEMGFit = False
            else:
                DoEMGFit = True
                yEMG=EMFit.EMG(xW[EMGinds], outEMG['a'], outEMG['x0'], \
                                                 outEMG['xsc'], outEMG['sigma'], outEMG['b'])
                corvalueEMG = pearsonr(AvgW[EMGinds], yEMG)

                for ipar in np.arange(nEMG):
                    outdata[ipar + xmax - xmin + 1,
                            2 * iwdbin] = outEMG[EMGPars[ipar]].value
                    outdata[ipar + xmax - xmin + 1,
                            2 * iwdbin + 1] = outEMG[EMGPars[ipar]].brute_step
                outdata[xmax - xmin + 1 + nEMG, 2 * iwdbin] = corvalueEMG[0]
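                # next column: residual 2-norm of the EMG fit, normalized by the mean signal in the fitting window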
                outdata[xmax - xmin + 1 + nEMG, 2 * iwdbin+1]=\
                    np.sqrt(np.sum((yEMG-AvgW[EMGinds])**2))/np.nanmean(AvgW[EMGinds])

        if DoEMAFit == True:

            if sDom == 'True':
                [outEMA, successA] = EMFit.EMAFit(xW[EMGinds], AvgW[EMGinds], samplewd, minx0, nsample, \
                                                  sameSource=True, sDom=True)