Example #1
1
 def moments(self):
     """Calculate covariance and correlation matrices,
     trait, genotypic and ontogenetic means"""
     zs = np.array([ind["z"] for ind in self.pop])
     xs = np.array([ind["x"] for ind in self.pop])
     ys = np.array([ind["y"] for ind in self.pop])
     bs = np.array([ind["b"] for ind in self.pop])
     ymean = ys.mean(axis=0)
     zmean = zs.mean(axis=0)
     xmean = xs.mean(axis=0)
     bmean = bs.mean(axis=0)
     phenotypic = np.cov(zs, rowvar=0, bias=1)
     genetic = np.cov(xs, rowvar=0, bias=1)
     heritability = genetic[np.diag_indices_from(genetic)] / phenotypic[np.diag_indices_from(phenotypic)]
     corr_phenotypic = np.corrcoef(zs, rowvar=0)
     corr_genetic = np.corrcoef(xs, rowvar=0)
     avgP = avg_ratio(corr_phenotypic, self.modules)
     avgG = avg_ratio(corr_genetic, self.modules)
     return {
         "y.mean": ymean,
         "b.mean": bmean,
         "z.mean": zmean,
         "x.mean": xmean,
         "P": phenotypic,
         "G": genetic,
         "h2": heritability,
         "avgP": avgP,
         "avgG": avgG,
         "corrP": corr_phenotypic,
         "corrG": corr_genetic,
     }
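
# A toy, self-contained illustration of the h2 line above: per-trait heritability is
# diag(G) / diag(P), the ratio of genetic to phenotypic variance for each trait.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(500, 3))                    # stand-in genotypic values
z = x + rng.normal(scale=0.5, size=x.shape)      # stand-in phenotypes with environmental noise
G = np.cov(x, rowvar=False, bias=True)
P = np.cov(z, rowvar=False, bias=True)
print(np.diag(G) / np.diag(P))                   # roughly 0.8 per trait (1 / (1 + 0.5**2))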
Example #2
0
def corr_xy(x, y, similar_type=ECoreCorrType.E_CORE_TYPE_PEARS, **kwargs):
    """
    Public wrapper for computing the correlation coefficient between two iterable sequences
    :param x: iterable sequence
    :param y: iterable sequence
    :param similar_type: ECoreCorrType, defaults to ECoreCorrType.E_CORE_TYPE_PEARS
    :return: the correlation coefficient between x and y
    """
    if similar_type == ECoreCorrType.E_CORE_TYPE_PEARS:
        # Pearson correlation coefficient
        return np.corrcoef(x, y)[0][1]
    elif similar_type == ECoreCorrType.E_CORE_TYPE_SPERM:
        # Spearman correlation, using the custom spearmanr that skips the p_value
        return spearmanr(x, y)[0][1]
    elif similar_type == ECoreCorrType.E_CORE_TYPE_SIGN:
        # Sign (+/-) correlation: take np.sign of each series, then np.corrcoef
        sign_x = np.sign(x)
        sign_y = np.sign(y)
        return np.corrcoef(sign_x, sign_y)[0][1]
    elif similar_type == ECoreCorrType.E_CORE_TYPE_ROLLING:
        # pop the 'window' kwarg, defaulting to g_rolling_corr_window
        window = kwargs.pop('window', g_rolling_corr_window)

        # time-weighted rolling correlation requires the sequences to be pd.Series
        if not isinstance(x, pd.Series):
            x = pd.Series(x)
        if not isinstance(y, pd.Series):
            y = pd.Series(y)
        return rolling_corr(x, y, window=window)
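
# A quick check of the Pearson branch above with plain numpy: np.corrcoef returns the
# 2x2 correlation matrix, and the off-diagonal entry is the coefficient itself.
import numpy as np

a = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
b = np.array([2.1, 3.9, 6.2, 8.0, 9.8])
print(np.corrcoef(a, b)[0, 1])   # close to 1.0 for this nearly linear pair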
Example #3
0
    def correlate(self, signal):
        """
        Correlate records against one or many one-dimensional arrays.

        Parameters
        ----------
        signal : array-like
            One or more signals to correlate against.
        """
        s = asarray(signal)

        if s.ndim == 1:
            if size(s) != self.shape[-1]:
                raise ValueError("Length of signal '%g' does not match record length '%g'"
                                 % (size(s), self.shape[-1]))

            return self.map(lambda x: corrcoef(x, s)[0, 1], index=[1])

        elif s.ndim == 2:
            if s.shape[1] != self.shape[-1]:
                raise ValueError("Length of signal '%g' does not match record length '%g'"
                                 % (s.shape[1], self.shape[-1]))
            newindex = arange(0, s.shape[0])
            return self.map(lambda x: array([corrcoef(x, y)[0, 1] for y in s]), index=newindex)

        else:
            raise Exception('Signal to correlate with must have 1 or 2 dimensions')
def simple_cv(valence_regressors, arousal_regressors, valence_movie_matrices, arousal_movie_matrices, 
	valence_labels_movies, arousal_labels_movies, threshold, valence_movie_t, arousal_movie_t):
	n_train_matrices = 21
	n_valid_matrices = 6
	n_test_matrices = 3
	valence_labels = join_vectors(valence_labels_movies)
	arousal_labels = join_vectors(arousal_labels_movies)
	print len(valence_labels), len(arousal_labels)
	processes = []
	n_valence_features, n_arousal_features = threshold_n_features(threshold, valence_movie_t, arousal_movie_t)
	valence_predictions, arousal_predictions = np.array([], dtype = 'float'), np.array([], dtype = 'float')
	for i in range(0, 10):
		valence_test_predictions, arousal_test_predictions = fold_training(valence_predictions, arousal_predictions, i, 
			valence_regressors, arousal_regressors, 
			valence_movie_matrices, arousal_movie_matrices, 
			valence_labels_movies, arousal_labels_movies, 
			n_test_matrices, n_train_matrices, n_valid_matrices, 
			n_valence_features, n_arousal_features)
		valence_predictions = np.append(valence_predictions, valence_test_predictions)
		arousal_predictions = np.append(arousal_predictions, arousal_test_predictions)

	print math.sqrt(mean_squared_error(valence_labels, valence_predictions)), np.corrcoef(valence_labels, 
			valence_predictions)[0][1]
	print math.sqrt(mean_squared_error(arousal_labels, arousal_predictions)), np.corrcoef(arousal_labels, 
			arousal_predictions)[0][1]
Example #5
0
 def on_epoch_end(self, epoch, logs=None):
     
     if self.currentEpoch % self.freq == 0:
         self.results["epochs"].append(self.currentEpoch) # add the epoch's number
         
         evaluation = "prediction (r^2)"
         resultsText = ""
 
         if self.M is not None:
             yhatKeras = self.model.predict(self.M)
             yhatKeras += self.modelEpsilon # for numerical stability
             rSQ = np.corrcoef( self.y, yhatKeras, rowvar=0)[1,0]**2  # 0.1569    
             self.results["train_accuracy"].append(rSQ) 
             resultsText += "Training " +evaluation +":" +  str(rSQ) + " / "
 
         
         if self.M_validation is not None:
             yhatKeras = self.model.predict(self.M_validation)
             yhatKeras += self.modelEpsilon # for numerical stability
             rSQ = np.corrcoef( self.y_validation, yhatKeras, rowvar=0)[1,0]**2  # 0.1569  
             self.results["test_accuracy"].append(rSQ)
             resultsText += "Test " +evaluation +":" +  str(rSQ)
             
         print(resultsText, flush = True)
         
     self.currentEpoch += 1
Example #6
0
def plotetc(x,y,stat,season):
	cc_all = np.corrcoef(x, y['All'])[0][1]
	cc_opt = np.corrcoef(x, y['Optimal'])[0][1]
	cc_b1 = np.corrcoef(x, y['b1'])[0][1]
	cc_b2 = np.corrcoef(x, y['b2'])[0][1]
	print "Correlation coefficients for scores with {0} NAO during {1}".format(stat, season)
	print "Optimal\tb1\tb2\tAll"
	print "{0:.3f}\t{1:.3f}\t{2:.3f}\t{3:.3f}\n".format(cc_opt, cc_b1, cc_b2, cc_all)
	# matplotlib.rcParams['axes.grid'] = True
# 	matplotlib.rcParams['legend.fancybox'] = True
# 	matplotlib.rcParams['figure.figsize'] = 18, 9
# 	matplotlib.rcParams['savefig.dpi'] = 300
# 	# Set figure name and number for pdf ploting
# 	pdfName = '{0}_{1}.pdf'.format(stat, season)
# 	pp1 = PdfPages(os.path.join('/Users/andrew/Google Drive/Work/MeltModel/Output/',pdfName))
# 	fig1 = plt.figure(1)
# 	ax1 = fig1.add_subplot(111)
# 	ax1.plot(x, y['Optimal'], 'ok', label='Optimum')
# 	ax1.plot(x, y['All'], 'or', label='All')
# 	ax1.plot(x, y['b1'], 'og', label='b1')
# 	ax1.plot(x, y['b2'], 'ob', label='b2')
# 	ax1.set_xlabel("NAO")
# 	ax1.set_xlim((-3,3))
# 	ax1.set_ylabel("Score")
# 	#
# 	#ax2 = ax1.twinx()
# 	#ax2.plot(x, y['AdjOptimal'], 'ok', label='Adjusted')
# 	#ax2.set_ylabel("Adjusted Score")
# 	plt.title(stat)
# 	plt.legend(loc='upper left')
# 	pp1.savefig(bbox_inches='tight')
# 	pp1.close()
# 	plt.close()
	return 0
Example #7
0
def make_figures():

    wave1 = make_wave(0)
    wave2 = make_wave(offset=1)

    thinkplot.preplot(2)
    wave1.segment(duration=0.01).plot(label='wave1')
    wave2.segment(duration=0.01).plot(label='wave2')

    numpy.corrcoef(wave1.ys, wave2.ys)

    thinkplot.save(root='autocorr1',
                   xlabel='time (s)',
                   ylabel='amplitude')


    offsets = numpy.linspace(0, PI2, 101)

    corrs = []
    for offset in offsets:
        wave2 = make_wave(offset)
        corr = numpy.corrcoef(wave1.ys, wave2.ys)[0, 1]
        corrs.append(corr)
    
    thinkplot.plot(offsets, corrs)
    thinkplot.save(root='autocorr2',
                   xlabel='offset (radians)',
                   ylabel='correlation',
                   xlim=[0, PI2])
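
# A sketch of the relationship the loop above traces out, assuming make_wave(offset)
# produces sinusoids that differ only in phase: the correlation between two sinusoids
# of the same frequency follows cos(offset).
import numpy as np

t = np.linspace(0, 2 * np.pi, 1000, endpoint=False)
for offset in (0.0, np.pi / 2, np.pi):
    r = np.corrcoef(np.sin(t), np.sin(t + offset))[0, 1]
    print(offset, round(r, 3))   # approximately 1.0, 0.0, -1.0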
Example #8
0
def corr(x, y, reps=10**4, prng=None):
    """
    Simulate permutation p-value for Spearman correlation coefficient

    Parameters
    ----------
    x : array-like
    y : array-like
    reps : int
    prng : RandomState instance or None, optional (default=None)
        If RandomState instance, prng is the pseudorandom number generator;
        If None, the pseudorandom number generator is the RandomState
        instance used by `np.random`.

    Returns
    -------
    tuple
        Returns test statistic, left-sided p-value,
        right-sided p-value, two-sided p-value, simulated distribution
    """
    if prng is None:
        prng = RandomState()
    tst = np.corrcoef(x, y)[0, 1]
    sims = [np.corrcoef(prng.permutation(x), y)[0, 1] for i in range(reps)]
    left_pv = np.sum(sims <= tst)/reps
    right_pv = np.sum(sims >= tst)/reps
    two_sided_pv = np.sum(np.abs(sims) >= np.abs(tst))/reps
    return tst, left_pv, right_pv, two_sided_pv, sims
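
# The idea behind corr() above, written out inline with plain numpy: compare the observed
# Pearson r against its null distribution under random permutations of x.
import numpy as np

rng = np.random.default_rng(42)
x = np.arange(30.0)
y = x + rng.normal(scale=10.0, size=30)
r_obs = np.corrcoef(x, y)[0, 1]
r_null = np.array([np.corrcoef(rng.permutation(x), y)[0, 1] for _ in range(2000)])
print(r_obs, np.mean(r_null >= r_obs))   # observed r and its right-sided permutation p-value
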
def main():
	#change these ranges...
	Chloro_range = range(1,6)
	LAI_range = range(1,10)

	TGI_data = np.empty([len(Chloro_range),len(LAI_range)])
	G_R_data = np.empty([len(Chloro_range),len(LAI_range)])
	VARI_data = np.empty([len(Chloro_range),len(LAI_range)])
	for chloro in Chloro_range:
		for LAI in LAI_range:
			TGI, G_R_ratio, VARI = SimulatePlant(1.5, 10*chloro, 8, 0, 0.01, .009, 1, 10*LAI, 0.01, 30, 0, 10, 0, pyprosail.Planophile)
			TGI_data[chloro-1][LAI-1] = TGI
			G_R_data[chloro-1][LAI-1] = G_R_ratio
			VARI_data[chloro-1][LAI-1] = VARI
	print "TGI:"
	print TGI_data # in these, going down is increasing chlorophyl, and going to the right is increasing LAI
	print "G/R Index"
	print G_R_data
	print "VARI"
	print VARI_data

	#print "Chloro - TGI Corr:", np.corrcoef(Chloro_range, TGI_data[:,3])[1][0]
	#print "Chloro - Green/Red Ratio Corr:", np.corrcoef(Chloro_range, G_R_data[:,3])[1][0]
	#print "LAI - TGI Corr:", np.corrcoef(LAI_range, TGI_data[3])[1][0]
	#print "LAI - Green/Red Ratio Corr:", np.corrcoef(LAI_range, G_R_data[3])[1][0]

	print "LAI - VARI Corr:", np.corrcoef(LAI_range, VARI_data[3])[1][0]
	print "Chloro - VARI Corr:", np.corrcoef(Chloro_range, VARI_data[:,3])[1][0]

	D3Plot(VARI_data, "VARI", Chloro_range, LAI_range)
	D3Plot(TGI_data, "TGI", Chloro_range, LAI_range)
	D3Plot(G_R_data, "G/R", Chloro_range, LAI_range)
Example #10
0
def main():
    if len(sys.argv) < 2:
        print("Usage: ./bootstrap.py <project_dir>")
        sys.exit(-1)

    project_dir = sys.argv[1]
    project = Project(join(project_dir, "project.json"))

    # For each bootstraped model
    for (bootstrap_number, bootstrap) in enumerate(project.bootstraps):
        boot_dir = os.path.abspath(join(project_dir, "bootstrap{}-{}".format(bootstrap_number, type(bootstrap.base_model).__name__)))
        os.makedirs(boot_dir, exist_ok=True)

        # Interior
        f, ax = plot_covariance_matrix(np.corrcoef(bootstrap.internals, rowvar=0), ["fx", "fy", "ppx", "ppy", "ps"])
        savefigure(f, join(boot_dir, "covariance-interior"))

        # For each camera
        for (cam_number, cam) in enumerate(bootstrap.extract_cameras()):
            # Scatter and distribution plots
            f, ax = plot_scatter(cam[:,0], cam[:, 1])
            savefigure(f, join(boot_dir, "cam{}-xy".format(cam_number)))
            f, ax = plot_distribution(cam[:,2])
            ax.set_xlabel("Z")
            savefigure(f, join(boot_dir, "cam{}-z".format(cam_number)))

            # X, Y, Z and angles covariances matrices
            S = np.corrcoef(cam, rowvar=0)
            f, ax = plot_covariance_matrix(S[:3, :3], ["X", "Y", "Z"])
            savefigure(f, join(boot_dir, "covariance-cam{}-pos".format(cam_number)))
            f, ax = plot_covariance_matrix(S[3:, 3:], [r"$\Omega$", "$\phi$",
                r"$\kappa$"])
            savefigure(f, join(boot_dir, "covariance-cam{}-angles".format(cam_number)))
Example #11
0
    def calc_correlation(self, Xs):
        
        for X in Xs:
            pass
            print np.corrcoef(X.T)
#             np.savetxt("correlations.csv",  np.corrcoef(X.T), delimiter=",")
            print 3
Example #12
0
def corr(x, y, reps=10**4, seed=None):
    r"""
    Simulate permutation p-value for Spearman correlation coefficient

    Parameters
    ----------
    x : array-like
    y : array-like
    reps : int
    seed : RandomState instance or {None, int, RandomState instance}
        If None, the pseudorandom number generator is the RandomState
        instance used by `np.random`;
        If int, seed is the seed used by the random number generator;
        If RandomState instance, seed is the pseudorandom number generator


    Returns
    -------
    tuple
        Returns test statistic, left-sided p-value,
        right-sided p-value, two-sided p-value, simulated distribution
    """
    prng = get_prng(seed)
    tst = np.corrcoef(x, y)[0, 1]
    sims = [np.corrcoef(prng.permutation(x), y)[0, 1] for i in range(reps)]
    left_pv = np.sum(sims <= tst) / reps
    right_pv = np.sum(sims >= tst) / reps
    two_sided_pv = np.min([1, 2 * np.min([left_pv, right_pv])])
    return tst, left_pv, right_pv, two_sided_pv, sims
Example #13
0
def main():
    # Define matrix dimensions
    Nobs = 1000 # Number of observation
    Nvars = 50000 # Number of variables
    Ncomp = 100 # Number of components

    # Simulated true sources
    S_true = np.random.logistic(0,1,(Ncomp,Nvars))
    # Simulated true mixing
    A_true = np.random.normal(0,1,(Nobs,Ncomp))
    # X = AS
    X = np.dot(A_true,S_true)
    # add some noise
    X = X + np.random.normal(0,1,X.shape)
    # apply ICA on X and ask for 2 components

    model = ica1(Ncomp)
    
    start = time.time()
    A,S = model.fit(X)
    total = time.time() - start
    print('total time: {}'.format(total))
    # compare if our estimates are accurate
    # correlate A with Atrue and take 
    aCorr = np.abs(np.corrcoef(A.T,A_true.T)[:Ncomp,Ncomp:]).max(axis = 0).mean()
    sCorr = np.abs(np.corrcoef(S,S_true)[:Ncomp,Ncomp:]).max(axis = 0).mean()

    print("Accuracy of estimated sources: %.2f"%sCorr)
    print("Accuracy of estimated mixing: %.2f"%aCorr)
Example #14
0
def corr_matrix(df, similar_type=ECoreCorrType.E_CORE_TYPE_PEARS, **kwargs):
    """
    Unlike corr_xy, this is not a pairwise computation: apart from the similarity type, the
    only input is a single matrix, which must be a pd.DataFrame or np.array
    :param df: pd.DataFrame or np.array; named df because it is converted to a pd.DataFrame internally
    :param similar_type: ECoreCorrType, defaults to ECoreCorrType.E_CORE_TYPE_PEARS
    :return: pd.DataFrame
    """
    if isinstance(df, np.ndarray):
        # convert np.ndarray to DataFrame for uniform handling
        df = pd.DataFrame(df)

    if not isinstance(df, pd.DataFrame):
        raise TypeError('df must be a pd.DataFrame object!')

    # FIXME: should not also accept ECoreCorrType.E_CORE_TYPE_PEARS.value here; compare strictly by ECoreCorrType enum equality
    if similar_type == ECoreCorrType.E_CORE_TYPE_PEARS or similar_type == ECoreCorrType.E_CORE_TYPE_PEARS.value:
        # Pearson correlation
        corr = np.corrcoef(df.T)
    elif similar_type == ECoreCorrType.E_CORE_TYPE_SPERM or similar_type == ECoreCorrType.E_CORE_TYPE_SPERM.value:
        # Spearman correlation, using the custom spearmanr that skips the p_value
        corr = spearmanr(df)
    elif similar_type == ECoreCorrType.E_CORE_TYPE_SIGN or similar_type == ECoreCorrType.E_CORE_TYPE_SIGN.value:
        # Sign (+/-) correlation: take np.sign, then np.corrcoef
        corr = np.corrcoef(np.sign(df.T))
    elif similar_type == ECoreCorrType.E_CORE_TYPE_ROLLING or similar_type == ECoreCorrType.E_CORE_TYPE_ROLLING.value:
        # pop the 'window' kwarg, defaulting to g_rolling_corr_window
        window = kwargs.pop('window', g_rolling_corr_window)
        corr = rolling_corr(df, window=window)
    else:
        # fall back to the default computation, np.corrcoef(df.T)
        corr = np.corrcoef(df.T)
    # wrap the result in a pd.DataFrame, using df.columns as both the row and column index
    corr = pd.DataFrame(corr, index=df.columns, columns=df.columns)
    return corr
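
# Hypothetical usage of corr_matrix() above; assumes corr_matrix and the ECoreCorrType
# enum it references are importable from the module this snippet comes from.
import numpy as np
import pandas as pd

frame = pd.DataFrame(np.random.randn(200, 3), columns=['a', 'b', 'c'])
print(corr_matrix(frame))   # 3x3 Pearson correlation matrix labelled by column name
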
def Sensitivity_printImpactResults(finalResults):
    # Performs numerical analysis on sensitivity trials
    resultsFile = "Results\\Impact\\Impact_Correlation.txt"
    with open(resultsFile, 'w') as f:
        writer = csv.writer(f, delimiter = '\n', quoting=csv.QUOTE_NONE, 
            quotechar='', escapechar='\\')
        for subResult in finalResults:
            plots = {
                1: "Depression", 
                2: "Concealment", 
                3: "Discrimination", 
                4: "Support", 
                5: "Policy Score"
            }

            xArr = subResult[0]
            label = subResult[-1]

            yArrCorrelation_1 = np.corrcoef(xArr, subResult[1])[0][1]
            yArrCorrelation_2 = np.corrcoef(xArr, subResult[2])[0][1]

            depressCorrelate = "{} vs. Depression Correlation: {}".\
                format(label, yArrCorrelation_1)
            concealCorrelate = "{} vs. Concealment Correlation: {}".\
                format(label, yArrCorrelation_2)

            row = [depressCorrelate, concealCorrelate]
            writer.writerow(row)

            for plot in plots:
                Sensitivity_plotGraphs(xArr, subResult[plot], label, 
                    plots[plot], "impact")
    def correlation(self):
        keys_a = set(self.gdp.keys())
        keys_b = set(self.complaint_allstate.keys())
        intersection = keys_a & keys_b
        corr_dict = {}
        ax= []
        ay = []
        for v in intersection:
            y = self.gdp[v].values()
            x = self.complaint_allstate[v].values()
            ax.append(x)
            ay.append(y)
            '''
            if(len(x) != len(y)):
                continue
            else:
                corr_dict.update({v:np.corrcoef(x,y)[0,1]})'''
        if len(ax) != len(ay):
            if(len(ax)> len(ay)):
                ay = ay[:len(ax)]
            else:
                ax = ax[:len(ay)]
        print len(flatten(ax)),len(flatten(ay))
        print np.corrcoef(flatten(ax)[:735],flatten(ay))[0,1]
        corrdict = OrderedDict(sorted(corr_dict.items(), key=itemgetter(1)))
        #print corrdict

        '''
Example #17
0
def learnStructure(dataP, dataS, Pp, Ps, TAN= True):
    tempMatrix = [[0 for i in range(len(dataP))] for j in range(len(dataP))]
    for i in range(len(dataP)):
        for j in range(i+1, len(dataP)):
            temp = 0.0
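            # For roughly Gaussian variables, -0.5*log(1 - r^2) is the mutual information of a
            # pair, so using 0.5*log(1 - r^2) as the edge weight makes the minimum spanning
            # tree below effectively a maximum-mutual-information (Chow-Liu style) tree.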
            if np.corrcoef(dataP[i], dataP[j])[0][1] != 1.0:
                temp += Pp * math.log(1-((np.corrcoef(dataP[i], dataP[j])[0][1])**2))
            if np.corrcoef(dataS[i], dataS[j])[0][1] != 1.0:
                temp += Ps * math.log(1-((np.corrcoef(dataS[i], dataS[j])[0][1])**2))
            temp *= (0.5)
            tempMatrix[i][j] = temp
            #tempMatrix[j][i] = temp
    MaxG = nx.DiGraph()
    if TAN:
        G = nx.from_scipy_sparse_matrix(minimum_spanning_tree(csr_matrix(tempMatrix)))
        adjList = G.adj
        i = 0
        notReturnable = {}
        MaxG = getDirectedTree(adjList, notReturnable, MaxG, i)
    else:
        G = nx.Graph(np.asmatrix(tempMatrix))
        adjList = sorted([(u,v,d['weight']) for (u,v,d) in G.edges(data=True)], key=lambda x:x[2])
        i = 2
        MaxG = getDirectedGraph(adjList, MaxG, i)
    return MaxG
Example #18
0
    def test_corrcoef2(self):
        # Test that _corrcoef2 returns the same result that np.corrcoef would
        n, m = tuple(np.random.randint(2, 5, size=2))
        mean = np.random.uniform(-1, 1, size=m)
        cov = np.random.uniform(0, 1./m, size=(m, m))
        cov = (cov + cov.T) / 2
        cov.flat[::m + 1] = 1.0
        X1 = np.random.multivariate_normal(mean, cov, size=n)
        X2 = np.random.multivariate_normal(mean, cov, size=n)
        expected = np.corrcoef(X1, X2, rowvar=True)[:n, n:]
        np.testing.assert_almost_equal(
            _corrcoef2(X1, X2, axis=1),
            expected,
            decimal=9
        )

        expected = np.corrcoef(X1, X2, rowvar=False)[:m, m:]
        np.testing.assert_almost_equal(
            _corrcoef2(X1, X2, axis=0),
            expected,
            decimal=9,
        )

        with self.assertRaises(ValueError):
            _corrcoef2(X1, X2, axis=10)
Example #19
0
def PrintResults(all_ground_truth,all_b1_output,all_b2_output,all_b3_output,all_b4_output,all_combined_output):
   print 'Error on baseline 1: ', numpy.std(all_ground_truth - all_b1_output,axis=0), \
   numpy.mean(numpy.std(all_ground_truth - all_b1_output,axis=0))
   correlation_matrix = numpy.corrcoef(all_ground_truth.T,all_b1_output.T)
   print 'cur_rho: ', correlation_matrix[0,3], correlation_matrix[1,4], correlation_matrix[2,5], \
   (correlation_matrix[0,3]+correlation_matrix[1,4]+correlation_matrix[2,5])/3

   print 'Error on baseline 2: ', numpy.std(all_ground_truth - all_b2_output,axis=0), \
   numpy.mean(numpy.std(all_ground_truth - all_b2_output,axis=0))
   correlation_matrix = numpy.corrcoef(all_ground_truth.T,all_b2_output.T)
   print 'cur_rho: ', correlation_matrix[0,3], correlation_matrix[1,4], correlation_matrix[2,5], \
   (correlation_matrix[0,3]+correlation_matrix[1,4]+correlation_matrix[2,5])/3

   print 'Error on baseline 3: ', numpy.std(all_ground_truth - all_b3_output,axis=0), \
   numpy.mean(numpy.std(all_ground_truth - all_b3_output,axis=0))
   correlation_matrix = numpy.corrcoef(all_ground_truth.T,all_b3_output.T)
   print 'cur_rho: ', correlation_matrix[0,3], correlation_matrix[1,4], correlation_matrix[2,5], \
   (correlation_matrix[0,3]+correlation_matrix[1,4]+correlation_matrix[2,5])/3

   print 'Error on baseline 4: ', numpy.std(all_ground_truth - all_b4_output,axis=0), \
   numpy.mean(numpy.std(all_ground_truth - all_b4_output,axis=0))
   correlation_matrix = numpy.corrcoef(all_ground_truth.T,all_b4_output.T)
   print 'cur_rho: ', correlation_matrix[0,3], correlation_matrix[1,4], correlation_matrix[2,5], \
   (correlation_matrix[0,3]+correlation_matrix[1,4]+correlation_matrix[2,5])/3

   print 'Error on combined: ', numpy.std(all_ground_truth - all_combined_output,axis=0), \
   numpy.mean(numpy.std(all_ground_truth - all_combined_output,axis=0))
   correlation_matrix = numpy.corrcoef(all_ground_truth.T,all_combined_output.T)
   print 'cur_rho: ', correlation_matrix[0,3], correlation_matrix[1,4], correlation_matrix[2,5], \
   (correlation_matrix[0,3]+correlation_matrix[1,4]+correlation_matrix[2,5])/3
Example #20
0
def covandcoef(compare_data):
	hx = []
	hy = []

	ox = []
	oy = []

	tx = []
	ty = []

	for i in compare_data:
		hx.append(i[4])
		hy.append(i[7])


	for i in range(0,7):
		ox.append(compare_data[i][4])
		oy.append(compare_data[i][7])


	for i in range(0,89):
		tx.append(compare_data[i][4])
		ty.append(compare_data[i][7])


	X = np.vstack((hx,hy))
	Z = np.vstack((ox,oy))
	Y = np.vstack((tx,ty))


	return [[np.cov(X)[0][1],np.corrcoef(X)[0][1]],[np.cov(Y)[0][1],np.corrcoef(Y)[0][1]],[np.cov(Z)[0][1],np.corrcoef(Z)[0][1]]]
def correl():
	for eof in [ 1, 2 ]:
		cook=[]
		glue=[]
		for model in models:
			fmod = '{0}/run1/dtred/{0}.space{1}.txt'.format(model,eof)
			fobs = '../../sst-data/detrend/ersst.space{0}.txt'.format(eof)
			eof_mod = np.loadtxt(fmod)
			print fmod
			eof_obs = np.loadtxt(fobs)
			#print eof_mod[0:40]
			idm = np.where(eof_mod == 999.)
			ido = np.where(eof_obs == 999.)
			eof_mod = np.delete(eof_mod, idm)
			eof_obs = np.delete(eof_obs, idm)
			cook.append([ model, np.corrcoef(eof_mod, eof_obs)[0, 1]] )
			
			fmodpc = '{0}/run1/dtred/PC{1}.{0}.txt'.format(model,eof)
			fobspc = '../../sst-data/detrend/PC{0}.annual.txt'.format(eof)
			pc_mod = np.loadtxt(fmodpc)
			pc_obs = np.loadtxt(fobspc)
			#print pc_mod.shape, pc_obs.shape
			glue.append([model, np.corrcoef(pc_mod, pc_obs)[0, 1]] )

	# --- Writing spatial correlation from models and Observation - EOF
		npcook = np.array(cook)
		np.savetxt('eof{0}.ar4.correl.txt'.format(eof), npcook, fmt= '%s     %6s')

	# --- Writing time correlation from models and Observation - PC
		npglue = np.array(glue)
		np.savetxt('pc{0}.ar4.correl.txt'.format(eof), npglue, fmt= '%s     %6s')
 def test_nancorr_pearson(self):
     targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
     targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
     self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson")
     targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
     targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
     self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson")
def correlate(name, p_index, c_index, d_mean, d_sd):
    object = bpy.data.objects[name]
    uvs = getUVs(object, p_index)
    distances = getDistancesPerParticle(model.CONNECTION_RESULTS[c_index]['d'])

    uvs2 = []
    delays = []
    for index, ds in enumerate(distances):
        samples = []
        for i in range(1):
            delay_mm = max(delayModel_delayDistribLogNormal(d_mean, d_sd), 0.1)
            uvs2.append(list(uvs[index,:]))
            delays.append(max(ds * delay_mm, 0.1))
            #samples.append( max(ds * delay_mm, 0.1) )
        #delays.append(np.mean(samples))
    delays = np.array(delays)
    
    uvs2 = np.array(uvs2)
    print(len(uvs2))
    
    corr_dist_x = np.corrcoef(uvs[:,0], distances)
    corr_dist_y = np.corrcoef(uvs[:,1], distances)    
    corr_dela_x = np.corrcoef(uvs2[:,0], delays)
    corr_dela_y = np.corrcoef(uvs2[:,1], delays)    
    print('Correlation x with distance: %f' % corr_dist_x[0][1])
    print('Correlation x with delay: %f' % corr_dela_x[0][1])
    print('Correlation y with distance: %f' % corr_dist_y[0][1])
    print('Correlation y with delay: %f' % corr_dela_y[0][1])
Example #24
0
    def traverseplot(Xin,Yin,Field,name):
        string,nodeind,leaf,label=TraverseTree(regTree,Xin,Field)
        nband=Yin.shape[1]
        k=0
        for j in leaf:
            Ytemp=Yin[nodeind[j],:]
            Xtemp=Xin[nodeind[j],:]
            Yptemp=regTreeModel.predict(Xtemp)

            fitmodel=fitModelList[k]
            if predind.ndim==1:
                Ypnewtemp=fitmodel.predict(Xtemp[:,predind.astype(int)])
            else:
                Ypnewtemp=fitmodel.predict(Xtemp[:,predind[k,:].astype(int)])

            rmse,rmse_band=RMSECal(Yptemp,Ytemp)
            rmsenew,rmse_bandnew=RMSECal(Ypnewtemp,Ytemp)

            n=nband
            f, axarr = plt.subplots(int(np.ceil(n/2)), 2,figsize=(10,12))
            for i in range(n):
                pj=int(np.ceil(i/2))
                pi=int(i%2)
                axarr[pj, pi].plot(Yptemp[:,i],Ytemp[:,i],'.')
                axarr[pj, pi].plot(Ypnewtemp[:,i],Ytemp[:,i],'.r')
                axarr[pj, pi].set_title('cluster %s,\n cc=%.3f -> %.3f, r=%.3f -> %.3f'\
                                        %(i,np.corrcoef(Yptemp[:,i],Ytemp[:,i])[0,1],np.corrcoef(Ypnewtemp[:,i],Ytemp[:,i])[0,1],
                                          rmse_band[i],rmse_bandnew[i]))
                plotFun.plot121line(axarr[pj, pi])
            f.tight_layout()
            f.suptitle(string[j],fontsize=8)
            f.subplots_adjust(top=0.9)
            plt.savefig(savedir+name+"_node%i"%j)
            plt.close()
            k=k+1
def test_c_within_and_c_between():

  # mocking the correlation values store
  test_c_values_store = {"test_network_1":{"test_roi_1":((0,0,0),(0,0,1)), "test_roi_2":((0,1,0),(0,1,1))},
                    "test_network_2":{"test_roi_3":((1,0,0),(1,0,1)), "test_roi_4":((1,1,0),(1,1,1))}}


  data = np.zeros((2,2,2,3))
  data[0,0,0] = [1,2,3]
  data[0,0,1] = [1,2,3]
  data[0,1,0] = [-1,-2,-3]
  data[0,1,1] = [-1,-2,-3]
  data[1,0,0] = [5,4,37]
  data[1,0,1] = [5,4,37]
  data[1,1,0] = [-3,-244,-1]
  data[1,1,1] = [-3,-244,-1]


  actual = connectivity_utils.c_within(data, test_c_values_store)

  # expected values are explicitly calculated according to the rules explained in the paper
  expected = {'test_network_1':(np.corrcoef([1,2,3],[-1,-2,-3])[1,0],), 'test_network_2': (np.corrcoef([5,4,37],[-3,-244,-1])[1,0],)}

  assert_almost_equal(actual['test_network_1'], expected['test_network_1'])
  assert_almost_equal(actual['test_network_2'], expected['test_network_2'])

  actual = connectivity_utils.c_between(data, test_c_values_store)

  # expected values are explicitly calculated according to the rules explained in the paper
  expected = [np.corrcoef([1,2,3],[5,4,37])[1,0], np.corrcoef([1,2,3],[-3,-244,-1])[1,0],np.corrcoef([-1,-2,-3],[5,4,37])[1,0], np.corrcoef([-1,-2,-3],[-3,-244,-1])[1,0]]

  assert_almost_equal(np.sort(expected), np.sort(actual['test_network_1-test_network_2']))
def testSVM(linkSet, patterns = None):
    cont, ncont, vectors = [], [], []
    print "\nTesting\n"
    classifier = None
    if patterns == None:
		classifier = pickle.load(open("svm-classifier", "r"))
    else:
		classifier = pickle.load(open("svm-classifier2", "r"))
    
    for link in linkSet:	
		vec = getFeatures(link, patterns)
		vectors += [vec]
		result = classifier.predict(vec)
		if result == 1.0:
			if link.endswith(".htm"):
				cont += [link + 'l']
			else:
				cont += [link]
		else:
			ncont += [link]
	
    cont = [link for link in cont if checkBoilerplate(link)]
    #ones, zeros = clusterSimilar(cont)
    
    #cont = ones
    #ncont += zeros

    print "\nCorrelation Matrix\n"
    print numpy.corrcoef(numpy.transpose(numpy.array(vectors)))
    return sorted(cont, key=lambda x: len(x)), sorted(ncont, key=lambda x: len(x))
Example #27
0
    def _call(self, dataset):
        """Computes the aslmap_dcm = sl_dcm(group_data)verage correlation in similarity structure across chunks."""
        
        chunks_attr = self.chunks_attr
        nchunks = len(np.unique(dataset.sa[chunks_attr]))
        if nchunks < 2:
            raise RuntimeError("This measure calculates similarity consistency across "
                               "chunks and is not meaningful for datasets with only "
                               "one chunk.")

        #calc neur sim b/w targ_comp targets per subject
        neur_sim={}
        for s in np.unique(dataset.sa[chunks_attr]):
            ds_s = dataset[dataset.sa.chunks == s]
            neur_sim[s+'1'] = 1 - np.corrcoef(ds_s[ds_s.sa.targets == self.targ_comp1[0]],ds_s[ds_s.sa.targets == self.targ_comp1[1]])[0][1]            
            neur_sim[s+'2'] = 1 - np.corrcoef(ds_s[ds_s.sa.targets == self.targ_comp2[0]],ds_s[ds_s.sa.targets == self.targ_comp2[1]])[0][1]            

        #combine xSs_behavs
        xSs_behav = {}
        for s in self.xSs_behav1:
            xSs_behav[s+'1'] = self.xSs_behav1[s]
        for s in self.xSs_behav2:
            xSs_behav[s+'2'] = self.xSs_behav2[s]

        #create dsets where cols are neural sim and mt sim for correlations
        behav_neur = np.array([[xSs_behav[s],neur_sim[s]] for s in neur_sim])
        #correlate behav with neur sim b/w subjects
        if self.comparison_metric == 'spearman':
            xSs_corr = pearsonr(rankdata(behav_neur[:,0]), rankdata(behav_neur[:,1]))
        else:
            xSs_corr = pearsonr(behav_neur[:,0], behav_neur[:,1])
        
        #returns fish z transformed r coeff ; could change to be p value if wanted...
        return Dataset(np.array([np.arctanh(xSs_corr[0])])) 
def _region_features_for(histone, dna, region):
    pixels0 = histone[region].ravel()
    pixels1 = dna[region].ravel()
    bin0 = pixels0 > histone.mean()
    bin1 = pixels1 > dna.mean()
    overlap = [np.corrcoef(pixels0, pixels1)[0, 1], (bin0 & bin1).mean(), (bin0 | bin1).mean()]

    spi = mh.sobel(histone, just_filter=1)
    sp = spi[mh.erode(region)]
    sdi = mh.sobel(dna, just_filter=1)
    sd = sdi[mh.erode(region)]
    sobels = [
        np.dot(sp, sp) / len(sp),
        np.abs(sp).mean(),
        np.dot(sd, sd) / len(sd),
        np.abs(sd).mean(),
        np.corrcoef(sp, sd)[0, 1],
        np.corrcoef(sp, sd)[0, 1] ** 2,
        sp.std(),
        sd.std(),
    ]

    return np.concatenate(
        [
            [region.sum()],
            haralick(histone * region, ignore_zeros=True).mean(0),
            haralick(dna * region, ignore_zeros=True).mean(0),
            overlap,
            sobels,
            haralick(mh.stretch(sdi * region), ignore_zeros=True).mean(0),
            haralick(mh.stretch(spi * region), ignore_zeros=True).mean(0),
        ]
    )
Example #29
0
 def test_simulate_density(self):
     # generate a rings object both from an atomic and density model and
     # ensure the correlations match
     
     num_shots = 100
     num_phi   = 1024
     
     nq = 100 # number of q vectors
     q_values = [1.0, 2.0]
     
     # atomic model
     traj = mdtraj.load(ref_file('pentagon.pdb'))        
     r1 = xray.Rings.simulate(traj, 1, q_values, num_phi, num_shots)
                               
     # density model
     grid_dimensions = [151,] * 3
     grid_spacing = 1.0 # Angstroms
     grid = structure.atomic_to_density(traj, grid_dimensions, 
                                        grid_spacing)
                                        
     r2 = xray.Rings.simulate_density(grid, grid_spacing, num_shots, 
                                      q_values, num_phi)        
     
     # compute correlations & ensure match
     c1 = r1.correlate_intra(1.0, 1.0)
     c2 = r2.correlate_intra(1.0, 1.0)
     R = np.corrcoef(c1, c2)[0,1]
     assert R > 0.95
     
     c1 = r1.correlate_intra(2.0, 2.0)
     c2 = r2.correlate_intra(2.0, 2.0)
     R = np.corrcoef(c1, c2)[0,1]
     assert R > 0.95
Example #30
0
def test():
    data = SimData(400, 4, 15)
    cor = np.nan_to_num(np.corrcoef(data.answers, rowvar=0)) # pearson metric
    cor = np.nan_to_num(np.corrcoef(cor))
    label1 = kmeans2(cor, 6, minit='points', iter=100)[1] # hack pocet komponent
    label2 = kmeans(cor, 6, True)

    xs, ys = mds(cor, euclid=True)
    plt.subplot(1, 2, 1)
    plt.title('kmeans2 ' + str(adjusted_rand_score(data.item_concept, label1)))
    plot_clustering(
        range(cor.shape[0]), xs, ys,
        labels=label1,
        shapes=data.item_concept,
    )

    plt.subplot(1, 2, 2)
    plt.title('Kmeans ' + str(adjusted_rand_score(data.item_concept, label2)))
    plot_clustering(
        range(cor.shape[0]), xs, ys,
        labels=label2,
        shapes=data.item_concept,
    )

    plt.show()
Example #31
0
    def __init__(self,
                 bip,
                 target,
                 thr_dis=75,
                 thr_corr=0.89,
                 type_cor="global",
                 drop_outliers=False,
                 whisk=1.8):
        if isinstance(bip, ClassicBip.ClassicBip):
            __type__ = "Classic"
            self.bip = bip
        elif isinstance(bip, CanonicalBip.CanonicalBip):
            __type__ = "Canonical"
            self.bip = bip
        else:
            raise ValueError('Undefined biplotpy class')

        if isinstance(target, pandas.core.series.Series):
            if issubclass(
                    list(set([type(el) for el in target]))[0], (int, float)):
                self.y = numpy.array(target)
        elif isinstance(target, numpy.ndarray):
            self.y = target
        else:
            raise ValueError('target must be a numpy ndarray or a pandas Series')

        if not isinstance(thr_dis, (float, int)):
            raise ValueError('thr_dis must be a float or an int')
        elif thr_dis > 100:
            raise ValueError('thr_dis must be between 25 and 100')
        elif thr_dis < 0:
            raise ValueError('thr_dis must be positive')

        if __type__ == "Classic":
            Project = bip.RowCoord.dot(bip.ColCoord.T)
            C = bip.ColCoord
        elif __type__ == "Canonical":
            Project = bip.Ind_Coord.dot(bip.Var_Coord.T)
            C = bip.Var_Coord

        # Positive rescalation of projections

        v_min = numpy.array(
            [abs(el) if el < 0 else el for el in Project.min(axis=0)])

        for i, proj in enumerate(Project.T):
            Project[:, i] = proj + v_min[i]

        classes = numpy.unique(target)

        def get_outliers(d, whis):
            q1 = numpy.percentile(d, 25)
            q3 = numpy.percentile(d, 75)
            iq = q3 - q1
            hi_val = q3 + whis * iq
            wisk_hi = numpy.compress(d <= hi_val, d)
            if len(wisk_hi) == 0 or numpy.max(wisk_hi) < q3:
                wisk_hi = q3
            else:
                wisk_hi = max(wisk_hi)
            # get low extreme
            lo_val = q1 - whis * iq
            wisk_lo = numpy.compress(d >= lo_val, d)
            if len(wisk_lo) == 0 or numpy.min(wisk_lo) > q1:
                wisk_lo = q1
            else:
                wisk_lo = min(wisk_lo)
            return list(numpy.where(d > wisk_hi)[0]) + list(
                numpy.where(d < wisk_lo)[0])

        if drop_outliers == True:
            outliers = []
            for var in Project.T:
                outliers = outliers + get_outliers(var, whisk)

            outliers = list(set(outliers))
            self.outliers_ind = outliers

            perc_drop = len(outliers) * 100 / bip.data.shape[0]
            Project = numpy.delete(Project, (outliers), axis=0)
            target = numpy.delete(target, (outliers), axis=0)

            if perc_drop > 5.5:
                warnings.warn((
                    "You're dropping %s of the data. Try to increase 'whisk'" %
                    perc_drop))

        # Tracking class index

        IND = []
        for cl in classes:
            ind_class = []
            for i, el in enumerate(target):
                if el == cl:
                    ind_class.append(i)
            IND.append(ind_class)

        # Number of combinations

        num_c = int(len(classes) * (len(classes) - 1) / 2)

        Disc = numpy.zeros((bip.data.shape[1], num_c))

        comb = numpy.array(list(itertools.combinations(classes, r=2)))

        # Disc vectors

        for i, cmb in enumerate(comb):
            Disc[:, i] = abs(Project[IND[cmb[0]]].mean(axis=0) -
                             Project[IND[cmb[1]]].mean(axis=0))

        # Drop correlated variables

        POS = []
        for v in Disc.T:
            for i, el in enumerate(v):
                if el > numpy.percentile(v, thr_dis):
                    POS.append(i)
        POS = list(set(POS))

        if type_cor == "global":
            Corr_matr = numpy.tril(numpy.corrcoef(bip.data[:, POS].T), -1)
        elif type_cor == "coord":
            Corr_matr = numpy.tril(numpy.corrcoef(C[POS, :]), -1)
        elif type_cor == "discr":
            Corr_matr = numpy.tril(numpy.corrcoef(Disc[POS, :]), -1)
        else:
            raise ValueError('type_cor must be "global", "coord" or "discr"')

        self.Corr_matr = Corr_matr

        ### Correlation threshold (23/01/2018)

        #pos_corr = numpy.where(Corr_matr > thr_corr)
        #disc_vect = Disc.sum(axis = 1)

        #self.disc_vect = disc_vect

        #del_el = []
        #if pos_corr:
        #	for i in range(len(pos_corr[0])):
        #		ind = [pos_corr[0][i],pos_corr[1][i]]
        #		ind_del = []
        #		if ((ind[0] in POS) and (ind[1] in POS)):
        #			a = numpy.array([disc_vect[ind[0]],disc_vect[ind[1]]])
        #			ind_del.append(POS.index(pos_corr[ numpy.argwhere(a.min() == a)[0][0] ][0]))

        ### Correlation threshold (01/02/2018)

        pos_corr = numpy.where(Corr_matr > thr_corr)
        disc_vect = Disc[POS, :].sum(axis=1)

        ind_del = []
        if pos_corr:
            for i in range(len(pos_corr[0])):
                if disc_vect[pos_corr[0][i]] > disc_vect[pos_corr[1][i]]:
                    ind_del.append(pos_corr[1][i])
                else:
                    ind_del.append(pos_corr[0][i])

        ind_del = list(set(ind_del))
        if ind_del:
            POS = [el for i, el in enumerate(POS) if i not in ind_del]

        self.var_sel = list(numpy.array(bip.col_names)[POS])
Example #32
0
y = np.asarray(y)
vt = np.asarray(vt)
vmax = np.asarray(vmax)
pc = np.asarray(pc)
rmax = np.asarray(rmax)
r35 = np.asarray(r35)
dpc = np.asarray(dpc)
r35_holland1980 = np.asarray(r35_holland1980)
AL = np.asarray(AL)

# R35 is actually delta R35
r35 = r35 - rmax
r35_holland1980 = r35_holland1980 - rmax

# Correlations
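# Note: np.corrcoef(a, b) on two 1-D arrays returns the full 2x2 matrix; the scalar
# correlation is the off-diagonal entry, e.g. xcorrelation[0, 1].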
xcorrelation = np.corrcoef(r35, x)
ycorrelation = np.corrcoef(r35, y)
timecorrelation = np.corrcoef(r35, time)
vmaxcorrelation = np.corrcoef(r35, vmax)
dpccorrelation = np.corrcoef(r35, dpc)
vtcorrelation = np.corrcoef(r35, vt)
rmaxcorrelation = np.corrcoef(r35, rmax)

#==============================================================================
# Part 2: necessity -> other relations result in large error + confidence bounds around coefficients
#==============================================================================
if plot_rels == 1:
    plt.close('all')
    r35_holland1980a = r35_holland1980[~np.isnan(r35_holland1980)]
    r35a = r35[~np.isnan(r35_holland1980)]
    vmaxa = vmax[~np.isnan(r35_holland1980)]
Example #33
0
#!/usr/bin/env python

# A light statistical warm-up in Python

# Task: Implement these functions
# (without using the numpy built-ins)
#  * my_mean
#  * my_var
#  * my_cov
#  * my_cor
# So that this file can be run without error

### YOUR CODE HERE
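
# A minimal sketch of one possible answer (plain Python, no numpy built-ins). my_var
# matches np.var (population variance, ddof=0), while my_cov and my_cor return the full
# 2x2 matrices so they compare against np.cov / np.corrcoef in the checks below.

def my_mean(a):
    return sum(a) / len(a)


def my_var(a):
    m = my_mean(a)
    return sum((v - m) ** 2 for v in a) / len(a)


def my_cov(a, b):
    ma, mb = my_mean(a), my_mean(b)
    n = len(a)
    # sample (ddof=1) covariances, matching np.cov's default
    caa = sum((v - ma) ** 2 for v in a) / (n - 1)
    cbb = sum((v - mb) ** 2 for v in b) / (n - 1)
    cab = sum((p - ma) * (q - mb) for p, q in zip(a, b)) / (n - 1)
    return [[caa, cab], [cab, cbb]]


def my_cor(a, b):
    (caa, cab), (_, cbb) = my_cov(a, b)
    r = cab / (caa ** 0.5 * cbb ** 0.5)
    return [[1.0, r], [r, 1.0]]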

# Do not edit below this line

import numpy as np

x, y = (np.random.randn(100) for _ in range(2))


def equal(a, b):
    np.testing.assert_allclose(a, b)


equal(my_mean(x), np.mean(x))
equal(my_var(x), np.var(x))
equal(my_cov(x, y), np.cov(x, y))
equal(my_cor(x, y), np.corrcoef(x, y))
Example #34
0
        compute_all_hippocampus('D:\\Data\\Registration_Philip3_Hippocampus')

    ave_data = np.load('ave_patches.npy')
    # matrix_r = np.corrcoef(ave_data)
    # matrix_r_abs = np.abs(matrix_r)
    hipp_data = np.load('hipp_label_patches.npy')
    data_list = np.where(np.sum(ave_data, axis=1) > 10000)
    hipp_list = np.where(np.sum(hipp_data, axis=1) > 0.1)
    print(np.sum(hipp_data))
    for seed in hipp_list[0]:
        if seed not in data_list[0]:
            print("Not all hipp in ave_list")
    end_nodes = ave_data[data_list]
    start_nodes = hipp_data[hipp_list]

    matrix_r = np.corrcoef(end_nodes)
    matrix_r_abs = np.abs(matrix_r)
    s = np.zeros(len(matrix_r_abs))
    Y = np.zeros(len(matrix_r_abs))
    for i in range(len(matrix_r_abs)):
        for j in range(len(matrix_r_abs)):
            if matrix_r_abs[i, j] > 0.3 and i != j:
                s[i] = s[i] + 1
    for i in range(len(matrix_r_abs)):
        for j in range(len(matrix_r_abs)):
            if matrix_r_abs[i, j] > 0.3 and i != j:
                Y[i] = Y[i] + pow((matrix_r_abs[i, j] / s[i]), 2)
    plt.plot(Y)
    df = pd.DataFrame(matrix_r_abs)
    print(df)
    # sns.heatmap(df, annot=True)
def main():
    # Read settings ----------------------------------------------------

    # Brain data
    brain_dir = '/home/share/data/fmri_shared/datasets/Deeprecon/fmriprep'
    subjects_list = {'TH': 'TH_ImageNetTest_volume_native.h5'}

    rois_list = {
        'VC': 'ROI_VC = 1',
    }

    # Image features
    features_dir = '/home/ho/Documents/brain-decoding-examples/python/feature-prediction/data/features/ImageNetTest'
    network = 'caffe/VGG_ILSVRC_19_layers'
    features_list = [
        'conv1_1', 'conv1_2', 'conv2_1', 'conv2_2', 'conv3_1', 'conv3_2',
        'conv3_3', 'conv3_4', 'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4',
        'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4', 'fc6', 'fc7', 'fc8'
    ][::-1]
    features_list = ['fc6', 'fc7', 'fc8'][::-1]
    target_subject = 'AM'

    Lambda = 0.1
    data_rep = 5

    # Model parameters
    gpu_device = 1

    # Results directory
    results_dir_root = './NCconverter_results'

    # Converter models
    nc_models_dir_root = os.path.join(results_dir_root,
                                      'pytorch_converter_training', 'model')
    selected_converter_type = 'conv5'

    # Misc settings
    analysis_basename = os.path.splitext(os.path.basename(__file__))[0]

    # Pretrained model metadata
    pre_results_dir_root = '/home/share/data/contents_shared/ImageNetTraining/derivatives/feature_decoders'
    pre_analysis_basename = 'deeprecon_fmriprep_rep5_500voxel_allunits_fastl2lir_alpha100'
    pre_models_dir_root = os.path.join(pre_results_dir_root,
                                       pre_analysis_basename)

    # Load data --------------------------------------------------------
    print('----------------------------------------')
    print('Loading data')

    data_brain = {
        sbj: bdpy.BData(os.path.join(brain_dir, dat_file))
        for sbj, dat_file in subjects_list.items()
    }
    data_features = Features(os.path.join(features_dir, network))

    # Initialize directories -------------------------------------------
    makedir_ifnot(results_dir_root)
    makedir_ifnot('tmp')

    # Analysis loop ----------------------------------------------------
    print('----------------------------------------')
    print('Analysis loop')

    for sbj, roi, feat in product(subjects_list, rois_list, features_list):
        print('--------------------')
        print('Subject:    %s' % sbj)
        print('ROI:        %s' % roi)

        # Distributed computation setup
        # -----------------------------
        subject_name = sbj + '2' + target_subject + '_' + str(
            data_rep * 20) + 'p' + '_lambda' + str(Lambda)
        analysis_id = analysis_basename + '-' + subject_name + '-' + roi + '-' + feat
        results_dir_prediction = os.path.join(results_dir_root,
                                              analysis_basename,
                                              'decoded_features', network,
                                              feat, subject_name, roi)
        results_dir_accuracy = os.path.join(results_dir_root,
                                            analysis_basename,
                                            'prediction_accuracy', network,
                                            feat, subject_name, roi)

        if os.path.exists(results_dir_prediction):
            print('%s is already done. Skipped.' % analysis_id)
            continue

        dist = DistComp(lockdir='tmp', comp_id=analysis_id)
        if dist.islocked_lock():
            print('%s is already running. Skipped.' % analysis_id)
            continue

        # Preparing data
        # --------------
        print('Preparing data')

        start_time = time()

        # Brain data
        x = data_brain[sbj].select(rois_list[roi])  # Brain data
        x_labels = data_brain[sbj].select(
            'image_index')  # Image labels in the brain data

        # Target features and image labels (file names)
        y = data_features.get_features(feat)
        y_labels = data_features.index
        image_names = data_features.labels

        # Get test data
        x_test = x
        x_test_labels = x_labels

        y_test = y
        y_test_labels = y_labels

        # Averaging brain data
        x_test_labels_unique = np.unique(x_test_labels)
        x_test_averaged = np.vstack([
            np.mean(x_test[(x_test_labels == lb).flatten(), :], axis=0)
            for lb in x_test_labels_unique
        ])

        print('Total elapsed time (data preparation): %f' %
              (time() - start_time))

        # Convert x_test_averaged
        nc_models_dir = os.path.join(nc_models_dir_root, subject_name, roi,
                                     'model')
        x_test_averaged = test_ncconverter(nc_models_dir, x_test_averaged,
                                           gpu_device)

        # Prediction
        # ----------
        print('Prediction')

        start_time = time()
        y_pred = test_fastl2lir_div(
            os.path.join(pre_models_dir_root, network, feat, target_subject,
                         roi, 'model'), x_test_averaged)
        print('Total elapsed time (prediction): %f' % (time() - start_time))

        # Calculate prediction accuracy
        # -----------------------------
        print('Prediction accuracy')

        start_time = time()

        y_pred_2d = y_pred.reshape([y_pred.shape[0], -1])
        y_true_2d = y.reshape([y.shape[0], -1])

        y_true_2d = get_refdata(y_true_2d, y_labels, x_test_labels_unique)

        n_units = y_true_2d.shape[1]

        accuracy = np.array([
            np.corrcoef(y_pred_2d[:, i].flatten(),
                        y_true_2d[:, i].flatten())[0, 1]
            for i in range(n_units)
        ])
        accuracy = accuracy.reshape((1, ) + y_pred.shape[1:])

        print('Mean prediction accuracy: {}'.format(np.mean(accuracy)))

        print('Total elapsed time (prediction accuracy): %f' %
              (time() - start_time))

        # Save results
        # ------------
        print('Saving results')

        makedir_ifnot(results_dir_prediction)
        makedir_ifnot(results_dir_accuracy)

        start_time = time()

        # Predicted features
        for i, lb in enumerate(x_test_labels_unique):
            # Predicted features
            feat = np.array([y_pred[i, ]
                             ])  # To make feat shape 1 x M x N x ...

            image_filename = image_names[
                int(lb) - 1]  # Image labels are one-based image indexes

            # Save file name
            save_file = os.path.join(results_dir_prediction,
                                     '%s.mat' % image_filename)

            # Save
            hdf5storage.savemat(save_file, {u'feat': feat},
                                format='7.3',
                                oned_as='column',
                                store_python_metadata=True)

        print('Saved %s' % results_dir_prediction)

        # Prediction accuracy
        save_file = os.path.join(results_dir_accuracy, 'accuracy.mat')
        hdf5storage.savemat(save_file, {u'accuracy': accuracy},
                            format='7.3',
                            oned_as='column',
                            store_python_metadata=True)
        print('Saved %s' % save_file)

        print('Elapsed time (saving results): %f' % (time() - start_time))

        dist.unlock()

    print('%s finished.' % analysis_basename)
img1 = cv2.imread('/home/sinadabiri/Dropbox/Images/cell1.tif',0)
img2 = cv2.imread('/home/sinadabiri/Dropbox/Images/cell2.tif',0)

row, col = img1.shape

# row1, col1 = img1.shape
# row2, col2 = img2.shape
print "height =" ,row, "width = " , col 
# centerRow1, centerCol1 = row1/2, col1/2
# centerRow2, centerCol2 = row2/2, col2/2
width = col-1

i=0
j = 0
overLapCorrCoef = np.zeros((col),np.uint8)
overLapCorrCoef= np.corrcoef(img1[:,116:1:-1], img2[:,1:116], rowvar=False)[116,:]
# overLapCorrCoef= np.corrcoef(img1[:,116:1:-1], img2[:,1:116], rowvar=True)
# overLapCorrCoef= np.corrcoef(img1[:,116], img2[:,1], rowvar=True)
print (overLapCorrCoef)
print (np.size(overLapCorrCoef))
# print "image 1 ", img1[:,116], "image 2 ", img2[:,1]

# for i in range(col):
# 	if i < col:

# 		overLapCorrCoef[:,i]= np.corrcoef(img1[:,width],img2[:,i], rowvar=False)[1,0]

# 		print (overLapCorrCoef[i])
# 		# width = width -1
# 	else:
# 		break
Example #37
0
def scores(key, paths, config):
    values = [mapreduce.OutputCollector(p) for p in paths]
    try:
        values = [item.load() for item in values]
    except Exception as e:
        print(e)
        return None

    y_true_splits = [item["y_true"].ravel() for item in values]
    y_pred_splits = [item["y_pred"].ravel() for item in values]
    y_true = np.concatenate(y_true_splits)
    y_pred = np.concatenate(y_pred_splits)
    prob_pred_splits = [item["prob_pred"].ravel() for item in values]
    prob_pred = np.concatenate(prob_pred_splits)

    # Prediction performances
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    auc = roc_auc_score(y_true, prob_pred)

    # balanced accuracy (recall_mean)
    bacc_splits = [
        recall_score(y_true_splits[f], y_pred_splits[f], average=None).mean()
        for f in range(len(y_true_splits))
    ]
    auc_splits = [
        roc_auc_score(y_true_splits[f], prob_pred_splits[f])
        for f in range(len(y_true_splits))
    ]

    print("bacc all - mean(bacc) %.3f" % (r.mean() - np.mean(bacc_splits)))
    # P-values
    success = r * s
    success = success.astype('int')
    prob_class1 = np.count_nonzero(y_true) / float(len(y_true))
    pvalue_recall0_true_prob = binom_test(success[0],
                                          s[0],
                                          1 - prob_class1,
                                          alternative='greater')
    pvalue_recall1_true_prob = binom_test(success[1],
                                          s[1],
                                          prob_class1,
                                          alternative='greater')
    pvalue_recall0_unknwon_prob = binom_test(success[0],
                                             s[0],
                                             0.5,
                                             alternative='greater')
    pvalue_recall1_unknown_prob = binom_test(success[1],
                                             s[1],
                                             0.5,
                                             alternative='greater')
    pvalue_bacc = binom_test(success[0] + success[1],
                             s[0] + s[1],
                             p=0.5,
                             alternative='greater')

    # Beta's measures of similarity
    betas = np.hstack([item["beta"][:, penalty_start:].T for item in values]).T
    # Correlation
    R = np.corrcoef(betas)
    R = R[np.triu_indices_from(R, 1)]
    # Fisher z-transformation / average
    z_bar = np.mean(1. / 2. * np.log((1 + R) / (1 - R)))
    # back-transform (inverse Fisher z)
    r_bar = (np.exp(2 * z_bar) - 1) / (np.exp(2 * z_bar) + 1)
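    # (the line above is equivalent to r_bar = np.tanh(z_bar))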

    # threshold betas to compute fleiss_kappa and DICE
    try:
        betas_t = np.vstack([
            array_utils.arr_threshold_from_norm2_ratio(betas[i, :], .99)[0]
            for i in range(betas.shape[0])
        ])
        # Compute fleiss kappa statistics
        beta_signed = np.sign(betas_t)
        table = np.zeros((beta_signed.shape[1], 3))
        table[:, 0] = np.sum(beta_signed == 0, 0)
        table[:, 1] = np.sum(beta_signed == 1, 0)
        table[:, 2] = np.sum(beta_signed == -1, 0)
        fleiss_kappa_stat = fleiss_kappa(table)

        # Paire-wise Dice coeficient
        ij = [[i, j] for i in range(betas.shape[0])
              for j in range(i + 1, betas.shape[0])]
        dices = list()
        for idx in ij:
            A, B = beta_signed[idx[0], :], beta_signed[idx[1], :]
            dices.append(
                float(np.sum((A == B)[(A != 0) & (B != 0)])) /
                (np.sum(A != 0) + np.sum(B != 0)))
        dice_bar = np.mean(dices)
    except:
        dice_bar = fleiss_kappa_stat = 0

    # Proportion of selection within the support accross the CV
    support_count = (betas_t != 0).sum(axis=0)
    support_count = support_count[support_count > 0]
    support_prop = support_count / betas_t.shape[0]

    scores = OrderedDict()
    scores['key'] = key
    scores['recall_0'] = r[0]
    scores['recall_1'] = r[1]
    scores['bacc'] = r.mean()
    scores['bacc_se'] = np.std(bacc_splits) / np.sqrt(len(bacc_splits))
    scores["auc"] = auc
    scores['auc_se'] = np.std(auc_splits) / np.sqrt(len(auc_splits))
    scores['pvalue_recall0_true_prob_one_sided'] = pvalue_recall0_true_prob
    scores['pvalue_recall1_true_prob_one_sided'] = pvalue_recall1_true_prob
    scores[
        'pvalue_recall0_unknwon_prob_one_sided'] = pvalue_recall0_unknwon_prob
    scores[
        'pvalue_recall1_unknown_prob_one_sided'] = pvalue_recall1_unknown_prob
    scores['pvalue_bacc_mean'] = pvalue_bacc
    scores['prop_non_zeros_mean'] = float(np.count_nonzero(betas_t)) / \
                                    float(np.prod(betas.shape))
    scores['beta_r_bar'] = r_bar
    scores['beta_fleiss_kappa'] = fleiss_kappa_stat
    scores['beta_dice_bar'] = dice_bar
    scores['beta_dice'] = str(dices)
    scores['beta_r'] = str(R)
    scores['beta_support_prop_select_mean'] = support_prop.mean()
    scores['beta_support_prop_select_sd'] = support_prop.std()

    return scores
Example #38
0
# Read all the cleaned data files.
cleaned_train = pd.read_csv("Cleaned_train.csv")
cleaned_test = pd.read_csv("Cleaned_test.csv")

# Keep numerical features
num_features = cleaned_train.select_dtypes(include=np.number)
correl = num_features.corr()

# SalePrice correlation matrix
k = 11
plt.figure(figsize=(10, 10))
sns.set_style(style='white')
figtext_args, figtext_kwargs = add_fignum(
    "Fig 8. Correlation Matrix Heatmap of Sale Price")
cols = correl.nlargest(k, 'SalePrice')['SalePrice'].index
cm = np.corrcoef(cleaned_train[cols].values.T)
sns.set(font_scale=1.25)
plt.title("Correlation Heatmap of Sale Price with 10 most related variable\n",
          weight='bold')
mask = np.triu(np.ones_like(cm, dtype=bool))
cmap = sns.diverging_palette(220, 10, as_cmap=True)
hm = sns.heatmap(cm,
                 mask=mask,
                 cmap=cmap,
                 cbar=True,
                 annot=True,
                 square=True,
                 fmt='.2f',
                 annot_kws={'size': 10},
                 yticklabels=cols.values,
                 xticklabels=cols.values)
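Note that np.corrcoef on the transposed column values is interchangeable with pandas' own Pearson correlation, so the heatmap above could equally be built from correl itself. A small standalone check on synthetic data:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(100, 3), columns=['a', 'b', 'c'])
cm_np = np.corrcoef(df.values.T)   # variables as rows, as in the snippet above
cm_pd = df.corr().values           # pandas defaults to Pearson
assert np.allclose(cm_np, cm_pd)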
Example #39
0
#%%

same_bins = np.linspace(0, 12, 50)

bins, pdf = PDF(cube[12].data.flatten() * 0.304, same_bins)
bins_con, pdf_con = PDF(cube_con[13].data.flatten() * 0.304, same_bins)
#bins_old,pdf_old=PDF(data_old[13].flatten()*0.304,same_bins)
bins_3ord, pdf_3ord = PDF(cube_3ord[13].data.flatten() * 0.304, same_bins)
bins_2m, pdf_2m = PDF(cube_2m[13].data.flatten() * 0.304, same_bins)
#bins_nh,pdf_nh=PDF(cube_nh.data.flatten(),same_bins)
sat_bins, sat_pdf = PDF(grid_z1.flatten() / 1000., same_bins)
plt.figure()

plt.plot(bins,
         pdf,
         label='ALL_ICE_PROC R=%1.2f' % np.corrcoef(pdf[:], sat_pdf[:])[0, 1])
#plt.plot(bins_con, pdf_con,label='con R=%1.2f'%np.corrcoef(pdf_con[:],sat_pdf[:])[0,1])
#plt.plot(bins_old, pdf_old,label='old R=%1.2f'%np.corrcoef(pdf_old[20:],sat_pdf[20:])[0,1])
#plt.plot(bins_nh, pdf_nh,label='no hallet R=%1.2f'%np.corrcoef(pdf_nh[20:],sat_pdf[20:])[0,1])
plt.plot(bins_2m,
         pdf_2m,
         label='2_ORD_MORE R=%1.2f' % np.corrcoef(pdf_2m[:], sat_pdf[:])[0, 1])
plt.plot(bins_3ord,
         pdf_3ord,
         label='3_ORD_LESS R=%1.2f' %
         np.corrcoef(pdf_3ord[:], sat_pdf[:])[0, 1])
plt.plot(sat_bins, sat_pdf, label='satellite')
plt.legend()
plt.title('Cloud top height')
plt.ylabel('Normalized PDF')
plt.xlabel('Cloud top height (km)')
Example #40
0
        if 'spark' in pref[1]:
            children.append( famStruct[l.strip('\n\r')][2] )
        else:
            children.append(l.strip('\n\r'))

f = np.load(gnpFn)
print("Keys: %s" % list(f.keys()), file=sys.stderr)
gnp = f.get('GN').astype(float)
print("reading parents npz:", file=sys.stderr)
print("gnp.shape", gnp.shape, file=sys.stderr)
f.close()


f = np.load(gncFn)
print("Keys: %s" % list(f.keys()), file=sys.stderr)
gnc = f.get('GN').astype(float)
print("reading children npz:", file=sys.stderr)
print("gnc.shape", gnc.shape, file=sys.stderr)
f.close()

prs = gnp
chn = gnc

corr = np.corrcoef(chn)
id = np.where(corr > 0.95)
l = len(id[0])
idd = set([id[0][i] for i in range(l) if id[0][i] != id[1][i]])

for i in idd:
    print(children[i])
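A slightly tighter way to collect near-duplicate rows is to restrict the search to the upper triangle, which excludes the diagonal and each symmetric pair automatically. A sketch on synthetic data (gn stands in for the genotype matrix gnc):

import numpy as np

gn = np.random.randn(10, 500)

corr = np.corrcoef(gn)
iu = np.triu_indices_from(corr, k=1)     # off-diagonal upper triangle only
hits = np.where(corr[iu] > 0.95)[0]
near_duplicates = sorted(set(iu[0][hits]) | set(iu[1][hits]))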
Example #41
0
def correlation(predictions, targets):
    ranked_preds = predictions.rank(pct=True, method="first")
    return np.corrcoef(ranked_preds, targets)[0, 1]
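A quick usage sketch for the function above; predictions must be a pandas Series so that .rank is available (data here is synthetic):

import numpy as np
import pandas as pd

preds = pd.Series(np.random.rand(1000))
targets = pd.Series(np.random.rand(1000))
print(correlation(preds, targets))   # Pearson correlation of ranked predictions vs. targets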
Example #42
0
def main():

    print(MODEL_FILE)

    print("Loading data...")

    # The training data is used to train your model how to predict the targets.
    #training_data = read_csv("numerai_training_data.csv")
    # The tournament data is the data that Numerai uses to evaluate your model.
    #tournament_data = read_csv("numerai_tournament_data.csv")

    contest = str(233)
    directory = 'F:\\Numerai\\numerai' + contest + '\\'

    print("Loading data...")

    # The training data is used to train your model how to predict the targets.
    training_data = pd.read_csv(directory +
                                "numerai_training_data.csv").set_index("id")

    # The tournament data is the data that Numerai uses to evaluate your model.
    tournament_data = pd.read_csv(
        directory + "numerai_tournament_data.csv").set_index("id")

    #MODEL_FILE = directory + "example_model.xgb"

    feature_names = [
        f for f in training_data.columns if f.startswith("feature")
    ]
    print(f"Loaded {len(feature_names)} features")

    # This is the model that generates the included example predictions file.
    # Taking too long? Set learning_rate=0.1 and n_estimators=200 to make this run faster.
    # Remember to delete example_model.xgb if you change any of the parameters below.
    model = XGBRegressor(max_depth=5,
                         learning_rate=0.01,
                         n_estimators=2000,
                         n_jobs=-1,
                         colsample_bytree=0.1)
    if MODEL_FILE.is_file():
        print("Loading pre-trained model...")
        model.load_model(MODEL_FILE)
    else:
        print("Training model...")
        model.fit(training_data[feature_names], training_data[TARGET_NAME])
        print("Training model... {MODEL_FILE}")
        model.save_model("F:\\Numerai\\numerai233\\example_model.xgb")

    # Generate predictions on both training and tournament data
    print("Generating predictions...")
    training_data[PREDICTION_NAME] = model.predict(
        training_data[feature_names])
    tournament_data[PREDICTION_NAME] = model.predict(
        tournament_data[feature_names])

    # Check the per-era correlations on the training set (in sample)
    train_correlations = training_data.groupby("era").apply(score)
    print(
        f"On training the correlation has mean {train_correlations.mean()} and std {train_correlations.std()}"
    )
    print(
        f"On training the average per-era payout is {payout(train_correlations).mean()}"
    )
    """Validation Metrics"""
    # Check the per-era correlations on the validation set (out of sample)
    validation_data = tournament_data[tournament_data.data_type ==
                                      "validation"]
    validation_correlations = validation_data.groupby("era").apply(score)
    print(
        f"On validation the correlation has mean {validation_correlations.mean()} and "
        f"std {validation_correlations.std()}")
    print(
        f"On validation the average per-era payout is {payout(validation_correlations).mean()}"
    )

    # Check the "sharpe" ratio on the validation set
    validation_sharpe = validation_correlations.mean(
    ) / validation_correlations.std()
    print(f"Validation Sharpe: {validation_sharpe}")

    print("checking max drawdown...")
    rolling_max = (validation_correlations + 1).cumprod().rolling(
        window=100, min_periods=1).max()
    daily_value = (validation_correlations + 1).cumprod()
    max_drawdown = -(rolling_max - daily_value).max()
    print(f"max drawdown: {max_drawdown}")

    # Check the feature exposure of your validation predictions
    feature_exposures = validation_data[feature_names].apply(
        lambda d: correlation(validation_data[PREDICTION_NAME], d), axis=0)
    max_feature_exposure = np.max(np.abs(feature_exposures))
    print(f"Max Feature Exposure: {max_feature_exposure}")

    # Check feature neutral mean
    print("Calculating feature neutral mean...")
    feature_neutral_mean = get_feature_neutral_mean(validation_data)
    print(f"Feature Neutral Mean is {feature_neutral_mean}")

    # Load example preds to get MMC metrics
    example_preds = pd.read_csv(
        "F:\\Numerai\\numerai233\\example_predictions_target_kazutsugi.csv"
    ).set_index("id")["prediction_kazutsugi"]
    validation_example_preds = example_preds.loc[validation_data.index]
    validation_data["ExamplePreds"] = validation_example_preds

    print("calculating MMC stats...")
    # MMC over validation
    mmc_scores = []
    corr_scores = []
    for _, x in validation_data.groupby("era"):
        series = neutralize_series(pd.Series(unif(x[PREDICTION_NAME])),
                                   pd.Series(unif(x["ExamplePreds"])))
        mmc_scores.append(np.cov(series, x[TARGET_NAME])[0, 1] / (0.29**2))
        corr_scores.append(
            correlation(unif(x[PREDICTION_NAME]), x[TARGET_NAME]))

    val_mmc_mean = np.mean(mmc_scores)
    val_mmc_std = np.std(mmc_scores)
    val_mmc_sharpe = val_mmc_mean / val_mmc_std
    corr_plus_mmcs = [c + m for c, m in zip(corr_scores, mmc_scores)]
    corr_plus_mmc_sharpe = np.mean(corr_plus_mmcs) / np.std(corr_plus_mmcs)
    corr_plus_mmc_mean = np.mean(corr_plus_mmcs)
    corr_plus_mmc_sharpe_diff = corr_plus_mmc_sharpe - validation_sharpe

    print(f"MMC Mean: {val_mmc_mean}\n"
          f"Corr Plus MMC Sharpe:{corr_plus_mmc_sharpe}\n"
          f"Corr Plus MMC Diff:{corr_plus_mmc_sharpe_diff}")

    # Check correlation with example predictions
    corr_with_example_preds = np.corrcoef(
        validation_example_preds.rank(pct=True, method="first"),
        validation_data[PREDICTION_NAME].rank(pct=True, method="first"))[0, 1]
    print(f"Corr with example preds: {corr_with_example_preds}")

    # Save predictions as a CSV and upload to https://numer.ai
    tournament_data[PREDICTION_NAME].to_csv("F:\\Numerai\\numerai233\\" +
                                            TOURNAMENT_NAME +
                                            "_submission.csv")
Example #43
0
def plot_correlations(out_dir, ref_dir):
    """
    plots correlation and L2-norm values between reference and output seismograms
    """
    print('comparing seismograms')
    print('  reference directory: %s' % ref_dir)
    print('  output directory   : %s\n' % out_dir)

    # checks if directory exists
    if not os.path.isdir(ref_dir):
        print("Please check if directory exists: ", ref_dir)
        sys.exit(1)
    if not os.path.isdir(out_dir):
        print("Please check if directory exists: ", out_dir)
        sys.exit(1)

    # seismogram file ending
    ## global version: ending = '.sem.ascii' # MX*.sem.ascii, ..
    ## cartesian version:
    ending = '.sem*'  # .semd, .semv, .sema, .semp, ..

    # gets seismograms
    files = glob.glob(out_dir + '/*' + ending)
    if len(files) == 0:
        print("no seismogram files with ending ", ending, " found")
        print("Please check directory: ", out_dir)
        sys.exit(1)

    files.sort()

    corr_min = 1.0
    err_max = 0.0
    shift_max = 0.0

    # gets time step size from first file
    syn_file = files[0]
    print("  time step: reading from first file ", syn_file)
    syn_time = np.loadtxt(syn_file)[:, 0]
    dt = syn_time[1] - syn_time[0]
    print("  time step: size = ", dt)
    # warning
    if dt <= 0.0:
        print("warning: invalid time step size for file ", files[0])

    # determines window length
    if USE_SUB_WINDOW_CORR:
        # moving window
        print("  using correlations in moving sub-windows")
        print("  minimum period: ", TMIN)
        # checks
        if dt <= 0.0:
            # use no moving window
            window_length = len(syn_time) - 1
        else:
            # window length for minimum period
            window_length = int(TMIN / dt)

        print("  moving window length: ", window_length)

    print("")
    print("comparing ", len(files), "seismograms")
    print("")

    # outputs table header
    print("|%-30s| %13s| %13s| %13s|" %
          ('file name', 'corr', 'err', 'time shift'))

    # counter
    n = 0

    for f in files:
        # build reference and synthetics file names
        # specfem file: **network**.**station**.**comp**.sem.ascii
        fname = os.path.basename(f)
        names = str.split(fname, ".")

        # trace
        net = names[0]
        sta = names[1]
        cha = names[2]

        # filenames
        # old format
        #fname_old = sta + '.' + net + '.' + cha + '.sem.ascii'
        #ref_file = ref_dir + '/' + fname_old
        #syn_file = out_dir + '/' + fname_old
        # new format
        ref_file = ref_dir + '/' + fname
        syn_file = out_dir + '/' + fname

        # makes sure files are both available
        if not os.path.isfile(ref_file):
            print("  file " + ref_file + " not found")
            continue
        if not os.path.isfile(syn_file):
            print("  file " + syn_file + " not found")
            continue

        # numpy: reads in file data
        ref0 = np.loadtxt(ref_file)[:, 1]
        syn0 = np.loadtxt(syn_file)[:, 1]

        #debug
        #print("  seismogram: ", fname, "vs", fname_old,"  lengths: ",len(ref0),len(syn0))

        # cuts common length
        length = min(len(ref0), len(syn0))
        if length <= 1: continue

        # length warning
        if len(ref0) != len(syn0):
            print(
                "** warning: mismatch of file length in both files syn/ref = %d / %d"
                % (len(syn0), len(ref0)))
            #print("** warning: using smaller length %d" % length)

        # time step size in reference file
        ref_time = np.loadtxt(ref_file)[:, 0]
        dt_ref = ref_time[1] - ref_time[0]
        # mismatch warning
        if abs(dt - dt_ref) / dt > 1.e-5:
            print(
                "** warning: mismatch of time step size in both files syn/ref = %e / %e"
                % (dt, dt_ref))
            #print("** warning: using time step size %e from file %s" %(dt,syn_file))

        #debug
        #print("common length: ",length)

        ref = ref0[0:length]
        syn = syn0[0:length]

        # least square test
        norm = np.linalg.norm
        sqrt = np.sqrt

        # normalized by power in reference solution
        fac_norm = norm(ref)
        # or normalized by power in (ref*syn)
        #fac_norm = sqrt(norm(ref)*norm(syn))

        if fac_norm > 0.0:
            err = norm(ref - syn) / fac_norm
        else:
            err = norm(ref - syn)

        #debug
        #print('norm syn = %e norm ref = %e' % (norm(syn),fac_norm))

        # correlation test
        # total length
        if fac_norm > 0.0:
            corr_mat = np.corrcoef(ref, syn)
        else:
            if norm(ref - syn) > 0.0:
                corr_mat = np.cov(ref - syn)
            else:
                # both zero traces
                print("** warning: comparing zero traces")
                corr_mat = 1.0
        corr = np.min(corr_mat)

        # time shift
        if fac_norm > 0.0:
            # shift (in s) by cross correlation
            shift = get_cross_correlation_timeshift(ref, syn, dt)
        else:
            # no correlation with zero trace
            shift = 0.0

        # correlation in moving window
        if USE_SUB_WINDOW_CORR:
            # moves window through seismogram
            for i in range(0, length - window_length):
                # windowed signals
                x = ref[i:i + window_length]
                y = syn[i:i + window_length]

                # correlations
                corr_win = np.corrcoef(x, y)
                corr_w = np.min(corr_win)
                corr = min(corr, corr_w)

                # cross-correlation array
                shift_w = get_cross_correlation_timeshift(x, y, dt)
                if abs(shift) < abs(shift_w): shift = shift_w

        # statistics
        corr_min = min(corr, corr_min)
        err_max = max(err, err_max)
        if abs(shift_max) < abs(shift): shift_max = shift

        # info string
        info = ""
        if corr < TOL_CORR: info += "  poor correlation"
        if err > TOL_ERR: info += "      poor match"
        if abs(shift) > TOL_SHIFT: info += "      significant shift"

        # print results to screen
        print("|%-30s| %13.5f| %13.5le| %13.5le| %s" %
              (fname, corr, err, shift, info))

        # counter
        n += 1

    # check if any comparison done
    if n == 0:
        # values indicating failure
        corr_min = 0.0
        err_max = 1.e9
        shift_max = 1.e9

    # print min(corr) max(err)
    print(
        "|---------------------------------------------------------------------------|"
    )
    print("|%30s| %13.5f| %13.5le| %13.5le|" %
          ('min/max', corr_min, err_max, shift_max))

    # output summary
    print("\nsummary:")
    print("%d seismograms compared\n" % n)
    if n == 0:
        print("\nno seismograms found for comparison!\n\n")

    print("correlations: values 1.0 perfect, < %.1f poor correlation" %
          TOL_CORR)
    if corr_min < TOL_CORR:
        print("              poor correlation seismograms found")
    else:
        print("              no poor correlations found")
    print("")

    print("L2-error    : values 0.0 perfect, > %.2f poor match" % TOL_ERR)
    if err_max > TOL_ERR:
        print("              poor matching seismograms found")
    else:
        print("              no poor matches found")
    print("")

    print("Time shift  : values 0.0 perfect, > %.2f significant shift" %
          TOL_SHIFT)
    if abs(shift_max) > TOL_SHIFT:
        print("              significant time shift in seismograms found")
    else:
        print("              no significant time shifts found")
    print("")
Example #44
0
xi = soln[0]
sig = soln[1]

mparams = (q, nu_S, nu_W, kappa, muW, muS, lambd, chiS, eta, xi, mu, \
       gamma, yvect, rho, sig)

Hhist, yhist, Ahist, Chist, bhist, Uhist, zhist = runsim(T, epshist, mparams)

HrsSlept = np.zeros(ndays)
for d in range(0, ndays):
    HrsSlept[d] = np.sum(Ahist[d:d + q - 1]) / pph

HrsMean = np.mean(HrsSlept)
HrsStd = np.std(HrsSlept)
HrsAuto = np.corrcoef(HrsSlept[0:ndays - 1], HrsSlept[1:ndays])
HrsAuto = HrsAuto[0, 1]

print('xi:    ', xi)
print('sigma: ', sig)
print('average hours of sleep per day:   ', HrsMean)
print('st dev of hours of sleep per day: ', HrsStd)
print('autocorrelation of sleep per day: ', HrsAuto)
print(' ')
data = (HrsMean, HrsStd)
pkl.dump(data, open(name + '.pkl', 'wb'))

# Simulate with no shocks to find SS
sig = 0.
ndays = 5
T = ndays * q  # number of periods to simulate
Example #45
0
    def scaling(self):

        data = self.raw
        train = data.drop(['測站', '測項', '日期'], axis=1)
        train = np.array(train.replace('NR', 0), dtype=np.float32)

        # Transform to 18 factors x #train_data
        train_norm = np.zeros((18, 1))
        for i in range(240):
            single_day = train[i * 18:(i + 1) * 18, :]
            train_norm = np.append(train_norm, single_day, axis=1)
        train_norm = np.delete(train_norm, 0, axis=1)

        # Append squared and interaction terms: pm2.5^2, pm10^2, pm2.5*pm10, and squares of factors 5, 6, 7 and 12
        train_norm = np.insert(train_norm,
                               len(train_norm),
                               train_norm[9, :]**2,
                               axis=0)
        train_norm = np.insert(train_norm,
                               len(train_norm),
                               train_norm[8, :]**2,
                               axis=0)
        train_norm = np.insert(train_norm,
                               len(train_norm),
                               train_norm[8, :] * train_norm[9, :],
                               axis=0)
        train_norm = np.insert(train_norm,
                               len(train_norm),
                               train_norm[5, :]**2,
                               axis=0)
        train_norm = np.insert(train_norm,
                               len(train_norm),
                               train_norm[6, :]**2,
                               axis=0)
        train_norm = np.insert(train_norm,
                               len(train_norm),
                               train_norm[7, :]**2,
                               axis=0)
        train_norm = np.insert(train_norm,
                               len(train_norm),
                               train_norm[12, :]**2,
                               axis=0)

        # Extract labels
        self.__label = []
        for mon in range(12):
            for hr in range(471):
                self.__label.append(train_norm[9, (mon * 480) + hr + 9])

        # Standardization
        self.mu = train_norm.mean(axis=1)
        self.std = train_norm.std(axis=1)
        for j in range(train_norm.shape[0]):
            if self.std[j] != 0:
                train_norm[j, ] = (train_norm[j, ] - self.mu[j]) / self.std[j]

        # Drop features weakly correlated with pm2.5 (|PCC| < 0.2)
        cor_mat = np.corrcoef(train_norm)[9, :]
        print(cor_mat)
        d = []
        for i in range(len(cor_mat)):
            if abs(cor_mat[i]) < 0.2:
                d.append(i)
        train_norm = np.delete(train_norm, d, axis=0)

        # Extract features
        self.__data = []
        for mon in range(12):
            for hr in range(471):
                feature = train_norm[:, (mon * 480) + hr:(mon * 480) + hr + 9]
                self.__data.append(feature)

        return self, d
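The correlation-based pruning step above can also be written without an explicit loop. A standalone sketch on synthetic data, with row 9 playing the role of pm2.5:

import numpy as np

train_norm = np.random.randn(25, 5000)        # 25 factors x N samples
pcc = np.corrcoef(train_norm)[9, :]           # correlation of every factor with row 9
train_norm = train_norm[np.abs(pcc) >= 0.2]   # drop weakly correlated factors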
Example #46
0
    y_pred = np.sign(np.inner(w, phi))
    if y_pred == 0: y_pred = 1

    if y != y_pred:
        # update w_s and A_s
        Wlast = copy.deepcopy(W)
        w = w + y * np.dot(np.linalg.inv(np.kron(A, np.eye(d))), phi)
        W = np.reshape(w, (d, K), order='F')
        if t >= EPOCH:
            A = np.linalg.inv((1.0 / (K + 1)) * (np.eye(K) + np.ones((K, K))))
        s += 1

print("Task relatedness Matrix for CMTL:\n", np.linalg.inv(A))
print('\n')
print("Learned task weights correlation for CMTL:\n",
      np.corrcoef([w[0:10], w[10:20], w[20:30]]))
print("\n")
print("True weights correlation for CMTL:\n", np.corrcoef([w1, w2, w3]))

nIncorrect = 0
#Testing Accuracy:
for dataSetNumber in range(100):
    ypred_1 = np.sign(np.dot(w[0:10], x_1[:, dataSetNumber]))
    if ypred_1 != y_1[dataSetNumber]:
        nIncorrect += 1

    ypred_2 = np.sign(np.dot(w[10:20], x_2[:, dataSetNumber]))
    if ypred_2 != y_2[dataSetNumber]:
        nIncorrect += 1

    ypred_3 = np.sign(np.dot(w[20:30], x_3[:, dataSetNumber]))
Example #47
0
def delete_mixtures(params, nb_cpu, nb_gpu, use_gpu):

    templates = load_data(params, 'templates')

    data_file = params.data_file
    N_e = params.getint('data', 'N_e')
    N_total = params.nb_channels
    N_t = params.getint('detection', 'N_t')
    template_shift = params.getint('detection', 'template_shift')
    cc_merge = params.getfloat('clustering', 'cc_merge')
    x, N_tm = templates.shape
    nb_temp = N_tm // 2
    merged = [nb_temp, 0]
    mixtures = []
    to_remove = []

    overlap = get_overlaps(params,
                           extension='-mixtures',
                           erase=True,
                           normalize=False,
                           maxoverlap=False,
                           verbose=False,
                           half=True,
                           use_gpu=use_gpu,
                           nb_cpu=nb_cpu,
                           nb_gpu=nb_gpu)
    filename = params.get('data', 'file_out_suff') + '.overlap-mixtures.hdf5'
    result = []

    norm_templates = load_data(params, 'norm-templates')
    templates = load_data(params, 'templates')
    result = load_data(params, 'clusters')
    best_elec = load_data(params, 'electrodes')
    limits = load_data(params, 'limits')
    nodes, edges = get_nodes_and_edges(params)
    inv_nodes = numpy.zeros(N_total, dtype=numpy.int32)
    inv_nodes[nodes] = numpy.argsort(nodes)

    distances = numpy.zeros((nb_temp, nb_temp), dtype=numpy.float32)

    over_x = overlap.get('over_x')[:]
    over_y = overlap.get('over_y')[:]
    over_data = overlap.get('over_data')[:]
    over_shape = overlap.get('over_shape')[:]
    overlap.close()

    overlap = scipy.sparse.csr_matrix((over_data, (over_x, over_y)),
                                      shape=over_shape)

    for i in xrange(nb_temp - 1):
        distances[i, i + 1:] = numpy.argmax(
            overlap[i * nb_temp + i + 1:(i + 1) * nb_temp].toarray(), 1)
        distances[i + 1:, i] = distances[i, i + 1:]

    all_temp = numpy.arange(comm.rank, nb_temp, comm.size)
    overlap_0 = overlap[:, N_t].toarray().reshape(nb_temp, nb_temp)

    sorted_temp = numpy.argsort(
        norm_templates[:nb_temp])[::-1][comm.rank::comm.size]
    M = numpy.zeros((2, 2), dtype=numpy.float32)
    V = numpy.zeros((2, 1), dtype=numpy.float32)

    to_explore = xrange(comm.rank, len(sorted_temp), comm.size)
    if comm.rank == 0:
        to_explore = get_tqdm_progressbar(to_explore)

    for count, k in enumerate(to_explore):

        k = sorted_temp[k]
        electrodes = numpy.take(inv_nodes, edges[nodes[best_elec[k]]])
        overlap_k = overlap[k * nb_temp:(k + 1) * nb_temp].tolil()
        is_in_area = numpy.in1d(best_elec, electrodes)
        all_idx = numpy.arange(len(best_elec))[is_in_area]
        been_found = False

        for i in all_idx:
            if not been_found:
                overlap_i = overlap[i * nb_temp:(i + 1) * nb_temp].tolil()
                M[0, 0] = overlap_0[i, i]
                V[0, 0] = overlap_k[i, distances[k, i]]
                for j in all_idx[i + 1:]:
                    M[1, 1] = overlap_0[j, j]
                    M[1, 0] = overlap_i[j, distances[k, i] - distances[k, j]]
                    M[0, 1] = M[1, 0]
                    V[1, 0] = overlap_k[j, distances[k, j]]
                    try:
                        [a1, a2] = numpy.dot(scipy.linalg.inv(M), V)
                    except Exception:
                        [a1, a2] = [0, 0]
                    a1_lim = limits[i]
                    a2_lim = limits[j]
                    is_a1 = (a1_lim[0] <= a1) and (a1 <= a1_lim[1])
                    is_a2 = (a2_lim[0] <= a2) and (a2 <= a2_lim[1])
                    if is_a1 and is_a2:
                        new_template = (
                            a1 * templates[:, i].toarray() +
                            a2 * templates[:, j].toarray()).ravel()
                        similarity = numpy.corrcoef(
                            templates[:, k].toarray().ravel(), new_template)[0,
                                                                             1]
                        if similarity > cc_merge:
                            if k not in mixtures:
                                mixtures += [k]
                                been_found = True
                                break
                                #print "Template", k, 'is sum of (%d, %g) and (%d,%g)' %(i, a1, j, a2)

    #print mixtures
    to_remove = numpy.unique(numpy.array(mixtures, dtype=numpy.int32))
    to_remove = all_gather_array(to_remove, comm, 0, dtype='int32')

    if len(to_remove) > 0:
        slice_templates(params, to_remove)
        slice_clusters(params, result, to_remove=to_remove)

    comm.Barrier()

    if comm.rank == 0:
        os.remove(filename)

    return [nb_temp, len(to_remove)]
Example #48
0
y_pred = sess.run(real_logits, feed_dict={X: pred})

Prob_pred=sess.run(tf.sigmoid(y_pred))


#Check if the Cov and mean are good
np.set_printoptions(suppress=True)

Mean_pred = np.mean(np.transpose(pred),axis=1)
Mean_X = np.mean(np.transpose(X_batch),axis=1)
Cov_pred = np.around(np.cov(np.transpose(pred)), decimals=3)
#print(np.around(np.cov(np.transpose(pred)), decimals=2))
Cov_X = np.around(np.cov(np.transpose(X_batch)), decimals=3)
#print(np.around(np.cov(np.transpose(X_batch)), decimals=2))

Corr_pred = np.around(np.corrcoef(np.transpose(pred)), decimals=3)
Corr_X = np.around(np.corrcoef(np.transpose(X_batch)), decimals=3)

#plot the loss
plt.figure(num=0, figsize=(7, 5))

plot_loss(d_loss_list,g_loss_list)

plt.figure(num=1, figsize=(7, 5))

D0 = pd.DataFrame(np.transpose((X_batch[:,0],pred[:,0]))) 
D0.plot.density()
plt.xlim((-25, 25))
plt.title('return series of stock 1')

plt.figure(num=2, figsize=(7, 5))
Example #49
0
def correlation(x, y):
    return np.corrcoef(x, y)[0, 1]
Example #50
0
def adjacency_correlation(signals):
    ''' Faster version of adjacency matrix with correlation metric '''
    signals = np.reshape(signals, (signals.shape[0], -1))
    return np.abs(np.nan_to_num(np.corrcoef(signals)))
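A usage sketch for the helper above; the signals are synthetic and any trailing dimensions are flattened before correlating:

import numpy as np

signals = np.random.randn(8, 16, 16)   # 8 signals, each a 16x16 patch
A = adjacency_correlation(signals)     # 8 x 8 matrix of |Pearson r|, with NaNs mapped to 0
print(A.shape)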
Example #51
0
score_x = numpy.random.normal(171.77, 5.54, n)
score_y = numpy.random.normal(62.49, 7.89, n)

# Add a little noise
score_x.sort()
score_x = numpy.around(score_x + numpy.random.normal(scale=3.0, size=n), 2)
score_y.sort()
score_y = numpy.around(score_y + numpy.random.normal(size=n), 2)

# Maximum
print "Max x: " + str(numpy.max(score_x)) + " y: " + str(numpy.max(score_y))
# Minimum
print "Min x: " + str(numpy.min(score_x)) + " y: " + str(numpy.min(score_y))
# Mean
print "Avg x: " + str(numpy.mean(score_x)) + " y: " + str(numpy.mean(score_y))
# First quartile
print "1Q x:" + str(stats.scoreatpercentile(score_x, 25)) + " y: " + str(
    stats.scoreatpercentile(score_y, 25))
# Median
print "Med x: " + str(numpy.median(score_x)) + " y: " + str(
    numpy.median(score_y))
# Third quartile
print "3Q x:" + str(stats.scoreatpercentile(score_x, 75)) + " y: " + str(
    stats.scoreatpercentile(score_y, 75))
# Variance
print "Var x: " + str(numpy.var(score_x)) + " y: " + str(numpy.var(score_y))
# Standard deviation
print "S.D. x: " + str(numpy.std(score_x)) + " y:" + str(numpy.std(score_y))
# Correlation coefficient
cor = numpy.corrcoef(score_x, score_y)
print "Correlation Coefficient : " + str(cor[0, 1])
Example #52
0
    def crossvalidate(self,
                      X,
                      Y,
                      zDim_list=np.linspace(0, 10, 11),
                      n_folds=10,
                      verbose=True,
                      rand_seed=None):
        N, D = X.shape

        # make sure z dims are integers
        z_list = zDim_list.astype(int)

        # create k-fold iterator
        if verbose:
            print('Crossvalidating pCCA model to choose # of dims...')
        cv_kfold = ms.KFold(n_splits=n_folds,
                            shuffle=True,
                            random_state=rand_seed)

        # iterate through train/test splits
        i = 0
        LLs = np.zeros([n_folds, len(z_list)])
        for train_idx, test_idx in cv_kfold.split(X):
            if verbose:
                print('   Fold ', i + 1, ' of ', n_folds, '...')

            X_train, X_test = X[train_idx], X[test_idx]
            Y_train, Y_test = Y[train_idx], Y[test_idx]
            # iterate through each zDim
            for j in range(len(z_list)):
                tmp = prob_cca()
                tmp.train_maxLL(X_train, Y_train, z_list[j])
                z, curr_LL = tmp.estep(X_test, Y_test)
                LLs[i, j] = curr_LL
            i = i + 1

        sum_LLs = LLs.sum(axis=0)

        # find the best # of z dimensions and train CCA model
        max_idx = np.argmax(sum_LLs)
        zDim = z_list[max_idx]
        self.train_maxLL(X, Y, zDim)

        # cross-validate to get canonical correlations
        if verbose:
            print('Crossvalidating pCCA model to compute canon corrs...')
        zx, zy = np.zeros((2, N, zDim))
        for train_idx, test_idx in cv_kfold.split(X):
            X_train, X_test = X[train_idx], X[test_idx]
            Y_train, Y_test = Y[train_idx], Y[test_idx]

            tmp = prob_cca()
            tmp.train_maxLL(X_train, Y_train, zDim)
            z, curr_LL = tmp.estep(X_test, Y_test)

            zx[test_idx, :] = z['zx_mu']
            zy[test_idx, :] = z['zy_mu']

        cv_rho = np.zeros(zDim)
        for i in range(zDim):
            tmp = np.corrcoef(zx[:, i], zy[:, i])
            cv_rho[i] = tmp[0, 1]

        self.params['cv_rho'] = cv_rho

        return sum_LLs, z_list, sum_LLs[max_idx], z_list[max_idx]
Example #53
0
def evaluation(img_akaze, img_circle):

    ##################################### PROGRAMMATIC ANALYSIS OF CHECK-POINTS #########################################

    # load the image and convert it to grayscale
    img_perfect = cv2.imread("team_id_2_comparison.png")

    coordinates = [[29.5, 250.0], [38.0, 385.0], [160.97999572753906, 417.5],
                   [114.12354278564453, 338.9093322753906], [88.5, 259.0],
                   [158.53448486328125, 202.6724090576172], [187.5, 38.5],
                   [261.2481384277344, 121.8302230834961], [270.5, 243.0],
                   [291.4565124511719, 422.2826232910156],
                   [387.043701171875, 360.78155517578125], [343.0, 274.5],
                   [362.0, 166.5]]

    feature_list = [636, 395, 1046, 500, 1605]

    # programmatic checkpoints
    circle_radius = 8
    check_list = []
    check_counter = 0

    for i in coordinates:
        i[0] = int(i[0])
        i[1] = int(i[1])

        roi = img_circle[i[1] - (3 * circle_radius):i[1] + (3 * circle_radius),
                         i[0] - (3 * circle_radius):i[0] + (3 * circle_radius)]
        roi = roi.reshape(int(roi.size / 3), 3)

        if [255, 255, 255] in roi.tolist():
            check_list.append(1)
            check_counter += 1

        else:
            check_list.append(0)

    check_result = (check_counter / len(check_list)) * 100

    print("Programmatic Analysis Result = ")
    print(check_result)

    ####################################### ANALYSIS USING FEATURE MATCHING ##################################################

    # load the image and convert it to grayscale
    gray1 = cv2.cvtColor(img_perfect, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img_akaze, cv2.COLOR_BGR2GRAY)

    # initialize the AKAZE descriptor, then detect keypoints and extract
    # local invariant descriptors from the image
    sift = cv2.xfeatures2d.SIFT_create()
    surf = cv2.xfeatures2d.SURF_create()
    akaze = cv2.AKAZE_create()
    brisk = cv2.BRISK_create()
    orb = cv2.ORB_create()

    (akazekps1, akazedescs1) = akaze.detectAndCompute(gray1, None)
    (akazekps2, akazedescs2) = akaze.detectAndCompute(gray2, None)
    (siftkps1, siftdescs1) = sift.detectAndCompute(gray1, None)
    (siftkps2, siftdescs2) = sift.detectAndCompute(gray2, None)
    (surfkps1, surfdescs1) = surf.detectAndCompute(gray1, None)
    (surfkps2, surfdescs2) = surf.detectAndCompute(gray2, None)
    (briskkps1, briskdescs1) = brisk.detectAndCompute(gray1, None)
    (briskkps2, briskdescs2) = brisk.detectAndCompute(gray2, None)
    (orbkps1, orbdescs1) = orb.detectAndCompute(gray1, None)
    (orbkps2, orbdescs2) = orb.detectAndCompute(gray2, None)

    #print("No of KeyPoints:")
    #print("akazekeypoints1: {}, akazedescriptors1: {}".format(len(akazekps1), akazedescs1.shape))
    #print("akazekeypoints2: {}, akazedescriptors2: {}".format(len(akazekps2), akazedescs2.shape))
    #print("siftkeypoints1: {}, siftdescriptors1: {}".format(len(siftkps1), siftdescs1.shape))
    #print("siftkeypoints2: {}, siftdescriptors2: {}".format(len(siftkps2), siftdescs2.shape))
    #print("surfkeypoints1: {}, surfdescriptors1: {}".format(len(surfkps1), surfdescs1.shape))
    #print("surfkeypoints2: {}, surfdescriptors2: {}".format(len(surfkps2), surfdescs2.shape))
    #print("briskkeypoints1: {}, briskdescriptors1: {}".format(len(briskkps1), briskdescs1.shape))
    #print("briskkeypoints2: {}, briskdescriptors2: {}".format(len(briskkps2), briskdescs2.shape))
    #print("orbkeypoints1: {}, orbdescriptors1: {}".format(len(orbkps1), orbdescs1.shape))
    #print("orbkeypoints2: {}, orbdescriptors2: {}".format(len(orbkps2), orbdescs2.shape))

    # Match the features
    bfakaze = cv2.BFMatcher(cv2.NORM_HAMMING)
    bf = cv2.BFMatcher(cv2.NORM_L2)
    akazematches = bfakaze.knnMatch(akazedescs1, akazedescs2, k=2)
    siftmatches = bf.knnMatch(siftdescs1, siftdescs2, k=2)
    surfmatches = bf.knnMatch(surfdescs1, surfdescs2, k=2)
    briskmatches = bf.knnMatch(briskdescs1, briskdescs2, k=2)
    orbmatches = bf.knnMatch(orbdescs1, orbdescs2, k=2)

    # Apply ratio test on AKAZE matches
    goodakaze = []
    for m, n in akazematches:
        if m.distance < 0.9 * n.distance:
            goodakaze.append([m])

    im3akaze = cv2.drawMatchesKnn(img_perfect,
                                  akazekps1,
                                  img_akaze,
                                  akazekps2,
                                  goodakaze[:100],
                                  None,
                                  flags=2)
    cv2.imshow("AKAZE matching", im3akaze)
    goodakaze = np.asarray(goodakaze)
    print("akaze")
    similarity_akaze = (goodakaze.shape[0] / feature_list[0]) * 100
    print(similarity_akaze)

    # Apply ratio test on SIFT matches
    goodsift = []
    for m, n in siftmatches:
        if m.distance < 0.9 * n.distance:
            goodsift.append([m])

    im3sift = cv2.drawMatchesKnn(img_perfect,
                                 siftkps1,
                                 img_akaze,
                                 siftkps2,
                                 goodsift[:],
                                 None,
                                 flags=2)
    cv2.imshow("SIFT matching", im3sift)
    goodsift = np.asarray(goodsift)
    print("sift")
    similarity_sift = (goodsift.shape[0] / feature_list[1]) * 100
    print(similarity_sift)

    # Apply ratio test on SURF matches
    goodsurf = []
    for m, n in surfmatches:
        if m.distance < 0.9 * n.distance:
            goodsurf.append([m])

    im3surf = cv2.drawMatchesKnn(img_perfect,
                                 surfkps1,
                                 img_akaze,
                                 surfkps2,
                                 goodsurf[:],
                                 None,
                                 flags=2)
    cv2.imshow("SURF matching", im3surf)
    goodsurf = np.asarray(goodsurf)
    print("surf")
    similarity_surf = (goodsurf.shape[0] / feature_list[2]) * 100
    print(similarity_surf)

    # Apply ratio test on ORB matches
    goodorb = []
    for m, n in orbmatches:
        if m.distance < 0.9 * n.distance:
            goodorb.append([m])
    im3orb = cv2.drawMatchesKnn(img_perfect,
                                orbkps1,
                                img_akaze,
                                orbkps2,
                                goodorb[:],
                                None,
                                flags=2)
    cv2.imshow("ORB matching", im3orb)
    goodorb = np.asarray(goodorb)
    print("orb")
    similarity_orb = (goodorb.shape[0] / feature_list[3]) * 100
    print(similarity_orb)

    # Apply ratio test on BRISK matches
    goodbrisk = []
    for m, n in briskmatches:
        if m.distance < 0.9 * n.distance:
            goodbrisk.append([m])

    im3brisk = cv2.drawMatchesKnn(img_perfect,
                                  briskkps1,
                                  img_akaze,
                                  briskkps2,
                                  goodbrisk[:],
                                  None,
                                  flags=2)
    cv2.imshow("BRISK matching", im3brisk)
    goodbrisk = np.asarray(goodbrisk)
    print("brisk")
    similarity_brisk = (goodbrisk.shape[0] / feature_list[4]) * 100
    print(similarity_brisk)
    features_result = (similarity_akaze + similarity_brisk + similarity_orb +
                       similarity_sift + similarity_surf) / 5
    print("Overall similarity using features: ")
    print(features_result)

    ######################################### HOG CORRELATION ###############################################

    bin_n = 16
    #img = cv2.imread("not_perfect_trajectory2.png")
    gx = cv2.Sobel(img_perfect, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img_perfect, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    # quantizing binvalues in (0...16)
    bins = np.int32(bin_n * ang / (2 * np.pi))
    # Divide to 4 sub-squares
    bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:]
    mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:]
    hists = [
        np.bincount(b.ravel(), m.ravel(), bin_n)
        for b, m in zip(bin_cells, mag_cells)
    ]
    hist1 = np.hstack(hists)

    rows, cols, _ = img_akaze.shape
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 0, 1)
    img_akaze = cv2.warpAffine(img_akaze, M, (cols, rows))
    gx = cv2.Sobel(img_akaze, cv2.CV_32F, 1, 0)
    gy = cv2.Sobel(img_akaze, cv2.CV_32F, 0, 1)
    mag, ang = cv2.cartToPolar(gx, gy)
    # quantizing binvalues in (0...16)
    bins = np.int32(bin_n * ang / (2 * np.pi))
    # Divide to 4 sub-squares
    bin_cells = bins[:10, :10], bins[10:, :10], bins[:10, 10:], bins[10:, 10:]
    mag_cells = mag[:10, :10], mag[10:, :10], mag[:10, 10:], mag[10:, 10:]
    hists = [
        np.bincount(b.ravel(), m.ravel(), bin_n)
        for b, m in zip(bin_cells, mag_cells)
    ]
    hist2 = np.hstack(hists)

    hog_result = np.corrcoef(hist1, hist2)[0, 1] * 100
    print("HOG CORRELATION RESULT = ")
    print(hog_result)

    return (check_result, features_result, hog_result)

    cv2.imshow("image_akaze", img_akaze)
    cv2.imshow("img_circle", img_circle)
    cv2.waitKey(0)
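Stripped of the image plumbing, the HOG comparison at the end reduces to a single corrcoef call between the two stacked histograms. A minimal sketch with random stand-in histograms:

import numpy as np

hist1 = np.random.rand(64)
hist2 = np.random.rand(64)
hog_similarity = np.corrcoef(hist1, hist2)[0, 1] * 100   # percent correlation
print(hog_similarity)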
Example #54
0
    if os.path.exists('hahow_courses.json'):
        with open('hahow_courses.json', 'r', encoding='utf-8') as f:
            courses = json.load(f)
    else:
        courses = crawl()
    print('hahow has a total of %d courses' % len(courses))

    # Extract programming courses
    #programming_classes = [c for c in courses if '55de81ac9d1fa51000f94770' in c['categories']]

    # Collect the pre-order price / launch price / student count / length of programming courses and show statistics
    pre_order_prices = list()
    prices = list()
    tickets = list()
    lengths = list()
    for c in courses:
        if '55de81ac9d1fa51000f94770' in c['categories']:
            pre_order_prices.append(c['preOrderedPrice'])
            prices.append(c['price'])
            tickets.append(c['numSoldTickets'])
            lengths.append(c['totalVideoLengthInSeconds'])
    print('Average pre-order price:', np.mean(pre_order_prices))
    print('Average launch price:', np.mean(prices))
    print('Average number of students:', np.mean(tickets))
    print('Average course length (minutes):', np.mean(lengths) / 60)
    # print(np.corrcoef([tickets, pre_order_prices, prices, length]))
    corrcoef = np.corrcoef([tickets, pre_order_prices, prices, lengths])
    print('Correlation between pre-order price and number of students: ', corrcoef[0, 1])
    print('Correlation between launch price and number of students: ', corrcoef[0, 2])
    print('Correlation between course length and number of students: ', corrcoef[0, 3])
Example #55
0
    def pearson_corr(self, all_auto_array):
        pearson_result = np.corrcoef(all_auto_array[:, 1000:])
        print("pearson result is completely calculated")
Example #56
0
X.shape

# In[11]:

X1 = X

# In[12]:

SVD = TruncatedSVD(n_components=10)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

# In[13]:

correlation_matrix = np.corrcoef(decomposed_matrix)
correlation_matrix.shape

# In[14]:

X.index[99]

# In[15]:

i = "6117036094"

product_names = list(X.index)
product_ID = product_names.index(i)
product_ID

# In[16]:
Example #57
0
# -------------------------------------------------------------------
# 1. calculate the squared difference between two vectors
sq_diff_ab = np.square(mean_vector_amp - mean_vector_bp)
sse_ab = np.sum(sq_diff_ab)
norm_ab = np.sqrt(sse_ab)
print('the L2-Norm is %.2f' % norm_ab)

# 2. threshold and ratio
counter = 0
threshold = 0.01
print('the threshold is %.2f%%' % (threshold * 100))
for i in range(num_elements):
    diff = np.abs(mean_vector_amp[0][i] - mean_vector_bp[0][i])
    if diff <= threshold:
        counter += 1
ratio = float(counter) / num_elements
print('the ratio is %.2f%%' % (ratio * 100))

# 3. calculate the correlation between two vectors
cocoef_matrix = np.corrcoef(mean_array_amp, mean_array_bp)
cocoef = cocoef_matrix[0, 1]
print('the correlation coefficient is %0.3f' % cocoef)

# 4. Kruskal-Wallis test for a median difference between the two distributions
H, pvalue = kruskalwallis(mean_vector_amp[0], mean_vector_bp[0])
print('the p-value is %.2f' % pvalue)

if pvalue > 0.05:
    print("accept null hypothesis: no significant difference between two groups")
# -------------------------------------------------------------------
Example #58
0
def average_pearson_score(x):
    if isinstance(x, DataFrame):
        x = x.values
    rho = corrcoef(x, rowvar=0)
    return mean(abs(rho[triu_indices_from(rho, 1)]))
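A usage sketch for the function above; the result is the mean absolute off-diagonal correlation between the columns of a synthetic DataFrame:

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(200, 4), columns=list('abcd'))
print(average_pearson_score(df))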
Example #59
0
def pearson_correlation(y_true, y_pred):
    return np.corrcoef(y_true, y_pred)[0][1]
Example #60
0
    def evaluate(self, y_true, y_pred, silent=False, auxiliary_metrics=False, detailed_report=True, high_always_good=False):
        """ Evaluate predictions.
            Args:
                silent (bool): Should we print which metric is being used as well as performance.
                auxiliary_metrics (bool): Should we compute other (problem_type specific) metrics in addition to the default metric?
                detailed_report (bool): Should we compute more-detailed versions of the auxiliary_metrics? (requires auxiliary_metrics=True).
                high_always_good (bool): If True, this means higher values of returned metric are ALWAYS superior (so metrics like MSE should be returned negated)

            Returns single performance-value if auxiliary_metrics=False.
            Otherwise returns dict where keys = metrics, values = performance along each metric.
        """
        assert isinstance(y_true, (np.ndarray, pd.Series))
        assert isinstance(y_pred, (np.ndarray, pd.Series))  # TODO: Enable DataFrame for y_pred_proba

        # TODO: Consider removing _remove_missing_labels, this creates an inconsistency between how .score, .score_debug, and .evaluate compute scores.
        y_true, y_pred = self._remove_missing_labels(y_true, y_pred)
        performance = self.eval_metric(y_true, y_pred)

        metric = self.eval_metric.name

        if not high_always_good:
            performance = performance * self.eval_metric._sign  # flip negative once again back to positive (so higher is no longer necessarily better)

        if not silent:
            logger.log(20, f"Evaluation: {metric} on test data: {performance}")

        if not auxiliary_metrics:
            return performance

        # Otherwise compute auxiliary metrics:
        auxiliary_metrics = []
        if self.problem_type == REGRESSION:  # Adding regression metrics
            pearson_corr = lambda x, y: corrcoef(x, y)[0][1]
            pearson_corr.__name__ = 'pearson_correlation'
            auxiliary_metrics += [
                mean_absolute_error, explained_variance_score, r2_score, pearson_corr, mean_squared_error, median_absolute_error,
                # max_error
            ]
        else:  # Adding classification metrics
            auxiliary_metrics += [accuracy_score, balanced_accuracy_score, matthews_corrcoef]
            if self.problem_type == BINARY:  # binary-specific metrics
                # def auc_score(y_true, y_pred): # TODO: this requires y_pred to be probability-scores
                #     fpr, tpr, _ = roc_curve(y_true, y_pred, pos_label)
                #   return auc(fpr, tpr)
                f1micro_score = lambda y_true, y_pred: f1_score(y_true, y_pred, average='micro')
                f1micro_score.__name__ = f1_score.__name__
                auxiliary_metrics += [f1micro_score]  # TODO: add auc?
            # elif self.problem_type == MULTICLASS:  # multiclass metrics
            #     auxiliary_metrics += []  # TODO: No multi-class specific metrics for now. Include top-5, top-10 accuracy here.

        performance_dict = OrderedDict({metric: performance})
        for metric_function in auxiliary_metrics:
            if isinstance(metric_function, tuple):
                metric_function, metric_kwargs = metric_function
            else:
                metric_kwargs = None
            metric_name = metric_function.__name__
            if metric_name not in performance_dict:
                try:  # only compute auxiliary metrics which do not error (y_pred = class-probabilities may cause some metrics to error)
                    if metric_kwargs:
                        performance_dict[metric_name] = metric_function(y_true, y_pred, **metric_kwargs)
                    else:
                        performance_dict[metric_name] = metric_function(y_true, y_pred)
                except ValueError:
                    pass

        if not silent:
            logger.log(20, "Evaluations on test data:")
            logger.log(20, json.dumps(performance_dict, indent=4))

        if detailed_report and (self.problem_type != REGRESSION):
            # Construct confusion matrix
            try:
                performance_dict['confusion_matrix'] = confusion_matrix(y_true, y_pred, labels=self.label_cleaner.ordered_class_labels, output_format='pandas_dataframe')
            except ValueError:
                pass
            # One final set of metrics to report
            cl_metric = lambda y_true, y_pred: classification_report(y_true, y_pred, output_dict=True)
            metric_name = 'classification_report'
            if metric_name not in performance_dict:
                try:  # only compute auxiliary metrics which do not error (y_pred = class-probabilities may cause some metrics to error)
                    performance_dict[metric_name] = cl_metric(y_true, y_pred)
                except ValueError:
                    pass
                if not silent and metric_name in performance_dict:
                    logger.log(20, "Detailed (per-class) classification report:")
                    logger.log(20, json.dumps(performance_dict[metric_name], indent=4))
        return performance_dict