def correlation(mat_file_1, mat_file_2):
    """
    Draws the plot
    """

    blockades_1 = read_mat(mat_file_1)
    blockades_1 = sp._fractional_blockades(blockades_1)
    blockades_1 = sp._filter_by_duration(blockades_1, 0.5, 20)
    # materialize as a list so blockades_1 can be iterated repeatedly below
    blockades_1 = list(map(lambda b: sp.discretize(sp._trim_flank_noise(b.eventTrace), 20), blockades_1))

    blockades_2 = read_mat(mat_file_2)
    blockades_2 = sp._fractional_blockades(blockades_2)
    blockades_2 = sp._filter_by_duration(blockades_2, 0.5, 20)
    # materialize as a list so blockades_2 can be iterated repeatedly in the inner loop below
    blockades_2 = list(map(lambda b: sp.discretize(sp._trim_flank_noise(b.eventTrace), 20), blockades_2))

    self_corr = []
    cross_corr = []
    for blockade in blockades_1:
        block_self = []
        for other in blockades_1:
            block_self.append(1 - distance.correlation(blockade, other))
        block_cross = []
        for other in blockades_2:
            block_cross.append(1 - distance.correlation(blockade, other))
        self_corr.append(np.mean(block_self))
        cross_corr.append(np.mean(block_cross))

    # medians serve as robust centers for the red guide lines below
    mean_self = np.median(self_corr)
    mean_cross = np.median(cross_corr)

    matplotlib.rcParams.update({"font.size": 16})
    fig = plt.subplot()

    fig.spines["right"].set_visible(False)
    fig.spines["top"].set_visible(False)
    fig.get_xaxis().tick_bottom()
    fig.get_yaxis().tick_left()
    fig.set_xlim(-0.6, 0.6)
    fig.set_ylim(-0.6, 0.6)
    fig.set_xlabel("(H3 tail, H3 tail) correlation")
    fig.set_ylabel("(H3 tail, CCL5) correlation")

    for y in [-0.4, -0.2, 0, 0.2, 0.4]:
        plt.plot((-0.6, 0.6), (y, y), "--",
                 lw=0.5, color="black")
        plt.plot((y, y), (-0.6, 0.6), "--",
                 lw=0.5, color="black")

    plt.plot((-0.6, 0.6), (mean_cross, mean_cross), "--",
             lw=1.5, color="red")
    plt.plot((mean_self, mean_self), (-0.6, 0.6), "--",
             lw=1.5, color="red")

    fig.scatter(self_corr, cross_corr, linewidth=0.5, c="dodgerblue", 
                s=30, edgecolor="blue")

    plt.tight_layout()
    plt.show()
Example #2
def alignment(agent_a_before, agent_b_before, agent_a_after, agent_b_after):
    """Change in correlation distance between two agents."""
    d_before = dist.correlation(
        dist.squareform(agent_a_before.op.graph.adj),
        dist.squareform(agent_b_before.op.graph.adj),
    )
    d_after = dist.correlation(
        dist.squareform(agent_a_after.op.graph.adj),
        dist.squareform(agent_b_after.op.graph.adj),
    )
    return -1 * (d_after - d_before)
Example #3
def seqcor(m1, m2, seq=None):
    """Calculates motif similarity based on Pearson correlation of scores.

    Based on Kielbasa (2015) and Grau (2015).
    Scores are calculated based on scanning a de Bruijn sequence of 7-mers.
    This sequence is taken from ShortCAKE (Orenstein & Shamir, 2015). 
    Optionally another sequence can be given as an argument.

    Parameters
    ----------
    m1 : Motif instance
        Motif 1 to compare.
    
    m2 : Motif instance
        Motif 2 to compare.
    
    seq : str, optional
        Sequence to use for scanning instead of k=7 de Bruijn sequence.
    
    Returns
    -------
    score, position, strand
    """
    l1 = len(m1)
    l2 = len(m2)

    l = max(l1, l2)

    if seq is None:
        seq = RCDB 
    
    L = len(seq)

    # Scan RC de Bruijn sequence
    result1 = pfmscan(seq, m1.pwm, m1.pwm_min_score(), len(seq), False, True)
    result2 = pfmscan(seq, m2.pwm, m2.pwm_min_score(), len(seq), False, True)
    
    # Reverse complement of motif 2
    result3 = pfmscan(seq, m2.rc().pwm, m2.rc().pwm_min_score(), len(seq), False, True)
    
    result1 = np.array(result1)
    result2 = np.array(result2)
    result3 = np.array(result3)

    # Return maximum correlation
    c = []
    for i in range(l1 - l1 // 3):
        c.append([1 - distance.correlation(result1[:L-l-i],result2[i:L-l]), i, 1])
        c.append([1 - distance.correlation(result1[:L-l-i],result3[i:L-l]), i, -1])
    for i in range(l2 - l2 // 3):
        c.append([1 - distance.correlation(result1[i:L-l],result2[:L-l-i]), -i, 1])
        c.append([1 - distance.correlation(result1[i:L-l],result3[:L-l-i]), -i, -1])
    
    return sorted(c, key=lambda x: x[0])[-1]
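# Quick check (not from the example's source project) of the identity the score
# above relies on: 1 - scipy.spatial.distance.correlation(a, b) is exactly the
# Pearson correlation coefficient of the two score vectors.
import numpy as np
from scipy.spatial import distance
from scipy.stats import pearsonr

a = np.array([0.1, 0.5, 0.9, 0.3, 0.7])
b = np.array([0.2, 0.4, 1.0, 0.2, 0.6])

print(1 - distance.correlation(a, b))  # correlation similarity of the score vectors
print(pearsonr(a, b)[0])               # Pearson r, the same value up to float error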
Example #4
def cor_dist(a, b):
    """
    Calculates the correlation coefficient distance
    between a (list of) vector(s) b and reference vector a
    :param a: A single query image
    :param b: One or more reference images
    :return:
    """
    a = a.flatten()
    if isinstance(b, list):
        return [correlation(a, img.flatten()) for img in b]

    return correlation(a, b.flatten())
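# Usage sketch for cor_dist (hypothetical arrays, not from the source project;
# assumes numpy as np and scipy.spatial.distance.correlation are imported as in
# the example above).
import numpy as np

query = np.random.rand(8, 8)                     # stand-in for a query image
refs = [np.random.rand(8, 8) for _ in range(3)]  # stand-ins for reference images

print(cor_dist(query, refs))     # list of three correlation distances
print(cor_dist(query, refs[0]))  # single correlation distance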
Example #5
def smaf(X, d, lda1, lda2, maxItr=10, UW=None, posW=False, posU=True, use_chol=False, module_lower=500,
         activity_lower=5, donorm=False, mode=1, mink=5, U0=[], U0_delta=0.1, doprint=False):
    # use Cholesky when we expect a very sparse result
    # this tends to happen more on the full vs subsampled matrices
    if UW is None:
        U, W = spams.nmf(np.asfortranarray(X), return_lasso=True, K=d, numThreads=THREADS)
        W = np.asarray(W.todense())
    else:
        U, W = UW
    Xhat = U.dot(W)
    Xnorm = np.linalg.norm(X) ** 2 / X.shape[1]
    for itr in range(maxItr):
        if mode == 1:
            # In this mode the ldas correspond to an approximate desired fit
            # Higher lda will be a worse fit, but will result in a sparser sol'n
            U = spams.lasso(np.asfortranarray(X.T), D=np.asfortranarray(W.T),
                            lambda1=lda2 * Xnorm, mode=1, numThreads=THREADS, cholesky=use_chol, pos=posU)
            U = np.asarray(U.todense()).T
        elif mode == 2:
            if len(U0) > 0:
                U = projected_grad_desc(W.T, X.T, U.T, U0.T, lda2, U0_delta, maxItr=400)
                U = U.T
            else:
                U = spams.lasso(np.asfortranarray(X.T), D=np.asfortranarray(W.T),
                                lambda1=lda2, lambda2=0.0, mode=2, numThreads=THREADS, cholesky=use_chol, pos=posU)
                U = np.asarray(U.todense()).T
        if donorm:
            U = U / np.linalg.norm(U, axis=0)
            U[np.isnan(U)] = 0
        if mode == 1:
            wf = (1 - lda2)
            W = sparse_decode(X, U, lda1, worstFit=wf, mink=mink)
        elif mode == 2:
            if len(U0) > 0:
                W = projected_grad_desc(U, X, W, [], lda1, 0., nonneg=posW, maxItr=400)
            else:
                W = spams.lasso(np.asfortranarray(X), D=np.asfortranarray(U),
                                lambda1=lda1, lambda2=1.0, mode=2, numThreads=THREADS, cholesky=use_chol, pos=posW)
                W = np.asarray(W.todense())
        Xhat = U.dot(W)
        module_size = np.average([np.exp(entropy(u)) for u in U.T if u.sum() > 0])
        activity_size = np.average([np.exp(entropy(abs(w))) for w in W.T])
        if doprint:
            print distance.correlation(X.flatten(), Xhat.flatten()), module_size, activity_size, lda1, lda2
        if module_size < module_lower:
            lda2 /= 2.
        if activity_size < activity_lower:
            lda2 /= 2.
    return U, W
 def get_feat(trainDTMatirix, male, female):
     featMatrix = []
     for i in range (0, trainDTMatirix.shape[0]):
         tempfeat = []
         tempfeat.append(correlation(male,trainDTMatirix[i,:].tolist()[0]))
         tempfeat.append(cosine(male,trainDTMatirix[i,:].tolist()[0]))
         tempfeat.append(euclidean(male,trainDTMatirix[i,:].tolist()[0]))
         tempfeat.append(correlation(female,trainDTMatirix[i,:].tolist()[0]))
         tempfeat.append(cosine(female,trainDTMatirix[i,:].tolist()[0]))
         tempfeat.append(euclidean(female,trainDTMatirix[i,:].tolist()[0]))
         featMatrix.append(tempfeat)
     featMatrix = numpy.matrix(featMatrix)
     featMatrix = numpy.nan_to_num(featMatrix)
     trainDTMatirix = featMatrix
     return trainDTMatirix
Example #7
def cosine_similarity(number_of_recomm, user_input_movies, user_input_ratings):
    #Create the mean-filled dense_matrix
    dense_matrix = create_dense()
    #Create the user array
    mean_rating_1 = mean_rating()
    user = np.repeat(mean_rating_1, dense_matrix.shape[1])
    user_df = pd.DataFrame([user], columns = dense_matrix.columns)
    #Collect user input
    user_mov_index = convert_user_input(user_input_movies, user_input_ratings)
    #Impute user ratings
    for mov_id in user_mov_index:
        user_df[mov_id[0]] = mov_id[1]
    #Append it to the original user_movie_matrix
    dense_matrix_user = pd.concat([dense_matrix, user_df], ignore_index=False)
    #Create user-user sparse matrix
    UU = np.zeros((len(dense_matrix_user), len(dense_matrix_user)))
    UU = pd.DataFrame(UU, index=dense_matrix_user.index, columns=dense_matrix_user.index)
    # calculate similarities between the active user (index 0) and every user
    u = 0
    for v in UU.columns:
        UU.loc[u, v] = 1-distance.correlation(dense_matrix_user.loc[u],
                                                    dense_matrix_user.loc[v])
    active_user = 0
    # find similarities for active_user and sort it, take 1 to 5 entries
    # entry at 0 contains the similarity with itself
    neighbors = UU.loc[active_user].sort_values(ascending=False)[1:6]
    #Final matrix
    neighbors_m = dense_matrix_user.loc[neighbors.index]
    #Take the first user and suggest movies that person liked
    random_mov = np.random.randint(6)
    movies_list = list(neighbors_m.iloc[random_mov].sort_values(ascending = False)
                        .head(number_of_recomm).index.map(movie_id_dict))

    return movies_list
Example #8
def correlation(x, y):
    try:
        return distance.correlation(x, y)
    except ValueError:
        return np.NaN
    except:
        return np.NaN
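# Usage sketch (hypothetical inputs; assumes numpy as np and
# scipy.spatial.distance as distance, as in the example above): vectors of
# mismatched length make NumPy raise ValueError inside SciPy, which this
# wrapper converts into np.NaN.
print(correlation([1.0, 2.0, 3.0], [3.0, 2.0, 1.0]))  # 2.0, perfectly anti-correlated
print(correlation([1.0, 2.0, 3.0], [1.0, 2.0]))       # nan instead of an exception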
Example #9
    def get_nearest_neighbor(self, x_test, k, sample_class):
        distances = []
        targets_index = []
        for i in range(len(sample_class)):
            if (sample_class[i][:] != x_test).any():
                if self.distance_calculator == 'jaccard':
                    distance = dis.jaccard(x_test, sample_class[i][:])
                elif self.distance_calculator == 'dice':
                    distance = dis.dice(x_test, sample_class[i][:])
                elif self.distance_calculator == 'correlation':
                    distance = dis.correlation(x_test, sample_class[i][:])
                elif self.distance_calculator == 'yule':
                    distance = dis.yule(x_test, sample_class[i][:])
                elif self.distance_calculator == 'russelo-rao':
                    distance = dis.russellrao(x_test, sample_class[i][:])
                elif self.distance_calculator == 'sokal-michener':
                    distance = dis.sokalmichener(x_test, sample_class[i][:])
                elif self.distance_calculator == 'rogers-tanimoto':
                    distance = dis.rogerstanimoto(x_test, sample_class[i][:])
                elif self.distance_calculator == 'kulzinsky':
                    distance = dis.kulsinski(x_test, sample_class[i][:])
                distances.append([distance, i])

        # make a list of the k neighbors' targets
        distances.sort()
        for i in range(k):
            targets_index.append(distances[i][1])
        return targets_index
def isADoor(contX, contY):
	contXNew, contYNew = getNewXAndY(contX, contY, len(x))
	# contArr = np.divide(contXNew, contYNew)
	# print(contArr)
	# corr = ssd.correlation(ynew, contYNew)
	# spear = ss.spearmanr(ynew, contYNew)
	# pearson = np.correlate(ynew, contYNew, mode='valid')

	# corr = ssd.correlation(y, contYNew)
	# spear = ss.spearmanr(y, contYNew)
	# pearson = np.correlate(y, contYNew, mode='valid')

	# corr = ssd.correlation(templateArr, contArr)
	# spear = ss.spearmanr(templateArr, contArr)
	# pearson = np.correlate(templateArr, contArr)

	deltaX, deltaY = genDeltaXAndY(x,y)
	contDeltaX, contDeltaY = genDeltaXAndY(contXNew, contYNew)

	corr = ssd.correlation(deltaY, contDeltaY)
	spear = ss.spearmanr(deltaY, contDeltaY)
	pearson = np.correlate(deltaY, contDeltaY, mode='valid')

	# print(corr, corr**2,spear, pearson),
	# plt.figure()
	# plt.plot(x, y, 'r', contXNew, contYNew, 'g')
	# plt.plot(deltaX, deltaY, 'r', contDeltaX, contDeltaY, 'g')
	# plt.show()
	# global i
	# plt.savefig("RadialProfiles/GraphComp"+str(i)+".png")
	# i += 1
	# plt.clf()
	return corr < 0.1
Example #11
def test_embeddingset_plot_arrow_emb_axis_with_different_axis_metric(embset):
    fig, ax = mpl.pyplot.subplots()
    embset.plot(
        kind="arrow",
        x_axis=embset["blue"],
        y_axis="red",
        axis_metric=[scipy_distance.correlation, "cosine_similarity"],
        x_label="xx",
        color="magenta",
    )
    vectors = []
    for emb in embset.embeddings.values():
        vectors.append([
            scipy_distance.correlation(emb.vector, embset["blue"].vector),
            1.0 - scipy_distance.cosine(emb.vector, embset["red"].vector),
        ])
    vectors = np.array(vectors)
    props = {
        "type": mpl.collections.PolyCollection,
        "data": vectors,
        "x_label": "xx",
        "y_label": "red",
        "title": "",
        "label": list(embset.embeddings.keys()),
        "color": mpl.colors.to_rgba_array("magenta"),
        "aspect": "auto",
    }
    UV = np.concatenate(
        (ax.collections[1].U[:, None], ax.collections[1].V[:, None]), axis=-1)
    assert isinstance(ax.collections[1], props["type"])
    assert np.array_equal(UV, props["data"])
    assert [t.get_text() for t in ax.texts] == props["label"]
    assert np.array_equal(ax.collections[1].get_facecolors(), props["color"])
    validate_plot_general_properties(ax, props)
    mpl.pyplot.close(fig)
def evaluate_continue_change(con_distri_features,soft_add,software):
    #roc_auc_DF=evaluation_ranks(passed_qc_sc_DF_cond,soft_add,software,UBI=UBIs[1:5])
    #plot_evaluate_heat(passed_qc_sc_DF_RO,soft_add,con_distri_features,software,UBIs)
    if software=='multi-metric':
        passed_qc_sc_DF=pd.read_table(soft_add,header=0,index_col=0)
        phenotime=passed_qc_sc_DF[['ord']]
    elif (software=='wishbone') | (software=='CIRCLET'):
        phenotime=pd.read_table(soft_add,header=None,index_col=0)
    phenotime.columns=['Pseudotime']
    ordIndex=phenotime.sort_values(by='Pseudotime')
    old_sc_name=ordIndex.index[-1]
    sc_name=ordIndex.index[0]
    corr_list=list()
    for sc_name in ordIndex.index:
        x=con_distri_features.loc[old_sc_name]
        y=con_distri_features.loc[sc_name]
        old_sc_name=sc_name
        #temp=stats.pearsonr(x,y)[0]
        #temp=distance.cosine(x,y)
        #temp=np.abs(distance.cosine(x,y)-1)
        temp=np.abs(distance.correlation(x,y)-1)
        corr_list.append(temp)
    evaluation_value=np.mean(corr_list)
    #print(evaluation_value)
    return evaluation_value
def Dist(array1, array2, dist):
    if dist == 'braycurtis':
        return distance.braycurtis(array1, array2)
    elif dist == 'correlation':
        return distance.correlation(array1, array2)
    elif dist == 'mahalanobis':
        return distance.mahalanobis(array1, array2)
    elif dist == 'minkowski':
        return distance.minkowski(array1, array2)
    elif dist == 'seuclidean':
        return distance.seuclidean(array1, array2)
    elif dist == 'sqeuclidean':
        return distance.sqeuclidean(array1, array2)
    elif dist == 'pearsonp':
        r, p = pearsonr(array1, array2)
        return p
    elif dist == 'pearsonr':
        r, p = pearsonr(array1, array2)
        return r
    elif dist == 'spearmanp':
        r, p = spearmanr(array1, array2)
        return p
    elif dist == 'spearmanr':
        r, p = spearmanr(array1, array2)
        return r
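# Note on the branches above (not from the source project): SciPy's mahalanobis
# and seuclidean take a required third argument (an inverse covariance matrix,
# resp. a variance vector), so those two branches need it to run. A sketch of
# the mahalanobis call with hypothetical data:
import numpy as np
from scipy.spatial import distance

data = np.random.rand(50, 3)
VI = np.linalg.inv(np.cov(data, rowvar=False))  # inverse covariance of the sample
print(distance.mahalanobis(data[0], data[1], VI))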
Example #14
 def _dodetect(self):
     dist = np.zeros((self.img1.shape[0], self.img1.shape[1]))
     for i in range(self.img1.shape[0]):
         for j in range(self.img1.shape[1]):
             dist[i, j] = distance.correlation(self.img1[i, j, :],
                                               self.img2[i, j, :])
     self.change = dist
Example #15
def computeDistance(func: str, Vi: typ.List[float],
                    Vj: typ.List[float]) -> typ.Union[float, int]:
    """ Computes the distance using the provided distance function.

    :param func: Lowercase string name of the function to use.
    :param Vi: First 1d vector
    :param Vj: Second 1d vector
    
    :type func: str
    :type Vi: typ.List[float]
    :type Vj: typ.List[float]
    
    :return: vector of distance values
    :rtype: typ.Union[float, int]
    """

    if func == "czekanowski":  # if the function provided was Czekanowski,
        return __Czekanowski(Vi, Vj)

    elif func == "euclidean":  # if the euclidean distance was requested
        return __Euclidean(Vi, Vj)

    elif func == "correlation":  # if the correlation distance/value was requested
        return sp.correlation(Vi, Vj)

    elif func == "cosine":  # if the cosine similarity function was requested
        # NOTE: this computes the distance, to compute similarity subtract result from 1
        return sp.cosine(Vi, Vj)

    else:  # if no valid distance function was provided, default to the euclidean distance
        return __Euclidean(Vi, Vj)
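# Usage sketch (hypothetical vectors; assumes sp refers to scipy.spatial.distance,
# as the correlation/cosine branches above imply):
v_i = [1.0, 2.0, 3.0, 4.0]
v_j = [2.0, 3.9, 6.1, 8.0]
print(computeDistance("correlation", v_i, v_j))  # near 0: almost perfectly correlated
print(computeDistance("cosine", v_i, v_j))       # cosine distance of the same vectors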
 def calculateL2(self, feat1, feat2, c_type='euclidean'):
     assert np.shape(feat1) == np.shape(feat2)
     if config.insight:
         [
             len_,
         ] = np.shape(feat1)
         #print(np.shape(feat1))
     else:
         _, len_ = np.shape(feat1)
     #print("len ",len_)
     if c_type == "cosine":
         s_d = distance.cosine(feat1, feat2)
     elif c_type == "euclidean":
         #s_d = np.sqrt(np.sum(np.square(feat1-feat2)))
         #s_d = distance.euclidean(feat1,feat2,w=1./len_)
         s_d = distance.euclidean(feat1, feat2, w=1)
     elif c_type == "correlation":
         s_d = distance.correlation(feat1, feat2)
     elif c_type == "braycurtis":
         s_d = distance.braycurtis(feat1, feat2)
     elif c_type == 'canberra':
         s_d = distance.canberra(feat1, feat2)
     elif c_type == "chebyshev":
         s_d = distance.chebyshev(feat1, feat2)
     return s_d
Example #17
    def correlate(self, preFeature, curFeature):
        #return a correlation score between img1 and img2. The higher the better!

        #Feature for VGG
        return 1 - correlation(preFeature, curFeature)

        # unreachable: kept from an earlier histogram-based comparison
        return cv2.compareHist(preFeature, curFeature, cv2.HISTCMP_CORREL)
Example #18
def get_most_similar(v1,res_features):
	dist,best = float("inf"),None
	for i in res_features:
		if v1!=res_features[i]:
			distancia = distance.correlation(v1,res_features[i])
			if distancia<dist: dist,best = distancia,i
	return best
Example #19
def NewsToTweetsScor_pair(newsVecList, newsWordList, tweetVecList,
                          tweetWordList, scoreFile):
    print 'Score pair wise start'
    newsVecList_len = len(newsVecList)
    tweetVecList_len = len(tweetVecList)
    total_dist = []
    for i in range(newsVecList_len):
        u = newsVecList[i]
        print i, ' = ',
        u_to_v = []
        for j in range(tweetVecList_len):

            v = tweetVecList[j]
            val = distance.cosine(u, v)
            val += distance.euclidean(u, v)
            val += distance.dice(u, v)
            val += distance.correlation(u, v)
            val += distance.jaccard(u, v)
            val += distance.cityblock(u, v)
            val = val / 6.0
            u_to_v.append(val)
        total_dist.append(u_to_v)

    print 'pair wise end'
    return total_dist
Example #20
def kmeans_classify(d, means, metric="Euclidean"):
    ids = [0] * d.shape[0]
    squared_dis = [float("inf")] * d.shape[0]
    distances = [float("inf")] * d.shape[0]
    for i in range(d.shape[0]):
        for j in range(means.shape[0]):
            if metric == "Euclidean":
                dis = distance.euclidean(d[i], means[j])
                if dis <= distances[i]:
                    distances[i] = dis
                    ids[i] = j
            elif metric == "L1-Norm":
                dis = distance.cityblock(d[i], means[j])
                if dis <= distances[i]:
                    distances[i] = dis
                    ids[i] = j
            elif metric == "Hamming":
                dis = distance.hamming(d[i], means[j])
                if dis <= distances[i]:
                    distances[i] = dis
                    ids[i] = j
            elif metric == "Correlation":
                dis = distance.correlation(d[i], means[j])
                if dis <= distances[i]:
                    distances[i] = dis
                    ids[i] = j
            elif metric == "Cosine":
                dis = distance.cosine(d[i], means[j])
                if dis <= distances[i]:
                    distances[i] = dis
                    ids[i] = j

    return np.matrix(ids).reshape(d.shape[0], 1), np.matrix(distances).reshape(
        d.shape[0], 1)
Example #21
    def correlation_am_word2vec(self, row):
        try:
            if row['id'] % 10000 == 0:
                elapsed = time.time() - start_time
                print("Processed {:10.0f} questions in {:10.0f} s ".format(
                    row['id'], elapsed))
        except KeyError:
            if row['test_id'] % 10000 == 0:
                elapsed = time.time() - start_time
                print("Processed {:10.0f} questions in {:10.0f} s ".format(
                    row['test_id'], elapsed))
        q1 = self.getWordVecs(row['question1'])
        q2 = self.getWordVecs(row['question2'])

        if len(q1) == 0 or len(q2) == 0:
            return 0

        q1_vec = np.zeros(300)
        q2_vec = np.zeros(300)

        for word in q1:
            q1_vec += self.wordvecs[word]
        q1_vec /= len(q1)
        for word in q2:
            q2_vec += self.wordvecs[word]
        q2_vec /= len(q2)

        score = correlation(q1_vec, q2_vec)
        return score
def profile_sim(
    prof1: Iterable[float],
    prof2: Iterable[float],
) -> float:
    """Calculates the similarity of two activity_profiles of the same length.
    The profiles are compared with the correlation distance
    ``scipy.spatial.distance.correlation()`` (i.e. 1 - Pearson correlation),
    which is then converted back into a similarity.

    Parameters:
    ===========
        prof1: The first profile to compare.
        prof2: The second profile to compare.
            The two profiles have to be of equal length.


    Returns:
    ========
        Similarity value between 0.0 .. 1.0 (0.0 being very dissimilar and 1.0 identical)."""

    assert len(prof1) == len(
        prof2), "Activity Profiles must have the same length to be compared."

    if not isinstance(prof1, np.ndarray):
        prof1 = np.array(prof1)
    prof1 = np.clip(prof1, -25.0, 25.0)
    if not isinstance(prof2, np.ndarray):
        prof2 = np.array(prof2)
    prof2 = np.clip(prof2, -25.0, 25.0)
    result = 1 - dist.correlation(prof1, prof2)
    if np.isnan(result) or result < 0.0:
        result = 0.0
    return result
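# Usage sketch (hypothetical profiles): identical profiles score 1.0, while an
# inverted profile is anti-correlated (negative Pearson r) and is clipped to 0.0.
p1 = [0.5, 1.2, -0.3, 2.0, 0.8]
p2 = [-0.5, -1.2, 0.3, -2.0, -0.8]

print(profile_sim(p1, list(p1)))  # 1.0
print(profile_sim(p1, p2))        # 0.0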
Example #23
def compare_stability_matrices(ism_a, ism_b):
    """
    Calculate the distance between two different stability maps
    
    Parameters
    ----------
    ism_a : array_like
        A numpy stability matrix of shape (`V`, `V`), `V` voxels.
    ism_b : array_like
        A numpy stability matrix of shape (`V`, `V`), `V` voxels.

    Returns
    -------
    similarity : array_like
        The distance between the two input matrices.

    """
    from sklearn.preprocessing import normalize
    from scipy.spatial.distance import correlation

    ism_a = normalize(ism_a, norm='l2')
    ism_b = normalize(ism_b, norm='l2')
    distance = correlation(ism_a.ravel(), ism_b.ravel())
    similarity = 1 - distance

    return similarity
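# Usage sketch (hypothetical matrices; requires scikit-learn, which the function
# imports internally): a stability matrix compared with a slightly noisy copy of
# itself gives a similarity close to 1.
import numpy as np

ism = np.random.rand(50, 50)
ism = (ism + ism.T) / 2                        # symmetric, like a stability matrix
noisy = ism + 0.01 * np.random.randn(50, 50)

print(compare_stability_matrices(ism, noisy))  # close to 1.0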
Example #24
def resample_symbols(rx_frame, rx_p_ref, intp_n=10):
    """ This function works around the imperfect-sampling-position problem.
        First, the received frame (rx_frame) is interpolated by intp_n times; Then, find a 
        best downsample group by comparing to the reference preamble (rx_p_ref);
        at last, downsample and return the resampled frame (rx_resampled).
    """
    rx_frame = np.concatenate([rx_frame, [rx_frame[-1]]])
    p_len = len(rx_p_ref)
    nsymbol = len(rx_frame)
    # pad the signal with more detail before down-sampling
    x_origin = np.arange(0, nsymbol)
    x_interp = np.arange(0, nsymbol - 1, nsymbol / (nsymbol * intp_n))
    f_interp = interpolate.interp1d(x_origin, rx_frame, 'cubic')
    rx_interp = f_interp(x_interp)
    rx_interp_left = np.concatenate([[rx_interp[0]] * intp_n,
                                     rx_interp[0:-1 * intp_n]])
    rx_candicate = np.concatenate([
        np.reshape(rx_interp_left, newshape=(intp_n, -1), order='F'),
        np.reshape(rx_interp, newshape=(intp_n, -1), order='F')
    ])
    # The following line picks the candidate sublist with the shortest distance
    # from the reference signal. Besides correlation, other distances could be
    # used here: braycurtis, cosine, canberra, chebyshev.
    dist = [correlation(candi, rx_p_ref) for candi in rx_candicate[:, 0:p_len]]
    rx_resampled = rx_candicate[np.argmin(dist)]
    return rx_resampled
Example #25
def dist(a, b, t='euclidean'):
    if t == 'euclidean':
        return np.sqrt(np.sum((a - b)**2))  # or   ssd.euclidean(a,b)
    elif t == 'correlation':
        return ssd.correlation(a, b)
    elif t == 'dtw':
        return dtw.dtw(a, b, distance_only=True).distance
Example #26
def get_distance_vectors(vector1, vector2):

    mahalonobis_distance = distance.cityblock(vector1, vector2)
    cosine_distance = distance.cosine(vector1, vector2)
    correlation_distance = distance.correlation(vector1, vector2)

    return mahalonobis_distance, cosine_distance, correlation_distance
Example #27
def kmeansClassify(A, means, distType = "euclidean"):

	codesErrors = []
	for i in range(A.shape[0]):
		d = [0, sys.maxint] #check it against all means, and store the row index of mean with distance with mean
		for j in range(means.shape[0]): #calculate distance metrics other than euclidean

			if distType == "euclidean":
				newd = dist.euclidean(A[i,:], means[j,:])
			elif  distType == "cosine":
				newd = dist.cosine(A[i,:], means[j,:])
			elif distType == "canberra":
				newd = dist.canberra(A[i,:], means[j,:])
			elif distType == "manhattan":
				newd = dist.cityblock(A[i,:], means[j,:])
			elif distType == "correlation":
				newd = dist.correlation(A[i,:], means[j,:])
			elif distType == "hamming":
				newd = dist.hamming(A[i,:], means[j,:])


			if newd < d[1]:
				d = [j, newd]

		codesErrors.append(d)

	return (np.matrix(codesErrors)[:,0], np.matrix(codesErrors)[:,1])  #returns the codes and errors
def computeDistance(X, Y, method):
    if 'cosine' in method:
        dist = spdistance.cosine(X, Y)
    elif 'dot' in method:
        dist = 1.0 - X.dot(Y)
    elif 'chi2' in method:
        dist = chiSquare2(X, Y)
    elif 'chi3' in method:
        dist = chiSquare3(X, Y)
    elif 'chi' in method:
        dist = chiSquare(X, Y)
    elif 'euclidean' in method:
        dist = cv2.norm(X, Y)
    elif 'canberra' in method:
        dist = spdistance.canberra(X, Y)
    elif 'correl' in method:
        dist = spdistance.correlation(X, Y)
    else:
        # does that work?
        dist = cv2.compareHist(X, Y, method)

    if hasattr(cv2, 'cv') and 'cv2.cv.CV_COMP_CORREL' in method:
        dist = 1 - dist
    elif hasattr(cv2, 'HISTCMP_CORREL') and 'cv2.HISTCMP_CORREL' in method:
        dist = 1 - dist
    elif hasattr(cv2, 'cv') and 'cv2.cv.CV_COMP_INTERSECT' in method:
        dist = 1 - dist
    elif hasattr(cv2,
                 'HISTCMP_INTERSECT') and 'cv2.HISTCMP_INTERSECT' in method:
        dist = 1 - dist

    return dist
    def kMeans(self):
        for numClusters in range(self.minClusters, self.maxClusters + 1):
            self.gain[numClusters] = {}
            self.gain[numClusters]["avg"] = []
            for rep in range(n):
                clustId = numClusters
                print "Running on %s clusters, rep %s" % (numClusters, rep + 1)
                self.gain[clustId]["labels"] = list(KMeans(numClusters).fit(np.array(self.data)).labels_)
                centroids = [[0 for x in range(len(self.data[0]))]
                             for y in range(numClusters)]

                print "\tFinding Centroids"
                for index, pt in enumerate(self.data):
                    cluster = self.gain[clustId]["labels"][index]
                    prevCenter = centroids[cluster]
                    centroids[cluster] = self._solve_centroid(pt, prevCenter)

                self.gain[clustId]["cosine"] = 0
                self.gain[clustId]["cheby"] = 0
                self.gain[clustId]["euclid"] = 0
                self.gain[clustId]["jaccard"] = 0
                for index, pt in enumerate(self.data):
                    cluster = self.gain[clustId]["labels"][index]
                    centroid = centroids[cluster]
                    self.gain[clustId]["cosine"] += distance.cosine(centroid, pt) / len(self.data)
                    self.gain[clustId]["cheby"] += distance.chebyshev(centroid, pt) / len(self.data)
                    self.gain[clustId]["jaccard"] += distance.correlation(centroid, pt) / len(self.data)

                marginGain = self.bestMarginalGain(clustId, rep, centroids)
                if marginGain[0] is False:
                    return marginGain[1], self.gain[marginGain[0]]["labels"]

        print "Max clusters is best marginal gain," + \
              "consider rerunning with higher max"
        return self.maxClusters, self.gain[clustId]["labels"]
Example #30
def calc_distance(v1,v2):
	cor=[]	
	#print len(v1),len(v2)
	for vector in range(len(v1)):
		#print v1[vector],v2[vector]
		cor.append(correlation(v1[vector],v2[vector]))
	return sum(cor)
def corr_dspec_shape(data, proto, sigma=2.0):
    # validating feature sizes
    if data.shape[1] != proto.shape[1]:
        raise Exception('Both "data" and "prototypes" must have the same feature sizes.')

    # getting samples and prototypes count
    sc = data.shape[0]
    pc = proto.shape[0]

    # resulting dissimilarity representation
    d = np.zeros((sc, pc))

    # derivative filter for both data and prototypes
    data2 = derfilter(data, sigma)
    proto2 = derfilter(proto, sigma)

    # normalizing each row by its maximum value
    data2 = np.apply_along_axis(lambda row: row / row.max(), 1, data2)
    proto2 = np.apply_along_axis(lambda row: row / row.max(), 1, proto2)

    # change here!!!!!!
    # TODO: Optimization here!: 1-list comprehension, 2-out=np.vstack(list_comprehension)
    for i in range(pc):
        t = np.apply_along_axis(lambda row: correlation(row, proto2[i, :]), 1, data2)
        d[:, i] = t

    # the dissimilarity representation
    return d
def corr_shape_measure(x, y, sigma=2.0):
    """Computes the shape dissimilarity value.

    Args:
        x (list): The first vector.
        y (list): The second vector.
        sigma (float): The smoothing parameter

    Returns:
        float: The shape dissimilarity value between vectors x and y.

    """

    # getting the length of the vectors
    x_length = len(x)
    y_length = len(y)

    # validating parameters
    if x_length != y_length:
        raise Exception('Vectors with different sizes')

    # TODO: Here it is assumed that x and y are lists. Analyze the possibility for them to be tuples or numpy arrays

    # converting x and y to numpy arrays
    x_arr = np.array(x, np.float32)
    y_arr = np.array(y, np.float32)

    # applying a first gaussian derivative filter to both
    x_gauss = scipy_gauss1d(x_arr, sigma, order=1)
    y_gauss = scipy_gauss1d(y_arr, sigma, order=1)

    # computing the shape dissimilarity
    return correlation(x_gauss, y_gauss)
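# Usage sketch (hypothetical curves; assumes numpy as np plus the scipy_gauss1d
# and correlation imports used above): scaling or shifting a curve changes its
# values but not its shape, so the shape dissimilarity stays near 0.
import numpy as np

t = np.linspace(0, 2 * np.pi, 200)
curve = list(np.sin(t))
scaled = list(2.0 * np.sin(t) + 5.0)

print(corr_shape_measure(curve, scaled))           # ~0.0: same shape
print(corr_shape_measure(curve, list(np.cos(t))))  # ~1.0: different shape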
Example #33
def similarity(a, b):
    # Get common elements and remove 0 values (no review)
    commons_a = []
    commons_b = []
    for j in range(0, len(a)):
        if a[j] != 0 and b[j] != 0:
            commons_a.append(a[j])

    for j in range(0, len(b)):
        if b[j] != 0 and a[j] != 0:
            commons_b.append(b[j])

    commons_count = len(commons_a)

    # If there are no common elements, return zero; otherwise
    # compute the coefficient
    if commons_count == 0:
        return 0

    pearson_correlation = correlation(commons_a, commons_b)
    # If divisor is zero
    if math.isnan(pearson_correlation):
        return 0

    return round(1 - pearson_correlation, 2)
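# Usage sketch (hypothetical rating vectors, 0 meaning "no review"; assumes the
# example's own imports): only the items rated by both users enter the Pearson
# correlation, and the result is rounded to two decimals.
print(similarity([5, 0, 3, 4, 2], [4, 2, 3, 0, 1]))  # based on the three co-rated items
print(similarity([5, 0, 0, 0, 0], [0, 2, 3, 1, 4]))  # 0: no co-rated items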
Example #34
    def search(image):
        # Get feature maps from image
        feature_maps = Wear.get_feature_maps(image)

        # Searching in database
        wears = Database.connect().wears.find({})

        # Set predictions to image
        predictions = []

        for wear in wears:
            # Get feature maps from wear
            wear_fm = pickle.loads(str(wear['feature_maps']))['_feature_maps']

            """ Calculating distances """
        
            # Euclidean distance
            euclidean_distance = np.sqrt(np.sum((feature_maps - wear_fm)**2.))

            # Cosine distance
            cosine_distance = cosine(feature_maps, wear_fm)

            # Correlation distance
            correlation_distance = correlation(feature_maps, wear_fm)

            predictions.append([str(wear['image']), str(wear['link']), euclidean_distance])

        return predictions
def comparefiles(pypath, cudaresult, writeresult, dtype):
    # Takes 2 paths for the files to compare and a path to write the result.
    f = open(pypath, 'r')
    ff = open(cudaresult, 'r')
    if f.mode == 'r':
        data = np.loadtxt(f,
                          dtype=dtype,
                          converters={
                              0:
                              lambda s: complex(s.decode().replace(
                                  '+-', '-').replace('(', '').replace(')', ''))
                          })
    if ff.mode == 'r':
        data2 = np.loadtxt(
            ff,
            dtype=dtype,
            converters={0: lambda s: complex(s.decode().replace('+-', '-'))})
    # WIP: other distance measurements might be more meaningful, this is a first try.
    euclideandst = distance.euclidean(data, data2)
    manhattendst = distance.cityblock(data, data2)
    correlationdst = distance.correlation(data, data2)
    # Print the output on cmd.
    print("Euclidiean Distance between the Scripts is:")
    print(euclideandst)
    print("Manhatten Distance between the Scripts is:")
    print(manhattendst)
    print("Correlation between the Scripts is:")
    print(correlationdst)
    # Write the output to custom path.
    result = open(writeresult, "a")
    result.write("Euclidiean Distance:" + str(euclideandst) +
                 "\nManhatten Distance:" + str(manhattendst) +
                 "\nCorrelation:" + str(correlationdst))
    result.close()
def evaluate_centrality(object, clust):
    sumcentrality = 0
    for elem in clust:
        dist = scidist.correlation(data_matrix[elem], data_matrix[object])
        # print pearson
        sumcentrality += math.e ** (-10 * (dist ** 2))

    return sumcentrality / float(len(clust))
def correlate(IM1,IM2):
    IM1 = IM1.ravel()
    IM2 = IM2.ravel()
    # drop positions where IM1 is 0 or IM2 equals the sentinel value 0.0654
    keep = np.logical_and(IM1 != 0, IM2 != 0.0654)
    return correlation(IM1[keep], IM2[keep])
def correlation_MDS(data):
    seed = np.random.RandomState(seed=3)
    similarities = [[0 for x in range(len(data))] for x in range(len(data))]
    for i in range(len(data)):
        for j in range(len(data)):
            similarities[i][j] = correlation(data[i], data[j])
    mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                   dissimilarity="precomputed", n_jobs=1)
    pos = mds.fit_transform(similarities)
    return pos
Example #39
def GetCorr( XAxis, YAxis, ZAxis ):

    X = array(XAxis)
    Y = array(YAxis)
    Z = array(ZAxis)
    
    #Normalize
    X_A = (X - mean(X)) / (std(X) * len(X))
    Y_A = (Y - mean(Y)) /  std(Y)
    CorrA = 1.0-correlation(X_A, Y_A)

    Y_B = (Y - mean(Y)) / (std(Y) * len(Y))
    Z_B = (Z - mean(Z)) /  std(Z)
    CorrB = 1.0-correlation(Y_B, Z_B)

    Z_C = (Z - mean(Z)) / (std(Z) * len(Z))
    X_C = (X - mean(X)) /  std(X)
    CorrC = 1.0-correlation(Z_C, X_C)
    
    return CorrA, CorrB, CorrC
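# Note (not from the source project): distance.correlation already centers and
# rescales its inputs, so the manual normalization above does not change the
# result; each Corr value is simply the Pearson correlation of the raw axes.
import numpy as np
from scipy.spatial.distance import correlation
from scipy.stats import pearsonr

x = np.random.rand(100)
y = np.random.rand(100)
x_a = (x - x.mean()) / (x.std() * len(x))
y_a = (y - y.mean()) / y.std()

print(1.0 - correlation(x_a, y_a))
print(pearsonr(x, y)[0])            # same value up to floating point error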
Example #40
 def compute_similarity(self, arr1, arr2):
     if self.simfcn == "cosine":
         return self.d_to_sim(cosine(arr1, arr2))
     elif self.simfcn == "pearson":
         return self.d_to_sim(correlation(arr1, arr2))
     elif self.simfcn == "hamming":
         return 1 - hamming(arr1, arr2)
     elif self.simfcn == "jaccard":
         return 1 - jaccard(arr1, arr2)
     else:
         print "Similiarity Function Not Yet Supported"
         exit()
def seqcor(m1,m2):
    l1 = len(m1)
    l2 = len(m2)

    l = max(l1, l2)

    # Create random sequence
    nucs = []
    L = 10 ** 4
    for i in range(L):
        nucs.append(random.choice(['A', 'C', 'T', 'G']))
    random_seq = "".join(nucs)

    # Scan random sequence
    result1 = pwmscan(random_seq.upper(), m1.pwm, m1.pwm_min_score(), len(random_seq), False, True)
    result2 = pwmscan(random_seq.upper(), m2.pwm, m2.pwm_min_score(), len(random_seq), False, True)

    # Return maximum correlation
    c = []
    for i in range(l1):
        c.append(1 - distance.correlation(result1[:L-l-i],result2[i:L-l]))
    for i in range(l2):
        c.append(1 - distance.correlation(result1[i:L-l],result2[:L-l-i]))
    return max(c)
def get_direction_score(direction_map, coord, matrix):
    sum = 0
    x, y = matrix.shape[:2]
    a, b = coord
    point = matrix[a, b, ::]
    n = 0
    for i, k in enumerate(direction_map):
        if k[0] == 0 and k[1] == 0:
            continue
        if 0 <= a + k[0] < x and 0 <= b + k[1] < y:
            neighbour = matrix[a + k[0], b + k[1],::]
            corr = correlation(point, neighbour)
            # It has a range of 0..2 and should be 0..1, so we divide by 2
            # See http://stackoverflow.com/questions/35988933/scipy-distance-correlation-is-higher-than-1
            sum += corr / 2
            n += 1

    return 1 - (sum / n)
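# Quick check (hypothetical vectors) of the comment above: the correlation
# distance ranges from 0 (perfectly correlated) to 2 (perfectly anti-correlated),
# hence the division by 2 to map it into 0..1.
from scipy.spatial.distance import correlation

print(correlation([1.0, 2.0, 3.0], [2.0, 4.0, 6.0]))  # 0.0
print(correlation([1.0, 2.0, 3.0], [3.0, 2.0, 1.0]))  # 2.0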
Example #43
def getSimilarity(baseDict, tidDict):
    # generates vectors for two tracks to be used in similarity function
    #returns similarity between base vector and track vector    
    
    baseVector = []
    tidVector = []
    
    if len(baseDict) >= len(tidDict) :
        for (k,v) in baseDict.iteritems():
            baseVector.append(v)
            if k in tidDict:
                tidVector.append(tidDict[k])
            else:
                tidVector.append(0)
    else:
        for (k,v) in tidDict.iteritems():
            tidVector.append(v)
            if k in baseDict:
                baseVector.append(baseDict[k])
            else:
                baseVector.append(0)
    
    return dis.correlation(baseVector, tidVector)
Example #44
def main():
    print "# KNN Classifier"
    parser = ld.parse_arguments()

    stopwords = None
    if parser.stopwords_path:
        stopwords = ld.load_stopwords(parser.stopwords_path)

    # printing args
    print '\t-k = ' + str(parser.k)
    print '\t-d = ' + parser.distance

    # loading the necessary data
    (vocabulary, neigh_classes) = ld.load_train(parser.train_path, stopwords)

    print "# Tamanho do vocabulário:", len(vocabulary)

    # transforming each item to a v-dimensional space
    (train, test) = space.transform(vocabulary, parser.train_path,
                                    parser.test_path)

    # output file
    out_path = parser.distance + '_' + str(parser.k)
    out_path += '.txt'
    out_file = open(out_path, 'w')

    # knn classification
    print "# Classifying", len(train) * parser.percentage
    for item in test:
        dist_heap = []

        # calculates the distance to every point in the training set
        for i in xrange(int(len(train) * parser.percentage)):
            point = train[i]
            distance = 0.0

            if parser.distance == 'cosine':
                distance = spd.cosine(item, point)
            elif parser.distance == 'jaccard':
                distance = spd.jaccard(item, point)
            elif parser.distance == 'euclidean':
                distance = spd.euclidean(item, point)
            elif parser.distance == 'hamming':
                distance = spd.hamming(item, point)
            elif parser.distance == 'correlation':
                distance = spd.correlation(item, point)
            elif parser.distance == 'manhattan':
                distance = spd.cityblock(item, point)
            else:
                print >> stderr, "ERRO! -  Distância informada inválida."
                exit()

            tup = (distance, i)
            heapq.heappush(dist_heap, tup)

        # return the highest k similar points
        top_k = heapq.nsmallest(parser.k, dist_heap)

        # classifing
        classification = np.zeros(2)
        for (_, idi) in top_k:
            classe = neigh_classes[idi]
            classification[int(classe)] += 1

        # DEBUG
        print classification,

        # outputting classification
        if(classification[0] >= classification[1]):
            print >> out_file, '0'
            print '0'
        else:
            print >> out_file, '1'
            print '1'

    print
    print "# Resultados salvos no arquivo: " + out_path
    out_file.close()
    result.result("../data/imdb_test", out_path)
Example #45
def correlation(itemset1, itemset2):
    return dist.correlation(itemset1, itemset2)
Example #46
def main():
    print "# KNN Classifier"
    parser = ld.parse_arguments()

    # printing args
    print '\t-k = ' + str(parser.k)
    print '\t-d = ' + parser.distance

    stopwords = None
    if parser.stopwords_path:
        stopwords = ld.load_stopwords(parser.stopwords_path)

    voc = load_vocabulary(parser.train_path, stopwords)
    answers = load_answers(parser.train_path)

    train = transform(voc, parser.train_path)
    test = transform(voc, parser.test_path)

    # output file
    out_path = '../results/' + parser.distance + '_' + str(parser.k)
    out_path += '.txt'
    out_file = open(out_path, 'w')

    for point in test:
        neighbors = []
        for i in xrange(len(train)):
            neigh = train[i]
            distance = 0.0

            if parser.distance == 'cosine':
                distance = spd.cosine(neigh, point)
            elif parser.distance == 'jaccard':
                distance = spd.jaccard(neigh, point)
            elif parser.distance == 'euclidean':
                distance = spd.euclidean(neigh, point)
            elif parser.distance == 'dice':
                distance = spd.dice(neigh, point)
            elif parser.distance == 'correlation':
                distance = spd.correlation(neigh, point)
            elif parser.distance == 'manhattan':
                distance = spd.cityblock(neigh, point)
            else:
                print >> stderr, "ERRO! -  Distância informada inválida."
                exit()

            tup = (distance, i)
            heapq.heappush(neighbors, tup)

        # return the highest k similar points
        top_k = heapq.nsmallest(parser.k, neighbors)

        # classifing
        classification = np.zeros(2)
        for (_, idi) in top_k:
            classe = answers[idi]
            classification[int(classe)] += 1

        # outputting classification
        if(classification[0] >= classification[1]):
            print >> out_file, '0'
            print '0'
        else:
            print >> out_file, '1'
            print '1'

    # outputting the results
    print
    print "# Resultados salvos no arquivo: " + out_path
    out_file.close()
    result.result("../data/imdb_test", out_path)
Example #47
def proj3(pos, x): # pos: position, x: new data point
      if pos == 'C': # XTrain: training data for NB, yTrain: training labels for NB
            XTrain = np.loadtxt('Ctrain.txt')
            #yTrain = np.loadtxt('CtrainL.txt')
            XTest = np.loadtxt('C.txt')
            yTest = np.loadtxt('Clabs.txt')
      elif pos == 'PF':
            XTrain = np.loadtxt('PFtrain.txt')
            #yTrain = np.loadtxt('PFtrainL.txt')
            XTest = np.loadtxt('PF.txt')
            yTest = np.loadtxt('PFlabs.txt')
      elif pos == 'PG':
            XTrain = np.loadtxt('PGTrain.txt')
            #yTrain = np.loadtxt('PGtrainL.txt')
            XTest = np.loadtxt('PG.txt')
            yTest = np.loadtxt('PGlabs.txt')
      elif pos == 'SF':
            XTrain = np.loadtxt('SFTrain.txt')
            #yTrain = np.loadtxt('SFtrainL.txt')
            XTest = np.loadtxt('SF.txt')
            yTest = np.loadtxt('SFlabs.txt')
      elif pos == 'SG':
            XTrain = np.loadtxt('SGTrain.txt')
            #yTrain = np.loadtxt('SGtrainL.txt')
            XTest = np.loadtxt('SG.txt')
            yTest = np.loadtxt('SGlabs.txt')
      else:
            print "Please reinput the position."

      D = XTrain.shape[1] # number of features
      #NBtr = GaussianNB()
      #NBtr.fit(XTrain, yTrain)
      #ytr = NBtr.predict(x) # predicting the class of x (w.r.t training data)
      NBte = GaussianNB()
      NBte.fit(XTest, yTest)
      yTrain = NBte.predict(XTrain) # predicting the classes of XTrain's rows
      yte = NBte.predict(x) # predicting the class of x (w.r.t testing data)

      '''create training data from the players (2009-2011) in the same class as x (ytr)'''
      tmpTrain = np.zeros(D) # create training data for Ranking SVM
      TrIndex = [] # store the indices of tmpTrain
      for i in range(len(yTrain)):
            if yTrain[i] == yte: # the same class as new data point                 
                  tmpTrain = np.vstack((tmpTrain, XTrain[i]))
                  TrIndex = np.append(TrIndex, i)
            else: # different classes from new data point
                  pass
      tmpTrain = np.delete(tmpTrain, 0, 0) # delete the initializing row

      '''calculate correlation distances between rows of tmpTrain and x'''
      TrCorrD = np.zeros(np.shape(TrIndex)) # initialize correlation distances
      for i in range(len(TrCorrD)):
            TrCorrD[i] = spd.correlation(tmpTrain[i], x)
      TrRank = np.argsort(TrCorrD)
      if len(TrIndex) < 10:
            noTrPts = len(TrIndex)
      else:
            noTrPts = 10 # select top 10 relevant training points
      vecTrain = tmpTrain[TrRank[:noTrPts]]
      #print TrIndex[TrRank[:noTrPts]]

      '''create training feature vectors'''
      noFt = 2 # number of features
      ftTrain = np.zeros((noTrPts,noFt))
      for i in range(noTrPts):
            ftTrain[i] = np.array([spd.euclidean(vecTrain[i],x), spd.cosine(vecTrain[i],x)])

      '''create taining matrix and labels for SVM from vecTrain and TrRank'''
      SVMTrain = np.zeros((noTrPts*(noTrPts-1), noFt))
      SVMLabel = np.zeros(np.shape(SVMTrain)[0]) - 1
      for i in range(noTrPts):
            for j in range(noTrPts):
                  if i > j:
                        SVMTrain[i*(noTrPts-1)+j] = ftTrain[i] - ftTrain[j]
                        if TrRank[i] < TrRank[j]: # smaller rank => closer distance
                              SVMLabel[i*(noTrPts-1)+j] = 1
                        else:
                              SVMLabel[i*(noTrPts-1)+j] = 0
                  elif i < j:
                        SVMTrain[i*(noTrPts-1)+j-1] = ftTrain[i] - ftTrain[j]
                        if TrRank[i] < TrRank[j]:
                              SVMLabel[i*(noTrPts-1)+j-1] = 1
                        else:
                              SVMLabel[i*(noTrPts-1)+j-1] = 0
                  else:
                        pass #if i == j, pass


      '''create testing data from the players (2011-2015) in the same class as x (ytr)'''
      tmpTest = np.zeros(D) # extract data of the same class of x in testing data
      TeIndex = [] # store the indices of testing data
      for i in range(len(yTest)):
            if yTest[i] == yte: # the same class as new data point
                  tmpTest = np.vstack((tmpTest, XTest[i]))
                  TeIndex = np.append(TeIndex, i)
            else:
                  pass
      tmpTest = np.delete(tmpTest, 0, 0) # delete the initializing row
      
      '''calculate correlation distances between testing data and x'''
      TeCorrD = np.zeros(np.shape(TeIndex))
      for i in range(len(TeCorrD)):
            TeCorrD[i] = spd.correlation(tmpTest[i], x)
      TeRank = np.argsort(TeCorrD)
      noTePts = noTrPts # select top 10 relevant testing points
      vecTest = tmpTest[TeRank[:noTePts]]
      #print TeIndex[TeRank[:10]]
      '''calculate NDCG'''
      TeGrade = np.arange(noTePts,0,-1)
      TeGains = 2 ** TeGrade - 1
      TeDisct = 1 / np.log2(np.arange(2,2+noTePts))
      TeDcg = np.zeros((noTePts))
      for i in range(noTePts):
            TeDcg[i] = TeDcg[i-1] + TeGains[i]*TeDisct[i]

      '''create testing feature vectors'''
      ftTest = np.zeros((noTePts,noFt))
      for i in range(noTePts):
            ftTest[i] = np.array([spd.euclidean(vecTest[i],x), spd.cosine(vecTest[i],x)])

      '''create testing matrix and labels for SVM from vecTest and TeRank'''
      SVMTest = np.zeros((noTePts*(noTePts-1), noFt))
      TeLabs = np.zeros(np.shape(SVMTest)[0]) - 1 # testing labels (used as a comparion with results)
      for i in range(noTePts):
            for j in range(noTePts):
                  if i > j:
                        SVMTest[i*(noTePts-1)+j] = ftTest[i] - ftTest[j]
                        if TeRank[i] < TeRank[j]:
                              TeLabs[i*(noTePts-1)+j] = 1
                        else:
                              TeLabs[i*(noTePts-1)+j] = 0
                  elif i < j:
                        SVMTest[i*(noTePts-1)+j-1] = ftTest[i] - ftTest[j]
                        if TeRank[i] < TeRank[j]:
                              TeLabs[i*(noTePts-1)+j-1] = 1
                        else:
                              TeLabs[i*(noTePts-1)+j-1] = 0
                  else:
                        pass

      '''train the ranking SVM'''
      clf = SVC(C=0.01, kernel='linear')
      clf.fit(SVMTrain, SVMLabel)
      pred_labels = clf.predict(SVMTest) # predict labels
      visTest = np.reshape(pred_labels, (noTePts,noTePts-1)) # make the testing results visualized
      ids = (-np.sum(visTest, axis=1)).argsort()[:noTePts] # descending order
      ReIndex = TeIndex[TeRank[:noTePts]][ids] # ranking svm results of testing data (indices)
      MatchPlayerList = np.int_(ReIndex)
      MatchPlayer = list(MatchPlayerList)[0]
      return MatchPlayer
      # unreachable: NDCG evaluation kept below the early return above
      '''calculate NDCG'''
      ReGrade = np.zeros((noTePts))
      for i in range(noTePts):
            for j in range(noTePts):
                  if ReIndex[i] == TeIndex[TeRank[:noTePts]][j]:
                        ReGrade[i] = TeGrade[j]
                  else:
                        pass
      ReGains = 2 ** ReGrade - 1
      ReDisct = TeDisct
      ReDcg = np.zeros((noTePts))
      for i in range(noTePts):
            ReDcg[i] = ReDcg[i-1] + ReGains[i]*ReDisct[i]
      ReNdcg = ReDcg / TeDcg
def correlation_similarity(x, y):
    c = distance.correlation(x, y)
    if np.isnan(c): return 0
    else: return 1 - c
def plotter(Value):
    # split the per-token scores in a training file by polarity label
    X_pos = []
    X_neg = []
    X_obj = []
    
    fileName = TRAINING_FILES_PATTERN+Value
    for line in fileinput.input([fileName]):
        split = line.split("\t")
        if split[0] == POSITIVE_POLARITY_FOR_SCORER:
            X_pos.append(float(split[1]))
        elif split[0] == NEGATIVE_POLARITY_FOR_SCORER:
            X_neg.append(float(split[1]))
        else:
            X_obj.append(float(split[1]))
    return X_pos, X_neg, X_obj


max_pos, max_neg, max_obj = plotter(maxValue)
min_pos, min_neg, min_obj = plotter(minValue)

max = max_pos + max_neg + max_obj
min = min_pos + min_neg + min_obj

print max
print min

print "Corelation is: "+ str(correlation(max, min))

plt.plot(max, min, marker = 'o', ls='')
plt.xlabel("Value of alpha as 10^0")
plt.ylabel("Value of alpha as 10^-7")
plt.show()  
Example #50
def plot_blockades(blockades_file, model_files,
                   cluster_size, show_text):
    """
    Pretty plotting
    """
    WINDOW = 4

    blockades = read_mat(blockades_file)
    clusters = sp.preprocess_blockades(blockades, cluster_size=cluster_size,
                                       min_dwell=0.5, max_dwell=20)
    peptide = clusters[0].blockades[0].peptide

    models = []
    for model_file in model_files:
        models.append(load_model(model_file))
    #svr_signal = model.peptide_signal(peptide)
    #mv_signal = MvBlockade().peptide_signal(peptide)

    for cluster in clusters:
        #cluster.consensus = sp.discretize(cluster.consensus, len(peptide))
        signal_length = len(cluster.consensus)

        x_axis = np.linspace(0, len(peptide) + 1, signal_length)
        matplotlib.rcParams.update({"font.size": 16})
        fig = plt.subplot()

        fig.spines["right"].set_visible(False)
        fig.spines["top"].set_visible(False)
        fig.get_xaxis().tick_bottom()
        fig.get_yaxis().tick_left()
        fig.set_xlim(0, len(peptide) + 1)
        fig.set_xlabel("Putative AA position")
        fig.set_ylabel("Normalized signal")

        fig.plot(x_axis, cluster.consensus, label="Empirical signal", linewidth=1.5)

        ################
        for model in models:
            model_signal = model.peptide_signal(peptide)
            model_grid = [i * signal_length / (len(model_signal) - 1)
                          for i in xrange(len(model_signal))]

            interp_fun = interp1d(model_grid, model_signal, kind="linear")
            model_interp = interp_fun(xrange(signal_length))

            corr = 1 - distance.correlation(cluster.consensus, model_interp)
            print("{0} correlation: {1:5.2f}\t".format(model.name, corr),
                  file=sys.stderr)
            fig.plot(x_axis, model_interp, label=model.name, linewidth=2)
        ##############

        legend = fig.legend(loc="lower left", frameon=False)
        for label in legend.get_lines():
            label.set_linewidth(2)
        for label in legend.get_texts():
            label.set_fontsize(16)

        if show_text:
            #adding AAs text:
            event_mean = np.mean(cluster.consensus)
            acids_pos = _get_aa_positions(peptide, WINDOW, x_axis[-1])
            for i, aa in enumerate(peptide):
                fig.text(acids_pos[i], event_mean - 2, aa, fontsize=16)

        plt.show()
DEVELOPMENT_KEY = '../../dev.key'
SCORER_SCRIPT = '../../scorer.py'
WEIGHTS_FILE = "averaged_weight_vector_akj"
LOG_POSITIVE_FILE = "versus_file_bayes"
DISTINCT_TOKENS = 11083


for line in fileinput.input([GENERATED_FILES_DIRECTORY+WEIGHTS_FILE]):
    if len(line) == 0:
        break
    lister = line.split(" ")
    positive_weights = []
    for weight in lister[:DISTINCT_TOKENS]:
        append_weight = float(weight)
        positive_weights.append(append_weight)
#Need to plot positive weights against the values in versus_bayes
log_weights = []
actual_positives = []
for line in fileinput.input([GENERATED_FILES_DIRECTORY+LOG_POSITIVE_FILE]):
    lister = line.split("\t")
    log_weights.append(float(lister[1]))
    actual_positives.append(positive_weights[int(lister[2])])

plt.plot(log_weights, actual_positives, marker ='o', ls ='')
plt.xlabel("Log weights of positive tokens (Naive Bayes Classifier - ALPHA = 10^-3)")
plt.ylabel("Weights of the positive tokens (Perceptron Classifier)")
print "Corelation is: "+ str(correlation(log_weights, actual_positives))

#plt.plot(log_weights, 'rs')
plt.show()
def correlate(IM1,IM2):
    score = correlation(IM1.ravel(), IM2.ravel())
    return score
    def evaluate(self, dataset, vectorizer, high_dim_kron=False):
        pairs = list(dataset.dependency_graphs_pairs())

        # TODO: Refactor to mimic scikit-learn pipeline.
        sent_vectors = (
            (
                g1,
                vectorizer.vectorize(g1),
                g2,
                vectorizer.vectorize(g2),
                score,
            ) + tuple(extra)
            for g1, g2, score, *extra in pairs
        )

        if not high_dim_kron:
            sent_vectors = (
                (g1, v1.toarray().flatten(), g2, v2.toarray().flatten(), score) + tuple(extra)
                for g1, v1, g2, v2, score, *extra in sent_vectors
            )
            result_values = (
                (
                    g1,
                    g2,

                    1 / (1 + distance.euclidean(v1, v2)),
                    1 - distance.cosine(v1, v2),
                    1 - distance.correlation(v1, v2),
                    v1.dot(v2.T),

                    score,
                ) + tuple(extra)
                for g1, v1, g2, v2, score, *extra in sent_vectors
            )
            result_columns = (
                    'euclidean',
                    'cos',
                    'correlation',
                    'inner_product',
                )
        else:
            result_values = (
                (
                    g1,
                    g2,

                    (v1 * s1).dot(v2 * s2).T * (v1 * o1).dot((v2 * o2).T),

                    score,
                ) + tuple(extra)
                for g1, (s1, v1, o1), g2, (s2, v2, o2), score, *extra in sent_vectors
            )
            result_columns = (
                    'inner_product',
                )

        result = pd.DataFrame.from_records(
            [
                (tree(g1), tree(g2)) + tuple(rest)
                for g1, g2, *rest in self.progressify(
                    result_values,
                    description='Similarity',
                    max=len(pairs),
                )
            ],
            columns=(
                ('unit1', 'unit2', ) + result_columns + ('score', ) + getattr(dataset, 'extra_fields', tuple())
            )
        )

        if not result.notnull().all().all():
            logger.warning('Null values in similarity scores.')

        for column in result_columns:

            rho, p = stats.spearmanr(result[[column, 'score']])
            print(
                'Spearman correlation {info}, {column}: '
                '{style.BOLD}rho={rho:.3f}{style.RESET}, p={p:.5f}, support={support}'
                .format(
                    rho=rho,
                    p=p,
                    style=style,
                    info=vectorizer.info(),
                    support=len(result),
                    column=column,
                )
            )

        return result
#print "question cosine similairity-->",cosine_similarity(QuestionTVectorArray[0:1],QuestionTVectorArray)
#print "answer cosine similarity-->",cosine_similarity(AnswerTVectorArray[0:1],AnswerTVectorArray)
Qcosines=cosine_similarity(QuestionTVectorArray[0:1],QuestionTVectorArray)
Acosines=cosine_similarity(AnswerTVectorArray[0:1],AnswerTVectorArray)

Qbray=[dist.braycurtis(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Abray=[dist.braycurtis(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qcanberra=[dist.canberra(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Acanberra=[dist.canberra(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qhamming=[dist.hamming(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Ahamming=[dist.hamming(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qcorrelation=[dist.correlation(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Acorrelation=[dist.correlation(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qcityblock=[dist.cityblock(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Acityblock=[dist.cityblock(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qdice=[dist.dice(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Adice=[dist.dice(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

Qyule=[dist.yule(QuestionTVectorArray[0].toarray(),u.toarray()) for u in QuestionTVectorArray]
Ayule=[dist.yule(AnswerTVectorArray[0].toarray(),u.toarray()) for u in AnswerTVectorArray]

#C_Q=np.histogram2d(QuestionTVectorArray[1],QuestionTVectorArray[1])[0]

#print "question mutual info-->",mutual_info_score(None,None,contigency=C_Q)#QuestionTVectorArray[0:1],QuestionTVectorArray)
#QuestionVectorArray=Qvectorizer.fit_transform(all_questions).toarray()
def correlation(pair):
    # Python 3 no longer allows tuple parameters, so unpack explicitly
    x, y = pair
    # return list(pearsonr(x, y))
    return distance.correlation(x, y)
Example #56
def wvCorr(a):
	return [distance.correlation(x[0], x[1]) for x in a]
def pearson_corr(ind1, ind2, matrix, _):
    # TODO: fix mean
    v1 = matrix[ind1]
    v2 = matrix[ind2]
    return 1 - distance.correlation(v1, v2)