Ejemplo n.º 1
0
 def test_calc_initial_position(self):
     """Check the relative placement produced by ``calc_initial_position``.

     Four words are built from fixed distance vectors. The test asserts that
     the Rogers-Tanimoto value between the positions of words 0 and 1 is
     smaller than the one between the positions of words 0 and 2.
     """
     distance_vectors = ([1, 1, 1, 1],
                         [1, 1, 1, 0],
                         [0, 0, 0, 1],
                         [0, 0, 1, 1])
     words = tuple(wordcount.Word(distance=np.array(vector))
                   for vector in distance_vectors)
     got = self.wm.calc_initial_position(words)

     def position(index):
         # (x, y) coordinates assigned to the word at ``index``.
         return (got[index].x, got[index].y)

     dist_zero_one = rogerstanimoto(position(0), position(1))
     dist_zero_two = rogerstanimoto(position(0), position(2))
     assert_true(dist_zero_one < dist_zero_two)
Ejemplo n.º 2
0
    def _get_node_distance_matrix(self, datapoint, som_array):
        """Get distance of datapoint and node using Euclidean distance.

        Parameters
        ----------
        datapoint : np.array, shape=(X.shape[1])
            Datapoint = one row of the dataset `X`
        som_array : np.array
            Weight vectors of the SOM,
            shape = (self.n_rows, self.n_columns, X.shape[1])

        Returns
        -------
        distmat : np.array of float
            Distance between datapoint and each SOM node

        """
        # algorithms on the full matrix
        if self.distance_metric == "euclidean":
            return np.linalg.norm(som_array - datapoint, axis=2)

        # node-by-node algorithms
        distmat = np.zeros((self.n_rows, self.n_columns))
        if self.distance_metric == "manhattan":
            for node in self.node_list_:
                distmat[node] = dist.cityblock(
                    som_array[node[0], node[1]], datapoint)

        elif self.distance_metric == "mahalanobis":
            for node in self.node_list_:
                som_node = som_array[node[0], node[1]]
                cov = np.cov(np.stack((datapoint, som_node), axis=0),
                             rowvar=False)
                cov_pinv = np.linalg.pinv(cov)   # pseudo-inverse
                distmat[node] = dist.mahalanobis(
                    datapoint, som_node, cov_pinv)

        elif self.distance_metric == "tanimoto":
            # Note that this is a binary distance measure.
            # Therefore, the vectors have to be converted.
            # Source: Melssen 2006, Supervised Kohonen networks for
            #         classification problems
            # VERY SLOW ALGORITHM!!!
            threshold = 0.5
            for node in self.node_list_:
                som_node = som_array[node[0], node[1]]
                distmat[node] = dist.rogerstanimoto(
                    binarize(datapoint.reshape(1, -1), threshold=threshold,
                             copy=True),
                    binarize(som_node.reshape(1, -1), threshold=threshold,
                             copy=True))

        elif self.distance_metric == "spectralangle":
            for node in self.node_list_:
                distmat[node] = np.arccos(np.divide(
                    np.dot(som_array[node[0], node[1]], datapoint),
                    np.multiply(np.linalg.norm(som_array),
                                np.linalg.norm(datapoint))))

        return distmat
 def calculate_distance_metrics(self, vstrack):
     """Compute several (dis)similarity measures between the two tracks.

     Reads ``self.maindata`` and ``self.vsdata``, optionally restricted to
     the ``self.nregions`` positions with the largest element-wise maximum,
     and stores ``[cor, normdist, rho, jacc, rtd]`` in
     ``self.distance_dict[vstrack]``. The stored list is also printed.

     Side effects: sets ``self.x``, ``self.y``, ``self.bx``, ``self.by`` and,
     when subsetting, ``self.maxdata`` and ``self.topidx``.
     """
     # Subset to top N regions if necessary:
     if self.nregions > 0:
         self.maxdata = np.maximum(self.maindata, self.vsdata)
         self.topidx = np.argpartition(self.maxdata,
                                       -self.nregions)[-self.nregions:]
         self.x, self.y = self.maindata[self.topidx], self.vsdata[self.topidx]
     else:
         self.x, self.y = self.maindata, self.vsdata
     main_values, vs_values = self.x, self.y

     # Normalized euclidean distance: half the variance of the difference
     # relative to the summed variances.
     summed_variance = np.var(main_values) + np.var(vs_values)
     normdist = 0.5 * (np.var(main_values - vs_values) / summed_variance)

     # Pearson correlation (off-diagonal entry of the 2x2 matrix):
     cor = np.corrcoef(main_values, vs_values)[0, 1]

     # Spearman correlation (the p-value is not used):
     rho, _pval = spearmanr(main_values, vs_values)

     # Binarize each track; the threshold is fixed at 2.0 when the
     # corresponding ``is...obs`` flag is set, else ``self.cutoff``.
     main_threshold = 2.0 if self.ismainobs else self.cutoff
     self.bx = 1.0 * (main_values >= main_threshold)
     vs_threshold = 2.0 if self.isvsobs else self.cutoff
     self.by = 1.0 * (vs_values >= vs_threshold)

     # Binary dissimilarities (Jaccard, Rogers-Tanimoto):
     jacc = distance.jaccard(self.bx, self.by)
     rtd = distance.rogerstanimoto(self.bx, self.by)

     # Aggregate all:
     self.distance_dict[vstrack] = [cor, normdist, rho, jacc, rtd]
     print(self.distance_dict[vstrack])
Ejemplo n.º 4
0
    def get_nearest_neighbor(self, x_test, k, sample_class):
        """Return the indices of the ``k`` samples closest to ``x_test``.

        The distance function is selected by ``self.distance_calculator``.
        Samples that are element-wise identical to ``x_test`` are skipped.

        :param x_test: query vector
        :param k: number of neighbours wanted; if fewer candidates exist,
            all of them are returned
        :param sample_class: indexable collection of sample vectors
        :return: list of indices into ``sample_class``, nearest first
            (ties are broken by the lower index)
        :raises ValueError: if ``self.distance_calculator`` is not one of the
            supported names
        """
        # Public option name -> attribute name on the distance module.
        # The option spellings are part of the existing interface and are
        # kept as they are.
        metric_attributes = {
            'jaccard': 'jaccard',
            'dice': 'dice',
            'correlation': 'correlation',
            'yule': 'yule',
            'russelo-rao': 'russellrao',
            'sokal-michener': 'sokalmichener',
            'rogers-tanimoto': 'rogerstanimoto',
            'kulzinsky': 'kulsinski',
        }
        attribute = metric_attributes.get(self.distance_calculator)
        if attribute is None:
            # Previously an unknown name left the distance variable unbound
            # (or silently reused the value of an earlier iteration).
            raise ValueError('Unknown distance calculator: %r'
                             % (self.distance_calculator,))
        # Resolved lazily so only the selected metric has to exist.
        metric = getattr(dis, attribute)

        distances = []
        for i in range(len(sample_class)):
            sample = sample_class[i][:]
            if (sample != x_test).any():
                distances.append([metric(x_test, sample), i])

        # make a list of the k neighbors' targets
        distances.sort()
        # max() keeps a negative k from slicing from the end.
        return [index for _, index in distances[:max(k, 0)]]
Ejemplo n.º 5
0
    def tanimoto(vector_x: dict, vector_y: dict) -> float:
        """Tanimoto coefficient. Used to assess the similarity of two samples.

        The two inputs are first converted with ``Similarity._get_lists`` and
        the result of ``distance.rogerstanimoto`` on the converted pair is
        returned unchanged.

        :param vector_x:
        :param vector_y:
        :return: [0;+1]
        """
        converted = Similarity._get_lists(vector_x, vector_y)
        items_x, items_y = converted
        return distance.rogerstanimoto(items_x, items_y)
Ejemplo n.º 6
0
    def rogerstanimoto(self, x=None, y=None, w=None):
        """
        Rogers-Tanimoto dissimilarity.

        Any argument left as ``None`` falls back to the corresponding
        instance attribute (``self.x``, ``self.y``, ``self.w``).

        x = [1, 0, 0]
        y = [0, 1, 0]
        """
        # Explicit ``is None`` checks: ``x or self.x`` raises for NumPy
        # arrays (ambiguous truth value) and would discard any argument that
        # merely happens to be falsy.
        if x is None:
            x = self.x
        if y is None:
            y = self.y
        if w is None:
            w = self.w
        return distance.rogerstanimoto(x, y, w)
Ejemplo n.º 7
0
def cross_channel_boolean_distance_features(mask):
    """Compute boolean distance features for every pair of mask channels.

    Each channel is flattened to 1D and every unordered channel pair
    (ch1 < ch2) is compared with nine metrics from ``dist``.

    Parameters
    ----------
    mask : 3D array, shape (M, N, C)
        The input mask with multiple channels.

    Returns
    -------
    features :  dict
        Maps ``"<metric>_distance_Ch<i>_Ch<j>"`` (1-based channel numbers)
        to the metric value for that channel pair. Empty when the mask has
        fewer than two channels.

    """
    # Evaluated in this order for every channel pair; looked up on `dist`
    # only when needed.
    metric_names = (
        "dice",
        "hamming",
        "jaccard",
        "kulsinski",
        "rogerstanimoto",
        "russellrao",
        "sokalmichener",
        "sokalsneath",
        "yule",
    )

    n_channels = mask.shape[2]
    features = dict()
    for first in range(n_channels):
        for second in range(first + 1, n_channels):
            # flatten both channels to 1D
            flat_first = mask[:, :, first].ravel()
            flat_second = mask[:, :, second].ravel()

            # 1-based channel numbers make the keys easier to read
            suffix = "_Ch{}_Ch{}".format(first + 1, second + 1)

            for metric_name in metric_names:
                metric = getattr(dist, metric_name)
                features[metric_name + "_distance" + suffix] = metric(
                    flat_first, flat_second)

    return features
Ejemplo n.º 8
0
def tanimoto_distances_matrix(param_grid, temporal_filtering, global_nuis_correction, prefs_method, prefs_thr):
    """Build the pairwise Rogers-Tanimoto matrix over all grid entries.

    Every entry of ``ParameterGrid(param_grid)`` is turned into a result file
    name, the localizations stored for it are loaded, and each pair of
    entries is compared on the flattened, boolean-cast volumes. Progress is
    printed for every outer entry and every comparison.

    Returns
    -------
    tanis : np.ndarray, shape (n, n)
        ``rogerstanimoto`` value for every ordered pair of grid entries.
    ticks : list
        For each grid entry, the second ``'_'``-separated piece of its
        ``'subjsf'`` value.
    """
    from scipy.spatial.distance import rogerstanimoto

    # Only some of the returned paths are needed here.
    (wd, _xd, _dd, labelsf, _phenof, dataf, masks,
     _dilmasks, _templates, _pipe) = get_filepaths(temporal_filtering, global_nuis_correction)

    def _load_entry(entry):
        # Resolve one grid entry to (subjsf, result file, loaded volume).
        subjsf = entry['subjsf']
        cl = entry['cl']
        maskf = masks[dataf.index(subjsf)]
        result_file = os.path.join(wd, get_resultfile_name(subjsf, cl, prefs_method, prefs_thr))
        volume = get_localizations_from_datashelf(result_file, subjsf, labelsf, maskf)
        return subjsf, result_file, volume

    # Every grid entry is compared against every other one (and itself).
    grid_entries = list(ParameterGrid(param_grid))
    n = len(grid_entries)

    tanis = np.zeros((n, n))
    ticks = []

    for row, row_entry in enumerate(grid_entries):
        row_subjsf, row_file, row_volume = _load_entry(row_entry)

        ticks.append(row_subjsf.split('_')[1])

        print('Tanimoto comparing ' + row_file)

        for col, col_entry in enumerate(grid_entries):
            _, col_file, col_volume = _load_entry(col_entry)

            print('with ' + col_file)

            tanis[row, col] = rogerstanimoto(row_volume.flatten().astype(bool),
                                             col_volume.flatten().astype(bool))

    return tanis, ticks
Ejemplo n.º 9
0
 def calculate_pss(self,
                   profile,
                   ignore=None,
                   method="pairwise"):
     """
     Calculate Profiles Similarity Score.

     Args:
         profile: The profile to compare against; must have the same length
             as this one and expose ``query`` and ``profile`` attributes.
         ignore: Optional iterable of query elements whose positions are
             left out of the comparison. Neither profile object is modified.
         method (str): "pairwise" counts equal positions; "jaccard", "yule",
             "dice", "hamming", "kulsinski", "rogerstanimoto", "russellrao"
             and "sokalmichener" call the function of that name on ``dist``.

     Returns:
         The score for the chosen method, or None if ``method`` is not one
         of the names above.

     Raises:
         ProfileError: If the profiles differ in length or an element to
             ignore is not part of a profile's query.
     """
     if len(self) != len(profile):
         raise ProfileError("Different profiles' lengths")

     def _values_without_ignored(prof):
         # Profile values of ``prof`` with the ignored query positions
         # removed, computed on a copy.
         if not ignore:
             return prof.profile
         try:
             # All positions are resolved against the unmodified query
             # first; deleting one element at a time would shift the
             # positions of the elements that follow.
             dropped = {prof.query.index(element) for element in ignore}
         except ValueError:
             # ``index`` signals a missing element with ValueError.
             raise ProfileError("Element to ignore not in profile")
         values = tuple(prof.profile)
         if any(position >= len(values) for position in dropped):
             raise ProfileError("Element to ignore not in profile")
         return tuple(value for position, value in enumerate(values)
                      if position not in dropped)

     values_1 = _values_without_ignored(self)
     values_2 = _values_without_ignored(profile)

     if method == "pairwise":
         return sum(a == b for a, b in zip(values_1, values_2))

     scipy_methods = ("jaccard", "yule", "dice", "hamming", "kulsinski",
                      "rogerstanimoto", "russellrao", "sokalmichener")
     if method in scipy_methods:
         # Looked up lazily so only the requested metric has to exist.
         return getattr(dist, method)(values_1, values_2)
     return None
Ejemplo n.º 10
0
 def fitness_func(chromosome):
     """Return one minus the Rogers-Tanimoto value against the target.

     ``target_chromosome`` is taken from the enclosing scope.
     """
     dissimilarity = scipy_distance.rogerstanimoto(chromosome,
                                                   target_chromosome)
     return 1 - dissimilarity
Ejemplo n.º 11
0
def exec_similarity(dct, algorithm):
    """Score every answer in ``dct`` against the question with one metric.

    For each entry of ``dct['answers']`` the selected metric is applied to
    ``ndarray_dict(dct['tf_idf'])`` and ``ndarray_dict(answer['tf_idf'])``
    and the result is stored on the answer under the key ``algorithm``.

    Returns ``{}`` when ``validate_similarity_algorithms`` reports a problem
    (truthy result), ``None`` when ``algorithm`` is not one of the names
    below, and otherwise a list with the return value of each
    ``answer.update(...)`` call (``None`` per answer if the answers are
    plain dicts).
    """
    if validate_similarity_algorithms(dct, algorithm):
        return {}

    # Name -> thunk returning the metric. The thunks keep the name lookup
    # lazy: only the metric that is actually used has to be defined.
    # 'minkowski' is deliberately absent (it was disabled in this function).
    metric_table = (
        ('braycurtis', lambda: braycurtis),
        ('canberra', lambda: canberra),
        ('chebyshev', lambda: chebyshev),
        ('cityblock', lambda: cityblock),
        ('correlation', lambda: correlation),
        ('cosine', lambda: cosine),
        ('euclidean', lambda: euclidean),
        ('mahalanobis', lambda: mahalanobis),
        ('seuclidean', lambda: seuclidean),
        ('sqeuclidean', lambda: sqeuclidean),
        ('wminkowski', lambda: wminkowski),
        ('dice', lambda: dice),
        ('hamming', lambda: hamming),
        ('jaccard', lambda: jaccard),
        ('kulsinski', lambda: kulsinski),
        ('rogerstanimoto', lambda: rogerstanimoto),
        ('russellrao', lambda: russellrao),
        ('sokalmichener', lambda: sokalmichener),
        ('sokalsneath', lambda: sokalsneath),
        ('yule', lambda: yule),
    )

    resolve_metric = None
    for name, thunk in metric_table:
        if algorithm == name:
            resolve_metric = thunk
            break
    if resolve_metric is None:
        return None

    outcomes = []
    for answer in dct['answers']:
        score = resolve_metric()(ndarray_dict(dct['tf_idf']),
                                 ndarray_dict(answer['tf_idf']))
        outcomes.append(answer.update({algorithm: score}))
    return outcomes
Ejemplo n.º 12
0
def rogerstanimoto_(x, y):
    """Rogers-Tanimoto value of ``x`` and ``y``, or 0 on division by zero."""
    try:
        result = rogerstanimoto(x, y)
    except ZeroDivisionError:
        # No defined value for this pair; report 0 instead of failing.
        result = 0
    return result
Ejemplo n.º 13
0
		desired_organism = sys.argv[5]
	except IndexError:
		desired_organism = None	
	if desired_organism is not None:
		models = [mod for mod in models if model_info[mod.split(sep)[-1].split('.')[0]][4] == desired_organism]
		print ' Predicting for organism : ' + desired_organism
		output_name = input_name + '_' + input_name2 + '_out_binary_sim_' + str(threshold) + '_' + desired_organism[:3] + '.txt'
	else: 	output_name = input_name + '_' + input_name2 + '_out_binary_sim_' + str(threshold) + '.txt'
	print ' Total Number of Classes : ' + str(len(models))
	print ' Using TPR threshold of : ' + str(threshold)
	output_name = input_name + '_' + input_name2 + '_out_binary_sim_' + str(threshold) + '.txt'
	out_file = open(output_name, 'w')
	querymatrix,smiles = importQuery(input_name)
	prediction_results = performTargetPrediction(models)
	print ' Total Number of Query Molecules file 1 : ' + str(len(querymatrix))
	querymatrix,smiles2 = importQuery(input_name2)
	prediction_results2 = performTargetPrediction(models)
	print ' Total Number of Query Molecules file 2 : ' + str(len(querymatrix))
	sim_output = []
	sim_output2 = []
	for idx in range(prediction_results.shape[1]):
		sim_output.append(rogerstanimoto(prediction_results[:,idx],prediction_results2[:,idx]))
		sim_output2.append(jaccard(prediction_results[:,idx],prediction_results2[:,idx]))
	out_file.write('Compound Pair No.\tSmiles 1\tSmiles 2\tRogers Tanimoto\tJaccard Sim\n')
	for idx, comp1 in enumerate(smiles):
		comp2 = smiles2[idx]
		s = sim_output[idx]
		s2 = sim_output2[idx]
		out_file.write('\t'.join(map(str,[idx,comp1,comp2,1.0-s,1.0-s2])) + '\n')
	print '\n Wrote Results to: ' + output_name
	out_file.close()
Ejemplo n.º 14
0
def rogerstanimoto_(x, y):
    """Call ``rogerstanimoto(x, y)``; a ZeroDivisionError is mapped to 0."""
    try:
        value = rogerstanimoto(x, y)
    except ZeroDivisionError:
        return 0
    else:
        return value
 def distance(self, vector1, vector2, type_):
     """ 
     Calculate distance between two vectors.
     
     Args:
         vector1 (list of int/float/bool): Vector in vector space
         vector2 (list of int/float/bool): Vector in vector space
         type_ (str): Type of distance calculation. Allowed types are:
             * For numeric vectors *
             - braycurtis: Computes the Bray-Curtis distance between two arrays.
             - canberra: Computes the Canberra distance between two arrays.
             - chebyshev: Computes the Chebyshev distance.
             - cityblock: Computes the City Block (Manhattan) distance.
             - correlation: Computes the correlation distance between two arrays.
             - cosine: Computes the Cosine distance between arrays.
             - euclidean: Computes the Euclidean distance between two arrays.
             - sqeuclidean: Computes the squared Euclidean distance between two arrays.
             
             * For boolean vectors *
             - dice: Computes the Dice dissimilarity between two boolean arrays.
             - hamming: Computes the Hamming distance between two arrays.
             - jaccard: Computes the Jaccard-Needham dissimilarity between two boolean arrays.
             - kulsinski: Computes the Kulsinski dissimilarity between two boolean arrays.
             - rogerstanimoto: Computes the Rogers-Tanimoto dissimilarity between two boolean arrays.
             - russellrao: Computes the Russell-Rao dissimilarity between two boolean arrays.
             - sokalmichener: Computes the Sokal-Michener dissimilarity between two boolean arrays.
             - sokalsneath: Computes the Sokal-Sneath dissimilarity between two boolean arrays.
             - yule: Computes the Yule dissimilarity between two boolean arrays.
             
     Returns:
         float: Distance between vectors.

     Raises:
         ValueError: If type_ is not one of the allowed types.
     """
     # Every allowed type is the name of the function to call on the
     # module-level ``distance`` object, so one table replaces the long
     # if/elif chain (which listed "kulsinski" twice, the second branch
     # being unreachable).
     supported_types = (
         "braycurtis", "canberra", "chebyshev", "cityblock", "correlation",
         "cosine", "euclidean", "sqeuclidean",
         "dice", "hamming", "jaccard", "kulsinski", "rogerstanimoto",
         "russellrao", "sokalmichener", "sokalsneath", "yule",
     )
     if type_ not in supported_types:
         raise ValueError(
             """Wrong value for type_. Please enter one of supported values.
                          Type help(distance) to see supported values.""")
     # Inside the method body ``distance`` is the module-level name, not
     # this method; the function is looked up only once the type is known.
     return getattr(distance, type_)(vector1, vector2)
Ejemplo n.º 16
0
def tanimoto_similarity(base, target):
    """Return ``rogerstanimoto(base, target)`` unchanged."""
    return rogerstanimoto(base, target)
def _binaryPairDistance(vecA, vecB, binaryDistanceMethod,
                        useManualDistanceMethod):
    """Return the dissimilarity of two equal-length boolean vectors."""
    if binaryDistanceMethod == 1:  # Sokal Michener
        if useManualDistanceMethod:
            # Share of mismatching positions over the full vector length.
            return np.sum(np.logical_xor(vecA, vecB)) / len(vecA)
        return distance.hamming(vecA, vecB)

    if binaryDistanceMethod == 3:  # Rogers Tanimoto
        if useManualDistanceMethod:
            mismatches = np.sum(np.logical_xor(vecA, vecB))
            return 2 * mismatches / (mismatches + len(vecA))
        return distance.rogerstanimoto(vecA, vecB)

    # Jaccard (2) and Sokal Sneath II (any other code): identical vectors
    # are defined as distance 0 up front, which also avoids a zero
    # denominator when both vectors are all-False.
    if np.array_equal(vecA, vecB):
        return 0
    if binaryDistanceMethod == 2:  # Jaccard
        if useManualDistanceMethod:
            return (np.sum(np.logical_xor(vecA, vecB)) /
                    np.sum(np.logical_or(vecA, vecB)))
        return distance.jaccard(vecA, vecB)
    # Sokal Sneath II
    if useManualDistanceMethod:
        doubledMismatches = 2 * np.sum(np.logical_xor(vecA, vecB))
        return doubledMismatches / (
            doubledMismatches + np.sum(np.logical_and(vecA, vecB)))
    return distance.sokalsneath(vecA, vecB)


def getDistancesFromMeds(noBits, signBitOption, aggregateTreatmentsOption,
                         binaryDistanceMethod, useManualDistanceMethod,
                         segmentLength):
    """Build a normalized pairwise distance matrix from the med sequences.

    Each row of ``gv.medSequenceMatrix`` (first column dropped) is encoded
    into one binary vector and all vectors are compared pairwise.

    Parameters
    ----------
    noBits, signBitOption
        Passed to ``convTreatmentToBinaryArray``; ``noBits + signBitOption``
        is the length of the aggregated vector.
    aggregateTreatmentsOption
        If truthy, the per-month binary strings of a segment are OR-ed into
        one vector; otherwise they are concatenated.
    binaryDistanceMethod
        1 = Sokal Michener, 2 = Jaccard, 3 = Rogers Tanimoto,
        anything else = Sokal Sneath II.
    useManualDistanceMethod
        If truthy, use the hand-written formulas instead of the
        ``distance`` module functions.
    segmentLength
        Number of leading columns of each row that are encoded.

    Returns
    -------
    np.ndarray
        Square symmetric distance matrix divided by its largest entry
        (returned as-is when that entry is 0 or when there are no rows).
    """
    # Pull in previously made med sequence matrix and delete the patient identifiers
    treatmentData = np.delete(gv.medSequenceMatrix, 0, 1)

    # Binary string for every month of every segment
    monthlyBinaryTreatmentVectors = np.array([
        [convTreatmentToBinaryArray(treatmentData[i, j], signBitOption,
                                    noBits)
         for j in range(segmentLength)]
        for i in range(treatmentData.shape[0])
    ])

    binaryTreatmentVectors = []
    if aggregateTreatmentsOption:
        # Logically OR the monthly strings of a segment into one binary
        # string of length noBits (+ sign bit) per segment.
        for segment in monthlyBinaryTreatmentVectors.astype(bool):
            aggregated = np.zeros(noBits + signBitOption).astype(bool)
            for month in segment:
                aggregated = np.logical_or(aggregated, month)
            binaryTreatmentVectors.append(aggregated.astype(int))
    else:
        # Preserve monthly granularity by concatenating the monthly strings
        # of a segment into one long binary string.
        for segment in monthlyBinaryTreatmentVectors:
            binaryTreatmentVectors.append(
                np.concatenate([np.array([])] + list(segment)))
    binaryTreatmentVectors = np.array(binaryTreatmentVectors)

    vectorCount = len(binaryTreatmentVectors)
    distanceMatrix = np.zeros((vectorCount, vectorCount))
    if vectorCount == 0:
        # Nothing to compare; np.amax would fail on an empty matrix.
        return distanceMatrix

    # Convert once instead of for every pair.
    boolVectors = binaryTreatmentVectors.astype(bool)
    for i in range(vectorCount):
        for j in range(i, vectorCount):
            # All four measures are symmetric, so each pair is computed
            # once and mirrored.
            pairDistance = _binaryPairDistance(
                boolVectors[i], boolVectors[j], binaryDistanceMethod,
                useManualDistanceMethod)
            distanceMatrix[i, j] = pairDistance
            distanceMatrix[j, i] = pairDistance

    # Normalize distance matrix by dividing the whole thing by the largest
    # value; skipped when that value is 0 (all vectors identical) so the
    # result does not become NaN.
    largest = np.amax(distanceMatrix)
    if largest != 0:
        distanceMatrix = np.divide(distanceMatrix, largest)
    return distanceMatrix
Ejemplo n.º 18
0
def my_dist(u, v):
    """Product of the cosine, Yule, Bray-Curtis and |Rogers-Tanimoto| values."""
    combined = cosine(u, v)
    combined = combined * yule(u, v)
    combined = combined * braycurtis(u, v)
    return combined * np.abs(rogerstanimoto(u, v))
Ejemplo n.º 19
0
def tanimoto_dist(A, B):
    """Return ``rogerstanimoto(A, B)``."""
    dissimilarity = rogerstanimoto(A, B)
    return dissimilarity
Ejemplo n.º 20
0
# Pairwise Jaccard score between rows 0..2 of y; the diagonal is fixed at 1.
jaccard_sim = np.eye(3)
for i in range(3):
    for j in range(3):
        if i != j:
            jaccard_sim[i, j] = jaccard_score(y[i, :], y[j, :])

jaccard_sim

# Simple matching score: matching positions divided by 8.
simatch_sim = np.eye(3)
for i in range(3):
    for j in range(3):
        if i != j:
            simatch_sim[i, j] = sum(y[i, :] == y[j, :]) / 8

simatch_sim

# Rogers-Tanimoto score: one minus the Rogers-Tanimoto dissimilarity.
rogerstan_sim = np.eye(3)
for i in range(3):
    for j in range(3):
        if i != j:
            rogerstan_sim[i, j] = 1 - distance.rogerstanimoto(y[i, :], y[j, :])

rogerstan_sim
Ejemplo n.º 21
0
def rogerstanimoto(app1SyscallsVector, app2SyscallsVector):
    """Return ``spDist.rogerstanimoto`` of the two syscall vectors."""
    dissimilarity = spDist.rogerstanimoto(app1SyscallsVector,
                                          app2SyscallsVector)
    return dissimilarity
Ejemplo n.º 22
0
import sys
import numpy as np
from scipy.spatial import distance

# The two input files are given as the first and second command-line arguments.
filename1 = sys.argv[1]
filename2 = sys.argv[2]

# Both files are parsed the same way: tab-delimited, 11 leading rows skipped,
# and only column index 5 read as floats (`(5)` is the plain integer 5).
load_options = dict(dtype='float',
                    delimiter='\t',
                    usecols=(5),
                    unpack=True,
                    skiprows=11)
pad1 = np.loadtxt(filename1, **load_options)
pad2 = np.loadtxt(filename2, **load_options)

# Compute both distances first, then report them.
dE = distance.euclidean(pad1, pad2)
rog = distance.rogerstanimoto(pad1, pad2)

print("Euclidean", dE)
print("Rogers", rog)
    # NOTE(review): this fragment starts mid-function; the enclosing `def` and
    # the definitions of `models`, `threshold`, `input_name` and `input_name2`
    # are not visible here. It uses Python 2 `print` statements.
    print ' Total Number of Classes : ' + str(len(models))
    print ' Using TPR threshold of : ' + str(threshold)
    # The output file name is built from both input names and the threshold.
    output_name = input_name + '_' + input_name2 + '_out_binary_sim_' + str(
        threshold) + '.txt'
    # NOTE(review): the file is closed only on the last line of this fragment;
    # an exception raised in between would leave it open.
    out_file = open(output_name, 'w')
    # First query file.
    # NOTE(review): `performTargetPrediction` is not passed `querymatrix`;
    # presumably it reads it from enclosing/global scope - confirm.
    querymatrix, smiles, ids = importQuery(input_name)
    prediction_results = performTargetPrediction(models)
    print ' Total Number of Query Molecules file 1 : ' + str(len(querymatrix))
    # Second query file; `querymatrix` is rebound here, replacing the first one.
    querymatrix, smiles2, ids2 = importQuery(input_name2)
    prediction_results2 = performTargetPrediction(models)
    print ' Total Number of Query Molecules file 2 : ' + str(len(querymatrix))
    sim_output = []
    sim_output2 = []
    # Compare the two prediction results column by column: column `idx` of the
    # first result against column `idx` of the second.
    for idx in range(prediction_results.shape[1]):
        sim_output.append(
            rogerstanimoto(prediction_results[:, idx],
                           prediction_results2[:, idx]))
        sim_output2.append(
            jaccard(prediction_results[:, idx], prediction_results2[:, idx]))
    out_file.write(
        'Compound Pair No.\tSmiles 1\tSmiles 2\tRogers Tanimoto\tJaccard Sim\n'
    )
    # One output row per entry of `ids`, paired by position with `ids2`.
    # NOTE(review): assumes `ids2`, `sim_output` and `sim_output2` each have at
    # least len(ids) entries - confirm against the callers' inputs.
    for idx, comp1 in enumerate(ids):
        comp2 = ids2[idx]
        s = sim_output[idx]
        s2 = sim_output2[idx]
        # The stored values are written as `1.0 - value`.
        out_file.write(
            '\t'.join(map(str, [idx, comp1, comp2, 1.0 - s, 1.0 - s2])) + '\n')
    print '\n Wrote Results to: ' + output_name
    out_file.close()
Ejemplo n.º 24
0
def do_roger_tanimoto(m, rogtan, vec):
    """Fill ``rogtan`` with pairwise Rogers-Tanimoto distances and return it.

    For every ``row`` and ``col`` in ``range(m)``, ``rogtan[row, col]`` is set
    to ``distance.rogerstanimoto(vec[row], vec[col])``. ``rogtan`` is written
    in place and the same object is returned.
    """
    indices = range(m)
    for row in indices:
        for col in indices:
            pair_distance = distance.rogerstanimoto(vec[row], vec[col])
            rogtan[row, col] = pair_distance
    return rogtan
Ejemplo n.º 25
0
def main():
    """Print each `Distance` method's result next to SciPy's for the same inputs.

    For every metric, the value from the local `Distance` instance is printed
    on a line starting with "My" and the value from `scipy.spatial.distance`
    on a line starting with "SciPy", followed by a dashed separator line.
    """
    from scipy.spatial import distance
    # Fixed integer test vectors used for every comparison below.
    # NOTE(review): these are not 0/1 vectors, yet several of the metrics below
    # (e.g. dice, yule, rogerstanimoto) are documented by SciPy for boolean
    # input - confirm that comparing them on these values is intended.
    a = np.array([1, 2, 43])
    b = np.array([3, 2, 1])

    d = Distance()
    print('-----------------------------------------------------------------')

    print('My       braycurtis: {}'.format(d.braycurtis(a, b)))
    print('SciPy    braycurtis: {}'.format(distance.braycurtis(a, b)))
    print('-----------------------------------------------------------------')

    print('My       canberra: {}'.format(d.canberra(a, b)))
    print('SciPy    canberra: {}'.format(distance.canberra(a, b)))
    print('-----------------------------------------------------------------')

    print('My       chebyshev: {}'.format(d.chebyshev(a, b)))
    print('SciPy    chebyshev: {}'.format(distance.chebyshev(a, b)))
    print('-----------------------------------------------------------------')

    print('My       cityblock: {}'.format(d.cityblock(a, b)))
    print('SciPy    cityblock: {}'.format(distance.cityblock(a, b)))
    print('-----------------------------------------------------------------')

    print('My       correlation: {}'.format(d.correlation(a, b)))
    print('SciPy    correlation: {}'.format(distance.correlation(a, b)))
    print('-----------------------------------------------------------------')

    print('My       euclidean: {}'.format(d.euclidean(a, b)))
    print('SciPy    euclidean: {}'.format(distance.euclidean(a, b)))
    print('-----------------------------------------------------------------')

    print('My       hamming: {}'.format(d.hamming(a, b)))
    print('SciPy    hamming: {}'.format(distance.hamming(a, b)))
    print('-----------------------------------------------------------------')

    print('My       jaccard: {}'.format(d.jaccard(a, b)))
    print('SciPy    jaccard: {}'.format(distance.jaccard(a, b)))
    print('-----------------------------------------------------------------')

    # "manhattan" is reported by calling cityblock on both sides, so this
    # section repeats the cityblock values under a different label.
    print('My       manhattan: {}'.format(d.cityblock(a, b)))
    print('SciPy    manhattan: {}'.format(distance.cityblock(a, b)))
    print('-----------------------------------------------------------------')

    print('My       cosine: {}'.format(d.cosine(a, b)))
    print('SciPy    cosine: {}'.format(distance.cosine(a, b)))
    print('-----------------------------------------------------------------')

    print('My       dice: {}'.format(d.dice(a, b)))
    print('SciPy    dice: {}'.format(distance.dice(a, b)))
    print('-----------------------------------------------------------------')

    # NOTE(review): `distance.kulsinski` is reported as removed in recent SciPy
    # releases - confirm against the installed SciPy version.
    print('My       kulsinski: {}'.format(d.kulsinski(a, b)))
    print('SciPy    kulsinski: {}'.format(distance.kulsinski(a, b)))
    print('-----------------------------------------------------------------')

    # Matrix passed as the third argument to both mahalanobis calls.
    iv = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]])
    print('My       mahalanobis: {}'.format(d.mahalanobis(a, b, iv)))
    print('SciPy    mahalanobis: {}'.format(distance.mahalanobis(a, b, iv)))
    print('-----------------------------------------------------------------')

    # The third argument is an ndarray for `Distance` and a plain list for
    # SciPy, with the same three 0.1 values.
    print('My       seuclidean: {}'.format(
        d.seuclidean(a, b, np.array([0.1, 0.1, 0.1]))))
    print('SciPy    seuclidean: {}'.format(
        distance.seuclidean(a, b, [0.1, 0.1, 0.1])))
    print('-----------------------------------------------------------------')

    print('My       sokalmichener: {}'.format(d.sokalmichener(a, b)))
    print('SciPy    sokalmichener: {}'.format(distance.sokalmichener(a, b)))
    print('-----------------------------------------------------------------')

    # The printed label is "sokal_sneath"; the method called is `sokalsneath`.
    print('My       sokal_sneath: {}'.format(d.sokalsneath(a, b)))
    print('SciPy    sokal_sneath: {}'.format(distance.sokalsneath(a, b)))
    print('-----------------------------------------------------------------')

    print('My       sqeuclidean: {}'.format(d.sqeuclidean(a, b)))
    print('SciPy    sqeuclidean: {}'.format(distance.sqeuclidean(a, b)))
    print('-----------------------------------------------------------------')

    print('My       minkowski: {}'.format(d.minkowski(a, b, 2)))
    print('SciPy    minkowski: {}'.format(distance.minkowski(a, b, 2)))
    print('-----------------------------------------------------------------')

    print('My       rogerstanimoto: {}'.format(d.rogerstanimoto(a, b)))
    print('SciPy    rogerstanimoto: {}'.format(distance.rogerstanimoto(a, b)))
    print('-----------------------------------------------------------------')

    print('My       russellrao: {}'.format(d.russellrao(a, b)))
    print('SciPy    russellrao: {}'.format(distance.russellrao(a, b)))
    print('-----------------------------------------------------------------')

    # NOTE(review): `distance.wminkowski` is reported as removed in recent SciPy
    # releases (weights moved to `minkowski(..., w=...)`) - confirm against the
    # installed SciPy version.
    print('My       wminkowski: {}'.format(d.wminkowski(a, b, 2, np.ones(3))))
    print('SciPy    wminkowski: {}'.format(
        distance.wminkowski(a, b, 2, np.ones(3))))
    print('-----------------------------------------------------------------')

    print('My       yule: {}'.format(d.yule(a, b)))
    print('SciPy    yule: {}'.format(distance.yule(a, b)))
    print('-----------------------------------------------------------------')