def test_calc_initial_position(self):
    input = (wordcount.Word(distance=np.array([1, 1, 1, 1])),
             wordcount.Word(distance=np.array([1, 1, 1, 0])),
             wordcount.Word(distance=np.array([0, 0, 0, 1])),
             wordcount.Word(distance=np.array([0, 0, 1, 1])))
    got = self.wm.calc_initial_position(input)
    dist_zero_one = rogerstanimoto((got[0].x, got[0].y), (got[1].x, got[1].y))
    dist_zero_two = rogerstanimoto((got[0].x, got[0].y), (got[2].x, got[2].y))
    assert_true(dist_zero_one < dist_zero_two)

def _get_node_distance_matrix(self, datapoint, som_array):
    """Get distance of datapoint and each node using the configured metric.

    Parameters
    ----------
    datapoint : np.array, shape=(X.shape[1])
        Datapoint = one row of the dataset `X`
    som_array : np.array
        Weight vectors of the SOM,
        shape = (self.n_rows, self.n_columns, X.shape[1])

    Returns
    -------
    distmat : np.array of float
        Distance between datapoint and each SOM node
    """
    # algorithms on the full matrix
    if self.distance_metric == "euclidean":
        return np.linalg.norm(som_array - datapoint, axis=2)

    # node-by-node algorithms
    distmat = np.zeros((self.n_rows, self.n_columns))
    if self.distance_metric == "manhattan":
        for node in self.node_list_:
            distmat[node] = dist.cityblock(
                som_array[node[0], node[1]], datapoint)

    elif self.distance_metric == "mahalanobis":
        for node in self.node_list_:
            som_node = som_array[node[0], node[1]]
            cov = np.cov(np.stack((datapoint, som_node), axis=0),
                         rowvar=False)
            cov_pinv = np.linalg.pinv(cov)  # pseudo-inverse
            distmat[node] = dist.mahalanobis(
                datapoint, som_node, cov_pinv)

    elif self.distance_metric == "tanimoto":
        # Note that this is a binary distance measure.
        # Therefore, the vectors have to be converted.
        # Source: Melssen 2006, Supervised Kohonen networks for
        #         classification problems
        # VERY SLOW ALGORITHM!!!
        threshold = 0.5
        for node in self.node_list_:
            som_node = som_array[node[0], node[1]]
            # binarize returns a 2-D (1, n) array; pass the 1-D row to scipy
            distmat[node] = dist.rogerstanimoto(
                binarize(datapoint.reshape(1, -1), threshold=threshold,
                         copy=True)[0],
                binarize(som_node.reshape(1, -1), threshold=threshold,
                         copy=True)[0])

    elif self.distance_metric == "spectralangle":
        for node in self.node_list_:
            som_node = som_array[node[0], node[1]]
            # spectral angle between the node's weight vector and the
            # datapoint, normalized by the norms of the two vectors
            distmat[node] = np.arccos(np.divide(
                np.dot(som_node, datapoint),
                np.multiply(np.linalg.norm(som_node),
                            np.linalg.norm(datapoint))))

    return distmat

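# Quick illustrative check (not part of the SOM code above): the
# Rogers-Tanimoto dissimilarity used by the "tanimoto" branch is
# R = 2 * (c_TF + c_FT) / (c_TT + c_FF + 2 * (c_TF + c_FT)),
# i.e. mismatching positions are double-weighted. A minimal sketch
# comparing the manual formula with scipy; the example vectors are made up.
import numpy as np
from scipy.spatial import distance as dist

u = np.array([1, 0, 1, 1, 0], dtype=bool)
v = np.array([1, 1, 0, 1, 0], dtype=bool)

mismatches = np.sum(u != v)   # c_TF + c_FT
matches = np.sum(u == v)      # c_TT + c_FF
manual = 2 * mismatches / (matches + 2 * mismatches)

assert np.isclose(manual, dist.rogerstanimoto(u, v))
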
def calculate_distance_metrics(self, vstrack):
    # Subset to top N regions if necessary:
    if self.nregions > 0:
        self.maxdata = np.maximum(self.maindata, self.vsdata)
        self.topidx = np.argpartition(
            self.maxdata, -self.nregions)[-self.nregions:]
        self.x = self.maindata[self.topidx]
        self.y = self.vsdata[self.topidx]
    else:
        self.x = self.maindata
        self.y = self.vsdata
    # Calculate normalized euclidean distance:
    normdist = 0.5 * (np.var(self.x - self.y) /
                      (np.var(self.x) + np.var(self.y)))
    # Pearson correlation:
    cormat = np.corrcoef(self.x, self.y)
    cor = cormat[0, 1]
    # Spearman correlation:
    rho, pval = spearmanr(self.x, self.y)
    # Also calculate binary versions of the tracks:
    if self.ismainobs:
        self.bx = 1.0 * (self.x >= 2.0)
    else:
        self.bx = 1.0 * (self.x >= self.cutoff)
    if self.isvsobs:
        self.by = 1.0 * (self.y >= 2.0)
    else:
        self.by = 1.0 * (self.y >= self.cutoff)
    # Jaccard distance:
    jacc = distance.jaccard(self.bx, self.by)
    # Rogers-Tanimoto distance:
    rtd = distance.rogerstanimoto(self.bx, self.by)
    # Aggregate all:
    self.distance_dict[vstrack] = [cor, normdist, rho, jacc, rtd]
    print(self.distance_dict[vstrack])

def get_nearest_neighbor(self, x_test, k, sample_class):
    distances = []
    targets_index = []
    for i in range(len(sample_class)):
        if (sample_class[i][:] != x_test).any():
            if self.distance_calculator == 'jaccard':
                distance = dis.jaccard(x_test, sample_class[i][:])
            elif self.distance_calculator == 'dice':
                distance = dis.dice(x_test, sample_class[i][:])
            elif self.distance_calculator == 'correlation':
                distance = dis.correlation(x_test, sample_class[i][:])
            elif self.distance_calculator == 'yule':
                distance = dis.yule(x_test, sample_class[i][:])
            elif self.distance_calculator == 'russelo-rao':
                distance = dis.russellrao(x_test, sample_class[i][:])
            elif self.distance_calculator == 'sokal-michener':
                distance = dis.sokalmichener(x_test, sample_class[i][:])
            elif self.distance_calculator == 'rogers-tanimoto':
                distance = dis.rogerstanimoto(x_test, sample_class[i][:])
            elif self.distance_calculator == 'kulzinsky':
                distance = dis.kulsinski(x_test, sample_class[i][:])
            distances.append([distance, i])
    # make a list of the k nearest neighbors' targets
    distances.sort()
    for i in range(k):
        targets_index.append(distances[i][1])
    return targets_index

def tanimoto(vector_x: dict, vector_y: dict) -> float:
    """Tanimoto coefficient.

    Used to estimate the similarity of two samples.

    :param vector_x:
    :param vector_y:
    :return: [0;+1]
    """
    items_vector_x, items_vector_y = Similarity._get_lists(
        vector_x, vector_y)
    return distance.rogerstanimoto(items_vector_x, items_vector_y)

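# Illustrative sketch only: `Similarity._get_lists` above is this snippet's
# own helper; assuming it aligns the two dict "vectors" over a shared key set
# and returns presence indicators, an equivalent conversion could look like
# this (keys and values are made up).
from scipy.spatial import distance

vector_x = {"apple": 1, "pear": 1}
vector_y = {"apple": 1, "plum": 1}

keys = sorted(set(vector_x) | set(vector_y))
items_x = [1 if k in vector_x else 0 for k in keys]
items_y = [1 if k in vector_y else 0 for k in keys]

print(distance.rogerstanimoto(items_x, items_y))  # 0.8 for this toy pair
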
def rogerstanimoto(self, x=None, y=None, w=None):
    """Rogers-Tanimoto dissimilarity.

    x = [1, 0, 0]
    y = [0, 1, 0]
    """
    # fall back to the instance defaults only when no vectors are given
    # (avoids the ambiguous truth value of `x or self.x` for numpy arrays)
    x = self.x if x is None else x
    y = self.y if y is None else y
    w = self.w if w is None else w
    return distance.rogerstanimoto(x, y, w)

def cross_channel_boolean_distance_features(mask):
    """calculates the cross channel distance features

    Calculates the distances across channels

    Parameters
    ----------
    mask : 3D array, shape (M, N, C)
        The input mask with multiple channels.

    Returns
    -------
    features : dict
        dictionary including different distances across channels
    """
    features = dict()
    for ch1 in range(mask.shape[2]):
        for ch2 in range(ch1 + 1, mask.shape[2]):
            # reshaping the channels to 1D
            channel1 = mask[:, :, ch1].ravel()
            channel2 = mask[:, :, ch2].ravel()

            # creating the suffix name for better readability
            suffix = "_Ch" + str(ch1 + 1) + "_Ch" + str(ch2 + 1)

            # storing the distance values
            features["dice_distance" + suffix] = dist.dice(channel1, channel2)
            features["hamming_distance" + suffix] = dist.hamming(
                channel1, channel2)
            features["jaccard_distance" + suffix] = dist.jaccard(
                channel1, channel2)
            features["kulsinski_distance" + suffix] = dist.kulsinski(
                channel1, channel2)
            features["rogerstanimoto_distance" + suffix] = dist.rogerstanimoto(
                channel1, channel2)
            features["russellrao_distance" + suffix] = dist.russellrao(
                channel1, channel2)
            features["sokalmichener_distance" + suffix] = dist.sokalmichener(
                channel1, channel2)
            features["sokalsneath_distance" + suffix] = dist.sokalsneath(
                channel1, channel2)
            features["yule_distance" + suffix] = dist.yule(channel1, channel2)
    return features

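# Minimal usage sketch for the feature extractor above; the mask shape and
# values are made up, and `dist` is assumed to be scipy.spatial.distance
# imported under that alias. Note that newer SciPy releases deprecate
# scipy.spatial.distance.kulsinski, so that entry may need adjusting there.
import numpy as np
from scipy.spatial import distance as dist

rng = np.random.default_rng(0)
mask = rng.integers(0, 2, size=(8, 8, 3)).astype(bool)  # 3 binary channels

features = cross_channel_boolean_distance_features(mask)
print(features["rogerstanimoto_distance_Ch1_Ch2"])
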
def tanimoto_distances_matrix(param_grid, temporal_filtering,
                              global_nuis_correction, prefs_method,
                              prefs_thr):
    from scipy.spatial.distance import rogerstanimoto

    wd, xd, dd, labelsf, phenof, dataf, masks, dilmasks, templates, pipe = \
        get_filepaths(temporal_filtering, global_nuis_correction)

    # loop will load all classification results dataf (or classify again)
    # and calculate performance metrics
    param_gridlist = list(ParameterGrid(param_grid))

    n = len(param_gridlist)
    tanis = np.zeros((n, n))
    ticks = []
    for j1idx, j1 in enumerate(param_gridlist):
        subjsf = j1['subjsf']
        cl = j1['cl']
        fidx = dataf.index(subjsf)
        maskf = masks[fidx]
        result_file1 = os.path.join(
            wd, get_resultfile_name(subjsf, cl, prefs_method, prefs_thr))
        presels_vol1 = get_localizations_from_datashelf(
            result_file1, subjsf, labelsf, maskf)
        ticks.append(subjsf.split('_')[1])
        print('Tanimoto comparing ' + result_file1)

        for j2idx, j2 in enumerate(param_gridlist):
            subjsf = j2['subjsf']
            cl = j2['cl']
            fidx = dataf.index(subjsf)
            maskf = masks[fidx]
            result_file2 = os.path.join(
                wd, get_resultfile_name(subjsf, cl, prefs_method, prefs_thr))
            presels_vol2 = get_localizations_from_datashelf(
                result_file2, subjsf, labelsf, maskf)
            print('with ' + result_file2)

            tanis[j1idx, j2idx] = rogerstanimoto(
                presels_vol1.flatten().astype(bool),
                presels_vol2.flatten().astype(bool))

    return tanis, ticks

def calculate_pss(self, profile, ignore=None, method="pairwise"):
    """
    Calculate Profiles Similarity Score.
    """
    if len(self) != len(profile):
        raise ProfileError("Different profiles' lengths")
    prof_1 = self
    prof_2 = profile
    if ignore:
        for i in ignore:
            try:
                prof_1.profile = list(prof_1.profile)
                del prof_1.profile[prof_1.query.index(i)]
                prof_1.profile = tuple(prof_1.profile)
            except ValueError:
                # list.index raises ValueError when the element is missing
                raise ProfileError("Element to ignore not in profile")
            try:
                prof_2.profile = list(prof_2.profile)
                del prof_2.profile[prof_2.query.index(i)]
                prof_2.profile = tuple(prof_2.profile)
            except ValueError:
                raise ProfileError("Element to ignore not in profile")
    if method == "pairwise":
        return sum(a == b for a, b in zip(prof_1.profile, prof_2.profile))
    elif method == "jaccard":
        return dist.jaccard(prof_1.profile, prof_2.profile)
    elif method == "yule":
        return dist.yule(prof_1.profile, prof_2.profile)
    elif method == "dice":
        return dist.dice(prof_1.profile, prof_2.profile)
    elif method == "hamming":
        return dist.hamming(prof_1.profile, prof_2.profile)
    elif method == "kulsinski":
        return dist.kulsinski(prof_1.profile, prof_2.profile)
    elif method == "rogerstanimoto":
        return dist.rogerstanimoto(prof_1.profile, prof_2.profile)
    elif method == "russellrao":
        return dist.russellrao(prof_1.profile, prof_2.profile)
    elif method == "sokalmichener":
        return dist.sokalmichener(prof_1.profile, prof_2.profile)

def fitness_func(chromosome):
    return 1 - scipy_distance.rogerstanimoto(chromosome, target_chromosome)

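# Illustrative sketch of the fitness function above: the Rogers-Tanimoto
# dissimilarity is 0 for identical boolean vectors and grows with the number
# of mismatched genes, so 1 - dissimilarity peaks at 1.0 when a chromosome
# matches the target. The names below (target_chromosome, the import alias)
# mirror the snippet's assumed module-level globals; the data is made up.
import numpy as np
from scipy.spatial import distance as scipy_distance

target_chromosome = np.array([1, 0, 1, 1, 0, 1], dtype=bool)

perfect = target_chromosome.copy()
one_off = target_chromosome.copy()
one_off[0] = ~one_off[0]

print(fitness_func(perfect))   # 1.0
print(fitness_func(one_off))   # lower fitness for the mismatching gene
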
def exec_similarity(dct, algorithm):
    if validate_similarity_algorithms(dct, algorithm):
        return {}

    # map each supported algorithm name to its scipy.spatial.distance function
    # note: scipy's mahalanobis, seuclidean and wminkowski normally expect
    # extra parameters beyond the two vectors
    algorithms = {
        'braycurtis': braycurtis,
        'canberra': canberra,
        'chebyshev': chebyshev,
        'cityblock': cityblock,
        'correlation': correlation,
        'cosine': cosine,
        'euclidean': euclidean,
        'mahalanobis': mahalanobis,
        # 'minkowski': minkowski,
        'seuclidean': seuclidean,
        'sqeuclidean': sqeuclidean,
        'wminkowski': wminkowski,
        'dice': dice,
        'hamming': hamming,
        'jaccard': jaccard,
        'kulsinski': kulsinski,
        'rogerstanimoto': rogerstanimoto,
        'russellrao': russellrao,
        'sokalmichener': sokalmichener,
        'sokalsneath': sokalsneath,
        'yule': yule,
    }
    metric = algorithms.get(algorithm)
    if metric is None:
        return None

    # add the question-to-answer distance to each answer dict in place
    return [
        answer.update({
            algorithm: metric(ndarray_dict(dct['tf_idf']),
                              ndarray_dict(answer['tf_idf']))
        })
        for answer in dct['answers']
    ]

def rogerstanimoto_(x, y):
    try:
        return rogerstanimoto(x, y)
    except ZeroDivisionError:
        return 0

try:
    desired_organism = sys.argv[5]
except IndexError:
    desired_organism = None

if desired_organism is not None:
    models = [mod for mod in models
              if model_info[mod.split(sep)[-1].split('.')[0]][4] == desired_organism]
    print ' Predicting for organism : ' + desired_organism
    output_name = (input_name + '_' + input_name2 + '_out_binary_sim_' +
                   str(threshold) + '_' + desired_organism[:3] + '.txt')
else:
    output_name = (input_name + '_' + input_name2 + '_out_binary_sim_' +
                   str(threshold) + '.txt')
print ' Total Number of Classes : ' + str(len(models))
print ' Using TPR threshold of : ' + str(threshold)
out_file = open(output_name, 'w')
querymatrix, smiles = importQuery(input_name)
prediction_results = performTargetPrediction(models)
print ' Total Number of Query Molecules file 1 : ' + str(len(querymatrix))
querymatrix, smiles2 = importQuery(input_name2)
prediction_results2 = performTargetPrediction(models)
print ' Total Number of Query Molecules file 2 : ' + str(len(querymatrix))
sim_output = []
sim_output2 = []
for idx in range(prediction_results.shape[1]):
    sim_output.append(rogerstanimoto(prediction_results[:, idx],
                                     prediction_results2[:, idx]))
    sim_output2.append(jaccard(prediction_results[:, idx],
                               prediction_results2[:, idx]))
out_file.write('Compound Pair No.\tSmiles 1\tSmiles 2\tRogers Tanimoto\tJaccard Sim\n')
for idx, comp1 in enumerate(smiles):
    comp2 = smiles2[idx]
    s = sim_output[idx]
    s2 = sim_output2[idx]
    out_file.write('\t'.join(map(str, [idx, comp1, comp2, 1.0 - s, 1.0 - s2])) + '\n')
print '\n Wrote Results to: ' + output_name
out_file.close()

def distance(self, vector1, vector2, type_):
    """
    Calculate distance between two vectors.

    Args:
        vector1 (list of int/float/bool): Vector in vector space
        vector2 (list of int/float/bool): Vector in vector space
        type_ (str): Type of distance calculation. Allowed types are:

            * For numeric vectors *
            - braycurtis: Computes the Bray-Curtis distance between two arrays.
            - canberra: Computes the Canberra distance between two arrays.
            - chebyshev: Computes the Chebyshev distance.
            - cityblock: Computes the City Block (Manhattan) distance.
            - correlation: Computes the correlation distance between two arrays.
            - cosine: Computes the Cosine distance between arrays.
            - euclidean: Computes the Euclidean distance between two arrays.
            - sqeuclidean: Computes the squared Euclidean distance between two arrays.

            * For boolean vectors *
            - dice: Computes the Dice dissimilarity between two boolean arrays.
            - hamming: Computes the Hamming distance between two arrays.
            - jaccard: Computes the Jaccard-Needham dissimilarity between two boolean arrays.
            - kulsinski: Computes the Kulsinski dissimilarity between two boolean arrays.
            - rogerstanimoto: Computes the Rogers-Tanimoto dissimilarity between two boolean arrays.
            - russellrao: Computes the Russell-Rao dissimilarity between two boolean arrays.
            - sokalmichener: Computes the Sokal-Michener dissimilarity between two boolean arrays.
            - sokalsneath: Computes the Sokal-Sneath dissimilarity between two boolean arrays.
            - yule: Computes the Yule dissimilarity between two boolean arrays.

    Returns:
        float: Distance between vectors.
    """
    metrics = {
        "braycurtis": distance.braycurtis,
        "canberra": distance.canberra,
        "chebyshev": distance.chebyshev,
        "cityblock": distance.cityblock,
        "correlation": distance.correlation,
        "cosine": distance.cosine,
        "euclidean": distance.euclidean,
        "sqeuclidean": distance.sqeuclidean,
        "dice": distance.dice,
        "hamming": distance.hamming,
        "jaccard": distance.jaccard,
        "kulsinski": distance.kulsinski,
        "rogerstanimoto": distance.rogerstanimoto,
        "russellrao": distance.russellrao,
        "sokalmichener": distance.sokalmichener,
        "sokalsneath": distance.sokalsneath,
        "yule": distance.yule,
    }
    if type_ not in metrics:
        raise ValueError(
            """Wrong value for type_. Please enter one of supported values.
            Type help(distance) to see supported values.""")
    return metrics[type_](vector1, vector2)

def tanimoto_similarity(base, target):
    # note: scipy's rogerstanimoto returns a dissimilarity (0 = identical)
    evaluation = rogerstanimoto(base, target)
    return evaluation

def getDistancesFromMeds(noBits, signBitOption, aggregateTreatmentsOption,
                         binaryDistanceMethod, useManualDistanceMethod,
                         segmentLength):
    # Pull in previously made med sequence matrix and delete the patient identifiers
    treatmentData = np.delete(gv.medSequenceMatrix, 0, 1)

    # Create an array of the binary strings which are encoded with each segment's treatments
    monthlyBinaryTreatmentVectors = []
    for i in range(0, treatmentData.shape[0]):
        temp = []
        for j in range(0, segmentLength):
            binaryTreatment = convTreatmentToBinaryArray(
                treatmentData[i, j], signBitOption, noBits)
            temp.append(binaryTreatment)
        monthlyBinaryTreatmentVectors.append(temp)
    monthlyBinaryTreatmentVectors = np.array(monthlyBinaryTreatmentVectors)

    binaryTreatmentVectors = []
    # If the aggregate option is chosen, then logically OR each binary string in a
    # segment to form one binary string of length noBits per each segment
    if (aggregateTreatmentsOption):
        boolMonthlyTreatVectors = monthlyBinaryTreatmentVectors.astype(bool)
        for i in range(monthlyBinaryTreatmentVectors.shape[0]):
            rowOfMonths = np.zeros(noBits + signBitOption).astype(bool)
            for j in range(monthlyBinaryTreatmentVectors.shape[1]):
                rowOfMonths = np.logical_or(rowOfMonths,
                                            boolMonthlyTreatVectors[i, j])
            binaryTreatmentVectors.append(rowOfMonths.astype(int))
    # If the aggregate treatment option is not chosen, then monthly granularity is
    # preserved by concatenating every binary string in a segment to form one long
    # binary string of length noBits * segmentLength per each segment
    else:
        for i in range(monthlyBinaryTreatmentVectors.shape[0]):
            rowOfMonths = np.array([])
            for j in range(monthlyBinaryTreatmentVectors.shape[1]):
                rowOfMonths = np.concatenate(
                    [rowOfMonths, monthlyBinaryTreatmentVectors[i, j]])
            binaryTreatmentVectors.append(rowOfMonths)
    binaryTreatmentVectors = np.array(binaryTreatmentVectors)

    # Initialize distance matrix and compute distances based on the selected distance method
    distanceMatrix = np.zeros(
        (len(binaryTreatmentVectors), len(binaryTreatmentVectors)))
    if binaryDistanceMethod == 1:  # Sokal-Michener
        for i in range(binaryTreatmentVectors.shape[0]):
            for j in range(i, binaryTreatmentVectors.shape[0]):
                if useManualDistanceMethod:
                    dist = np.sum(
                        np.logical_xor(binaryTreatmentVectors[i].astype(bool),
                                       binaryTreatmentVectors[j].astype(bool))
                    ) / (noBits + signBitOption)
                else:
                    dist = distance.hamming(
                        binaryTreatmentVectors[i].astype(bool),
                        binaryTreatmentVectors[j].astype(bool))
                distanceMatrix[i, j] = dist
                distanceMatrix[j, i] = dist
    elif binaryDistanceMethod == 2:  # Jaccard
        for i in range(binaryTreatmentVectors.shape[0]):
            for j in range(i, binaryTreatmentVectors.shape[0]):
                if np.array_equal(binaryTreatmentVectors[i],
                                  binaryTreatmentVectors[j]):
                    dist = 0
                else:
                    if useManualDistanceMethod:
                        numerator = np.sum(
                            np.logical_xor(
                                binaryTreatmentVectors[i].astype(bool),
                                binaryTreatmentVectors[j].astype(bool)))
                        denominator = np.sum(
                            np.logical_or(
                                binaryTreatmentVectors[i].astype(bool),
                                binaryTreatmentVectors[j].astype(bool)))
                        dist = numerator / denominator
                    else:
                        dist = distance.jaccard(
                            binaryTreatmentVectors[i].astype(bool),
                            binaryTreatmentVectors[j].astype(bool))
                distanceMatrix[i, j] = dist
                distanceMatrix[j, i] = dist
    elif binaryDistanceMethod == 3:  # Rogers-Tanimoto
        for i in range(binaryTreatmentVectors.shape[0]):
            for j in range(i, binaryTreatmentVectors.shape[0]):
                if useManualDistanceMethod:
                    numerator = 2 * np.sum(
                        np.logical_xor(binaryTreatmentVectors[i].astype(bool),
                                       binaryTreatmentVectors[j].astype(bool)))
                    denominator = np.sum(
                        np.logical_xor(binaryTreatmentVectors[i].astype(bool),
                                       binaryTreatmentVectors[j].astype(bool))
                    ) + noBits + signBitOption
                    dist = numerator / denominator
                else:
                    dist = distance.rogerstanimoto(
                        binaryTreatmentVectors[i].astype(bool),
                        binaryTreatmentVectors[j].astype(bool))
                distanceMatrix[i, j] = dist
                distanceMatrix[j, i] = dist
    else:  # Sokal-Sneath II
        for i in range(binaryTreatmentVectors.shape[0]):
            for j in range(binaryTreatmentVectors.shape[0]):
                if np.array_equal(binaryTreatmentVectors[i],
                                  binaryTreatmentVectors[j]):
                    dist = 0
                else:
                    if useManualDistanceMethod:
                        numerator = 2 * np.sum(
                            np.logical_xor(
                                binaryTreatmentVectors[i].astype(bool),
                                binaryTreatmentVectors[j].astype(bool)))
                        denominator = (2 * np.sum(
                            np.logical_xor(
                                binaryTreatmentVectors[i].astype(bool),
                                binaryTreatmentVectors[j].astype(bool)))
                        ) + np.sum(
                            np.logical_and(
                                binaryTreatmentVectors[i].astype(bool),
                                binaryTreatmentVectors[j].astype(bool)))
                        dist = numerator / denominator
                    else:
                        dist = distance.sokalsneath(
                            binaryTreatmentVectors[i].astype(bool),
                            binaryTreatmentVectors[j].astype(bool))
                distanceMatrix[i, j] = dist

    # Normalize distance matrix by dividing the whole thing by the largest value.
    maxDist = np.amax(distanceMatrix)
    distanceMatrix = np.divide(distanceMatrix, maxDist)
    return distanceMatrix

def my_dist(u, v):
    return cosine(u, v) * yule(u, v) * braycurtis(u, v) * np.abs(
        rogerstanimoto(u, v))

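# Illustrative sketch only: scipy's pdist accepts a callable metric, so the
# composite distance above can be evaluated pairwise over a small 0/1 matrix.
# The data below is made up; the rows are chosen so that yule's denominator
# is non-zero for every pair.
import numpy as np
from scipy.spatial.distance import (braycurtis, cosine, pdist, rogerstanimoto,
                                    squareform, yule)

X = np.array([[1, 0, 1, 0, 1],
              [1, 1, 0, 0, 1],
              [0, 1, 1, 1, 0]])

print(squareform(pdist(X, metric=my_dist)))
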
def tanimoto_dist(A, B):
    return rogerstanimoto(A, B)

jaccard_sim = np.zeros((3, 3))
for i in range(0, 3):
    for j in range(0, 3):
        if i == j:
            jaccard_sim[i, j] = 1
        else:
            jaccard_sim[i, j] = jaccard_score(y[i, :], y[j, :])
jaccard_sim

# simple matching score
simatch_sim = np.zeros((3, 3))
for i in range(0, 3):
    for j in range(0, 3):
        if i == j:
            simatch_sim[i, j] = 1
        else:
            simatch_sim[i, j] = sum(y[i, :] == y[j, :]) / 8
simatch_sim

# roger score
rogerstan_sim = np.zeros((3, 3))
for i in range(0, 3):
    for j in range(0, 3):
        if i == j:
            rogerstan_sim[i, j] = 1
        else:
            rogerstan_sim[i, j] = 1 - distance.rogerstanimoto(y[i, :], y[j, :])
rogerstan_sim

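# Side note (illustrative, not part of the script above): the quantity
# 1 - distance.rogerstanimoto(u, v) used for the "roger score" equals the
# Rogers-Tanimoto similarity (a + d) / (a + d + 2 * (b + c)), where a and d
# count matching positions and b and c count mismatches. The toy vectors
# below are made up.
import numpy as np
from scipy.spatial import distance

u = np.array([1, 1, 0, 0, 1, 0], dtype=bool)
v = np.array([1, 0, 0, 1, 1, 0], dtype=bool)

matches = np.sum(u == v)      # a + d
mismatches = np.sum(u != v)   # b + c
similarity = matches / (matches + 2 * mismatches)

assert np.isclose(similarity, 1 - distance.rogerstanimoto(u, v))
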
def rogerstanimoto(app1SyscallsVector, app2SyscallsVector):
    return spDist.rogerstanimoto(app1SyscallsVector, app2SyscallsVector)

import sys

import numpy as np
from scipy.spatial import distance

filename1 = sys.argv[1]
filename2 = sys.argv[2]

pad1 = np.loadtxt(filename1, dtype='float', delimiter='\t', usecols=(5),
                  unpack=True, skiprows=11)
pad2 = np.loadtxt(filename2, dtype='float', delimiter='\t', usecols=(5),
                  unpack=True, skiprows=11)

dE = distance.euclidean(pad1, pad2)
rog = distance.rogerstanimoto(pad1, pad2)

print("Euclidean", dE)
print("Rogers", rog)

print ' Total Number of Classes : ' + str(len(models))
print ' Using TPR threshold of : ' + str(threshold)
output_name = (input_name + '_' + input_name2 + '_out_binary_sim_' +
               str(threshold) + '.txt')
out_file = open(output_name, 'w')
querymatrix, smiles, ids = importQuery(input_name)
prediction_results = performTargetPrediction(models)
print ' Total Number of Query Molecules file 1 : ' + str(len(querymatrix))
querymatrix, smiles2, ids2 = importQuery(input_name2)
prediction_results2 = performTargetPrediction(models)
print ' Total Number of Query Molecules file 2 : ' + str(len(querymatrix))
sim_output = []
sim_output2 = []
for idx in range(prediction_results.shape[1]):
    sim_output.append(
        rogerstanimoto(prediction_results[:, idx], prediction_results2[:, idx]))
    sim_output2.append(
        jaccard(prediction_results[:, idx], prediction_results2[:, idx]))
out_file.write(
    'Compound Pair No.\tSmiles 1\tSmiles 2\tRogers Tanimoto\tJaccard Sim\n')
for idx, comp1 in enumerate(ids):
    comp2 = ids2[idx]
    s = sim_output[idx]
    s2 = sim_output2[idx]
    out_file.write(
        '\t'.join(map(str, [idx, comp1, comp2, 1.0 - s, 1.0 - s2])) + '\n')
print '\n Wrote Results to: ' + output_name
out_file.close()

def do_roger_tanimoto(m, rogtan, vec):
    for i in range(m):
        for j in range(m):
            rogtan[i, j] = distance.rogerstanimoto(vec[i], vec[j])
    return rogtan

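# Illustrative alternative (not from the original code): scipy can build the
# same m x m Rogers-Tanimoto matrix in a single vectorized call, avoiding the
# explicit double loop above. The toy data is made up and `distance` is
# assumed to be scipy.spatial.distance as in the function above.
import numpy as np
from scipy.spatial import distance

vec = np.random.default_rng(1).integers(0, 2, size=(4, 10)).astype(bool)

loop_version = do_roger_tanimoto(len(vec), np.zeros((len(vec), len(vec))), vec)
vectorized = distance.cdist(vec, vec, metric='rogerstanimoto')

assert np.allclose(loop_version, vectorized)
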
def main():
    from scipy.spatial import distance

    a = np.array([1, 2, 43])
    b = np.array([3, 2, 1])
    d = Distance()

    sep = '-----------------------------------------------------------------'

    def compare(name, mine, ref):
        # print one block: separator, own implementation, scipy reference
        print(sep)
        print('My {}: {}'.format(name, mine))
        print('SciPy {}: {}'.format(name, ref))

    compare('braycurtis', d.braycurtis(a, b), distance.braycurtis(a, b))
    compare('canberra', d.canberra(a, b), distance.canberra(a, b))
    compare('chebyshev', d.chebyshev(a, b), distance.chebyshev(a, b))
    compare('cityblock', d.cityblock(a, b), distance.cityblock(a, b))
    compare('correlation', d.correlation(a, b), distance.correlation(a, b))
    compare('euclidean', d.euclidean(a, b), distance.euclidean(a, b))
    compare('hamming', d.hamming(a, b), distance.hamming(a, b))
    compare('jaccard', d.jaccard(a, b), distance.jaccard(a, b))
    compare('manhattan', d.cityblock(a, b), distance.cityblock(a, b))
    compare('cosine', d.cosine(a, b), distance.cosine(a, b))
    compare('dice', d.dice(a, b), distance.dice(a, b))
    compare('kulsinski', d.kulsinski(a, b), distance.kulsinski(a, b))

    iv = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]])
    compare('mahalanobis', d.mahalanobis(a, b, iv),
            distance.mahalanobis(a, b, iv))
    compare('seuclidean', d.seuclidean(a, b, np.array([0.1, 0.1, 0.1])),
            distance.seuclidean(a, b, [0.1, 0.1, 0.1]))
    compare('sokalmichener', d.sokalmichener(a, b),
            distance.sokalmichener(a, b))
    compare('sokal_sneath', d.sokalsneath(a, b), distance.sokalsneath(a, b))
    compare('sqeuclidean', d.sqeuclidean(a, b), distance.sqeuclidean(a, b))
    compare('minkowski', d.minkowski(a, b, 2), distance.minkowski(a, b, 2))
    compare('rogerstanimoto', d.rogerstanimoto(a, b),
            distance.rogerstanimoto(a, b))
    compare('russellrao', d.russellrao(a, b), distance.russellrao(a, b))
    compare('wminkowski', d.wminkowski(a, b, 2, np.ones(3)),
            distance.wminkowski(a, b, 2, np.ones(3)))
    compare('yule', d.yule(a, b), distance.yule(a, b))
    print(sep)
