def test__check_nulls_in_feature_columns(self):
    """Similarity must reject feature frames containing nulls."""

    def _load_and_cast(path):
        # Read the fixture and cast every non-id column to integer.
        df = self.spark.read.csv(path, header=True)
        for column in df.columns:
            if 'id' not in column:
                df = df.withColumn(column, f.col(column).cast(IntegerType()))
        return df

    df_nulls = _load_and_cast('tests/fixtures/similarity/nulls_features.csv')
    df_clean = _load_and_cast(
        'tests/fixtures/similarity/no_nulls_features.csv')

    # A null-free frame is accepted by the constructor...
    Similarity(df_features=df_clean)
    # ...while one with nulls trips its assertion.
    with self.assertRaises(AssertionError):
        Similarity(df_features=df_nulls)
def test_generate(self):
    """generate() yields an n-by-n matrix for known similarity types and
    raises ValueError for an unknown one."""
    df_features = self.spark.read.csv(
        'tests/fixtures/similarity/features.csv', header=True)
    df_features_int = df_features
    for column in df_features.columns:
        if 'id' not in column:
            df_features_int = df_features_int.withColumn(
                column, f.col(column).cast(IntegerType()))

    for metric in ('cosine', 'euclidean'):
        pd_df_similarity, _ = Similarity(
            df_features=df_features_int, similarity_type=metric).generate()
        # Square matrix: one row and one column per input record.
        self.assertEqual(pd_df_similarity.shape[0], df_features.count())
        self.assertEqual(pd_df_similarity.shape[1], df_features.count())

    # Unknown similarity types are rejected at generate() time.
    with self.assertRaises(ValueError):
        Similarity(df_features=df_features_int,
                   similarity_type='test').generate()
def __init__(self, path_to_tfcsv):
    """Build search state from a term-frequency CSV.

    Args:
        path_to_tfcsv: path to the CSV file backing the Database.
    """
    self.database = Database(path_to_tfcsv)
    # Term dictionary derived from the loaded documents.
    self.dictionary = self.database.get_dictionary()
    self.similarity = Similarity(self.database.documents)
    # Maximum number of ranked results returned per query.
    self.rank_limit = 6
    # Number of cluster leaders for k-means preselection.
    self.num_leaders = 5
    # Pre-cluster documents up front so later queries can use the leaders.
    self.similarity.k_means_cluster(self.num_leaders)
def __init__(self):
    """Load the per-category face icons and the 'bling' overlay image."""
    self.faces = {}
    self.similarity = Similarity()
    # Small overlay badge image, resized for compositing.
    self.bling = Image.open('images/' + 'bling.png').resize((50, 50))
    # Iterate items() instead of keys() — avoids a second dict lookup per
    # category (the original indexed ikon_categories[i] inside the loop).
    for category, icon_name in ikon_categories.items():
        self.faces[category] = Image.open('images/' + icon_name +
                                          '.png').resize((60, 60))
def build_sim_matrix(self, sentence_list, logger):
    """Build a symmetric pairwise similarity matrix for the sentences.

    Args:
        sentence_list: ordered list of sentence strings.
        logger: logger used for per-sentence progress messages.

    Returns:
        numpy array of shape (n, n); scores rounded to 2 decimals, with
        the diagonal fixed at 1.00.
    """
    sim = Similarity()
    self.sentences = sentence_list
    n = len(self.sentences)
    sim_matrix = np.empty([n, n])
    for i in range(n):
        logger.info('Processing sentence # {} => {}'.format(
            i, self.sentences[i]))
        for j in range(i + 1, n):
            s1 = self.sentences[i]
            s2 = self.sentences[j]
            try:
                score = sim.calculate_similarity_score(s1, s2)
            except ZeroDivisionError:
                # BUG FIX: the original only `pass`ed here, so `score`
                # kept the previous pair's value (or was undefined for the
                # very first pair). Treat a degenerate comparison as zero
                # similarity instead.
                score = 0.0
            sim_matrix[i][j] = round(score, 2)
            # Similarity is symmetric; mirror the upper triangle.
            sim_matrix[j][i] = sim_matrix[i][j]
        # A sentence is always fully similar to itself.
        sim_matrix[i][i] = 1.00
    # (The original's `del sim; del score` inside a bare except was a
    # no-op for the result and has been removed.)
    return sim_matrix
def calculate(self):
    """Run four recommenders and persist their stacked predictions.

    Side effects: stores each fitted model on self.biasClass /
    self.similarityClass / self.svdClass / self.matFactoryClass, fills
    self.allPredicts (4 x testSize) and pickles it to `predictsFile`.
    """
    self.allPredicts = np.zeros((4, self.testSize))

    # Row 0: bias baseline.
    bias = Bias(self.trainData, self.testData)
    bias.calculateBias()
    answers, predicts = bias.predict()
    self.biasClass = bias
    self.allPredicts[0, :] = predicts

    # Row 1: similarity-matrix model.
    similarity = Similarity(self.trainData, self.testData)
    similarity.calculateBias()
    similarity.calcSimiMatrix()
    answers, predicts = similarity.predict()
    self.similarityClass = similarity
    self.allPredicts[1, :] = predicts

    # Row 2: SVD-based prediction.
    svd = SVD(self.trainData, self.testData)
    svd.generaterMat()
    svd.calcSVD()
    answers, predicts = svd.predict()
    self.svdClass = svd
    self.allPredicts[2, :] = predicts

    # Row 3: matrix factorization. Meaning of train(10, 11) arguments is
    # not visible from here — TODO confirm against MatFactory.
    matFactory = MatFactory(self.trainData, self.testData)
    matFactory.train(10, 11)
    answers, predicts = matFactory.predict()
    self.matFactoryClass = matFactory
    self.allPredicts[3, :] = predicts

    # BUG FIX: the original opened the pickle file and never closed it
    # (leaked handle, potentially unflushed data); use a context manager.
    with open(predictsFile, 'wb') as pickleFile:
        pickle.dump(self.allPredicts, pickleFile)
def combine_files(lectures, features=None, prompts=('q1', 'q2')):
    """Collect feature rows X and gold scores Y from both annotators.

    Args:
        lectures: lecture ids whose phrase-pair files are loaded.
        features: feature names to extract, in order; defaults to all
            features the Similarity extractor knows about, sorted.
        prompts: prompts to include (default both q1 and q2). Changed from
            a mutable list default to a tuple — same iteration behavior.

    Returns:
        (X, Y): list of feature rows and the parallel list of scores.
    """
    phrasedir1 = '../data/%s/oracle_annotator_1/phrase/' % course
    phrasedir2 = '../data/%s/oracle_annotator_2/phrase/' % course
    X = []
    Y = []
    # FIX: compare to None with `is`, not `==` (PEP 8).
    if features is None:
        sim_extractor = Similarity()
        features = sorted(sim_extractor.features.keys())
    # The original enumerate() index was unused; iterate directly.
    for lec in lectures:
        for q in prompts:
            for phrasedir in (phrasedir1, phrasedir2):
                path = phrasedir + str(lec) + '/'
                filename = os.path.join(path, q + sim_exe)
                data = fio.LoadDictJson(filename)
                for fdict, score, _ in data:
                    row = []
                    for name in features:
                        x = fdict[name]
                        # Guard against serialized NaNs in feature dicts.
                        if str(x) == 'nan':
                            x = 0.0
                        row.append(x)
                    X.append(row)
                    Y.append(score)
    return X, Y
def find_top_k_similar_program(repo_kernel_file, user_prog_graph_dot_file,
                               graph_name, k, num_iter, cluster_json):
    """Return the top-k repository programs most similar to the user graph,
    with their similarity scores."""
    similarity_engine = Similarity()
    # Load precomputed graph kernels for the repository corpus.
    similarity_engine.read_graph_kernels(repo_kernel_file)
    return similarity_engine.find_top_k_similar_graphs(
        user_prog_graph_dot_file, graph_name, k, num_iter, cluster_json)
def gather_performance(output):
    """Aggregate per-feature cross-validation results into one matrix.

    Runs one pass per single feature plus a final pass for all features
    combined, averaging every metric column of each result file, and
    writes the summary via fio.WriteMatrix.
    """
    sim_extractor = Similarity()
    allfeatures = sorted(sim_extractor.features.keys())
    allbody = []
    for k in range(len(allfeatures) + 1):
        # Last iteration uses the full feature set; others a single one.
        features = allfeatures if k == len(allfeatures) else [allfeatures[k]]
        name = '_'.join(features)
        resultfile = '../data/%s/simlearning.cv.svm.%s.txt' % (course, name)
        head, body = fio.ReadMatrix(resultfile, hasHead=True)
        # Average each metric column; the first two columns are labels.
        allhead = ['name'] + head[2:]
        average = [name]
        for col in range(2, len(head)):
            column_values = [float(row[col]) for row in body]
            average.append(np.mean(column_values))
        allbody.append(average)
    fio.WriteMatrix(output, allbody, allhead)
def contentBasedFiltering(self, key, n=3):
    '''Return list of n top match scores along with other keys.

    Compares `key` against every other key in self.dataset using Pearson
    correlation over their common inner keys; highest scores first.
    '''
    dataset = self.dataset
    scores = []
    # Hoisted out of the loop: the original constructed a fresh
    # Similarity() on every iteration for no benefit.
    sim = Similarity()
    for other_key in dataset:
        if other_key == key:
            continue
        # Similarity is only defined over shared inner keys.
        common_inner_keys = self.fetchCommonInnerKeys(key, other_key)
        if len(common_inner_keys) == 0:
            continue
        x = [dataset[key][inner_key] for inner_key in common_inner_keys]
        y = [
            dataset[other_key][inner_key] for inner_key in common_inner_keys
        ]
        scores.append((sim.pearson(x, y), other_key))
    # sort(reverse=True) replaces sort()+reverse(); identical ordering for
    # these (score, key) tuples since the full tuple is compared.
    scores.sort(reverse=True)
    return scores[0:n]
def train_IE256_svm(traincourse, model_dir, name='simlearn_cv'):
    """Train (or load a cached) SVM phrase-similarity classifier.

    Uses all available Similarity features; the model is pickled to
    '<model_dir>/<traincourse>_<features>.model' and reused when present.
    """
    sim_extractor = Similarity()
    allfeatures = sorted(sim_extractor.features.keys())
    features = allfeatures
    name = '_'.join(features)
    # FIX: removed unused locals from the original (`lectures` and a
    # defaultdict bound to the name `dict`, shadowing the builtin).
    # Lecture ids used for training; IE256 skips lecture 22.
    if traincourse == 'IE256':
        train = [x for x in range(14, 26) if x != 22]
    else:
        train = [x for x in range(3, 27)]
    model_file = os.path.join(model_dir, '%s_%s.model' % (traincourse, name))
    if fio.IsExist(model_file):
        # Reuse the previously trained model instead of refitting.
        with open(model_file, 'rb') as handle:
            clf = pickle.load(handle)
    else:
        train_X, train_Y = combine_files_course(traincourse, train, features)
        clf = svm.SVC()
        clf.fit(train_X, train_Y)
        with open(model_file, 'wb') as handle:
            pickle.dump(clf, handle)
def extractPhrasePaireFeature(phrasedir):
    """Write pairwise phrase-similarity features for every lecture/prompt.

    For each lecture directory and prompt, loads the extracted phrase list
    and emits one feature record per ordered phrase pair (with placeholder
    score 0.0) via fio.SaveDict2Json.
    NOTE(review): Python 2 print statements — this module targets Python 2.
    """
    for lec in annotation.Lectures:
        path = phrasedir + str(lec) + '/'
        fio.NewPath(path)
        for prompt in ['q1', 'q2']:
            # Prefix handed to the extractor for its per-prompt files.
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            print filename
            featureset = []
            feature_extractor = Similarity(prefix)
            phrasefile = os.path.join(path, "%s.%s.key" % (prompt, method))
            phrases = fio.LoadList(phrasefile)
            # Ordered pairs: both (p1, p2) and (p2, p1) are emitted.
            for p1 in phrases:
                for p2 in phrases:
                    featureset.append(
                        (feature_extractor.get_features(p1, p2), 0.0, {
                            'p1': p1,
                            'p2': p2
                        }))
            fio.SaveDict2Json(featureset, filename)
            # Persists extractor state — presumably a feature cache;
            # TODO confirm against Similarity.save().
            feature_extractor.save()
def similarity_action(self):
    """Open the Similarity dialog for the current table and show the result.

    Runs the dialog modally; if the user accepts, computes the similarity
    between the two chosen columns and reports it in a message box.
    """
    dialog = Similarity(parent=self, df=self.table)
    # exec_() is truthy only when the dialog was accepted.
    if dialog.exec_():
        res = dialog.execute()
        QMessageBox.information(
            self, f'Similarity: {dialog.method}',
            f'Columns {dialog.first_column} and {dialog.second_column} have a similarity value of {res}',
            QMessageBox.Ok)
def train_leave_one_lecture_out(model_dir, name='simlearn_cv'):
    """Leave-one-lecture-out cross-validation for the SVR similarity model.

    For each lecture, trains (or loads a cached) svm.SVR on all other
    lectures' phrase-pair features, then records the per-prompt MSE on the
    held-out lecture to '../data/<course>/simlearning.cv.<name>.txt'.
    NOTE(review): Python 2 print statements — module targets Python 2.
    """
    # model_dir = '../data/IE256/%s/model/%s/'%(system, name)
    # fio.NewPath(model_dir)
    #
    # outputdir = '../data/IE256/%s/extraction/%s_output/'%(system, name)
    # fio.NewPath(outputdir)
    sim_extractor = Similarity()
    allfeatures = sorted(sim_extractor.features.keys())
    if True:
        # Originally a loop over feature subsets; now pinned to "all".
        k = len(allfeatures)
        #for k in range(len(allfeatures)+1):
        #features = allfeatures#['WordEmbedding']
        if k == len(allfeatures):  #use all features
            features = allfeatures
        else:
            features = [allfeatures[k]]
        name = '_'.join(features)
        lectures = annotation.Lectures
        # NOTE(review): shadows the builtin `dict` and appears unused here.
        dict = defaultdict(int)
        MSE = []
        for i, lec in enumerate(lectures):
            # Hold out `lec`; train on every other lecture.
            train = [x for x in lectures if x != lec]
            test = [lec]
            print train
            print test
            model_file = os.path.join(model_dir, '%d_%s.model' % (lec, name))
            if fio.IsExist(model_file):
                with open(model_file, 'rb') as handle:
                    clf = pickle.load(handle)
            else:
                train_X, train_Y = combine_files(train, features)
                clf = svm.SVR()
                clf.fit(train_X, train_Y)
                with open(model_file, 'wb') as handle:
                    pickle.dump(clf, handle)
            # Evaluate the held-out lecture separately per prompt.
            for q in ['q1', 'q2']:
                test_X, test_Y = combine_files(test, features, prompts=[q])
                predict_Y = clf.predict(test_X)
                mse = mean_squared_error(test_Y, predict_Y)
                MSE.append([lec, q, mse])
        output = '../data/%s/simlearning.cv.%s.txt' % (course, name)
        fio.WriteMatrix(output, MSE, header=['lec', 'prompt', 'MSE'])
def correlation_analysis(course):
    """Collect features, gold scores and model predictions for correlation.

    For every lecture/prompt and both annotators, loads the phrase-pair
    feature dicts, predicts a similarity with the lecture's cached model,
    and writes [features..., score, predict] rows to
    '../data/<course>/simlearning/data.txt'.
    NOTE(review): Python 2 print statement — module targets Python 2.
    """
    phrasedir1 = '../data/%s/oracle_annotator_1/phrase/' % course
    phrasedir2 = '../data/%s/oracle_annotator_2/phrase/' % course
    outdir = '../data/%s/simlearning/' % course
    fio.NewPath(outdir)
    sim_extractor = Similarity()
    features = sorted(sim_extractor.features.keys())
    head = features + ['score', 'predict']
    body = []
    lectures = annotation.Lectures
    name = '_'.join(features)
    for i, lec in enumerate(lectures):
        # Per-lecture model produced by leave-one-lecture-out training.
        model_file = os.path.join(model_dir, '%d_%s.model' % (lec, name))
        with open(model_file, 'rb') as handle:
            clf = pickle.load(handle)
        for q in ['q1', 'q2']:
            # NOTE(review): `outfile` is computed but never used.
            outfile = os.path.join(outdir, str(lec), '%s%s' % (q, sim_exe))
            for phrasedir in [phrasedir1, phrasedir2]:
                path = phrasedir + str(lec) + '/'
                filename = os.path.join(path, q + sim_exe)
                data = fio.LoadDictJson(filename)
                for fdict, score, _ in data:
                    row = []
                    for fname in features:
                        x = fdict[fname]
                        # Replace serialized NaNs with 0.0.
                        if str(x) == 'nan':
                            x = 0.0
                        row.append(x)
                    predict_score = clf.predict([row])
                    row.append(score)
                    row.append(predict_score[0])
                    body.append(row)
    out_correlation = os.path.join(outdir, 'data.txt')
    print out_correlation
    fio.WriteMatrix(out_correlation, body, head)
def test__check_is_spark_data_frame(self):
    """Similarity accepts a Spark DataFrame but rejects a pandas one."""
    fixture = 'tests/fixtures/similarity/simple_table.csv'
    df_simple_table = self.spark.read.csv(fixture, header=True)
    pd_df_simple_table = pd.read_csv(fixture)
    # Cast every non-id column to integer so only the frame type differs.
    for column in df_simple_table.columns:
        if 'id' not in column:
            df_simple_table = df_simple_table.withColumn(
                column, f.col(column).cast(IntegerType()))
    # Spark frame passes the constructor's type check...
    Similarity(df_features=df_simple_table)
    # ...a pandas frame does not.
    with self.assertRaises(AssertionError):
        Similarity(df_features=pd_df_simple_table)
def main():
    """Print the best-matching category under Jaccard and cosine scores."""
    cats = prepare_categories()
    test_token_freqs = prepare_test_data()
    similarity = Similarity(cats=cats, data=test_token_freqs)
    # PERF FIX: the original called jaccard() and cosine() twice each
    # (once for max(), once for the key function); compute each mapping
    # once and reuse it.
    jaccard_scores = similarity.jaccard()
    print(max(jaccard_scores, key=jaccard_scores.get))
    cosine_scores = similarity.cosine()
    print(max(cosine_scores, key=cosine_scores.get))
def __init__(self):
    """Set up processing helpers and default experiment configuration."""
    self.m_preprocessor = Preprocessor()
    self.m_similarity = Similarity()
    self.m_plt = Plot()
    self.m_evaluator = Evaluator()
    # Dataset file stem — presumably the STS benchmark train split.
    self.m_file = "sts-train"
    # Similarity metric and information-content corpus — "path"/"brown"
    # suggest WordNet-based similarity; TODO confirm downstream usage.
    self.m_metric = "path"
    self.m_ic = "brown"
    # Metric for word2vec-based comparison.
    self.m_metric_w2v = "cosine"
    self.m_metric_t = "path"
    # Threshold value; units/semantics unclear from here — TODO confirm.
    self.m_thr = 20
    self.m_mode = "ontology"
def collaborativeRecommendation(self, key, n=3):
    '''Return list of n top match scores along with inner keys.

    Weighted collaborative filtering: other keys' inner-key values are
    weighted by their Pearson similarity to `key`, then normalized by the
    total similarity mass; only inner keys `key` lacks (or has at 0) are
    candidates.
    '''
    dataset = self.dataset
    weighted_inner_values = {}
    total_scores = {}
    # Hoisted: the original built a fresh Similarity() every iteration.
    sim = Similarity()
    for other_key in dataset:
        if other_key == key:
            continue
        # Similarity is only defined over shared inner keys.
        common_inner_keys = self.fetchCommonInnerKeys(key, other_key)
        if len(common_inner_keys) == 0:
            continue
        x = [dataset[key][inner_key] for inner_key in common_inner_keys]
        y = [
            dataset[other_key][inner_key] for inner_key in common_inner_keys
        ]
        score = sim.pearson(x, y)
        # Ignore zero/negative correlations entirely.
        if score <= 0:
            continue
        for inner_key in dataset[other_key]:
            # Only recommend inner keys `key` doesn't already have.
            if inner_key not in dataset[key] or dataset[key][inner_key] == 0:
                # Weighted sum of value times similarity score.
                weighted_inner_values.setdefault(inner_key, 0)
                weighted_inner_values[
                    inner_key] += score * dataset[other_key][inner_key]
                # Sum of similarity scores for normalization.
                total_scores.setdefault(inner_key, 0)
                total_scores[inner_key] += score
    scores = [(weighted_inner_values[inner_key] / total_scores[inner_key],
               inner_key) for inner_key in weighted_inner_values]
    # sort(reverse=True) replaces sort()+reverse(); identical ordering
    # for these fully-comparable (score, key) tuples.
    scores.sort(reverse=True)
    return scores[0:n]
def extractPhrasePaireFromAnnotation(phrasedir, annotators, id):
    """Build labelled phrase-pair features from human annotations.

    Pairs of phrases sharing an annotation rank get score 1.0; pairs from
    different ranks get 0.0. Output goes to '<prompt><sim_exe>' per
    lecture/prompt via fio.SaveDict2Json.
    NOTE(review): Python 2 print statements; parameter `id` shadows the
    builtin and is unused in this body.
    """
    # NOTE(review): `anotators=` is the (misspelled) keyword the project
    # API expects — do not "fix" the spelling here.
    for doc, lec, annotator in annotation.generate_all_files(
            annotation.datadir + 'json/', '.json', anotators=annotators,
            lectures=annotation.Lectures):
        print doc
        #load task
        task = annotation.Task()
        task.loadjson(doc)
        path = phrasedir + str(lec) + '/'
        fio.NewPath(path)
        for prompt in ['q1', 'q2']:
            prefix = os.path.join(path, '%s.%s.' % (prompt, method))
            filename = path + prompt + sim_exe
            print filename
            featureset = []
            feature_extractor = Similarity(prefix)
            phrase_annotation = task.get_phrase_annotation(prompt)
            #positive examples
            for rank1 in sorted(phrase_annotation):
                for rank2 in sorted(phrase_annotation):
                    # Same rank => similar (1.0), different rank => 0.0.
                    if rank1 == rank2:
                        score = 1.0
                    else:
                        score = 0.0
                    phrases1 = phrase_annotation[rank1]
                    phrases2 = phrase_annotation[rank2]
                    for phrasedict1 in phrases1:
                        p1 = phrasedict1['phrase'].lower().strip()
                        for phrasedict2 in phrases2:
                            p2 = phrasedict2['phrase'].lower().strip()
                            featureset.append(
                                (feature_extractor.get_features(p1, p2),
                                 score, {
                                     'p1': p1,
                                     'p2': p2
                                 }))
            fio.SaveDict2Json(featureset, filename)
            # Persists extractor state — presumably a cache; TODO confirm.
            feature_extractor.save()
def __init__(self, messages, model, questions: set, answers: set,
             pc_questions: dict, pc_answers: dict, tokenizer):
    """Store chatbot collaborators and build the similarity helper.

    Args:
        messages: conversation messages this bot operates on.
        model: model used downstream with `tokenizer`.
        questions: set of known question strings.
        answers: set of known answer strings.
        pc_questions: preprocessed questions mapping — schema not visible
            here; TODO confirm against PreProcessing.
        pc_answers: preprocessed answers mapping — TODO confirm schema.
        tokenizer: tokenizer paired with `model`.
    """
    self.questions = questions
    self.answers = answers
    self.pc_questions = pc_questions
    self.pc_answers = pc_answers
    self.tokenizer = tokenizer
    self.model = model
    self.messages = messages
    self.pp = PreProcessing()
    # Similarity is seeded with the full question/answer inventory.
    self.s = Similarity(questions=self.questions, answers=self.answers)
def main(experiment_name, phenotypes, data_directory, anchor_genes,
         num_replicates=1, percent=0.4, num_anchors=50, min_dangle_size=3,
         max_dangle_size=10, test_ratio=0.5):
    """Create anchored similarity networks for each phenotype/replicate.

    Per replicate: shuffles the anchor genes, splits the first
    `num_anchors` into train/test groups, and builds a synthetic backbone
    from the train anchors. Then, for each phenotype, thresholds its
    similarity matrix, augments it with the replicate's backbone, and
    writes it to 'anchored_<pheno>_<rep>.csv'.

    Side effects: writes 'train_anchors.csv' and 'test_anchors.csv' under
    `experiment_name`; shuffles `anchor_genes` in place.
    NOTE(review): `assert` is stripped under `python -O`; input validation
    here relies on assertions.
    """
    assert isinstance(phenotypes, list)
    # One random dangle size per test-anchor slot.
    alphas = random.choices(range(min_dangle_size, max_dangle_size),
                            k=int(num_anchors * test_ratio))
    assert len(alphas) < len(anchor_genes)
    anchor_train_groups = []
    anchor_test_groups = []
    backbones = []
    # Create all backbones
    for rep_id in range(num_replicates):
        # In-place shuffle makes each replicate's anchor subset different.
        random.shuffle(anchor_genes)
        candidates = anchor_genes[:int(num_anchors)]
        genes_of_interest_train, genes_of_interest_test = train_test_split(
            candidates, shuffle=True, test_size=test_ratio)
        anchor_train_groups.append(genes_of_interest_train)
        anchor_test_groups.append(genes_of_interest_test)
        backbones.append(
            build_backbone(anchors=anchor_train_groups[rep_id],
                           alphas=alphas,
                           weight=1,
                           edge_percentage=percent))
    # Write train anchors to file (one comma-joined line per replicate).
    with open(os.path.join(experiment_name, 'train_anchors.csv'),
              'w') as fout:
        for gene_group in anchor_train_groups:
            fout.write(','.join(gene_group))
            fout.write("\n")
    # Write test anchors to file
    with open(os.path.join(experiment_name, 'test_anchors.csv'),
              'w') as fout:
        for gene_group in anchor_test_groups:
            fout.write(','.join(gene_group))
            fout.write("\n")
    # Adding the backbones and create the similarity object
    for pheno in phenotypes:
        file_name = os.path.join(data_directory, "{}.csv".format(pheno))
        for rep_id in range(num_replicates):
            sim_file_name = "anchored_{}_{}.csv".format(pheno, str(rep_id))
            out_address = os.path.join(experiment_name, sim_file_name)
            similarity = Similarity(file_name,
                                    anchors=anchor_train_groups[rep_id],
                                    alphas=alphas,
                                    string_id=True)
            similarity.transform()
            # Zero out correlations outside (0.2, 0.8) before augmenting.
            similarity.apply_threshold(lower_cor=0.2,
                                       upper_cor=0.8,
                                       value=0)
            similarity.augment(backbones[rep_id])
            similarity.to_csv(out_address)
def generate_walks(edge_list_address, walk_per_node, walk_length, workers=4):
    """Generate random walks over the similarity graph in parallel.

    Args:
        edge_list_address: path to the correlation/edge-list file.
        walk_per_node: number of walks started from each node.
        walk_length: length of each walk.
        workers: number of worker processes (default 4).

    Returns:
        List of walks as produced by WalkGenerator across all workers.
    """
    similarity = Similarity(correlation_file_path=edge_list_address,
                            anchors=[],
                            alphas=[],
                            sep=',',
                            prefix='pseudo')
    genes = list(similarity.idx.keys())
    start_time = time.time()
    gen_walk = WalkGenerator(similarity.matrix, genes, walk_length,
                             walk_per_node)
    print("takes {} seconds to create walk object.".format(
        time.time() - start_time))
    num_cpus = workers
    arguments = list(range(len(gen_walk)))
    # Even chunks keep per-worker dispatch overhead low.
    chunk_size = len(gen_walk) // num_cpus
    # BUG FIX: the original never closed/joined the pool, leaking worker
    # processes; the context manager tears them down deterministically.
    with mp.Pool(num_cpus) as pool:
        walks = pool.map(gen_walk, arguments, chunksize=chunk_size)
    return walks
def test__check_is_numerical_data(self):
    """Similarity accepts int/float feature columns, rejects raw strings."""
    df_numerical = self.spark.read.csv(
        'tests/fixtures/similarity/numerical_data.csv', header=True)
    df_numerical_int = df_numerical
    df_numerical_float = df_numerical
    # Build one integer-typed and one double-typed variant of the frame.
    for column in df_numerical.columns:
        if 'id' not in column:
            df_numerical_int = df_numerical_int.withColumn(
                column, f.col(column).cast(IntegerType()))
            df_numerical_float = df_numerical_float.withColumn(
                column, f.col(column).cast(DoubleType()))
    # Both numeric variants are accepted.
    Similarity(df_features=df_numerical_int)
    Similarity(df_features=df_numerical_float)
    # The raw CSV frame (string-typed columns) is rejected.
    with self.assertRaises(AssertionError):
        Similarity(df_features=df_numerical)
def test__convert_to_long_format(self):
    """Wide n-by-n similarity matrix melts to one row per ordered pair."""
    pd_df_similarities_wide = pd.read_csv(
        'tests/fixtures/similarity/similarities_wide.csv', index_col=0)
    df_simple_table = self.spark.read.csv(
        'tests/fixtures/similarity/simple_table.csv', header=True)
    for column in df_simple_table.columns:
        if 'id' not in column:
            df_simple_table = df_simple_table.withColumn(
                column, f.col(column).cast(IntegerType()))
    similarity = Similarity(df_features=df_simple_table)
    pd_df_similarities_long = similarity._convert_to_long_format(
        pd_df_similarities_wide)

    # Every (row, column) cell of the wide matrix becomes one long row.
    self.assertEqual(
        pd_df_similarities_long.shape[0],
        pd_df_similarities_wide.shape[0] * pd_df_similarities_wide.shape[1])

    def lookup(recipe_1, recipe_2):
        # recipe_id_1 is numeric while recipe_id_2 stays a string,
        # mirroring the long frame's dtypes.
        mask = ((pd_df_similarities_long['recipe_id_1'] == recipe_1) &
                (pd_df_similarities_long['recipe_id_2'] == str(recipe_2)))
        return pd_df_similarities_long.loc[mask]['similarity'].values[0]

    # Spot-check asymmetric entries survive the reshape intact.
    self.assertEqual(lookup(1, 3), 9)
    self.assertEqual(lookup(3, 1), 6)
    self.assertEqual(lookup(2, 3), 1)
    self.assertEqual(lookup(1, 2), 6)
def correlation_analysis_noduplicate():
    """Collect feature/score rows for correlation, skipping self-pairs.

    Like correlation_analysis but without model predictions, and pairs
    where p1 == p2 are excluded. Writes [features..., score] rows to
    '../data/<course>/simlearning/data.txt'.
    NOTE(review): Python 2 print statement — module targets Python 2.
    """
    phrasedir1 = '../data/%s/oracle_annotator_1/phrase/' % course
    phrasedir2 = '../data/%s/oracle_annotator_2/phrase/' % course
    outdir = '../data/%s/simlearning/' % course
    fio.NewPath(outdir)
    sim_extractor = Similarity()
    features = sorted(sim_extractor.features.keys())
    head = features + ['score']
    body = []
    lectures = annotation.Lectures
    for i, lec in enumerate(lectures):
        for q in ['q1', 'q2']:
            # NOTE(review): `outfile` is computed but never used.
            outfile = os.path.join(outdir, str(lec), '%s%s' % (q, sim_exe))
            for phrasedir in [phrasedir1, phrasedir2]:
                path = phrasedir + str(lec) + '/'
                filename = os.path.join(path, q + sim_exe)
                data = fio.LoadDictJson(filename)
                for fdict, score, pd in data:
                    # Skip duplicate (identical) phrase pairs.
                    if pd['p1'] == pd['p2']:
                        print pd['p1']
                        continue
                    row = []
                    for name in features:
                        x = fdict[name]
                        # Replace serialized NaNs with 0.0.
                        if str(x) == 'nan':
                            x = 0.0
                        row.append(x)
                    row.append(score)
                    body.append(row)
    out_correlation = os.path.join(outdir, 'data.txt')
    fio.WriteMatrix(out_correlation, body, head)
def main():
    """Launch the Pose Dance game window and run the game loop."""
    similarity = Similarity()
    pg.init()
    clock = pg.time.Clock()
    clock.tick(30)
    # Window geometry and placement; SDL reads the env var when the
    # window is created by set_mode().
    display_width, display_height = 960, 650
    pos_x, pos_y = 0, 30
    os.environ['SDL_VIDEO_WINDOW_POS'] = '%i,%i' % (pos_x, pos_y)
    gameDisplay = pg.display.set_mode((display_width, display_height))
    pg.display.set_caption('Pose Dance')
    game = PoseDance(gameDisplay, similarity)
    game.run()
def predict_IE256(train_course, model_dir, phrasedir, modelname='svm'):
    """Score all phrase pairs of each lecture with a pre-trained model.

    Loads the classifier trained on `train_course`, predicts a similarity
    for every ordered phrase pair per prompt, and writes the n x n
    prediction matrix to '<prompt>.<method>.<modelname>'.
    NOTE(review): Python 2 print statement — module targets Python 2.
    """
    sim_extractor = Similarity()
    allfeatures = sorted(sim_extractor.features.keys())
    features = allfeatures
    name = '_'.join(features)
    lectures = annotation.Lectures
    for i, lec in enumerate(lectures):
        test = [lec]
        print test
        model_file = os.path.join(model_dir,
                                  '%s_%s.model' % (train_course, name))
        with open(model_file, 'rb') as handle:
            clf = pickle.load(handle)
        path = os.path.join(phrasedir, str(lec))
        for q in ['q1', 'q2']:
            test_X, test_Y = combine_files_test(phrasedir, test, features,
                                                prompts=[q])
            predict_Y = clf.predict(test_X)
            #write the output
            phrasefile = os.path.join(path, "%s.%s.key" % (q, method))
            phrases = fio.LoadList(phrasefile)
            # One prediction expected per ordered phrase pair.
            assert (len(predict_Y) == len(phrases) * len(phrases))
            # Unflatten predictions row-major into an n x n matrix.
            k = 0
            body = []
            for p1 in phrases:
                row = []
                for p2 in phrases:
                    row.append(predict_Y[k])
                    k += 1
                body.append(row)
            output = os.path.join(path, "%s.%s.%s" % (q, method, modelname))
            fio.WriteMatrix(output, body, phrases)
def run(self, corpus_path, test_path, minfreq):
    """Build the phrase database, prune rare entries, and trace the five
    most similar phrases for every test phrase into 'trace.txt'."""
    self._database = self._construct_database(corpus_path)
    before_unique, before_total = self._stas(self._database)
    # Drop entries below the minimum frequency threshold.
    self._database.apply_minfreq(minfreq)
    after_unique, after_total = self._stas(self._database)
    sim = Similarity(self._database)
    test_phrases = self.IO.read_phrases(test_path)
    with open('trace.txt', 'w', encoding='utf8') as f:
        # Head line summarizes counts before/after pruning.
        stats = [before_unique, after_unique, before_total, after_total]
        f.write('\n')
        self._write_head(f, stats)
        for phrase in test_phrases:
            top_matches = self._find_k_similar(phrase, sim, 5)
            self._write_result(f, phrase, top_matches)
def get_best_indices(list, sin_val):
    '''The function takes on single row and finds out the best indexes
    according to similarity distance. The similarity values used are
    Euclidean distance, Manhattan distance, Minkowski distance, Cosine
    distance and Jaccard distance. It returns a dictionary of list'''
    # NOTE: the parameter name `list` shadows the builtin; it is kept for
    # interface compatibility and aliased locally.
    values = list
    measures = Similarity()  ### Calling Similarity class
    window = len(sin_val)  # sliding-window width = reference length
    # Each metric name maps to a scorer over (segment vs. reference).
    metric_fns = {
        "Euclidean": lambda seg: measures.euclidean_distance(seg, sin_val),
        "Manhattan": lambda seg: measures.manhattan_distance(seg, sin_val),
        "Minkowski": lambda seg: measures.minkowski_distance(seg, sin_val, 3),
        "Cosine": lambda seg: measures.cosine_similarity(seg, sin_val),
        "Jaccard": lambda seg: measures.jaccard_similarity(seg, sin_val),
    }
    # { metric: [best_value, best_start_index, best_end_index] }, seeded
    # with the same sentinel values as the original.
    local_optima = {
        name: [9999999999, 9999999, 99999999] for name in metric_fns
    }
    for start in range(len(values) - window):
        segment = values[start:start + window]
        for name, score_fn in metric_fns.items():
            score = score_fn(segment)
            # `<=` keeps the LAST window tying the best score, matching
            # the original's behaviour exactly.
            if score <= local_optima[name][0]:
                local_optima[name] = [score, start, start + window]
    return local_optima