def get_best_indices(list, sin_val):
    """Slide a window of ``len(sin_val)`` over ``list`` and find, for each
    similarity measure, the window that minimizes the measure's value.

    Measures used: Euclidean, Manhattan, Minkowski (p=3), Cosine and Jaccard
    (all taken from the project's ``Similarity`` class).

    NOTE(review): ``cosine_similarity`` / ``jaccard_similarity`` are named as
    similarities (higher = more similar) but are *minimized* here like the
    distances — confirm the ``Similarity`` class actually returns distances.

    Parameters
    ----------
    list : sequence of numbers
        The row to scan. (Name shadows the builtin ``list``; kept unchanged
        so existing keyword callers are not broken.)
    sin_val : sequence of numbers
        The reference pattern; its length defines the window size.

    Returns
    -------
    dict
        ``{measure_name: [best_value, start_index, end_index]}`` where the
        window is ``list[start_index:end_index]``. If ``list`` is shorter
        than ``sin_val`` the sentinel entries are returned unchanged.
    """
    size = len(sin_val)

    # Sentinel: value is +inf so any real measure value replaces it;
    # indices keep the original placeholder convention.
    local_optima = {
        name: [float("inf"), 9999999, 99999999]
        for name in ("Euclidean", "Manhattan", "Minkowski", "Cosine", "Jaccard")
    }

    measures = Similarity()  # project-local similarity/distance helpers

    # One callable per measure so all five are updated by the same loop.
    metric_fns = {
        "Euclidean": lambda w: measures.euclidean_distance(w, sin_val),
        "Manhattan": lambda w: measures.manhattan_distance(w, sin_val),
        # p=3 preserved from the original call.
        "Minkowski": lambda w: measures.minkowski_distance(w, sin_val, 3),
        "Cosine": lambda w: measures.cosine_similarity(w, sin_val),
        "Jaccard": lambda w: measures.jaccard_similarity(w, sin_val),
    }

    # Bug fix: the original used range(len(list) - size), which skipped the
    # last valid window; "+ 1" includes the window ending at the final element.
    for i in range(len(list) - size + 1):
        window = list[i:i + size]
        for name, fn in metric_fns.items():
            val = fn(window)
            # "<=" preserved from the original: on ties, the later window wins.
            if val <= local_optima[name][0]:
                local_optima[name] = [val, i, i + size]

    return local_optima
# --- Answer extraction + result logging (fragment; relies on names defined
# --- earlier in the file: predictions, x, qst, tokenizer, modelExtractor,
# --- row, txt_file, tsv_file, searchObject, Similarity) ---

# Second column of the prediction matrix — presumably per-candidate scores;
# TODO confirm against the model that produced `predictions`.
values = predictions[:, 1]
# Row(s) of `x` whose score equals the maximum — the best candidate passage.
answer = x[np.where(values == max(values)), 1]
text = str(answer)

# Encode (question, candidate passage) pair for the extractive QA model.
encoding = tokenizer.encode_plus(qst, text, max_length=256)
input_ids, token_type_ids = encoding["input_ids"], encoding[
    "token_type_ids"]

# Run the span-extraction model: returns per-token start/end logits.
start_scores, end_scores = modelExtractor(torch.tensor([input_ids]),
                                          token_type_ids=torch.tensor(
                                              [token_type_ids]))

# Take the token span between the argmax start and argmax end positions.
# NOTE(review): if argmax(end) < argmax(start) the slice is empty — no guard here.
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)
a = ' '.join(
    all_tokens[torch.argmax(start_scores):torch.argmax(end_scores) + 1])
# Undo WordPiece sub-token splitting (" ##piece" -> "piece").
cleaned_answer = a.replace(" ##", "")

# Write question, predicted answer, gold answer and similarity scores
# to the report file. (User-facing strings are in French — left as-is.)
txt_file.write("#################################### \n")
txt_file.write("#################################### \n")
txt_file.write("Question ====> \t" + row[0] + "\n")
txt_file.write("Réponse prédite ====> \t" + cleaned_answer + "\n")
txt_file.write("La vraie réponse ====> \t" + row[1] + "\n")
txt_file.write("Dice Similarity ====> \t" +
               str(Similarity.dice_similarity(cleaned_answer, row[1])) + "\n")
txt_file.write("Jaccard Similarity ====> \t" +
               str(Similarity.jaccard_similarity(cleaned_answer, row[1])) +
               "\n")
txt_file.write("#################################### \n")

# Release the Lucene-style index handles and close the data/report files.
searchObject.reader.close()
searchObject.directory.close()
tsv_file.close()
txt_file.close()