def test_short(self):
    """Global alignment of two short sequences differing by one gap."""
    seq1 = "ACGACG"
    seq2 = "ACGAT"
    expected = "ACGACG", "ACGA-T"
    # Match score 1, mismatch penalty -2; gap penalty -2 passed to the aligner.
    sim_matrix = utils.make_similarity_matrix(1, -2)
    result = global_seq.run_global_sequence_algorithm(seq1, seq2, sim_matrix, -2)[0]
    # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
    # use the canonical assertEqual.
    self.assertEqual(expected, result)
def test_BinfSnipacademy(self):
    """Global alignment example (BinfSnipacademy): leading and internal gaps."""
    seq1 = "CGTTCTA"
    seq2 = "AACGTTGG"
    expected = "--CGTTCTA", "AACGTT-GG"
    # Match score 5, mismatch penalty -3; gap penalty -4 passed to the aligner.
    sim_matrix = utils.make_similarity_matrix(5, -3)
    result = global_seq.run_global_sequence_algorithm(seq1, seq2, sim_matrix, -4)[0]
    # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
    # use the canonical assertEqual.
    self.assertEqual(expected, result)
def testVlabArmita(self):
    """Local alignment example (Vlab/Armita): best local region of two sequences.

    Note: the name is kept as-is (renaming would change which test unittest
    discovers), even though it doesn't follow the test_snake_case convention
    used by the sibling tests.
    """
    seq1 = "CGTGAATTCAT"
    seq2 = "GACTTAC"
    expected1 = "GAATT-C", "GACTTAC"
    # Match score 5, mismatch penalty -3; gap penalty -4 passed to the aligner.
    sim_matrix = utils.make_similarity_matrix(5, -3)
    result = local_seq.run_local_sequence_algorithm(seq1, seq2, sim_matrix, -4)[0]
    # FIX: assertEquals is a deprecated alias (removed in Python 3.12);
    # use the canonical assertEqual.
    self.assertEqual(expected1, result)
def test_matrix_parses_simple_matrix(self):
    """A simple matrix file parses into the same matrix utils generates."""
    matrix_path = "inputs/matrix1.txt"
    reference = utils.make_similarity_matrix(1, -2)
    with open(matrix_path, "r") as matrix_file:
        parsed = handlers.parse_matrix_file(matrix_file)
    self.assertEqual(reference, parsed)
# NOTE(review): the names below are the tail of a multi-line import whose
# opening line lies above this chunk — confirm the continuation style there.
make_sentences_vectors, make_similarity_matrix, apply_pagerank, \
    ask_top_n_sentences_to_extract, extract_sentences

# Print pandas frames in full: all columns, no wrapping, no column truncation.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# FIX: pd.set_option('max_colwidth', -1) — the -1 sentinel was deprecated in
# pandas 1.0 and raises ValueError in current releases; the supported form is
# the fully-qualified option name with None meaning "no limit".
pd.set_option('display.max_colwidth', None)

dataset_path = Path.cwd() / "data" / "Reviews.csv"

if __name__ == '__main__':
    # Load a small sample of reviews and drop duplicate / incomplete rows.
    dataset = pd.read_csv(dataset_path, nrows=100)
    dataset.drop_duplicates(subset=['Text'], inplace=True)
    dataset.dropna(axis=0, inplace=True)

    # Split into sentences and clean them up before embedding.
    sentences_list = split_in_sentences(dataset['Text'])
    sentences_list = remove_html_tag(sentences_list)
    pre_processed_sentences = pre_processing(sentences_list)

    # Build sentence vectors from word embeddings of a user-chosen dimension.
    embedding_dimensionality = ask_embedding_dim()
    embeddings = get_word_embeddings(embedding_dimensionality)
    sents_vects = make_sentences_vectors(
        pre_processed_sentences, embeddings, int(embedding_dimensionality))

    # Rank sentences by PageRank over the sentence-similarity graph.
    similarity_matrix = make_similarity_matrix(
        sentences_list, sents_vects, int(embedding_dimensionality))
    pagerank_scores = apply_pagerank(similarity_matrix)

    # Emit the top-N sentences as the extractive summary.
    number_sentences_to_extract = ask_top_n_sentences_to_extract()
    for ex_sent in extract_sentences(
            number_sentences_to_extract, sentences_list, pagerank_scores):
        print(ex_sent, "\n")
users.append(row[0])  # NOTE(review): tail of a row-reading loop that opens above this chunk

# Retry the whole operation until it completes without a Twitter API error
# (TweepError is treated as transient, e.g. a rate limit).
while True:  # FIX: idiomatic `while True:` instead of `while(True):`
    try:
        if function == 'fetch':
            # Get neighbours of each user in the list
            for i, user in enumerate(users):
                print('\n', i, user)
                user_obj = api.get_user(user)
                n = fetch_neighbours(user_obj.id, api, direction=direction, force=force)
                print("neighbours: ", n)
                # Save user info and the id-screen_name pair since we have it already
                utils.save_user(user_obj)
                utils.save_screen_name(user_obj.id, user)
        if function == 'make_similarity':
            print("Creating similarity matrix")
            # Output name derives from the input file stem and direction.
            fout = 'similarity' + '_' + file.split('.')[0] + '_' + direction + '.csv'
            # FIX: PEP 8 keyword-argument spacing (direction=direction, file=fout).
            make_similarity_matrix(users, direction=direction, file=fout)
        if function == 'make_adjacency':
            print("Creating adjacency matrix")
            # NOTE(review): separator here is '-' where make_similarity uses '_' —
            # preserved as-is; confirm whether that asymmetry is intentional.
            fout = 'adjacency' + '-' + file.split('.')[0] + '_' + direction + '.csv'
            make_adjacency_matrix(users, direction=direction, file=fout)
        break
    except TweepError as e:
        # Report and back off for a minute before retrying from the top.
        print(e)
        time.sleep(60)
# Get neighbours of each user in the list for i, user in enumerate(users): print('\n', i, user) user_obj = api.get_user(user) n = fetch_neighbours(user_obj.id, api, direction=direction, force=force) print("neighbours: ", n) # Save user info and the id-screen_name pair since we have it already utils.save_user(user_obj) utils.save_screen_name(user_obj.id, user) if (function == 'make_similarity'): print("Creating similarity matrix") fout = 'similarity' + '_' + file.split( '.')[0] + '_' + direction + '.csv' make_similarity_matrix(users, direction=direction, file=fout) if (function == 'make_adjacency'): print("Creating adjacency matrix") fout = 'adjacency' + '-' + file.split( '.')[0] + '_' + direction + '.csv' make_adjacency_matrix(users, direction=direction, file=fout) break except TweepError as e: print(e) time.sleep(60)