Example #1
 def test_short(self):
     seq1 = "ACGACG"
     seq2 = "ACGAT"
     expected = "ACGACG", "ACGA-T"
     sim_matrix = utils.make_similarity_matrix(1, -2)
     result = global_seq.run_global_sequence_algorithm(seq1, seq2, sim_matrix, -2)[0]
     self.assertEqual(expected, result)
Example #2
 def test_BinfSnipacademy(self):
     seq1 = "CGTTCTA"
     seq2 = "AACGTTGG"
     expected = "--CGTTCTA", "AACGTT-GG"
     sim_matrix = utils.make_similarity_matrix(5, -3)
     result = global_seq.run_global_sequence_algorithm(seq1, seq2, sim_matrix, -4)[0]
     self.assertEqual(expected, result)
Example #3
 def testVlabArmita(self):
     seq1 = "CGTGAATTCAT"
     seq2 = "GACTTAC"
     expected1 = "GAATT-C", "GACTTAC"
     sim_matrix = utils.make_similarity_matrix(5, -3)
     result = local_seq.run_local_sequence_algorithm(seq1, seq2, sim_matrix, -4)[0]
     self.assertEqual(expected1, result)
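The three tests above pass a (match, mismatch) pair to utils.make_similarity_matrix. A minimal sketch of such a helper, assuming it simply builds a match/mismatch substitution table over the DNA alphabet (only the function name comes from the tests; everything else here is an assumption, not the project's code):

def make_similarity_matrix(match, mismatch, alphabet="ACGT"):
    # Score table as a dict of dicts: `match` on the diagonal, `mismatch` elsewhere.
    return {a: {b: (match if a == b else mismatch) for b in alphabet} for a in alphabet}

# make_similarity_matrix(1, -2)["A"]["A"] -> 1
# make_similarity_matrix(1, -2)["A"]["G"] -> -2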
Example #4
    def test_matrix_parses_simple_matrix(self):
        filename = "inputs/matrix1.txt"

        expected = utils.make_similarity_matrix(1, -2)

        with open(filename, "r") as f:
            result = handlers.parse_matrix_file(f)

        self.assertEqual(expected, result)
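This test only checks that parsing inputs/matrix1.txt reproduces the same (1, -2) table. The file format is not shown, so purely as a guess, assuming a whitespace-separated square table with a header row of symbols, a parser in the spirit of handlers.parse_matrix_file could look like:

def parse_matrix_file(f):
    # First line: column symbols; each following line: row symbol followed by its scores.
    lines = [line.split() for line in f if line.strip()]
    symbols = lines[0]
    return {row[0]: {sym: int(score) for sym, score in zip(symbols, row[1:])}
            for row in lines[1:]}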
Example #5
    make_sentences_vectors, make_similarity_matrix, apply_pagerank, ask_top_n_sentences_to_extract, extract_sentences

pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)

dataset_path = Path.cwd() / "data" / "Reviews.csv"
if __name__ == '__main__':
    dataset = pd.read_csv(dataset_path, nrows=100)
    dataset.drop_duplicates(subset=['Text'], inplace=True)
    dataset.dropna(axis=0, inplace=True)

    sentences_list = split_in_sentences(dataset['Text'])
    sentences_list = remove_html_tag(sentences_list)

    pre_processed_sentences = pre_processing(sentences_list)

    embedding_dimensionality = ask_embedding_dim()
    embeddings = get_word_embeddings(embedding_dimensionality)

    sents_vects = make_sentences_vectors(pre_processed_sentences, embeddings, int(embedding_dimensionality))

    similarity_matrix = make_similarity_matrix(sentences_list, sents_vects, int(embedding_dimensionality))

    pagerank_scores = apply_pagerank(similarity_matrix)

    number_sentences_to_extract = ask_top_n_sentences_to_extract()

    for ex_sent in extract_sentences(number_sentences_to_extract, sentences_list, pagerank_scores):
        print(ex_sent, "\n")
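Example #5 builds a sentence-level similarity matrix from word-embedding sentence vectors and ranks sentences with PageRank. A rough sketch of those two steps under common TextRank-style assumptions (pairwise cosine similarity plus networkx PageRank; none of this is taken from the project itself):

import numpy as np
import networkx as nx

def make_similarity_matrix(sentences, sentence_vectors, dim):
    # Pairwise cosine similarity between sentence embeddings.
    n = len(sentences)
    sim = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i != j:
                a = np.asarray(sentence_vectors[i]).reshape(dim)
                b = np.asarray(sentence_vectors[j]).reshape(dim)
                denom = (np.linalg.norm(a) * np.linalg.norm(b)) or 1.0
                sim[i, j] = float(np.dot(a, b)) / denom
    return sim

def apply_pagerank(similarity_matrix):
    # Treat the similarity matrix as a weighted graph and score each sentence node.
    graph = nx.from_numpy_array(similarity_matrix)
    return nx.pagerank(graph)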
Example #6
        users.append(row[0])

    while True:
        try:
            if function == 'fetch':
                # Get neighbours of each user in the list
                for i, user in enumerate(users):
                    print('\n', i, user)
                    user_obj = api.get_user(user)
                    n = fetch_neighbours(user_obj.id, api, direction=direction, force=force)
                    print("neighbours: ", n)

                    # Save user info and the id-screen_name pair since we have it already
                    utils.save_user(user_obj)
                    utils.save_screen_name(user_obj.id, user)

            if function == 'make_similarity':
                print("Creating similarity matrix")
                fout = 'similarity' + '_' + file.split('.')[0] + '_' + direction + '.csv'
                make_similarity_matrix(users, direction=direction, file=fout)

            if function == 'make_adjacency':
                print("Creating adjacency matrix")
                fout = 'adjacency' + '-' + file.split('.')[0] + '_' + direction + '.csv'
                make_adjacency_matrix(users, direction=direction, file=fout)

            break

        except TweepError as e:
            print(e)
            time.sleep(60)
Example #7
                # Get neighbours of each user in the list
                for i, user in enumerate(users):
                    print('\n', i, user)
                    user_obj = api.get_user(user)
                    n = fetch_neighbours(user_obj.id,
                                         api,
                                         direction=direction,
                                         force=force)
                    print("neighbours: ", n)

                    # Save user info and the id-screen_name pair since we have it already
                    utils.save_user(user_obj)
                    utils.save_screen_name(user_obj.id, user)

            if (function == 'make_similarity'):
                print("Creating similarity matrix")
                fout = 'similarity' + '_' + file.split(
                    '.')[0] + '_' + direction + '.csv'
                make_similarity_matrix(users, direction=direction, file=fout)

            if (function == 'make_adjacency'):
                print("Creating adjacency matrix")
                fout = 'adjacency' + '-' + file.split(
                    '.')[0] + '_' + direction + '.csv'
                make_adjacency_matrix(users, direction=direction, file=fout)

            break

        except TweepError as e:
            print(e)
            time.sleep(60)
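In the last two examples, make_similarity_matrix(users, direction, file) writes a user-by-user similarity CSV from previously fetched Twitter neighbour lists. A sketch of one plausible implementation using Jaccard overlap; load_neighbours is a hypothetical stand-in for however fetch_neighbours persists its results, and nothing here is the project's actual code:

import csv

def jaccard(a, b):
    a, b = set(a), set(b)
    union = a | b
    return len(a & b) / len(union) if union else 0.0

def make_similarity_matrix(users, direction, file, load_neighbours):
    # load_neighbours(user, direction) is assumed to return that user's saved
    # follower/friend ids for the given direction (hypothetical helper).
    neighbours = {u: load_neighbours(u, direction) for u in users}
    with open(file, "w", newline="") as fout:
        writer = csv.writer(fout)
        writer.writerow([""] + list(users))
        for u in users:
            writer.writerow([u] + [jaccard(neighbours[u], neighbours[v]) for v in users])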