def main(size_of_test_set, articles, max_sentences, path=None):
    """Run PPI_cite_main.index on protein pairs read from a directory of files.

    Each regular file in *path* is parsed as tab-separated text. The third
    column of its first two rows is taken as a pair of protein names, which
    are lower-cased. A file is skipped when it has fewer than two rows, when
    either of those rows has fewer than three columns, or when the two names
    are equal after lower-casing. For every remaining file an object is
    built with make_a1_file_object, its ``proteins`` attribute and the
    running entry count are printed, and the object is passed to
    PPI_cite_main.index.

    Args:
        size_of_test_set: Maximum number of directory entries to examine.
            Every entry counts toward this limit, including entries that
            are not regular files and files that end up skipped.
        articles: Forwarded unchanged to PPI_cite_main.index.
        max_sentences: Forwarded unchanged to PPI_cite_main.index.
        path: Directory to scan. When None, the original hard-coded
            location is used.
    """
    if path is None:
        path = r'C:\Users\Adam\workspace\Wiki Pi NLP\Test_Set_Files_BIONLP09\dot_a1_files'

    for count, dir_entry in enumerate(os.listdir(path), 1):
        if count > size_of_test_set:
            break

        dir_entry_path = os.path.join(path, dir_entry)
        if not os.path.isfile(dir_entry_path):
            continue

        with open(dir_entry_path, 'r') as my_file:
            # Drain the reader completely, as before: my_file is handed to
            # make_a1_file_object below, so its read position is left at
            # end-of-file exactly as in the previous implementation.
            rows = list(csv.reader(my_file, delimiter='\t'))

            leading_rows = rows[:2]
            # Blank or truncated lines yield rows with fewer than three
            # columns; skip the file instead of raising IndexError and
            # aborting the whole run.
            if len(leading_rows) != 2 or any(len(row) < 3 for row in leading_rows):
                continue

            # NOTE(review): an earlier revision carried a disabled step that
            # replaced '-' with ' ' in the names; it remains disabled here.
            first_two_proteins = [row[2].lower() for row in leading_rows]
            if first_two_proteins[0] == first_two_proteins[1]:
                continue

            a1_file = make_a1_file_object(my_file, dir_entry, first_two_proteins)
            # Single formatted argument so the output is the same text under
            # the Python 2 print statement and the print function.
            print("%s %s" % (a1_file.proteins, count))
            PPI_cite_main.index(a1_file, articles, max_sentences)
def main(q1, q2, articles, max_sentences):
    """Index a single protein pair given directly as two names.

    Both names are lower-cased, wrapped in a two-element list and handed to
    make_a1_file_object. The resulting object's ``proteins`` attribute is
    printed, then the object is passed to PPI_cite_main.index.

    Args:
        q1: First protein name (must support ``.lower()``).
        q2: Second protein name (must support ``.lower()``).
        articles: Forwarded unchanged to PPI_cite_main.index.
        max_sentences: Forwarded unchanged to PPI_cite_main.index.
    """
    protein_pair = [name.lower() for name in (q1, q2)]
    a1_file = make_a1_file_object(protein_pair)
    # Parenthesised single argument prints identically under Python 2.
    print(a1_file.proteins)
    PPI_cite_main.index(a1_file, articles, max_sentences)
def main(q1, q2, articles, max_sentences):
    """Index a single protein pair given directly as two names.

    Both names are lower-cased, wrapped in a two-element list and handed to
    make_a1_file_object. The resulting object's ``proteins`` attribute is
    printed, then the object is passed to PPI_cite_main.index.

    Args:
        q1: First protein name (must support ``.lower()``).
        q2: Second protein name (must support ``.lower()``).
        articles: Forwarded unchanged to PPI_cite_main.index.
        max_sentences: Forwarded unchanged to PPI_cite_main.index.
    """
    protein_pair = [name.lower() for name in (q1, q2)]
    a1_file = make_a1_file_object(protein_pair)
    # Parenthesised single argument prints identically under Python 2.
    print(a1_file.proteins)
    PPI_cite_main.index(a1_file, articles, max_sentences)
def main(size_of_test_set, articles, max_sentences, path=None):
    """Run PPI_cite_main.index on protein pairs read from a directory of files.

    Each regular file in *path* is parsed as tab-separated text. The third
    column of its first two rows is taken as a pair of protein names, which
    are lower-cased. A file is skipped when it has fewer than two rows, when
    either of those rows has fewer than three columns, or when the two names
    are equal after lower-casing. For every remaining file an object is
    built with make_a1_file_object, its ``proteins`` attribute and the
    running entry count are printed, and the object is passed to
    PPI_cite_main.index.

    Args:
        size_of_test_set: Maximum number of directory entries to examine.
            Every entry counts toward this limit, including entries that
            are not regular files and files that end up skipped.
        articles: Forwarded unchanged to PPI_cite_main.index.
        max_sentences: Forwarded unchanged to PPI_cite_main.index.
        path: Directory to scan. When None, the original hard-coded
            location is used.
    """
    if path is None:
        path = r'C:\Users\Adam\workspace\Wiki Pi NLP\Test_Set_Files_BIONLP09\dot_a1_files'

    for count, dir_entry in enumerate(os.listdir(path), 1):
        if count > size_of_test_set:
            break

        dir_entry_path = os.path.join(path, dir_entry)
        if not os.path.isfile(dir_entry_path):
            continue

        with open(dir_entry_path, 'r') as my_file:
            # Drain the reader completely, as before: my_file is handed to
            # make_a1_file_object below, so its read position is left at
            # end-of-file exactly as in the previous implementation.
            rows = list(csv.reader(my_file, delimiter='\t'))

            leading_rows = rows[:2]
            # Blank or truncated lines yield rows with fewer than three
            # columns; skip the file instead of raising IndexError and
            # aborting the whole run.
            if len(leading_rows) != 2 or any(len(row) < 3 for row in leading_rows):
                continue

            # NOTE(review): an earlier revision carried a disabled step that
            # replaced '-' with ' ' in the names; it remains disabled here.
            first_two_proteins = [row[2].lower() for row in leading_rows]
            if first_two_proteins[0] == first_two_proteins[1]:
                continue

            a1_file = make_a1_file_object(my_file, dir_entry, first_two_proteins)
            # Single formatted argument so the output is the same text under
            # the Python 2 print statement and the print function.
            print("%s %s" % (a1_file.proteins, count))
            PPI_cite_main.index(a1_file, articles, max_sentences)