def main(q1, q2, articles, max_sentences):
    """Look up co-occurrence sentences for a single protein pair.

    Lowercases both query proteins, wraps them in an a1-file object, and
    hands that object to PPI_cite_main.index for processing.
    """
    first_two_proteins = [q1.lower(), q2.lower()]
    a1_file = make_a1_file_object(first_two_proteins)
    print(a1_file.proteins)
    PPI_cite_main.index(a1_file, articles, max_sentences)
def main(size_of_test_set, articles, max_sentences): path = r'C:\Users\Adam\workspace\Wiki Pi NLP\Test_Set_Files_BIONLP09\dot_a1_files' count = 0 for dir_entry in os.listdir(path): count += 1 if count > size_of_test_set: break dir_entry_path = os.path.join(path, dir_entry) if os.path.isfile(dir_entry_path): with open(dir_entry_path, 'r') as my_file: reader=csv.reader(my_file,delimiter='\t') rows = [] for row in reader: rows.append(row) first_two_proteins = [] for lst in rows[:2]: first_two_proteins.append(lst[2]) if len(first_two_proteins) != 2: continue first_two_proteins = [x.lower() for x in first_two_proteins] #first_two_proteins = [x.replace('-',' ') for x in first_two_proteins] if first_two_proteins[0] == first_two_proteins[1]: continue a1_file = make_a1_file_object(my_file, dir_entry,first_two_proteins) print a1_file.proteins, count PPI_cite_main.index(a1_file, articles, max_sentences)
def main(q1, q2, articles, max_sentences):
    """Run PPI_cite indexing on the lowercased protein pair (q1, q2)."""
    query_pair = [protein.lower() for protein in (q1, q2)]
    a1_file = make_a1_file_object(query_pair)
    print(a1_file.proteins)
    PPI_cite_main.index(a1_file, articles, max_sentences)
def sent_with_cooccur(ID_paper_obj_dict, query):
    """Return a list of (id, sentence) tuples for sentences containing both queries.

    Papers whose abstracts have not yet been split are tokenized on demand.
    If no co-occurring sentence is found in any paper, the failure is reported
    through PPI_cite_main.no_cooc_sent and the empty list is returned.
    """
    ID_sentence_lists = []
    for key in ID_paper_obj_dict:
        paper = ID_paper_obj_dict[key]
        # Lazily split/tokenize papers that have not been processed yet; the
        # original duplicated the find/make/extend code in both branches, so
        # the common tail is factored out here.
        if not paper.all_sentences:
            paper.split_abstract_into_sentences(query)
            paper.word_tokenize()
        coocurrence_list = paper.find_sentences_with_both_queries(
            paper.all_sentences, query)
        ID_sentence_lists.extend(paper.make_sentence_id_tuples(coocurrence_list))
    if not ID_sentence_lists:
        print("No sentences with co-occurance found")
        time.sleep(3)
        PPI_cite_main.no_cooc_sent("No sentences with co-occurance found",
                                   None, None, None, None, None)
    return ID_sentence_lists
def main(size_of_test_set, articles, max_sentences): path = r'C:\Users\Adam\workspace\Wiki Pi NLP\Test_Set_Files_BIONLP09\dot_a1_files' count = 0 for dir_entry in os.listdir(path): count += 1 if count > size_of_test_set: break dir_entry_path = os.path.join(path, dir_entry) if os.path.isfile(dir_entry_path): with open(dir_entry_path, 'r') as my_file: reader = csv.reader(my_file, delimiter='\t') rows = [] for row in reader: rows.append(row) first_two_proteins = [] for lst in rows[:2]: first_two_proteins.append(lst[2]) if len(first_two_proteins) != 2: continue first_two_proteins = [x.lower() for x in first_two_proteins] #first_two_proteins = [x.replace('-',' ') for x in first_two_proteins] if first_two_proteins[0] == first_two_proteins[1]: continue a1_file = make_a1_file_object(my_file, dir_entry, first_two_proteins) print a1_file.proteins, count PPI_cite_main.index(a1_file, articles, max_sentences)
def sent_with_cooccur(ID_paper_obj_dict, query):
    """Collect (id, sentence) tuples for every sentence mentioning both queries.

    Abstracts that were never split into sentences are split and tokenized on
    demand before searching. When nothing co-occurs anywhere, the failure is
    routed through PPI_cite_main.no_cooc_sent before the empty list is returned.
    """
    ID_sentence_lists = []
    for key in ID_paper_obj_dict:
        paper_obj = ID_paper_obj_dict[key]
        if not paper_obj.all_sentences:
            # Abstract not yet processed: split and tokenize it first.
            paper_obj.split_abstract_into_sentences(query)
            paper_obj.word_tokenize()
        # Both branches of the original ran this identical tail; deduplicated.
        coocurrence_list = paper_obj.find_sentences_with_both_queries(
            paper_obj.all_sentences, query)
        sentence_list = paper_obj.make_sentence_id_tuples(coocurrence_list)
        ID_sentence_lists.extend(sentence_list)
    if not ID_sentence_lists:
        print("No sentences with co-occurance found")
        time.sleep(3)
        PPI_cite_main.no_cooc_sent("No sentences with co-occurance found",
                                   None, None, None, None, None)
    return ID_sentence_lists
def get_ID_list(xml):
    """Parse a PubMed esearch XML response into lists of paper IDs.

    Returns a dict with keys "existing_papers" (always empty for now; reserved
    for a future database of already-downloaded IDs) and "papers_to_download"
    (list of ID strings found under ./IdList/Id).

    Fixes vs. original: reads Element.text instead of ET.tostring(method='text'),
    which returned bytes on Python 3 and dragged in each element's whitespace
    tail (the reason for the old "\n" strip); also catches ET.ParseError, since
    malformed XML raises ParseError, not the AttributeError the original caught.
    """
    try:
        root = ET.fromstring(xml)
        ids = [(element.text or "").strip()
               for element in root.findall("./IdList/Id")]
    except (AttributeError, ET.ParseError):
        ids = []
        print("No Papers with both queries were found on PubMed")
        PPI_cite_main.no_papers_with_queries(
            "No Papers with both queries were found on PubMed",
            None, None, None, None, None)
    existing_papers = []  # future: consult a database of existing IDs
    papers_to_download = list(ids)
    full_ID_List = {"existing_papers": existing_papers,
                    "papers_to_download": papers_to_download}
    return full_ID_List