Example #1
0
def sent_with_cooccur(ID_paper_obj_dict, query):
    """Return a list of (sentence, paper-ID) tuples for every sentence that
    contains both query terms, across all papers in ID_paper_obj_dict.

    ID_paper_obj_dict maps paper IDs to paper objects that expose
    `all_sentences`, `split_abstract_into_sentences`, `word_tokenize`,
    `find_sentences_with_both_queries` and `make_sentence_id_tuples`.
    If no co-occurring sentence is found anywhere, the user is notified
    via PPI_cite_main.no_cooc_sent and an empty list is returned.
    """
    ID_sentence_lists = []
    for paper in ID_paper_obj_dict.values():
        # Lazily build the per-paper sentence cache the first time the
        # paper is processed; afterwards reuse the cached sentences.
        if not paper.all_sentences:
            paper.split_abstract_into_sentences(query)
            paper.word_tokenize()
        # Common path (previously duplicated in both branches): collect the
        # co-occurring sentences and tag each one with its paper ID.
        coocurrence_list = paper.find_sentences_with_both_queries(
            paper.all_sentences, query
        )
        ID_sentence_lists.extend(paper.make_sentence_id_tuples(coocurrence_list))

    if not ID_sentence_lists:
        print("No sentences with co-occurance found")
        time.sleep(3)
        PPI_cite_main.no_cooc_sent("No sentences with co-occurance found", None, None, None, None, None)
    return ID_sentence_lists
Example #2
0
def main(q1, q2, articles, max_sentences):
    """Run one PPI-cite query for the protein pair (q1, q2).

    Both query strings are lower-cased, wrapped in an a1-file object, and
    handed to PPI_cite_main.index along with the article/sentence limits.
    """
    protein_pair = [q1.lower(), q2.lower()]
    a1_file = make_a1_file_object(protein_pair)
    print(a1_file.proteins)
    PPI_cite_main.index(a1_file, articles, max_sentences)
Example #3
0
def main(size_of_test_set, articles, max_sentences,
         path=r"C:\Users\Adam\workspace\Wiki Pi NLP\Test_Set_Files_BIONLP09\dot_a1_files"):
    """Index up to `size_of_test_set` BioNLP09 .a1 files found in `path`.

    For each tab-separated .a1 file, the entity names from the first two
    rows (column 2) become the protein pair; files with duplicate or
    missing pairs are skipped. Each surviving pair is wrapped in an
    a1-file object and passed to PPI_cite_main.index.

    `path` was previously hard-coded; it is now a keyword parameter with
    the original value as default, so existing callers are unaffected.
    """
    count = 0
    for dir_entry in os.listdir(path):
        count += 1
        if count > size_of_test_set:
            break
        dir_entry_path = os.path.join(path, dir_entry)
        # Bug fix: the original fell through to the print/index lines even
        # for non-file entries, referencing an undefined (or stale) a1_file.
        if not os.path.isfile(dir_entry_path):
            continue
        with open(dir_entry_path, "r") as my_file:
            rows = list(csv.reader(my_file, delimiter="\t"))
            first_two_proteins = [row[2].lower() for row in rows[:2]]
            if len(first_two_proteins) != 2:
                continue
            # Skip self-pairs (same protein twice).
            if first_two_proteins[0] == first_two_proteins[1]:
                continue
            a1_file = make_a1_file_object(my_file, dir_entry, first_two_proteins)

        print("%s %s" % (a1_file.proteins, count))
        PPI_cite_main.index(a1_file, articles, max_sentences)
Example #4
0
def get_ID_list(xml):
    """Parse a PubMed ESearch XML response and extract the paper IDs.

    Returns a dict with two keys:
      - "existing_papers": always empty for now (reserved for a future
        local-ID database).
      - "papers_to_download": list of ID strings found under ./IdList/Id.

    On AttributeError during parsing/extraction the user is notified via
    PPI_cite_main.no_papers_with_queries and an empty ID list is used.
    """
    try:
        root = ET.fromstring(xml)
        ids = []
        for element in root.findall("./IdList/Id"):
            # encoding="unicode" keeps this a text string (without it,
            # tostring returns bytes under Python 3 and the str-based
            # replace below would raise TypeError).
            singleID_string = ET.tostring(element, encoding="unicode", method="text")
            ids.append(singleID_string.replace("\n", ""))
    except AttributeError:
        ids = []
        print("No Papers with both queries were found on PubMed")
        PPI_cite_main.no_papers_with_queries(
            "No Papers with both queries were found on PubMed", None, None, None, None, None
        )

    existing_papers = []  # Use this in the future to make database of existing IDs
    papers_to_download = list(ids)  # replaces the element-by-element copy loop

    full_ID_List = {"existing_papers": existing_papers, "papers_to_download": papers_to_download}
    return full_ID_List