Beispiel #1
0
def test_s2_spo():
    """Check the five values ``extract_spo`` yields for ``s2`` with trigger 'causes'."""
    expected = (
        'virus',
        'the prototype virus',  # TODO
        'causes',
        'gastroenteritis',
        'epidemic gastroenteritis',
    )
    dependencies = nlp.prepare_deps(s2)
    noun_chunks = nlp.prepare_chunks(s2)
    # Unpack first so a result of the wrong length still fails on unpacking.
    subj_head, subj, pred, obj_head, obj = extract_spo(dependencies, noun_chunks, 'causes')
    assert (subj_head, subj, pred, obj_head, obj) == expected
Beispiel #2
0
def test_s1_spo():
    """Check the five values ``extract_spo`` yields for ``s1`` with trigger 'leads to'."""
    expected = (
        'encapsulation',
        'The encapsulation of rifampicin',
        'leads to',
        'reduction',
        'a reduction of the Mycobacterium smegmatis inside macrophages',
    )
    dependencies = nlp.prepare_deps(s1)
    noun_chunks = nlp.prepare_chunks(s1)
    # Unpack first so a result of the wrong length still fails on unpacking.
    subj_head, subj, pred, obj_head, obj = extract_spo(dependencies, noun_chunks, 'leads to')
    assert (subj_head, subj, pred, obj_head, obj) == expected
Beispiel #3
0
def test_s5_spo():
    """Check the five values ``extract_spo`` yields for ``s5`` with trigger 'cause of'."""
    expected = (
        'infection',
        'Chronic hepatitis virus infection',
        'cause of',
        'hepatitis',
        'chronic hepatitis, cirrhosis, and hepatocellular carcinoma worldwide',
    )
    dependencies = nlp.prepare_deps(s5)
    noun_chunks = nlp.prepare_chunks(s5)
    # Unpack first so a result of the wrong length still fails on unpacking.
    subj_head, subj, pred, obj_head, obj = extract_spo(dependencies, noun_chunks, 'cause of')
    assert (subj_head, subj, pred, obj_head, obj) == expected
Beispiel #4
0
def test_s4_spo():
    """Check the five values ``extract_spo`` yields for ``s4`` with trigger 'inhibit'."""
    expected = (
        'ribavirin',
        'ribavirin',
        'inhibit',
        'replication',
        'SARS coronavirus replication in five different cell types of animal or human origin',
    )
    dependencies = nlp.prepare_deps(s4)
    noun_chunks = nlp.prepare_chunks(s4)
    # Unpack first so a result of the wrong length still fails on unpacking.
    subj_head, subj, pred, obj_head, obj = extract_spo(dependencies, noun_chunks, 'inhibit')
    assert (subj_head, subj, pred, obj_head, obj) == expected
Beispiel #5
0
def test_s3_spo():
    """Check the five values ``extract_spo`` yields for ``s3`` with trigger 'cause'.

    The values are unpacked as ``s_head, s, p, o_head, o`` and each is
    compared against the expected string for this sentence.
    """
    deps = nlp.prepare_deps(s3)
    chunks = nlp.prepare_chunks(s3)
    # The leftover debug ``print(chunks)`` was dropped so this test matches
    # its siblings; run pytest with ``-s`` and add it back locally if needed.
    trigger = 'cause'
    s_head, s, p, o_head, o = extract_spo(deps, chunks, trigger)
    assert s_head == 'exposure'
    assert s == 'the exposure to ambient air pollution'
    assert p == 'cause'
    assert o_head == 'illnesses'
    assert o == 'serious respiratory illnesses'
Beispiel #6
0
def main():
    """
    Extract (subject, predicate, object) rows from abstracts into a TSV file.

    Reads ``-i/--input-dir`` and ``-o/--output-file`` from the command line,
    iterates the ``(pmcid, title, abstract)`` records produced by
    ``DataReader``, and processes each abstract with ``StanzaNLP``. For every
    sentence in which ``get_fired_trigger`` reports a trigger, the result of
    ``extract_spo`` is expanded with ``get_coordinated_nps`` on both the
    subject and the object, and one tab-separated row
    ``title, pmcid, PMC<pmcid>.nxml, subject, predicate, object, sentence``
    is written per subject/object pair. Rows containing any falsy field are
    skipped.

    :return: None
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-i', '--input-dir', required=True, help='...')
    parser.add_argument('-o', '--output-file', required=True, help='...')

    args = parser.parse_args()

    dr = DataReader(args.input_dir)
    nlp = StanzaNLP()

    # newline='' is what the csv module expects for files it writes to; the
    # explicit encoding keeps the output independent of the platform default.
    with open(args.output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')

        for pmcid, title, abstract in dr:
            # TODO just sentence splitter?
            doc = nlp.process(abstract)
            for sentence in doc.sentences:
                sent = StanzaNLP.get_sentence(sentence.words)
                trigger = get_fired_trigger(sent)

                # Sentences without a fired trigger produce no rows.
                if not trigger:
                    continue

                deps = StanzaNLP.get_dependencies(sentence)
                # chunking
                chunks = nlp.prepare_chunks(sent)
                # The two head values are not part of the output row.
                _s_head, s, p, _o_head, o = extract_spo(deps, chunks, trigger)
                subjects = get_coordinated_nps(s)
                objects = get_coordinated_nps(o)

                # One row per subject/object combination.
                for subj in subjects:
                    for obj in objects:
                        row = [title, pmcid, f'PMC{pmcid}.nxml', subj, p, obj, sent]
                        # Skip rows with any empty/missing field.
                        if all(row):
                            writer.writerow(row)