def test_s2_spo():
    deps = nlp.prepare_deps(s2)
    chunks = nlp.prepare_chunks(s2)
    trigger = 'causes'
    s_head, s, p, o_head, o = extract_spo(deps, chunks, trigger)
    assert s_head == 'virus'
    assert s == 'the prototype virus'  # TODO
    assert p == 'causes'
    assert o_head == 'gastroenteritis'
    assert o == 'epidemic gastroenteritis'


def test_s1_spo():
    deps = nlp.prepare_deps(s1)
    chunks = nlp.prepare_chunks(s1)
    trigger = 'leads to'
    s_head, s, p, o_head, o = extract_spo(deps, chunks, trigger)
    assert s_head == 'encapsulation'
    assert s == 'The encapsulation of rifampicin'
    assert p == 'leads to'
    assert o_head == 'reduction'
    assert o == 'a reduction of the Mycobacterium smegmatis inside macrophages'


def test_s5_spo():
    deps = nlp.prepare_deps(s5)
    chunks = nlp.prepare_chunks(s5)
    trigger = 'cause of'
    s_head, s, p, o_head, o = extract_spo(deps, chunks, trigger)
    assert s_head == 'infection'
    assert s == 'Chronic hepatitis virus infection'
    assert p == 'cause of'
    assert o_head == 'hepatitis'
    assert o == 'chronic hepatitis, cirrhosis, and hepatocellular carcinoma worldwide'


def test_s4_spo():
    deps = nlp.prepare_deps(s4)
    chunks = nlp.prepare_chunks(s4)
    trigger = 'inhibit'
    s_head, s, p, o_head, o = extract_spo(deps, chunks, trigger)
    assert s_head == 'ribavirin'
    assert s == 'ribavirin'
    assert p == 'inhibit'
    assert o_head == 'replication'
    assert o == 'SARS coronavirus replication in five different cell types of animal or human origin'


def test_s3_spo():
    deps = nlp.prepare_deps(s3)
    chunks = nlp.prepare_chunks(s3)
    print(chunks)
    trigger = 'cause'
    s_head, s, p, o_head, o = extract_spo(deps, chunks, trigger)
    assert s_head == 'exposure'
    assert s == 'the exposure to ambient air pollution'
    assert p == 'cause'
    assert o_head == 'illnesses'
    assert o == 'serious respiratory illnesses'
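

# Note: the tests above rely on module-level fixtures that are defined elsewhere in this
# module and are not shown here: the example sentences s1-s5 and an `nlp` helper exposing
# prepare_deps()/prepare_chunks() -- presumably a StanzaNLP instance, as constructed in
# main() below.
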
def main():
    """Extract subject-predicate-object triples from a directory of abstracts and
    write them to a tab-separated output file.

    :return: None
    """
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-i', '--input-dir', required=True, help='...')
    parser.add_argument('-o', '--output-file', required=True, help='...')
    args = parser.parse_args()

    dr = DataReader(args.input_dir)
    nlp = StanzaNLP()

    with open(args.output_file, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')
        for pmcid, title, abstract in dr:  # TODO just sentence splitter?
            doc = nlp.process(abstract)
            for sentence in doc.sentences:
                sent = StanzaNLP.get_sentence(sentence.words)
                # skip sentences that do not contain a relation trigger
                trigger = get_fired_trigger(sent)
                if not trigger:
                    continue
                deps = StanzaNLP.get_dependencies(sentence)
                # chunking
                chunks = nlp.prepare_chunks(sent)
                s_head, s, p, o_head, o = extract_spo(deps, chunks, trigger)
                # expand coordinated noun phrases into separate subjects/objects
                subjects = get_coordinated_nps(s)
                objects = get_coordinated_nps(o)
                for subj in subjects:
                    for obj in objects:
                        row = [title, pmcid, f'PMC{pmcid}.nxml', subj, p, obj, sent]
                        if all(row):
                            writer.writerow(row)
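

# A standard entry-point guard is assumed here (not shown in the original snippet).
# Hypothetical invocation, using the flags defined above; the script name is an assumption:
#     python extract_relations.py -i /path/to/pmc/xml/ -o triples.tsv
if __name__ == '__main__':
    main()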