def on_message(data):
    """Handle one incoming chat message and dispatch 'indra:' commands.

    Relies on module globals (``last_seen_msg_id``, ``stmts``, ``user_id``)
    and helper functions defined elsewhere in this module.  NOTE(review):
    formatting reconstructed from a collapsed source line; the nesting of
    the trailing 'biopax' check and final log print is assumed — confirm
    against the original layout.
    """
    global last_seen_msg_id
    global stmts
    # Skip non-dict payloads and messages we have already handled.
    if isinstance(data, dict) and data['id'] != last_seen_msg_id:
        last_seen_msg_id = data['id']
        # Only react when this bot's user id is among the message targets.
        if {'id': user_id} in data['targets']:
            if data['comment'].startswith('indra:'):
                # Drop the 6-character 'indra:' prefix to get the command.
                text = data['comment'][6:].strip()
                # 'start over' / 'cls' / 'clear': reset the model.
                if text.strip().lower() in ['start over', 'cls', 'clear']:
                    clear_model(data['userName'])
                # Retrieve cached REACH JSON for demo purposes
                elif text.strip().lower().startswith('read pmc4338247'):
                    pmcid = 'PMC4338247'
                    say("%s: Got it. Reading %s via INDRA. This usually "
                        "takes about a minute." % (data['userName'], pmcid))
                    rp = reach.process_json_file('reach_PMC4338247.json')
                    stmts += rp.statements
                    assemble_model(data['userName'])
                # 'read <id>': read the given paper and update the model.
                elif text.strip().lower().startswith('read'):
                    pmcid = text[4:].strip()
                    update_model_from_paper(pmcid, data['userName'])
                # 'remove <arg>': one word removes an agent, more words
                # are treated as a mechanism description.
                elif text.strip().lower().startswith('remove'):
                    remove_arg = text[6:].strip()
                    if len(remove_arg.split(' ')) == 1:
                        remove_agent(remove_arg, data['userName'])
                        print "Remove agent:", remove_arg
                    else:
                        remove_mechanism(remove_arg, data['userName'])
                        print "Remove mechanism:", remove_arg
                # Anything else is natural-language input for the model.
                else:
                    update_model_from_text(text, data['userName'])
            # Literal 'biopax' comment triggers BioPAX assembly.
            if data['comment'] == 'biopax':
                print 'BIOPAX'
                call_biopax()
        # Echo every new message to the console.
        print '<%s> %s' % (data['userName'], data['comment'])
def process_paper(model_name, pmid):
    """Read the paper with the given PMID for a model, using a JSON cache.

    Parameters
    ----------
    model_name : str
        Name of the model; used to locate its 'jsons' cache folder under
        ``model_path``.
    pmid : str
        PubMed ID of the paper to read.

    Returns
    -------
    rp : reach processor or None
        Processor holding the extracted statements, or None if the text
        came back in a format that cannot be read here.
    txt_format : str
        Format of the text that was used: 'existing_json' when a cached
        JSON was found, otherwise whatever ``get_full_text`` reported.
    """
    json_path = os.path.join(model_path, model_name, 'jsons',
                             'PMID%s.json' % pmid)
    # NOTE(review): malformed IDs are only warned about; processing still
    # proceeds (preserving original behavior) — confirm this is intended.
    if pmid.startswith('api') or pmid.startswith('PMID'):
        logger.warning('Invalid PMID: %s' % pmid)
    # If the paper has been read, use the json output file
    if os.path.exists(json_path):
        rp = reach.process_json_file(json_path, citation=pmid)
        txt_format = 'existing_json'
    # If the paper has not been read, download the text and read
    else:
        txt, txt_format = get_full_text(pmid, 'pmid')
        if txt_format == 'pmc_oa_xml':
            rp = reach.process_nxml_str(txt, citation=pmid, offline=True)
            _cache_reach_output(json_path)
        elif txt_format == 'elsevier_xml':
            # Extract the raw text from the Elsevier XML
            txt = elsevier_client.extract_text(txt)
            rp = reach.process_text(txt, citation=pmid, offline=True)
            _cache_reach_output(json_path)
        elif txt_format == 'abstract':
            rp = reach.process_text(txt, citation=pmid, offline=True)
            _cache_reach_output(json_path)
        else:
            rp = None
    if rp is not None:
        check_pmids(rp.statements)
    return rp, txt_format


def _cache_reach_output(json_path):
    """Move offline REACH's default output file ('reach_output.json' in
    the working directory) into the model's JSON cache, if present."""
    if os.path.exists('reach_output.json'):
        shutil.move('reach_output.json', json_path)
from indra import trips, reach from indra.literature import id_lookup from assembly_eval import have_file, run_assembly if __name__ == "__main__": pmc_ids = ["PMC1234335", "PMC3178447", "PMC3690480", "PMC4345513", "PMC534114"] pmids = [id_lookup(pmcid)["pmid"] for pmcid in pmc_ids] for pmid, pmcid in zip(pmids, pmc_ids): print "Processing %s..." % pmcid trips_fname = "trips/" + pmcid + "-20160503T1152.ekb" tp = trips.process_xml(open(trips_fname).read()) for s in tp.statements: for e in s.evidence: e.pmid = pmid reach_fname = "reach/" + pmcid + ".json" rp = reach.process_json_file(reach_fname) all_statements = tp.statements + rp.statements run_assembly(all_statements, "combined", pmcid)
rerun = False # Download the papers if they are not available yet for pmcid in pmc_ids: prefix = folder + "/" + pmcid if not have_file(prefix + ".nxml") and not have_file(prefix + ".txt"): txt, txt_format = get_full_text(pmcid) if txt_format == "nxml": fname = prefix + ".nxml" else: fname = prefix + ".txt" with open(fname, "wt") as fh: fh.write(txt.encode("utf-8")) # Read each paper if it hasn't been read yet. # Otherwise use the existing json extractions. for pmcid, pmid in zip(pmc_ids, pmids): prefix = folder + "/" + pmcid print "Processing %s..." % pmcid # If REACH already processed it then don't run it again if rerun or not have_file(prefix + ".json"): if have_file(prefix + ".txt"): txt = open(prefix + ".txt").read().decode("utf-8") rp = reach.process_text(txt, citation=pmid, offline=True) elif have_file(prefix + ".nxml"): rp = reach.process_nxml_file(prefix + ".nxml", citation=pmid, offline=True) shutil.move("reach_output.json", prefix + ".json") else: rp = reach.process_json_file(prefix + ".json", citation=pmid) run_assembly(rp.statements, folder, pmcid)
# Download the papers if they are not available yet, collecting the
# corresponding PMIDs along the way.
for pmcid in pmc_ids:
    prefix = folder + '/' + pmcid
    if not have_file(prefix + '.nxml') and\
        not have_file(prefix + '.txt'):
        txt, txt_format = get_full_text(pmcid)
        if txt_format == 'nxml':
            fname = prefix + '.nxml'
        else:
            fname = prefix + '.txt'
        with open(fname, 'wt') as fh:
            fh.write(txt.encode('utf-8'))
    pmids.append(id_lookup(pmcid)['pmid'])
# Read each paper if it hasn't been read yet.
# Otherwise use the existing json extractions.
for pmcid, pmid in zip(pmc_ids, pmids):
    prefix = folder + '/' + pmcid
    print 'Processing %s...' % pmcid
    # If REACH already processed it then don't run it again
    if rerun or not have_file(prefix + '.json'):
        if have_file(prefix + '.txt'):
            txt = open(prefix + '.txt').read().decode('utf-8')
            # NOTE(review): no offline=True here and the result is never
            # cached to prefix + '.json', so txt papers appear to be
            # re-read on every run — confirm whether this is intended.
            rp = reach.process_text(txt, citation=pmid)
        elif have_file(prefix + '.nxml'):
            rp = reach.process_nxml_file(prefix + '.nxml',
                                         citation=pmid)
            # Stash REACH's default output file as this paper's cached
            # JSON; NOTE(review): this move is unconditional and will
            # raise if 'reach_output.json' was not produced.
            shutil.move('reach_output.json', prefix + '.json')
    else:
        rp = reach.process_json_file(prefix + '.json', citation=pmid)
    run_assembly(rp.statements, folder, pmcid)
import sys import pickle from indra import reach from indra.assemblers import GraphAssembler if len(sys.argv) < 2: process_type = 'text' else: process_type = sys.argv[1] if process_type == 'text': txt = open('ras_pathway.txt', 'rt').read() rp = reach.process_text(txt, offline=True) st = rp.statements elif process_type == 'json': rp = reach.process_json_file('reach_output.json') st = rp.statements else: st = pickle.load(open('statements.pkl', 'rb')) for s in st: print '%s\t%s' % (s, s.evidence[0].text) graphpr = {'rankdir': 'TD'} nodepr = {'fontsize': 12, 'shape': 'plaintext', 'margin': '0,0', 'pad': 0} ga = GraphAssembler(st, graph_properties=graphpr, node_properties=nodepr) ga.make_model() ga.save_dot('ras_pathway.dot') ga.save_pdf('ras_pathway.pdf')