Example #1
0
def process_sentence_xml(sentence):
    """Run the TRIPS XML processor on the stored EKB file for a sentence.

    The file name is built from ``sentence`` with its final character
    dropped and every character outside ``[a-zA-Z0-9]`` replaced by an
    underscore, followed by ``.ekb``. The file is read from the
    ``trips_ekbs`` folder under ``path_this`` and decoded as UTF-8.

    Returns whatever ``trips.process_xml`` returns for that XML string.
    """
    stem = re.sub('[^a-zA-Z0-9]', '_', sentence[:-1])
    ekb_path = os.path.join(path_this, 'trips_ekbs', stem + '.ekb')
    with open(ekb_path, 'rb') as ekb_file:
        raw_bytes = ekb_file.read()
    return trips.process_xml(raw_bytes.decode('utf-8'))
Example #2
0
def get_file_stmts(fname):
    """Return the statements extracted from the EKB XML file ``fname``.

    The file is read in text mode and handed to ``trips.process_xml``.
    An empty list is returned when the processor comes back as ``None``.
    """
    with open(fname, 'rt') as xml_file:
        processor = trips.process_xml(xml_file.read())
    return [] if processor is None else processor.statements
Example #3
0
from indra import trips
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Run assembly on the existing TRIPS EKB extraction of each listed paper.
    pmc_ids = ['PMC1234335', 'PMC3178447', 'PMC3690480',
               'PMC4345513', 'PMC534114']
    pmids = [id_lookup(pmcid)['pmid'] for pmcid in pmc_ids]
    # The output folder is the same for every paper, so set it once.
    folder = 'trips'
    # Use the existing EKB extractions.
    for pmid, pmcid in zip(pmids, pmc_ids):
        prefix = folder + '/' + pmcid
        # Parenthesized single-argument form prints identically under
        # Python 2 and Python 3.
        print('Processing %s...' % pmcid)
        # Read inside a context manager so the file handle is closed
        # promptly instead of being left to the garbage collector.
        with open(prefix + '-20160503T1152.ekb') as fh:
            tp = trips.process_xml(fh.read())
        # PMIDs from TRIPS need to be set here because it propagates
        # the PMCID by default
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid
        run_assembly(tp.statements, folder, pmcid)
Example #4
0
from indra import trips, reach
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == "__main__":
    # Combine the TRIPS and REACH extractions of each listed paper and run
    # assembly on the merged statement list.
    pmc_ids = ["PMC1234335", "PMC3178447", "PMC3690480", "PMC4345513", "PMC534114"]
    pmids = [id_lookup(pmcid)["pmid"] for pmcid in pmc_ids]

    for pmid, pmcid in zip(pmids, pmc_ids):
        # Parenthesized single-argument form prints identically under
        # Python 2 and Python 3.
        print("Processing %s..." % pmcid)
        trips_fname = "trips/" + pmcid + "-20160503T1152.ekb"
        # Read inside a context manager so the file handle is closed
        # promptly instead of being left to the garbage collector.
        with open(trips_fname) as fh:
            tp = trips.process_xml(fh.read())
        # Overwrite the PMID on every piece of TRIPS evidence with the one
        # looked up for this PMCID.
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid
        reach_fname = "reach/" + pmcid + ".json"
        rp = reach.process_json_file(reach_fname)
        all_statements = tp.statements + rp.statements
        run_assembly(all_statements, "combined", pmcid)
Example #5
0
from indra import trips
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Read one PMCID per line. The context manager closes the file, and
    # blank lines are skipped so they do not turn into empty IDs.
    with open('pmcids.txt', 'rt') as id_file:
        pmc_ids = [line.strip() for line in id_file if line.strip()]
    pmids = [id_lookup(pmcid)['pmid'] for pmcid in pmc_ids]
    # The output folder is the same for every paper, so set it once.
    folder = 'trips'
    # Use the existing EKB extractions.
    for pmid, pmcid in zip(pmids, pmc_ids):
        prefix = folder + '/' + pmcid
        # Parenthesized single-argument form prints identically under
        # Python 2 and Python 3.
        print('Processing %s...' % pmcid)
        # Read inside a context manager so the file handle is closed
        # promptly instead of being left to the garbage collector.
        with open(prefix + '_20160614.ekb') as fh:
            tp = trips.process_xml(fh.read())
        # PMIDs from TRIPS need to be set here because it propagates
        # the PMCID by default
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid
        run_assembly(tp.statements, folder, pmcid)
Example #6
0
def test_trips_processor_offline():
    """Smoke test to see if imports and executes without error. Doesn't
    check for correctness of parse or of assembled model."""
    # Read through a context manager so the test does not leak an open
    # file handle.
    with open(test_small_file) as fh:
        xml_str = fh.read()
    # Only the absence of an exception matters; the result is not inspected.
    trips.process_xml(xml_str)
Example #7
0
            db_refs_str = ', '.join(db_refs)
            ev_txt = (s.evidence[0].text).encode('utf-8')
            fh.write('%s\t%s\t%s\t%s\n' %
                     (s, db_refs_str, 'PMC'+s.evidence[0].pmid,
                      ev_txt))

if __name__ == '__main__':
    # Process every EKB file in the current directory, report what was
    # extracted from each, and then preassemble all collected statements.
    # All print calls use a single parenthesized argument so that they
    # produce the same output under Python 2 and Python 3.
    fnames = glob.glob('*.ekb')

    pa = Preassembler(eh, mh)

    for fn in fnames:
        print('\n\n----------------------------')
        print('Processing %s...' % fn)
        # Read inside a context manager so each file handle is closed
        # before moving on to the next file.
        with open(fn, 'rt') as fh:
            xml_str = fh.read()
        tp = trips.process_xml(xml_str)
        print('Extracted events by type')
        print('------------------------')
        # items() is available on dicts in both Python 2 and Python 3.
        for k, v in tp.extracted_events.items():
            print('%s %s' % (k, len(v)))
        print('------------------------')
        print('%s statements collected.' % len(tp.statements))
        pa.add_statements(tp.statements)
        print('----------------------------\n\n')

    print('%d statements collected in total.' % len(pa.stmts))
    duplicate_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(duplicate_stmts))
    related_stmts = pa.combine_related()
    print('%d statements after combining related.' % len(related_stmts))
Example #8
0
def assemble_model(model_name, reread=False):
    """Assemble, tune and save the PySB model called ``model_name``.

    Statements are taken from ``<model_name>.xml`` via
    ``trips.process_xml`` when ``reread`` is false and that file exists.
    Otherwise the contents of ``<model_name>.txt`` are passed to
    ``trips.process_text`` together with the XML file name.

    After assembly a ``p53_active`` observable is added and a number of
    rate parameters are overridden; the models ``p53_ATM``, ``p53_ATR``
    and ``p53_ATM_var`` each get extra initial conditions, parameter
    values and observables. The model is saved to ``<model_name>.py``
    and returned.
    """
    xml_path = model_name + '.xml'

    # Decide whether the previously produced XML can be reused.
    have_cached_xml = False
    if not reread:
        print('Processing %s' % xml_path)
        have_cached_xml = os.path.exists(xml_path)

    if have_cached_xml:
        with open(xml_path, 'rb') as xml_file:
            processor = trips.process_xml(xml_file.read())
    else:
        text_path = model_name + '.txt'
        print('Reading %s' % text_path)
        with open(text_path, 'rb') as text_file:
            processor = trips.process_text(text_file.read(), xml_path)

    print('Assembling statements:')
    for index, stmt in enumerate(processor.statements):
        print('%d: %s' % (index, stmt))
    print('----------------------')

    assembler = PysbAssembler()
    assembler.add_statements(processor.statements)
    model = assembler.make_model()
    model.name = model_name

    # Settings applied to every model.
    tp53 = model.monomers['TP53']
    model.add_component(Observable(b'p53_active', tp53(activity='active')))
    if not model_name.endswith('var'):
        model.parameters['kf_aa_act_1'].value = 5e-06
    model.parameters['kf_pt_act_1'].value = 1e-05

    # Model-specific settings; the three names are mutually exclusive.
    if model_name == 'p53_ATM':
        model.add_component(Parameter('ATMa_0', 1))
        atm = model.monomers['ATM']
        model.initial(atm(activity='active'),
                      model.parameters['ATMa_0'])
        model.parameters['kf_pa_act_1'].value = 1e-04
        model.add_component(
            Observable(b'atm_active', atm(activity='active')))
    elif model_name == 'p53_ATR':
        model.add_component(Parameter('ATRa_0', 1))
        atr = model.monomers['ATR']
        model.initial(atr(activity='active'),
                      model.parameters['ATRa_0'])
        model.add_component(
            Observable(b'atr_active', atr(activity='active')))
    elif model_name == 'p53_ATM_var':
        # This variant seeds and observes ATM through its phospho='p'
        # site state rather than activity='active'.
        model.add_component(Parameter('ATMa_0', 1))
        atm = model.monomers['ATM']
        model.initial(atm(phospho='p'),
                      model.parameters['ATMa_0'])
        model.parameters['kf_pa_dephosphorylation_1'].value = 1e-04
        model.parameters['MDM2_0'].value = 0
        model.parameters['kf_m_deg_1'].value = 8e-01
        model.parameters['kf_tm_synth_1'].value = 0.2
        model.parameters['kf_aa_phosphorylation_1'].value = 5e-06
        model.add_component(
            Observable(b'atm_active', atm(phospho='p')))

    assembler.model = model
    assembler.save_model('%s.py' % model_name)
    return model
Example #9
0
from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
from indra import trips
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Read one PMCID per line. The context manager closes the file, and
    # blank lines are skipped so they do not turn into empty IDs.
    with open('pmcids.txt', 'rt') as id_file:
        pmc_ids = [line.strip() for line in id_file if line.strip()]
    # The output folder is the same for every paper, so set it once.
    folder = 'trips'
    # Use the existing EKB extractions.
    for pmcid in pmc_ids:
        prefix = folder + '/' + pmcid
        print('Processing %s...' % pmcid)
        with open(prefix + '.ekb', 'r') as f:
            tp = trips.process_xml(f.read())
        # NOTE: the evidence PMIDs are not modified here; the statements
        # are passed to assembly exactly as the processor produced them.
        run_assembly(tp.statements, folder, pmcid)