Beispiel #1
0
def assemble_english(stmts):
    txts = []
    for stmt in stmts:
        ea = EnglishAssembler([stmt])
        txt = ea.make_model()
        if txt and txt[-1] == '.':
            txt = txt[:-1]
            txts.append(txt)
    return txts
Beispiel #2
0
def make_english_output(results, model, stmts):
    citations = {}
    citation_count = 1
    for source, target, polarity, value, found_path, paths, flag in results:
        cond = 'How does treatment with %s %s %s?' % \
            (source, 'increase' if polarity == 'positive' else
                     'decrease', target)
        print(cond)
        print('=' * len(cond))
        if paths:
            path = paths[0]
            sentences = []
            for i, (path_rule, sign) in enumerate(path):
                for rule in model.rules:
                    if rule.name == path_rule:
                        stmt = _stmt_from_rule(model, path_rule, stmts)
                        if i == 0:
                            sentences.append('%s is a target of %s.' %
                                            (stmt.agent_list()[0].name, source))

                        # Make citations
                        pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                        cit_nums = []
                        for pmid in pmids:
                            cit_num = citations.get(pmid)
                            if cit_num is None:
                                citations[pmid] = citation_count
                                cit_num = citation_count
                                citation_count += 1
                            cit_nums.append(cit_num)
                        if cit_nums:
                            cit_nums = sorted(list(set(cit_nums)))
                            cit_str = ' [%s]' % (','.join([str(c) for c
                                                          in cit_nums]))
                        else:
                            cit_str = ''
                        ea = EnglishAssembler([stmt])
                        sentence = ea.make_model()
                        sentence = sentence[:-1] + cit_str + '.'
                        sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % target
            text = ' '.join(sentences)
            print('INDRA\'s hypothesis: ' + text)
        elif found_path:
            print('INDRA determined that there exists an explanation but'
                  ' it is intractable to reconstruct.')
        else:
            print('INDRA couldn\'t find an explanation for this observation.')
        print('\n')
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)
Beispiel #3
0
def report_paths(scored_paths, model, stmts, cell_line):
    """Report paths for a specific cell line."""
    citations = {}
    citation_count = 1
    ab_name = 'Total c-Jun'
    if cell_line == 'C32':
        pol = 'decreased'
    else:
        pol = 'increased'
    for drug in scored_paths.keys():
        paths = scored_paths[drug]
        for path, score in paths[:1]:
            title = 'How does %s treatment result in %s %s' % \
                (drug, pol, ab_name)
            title += ' in %s cells?' % cell_line
            print(title)
            print('=' * len(title))
            path_stmts = stmts_from_path(path, model, stmts)
            sentences = []
            for i, stmt in enumerate(path_stmts):
                if i == 0:
                    target = stmt.agent_list()[0].name
                    sentences.append('%s is a target of %s.' %
                                     (target, drug))
                # Make citations
                pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                cit_nums = []
                for pmid in pmids:
                    cit_num = citations.get(pmid)
                    if cit_num is None:
                        citations[pmid] = citation_count
                        cit_num = citation_count
                        citation_count += 1
                    cit_nums.append(cit_num)
                if cit_nums:
                    cit_nums = sorted(list(set(cit_nums)))
                    cit_str = ' [%s]' % (','.join([str(c) for c
                                                  in cit_nums]))
                else:
                    cit_str = ''
                ea = EnglishAssembler([stmt])
                sentence = ea.make_model()
                sentence = sentence[:-1] + cit_str + '.'
                sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % ab_name
            print(' '.join(sentences))
            print()
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)
Beispiel #4
0
def assemble_english():
    """Assemble each statement into """
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    sentences = {}
    for st in stmts:
        enga = EnglishAssembler()
        enga.add_statements([st])
        model_str = enga.make_model()
        sentences[st.uuid] = model_str
    res = {'sentences': sentences}
    return res
Beispiel #5
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief,
                     reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards to
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Beispiel #6
0
def run_assembly(stmts, folder, pmcid):
    indexcard_prefix = folder + '/index_cards/' + pmcid
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Filter for grounding
    grounded_stmts = []
    for st in stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(eh, mh)

    pa.add_statements(grounded_stmts)
    print '%d statements collected in total.' % len(pa.stmts)
    unique_stmts = pa.combine_duplicates()
    print '%d statements after combining duplicates.' % len(unique_stmts)
    ml = MechLinker(unique_stmts)
    ml.link_statements()
    pa = Preassembler(eh, mh, ml.statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()
    print '%d statements after combining related.' % len(related_stmts)

    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(related_stmts, fh)

    flattened_evidence_stmts = flatten_evidence(related_stmts)

    card_counter = 1
    card_lim = float('inf')
    top_stmts = []
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: len(x.evidence), reverse=True):
        print len(st.evidence), st

        if is_background_knowledge(st):
            print 'This statement is background knowledge - skipping.'
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st])
        ia.make_model()
        if ia.cards:
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    ea = EnglishAssembler(top_stmts)
    print '======================='
    print ea.make_model()
    print '======================='

    # Print the statement graph
    graph = render_stmt_graph(related_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

    pya = PysbAssembler()
    pya.add_statements(related_stmts)
    model = pya.make_model()

    print 'PySB model has %d monomers and %d rules' %\
        (len(model.monomers), len(model.rules))