Example #1
def assemble_english(stmts):
    """Assemble each statement into English, without the trailing period."""
    txts = []
    for stmt in stmts:
        ea = EnglishAssembler([stmt])
        txt = ea.make_model()
        # Strip the trailing period; statements that do not assemble
        # into a sentence are skipped
        if txt and txt[-1] == '.':
            txt = txt[:-1]
            txts.append(txt)
    return txts
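
A minimal usage sketch for the function above (assuming recent INDRA import paths; the statement is hypothetical):

from indra.statements import Agent, Phosphorylation

# Hypothetical statement: MAP2K1 phosphorylates MAPK1
stmt = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'))
print(assemble_english([stmt]))  # ['MAP2K1 phosphorylates MAPK1']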
Example #2
def make_english_output(results, model, stmts):
    """Print English-language explanations for model checking results."""
    citations = {}
    citation_count = 1
    for source, target, polarity, value, found_path, paths, flag in results:
        cond = 'How does treatment with %s %s %s?' % \
            (source, 'increase' if polarity == 'positive' else
                     'decrease', target)
        print(cond)
        print('=' * len(cond))
        if paths:
            path = paths[0]
            sentences = []
            for i, (path_rule, sign) in enumerate(path):
                for rule in model.rules:
                    if rule.name == path_rule:
                        stmt = _stmt_from_rule(model, path_rule, stmts)
                        if i == 0:
                            sentences.append('%s is a target of %s.' %
                                            (stmt.agent_list()[0].name, source))

                        # Make citations
                        pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                        cit_nums = []
                        for pmid in pmids:
                            cit_num = citations.get(pmid)
                            if cit_num is None:
                                citations[pmid] = citation_count
                                cit_num = citation_count
                                citation_count += 1
                            cit_nums.append(cit_num)
                        if cit_nums:
                            cit_nums = sorted(list(set(cit_nums)))
                            cit_str = ' [%s]' % (','.join([str(c) for c
                                                          in cit_nums]))
                        else:
                            cit_str = ''
                        ea = EnglishAssembler([stmt])
                        sentence = ea.make_model()
                        sentence = sentence[:-1] + cit_str + '.'
                        sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % target
            text = ' '.join(sentences)
            print('INDRA\'s hypothesis: ' + text)
        elif found_path:
            print('INDRA determined that there exists an explanation but'
                  ' it is intractable to reconstruct.')
        else:
            print('INDRA couldn\'t find an explanation for this observation.')
        print('\n')
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)
Example #3
def report_paths(scored_paths, model, stmts, cell_line):
    """Report paths for a specific cell line."""
    citations = {}
    citation_count = 1
    ab_name = 'Total c-Jun'
    if cell_line == 'C32':
        pol = 'decreased'
    else:
        pol = 'increased'
    for drug in scored_paths.keys():
        paths = scored_paths[drug]
        for path, score in paths[:1]:
            title = 'How does %s treatment result in %s %s' % \
                (drug, pol, ab_name)
            title += ' in %s cells?' % cell_line
            print(title)
            print('=' * len(title))
            path_stmts = stmts_from_path(path, model, stmts)
            sentences = []
            for i, stmt in enumerate(path_stmts):
                if i == 0:
                    target = stmt.agent_list()[0].name
                    sentences.append('%s is a target of %s.' %
                                     (target, drug))
                # Make citations
                pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                cit_nums = []
                for pmid in pmids:
                    cit_num = citations.get(pmid)
                    if cit_num is None:
                        citations[pmid] = citation_count
                        cit_num = citation_count
                        citation_count += 1
                    cit_nums.append(cit_num)
                if cit_nums:
                    cit_nums = sorted(list(set(cit_nums)))
                    cit_str = ' [%s]' % (','.join([str(c) for c
                                                  in cit_nums]))
                else:
                    cit_str = ''
                ea = EnglishAssembler([stmt])
                sentence = ea.make_model()
                sentence = sentence[:-1] + cit_str + '.'
                sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % ab_name
            print(' '.join(sentences))
            print()
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)
Example #4
def print_linked_stmt(stmt):
    """Print and return an English question formed from a linked statement."""
    source_txts = []
    for source_stmt in stmt.source_stmts:
        source_txt = EnglishAssembler([source_stmt]).make_model()
        source_txts.append(source_txt)
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    final_txt = 'I know that '
    for i, t in enumerate(source_txts):
        final_txt += '(%d) %s ' % (i + 1, t)
        if i < len(source_txts) - 1:
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that ' + query_txt[:-1] + '?'
    print(final_txt)
    return final_txt
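
A usage sketch for print_linked_stmt, assuming LinkedStatement from indra.mechlinker (which provides the source_stmts and inferred_stmt attributes used above); the statements are hypothetical:

from indra.statements import Agent, Activation, Phosphorylation
from indra.mechlinker import LinkedStatement

# Hypothetical source statements and the statement inferred from them
s1 = Phosphorylation(Agent('RAF1'), Agent('MAP2K1'))
s2 = Activation(Agent('MAP2K1'), Agent('MAPK1'))
inferred = Activation(Agent('RAF1'), Agent('MAPK1'))
print_linked_stmt(LinkedStatement([s1, s2], inferred))
# -> I know that (1) RAF1 phosphorylates MAP2K1, and (2) MAP2K1 activates
#    MAPK1. Is it therefore true that RAF1 activates MAPK1?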
Example #5
def assemble_english():
    """Assemble each statement into """
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    sentences = {}
    for st in stmts:
        enga = EnglishAssembler()
        enga.add_statements([st])
        model_str = enga.make_model()
        sentences[st.uuid] = model_str
    res = {'sentences': sentences}
    return res
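
A client-side sketch for calling this endpoint; the service URL is hypothetical, and stmts_to_json from indra.statements is assumed for serializing the statements:

import requests
from indra.statements import Agent, Phosphorylation, stmts_to_json

stmt = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'))
# Hypothetical URL; the route depends on how the app is mounted
resp = requests.post('http://localhost:8080/assemblers/english',
                     json={'statements': stmts_to_json([stmt])})
print(resp.json()['sentences'])  # {<uuid>: 'MAP2K1 phosphorylates MAPK1.'}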
Example #6
    def respond_find_qca_path(self, content):
        """Response content to find-qca-path request"""
        if self.qca.ndex is None:
            reply = self.make_failure('SERVICE_UNAVAILABLE')
            return reply

        source_arg = content.gets('SOURCE')
        target_arg = content.gets('TARGET')
        reltype_arg = content.get('RELTYPE')

        if not source_arg:
            raise ValueError("Source list is empty")
        if not target_arg:
            raise ValueError("Target list is empty")

        target = self._get_term_name(target_arg)
        if target is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('TARGET_MISSING')
            return reply

        source = self._get_term_name(source_arg)
        if source is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('SOURCE_MISSING')
            return reply

        if reltype_arg is None or len(reltype_arg) == 0:
            relation_types = None
        else:
            relation_types = [str(k.data) for k in reltype_arg.data]

        results_list = self.qca.find_causal_path([source], [target],
                                                 relation_types=relation_types)
        if not results_list:
            reply = self.make_failure('NO_PATH_FOUND')
            return reply
        first_result = results_list[0]
        first_edges = first_result[1::2]
        indra_edges = [fe[0]['INDRA json'] for fe in first_edges]
        indra_edges = [json.loads(e) for e in indra_edges]
        indra_edges = _fix_indra_edges(indra_edges)
        indra_edge_stmts = stmts_from_json(indra_edges)
        for stmt in indra_edge_stmts:
            txt = EnglishAssembler([stmt]).make_model()
            self.send_provenance_for_stmts(
                [stmt], "the path from %s to %s (%s)" % (source, target, txt))
        indra_edges_str = json.dumps(indra_edges)
        ks = KQMLString(indra_edges_str)

        reply = KQMLList('SUCCESS')
        reply.set('paths', KQMLList([ks]))

        return reply
Example #7
def get_ev_desc(ev, stmt):
    """Get a description of the evidence."""
    # If there is source text, quote it directly
    if ev.text:
        entry = "<i>'%s'</i>" % ev.text
    # If the entry at least has a source ID in a database
    elif ev.source_id:
        entry = "Database entry in '%s': %s" % \
            (ev.source_api, ev.source_id)
    # Otherwise turn the statement itself into English
    else:
        txt = EnglishAssembler([stmt]).make_model()
        entry = "Database entry in '%s' representing: %s" % \
            (ev.source_api, txt)
    return entry
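
A usage sketch exercising all three branches of get_ev_desc; the Evidence fields follow INDRA's Evidence constructor, and the source names are hypothetical:

from indra.statements import Agent, Evidence, Phosphorylation

ev_text = Evidence(source_api='reach', pmid='12345',
                   text='MEK phosphorylates ERK.')
ev_db = Evidence(source_api='signor', source_id='SIGNOR-12345')
ev_bare = Evidence(source_api='biopax')  # no text, no source ID
stmt = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'),
                       evidence=[ev_text, ev_db, ev_bare])
for ev in stmt.evidence:
    print(get_ev_desc(ev, stmt))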
Example #8
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief,
                     reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Example #9
def run_assembly(stmts, folder, pmcid):
    '''Run assembly on a list of statements, for a given PMCID.'''
    indexcard_prefix = folder + '/index_cards/' + pmcid
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Filter for grounding
    grounded_stmts = []
    for st in stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(eh, mh)

    pa.add_statements(grounded_stmts)
    print('%d statements collected in total.' % len(pa.stmts))
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))
    ml = MechLinker(unique_stmts)
    ml.link_statements()
    pa = Preassembler(eh, mh, ml.statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()
    print('%d statements after combining related.' % len(related_stmts))

    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(related_stmts, fh)

    flattened_evidence_stmts = flatten_evidence(related_stmts)

    card_counter = 1
    card_lim = float('inf')
    top_stmts = []
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: len(x.evidence), reverse=True):
        print(len(st.evidence), st)

        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st])
        ia.make_model()
        if ia.cards:
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(related_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

    pya = PysbAssembler()
    pya.add_statements(related_stmts)
    model = pya.make_model()

    print('PySB model has %d monomers and %d rules' %
          (len(model.monomers), len(model.rules)))