Example #1
def assemble_english(stmts):
    txts = []
    for stmt in stmts:
        ea = EnglishAssembler([stmt])
        txt = ea.make_model()
        if txt and txt[-1] == '.':
            txt = txt[:-1]
            txts.append(txt)
    return txts
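
A minimal usage sketch for the helper above, assuming the assemble_english helper (and its EnglishAssembler import) is in scope and INDRA is installed; the exact sentence wording produced by EnglishAssembler may vary by version:

from indra.statements import Agent, Phosphorylation

# Build a simple Statement and assemble it; assemble_english (above) keeps
# only sentences that end in a period and strips that trailing period.
stmt = Phosphorylation(Agent('BRAF'), Agent('MAP2K1'))
print(assemble_english([stmt]))  # e.g. ['BRAF phosphorylates MAP2K1']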
Example #2
def print_linked_stmt(stmt):
    source_txts = []
    for source_stmt in stmt.source_stmts:
        source_txt = EnglishAssembler([source_stmt]).make_model()
        source_txts.append(source_txt)
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    final_txt = 'I know that '
    for i, t in enumerate(source_txts):
        final_txt += '(%d) %s ' % (i + 1, t)
        if i < len(source_txts) - 1:
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that ' + query_txt[:-1] + '?'
    print(final_txt)
    return final_txt
Example #3
    def respond_describe_model(self, content):
        """Convert the model to natural language."""
        # Get the model.
        model_id = self._get_model_id(content)
        model = self.mra.get_model_by_id(model_id)

        # Turn the model into a text description.
        english_assembler = EnglishAssembler(model)
        desc = english_assembler.make_model()

        # Respond to the BA.
        resp = KQMLList('SUCCESS')
        resp.sets('description', desc)
        return resp
Example #4
File: api.py Project: johnbachman/indra
def assemble_english():
    """Assemble each statement into """
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    sentences = {}
    for st in stmts:
        enga = EnglishAssembler()
        enga.add_statements([st])
        model_str = enga.make_model()
        sentences[st.uuid] = model_str
    res = {'sentences': sentences}
    return res
Example #6
def report_paths(scored_paths, model, stmts, cell_line):
    citations = {}
    citation_count = 1
    ab_name = 'p-S6(S235/236)'
    for drug in scored_paths.keys():
        paths = scored_paths[drug]
        for path, score in paths[:1]:
            title = 'How does %s treatment result in decreased %s' % \
                (drug, ab_name)
            title += ' in %s cells?' % cell_line
            print(title)
            print('=' * len(title))
            path_stmts = stmts_from_path(path, model, stmts)
            sentences = []
            for i, stmt in enumerate(path_stmts):
                if i == 0:
                    target = stmt.agent_list()[0].name
                    sentences.append('%s is a target of %s.' % (target, drug))
                # Make citations
                pmids = [ev.pmid for ev in stmt.evidence if ev.pmid]
                cit_nums = []
                for pmid in pmids:
                    cit_num = citations.get(pmid)
                    if cit_num is None:
                        citations[pmid] = citation_count
                        cit_num = citation_count
                        citation_count += 1
                    cit_nums.append(cit_num)
                if cit_nums:
                    cit_nums = sorted(list(set(cit_nums)))
                    cit_str = ' [%s]' % (','.join([str(c) for c in cit_nums]))
                else:
                    cit_str = ''
                ea = EnglishAssembler([stmt])
                sentence = ea.make_model()
                sentence = sentence[:-1] + cit_str + '.'
                sentences.append(sentence)
            sentences[-1] = sentences[-1][:-1] + \
                ', which is measured by %s.' % ab_name
            print(' '.join(sentences))
            print()
    references = 'References\n==========\n'
    for k, v in sorted(citations.items(), key=lambda x: x[1]):
        references += '[%d] https://www.ncbi.nlm.nih.gov/pubmed/%s\n' % (v, k)
    print(references)
Example #7
    def send_null_provenance(self, stmt, for_what, reason=''):
        """Send out that no provenance could be found for a given Statement."""
        content_fmt = ('<h4>No supporting evidence found for {statement} from '
                       '{cause}{reason}.</h4>')
        content = KQMLList('add-provenance')
        stmt_txt = EnglishAssembler([stmt]).make_model()
        content.sets('html', content_fmt.format(statement=stmt_txt,
                                                cause=for_what, reason=reason))
        return self.tell(content)
Example #8
def _english_from_agents_type(agA_name, agB_name, stmt_type):
    agA = Agent(agA_name)
    agB = Agent(agB_name)
    StmtClass = get_statement_by_name(stmt_type)
    if stmt_type.lower() == 'complex':
        stmt = StmtClass([agA, agB])
    else:
        stmt = StmtClass(agA, agB)
    return EnglishAssembler([stmt]).make_model()
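
A hedged usage sketch for the helper above, assuming it is in scope together with its dependencies; the exact output wording depends on the EnglishAssembler version:

from indra.statements import Agent, get_statement_by_name  # used by the helper above
from indra.assemblers.english import EnglishAssembler      # used by the helper above

# Non-Complex statement types take (subject, object) positional agents, while
# Complex takes a single list of members, hence the branch in the helper.
print(_english_from_agents_type('BRAF', 'MAP2K1', 'Phosphorylation'))
# e.g. 'BRAF phosphorylates MAP2K1.'
print(_english_from_agents_type('BRAF', 'MAP2K1', 'Complex'))
# e.g. 'BRAF binds MAP2K1.'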
Example #9
    def send_model_diagnoses(self, res):
        # SUGGESTIONS
        # If there is an explanation, english assemble it
        expl_path = res.get('explanation_path')
        if expl_path:
            # Only send this if we haven't already sent an explanation
            if not self.have_explanation:
                ea_path = EnglishAssembler(expl_path)
                path_str = ea_path.make_model()
                ea_goal = EnglishAssembler([self.mra.explain])
                goal_str = ea_goal.make_model()
                if path_str and goal_str:
                    explanation_str = (
                            'Our model can now explain how %s: <i>%s</i>' %
                            (goal_str[:-1], path_str))
                    content = KQMLList('SPOKEN')
                    content.sets('WHAT', explanation_str)
                    self.tell(content)

        # If there is a suggestion, say it
        suggs = res.get('stmt_suggestions')
        if suggs:
            say = 'I have some suggestions on how to complete our model.'
            say += ' We could try modeling one of:<br>'
            stmt_str = '<ul>%s</ul>' % \
                       ''.join([('<li>%s</li>' % EnglishAssembler([stmt]).make_model())
                                for stmt in suggs])
            say += stmt_str
            content = KQMLList('SPOKEN')
            content.sets('WHAT', say)
            self.tell(content)

        # If there are corrections
        corrs = res.get('stmt_corrections')
        if corrs:
            stmt = corrs[0]
            say = 'It looks like a required activity is missing,'
            say += ' consider revising to <i>%s</i>' % \
                   (EnglishAssembler([stmt]).make_model())
            content = KQMLList('SPOKEN')
            content.sets('WHAT', say)
            self.tell(content)
Example #10
    def send_model_diagnoses(self, res):
        diagnostic_tells = []

        # SUGGESTIONS
        # If there is an explanation, english assemble it
        expl_path = res.get('explanation_path')
        if expl_path:
            # Only send this if we haven't already sent an explanation
            if not self.have_explanation:
                ea_path = EnglishAssembler(expl_path)
                path_str = ea_path.make_model()
                ea_goal = EnglishAssembler([self.mra.explain])
                goal_str = ea_goal.make_model()
                if path_str and goal_str:
                    explanation_str = (
                        'Our model can now explain how %s: <i>%s</i>' %
                        (goal_str[:-1], path_str))
                    diagnostic_tells.append(explanation_str)

        # If there is a suggestion, say it
        suggs = res.get('stmt_suggestions')
        if suggs:
            say = 'I have some suggestions on how to complete our model.'
            say += ' We could try modeling one of:<br>'
            stmt_str = '<ul>%s</ul>' % \
                       ''.join([('<li>%s</li>' % EnglishAssembler([stmt]).make_model())
                                for stmt in suggs])
            say += stmt_str
            diagnostic_tells.append(say)

        # If there are corrections
        corrs = res.get('stmt_corrections')
        if corrs:
            stmt = corrs[0]
            say = 'It looks like a required activity may be missing,'
            say += ' say \'%s\' to add it.' % \
                   (EnglishAssembler([stmt]).make_model())
            diagnostic_tells.append(say)

        # Finally, say all we have to say
        for text in diagnostic_tells:
            content = KQMLList('SPOKEN')
            content.sets('WHAT', text)
            # TELLING DIRECTLY HERE IS CURRENTLY INACTIVATED,
            # IT'S THE BA's RESPONSIBILITY TO DO THIS
            # self.tell(content)

        return diagnostic_tells
Example #11
def _format_stmt_text(stmt):
    # Get the English assembled statement
    ea = EnglishAssembler([stmt])
    english = ea.make_model()
    if not english:
        english = str(stmt)
    indices = []
    for ag in stmt.agent_list():
        if ag is None or not ag.name:
            continue
        url = id_url(ag)
        if url is None:
            continue
        # Build up a set of indices
        tag_start = "<a href='%s'>" % url
        tag_close = "</a>"
        # FIXME: the EnglishAssembler capitalizes the first letter of
        # each sentence. In some cases this prevents a match here and
        # no agent link is produced.
        indices += [(m.start(), m.start() + len(ag.name), ag.name,
                     tag_start, tag_close)
                    for m in re.finditer(re.escape(ag.name), english)]
    return tag_text(english, indices)
Example #12
def get_annotation_text(stmt, annotate_agents=True):
    ea = EnglishAssembler(stmts=[stmt])
    annotation_text = ea.make_model()
    if annotate_agents:
        inserts = []
        for agent_wc in ea.stmt_agents[0]:
            for insert_begin, insert_len in inserts:
                if insert_begin < agent_wc.coords[0]:
                    agent_wc.update_coords(insert_len)
            db_ns, db_id = get_grounding(agent_wc.db_refs, grounding_ns)
            if not db_ns:
                continue
            identifiers_url = \
                identifiers.get_identifiers_url(db_ns, db_id)
            grounding_text = '[%s](%s)' % (agent_wc.name, identifiers_url)
            insert_len = len(grounding_text) - agent_wc.coords[1] + \
                agent_wc.coords[0]
            inserts.append((agent_wc.coords[0], insert_len))
            before_part = annotation_text[:agent_wc.coords[0]]
            after_part = annotation_text[agent_wc.coords[1]:]
            annotation_text = ''.join(
                [before_part, grounding_text, after_part])
    return annotation_text
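
A sketch of how the function above might be called, assuming it is in scope along with the module-level helpers it relies on (get_grounding, grounding_ns, identifiers); the exact markdown produced is illustrative only:

from indra.statements import Agent, Phosphorylation

# Agents need db_refs so that a grounding namespace can be resolved and an
# identifiers.org link can be inserted into the assembled sentence.
braf = Agent('BRAF', db_refs={'HGNC': '1097'})
map2k1 = Agent('MAP2K1', db_refs={'HGNC': '6840'})
stmt = Phosphorylation(braf, map2k1)
print(get_annotation_text(stmt, annotate_agents=True))
# e.g. '[BRAF](https://identifiers.org/hgnc:1097) phosphorylates '
#      '[MAP2K1](https://identifiers.org/hgnc:6840).'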
Example #13
def _format_stmt_text(stmt):
    # Get the English assembled statement
    ea = EnglishAssembler([stmt])
    english = ea.make_model()
    if not english:
        english = str(stmt)
    indices = []
    for ag in stmt.agent_list():
        if ag is None or not ag.name:
            continue
        url = id_url(ag)
        if url is None:
            continue
        # Build up a set of indices
        tag_start = "<a href='%s'>" % url
        tag_close = "</a>"
        # FIXME: the EnglishAssembler capitalizes the first letter of
        # each sentence. In some cases this prevents a match here and
        # no agent link is produced.
        indices += [(m.start(), m.start() + len(ag.name), ag.name,
                     tag_start, tag_close)
                    for m in re.finditer(re.escape(ag.name), english)]
    return tag_text(english, indices)
Example #14
File: api.py Project: steppi/indra
    def post(self):
        """Assemble each statement into English sentence.

        Parameters
        ----------
        statements : list[indra.statements.Statement.to_json()]
            A list of INDRA Statements to assemble.

        Returns
        -------
        sentences : dict
            Dictionary mapping Statement UUIDs with English sentences.
        """
        args = request.json
        stmts_json = args.get('statements')
        stmts = stmts_from_json(stmts_json)
        sentences = {}
        for st in stmts:
            enga = EnglishAssembler()
            enga.add_statements([st])
            model_str = enga.make_model()
            sentences[st.uuid] = model_str
        res = {'sentences': sentences}
        return res
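
A client-side sketch for calling an endpoint like the one above; the base URL and route are placeholders assumed for illustration, not confirmed by the source:

import requests
from indra.statements import Agent, Phosphorylation, stmts_to_json

# Serialize one Statement to JSON and post it; the response maps each
# Statement UUID to its English sentence.
stmt = Phosphorylation(Agent('BRAF'), Agent('MAP2K1'))
resp = requests.post('http://localhost:8080/assemblers/english',  # placeholder URL/route
                     json={'statements': stmts_to_json([stmt])})
print(resp.json()['sentences'])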
Example #15
def format_stmts(stmts, output_format, ev_counts=None, source_counts=None):
    if output_format == 'tsv':
        msg = ''
        for stmt in stmts:
            if not stmt.evidence:
                logger.warning('Statement %s without evidence' % stmt.uuid)
                txt = ''
                pmid = ''
            else:
                txt = '"%s"' % stmt.evidence[0].text if \
                    stmt.evidence[0].text else ''
                pmid = stmt.evidence[0].pmid if stmt.evidence[0].pmid else ''
            try:
                ea_txt = EnglishAssembler([stmt]).make_model()
            except Exception as e:
                ea_txt = ''
                logger.error('English assembly failed for %s' % stmt)
                logger.error(e)
            line = '%s\t%s\t%s\tPMID%s\n' % (stmt, ea_txt, txt, pmid)
            msg += line
        return msg
    elif output_format == 'pkl':
        fname = 'indrabot.pkl'
        with open(fname, 'wb') as fh:
            pickle.dump(stmts, fh)
        return fname
    elif output_format == 'pdf':
        fname = 'indrabot.pdf'
        ga = GraphAssembler(stmts)
        ga.make_model()
        ga.save_pdf(fname)
        return fname
    elif output_format == 'json':
        msg = json.dumps(stmts_to_json(stmts), indent=1)
        return msg
    elif output_format == 'html':
        ev_counts = {} if not ev_counts else ev_counts
        ha = HtmlAssembler(stmts, ev_totals=ev_counts,
                           source_counts=source_counts)
        fname = 'indrabot.html'
        ha.save_model(fname)
        return fname
    return None
Example #16
stmt_freq = [(_stmt_from_rule(model, r[0], statements), r[1])
             for r in dist_filt]
combined_freq = {}
for stmt, freq in stmt_freq:
    if stmt.uuid not in combined_freq:
        combined_freq[stmt.uuid] = (stmt, freq)
    else:
        _, old_freq = combined_freq[stmt.uuid]
        combined_freq[stmt.uuid] = (stmt, freq + old_freq)
top_stmts = list(combined_freq.values())
top_stmts.sort(key=lambda x: x[1], reverse=True)

desc = []
for s, freq in top_stmts:
    ea = EnglishAssembler([s])
    text = ea.make_model()
    desc.append((text, freq))
for t, f in desc[:30]:
    print('%s,%s' % (t, f))
"""
str_names, freqs = zip(*dist_filt)
num_genes = 30
plt.ion()
plt.figure(figsize=(5,2), dpi=150)
ypos = np.array(range(num_genes)) * 1.0
plt.bar(ypos, freqs[:num_genes], align='center')
plt.xticks(ypos, str_names[:num_genes], rotation='vertical')
ax = plt.gca()
plt.ylabel('Frequency')
plt.subplots_adjust(bottom=0.3)
Example #17
def stmt_to_english(stmt):
    """Return an English assembled Statement as a sentence."""
    ea = EnglishAssembler([stmt])
    return ea.make_model()[:-1]
Example #18
def render_stmt_graph(statements, reduce=True, english=False, rankdir=None,
                      agent_style=None):
    """Render the statement hierarchy as a pygraphviz graph.

    Parameters
    ----------
    statements : list of :py:class:`indra.statements.Statement`
        A list of top-level statements with associated supporting statements
        resulting from building a statement hierarchy with
        :py:meth:`combine_related`.
    reduce : bool
        Whether to perform a transitive reduction of the edges in the graph.
        Default is True.
    english : bool
        If True, the statements in the graph are represented by their
        English-assembled equivalent; otherwise they are represented as
        text-formatted Statements.
    rankdir : str or None
        Argument to pass through to the pygraphviz `AGraph` constructor
        specifying graph layout direction. In particular, a value of 'LR'
        specifies a left-to-right direction. If None, the pygraphviz default
        is used.
    agent_style : dict or None
        Dict of attributes specifying the visual properties of nodes. If None,
        the following default attributes are used::

            agent_style = {'color': 'lightgray', 'style': 'filled',
                           'fontname': 'arial'}

    Returns
    -------
    pygraphviz.AGraph
        Pygraphviz graph with nodes representing statements and edges pointing
        from supported statements to supported_by statements.

    Examples
    --------
    Pattern for getting statements and rendering as a Graphviz graph:

    >>> from indra.preassembler.hierarchy_manager import hierarchies
    >>> braf = Agent('BRAF')
    >>> map2k1 = Agent('MAP2K1')
    >>> st1 = Phosphorylation(braf, map2k1)
    >>> st2 = Phosphorylation(braf, map2k1, residue='S')
    >>> pa = Preassembler(hierarchies, [st1, st2])
    >>> pa.combine_related() # doctest:+ELLIPSIS
    [Phosphorylation(BRAF(), MAP2K1(), S)]
    >>> graph = render_stmt_graph(pa.related_stmts)
    >>> graph.write('example_graph.dot') # To make the DOT file
    >>> graph.draw('example_graph.png', prog='dot') # To make an image

    Resulting graph:

    .. image:: /images/example_graph.png
        :align: center
        :alt: Example statement graph rendered by Graphviz

    """
    from indra.assemblers.english import EnglishAssembler
    # Set the default agent formatting properties
    if agent_style is None:
        agent_style = {'color': 'lightgray', 'style': 'filled',
                       'fontname': 'arial'}
    # Sets to store all of the nodes and edges as we recursively process all
    # of the statements
    nodes = set([])
    edges = set([])
    stmt_dict = {}

    # Recursive function for processing all statements
    def process_stmt(stmt):
        nodes.add(str(stmt.matches_key()))
        stmt_dict[str(stmt.matches_key())] = stmt
        for sby_ix, sby_stmt in enumerate(stmt.supported_by):
            edges.add((str(stmt.matches_key()), str(sby_stmt.matches_key())))
            process_stmt(sby_stmt)

    # Process all of the top-level statements, getting the supporting statements
    # recursively
    for stmt in statements:
        process_stmt(stmt)
    # Create a networkx graph from the nodes
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from(edges)
    # Perform transitive reduction if desired
    if reduce:
        nx_graph = nx.algorithms.dag.transitive_reduction(nx_graph)
    # Create a pygraphviz graph from the nx graph
    try:
        pgv_graph = pgv.AGraph(name='statements', directed=True,
                               rankdir=rankdir)
    except NameError:
        logger.error('Cannot generate graph because '
                     'pygraphviz could not be imported.')
        return None
    for node in nx_graph.nodes():
        stmt = stmt_dict[node]
        if english:
            ea = EnglishAssembler([stmt])
            stmt_str = ea.make_model()
        else:
            stmt_str = str(stmt)
        pgv_graph.add_node(node,
                           label='%s (%d)' % (stmt_str, len(stmt.evidence)),
                           **agent_style)
    pgv_graph.add_edges_from(nx_graph.edges())
    return pgv_graph
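
A small follow-up sketch (hypothetical usage, reusing pa.related_stmts from the docstring example above) showing the english flag, which labels nodes with EnglishAssembler sentences instead of text-formatted Statements:

# Same pattern as the docstring example, but with English node labels
# and a left-to-right layout.
graph = render_stmt_graph(pa.related_stmts, english=True, rankdir='LR')
graph.draw('example_graph_english.png', prog='dot')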
Example #19
    def respond_find_qca_path(self, content):
        """Response content to find-qca-path request"""
        if self.qca.ndex is None:
            reply = self.make_failure('SERVICE_UNAVAILABLE')
            return reply

        source_arg = content.get('SOURCE')
        target_arg = content.get('TARGET')
        reltype_arg = content.get('RELTYPE')

        if not source_arg:
            raise ValueError("Source list is empty")
        if not target_arg:
            raise ValueError("Target list is empty")

        target = self.get_agent(target_arg)
        if target is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('TARGET_MISSING')
            return reply

        source = self.get_agent(source_arg)
        if source is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('SOURCE_MISSING')
            return reply

        if reltype_arg is None or len(reltype_arg) == 0:
            relation_types = None
        else:
            relation_types = [str(k.data) for k in reltype_arg.data]

        results_list = self.qca.find_causal_path([source.name], [target.name],
                                                 relation_types=relation_types)
        if not results_list:
            reply = self.make_failure('NO_PATH_FOUND')
            return reply

        def get_path_statements(results_list):
            stmts_list = []
            for res in results_list:
                # Edges of the first result
                edges = res[1::2]
                # INDRA JSON of the edges of the result
                try:
                    indra_edges = [fe[0]['__INDRA json'] for fe in edges]
                except Exception:
                    indra_edges = [fe[0]['INDRA json'] for fe in edges]
                # Make the JSONs dicts from strings
                indra_edges = [json.loads(e) for e in indra_edges]
                # Now fix the edges if needed due to INDRA Statement changes
                indra_edges = _fix_indra_edges(indra_edges)
                stmts_list.append(indra_edges)
            return stmts_list

        paths_list = get_path_statements(results_list)

        self.report_paths_graph(paths_list)

        # Take the first one to report
        indra_edges = paths_list[0]
        # Get the INDRA Statement objects
        indra_edge_stmts = stmts_from_json(indra_edges)
        # Assemble into English
        for stmt in indra_edge_stmts:
            txt = EnglishAssembler([stmt]).make_model()
            self.send_provenance_for_stmts(
                [stmt], "the path from %s to %s (%s)" % (source, target, txt))
        edges_cl_json = self.make_cljson(indra_edge_stmts)
        paths = KQMLList()
        paths.append(edges_cl_json)
        reply = KQMLList('SUCCESS')
        reply.set('paths', paths)

        return reply
Example #20
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(bio_ontology)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(bio_ontology, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief,
                     reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Example #21
File: __init__.py Project: maparent/indra
def render_stmt_graph(statements,
                      reduce=True,
                      english=False,
                      rankdir=None,
                      agent_style=None):
    """Render the statement hierarchy as a pygraphviz graph.

    Parameters
    ----------
    statements : list of :py:class:`indra.statements.Statement`
        A list of top-level statements with associated supporting statements
        resulting from building a statement hierarchy with
        :py:meth:`combine_related`.
    reduce : bool
        Whether to perform a transitive reduction of the edges in the graph.
        Default is True.
    english : bool
        If True, the statements in the graph are represented by their
        English-assembled equivalent; otherwise they are represented as
        text-formatted Statements.
    rankdir : str or None
        Argument to pass through to the pygraphviz `AGraph` constructor
        specifying graph layout direction. In particular, a value of 'LR'
        specifies a left-to-right direction. If None, the pygraphviz default
        is used.
    agent_style : dict or None
        Dict of attributes specifying the visual properties of nodes. If None,
        the following default attributes are used::

            agent_style = {'color': 'lightgray', 'style': 'filled',
                           'fontname': 'arial'}

    Returns
    -------
    pygraphviz.AGraph
        Pygraphviz graph with nodes representing statements and edges pointing
        from supported statements to supported_by statements.

    Examples
    --------
    Pattern for getting statements and rendering as a Graphviz graph:

    >>> from indra.ontology.bio import bio_ontology
    >>> braf = Agent('BRAF')
    >>> map2k1 = Agent('MAP2K1')
    >>> st1 = Phosphorylation(braf, map2k1)
    >>> st2 = Phosphorylation(braf, map2k1, residue='S')
    >>> pa = Preassembler(bio_ontology, [st1, st2])
    >>> pa.combine_related() # doctest:+ELLIPSIS
    [Phosphorylation(BRAF(), MAP2K1(), S)]
    >>> graph = render_stmt_graph(pa.related_stmts)
    >>> graph.write('example_graph.dot') # To make the DOT file
    >>> graph.draw('example_graph.png', prog='dot') # To make an image

    Resulting graph:

    .. image:: /images/example_graph.png
        :align: center
        :alt: Example statement graph rendered by Graphviz

    """
    import pygraphviz as pgv
    from indra.assemblers.english import EnglishAssembler
    # Set the default agent formatting properties
    if agent_style is None:
        agent_style = {
            'color': 'lightgray',
            'style': 'filled',
            'fontname': 'arial'
        }
    # Sets to store all of the nodes and edges as we recursively process all
    # of the statements
    nodes = set([])
    edges = set([])
    stmt_dict = {}

    # Recursive function for processing all statements
    def process_stmt(stmt):
        nodes.add(str(stmt.matches_key()))
        stmt_dict[str(stmt.matches_key())] = stmt
        for sby_ix, sby_stmt in enumerate(stmt.supported_by):
            edges.add((str(stmt.matches_key()), str(sby_stmt.matches_key())))
            process_stmt(sby_stmt)

    # Process all of the top-level statements, getting the supporting statements
    # recursively
    for stmt in statements:
        process_stmt(stmt)
    # Create a networkx graph from the nodes
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from(edges)
    # Perform transitive reduction if desired
    if reduce:
        nx_graph = nx.algorithms.dag.transitive_reduction(nx_graph)
    # Create a pygraphviz graph from the nx graph
    try:
        pgv_graph = pgv.AGraph(name='statements',
                               directed=True,
                               rankdir=rankdir)
    except NameError:
        logger.error('Cannot generate graph because '
                     'pygraphviz could not be imported.')
        return None
    for node in nx_graph.nodes():
        stmt = stmt_dict[node]
        if english:
            ea = EnglishAssembler([stmt])
            stmt_str = ea.make_model()
        else:
            stmt_str = str(stmt)
        pgv_graph.add_node(node,
                           label='%s (%d)' % (stmt_str, len(stmt.evidence)),
                           **agent_style)
    pgv_graph.add_edges_from(nx_graph.edges())
    return pgv_graph
Example #22
def path_to_english(path, model, stmts):
    path_stmts = stmts_from_path(path, model, stmts)
    ea = EnglishAssembler(path_stmts)
    return ea.make_model()
Example #23
def stmt_to_english(stmt):
    """Return an English assembled Statement as a sentence."""
    ea = EnglishAssembler([stmt])
    return ea.make_model()[:-1]
Example #24
    write_unicode_csv('reach_rule_frequencies.tsv', frequencies,
                      delimiter='\t')

    sample_rows = []
    max_sample_size = 20
    for rule, freq in frequencies:
        stmts = stmts_by_rule[rule]
        if max_sample_size < len(stmts):
            sample_stmts = np.random.choice(stmts,
                                            max_sample_size, replace=False)
        else:
            sample_stmts = stmts
        for stmt in sample_stmts:
            for ag in stmt.agent_list():
                if ag is not None:
                    ag.name = ag.db_refs.get('TEXT')
            is_hypothesis = stmt.evidence[0].epistemics.get('hypothesis', '')
            is_direct = stmt.evidence[0].epistemics.get('direct', '')
            # Get the English assembly of the statement
            eng = EnglishAssembler([stmt])
            eng_sentence = eng.make_model()
            if eng_sentence == '':
                eng_sentence = str(stmt)
            sample_rows.append([eng_sentence, is_hypothesis, '', '', '',
                                stmt.evidence[0].pmid,
                                stmt.evidence[0].text, rule, freq, stmt,
                                is_direct])

    write_unicode_csv('stmts_by_rule_to_curate.tsv', sample_rows,
                      delimiter='\t')
Example #25
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Example #26
    sample_rows = []
    max_sample_size = 20
    for rule, freq in frequencies:
        stmts = stmts_by_rule[rule]
        if max_sample_size < len(stmts):
            sample_stmts = np.random.choice(stmts,
                                            max_sample_size,
                                            replace=False)
        else:
            sample_stmts = stmts
        for stmt in sample_stmts:
            for ag in stmt.agent_list():
                if ag is not None:
                    ag.name = ag.db_refs.get('TEXT')
            is_hypothesis = stmt.evidence[0].epistemics.get('hypothesis', '')
            is_direct = stmt.evidence[0].epistemics.get('direct', '')
            # Get the English assembly of the statement
            eng = EnglishAssembler([stmt])
            eng_sentence = eng.make_model()
            if eng_sentence == '':
                eng_sentence = str(stmt)
            sample_rows.append([
                eng_sentence, is_hypothesis, '', '', '', stmt.evidence[0].pmid,
                stmt.evidence[0].text, rule, freq, stmt, is_direct
            ])

    write_unicode_csv('stmts_by_rule_to_curate.tsv',
                      sample_rows,
                      delimiter='\t')
Example #27
File: api.py Project: kolusask/indra_db
def get_metadata(level):
    start = datetime.utcnow()
    query = request.args.copy()

    # Figure out authorization.
    has = dict.fromkeys(['elsevier', 'medscan'], False)
    user, roles = resolve_auth(query)
    for role in roles:
        for resource in has.keys():
            has[resource] |= role.permissions.get(resource, False)
    logger.info('Auths: %s' % str(has))

    w_curations = _pop(query, 'with_cur_counts', False)

    kwargs = dict(limit=_pop(query, 'limit', type_cast=int),
                  offset=_pop(query, 'offset', type_cast=int),
                  best_first=_pop(query, 'best_first', True))
    try:
        db_query = _db_query_from_web_query(query, {'HasAgent'}, True)
    except Exception as e:
        abort(Response(f'Problem forming query: {e}', 400))
        return

    if not has['medscan']:
        db_query -= HasOnlySource('medscan')

    if level == 'hashes':
        res = db_query.get_interactions(**kwargs)
    elif level == 'relations':
        res = db_query.get_relations(with_hashes=w_curations, **kwargs)
    elif level == 'agents':
        res = db_query.get_agents(with_hashes=w_curations, **kwargs)
    else:
        abort(Response(f'Invalid level: {level}'))
        return

    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Got %s results after %.2f." % (len(res.results), dt))

    ret = res.json()
    res_list = []
    for key, entry in ret.pop('results').items():
        # Filter medscan from source counts.
        if not has['medscan']:
            res.evidence_totals[key] -= entry['source_counts'].pop(
                'medscan', 0)
            entry['total_count'] = res.evidence_totals[key]
            if not entry['source_counts']:
                logger.warning("Censored content present.")
                continue

        # Create english
        if level == 'agents':
            ag_dict = entry['agents']
            if len(ag_dict) == 0:
                eng = ''
            else:
                ag_list = list(ag_dict.values())
                eng = ag_list[0]
                if len(ag_dict) > 1:
                    eng += ' interacts with ' + ag_list[1]
                    if len(ag_dict) > 3:
                        eng += ', ' + ', '.join(ag_list[2:-1])
                    if len(ag_dict) > 2:
                        eng += ', and ' + ag_list[-1]
        else:
            eng = EnglishAssembler([stmt_from_interaction(entry)]).make_model()
        entry['english'] = eng

        res_list.append(entry)

    # Look up curations, if result with_curations was set.
    if w_curations:
        rel_hash_lookup = {}
        if level == 'hashes':
            for rel in res_list:
                rel['cur_count'] = 0
                rel_hash_lookup[rel['hash']] = rel
        else:
            for rel in res_list:
                for h in rel['hashes']:
                    rel['cur_count'] = 0
                    rel_hash_lookup[h] = rel
        curations = get_curations(pa_hash=set(rel_hash_lookup.keys()))
        for cur in curations:
            rel_hash_lookup[cur.pa_hash]['cur_count'] += 1

    # Finish up the query.
    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Returning with %s results after %.2f seconds." %
                (len(res_list), dt))

    ret['relations'] = res_list
    resp = Response(json.dumps(ret), mimetype='application/json')

    dt = (datetime.utcnow() - start).total_seconds()
    logger.info("Result prepared after %.2f seconds." % dt)
    return resp