Example #1
 def check_response_to_message(self, output):
     assert output.head() == 'SUCCESS', output
     paths = output.get('paths')
     for path in paths:
         stmts = stmts_from_json(json.loads(path.string_value()))
         assert stmts[0].agent_list()[0].name == self.agents[0]
         assert stmts[-1].agent_list()[1].name == self.agents[1]
Example #2
def get_statements_by_hash(hash_list, ev_limit=100, best_first=True, tries=2):
    """Get fully formed statements from a list of hashes.

    Parameters
    ----------
    hash_list : list[int or str]
        A list of statement hashes.
    ev_limit : int or None
        Limit the amount of evidence returned per Statement. Default is 100.
    best_first : bool
        If True, the preassembled statements will be sorted by the amount of
        evidence they have, and those with the most evidence will be
        prioritized. When using `max_stmts`, this means you will get the "best"
        statements. If False, statements will be queried in arbitrary order.
    tries : int > 0
        Set the number of times to try the query. The database often caches
        results, so if a query times out the first time, trying again after a
        timeout will often succeed fast enough to avoid a timeout. This can
        also help gracefully handle an unreliable connection, if you're
        willing to wait. Default is 2.
    """
    if not isinstance(hash_list, list):
        raise ValueError("The `hash_list` input is a list, not %s."
                         % type(hash_list))
    if not hash_list:
        return []
    if isinstance(hash_list[0], str):
        hash_list = [int(h) for h in hash_list]
    if not all([isinstance(h, int) for h in hash_list]):
        raise ValueError("Hashes must be ints or strings that can be "
                         "converted into ints.")
    resp = submit_statement_request('post', 'from_hashes', ev_limit=ev_limit,
                                    data={'hashes': hash_list},
                                    best_first=best_first, tries=tries)
    return stmts_from_json(resp.json()['statements'].values())
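A minimal usage sketch for the function above; the hash values and evidence limit are hypothetical placeholders, and the call assumes the function is imported from the same module as in this example.

# Hypothetical usage: the hashes below are placeholders, not real statement hashes.
example_hashes = [-30134498390037179, 32087882061221380]
stmts = get_statements_by_hash(example_hashes, ev_limit=5, best_first=True)
for stmt in stmts:
    # Each element is a fully formed INDRA Statement with its evidence attached.
    print(type(stmt).__name__, len(stmt.evidence))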
Example #3
 def check_response_to_message(self, output):
     assert output.head() == 'SUCCESS', output
     paths = output.get('paths')
     assert len(paths) == 1, len(paths)
     path = paths[0].string_value()
     path_json = json.loads(path)
     stmts = stmts_from_json(path_json)
     assert len(stmts) == 1, stmts
     assert isinstance(stmts[0], Gef), stmts[0]
     assert stmts[0].ras.name == 'KRAS', stmts[0].ras.name
     assert stmts[0].gef.name == 'SOS1', stmts[0].gef.name
Example #4
 def build_model_from_json(self, model_json):
     """Build a model using INDRA JSON."""
     stmts = stmts_from_json(json.loads(model_json))
     model_id = self.new_model(stmts)
     res = {'model_id': model_id,
            'model': stmts}
     if not stmts:
         return res
     model_exec = self.assemble_pysb(stmts)
     res['model_exec'] = model_exec
     res['diagrams'] = make_diagrams(model_exec, model_id,
                                     self.models[model_id], self.context)
     return res
Example #5
 def report_paths_graph(self, paths_list):
     from indra.assemblers.graph import GraphAssembler
     from indra.util import flatten
     path_stmts = [stmts_from_json(l) for l in paths_list]
     all_stmts = flatten(path_stmts)
     ga = GraphAssembler(all_stmts)
     ga.make_model()
     resource = get_img_path('qca_paths.png')
     ga.save_pdf(resource)
     content = KQMLList('display-image')
     content.set('type', 'simulation')
     content.sets('path', resource)
     self.tell(content)
Example #6
    def _merge_json(self, stmt_json, ev_counts):
        """Merge these statement jsons with new jsons."""
        # Where there is overlap, there _should_ be agreement.
        self.__evidence_counts.update(ev_counts)

        for k, sj in stmt_json.items():
            if k not in self.__statement_jsons:
                self.__statement_jsons[k] = sj  # This should be most of them
            else:
                # This should only happen rarely.
                for evj in sj['evidence']:
                    self.__statement_jsons[k]['evidence'].append(evj)

        if not self.__started:
            self.statements_sample = stmts_from_json(
                self.__statement_jsons.values())
            self.__started = True
        return
Example #7
 def expand_model_from_json(self, model_json, model_id):
     """Expand a model using INDRA JSON."""
     stmts = stmts_from_json(json.loads(model_json))
     new_model_id, new_stmts = self.extend_model(stmts, model_id)
     logger.info('Old model id: %s, New model id: %s' %
                 (model_id, new_model_id))
     model_stmts = self.models[new_model_id]
     res = {'model_id': new_model_id,
            'model': model_stmts}
     if not model_stmts:
         return res
     res['model_new'] = new_stmts
     model_exec = self.assemble_pysb(model_stmts)
     res['model_exec'] = model_exec
     res['diagrams'] = make_diagrams(model_exec, new_model_id,
                                     self.models[new_model_id],
                                     self.context)
     return res
Example #8
def get_statements_for_paper(ids, ev_limit=10, best_first=True, tries=2,
                             max_stmts=None):
    """Get the set of raw Statements extracted from a paper given by the id.

    Parameters
    ----------
    ids : list[(<id type>, <id value>)]
        A list of tuples with ids and their type. The type can be any one of
        'pmid', 'pmcid', 'doi', 'pii', 'manuscript id', or 'trid', which is the
        primary key id of the text references in the database.
    ev_limit : int or None
        Limit the amount of evidence returned per Statement. Default is 10.
    best_first : bool
        If True, the preassembled statements will be sorted by the amount of
        evidence they have, and those with the most evidence will be
        prioritized. When using `max_stmts`, this means you will get the "best"
        statements. If False, statements will be queried in arbitrary order.
    tries : int > 0
        Set the number of times to try the query. The database often caches
        results, so if a query times out the first time, trying again after a
        timeout will often succeed fast enough to avoid a timeout. This can also
        help gracefully handle an unreliable connection, if you're willing to
        wait. Default is 2.
    max_stmts : int or None
        Select a maximum number of statements to be returned. Default is None.

    Returns
    -------
    stmts : list[:py:class:`indra.statements.Statement`]
        A list of INDRA Statement instances.
    """
    id_l = [{'id': id_val, 'type': id_type} for id_type, id_val in ids]
    resp = submit_statement_request('post', 'from_papers', data={'ids': id_l},
                                    ev_limit=ev_limit, best_first=best_first,
                                    tries=tries, max_stmts=max_stmts)
    stmts_json = resp.json()['statements']
    return stmts_from_json(stmts_json.values())
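A hypothetical call to the function above; the paper identifier is a placeholder, and the tuples follow the (<id type>, <id value>) form described in the docstring.

# Hypothetical usage with a placeholder PMID.
ids = [('pmid', '12345678')]
paper_stmts = get_statements_for_paper(ids, ev_limit=5, max_stmts=50)
print('%d statements extracted for the given paper ids' % len(paper_stmts))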
Example #9
def decode_indra_stmts(stmts_json_str):
    stmts_json = json.loads(stmts_json_str)
    stmts = stmts_from_json(stmts_json)
    return stmts
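A round-trip sketch for decode_indra_stmts, assuming stmts_to_json is available from indra.statements; the Phosphorylation statement is a made-up input.

import json
from indra.statements import Agent, Phosphorylation, stmts_to_json

# Serialize a toy statement to a JSON string and decode it back.
stmt = Phosphorylation(Agent('MAP2K1'), Agent('MAPK1'))
stmts_json_str = json.dumps(stmts_to_json([stmt]))
decoded = decode_indra_stmts(stmts_json_str)
assert decoded[0].matches(stmt)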
Example #10
def insert_agents(db, stmt_tbl_obj, agent_tbl_obj, *other_stmt_clauses,
                  **kwargs):
    """Insert agents for statements that don't have any agents.

    Note: This method currently works for both Statements and PAStatements and
    their corresponding agents (Agents and PAAgents).

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are adding agents.
    stmt_tbl_obj : :py:class:`sqlalchemy.Base` table object
        For example, `db.Statements`. The object corresponding to the
        statement table you are creating agents for.
    agent_tbl_obj : :py:class:`sqlalchemy.Base` table object
        The agent table corresponding to the statement table above.
    *other_stmt_clauses : sqlalchemy clauses
        Further arguments, such as `db.Statements.db_ref == 1`, are used to
        restrict the scope of statements whose agents may be added.
    verbose : bool
        If True, print extra information and a status bar while compiling
        agents for insert from statements. Default False.
    num_per_yield : int
        To conserve memory, statements are loaded in batches of `num_per_yield`
        using the `yield_per` feature of sqlalchemy queries.
    """
    verbose = kwargs.pop('verbose', False)
    num_per_yield = kwargs.pop('num_per_yield', 100)
    override_default_query = kwargs.pop('override_default_query', False)
    if len(kwargs):
        raise IndraDatabaseError("Unrecognized keyword argument(s): %s." %
                                 kwargs)
    # Build a dict mapping stmt UUIDs to statement IDs
    logger.info("Getting %s that lack %s in the database." %
                (stmt_tbl_obj.__tablename__, agent_tbl_obj.__tablename__))
    if not override_default_query:
        stmts_w_agents_q = db.filter_query(
            stmt_tbl_obj, stmt_tbl_obj.id == agent_tbl_obj.stmt_id)
        stmts_wo_agents_q = (db.filter_query(
            stmt_tbl_obj, *other_stmt_clauses).except_(stmts_w_agents_q))
    else:
        stmts_wo_agents_q = db.filter_query(stmt_tbl_obj, *other_stmt_clauses)
    logger.debug("Getting stmts with query:\n%s" % str(stmts_wo_agents_q))
    if verbose:
        num_stmts = stmts_wo_agents_q.count()
        print("Adding agents for %d statements." % num_stmts)
    stmts_wo_agents = stmts_wo_agents_q.yield_per(num_per_yield)

    # Now assemble agent records
    logger.info("Building agent data for insert...")
    if verbose:
        print("Loading:", end='', flush=True)
    agent_data = []
    for i, db_stmt in enumerate(stmts_wo_agents):
        # Convert the database statement entry object into an indra statement.
        stmt = stmts_from_json([json.loads(db_stmt.json.decode())])[0]

        # Figure out how the agents are structured and assign roles.
        ag_list = stmt.agent_list()
        nary_stmt_types = [
            Complex, SelfModification, ActiveForm, Conversion, Translocation
        ]
        if any([isinstance(stmt, tp) for tp in nary_stmt_types]):
            agents = {('OTHER', ag) for ag in ag_list}
        elif len(ag_list) == 2:
            agents = {('SUBJECT', ag_list[0]), ('OBJECT', ag_list[1])}
        else:
            raise IndraDatabaseError("Unhandled agent structure for stmt %s "
                                     "with agents: %s." %
                                     (str(stmt), str(stmt.agent_list())))

        # Prep the agents for copy into the database.
        for role, ag in agents:
            # If no agent, or no db_refs for the agent, skip the insert
            # that follows.
            if ag is None or ag.db_refs is None:
                continue
            for ns, ag_id in ag.db_refs.items():
                if isinstance(ag_id, list):
                    for sub_id in ag_id:
                        agent_data.append((db_stmt.id, ns, sub_id, role))
                else:
                    agent_data.append((db_stmt.id, ns, ag_id, role))

        # Optionally print another tick on the progress bar.
        if verbose and num_stmts > 25 and i % (num_stmts // 25) == 0:
            print('|', end='', flush=True)

    if verbose and num_stmts > 25:
        print()

    cols = ('stmt_id', 'db_name', 'db_id', 'role')
    db.copy(agent_tbl_obj.__tablename__, agent_data, cols)
    return
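A hypothetical invocation of the function above, assuming a DatabaseManager instance named db whose Statements and Agents tables match the docstring; the restricting clause reuses the docstring's own example.

# Hypothetical call: add agents only for statements matching the example clause.
insert_agents(db, db.Statements, db.Agents,
              db.Statements.db_ref == 1,
              verbose=True, num_per_yield=500)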
Example #11
def make_stmts_from_db_list(db_stmt_objs):
    stmt_json_list = []
    for st_obj in db_stmt_objs:
        stmt_json_list.append(json.loads(st_obj.json.decode('utf8')))
    return stmts_from_json(stmt_json_list)
Example #12
    def process_entries(self, result):
        if result.result_type == 'hashes':
            # There is really nothing to do for hashes.
            return

        elsevier_redactions = 0
        if not all(self.has.values()) or self.fmt == 'json-js' \
                or self.w_english:
            for key, entry in result.results.copy().items():
                # Build English reps of each result (unless they're just hashes)
                if self.w_english and result.result_type != 'hashes':
                    stmt = None
                    # Fix the agent order
                    if self.strict:
                        if result.result_type == 'statements':
                            stmt = stmts_from_json([entry])[0]
                            if type(stmt) == Complex:
                                id_lookup = {v: int(k)
                                             for k, v in self.agent_dict.items()}
                                stmt.members.sort(
                                    key=lambda ag: id_lookup.get(ag.name, 10)
                                )
                            agent_set = {ag.name
                                         for ag in stmt.agent_list()
                                         if ag is not None}
                        else:
                            agent_set = set(entry['agents'].values())
                            if result.result_type == 'relations' \
                                    and entry['type'] == 'Complex':
                                entry['agents'] = self.agent_dict
                        if agent_set < self.agent_set:
                            result.results.pop(key, None)
                            continue

                    # Construct the english.
                    if result.result_type == 'statements':
                        if stmt is None:
                            stmt = stmts_from_json([entry])[0]
                        eng = _format_stmt_text(stmt)
                        entry['evidence'] = _format_evidence_text(stmt)
                    else:
                        eng = _make_english_from_meta(entry['agents'],
                                                      entry.get('type'))
                    if not eng:
                        logger.warning(f"English not formed for {key}:\n"
                                       f"{entry}")
                    entry['english'] = eng

                # Filter out medscan if user does not have medscan privileges.
                if not self.has['medscan']:
                    if result.result_type == 'statements':
                        result.source_counts[key].pop('medscan', 0)
                    else:
                        result.evidence_counts[key] -= \
                            entry['source_counts'].pop('medscan', 0)
                        entry['total_count'] = result.evidence_counts[key]
                        if not entry['source_counts']:
                            logger.warning("Censored content present.")

                # In most cases we can stop here
                if self.has['elsevier'] and self.fmt != 'json-js' \
                        and not self.w_english:
                    continue

                if result.result_type == 'statements':
                    # If there is evidence, loop through it if necessary.
                    for ev_json in entry['evidence'][:]:
                        if self.fmt == 'json-js':
                            ev_json['source_hash'] = str(ev_json['source_hash'])

                        # Check for elsevier and redact if necessary
                        if not self.has['elsevier'] and \
                                get_source(ev_json) == 'elsevier':
                            text = ev_json['text']
                            if len(text) > 200:
                                ev_json['text'] = text[:200] + REDACT_MESSAGE
                                elsevier_redactions += 1
                elif result.result_type != 'hashes' and self.fmt == 'json-js':
                    # Stringify lists of hashes.
                    if 'hashes' in entry and entry['hashes'] is not None:
                        entry['hashes'] = [str(h) for h in entry['hashes']]
                    elif 'hash' in entry:
                        entry['hash'] = str(entry['hash'])

        if result.result_type == 'statements':
            logger.info(f"Redacted {elsevier_redactions} pieces of elsevier "
                        f"evidence.")

        logger.info(f"Process entries for {self.__class__.__name__} after "
                    f"{sec_since(self.start_time)} seconds.")
        return
Example #13
    def produce_response(self, result):
        if result.result_type == 'statements':
            res_json = result.json()

            # Add derived values to the res_json.
            if self.w_cur_counts:
                res_json['num_curations'] = self.get_curation_counts(result)
            res_json['statement_limit'] = MAX_STMTS
            res_json['statements_returned'] = len(result.results)
            res_json['end_of_statements'] = \
                (len(result.results) < MAX_STMTS)
            res_json['statements_removed'] = 0
            res_json['evidence_returned'] = result.returned_evidence

            # Build the HTML if HTML, else just tweak the JSON.
            stmts_json = result.results
            if self.fmt == 'html':
                title = TITLE
                ev_counts = res_json.pop('evidence_counts')
                beliefs = res_json.pop('belief_scores')
                stmts = stmts_from_json(stmts_json.values())
                db_rest_url = request.url_root[:-1] \
                    + self._env.globals['url_for']('root')[:-1]
                html_assembler = \
                    HtmlAssembler(stmts, summary_metadata=res_json,
                                  ev_counts=ev_counts, beliefs=beliefs,
                                  sort_by=self.sort_by, title=title,
                                  source_counts=result.source_counts,
                                  db_rest_url=db_rest_url)
                idbr_template = \
                    self._env.get_template('idbr_statements_view.html')
                if not TESTING['status']:
                    identity = self.user.identity() if self.user else None
                else:
                    identity = None
                source_info, source_colors = get_html_source_info()
                resp_content = html_assembler.make_model(
                    idbr_template, identity=identity, source_info=source_info,
                    source_colors=source_colors, simple=False
                )
                if self.tracker.get_messages():
                    level_stats = ['%d %ss' % (n, lvl.lower())
                                   for lvl, n
                                   in self.tracker.get_level_stats().items()]
                    msg = ' '.join(level_stats)
                    resp_content = html_assembler.append_warning(msg)
                mimetype = 'text/html'
            else:  # Return JSON for all other values of the format argument
                res_json.update(self.tracker.get_level_stats())
                res_json['statements'] = stmts_json
                resp_content = json.dumps(res_json)
                mimetype = 'application/json'

            resp = Response(resp_content, mimetype=mimetype)
            logger.info("Exiting with %d statements with %d/%d evidence of "
                        "size %f MB after %s seconds."
                        % (res_json['statements_returned'],
                           res_json['evidence_returned'],
                           res_json['total_evidence'],
                           sys.getsizeof(resp.data) / 1e6,
                           sec_since(self.start_time)))
        elif result.result_type != 'hashes':
            # Look up curations, if result with_curations was set.
            if self.w_cur_counts:
                rel_hash_lookup = defaultdict(list)
                if result.result_type == 'interactions':
                    for h, rel in result.results.items():
                        rel['cur_count'] = 0
                        rel_hash_lookup[int(h)].append(rel)
                else:
                    for rel in result.results.values():
                        for h in rel['hashes']:
                            rel['cur_count'] = 0
                            rel_hash_lookup[int(h)].append(rel)
                        if not self.special['with_hashes']:
                            rel['hashes'] = None
                curations = get_curations(pa_hash=set(rel_hash_lookup.keys()))
                for cur in curations:
                    for rel in rel_hash_lookup[cur['pa_hash']]:
                        rel['cur_count'] += 1

            logger.info("Returning with %s results after %.2f seconds."
                        % (len(result.results), sec_since(self.start_time)))

            res_json = result.json()
            res_json['relations'] = list(res_json['results'].values())
            if result.result_type == 'agents' and self.fmt == 'json-js':
                res_json['complexes_covered'] = \
                    [str(h) for h in res_json['complexes_covered']]
            res_json.pop('results')
            res_json['query_str'] = str(self.db_query)
            resp = Response(json.dumps(res_json), mimetype='application/json')

            logger.info("Result prepared after %.2f seconds."
                        % sec_since(self.start_time))
        else:
            return super(StatementApiCall, self).produce_response(result)
        return resp
Example #14
def insert_agents(db, prefix, stmts_wo_agents=None, **kwargs):
    """Insert agents for statements that don't have any agents.

    Note: This method currently works for both Statements and PAStatements and
    their corresponding agents (Agents and PAAgents).

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are adding agents.
    prefix : str
        Select which stage of statements for which you wish to insert agents.
        The choices are 'pa' for preassembled statements or 'raw' for raw
        statements.
    stmts_wo_agents : list or None
        An optional list of statement records that lack agents. If None, they
        are looked up with `get_statements_without_agents`.
    verbose : bool
        If True, print extra information and a status bar while compiling
        agents for insert from statements. Default False.
    """
    verbose = kwargs.pop('verbose', False)
    if len(kwargs):
        raise IndraDatabaseError("Unrecognized keyword argument(s): %s." %
                                 kwargs)

    agent_tbl_obj = db.tables[prefix + '_agents']

    if stmts_wo_agents is None:
        stmts_wo_agents = get_statements_without_agents(db, prefix)

    if verbose:
        num_stmts = len(stmts_wo_agents)

    # Construct the agent records
    logger.info("Building agent data for insert...")
    if verbose:
        print("Loading:", end='', flush=True)
    agent_data = []
    for i, db_stmt in enumerate(stmts_wo_agents):
        # Convert the database statement entry object into an indra statement.
        stmt = stmts_from_json([json.loads(db_stmt.json.decode())])[0]

        # Figure out how the agents are structured and assign roles.
        ag_list = stmt.agent_list()
        nary_stmt_types = [
            Complex, SelfModification, ActiveForm, Conversion, Translocation
        ]
        if any([isinstance(stmt, tp) for tp in nary_stmt_types]):
            agents = {('OTHER', ag) for ag in ag_list}
        elif len(ag_list) == 2:
            agents = {('SUBJECT', ag_list[0]), ('OBJECT', ag_list[1])}
        else:
            raise IndraDatabaseError("Unhandled agent structure for stmt %s "
                                     "with agents: %s." %
                                     (str(stmt), str(stmt.agent_list())))

        # Prep the agents for copy into the database.
        for role, ag in agents:
            # If no agent, or no db_refs for the agent, skip the insert
            # that follows.
            if ag is None or ag.db_refs is None:
                continue
            for ns, ag_id in ag.db_refs.items():
                if prefix == 'pa':
                    stmt_id = db_stmt.mk_hash
                else:  # prefix == 'raw'
                    stmt_id = db_stmt.id
                if isinstance(ag_id, list):
                    for sub_id in ag_id:
                        agent_data.append((stmt_id, ns, sub_id, role))
                else:
                    agent_data.append((stmt_id, ns, ag_id, role))

        # Optionally print another tick on the progress bar.
        if verbose and num_stmts > 25 and i % (num_stmts // 25) == 0:
            print('|', end='', flush=True)

    if verbose and num_stmts > 25:
        print()

    if prefix == 'pa':
        cols = ('stmt_mk_hash', 'db_name', 'db_id', 'role')
    else:  # prefix == 'raw'
        cols = ('stmt_id', 'db_name', 'db_id', 'role')
    db.copy(agent_tbl_obj.__tablename__, agent_data, cols)
    return
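A hypothetical call of this prefix-based variant, again assuming db is a DatabaseManager; 'pa' selects the preassembled statement and agent tables as described in the docstring.

# Hypothetical call: insert agents for all preassembled statements lacking them.
insert_agents(db, 'pa', verbose=True)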
Example #15
 def remove_mechanism(self, mech_js, model_id):
     """Return a new model with the given mechanism having been removed."""
     rem_stmts = stmts_from_json(json.loads(mech_js))
     return self.remove_mechanism_from_stmts(rem_stmts, model_id)
Example #16
    def decorator(*args, **kwargs):
        tracker = LogTracker()
        start_time = datetime.now()
        logger.info("Got query for %s at %s!" % (f.__name__, start_time))

        query = request.args.copy()
        offs = query.pop('offset', None)
        ev_lim = query.pop('ev_limit', None)
        best_first_str = query.pop('best_first', 'true')
        best_first = True if best_first_str.lower() == 'true' \
                             or best_first_str else False
        do_stream_str = query.pop('stream', 'false')
        do_stream = True if do_stream_str == 'true' else False
        max_stmts = min(int(query.pop('max_stmts', MAX_STATEMENTS)),
                        MAX_STATEMENTS)
        format = query.pop('format', 'json')

        api_key = query.pop('api_key', None)
        logger.info("Running function %s after %s seconds." %
                    (f.__name__, sec_since(start_time)))
        result = f(query, offs, max_stmts, ev_lim, best_first, *args, **kwargs)
        logger.info("Finished function %s after %s seconds." %
                    (f.__name__, sec_since(start_time)))

        # Redact elsevier content for those without permission.
        if api_key is None or not _has_elsevier_auth(api_key):
            for stmt_json in result['statements'].values():
                for ev_json in stmt_json['evidence']:
                    if get_source(ev_json) == 'elsevier':
                        text = ev_json['text']
                        if len(text) > 200:
                            ev_json['text'] = text[:200] + REDACT_MESSAGE
        logger.info("Finished redacting evidence for %s after %s seconds." %
                    (f.__name__, sec_since(start_time)))
        result['offset'] = offs
        result['evidence_limit'] = ev_lim
        result['statement_limit'] = MAX_STATEMENTS
        result['statements_returned'] = len(result['statements'])

        if format == 'html':
            stmts_json = result.pop('statements')
            ev_totals = result.pop('evidence_totals')
            stmts = stmts_from_json(stmts_json.values())
            html_assembler = HtmlAssembler(stmts,
                                           result,
                                           ev_totals,
                                           title='INDRA DB REST Results',
                                           db_rest_url=request.url_root[:-1])
            content = html_assembler.make_model()
            if tracker.get_messages():
                level_stats = [
                    '%d %ss' % (n, lvl.lower())
                    for lvl, n in tracker.get_level_stats().items()
                ]
                msg = ' '.join(level_stats)
                content = html_assembler.append_warning(msg)
            mimetype = 'text/html'
        else:  # Return JSON for all other values of the format argument
            result.update(tracker.get_level_stats())
            content = json.dumps(result)
            mimetype = 'application/json'

        if do_stream:
            # Returning a generator should stream the data.
            resp_json_bts = content
            gen = batch_iter(resp_json_bts, 10000)
            resp = Response(gen, mimetype=mimetype)
        else:
            resp = Response(content, mimetype=mimetype)
        logger.info("Exiting with %d statements with %d evidence of size "
                    "%f MB after %s seconds." %
                    (result['statements_returned'], result['total_evidence'],
                     sys.getsizeof(resp.data) / 1e6, sec_since(start_time)))
        return resp
Example #17
 def _compile_results(self):
     """Generate statements from the jsons."""
     self.statements = stmts_from_json(self.__statement_jsons.values())
     if self.use_obtained_counts:
         self.__source_counts = get_available_source_counts(self.statements)
         self.__evidence_counts = get_available_ev_counts(self.statements)
Example #18
 def statements(self) -> list:
     """Get a list of Statements from the results."""
     assert isinstance(self.results, dict), "Results must be a dictionary."
     return stmts_from_json(list(self.results.values()))
Example #19
 def _compile_statements(self):
     """Generate statements from the jsons."""
     self.statements = stmts_from_json(self.__statement_jsons.values())
Example #20
"""The input files are JSON files containing JSON-serialized INDRA statements.

This script was prepared for ISI. """

import json
import pickle
from glob import glob
from tqdm import tqdm
from indra.statements import stmts_from_json, Statement

files = glob("../data/UN_stmt_jsons/*")

d = {}
for file in tqdm(files):
    with open(file, "r") as f:
        sts = stmts_from_json(json.load(f)["statements"])
        for s in sts:
            for x in (s.subj, s.obj):
                db_refs = x.db_refs
                text = db_refs["TEXT"]
                if db_refs.get("UN") is not None:
                    top_un_grounding = db_refs["UN"][0][0].split("/")[-1]
                    if top_un_grounding not in d:
                        d[top_un_grounding] = [text]
                    else:
                        d[top_un_grounding].append(text)

for k, v in d.items():
    d[k] = list(set(v))

with open("groundings.json", "w") as f:
Example #21
    def respond_find_qca_path(self, content):
        """Response content to find-qca-path request"""
        if self.qca.ndex is None:
            reply = self.make_failure('SERVICE_UNAVAILABLE')
            return reply

        source_arg = content.get('SOURCE')
        target_arg = content.get('TARGET')
        reltype_arg = content.get('RELTYPE')

        if not source_arg:
            raise ValueError("Source list is empty")
        if not target_arg:
            raise ValueError("Target list is empty")

        target = self.get_agent(target_arg)
        if target is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('TARGET_MISSING')
            return reply

        source = self.get_agent(source_arg)
        if source is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('SOURCE_MISSING')
            return reply

        if reltype_arg is None or len(reltype_arg) == 0:
            relation_types = None
        else:
            relation_types = [str(k.data) for k in reltype_arg.data]

        results_list = self.qca.find_causal_path([source.name], [target.name],
                                                 relation_types=relation_types)
        if not results_list:
            reply = self.make_failure('NO_PATH_FOUND')
            return reply

        def get_path_statements(results_list):
            stmts_list = []
            for res in results_list:
                # Edges of this result
                edges = res[1::2]
                # INDRA JSON of the edges of the result
                try:
                    indra_edges = [fe[0]['__INDRA json'] for fe in edges]
                except Exception:
                    indra_edges = [fe[0]['INDRA json'] for fe in edges]
                # Make the JSONs dicts from strings
                indra_edges = [json.loads(e) for e in indra_edges]
                # Now fix the edges if needed due to INDRA Statement changes
                indra_edges = _fix_indra_edges(indra_edges)
                stmts_list.append(indra_edges)
            return stmts_list

        paths_list = get_path_statements(results_list)

        self.report_paths_graph(paths_list)

        # Take the first one to report
        indra_edges = paths_list[0]
        # Get the INDRA Statement objects
        indra_edge_stmts = stmts_from_json(indra_edges)
        # Assemble into English
        for stmt in indra_edge_stmts:
            txt = EnglishAssembler([stmt]).make_model()
            self.send_provenance_for_stmts(
                [stmt], "the path from %s to %s (%s)" % (source, target, txt))
        edges_cl_json = self.make_cljson(indra_edge_stmts)
        paths = KQMLList()
        paths.append(edges_cl_json)
        reply = KQMLList('SUCCESS')
        reply.set('paths', paths)

        return reply
Example #22
 def build_model_from_json(self, model_json):
     """Build a model using INDRA JSON."""
     stmts = stmts_from_json(json.loads(model_json))
     return self.build_model_from_stmts(stmts)
Example #23
 def post(self):
     args = self.process_args(request.json)
     stmts = stmts_from_json(args.pop('statements'))
     stmts_out = pipeline_functions[self.func_name](stmts, **args)
     return _return_stmts(stmts_out)
Example #24
 def get_hash_statements_dict(self):
     """Return a dict of Statements keyed by hashes."""
     res = {stmt_hash: stmts_from_json([stmt])[0]
            for stmt_hash, stmt in self.__statement_jsons.items()}
     return res
Example #25
 def _compile_statements(self):
     """Generate statements from the jsons."""
     self.statements = stmts_from_json(self.__statement_jsons.values())
Example #26
 def get_hash_statements_dict(self):
     """Return a dict of Statements keyed by hashes."""
     res = {stmt_hash: stmts_from_json([stmt])[0]
            for stmt_hash, stmt in self.__statement_jsons.items()}
     return res
Example #27
    def respond_find_qca_path(self, content):
        """Response content to find-qca-path request"""
        if self.qca.ndex is None:
            reply = self.make_failure('SERVICE_UNAVAILABLE')
            return reply

        source_arg = content.gets('SOURCE')
        target_arg = content.gets('TARGET')
        reltype_arg = content.get('RELTYPE')

        if not source_arg:
            raise ValueError("Source list is empty")
        if not target_arg:
            raise ValueError("Target list is empty")

        target = self._get_term_name(target_arg)
        if target is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('TARGET_MISSING')
            return reply

        source = self._get_term_name(source_arg)
        if source is None:
            reply = self.make_failure('NO_PATH_FOUND')
            # NOTE: use the one below if it's handled by NLG
            #reply = self.make_failure('SOURCE_MISSING')
            return reply

        if reltype_arg is None or len(reltype_arg) == 0:
            relation_types = None
        else:
            relation_types = [str(k.data) for k in reltype_arg.data]

        results_list = self.qca.find_causal_path([source], [target],
                                                 relation_types=relation_types)
        if not results_list:
            reply = self.make_failure('NO_PATH_FOUND')
            return reply

        def get_path_statements(results_list):
            stmts_list = []
            for res in results_list:
                # Edges of this result
                edges = res[1::2]
                # INDRA JSON of the edges of the result
                try:
                    indra_edges = [fe[0]['__INDRA json'] for fe in edges]
                except Exception:
                    indra_edges = [fe[0]['INDRA json'] for fe in edges]
                # Make the JSONs dicts from strings
                indra_edges = [json.loads(e) for e in indra_edges]
                # Now fix the edges if needed due to INDRA Statement changes
                indra_edges = _fix_indra_edges(indra_edges)
                stmts_list.append(indra_edges)
            return stmts_list

        paths_list = get_path_statements(results_list)

        self.report_paths_graph(paths_list)

        # Take the first one to report
        indra_edges = paths_list[0]
        # Get the INDRA Statement objects
        indra_edge_stmts = stmts_from_json(indra_edges)
        # Assemble into English
        for stmt in indra_edge_stmts:
            txt = EnglishAssembler([stmt]).make_model()
            self.send_provenance_for_stmts(
                [stmt], "the path from %s to %s (%s)" % (source, target, txt))
        indra_edges_str = json.dumps(indra_edges)
        ks = KQMLString(indra_edges_str)

        reply = KQMLList('SUCCESS')
        reply.set('paths', KQMLList([ks]))

        return reply
Example #28
 def expand_model_from_json(self, model_json, model_id):
     """Expand a model using INDRA JSON."""
     stmts = stmts_from_json(json.loads(model_json))
     return self.expand_model_from_stmts(stmts, model_id)
Example #29
def test_eidos_json():
    from indra_world.tests.test_eidos import test_jsonld, _get_data_file
    sc.db = DbManager(url='sqlite:///:memory:')
    sc.db.create_all()

    with open(test_jsonld, 'r') as fh:
        jsonld = fh.read()
    res_json = _call_api('post',
                         'sources/eidos/process_jsonld',
                         json={'jsonld': jsonld})
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
    stmt = stmts[0]
    assert len(stmt.subj.concept.db_refs) > 2
    assert len(stmt.obj.concept.db_refs) > 2

    # Grounding NS
    res_json = _call_api('post',
                         'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'grounding_ns': ['UN']
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
    stmt = stmts[0]
    assert set(stmt.subj.concept.db_refs.keys()) == {'TEXT', 'UN'}
    assert set(stmt.obj.concept.db_refs.keys()) == {'TEXT', 'UN'}

    # Extract filter
    res_json = _call_api('post',
                         'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'extract_filter': ['influence']
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
    res_json = _call_api('post',
                         'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'extract_filter': ['event']
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 0

    # Grounding mode
    with open(_get_data_file('eidos_compositional.jsonld'), 'r') as fh:
        jsonld = fh.read()
    res_json = _call_api('post',
                         'sources/eidos/process_jsonld',
                         json={
                             'jsonld': jsonld,
                             'grounding_mode': 'compositional'
                         })
    stmts_json = res_json.get('statements')
    stmts = stmts_from_json(stmts_json)
    assert len(stmts) == 1
Example #30
 def get_statements_for_record(self, record_key):
     """Return prepared statements for given record."""
     qfilter = wms_schema.PreparedStatements.record_key.like(record_key)
     q = self.query(wms_schema.PreparedStatements.stmt).filter(qfilter)
     stmts = stmts_from_json([r[0] for r in q.all()])
     return stmts
Example #31
def decode_indra_stmts(stmts_json_str):
    stmts_json = json.loads(stmts_json_str)
    stmts = stmts_from_json(stmts_json)
    return stmts
Example #32
 def get_statements(self):
     """Return all prepared statements in the DB."""
     q = self.query(wms_schema.PreparedStatements.stmt)
     stmts = stmts_from_json([r[0] for r in q.all()])
     return stmts
Example #33
File: util.py Project: budakn/INDRA
def insert_agents(db, prefix, stmts_wo_agents=None, **kwargs):
    """Insert agents for statements that don't have any agents.

    Note: This method currently works for both Statements and PAStatements and
    their corresponding agents (Agents and PAAgents). However, if you already
    have preassembled INDRA Statement objects that you know don't have agents in
    the database, you can use `insert_pa_agents_directly` to insert the agents
    much faster.

    Parameters
    ----------
    db : :py:class:`DatabaseManager`
        The manager for the database into which you are adding agents.
    prefix : str
        Select which stage of statements for which you wish to insert agents.
        The choices are 'pa' for preassembled statements or 'raw' for raw
        statements.
    stmts_wo_agents : iterable or None
        An optional collection of statement records that lack agents. If None,
        they are looked up with `get_statements_without_agents`.
    verbose : bool
        If True, print extra information and a status bar while compiling
        agents for insert from statements. Default False.
    """
    verbose = kwargs.pop('verbose', False)
    if len(kwargs):
        raise IndraDatabaseError("Unrecognized keyword argument(s): %s." %
                                 kwargs)

    agent_tbl_obj = db.tables[prefix + '_agents']

    if stmts_wo_agents is None:
        stmts_wo_agents, num_stmts = \
            get_statements_without_agents(db, prefix, verbose=verbose)
    else:
        num_stmts = None

    if verbose:
        if num_stmts is None:
            try:
                num_stmts = len(stmts_wo_agents)
            except TypeError:
                logger.info("Could not get length from type: %s. Turning off "
                            "verbose messaging." % type(stmts_wo_agents))
                verbose = False

    # Construct the agent records
    logger.info("Building agent data for insert...")
    if verbose:
        print("Loading:", end='', flush=True)
    agent_data = []
    for i, db_stmt in enumerate(stmts_wo_agents):
        # Convert the database statement entry object into an indra statement.
        stmt = stmts_from_json([json.loads(db_stmt.json.decode())])[0]

        if prefix == 'pa':
            stmt_id = db_stmt.mk_hash
        else:  # prefix == 'raw'
            stmt_id = db_stmt.id

        agent_data.extend(_get_agent_tuples(stmt, stmt_id))

        # Optionally print another tick on the progress bar.
        if verbose and num_stmts > 25 and i % (num_stmts // 25) == 0:
            print('|', end='', flush=True)

    if verbose and num_stmts > 25:
        print()

    if prefix == 'pa':
        cols = ('stmt_mk_hash', 'db_name', 'db_id', 'role')
    else:  # prefix == 'raw'
        cols = ('stmt_id', 'db_name', 'db_id', 'role')
    db.copy(agent_tbl_obj.__tablename__, agent_data, cols)
    return
Example #34
    def get_statements(self,
                       model_id,
                       date=None,
                       offset=0,
                       limit=None,
                       sort_by=None,
                       stmt_types=None,
                       min_belief=None,
                       max_belief=None):
        """Load the statements by model and date.

        Parameters
        ----------
        model_id : str
            The standard name of the model to get statements for.
        date : str
            The date when the model was generated.
        offset : int
            The offset to start at.
        limit : int
            The number of statements to return.
        sort_by : str
            Sort the statements by 'evidence', 'belief', or 'paths'.
        stmt_types : list[str]
            Restrict the results to these statement types (case-insensitive).
        min_belief : float
            Only return statements with a belief score at or above this value.
        max_belief : float
            Only return statements with a belief score at or below this value.

        Returns
        -------
        list[indra.statements.Statement]
            A list of statements corresponding to the model and date.
        """
        if not date:
            date = self.get_latest_date(model_id)
        logger.info(f'Got request to get statements for model {model_id} '
                    f'on date {date} with offset {offset} and limit {limit} '
                    f'and sort by {sort_by}')
        with self.get_session() as sess:
            q = sess.query(Statement.statement_json).filter(
                Statement.model_id == model_id, Statement.date == date)
            if stmt_types:
                stmt_types = [stmt_type.lower() for stmt_type in stmt_types]
                q = q.filter(
                    func.lower(Statement.statement_json['type'].astext).in_(
                        stmt_types))
            if min_belief:
                q = q.filter(Statement.statement_json['belief'].astext.cast(
                    Float) >= float(min_belief))
            if max_belief:
                q = q.filter(Statement.statement_json['belief'].astext.cast(
                    Float) <= float(max_belief))
            if sort_by == 'evidence':
                q = q.order_by(
                    nullslast(
                        jsonb_array_length(
                            Statement.statement_json["evidence"]).desc()))
            elif sort_by == 'belief':
                q = q.order_by(
                    nullslast(Statement.statement_json['belief'].desc()))
            elif sort_by == 'paths':
                q = q.order_by(Statement.path_count.desc())
            if offset:
                q = q.offset(offset)
            if limit:
                q = q.limit(limit)
            stmts = stmts_from_json([s for s, in q.all()])
            logger.info(f'Got {len(stmts)} statements')
        return stmts
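A hypothetical call of the method above, assuming an instance of the owning class named manager; the model id is a placeholder.

# Hypothetical usage: the ten highest-belief Activation/Inhibition statements
# for a placeholder model id, on the latest available date.
stmts = manager.get_statements('example_model',
                               sort_by='belief',
                               stmt_types=['Activation', 'Inhibition'],
                               min_belief=0.8,
                               limit=10)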