Exemple #1
0
def format_stmts(stmts, output_format):
    """Serialize INDRA statements in the requested output format.

    Returns a TSV or JSON string, the name of a written pkl/pdf file,
    or None when the format is not recognized.
    """
    if output_format == 'tsv':
        rows = []
        for stmt in stmts:
            if stmt.evidence:
                first_ev = stmt.evidence[0]
                ev_text = first_ev.text or ''
                ev_pmid = first_ev.pmid or ''
            else:
                logger.warning('Statement %s without evidence' % stmt.uuid)
                ev_text = ''
                ev_pmid = ''
            rows.append('%s\t%s\t%s\n' % (stmt, ev_text, ev_pmid))
        return ''.join(rows)
    if output_format == 'pkl':
        fname = 'indrabot.pkl'
        with open(fname, 'wb') as fh:
            pickle.dump(stmts, fh)
        return fname
    if output_format == 'pdf':
        fname = 'indrabot.pdf'
        assembler = GraphAssembler(stmts)
        assembler.make_model()
        assembler.save_pdf(fname)
        return fname
    if output_format == 'json':
        return json.dumps(stmts_to_json(stmts), indent=1)
    return None
Exemple #2
0
def _stmts_from_proc(proc):
    if proc and proc.statements:
        stmts = stmts_to_json(proc.statements)
        res = {'statements': stmts}
    else:
        res = {'statements': []}
    return res
Exemple #3
0
def assemble_one_corpus(path='/home/bmg16/data/wm/2-Jsonld',
                        corpus_size='16k'):
    """For assembling one of the four corpora.

    Parameters
    ----------
    path : str
        Base path (without the size suffix) of the directory holding the
        corpus JSON-LD files. Default: '/home/bmg16/data/wm/2-Jsonld'.
    corpus_size : str
        Corpus size label appended to ``path`` to locate the input files,
        also used in the output file names. Default: '16k'.
    """
    prefix = '%s%s' % (path, corpus_size)
    fnames = glob.glob('%s/*.jsonld' % prefix)

    # Process each file with Eidos and tag each evidence with the name of
    # the document it came from.
    all_statements = []
    for idx, fname in enumerate(fnames):
        ep = eidos.process_json_file(fname)
        for stmt in ep.statements:
            for ev in stmt.evidence:
                ev.annotations['provenance'][0]['document']['@id'] = \
                    os.path.basename(fname)

        all_statements += ep.statements
        print('%d: %d' % (idx, len(all_statements)))
    # Dump the raw statements before assembly so partial work is kept.
    with open('%s/3-Indra%s.pkl' % (prefix, corpus_size), 'wb') as fh:
        pickle.dump(all_statements, fh)

    # Preassemble using the Eidos-specific belief scorer; keep all
    # statements rather than only the top-level ones.
    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(all_statements,
                                         belief_scorer=scorer,
                                         return_toplevel=False)

    jd = stmts_to_json(assembled_stmts, use_sbo=False)
    with open('%s/3-Indra%s.json' % (prefix, corpus_size), 'w') as fh:
        json.dump(jd, fh, indent=1)
Exemple #4
0
def test_respond_expand_model_from_json():
    """Build a model from INDRA JSON, then expand it with a second statement."""
    def _request(verb, stmt):
        # Package a single statement as an INDRA-JSON KQML request.
        content = KQMLList(verb)
        content.sets('description', json.dumps(sts.stmts_to_json([stmt])))
        content.sets('format', 'indra_json')
        return content

    mm = MRA_Module(testing=True)
    build_msg = _request('BUILD-MODEL',
                         sts.Phosphorylation(sts.Agent('MEK'),
                                             sts.Agent('ERK')))
    reply = mm.respond_build_model(build_msg)
    assert reply.get('model')
    assert reply.get('model-id') == '1'
    expand_msg = _request('EXPAND-MODEL',
                          sts.Phosphorylation(sts.Agent('RAF'),
                                              sts.Agent('MEK')))
    expand_msg.set('model-id', '1')
    reply = mm.respond_expand_model(expand_msg)
    assert reply.get('model')
    assert reply.get('model-id') == '2'
def get_and_write_statements_from_pmids(
    pmids: Union[str, Iterable[str]],
    file: Union[None, str, TextIO] = None,
    json_file: Union[None, str, TextIO] = None,
    sep: Optional[str] = None,
    limit: Optional[int] = None,
    duplicates: bool = False,
    keep_only_query_pmids: bool = False,
    minimum_belief: Optional[float] = None,
    extra_columns: Optional[List[str]] = None,
) -> None:
    """Get INDRA statements for the given PMIDs and write them to a TSV for BEL curation.

    :param pmids: A finite iterable of PubMed identifiers, or a single one
    :param file: The file to write curation sheets to
    :param json_file: The file (or path) to output structured INDRA statement JSON to
    :param sep: The separator for the CSV. Defaults to a tab.
    :param limit: The optional limit of statements to write
    :param duplicates: should duplicate statements be written (with multiple evidences?)
    :param keep_only_query_pmids: If set only keeps evidences from this PMID. Warning: still might
     have multiple evidences.
    :param minimum_belief: The minimum belief score to keep
    :param extra_columns: Headers of extra columns for curation
    """
    # Normalize a single PMID string into a one-element list.
    if isinstance(pmids, str):
        pmids = [pmids]

    statements = get_statements_from_pmids(pmids)

    # Optionally dump raw statement JSON: a string is treated as a path
    # to open, anything else as an already-open writable file object.
    if json_file is not None:
        if isinstance(json_file, str):
            with open(json_file, 'w') as fh:
                json.dump(stmts_to_json(statements), fh, indent=2)
        else:
            json.dump(stmts_to_json(statements), json_file, indent=2)

    print_statements(
        statements,
        file=file,
        sep=sep,
        limit=limit,
        allow_duplicates=duplicates,
        keep_only_pmids=pmids if keep_only_query_pmids else None,
        minimum_belief=minimum_belief,
        extra_columns=extra_columns,
    )
Exemple #6
0
def _get_gk_model_indra():
    """Return a small two-statement KRAS/BRAF model as an INDRA JSON string."""
    kras = Agent('KRAS', db_refs={'HGNC': '6407', 'UP': 'P01116'})
    braf = Agent('BRAF', db_refs={'HGNC': '1097', 'UP': 'P15056'})
    pp2a = Agent('PPP2CA')
    statements = [Phosphorylation(kras, braf),
                  Dephosphorylation(pp2a, braf)]
    return json.dumps(stmts_to_json(statements))
Exemple #7
0
def combine_all_stmts(pkl_list, output_file):
    """Concatenate statements from several pickles; dump them as pkl and JSON."""
    combined = []
    for pkl_file in pkl_list:
        combined.extend(ac.load_statements(pkl_file))
    ac.dump_statements(combined, output_file)
    # Write a JSON copy next to the pickle, swapping the file extension.
    base = output_file.rsplit('.', maxsplit=1)[0]
    with open(f"{base}.json", 'wt') as fh:
        json.dump(stmts_to_json(combined), fh, indent=2)
    return combined
Exemple #8
0
def update_groundings():
    """Re-ground the statements of a corpus and return them as JSON."""
    # Reject non-JSON requests up front.
    if request.json is None:
        abort(Response('Missing application/json header.', 415))

    corpus_id = request.json.get('corpus_id')
    # Delegate the actual regrounding to the curator.
    regrounded = curator.update_groundings(corpus_id)
    return jsonify(stmts_to_json(regrounded))
Exemple #9
0
def update_groundings():
    """Reground a corpus's statements and return the updated statement JSON."""
    # A JSON body is required to carry the corpus_id parameter.
    if request.json is None:
        abort(Response('Missing application/json header.', 415))

    corpus_id = request.json.get('corpus_id')
    # Run the actual regrounding via the global curator.
    updated = curator.update_groundings(corpus_id)
    return jsonify(stmts_to_json(updated))
Exemple #10
0
def save_tests_to_s3(tests, bucket, key, save_format='pkl'):
    """Save tests in pkl, json or jsonl format.

    Parameters
    ----------
    tests : list or dict
        Either a list of test objects or a dict with a 'tests' key holding
        such a list. Each test must have a ``stmt`` attribute.
    bucket : str
        The S3 bucket to save the tests to.
    key : str
        The S3 key to save the tests under.
    save_format : str
        One of 'pkl', 'json' or 'jsonl'. Default: 'pkl'. Other values are
        silently ignored (pre-existing behavior).
    """
    if save_format == 'pkl':
        save_pickle_to_s3(tests, bucket, key)
    elif save_format in ['json', 'jsonl']:
        if isinstance(tests, list):
            stmts = [test.stmt for test in tests]
        elif isinstance(tests, dict):
            stmts = [test.stmt for test in tests['tests']]
        else:
            # Previously this fell through and crashed with an
            # UnboundLocalError on `stmts`; fail with a clear message.
            raise TypeError('Expected tests to be a list or dict, got %s'
                            % type(tests).__name__)
        stmts_json = stmts_to_json(stmts)
        save_json_to_s3(stmts_json, bucket, key, save_format)
    def s3_put(self,
               name,
               bucket=default_bucket,
               key_base_name=default_base_name):
        """Push a corpus object to S3 in the form of three json files

        The json files representing the object have S3 keys of the format
        <key_base_name>/<name>/<file>.json

        Parameters
        ----------
        name : str
            The name of the model to upload. Is part of the S3 key.
        bucket : str
            The S3 bucket to upload the Corpus to. Default: 'world-modelers'.
        key_base_name : str
            The base object path to upload the json files to. Is part of the
            S3 key. Default: 'indra_models'.

        Returns
        -------
        keys : tuple(str)
            A tuple of three strings giving the S3 key to the pushed objects,
            or None if any of the uploads failed.
        """
        key_base = key_base_name + '/' + name + '/'
        key_base = key_base.replace('//', '/')  # collapse double slashes
        try:
            s3 = self._get_s3_client()
            # Structure and upload raw statements
            s3.put_object(Body=json.dumps(stmts_to_json(self.raw_statements)),
                          Bucket=bucket,
                          Key=key_base + 'raw_statements.json')

            # Structure and upload assembled statements
            s3.put_object(Body=_stmts_dict_to_json_str(self.statements),
                          Bucket=bucket,
                          Key=key_base + 'statements.json')

            # Structure and upload curations
            # NOTE(review): assumes self.curations is JSON-serializable as-is
            s3.put_object(Body=json.dumps(self.curations),
                          Bucket=bucket,
                          Key=key_base + 'curations.json')
            keys = tuple(
                key_base + s + '.json'
                for s in ['raw_statements', 'statements', 'curations'])
            logger.info('Corpus uploaded as %s, %s and %s at %s.' %
                        (*keys, key_base))
            return keys

        except Exception as e:
            # Best-effort upload: log the failure and return None rather
            # than propagate; earlier uploads may already have succeeded.
            logger.exception('Failed to put on s3: %s' % e)
            return None
Exemple #12
0
def test_standalone_event():
    """A standalone Eidos event must carry its evidence into JSON."""
    from indra.statements import stmts_to_json
    se_jsonld = os.path.join(path_this, 'eidos_standalone_event.json')
    processor = eidos.process_json_file(se_jsonld)
    assert len(processor.statements) == 1
    event = processor.statements[0]
    assert isinstance(event, Event)
    assert hasattr(event, 'evidence')
    assert event.evidence[0].text is not None
    # Both serialization paths must preserve the evidence.
    assert event.to_json()['evidence']
    assert 'evidence' in stmts_to_json([event])[0]
Exemple #13
0
def test_standalone_event():
    """Evidence on a standalone Eidos event survives JSON serialization."""
    from indra.statements import stmts_to_json
    json_path = os.path.join(path_this, 'eidos_standalone_event.json')
    ep = eidos.process_json_file(json_path)
    assert len(ep.statements) == 1
    stmt = ep.statements[0]
    assert isinstance(stmt, Event)
    assert hasattr(stmt, 'evidence')
    assert stmt.evidence[0].text is not None
    # Evidence must survive both to_json and stmts_to_json.
    assert stmt.to_json()['evidence']
    assert 'evidence' in stmts_to_json([stmt])[0]
Exemple #14
0
def test_path_counts():
    """Path counts start at zero and accumulate across partial updates."""
    db = _get_test_db('stmt')
    # Put statements in the database
    model_id = 'test'
    date = '2021-01-01'
    stmts = [
        Activation(Agent('A', db_refs={'HGNC': '1234'}),
                   Agent('B', db_refs={'HGNC': '2345'}),
                   evidence=[
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1234'}),
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1235'})
                   ]),
        Phosphorylation(Agent('B', db_refs={'HGNC': '2345'}),
                        Agent('C', db_refs={'HGNC': '3456'}),
                        evidence=[
                            Evidence(text='B phosphorylates C.',
                                     source_api='assertion',
                                     text_refs={'TRID': '2345'})
                        ])
    ]
    # Path counts are keyed by string representations of statement hashes.
    hash0 = str(stmts[0].get_hash())
    hash1 = str(stmts[1].get_hash())
    stmt_jsons = stmts_to_json(stmts)
    db.add_statements(model_id, date, stmt_jsons)
    # All path counts should be 0
    path_counts = db.get_path_counts(model_id, date)
    assert len(path_counts) == 0
    # Can update path counts multiple times, can be a subset of hashes
    db.update_statements_path_counts(model_id, date, {hash0: 7})
    path_counts = db.get_path_counts(model_id, date)
    assert len(path_counts) == 1, path_counts
    assert path_counts[hash0] == 7
    db.update_statements_path_counts(model_id, date, {hash0: 1, hash1: 5})
    path_counts = db.get_path_counts(model_id, date)
    assert len(path_counts) == 2
    assert path_counts[hash0] == 8  # 7 + 1
    assert path_counts[hash1] == 5
    db.update_statements_path_counts(model_id, date, {hash0: 3})
    path_counts = db.get_path_counts(model_id, date)
    assert len(path_counts) == 2
    assert path_counts[hash0] == 11  # 7 + 1 + 3
    assert path_counts[hash1] == 5  # Only added 5
Exemple #15
0
 def save_stmts(stmts, model_name):
     """Upload statement JSONs to S3 as rolling-latest and dated copies."""
     stmts_json = stmts_to_json(stmts)
     # One timestamped snapshot key plus a generic "latest" key.
     dated_key = f'assembled/{model_name}/statements_{self.date_str}'
     latest_key = f'assembled/{model_name}/latest_statements_{model_name}'
     for extension in ('json', 'jsonl'):
         target = f'{latest_key}.{extension}'
         logger.info(f'Uploading assembled statements to {target}')
         save_json_to_s3(stmts_json, bucket, target, extension)
     dated_jsonl = f'{dated_key}.jsonl'
     dated_zip = f'{dated_key}.gz'
     logger.info(f'Uploading assembled statements to {dated_jsonl}')
     save_json_to_s3(stmts_json, bucket, dated_jsonl, 'jsonl')
     logger.info(f'Uploading assembled statements to {dated_zip}')
     save_gzip_json_to_s3(stmts_json, bucket, dated_zip, 'json')
Exemple #16
0
 def add_statements_for_record(self, record_key, stmts, indra_version):
     """Add a set of prepared statements for a given document."""
     if not stmts:
         return None
     # Deepcopy before serializing: dumping statements to JSON overwrites
     # the hash, potentially with an inadequate one (no custom matches_fun
     # is supplied here).
     rows = [
         {
             'record_key': record_key,
             'indra_version': indra_version,
             'stmt': stmt_json
         }
         for stmt_json in stmts_to_json(deepcopy(stmts))
     ]
     op = insert(wms_schema.PreparedStatements).values(rows)
     return self.execute(op)
Exemple #17
0
 def save_assembled_statements(self, bucket=EMMAA_BUCKET_NAME):
     """Upload assembled statements jsons to S3 bucket."""
     stmts_json = stmts_to_json(self.model.assembled_stmts)
     # One timestamped snapshot key plus a rolling "latest" key per format.
     dated_key = f'assembled/{self.model.name}/statements_{self.date_str}'
     latest_key = f'assembled/{self.model.name}/' \
                  f'latest_statements_{self.model.name}'
     for extension in ('json', 'jsonl'):
         target = f'{latest_key}.{extension}'
         logger.info(f'Uploading assembled statements to {target}')
         save_json_to_s3(stmts_json, bucket, target, extension)
     dated_jsonl = f'{dated_key}.jsonl'
     dated_zip = f'{dated_key}.gz'
     logger.info(f'Uploading assembled statements to {dated_jsonl}')
     save_json_to_s3(stmts_json, bucket, dated_jsonl, 'jsonl')
     logger.info(f'Uploading assembled statements to {dated_zip}')
     save_gzip_json_to_s3(stmts_json, bucket, dated_zip, 'json')
Exemple #18
0
def format_stmts(stmts, output_format, ev_counts=None, source_counts=None):
    """Render statements as TSV, JSON, HTML, or as a saved pkl/pdf file.

    Returns a string for 'tsv'/'json', a file name for 'pkl'/'pdf'/'html',
    and None for any unrecognized format.
    """
    if output_format == 'tsv':
        rows = []
        for stmt in stmts:
            if stmt.evidence:
                first_ev = stmt.evidence[0]
                ev_text = '"%s"' % first_ev.text if first_ev.text else ''
                ev_pmid = first_ev.pmid or ''
            else:
                logger.warning('Statement %s without evidence' % stmt.uuid)
                ev_text = ''
                ev_pmid = ''
            # English assembly is best-effort; fall back to empty text.
            try:
                english = EnglishAssembler([stmt]).make_model()
            except Exception as e:
                english = ''
                logger.error('English assembly failed for %s' % stmt)
                logger.error(e)
            rows.append('%s\t%s\t%s\tPMID%s\n' % (stmt, english, ev_text,
                                                  ev_pmid))
        return ''.join(rows)
    if output_format == 'pkl':
        fname = 'indrabot.pkl'
        with open(fname, 'wb') as fh:
            pickle.dump(stmts, fh)
        return fname
    if output_format == 'pdf':
        fname = 'indrabot.pdf'
        assembler = GraphAssembler(stmts)
        assembler.make_model()
        assembler.save_pdf(fname)
        return fname
    if output_format == 'json':
        return json.dumps(stmts_to_json(stmts), indent=1)
    if output_format == 'html':
        # HtmlAssembler needs a dict of evidence totals, even if empty.
        assembler = HtmlAssembler(stmts, ev_totals=ev_counts or {},
                                  source_counts=source_counts)
        fname = 'indrabot.html'
        assembler.save_model(fname)
        return fname
    return None
Exemple #19
0
def test_get_dates_and_delete():
    """Dates are tracked per model and removed when statements are deleted."""
    db = _get_test_db('stmt')
    model_id = 'test'
    # At first there are no statements in the database
    assert db.get_number_of_dates(model_id) == 0
    assert db.get_oldest_date(model_id) is None
    # Put statements in the database
    date = '2021-01-01'
    stmts = [
        Activation(Agent('A', db_refs={'HGNC': '1234'}),
                   Agent('B', db_refs={'HGNC': '2345'}),
                   evidence=[
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1234'}),
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1235'})
                   ]),
        Phosphorylation(Agent('B', db_refs={'HGNC': '2345'}),
                        Agent('C', db_refs={'HGNC': '3456'}),
                        evidence=[
                            Evidence(text='B phosphorylates C.',
                                     source_api='assertion',
                                     text_refs={'TRID': '2345'})
                        ])
    ]
    stmt_jsons = stmts_to_json(stmts)
    db.add_statements(model_id, date, stmt_jsons)
    # There should be one date
    assert db.get_number_of_dates(model_id) == 1
    assert db.get_oldest_date(model_id) == date
    # Add another date (same statements, later date)
    date2 = '2022-01-01'
    db.add_statements(model_id, date2, stmt_jsons)
    assert db.get_number_of_dates(model_id) == 2
    # Oldest date is still the first one
    assert db.get_oldest_date(model_id) == date
    # Delete statements from the first date
    db.delete_statements(model_id, date)
    # There should be one date left
    assert db.get_number_of_dates(model_id) == 1
    assert db.get_oldest_date(model_id) == date2
Exemple #20
0
def from_agents(
    agents: List[str],
    output: TextIO,
    statement_file: TextIO,
    belief_cutoff: float,
    no_duplicates: bool,
    no_ungrounded: bool,
):
    """Make a sheet for the given agents."""
    # The CLI exposes negative flags ("no_..."); invert them for the API.
    statements = get_and_write_statements_from_agents(
        agents=agents,
        file=output,
        allow_duplicates=not no_duplicates,
        allow_ungrounded=not no_ungrounded,
        minimum_belief=belief_cutoff,
    )
    # Optionally dump the raw statement JSON alongside the sheet.
    if statement_file:
        json.dump(stmts_to_json(statements), statement_file, indent=2)
Exemple #21
0
def test_get_statements_by_hash():
    """Statements can be retrieved from the database by their hashes."""
    db = _get_test_db('stmt')
    # Put statements in the database
    model_id = 'test'
    date = '2021-01-01'
    stmts = [
        Activation(Agent('A', db_refs={'HGNC': '1234'}),
                   Agent('B', db_refs={'HGNC': '2345'}),
                   evidence=[
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1234'}),
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1235'})
                   ]),
        Phosphorylation(Agent('B', db_refs={'HGNC': '2345'}),
                        Agent('C', db_refs={'HGNC': '3456'}),
                        evidence=[
                            Evidence(text='B phosphorylates C.',
                                     source_api='assertion',
                                     text_refs={'TRID': '2345'})
                        ])
    ]
    hash0 = stmts[0].get_hash()
    hash1 = stmts[1].get_hash()
    stmt_jsons = stmts_to_json(stmts)
    db.add_statements(model_id, date, stmt_jsons)

    # Load statements by hash; results come back in query order
    stmts_loaded = db.get_statements_by_hash(model_id, date, [hash0, hash1])
    assert len(stmts_loaded) == 2
    assert stmts_loaded[0].get_hash() == hash0
    assert stmts_loaded[1].get_hash() == hash1
    stmts_loaded = db.get_statements_by_hash(model_id, date, [hash0])
    assert len(stmts_loaded) == 1
    assert stmts_loaded[0].get_hash() == hash0
Exemple #22
0
def stmts_json_from_text(text):
    """Return an INDRA Statements JSON from text."""
    extracted = stmts_from_text(text)
    return stmts_to_json(extracted)
Exemple #23
0
def encode_indra_stmts(stmts):
    """Serialize INDRA statements to a JSON string."""
    return json.dumps(stmts_to_json(stmts))
Exemple #24
0
 def get_json(self):
     """Generate statement jsons and return the json bytes."""
     statements = self.get_statements()
     return json.dumps(stmts_to_json(statements), indent=1)
Exemple #25
0
 def create_statements(self):
     """Build an INDRA-TO-NL KQML request carrying this object's statements."""
     content = KQMLList('INDRA-TO-NL')
     stmts_json = json.dumps(stmts_to_json(self.statements))
     content.sets('statements', stmts_json)
     return get_request(content), content
Exemple #26
0
def test_get_statements():
    """Loaded statements respect sorting, filtering, offset and limit."""
    db = _get_test_db('stmt')
    # Put statements and path counts in the database
    model_id = 'test'
    date = '2021-01-01'
    stmts = [
        Activation(Agent('A', db_refs={'HGNC': '1234'}),
                   Agent('B', db_refs={'HGNC': '2345'}),
                   evidence=[
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1234'}),
                       Evidence(text='A activates B.',
                                source_api='assertion',
                                text_refs={'TRID': '1235'})
                   ]),
        Phosphorylation(Agent('B', db_refs={'HGNC': '2345'}),
                        Agent('C', db_refs={'HGNC': '3456'}),
                        evidence=[
                            Evidence(text='B phosphorylates C.',
                                     source_api='assertion',
                                     text_refs={'TRID': '2345'})
                        ]),
        IncreaseAmount(Agent('A', db_refs={'HGNC': '1234'}),
                       Agent('C', db_refs={'HGNC': '3456'}))
    ]
    # Distinct beliefs so the belief sort order is deterministic
    stmts[0].belief = 0.8
    stmts[1].belief = 0.9
    stmts[2].belief = 0.5
    hash0 = stmts[0].get_hash()
    hash1 = stmts[1].get_hash()
    hash2 = stmts[2].get_hash()
    stmt_jsons = stmts_to_json(stmts)
    db.add_statements(model_id, date, stmt_jsons)
    db.update_statements_path_counts(model_id, date, {
        str(hash0): 1,
        str(hash1): 5
    })

    # Load statements with different sort/filter options

    # Sort by evidence count
    stmts_loaded = db.get_statements(model_id, date, sort_by='evidence')
    assert len(stmts_loaded) == 3
    assert stmts_loaded[0].get_hash() == hash0, len(stmts_loaded[0].evidence)
    assert stmts_loaded[1].get_hash() == hash1, len(stmts_loaded[1].evidence)
    assert stmts_loaded[2].get_hash() == hash2  # stmt with no evidence is last
    # Sort by belief
    stmts_loaded = db.get_statements(model_id, date, sort_by='belief')
    assert len(stmts_loaded) == 3
    assert stmts_loaded[0].get_hash() == hash1
    assert stmts_loaded[1].get_hash() == hash0
    # Sort by path count
    stmts_loaded = db.get_statements(model_id, date, sort_by='paths')
    assert len(stmts_loaded) == 3
    assert stmts_loaded[0].get_hash() == hash1, stmts_loaded
    assert stmts_loaded[1].get_hash() == hash0

    # Filter by statement type
    stmts_loaded = db.get_statements(model_id, date, stmt_types=['Activation'])
    assert len(stmts_loaded) == 1
    assert stmts_loaded[0].get_hash() == hash0
    stmts_loaded = db.get_statements(model_id,
                                     date,
                                     stmt_types=['Phosphorylation'])
    assert len(stmts_loaded) == 1
    assert stmts_loaded[0].get_hash() == hash1

    # Filter by belief (min is exclusive of 0.8, max keeps 0.8 and 0.5)
    stmts_loaded = db.get_statements(model_id, date, min_belief=0.85)
    assert len(stmts_loaded) == 1
    assert stmts_loaded[0].get_hash() == hash1
    stmts_loaded = db.get_statements(model_id, date, max_belief=0.85)
    assert len(stmts_loaded) == 2
    assert set([stmt.get_hash() for stmt in stmts_loaded]) == {hash0, hash2}
    stmts_loaded = db.get_statements(model_id,
                                     date,
                                     min_belief=0.85,
                                     max_belief=0.85)
    assert len(stmts_loaded) == 0

    # Use offset and limit
    stmts_loaded = db.get_statements(model_id, date)
    assert len(stmts_loaded) == 3
    stmts_loaded = db.get_statements(model_id, date, offset=1)
    assert len(stmts_loaded) == 2, stmts_loaded
    stmts_loaded = db.get_statements(model_id, date, limit=1)
    assert len(stmts_loaded) == 1
    # Returns only remaining statements after offset even if limit is larger
    stmts_loaded = db.get_statements(model_id, date, offset=1, limit=5)
    assert len(stmts_loaded) == 2
Exemple #27
0
               #'50': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \
               #      'wm_fao/20181101/2-Jsonld50',
               '500': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \
                      'wm_fao/20181101/2-Jsonld500',
                '16k': '/home/bmg16/data/wm/2-Jsonld16k',
                }
    all_statements = []
    for corpus_size, path in corpora.items():
        fnames = glob.glob('%s/*.jsonld' % path)
        for idx, fname in enumerate(fnames):
            ep = eidos.process_json_file(fname)
            for stmt in ep.statements:
                for ev in stmt.evidence:
                    ev.annotations['provenance'][0]['document']['@id'] = \
                        os.path.basename(fname)
                    ev.annotations['provenance'][0]['document']['corpus'] = \
                        corpus_size
            all_statements += ep.statements
            print('%d: %d' % (idx, len(all_statements)))

    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(all_statements,
                                         belief_scorer=scorer,
                                         return_toplevel=False)

    jd = stmts_to_json(assembled_stmts, use_sbo=False)
    with open('3-Indra-merged-500-16k.json', 'w') as fh:
        json.dump(jd, fh, indent=1)

#    assemble_all()
Exemple #28
0
 def get_json(self):
     """Generate statement jsons and return the json bytes."""
     stmts_json = stmts_to_json(self.get_statements())
     return json.dumps(stmts_json, indent=1)
Exemple #29
0
    POLYPHENOLS_LIST = 'input/list_polyphenols.xlsx'

    # Load the list of polyphenols
    df = pd.read_excel(POLYPHENOLS_LIST)

    results_dict = {}

    for name, pubchem_id in df[['polyphenols', 'pubchem_id']].values:
        # Query the INDRA DB web service using the INDRA Python API
        idrp = idr.get_statements(agents=[f'{pubchem_id}@PUBCHEM'],
                                  ev_limit=100000)
        # Run preassembly
        # 1. Fix common named entity normalization ("grounding") errors
        stmts = ac.map_grounding(idrp.statements)
        # 2. Fix inconsistent sites of post-translational modifications
        stmts = ac.map_sequence(stmts)
        # 3. Identify duplicate/overlapping statements, calculate belief
        stmts = ac.run_preassembly(stmts)

        # Convert statements to JSON
        stmts_json = stmts_to_json(stmts)
        # Store results in dict indexed by Pubchem ID
        results_dict[str(pubchem_id)] = {
            'name': name,
            'statements': stmts_json
        }

    # Save to file
    with open('output/polyphenol_stmts.json', 'wt') as f:
        json.dump(results_dict, f, indent=2)
Exemple #30
0
def stmts_to_jsonl_str(stmts):
    """Serialize statements as newline-delimited JSON (JSONL)."""
    lines = (json.dumps(stmt_json) for stmt_json in stmts_to_json(stmts))
    return '\n'.join(lines)
Exemple #31
0
def encode_indra_stmts(stmts):
    """Encode INDRA statements as a JSON string."""
    as_json = stmts_to_json(stmts)
    json_str = json.dumps(as_json)
    return json_str
Exemple #32
0
def stmts_json_from_text(text):
    """Process text with TRIPS and return INDRA statement JSON."""
    ekb_xml = read_or_load(text)
    processor = trips.process_xml(ekb_xml)
    return stmts_to_json(processor.statements)
def dump_stmts_json(stmts, fname):
    """Write the given statements to fname as indented JSON."""
    print('Dumping statements into JSON')
    stmts_json = stmts_to_json(stmts, use_sbo=False)
    with open(fname, 'w') as fh:
        json.dump(stmts_json, fh, indent=1)
Exemple #34
0
def stmts_json_from_text(text):
    """Return INDRA statement JSON extracted from text via TRIPS."""
    processor = trips.process_xml(read_or_load(text))
    return stmts_to_json(processor.statements)