def read_elsevier_eidos_search_terms(piis_to_terms):
    """Return EmmaaStatements read from the articles behind the given PIIs.

    The texts for the PIIs are obtained with ``read_piis`` and turned into
    statements with ``process_texts``. Every evidence of every statement is
    annotated with the PII it came from before the statements are wrapped
    by ``to_emmaa_stmts``.

    Parameters
    ----------
    piis_to_terms : dict
        A dict whose keys are PIIs and whose values are the search terms
        that produced them.

    Returns
    -------
    list[:py:class:`emmaa.model.EmmaaStatement`]
        A list of EmmaaStatements extracted from the given PIIs.
    """
    pii_list = list(piis_to_terms)
    # A single UTC timestamp is shared by all statements of this run
    timestamp = datetime.datetime.utcnow()
    stmts_by_pii = process_texts(read_piis(pii_list))
    collected = []
    for pii in stmts_by_pii:
        pii_stmts = stmts_by_pii[pii]
        # Record the source PII on each piece of evidence
        for evidence in (ev for st in pii_stmts for ev in st.evidence):
            evidence.annotations['pii'] = pii
        collected.extend(
            to_emmaa_stmts(pii_stmts, timestamp, piis_to_terms[pii],
                           {'internal': True}))
    return collected
Exemple #2
0
    def update_from_files(self, files_config):
        """Return EmmaaStatements built from pickled statement files on S3.

        Each entry of `files_config` names a pickle file that is loaded with
        `load_pickle_from_s3`; the loaded statements are wrapped by
        `to_emmaa_stmts` using the current local time, an empty list of
        search terms, and the entry's metadata.

        Relevant part of reading config should look similar to:

        {"other_files": [
            {
                "bucket": "indra-covid19",
                "filename": "ctd_stmts.pkl",
                "metadata": {"internal": true, "curated": true}
            }
        ]
        }

        Parameters
        ----------
        files_config : list[dict]
            One dict per file with "bucket", "filename" and "metadata" keys.

        Returns
        -------
        list
            The EmmaaStatements produced for all files, in the order the
            files are listed.
        """
        collected = []
        for entry in files_config:
            s3_bucket, fname, file_metadata = (
                entry['bucket'], entry['filename'], entry['metadata'])
            file_stmts = load_pickle_from_s3(s3_bucket, fname)
            logger.info(f'Loaded {len(file_stmts)} statements from {fname}.')
            # Every file gets its own timestamp and no search terms
            loaded_at = datetime.datetime.now()
            collected.extend(
                to_emmaa_stmts(file_stmts, loaded_at, [], file_metadata))
        return collected
Exemple #3
0
def read_db_ids_search_terms(id_search_terms, id_type):
    """Return EmmaaStatements for the given paper IDs from the INDRA database.

    Raw statement JSONs are fetched for the IDs from the 'primary' database,
    deserialized with ``stmts_from_json`` and wrapped by ``to_emmaa_stmts``
    together with the search terms associated with each ID.

    Parameters
    ----------
    id_search_terms : dict
        A dict whose keys are IDs and whose values are the search terms
        that produced them.
    id_type : str
        The type of the given IDs; passed through as ``id_type`` to
        ``get_raw_stmt_jsons_from_papers``.

    Returns
    -------
    list[:py:class:`emmaa.model.EmmaaStatement`]
        A list of EmmaaStatements extracted from the given IDs.
    """
    paper_ids = list(id_search_terms)
    # A single UTC timestamp is shared by all statements of this run
    timestamp = datetime.datetime.utcnow()
    primary_db = get_db('primary')
    jsons_by_id = get_raw_stmt_jsons_from_papers(
        paper_ids, id_type=id_type, db=primary_db)
    collected = []
    for paper_id, stmt_jsons in jsons_by_id.items():
        collected.extend(
            to_emmaa_stmts(stmts_from_json(stmt_jsons), timestamp,
                           id_search_terms[paper_id], {'internal': True}))
    return collected
Exemple #4
0
    def update_with_cord19(self, cord19_config):
        """Return new EmmaaStatements derived from the CORD19 dataset.

        The model's current INDRA statements are handed to
        `make_model_stmts` together with the configured `date_limit`. The
        statements it returns are wrapped by `to_emmaa_stmts` with the
        current local time, no search terms, and the configured metadata.
        The paper IDs it returns are registered on the model as 'TRID' IDs.

        Relevant part of reading config should look similar to:

        {"cord19_update": {
            "metadata": {
                "internal": true,
                "curated": false
                },
            "date_limit": 5
            }
        }

        Parameters
        ----------
        cord19_config : dict
            Dict with "metadata" and "date_limit" keys.

        Returns
        -------
        list
            The EmmaaStatements created from the new statements.
        """
        # Using local import to avoid dependency
        from covid_19.emmaa_update import make_model_stmts
        model_stmts = self.get_indra_stmts()
        stmt_metadata, limit = (
            cord19_config['metadata'], cord19_config['date_limit'])
        fresh_stmts, trids = make_model_stmts(model_stmts, date_limit=limit)
        timestamp = datetime.datetime.now()
        wrapped = to_emmaa_stmts(fresh_stmts, timestamp, [],
                                 metadata=stmt_metadata)
        self.add_paper_ids(trids, 'TRID')
        return wrapped
Exemple #5
0
def test_to_emmaa_stmts():
    """Check that to_emmaa_stmts wraps a statement and annotates evidence."""
    expected_metadata = {'internal': True}
    wrapped = to_emmaa_stmts(
        [stmt], date=date, search_terms=search_terms,
        metadata={'internal': True})
    assert wrapped
    first = wrapped[0]
    # The wrapper must carry the original statement and the metadata
    assert isinstance(first, EmmaaStatement)
    assert first.stmt == stmt
    assert first.metadata == expected_metadata
    # The same information must be mirrored in the evidence annotations
    first_evidence = first.stmt.evidence[0]
    emmaa_anns = first_evidence.annotations.get('emmaa')
    assert emmaa_anns
    assert len(emmaa_anns['search_terms']) == 2
    assert emmaa_anns['metadata'] == expected_metadata
Exemple #6
0
 def update_with_cord19(self):
     """Replace the model's statements with a CORD19-updated set.

     The current INDRA statements of the model and four pickled statement
     sets loaded from the 'indra-covid19' S3 bucket are passed to
     `make_model_stmts`. Its result is wrapped by `to_emmaa_stmts` with the
     current local time and no search terms, and stored in `self.stmts`.
     """
     bucket = 'indra-covid19'
     current_stmts = self.get_indra_stmts()
     drug_stmts = load_pickle_from_s3(bucket, 'drug_stmts.pkl')
     gordon_stmts = load_pickle_from_s3(bucket, 'gordon_ndex_stmts.pkl')
     virhostnet_stmts = load_pickle_from_s3(bucket, 'virhostnet_stmts.pkl')
     ctd_stmts = load_pickle_from_s3(bucket, 'ctd_stmts.pkl')
     logger.info(f'Loaded {len(current_stmts)} current model statements, '
                 f'{len(drug_stmts)} drug statements, {len(gordon_stmts)} '
                 f'Gordon statements, {len(virhostnet_stmts)} '
                 f'VirHostNet statements, {len(ctd_stmts)} CTD statements.')
     # Pool all externally curated statements into one list
     other_stmts = drug_stmts + gordon_stmts + virhostnet_stmts + ctd_stmts
     combined = make_model_stmts(current_stmts, other_stmts)
     timestamp = datetime.datetime.now()
     self.stmts = to_emmaa_stmts(combined, timestamp, [])
Exemple #7
0
def create_upload_model(model_name, full_name, indra_stmts, ndex_id=None):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    The statements are wrapped as EmmaaStatements, a CX network is assembled
    from them and either uploaded to NDEx as a new network or used to update
    an existing one, and the resulting model plus its config are saved to S3.

    Parameters
    ----------
    model_name : str
        Short name of the model to use on S3.
    full_name : str
        Human-readable model name to use in EMMAA dashboard. Kept for
        interface compatibility; it is currently not used by this function.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    ndex_id : str
        UUID of the network corresponding to the model on NDex. If provided,
        the NDex network will be updated with the latest model content.
        If None (default), a new network will be created and the UUID stored
        in the model config file on S3.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [])
    # Get updated CX content for the INDRA Statements
    cxa = CxAssembler(indra_stmts)
    cx_str = cxa.make_model()
    # If we don't have an NDex ID, create network and upload to Ndex
    if ndex_id is None:
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
    # If the NDEx ID is provided, update the existing network
    else:
        ndex_client.update_network(cx_str, ndex_id)
    # Create the config dictionary
    config_dict = {'ndex': {'network': ndex_id}, 'search_terms': []}
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3, followed by its config as JSON
    emmaa_model.save_to_s3()
    s3_client = boto3.client('s3')
    config_json = json.dumps(config_dict)
    # The config is uploaded exactly once; a second identical put_object
    # call would only overwrite the object with the same content.
    s3_client.put_object(Body=config_json.encode('utf8'),
                         Key='models/%s/config.json' % model_name,
                         Bucket='emmaa')
Exemple #8
0
 def update_with_cord19(self):
     """Replace the model's statements with a CORD19-updated set.

     Pickled statement files are loaded from the 'indra-covid19' S3 bucket.
     The file names come from the 'filenames' entry of the
     'cord19_update' reading config when that config is a dict, and fall
     back to a built-in default list otherwise. The loaded statements and
     the model's current INDRA statements are passed to `make_model_stmts`;
     the returned statements are stored in `self.stmts` as EmmaaStatements
     and the returned paper IDs are registered as 'TRID' IDs.
     """
     # Using local import to avoid dependency
     from covid_19.emmaa_update import make_model_stmts
     current_stmts = self.get_indra_stmts()
     fnames = [
         'drug_stmts_v2.pkl', 'gordon_ndex_stmts.pkl',
         'virhostnet_stmts.pkl', 'ctd_stmts.pkl']
     cord19_setting = self.reading_config['cord19_update']
     # A non-dict setting (e.g. a boolean) keeps the default file names
     if isinstance(cord19_setting, dict):
         fnames = cord19_setting.get('filenames', fnames)
     other_stmts = []
     for fname in fnames:
         file_stmts = load_pickle_from_s3('indra-covid19', fname)
         logger.info(f'Loaded {len(file_stmts)} statements from {fname}.')
         other_stmts.extend(file_stmts)
     combined, trids = make_model_stmts(current_stmts, other_stmts)
     timestamp = datetime.datetime.now()
     self.stmts = to_emmaa_stmts(combined, timestamp, [])
     self.add_paper_ids(trids, 'TRID')
Exemple #9
0
def create_upload_model(model_name, indra_stmts, config_file):
    """Make and upload an EMMAA model from a list of INDRA Statements.

    The statements are wrapped as internal EmmaaStatements and the config is
    read from `config_file`. If the config has no 'ndex' entry, a new NDEx
    network is created from the statements, its ID is added to the config,
    and the amended config is written next to the original file with an
    '.updated' suffix. Finally the model and its config are saved to S3.

    Parameters
    ----------
    model_name : str
        Name of the model to use on S3.
    indra_stmts : list of indra.statement
        INDRA Statements to be used to populate the EMMAA model.
    config_file : str
        Path to the local config.json file.
    """
    emmaa_stmts = to_emmaa_stmts(indra_stmts, datetime.datetime.now(), [],
                                 {'internal': True})
    # Load config information
    with open(config_file, 'rt') as f:
        config_json = json.load(f)
    # If there is no ndex entry in the config, create a new network and update
    # the config file with the NDex network ID
    if 'ndex' not in config_json:
        cxa = CxAssembler(indra_stmts)
        # The assembled CX content is kept on the assembler; the return
        # value of make_model is not needed here.
        cxa.make_model()
        ndex_id = cxa.upload_model(private=False)
        print(f'NDex ID for {model_name} is {ndex_id}.')
        config_json['ndex'] = {'network': ndex_id}
        updated_config_file = f'{config_file}.updated'
        with open(updated_config_file, 'wt') as f:
            json.dump(config_json, f, indent=2)
    # If the NDEx ID is provided we don't need to update the existing network
    # because this will occur as part of the model assembly/update procedure
    # on EMMAA itself.
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_json)
    emmaa_model.add_statements(emmaa_stmts)
    # Upload model to S3
    emmaa_model.save_to_s3()
    # Upload config JSON
    save_config_to_s3(model_name, config_json)
Exemple #10
0
def read_pmid_search_terms(pmid_search_terms):
    """Return EmmaaStatements read from the papers behind the given PMIDs.

    The PMIDs are read with ``read_pmids`` and the statements obtained for
    each PMID are wrapped by ``to_emmaa_stmts`` together with the search
    terms associated with that PMID.

    Parameters
    ----------
    pmid_search_terms : dict
        A dict whose keys are PMIDs and whose values are the search terms
        that produced them.

    Returns
    -------
    list[:py:class:`emmaa.model.EmmaaStatement`]
        A list of EmmaaStatements extracted from the given PMIDs.
    """
    pmid_list = list(pmid_search_terms)
    # A single UTC timestamp is used for reading and for all statements
    timestamp = datetime.datetime.utcnow()
    stmts_by_pmid = read_pmids(pmid_list, timestamp)
    collected = []
    for pmid in stmts_by_pmid:
        collected.extend(
            to_emmaa_stmts(stmts_by_pmid[pmid], timestamp,
                           pmid_search_terms[pmid], {'internal': True}))
    return collected
Exemple #11
0
    def update_from_disease_map(self, disease_map_config):
        """Return EmmaaStatements obtained from a MINERVA Disease Map.

        The configured map is processed with `process_from_web` and the
        statements of the resulting processor are wrapped by
        `to_emmaa_stmts` with the current local time, no search terms, and
        the configured metadata.

        Relevant part of reading config should look similar to:

        {"disease_map": {
            "map_name": "covid19map",
            "filenames" : "all",  # or a list of filenames
            "metadata": {
                "internal": true
                }
            }
        }

        Parameters
        ----------
        disease_map_config : dict
            Dict with "filenames", "map_name" and "metadata" keys.

        Returns
        -------
        list
            The EmmaaStatements created from the disease map statements.
        """
        map_files, map_name, stmt_metadata = (
            disease_map_config['filenames'],
            disease_map_config['map_name'],
            disease_map_config['metadata'])
        logger.info('Loading Statements from %s Disease Map' % map_name)
        processor = process_from_web(filenames=map_files, map_name=map_name)
        timestamp = datetime.datetime.now()
        wrapped = to_emmaa_stmts(processor.statements, timestamp, [],
                                 stmt_metadata)
        logger.info('Got %d EMMAA Statements from %s Disease Map' %
                    (len(wrapped), map_name))
        return wrapped