Exemple #1
0
 def update_with_cord19(self):
     """Rebuild the model statements using the CORD19 statement pickles.

     The drug, Gordon, VirHostNet and CTD statement pickles are loaded
     from the 'indra-covid19' S3 bucket and handed, together with the
     current model statements, to ``make_model_stmts``. The result is
     converted with ``to_emmaa_stmts`` and stored in ``self.stmts``.
     """
     s3_bucket = 'indra-covid19'
     model_stmts = self.get_indra_stmts()
     drugs = load_pickle_from_s3(s3_bucket, 'drug_stmts.pkl')
     gordon = load_pickle_from_s3(s3_bucket, 'gordon_ndex_stmts.pkl')
     virhostnet = load_pickle_from_s3(s3_bucket, 'virhostnet_stmts.pkl')
     ctd = load_pickle_from_s3(s3_bucket, 'ctd_stmts.pkl')
     logger.info(
         f'Loaded {len(model_stmts)} current model statements, '
         f'{len(drugs)} drug statements, {len(gordon)} '
         f'Gordon statements, {len(virhostnet)} '
         f'VirHostNet statements, {len(ctd)} CTD statements.')
     # Concatenation order matches the order in which files were loaded.
     extra_stmts = drugs + gordon + virhostnet + ctd
     merged_stmts = make_model_stmts(model_stmts, extra_stmts)
     timestamp = datetime.datetime.now()
     self.stmts = to_emmaa_stmts(merged_stmts, timestamp, [])
Exemple #2
0
    def update_from_files(self, files_config):
        """Load custom statements from pickle files stored on S3.

        Every entry of ``files_config`` must provide the keys "bucket",
        "filename" and "metadata". The relevant part of the reading config
        should look similar to:

        {"other_files": [
            {
                "bucket": "indra-covid19",
                "filename": "ctd_stmts.pkl",
                "metadata": {"internal": true, "curated": true}
            }
        ]
        }

        Parameters
        ----------
        files_config : iterable of dict
            One dictionary per file to load, as described above.

        Returns
        -------
        list
            The results of ``to_emmaa_stmts`` for each file, concatenated
            in the order the files appear in ``files_config``.
        """
        collected = []
        for entry in files_config:
            bucket, fname, metadata = (
                entry['bucket'], entry['filename'], entry['metadata'])
            loaded = load_pickle_from_s3(bucket, fname)
            logger.info(f'Loaded {len(loaded)} statements from {fname}.')
            # Each file gets its own timestamp, taken after it was loaded.
            timestamp = datetime.datetime.now()
            collected.extend(to_emmaa_stmts(loaded, timestamp, [], metadata))
        return collected
Exemple #3
0
def load_model_manager_from_s3(model_name=None,
                               key=None,
                               bucket=EMMAA_BUCKET_NAME):
    """Load a model manager from S3, either by key or by model name.

    When ``key`` is given, the pickle stored under it is loaded; if the
    loaded manager's model has no assembled statements, they are fetched
    with ``get_assembled_statements`` and attached. If anything in that
    step fails, the manager is rebuilt from statements instead. When only
    ``model_name`` is given, the latest key for that model is looked up
    and the function calls itself with it.

    Parameters
    ----------
    model_name : str, optional
        Name of the model. If omitted while falling back to statements,
        it is taken from the second '/'-separated component of ``key``.
    key : str, optional
        S3 key of a pickled model manager.
    bucket : str
        Name of the S3 bucket to read from.

    Returns
    -------
    The loaded model manager, or None if it could not be loaded or
    neither ``key`` nor ``model_name`` was provided.
    """
    if not key:
        if not model_name:
            # Nothing to search with: neither a key nor a model name.
            logger.info('Could not find the model manager.')
            return None
        # Prefer the latest versioned pickle; fall back to the
        # non-versioned key when no versioned file is found.
        latest_key = find_latest_s3_file(
            bucket, f'results/{model_name}/model_manager_', '.pkl')
        if latest_key is None:
            latest_key = f'results/{model_name}/latest_model_manager.pkl'
        return load_model_manager_from_s3(
            model_name=model_name, key=latest_key, bucket=bucket)

    try:
        manager = load_pickle_from_s3(bucket, key)
        if not manager.model.assembled_stmts:
            name = manager.model.name
            manager_date = strip_out_date(manager.date_str, 'date')
            assembled, _ = get_assembled_statements(
                name, manager_date, bucket=bucket)
            manager.model.assembled_stmts = assembled
        return manager
    except Exception as load_err:
        logger.info('Could not load the model manager directly')
        logger.info(load_err)
        if not model_name:
            model_name = key.split('/')[1]
        date = strip_out_date(key, 'date')
        logger.info('Trying to load model manager from statements')
        try:
            return ModelManager.load_from_statements(
                model_name, date=date, bucket=bucket)
        except Exception as stmts_err:
            logger.info('Could not load the model manager from '
                        'statements')
            logger.info(stmts_err)
            return None
Exemple #4
0
def load_stmts_from_s3(model_name, bucket=EMMAA_BUCKET_NAME):
    """Return the EMMAA Statements of the latest model and their S3 key.

    Parameters
    ----------
    model_name : str
        The name of the model whose statements should be loaded.
    bucket : str
        Name of the S3 bucket to read from.

    Returns
    -------
    stmts : list of emmaa.statements.EmmaaStatement
        The list of EMMAA Statements in the latest model version.
    latest_model_key : str
        The S3 key the statements were loaded from.
    """
    key_prefix = f'models/{model_name}/model_'
    newest_key = find_latest_s3_file(bucket, key_prefix, extension='.pkl')
    logger.info(f'Loading model state from {newest_key}')
    return load_pickle_from_s3(bucket, newest_key), newest_key
Exemple #5
0
 def update_with_cord19(self):
     """Rebuild the model statements using the CORD19 statement pickles.

     The files to load from the 'indra-covid19' S3 bucket come from the
     "filenames" entry of ``self.reading_config['cord19_update']`` when
     that value is a dict; otherwise a default list of files is used.
     The loaded statements and the current model statements are passed
     to ``make_model_stmts``; the resulting statements replace
     ``self.stmts`` and the resulting paper IDs are registered via
     ``self.add_paper_ids`` with ID type 'TRID'.
     """
     # Imported here so covid_19 is only required when this method runs
     from covid_19.emmaa_update import make_model_stmts
     model_stmts = self.get_indra_stmts()
     fallback_fnames = [
         'drug_stmts_v2.pkl', 'gordon_ndex_stmts.pkl',
         'virhostnet_stmts.pkl', 'ctd_stmts.pkl']
     cord19_config = self.reading_config['cord19_update']
     # A dict may override the file list; any other value (e.g. a
     # boolean flag) selects the defaults.
     fnames = (cord19_config.get('filenames', fallback_fnames)
               if isinstance(cord19_config, dict) else fallback_fnames)
     extra_stmts = []
     for fname in fnames:
         loaded = load_pickle_from_s3('indra-covid19', fname)
         logger.info(f'Loaded {len(loaded)} statements from {fname}.')
         extra_stmts.extend(loaded)
     merged_stmts, paper_ids = make_model_stmts(model_stmts, extra_stmts)
     timestamp = datetime.datetime.now()
     self.stmts = to_emmaa_stmts(merged_stmts, timestamp, [])
     self.add_paper_ids(paper_ids, 'TRID')
Exemple #6
0
def load_tests_from_s3(test_name, bucket=EMMAA_BUCKET_NAME):
    """Load Emmaa Tests with the given name from S3.

    Parameters
    ----------
    test_name : str
        Looks for the latest '.pkl' test file in the bucket whose key
        starts with 'tests/{test_name}'. If that lookup raises a
        ValueError, the key 'tests/{test_name}.pkl' is used instead.
    bucket : str
        Name of the S3 bucket to read from.

    Returns
    -------
    tests : list of EmmaaTest
        List of EmmaaTest objects loaded from S3.
    test_key : str
        The S3 key the tests were loaded from.
    """
    key_prefix = f'tests/{test_name}'
    try:
        resolved_key = find_latest_s3_file(bucket, key_prefix, '.pkl')
    except ValueError:
        resolved_key = f'tests/{test_name}.pkl'
    logger.info(f'Loading tests from {resolved_key}')
    return load_pickle_from_s3(bucket, resolved_key), resolved_key
Exemple #7
0
def load_model_manager_from_s3(model_name=None,
                               key=None,
                               bucket=EMMAA_BUCKET_NAME):
    """Load a pickled model manager from S3 by key or by model name.

    The pickle under ``key`` is tried first. If no key is given or
    loading it fails, and ``model_name`` is provided, the latest key for
    that model is looked up and loaded through a second call that passes
    only the key and bucket.

    Parameters
    ----------
    model_name : str, optional
        Name of the model whose latest model manager should be found.
    key : str, optional
        S3 key of a pickled model manager.
    bucket : str
        Name of the S3 bucket to read from.

    Returns
    -------
    The loaded model manager, or None if it could not be found.
    """
    if key:
        try:
            return load_pickle_from_s3(bucket, key)
        except Exception as err:
            logger.info('Could not load the model manager')
            logger.info(err)

    if not model_name:
        # Neither the key nor a model name led to a model manager.
        logger.info('Could not find the model manager.')
        return None

    # Prefer the latest versioned pickle; fall back to the non-versioned
    # key when no versioned file is found.
    latest_key = find_latest_s3_file(
        bucket, f'results/{model_name}/model_manager_', '.pkl')
    if latest_key is None:
        latest_key = f'results/{model_name}/latest_model_manager.pkl'
    return load_model_manager_from_s3(key=latest_key, bucket=bucket)