Example #1
0
def test_save_load_update_model_manager():
    """Check saving, loading and updating a model manager in the test bucket."""
    # Local imports are recommended when using moto
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        load_model_manager_from_s3, update_model_manager_on_s3
    from emmaa.util import find_number_of_files_on_s3

    def count_manager_files():
        # Number of model manager pickles stored for the 'test' model
        return find_number_of_files_on_s3(
            TEST_BUCKET_NAME, 'results/test/model_manager_', '.pkl')

    client = setup_bucket(add_model=True)

    # Before anything is saved there are no files and loading gives None
    assert count_manager_files() == 0
    missing_mm = load_model_manager_from_s3(
        model_name='test', bucket=TEST_BUCKET_NAME)
    assert missing_mm is None

    # Save a model manager and load it back
    manager = ModelManager(create_model())
    save_model_manager_to_s3('test', manager, bucket=TEST_BUCKET_NAME)
    reloaded_mm = load_model_manager_from_s3(
        model_name='test', bucket=TEST_BUCKET_NAME)
    assert reloaded_mm
    assert count_manager_files() == 1

    # Update should create a new file if there's at least one second
    # difference between the two saves
    time.sleep(1)
    update_model_manager_on_s3('test', TEST_BUCKET_NAME)
    assert count_manager_files() == 2
Example #2
0
 def load_from_s3_key(cls, key, bucket=EMMAA_BUCKET_NAME,
                      load_estmts=False):
     """Build an instance of ``cls`` from a model manager stored on S3.

     Parameters
     ----------
     key
         Forwarded as ``key`` to ``load_model_manager_from_s3``.
     bucket
         Bucket used both for loading the model manager and, if requested,
         for ``load_stmts_from_s3``.
     load_estmts : bool
         If True, also load statements with ``load_stmts_from_s3`` using the
         model's name and pass them to ``cls``.

     Returns
     -------
     An instance of ``cls``, or None if no model manager could be loaded.
     """
     manager = load_model_manager_from_s3(key=key, bucket=bucket)
     if not manager:
         return None

     model = manager.model
     stmts = model.assembled_stmts
     date_str = manager.date_str

     # Models lacking a paper_ids attribute yield None instead of a list
     try:
         paper_ids = list(model.paper_ids)
     except AttributeError:
         paper_ids = None
     id_type = model.reading_config.get('main_id_type', 'TRID')

     if load_estmts:
         estmts, _ = load_stmts_from_s3(model.name, bucket)
     else:
         estmts = None

     # Apply the model's metadata filter (when configured) to both the
     # assembled statements and any loaded statements
     filter_config = model.reading_config.get('filter')
     if filter_config:
         conditions = filter_config['conditions']
         evid_policy = filter_config['evid_policy']
         stmts = filter_indra_stmts_by_metadata(
             stmts, conditions, evid_policy)
         if estmts:
             estmts = filter_emmaa_stmts_by_metadata(estmts, conditions)

     return cls(stmts, date_str, paper_ids, id_type, estmts)
Example #3
0
 def load_from_s3_key(cls, key, bucket=EMMAA_BUCKET_NAME):
     """Build an instance of ``cls`` from a model manager stored on S3.

     The model manager is loaded with ``load_model_manager_from_s3`` using
     the given ``key`` and ``bucket``. Its model's assembled statements and
     its date string are passed to ``cls``.

     Returns
     -------
     An instance of ``cls``, or None if no model manager could be loaded.
     """
     manager = load_model_manager_from_s3(key=key, bucket=bucket)
     if manager:
         return cls(manager.model.assembled_stmts, manager.date_str)
     return None
Example #4
0
def answer_queries_from_s3(model_name, db=None, bucket=EMMAA_BUCKET_NAME):
    """Answer registered queries using the model manager stored on S3.

    Parameters
    ----------
    model_name : str
        Name of EmmaaModel to answer queries for.
    db : Optional[emmaa.db.manager.EmmaaDatabaseManager]
        If given over-rides the default primary database.
    bucket : str
        Bucket to load the model manager from.
    """
    model_manager = load_model_manager_from_s3(
        model_name=model_name, bucket=bucket)
    # The query manager works on a list of model managers; here just one
    query_manager = QueryManager(db=db, model_managers=[model_manager])
    query_manager.answer_registered_queries(model_name)
Example #5
0
def load_model_manager_from_cache(model_name, bucket=EMMAA_BUCKET_NAME):
    """Return the model manager for a model, preferring the in-memory cache.

    A cached manager is reused only if its ``date_str`` occurs in the latest
    model manager file reported by ``find_latest_s3_file``. Otherwise the
    manager is loaded with ``load_model_manager_from_s3`` and the result is
    stored in ``model_manager_cache`` under ``model_name``.

    Parameters
    ----------
    model_name : str
        Name of the model; used as the cache key and in the S3 prefix
        ``results/{model_name}/model_manager_``.
    bucket : str
        Bucket to look up and load the model manager from.

    Returns
    -------
    The cached model manager, or whatever ``load_model_manager_from_s3``
    returns when the cache is missing or out of date.
    """
    model_manager = model_manager_cache.get(model_name)
    if model_manager:
        latest_on_s3 = find_latest_s3_file(
            bucket, f'results/{model_name}/model_manager_', '.pkl')
        cached_date = model_manager.date_str
        logger.info(f'Found model manager cached on {cached_date} and '
                    f'latest file on S3 is {latest_on_s3}')
        # A falsy lookup result (e.g. None when no file is found) cannot be
        # searched with `in` and would raise TypeError; treat it as a cache
        # miss and fall through to a fresh load from S3 instead.
        if latest_on_s3 and cached_date in latest_on_s3:
            logger.info(f'Loaded model manager for {model_name} from cache.')
            return model_manager
    logger.info(f'Loading model manager for {model_name} from S3.')
    model_manager = load_model_manager_from_s3(model_name=model_name,
                                               bucket=bucket)
    model_manager_cache[model_name] = model_manager
    return model_manager
Example #6
0
import sys
import json
from indra.databases import get_identifiers_url
from emmaa.model_tests import load_model_manager_from_s3

if __name__ == '__main__':
    # Usage: pass the model name as the first command line argument
    model_manager = load_model_manager_from_s3(sys.argv[1])

    # Every db_refs namespace used by any entity, except the raw text keys
    used_namespaces = {
        ns for entity in model_manager.entities for ns in entity.db_refs
    }
    used_namespaces.difference_update({'TEXT', 'TEXT_NORM'})

    # Map each namespace to its URL pattern, skipping namespaces for which
    # no URL is returned
    url_patterns = {}
    for ns in sorted(used_namespaces):
        pattern = get_identifiers_url(ns, '[ID]')
        if pattern is not None:
            url_patterns[ns] = pattern

    # Some INDRA-specific customizations we need to revert here
    for ns, token in (('CHEBI', 'CHEBI:'), ('CHEMBL', 'CHEMBL')):
        if ns in url_patterns:
            url_patterns[ns] = url_patterns[ns].replace(token, '')

    with open('url_patterns.json', 'w') as fh:
        json.dump(url_patterns, fh, indent=1)