Example No. 1
0
def get_email_subscriptions(email):
    """Verify which email subscriptions exist for the provided email.

    Parameters
    ----------
    email : str
        The email to check the subscriptions for.

    Returns
    -------
    dict
        A dictionary with two keys: 'queries', a list of
        (english_description, query_type, query_hash) tuples for each
        subscribed query, and 'models', a list of
        (model_id, human_readable_name) tuples for each subscribed model.
    """
    user_queries = db.get_subscribed_queries(email)
    user_models = db.get_user_models(email)
    # Cache human readable names so each model config is loaded from S3
    # at most once across both queries and models.
    model_full_names = {}

    def _cache_full_name(mid):
        # Fall back to the model id when no readable name is configured.
        if mid not in model_full_names:
            config = load_config_from_s3(mid)
            model_full_names[mid] = config.get('human_readable_name', mid)

    for _, mid, _ in user_queries:
        _cache_full_name(mid)
    for mid in user_models:
        _cache_full_name(mid)
    results = {
        'queries': [(qo.to_english() + f' for model {model_full_names[mid]}',
                     f'{qo.get_type()}'.replace('_', ' '), qh)
                    for qo, mid, qh in user_queries],
        'models': [(mid, model_full_names[mid]) for mid in user_models]
    }
    return results
Example No. 2
0
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    stmts, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    # Optionally filter statements according to the model's test config
    make_tests = config.get('make_tests')
    if isinstance(make_tests, dict):
        filter_spec = make_tests['filter']
        stmts = filter_indra_stmts_by_metadata(
            stmts, filter_spec['conditions'], filter_spec['evid_policy'])
    # Only statements with a full set of agents make valid tests
    tests = [StatementCheckingTest(stmt) for stmt in stmts
             if all(stmt.agent_list())]
    date_str = make_date_str()
    model_label = config.get("human_readable_name")
    test_dict = {
        'test_data': {
            'description': (f'These tests were generated from the '
                            f'{model_label} on {date_str[:10]}'),
            'name': f'{model_label} model test corpus'
        },
        'tests': tests
    }
    if upload:
        save_tests_to_s3(test_dict, bucket,
                         f'tests/{model_name}_tests_{date_str}.pkl', 'pkl')
    return test_dict
Example No. 3
0
 def add_model_from_s3(self, model_id, config=None, number_of_updates=3,
                       bucket=EMMAA_BUCKET_NAME):
     """Add data for one model from S3 files."""
     if not config:
         config = load_config_from_s3(model_id)
     # Normalize a single corpus name into a list
     test_corpora = config['test']['test_corpus']
     if isinstance(test_corpora, str):
         test_corpora = [test_corpora]
     stmt_files = sort_s3_files_by_date_str(
         bucket, f'assembled/{model_id}/statements_', '.gz')
     # Only consider the most recent statement dumps
     for stmt_file in stmt_files[:number_of_updates]:
         date = strip_out_date(stmt_file, 'date')
         dt = strip_out_date(stmt_file, 'datetime')
         # Load and store the assembled statements first
         stmt_jsons = load_gzip_json_from_s3(bucket, stmt_file)
         self.add_statements(model_id, date, stmt_jsons)
         # Then fold in path counts from each test corpus for this run
         for test_corpus in test_corpora:
             key = f'results/{model_id}/results_{test_corpus}_{dt}.json'
             try:
                 results = load_json_from_s3(bucket, key)
                 path_counts = results[0].get('path_stmt_counts')
                 if path_counts:
                     self.update_statements_path_counts(
                         model_id, date, path_counts)
             except ClientError as e:
                 # A missing results file is expected for some corpora;
                 # anything else is a real S3 error and is re-raised.
                 if e.response['Error']['Code'] != 'NoSuchKey':
                     raise e
                 logger.warning(f'No results file for {key}, skipping')
Example No. 4
0
def model_update_notify(model_name,
                        test_corpora,
                        date,
                        db,
                        bucket=EMMAA_BUCKET_NAME):
    """Find the delta for a given model and send updates via
    Twitter posts and email notifications.

    Parameters
    ----------
    model_name : str
        A name of EMMAA model.
    test_corpora : list[str]
        A list of test corpora names to get test stats.
    date : str
        A date for which to get stats for.
    db : emmaa.db.EmmaaDatabaseManager
        An instance of a database manager to use.
    bucket : str
        A name of S3 bucket where corresponding stats files are stored.
    """
    # Find where to send notifications (Twitter, user emails)
    config = load_config_from_s3(model_name, bucket)
    twitter_cred = None
    twitter_key = config.get('twitter')
    if twitter_key:
        twitter_cred = get_credentials(twitter_key)

    users = db.get_model_users(model_name)

    # With no destination there is no point computing deltas
    if not twitter_cred and not users:
        logger.info('No Twitter account and no users subscribed '
                    'to this model, not generating deltas')
        return

    # Get deltas
    deltas = get_model_deltas(model_name, test_corpora, date, bucket=bucket)

    # Tweet if configured
    if twitter_cred:
        tweet_deltas(deltas, twitter_cred)

    # Send emails if there are subscribed users
    if users:
        msg_dicts = get_all_update_messages(deltas, is_tweet=False)
        if msg_dicts:
            str_email = '\n'.join([msg['message'] for msg in msg_dicts])
            full_name = config.get('human_readable_name', model_name)
            subject_line = f'Updates to the {full_name} EMMAA model'
            for user_email in users:
                # The HTML body is rebuilt per recipient; the plain-text
                # body and subject are shared across all recipients.
                html_email = make_model_html_email(msg_dicts, user_email)
                # Return value intentionally ignored (best-effort send)
                send_email(sender=notifications_sender_default,
                           recipients=[user_email],
                           subject=subject_line,
                           body_text=str_email,
                           body_html=html_email,
                           source_arn=indra_bio_ARN,
                           return_email=notifications_return_default,
                           return_arn=indra_bio_ARN)
Example No. 5
0
def test_save_load_config():
    # Local imports are recommended when using moto
    from emmaa.model import save_config_to_s3, load_config_from_s3
    client = setup_bucket()
    # A round trip through S3 should give back an equal config dict
    original = {'test': 'This is test config'}
    save_config_to_s3('test', original, bucket=TEST_BUCKET_NAME)
    assert load_config_from_s3('test', bucket=TEST_BUCKET_NAME) == original
Example No. 6
0
def get_model_config(model, bucket=EMMAA_BUCKET_NAME):
    """Return the cached config for a model, loading it from S3 on a miss.

    Returns None (with a warning logged) if the model has no config on S3
    or the config lacks a 'human_readable_name' entry.
    """
    if model in model_cache:
        return model_cache[model]
    try:
        config_json = load_config_from_s3(model, bucket=bucket)
        model_cache[model] = config_json
    except ClientError:
        # Not cached on failure, so a later call retries in case the
        # config file appears on S3 afterwards.
        logger.warning(f"Model {model} has no metadata. Skipping...")
        return None
    if 'human_readable_name' not in config_json:
        logger.warning(f"Model {model} has no readable name. Skipping...")
        # Cache the rejection so subsequent calls short-circuit to None
        model_cache[model] = None
    return model_cache[model]
Example No. 7
0
    def get_config_from(self, assembly_config_template):
        """Return assembly config given a template model's name.

        Parameters
        ----------
        assembly_config_template : str
            The name of a model whose assembly config should be adopted.

        Returns
        -------
        dict
            The assembly config of the given template model.
        """
        from emmaa.model import load_config_from_s3
        template_config = load_config_from_s3(assembly_config_template)
        # Only the 'assembly' section of the template config is adopted
        return template_config.get('assembly')
Example No. 8
0
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    stmts, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    tests = []
    for stmt in stmts:
        # Only statements with a full set of agents make valid tests
        if all(stmt.agent_list()):
            tests.append(StatementCheckingTest(stmt))
    date_str = make_date_str()
    model_label = config.get("human_readable_name")
    test_dict = {
        'test_data': {
            'description': (f'These tests were generated from the '
                            f'{model_label} on {date_str[:10]}'),
            'name': f'{model_label} model test corpus',
        },
        'tests': tests,
    }
    if upload:
        save_tests_to_s3(test_dict, bucket,
                         f'tests/{model_name}_tests_{date_str}.pkl', 'pkl')
    return test_dict
Example No. 9
0
 def load_from_statements(cls, model_name, mode='local', date=None,
                          bucket=EMMAA_BUCKET_NAME):
     """Build an instance of this class from assembled statements on S3."""
     config = load_config_from_s3(model_name, bucket=bucket)
     # Restrict the paper ids lookup to a specific date when given
     prefix = (f'papers/{model_name}/paper_ids_{date}' if date
               else f'papers/{model_name}/paper_ids_')
     paper_key = find_latest_s3_file(bucket, prefix, 'json')
     paper_ids = load_json_from_s3(bucket, paper_key) if paper_key else None
     model = EmmaaModel(model_name, config, paper_ids)
     # Loading assembled statements to avoid reassembly
     stmts, fname = get_assembled_statements(model_name, date, bucket)
     model.assembled_stmts = stmts
     model.date_str = strip_out_date(fname, 'datetime')
     return cls(model, mode=mode)
Example No. 10
0
def test_model_to_tests():
    # Local imports are recommended when using moto
    from emmaa.model_tests import model_to_tests, load_tests_from_s3, \
        StatementCheckingTest
    from emmaa.model import save_config_to_s3, load_config_from_s3
    client = setup_bucket(add_model=True, add_mm=True)

    def check_shape(td):
        # Every result must be a dict with both expected top-level keys
        assert isinstance(td, dict)
        assert 'test_data' in td
        assert 'tests' in td

    test_dict = model_to_tests('test', bucket=TEST_BUCKET_NAME)
    check_shape(test_dict)
    # With default config get tests for each statement
    assert len(test_dict['tests']) == 2
    # Modify config to filter
    config = load_config_from_s3('test', bucket=TEST_BUCKET_NAME)
    config['make_tests'] = {
        'filter': {
            'conditions': {
                'curated': False
            },
            'evid_policy': 'any'
        }
    }
    save_config_to_s3('test', config, bucket=TEST_BUCKET_NAME)
    test_dict = model_to_tests('test', bucket=TEST_BUCKET_NAME)
    check_shape(test_dict)
    tests = test_dict['tests']
    # With modified config statements are filtered
    assert len(tests) == 1
    assert isinstance(tests[0], StatementCheckingTest)
    loaded_tests, _ = load_tests_from_s3('test_tests', bucket=TEST_BUCKET_NAME)
    assert loaded_tests
    check_shape(loaded_tests)
Example No. 11
0
def save_config(ctype, terms):
    """Replace the stored search terms in the config for ctype on S3."""
    config = load_config_from_s3(ctype)
    term_jsons = [term.to_json() for term in terms]
    config['search_terms'] = term_jsons
    save_config_to_s3(ctype, config)