def get_email_subscriptions(email):
    """Verify which email subscriptions exist for the provided email.

    Parameters
    ----------
    email : str
        The email to check subscriptions for.

    Returns
    -------
    dict
        A dictionary with two keys: 'queries', a list of
        (english_description, query_type, query_hash) tuples for the
        user's subscribed queries, and 'models', a list of
        (model_id, human_readable_name) tuples for the user's
        subscribed models.
    """
    user_queries = db.get_subscribed_queries(email)
    user_models = db.get_user_models(email)
    # Resolve each model id to its human readable name exactly once,
    # so the config is only loaded from S3 one time per model.
    model_full_names = {}
    query_mids = [mid for _, mid, _ in user_queries]
    for mid in query_mids + list(user_models):
        if mid not in model_full_names:
            config = load_config_from_s3(mid)
            # Fall back to the raw model id if no readable name is set
            model_full_names[mid] = config.get('human_readable_name', mid)
    results = {
        'queries': [
            (qo.to_english() + f' for model {model_full_names[mid]}',
             f'{qo.get_type()}'.replace('_', ' '),
             qh)
            for qo, mid, qh in user_queries],
        'models': [(mid, model_full_names[mid]) for mid in user_models]
    }
    return results
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    statements, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    # Filter statements if needed: the config may request a metadata
    # filter via a 'make_tests' mapping with 'filter' settings.
    make_tests = config.get('make_tests')
    if isinstance(make_tests, dict):
        filter_spec = make_tests['filter']
        statements = filter_indra_stmts_by_metadata(
            statements, filter_spec['conditions'], filter_spec['evid_policy'])
    # Only statements where every agent slot is filled become tests
    tests = []
    for stmt in statements:
        if all(stmt.agent_list()):
            tests.append(StatementCheckingTest(stmt))
    date_str = make_date_str()
    readable_name = config.get("human_readable_name")
    test_description = (
        f'These tests were generated from the '
        f'{readable_name} on {date_str[:10]}')
    test_name = f'{readable_name} model test corpus'
    test_dict = {
        'test_data': {'description': test_description,
                      'name': test_name},
        'tests': tests,
    }
    if upload:
        save_tests_to_s3(test_dict, bucket,
                         f'tests/{model_name}_tests_{date_str}.pkl', 'pkl')
    return test_dict
def add_model_from_s3(self, model_id, config=None, number_of_updates=3,
                      bucket=EMMAA_BUCKET_NAME):
    """Add data for one model from S3 files.

    Parameters
    ----------
    model_id : str
        The id of the model to load data for.
    config : dict, optional
        The model's config. If not provided, it is loaded from S3.
    number_of_updates : int
        How many of the most recent statement dumps to load (default 3).
    bucket : str
        The S3 bucket to read files from.
    """
    if not config:
        config = load_config_from_s3(model_id)
    test_corpora = config['test']['test_corpus']
    # The config may specify a single corpus name or a list of them
    if isinstance(test_corpora, str):
        test_corpora = [test_corpora]
    stmt_files = sort_s3_files_by_date_str(
        bucket, f'assembled/{model_id}/statements_', '.gz')
    stmt_files_to_use = stmt_files[:number_of_updates]
    for stmt_file in stmt_files_to_use:
        date = strip_out_date(stmt_file, 'date')
        dt = strip_out_date(stmt_file, 'datetime')
        # First get and add statements
        stmt_jsons = load_gzip_json_from_s3(bucket, stmt_file)
        self.add_statements(model_id, date, stmt_jsons)
        # Also update the path counts from each test corpus
        for test_corpus in test_corpora:
            key = f'results/{model_id}/results_{test_corpus}_{dt}.json'
            try:
                results = load_json_from_s3(bucket, key)
                path_counts = results[0].get('path_stmt_counts')
                if path_counts:
                    self.update_statements_path_counts(
                        model_id, date, path_counts)
            except ClientError as e:
                # A missing results file for one corpus is expected and
                # non-fatal; any other S3 error is re-raised.
                if e.response['Error']['Code'] == 'NoSuchKey':
                    logger.warning(f'No results file for {key}, skipping')
                    continue
                # Bare raise preserves the original traceback
                raise
def model_update_notify(model_name, test_corpora, date, db,
                        bucket=EMMAA_BUCKET_NAME):
    """Find deltas for a given model and send updates via Twitter posts
    and email notifications.

    Parameters
    ----------
    model_name : str
        A name of EMMAA model.
    test_corpora : list[str]
        A list of test corpora names to get test stats.
    date : str
        A date for which to get stats for.
    db : emmaa.db.EmmaaDatabaseManager
        An instance of a database manager to use.
    bucket : str
        A name of S3 bucket where corresponding stats files are stored.
    """
    # Find where to send notifications (Twitter, user emails)
    config = load_config_from_s3(model_name, bucket)
    twitter_cred = None
    twitter_key = config.get('twitter')
    if twitter_key:
        twitter_cred = get_credentials(twitter_key)
    users = db.get_model_users(model_name)
    # No destinations at all: skip the (potentially expensive) delta
    # generation entirely.
    if not twitter_cred and not users:
        logger.info('No Twitter account and no users subscribed '
                    'to this model, not generating deltas')
        return
    # Get deltas
    deltas = get_model_deltas(model_name, test_corpora, date, bucket=bucket)
    # Tweet if configured
    if twitter_cred:
        tweet_deltas(deltas, twitter_cred)
    # Send emails if there are subscribed users
    if users:
        msg_dicts = get_all_update_messages(deltas, is_tweet=False)
        if msg_dicts:
            str_email = '\n'.join([msg['message'] for msg in msg_dicts])
            full_name = config.get('human_readable_name', model_name)
            subject_line = f'Updates to the {full_name} EMMAA model'
            for user_email in users:
                # Each user gets a personalized HTML body (e.g. for
                # unsubscribe links), with a shared plain-text fallback.
                html_email = make_model_html_email(msg_dicts, user_email)
                # Return value of send_email is intentionally ignored
                send_email(sender=notifications_sender_default,
                           recipients=[user_email],
                           subject=subject_line,
                           body_text=str_email,
                           body_html=html_email,
                           source_arn=indra_bio_ARN,
                           return_email=notifications_return_default,
                           return_arn=indra_bio_ARN)
def test_save_load_config():
    # Local imports are recommended when using moto
    from emmaa.model import save_config_to_s3, load_config_from_s3
    setup_bucket()
    original_config = {'test': 'This is test config'}
    # Round-trip the config through S3 and check it is unchanged
    save_config_to_s3('test', original_config, bucket=TEST_BUCKET_NAME)
    loaded_config = load_config_from_s3('test', bucket=TEST_BUCKET_NAME)
    assert original_config == loaded_config
def get_model_config(model, bucket=EMMAA_BUCKET_NAME):
    """Return a model's config, loading it from S3 and caching it.

    Returns None (after logging a warning) when the model has no config
    on S3 or the config lacks a 'human_readable_name' entry.
    """
    if model in model_cache:
        return model_cache[model]
    try:
        config_json = load_config_from_s3(model, bucket=bucket)
        model_cache[model] = config_json
    except ClientError:
        logger.warning(f"Model {model} has no metadata. Skipping...")
        return None
    # Cache None for models without a readable name so the S3 lookup is
    # not repeated on every call.
    if 'human_readable_name' not in config_json:
        logger.warning(f"Model {model} has no readable name. Skipping...")
        model_cache[model] = None
    return model_cache[model]
def get_config_from(self, assembly_config_template):
    """Return assembly config given a template model's name.

    Parameters
    ----------
    assembly_config_template : str
        The name of a model whose assembly config should be adopted.

    Returns
    -------
    dict
        The assembly config of the given template model.
    """
    from emmaa.model import load_config_from_s3
    template_config = load_config_from_s3(assembly_config_template)
    return template_config.get('assembly')
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Create StatementCheckingTests from model statements."""
    statements, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)
    # Only statements where every agent slot is filled become tests
    tests = []
    for stmt in statements:
        if all(stmt.agent_list()):
            tests.append(StatementCheckingTest(stmt))
    date_str = make_date_str()
    model_label = config.get("human_readable_name")
    test_description = (
        f'These tests were generated from the '
        f'{model_label} on {date_str[:10]}')
    test_name = f'{model_label} model test corpus'
    test_dict = {
        'test_data': {'description': test_description,
                      'name': test_name},
        'tests': tests,
    }
    if upload:
        save_tests_to_s3(test_dict, bucket,
                         f'tests/{model_name}_tests_{date_str}.pkl', 'pkl')
    return test_dict
def load_from_statements(cls, model_name, mode='local', date=None,
                         bucket=EMMAA_BUCKET_NAME):
    """Instantiate cls from a model's pre-assembled statements on S3."""
    config = load_config_from_s3(model_name, bucket=bucket)
    # Look up paper ids either for a specific date or the latest dump
    if date:
        prefix = f'papers/{model_name}/paper_ids_{date}'
    else:
        prefix = f'papers/{model_name}/paper_ids_'
    paper_key = find_latest_s3_file(bucket, prefix, 'json')
    paper_ids = load_json_from_s3(bucket, paper_key) if paper_key else None
    model = EmmaaModel(model_name, config, paper_ids)
    # Loading assembled statements to avoid reassembly
    assembled, stmts_fname = get_assembled_statements(model_name, date, bucket)
    model.assembled_stmts = assembled
    model.date_str = strip_out_date(stmts_fname, 'datetime')
    return cls(model, mode=mode)
def test_model_to_tests():
    # Local imports are recommended when using moto
    from emmaa.model_tests import model_to_tests, load_tests_from_s3, \
        StatementCheckingTest
    from emmaa.model import save_config_to_s3, load_config_from_s3
    setup_bucket(add_model=True, add_mm=True)
    # With the default config a test is generated for every statement
    default_result = model_to_tests('test', bucket=TEST_BUCKET_NAME)
    assert isinstance(default_result, dict)
    assert 'test_data' in default_result and 'tests' in default_result
    assert len(default_result['tests']) == 2
    # Modify config to filter
    config = load_config_from_s3('test', bucket=TEST_BUCKET_NAME)
    config['make_tests'] = {'filter': {'conditions': {'curated': False},
                                       'evid_policy': 'any'}}
    save_config_to_s3('test', config, bucket=TEST_BUCKET_NAME)
    # With the modified config statements are filtered
    filtered_result = model_to_tests('test', bucket=TEST_BUCKET_NAME)
    assert isinstance(filtered_result, dict)
    assert 'test_data' in filtered_result and 'tests' in filtered_result
    filtered_tests = filtered_result['tests']
    assert len(filtered_tests) == 1
    assert isinstance(filtered_tests[0], StatementCheckingTest)
    # The uploaded corpus can be read back from S3
    loaded_tests, _ = load_tests_from_s3('test_tests', bucket=TEST_BUCKET_NAME)
    assert loaded_tests
    assert isinstance(loaded_tests, dict)
    assert 'test_data' in loaded_tests and 'tests' in loaded_tests
def save_config(ctype, terms):
    """Replace the search terms in the config for ctype and save to S3."""
    config = load_config_from_s3(ctype)
    term_jsons = [term.to_json() for term in terms]
    config['search_terms'] = term_jsons
    save_config_to_s3(ctype, config)