def read_pmids(pmids, date):
    """Return extracted INDRA Statements per PMID after running reading on AWS.

    Parameters
    ----------
    pmids : list[str]
        A list of PMIDs to read.
    date : datetime
        The date and time associated with the reading, typically the
        current time.

    Returns
    -------
    dict[str, list[indra.statements.Statement]]
        A dict of PMIDs and the list of Statements extracted for the given
        PMID by reading.
    """
    date_str = date.strftime('%Y-%m-%d-%H-%M-%S')
    pmid_fname = 'pmids-%s.txt' % date_str
    # Dump the PMIDs to a local file and submit it for reading with REACH.
    with open(pmid_fname, 'wt') as fh:
        fh.write('\n'.join(pmids))
    job_list = submit_reading('emmaa', pmid_fname, ['reach'])
    # Block until all Batch jobs finish, killing any job whose log goes
    # idle for more than 10 minutes.
    monitor = BatchMonitor('run_reach_queue', job_list)
    monitor.watch_and_wait(idle_log_timeout=600, kill_on_log_timeout=True)
    # Collect the REACH output for each PMID and process it into Statements.
    pmid_stmts = {}
    for pmid in pmids:
        reach_json_str = get_reader_json_str('reach', pmid)
        if reach_json_str is None:
            pmid_stmts[pmid] = []
            continue
        rp = reach.process_json_str(reach_json_str)
        if not rp:
            pmid_stmts[pmid] = []
        else:
            pmid_stmts[pmid] = rp.statements
    return pmid_stmts
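
# Hypothetical usage sketch (not part of the original source): run reading on
# a couple of placeholder PMIDs, dated with the current UTC time. This assumes
# AWS credentials and the 'run_reach_queue' Batch queue are configured.
#
#     from datetime import datetime
#     stmts_by_pmid = read_pmids(['12345678', '23456789'], datetime.utcnow())
#     for pmid, stmts in stmts_by_pmid.items():
#         print('%s: %d statements' % (pmid, len(stmts)))
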
def test_handler():
    """Test the lambda handler locally."""
    dts = make_date_str()
    key = f'models/test/test_model_{dts}.pkl'
    # Simulate the S3 event that triggers the lambda.
    event = {'Records': [{'s3': {'object': {'key': key}}}]}
    context = None
    res = lambda_handler(event, context)
    print(res)
    assert res['statusCode'] == 200, res
    assert res['result'] == 'SUCCESS', res
    assert res['job_id'], res
    job_id = res['job_id']
    # Wait for the submitted Batch job and check that it succeeded.
    results = {}
    monitor = BatchMonitor(QUEUE, [{'jobId': job_id}])
    monitor.watch_and_wait(result_record=results)
    print(results)
    assert job_id in [job_def['jobId'] for job_def in results['succeeded']], \
        results['failed']
    # Check that results were written to S3.
    s3 = get_s3_client()
    s3_res = s3.list_objects(Bucket='emmaa', Prefix='results/test/' + dts[:10])
    print(s3_res.keys())
    assert s3_res, s3_res
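
# Note (assumption, not from the original file): this test exercises real AWS
# resources, so it would typically be run explicitly with pytest, e.g.
#
#     pytest test_module.py::test_handler -s
#
# where `test_module.py` is a placeholder for this file's actual name, and
# assuming AWS credentials and the queue named by QUEUE are available.
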
def __init__(self, basename, group_name=None, project_name=None,
             job_timeout=None, **options):
    self.basename = basename
    self.group_name = group_name
    self.s3_base, self.job_base = \
        get_s3_and_job_prefixes(self.job_class, basename, group_name)
    self.project_name = project_name
    self.job_timeout_override = job_timeout
    # Track the jobs submitted to each queue.
    self.job_lists = {q_name: [] for q_name in self._job_queue_dict.keys()}
    self.options = options
    self.running = None
    self.submitting = False
    # Create a monitor for each queue this submitter uses.
    self.monitors = {}
    for queue_name in self._iter_over_select_queues():
        self.monitors[queue_name] = \
            BatchMonitor(queue_name, self.job_lists[queue_name],
                         self.job_base, self.s3_base)
    self.max_jobs = None
    return
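
# Hypothetical usage sketch (class and attribute values are assumptions, not
# from the original source): a concrete subclass is expected to provide
# `job_class`, `_job_queue_dict`, and `_iter_over_select_queues` before this
# __init__ runs, along the lines of:
#
#     class ReadingSubmitter(Submitter):
#         job_class = 'reading'
#         _job_queue_dict = {'run_reach_queue': ['reach']}
#
#         def _iter_over_select_queues(self):
#             yield from self._job_queue_dict.keys()
#
#     sub = ReadingSubmitter('my_basename', group_name='my_group')
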
        '-K', action='store_true',
        help='If a log times out, terminate the offending job.')
    parser.add_argument(
        '--stash_log_method', '-l',
        choices=['s3', 'local'],
        metavar='METHOD',
        help=('Select a method from: [%(choices)s] to store the job logs. '
              'If no method is specified, the logs will not be '
              'loaded off of AWS. If \'s3\' is specified, then '
              '`job_name_prefix` must also be given, as this will indicate '
              'where on s3 to store the logs.'))
    return parser


if __name__ == '__main__':
    parser = make_parser()
    args = parser.parse_args()
    from indra_reading.batch.monitor import BatchMonitor

    job_list = None
    if args.job_list is not None:
        job_list = [{'jobId': jid} for jid in args.job_list]
    s3_base, _ = get_s3_and_job_prefixes('reading', args.job_name_prefix)
    bm = BatchMonitor(args.queue_name, job_list, args.job_name_prefix,
                      s3_base)
    bm.watch_and_wait(args.poll_interval, args.timeout,
                      args.kill_on_timeout, args.stash_log_method)
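
# Hypothetical invocation sketch (script, queue, and prefix names are
# placeholders; only the -K and --stash_log_method flags are visible in this
# fragment, the rest are inferred from the `args` attributes used above):
#
#     python monitor.py run_reach_queue --job_name_prefix my_run \
#         --stash_log_method s3 -K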