Example #1
0
def read_pmids(pmids, date):
    """Return extracted INDRA Statements per PMID after running reading on AWS.

    Parameters
    ----------
    pmids : list[str]
        A list of PMIDs to read.
    date : datetime
        The date and time associated with the reading, typically the
        current time.

    Returns
    -------
    dict[str, list[indra.statements.Statement]]
        A dict of PMIDs and the list of Statements extracted for the given
        PMID by reading.
    """
    # The timestamp makes the PMID list file name unique per reading run.
    date_str = date.strftime('%Y-%m-%d-%H-%M-%S')
    pmid_fname = 'pmids-%s.txt' % date_str
    with open(pmid_fname, 'wt') as fh:
        fh.write('\n'.join(pmids))
    # Submit a REACH reading job set on AWS Batch and block until all the
    # jobs have finished (killing jobs whose logs go idle too long).
    job_list = submit_reading('emmaa', pmid_fname, ['reach'])
    monitor = BatchMonitor('run_reach_queue', job_list)
    monitor.watch_and_wait(idle_log_timeout=600, kill_on_log_timeout=True)
    pmid_stmts = {}
    for pmid in pmids:
        reach_json_str = get_reader_json_str('reach', pmid)
        # No reader output is available for this PMID.
        if reach_json_str is None:
            pmid_stmts[pmid] = []
            continue
        rp = reach.process_json_str(reach_json_str)
        # A falsy processor means processing failed; record no statements.
        pmid_stmts[pmid] = rp.statements if rp else []
    return pmid_stmts
Example #2
0
def test_handler():
    """Run the lambda handler locally and check its Batch/S3 side effects."""
    dts = make_date_str()
    event = {'Records': [{'s3': {'object': {
        'key': f'models/test/test_model_{dts}.pkl'}}}]}
    res = lambda_handler(event, None)
    print(res)
    assert res['statusCode'] == 200, res
    assert res['result'] == 'SUCCESS', res
    assert res['job_id'], res
    job_id = res['job_id']

    # Wait for the submitted batch job to finish and verify it succeeded.
    record = {}
    monitor = BatchMonitor(QUEUE, [{'jobId': job_id}])
    monitor.watch_and_wait(result_record=record)
    print(record)
    succeeded_ids = [job_def['jobId'] for job_def in record['succeeded']]
    assert job_id in succeeded_ids, record['failed']

    # Confirm that result objects were written to S3 for this run's date.
    client = get_s3_client()
    listing = client.list_objects(Bucket='emmaa',
                                  Prefix='results/test/' + dts[:10])
    print(listing.keys())
    assert listing, listing
Example #3
0
 def __init__(self, basename, group_name=None, project_name=None,
              job_timeout=None, **options):
     """Set up job bookkeeping and one BatchMonitor per selected queue."""
     self.basename = basename
     self.group_name = group_name
     self.project_name = project_name
     self.job_timeout_override = job_timeout
     self.options = options
     # Derive the S3 prefix and job-name prefix for this job class/run.
     self.s3_base, self.job_base = \
         get_s3_and_job_prefixes(self.job_class, basename, group_name)
     # One (initially empty) job list per configured queue.
     self.job_lists = {queue: [] for queue in self._job_queue_dict}
     self.running = None
     self.submitting = False
     # Each selected queue gets a monitor that shares that queue's job list.
     self.monitors = {queue: BatchMonitor(queue, self.job_lists[queue],
                                          self.job_base, self.s3_base)
                      for queue in self._iter_over_select_queues()}
     self.max_jobs = None
     return
Example #4
0
        '-K',
        action='store_true',
        help='If a log times out, terminate the offending job.')
    parser.add_argument(
        '--stash_log_method',
        '-l',
        choices=['s3', 'local'],
        metavar='METHOD',
        help=('Select a method from: [%(choices)s] to store the job logs. '
              'If no method is specified, the logs will not be '
              'loaded off of AWS. If \'s3\' is specified, then '
              '`job_name_prefix` must also be given, as this will indicate '
              'where on s3 to store the logs.'))
    return parser


if __name__ == '__main__':
    # Parse CLI arguments and launch a monitor on the requested queue.
    args = make_parser().parse_args()

    from indra_reading.batch.monitor import BatchMonitor

    if args.job_list is None:
        job_list = None
    else:
        job_list = [{'jobId': jid} for jid in args.job_list]

    s3_base, _ = get_s3_and_job_prefixes('reading', args.job_name_prefix)
    monitor = BatchMonitor(args.queue_name, job_list, args.job_name_prefix,
                           s3_base)
    monitor.watch_and_wait(args.poll_interval, args.timeout,
                           args.kill_on_timeout, args.stash_log_method)