def neptune_ml_training(args: argparse.Namespace, client: Client, output: widgets.Output, params):
    """Dispatch the Neptune ML training sub-command selected by ``args.which_sub``.

    * ``'start'`` submits a job through ``client.modeltraining_start``. When
      ``params`` is ``None``, ``''`` or ``{}``, the keyword arguments are built
      from ``args.job_id``, ``args.data_processing_id`` and
      ``args.instance_type``; otherwise ``params`` is forwarded untouched.
    * ``'status'`` queries ``client.modeltraining_job_status`` for
      ``args.job_id``.

    In both cases, a truthy ``args.wait`` hands control to
    ``wait_for_training`` (using ``args.wait_interval`` and
    ``args.wait_timeout``) and returns its result.

    Args:
        args: Parsed command-line namespace for the training command.
        client: Object exposing ``modeltraining_start`` and
            ``modeltraining_job_status``.
        output: Output widget passed through to ``wait_for_training``.
        params: Optional keyword arguments for ``modeltraining_start``.

    Returns:
        The decoded JSON body of the response, the result of
        ``wait_for_training``, or a message string when the sub-command is
        not recognized.

    Raises:
        Whatever ``raise_for_status()`` raises on the client's response.
    """
    sub_command = args.which_sub

    if sub_command == 'start':
        params_missing = params is None or params == '' or params == {}
        if params_missing:
            # Fall back to a request assembled from the command-line arguments.
            params = {
                "id": args.job_id,
                "dataProcessingJobId": args.data_processing_id,
                "trainingInstanceType": args.instance_type,
            }

        response = client.modeltraining_start(args.job_id, args.s3_output_uri, **params)
        response.raise_for_status()
        job = response.json()

        if not args.wait:
            return job
        return wait_for_training(job['id'], client, output,
                                 args.wait_interval, args.wait_timeout)

    if sub_command == 'status':
        if args.wait:
            return wait_for_training(args.job_id, client, output,
                                     args.wait_interval, args.wait_timeout)

        response = client.modeltraining_job_status(args.job_id)
        response.raise_for_status()
        return response.json()

    return f'Sub parser "{args.which} {args.which_sub}" was not recognized'
def wait_for_training(job_id: str, client: Client, output: widgets.Output,
                      wait_interval: int = DEFAULT_WAIT_INTERVAL,
                      wait_timeout: int = DEFAULT_WAIT_TIMEOUT):
    """Poll a training job until it finishes or ``wait_timeout`` elapses.

    Two child output widgets are displayed inside ``output``: one shows which
    job is being waited on, the other is cleared and rewritten with the latest
    status on every poll.

    Args:
        job_id: Identifier passed to ``client.modeltraining_job_status``.
        client: Object exposing ``modeltraining_job_status``.
        output: Parent output widget in which progress is displayed.
        wait_interval: Seconds to sleep between polls.
        wait_timeout: Maximum number of seconds to keep polling.

    Returns:
        The decoded JSON status once its ``'status'`` field is ``'Completed'``
        or ``'Failed'``, or ``None`` if the timeout elapses first.

    Raises:
        Whatever ``raise_for_status()`` raises on the client's response.
    """
    job_id_output = widgets.Output()
    update_status_output = widgets.Output()
    with output:
        display(job_id_output, update_status_output)

    with job_id_output:
        print(f'Wait called on training job {job_id}')

    with update_status_output:
        # Measure elapsed time with a monotonic clock: it is unaffected by
        # system clock adjustments and avoids the deprecated
        # datetime.utcnow().
        started_at = time.monotonic()
        while time.monotonic() - started_at < wait_timeout:
            update_status_output.clear_output()
            training_status_res = client.modeltraining_job_status(job_id)
            training_status_res.raise_for_status()
            status = training_status_res.json()
            if status['status'] in ['Completed', 'Failed']:
                print('Training is finished')
                return status

            print(f'Status is {status["status"]}')
            print(f'Waiting for {wait_interval} before checking again...')
            time.sleep(wait_interval)

    # Timed out without reaching a terminal status.
    return None