Example #1
0
def neptune_ml_training(args: argparse.Namespace, client: Client,
                        output: widgets.Output, params):
    """Dispatch the model-training sub-command selected by ``args.which_sub``.

    ``'start'`` submits a training job through ``client.modeltraining_start``;
    ``'status'`` queries an existing job through
    ``client.modeltraining_job_status``. In both cases, when ``args.wait`` is
    truthy the call is handed off to ``wait_for_training`` instead of
    returning immediately.

    Args:
        args: Parsed arguments. Reads ``which_sub``, ``job_id``, ``wait``,
            ``wait_interval`` and ``wait_timeout``; the ``'start'`` path also
            reads ``s3_output_uri`` and, when ``params`` is empty,
            ``data_processing_id`` and ``instance_type``.
        client: Object providing ``modeltraining_start`` and
            ``modeltraining_job_status``.
        output: Widget forwarded to ``wait_for_training``.
        params: Keyword arguments for ``modeltraining_start``. When it is
            ``None``, ``''`` or ``{}``, a default set is built from ``args``.

    Returns:
        The decoded JSON body of the client response, the result of
        ``wait_for_training``, or a message string when the sub-command is
        not recognized.

    Raises:
        Whatever ``raise_for_status()`` raises on the client response.
    """
    sub_command = args.which_sub

    if sub_command == 'start':
        # No usable params supplied: derive them from the parsed arguments.
        params_missing = params is None or params == '' or params == {}
        if params_missing:
            params = {
                "id": args.job_id,
                "dataProcessingJobId": args.data_processing_id,
                "trainingInstanceType": args.instance_type,
            }

        response = client.modeltraining_start(
            args.job_id, args.s3_output_uri, **params)
        response.raise_for_status()
        job = response.json()
        if not args.wait:
            return job
        # Poll using the id reported by the service response.
        return wait_for_training(job['id'], client, output,
                                 args.wait_interval, args.wait_timeout)

    if sub_command == 'status':
        if not args.wait:
            response = client.modeltraining_job_status(args.job_id)
            response.raise_for_status()
            return response.json()
        return wait_for_training(args.job_id, client, output,
                                 args.wait_interval, args.wait_timeout)

    return f'Sub parser "{args.which} {args.which_sub}" was not recognized'
Example #2
0
def wait_for_training(job_id: str,
                      client: Client,
                      output: widgets.Output,
                      wait_interval: int = DEFAULT_WAIT_INTERVAL,
                      wait_timeout: int = DEFAULT_WAIT_TIMEOUT):
    """Poll a training job until it finishes or ``wait_timeout`` elapses.

    Two child ``widgets.Output`` areas are displayed inside ``output``: one
    shows which job is being waited on, the other is cleared and rewritten
    with the latest status on every poll.

    Args:
        job_id: Identifier passed to ``client.modeltraining_job_status``.
        client: Object providing ``modeltraining_job_status``.
        output: Parent widget in which the progress areas are displayed.
        wait_interval: Seconds to sleep between polls.
        wait_timeout: Maximum number of seconds to keep polling. The last
            sleep is not shortened, so the call may overrun this by up to
            ``wait_interval``.

    Returns:
        The decoded status JSON once its ``'status'`` field is
        ``'Completed'`` or ``'Failed'``; ``None`` if the timeout elapses
        first.

    Raises:
        Whatever ``raise_for_status()`` raises on the client response.
    """
    job_id_output = widgets.Output()
    update_status_output = widgets.Output()
    with output:
        display(job_id_output, update_status_output)

    with job_id_output:
        print(f'Wait called on training job {job_id}')

    with update_status_output:
        # Use a monotonic clock for the deadline: it cannot jump when the
        # system wall clock is adjusted, and it avoids the deprecated
        # datetime.utcnow().
        deadline = time.monotonic() + wait_timeout
        while time.monotonic() < deadline:
            update_status_output.clear_output()
            training_status_res = client.modeltraining_job_status(job_id)
            training_status_res.raise_for_status()
            status = training_status_res.json()
            if status['status'] in ('Completed', 'Failed'):
                print('Training is finished')
                return status

            print(f'Status is {status["status"]}')
            print(f'Waiting for {wait_interval} before checking again...')
            time.sleep(wait_interval)

        # Make the timeout visible instead of silently falling through.
        print(f'Timed out after {wait_timeout} seconds '
              f'waiting for training job {job_id}')

    return None