def configure_job():
  """Construct the jobSpec dict for an ML Engine training job.

  Returns:
    dict with 'jobId' (model/problems/timestamp-derived name) and
    'trainingInput' per the ML Engine projects.jobs API:
    https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#traininginput
  """
  # An explicit master type forces the CUSTOM scale tier; fall back to a
  # default machine chosen from the GPU/TPU flags.
  master_type = (FLAGS.cloud_mlengine_master_type or
                 get_default_master_type(num_gpus=FLAGS.worker_gpu,
                                         use_tpu=FLAGS.use_tpu))
  training_input = {
      'pythonModule': 'tensor2tensor.bin.t2t_trainer',
      'args': flags_as_args(),
      'region': cloud.default_region(),
      'runtimeVersion': '1.4',
      'pythonVersion': '3.5' if sys.version_info.major == 3 else '2.7',
      'jobDir': FLAGS.output_dir,
      'scaleTier': 'CUSTOM',
      'masterType': master_type,
  }
  if FLAGS.hparams_range:
    tf.logging.info('Configuring hyperparameter tuning.')
    training_input['hyperparameters'] = configure_autotune(
        FLAGS.hparams_range,
        FLAGS.autotune_objective,
        FLAGS.autotune_maximize,
        FLAGS.autotune_max_trials,
        FLAGS.autotune_parallel_trials,
    )
  # Timestamp keeps job ids unique across repeated launches.
  timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
  job_name = '%s_%s_t2t_%s' % (FLAGS.model, FLAGS.problems, timestamp)
  return {'jobId': job_name, 'trainingInput': training_input}
def configure_job():
  """Construct the jobSpec dict for an ML Engine training job.

  Returns:
    dict with 'jobId' (model/problem/timestamp-derived name) and
    'trainingInput' per the ML Engine projects.jobs API:
    https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#traininginput
  """
  py_version = "3.5" if sys.version_info.major == 3 else "2.7"
  training_input = {
      "pythonModule": "tensor2tensor.bin.t2t_trainer",
      "args": flags_as_args(),
      "region": text_encoder.native_to_unicode(cloud.default_region()),
      "runtimeVersion": RUNTIME_VERSION,
      "pythonVersion": py_version,
      "jobDir": FLAGS.output_dir,
      "scaleTier": "CUSTOM",
      "masterType": (FLAGS.cloud_mlengine_master_type or
                     get_default_master_type(num_gpus=FLAGS.worker_gpu)),
  }
  if FLAGS.use_tpu:
    # TPU jobs run the master on a plain machine and attach one TPU worker.
    training_input.update({
        "masterType": FLAGS.cloud_mlengine_master_type or "standard",
        "workerType": "cloud_tpu",
        "workerCount": 1,
    })
  if FLAGS.hparams_range:
    tf.logging.info("Configuring hyperparameter tuning.")
    training_input["hyperparameters"] = configure_autotune(
        FLAGS.hparams_range,
        FLAGS.autotune_objective,
        FLAGS.autotune_maximize,
        FLAGS.autotune_max_trials,
        FLAGS.autotune_parallel_trials,
    )
  # Timestamp keeps job ids unique across repeated launches.
  timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
  job_name = "%s_%s_t2t_%s" % (FLAGS.model, FLAGS.problem, timestamp)
  return {"jobId": job_name, "trainingInput": training_input}
def configure_job():
  """Construct the jobSpec dict for an ML Engine training job.

  Returns:
    dict with 'jobId' (taken from the final component of FLAGS.output_dir)
    and 'trainingInput' per the ML Engine projects.jobs API:
    https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#traininginput

  Raises:
    ValueError: if FLAGS.output_dir is not a GCS path, or if
      --hparams_range is set without --autotune_objective.
  """
  train_dir = FLAGS.output_dir
  # ML Engine jobs can only write to GCS. Validate with an explicit raise:
  # `assert` is stripped under `python -O`, so it must not guard user input.
  if not train_dir.startswith('gs://'):
    raise ValueError(
        'FLAGS.output_dir must be a GCS path (gs://...), got: %s' % train_dir)
  # Job id comes from the output directory's final path component.
  # NOTE(review): a trailing '/' would yield an empty job name — TODO confirm
  # callers normalize output_dir.
  job_name = os.path.basename(train_dir)
  training_input = {
      'pythonModule': 'tensor2tensor.bin.t2t_trainer',
      'args': flags_as_args(),
      'region': cloud.default_region(),
      'runtimeVersion': '1.4',
      'pythonVersion': '3.5' if sys.version_info.major == 3 else '2.7',
      'jobDir': train_dir,
  }
  # machine_config supplies scaleTier and, for CUSTOM tiers, the machine types.
  training_input.update(
      machine_config(
          num_gpus=FLAGS.worker_gpu,
          use_tpu=FLAGS.use_tpu,
          master_type=FLAGS.cloud_mlengine_master_type))
  if FLAGS.hparams_range:
    if not FLAGS.autotune_objective:
      raise ValueError(
          '--autotune_objective is required when --hparams_range is set.')
    tf.logging.info('Configuring hyperparameter tuning.')
    training_input['hyperparameters'] = configure_autotune(
        FLAGS.hparams_range,
        FLAGS.autotune_objective,
        FLAGS.autotune_maximize,
        FLAGS.autotune_max_trials,
        FLAGS.autotune_parallel_trials,
    )
  # Internal invariant (not user input): CUSTOM tier requires a master type.
  if training_input['scaleTier'] == 'CUSTOM':
    assert 'masterType' in training_input
  job_spec = {'jobId': job_name, 'trainingInput': training_input}
  return job_spec