def main(out_dir, train_path, eval_path, hidden_units, max_steps,
         nbuckets, resume, job_dir, batch_size, learning_rate):
    # Do not overwrite results when tuning hyperparameters.
    out_dir = os.path.join(
        out_dir,
        json.loads(os.environ.get('TF_CONFIG', '{}'))
            .get('task', {})
            .get('trial', ''))

    # Clean up the output dir if not resuming.
    if not resume:
        shutil.rmtree(out_dir, ignore_errors=True)

    hidden_units = hidden_units.split(' ')

    metrics = train_and_evaluate(out_dir, train_path, eval_path, hidden_units,
                                 nbuckets, max_steps, batch_size, learning_rate)
    # When training on GCP, metrics may be undefined.
    if metrics is not None:
        print(metrics[0])
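For context: when AI Platform runs hyperparameter tuning, each trial's id arrives through the TF_CONFIG environment variable, and the join above gives every trial its own subdirectory. A standalone sketch of that behavior (the bucket path and trial id are made-up values):

import json
import os

# Simulate the TF_CONFIG the tuning service would set for trial "3"
# (hypothetical value, for illustration only).
os.environ['TF_CONFIG'] = json.dumps({'task': {'trial': '3'}})

trial = json.loads(os.environ.get('TF_CONFIG', '{}')) \
    .get('task', {}).get('trial', '')
print(os.path.join('gs://my-bucket/model', trial))
# -> gs://my-bucket/model/3

# Outside a tuning job there is no 'trial' key, so the suffix is ''
# and the join only appends a trailing separator.
print(os.path.join('gs://my-bucket/model', ''))
# -> gs://my-bucket/model/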
    help='Specify a pattern that has to be in the input files. '
         'For example, "00001-of" will process only one shard',
    default='of')
parser.add_argument(
    '--job-dir',
    help='This model ignores this field, but it is required by gcloud',
    default='junk')
args = parser.parse_args()
arguments = args.__dict__

# Unused args provided by the service
arguments.pop('job_dir', None)
arguments.pop('job-dir', None)

output_dir = arguments.pop('output_dir')
model.BUCKET = arguments.pop('bucket')
model.TRAIN_STEPS = arguments.pop('train_steps')
model.PATTERN = arguments.pop('pattern')

# Append trial_id to path if we are doing hptuning
# This code can be removed if you are not using hyperparameter tuning
output_dir = os.path.join(
    output_dir,
    json.loads(os.environ.get('TF_CONFIG', '{}'))
        .get('task', {})
        .get('trial', ''))

# Run the training job
# learn_runner.run(model.experiment_fn, output_dir)
model.train_and_evaluate(output_dir)
"--num_evals", help="Number of times to evaluate model on eval data training.", type=int, default=5) parser.add_argument("--num_examples_to_train_on", help="Number of examples to train on.", type=int, default=100) parser.add_argument( "--output_dir", help="GCS location to write checkpoints and export models", required=True) parser.add_argument( "--train_data_path", help="GCS location pattern of train files containing eval URLs", required=True) parser.add_argument( "--job-dir", help="this model ignores this field, but it is required by gcloud", default="junk") args, _ = parser.parse_known_args() hparams = args.__dict__ hparams["output_dir"] = os.path.join( hparams["output_dir"], json.loads(os.environ.get("TF_CONFIG", "{}")).get("task", {}).get("trial", "")) print("output_dir", hparams["output_dir"]) model.train_and_evaluate(hparams)
    type=float,
    default=1.0)
parser.add_argument('--project_id',
                    help='ID (not name) of your project',
                    required=True)
parser.add_argument(
    '--job-dir',
    help='Output directory for the model, automatically provided by gcloud',
    required=True)
args = parser.parse_args()
arguments = args.__dict__
# model.PROJECT = arguments['projectId']
# model.KEYDIR = 'trainer'
print(arguments)

estimator, acc_eval = model.train_and_evaluate(
    arguments['eval_size'], arguments['frac'],
    arguments['WE_max_df'], arguments['WE_min_df'],
    arguments['FT_norm'], arguments['M_alpha'])

loc = model.save_model(estimator, arguments['job_dir'], 'stackoverflow')
print("Saved model to {}".format(loc))

# Report the evaluation metric so the hyperparameter tuning
# service can compare trials.
hpt = hypertune.HyperTune()
hpt.report_hyperparameter_tuning_metric(
    hyperparameter_metric_tag='accuracy',
    metric_value=acc_eval,
    global_step=0)
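The tag passed as hyperparameter_metric_tag has to match the hyperparameterMetricTag declared in the tuning spec submitted with the job; otherwise the service cannot rank trials. A sketch of such a spec, written as the Python dict form of AI Platform's TrainingInput (all values are placeholders):

# Hypothetical tuning spec; field names follow AI Platform's
# HyperparameterSpec, values are placeholders for illustration.
training_inputs = {
    "hyperparameters": {
        "goal": "MAXIMIZE",
        "hyperparameterMetricTag": "accuracy",  # must match the tag above
        "maxTrials": 10,
        "maxParallelTrials": 2,
        "params": [{
            "parameterName": "M_alpha",
            "type": "DOUBLE",
            "minValue": 0.1,
            "maxValue": 2.0,
            "scaleType": "UNIT_LINEAR_SCALE",
        }],
    }
}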
"--nfil2", help="number of filters in second layer for CNN", type=int, default=10, ) parser.add_argument("--dprob", help="dropout probability for CNN", type=float, default=0.25) parser.add_argument( "--batch_norm", help="if specified, do batch_norm for CNN", dest="batch_norm", action="store_true", ) parser.set_defaults(batch_norm=False) args = parser.parse_args() hparams = args.__dict__ output_dir = hparams.pop("output_dir") # Append trial_id to path for hptuning output_dir = os.path.join( output_dir, json.loads(os.environ.get("TF_CONFIG", "{}")).get("task", {}).get("trial", ""), ) # Run the training job model.train_and_evaluate(output_dir, hparams)
"--eval_steps", help="""Positive number of steps for which to evaluate model. Default to None, which means to evaluate until input_fn raises an end-of-input exception""", type=int, default=None ) # Parse all arguments args = parser.parse_args() arguments = args.__dict__ # Unused args provided by service arguments.pop("job_dir", None) arguments.pop("job-dir", None) # Modify some arguments arguments["train_examples"] *= 1000 # Append trial_id to path if we are doing hptuning # This code can be removed if you are not using hyperparameter tuning arguments["output_dir"] = os.path.join( arguments["output_dir"], json.loads( os.environ.get("TF_CONFIG", "{}") ).get("task", {}).get("trial", "") ) # Run the training job model.train_and_evaluate(arguments)
parser.add_argument(
    '--min_eval_frequency',
    help='Minimum number of training steps between evaluations',
    default=1,
    type=int
)

args = parser.parse_args()
arguments = args.__dict__

# Unused args provided by the service
arguments.pop('job_dir', None)
arguments.pop('job-dir', None)
output_dir = arguments.pop('output_dir')

# Append trial_id to path if we are doing hptuning
# This code can be removed if you are not using hyperparameter tuning
output_dir = os.path.join(
    output_dir,
    json.loads(
        os.environ.get('TF_CONFIG', '{}')
    ).get('task', {}).get('trial', '')
)

# Run the training job
try:
    shutil.rmtree(output_dir, ignore_errors=True)  # start fresh each time
    model.train_and_evaluate(args, output_dir, arguments['keras'])
except Exception:
    # Surface the full stack trace in the job logs rather than failing silently.
    traceback.print_exc()
parser.add_argument(
    '--test_data_paths',
    help='GCS or local path to testing data',
    required=True
)

# Training arguments
parser.add_argument(
    '--batch_size',
    help='Batch size',
    type=int,
    default=150
)
parser.add_argument(
    '--output_dir',
    help='GCS location to write checkpoints and export models',
    required=True
)

args = parser.parse_args()

# Assign model variables from the command-line arguments
model.TRAIN_DATA_DIR = args.train_data_paths
model.TEST_DATA_DIR = args.test_data_paths
model.DATA_BATCH_SIZE = args.batch_size
model.LOGS_DIR = os.path.join(
    args.output_dir,
    datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))

# Run the training job
model.train_and_evaluate()
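Appending a timestamp to LOGS_DIR gives every run its own subdirectory, so repeated jobs never overwrite earlier checkpoints. For example (output values are illustrative):

import datetime
import os

stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
print(stamp)  # e.g. 2024-05-01-13-45-09
print(os.path.join("gs://my-bucket/logs", stamp))
# e.g. gs://my-bucket/logs/2024-05-01-13-45-09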
    help='GCS/local dir to write checkpoints and export models',
    required=True)
parser.add_argument(
    '--layer_width',
    help='Architecture of the linear network: comma-delimited widths of the '
         'layers. For example, "2,3,5" is a network whose layers have widths '
         '2, 3, and 5 (input dim 2, one inner layer of dim 3, output dim 5).',
    required=True)
parser.add_argument('--p',
                    help='Order of the loss function (l_p loss)',
                    required=True)
parser.add_argument('--num_epochs',
                    help='Number of epochs for training',
                    required=True)
parser.add_argument('--lr',
                    help='Gradient descent learning rate',
                    required=True)

args = parser.parse_args()

# argparse returns strings; convert to the numeric types the model expects.
layer_width = [int(width) for width in args.layer_width.split(",")]
p = int(args.p)
lr = float(args.lr)
num_epochs = int(args.num_epochs)

# Run the training job
model.train_and_evaluate(args.train_data_dir, args.logs_dir, layer_width,
                         p, num_epochs, lr)