def helper_mxnet_tests(collection, register_loss, save_config):
    coll_name, coll_regex = collection
    run_id = "trial_" + coll_name + "-" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    trial_dir = os.path.join(SMDEBUG_MX_HOOK_TESTS_DIR, run_id)
    hook = MX_Hook(out_dir=trial_dir, include_collections=[coll_name], export_tensorboard=True)
    coll = hook.get_collection(coll_name)
    coll.save_config = save_config
    save_steps = save_config.get_save_config(ModeKeys.TRAIN).save_steps
    if not save_steps:
        save_interval = save_config.get_save_config(ModeKeys.TRAIN).save_interval
        save_steps = [i for i in range(0, 10, save_interval)]
    simple_mx_model(hook, register_loss=register_loss)
    hook.close()
    saved_scalars = ["scalar/mx_before_train", "scalar/mx_train_loss", "scalar/mx_after_train"]
    check_trials(trial_dir, save_steps, coll_name, saved_scalars)
    check_metrics_file(saved_scalars)

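# A minimal sketch of how this helper might be driven from pytest; the
# parametrized collection tuples and the SaveConfig values below are
# illustrative assumptions, not taken from the actual test suite.
import pytest
from smdebug.mxnet import SaveConfig

@pytest.mark.parametrize("collection", [("weights", "^weight"), ("gradients", "^gradient")])
@pytest.mark.parametrize("register_loss", [True, False])
def test_mxnet_collections(collection, register_loss):
    helper_mxnet_tests(collection, register_loss, save_config=SaveConfig(save_interval=2))
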
def create_hook(output_s3_uri):
    # With the following SaveConfig, we will save tensors for steps 0, 1, 2 and 3
    # (indexing starts with 0).
    save_config = SaveConfig(save_steps=[0, 1, 2, 3])

    # Create a hook that logs weights, biases and gradients while training the model.
    hook = Hook(
        out_dir=output_s3_uri,
        save_config=save_config,
        include_collections=["ReluActivation", "weights", "biases", "gradients"],
    )
    hook.get_collection("ReluActivation").include(["relu*", "input_*"])
    return hook

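# A hedged usage sketch, assuming the MXNet flavor of smdebug; the Gluon model
# and the S3 output path below are placeholders, not part of the original example.
from mxnet.gluon import nn
from smdebug.mxnet import Hook

net = nn.HybridSequential()
net.add(nn.Dense(64, activation="relu"), nn.Dense(10))

hook = create_hook("s3://my-bucket/smdebug-output")
hook.register_block(net)  # attach the hook before training so tensors are captured
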
def create_hook():
    # With the following SaveConfig, we will save tensors every 100 steps.
    save_config = SaveConfig(save_interval=100)

    # Create a hook that logs all tensors seen while training the model.
    hook = Hook(save_config=save_config, save_all=True)
    return hook

def create_hook(output_s3_uri):
    # Create a SaveConfig that determines the steps at which tensors are to be stored.
    # With the following SaveConfig, we will save tensors for steps 1, 2 and 3.
    save_config = SaveConfig(save_steps=[1, 2, 3])

    # Create a hook that logs all the tensors seen while training the model.
    hook = Hook(out_dir=output_s3_uri, save_config=save_config, save_all=True)
    return hook

def helper_mxnet_tests(collection, register_loss, save_config):
    coll_name, coll_regex = collection
    run_id = "trial_" + coll_name + "-" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    trial_dir = os.path.join(SMDEBUG_MX_HOOK_TESTS_DIR, run_id)
    hook = MX_Hook(
        out_dir=trial_dir,
        include_collections=[coll_name],
        save_config=save_config,
        export_tensorboard=True,
    )
    simple_mx_model(hook, register_loss=register_loss)
    hook.close()
    saved_scalars = ["scalar/mx_num_steps", "scalar/mx_before_train", "scalar/mx_after_train"]
    verify_files(trial_dir, save_config, saved_scalars)

def create_hook(output_s3_uri, block):
    # Create a SaveConfig that determines the steps at which tensors are to be stored.
    # With the following SaveConfig, we will save tensors for steps 1, 2 and 3.
    save_config = SaveConfig(save_steps=[1, 2, 3])

    # Create a hook that logs weights, biases, gradients, and the inputs and outputs
    # of the model while training.
    hook = Hook(
        out_dir=output_s3_uri,
        save_config=save_config,
        include_collections=["weights", "gradients", "biases", "TopBlock"],
    )

    # The names of the input and output tensors of a block have the following format:
    #   Inputs: <block_name>_input_<input_index>
    #   Output: <block_name>_output
    # To log the inputs and outputs of the model, we populate a collection as follows:
    hook.get_collection("TopBlock").add_block_tensors(block, inputs=True, outputs=True)
    return hook

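# A hedged sketch of reading the saved tensors back with smdebug's trial API;
# the output path and the tensor name are placeholders that depend on the
# actual block names in the trained model.
from smdebug.trials import create_trial

trial = create_trial("s3://my-bucket/smdebug-output")
print(trial.tensor_names(collection="TopBlock"))
print(trial.tensor("topblock_output").value(1))
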
def create_hook(output_uri, save_frequency):
    # With the following SaveConfig, we will save tensors every save_frequency steps.
    save_config = SaveConfig(save_interval=save_frequency)

    # Create a hook that logs weights, biases and gradients while training the model.
    hook = Hook(
        out_dir=output_uri,
        save_config=save_config,
        include_collections=["weights", "gradients", "biases"],
    )
    return hook

def helper_mxnet_tests(collection, register_loss, save_config, with_timestamp):
    coll_name, coll_regex = collection
    run_id = "trial_" + coll_name + "-" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    trial_dir = os.path.join(SMDEBUG_MX_HOOK_TESTS_DIR, run_id)
    hook = MX_Hook(
        out_dir=trial_dir,
        include_collections=[coll_name],
        save_config=save_config,
        export_tensorboard=True,
    )
    saved_scalars = simple_mx_model(
        hook, register_loss=register_loss, with_timestamp=with_timestamp
    )
    hook.close()
    verify_files(trial_dir, save_config, saved_scalars)
    if with_timestamp:
        check_tf_events(trial_dir, saved_scalars)

def create_smdebug_hook(output_s3_uri):
    # With the following SaveConfig, we will save tensors for steps 1, 2 and 3
    # (indexing starts with 0).
    save_config = SaveConfig(save_steps=[1, 2, 3])

    # Create a hook that logs weights, biases and gradients while training the model.
    hook = Hook(
        out_dir=output_s3_uri,
        save_config=save_config,
        include_collections=["weights", "gradients"],
    )
    return hook

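# The train function below references several names without imports. A hedged
# reconstruction of the imports it needs is given here; the exact module paths
# vary across GluonTS releases (e.g. Trainer moved from gluonts.trainer to
# gluonts.mx.trainer), so treat these as assumptions.
import os
import pathlib

import boto3
import pandas as pd
from gluonts.dataset.common import ListDataset
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.evaluation import Evaluator
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.model.deepar import DeepAREstimator
from gluonts.model.lstnet import LSTNetEstimator
from gluonts.model.seq2seq import MQCNNEstimator
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.model.transformer import TransformerEstimator
from gluonts.mx.trainer import Trainer
from smdebug.mxnet import Hook

s3 = boto3.client("s3")  # module-level S3 client used by train() and uploadDirectory()
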
def train(bucket, seq, algo, freq, prediction_length, epochs, learning_rate,
          hybridize, num_batches_per_epoch):
    # Create the train dataset.
    df = pd.read_csv(
        filepath_or_buffer=os.environ["SM_CHANNEL_TRAIN"] + "/train.csv",
        header=0,
        index_col=0,
    )
    training_data = ListDataset(
        [{"start": df.index[0], "target": df.usage[:], "item_id": df.client[:]}],
        freq=freq,
    )

    # Create the test dataset.
    df = pd.read_csv(
        filepath_or_buffer=os.environ["SM_CHANNEL_TEST"] + "/test.csv",
        header=0,
        index_col=0,
    )
    test_data = ListDataset(
        [{"start": df.index[0], "target": df.usage[:], "item_id": "client_12"}],
        freq=freq,
    )

    hook = Hook.create_from_json_file()

    # Select the estimator and train the model.
    if algo == "DeepAR":
        estimator = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=1,
            trainer=Trainer(
                ctx="cpu",
                epochs=epochs,
                learning_rate=learning_rate,
                hybridize=hybridize,
                num_batches_per_epoch=num_batches_per_epoch,
            ),
        )
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training is complete SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(
                ctx="cpu",
                epochs=epochs,
                learning_rate=learning_rate,
                hybridize=hybridize,
                num_batches_per_epoch=num_batches_per_epoch,
            ),
        )
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "lstnet":
        # LSTNet requires multivariate input, so group the datasets first.
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4
        estimator = LSTNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=num_series,
            skip_size=skip_size,
            ar_window=ar_window,
            channels=channels,
            trainer=Trainer(
                ctx="cpu",
                epochs=epochs,
                learning_rate=learning_rate,
                hybridize=hybridize,
                num_batches_per_epoch=num_batches_per_epoch,
            ),
        )
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(
                ctx="cpu",
                epochs=epochs,
                learning_rate=learning_rate,
                hybridize=hybridize,
                num_batches_per_epoch=num_batches_per_epoch,
            ),
        )
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    else:
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(
                ctx="cpu",
                epochs=epochs,
                learning_rate=learning_rate,
                hybridize=hybridize,
                num_batches_per_epoch=num_batches_per_epoch,
            ),
        )
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")

    # Evaluate the trained model on the test data.
    forecast_it, ts_it = make_evaluation_predictions(test_data, predictor, num_samples=100)
    print("EVALUATION is complete SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss), iter(forecasts), num_series=len(test_data))
    print("METRICS retrieved SUCCESS")

    # Write the aggregate metrics to a local CSV and upload it to S3.
    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.makedirs("metrics", exist_ok=True)
    cspath = os.path.join("metrics", file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)

    # Emit key metrics as SageMaker Debugger scalars.
    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)
    print("MAPE:", agg_metrics["MAPE"])

    # Save the model and upload it to S3.
    predictor.serialize(pathlib.Path(os.environ["SM_MODEL_DIR"]))
    uploadDirectory(os.environ["SM_MODEL_DIR"], prefix, bucket)
    return predictor
