Example #1
def helper_mxnet_tests(collection, register_loss, save_config):
    coll_name, coll_regex = collection

    run_id = "trial_" + coll_name + "-" + datetime.now().strftime(
        "%Y%m%d-%H%M%S%f")
    trial_dir = os.path.join(SMDEBUG_MX_HOOK_TESTS_DIR, run_id)

    hook = MX_Hook(out_dir=trial_dir,
                   include_collections=[coll_name],
                   export_tensorboard=True)
    # Attach the save config to the collection itself rather than to the hook.
    coll = hook.get_collection(coll_name)
    coll.save_config = save_config
    save_steps = save_config.get_save_config(ModeKeys.TRAIN).save_steps
    if not save_steps:
        # No explicit steps were configured; expand the save interval into the
        # concrete steps expected within the first 10 training steps.
        save_interval = save_config.get_save_config(ModeKeys.TRAIN).save_interval
        save_steps = list(range(0, 10, save_interval))

    simple_mx_model(hook, register_loss=register_loss)
    hook.close()

    saved_scalars = [
        "scalar/mx_before_train", "scalar/mx_train_loss",
        "scalar/mx_after_train"
    ]
    check_trials(trial_dir, save_steps, coll_name, saved_scalars)
    check_metrics_file(saved_scalars)
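
A minimal sketch of the fallback above, assuming smdebug's per-mode SaveConfig API; the interval value 3 is hypothetical:

import smdebug.mxnet as smd

# Hypothetical per-mode config: TRAIN mode saves every 3rd step.
save_config = smd.SaveConfig(
    mode_save_configs={smd.modes.TRAIN: smd.SaveConfigMode(save_interval=3)}
)
save_interval = save_config.get_save_config(smd.modes.TRAIN).save_interval
# The helper's fallback expands the interval into explicit steps:
assert list(range(0, 10, save_interval)) == [0, 3, 6, 9]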
Example #2
def create_hook(output_s3_uri):
    # With the following SaveConfig, we will save tensors for steps 0, 1, 2 and 3
    # (indexing starts with 0).
    save_config = SaveConfig(save_steps=[0, 1, 2, 3])
    # Create a hook that logs ReLU activations, weights, biases and gradients while training the model.
    hook = Hook(
        out_dir=output_s3_uri,
        save_config=save_config,
        include_collections=["ReluActivation", "weights", "biases", "gradients"],
    )
    hook.get_collection("ReluActivation").include(["relu*", "input_*"])
    return hook
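
A minimal usage sketch for the helper above, assuming an MXNet Gluon model; the Dense layer and bucket path are hypothetical stand-ins:

import mxnet as mx

net = mx.gluon.nn.Dense(units=1)  # hypothetical model
net.initialize()
hook = create_hook("s3://my-bucket/smdebug-relu-run")  # hypothetical output location
hook.register_block(net)  # smdebug's MXNet hook attaches to the top-level block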
def create_hook():
    # With the following SaveConfig, we will save tensors every 100 steps.
    save_config = SaveConfig(save_interval=100)

    # Create a hook that logs every available tensor (save_all=True) while training the model.
    hook = Hook(save_config=save_config, save_all=True)
    return hook
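
A quick illustration of which steps a save_interval of 100 selects, assuming smdebug's convention that a step is saved when it is a multiple of the interval (step 0 included):

save_interval = 100
steps_saved = [s for s in range(350) if s % save_interval == 0]
assert steps_saved == [0, 100, 200, 300]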
Example #4
def create_hook(output_s3_uri):
    # Create a SaveConfig that determines the steps at which tensors are stored.
    # With the following SaveConfig, we will save tensors for steps 1, 2 and 3.
    save_config = SaveConfig(save_steps=[1, 2, 3])

    # Create a hook that logs all the tensors seen while training the model.
    hook = Hook(out_dir=output_s3_uri, save_config=save_config, save_all=True)
    return hook
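
Once a run like the one above finishes, smdebug's trials API can read the results back; a sketch assuming a hypothetical output path and a run that trained past step 3:

from smdebug.trials import create_trial

trial = create_trial("s3://my-bucket/save-all-run")  # hypothetical path
print(trial.steps())         # with the SaveConfig above: [1, 2, 3]
print(trial.tensor_names())  # save_all=True captures every tensor smdebug sees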
def helper_mxnet_tests(collection, register_loss, save_config):
    coll_name, coll_regex = collection

    run_id = "trial_" + coll_name + "-" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    trial_dir = os.path.join(SMDEBUG_MX_HOOK_TESTS_DIR, run_id)

    hook = MX_Hook(
        out_dir=trial_dir,
        include_collections=[coll_name],
        save_config=save_config,
        export_tensorboard=True,
    )

    simple_mx_model(hook, register_loss=register_loss)
    hook.close()

    saved_scalars = ["scalar/mx_num_steps", "scalar/mx_before_train", "scalar/mx_after_train"]
    verify_files(trial_dir, save_config, saved_scalars)
Example #6
def create_hook(output_s3_uri, block):
    # Create a SaveConfig that determines the steps at which tensors are stored.
    # With the following SaveConfig, we will save tensors for steps 1, 2 and 3.
    save_config = SaveConfig(save_steps=[1, 2, 3])

    # Create a hook that logs weights, biases, gradients, and the model's inputs and outputs while training.
    hook = Hook(
        out_dir=output_s3_uri,
        save_config=save_config,
        include_collections=["weights", "gradients", "biases", "TopBlock"],
    )

    # The names of a block's input and output tensors follow this format:
    #   Inputs:  <block_name>_input_<input_index>
    #   Output:  <block_name>_output
    # To log the model's inputs and output, we populate the collection as follows:
    hook.get_collection("TopBlock").add_block_tensors(block, inputs=True, outputs=True)
    return hook
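
A sketch of wiring the helper above to a Gluon block; the model and output path are hypothetical. With a block named dense0, TopBlock would then capture tensors such as dense0_input_0 and dense0_output:

import mxnet as mx

net = mx.gluon.nn.Dense(units=1)  # hypothetical model; its top block is passed to the helper
net.initialize()
hook = create_hook("s3://my-bucket/smdebug-topblock-run", net)
hook.register_block(net)  # register so the block's inputs and outputs are emitted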
def create_hook(output_uri, save_frequency):
    # With the following SaveConfig, we will save tensors every `save_frequency` steps.
    save_config = SaveConfig(save_interval=save_frequency)

    # Create a hook that logs weights, biases and gradients while training the model.
    hook = Hook(
        out_dir=output_uri,
        save_config=save_config,
        include_collections=["weights", "gradients", "biases"],
    )
    return hook
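
A hypothetical call for the interval-based helper above; the path and frequency are illustrative:

hook = create_hook("/tmp/smdebug-interval-run", save_frequency=100)
# ... register the hook on a model and train, as in the sketches above ...
hook.close()  # flush pending writes at the end of training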
Example #8
def helper_mxnet_tests(collection, register_loss, save_config, with_timestamp):
    coll_name, coll_regex = collection

    run_id = "trial_" + coll_name + "-" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
    trial_dir = os.path.join(SMDEBUG_MX_HOOK_TESTS_DIR, run_id)

    hook = MX_Hook(
        out_dir=trial_dir,
        include_collections=[coll_name],
        save_config=save_config,
        export_tensorboard=True,
    )

    saved_scalars = simple_mx_model(
        hook, register_loss=register_loss, with_timestamp=with_timestamp
    )
    hook.close()

    verify_files(trial_dir, save_config, saved_scalars)
    if with_timestamp:
        # Scalars saved with timestamps should also appear in the TensorBoard event files.
        check_tf_events(trial_dir, saved_scalars)
Example #9
def create_smdebug_hook(output_s3_uri):
    # With the following SaveConfig, we will save tensors for steps 1, 2 and 3
    # (indexing starts with 0).
    save_config = SaveConfig(save_steps=[1, 2, 3])

    # Create a hook that logs weights and gradients while training the model.
    hook = Hook(
        out_dir=output_s3_uri,
        save_config=save_config,
        include_collections=["weights", "gradients"],
    )
    return hook
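
Note that `train` below creates its hook from smdebug's JSON configuration rather than calling the helper above; a sketch of choosing between the two paths, assuming SageMaker's standard SM_TRAINING_ENV variable marks an in-platform run:

import os

if os.environ.get("SM_TRAINING_ENV"):
    hook = Hook.create_from_json_file()  # configuration written by the platform
else:
    hook = create_smdebug_hook("s3://my-bucket/gluonts-debug")  # hypothetical bucket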
def train(bucket, seq, algo, freq, prediction_length, epochs, learning_rate,
          hybridize, num_batches_per_epoch):

    # Create the training dataset from the SageMaker "train" channel.
    df = pd.read_csv(
        filepath_or_buffer=os.environ['SM_CHANNEL_TRAIN'] + "/train.csv",
        header=0,
        index_col=0,
    )

    training_data = ListDataset(
        [{"start": df.index[0], "target": df.usage[:], "item_id": df.client[:]}],
        freq=freq,
    )

    # Create the test dataset from the SageMaker "test" channel.
    df = pd.read_csv(
        filepath_or_buffer=os.environ['SM_CHANNEL_TEST'] + "/test.csv",
        header=0,
        index_col=0,
    )

    test_data = ListDataset(
        [{"start": df.index[0], "target": df.usage[:], "item_id": 'client_12'}],
        freq=freq,
    )

    hook = Hook.create_from_json_file()
    # Select and train the estimator ##################################
    if algo == "DeepAR":
        estimator = DeepAREstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=1,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("DeepAR training is complete SUCCESS")
    elif algo == "SFeedFwd":
        estimator = SimpleFeedForwardEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "lstnet":
        # Needed for LSTNet ONLY
        grouper = MultivariateGrouper(max_target_dim=6)
        training_data = grouper(training_data)
        test_data = grouper(test_data)
        context_length = prediction_length
        num_series = 1
        skip_size = 1
        ar_window = 1
        channels = 4

        estimator = LSTNetEstimator(
            freq=freq,
            prediction_length=prediction_length,
            context_length=context_length,
            num_series=num_series,
            skip_size=skip_size,
            ar_window=ar_window,
            channels=channels,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    elif algo == "seq2seq":
        estimator = MQCNNEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")
    else:
        # Fall back to the Transformer estimator for any other algo value.
        estimator = TransformerEstimator(
            freq=freq,
            prediction_length=prediction_length,
            trainer=Trainer(ctx="cpu",
                            epochs=epochs,
                            learning_rate=learning_rate,
                            hybridize=hybridize,
                            num_batches_per_epoch=num_batches_per_epoch))

        # Train the model
        predictor = estimator.train(training_data=training_data)
        print("training is complete SUCCESS")

    ###################################################

    # Evaluate the trained model on the test data
    forecast_it, ts_it = make_evaluation_predictions(test_data,
                                                     predictor,
                                                     num_samples=100)
    print("EVALUATION is complete SUCCESS")
    forecasts = list(forecast_it)
    tss = list(ts_it)
    evaluator = Evaluator(quantiles=[0.1, 0.5, 0.9])
    agg_metrics, item_metrics = evaluator(iter(tss),
                                          iter(forecasts),
                                          num_series=len(test_data))
    print("METRICS retrieved SUCCESS")
    #bucket = "bwp-sandbox"

    mainpref = "gluonts/blog-models/"
    prefix = mainpref + str(seq) + "/"
    agg_df = pd.DataFrame(agg_metrics, index=[0])
    file = "metrics" + str(seq) + ".csv"
    os.makedirs('metrics', exist_ok=True)  # portable; no error if the directory exists
    cspath = os.path.join('metrics', file)
    agg_df.to_csv(cspath)
    s3.upload_file(cspath, bucket, mainpref + "metrics/" + file)

    hook.save_scalar("MAPE", agg_metrics["MAPE"], sm_metric=True)
    hook.save_scalar("RMSE", agg_metrics["RMSE"], sm_metric=True)
    hook.save_scalar("MASE", agg_metrics["MASE"], sm_metric=True)
    hook.save_scalar("MSE", agg_metrics["MSE"], sm_metric=True)

    print("MAPE:", agg_metrics["MAPE"])

    # Save the model to SM_MODEL_DIR so SageMaker exports it as the model artifact
    predictor.serialize(pathlib.Path(os.environ['SM_MODEL_DIR']))

    uploadDirectory(os.environ['SM_MODEL_DIR'], prefix, bucket)

    return predictor
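
The function above references an `s3` client and an `uploadDirectory` helper that are not shown in the snippet. A plausible minimal version, assuming boto3; the implementation details are an illustration, not the original author's code:

import os
import boto3

s3 = boto3.client("s3")

def uploadDirectory(path, prefix, bucket):
    # Walk the local directory and upload each file under the given S3 prefix.
    for root, _, files in os.walk(path):
        for name in files:
            local_path = os.path.join(root, name)
            key = prefix + os.path.relpath(local_path, path)
            s3.upload_file(local_path, bucket, key)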