Example #1
def train(net: DaRnnNet, train_data: TrainData, t_cfg: TrainConfig, scaler, n_epochs=10, save_plots=False):
    iter_per_epoch = int(np.ceil(t_cfg.train_size * 1. / t_cfg.batch_size))
    iter_losses = np.zeros(n_epochs * iter_per_epoch)
    epoch_losses = np.zeros(n_epochs)
    logger.info(f"Iterations per epoch: {t_cfg.train_size * 1. / t_cfg.batch_size:3.3f} ~ {iter_per_epoch:d}.")

    n_iter = 0

    for e_i in range(n_epochs):
        perm_idx = np.random.permutation(t_cfg.train_size - t_cfg.T)

        for t_i in range(0, t_cfg.train_size, t_cfg.batch_size):
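            # NOTE: perm_idx has only train_size - T entries, while t_i runs up
            # to train_size, so the last slices below come back shorter than
            # batch_size (numpy clamps out-of-range slices rather than raising)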
            batch_idx = perm_idx[t_i:(t_i + t_cfg.batch_size)]
            feats, y_history, y_target = prep_train_data(batch_idx, t_cfg, train_data)

            loss = train_iteration(net, t_cfg.loss_func, feats, y_history, y_target)
            iter_losses[e_i * iter_per_epoch + t_i // t_cfg.batch_size] = loss
            # if (j / t_cfg.batch_size) % 50 == 0:
            #    self.logger.info("Epoch %d, Batch %d: loss = %3.3f.", i, j / t_cfg.batch_size, loss)
            n_iter += 1

            adjust_learning_rate(net, n_iter)

        epoch_losses[e_i] = np.mean(iter_losses[range(e_i * iter_per_epoch, (e_i + 1) * iter_per_epoch)])

        if e_i % 10 == 0:
            y_test_pred = scaler.inverse_transform(
                np.concatenate([train_data.feats[t_cfg.train_size:],
                                predict(net, train_data,
                                        t_cfg.train_size, t_cfg.batch_size, t_cfg.T,
                                        on_train=False)],
                               axis=1))[:, -1]
            y_test_true = scaler.inverse_transform(
                np.concatenate([train_data.feats[t_cfg.train_size:],
                                train_data.targs[t_cfg.train_size:]],
                               axis=1))[:, -1]

            # TODO: make this MSE and make it work for multiple inputs
            val_loss = y_test_pred - y_test_true
            rmse = np.sqrt(np.mean(np.square(y_test_true - y_test_pred)))
            logger.info(f"Epoch {e_i:d}, train loss: {epoch_losses[e_i]:3.3f}, val loss: {np.mean(np.abs(val_loss))}, rmse: {rmse}.")
            logger.info(f"PRED: {y_test_pred[-10:]}")
            logger.info(f"TRUE: {y_test_true[-10:]}")

            plt.figure()
            plt.plot(y_test_true,
                     label="True")
            plt.plot(y_test_pred,
                     label='Predicted - Test')
            plt.legend(loc='upper left')
            utils.save_or_show_plot(f"pred_{e_i}_{rmse}.png", save_plots)

    return iter_losses, epoch_losses
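Unlike the stock variant in the next example, this train signature also takes the fitted scaler so that validation predictions can be inverse-transformed back to the original units. A minimal driver sketch, mirroring the pipeline in Example #3; the helper names and hyperparameters are taken from that example, not from this snippet:

raw_data = pd.read_csv(os.path.join("data", "nasdaq100_padding.csv"))
data, scaler = preprocess_data(raw_data, ("NDX",))
config, model = da_rnn(data, n_targs=1, learning_rate=.001, batch_size=128, T=10)
# this variant threads the scaler through to the validation/plotting step
iter_loss, epoch_loss = train(model, data, config, scaler, n_epochs=10, save_plots=True)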
Example #2
def train(net: DaRnnNet, train_data: TrainData, t_cfg: TrainConfig, n_epochs=10, save_plots=False):
    iter_per_epoch = int(np.ceil(t_cfg.train_size * 1. / t_cfg.batch_size))
    iter_losses = np.zeros(n_epochs * iter_per_epoch)
    epoch_losses = np.zeros(n_epochs)
    logger.info(f"Iterations per epoch: {t_cfg.train_size * 1. / t_cfg.batch_size:3.3f} ~ {iter_per_epoch:d}.")

    n_iter = 0

    for e_i in range(n_epochs):
        perm_idx = np.random.permutation(t_cfg.train_size - t_cfg.T)

        for t_i in range(0, t_cfg.train_size, t_cfg.batch_size):
            batch_idx = perm_idx[t_i:(t_i + t_cfg.batch_size)]
            feats, y_history, y_target = prep_train_data(batch_idx, t_cfg, train_data)

            loss = train_iteration(net, t_cfg.loss_func, feats, y_history, y_target)
            iter_losses[e_i * iter_per_epoch + t_i // t_cfg.batch_size] = loss
            # if (j / t_cfg.batch_size) % 50 == 0:
            #    self.logger.info("Epoch %d, Batch %d: loss = %3.3f.", i, j / t_cfg.batch_size, loss)
            n_iter += 1

            adjust_learning_rate(net, n_iter)

        epoch_losses[e_i] = np.mean(iter_losses[range(e_i * iter_per_epoch, (e_i + 1) * iter_per_epoch)])

        if e_i % 10 == 0:
            y_test_pred = predict(net, train_data,
                                  t_cfg.train_size, t_cfg.batch_size, t_cfg.T,
                                  on_train=False)
            # TODO: make this MSE and make it work for multiple inputs
            val_loss = y_test_pred - train_data.targs[t_cfg.train_size:]
            logger.info(f"Epoch {e_i:d}, train loss: {epoch_losses[e_i]:3.3f}, val loss: {np.mean(np.abs(val_loss))}.")
            y_train_pred = predict(net, train_data,
                                   t_cfg.train_size, t_cfg.batch_size, t_cfg.T,
                                   on_train=True)
            plt.figure()
            plt.plot(range(1, 1 + len(train_data.targs)), train_data.targs,
                     label="True")
            plt.plot(range(t_cfg.T, len(y_train_pred) + t_cfg.T), y_train_pred,
                     label='Predicted - Train')
            plt.plot(range(t_cfg.T + len(y_train_pred), len(train_data.targs) + 1), y_test_pred,
                     label='Predicted - Test')
            plt.legend(loc='upper left')
            utils.save_or_show_plot(f"pred_{e_i}.png", save_plots)

    return iter_losses, epoch_losses
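Every variant above calls prep_train_data without showing it. A plausible reading, consistent with how batch_idx, t_cfg.T and the feats/targs arrays are used in these snippets and assuming the same module context (np, TrainConfig, TrainData); treat the exact window layout as an assumption, not code from any of the listed repositories:

def prep_train_data(batch_idx: np.ndarray, t_cfg: TrainConfig, train_data: TrainData):
    # for each window start b, collect T - 1 steps of driving series and past
    # targets, plus the target value one step past the end of the window
    feats = np.zeros((len(batch_idx), t_cfg.T - 1, train_data.feats.shape[1]))
    y_history = np.zeros((len(batch_idx), t_cfg.T - 1, train_data.targs.shape[1]))
    y_target = train_data.targs[batch_idx + t_cfg.T]

    for b_i, b_idx in enumerate(batch_idx):
        b_slc = slice(b_idx, b_idx + t_cfg.T - 1)
        feats[b_i, :, :] = train_data.feats[b_slc, :]
        y_history[b_i, :] = train_data.targs[b_slc]

    return feats, y_history, y_target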
Example #3
def main():
    save_plots = True
    debug = False

    raw_data = pd.read_csv(os.path.join("data", "nasdaq100_padding.csv"), nrows=100 if debug else None)
    logger.info(f"Shape of data: {raw_data.shape}.\nMissing in data: {raw_data.isnull().sum().sum()}.")
    targ_cols = ("NDX",)
    data, scaler = preprocess_data(raw_data, targ_cols)

    da_rnn_kwargs = {"batch_size": 128, "T": 10}
    config, model = da_rnn(data, n_targs=len(targ_cols), learning_rate=.001, **da_rnn_kwargs)
    iter_loss, epoch_loss = train(model, data, config, n_epochs=10, save_plots=save_plots)
    final_y_pred = predict(model, data, config.train_size, config.batch_size, config.T)

    plt.figure()
    plt.semilogy(range(len(iter_loss)), iter_loss)
    utils.save_or_show_plot("iter_loss.png", save_plots)

    plt.figure()
    plt.semilogy(range(len(epoch_loss)), epoch_loss)
    utils.save_or_show_plot("epoch_loss.png", save_plots)

    plt.figure()
    plt.plot(final_y_pred, label='Predicted')
    plt.plot(data.targs[config.train_size:], label="True")
    plt.legend(loc='upper left')
    utils.save_or_show_plot("final_predicted.png", save_plots)

    with open(os.path.join("data", "da_rnn_kwargs.json"), "w") as fi:
        json.dump(da_rnn_kwargs, fi, indent=4)

    joblib.dump(scaler, os.path.join("data", "scaler.pkl"))
    torch.save(model.encoder.state_dict(), os.path.join("data", "encoder.torch"))
    torch.save(model.decoder.state_dict(), os.path.join("data", "decoder.torch"))
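train_iteration is likewise assumed throughout. A sketch of one optimization step, matching how these examples pair net.enc_opt/net.dec_opt with net.encoder/net.decoder; the numpy-to-tensor conversion is an assumption and the real helper may also move tensors to a device:

def train_iteration(net: DaRnnNet, loss_func, X, y_history, y_target):
    net.enc_opt.zero_grad()
    net.dec_opt.zero_grad()

    # numpy arrays -> float32 tensors (assumed conversion)
    X, y_history, y_target = (torch.as_tensor(a, dtype=torch.float32)
                              for a in (X, y_history, y_target))

    _, input_encoded = net.encoder(X)
    y_pred = net.decoder(input_encoded, y_history)

    loss = loss_func(y_pred, y_target)
    loss.backward()
    net.enc_opt.step()
    net.dec_opt.step()
    return loss.item()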
Example #4
def train(net: DaRnnNet,
          train_data: TrainData,
          t_cfg: TrainConfig,
          n_epochs=10,
          save_plots=False):

    iter_per_epoch = int(np.ceil(t_cfg.train_size * 1. / t_cfg.batch_size))
    iter_losses = np.zeros(n_epochs * iter_per_epoch)
    epoch_losses = np.zeros(n_epochs)
    logger.info(
        f"Iterations per epoch: {t_cfg.train_size * 1. / t_cfg.batch_size:3.3f} ~ {iter_per_epoch:d}."
    )

    n_iter = 0

    for e_i in range(n_epochs):

        print(e_i, end='\r')

        # ANDREA --> The training set is now chosen at random
        #print(len(train_data), t_cfg.train_size, train_data[0][0][:2])
        perm_idx = np.random.permutation(t_cfg.train_size - t_cfg.T)
        perm_idx = np.random.choice(perm_idx, size=TRAIN_SIZE)
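        # np.random.choice samples with replacement by default, so this draws
        # TRAIN_SIZE window starts (possibly repeating); note iter_per_epoch
        # above still derives from t_cfg.train_size, so the iter_losses
        # indexing assumes TRAIN_SIZE is no larger than that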
        #print(perm_idx)

        #for t_i in range(0, t_cfg.train_size, t_cfg.batch_size):
        for t_i in range(0, TRAIN_SIZE, t_cfg.batch_size):
            batch_idx = perm_idx[t_i:(t_i + t_cfg.batch_size)]

            ################################################################
            feats, y_history, y_target = prep_train_data(
                batch_idx, t_cfg, train_data)
            ################################################################

            loss = train_iteration(net, t_cfg.loss_func, feats, y_history,
                                   y_target)
            iter_losses[e_i * iter_per_epoch + t_i // t_cfg.batch_size] = loss
            # if (j / t_cfg.batch_size) % 50 == 0:
            #    self.logger.info("Epoch %d, Batch %d: loss = %3.3f.", i, j / t_cfg.batch_size, loss)
            n_iter += 1

            adjust_learning_rate(net, n_iter)

        epoch_losses[e_i] = np.mean(iter_losses[range(
            e_i * iter_per_epoch, (e_i + 1) * iter_per_epoch)])

        if e_i % 5 == 0:
            y_test_pred = predict(net,
                                  train_data,
                                  t_cfg.train_size,
                                  t_cfg.batch_size,
                                  t_cfg.T,
                                  on_train=False,
                                  eval=True)

            # ANDREA --> Temporary fix
            # y_test_pred = np.maximum(np.minimum(1, y_test_pred), -1)
            val_loss = y_test_pred - train_data.targs[
                t_cfg.train_size:t_cfg.train_size + len(y_test_pred)]
            val_loss = np.mean(np.square(val_loss))

            y_train_pred = predict(net,
                                   train_data,
                                   t_cfg.train_size,
                                   t_cfg.batch_size,
                                   t_cfg.T,
                                   on_train=True)
            tra_loss = y_train_pred - train_data.targs[:len(y_train_pred)]
            tra_loss = np.mean(np.square(tra_loss))

            # TODO: make this MSE and make it work for multiple inputs
            #val_loss = y_test_pred - train_data.targs[t_cfg.train_size:]

            #logger.info(f"Epoch {e_i:d}, train loss: {epoch_losses[e_i]:3.3f}, val loss: {val_loss}.")
            logger.info(
                f"Epoch {e_i:d}, train loss: {tra_loss:3.3f}, val loss: {val_loss:3.3f}."
            )

            # ANDREA --> Temporary fix
            # y_train_pred = np.maximum(np.minimum(1, y_train_pred), -1)

            plt.figure()
            plt.plot(range(1, 1 + len(train_data.targs)),
                     train_data.targs,
                     label="True")
            plt.plot(range(t_cfg.T,
                           len(y_train_pred) + t_cfg.T),
                     y_train_pred,
                     label='Predicted - Train')
            #plt.plot(range(t_cfg.T + len(y_train_pred), len(train_data.targs) + 1), y_test_pred, label='Predicted - Test')
            t0 = t_cfg.T + len(y_train_pred)
            plt.plot(range(t0, t0 + VALI_SIZE),
                     y_test_pred,
                     label='Predicted - Test')
            plt.legend(loc='upper left')
            utils.save_or_show_plot(f"pred_{e_i}.png", save_plots)

    return iter_losses, epoch_losses
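Most variants call adjust_learning_rate(net, n_iter) after every batch (Example #6 swaps it for ReduceLROnPlateau). A sketch of a simple step decay applied to both optimizers; the 10000-iteration interval and 0.9 factor are assumptions:

def adjust_learning_rate(net: DaRnnNet, n_iter: int):
    # decay both optimizers' learning rates by 10% every 10000 iterations
    if n_iter % 10000 == 0 and n_iter > 0:
        for enc_params, dec_params in zip(net.enc_opt.param_groups,
                                          net.dec_opt.param_groups):
            enc_params['lr'] *= 0.9
            dec_params['lr'] *= 0.9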
Example #5
#raw_data = pd.read_csv(os.path.join("data", "iex_nasdaq100_dataset.csv"), nrows=100 if debug else None)
raw_data = pd.read_csv(os.path.join("data", "nasdaq100_padding.csv"), nrows=100 if debug else None)
logger.info(f"Shape of data: {raw_data.shape}.\nMissing in data: {raw_data.isnull().sum().sum()}.")
targ_cols = ("NDX",)
#targ_cols = ("AAPL",)
data, scaler = preprocess_data(raw_data, targ_cols)

da_rnn_kwargs = {"batch_size": 128, "T": 10}
config, model = da_rnn(data, n_targs=len(targ_cols), learning_rate=.001, **da_rnn_kwargs)
iter_loss, epoch_loss = train(model, data, config, n_epochs=10, save_plots=save_plots)
final_y_pred = predict(model, data, config.train_size, config.batch_size, config.T)

plt.figure()
plt.semilogy(range(len(iter_loss)), iter_loss)
utils.save_or_show_plot("iter_loss.png", save_plots)

plt.figure()
plt.plot(final_y_pred, label="Predicted")
plt.plot(data.targs[config.train_size:], label="True")
plt.legend(loc="upper left")
#plt.show()
utils.save_or_show_plot("final_predicted.png", save_plots)

if interactive_plot:
    pred_plot = go.Scatter(x=np.arange(0, len(final_y_pred)),
                           y=final_y_pred,
                           name='Predicted',
                           mode='lines+markers')
    true_plot = go.Scatter(x=np.arange(0, len(data.targs[config.train_size:])),
                           y=data.targs[config.train_size:],
Example #6
def train(net: TCHA, train_data: TrainData, t_cfg: TrainConfig, tarRoad, scaler, n_epochs=10, save_plots=False):
    # number of batches per epoch
    iter_per_epoch = int(np.ceil(t_cfg.train_size * 1. / t_cfg.batch_size))
    # total number of batches across all epochs (one loss slot per iteration)
    iter_losses = np.zeros(n_epochs * iter_per_epoch)
    epoch_losses = np.zeros(n_epochs)
    logger.info(f"Iterations per epoch: {t_cfg.train_size * 1. / t_cfg.batch_size:3.6f} ~ {iter_per_epoch:d}.")

    n_iter = 0

    enc_scheduler = ReduceLROnPlateau(net.enc_opt, 'min', patience=5, factor=0.5)
    dec_scheduler = ReduceLROnPlateau(net.dec_opt, 'min', patience=5, factor=0.5)
    # vis = visdom.Visdom()
    # winstr = 'HC train loss B: ' + str(t_cfg.batch_size) + 'I: ' + str(t_cfg.interval)
    # vis.line([0.], [0.], win=winstr, opts=dict(title=winstr))
    # global_step = 0

    for e_i in range(n_epochs):
        st = datetime.datetime.now()

        perm_idx = np.random.permutation(t_cfg.train_size - t_cfg.T - t_cfg.interval)
        # perm_idx = np.arange(0, t_cfg.train_size - t_cfg.T - t_cfg.interval)

        for t_i in range(0, t_cfg.train_size - t_cfg.T - t_cfg.interval, t_cfg.batch_size):
            # one shuffled batch at a time
            # if t_i == 5632:
            #     print(t_i)
            endidx = min(t_i + t_cfg.batch_size, t_cfg.train_size)
            batch_idx = perm_idx[t_i:endidx]
            feats, y_history, y_target, speed = prep_train_data(batch_idx, t_cfg, train_data)

            loss = train_iteration(net, t_cfg.loss_func, feats, y_history, y_target, speed)

            # store the loss for batch t_i
            iter_losses[e_i * iter_per_epoch + t_i // t_cfg.batch_size] = loss
            # if (j / t_cfg.batch_size) % 50 == 0:
            #    self.logger.info("Epoch %d, Batch %d: loss = %3.3f.", i, j / t_cfg.batch_size, loss)
            n_iter += 1
            # adjust_learning_rate(net, n_iter)
        # average loss over the current epoch
        epoch_losses[e_i] = np.mean(iter_losses[range(e_i * iter_per_epoch, (e_i + 1) * iter_per_epoch)])
        # vis.line([epoch_losses[e_i]], [global_step], win=winstr, update='append')
        # global_step += 1
        enc_scheduler.step(epoch_losses[e_i])
        dec_scheduler.step(epoch_losses[e_i])
        # if e_i % 20 == 0:
        weights, y_test_pred, _ = predict(net, train_data,
                                 t_cfg.train_size, t_cfg.batch_size, t_cfg.T, t_cfg.interval, on_train=False)
        # TODO: make this MSE and make it work for multiple inputs
        # val_loss = y_test_pred - train_data.targs[t_cfg.train_size:]
        val_loss = myMSE(y_test_pred, train_data.targs[t_cfg.train_size:], t_cfg.isMean)

        save_final(t_cfg.isMean, y_test_pred, train_data.targs[t_cfg.train_size:], tarRoad, config.timestep, config.interval, scaler)

        weights, y_train_pred, _ = predict(net, train_data,
                                  t_cfg.train_size, t_cfg.batch_size, t_cfg.T, t_cfg.interval, on_train=True)

        save_final(t_cfg.isMean, y_train_pred, train_data.targs[:t_cfg.train_size], tarRoad, config.timestep, config.interval, scaler,filename="hhh")

        train_loss = myMSE(y_train_pred, train_data.targs[:t_cfg.train_size], t_cfg.isMean)

        logger.info(f"Epoch {e_i:d}, train loss: {np.mean(np.abs(epoch_losses[e_i]))}, val loss: {np.mean(np.abs(val_loss))}.")

        torch.save(net.encoder.state_dict(),
                   'models/' + tarRoad + '/' + str(t_cfg.interval) + '/HCAdam_encoder' + str(t_cfg.batch_size) +
                   str(e_i) + '-norm' + '.model')
        torch.save(net.decoder.state_dict(),
                   'models/' + tarRoad + '/' + str(t_cfg.interval) + '/HCAdam_decoder' + str(t_cfg.batch_size) +
                   str(e_i) + '-norm' + '.model')
        plt.figure()
        plt.plot(range(1, 1 + len(train_data.targs)), train_data.targs,
                 label="True")
        plt.plot(range(t_cfg.T, len(y_train_pred) + t_cfg.T), y_train_pred,
                 label='Predicted - Train')
        plt.plot(range(t_cfg.T + len(y_train_pred) + t_cfg.interval, len(train_data.targs)), y_test_pred,
                 label='Predicted - Test')
        plt.legend(loc='upper left')
        savename = "pred_epoch" + str(e_i) + "interval" + str(t_cfg.interval) + 'batchsize' + str(
            t_cfg.batch_size) + '.png'
        utils.save_or_show_plot(savename, save_plots, t_cfg.interval, tarRoad)
        pd_iterloss = pd.DataFrame(iter_losses)
        pd_epoloss = pd.DataFrame(epoch_losses)
        pd_iterloss.to_csv('result/' + tarRoad + '/iterloss interval' + str(t_cfg.interval) + '.csv')
        pd_epoloss.to_csv('result/' + tarRoad + '/epochloss interval' + str(t_cfg.interval) + '.csv')
        plt.close()

        ed = datetime.datetime.now()
        print('epoch:{}, time cost:{}\n\n'.format(e_i, (ed - st).seconds))

    return iter_losses, epoch_losses
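This variant replaces the manual per-iteration decay with torch.optim.lr_scheduler.ReduceLROnPlateau on both optimizers: step(metric) tracks the best loss seen and, after patience=5 epochs without improvement, multiplies every parameter group's learning rate by factor=0.5. The pattern in isolation, as a runnable toy sketch where the parameter and the loss values are placeholders:

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

opt = torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=1e-3)
sched = ReduceLROnPlateau(opt, 'min', patience=5, factor=0.5)

for epoch_loss in [0.9] + [0.8] * 7:
    sched.step(epoch_loss)  # lr is halved once the plateau exceeds patience
    print(opt.param_groups[0]['lr'])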
Example #7
            pd_spatial = pd.DataFrame(embeded_weights[0])
            pd_time = pd.DataFrame(embeded_weights[1])
            pd_spatial.to_csv('result/' + 'att_spatial' + '.csv')
            pd_time.to_csv('result/' + 'att_time' + '.csv')

        else:
            iter_loss, epoch_loss = train(model, data, config, tarRoad, scaler, n_epochs=num_epochs, save_plots=save_plots)
            weights, final_y_pred, y_true = predict(model, data, config.train_size, config.batch_size, config.T, config.interval)
            pd_spatial = pd.DataFrame(weights[0])
            pd_time = pd.DataFrame(weights[1])
            pd_spatial.to_csv('result/' + 'att_spatial' + '.csv')
            pd_time.to_csv('result/' + 'att_time' + '.csv')
            try:
                plt.figure()
                plt.semilogy(range(len(iter_loss)), iter_loss)
                utils.save_or_show_plot("train iter_loss interval" + str(interval) + ".png", save_plots, interval=interval, tarRoad=tarRoad)

                plt.figure()
                plt.semilogy(range(len(epoch_loss)), epoch_loss)
                utils.save_or_show_plot("train epoch epoch_loss interval" + str(interval) + ".png", save_plots, interval, tarRoad)
            except Exception as e:
                print(e)

        plt.figure()
        plt.plot(data.targs[config.train_size:], label="True")
        plt.plot(final_y_pred, label='Predicted')
        plt.legend(loc='upper left')
        # y1 = pd.DataFrame(y_true)
        # y2 = pd.DataFrame(data.targs[config.train_size:])
        # print(y1)
        # print(y2)
Example #8
    debug = False
    save_plots = False

    with open(os.path.join("data", "enc_kwargs.json"), "r") as fi:
        enc_kwargs = json.load(fi)
    enc = Encoder(**enc_kwargs)
    enc.load_state_dict(
        torch.load(os.path.join("data", "encoder.torch"), map_location=device))

    with open(os.path.join("data", "dec_kwargs.json"), "r") as fi:
        dec_kwargs = json.load(fi)
    dec = Decoder(**dec_kwargs)
    dec.load_state_dict(
        torch.load(os.path.join("data", "decoder.torch"), map_location=device))

    scaler = joblib.load(os.path.join("data", "scaler.pkl"))
    raw_data = pd.read_csv(os.path.join("data", "nasdaq100_padding.csv"),
                           nrows=100 if debug else None)
    targ_cols = ("NDX", )
    data = preprocess_data(raw_data, targ_cols, scaler)

    with open(os.path.join("data", "da_rnn_kwargs.json"), "r") as fi:
        da_rnn_kwargs = json.load(fi)
    final_y_pred = predict(enc, dec, data, **da_rnn_kwargs)

    plt.figure()
    plt.plot(final_y_pred, label='Predicted')
    plt.plot(data.targs[(da_rnn_kwargs["T"] - 1):], label="True")
    plt.legend(loc='upper left')
    utils.save_or_show_plot("final_predicted_reloaded.png", save_plots)
def train(net: DaRnnNet,
          train_data: TrainData,
          training_configuration: TrainConfig,
          n_epochs=10,
          save_plots=True):

    # how many iterations there will be in each epoch
    iter_per_epoch = int(
        np.ceil(training_configuration.train_size * 1. /
                training_configuration.batch_size))
    iter_losses = np.zeros(n_epochs * iter_per_epoch)
    epoch_losses = np.zeros(n_epochs)
    logger.info(
        f"Iterations per epoch: {training_configuration.train_size * 1. / training_configuration.batch_size:3.3f} ~ {iter_per_epoch:d}."
    )

    n_iter = 0

    for e_i in range(n_epochs):
        print("Epoch no:", e_i)
        # np.random.permutation(x): if x is an integer, randomly permute
        # np.arange(x); if x is an array, shuffle a copy of it
        randomly_permuted_index = np.random.permutation(
            training_configuration.train_size - training_configuration.T)

        for t_i in range(0, training_configuration.train_size,
                         training_configuration.batch_size):

            batch_idx = randomly_permuted_index[t_i:(
                t_i + training_configuration.batch_size)]
            feats, y_history, y_target = prep_train_data(
                batch_idx, training_configuration, train_data)

            loss = train_iteration(net, training_configuration.loss_func,
                                   feats, y_history, y_target)
            iter_losses[e_i * iter_per_epoch +
                        t_i // training_configuration.batch_size] = loss
            # if (j / t_cfg.batch_size) % 50 == 0:
            #    self.logger.info("Epoch %d, Batch %d: loss = %3.3f.", i, j / t_cfg.batch_size, loss)
            n_iter += 1

            adjust_learning_rate(net, n_iter)

        epoch_losses[e_i] = np.mean(iter_losses[range(
            e_i * iter_per_epoch, (e_i + 1) * iter_per_epoch)])

        if e_i % 10 == 0:
            y_test_pred = predict(net,
                                  train_data,
                                  training_configuration.train_size,
                                  training_configuration.batch_size,
                                  training_configuration.T,
                                  on_train=False)
            # TODO: make this MSE and make it work for multiple inputs
            val_loss = y_test_pred - train_data.targs[
                training_configuration.train_size:]
            logger.info(
                f"Epoch {e_i:d}, train loss: {epoch_losses[e_i]:3.3f}, val loss: {np.mean(np.abs(val_loss))}."
            )
            y_train_pred = predict(net,
                                   train_data,
                                   training_configuration.train_size,
                                   training_configuration.batch_size,
                                   training_configuration.T,
                                   on_train=True)
            plt.figure()
            plt.plot(range(1, 1 + len(train_data.targs)),
                     train_data.targs,
                     label="True")
            plt.plot(range(training_configuration.T,
                           len(y_train_pred) + training_configuration.T),
                     y_train_pred,
                     label='Predicted - Train')
            plt.plot(range(training_configuration.T + len(y_train_pred),
                           len(train_data.targs) + 1),
                     y_test_pred,
                     label='Predicted - Test')
            plt.legend(loc='upper left')
            utils.save_or_show_plot(f"pred_{e_i}.png", save_plots)

    return iter_losses, epoch_losses
Example #10
def main(raw_args=None):
    """
    Call like this: 
    ['--tasks', 'smartrain', '-epochs', '500']
    """

    save_plots = True
    debug = False

    arguments = parse_args(raw_args)
    """ handle different datasets"""
    if arguments.task == "nasdaq":
        raw_data = pd.read_csv(os.path.join("data", "nasdaq100_padding.csv"),
                               nrows=100 if debug else None)
        targ_cols = ("NDX", )  # "RH"
    elif arguments.task == "pump":
        print("pump")
        targ_cols = ("sensor_00", "sensor_04")
    elif arguments.task == "smartrain":
        #path = "/content/data/pump/labeled/sensor.csv.pkl"
        path = "/content/data/smart-rain/All_Data_No0.csv"
        #raw_data = pd.read_pickle(path)
        raw_data = pd.read_csv(path)
        raw_data['time'] = pd.to_datetime(raw_data['time'])
        raw_data.drop(['time', "Rain"], axis=1, inplace=True)
        print(raw_data.head())

        #raw_data = raw_data.set_index("time")
        #raw_data.index = pd.to_datetime(raw_data.index)
        #raw_data = sm.datasets.rwm.load_pandas().data
        #raw_data = sm.datasets.get_rdataset("datasets", "treering")

        print(raw_data.columns)
        print(raw_data.tail())
        targ_cols = ("temperature", )  # "RH"
    else:
        raise ValueError('Invalid task.')

    logger.info(
        f"Shape of data: {raw_data.shape}.\nMissing in data: {raw_data.isnull().sum().sum()}."
    )

    data, scaler = preprocess_data(raw_data, targ_cols)

    da_rnn_kwargs = {
        "batch_size": arguments.batchsize,
        "T": arguments.ntimestep
    }
    config, model = da_rnn(data,
                           n_targs=len(targ_cols),
                           learning_rate=arguments.lr,
                           **da_rnn_kwargs)

    iter_loss, epoch_loss = train(model,
                                  data,
                                  config,
                                  n_epochs=arguments.epochs,
                                  save_plots=save_plots)
    final_y_pred = predict(model, data, config.train_size, config.batch_size,
                           config.T)

    plt.figure()
    plt.semilogy(range(len(iter_loss)), iter_loss)
    utils.save_or_show_plot("iter_loss.png", save_plots)

    plt.figure()
    plt.semilogy(range(len(epoch_loss)), epoch_loss)
    utils.save_or_show_plot("epoch_loss.png", save_plots)

    plt.figure()
    plt.plot(final_y_pred, label='Predicted')
    plt.plot(data.targs[config.train_size:], label="True")
    plt.legend(loc='upper left')
    utils.save_or_show_plot("final_predicted.png", save_plots)

    with open(os.path.join("data", "da_rnn_kwargs.json"), "w") as fi:
        json.dump(da_rnn_kwargs, fi, indent=4)

    joblib.dump(scaler, os.path.join("data", "scaler.pkl"))
    torch.save(model.encoder.state_dict(),
               os.path.join("data", "encoder.torch"))
    torch.save(model.decoder.state_dict(),
               os.path.join("data", "decoder.torch"))
Example #11
def train(net: DaRnnNet,
          train_data: TrainData,
          t_cfg: TrainConfig,
          n_epochs=10,
          save_plots=False):
    iter_per_epoch = int(np.ceil(t_cfg.train_size * 1. / t_cfg.batch_size))
    iter_losses = np.zeros(n_epochs * iter_per_epoch)
    epoch_losses = np.zeros(n_epochs)
    logger.info(
        f"[{time_since(start)}] Iterations per epoch:{t_cfg.train_size * 1. / t_cfg.batch_size:3.3f}~{iter_per_epoch:d}"
    )

    n_iter = 0  # total number of training iterations
    for e_i in range(n_epochs):
        # perm_idx: shuffled start indices of the length-T windows,
        # drawn from the range 0 ~ train_size - T - 1
        perm_idx = np.random.permutation(t_cfg.train_size - t_cfg.T)

        for t_i in range(0, t_cfg.train_size,
                         t_cfg.batch_size):  # 0 ~ train_size, step batch_size
            # the window-start indices that make up this batch
            batch_idx = perm_idx[t_i:(t_i + t_cfg.batch_size)]
            feats, y_history, y_target = prepare_train_data(
                batch_idx, t_cfg, train_data)

            loss = train_iteration(net, t_cfg.loss_func, feats, y_history,
                                   y_target)
            # slot t_i // batch_size of epoch e_i (iter_per_epoch slots per epoch)
            iter_losses[e_i * iter_per_epoch + t_i // t_cfg.batch_size] = loss
            n_iter += 1
            adjust_learning_rate(net, n_iter)

        epoch_losses[e_i] = np.mean(iter_losses[range(
            e_i * iter_per_epoch, (e_i + 1) * iter_per_epoch)])
        """
        一轮训练完打印一次结果
        """
        if e_i % 1 == 0:
            y_test_pred = predict(net,
                                  train_data,
                                  t_cfg.train_size,
                                  t_cfg.batch_size,
                                  t_cfg.T,
                                  on_train=False)
            # TODO: make this mse and make it work for multiple inputs
            val_loss = y_test_pred - train_data.targs[t_cfg.train_size:]
            logger.info(
                f"[{time_since(start)}] Epoch{e_i:d} - train loss:{epoch_losses[e_i]}  val loss:{np.mean(np.abs(val_loss))}."
            )
            y_train_pred = predict(net,
                                   train_data,
                                   t_cfg.train_size,
                                   t_cfg.batch_size,
                                   t_cfg.T,
                                   on_train=True)

            plt.figure()
            plt.title(f"pred_{e_i}")
            plt.plot(range(1, 1 + len(train_data.targs)),
                     train_data.targs,
                     label="True")
            plt.plot(range(t_cfg.T,
                           len(y_train_pred) + t_cfg.T),
                     y_train_pred,
                     label="Predicted -Train")
            plt.plot(range(t_cfg.T + len(y_train_pred),
                           len(train_data.targs) + 1),
                     y_test_pred,
                     label='Predicted - Test')
            plt.legend(loc="upper left")
            utils.save_or_show_plot(f"pred_{e_i}.png", save_plots)

    return iter_losses, epoch_losses
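Finally, predict is the other helper every example assumes. A sketch consistent with how the snippets index its output (train predictions aligned from step T, test predictions covering everything past train_size); it assumes the same module context (np, torch, DaRnnNet, TrainData), and the exact windowing is an assumption:

def predict(net: DaRnnNet, t_dat: TrainData, train_size: int, batch_size: int,
            T: int, on_train=False):
    out_size = t_dat.targs.shape[1]
    if on_train:
        y_pred = np.zeros((train_size - T + 1, out_size))
    else:
        y_pred = np.zeros((t_dat.feats.shape[0] - train_size, out_size))

    for y_i in range(0, len(y_pred), batch_size):
        y_slc = slice(y_i, y_i + batch_size)
        batch_idx = range(len(y_pred))[y_slc]
        X = np.zeros((len(batch_idx), T - 1, t_dat.feats.shape[1]))
        y_history = np.zeros((len(batch_idx), T - 1, t_dat.targs.shape[1]))

        for b_i, b_idx in enumerate(batch_idx):
            # train windows start at b_idx; test windows end just before
            # the step being predicted, offset past the training split
            if on_train:
                idx = range(b_idx, b_idx + T - 1)
            else:
                idx = range(b_idx + train_size - T, b_idx + train_size - 1)
            X[b_i, :, :] = t_dat.feats[idx, :]
            y_history[b_i, :] = t_dat.targs[idx]

        X, y_history = (torch.as_tensor(a, dtype=torch.float32)
                        for a in (X, y_history))
        with torch.no_grad():
            _, input_encoded = net.encoder(X)
            y_pred[y_slc] = net.decoder(input_encoded, y_history).cpu().numpy()

    return y_pred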