Example #1
import argparse
import os

# `cfg` and `Trainer` come from the project's config and training modules.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--cfg", type=str, default='experiments/db_resnet50_s1.yml', help="config file")
    parser.add_argument("--start", type=int, default=0, help="start iter")
    parser.add_argument("--gpu", type=int, default=0, help="gpu id")

    args, _ = parser.parse_known_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    cfg.merge_from_file(args.cfg)
    trainer = Trainer(cfg)
    trainer.train(args.start)


if __name__ == "__main__":
    main()
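The `cfg.merge_from_file` call matches the yacs config API. A minimal sketch of that pattern, assuming yacs is the library in use (the node names and defaults are illustrative):

from yacs.config import CfgNode as CN

_C = CN()
_C.SOLVER = CN()
_C.SOLVER.LR = 0.001  # illustrative default, overridable from YAML

cfg = _C.clone()
cfg.merge_from_file('experiments/db_resnet50_s1.yml')  # YAML values override defaults
cfg.freeze()  # lock the config for the rest of the run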
Example #2
import os

import numpy as np
import pandas as pd
import torch
from fastprogress import progress_bar  # assumed source of progress_bar (it supports .comment)

# util, GWNet, Trainer, eval_, calc_tstep_metrics and summary are project modules.


def main(args, **model_kwargs):
    device = torch.device(args.device)
    # The same batch size is used for the train, validation and test loaders.
    data = util.load_dataset(args.data,
                             args.batch_size,
                             args.batch_size,
                             args.batch_size,
                             n_obs=args.n_obs,
                             fill_zeroes=args.fill_zeroes)
    scaler = data['scaler']
    aptinit, supports = util.make_graph_inputs(args, device)

    model = GWNet.from_args(args, device, supports, aptinit, **model_kwargs)
    if args.checkpoint:
        model.load_checkpoint(torch.load(args.checkpoint))
    model.to(device)
    engine = Trainer.from_args(model, scaler, args)
    metrics = []
    best_model_save_path = os.path.join(args.save, 'best_model.pth')
    lowest_mae_yet = 100  # high value, will get overwritten
    mb = progress_bar(list(range(1, args.epochs + 1)))
    epochs_since_best_mae = 0
    for _ in mb:
        train_loss, train_mape, train_rmse = [], [], []
        data['train_loader'].shuffle()
        for batch_idx, (x, y) in enumerate(data['train_loader'].get_iterator()):
            trainx = torch.Tensor(x).to(device).transpose(1, 3)
            trainy = torch.Tensor(y).to(device).transpose(1, 3)
            yspeed = trainy[:, 0, :, :]  # channel 0 holds the speed target
            if yspeed.max() == 0:
                continue  # skip all-zero (missing) batches
            mae, mape, rmse = engine.train(trainx, yspeed)
            train_loss.append(mae)
            train_mape.append(mape)
            train_rmse.append(rmse)
            if args.n_iters is not None and batch_idx >= args.n_iters:
                break
        engine.scheduler.step()
        _, valid_loss, valid_mape, valid_rmse = eval_(data['val_loader'],
                                                      device, engine)
        m = dict(train_loss=np.mean(train_loss),
                 train_mape=np.mean(train_mape),
                 train_rmse=np.mean(train_rmse),
                 valid_loss=np.mean(valid_loss),
                 valid_mape=np.mean(valid_mape),
                 valid_rmse=np.mean(valid_rmse))

        m = pd.Series(m)
        metrics.append(m)
        if m.valid_loss < lowest_mae_yet:
            torch.save(engine.model.state_dict(), best_model_save_path)
            lowest_mae_yet = m.valid_loss
            epochs_since_best_mae = 0
        else:
            epochs_since_best_mae += 1
        met_df = pd.DataFrame(metrics)
        mb.comment = f'best val_loss: {met_df.valid_loss.min(): .3f}, current val_loss: {m.valid_loss:.3f}, current train loss: {m.train_loss: .3f}'
        met_df.round(6).to_csv(f'{args.save}/metrics.csv')
        if epochs_since_best_mae >= args.es_patience:
            break  # early stopping
    # Metrics on test data
    engine.model.load_state_dict(torch.load(best_model_save_path))
    realy = torch.Tensor(data['y_test']).transpose(1, 3)[:, 0, :, :].to(device)
    test_met_df, yhat = calc_tstep_metrics(engine.model, device,
                                           data['test_loader'], scaler, realy,
                                           args.seq_length)
    test_met_df.round(6).to_csv(os.path.join(args.save, 'test_metrics.csv'))
    print(summary(args.save))
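Examples #2 and #6 both implement "save on best validation loss, stop after a patience window" by hand. A minimal standalone sketch of that pattern, with `validate` standing in as a hypothetical helper:

import torch

def fit(model, num_epochs, patience, save_path='best_model.pth'):
    best_val = float('inf')
    epochs_without_improvement = 0
    for epoch in range(num_epochs):
        val_loss = validate(model)  # hypothetical: returns validation loss
        if val_loss < best_val:
            best_val = val_loss
            torch.save(model.state_dict(), save_path)  # keep only the best weights
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            break  # early stopping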
Example #3
print(f"LOGGING EVERY {config.LOG_EVERY} EPOCHS\n")

epochs = config.EPOCHS

model.to(config.DEVICE)

# initialize `Trainer` if resuming training
if args['resume_training'] == 'yes':
    if args['model_path'] is None:
        sys.exit('\nPLEASE PROVIDE A MODEL TO RESUME TRAINING FROM!')
    trainer = Trainer(model,
                      train_data_loader,
                      train_dataset,
                      valid_data_loader,
                      valid_dataset,
                      config.CLASSES_TO_TRAIN,
                      epochs,
                      config.DEVICE,
                      config.LR,
                      args['resume_training'],
                      model_path=args['model_path'])

# initialize `Trainer` if training from beginning
else:
    trainer = Trainer(model, train_data_loader, train_dataset,
                      valid_data_loader, valid_dataset,
                      config.CLASSES_TO_TRAIN, epochs, config.DEVICE,
                      config.LR, args['resume_training'])

trained_epochs = trainer.get_num_epochs()
epochs_to_train = epochs - trained_epochs
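The `get_num_epochs` call implies the checkpoint stores the epoch counter alongside the weights. A minimal sketch of how such a resume might work internally, assuming a standard PyTorch checkpoint dict (the keys are illustrative):

import torch

def load_training_state(model, optimizer, model_path, device='cpu'):
    """Restore weights, optimizer state and the epoch counter from a checkpoint."""
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch']  # number of epochs already trained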
Example #4
def make_trainer(model, args):
    # `args` is accepted for interface consistency but is unused here.
    return Trainer(model)
Example #5
parser.add_argument("--pre_trained_disc", default=None)
parser.add_argument("--pre_trained_gen", default=None)
parser.add_argument("--dataset", default="flowers")
parser.add_argument("--split", default=0, type=int)
parser.add_argument("--batch_size", default=64, type=int)
parser.add_argument("--num_workers", default=8, type=int)
parser.add_argument("--epochs", default=200, type=int)
args = parser.parse_args()

trainer = Trainer(
    type=args.type,
    dataset=args.dataset,
    split=args.split,
    lr=args.lr,
    diter=args.diter,
    vis_screen=args.vis_screen,
    save_path=args.save_path,
    l1_coef=args.l1_coef,
    l2_coef=args.l2_coef,
    pre_trained_disc=args.pre_trained_disc,
    pre_trained_gen=args.pre_trained_gen,
    batch_size=args.batch_size,
    num_workers=args.num_workers,
    epochs=args.epochs,
)

if not args.inference:
    trainer.train(args.cls)
else:
    trainer.predict()
Example #6
# Imports as in Example #2, plus the project's StaticNet and LSTMNet models.
def main(args, **model_kwargs):
    # Train on subset of sensors (faster for isolated pred)
    # incl_sensors = list(range(207)) #[17, 111, 12, 80, 200]
    # args.num_sensors = len(incl_sensors)
    device = torch.device(args.device)
    data = util.load_dataset(args.data,
                             args.batch_size,
                             args.batch_size,
                             args.batch_size,
                             n_obs=args.n_obs,
                             fill_zeroes=args.fill_zeroes)
    scaler = data['scaler']
    supports = []  # graph supports disabled in this variant
    aptinit = 0
    # aptinit, supports = util.make_graph_inputs(args, device)

    # Length of the prediction
    args.seq_length = data['y_val'].shape[1]
    args.num_sensors = data['x_val'].shape[2]
    if args.static:
        print('Selected static prediction')
        model = StaticNet.from_args(args, device, supports, aptinit,
                                    **model_kwargs)
    elif args.lstm:
        print('Selected LSTM-FC model')
        args.nhid = 256
        args.weight_decay = 0.0005
        args.learning_rate = 0.001
        model = LSTMNet.from_args(args, device, supports, aptinit,
                                  **model_kwargs)
    else:
        print('Selected Graph Wavenet model')
        model = GWNet.from_args(args, device, supports, aptinit,
                                **model_kwargs)

    print(args)

    if args.checkpoint:
        model.load_checkpoint(torch.load(args.checkpoint))
    model.to(device)
    engine = Trainer.from_args(model, scaler, args)
    metrics = []
    best_model_save_path = os.path.join(args.save, 'best_model.pth')
    lowest_mae_yet = 100  # high value, will get overwritten
    mb = progress_bar(list(range(1, args.epochs + 1)))
    epochs_since_best_mae = 0
    ep_count = 1

    for _ in mb:
        train_loss, train_mape, train_rmse = [], [], []
        data['train_loader'].shuffle()
        for batch_idx, (x, y) in enumerate(data['train_loader'].get_iterator()):
            trainx = torch.Tensor(x).to(device).transpose(1, 3)
            trainy = torch.Tensor(y).to(device).transpose(1, 3)
            yspeed = trainy[:, 0, :, :]  # channel 0 holds the speed target
            if yspeed.max() == 0:
                continue  # skip all-zero (missing) batches
            mae, mape, rmse = engine.train(trainx, yspeed)
            train_loss.append(mae)
            train_mape.append(mape)
            train_rmse.append(rmse)
            print('MAPE', mape)
            if batch_idx % 10 == 5:
                # WARN: runs a full multi-horizon evaluation every 10 iterations
                evaluate_multiple_horizon(engine.model, device, data,
                                          args.seq_length)

            if args.n_iters is not None and batch_idx >= args.n_iters:
                break
        engine.scheduler.step()
        print('EPOCH', ep_count)
        ep_count += 1
        _, valid_loss, valid_mape, valid_rmse = eval_(data['val_loader'],
                                                      device, engine)
        m = dict(train_loss=np.mean(train_loss),
                 train_mape=np.mean(train_mape),
                 train_rmse=np.mean(train_rmse),
                 valid_loss=np.mean(valid_loss),
                 valid_mape=np.mean(valid_mape),
                 valid_rmse=np.mean(valid_rmse))

        m = pd.Series(m)
        metrics.append(m)
        if m.valid_loss < lowest_mae_yet:
            torch.save(engine.model.state_dict(), best_model_save_path)
            lowest_mae_yet = m.valid_loss
            epochs_since_best_mae = 0
        else:
            epochs_since_best_mae += 1
        met_df = pd.DataFrame(metrics)
        mb.comment = f'best val_loss: {met_df.valid_loss.min(): .3f}, current val_loss: {m.valid_loss:.3f}, current train loss: {m.train_loss: .3f}'
        met_df.round(6).to_csv(f'{args.save}/metrics.csv')
        if epochs_since_best_mae >= args.es_patience:
            break  # early stopping
    # Metrics on test data
    engine.model.load_state_dict(torch.load(best_model_save_path))
    realy = torch.Tensor(data['y_test']).transpose(1, 3)[:, 0, :, :].to(device)
    test_met_df, yhat = calc_tstep_metrics(engine.model, device,
                                           data['test_loader'], scaler, realy,
                                           args.seq_length)
    test_met_df.round(6).to_csv(os.path.join(args.save, 'test_metrics.csv'))
    print(summary(args.save))
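The repeated `transpose(1, 3)` in Examples #2 and #6 swaps the time and feature axes: the loader presumably yields batches shaped (batch, seq_len, num_nodes, features), while the model expects (batch, features, num_nodes, seq_len). A quick shape check with illustrative sizes:

import torch

x = torch.zeros(64, 12, 207, 2)  # (batch, seq_len, num_nodes, features), illustrative
print(x.transpose(1, 3).shape)   # torch.Size([64, 2, 207, 12])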