Example #1
def trainNet(model, train_loader, val_loader, device, static_map, start_epoch=0, globaliter_=0):
    # Print all of the hyperparameters of this training run:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", config['dataloader']['batch_size'])
    print("epochs=", config['num_epochs'])
    print('starting from epoch %i' % start_epoch)
    print("learning_rate=", config['optimizer']['lr'])
    print("network_depth=", config['model']['depth'])
    print("=" * 30)

    # define the optimizer & learning rate
    optim = torch.optim.SGD(model.parameters(), **config['optimizer'])

    scheduler = StepLR(optim,
                       step_size=config['lr_step_size'],
                       gamma=config['lr_gamma'])

    if config['cont_model_path'] is not None:
        log_dir = config['cont_model_path']
    else:
        log_dir = 'runs/Unet-' + datetime.now().strftime("%Y-%m-%d-%H-%M-%S-") + \
                  '-'.join(config['dataset']['cities'])
    writer = Visualizer(log_dir)

    # dump config file
    with open(os.path.join(log_dir, 'config.json'), 'w') as fp:
        json.dump(config, fp)

    # Time for printing
    training_start_time = time.time()
    globaliter = globaliter_

    # initialize the early_stopping object
    early_stopping = EarlyStopping(log_dir, patience=config['patience'], verbose=True)

    # Loop for n_epochs
    for epoch_idx, epoch in enumerate(range(start_epoch, config['num_epochs'])):
        writer.write_lr(optim, epoch)

        # train for one epoch
        globaliter = train(model, train_loader, static_map, optim, device, writer, epoch, globaliter)

        # At the end of the epoch, do a pass on the validation set
        val_loss = validate(model, val_loader, static_map, device, writer, globaliter)

        # At the end of the epoch, do a pass on the validation set only considering the test times
        # val_loss_testtimes = validate(model, val_loader_ttimes, device, writer, globaliter, if_testtimes=True)

        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(val_loss, model, epoch+1, globaliter)

        if early_stopping.early_stop:
            print("Early stopping")
            break

        if config['debug'] and epoch_idx >= 0:  # in debug mode, run a single epoch only
            break

        # advance the LR schedule (the explicit epoch argument is deprecated in newer PyTorch)
        scheduler.step(epoch)

    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

    # remember to close tensorboard writer
    writer.close()
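
The trainNet above (and the later examples) relies on an EarlyStopping helper whose implementation is not part of this listing. The following is a minimal sketch assuming only the interface used here: constructed with a log directory, a patience and a verbose flag, called with the validation loss and the model (extra positional arguments are tolerated), and exposing an early_stop attribute. The project's actual class may checkpoint and log differently.

import os
import numpy as np
import torch


class EarlyStopping:
    """Stop training once the validation loss has not improved for `patience` epochs."""

    def __init__(self, log_dir, patience=5, verbose=False):
        self.log_dir = log_dir
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = np.inf
        self.early_stop = False

    def __call__(self, val_loss, model, *args):
        # Extra positional arguments (e.g. epoch, globaliter) are accepted but unused here.
        if val_loss < self.best_loss:
            # Improvement: checkpoint the current weights and reset the counter.
            if self.verbose:
                print('Validation loss improved ({:.6f} -> {:.6f}); saving model.'.format(
                    self.best_loss, val_loss))
            torch.save(model.state_dict(), os.path.join(self.log_dir, 'checkpoint.pt'))
            self.best_loss = val_loss
            self.counter = 0
        else:
            # No improvement: count up and trip the flag once patience is exhausted.
            self.counter += 1
            if self.verbose:
                print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
            if self.counter >= self.patience:
                self.early_stop = True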
Example #2
def trainNet(model, train_loader, val_loader, device, adj, nn_ixs, edge_index, config, log_dir, coords=None):
    """

    Args:
        model:
        train_loader:
        val_loader:
        device:
        adj:
        nn_ixs:
        edge_index:
        config:
        log_dir:
        coords:

    Returns:

    """

    # define the optimizer & learning rate
    optim = torch.optim.Adam(model.parameters(), **config['optimizer'])

    # scheduler = StepLR(optim, step_size=config['lr_step_size'], gamma=config['lr_gamma'])

    writer = Visualizer(log_dir)

    # dump config file
    with open(os.path.join(log_dir, 'config.json'), 'w') as fp:
        json.dump(config, fp)

    # Time for printing
    training_start_time = time.time()
    globaliter = 0

    # initialize the early_stopping object
    early_stopping = EarlyStopping(log_dir, patience=config['patience'], verbose=True)
    #    adj = adj.to(device)
    batch_size = config['dataloader']['batch_size']
    print_every_step = config['print_every_step']
    # Loop for n_epochs
    for epoch_idx, epoch in enumerate(range(config['num_epochs'])):

        writer.write_lr(optim, globaliter)

        # train for one epoch
        globaliter = train(model=model, train_loader=train_loader, optim=optim, device=device, writer=writer,
                           epoch=epoch, globaliter=globaliter, adj=adj, nn_ixs=nn_ixs, edge_index=edge_index,
                           batch_size=batch_size, coords=coords, print_every_step=print_every_step)

        # At the end of the epoch, do a pass on the validation set
        # val_loss = validate(model, val_loader, device, writer, globaliter, adj, nn_ixs, edge_index)
        val_loss = validate(model=model, val_loader=val_loader, device=device, adj=adj, nn_ixs=nn_ixs,
                            edge_index=edge_index, batch_size=batch_size, coords=coords,
                            writer=writer, globaliter=globaliter)

        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(val_loss, model)

        if early_stopping.early_stop:
            print("Early stopping")
            break

        if config['debug'] and epoch_idx >= 0:  # in debug mode, run a single epoch only
            break

        # scheduler.step()

    print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

    # remember to close writer
    writer.close()
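
All of these examples log through a Visualizer object. One possible minimal implementation is sketched below, assuming it is a thin wrapper around a TensorBoard SummaryWriter that exposes exactly the calls used in this listing (write_lr, write_text, close); the real class in the source project may do more.

import os
from torch.utils.tensorboard import SummaryWriter


class Visualizer:
    """Thin TensorBoard wrapper exposing the logging calls used in the training loops above."""

    def __init__(self, log_dir):
        os.makedirs(log_dir, exist_ok=True)
        self.writer = SummaryWriter(log_dir=log_dir)

    def write_lr(self, optim, global_step):
        # Log the learning rate of every parameter group of the optimizer.
        for i, group in enumerate(optim.param_groups):
            self.writer.add_scalar('learning_rate/group_{}'.format(i), group['lr'], global_step)

    def write_text(self, text, tag):
        # Store a short text note (e.g. total training time, peak memory) under `tag`.
        self.writer.add_text(tag, text)

    def close(self):
        self.writer.close()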
Example #3

# define data
dataset_name = 'Cora'
path = osp.join(os.getcwd(), '..', 'data', dataset_name)
dataset = Planetoid(path, dataset_name, transform=T.NormalizeFeatures())
data = dataset[0]
data.batch = None
data.adj = to_dense_adj(data.edge_index)

# define logging
log_dir = os.path.join(
    '..', 'runs',
    config['modelname'] + '_' + datetime.now().strftime("%Y-%m-%d-%H-%M-%S") +
    '_' + config['model_log_info'])
writer = Visualizer(log_dir)
with open(os.path.join(log_dir, 'config.json'), 'w') as fp:
    json.dump(config, fp)

# define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model
model = diff_pool_net2(dataset, **config['model']).to(device)
data = data.to(device)
lr = config['optimizer']['lr']
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# initialize the early_stopping object
early_stopping = EarlyStopping(log_dir,
                               patience=config['patience'],
                               verbose=True)
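
Example #3 breaks off right after the setup. A hypothetical continuation is sketched below, mirroring the loop structure of the other examples. It assumes a config['num_epochs'] entry, assumes the model's forward pass takes the data object and returns per-node log-probabilities (the real diff_pool_net2 interface may differ), and reuses the EarlyStopping sketch from above.

import torch.nn.functional as F

for epoch in range(config['num_epochs']):
    # full-batch training step on the Planetoid graph
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

    # validation loss on the held-out nodes
    model.eval()
    with torch.no_grad():
        out = model(data)
        val_loss = F.nll_loss(out[data.val_mask], data.y[data.val_mask]).item()

    writer.write_lr(optimizer, epoch)
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

writer.close()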
Example #4
def networkTraining(model, train_loader, val_loader, epochs, learning_rate,
                    device, log_path, includeHeading):
    """
        Do the whole training
    """

    print('------ STARTING TRAINING ------')
    print('Number of epochs: ', epochs)
    print('Learning rate: ', learning_rate)
    print('Batch Size: ', config['batch_size'])
    print('City: ', config['city'])
    print('-' * 31)

    # Define optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Initialize log file
    writer = Visualizer(log_path)

    # dump config file
    with open(os.path.join(log_path, 'config.json'), 'w') as fp:
        json.dump(config, fp)

    startTime = time.time()
    iterator = 0

    val_loss_min = np.inf
    counter = 0

    # For each epoch
    for epoch in range(epochs):
        writer.write_lr(optimizer, iterator)

        # train for one epoch
        iterator = training(model, train_loader, optimizer, device, writer,
                            epoch, iterator)

        # Early stopping (training failed)
        if iterator == -1:
            duration = time.time() - startTime
            print("Training finished (Error), took {:.2f}s".format(duration))
            break

        # get validation loss and save images
        valLoss = validation(model, val_loader, device, writer, iterator,
                             log_path, includeHeading)

        # Early stopping (didn't improve for 2 epochs)
        if valLoss < val_loss_min:
            torch.save(model.state_dict(),
                       os.path.join(log_path, 'checkpoint.pt'))
            val_loss_min = valLoss
            counter = 0
        elif counter == 1:
            duration = time.time() - startTime
            print("Training finished (early), took {:.2f}s".format(duration))
            break
        else:
            counter += 1

    # Dump statistics in tensorboard file
    duration = time.time() - startTime
    print("Training finished, took {:.2f}s".format(duration))

    writer.write_text('{:.2f}'.format(duration), 'Time')

    if device != 'cpu':
        mem = torch.cuda.max_memory_allocated(device)
        mem = mem // 1048576  # convert bytes to MiB
        writer.write_text('{:d}'.format(mem), 'Memory')
    writer.close()
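
The memory report above divides bytes by 1048576 (2**20) to convert to MiB. A small standalone sketch of the same measurement follows; the reset call is optional, but when several trainings share one process it ensures the reported peak covers only the current run.

import torch

if torch.cuda.is_available():
    device = torch.device('cuda')
    torch.cuda.reset_peak_memory_stats(device)  # start a fresh measurement window

    # ... run the training here ...

    peak_mib = torch.cuda.max_memory_allocated(device) // 1048576  # bytes -> MiB
    print('peak GPU memory: {:d} MiB'.format(peak_mib))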