def train_main():
    """Train Net on a single device and save the trained weights.

    Builds the model, trains for a fixed number of epochs with SGD +
    BCELoss, evaluates on the validation set after every epoch, and
    writes the final state dict to ``model_file``.
    """
    net = Net().to(device)
    sgd = optim.SGD(net.parameters(), lr=0.05)
    loss_fn = nn.BCELoss()

    print(net)

    bs = 25
    loader_train = get_train_loader(bs)
    loader_val = get_validation_loader(bs)

    tb_log = get_tensorboard('simple')
    n_epochs = 50

    t0 = datetime.now()
    for ep in range(1, n_epochs + 1):
        train(net, loader_train, loss_fn, sgd, ep, tb_log)

        # Validation runs without gradient tracking.
        with torch.no_grad():
            print('\nValidation:')
            evaluate(net, loader_val, loss_fn, ep, tb_log)
    t1 = datetime.now()

    print('Total training time: {}.'.format(t1 - t0))

    torch.save(net.state_dict(), model_file)
    print('Wrote model to', model_file)
# Example #2
# 0
def train_main():
    """Train Net across all available GPUs via nn.DataParallel.

    Wraps the model in DataParallel when more than one GPU is present,
    scales the batch size with the GPU count, trains with SGD + BCELoss,
    and saves the unwrapped model's state dict to ``model_file``.
    """
    model = Net()

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        print('Using multi-gpu with {} GPUs!'.format(num_gpus))
        model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=0.05)
    criterion = nn.BCELoss()

    print(model)

    # Scale the per-step batch with the GPU count; max(1, ...) keeps a
    # usable batch size on CPU-only hosts, where device_count() == 0
    # would otherwise yield batch_size == 0.
    batch_size = 25 * max(1, num_gpus)
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('simple_multigpu')
    epochs = 50

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)

        # Validation runs without gradient tracking.
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)

    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    # DataParallel keeps the real model in .module; a single-GPU or CPU
    # run was never wrapped, so it has no .module attribute (the
    # original `model.module.state_dict()` raised AttributeError there).
    if isinstance(model, nn.DataParallel):
        state_dict = model.module.state_dict()
    else:
        state_dict = model.state_dict()
    torch.save(state_dict, model_file)
    print('Wrote model to', model_file)
def train_main():
    """Train Net using HDF5-backed loaders and report per-epoch timing.

    Trains with SGD + BCELoss, times every epoch, and after a warmup
    period accumulates a running average of seconds per epoch. Saves
    the trained state dict to ``model_file``.
    """
    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    criterion = nn.BCELoss()

    print(model)

    batch_size = 25
    train_loader = get_train_loader_hdf5(batch_size)
    validation_loader = get_validation_loader_hdf5(batch_size)

    log = get_tensorboard('simple')
    epochs = 20

    # First epoch(s) are excluded from the average: they typically pay
    # one-off costs (file opening, caches, CUDA init).
    warmup_epochs = 1
    tot_time = 0.0
    # Initialized here so the summary print below is safe even when
    # epochs <= warmup_epochs (original raised NameError in that case).
    secs_per_epoch = 0.0
    for epoch in range(1, epochs + 1):
        start_time = datetime.now()
        train(model, train_loader, criterion, optimizer, epoch, log)

        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
        end_time = datetime.now()
        epoch_time = (end_time - start_time).total_seconds()
        txt = 'Epoch took {:.2f} seconds.'.format(epoch_time)
        if epoch > warmup_epochs:
            tot_time += epoch_time
            secs_per_epoch = tot_time / (epoch - warmup_epochs)
            txt += ' Running average: {:.2f}'.format(secs_per_epoch)

        print(txt)

    print('Total training time: {:.2f}, {:.2f} secs/epoch.'.format(
        tot_time, secs_per_epoch))

    torch.save(model.state_dict(), model_file)
    print('Wrote model to', model_file)
def train_main():
    """Train Net with Horovod data-parallel distributed training.

    Broadcasts initial weights from rank 0, scales the learning rate by
    the world size, wraps the optimizer in Horovod's DistributedOptimizer,
    and saves the trained state dict to ``model_file`` from rank 0 only.
    """
    model = Net().to(device)

    print(model)

    # Horovod: broadcast parameters from rank 0 so all workers start
    # from identical weights.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    # Horovod: scale learning rate by the number of workers, following
    # the linear scaling rule for larger effective batch sizes.
    lr = 0.05
    optimizer = optim.SGD(model.parameters(), lr=lr * hvd.size())

    # Horovod: wrap optimizer with DistributedOptimizer to average
    # gradients across workers.
    optimizer = hvd.DistributedOptimizer(
        optimizer, named_parameters=model.named_parameters())
    criterion = nn.BCELoss()

    batch_size = 25
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('simple')
    epochs = 50

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)

        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)

    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    # Save from rank 0 only: all workers hold identical weights after
    # the broadcast + averaged updates, and concurrent writes to the
    # same path from every rank can corrupt the checkpoint (the
    # original saved unconditionally on every rank).
    if hvd.rank() == 0:
        torch.save(model.state_dict(), model_file)
        print('Wrote model to', model_file)
# Example #5
# 0
def train_main():
    """Two-stage transfer learning: train new head, then fine-tune.

    Stage 1 trains only the layers left trainable by PretrainedNet
    (the new classifier head) with SGD. Stage 2 additionally unfreezes
    the deepest VGG feature layers (children named >= 24) and continues
    with a small RMSprop learning rate. Saves the fine-tuned state dict
    to ``model_file_ft``.
    """
    # Learning 1: New layers

    model = PretrainedNet().to(device)

    # Only the parameters left trainable by PretrainedNet (the new head).
    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(params, lr=0.01)
    criterion = nn.BCELoss()

    print(model)

    batch_size = 25
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('pretrained')
    epochs = 10

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)

        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)

    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    torch.save(model.state_dict(), model_file)
    print('Wrote model to', model_file)

    # Learning 2: Fine-tuning
    log = get_tensorboard('finetuned')

    # Unfreeze the deepest convolutional blocks. Markers printed per
    # layer: ' ' = no parameters, '-' = frozen, '+' = unfrozen.
    for name, layer in model.vgg_features.named_children():
        layer_params = list(layer.parameters())
        note = ' '
        if layer_params:
            note = '-'
            if int(name) >= 24:
                for param in layer_params:
                    param.requires_grad = True
                note = '+'
        # Original printed len(param), which reused a stale loop
        # variable for parameterless layers (ReLU/MaxPool) and measured
        # the last tensor's first dimension; report the layer's
        # parameter-tensor count instead.
        print(name, note, layer, len(layer_params))

    params = filter(lambda p: p.requires_grad, model.parameters())
    # Much smaller learning rate for fine-tuning pretrained weights.
    optimizer = optim.RMSprop(params, lr=1e-5)
    criterion = nn.BCELoss()

    print(model)

    # Continue the epoch numbering in TensorBoard from where stage 1
    # stopped.
    prev_epochs = epoch
    epochs = 20

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, prev_epochs + epoch,
              log)

        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, prev_epochs + epoch,
                     log)

    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    torch.save(model.state_dict(), model_file_ft)
    print('Wrote finetuned model to', model_file_ft)