def train_main():
    """Train the network on a single device and write the weights to disk.

    Builds the model, runs `epochs` passes of train + validation (logging to
    TensorBoard via `log`), reports wall-clock time, and saves the state dict
    to `model_file`.
    """
    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.05)
    criterion = nn.BCELoss()
    print(model)

    batch_size = 25
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('simple')
    epochs = 50

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)
        # Gradients are not needed for validation; no_grad saves memory.
        # NOTE(review): model is not switched to eval() here — assumes
        # evaluate() handles train/eval mode itself; confirm.
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    torch.save(model.state_dict(), model_file)
    print('Wrote model to', model_file)
def train_main():
    """Train the network, using nn.DataParallel across all visible GPUs.

    Scales the batch size with the number of GPUs, runs `epochs` passes of
    train + validation, and saves the (unwrapped) state dict to `model_file`.
    """
    model = Net()

    num_gpus = torch.cuda.device_count()
    if num_gpus > 1:
        print('Using multi-gpu with {} GPUs!'.format(num_gpus))
        model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=0.05)
    criterion = nn.BCELoss()
    print(model)

    # BUG FIX: the original used `25 * num_gpus`, which is 0 on a CPU-only
    # host (device_count() == 0). Clamp to at least one device's worth.
    batch_size = 25 * max(1, num_gpus)
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('simple_multigpu')
    epochs = 50

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    # BUG FIX: the original unconditionally saved model.module.state_dict(),
    # which raises AttributeError when the model was never wrapped in
    # DataParallel (num_gpus <= 1). Unwrap only when actually wrapped, so the
    # saved weights are loadable by a plain Net() either way.
    if isinstance(model, nn.DataParallel):
        state_dict = model.module.state_dict()
    else:
        state_dict = model.state_dict()
    torch.save(state_dict, model_file)
    print('Wrote model to', model_file)
def train_main():
    """Train the network using HDF5-backed data loaders and report timing.

    Epoch wall-clock times are averaged only after `warmup_epochs` epochs,
    so first-epoch setup costs (data caching, CUDA init) don't skew the
    secs/epoch figure. Saves the state dict to `model_file`.
    """
    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    criterion = nn.BCELoss()
    print(model)

    batch_size = 25
    train_loader = get_train_loader_hdf5(batch_size)
    validation_loader = get_validation_loader_hdf5(batch_size)

    log = get_tensorboard('simple')
    epochs = 20
    warmup_epochs = 1
    tot_time = 0.0
    # BUG FIX: the original only assigned secs_per_epoch inside the
    # `epoch > warmup_epochs` branch, so the final summary print raised
    # NameError whenever epochs <= warmup_epochs. Initialize it up front.
    secs_per_epoch = 0.0

    for epoch in range(1, epochs + 1):
        start_time = datetime.now()
        train(model, train_loader, criterion, optimizer, epoch, log)
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
        end_time = datetime.now()

        epoch_time = (end_time - start_time).total_seconds()
        txt = 'Epoch took {:.2f} seconds.'.format(epoch_time)
        if epoch > warmup_epochs:
            tot_time += epoch_time
            secs_per_epoch = tot_time / (epoch - warmup_epochs)
            txt += ' Running average: {:.2f}'.format(secs_per_epoch)
        print(txt)

    print('Total training time: {:.2f}, {:.2f} secs/epoch.'.format(
        tot_time, secs_per_epoch))

    torch.save(model.state_dict(), model_file)
    print('Wrote model to', model_file)
def train_main():
    """Train the network with Horovod data-parallel distributed training.

    Broadcasts initial weights from rank 0, scales the learning rate by the
    world size, wraps the optimizer in hvd.DistributedOptimizer, and saves
    the model — from rank 0 only — to `model_file`.
    """
    model = Net().to(device)
    print(model)

    # Horovod: broadcast parameters from rank 0 so every worker starts from
    # identical initial weights.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)

    # Horovod: scale learning rate by the number of workers, since the
    # effective global batch grows with hvd.size().
    lr = 0.05
    optimizer = optim.SGD(model.parameters(), lr=lr * hvd.size())

    # Horovod: wrap optimizer with DistributedOptimizer to allreduce
    # gradients across workers.
    optimizer = hvd.DistributedOptimizer(
        optimizer, named_parameters=model.named_parameters())

    criterion = nn.BCELoss()

    batch_size = 25
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('simple')
    epochs = 50

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    # BUG FIX: the original saved from every worker, so all ranks raced to
    # write the same model_file. Checkpoint only on rank 0, per Horovod's
    # recommended practice.
    if hvd.rank() == 0:
        torch.save(model.state_dict(), model_file)
        print('Wrote model to', model_file)
def train_main():
    """Two-stage transfer learning: train new head, then fine-tune top layers.

    Stage 1 trains only the layers with requires_grad=True in the fresh
    PretrainedNet (the new head) with SGD; stage 2 additionally unfreezes
    VGG feature layers 24+ and continues with RMSprop at a low LR. Writes
    the stage-1 model to `model_file` and the fine-tuned one to
    `model_file_ft`.
    """
    # Learning 1: New layers
    model = PretrainedNet().to(device)
    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.SGD(params, lr=0.01)
    criterion = nn.BCELoss()
    print(model)

    batch_size = 25
    train_loader = get_train_loader(batch_size)
    validation_loader = get_validation_loader(batch_size)

    log = get_tensorboard('pretrained')
    epochs = 10

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer, epoch, log)
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion, epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    torch.save(model.state_dict(), model_file)
    print('Wrote model to', model_file)

    # Learning 2: Fine-tuning
    log = get_tensorboard('finetuned')

    # Unfreeze feature layers 24 and up; '+' = trainable, '-' = frozen,
    # ' ' = layer has no parameters (e.g. activation/pooling layers).
    for name, layer in model.vgg_features.named_children():
        note = ' '
        # BUG FIX: the original printed `len(param)` after the loop, which
        # referenced a stale loop variable for parameter-less layers and was
        # the first dimension of the last parameter rather than a count.
        layer_params = list(layer.parameters())
        for param in layer_params:
            note = '-'
            if int(name) >= 24:
                param.requires_grad = True
                note = '+'
        print(name, note, layer, len(layer_params))

    params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.RMSprop(params, lr=1e-5)
    criterion = nn.BCELoss()
    print(model)

    # Continue the TensorBoard/epoch numbering from where stage 1 stopped.
    prev_epochs = epochs
    epochs = 20

    start_time = datetime.now()
    for epoch in range(1, epochs + 1):
        train(model, train_loader, criterion, optimizer,
              prev_epochs + epoch, log)
        with torch.no_grad():
            print('\nValidation:')
            evaluate(model, validation_loader, criterion,
                     prev_epochs + epoch, log)
    end_time = datetime.now()
    print('Total training time: {}.'.format(end_time - start_time))

    torch.save(model.state_dict(), model_file_ft)
    print('Wrote finetuned model to', model_file_ft)