# Pass the model to GPU
if use_cuda:
    # model = model.cuda()
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()  # Multiple GPU parallelism

# Get the optimizer
params_dict = dict(model.named_parameters())
params = []
for key, value in params_dict.items():
    if key.find('.bn') >= 0 or key.find('.bias') >= 0:
        params += [{'params': [value], 'weight_decay': 0.0}]
    else:
        params += [{'params': [value], 'weight_decay': decay * batch_size}]
optimizer = optim.SGD(model.parameters(), lr=learning_rate / batch_size, momentum=momentum,
                      dampening=0, weight_decay=decay * batch_size)
# optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimization

evaluate = False
if evaluate:
    logging('evaluating ...')
    test(0, 0)
else:
    for epoch in range(init_epoch, max_epochs):
        # TRAIN
        niter = train(epoch)
        # TEST and SAVE
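Note that the snippet above builds a `params` list that exempts BatchNorm and bias parameters from weight decay, but the optimizer is then constructed from `model.parameters()`, so those per-parameter groups are never used. The following is a minimal, self-contained sketch of how such groups take effect when the list itself is passed to `optim.SGD`; the toy model and hyperparameter values are hypothetical and only chosen so the name-based check matches.

    from collections import OrderedDict

    import torch.nn as nn
    import torch.optim as optim

    # Hypothetical toy model and hyperparameters, for illustration only
    model = nn.Sequential(OrderedDict([
        ('layer1', nn.Sequential(OrderedDict([
            ('conv', nn.Conv2d(3, 8, 3, bias=False)),
            ('bn',   nn.BatchNorm2d(8)),
            ('relu', nn.ReLU()),
        ]))),
        ('head', nn.Conv2d(8, 4, 3)),
    ]))
    learning_rate, momentum, decay, batch_size = 0.001, 0.9, 0.0005, 32

    # Exempt BatchNorm and bias parameters from weight decay, as in the snippet above
    params = []
    for key, value in dict(model.named_parameters()).items():
        if key.find('.bn') >= 0 or key.find('.bias') >= 0:
            params += [{'params': [value], 'weight_decay': 0.0}]
        else:
            params += [{'params': [value], 'weight_decay': decay * batch_size}]

    # Passing the grouped list (rather than model.parameters()) is what makes
    # the per-group weight decay actually take effect
    optimizer = optim.SGD(params, lr=learning_rate / batch_size,
                          momentum=momentum, dampening=0)

The second variant of the training setup swaps SGD for Adam and adds checkpointing inside the epoch loop: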
# Specify the number of workers
kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

# Get the optimizer
params_dict = dict(model.named_parameters())
params = []
# init_epoch = model.seen // nsamples
for key, value in params_dict.items():
    if key.find('.bn') >= 0 or key.find('.bias') >= 0:
        params += [{'params': [value], 'weight_decay': 0.0}]
    else:
        params += [{'params': [value], 'weight_decay': decay * batch_size}]
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimization

# Pass the model to GPU
if use_cuda:
    # model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()  # Multiple GPU parallelism

for epoch in range(init_epoch, max_epochs):
    # TRAIN
    niter, loss = train(epoch)
    model.module.save_weights('%s/init.weights' % (checkpoint))
    if loss < 0.1:
        break

log_file.flush()
log_file.close()
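Because the network is wrapped in `nn.DataParallel`, the loop above saves through `model.module`, which is where `DataParallel` keeps the original, unwrapped network. A minimal sketch of that pattern follows; the toy model and the output path are hypothetical, and `torch.save` stands in for the repository's `save_weights` helper.

    import torch
    import torch.nn as nn

    # Hypothetical toy model, for illustration only
    model = nn.Linear(10, 2)

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        # DataParallel replicates the model across all visible GPUs
        model = nn.DataParallel(model).cuda()

    # The wrapped network lives under .module; saving its state_dict keeps the
    # checkpoint keys free of the "module." prefix that DataParallel adds
    net = model.module if isinstance(model, nn.DataParallel) else model
    torch.save(net.state_dict(), 'init.pth')  # hypothetical path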