def main(args):
    """Evaluate a fine-tuned ResNet-152 multi-label classifier on the A2D validation set.

    Loads the checkpoint from ``args.model_path``, binarizes the raw network
    outputs at a fixed threshold of 0.1, and prints Precision / Recall / F1.
    """
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    val_dataset = a2d_dataset.A2DDataset(val_cfg, args.dataset_path)
    data_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                             num_workers=1)

    # ResNet-152 backbone with the final FC layer replaced for 43 A2D classes.
    model = resnet152(pretrained=True)
    model.fc = torch.nn.Linear(2048, 43)
    model = model.to(device)
    model.load_state_dict(torch.load(os.path.join(args.model_path, 'net.ckpt')))

    # Idiom fix: use len(...) instead of calling __len__() directly.
    num_samples = len(data_loader)
    X = np.zeros((num_samples, args.num_cls))  # binarized predictions
    Y = np.zeros((num_samples, args.num_cls))  # ground-truth labels
    print(num_samples)

    model.eval()
    with torch.no_grad():
        for batch_idx, data in enumerate(data_loader):  # mini-batch
            images = data[0].to(device)
            labels = data[1].type(torch.FloatTensor).to(device)
            output = model(images).cpu().detach().numpy()
            target = labels.cpu().detach().numpy()
            # NOTE(review): outputs are raw scores (no sigmoid applied here);
            # the 0.1 cutoff is taken verbatim from the original — confirm it
            # is intentionally applied to un-normalized outputs.
            output[output >= 0.1] = 1
            output[output < 0.1] = 0
            X[batch_idx, :] = output
            Y[batch_idx, :] = target

    P = Precision(X, Y)
    R = Recall(X, Y)
    F = F1(X, Y)
    print('Precision: {:.1f} Recall: {:.1f} F1: {:.1f}'.format(
        100 * P, 100 * R, 100 * F))
def predict(args):
    """Predict pixel-level class labels on the A2D test set with FCN32s.

    Loads a trained checkpoint, runs inference over the test loader, and
    pickles the predicted per-pixel class masks to
    ``models/test_mask_pred.pkl``.
    """
    test_dataset = a2d_dataset.A2DDataset(test_cfg)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False,
                             num_workers=1)

    print('Loading model...')
    model = torchfcn.models.FCN32s(args.num_cls).to(device)
    model.load_state_dict(torch.load(os.path.join(args.model_path, 'net.ckpt')))
    print('Model loaded :D')
    # BUG FIX: put the network in inference mode (the original never called
    # eval(), so dropout layers stayed active during prediction).
    model.eval()

    mask_list = []

    # BUG FIX: the original computed `total_step = len(val_loader)`, but the
    # validation loader's construction was commented out (NameError at
    # runtime). Use the loader that is actually iterated.
    total_step = len(test_loader)
    # Guard against total_step < 10, which would make total_step // 10 == 0
    # and raise ZeroDivisionError in the modulo below.
    log_every = max(total_step // 10, 1)

    print('Predicting class label on pixel level...')
    start = time.time()
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            images = data.to(device)
            output = model(images)
            # argmax over the class channel -> integer class id per pixel
            mask = output.data.max(1)[1].cpu().numpy()[:, :, :]
            mask_list.append(mask.astype(np.uint8))
            # Print log every 1/10 of the total step
            if i % log_every == 0:
                print("Step [{}/{}]".format(i, total_step))
    print('Prediction took {} minutes'.format((time.time() - start) / 60))

    with open('models/test_mask_pred.pkl', 'wb') as f:
        print('Dumping mask file...')
        pickle.dump(mask_list, f)
    print('Finished prediction!')
def main(args):
    """Fine-tune FCN32s on the A2D training set and periodically checkpoint.

    Resumes from ``net.ckpt`` in ``args.model_path`` and saves checkpoints to
    ``net_Wm2.ckpt`` every ``args.save_step`` iterations.
    """
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    train_dataset = a2d_dataset.A2DDataset(train_cfg)
    data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    model = torchfcn.models.FCN32s(n_class=args.num_cls).to(device)
    # Resume from an existing checkpoint rather than copying VGG16 weights.
    model.load_state_dict(torch.load(os.path.join(args.model_path, 'net.ckpt')))

    # Per-parameter groups: biases get 2x the learning rate and no weight
    # decay, following the original FCN training recipe.
    optimizer = torch.optim.SGD(
        [
            {'params': get_parameters(model, bias=False)},
            {'params': get_parameters(model, bias=True),
             'lr': 1e-10 * 2, 'weight_decay': 0},
        ],
        lr=1e-10, momentum=0.99, weight_decay=0.00005)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # NOTE(review): the original also built a 44-entry class-weight tensor
    # that was never passed to the loss; it has been removed as dead code.

    total_step = len(data_loader)
    for epoch in range(args.num_epochs):
        t1 = time.time()
        for i, data in enumerate(data_loader):
            images = data[0].to(device)
            labels = data[1].to(device)

            outputs = model(images)
            loss = cross_entropy2d(outputs, labels)
            model.zero_grad()
            loss.backward()
            optimizer.step()

            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'net_Wm2.ckpt'))

        # BUG FIX: the StepLR scheduler was constructed but never stepped, so
        # the learning rate never decayed; advance it once per epoch.
        exp_lr_scheduler.step()

        t2 = time.time()
        print(t2 - t1)
def main(args):
    """Evaluate a two-model EfficientNet-B7 ensemble on A2D.

    Averages the two models' raw outputs, applies a sigmoid, binarizes at
    0.4, and prints Precision / Recall / F1 against the ground truth.
    """
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    dataset = a2d_dataset.A2DDataset(train_cfg, args.dataset_path)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False,
                             num_workers=1)

    # Load both ensemble members from their respective checkpoints.
    model_1 = net('efficientnet_b7')
    model_1.cuda()
    model_1.load_state_dict(
        torch.load(os.path.join(args.model_path, 'efficientnetb7_F53.8.ckpt')))

    model_2 = net('efficientnet_b7')
    model_2.cuda()
    model_2.load_state_dict(
        torch.load(os.path.join(args.model_path,
                                'efficientnetb7_val_53.7.ckpt')))

    # Idiom fix: use len(...) instead of calling __len__() directly.
    num_samples = len(data_loader)
    X = np.zeros((num_samples, args.num_cls))  # binarized predictions
    Y = np.zeros((num_samples, args.num_cls))  # ground-truth labels
    print(num_samples)

    model_1.eval()
    model_2.eval()
    with torch.no_grad():
        for batch_idx, data in enumerate(data_loader):  # mini-batch
            images = data[0].to(device)
            labels = data[1].type(torch.FloatTensor).to(device)

            # Ensemble: average the two models' outputs, then squash to
            # probabilities. torch.sigmoid replaces the deprecated
            # torch.nn.functional.sigmoid used in the original.
            output = (model_1(images) + model_2(images)) / 2
            output = torch.sigmoid(output).cpu().detach().numpy()
            target = labels.cpu().detach().numpy()

            output[output >= 0.4] = 1
            output[output < 0.4] = 0
            X[batch_idx, :] = output
            Y[batch_idx, :] = target

    P = Precision(X, Y)
    R = Recall(X, Y)
    F = F1(X, Y)
    print('Precision: {:.1f} Recall: {:.1f} F1: {:.1f}'.format(
        100 * P, 100 * R, 100 * F))
def main(args):
    """Train the classification network on A2D with BCE loss and Adam.

    Checkpoints are written to ``args.model_path/net.ckpt`` every
    ``args.save_step`` iterations.
    """
    # Make sure the checkpoint directory exists before training starts.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    dataset = a2d_dataset.A2DDataset(train_cfg, args.dataset_path)
    loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=4)

    # Model, loss, and optimizer.
    model = net(args.num_cls)
    model.cuda()
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

    steps_per_epoch = len(loader)
    for epoch in range(args.num_epochs):
        epoch_start = time.time()
        for step, batch in enumerate(loader):
            # Move the mini-batch to the training device.
            images = batch[0].to(device)
            targets = batch[1].type(torch.FloatTensor).to(device)

            # Forward pass, backward pass, parameter update.
            predictions = model(images)
            loss = criterion(predictions, targets)
            model.zero_grad()
            loss.backward()
            optimizer.step()

            # Periodic progress logging.
            if step % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, step, steps_per_epoch,
                    loss.item()))

            # Periodic checkpointing.
            if (step + 1) % args.save_step == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'net.ckpt'))

        print(time.time() - epoch_start)
def main(args):
    """Fine-tune ResNet-152 on A2D, training against the argmax of the
    multi-hot label vector with cross-entropy loss.
    """
    # Ensure the checkpoint directory exists.
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    dataset = a2d_dataset.A2DDataset(train_cfg, args.dataset_path)
    loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=16)

    # Pre-trained backbone with a fresh 43-way classification head.
    model = resnet152(pretrained=True)
    model.fc = torch.nn.Linear(2048, 43)
    model = model.to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    steps_per_epoch = len(loader)
    for epoch in range(args.num_epochs):
        epoch_start = time.time()
        for step, batch in enumerate(loader):
            images = batch[0].to(device)
            labels = batch[1].to(device)
            # Replace zero entries with -1 before taking the argmax target
            # (kept from the original implementation).
            labels[labels == 0] = -1

            # Forward, backward, and update.
            outputs = model(images)
            # CrossEntropyLoss expects class indices: use the (first) maximal
            # label position as the target class.
            loss = criterion(outputs, torch.max(labels, 1)[1])
            model.zero_grad()
            loss.backward()
            optimizer.step()

            # Progress logging.
            if step % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, step, steps_per_epoch,
                    loss.item()))

            # Periodic checkpointing.
            if (step + 1) % args.save_step == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'net.ckpt'))

        print(time.time() - epoch_start)
def main(args):
    """Train se_resnext101 on A2D with SWA and mixed precision (apex amp).

    After each epoch the SWA-averaged weights are swapped in, validated, and
    checkpointed whenever Precision / Recall / F1 improve; the running SGD
    weights are then swapped back so training continues from them.
    """
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    train_dataset = a2d_dataset.A2DDataset(train_cfg, args.dataset_path)
    data_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                             shuffle=True, num_workers=12)

    # Model resumed from the best-F1 checkpoint.
    model = net('se_resnext101')
    model.cuda()
    model.load_state_dict(
        torch.load(os.path.join(args.model_path, 'net_F.ckpt')))

    criterion = nn.BCEWithLogitsLoss()
    base_optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
    # Stochastic Weight Averaging wrapper around plain SGD.
    optimizer = SWA(base_optimizer, swa_start=1, swa_freq=5, swa_lr=0.005)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, [5, 10, 20, 40, 75], gamma=0.25)
    # Mixed-precision training.
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1",
                                      verbosity=0)

    total_step = len(data_loader)
    best_P, best_R, best_F = 0, 0, 0
    for epoch in range(args.num_epochs):
        print('epoch:{}, lr:{}'.format(epoch, scheduler.get_lr()[0]))
        # BUG FIX: validation below switches the model to eval mode and the
        # original never switched back, so every epoch after the first
        # trained with dropout/batch-norm frozen. Restore training mode here.
        model.train()
        t1 = time.time()
        for i, data in enumerate(data_loader):
            optimizer.zero_grad()
            # mini-batch
            images = data[0].to(device)
            labels = data[1].type(torch.FloatTensor).to(device)

            # Forward, backward and optimize (loss scaled by amp).
            outputs = model(images)
            loss = criterion(outputs, labels)
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            if (i + 1) % 1 == 0:  # gradient-accumulation hook (period 1)
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_norm=5.0, norm_type=2)
                optimizer.step()
                optimizer.zero_grad()

            # Log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

        # Swap the SWA-averaged weights in for validation / checkpointing.
        optimizer.swap_swa_sgd()
        scheduler.step()
        t2 = time.time()
        print('Time Spend per epoch: ', t2 - t1)

        # ----- per-epoch validation (the original `if epoch > -1:` guard was
        # always true and has been dropped) -----
        val_dataset = a2d_dataset.A2DDataset(val_cfg, args.dataset_path)
        val_data_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
                                     num_workers=1)
        X = np.zeros((len(val_data_loader), 43))
        Y = np.zeros((len(val_data_loader), 43))
        model.eval()
        with torch.no_grad():
            for batch_idx, data in enumerate(val_data_loader):
                # mini-batch
                images = data[0].to(device)
                labels = data[1].type(torch.FloatTensor).to(device)
                # torch.sigmoid replaces deprecated nn.functional.sigmoid.
                output = torch.sigmoid(model(images)).cpu().detach().numpy()
                target = labels.cpu().detach().numpy()
                output[output >= 0.5] = 1
                output[output < 0.5] = 0
                X[batch_idx, :] = output
                Y[batch_idx, :] = target

        P = Precision(X, Y)
        R = Recall(X, Y)
        F = F1(X, Y)
        print('Precision: {:.1f} Recall: {:.1f} F1: {:.1f}'.format(
            100 * P, 100 * R, 100 * F))
        if P > best_P:
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, 'net_P.ckpt'))
            best_P = P
        if R > best_R:
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, 'net_R.ckpt'))
            best_R = R
        if F > best_F:
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, 'net_F.ckpt'))
            best_F = F

        # BUG FIX: swap back to the running SGD weights so the next epoch
        # continues training from them; the original swapped only once per
        # epoch, so training resumed from the SWA average.
        optimizer.swap_swa_sgd()
def main(args):
    """Fine-tune Inception-v3 (ImageNet) for A2D multi-label classification.

    Freezes all layers up to ``Conv2d_4a_3x3``, replaces the auxiliary and
    primary classifier heads, trains with BCEWithLogitsLoss + SGD under a
    StepLR schedule, and mirrors progress to a per-run log file. A checkpoint
    is saved at the end of training and on RuntimeError/KeyboardInterrupt.

    Raises:
        ValueError: if ``args.lr``, ``args.lr_decay`` or ``args.lr_changes``
            is missing/zero.
        Exception: if the log file for this configuration already exists.
    """
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    dataset = a2d_dataset.A2DDataset(train_cfg, args.dataset_path)
    data_loader = DataLoader(dataset, batch_size=args.batch_size,
                             shuffle=True, num_workers=4)

    #######################################################
    # Edit the inception_v3 model pre-trained on ImageNet #
    #######################################################
    freeze_layers = True
    n_class = args.num_cls
    model = torchvision.models.inception_v3(pretrained='imagenet')

    # Stage-1: freeze all layers.
    if freeze_layers:
        for _, param in model.named_parameters():
            param.requires_grad = False

    # Replace the auxiliary classifier head.
    num_ftrs = model.AuxLogits.fc.in_features
    model.AuxLogits.fc = nn.Linear(num_ftrs, n_class)
    # Replace the primary classifier head.
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, n_class)

    # Stage-2: unfreeze every child module that comes AFTER "Conv2d_4a_3x3",
    # i.e. everything up to and including that layer stays frozen.
    seen = []
    for name, child in model.named_children():
        if "Conv2d_4a_3x3" in seen:
            for params in child.parameters():
                params.requires_grad = True
        seen.append(name)

    model.to(device)

    print("[Using BCEWithLogitsLoss...]")
    criterion = nn.BCEWithLogitsLoss()
    print("[Using small learning rate with momentum...]")

    ##############
    # Parameters #
    ##############
    total_step = len(data_loader)            # steps per epoch
    data_size = total_step * args.batch_size  # samples per epoch
    if args.lr:
        learning_rate = args.lr  # e.g. 0.05
    else:
        raise ValueError('Please provide learning rate')
    if args.lr_decay:
        lr_decay = args.lr_decay  # e.g. 5
    else:
        raise ValueError('Please provide rate of decay for learning rate')
    if args.lr_changes:
        lr_step = args.num_epochs // args.lr_changes
    else:
        raise ValueError(
            'Please provide number of decay times for learning rate')

    # Only optimize parameters left trainable by the freezing stages.
    optimizer = optim.SGD(
        list(filter(lambda p: p.requires_grad, model.parameters())),
        lr=learning_rate, momentum=0.9)
    my_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=lr_step,
                                          gamma=lr_decay)

    ###################
    # Train the model #
    ###################
    # Run tag used for both the checkpoint and the log file names.
    run_tag = ''.join([
        'batchsize-', str(args.batch_size), '_epoch-', str(args.num_epochs),
        '_lr-', str(learning_rate), '_decay-', str(lr_decay),
        '_step-', str(lr_step)
    ])
    model_name = 'net_' + run_tag + '.ckpt'
    log_file = 'train_log/net_' + run_tag + '.txt'

    start = time.time()

    # Refuse to overwrite an existing log for the same configuration.
    if os.path.isfile(log_file):
        raise Exception('File already exists')

    with open(log_file, 'w') as f:
        # Record the full configuration at the top of the log.
        f.write(str(args))
        try:
            for epoch in range(args.num_epochs):
                # NOTE: stepping the scheduler at the start of the epoch
                # matches the original (pre-1.1 PyTorch) convention.
                my_lr_scheduler.step()
                print('-' * 50)
                print('Current learning rate:{}\n'.format(get_lr(optimizer)))
                f.write('-' * 50)
                f.write('\n')
                f.write('Current learning rate:{}\n'.format(get_lr(optimizer)))

                t1 = time.time()
                running_loss = 0.0
                running_corrects = 0
                for i, data in enumerate(data_loader):
                    # mini-batch (move inputs to the training device)
                    images = data[0].type(torch.FloatTensor).to(device)
                    labels = data[1].type(torch.FloatTensor).to(device)

                    # inception_v3 returns (primary, aux) logits in train mode.
                    outputs, aux = model(images)
                    # Total loss = primary loss + 0.3 * auxiliary loss.
                    loss = criterion(outputs, labels) + \
                        0.3 * criterion(aux, labels)
                    loss.backward()
                    optimizer.step()
                    optimizer.zero_grad()

                    # BUG FIX: outputs are raw logits (BCEWithLogitsLoss),
                    # so apply a sigmoid before thresholding at 0.5; the
                    # original thresholded the logits directly, which skews
                    # the accuracy metric.
                    preds = torch.sigmoid(outputs).cpu().detach().numpy()
                    for pred in preds:
                        pred[pred >= 0.5] = 1
                        pred[pred < 0.5] = 0
                    preds = torch.Tensor(preds)
                    labels = labels.cpu().detach()

                    # BUG FIX: accumulate the Python scalar — accumulating
                    # the loss tensor keeps every iteration's autograd graph
                    # alive and leaks memory across the epoch.
                    running_loss += loss.item()
                    batch_correct = num_correct(preds, labels)
                    running_corrects += batch_correct
                    # Sanity check: a batch can never contain more correct
                    # predictions than samples.
                    if batch_correct > args.batch_size:
                        print('preds:{}'.format(preds.numpy()))
                        print('labels:{}'.format(labels.numpy()))
                        print('batch_correct:{}'.format(batch_correct))
                        raise ValueError('WTF DUDE!')

                    # Log info
                    if i % args.log_step == 0:
                        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                              .format(epoch, args.num_epochs, i, total_step,
                                      loss.item()))
                        f.write('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}\n'
                                .format(epoch, args.num_epochs, i, total_step,
                                        loss.item()))

                # Epoch-level statistics.
                epoch_loss = running_loss / data_size
                epoch_acc = running_corrects / data_size
                print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
                f.write('Loss: {:.4f} Acc: {:.4f}\n'.format(
                    epoch_loss, epoch_acc))
                t2 = time.time()
                print('Current epoch training time: {} minutes'.format(
                    (t2 - t1) / 60))
                f.write('Current epoch training time: {} minutes\n'.format(
                    (t2 - t1) / 60))

            # Save the final model.
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, model_name))
            end = time.time()
            print('Total training times spent: {} minutes'.format(
                (end - start) / 60))
            f.write('Total training times spent: {} minutes'.format(
                (end - start) / 60))
            # NOTE: the redundant f.close() from the original was removed —
            # the `with` block closes the file.
        except (RuntimeError, KeyboardInterrupt) as err:
            # Save a checkpoint before propagating the error.
            print('Save ckpt on exception ...')
            f.write('Save ckpt on exception ...')
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, model_name))
            print('Save ckpt done.')
            f.write('Save ckpt done.')
            end = time.time()
            print('Total training times spent: {} minutes'.format(
                (end - start) / 60))
            f.write('Total training times spent: {} minutes'.format(
                (end - start) / 60))
            raise err
def main(args):
    """Train the classifier on A2D with MultiLabelSoftMarginLoss + SGD.

    Reports per-epoch loss and a single-label accuracy (argmax of outputs vs
    argmax of the multi-hot labels) and checkpoints every ``args.save_step``
    iterations.
    """
    # Create model directory for saving trained models
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    dataset = a2d_dataset.A2DDataset(train_cfg, args.dataset_path)
    data_loader = DataLoader(dataset, batch_size=4, shuffle=True,
                             num_workers=4)

    total_step = len(data_loader)
    # Data size (each step trains on a batch of 4 images).
    data_size = total_step * 4

    # Model, loss, and optimizer (only trainable parameters are optimized).
    model = net(args.num_cls).to(device)
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.SGD(
        list(filter(lambda p: p.requires_grad, model.parameters())),
        lr=0.1, momentum=0.9)
    my_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=25, gamma=0.1)

    for epoch in range(args.num_epochs):
        # Scheduler stepped at the start of the epoch, matching the original
        # (pre-1.1 PyTorch) convention.
        my_lr_scheduler.step()
        print('Current learning rate:{}'.format(get_lr(optimizer)))
        t1 = time.time()
        running_loss = 0.0
        running_corrects = 0
        for i, data in enumerate(data_loader):
            # mini-batch
            images = data[0].to(device)
            labels = data[1].type(torch.FloatTensor).to(device)

            optimizer.zero_grad()
            outputs = model(images)
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # BUG FIX: accumulate the Python scalar — accumulating the loss
            # tensor keeps every iteration's autograd graph alive and leaks
            # memory across the epoch.
            running_loss += loss.item()
            labels = torch.max(labels.long(), 1)[1]
            running_corrects += torch.sum(preds == labels.data)

            # Log info
            if i % args.log_step == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch, args.num_epochs, i, total_step, loss.item()))

            # Save the model checkpoints
            if (i + 1) % args.save_step == 0:
                torch.save(model.state_dict(),
                           os.path.join(args.model_path, 'net.ckpt'))

        # Epoch-level statistics.
        epoch_loss = running_loss / data_size
        epoch_acc = running_corrects.item() / data_size
        print('running_corrects:{}\tdata_size:{}\tepoch_acc:{}'.format(
            running_corrects, data_size, epoch_acc))
        print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
        t2 = time.time()
        print(t2 - t1)