def main():
    """Split the loaded samples 80/20 and launch SSD training.

    The sample list returned by ``load_data`` is shuffled in place, the first
    80% becomes the training set and the rest the validation set. When the
    module-level ``Tuning`` flag is truthy, weights are first restored from
    ``ssd_net.pth`` under ``pth_path``. Finally ``train`` is called with the
    network and both data loaders.
    """
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    torch.multiprocessing.set_start_method('spawn')

    samples = load_data(cityscape_img_dir, cityscape_label_dir)
    random.shuffle(samples)
    net = SSD(5)

    # First 80% of the shuffled list trains the model; the remainder validates.
    split_at = int(0.8 * len(samples))
    train_items, validation_items = samples[:split_at], samples[split_at:]

    # Both loaders use the same batching options.
    loader_options = dict(batch_size=8, shuffle=True, num_workers=0)

    train_dataset = CityScapeDataset(train_items)
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    **loader_options)
    print('Total training items', len(train_dataset),
          ', Total training mini-batches in one epoch:',
          len(train_data_loader))

    validation_dataset = CityScapeDataset(validation_items)
    validation_data_loader = torch.utils.data.DataLoader(validation_dataset,
                                                         **loader_options)
    print('Total validation items:', len(validation_dataset))

    if Tuning:
        # Resume from previously saved weights.
        checkpoint_path = os.path.join(pth_path, 'ssd_net.pth')
        net_state = torch.load(checkpoint_path)
        print('Loading trained model: ', checkpoint_path)
        net.load_state_dict(net_state)

    train(net, train_data_loader, validation_data_loader)
# Script fragment: load a saved SSD checkpoint and run it over the test loader.

# Hyper-parameters; neither is referenced again within this fragment.
learning_rate = 0.001
max_epochs = 20

# Build the test dataset and a non-shuffling loader over it.
test_list = get_list(img_dir, label_dir)
# test_list = test_list[0:-20]
test_dataset = csd.CityScapeDataset(test_list, train=False, show=False)
test_data_loader = torch.utils.data.DataLoader(test_dataset,
                                               batch_size=16,
                                               shuffle=False,
                                               num_workers=0)
print('test items:', len(test_dataset))

# Restore the weights stored in ./SSD.pth into a fresh SSD(3) instance.
file_name = 'SSD'
test_net_state = torch.load(os.path.join('.', file_name + '.pth'))
net = SSD(3)
if use_gpu:
    net = net.cuda()
net.load_state_dict(test_net_state)

# Counts processed batches (always test_batch_idx + 1 inside the loop).
itr = 0
net.eval()
for test_batch_idx, (loc_targets, conf_targets, imgs) in enumerate(test_data_loader):
    itr += 1
    # Move the last axis to position 1 before feeding the network.
    # assumes batches arrive as (N, H, W, C) -- TODO confirm against the dataset
    imgs = imgs.permute(0, 3, 1, 2).contiguous()
    if use_gpu:
        imgs = imgs.cuda()
    imgs = Variable(imgs)
    conf, loc = net.forward(imgs)
    # Keep only the first sample's confidences from the batch.
    conf = conf[0, ...]
# Script fragment: build the training loader, the network, its optimizer and loss.
train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                batch_size=32,
                                                shuffle=True,
                                                num_workers=0)
# print('train items:', len(train_dataset))

# Pull a single batch from the loader (idx is 0 for this first batch).
idx, (bbox, label, img) = next(enumerate(train_data_loader))

# Validation loader is currently disabled:
# valid_dataset = csd.CityScapeDataset(train_list, False, False)
# valid_data_loader = torch.utils.data.DataLoader(valid_dataset,
#                                                 batch_size=4,
#                                                 shuffle=False,
#                                                 num_workers=0)
# print('validation items:', len(valid_dataset))

net = SSD(3)

# Adam is the optimizer actually used. The original also constructed an
# SGD optimizer (momentum=0.9, weight_decay=1e-4) on the preceding line, but
# it was overwritten immediately and never stepped, so it has been removed.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
criterion = MultiboxLoss([0.1, 0.1, 0.2, 0.2])

if use_gpu:
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    net.cuda()
    criterion.cuda()

# Loss histories and a global iteration counter for the training loop.
train_losses = []
valid_losses = []
itr = 0
path_to_trained_model = 'ssd_net.pth' img_file_path = sys.argv[1] # the index should be 1, 0 is the 'eval.py' img = Image.open(img_file_path) img_norm = (img - IMG_MEAN) / IMG_STD img_np = np.asarray([img_norm], dtype="float32") img_tensor = torch.from_numpy(img_np) prior_bboxes = generate_prior_bboxes(prior_layer_cfg = prior_layer_cfg) if WILL_TEST: if USE_GPU: test_net_state = torch.load(path_to_trained_model) else: test_net_state = torch.load(path_to_trained_model, map_location='cpu') test_net = SSD(num_classes=3) test_net.load_state_dict(test_net_state) test_net.eval() test_image_permuted = img_tensor.permute(0, 3, 1, 2) test_image_permuted = Variable(test_image_permuted.float()) test_conf_preds, test_loc_preds = test_net.forward(test_image_permuted) test_bbox_priors = prior_bboxes.unsqueeze(0) test_bbox_preds = loc2bbox(test_loc_preds.cpu(), test_bbox_priors.cpu(), center_var=0.1, size_var=0.2) sel_bbox_preds = nms_bbox(test_bbox_preds.squeeze().detach(), test_conf_preds.squeeze().detach().cpu(), overlap_threshold=0.5, prob_threshold=0.5) rects = [] classes = [] for key in sel_bbox_preds.keys():
def _as_pixel(point):
    """Return ``point`` as a tuple of Python ints, as OpenCV drawing calls expect."""
    return tuple(int(v) for v in point)


def _show_test_result(test_image_tensor, test_bbox_tensor, gt_label, gt_idx,
                      pred_picked, class_labels, acc_per_image):
    """Draw ground-truth and NMS-selected boxes on one test image and show it."""
    # Reverse the axis order of the image tensor and map values from
    # [-1, 1] to [0, 1] for display.
    test_image = test_image_tensor.cpu().numpy().astype(
        np.float32).transpose().copy()
    test_image = (test_image + 1) / 2

    # Center form (cx, cy, w, h) -> corner form, scaled by 300 for drawing.
    gt_bbox_tensor = torch.cat([
        test_bbox_tensor[..., :2] - test_bbox_tensor[..., 2:] / 2,
        test_bbox_tensor[..., :2] + test_bbox_tensor[..., 2:] / 2
    ], dim=-1)
    gt_bbox = gt_bbox_tensor.detach().cpu().numpy().astype(
        np.float32).reshape((-1, 4)).copy() * 300

    # Only entries with a positive label are drawn as ground truth.
    for box, lbl in zip(gt_bbox[gt_idx], gt_label[gt_idx]):
        top_left = _as_pixel(box[:2])
        cv2.rectangle(test_image, top_left, _as_pixel(box[2:4]),
                      (255, 0, 0), 1)
        cv2.putText(test_image, str(lbl), top_left,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 0, 0), 1,
                    cv2.LINE_AA)

    for cls_dict in pred_picked:
        for p_score, p_bbox in zip(cls_dict['picked_scores'],
                                   cls_dict['picked_bboxes']):
            p_lbl = '%d | %.2f' % (cls_dict['class'], p_score)
            p_bbox = p_bbox * 300
            print(p_bbox, p_lbl)
            top_left = _as_pixel(p_bbox[:2])
            cv2.rectangle(test_image, top_left, _as_pixel(p_bbox[2:4]),
                          (0, 0, 255), 2)
            cv2.putText(test_image, p_lbl, top_left,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1,
                        cv2.LINE_AA)

    plt.imshow(test_image)
    plt.suptitle(class_labels)
    plt.title('Temp Accuracy: {} %'.format(acc_per_image))
    plt.show()


def test_net(test_dataset, class_labels, results_path, show=False):
    """Evaluate a saved SSD checkpoint on ``test_dataset`` and print its accuracy.

    For every dataset item the network is run on the single image, the scores
    are soft-maxed over the class axis, and the arg-max class of every entry
    is compared with its ground-truth label. Only entries whose ground-truth
    label is positive count towards the accuracy.

    Args:
        test_dataset: Indexable dataset; each item unpacks to
            ``(image, labels, bboxes, prior_bbox)`` tensors.
        class_labels: Sequence of class names; its length sizes the network.
        results_path: Path of the saved ``state_dict`` to evaluate.
        show: When true, draw the ground-truth and NMS-selected boxes for
            every image with matplotlib. Defaults to ``False``, matching the
            original code where this visualisation was hard-disabled with
            ``if False``.
            NOTE(review): the collapsed source did not show where that
            ``if False`` block ended; the whole visualisation was assumed
            to be inside it -- confirm.

    The overall accuracy (``nan`` when there are no positive labels at all)
    is printed; nothing is returned.
    """
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # Load the saved model and deploy it. The local is named ``net`` so it
    # does not shadow this function's own name.
    net = SSD(len(class_labels))
    if use_cuda:
        net_state = torch.load(results_path)
    else:
        net_state = torch.load(results_path, map_location='cpu')
    net.load_state_dict(net_state)
    if use_cuda:
        net.cuda()
    net.eval()

    count_matched = 0
    count_gt = 0
    for test_item_idx in range(len(test_dataset)):
        (test_image_tensor, test_label_tensor, test_bbox_tensor,
         prior_bbox) = test_dataset[test_item_idx]

        # Forward pass on a batch of one image.
        batch = test_image_tensor.unsqueeze(0)
        if use_cuda:
            batch = batch.cuda()
        with torch.no_grad():
            pred_scores_tensor, pred_bbox_tensor = net.forward(batch)

        # Raw scores -> probabilities (softmax is not applied inside the net).
        pred_scores_tensor = F.softmax(pred_scores_tensor, dim=2)

        # Location offsets -> boxes, then non-maximum suppression.
        pred_bbox_tensor = loc2bbox(pred_bbox_tensor, prior_bbox.unsqueeze(0))
        pred_picked = nms_bbox(pred_bbox_tensor[0], pred_scores_tensor[0])

        gt_label = test_label_tensor.cpu().numpy().astype(np.uint8).copy()
        gt_idx = gt_label > 0

        pred_scores = pred_scores_tensor.detach().cpu().numpy().astype(
            np.float32)
        pred_label = pred_scores[0].argmax(axis=1)

        n_matched = sum(1 for gt, pr in zip(gt_label, pred_label)
                        if gt > 0 and gt == pr)
        n_gt = int(gt_idx.sum())
        # An image without positive labels has no defined accuracy.
        acc_per_image = 100 * n_matched / n_gt if n_gt else float('nan')
        count_matched += n_matched
        count_gt += n_gt

        if show:
            _show_test_result(test_image_tensor, test_bbox_tensor, gt_label,
                              gt_idx, pred_picked, class_labels,
                              acc_per_image)

    acc = 100 * count_matched / count_gt if count_gt else float('nan')
    # The original printed only the '%' sign and never the computed value.
    print('Classification acc: ', acc, '%')
with open("Output.txt", "w") as text_file: print(output_str, file=text_file) if (epoch_conf_loss + epoch_loc_loss) < min_total_loss: min_total_loss = epoch_conf_loss + epoch_loc_loss best_model_wts = copy.deepcopy(model.state_dict()) torch.save( ssd_net, os.path.join(root_dir, 'ssd_net_1022_temporary_best_zoomout.pth')) print("saved for temporary best model: ", idx_epochs) print('Min Val Loss: {:4f}'.format(min_total_loss)) model.load_state_dict(best_model_wts) with open("Output.txt", "w") as text_file: print(output_str, file=text_file) return model num_classes = 3 root_dir = 'trained_model' ssd_net = SSD(num_classes).cuda() optimizer = optim.Adam(ssd_net.parameters()) criterion = MultiboxLoss() ssd_net = train(ssd_net, optimizer, criterion, num_epochs=100) torch.save( ssd_net, os.path.join(root_dir, 'ssd_net_1023_augmentation_100_zoomout_rgb.pth'))
def main():
    """Run the saved SSD model on the image given as ``sys.argv[1]`` and plot it.

    The image is resized to 300x300, normalised with mean 127 and std 128,
    passed through ``SSD(2)`` loaded from ``ssd_net.pth``, filtered with
    ``nms_bbox1`` and the surviving boxes are drawn with ``plot_output``.
    """
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

    # One prior-box layer per feature map; all layers share the aspect ratios.
    aspect_ratio = (1.0, 1 / 2, 1 / 3, 2.0, 3.0, '1t')
    layer_specs = [
        ('Conv5', 19, 60),
        ('Conv11', 10, 105),
        ('Conv14_2', 5, 150),
        ('Conv15_2', 3, 195),
        ('Conv16_2', 2, 240),
        ('Conv17_2', 1, 285),
    ]
    prior_layer_cfg = [{
        'layer_name': layer_name,
        'feature_dim_hw': (feature_dim, feature_dim),
        'bbox_size': (bbox_size, bbox_size),
        'aspect_ratio': aspect_ratio
    } for layer_name, feature_dim, bbox_size in layer_specs]
    prior_bboxes = generate_prior_bboxes(prior_layer_cfg)

    # Load the test image; keep an untouched copy for plotting.
    img_file_path = sys.argv[1]
    img = Image.open(img_file_path)
    img = img.resize((300, 300))
    plot_img = img.copy()

    # Drop any alpha channel, then normalise.
    img_array = np.asarray(img)[:, :, :3]
    mean = np.asarray((127, 127, 127))
    std = 128.0
    img_array = (img_array - mean) / std
    img_tensor = torch.Tensor(img_array)

    # H x W x C -> 1 x C x H x W. The original used
    # ``img_tensor.view(1, c, h, w)``, which only reinterprets the memory
    # layout and scrambles the pixels instead of moving the channel axis.
    test_input = img_tensor.permute(2, 0, 1).contiguous().unsqueeze(0)

    net = SSD(2)
    test_net_state = torch.load('ssd_net.pth')
    net.load_state_dict(test_net_state)
    # NOTE(review): ``net.eval()`` was commented out in the original, so the
    # network runs in training mode here; left unchanged -- confirm intent.
    # net.eval()
    net.cuda()

    # Forward pass; no gradients are needed for inference.
    test_input = Variable(test_input.cuda())
    with torch.no_grad():
        test_cof, test_loc = net.forward(test_input)
    test_loc = test_loc.detach()
    test_loc_clone = test_loc.clone()

    # Normalise the class scores so they sum to 1 for each box.
    test_cof_score = F.softmax(test_cof[0], dim=1)

    # Non-maximum suppression on the raw locations; returns selected indices.
    sel_idx = nms_bbox1(test_loc_clone[0],
                        prior_bboxes,
                        test_cof_score.detach(),
                        overlap_threshold=0.5,
                        prob_threshold=0.24)

    # Decode the locations to corner-form boxes and keep the selected ones.
    test_loc = loc2bbox(test_loc[0], prior_bboxes)
    test_loc = center2corner(test_loc)
    sel_bboxes = test_loc[sel_idx]

    plot_output(plot_img, sel_bboxes.cpu().detach().numpy())
def _show_train_sample(train_images, train_bboxes, train_labels, class_labels):
    """Display the first image of a training batch with its positive boxes."""
    # Reverse the axis order of the image tensor and map values from
    # [-1, 1] to [0, 1] for display.
    image_s = train_images[0, :, :, :].cpu().numpy().astype(
        np.float32).transpose().copy()
    image_s = (image_s + 1) / 2

    # Center form (cx, cy, w, h) -> corner form.
    bbox_cr_s = torch.cat([
        train_bboxes[..., :2] - train_bboxes[..., 2:] / 2,
        train_bboxes[..., :2] + train_bboxes[..., 2:] / 2
    ], dim=-1)
    # First sample in the batch, scaled by 300 for drawing.
    bbox_prior_s = bbox_cr_s[0, :].cpu().numpy().astype(np.float32).reshape(
        (-1, 4)).copy() * 300
    label_prior_s = train_labels[0, :].cpu().numpy().astype(
        np.float32).copy()

    positive = label_prior_s > 0
    bbox_s = bbox_prior_s[positive]
    label_s = label_prior_s[positive].astype(np.uint8)

    for box, lbl in zip(bbox_s, label_s):
        # OpenCV drawing calls expect integer pixel coordinates.
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        cv2.rectangle(image_s, top_left, bottom_right, (255, 0, 0), 2)
        cv2.putText(image_s, class_labels[lbl], top_left,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 0, 0), 1,
                    cv2.LINE_AA)
    plt.imshow(image_s)
    plt.show()


def train_net(train_loader,
              valid_loader,
              class_labels,
              lab_results_dir,
              learning_rate=0.0001,
              is_lr_scheduled=True,
              max_epoch=1,
              save_epochs=(10, 20, 30),
              show_samples=False):
    """Train an SSD network and save its weights, loss curves and logs.

    Args:
        train_loader: Iterable of training batches, each unpacking to
            ``(images, labels, bboxes, prior_bbox)``.
        valid_loader: Iterable of validation batches with the same layout.
        class_labels: Sequence of class names; its length sizes the network.
        lab_results_dir: Prefix that is string-concatenated in front of every
            output file name (include a trailing separator for a directory).
        learning_rate: Initial learning rate for Adam.
        is_lr_scheduled: When true, a ``MultiStepLR`` scheduler with
            milestones ``[10, 30, 50, 70]`` and ``gamma=0.1`` is stepped at
            the start of every epoch.
        max_epoch: Number of epochs to run.
        save_epochs: Epoch indices at whose start an intermediate checkpoint
            is written. The default is a tuple to avoid a mutable default.
        show_samples: When true, display the first sample of a batch every
            20 batches. Defaults to ``False``; this debug view was
            hard-disabled with ``if False`` in the original.

    Returns:
        ``lab_results_dir`` concatenated with the result file stem. The
        ``.loss``, ``__model.pth``, ``__train.log`` and ``__valid.log`` files
        are written under that stem.
    """
    # Measure execution time.
    train_start = time.time()
    start_time = strftime('SSD__%dth_%H:%M_', gmtime())

    # Define the net.
    print('num_class: ', len(class_labels))
    print('class_labels: ', class_labels)
    net = SSD(len(class_labels))

    # Use the GPU when available; every batch is moved to the same device.
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.backends.cudnn.benchmark = True
        net.cuda()

    # Define the loss.
    center_var = 0.1
    size_var = 0.2
    criterion = MultiboxLoss([center_var, center_var, size_var, size_var],
                             iou_threshold=0.5,
                             neg_pos_ratio=3.0)

    # Define the optimizer and the optional learning-rate schedule.
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    scheduler = None
    if is_lr_scheduled:
        scheduler = MultiStepLR(optimizer,
                                milestones=[10, 30, 50, 70],
                                gamma=0.1)

    # Loss histories are (iteration, value) pairs of plain Python numbers so
    # that no autograd graph is retained and np.asarray works on them.
    conf_losses = []
    loc_losses = []
    v_conf_losses = []
    v_loc_losses = []
    itr = 0
    # Logs are collected as lists of lines and joined when saved. The
    # original did ``list += str``, which appended one character at a time.
    train_log = []
    valid_log = []
    v_itr_max = 5  # validation batches averaged per validation pass

    for epoch_idx in range(max_epoch):
        if scheduler is not None:
            # NOTE(review): stepping before the first optimizer.step() is the
            # original ordering; current PyTorch recommends stepping at the
            # end of the epoch -- confirm which schedule is intended.
            scheduler.step()
            # Read the rate from the optimizer: it is the value in effect.
            print('\n\n===> lr: {}'.format(optimizer.param_groups[0]['lr']))

        # Save an intermediate checkpoint.
        if epoch_idx in save_epochs:
            temp_file = start_time + 'epoch_{}'.format(epoch_idx)
            checkpoint_path = lab_results_dir + temp_file + '__model.pth'
            torch.save(net.state_dict(), checkpoint_path)
            print('================> Temp file is created: ',
                  checkpoint_path)

        # Iterate the mini-batches.
        for train_batch_idx, data in enumerate(train_loader):
            train_images, train_labels, train_bboxes, prior_bbox = data

            # Validation below switches to eval mode, so switch back here.
            net.train()

            # Forward
            train_img = train_images.clone().to(device)
            train_bbox = train_bboxes.clone().to(device)
            train_label = train_labels.clone().to(device)
            train_out_confs, train_out_locs = net.forward(train_img)

            # Locations (feature-map based) -> bbox (center form).
            train_out_bbox = loc2bbox(train_out_locs,
                                      prior_bbox[0].unsqueeze(0))

            # Reset gradients, compute the loss, back-propagate and update.
            optimizer.zero_grad()
            conf_loss, loc_loss = criterion.forward(train_out_confs,
                                                    train_out_bbox,
                                                    train_label, train_bbox)
            train_loss = conf_loss + loc_loss
            train_loss.backward()
            optimizer.step()

            conf_value = float(conf_loss)
            loc_value = float(loc_loss)
            conf_losses.append((itr, conf_value))
            loc_losses.append((itr, loc_value))
            itr += 1

            if train_batch_idx % 20 == 0:
                train_log_temp = '[Train]epoch: %d itr: %d Conf Loss: %.4f Loc Loss: %.4f' % (
                    epoch_idx, itr, conf_value, loc_value)
                train_log.append(train_log_temp + '\n')
                print(train_log_temp)
                if show_samples:
                    _show_train_sample(train_images, train_bboxes,
                                       train_labels, class_labels)

            # Validation
            if train_batch_idx % 200 == 0:
                net.eval()
                v_conf_sum = 0.0
                v_loc_sum = 0.0
                n_valid = 0
                # zip() with the range first stops after v_itr_max batches
                # without fetching an extra one. The original summed six
                # batches but divided by five.
                for _, data in zip(range(v_itr_max), valid_loader):
                    valid_image, valid_label, valid_bbox, prior_bbox = data
                    valid_image = valid_image.to(device)
                    valid_bbox = valid_bbox.to(device)
                    valid_label = valid_label.to(device)

                    # No gradients are needed for validation.
                    with torch.no_grad():
                        valid_out_confs, valid_out_locs = net.forward(
                            valid_image)
                        # loc -> bbox (center form)
                        valid_out_bbox = loc2bbox(valid_out_locs,
                                                  prior_bbox[0].unsqueeze(0))
                        valid_conf_loss, valid_loc_loss = criterion.forward(
                            valid_out_confs, valid_out_bbox, valid_label,
                            valid_bbox)

                    v_conf_sum += float(valid_conf_loss)
                    v_loc_sum += float(valid_loc_loss)
                    n_valid += 1

                # Average over the batches actually seen (0 if none).
                v_conf_avg = v_conf_sum / max(n_valid, 1)
                v_loc_avg = v_loc_sum / max(n_valid, 1)
                v_conf_losses.append((itr, v_conf_avg))
                v_loc_losses.append((itr, v_loc_avg))
                valid_log_temp = '==>[Valid]epoch: %d itr: %d Conf Loss: %.4f Loc Loss: %.4f' % (
                    epoch_idx, itr, v_conf_avg, v_loc_avg)
                valid_log.append(valid_log_temp + '\n')
                print(valid_log_temp)

    # Measure the time.
    train_end = time.time()
    m, s = divmod(train_end - train_start, 60)
    h, m = divmod(m, 60)

    # Save the result.
    results_file_name = start_time + 'itr_{}'.format(itr)
    results_stem = lab_results_dir + results_file_name
    train_data = {
        'conf_losses': np.asarray(conf_losses),
        'loc_losses': np.asarray(loc_losses),
        'v_conf_losses': np.asarray(v_conf_losses),
        'v_loc_losses': np.asarray(v_loc_losses),
        'learning_rate': learning_rate,
        'total_itr': itr,
        'max_epoch': max_epoch,
        'train_time': '%d:%02d:%02d' % (h, m, s)
    }
    torch.save(train_data, results_stem + '.loss')

    # Save the trained network.
    torch.save(net.state_dict(), results_stem + '__model.pth')

    # Save the train/valid logs as single strings.
    torch.save({'log': ''.join(train_log)}, results_stem + '__train.log')
    torch.save({'log': ''.join(valid_log)}, results_stem + '__valid.log')

    return results_stem
# Script fragment: run a saved SSD model on the image given as sys.argv[1].

# Only switch the default tensor type to CUDA when a GPU is present; the rest
# of this script already guards its other CUDA calls the same way.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')

# 1. Convert image to input image tensor ------------------------------------
img_file_path = sys.argv[1]  # index 1; index 0 is the script name ('eval.py')
# Force three channels so RGBA / greyscale / palette images also work with
# the 3-element divisor below.
image = Image.open(img_file_path).convert('RGB').resize((300, 300))
image = np.divide((np.asarray(image, dtype=np.float32) - 128.0),
                  np.asarray((127, 127, 127)))
# ndarray.transpose() with no arguments reverses the axis order.
img_tensor = torch.from_numpy(image.transpose()).type(torch.float32)
if torch.cuda.is_available():
    img_tensor = img_tensor.cuda()

# 2. Load the saved model and test -------------------------------------------
class_labels = list(dataset_label_group.keys())
test_net = SSD(len(class_labels))
if torch.cuda.is_available():
    test_net_state = torch.load(results_path)
else:
    # Remap GPU-saved weights so the checkpoint loads on a CPU-only machine.
    test_net_state = torch.load(results_path, map_location='cpu')
test_net.load_state_dict(test_net_state)
if torch.cuda.is_available():
    test_net.cuda()
test_net.eval()

# 3. Run Forward --------------------------------------------------------------
with torch.no_grad():
    # unsqueeze(0) adds the batch axis in front.
    pred_scores_tensor, pred_bbox_tensor = test_net.forward(
        img_tensor.unsqueeze(0))

prior = CityScapeDataset([])
test_set = CityScapeDataset(test_set_list) test_data_loader = DataLoader(test_set, batch_size=1, shuffle=True, num_workers=num_workers) print('Total test set:', len(test_set)) if WILL_TRAIN: # Print key info of tensor inputs idx, (loc_targets, conf_targets, image) = next(enumerate(train_data_loader)) print('loc_targets tensor shape:', loc_targets.shape) print('conf_targets tensor shape:', conf_targets.shape) print('image tensor shape:', image.shape) # Create the instance of our defined network net = SSD(num_classes=3) net.cuda() criterion = MultiboxLoss([0.1, 0.1, 0.2, 0.2], iou_threshold=0.5, neg_pos_ratio=3.0) # optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005) optimizer = torch.optim.Adam(net.parameters(), lr=0.001) train_losses = [] valid_losses = [] max_epochs = 50 itr = 0 # Train process for epoch_idx in range(0, max_epochs): for train_batch_idx, (train_loc_targets, train_conf_targets, train_image) in enumerate(train_data_loader):