def eval():
    # DataLoader
    dataloader = torch.utils.data.DataLoader(
        coco_dataset.COCODataset(config.eval_path,
                                 (config.img_w, config.img_h),
                                 is_training=False),
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=16,
        pin_memory=False)
    # net
    net = mobile_yolo.Mobile_YOLO(config, is_training=False)
    net = torch.nn.DataParallel(net.cuda())
    # checkpoint
    net.load_state_dict(torch.load(config.checkpoint))
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            yolo_loss.YOLOLoss(config.anchors[i], config.classes_num,
                               (config.img_w, config.img_h)))

    print('Start eval...')
    net.eval()
    n_gt = 0
    correct = 0
    for step, samples in enumerate(dataloader):
        images, labels = samples["image"], samples["label"]
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            output = utils.non_max_suppression(output,
                                               config.classes_num,
                                               conf_thres=0.2)
            # calculate
            for sample_i in range(labels.size(0)):
                # Get labels for sample where width is not zero (dummies)
                target_sample = labels[sample_i, labels[sample_i, :, 3] != 0]
                for obj_cls, tx, ty, tw, th in target_sample:
                    # Get rescaled gt coordinates
                    tx1, tx2 = config.img_w * (tx - tw / 2), config.img_w * (tx + tw / 2)
                    ty1, ty2 = config.img_h * (ty - th / 2), config.img_h * (ty + th / 2)
                    n_gt += 1
                    box_gt = torch.cat([coord.unsqueeze(0)
                                        for coord in [tx1, ty1, tx2, ty2]]).view(1, -1)
                    sample_pred = output[sample_i]
                    if sample_pred is not None:
                        # Iterate through predictions where the class predicted is same as gt
                        for x1, y1, x2, y2, conf, obj_conf, obj_pred in \
                                sample_pred[sample_pred[:, 6] == obj_cls]:
                            box_pred = torch.cat([coord.unsqueeze(0)
                                                  for coord in [x1, y1, x2, y2]]).view(1, -1)
                            iou = utils.bbox_iou(box_pred, box_gt)
                            if iou >= config.iou_thres:
                                correct += 1
                                break
        if n_gt:
            # Note: correct / n_gt is a recall-style proxy (matched detections
            # over ground truths), not a true mAP over precision-recall curves.
            print('Batch [%d/%d] mAP: %.5f' % (step, len(dataloader), float(correct / n_gt)))
    print('Mean Average Precision: %.5f' % float(correct / n_gt))
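
# The matching step above depends on utils.bbox_iou, whose implementation is
# not shown in this file. The sketch below is a minimal, illustrative version
# assuming (1, 4) tensors of corner coordinates (x1, y1, x2, y2); it is not
# necessarily the repo's actual utils.bbox_iou.
def bbox_iou_sketch(box1, box2, eps=1e-16):
    # Corners of the intersection rectangle
    inter_x1 = torch.max(box1[:, 0], box2[:, 0])
    inter_y1 = torch.max(box1[:, 1], box2[:, 1])
    inter_x2 = torch.min(box1[:, 2], box2[:, 2])
    inter_y2 = torch.min(box1[:, 3], box2[:, 3])
    # Intersection area, clamped so disjoint boxes give 0
    inter = (inter_x2 - inter_x1).clamp(min=0) * (inter_y2 - inter_y1).clamp(min=0)
    # Union = area1 + area2 - intersection
    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
    return inter / (area1 + area2 - inter + eps)
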
def detect():
    # net
    net = mobile_yolo.Mobile_YOLO(config)
    net = torch.nn.DataParallel(net.cuda())
    net.eval()
    # checkpoint
    net.load_state_dict(torch.load(config.checkpoint))
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            yolo_loss.YOLOLoss(config.anchors[i], config.classes_num,
                               (config.img_w, config.img_h)))

    # prepare images path
    images_name = os.listdir(config.image_path)
    images_path = [os.path.join(config.image_path, name) for name in images_name]
    if len(images_path) == 0:
        raise Exception("no image found in {}".format(config.image_path))

    # Start inference
    batch_size = config.batch_size
    for step in range(0, len(images_path), batch_size):
        # preprocess
        images = []
        images_origin = []
        # step already advances in batch_size increments, so slice directly
        # (indexing with step * batch_size would skip most of the images)
        for path in images_path[step:step + batch_size]:
            print("processing: {}".format(path))
            image = cv2.imread(path, cv2.IMREAD_COLOR)
            if image is None:
                print("read path error: {}. skip it.".format(path))
                continue
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            images_origin.append(image)  # keep for save result
            image = cv2.resize(image, (config.img_w, config.img_h),
                               interpolation=cv2.INTER_LINEAR)
            image = image.astype(np.float32)
            image /= 255.0
            image = np.transpose(image, (2, 0, 1))
            images.append(image)
        images = np.asarray(images)
        images = torch.from_numpy(images).cuda()
        # inference
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
            batch_detections = utils.non_max_suppression(output,
                                                         config.classes_num,
                                                         config.conf_thres)

        # write result images. Draw bounding boxes and labels of detections
        with open(config.classes_names_path, "r") as f:
            classes = f.read().split("\n")[:-1]
        if not os.path.isdir(config.save_path):
            os.makedirs(config.save_path)
        for idx, detections in enumerate(batch_detections):
            fig, ax = plt.subplots(1)
            ax.imshow(images_origin[idx])
            if detections is not None:
                unique_labels = detections[:, -1].cpu().unique()
                n_cls_preds = len(unique_labels)
                bbox_colors = random.sample(colors, n_cls_preds)
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                    color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])]
                    # Rescale coordinates to original dimensions
                    ori_h, ori_w = images_origin[idx].shape[:2]
                    pre_h, pre_w = config.img_h, config.img_w
                    box_h = ((y2 - y1) / pre_h) * ori_h
                    box_w = ((x2 - x1) / pre_w) * ori_w
                    y1 = (y1 / pre_h) * ori_h
                    x1 = (x1 / pre_w) * ori_w
                    # Create a Rectangle patch
                    bbox = patches.Rectangle((x1, y1), box_w, box_h,
                                             linewidth=2,
                                             edgecolor=color,
                                             facecolor='none')
                    # Add the bbox to the plot
                    ax.add_patch(bbox)
                    # Add label
                    plt.text(x1, y1, s=classes[int(cls_pred)],
                             color='white',
                             verticalalignment='top',
                             bbox={'color': color, 'pad': 0})
            # Save generated image with detections
            plt.axis('off')
            plt.gca().xaxis.set_major_locator(NullLocator())
            plt.gca().yaxis.set_major_locator(NullLocator())
            plt.savefig(config.save_path + '/{}_{}.jpg'.format(step, idx),
                        bbox_inches='tight', pad_inches=0.0)
            plt.close()
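
# utils.non_max_suppression is not reproduced here. The sketch below shows the
# per-image, per-class NMS it is assumed to perform, built on the real
# torchvision.ops.nms. The column layouts ((cx, cy, w, h, obj_conf, class
# scores...) in; (x1, y1, x2, y2, obj_conf, cls_conf, cls_pred) out) are
# assumptions inferred from how the callers in this file unpack the result.
import torchvision

def non_max_suppression_sketch(prediction, num_classes,
                               conf_thres=0.5, nms_thres=0.4):
    # Convert center-format boxes (cx, cy, w, h) to corners (x1, y1, x2, y2)
    box_corner = torch.zeros_like(prediction)
    box_corner[..., 0] = prediction[..., 0] - prediction[..., 2] / 2
    box_corner[..., 1] = prediction[..., 1] - prediction[..., 3] / 2
    box_corner[..., 2] = prediction[..., 0] + prediction[..., 2] / 2
    box_corner[..., 3] = prediction[..., 1] + prediction[..., 3] / 2
    prediction[..., :4] = box_corner[..., :4]

    output = [None] * prediction.size(0)
    for image_i, image_pred in enumerate(prediction):
        # Drop boxes below the objectness threshold
        image_pred = image_pred[image_pred[:, 4] >= conf_thres]
        if image_pred.size(0) == 0:
            continue
        # Best class score and index per box
        cls_conf, cls_pred = torch.max(image_pred[:, 5:5 + num_classes], 1)
        detections = torch.cat((image_pred[:, :5],
                                cls_conf.unsqueeze(1).float(),
                                cls_pred.unsqueeze(1).float()), 1)
        # Run NMS independently for each predicted class
        keep_all = []
        for c in detections[:, -1].unique():
            det_c = detections[detections[:, -1] == c]
            keep = torchvision.ops.nms(det_c[:, :4], det_c[:, 4], nms_thres)
            keep_all.append(det_c[keep])
        output[image_i] = torch.cat(keep_all)
    return output
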
def train():
    # DataLoader
    dataloader = torch.utils.data.DataLoader(
        coco_dataset.COCODataset(config.train_path,
                                 (config.img_w, config.img_h),
                                 is_training=True),
        batch_size=config.batch_size,
        shuffle=True,
        pin_memory=True)
    # net and optimizer
    net = mobile_yolo.Mobile_YOLO(config)
    optimizer = _get_optimizer(config, net)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=config.milestones, gamma=0.1)
    net = torch.nn.DataParallel(net.cuda())
    # checkpoints
    if config.checkpoint:
        print('loading checkpoint:', config.checkpoint)
        checkpoint = torch.load(config.checkpoint)
        net.load_state_dict(checkpoint)
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            yolo_loss.YOLOLoss(config.anchors[i], config.classes_num,
                               (config.img_w, config.img_h)))

    print('Start training...')
    net.train()
    global_step = config.start_epoch * len(dataloader)
    for epoch in range(config.start_epoch, config.epochs):
        for step, samples in enumerate(dataloader):
            images, labels = samples["image"], samples["label"]
            start_time = time.time()

            # Forward and backward
            outputs = net(images)
            losses_name = ["total_loss", "x", "y", "w", "h", "conf", "cls"]
            losses = [[] for _ in range(len(losses_name))]
            # Accumulate each loss component across the three detection scales
            for i in range(3):
                _loss_item = yolo_losses[i](outputs[i], labels)
                for j, l in enumerate(_loss_item):
                    losses[j].append(l)
            losses = [sum(l) for l in losses]
            loss = losses[0]
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step > 0 and step % 10 == 0:
                _loss = loss.item()
                duration = float(time.time() - start_time)
                example_per_second = config.batch_size / duration
                lr = optimizer.param_groups[0]['lr']
                print("epoch [%.3d] iter = %d loss = %.2f example/sec = %.3f lr = %.5f"
                      % (epoch, step, _loss, example_per_second, lr))
                for i, name in enumerate(losses_name):
                    value = _loss if i == 0 else losses[i]
                    config.writer.add_scalar(name, value, global_step)

            if step > 0 and step % 1000 == 0:
                print('saving model to %s/model%s.pth' % (config.save_path, global_step))
                torch.save(net.state_dict(),
                           '%s/model%s.pth' % (config.save_path, global_step))
            global_step += 1
        lr_scheduler.step()
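
# _get_optimizer is defined elsewhere in the repo and is not shown here. As a
# rough illustration only, it is assumed to build an optimizer from config
# fields; the sketch below uses SGD with momentum and weight decay, and the
# field names (config.lr, config.momentum, config.weight_decay) are
# hypothetical.
def _get_optimizer_sketch(config, net):
    return torch.optim.SGD(net.parameters(),
                           lr=config.lr,
                           momentum=config.momentum,
                           weight_decay=config.weight_decay)
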
def creat_json():
    # DataLoader
    dataloader = torch.utils.data.DataLoader(
        coco_dataset.COCODataset(config.eval_path,
                                 (config.img_w, config.img_h),
                                 is_training=False),
        batch_size=config.batch_size,
        shuffle=False,
        pin_memory=False)
    # net
    net = mobile_yolo.Mobile_YOLO(config)
    net = torch.nn.DataParallel(net.cuda())
    net.eval()
    # checkpoint
    net.load_state_dict(torch.load(config.checkpoint))
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(
            yolo_loss.YOLOLoss(config.anchors[i], config.classes_num,
                               (config.img_w, config.img_h)))
    # class-index-to-category-id map, loaded once instead of per batch
    labelmap = json.load(open(config.labelmap))

    # Start inference
    json_result = list()
    for step, samples in enumerate(dataloader):
        print('[%d/%d]' % (step, len(dataloader)))
        images, targets = samples["image"].cuda(), samples["label"].cuda()
        images_id = samples["img_id"].numpy()
        height = samples['height'].numpy()
        width = samples['width'].numpy()
        # inference (no_grad avoids tracking gradients during evaluation)
        with torch.no_grad():
            outputs = net(images)
            output_list = []
            for i in range(3):
                output_list.append(yolo_losses[i](outputs[i]))
            output = torch.cat(output_list, 1)
        detections = utils.non_max_suppression(output.cpu(),
                                               config.classes_num,
                                               conf_thres=config.conf_thres,
                                               nms_thres=config.nms_thres)

        # format result
        batch_detections = list()
        for detection in detections:
            if detection is None:
                batch_detections.append(None)
                continue
            # keep the top-K detections ranked by objectness confidence (column 4)
            k = min(detection.size()[0], config.bbox_per)
            _, index = torch.topk(detection, k, 0)
            detection = detection[index[:, 4]]
            # x1,y1,x2,y2 convert to x1,y1,width,height
            detection[:, 2] -= detection[:, 0]
            detection[:, 3] -= detection[:, 1]
            # normalize box coordinates to [0, 1]
            detection = detection.cpu() \
                / torch.Tensor([config.img_w, config.img_h,
                                config.img_w, config.img_h, 1, 1, 1])
            batch_detections.append(detection.numpy())

        # write result
        assert len(batch_detections) == len(width) == len(height) == len(images_id)
        for bt_id in range(len(batch_detections)):
            if batch_detections[bt_id] is None:
                continue
            for bbox_id in range(len(batch_detections[bt_id])):
                # map the contiguous class index back to the COCO category id
                value = int(batch_detections[bt_id][bbox_id][-1]) + 1
                key = int(list(labelmap.keys())[list(labelmap.values()).index(value)])
                pred = {
                    "image_id": int(images_id[bt_id]),
                    "category_id": key,
                    "bbox": [
                        round(batch_detections[bt_id][bbox_id][0] * width[bt_id], 2),
                        round(batch_detections[bt_id][bbox_id][1] * height[bt_id], 2),
                        round(batch_detections[bt_id][bbox_id][2] * width[bt_id], 2),
                        round(batch_detections[bt_id][bbox_id][3] * height[bt_id], 2)
                    ],
                    "score": float(batch_detections[bt_id][bbox_id][4])
                             * float(batch_detections[bt_id][bbox_id][5])
                }
                json_result.append(pred)
    with open('result.json', 'w') as f:
        json.dump(json_result, f, indent=4)
    os.system('python cocoevaldemo.py')
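
# creat_json finishes by shelling out to cocoevaldemo.py, which is not shown in
# this file. A typical implementation scores result.json with pycocotools; the
# sketch below assumes that role, and the annotation-file argument is a
# hypothetical name, not a config field confirmed by the source.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

def coco_eval_sketch(annotation_file, result_file='result.json'):
    coco_gt = COCO(annotation_file)          # ground-truth annotations
    coco_dt = coco_gt.loadRes(result_file)   # detections in COCO result format
    coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()                    # prints the standard AP/AR table
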