def im2batch(im_bgr, means_bgr, w_h_net, use_cuda):
    '''Resize a BGR image, subtract the per-channel means, and pack it
    into a 1-image NCHW batch in RGB order.'''
    im_bgr_norm_resized = base_transform(im_bgr, w_h_net[0], means_bgr)
    im_rgb_norm_resized = cv2.cvtColor(im_bgr_norm_resized, cv2.COLOR_BGR2RGB)
    # HWC -> CHW before building the tensor
    ts_rgb_norm = torch.from_numpy(im_rgb_norm_resized.transpose(2, 0, 1)).float()
    batch_rgb = ts_rgb_norm.unsqueeze(0)
    if use_cuda:
        batch_rgb = batch_rgb.cuda()
    # wrap both the CPU and GPU paths so the caller always gets a Variable
    return Variable(batch_rgb, volatile=True)
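# A minimal usage sketch for im2batch, assuming an already-loaded SSD `net`;
# the image path is hypothetical and the BGR means / 300x300 input size
# mirror the values used elsewhere in these snippets.
import cv2
import torch

im_bgr = cv2.imread('example.jpg')  # OpenCV loads images in BGR order
batch = im2batch(im_bgr, (104.0, 117.0, 123.0), (300, 300),
                 use_cuda=torch.cuda.is_available())
detections = net(batch).data  # forward pass on the 1-image batch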
def train():
    net.train()
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())
    for epoch in range(args.epochs):
        train_loss = 0  # reset the running loss at the start of each epoch
        # load train data & create batch iterator
        batch_iterator = iter(data.DataLoader(dataset, batch_size,
                                              shuffle=True,
                                              collate_fn=detection_collate))
        adjust_learning_rate(optimizer, epoch)
        for iteration in range(len(dataset) // batch_size):
            images, targets = next(batch_iterator)
            if args.cuda:
                images = images.cuda()
                targets = [anno.cuda() for anno in targets]
            images = Variable(images)
            targets = [Variable(t) for t in targets]
            # forward
            t0 = time.time()
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            t1 = time.time()
            print('Timer: ', t1 - t0)
            if args.log_iters:
                print(repr(iteration) + ': Current loss: ', loss.data[0])
            train_loss += loss.data[0]
        train_loss /= (len(dataset) / batch_size)
        torch.save(net.state_dict(), 'ssd_models/' + repr(epoch) + '.pth')
        print('Avg loss for epoch ' + repr(epoch) + ': ' + repr(train_loss))
    torch.save(net, args.save_folder + '' + args.version + '.pth')
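# The training loops here build their DataLoader with a custom
# `detection_collate`, needed because images in a batch carry different
# numbers of ground-truth boxes. A minimal sketch consistent with how it is
# used here (stacked image tensor plus a list of per-image target tensors);
# treat it as an assumption, not necessarily this repository's exact code.
import torch

def detection_collate(batch):
    """Stack images into one tensor; keep variable-length targets as a list."""
    imgs, targets = [], []
    for sample in batch:
        imgs.append(sample[0])
        targets.append(torch.FloatTensor(sample[1]))
    return torch.stack(imgs, 0), targets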
def train():
    net.train()
    train_loss = 0
    print('Loading Dataset...')
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    for iteration in range(max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(data.DataLoader(dataset, batch_size,
                                                  shuffle=True,
                                                  collate_fn=detection_collate))
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
        # load train data
        images, targets = next(batch_iterator)
        images = Variable(images.cuda())
        targets = [Variable(anno.cuda()) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss = criterion(out, targets)
        loss.backward()
        optimizer.step()
        t1 = time.time()
        train_loss += loss.data[0]
        if iteration % 10 == 0:
            print('Timer: ', t1 - t0)
            print('Loss: %f' % (loss.data[0]), end=' ')
        if iteration % 5000 == 0:
            torch.save(net.state_dict(),
                       'weights/ssd_iter_new' + repr(iteration) + '.pth')
    torch.save(net, args.save_folder + '' + args.version + '.pth')
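# The iteration-based loop above steps the learning rate at `stepvalues`
# via adjust_learning_rate(optimizer, gamma, step). A sketch matching that
# call signature, assuming a global `args.lr` holds the initial rate:
def adjust_learning_rate(optimizer, gamma, step):
    """Decay the initial learning rate by gamma at each step milestone."""
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr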
def detect(self, img_cv2_list, batch_size=1, conf_thres=0.25, top_k=200):
    boxes_all = []
    for bs_idx in range(int(np.ceil(len(img_cv2_list) / batch_size))):
        xs = []
        imgs = img_cv2_list[batch_size * bs_idx:batch_size * (bs_idx + 1)]
        for img_cv2 in imgs:
            img_transformed = base_transform(img_cv2, 300, (128.0, 128.0, 128.0))
            img_transformed = img_transformed[:, :, (2, 1, 0)]  # BGR -> RGB
            img_tensor = torch.from_numpy(img_transformed).permute(2, 0, 1)
            xs.append(img_tensor)
        images_tensor = torch.stack(xs, 0)
        images = Variable(images_tensor.cuda())
        t1 = time.time()
        detections = self.net(images).data
        t2 = time.time()
        # renamed from the outer bs_idx, which the original shadowed
        for img_idx in range(len(imgs)):
            boxes = []
            img_cv2 = imgs[img_idx]
            rgb_image = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
            scale = torch.Tensor([rgb_image.shape[1::-1],
                                  rgb_image.shape[1::-1]])
            dets = detections[img_idx].view(1, self.num_cls, top_k, 5)
            for i in range(dets.size(1)):
                j = 0
                # bound j so a full row of confident boxes cannot index past top_k
                while j < top_k and dets[0, i, j, 0] >= conf_thres:
                    score = dets[0, i, j, 0]
                    label_name = self.class_names[i - 1]
                    # the last dim of dets holds 5 values: [cls_conf, x1, y1, x2, y2]
                    pt = (dets[0, i, j, 1:] * scale).cpu().numpy()
                    j += 1
                    boxes.append((label_name, score, pt[0], pt[1], pt[2], pt[3]))
            boxes_all.append(boxes)
        print('batch forward time: ', t2 - t1,
              'fusion time: ', time.time() - t2,
              'total time: ', time.time() - t1)
    return boxes_all
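# Hypothetical usage of the batched detect() method above, assuming `d` is
# an instance of the (not shown) wrapper class holding a loaded SSD net;
# the image paths are placeholders.
import cv2

imgs = [cv2.imread(p) for p in ['a.jpg', 'b.jpg']]
for per_image_boxes in d.detect(imgs, batch_size=2, conf_thres=0.25):
    for label, score, x1, y1, x2, y2 in per_image_boxes:
        print(label, float(score), x1, y1, x2, y2)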
def test_net(save_folder, net, dataset, transform, top_k, detector, priors):
    """Test a Fast R-CNN network on an image database."""
    num_images = len(dataset)
    # all detections are collected into:
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    all_time = 0.
    output_dir = get_output_dir(
        pkl_dir,
        args.iteration + '_' + args.dataset_name + '_' + args.set_file_name)
    det_file = os.path.join(output_dir, 'detections.pkl')
    output_dir = get_output_dir(output_dir, 'multi_test')

    ######################### Multiscale PriorBox #####################
    priorboxes = {}
    for v1 in multi_scale[str(ssd_dim)]:
        if not multi_cfg[str(v1)]:
            return 'this multi_scale is not included'
        priorbox = PriorBox(multi_cfg[str(v1)])
        img_size = multi_cfg[str(v1)]['min_dim']
        with torch.no_grad():
            priorboxes[str(img_size)] = priorbox.forward().to(device)

    # Per-scale box-size filters, keyed by '<ssd_dim>_<test_scale>' (the
    # trailing flip flag of a detection key is ignored). Each entry keeps
    # only boxes whose max/min side passes the comparison, so small test
    # scales contribute large objects and large test scales small objects.
    size_filters = {
        '320_192': ('max', '>', 32), '512_320': ('max', '>', 32),
        '320_320': ('max', '>', 0), '512_512': ('max', '>', 0),
        '320_384': ('min', '<', 160), '512_640': ('min', '<', 160),
        '320_448': ('min', '<', 128),
        '320_512': ('min', '<', 96),
        '320_576': ('min', '<', 64),
        '320_706': ('min', '<', 32), '512_1216': ('min', '<', 32),
    }

    ########################## Detection ##############################
    for i in range(num_images):
        _t['im_detect'].tic()
        image = dataset.pull_image(i)
        h, w, _ = image.shape
        detections_multi = {}
        for v in multi_scale[str(ssd_dim)]:
            priors = priorboxes[str(v)]
            ssd_dim_temp = int(v)
            for loop in range(2):  # loop == 1 runs the horizontally flipped image
                if loop == 0:
                    im_trans = base_transform(image, ssd_dim_temp, dataset_mean)
                else:
                    im_f = cv2.flip(image.copy(), 1)
                    im_trans = base_transform(im_f, ssd_dim_temp, dataset_mean)
                im_trans = im_trans[:, :, (2, 1, 0)]
                with torch.no_grad():
                    x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                        0, 3, 1, 2).to(device)
                    if 'RefineDet' in args.backbone and args.refine:
                        arm_loc, _, loc, conf = net(x)
                    else:
                        loc, conf = net(x)
                        arm_loc = None
                    detections = detector.forward(loc, conf, priors,
                                                  arm_loc_data=arm_loc)
                    key = str(ssd_dim) + '_' + str(v) + '_' + str(loop)
                    detections_multi[key] = detections.clone()
        detect_time = _t['im_detect'].toc(average=False)
        if i > 10:
            all_time += detect_time
        ###################################################################
        for j in range(1, detections.size(1)):
            cls_dets = np.array([])
            for k, d in detections_multi.items():
                dets = d[0, j, :]
                if dets.sum() == 0:
                    continue
                mask = dets[:, 0].gt(0.).expand(dets.size(-1),
                                                dets.size(0)).t()
                dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
                boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
                if k[-1] == '1':
                    # undo the horizontal flip: new x1 = 1 - old x2, new x2 = 1 - old x1
                    boxes[:, 0], boxes[:, 2] = 1 - boxes[:, 2], 1 - boxes[:, 0]
                boxes[:, 0] *= w
                boxes[:, 2] *= w
                boxes[:, 1] *= h
                boxes[:, 3] *= h
                # filter boxes by size according to the scale this key came from
                boxes_np = boxes.cpu().numpy()
                sides_w = boxes_np[:, 2] - boxes_np[:, 0] + 1
                sides_h = boxes_np[:, 3] - boxes_np[:, 1] + 1
                filt = size_filters.get(k.rsplit('_', 1)[0])
                if filt is not None:
                    side, op, thresh = filt
                    sides = (np.maximum(sides_w, sides_h) if side == 'max'
                             else np.minimum(sides_w, sides_h))
                    index_temp = (np.where(sides > thresh)[0] if op == '>'
                                  else np.where(sides < thresh)[0])
                else:
                    # no filter registered for this key: keep every box
                    # (the original left index_temp stale/undefined here)
                    index_temp = np.arange(boxes_np.shape[0])
                if not index_temp.size:
                    continue
                boxes = boxes[index_temp, :]
                scores = dets[index_temp, 0].cpu().numpy()
                cls_dets_temp = np.hstack(
                    (boxes.cpu().numpy(),
                     scores[:, np.newaxis])).astype(np.float32, copy=False)
                if cls_dets.size == 0:
                    cls_dets = cls_dets_temp.copy()
                else:
                    cls_dets = np.concatenate((cls_dets, cls_dets_temp), axis=0)
            if cls_dets.size != 0:
                cls_dets = bbox_vote(cls_dets)
                if len(cls_dets) != 0:
                    all_boxes[j][i] = cls_dets
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))
    FPS = (num_images - 10) / all_time
    print('FPS:', FPS)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset, FPS=FPS)
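# test_net fuses the per-scale detections with bbox_vote, which is not
# defined in these snippets. A sketch following the widely used SFD-style
# implementation (score-weighted averaging over clusters of boxes with
# IoU >= 0.3; the 0.3 threshold is an assumption):
import numpy as np

def bbox_vote(det):
    """det: N x 5 array of (x1, y1, x2, y2, score); returns voted boxes."""
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]  # process highest-scoring boxes first
    dets = np.zeros((0, 5), dtype=np.float32)
    while det.shape[0] > 0:
        # IoU of the current top box against all remaining boxes
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (area[0] + area - inter)
        merge_index = np.where(iou >= 0.3)[0]
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)
        if merge_index.shape[0] <= 1:
            dets = np.row_stack((dets, det_accu))  # nothing to merge with
            continue
        # average the cluster's coordinates weighted by score
        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        merged = np.zeros((1, 5))
        merged[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
        merged[:, 4] = np.max(det_accu[:, 4])
        dets = np.row_stack((dets, merged))
    return dets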
def main():
    mean = (104, 117, 123)
    print('loading model!')
    if deform:
        from model.dualrefinedet_vggbn import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        c7_channel=1024, def_groups=deform,
                        multihead=multihead, bn=bn)
    else:
        from model.refinedet_vgg import build_net
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        use_refine=refine, c7_channel=1024, bn=bn)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', trained_model)
    net = net.to(device)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)
    for i, line in enumerate(open(img_set, 'r')):
        if 'COCO' in dataset:
            image_name = line[:-1]  # strip the trailing newline
            image_id = int(image_name.split('_')[-1])
        elif 'VOC' in dataset:
            image_name = line[:-1]
            image_id = -1
        else:
            image_name, image_id = line.split(' ')
            image_id = image_id[:-1]
        print(i, image_name, image_id)
        image_path = os.path.join(img_root, image_name + '.jpg')
        image = cv2.imread(image_path, 1)
        h, w, _ = image.shape
        image_draw = cv2.resize(image.copy(), (640, 480))
        im_trans = base_transform(image, ssd_dim, mean)
        ######################## Detection ########################
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                0, 3, 1, 2).to(device)
            if 'RefineDet' in backbone and refine:
                arm_loc, _, loc, conf = net(x)
            else:
                loc, conf = net(x)
                arm_loc = None
            detections = detector.forward(loc, conf, priors,
                                          arm_loc_data=arm_loc)
        ############################################################
        out = list()
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            if dets.sum() == 0:
                continue
            mask = dets[:, 0].gt(0.).expand(dets.size(-1), dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, dets.size(-1))
            boxes = dets[:, 1:-1] if dets.size(-1) == 6 else dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()
            for b, s in zip(boxes_np, scores):
                if save_dir:
                    out.append([int(b[0]), int(b[1]), int(b[2]), int(b[3]),
                                j - 1, s])
                    if 'COCO' in dataset:
                        det_list.append({
                            'image_id': image_id,
                            'category_id': labelmap[j],
                            'bbox': [
                                float('{:.1f}'.format(b[0])),
                                float('{:.1f}'.format(b[1])),
                                float('{:.1f}'.format(b[2] - b[0] + 1)),
                                float('{:.1f}'.format(b[3] - b[1] + 1))
                            ],
                            'score': float('{:.2f}'.format(s))
                        })
                    else:
                        results_file.write(
                            str(image_id) + ' ' + str(j) + ' ' + str(s) +
                            ' ' + str(np.around(b[0], 2)) +
                            ' ' + str(np.around(b[1], 2)) +
                            ' ' + str(np.around(b[2], 2)) +
                            ' ' + str(np.around(b[3], 2)) + '\n')
                if display:
                    cv2.rectangle(image_draw,
                                  (int(b[0] / w * 640), int(b[1] / h * 480)),
                                  (int(b[2] / w * 640), int(b[3] / h * 480)),
                                  (0, 255, 0), thickness=1)
                    cls = class_name[j] if 'COCO' in dataset else str(
                        labelmap[j - 1])
                    put_str = cls + ':' + str(np.around(s, decimals=2))
                    cv2.putText(image_draw, put_str,
                                (int(b[0] / w * 640),
                                 int(b[1] / h * 480) - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                color=(0, 255, 0), thickness=1)
        if display:
            cv2.imshow('frame', image_draw)
            ch = cv2.waitKey(0)
            if ch == 115:  # 's': save the current image and its boxes
                if save_dir:
                    print('save: ', line)
                    torch.save(out, os.path.join(save_dir,
                                                 '%s.pkl' % str(line[:-1])))
                    cv2.imwrite(os.path.join(save_dir,
                                             '%s.jpg' % str(line[:-1])), image)
                    cv2.imwrite(os.path.join(save_dir,
                                             '%s_box.jpg' % str(line[:-1])),
                                image_draw)
    cv2.destroyAllWindows()
    if save_dir:
        if dataset == 'COCO':
            json.dump(det_list, results_file)
        results_file.close()
                score = detections[0, i, j, 0]
                label_name = labelmap[i - 1]
                pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                coords = (pt[0], pt[1], pt[2], pt[3])
                pred_num += 1
                with open(filename, mode='a') as f:
                    f.write(str(pred_num) + ' label: ' + label_name +
                            ' score: ' + str(score) + ' ' +
                            ' || '.join(str(c) for c in coords) + '\n')
                j += 1


if __name__ == '__main__':
    # load net
    net = build_ssd('test', 300, 21)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    valset = VOCDetection(VOCroot, 'val', None, AnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder, net, args.cuda, valset,
             base_transform(net.size, (104, 117, 123)), args.top_k,
             thresh=args.confidence_threshold)
def main(net):
    logging.info('==> do detect on every image with model reference.')
    img_list_file = '/home/hyer/datasets/OCR/ssd_k1_test.txt'
    with open(img_list_file, 'r') as f:
        data = f.readlines()
    img_list = [li.strip() for li in data]

    batch_size = 4
    colors = plt.cm.hsv(np.linspace(0, 1, num_classes)).tolist()
    for bs_idx in range(int(np.ceil(len(img_list) / batch_size))):
        xs = []
        img_paths = img_list[batch_size * bs_idx:batch_size * (bs_idx + 1)]
        for img_path in img_paths:
            img_cv2 = cv2.imread(img_path)
            img_transformed = base_transform(img_cv2, 300, dataset_mean)
            img_transformed = img_transformed[:, :, (2, 1, 0)]  # BGR -> RGB
            img_tensor = torch.from_numpy(img_transformed).permute(2, 0, 1)
            xs.append(img_tensor)
        images_tensor = torch.stack(xs, 0)
        images = Variable(images_tensor.cuda())

        t1 = time.time()
        detections = net(images).data
        t2 = time.time()

        boxes_batch = []
        draw = False
        # iterate over the images actually present in this batch; the
        # original used range(batch_size), which breaks on a final partial
        # batch and shadowed the outer bs_idx
        for img_idx in range(len(img_paths)):
            boxes = []
            img_cv2 = cv2.imread(img_paths[img_idx])
            rgb_image = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
            plt.imshow(rgb_image)  # plot the image for matplotlib
            currentAxis = plt.gca()
            scale = torch.Tensor([rgb_image.shape[1::-1],
                                  rgb_image.shape[1::-1]])
            dets = detections[img_idx].view(1, num_classes, top_k, 5)
            for i in range(dets.size(1)):
                j = 0
                # bound j so a full row of confident boxes cannot index past top_k
                while j < top_k and dets[0, i, j, 0] >= conf_thres:
                    score = dets[0, i, j, 0]
                    label_name = class_names[i - 1]
                    display_txt = '%s: %.2f' % (label_name, score)
                    print(display_txt)
                    # the last dim of dets holds 5 values: [cls_conf, x1, y1, x2, y2]
                    pt = (dets[0, i, j, 1:] * scale).cpu().numpy()
                    coords = (pt[0], pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
                    color = colors[i]
                    if draw:
                        currentAxis.add_patch(
                            plt.Rectangle(*coords, fill=False,
                                          edgecolor=color, linewidth=2))
                        currentAxis.text(pt[0], pt[1], display_txt,
                                         bbox={'facecolor': color,
                                               'alpha': 0.5})
                    j += 1
                    boxes.append((label_name, score,
                                  pt[0], pt[1], pt[2], pt[3]))
            boxes_batch.append(boxes)
        if draw:
            # save before show(): show() hands the figure to the GUI loop,
            # so a later savefig() can write an empty canvas
            plt.savefig('./test.jpg')
            plt.show()
        print('batch forward time: ', t2 - t1,
              'fusion time: ', time.time() - t2,
              'total time: ', time.time() - t1)
def main():
    mean = (104, 117, 123)
    trained_model = model_dir
    print('loading model!')
    net = build_ssd('test', ssd_dim, num_classes, tssd=tssd, top_k=top_k,
                    thresh=confidence_threshold, nms_thresh=nms_threshold,
                    attention=attention, prior=prior, tub=tub,
                    tub_thresh=tub_thresh,
                    tub_generate_score=tub_generate_score)
    net.load_state_dict(torch.load(trained_model))
    net.eval()
    print('Finished loading model!', model_dir)
    net = net.cuda()
    cudnn.benchmark = True

    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(w, h)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        size = (640, 480)
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] + '_OTA.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    att_criterion = AttentionLoss((h, w))
    state = [None] * 6 if tssd in ['lstm', 'tblstm', 'outlstm'] else None
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_draw = frame.copy()
        frame_num += 1
        im_trans = base_transform(frame, ssd_dim, mean)
        x = Variable(torch.from_numpy(im_trans).unsqueeze(0).permute(
            0, 3, 1, 2), volatile=True)
        x = x.cuda()
        if tssd == 'ssd':
            detections, att_map = net(x)
            detections = detections.data
        else:
            detections, state, att_map = net(x, state)
            detections = detections.data
        out = list()
        for j in range(1, detections.size(1)):
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.dim() == 0:
                    continue
                # 6-element rows carry a tracking identity in the last slot
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)
                score = dets[0]
                if score > confidence_threshold:
                    out.append([x_min, y_min, x_max, y_max, j - 1,
                                score, identity])
        if attention:
            _, up_attmap = att_criterion(att_map)  # scale, batch, tensor(1,h,w)
            att_target = up_attmap[0][0].cpu().data.numpy().transpose(1, 2, 0)

        # one drawing color per tracked identity; unknown identities fall
        # back to red, matching the original elif chain
        identity_colors = {0: (0, 0, 255), 1: (0, 200, 0), 2: (255, 128, 0),
                           3: (255, 0, 255), 4: (0, 128, 255),
                           5: (255, 128, 128)}
        for obj in out:
            x_min, y_min, x_max, y_max, cls, score, identity = obj
            color = identity_colors.get(int(identity), (255, 0, 0))
            cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                          color, thickness=2)
            cv2.fillConvexPoly(
                frame_draw,
                np.array([[x_min - 1, y_min], [x_min - 1, y_min - 50],
                          [x_max + 1, y_min - 50], [x_max + 1, y_min]],
                         np.int32), color)
            if dataset_name == 'VID2017':
                put_str = (str(int(identity)) + ':' + VID_CLASSES_name[cls] +
                           ':' + str(np.around(score, decimals=2)))
            else:
                put_str = str(int(identity))
            cv2.putText(frame_draw, put_str, (x_min + 10, y_min - 10),
                        cv2.FONT_HERSHEY_DUPLEX, 1,
                        color=(255, 255, 255), thickness=1)
            print(str(frame_num) + ':' + str(np.around(score, decimals=2)) +
                  ',' + VID_CLASSES_name[cls])
        if not out:
            print(str(frame_num))
        cv2.imshow('frame', cv2.resize(frame_draw, (640, 360)))
        if save_dir:
            frame_write = cv2.resize(frame_draw, size)
            record.write(frame_write)
        ch = cv2.waitKey(1)
        if ch == 32:  # space: pause
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's': save the current frame and its boxes
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(out, os.path.join(
                            save_dir, tssd + '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(os.path.join(save_dir,
                                                 '%s.jpg' % str(frame_num)),
                                    frame)
                elif in_ch == 32:
                    break

    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()
        # tp_cumsum completed from context: it mirrors the fp/gt cumulative
        # sums the fragment already builds below
        tp_cumsum = torch.cumsum(torch.Tensor(tp[cl]), 0)
        fp_cumsum = torch.cumsum(torch.Tensor(fp[cl]), 0)
        gt_cumsum = torch.cumsum(torch.Tensor(gts[cl]), 0)
        rec_cumsum = tp_cumsum.float() / gt_cumsum[-1]
        prec_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum).clamp(min=1e-6)
        ap[cl] = voc_ap(rec_cumsum, prec_cumsum)
        recall[cl] = rec_cumsum[-1]
        precision[cl] = prec_cumsum[-1]
        print('class %d rec %.4f prec %.4f AP %.4f tp %.4f fp %.4f, gt %.4f' %
              (cl, recall[cl], precision[cl], ap[cl],
               sum(tp[cl]), sum(fp[cl]), sum(gts[cl])))

    # mAP = mean of APs for all classes
    mAP = sum(ap.values()) / len(ap)
    print('mAP', mAP)
    return mAP


if __name__ == '__main__':
    # load net
    net = build_ssd('test', 300, 21)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    dataset = VOCDetection(VOCroot, 'test', None, AnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    eval_net(net, args.cuda, dataset,
             base_transform(net.size, (104, 117, 123)), args.top_k)
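# The per-class AP above goes through voc_ap, not shown in these snippets.
# A sketch of the standard PASCAL VOC interpolated-AP computation (numpy
# inputs assumed; the tensors built above would need .numpy() first):
import numpy as np

def voc_ap(rec, prec, use_07_metric=False):
    """Average precision from recall/precision curves (VOC devkit style)."""
    if use_07_metric:
        # 11-point interpolation
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            p = 0. if np.sum(rec >= t) == 0 else np.max(prec[rec >= t])
            ap += p / 11.
        return ap
    # all-point interpolation: make precision monotonically decreasing,
    # then integrate precision over the recall steps
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])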
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    print('Loading Dataset...')
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(xlabel='Iteration', ylabel='Loss',
                      title='Current SSD Training Loss',
                      legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 3)).cpu(),
            opts=dict(xlabel='Epoch', ylabel='Loss',
                      title='Epoch SSD Training Loss',
                      legend=['Loc Loss', 'Conf Loss', 'Loss']))
    for iteration in range(max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(data.DataLoader(dataset, batch_size,
                                                  shuffle=True,
                                                  collate_fn=detection_collate))
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([loc_loss, conf_loss,
                                    loc_loss + conf_loss
                                    ]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot, update='append')
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1
        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(anno.cuda()) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' %
                  (loss.data[0]), end=' ')
            if args.visdom:
                viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                         Y=torch.Tensor([loss_l.data[0], loss_c.data[0],
                                         loss_l.data[0] + loss_c.data[0]
                                         ]).unsqueeze(0).cpu(),
                         win=lot, update='append')
                # hacky fencepost solution for 0th epoch plot
                if iteration == 0:
                    viz.line(X=torch.zeros((1, 3)).cpu(),
                             Y=torch.Tensor([loc_loss, conf_loss,
                                             loc_loss + conf_loss
                                             ]).unsqueeze(0).cpu(),
                             win=epoch_lot, update=True)
        if iteration % 5000 == 0:
            torch.save(net.state_dict(),
                       'weights/ssd_iter_new' + repr(iteration) + '.pth')
    torch.save(net, args.save_folder + '' + args.version + '.pth')
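# The visdom plotting in the two loops around this point assumes a global
# `viz` handle. A minimal setup sketch; a visdom server must already be
# running (python -m visdom.server):
import visdom
viz = visdom.Visdom()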
def train():
    net.train()
    # loss counters
    loc_loss = 0  # epoch
    conf_loss = 0
    cum_loc_loss = 0  # cumulative
    cum_conf_loss = 0
    epoch = 0
    print('Loading Dataset...')
    dataset = VOCDetection(VOCroot, 'train',
                           base_transform(ssd_dim, rgb_means),
                           AnnotationTransform())
    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on', dataset.name)
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(
            X=torch.zeros((1,)),
            Y=torch.zeros((1, 6)),
            opts=dict(xlabel='Epoch', ylabel='Loss',
                      title='Real-time SSD Training Loss',
                      legend=['Cur Loc Loss', 'Cur Conf Loss', 'Cur Loss',
                              'Cum Loc Loss', 'Cum Conf Loss', 'Cum Loss']))
    for iteration in range(max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(data.DataLoader(dataset, batch_size,
                                                  shuffle=True,
                                                  collate_fn=detection_collate))
        if iteration in stepvalues:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)
            cum_loc_loss += loc_loss
            cum_conf_loss += conf_loss
            epoch += 1
            if args.visdom:
                loss_list = [loc_loss, conf_loss, loc_loss + conf_loss,
                             cum_loc_loss, cum_conf_loss,
                             cum_loc_loss + cum_conf_loss]
                viz.line(X=torch.ones((1, 6)) * epoch,
                         Y=torch.tensor(loss_list).unsqueeze(0),
                         win=lot, update='append')
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
        # load train data
        images, targets = next(batch_iterator)
        images = Variable(images.cuda())
        targets = [Variable(anno.cuda()) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        if iteration % 10 == 0:
            print('Timer: ', t1 - t0)
            print('Loss: %f' % (loss.data[0]), end=' ')
        if iteration % 5000 == 0:
            torch.save(net.state_dict(),
                       'weights/ssd_iter_new' + repr(iteration) + '.pth')
    torch.save(net, args.save_folder + '' + args.version + '.pth')
def main():
    mean = (104, 117, 123)
    if 'FPN' in backbone:
        from model.refinedet_vgg import build_net
        static_net = build_net('test', size=ssd_dim, num_classes=num_classes,
                               c7_channel=c7_channel, bn=bn)
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        c7_channel=c7_channel, bn=bn)
    else:
        from model.ssd4scale_vgg import build_net
        static_net = build_net('test', size=ssd_dim, num_classes=num_classes,
                               c7_channel=c7_channel, bn=bn)
        net = build_net('test', size=ssd_dim, num_classes=num_classes,
                        c7_channel=c7_channel, bn=bn, deform=deform)
    print('loading model!')
    static_net.load_state_dict(torch.load(static_dir))
    static_net.eval()
    static_net = static_net.to(device)
    net.load_state_dict(torch.load(trn_dir))
    net.eval()
    net = net.to(device)
    print('Finished loading model!', static_dir, trn_dir)
    detector = Detect(num_classes, 0, top_k, confidence_threshold,
                      nms_threshold)
    priorbox = PriorBox(cfg)
    with torch.no_grad():
        priors = priorbox.forward().to(device)

    frame_num = 0
    cap = cv2.VideoCapture(video_name)
    w, h = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
    size = (640, 480)
    if save_dir:
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        record = cv2.VideoWriter(
            os.path.join(save_dir,
                         video_name.split('/')[-1].split('.')[0] + '.avi'),
            fourcc, cap.get(cv2.CAP_PROP_FPS), size)
    offset_list = list()
    ref_loc = list()
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape
        frame_draw = frame.copy()
        im_trans = base_transform(frame, ssd_dim, mean)
        with torch.no_grad():
            x = torch.from_numpy(im_trans).unsqueeze(0).permute(
                0, 3, 1, 2).to(device)
            if frame_num % interval == 0:
                # key frame: run the static net and rebuild the priors
                static_out = static_net(x, ret_loc=deform)
                priors_static = center_size(
                    decode(static_out[0][0], priors, [0.1, 0.2]))
                if deform:
                    ref_loc = static_out[2]
                offset_list = list()
            out = net(x, ref_loc=ref_loc, offset_list=offset_list,
                      ret_off=(False, True)[deform and not offset_list])
            detections = detector.forward(
                out[0], out[1], priors_static,
                scale=torch.cuda.FloatTensor([w, h, w, h]))
            if len(detections) == 3:
                offset_list = out[2]
                ref_loc = list()
        out = list()
        for j in range(1, detections.size(1)):
            if detections[0, j, :, :].sum() == 0:
                continue
            for k in range(detections.size(2)):
                dets = detections[0, j, k, :]
                if dets.sum() == 0:
                    continue
                boxes = dets[1:-1] if dets.size(0) == 6 else dets[1:]
                identity = dets[-1] if dets.size(0) == 6 else -1
                x_min = int(boxes[0] * w)
                x_max = int(boxes[2] * w)
                y_min = int(boxes[1] * h)
                y_max = int(boxes[3] * h)
                score = dets[0]
                if score > confidence_threshold:
                    put_str = VID_CLASSES_name[j - 1] + ':' + str(
                        np.around(score, decimals=2)
                    ).split('(')[-1].split(',')[0][:4]
                    color = (255, 0, 0)
                    cv2.rectangle(frame_draw, (x_min, y_min), (x_max, y_max),
                                  color, thickness=2)
                    cv2.putText(frame_draw, put_str, (x_min + 10, y_min - 10),
                                cv2.FONT_HERSHEY_DUPLEX, 0.8,
                                color=color, thickness=1)
        print(str(frame_num))
        frame_num += 1
        frame_show = cv2.resize(frame_draw, size)
        cv2.imshow('frame', frame_show)
        if save_dir:
            record.write(frame_show)
        ch = cv2.waitKey(1)
        if ch == 32:  # space: pause
            while 1:
                in_ch = cv2.waitKey(10)
                if in_ch == 115:  # 's': save the current frame and its boxes
                    if save_dir:
                        print('save: ', frame_num)
                        torch.save(out, os.path.join(
                            save_dir, '_%s.pkl' % str(frame_num)))
                        cv2.imwrite(os.path.join(save_dir,
                                                 '%s.jpg' % str(frame_num)),
                                    frame)
                elif in_ch == 32:
                    break

    cap.release()
    if save_dir:
        record.release()
    cv2.destroyAllWindows()