# 'weights/ssd300_0712_130000.pth', 'weights/ssd300_0712_140000.pth', # 'weights/ssd300_0712_150000.pth', 'weights/ssd300_0712_160000.pth' 'weights_sw_deconv/ssd512_tme_285000.pth' ] for model_name in check_point_list: # load net num_classes = len(TME_CLASSES) + 1 # +1 background net = build_ssd('test', args.dim, num_classes) # initialize SSD # net.load_state_dict(torch.load(args.trained_model)) net.load_state_dict(torch.load(model_name)) net.eval() log.l.info(model_name) log.l.info('Finished loading model!') # load data dataset = TMEDetection(DataRoot, [(set_type)], BaseTransform(args.dim, dataset_mean), AnnotationTransform()) if args.cuda: net = net.cuda() cudnn.benchmark = True # evaluation test_net(args.save_folder, net, args.cuda, dataset, BaseTransform(net.size, dataset_mean), args.top_k, args.dim, thresh=args.confidence_threshold)
# Strip the 'module.' prefix that nn.DataParallel checkpoints prepend to every
# parameter key, then load the cleaned weights into the network.
for full_key, weight in state_dict.items():
    new_state_dict[full_key[7:]] = weight
net.load_state_dict(new_state_dict)

# eval mode
net.eval()
print('Finished loading model!')

# load data
#testset = VOCDetection(args.voc_root, [('2007', 'test')], None, AnnotationTransform())
if args.cuda:
    net = net.cuda()
    cudnn.benchmark = True

# evaluation
# cv_idx_for_test must be equal to the checkpoint idx of the trained model (otherwise cheat)
means = (34, 34, 34)
trainset = FISHdetection(ct_train[cv_idx_for_test],
                         coord_ssd_train[cv_idx_for_test],
                         None, 'lesion_train')
validset = FISHdetection(ct_valid[cv_idx_for_test],
                         coord_ssd_valid[cv_idx_for_test],
                         None, 'lesion_valid')
# allset = FISHdetection(np.vstack(ct), np.vstack(coord).astype(np.float64), None, 'lesion_all')
test_net(args.save_folder, net, args.cuda, validset,
         BaseTransform(size, means),
         thresh=args.visual_threshold)
cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA ) # We put the label of the class right above the rectangle. j += 1 # We increment j to get to the next occurrence. return frame # We return the original frame with the detector rectangle and the label around the detected object. # Creating the SSD neural network net = build_ssd('test') # We create an object that is our neural network ssd. net.load_state_dict( torch.load('ssd300_mAP_77.43_v2.pth', map_location=lambda storage, loc: storage) ) # We get the weights of the neural network from another one that is pretrained (ssd300_mAP_77.43_v2.pth). # Creating the transformation transform = BaseTransform( net.size, (104 / 256.0, 117 / 256.0, 123 / 256.0) ) # We create an object of the BaseTransform class, a class that will do the required transformations so that the image can be the input of the neural network. # Doing some Object Detection on a video reader = imageio.get_reader('funny_dog.mp4') # We open the video. fps = reader.get_meta_data()[ 'fps'] # We get the fps frequence (frames per second). writer = imageio.get_writer( 'output.mp4', fps=fps) # We create an output video with this same fps frequence. for i, frame in enumerate( reader): # We iterate on the frames of the output video: frame = detect( frame, net.eval(), transform ) # We call our detect function (defined above) to detect the object on the frame. writer.append_data(frame) # We add the next frame in the output video.
(tot_detect_time + tot_nms_time) / (num_images - 1))) print_info('FPS: {:.3f} fps'.format( (num_images - 1) / (tot_detect_time + tot_nms_time))) if __name__ == '__main__': net = build_net('test', cfg.model.input_size, cfg.model) init_net(net, cfg, args.trained_model) print_info('===> Finished constructing and loading model', ['yellow', 'bold']) net.eval() _set = 'eval_sets' if not args.test else 'test_sets' testset = get_dataloader(cfg, args.dataset, _set) if cfg.test_cfg.cuda: net = net.cuda() cudnn.benckmark = True else: net = net.cpu() detector = Detect(num_classes, cfg.loss.bkg_label, anchor_config) save_folder = os.path.join(cfg.test_cfg.save_folder, args.dataset) _preprocess = BaseTransform(cfg.model.input_size, cfg.model.rgb_means, (2, 0, 1)) test_net(save_folder, net, detector, cfg.test_cfg.cuda, testset, transform=_preprocess, max_per_image=cfg.test_cfg.topk, thresh=cfg.test_cfg.score_threshold)
def train(args, net, optimizer, criterion, scheduler=None):
    """Train SSD on UCF24 with manual video-level shuffling and periodic validation.

    The dataloader is created with shuffle=False because shuffling is done by
    hand at every epoch boundary: whole videos are permuted while the frames
    inside each video keep their original order.

    Fix vs. original: the final validation-timing line was written to the log
    as ``ptr_str`` (the mAP string, logged a second time) instead of
    ``prt_str`` (the timing string).
    """
    # Line-buffered log file so progress is visible while training runs.
    log_file = open(args.save_root + file_name + "_training.log", "w", 1)
    log_file.write(args.exp_name + '\n')
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log_file.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
    log_file.write(str(net))
    net.train()

    # loss counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    loc_losses = AverageMeter()
    cls_losses = AverageMeter()

    print('Loading Dataset...')
    train_dataset = UCF24Detection(args.data_root, args.train_sets,
                                   SSDAugmentation(args.ssd_dim, args.means),
                                   AnnotationTransform(),
                                   input_type=args.input_type)
    val_dataset = UCF24Detection(args.data_root, 'test',
                                 BaseTransform(args.ssd_dim, args.means),
                                 AnnotationTransform(),
                                 input_type=args.input_type, full_test=False)
    epoch_size = len(train_dataset) // args.batch_size
    print("epoch_size: ", epoch_size)
    print('Training SSD on', train_dataset.name)

    if args.visdom:
        import visdom
        viz = visdom.Visdom()
        viz.port = 8097
        viz.env = args.exp_name
        # initialize visdom loss plot
        lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 6)).cpu(),
            opts=dict(
                xlabel='Iteration',
                ylabel='Loss',
                title='Current SSD Training Loss',
                legend=['REG', 'CLS', 'AVG', 'S-REG', ' S-CLS', ' S-AVG']
            )
        )
        # initialize visdom meanAP and class APs plot
        legends = ['meanAP']
        for cls in CLASSES:
            legends.append(cls)
        val_lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, args.num_classes)).cpu(),
            opts=dict(
                xlabel='Iteration',
                ylabel='Mean AP',
                title='Current SSD Validation mean AP',
                legend=legends
            )
        )

    batch_iterator = None
    # shuffle=False on purpose: ordering is rewritten manually each epoch below.
    train_data_loader = DataLoader(train_dataset, args.batch_size,
                                   num_workers=args.num_workers,
                                   shuffle=False, collate_fn=detection_collate,
                                   pin_memory=True)
    val_data_loader = DataLoader(val_dataset, args.batch_size,
                                 num_workers=args.num_workers,
                                 shuffle=False, collate_fn=detection_collate,
                                 pin_memory=True)

    # my_dict maps a video key -> (start, end) range of its frames inside
    # dataset.ids; xxx keeps a pristine copy of the original id order.
    my_dict = copy.deepcopy(train_data_loader.dataset.train_vid_frame)
    keys = list(my_dict.keys())
    k_len = len(keys)
    arr = np.arange(k_len)
    xxx = copy.deepcopy(train_data_loader.dataset.ids)

    itr_count = 0
    torch.cuda.synchronize()
    t0 = time.perf_counter()
    current_epoch = 0
    iteration = 0
    while current_epoch < (args.total_epoch + 1):
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # Epoch boundary: permute videos, keeping each video's frames in order.
            xxxx = copy.deepcopy(train_data_loader.dataset.ids)
            np.random.shuffle(arr)
            iii = 0
            for arr_i in arr:
                key = keys[arr_i]
                rang = my_dict[key]
                xxxx[iii:(iii + rang[1] - rang[0])] = xxx[rang[0]:rang[1]]
                iii += rang[1] - rang[0]
            train_data_loader.dataset.ids = copy.deepcopy(xxxx)
            # create batch iterator
            batch_iterator = iter(train_data_loader)
            if scheduler is not None and iteration > 0:
                scheduler.step()
            current_epoch += 1
        iteration += 1

        # load train data
        images, targets, img_indexs = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            # volatile targets: they only feed the criterion, never the graph
            targets = [Variable(anno.cuda(), volatile=True) for anno in targets]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]

        # forward
        out = net(images, img_indexs)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        torch.nn.utils.clip_grad_norm(net.parameters(), args.clip)
        optimizer.step()

        loc_loss = loss_l.data[0]
        conf_loss = loss_c.data[0]
        loc_losses.update(loc_loss)
        cls_losses.update(conf_loss)
        losses.update((loc_loss + conf_loss) / 2.0)

        if iteration % args.print_step == 0 and iteration > 0:
            if args.visdom:
                losses_list = [loc_losses.val, cls_losses.val, losses.val,
                               loc_losses.avg, cls_losses.avg, losses.avg]
                viz.line(X=torch.ones((1, 6)).cpu() * iteration,
                         Y=torch.from_numpy(np.asarray(losses_list)).unsqueeze(0).cpu(),
                         win=lot, update='append')
            torch.cuda.synchronize()
            t1 = time.perf_counter()
            batch_time.update(t1 - t0)
            print_line = 'Epoch {:02d}/{:02d} Iteration {:06d}/{:06d} loc-loss {:.3f}({:.3f}) cls-loss {:.3f}({:.3f}) ' \
                         'average-loss {:.3f}({:.3f}) Timer {:0.3f}({:0.3f}) lr {:0.5f}'.format(
                             current_epoch, args.total_epoch, iteration, args.max_iter,
                             loc_losses.val, loc_losses.avg, cls_losses.val, cls_losses.avg,
                             losses.val, losses.avg, batch_time.val, batch_time.avg, args.lr)
            torch.cuda.synchronize()
            t0 = time.perf_counter()
            log_file.write(print_line + '\n')
            print(print_line)
            itr_count += 1

            # Reset running averages so the log reflects recent behaviour only.
            if itr_count % args.loss_reset_step == 0 and itr_count > 0:
                loc_losses.reset()
                cls_losses.reset()
                losses.reset()
                batch_time.reset()
                print('Reset accumulators of ', args.exp_name, ' at', itr_count * args.print_step)
                itr_count = 0

        if (iteration % args.eval_step == 0 or iteration == 5000) and iteration > 0:
            torch.cuda.synchronize()
            tvs = time.perf_counter()
            print('Saving state, iter:', iteration)
            torch.save(net.state_dict(),
                       args.save_root + file_name + '_ssd300_ucf24_' + repr(iteration) + '.pth')
            net.eval()  # switch net to evaluation mode
            mAP, ap_all, ap_strs = validate(args, net, val_data_loader, val_dataset,
                                            iteration, iou_thresh=args.iou_thresh)
            for ap_str in ap_strs:
                print(ap_str)
                log_file.write(ap_str + '\n')
            ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
            print(ptr_str)
            log_file.write(ptr_str)
            if args.visdom:
                aps = [mAP]
                for ap in ap_all:
                    aps.append(ap)
                viz.line(
                    X=torch.ones((1, args.num_classes)).cpu() * iteration,
                    Y=torch.from_numpy(np.asarray(aps)).unsqueeze(0).cpu(),
                    win=val_lot, update='append'
                )
            net.train()  # Switch net back to training mode
            torch.cuda.synchronize()
            t0 = time.perf_counter()
            prt_str = '\nValidation TIME::: {:0.3f}\n\n'.format(t0 - tvs)
            print(prt_str)
            # BUG FIX: was log_file.write(ptr_str), which logged the mAP line
            # twice and never logged the validation time.
            log_file.write(prt_str)

    log_file.close()
def train(args, net, optimizer, criterion, scheduler):
    """Train SSD on UCF24 by looping over the dataloader and periodically evaluate.

    Writes a line-buffered text log under args.save_root; when args.visdom is
    set, streams loss and validation-mAP curves to a visdom server.

    Fixes vs. original:
      * the validation-timing line was logged as ``ptr_str`` (the mAP string,
        written a second time) instead of ``prt_str`` (the timing string);
      * 'Itration' typo in the per-step log line.
    """
    log_file = open(args.save_root + "training.log", "w", 1)  # line-buffered
    log_file.write(args.exp_name + '\n')
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log_file.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
    log_file.write(str(net))
    net.train()

    # loss counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    loc_losses = AverageMeter()
    cls_losses = AverageMeter()

    print('Loading Dataset...')
    train_dataset = UCF24Detection(args.data_root, args.train_sets,
                                   SSDAugmentation(args.ssd_dim, args.means),
                                   AnnotationTransform(),
                                   input_type=args.input_type)
    val_dataset = UCF24Detection(args.data_root, 'test',
                                 BaseTransform(args.ssd_dim, args.means),
                                 AnnotationTransform(),
                                 input_type=args.input_type, full_test=False)
    epoch_size = len(train_dataset) // args.batch_size
    print('Training SSD on', train_dataset.name)

    if args.visdom:
        import visdom
        viz = visdom.Visdom()
        viz.port = args.vis_port
        viz.env = args.exp_name
        # initialize visdom loss plot
        lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, 6)).cpu(),
            opts=dict(
                xlabel='Iteration',
                ylabel='Loss',
                title='Current SSD Training Loss',
                legend=['REG', 'CLS', 'AVG', 'S-REG', ' S-CLS', ' S-AVG']
            )
        )
        # initialize visdom meanAP and class APs plot
        legends = ['meanAP']
        for cls in CLASSES:
            legends.append(cls)
        val_lot = viz.line(
            X=torch.zeros((1,)).cpu(),
            Y=torch.zeros((1, args.num_classes)).cpu(),
            opts=dict(
                xlabel='Iteration',
                ylabel='Mean AP',
                title='Current SSD Validation mean AP',
                legend=legends
            )
        )

    batch_iterator = None
    train_data_loader = data.DataLoader(train_dataset, args.batch_size,
                                        num_workers=args.num_workers,
                                        shuffle=True, collate_fn=detection_collate,
                                        pin_memory=True)
    val_data_loader = data.DataLoader(val_dataset, args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False, collate_fn=detection_collate,
                                      pin_memory=True)
    itr_count = 0
    torch.cuda.synchronize()
    t0 = time.perf_counter()
    iteration = 0
    while iteration <= args.max_iter:
        for i, (images, targets, img_indexs) in enumerate(train_data_loader):
            if iteration > args.max_iter:
                break
            iteration += 1
            if args.cuda:
                images = images.cuda(0, non_blocking=True)
                targets = [anno.cuda(0, non_blocking=True) for anno in targets]
            # forward
            out = net(images)
            # backprop
            optimizer.zero_grad()
            loss_l, loss_c = criterion(out, targets)
            loss = loss_l + loss_c
            loss.backward()
            optimizer.step()
            scheduler.step()

            loc_loss = loss_l.item()
            conf_loss = loss_c.item()
            loc_losses.update(loc_loss)
            cls_losses.update(conf_loss)
            losses.update((loc_loss + conf_loss) / 2.0)

            if iteration % args.print_step == 0 and iteration > 0:
                if args.visdom:
                    losses_list = [loc_losses.val, cls_losses.val, losses.val,
                                   loc_losses.avg, cls_losses.avg, losses.avg]
                    viz.line(X=torch.ones((1, 6)).cpu() * iteration,
                             Y=torch.from_numpy(np.asarray(losses_list)).unsqueeze(0).cpu(),
                             win=lot, update='append')
                torch.cuda.synchronize()
                t1 = time.perf_counter()
                batch_time.update(t1 - t0)
                # typo fix: 'Itration' -> 'Iteration'
                print_line = 'Iteration {:06d}/{:06d} loc-loss {:.3f}({:.3f}) cls-loss {:.3f}({:.3f}) ' \
                             'average-loss {:.3f}({:.3f}) Timer {:0.3f}({:0.3f})'.format(
                                 iteration, args.max_iter, loc_losses.val, loc_losses.avg,
                                 cls_losses.val, cls_losses.avg, losses.val, losses.avg,
                                 batch_time.val, batch_time.avg)
                torch.cuda.synchronize()
                t0 = time.perf_counter()
                log_file.write(print_line + '\n')
                print(print_line)
                itr_count += 1

                # Reset running averages so the log reflects recent behaviour only.
                if itr_count % args.loss_reset_step == 0 and itr_count > 0:
                    loc_losses.reset()
                    cls_losses.reset()
                    losses.reset()
                    batch_time.reset()
                    print('Reset accumulators of ', args.exp_name, ' at', itr_count * args.print_step)
                    itr_count = 0

            if (iteration % args.eval_step == 0 or iteration == 5000) and iteration > 0:
                torch.cuda.synchronize()
                tvs = time.perf_counter()
                print('Saving state, iter:', iteration)
                torch.save(net.state_dict(),
                           args.save_root + 'ssd300_ucf24_' + repr(iteration) + '.pth')
                net.eval()  # switch net to evaluation mode
                mAP, ap_all, ap_strs = validate(args, net, val_data_loader, val_dataset,
                                                iteration, iou_thresh=args.iou_thresh)
                for ap_str in ap_strs:
                    print(ap_str)
                    log_file.write(ap_str + '\n')
                ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
                print(ptr_str)
                log_file.write(ptr_str)
                if args.visdom:
                    aps = [mAP]
                    for ap in ap_all:
                        aps.append(ap)
                    viz.line(
                        X=torch.ones((1, args.num_classes)).cpu() * iteration,
                        Y=torch.from_numpy(np.asarray(aps)).unsqueeze(0).cpu(),
                        win=val_lot, update='append'
                    )
                net.train()  # Switch net back to training mode
                torch.cuda.synchronize()
                t0 = time.perf_counter()
                prt_str = '\nValidation TIME::: {:0.3f}\n\n'.format(t0 - tvs)
                print(prt_str)
                # BUG FIX: was log_file.write(ptr_str), which logged the mAP
                # line twice and never logged the validation time.
                log_file.write(prt_str)

    log_file.close()
def validation(net, skip):
    """Run detection over the configured test split and score the results.

    Builds the dataset selected by the global DATASET_NAME, runs `net` on every
    image, collects per-class detections scaled back to pixel coordinates,
    pickles them to <output_dir>/detections.pkl, and returns the two metrics
    produced by evaluate_detections.

    Fix vs. original: the local result names `map`/`mam` shadowed the `map`
    builtin; renamed (return values unchanged).
    """
    net.eval()
    ### Load testing data
    if DATASET_NAME == 'KAIST':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), dataset_name='test20', skip=skip)
    elif DATASET_NAME == 'VOC0712':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), [('2007', 'test')])
    elif DATASET_NAME == 'Sensiac':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), dataset_name='day_test10')
    elif DATASET_NAME == 'Caltech':
        dataset = GetDataset(args.voc_root, BaseTransform(image_size, means),
                             AnnotationTransform(), dataset_name='test01', skip=skip)

    num_images = len(dataset)
    # all_boxes[class][image] -> ndarray of (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}
    output_dir = get_output_dir(
        DATASET_NAME + "_" + args.net + args.input_dim + "_120000", DATASET_NAME)
    det_file = os.path.join(output_dir, 'detections.pkl')

    index = 0
    for i in range(num_images):
        im, gt, h, w = dataset.pull_item(i)
        # if not len(gt):  ### some image dont have gt
        #     continue
        index = index + 1
        x = Variable(im.unsqueeze(0))
        if args.cuda:
            x = x.cuda()
        _t['im_detect'].tic()
        detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)
        print("%s/%s time:%s" % (index, num_images, detect_time))

        # skip j = 0, because it's the background class
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            # keep rows with positive score; column 0 is the score,
            # columns 1:5 are normalized box coordinates
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.dim() == 0:
                continue
            boxes = dets[:, 1:]
            # scale normalized coords back to pixel space
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack((boxes.cpu().numpy(),
                                  scores[:, np.newaxis])).astype(np.float32, copy=False)
            all_boxes[j][i] = cls_dets
            # all_boxes format: [classes][num_images][coordinates and score]

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    # renamed from `map`/`mam` to avoid shadowing the builtin
    det_map, det_mam = evaluate_detections(all_boxes, output_dir, dataset)
    return det_map, det_mam
def train():
    """Train the detector over module-level globals (net, optimizer, criterion,
    train_dataset, testset, viz, ...), saving checkpoints and optionally
    evaluating every args.test_frequency epochs.

    NOTE(review): resume/step_index recovery and the epoch-boundary bookkeeping
    below depend on exact statement order; documented only, code unchanged.
    """
    net.train()
    # loss counters (accumulated per epoch, reset at each epoch boundary)
    loc_loss = 0  # epoch
    conf_loss = 0
    epoch = 0
    if args.resume_net:
        epoch = 0 + args.resume_epoch
    epoch_size = len(train_dataset) // args.batch_size
    max_iter = args.max_epoch * epoch_size
    # LR decay milestones, expressed in iterations; VOC vs COCO schedules
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', train_dataset.name)
    '''
    n_flops, n_convops, n_params = measure_model(net, int(args.size), int(args.size))
    print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.
          format(n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
    '''
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in net.parameters()) / 1000000.0))
    step_index = 0
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(
                                 xlabel='Epoch',
                                 ylabel='Loss',
                                 title='Epoch SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
    if args.resume_epoch > 0:
        # resuming: skip the iterations already covered by the loaded checkpoint
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    lr = args.lr
    log_file = open(log_file_path, 'w')
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # epoch boundary: rebuild the (shuffled) batch iterator
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(train_dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            loc_loss = 0
            conf_loss = 0
            # periodic checkpoint
            if epoch % args.save_frequency == 0 and epoch > 0:
                torch.save(
                    net.state_dict(),
                    os.path.join(
                        save_folder, args.version + '_' + args.dataset +
                        '_epoches_' + repr(epoch) + '.pth'))
            # periodic evaluation on the test set
            if epoch % args.test_frequency == 0 and epoch > 0:
                net.eval()
                top_k = 200
                detector = Detect(num_classes, 0, cfg)
                if args.dataset == 'VOC':
                    APs, mAP = test_net(test_save_dir,
                                        net,
                                        detector,
                                        args.cuda,
                                        testset,
                                        BaseTransform(net.module.size, rgb_means,
                                                      rgb_std, (2, 0, 1)),
                                        top_k,
                                        thresh=0.01)
                    APs = [str(num) for num in APs]
                    mAP = str(mAP)
                    log_file.write(str(iteration) + ' APs:\n' + '\n'.join(APs))
                    log_file.write('mAP:\n' + mAP + '\n')
                else:
                    test_net(test_save_dir, net, detector, args.cuda, testset,
                             BaseTransform(net.module.size, rgb_means,
                                           rgb_std, (2, 0, 1)),
                             top_k, thresh=0.01)
                net.train()
            epoch += 1
        load_t0 = time.time()
        # Walk back from the current iteration in save_frequency-sized epoch
        # strides to recover step_index after a resume; the visdom epoch plot
        # is updated when a milestone is hit. NOTE(review): appears intended
        # for resume bookkeeping -- confirm against the training schedule.
        for iter_tmp in range(iteration, 0, -epoch_size * args.save_frequency):
            if iter_tmp in stepvalues:
                step_index = stepvalues.index(iter_tmp) + 1
                if args.visdom:
                    viz.line(X=torch.ones((1, 3)).cpu() * epoch,
                             Y=torch.Tensor([
                                 loc_loss, conf_loss, loc_loss + conf_loss
                             ]).unsqueeze(0).cpu() / epoch_size,
                             win=epoch_lot,
                             update='append')
                break
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = Variable(images.cuda())
            # volatile targets: legacy (pre-0.4) API; targets only feed the loss
            targets = [
                Variable(anno.cuda(), volatile=True) for anno in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(anno, volatile=True) for anno in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, priors, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        # legacy .data[0] scalar access (pre-0.4 PyTorch)
        loc_loss += loss_l.data[0]
        conf_loss += loss_c.data[0]
        load_t1 = time.time()
        if iteration % 10 == 0:
            print(args.version + 'Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' % (loss_l.data[0], loss_c.data[0]) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
            log_file.write('Epoch:' + repr(epoch) + ' || epochiter: ' +
                           repr(iteration % epoch_size) + '/' +
                           repr(epoch_size) + '|| Totel iter ' +
                           repr(iteration) + ' || L: %.4f C: %.4f||' %
                           (loss_l.data[0], loss_c.data[0]) +
                           'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                           'LR: %.8f' % (lr) + '\n')
            if args.visdom and args.send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
        if args.visdom:
            # per-iteration loss curve
            viz.line(X=torch.ones((1, 3)).cpu() * iteration,
                     Y=torch.Tensor([
                         loss_l.data[0], loss_c.data[0],
                         loss_l.data[0] + loss_c.data[0]
                     ]).unsqueeze(0).cpu(),
                     win=lot,
                     update='append')
            if iteration % epoch_size == 0:
                # reset the epoch plot window at each epoch boundary
                viz.line(X=torch.zeros((1, 3)).cpu(),
                         Y=torch.Tensor(
                             [loc_loss, conf_loss,
                              loc_loss + conf_loss]).unsqueeze(0).cpu(),
                         win=epoch_lot,
                         update=True)
    log_file.close()
    torch.save(
        net.state_dict(),
        os.path.join(save_folder,
                     'Final_' + args.version + '_' + args.dataset + '.pth'))
new_state_dict[name] = v net.load_state_dict(new_state_dict) # 将读取的模型参数,灌入net中 net.eval() # 现在模型参数就有啦,可以进入评估模式了 print('Finished loading model!') print(net) # load data,加载测试数据 if args.dataset == 'VOC': testset = VOCDetection( VOCroot, [('2007', 'test')], None, AnnotationTransform()) elif args.dataset == 'COCO': testset = COCODetection( COCOroot, [('2014', 'minival')], None) #COCOroot, [('2015', 'test-dev')], None) else: print('Only VOC and COCO dataset are supported now!') if args.cuda: net = net.cuda() cudnn.benchmark = True else: net = net.cpu() # evaluation #top_k = (300, 200)[args.dataset == 'COCO'] top_k = 200 # 每张图像上最多检出top_k个bbox detector = Detect(num_classes,0,cfg) # 调用detection.py里的Detect类,完成forward操作的detector save_folder = os.path.join(args.save_folder,args.dataset) rgb_means = ((104, 117, 123),(103.94,116.78,123.68))[args.version == 'RFB_mobile'] test_net(save_folder, net, detector, args.cuda, testset, BaseTransform(net.size, rgb_means, (2, 0, 1)), # resize + 减均值 + 通道调换 top_k, thresh=0.01) # thresh=0.01,为什么这么小?可以结合mAP介绍的笔记
def train():
    """Train the detector using module-level globals (net, optimizer, criterion,
    priors, train_dataset, testset, detector, logger, viz, ...).

    Checkpoints every args.save_frequency epochs, evaluates every
    args.test_frequency epochs, and logs running mean losses every 10
    iterations (the /10 below assumes that print interval).
    """
    net.train()
    # loss counters
    epoch = 0
    if args.resume_net:
        epoch = 0 + args.resume_epoch
    epoch_size = len(
        train_dataset) // args.batch_size  # How many batch size is needed.
    max_iter = args.max_epoch * epoch_size
    print('max_iter : ', max_iter)
    # LR decay milestones in iterations; VOC vs COCO schedules
    stepvalues_VOC = (150 * epoch_size, 200 * epoch_size, 250 * epoch_size)
    stepvalues_COCO = (90 * epoch_size, 120 * epoch_size, 140 * epoch_size)
    stepvalues = (stepvalues_VOC, stepvalues_COCO)[args.dataset == 'COCO']
    print('Training', args.version, 'on', train_dataset.name)
    step_index = 0
    ## To visualize
    if args.visdom:
        # initialize visdom loss plot
        lot = viz.line(X=torch.zeros((1, )).cpu(),
                       Y=torch.zeros((1, 3)).cpu(),
                       opts=dict(xlabel='Iteration',
                                 ylabel='Loss',
                                 title='Current SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
        epoch_lot = viz.line(X=torch.zeros((1, )).cpu(),
                             Y=torch.zeros((1, 3)).cpu(),
                             opts=dict(
                                 xlabel='Epoch',
                                 ylabel='Loss',
                                 title='Epoch SSD Training Loss',
                                 legend=['Loc Loss', 'Conf Loss', 'Loss']))
    if args.resume_epoch > 0:
        # resuming: skip iterations already covered by the loaded checkpoint
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0
    log_file = open(log_file_path, 'w')
    batch_iterator = None
    # running loss sums, reset every 10-iteration print window
    mean_loss_c = 0
    mean_loss_l = 0
    for iteration in range(start_iter, max_iter + 10):
        if iteration % epoch_size == 0:
            # epoch boundary: rebuild the (shuffled) batch iterator
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(train_dataset,
                                batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=detection_collate))
            # loc_loss = 0
            # conf_loss = 0
            # periodic checkpoint
            if epoch % args.save_frequency == 0 and epoch > 0:
                torch.save(
                    net.state_dict(),
                    os.path.join(
                        save_folder, args.version + '_' + args.dataset +
                        '_epoches_' + repr(epoch) + '.pth'))
            ## Evaluation
            if epoch % args.test_frequency == 0 and epoch > 0:
                net.eval()
                top_k = (300, 200)[args.dataset == 'COCO']
                if args.dataset == 'VOC':
                    # net.module.size -> net.size.
                    APs, mAP = test_net(test_save_dir,
                                        net,
                                        detector,
                                        args.cuda,
                                        testset,
                                        BaseTransform(net.size, rgb_means,
                                                      rgb_std, (2, 0, 1)),
                                        top_k,
                                        thresh=0.01)
                    APs = [str(num) for num in APs]
                    mAP = str(mAP)
                    log_file.write(str(iteration) + ' APs:\n' + '\n'.join(APs))
                    log_file.write('\n mAP:\n' + mAP + '\n')
                    # 1. Log scalar values (scalar summary)
                    # info = {'accuracy': mAP}
                    #
                    # for tag, value in info.items():
                    #     logger.scalar_summary(tag, value, iteration + 1)
                else:
                    test_net(test_save_dir, net, detector, args.cuda, testset,
                             BaseTransform(net.size, rgb_means, rgb_std,
                                           (2, 0, 1)),
                             top_k, thresh=0.01)
                net.train()
            epoch += 1
        load_t0 = time.time()
        if iteration in stepvalues:
            # LR milestone reached: advance the decay step and plot epoch losses
            step_index = stepvalues.index(iteration) + 1
            if args.visdom:
                viz.line(
                    X=torch.ones((1, 3)).cpu() * epoch,
                    Y=torch.Tensor([
                        mean_loss_l, mean_loss_c, mean_loss_l + mean_loss_c
                    ]).unsqueeze(0).cpu() / epoch_size,
                    win=epoch_lot,
                    update='append')
        lr = adjust_learning_rate(optimizer, args.gamma, epoch, step_index,
                                  iteration, epoch_size)
        # load train data
        images, targets = next(batch_iterator)
        if args.cuda:
            images = images.cuda()
            targets = [ann.cuda() for ann in targets]
        else:
            images = images
            targets = [ann for ann in targets]
        # forward
        out = net(images)
        # backprop
        optimizer.zero_grad()
        # arm branch loss
        loss_l, loss_c = criterion(out, priors, targets)
        # odm branch loss
        mean_loss_c += loss_c.item()
        mean_loss_l += loss_l.item()
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        load_t1 = time.time()
        if iteration % epoch_size == 0:
            # 1. Log scalar values (scalar summary) once per epoch
            info = {
                'loss': loss.item(),
                'loc_loss': loss_l.item(),
                'conf_loss': loss_c.item()
            }
            for tag, value in info.items():
                logger.scalar_summary(tag, value, iteration + 1)
        if iteration % 10 == 0:
            # /10 turns the window sums into window means (10-iteration window)
            print('Epoch:' + repr(epoch) + ' || epochiter: ' +
                  repr(iteration % epoch_size) + '/' + repr(epoch_size) +
                  '|| Totel iter ' + repr(iteration) +
                  ' || L: %.4f C: %.4f||' %
                  (mean_loss_l / 10, mean_loss_c / 10) +
                  'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                  'LR: %.8f' % (lr))
            log_file.write('Epoch:' + repr(epoch) + ' || epochiter: ' +
                           repr(iteration % epoch_size) + '/' +
                           repr(epoch_size) + '|| Totel iter ' +
                           repr(iteration) + ' || L: %.4f C: %.4f||' %
                           (mean_loss_l / 10, mean_loss_c / 10) +
                           'Batch time: %.4f sec. ||' % (load_t1 - load_t0) +
                           'LR: %.8f' % (lr) + '\n')
            mean_loss_c = 0
            mean_loss_l = 0
            if args.visdom and args.send_images_to_visdom:
                random_batch_index = np.random.randint(images.size(0))
                viz.image(images.data[random_batch_index].cpu().numpy())
    log_file.close()
    torch.save(
        net.state_dict(),
        os.path.join(save_folder,
                     'Final_' + args.version + '_' + args.dataset + '.pth'))
def stixel_test(dataset, model):
    """Draw the predicted stixel cut line on every image and save to args.outpath.

    For each image, the model's stixel head yields a per-column distribution;
    argmax picks a row bin per column, which is mapped back to pixel
    coordinates (100 columns, 50 row bins -- hardcoded grid) and drawn as a
    short horizontal yellow segment.
    """
    num_images = len(dataset)
    for i in range(num_images):
        im, tar, h, w, oimg = dataset.pull_item(i)
        x = Variable(im.unsqueeze(0)).cuda()
        dec, stixel = model(x)
        predict = stixel.data.cpu().numpy()[0]
        predict = predict.argmax(1)
        # FIX: use a dedicated loop variable (`col`) instead of reusing `x`,
        # which shadowed the input tensor above.
        for col, py in enumerate(predict):
            x0 = int(col * w / 100)
            x1 = int((col + 1) * w / 100)
            y = int((py + 0.5) * h / 50)
            cv2.line(oimg, (x0, y), (x1, y), (0, 255, 255), 1)
        cv2.imwrite(os.path.join(args.outpath, '%d.png' % i), oimg)
        print("finish %d/%d" % (i, num_images))


if __name__ == '__main__':
    # load net
    num_classes = 9 + 1  # +1 background
    net = torch.load(args.model)  # whole serialized module, not a state_dict
    net.eval()
    print('Finished loading model!')
    dataset = KittiTracking(args.basepath, 20, BaseTransform(ssd_dim, means))
    net = net.cuda()
    cudnn.benchmark = True
    # NOTE(review): `dection_test` looks like a typo for `detection_test`;
    # it must be defined elsewhere in this file -- confirm before renaming.
    dection_test(dataset, net)
    #stixel_test(dataset,net)
if __name__ == '__main__':
    # ---- build and load the network -------------------------------------
    # load net
    print('len(labelmap) = ', len(labelmap))
    # +1 for background; 13 in total considering "ignored" as a separate class
    num_classes = len(labelmap) + 1
    print('in main() num_classes = ', num_classes)
    net = build_refinedet('test', int(args.input_size), num_classes)  # initialize refinedet
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')

    # ---- build the evaluation dataset -----------------------------------
    eval_dataset = VISDRONEDetection(
        set_type,
        BaseTransform(int(args.input_size), dataset_mean),
        VISDRONEAnnotationTransform())

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # ---- run evaluation --------------------------------------------------
    test_net(args.save_folder, net, args.cuda, eval_dataset,
             BaseTransform(net.size, dataset_mean), args.top_k,
             int(args.input_size),
             thresh=args.confidence_threshold)
def main(args):
    """Train SSD-MobileNetV3 on a COCO-format detection dataset.

    Args:
        args: parsed CLI namespace providing image_root, annotation,
            image_size, save_folder, pretrain_model_path, learning_rate,
            momentum, weight_decay, threshold, neg_pos_ratio and epochs.

    Side effects: writes TensorBoard scalars and periodic checkpoints
    under save_folder/<timestamp>.
    """
    create_time = time.strftime('%Y%m%d_%H%M', time.localtime(time.time()))
    save_folder_path = os.path.join(args.save_folder, create_time)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): COCOAnnotationTransform is passed as a class, not an
    # instance — confirm COCODetection expects a callable class here.
    dataset = COCODetection(root=args.image_root,
                            annotation_json=args.annotation,
                            transform=BaseTransform(img_size=args.image_size),
                            target_transform=COCOAnnotationTransform)
    dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True,
                            collate_fn=detection_collate)

    n_classes = dataset.get_class_number() + 1  # +1 for background
    print("Detect class number: {}".format(n_classes))
    # write category id -> label name map
    dataset.get_class_map()

    model = MobileNetv3(n_classes=n_classes)
    ssd = SSDMobilenetv3(model, n_classes)
    if args.pretrain_model_path:
        ssd.load_state_dict(torch.load(args.pretrain_model_path))

    # Twice the default learning rate for biases, as in the original Caffe repo.
    # NOTE(review): parameters are collected from `model` (the backbone), not
    # `ssd` — the SSD head parameters are not registered with the optimizer;
    # confirm this is intentional.
    biases = []
    not_biases = []
    for param_name, param in model.named_parameters():
        if param.requires_grad:
            (biases if param_name.endswith('.bias') else not_biases).append(param)
    optimizer = torch.optim.SGD(
        params=[{'params': biases, 'lr': args.learning_rate},
                {'params': not_biases}],
        lr=args.learning_rate, momentum=args.momentum,
        weight_decay=args.weight_decay)

    ssd = ssd.to(device)
    criterion = MultiBoxLossV3(ssd.priors_cxcy, args.threshold,
                               args.neg_pos_ratio).to(device)

    print(f"epochs: {args.epochs}")
    # BUG FIX: the original iterated over every param group but only ever
    # reassigned group 1 — a single assignment has the identical effect.
    optimizer.param_groups[1]['lr'] = args.learning_rate
    print(f"learning rate.\nThe new LR is {optimizer.param_groups[1]['lr']}")

    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1,
                                  patience=15, verbose=True, threshold=0.00001,
                                  threshold_mode='rel', cooldown=0, min_lr=0,
                                  eps=1e-08)

    n_train = min(len(dataset), 5000)  # cap the progress-bar total
    global_step = 0
    writer = SummaryWriter()
    for epoch in range(args.epochs):
        mean_loss = 0
        mean_count = 0
        ssd.train()
        with tqdm(total=n_train, desc=f"{epoch + 1} / {args.epochs}",
                  unit='img') as pbar:
            for img, target in dataloader:
                img = img.to(device)
                # per-image ground truth: last column is the class label
                boxes = [anno.to(device)[:, :-1] for anno in target]
                labels = [anno.to(device)[:, -1] for anno in target]

                predicted_locs, predicted_scores = ssd(img)
                loss = criterion(predicted_locs, predicted_scores, boxes, labels)
                pbar.set_postfix(**{"loss ": float(loss)})
                mean_loss += float(loss)
                mean_count += 1

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                pbar.update(img.shape[0])

        # NOTE(review): the plateau scheduler is stepped with the epoch's
        # summed loss (not the mean) — preserved from the original; confirm.
        scheduler.step(mean_loss)
        writer.add_scalar('Train/Loss', float(mean_loss / mean_count), global_step)
        global_step += 1
        if epoch % 10 == 0 or epoch == args.epochs - 1:
            save_model(save_folder_path, ssd, epoch)
    writer.close()
j = 0 # will correspond to the occurrences of the class while detections[0, i, j, 0] >= 0.6: # taking into account all the occurrences j of the class i that have a matching score larger than 0.6 pt = (detections[0, i, j, 1:] * scale).numpy() # coordinates of the points at the upper left and the lower right of the detector rectangle. cv2.rectangle(frame, (int(pt[0]), int(pt[1])), (int(pt[2]), int(pt[3])), (255, 0, 0), 2) cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA) j += 1 return frame # Creating the SSD neural network net = build_ssd('test') # get the weights of the neural network from another one that is pretrained (ssd300_mAP_77.43_v2.pth) net.load_state_dict(torch.load('ssd300_mAP_77.43_v2.pth', map_location = lambda storage, loc: storage)) # Creating the transformation for images to be compatible with ssd neural network transform = BaseTransform(net.size, (104/256.0, 117/256.0, 123/256.0)) #pre specified # Doing some Object Detection on a video videoName=str(input("ENTER VIDEO LOCATION TO BE ANALYSED: ")) videoNewLoc=str(input("ENTER LOCATION WHERE NEW VIDEO HAS TO BE SAVED: ")) outputName=str(input("ENTER NAME OF DESIRED OUTPUT VIDEO TO BE GENEATED: ")) outputName=videoNewLoc+'\\'+outputName+'.mp4' reader = imageio.get_reader(videoName) fps = reader.get_meta_data()['fps'] # get the fps frequence writer = imageio.get_writer(outputName, fps = fps) # create output video with this same fps frequence for i, frame in enumerate(reader): # iterate on the frames of the output video frame = detect(frame, net.eval(), transform) writer.append_data(frame) # add the next frame in the output video. print("TOTAL FRAMES PROCESSED: "i) # print the number of the processed frame. writer.close()
# NOTE(review): fragment — this chunk begins inside detect_obj(); neural_net,
# X, width, height and image come from the enclosing scope outside this view.
Y = neural_net(X)
detections = Y.data
# scale factors mapping normalized box coords back to pixel space
scaled_tensor = torch.Tensor([width, height, width, height])
for i in range(detections.size(1)):  # iterate over classes
    j = 0  # occurrence index within class i
    while detections[0, i, j, 0] >= 0.6:  # keep occurrences scoring >= 0.6
        c = (detections[0, i, j, 1:] * scaled_tensor)
        c = c.numpy()
        cv2.rectangle(image, (int(c[0]), int(c[1])), (int(c[2]), int(c[3])), (0, 255, 0), 2)
        cv2.putText(image, labelmap[i - 1], (int(c[0]), int(c[1])),
                    cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA)
        j = j + 1
return image

# SSD in test phase; pretrained weights loaded onto CPU
nn_obj = build_ssd("test")
nn_obj.load_state_dict(torch.load('ssd300_mAP_77.43_v2.pth',
                                  map_location=lambda storage, loc: storage))
transform = BaseTransform(nn_obj.size, (104/256.0, 117/256.0, 123/256.0))

# Run detection frame-by-frame over the video, preserving its fps
reader = imageio.get_reader('horse_vid.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('output_vid_2.mp4', fps=fps)
for i, frame in enumerate(reader):
    new_frame = detect_obj(frame, nn_obj, transform)
    writer.append_data(new_frame)
    print(i)  # progress indicator
writer.close()
def evaluate_detections(box_list, output_dir, dataset):
    """Dump VOC-format result files, then score them with the PASCAL evaluator."""
    # The evaluator reads the result files back, so the write must come first.
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # One extra class for the implicit background.
    n_classes = len(labelmap) + 1

    # Build SSD in test phase and restore the trained weights.
    net = build_ssd('test', cfg, args.use_pred_module)
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')

    # VOC2007 evaluation split with mean-subtraction preprocessing.
    dataset = VOCDetection(args.voc_root, [('2007', set_type)],
                           BaseTransform(cfg['min_dim'], dataset_mean),
                           VOCAnnotationTransform())

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # Full evaluation pass.
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, dataset_mean),
             args.top_k, cfg['min_dim'],
             thresh=args.confidence_threshold)
# NOTE(review): fragment — this chunk begins inside test_net()'s per-detection
# loop; pt, score, label_name, filename and pred_num come from outside this view.
coords = (pt[0], pt[1], pt[2], pt[3])
pred_num += 1
# append one "<n> label: <name> score: <s> x || y || x2 || y2" line per detection
with open(filename, mode='a') as f:
    f.write(
        str(pred_num) + ' label: ' + label_name + ' score: ' +
        str(score) + ' ' + ' || '.join(str(c) for c in coords) + '\n')
j += 1


if __name__ == '__main__':
    # load net
    num_classes = len(VHR_CLASSES) + 1  # +1 background
    net = build_stdn('test', num_classes)  # initialize STDN in test phase
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data (raw images; preprocessing happens inside test_net)
    testset = VHRDetection(args.vhr_root, ['test'], None, AnnotationTransform_VHR())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder, net, args.cuda, testset,
             BaseTransform(net.size, (104, 117, 123)),
             thresh=args.visual_threshold)
cudnn.benchmark = True  # NOTE(review): inside an `if cuda:` branch cut off above this chunk
else:
    print('move to cpu')
    net = net.cpu()

net.eval()
#net.load_state_dict(state_dict)
print('Finished loading model!')
#print(net)

# load data: raw images, preprocessing is applied later via BaseTransform
if args.dataset == 'VOC':
    testset = VOCDetection(
        VOCroot, [('2007', 'test')], None, AnnotationTransform())
elif args.dataset == 'COCO':
    testset = COCODetection(
        COCOroot, [('2014', 'minival')], None)
        #COCOroot, [('2015', 'test-dev')], None)
else:
    print('Only VOC and COCO dataset are supported now!')

# evaluation
#top_k = (300, 200)[args.dataset == 'COCO']
top_k = 200
detector = Detect(num_classes, 0, cfg)
save_folder = os.path.join(args.save_folder, args.dataset)
# RFB_mobile uses its own per-channel means (second tuple)
rgb_means = ((104, 117, 123), (103.94, 116.78, 123.68))[args.version == 'RFB_mobile']
test_net(save_folder, net, detector, args.cuda, testset,
         BaseTransform(net.size, rgb_means, (2, 0, 1)),
         top_k, thresh=0.01)
# Select the training augmentation pipeline.
if args.aug_method == "vanilla":
    ssd_aug = SSDAugmentation(args.gt_pixel_jitter, args.expand_ratio, args.ssd_dim, args.means,
                              use_normalize=args.use_normalize, p_only=args.p_only,
                              use_pixel_link=True, pixel_link_version=config.version)
elif args.aug_method == "cuda":
    raise NotImplementedError("--aug_method = 'cuda' is deprecated!")
    # ssd_aug = SSDAugmentationCUDA(args.gt_pixel_jitter, args.expand_ratio, args.ssd_dim, args.means,
    #                               use_normalize=args.use_normalize, p_only=args.p_only, use_pixel_link=True, pixel_link_version=config.version)

# for ap calculation (deterministic transform, no augmentation)
base_aug = BaseTransform(args.ssd_dim, args.means,
                         use_normalize=args.use_normalize, p_only=args.p_only)

# Build one training DataLoader per cross-validation fold.
for i in range(args.cross_validation):
    cv_train.append(
        data.DataLoader(FISHdetectionV2(
            args.datapath, data_splitter.data_cv_train[i], ssd_aug,
            dataset_name='lesion_cv_train_' + str(i),
            load_data_to_ram=args.load_data_to_ram,
            use_pixel_link=True),
            batch_size=args.batch_size, shuffle=True,
            num_workers=args.num_workers,
            collate_fn=detection_collate_v2_pixel_link,
            # NOTE(review): chunk truncated here — the DataLoader call continues past this view
# save_folder = os.path.join(VOCroot, "eval") # # logging.info("==> save eval result txt of each class to the directory of: ", eval_result_txt) # if not os.path.exists(eval_result_txt): # os.makedirs(eval_result_txt) # # if not os.path.exists(cachedir): # os.makedirs(cachedir) # logging.info("==> save annotation cache file to the directory of: ", cachedir) # # output_dir = get_output_dir(save_folder, "PR_curves") # logging.info("==> save detections.pkl to the directory of: ", output_dir) # comp_id = "comp4_" transform = BaseTransform(input_size, dataset_mean) # dataset = VOCDetection(VOCroot, [(set_type)], transform, AnnotationTransform()) if use_cuda: net = net.cuda() def main(net): logging.info("==> do detect on every image with model reference.") img_list_file = "/home/hyer/datasets/OCR/ssd_k1_test.txt" with open(img_list_file, "r") as f: data = f.readlines() img_list = [] for li in data: img_list.append(li.strip())
# NOTE(review): fragment — begins mid-expression inside detect(); the loop
# headers and function signature are outside this view.
scale).numpy()  # detection box scaled back to pixel coordinates
cv2.rectangle(frame, (int(pt[0]), int(pt[1])), (int(pt[2]), int(pt[3])), (255, 0, 0), 2)
cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])),
            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 2, cv2.LINE_AA)
j += 1
return frame

# Creating SSD neural network — test phase because we use a trained model
net = build_ssd('test')
# pretrained weights are attributed to net (loaded onto CPU)
net.load_state_dict(
    torch.load('ssd300_mAP_77.43_v2.pth', map_location=lambda storage, loc: storage))
# transform frames to match the network's expected input
transform = BaseTransform(net.size, (104 / 256.0, 117 / 256.0, 123 / 256.0))

# object detection on a video, keeping the source fps for the output
reader = imageio.get_reader('input.mp4')
fps = reader.get_meta_data()['fps']
writer = imageio.get_writer('output.mp4', fps=fps)
for i, frame in enumerate(reader):
    frame = detect(frame, net.eval(), transform)
    writer.append_data(frame)
    print(i)  # progress indicator
writer.close()
def evaluate_detections(box_list, output_dir, dataset):
    # Write VOC-format result files first, then run the PASCAL-style evaluation
    # (which reads those files back from disk).
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # load net
    num_classes = len(labelmap) + 1  # +1 for background
    net = build_ssd('test', 300, num_classes)  # initialize SSD (300x300 input)
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data
    #test_sets = "./data/sixray/test_1650.txt"
    test_sets = imgsetpath
    dataset = SIXrayDetection(test_sets,
                              BaseTransform(300, dataset_mean),
                              SIXrayAnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, dataset_mean),
             args.top_k, 300,
             thresh=args.confidence_threshold)
# NOTE(review): fragment — prec/rec/sorted_scores come from an enclosing
# evaluation routine outside this view. Despite the name, "F2" here is the
# harmonic-mean F1 formula 2*p*r/(p+r) maximized over thresholds.
# NOTE(review): divides by zero where prec+rec == 0 — confirm inputs exclude that.
F2_index = np.argmax(2*prec*rec/(prec+rec))
F2 = np.max(2*prec*rec/(prec+rec))
print('F2_corresponding score = ', sorted_scores[F2_index])
print('F2 coresponding rec prec = ', rec[F2_index], prec[F2_index])
print('F2=', F2)


if __name__ == '__main__':
    # load net
    # num_classes = 1 + 1  # +1 for background
    net = PeleeNet('test', icdar2015)
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data (304 = Pelee input resolution)
    dataset = ICDAR2015Detection(args.icdar2015_root, 'test',
                                 BaseTransform(304, dataset_mean),
                                 ICDAR2015AnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(304, dataset_mean), args.top_k, 304,
             thresh=args.confidence_threshold)
def Model_Params(self, model_dir="output", use_gpu=True):
    '''
    User Function - Set Model Params

    Args:
        model_dir (str): Directory holding config_final.py and the trained weights
        use_gpu (bool): If True use GPU else run on CPU

    Returns:
        None
    '''
    # Rewrite the training config with cuda disabled when running on CPU.
    with open(model_dir + "/config_final.py", 'r') as f:
        lines = f.read()
    if not use_gpu:
        lines = lines.replace("cuda=True", "cuda=False")
    with open(model_dir + "/config_test.py", 'w') as f:
        f.write(lines)

    print("Loading model for inference")
    self.system_dict["cfg"] = Config.fromfile(model_dir + "/config_test.py")
    anchor_config = anchors(self.system_dict["cfg"].model)
    self.system_dict["priorbox"] = PriorBox(anchor_config)
    self.system_dict["net"] = build_net('test',
                                        self.system_dict["cfg"].model.input_size,
                                        self.system_dict["cfg"].model)
    init_net(self.system_dict["net"], self.system_dict["cfg"],
             model_dir + "/VOC/Final_Pelee_VOC_size304.pth")
    print_info('===> Finished constructing and loading model', ['yellow', 'bold'])
    self.system_dict["net"].eval()

    with torch.no_grad():
        self.system_dict["priors"] = self.system_dict["priorbox"].forward()
        if self.system_dict["cfg"].test_cfg.cuda:
            self.system_dict["net"] = self.system_dict["net"].cuda()
            self.system_dict["priors"] = self.system_dict["priors"].cuda()
            cudnn.benchmark = True
        else:
            self.system_dict["net"] = self.system_dict["net"].cpu()

    self.system_dict["_preprocess"] = BaseTransform(
        self.system_dict["cfg"].model.input_size,
        self.system_dict["cfg"].model.rgb_means, (2, 0, 1))
    self.system_dict["num_classes"] = self.system_dict["cfg"].model.num_classes
    self.system_dict["detector"] = Detect(self.system_dict["num_classes"],
                                          self.system_dict["cfg"].loss.bkg_label,
                                          anchor_config)
    print("Done....")

    print("Loading other params")
    # One distinguishable color per class.
    base = int(np.ceil(pow(self.system_dict["num_classes"], 1. / 3)))
    self.system_dict["colors"] = [self._to_color(x, base)
                                  for x in range(self.system_dict["num_classes"])]
    # BUG FIX: the original tested `lines != ""` — comparing the whole list to a
    # string, which is always True — and sliced off the last character of every
    # line even when it had no trailing newline. Strip the newline properly and
    # skip genuinely empty lines instead.
    cats = ['__background__']
    with open(self.system_dict["class_list"]) as f:
        class_lines = f.readlines()
    for line in class_lines:
        name = line.rstrip("\n")
        if name != "":
            cats.append(name)
    self.system_dict["labels"] = cats
    print("Done....")
# NOTE(review): fragment — tail of test_net(): persist all detections, then score.
with open(det_file, 'wb') as f:
    pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

print('Evaluating detections')
evaluate_detections(all_boxes, output_dir, dataset)


def evaluate_detections(box_list, output_dir, dataset):
    # Write VOC-format result files first, then run the PASCAL-style evaluation
    # (which reads those files back from disk).
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # load net
    num_classes = len(labelmap) + 1  # +1 for background
    net = build_ssd('test', 300, num_classes)  # initialize SSD (300x300 input)
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data (VOC2012 split)
    dataset = VOCDetection(args.voc_root, [('2012', set_type)],
                           BaseTransform(300, dataset_mean),
                           VOCAnnotationTransform())
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, dataset_mean),
             args.top_k, 300,
             thresh=args.confidence_threshold)
def evaluate_detections(box_list, output_dir, dataset):
    # Write VOC-format result files first, then run the PASCAL-style evaluation
    # (which reads those files back from disk).
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # load net (2 classes: car/carplate task uses a single foreground + background)
    net = build_ssd('test', args.input_size, 2)  # initialize SSD
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')
    # load data; keep_difficult=True keeps difficult boxes for evaluation
    dataset = CAR_CARPLATEDetection(
        root=args.voc_root,
        transform=BaseTransform(args.input_size, dataset_mean),
        target_transform=CAR_CARPLATEAnnotationTransform(keep_difficult=True),
        dataset_name=set_type)
    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True
    # evaluation
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, dataset_mean),
             args.top_k, args.input_size,
             thresh=args.confidence_threshold)
scale = torch.Tensor([width, height, width, height]) #tensor element: batch, jumlah object, occurence of object, tuple of [score, x0, y0, x1, y1] for i in range(detections.size(1)): j = 0 while detections[0, i, j, 0] >= 0.6: #threshold 0.6 pt = (detections[0, i, j, 1:] * scale).numpy() #convert tensor balik ke numpy array cv2.rectangle(frame, (int(pt[0]), int(pt[1])), (int(pt[2]), int(pt[3])), (255, 0, 0), 1) cv2.putText(frame, labelmap[i - 1], (int(pt[0]), int(pt[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0), 1, cv2.LINE_AA) j += 1 return frame net = build_ssd('test') #SSD neural network, parameter: train/test phase net.load_state_dict( torch.load('ssd300_mAP_77.43_v2.pth', map_location=lambda storage, loc: storage)) transform = BaseTransform(net.size, (104 / 256.0, 117 / 256.0, 123 / 256.0)) reader = imageio.get_reader('funny_dog.mp4') fps = reader.get_meta_data()['fps'] writer = imageio.get_writer('object_detection.mp4', fps=fps) for i, frame in enumerate(reader): frame = detect(frame, net.eval(), transform) writer.append_data(frame) print(i) writer.close()
def main():
    """Entry point: parse CLI args, build/restore the SSD, and run the
    train/validate loop over UCF24, checkpointing each epoch.

    Side effects: writes checkpoints and a .log file under --save_root.
    Relies on module globals: str2bool, v2, CLASSES, day, build_ssd, train,
    validate, save_checkpoint, and the torch/np imports of this file.
    """
    global my_dict, keys, k_len, arr, xxx, args, log_file, best_prec1
    parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training')
    parser.add_argument('--version', default='v2', help='conv11_2(v2) or pool6(v1) as last layer')
    parser.add_argument('--basenet', default='vgg16_reducedfc.pth', help='pretrained base model')
    parser.add_argument('--dataset', default='ucf24', help='pretrained base model')
    parser.add_argument('--ssd_dim', default=300, type=int, help='Input Size for SSD')  # only support 300 now
    parser.add_argument('--modality', default='rgb', type=str, help='INput tyep default rgb options are [rgb,brox,fastOF]')
    parser.add_argument('--jaccard_threshold', default=0.5, type=float, help='Min Jaccard index for matching')
    parser.add_argument('--batch_size', default=1, type=int, help='Batch size for training')
    parser.add_argument('--num_workers', default=0, type=int, help='Number of workers used in dataloading')
    parser.add_argument('--max_iter', default=120000, type=int, help='Number of training iterations')
    parser.add_argument('--man_seed', default=123, type=int, help='manualseed for reproduction')
    parser.add_argument('--cuda', default=True, type=str2bool, help='Use cuda to train model')
    parser.add_argument('--ngpu', default=1, type=str2bool, help='Use cuda to train model')
    parser.add_argument('--lr', '--learning-rate', default=0.0005, type=float, help='initial learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, help='momentum')
    parser.add_argument('--stepvalues', default='70000,90000', type=str, help='iter number when learning rate to be dropped')
    parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay for SGD')
    parser.add_argument('--gamma', default=0.2, type=float, help='Gamma update for SGD')
    parser.add_argument('--log_iters', default=True, type=bool, help='Print the loss at each iteration')
    parser.add_argument('--visdom', default=False, type=str2bool, help='Use visdom to for loss visualization')
    parser.add_argument('--data_root', default='/data4/lilin/my_code/realtime/', help='Location of VOC root directory')
    parser.add_argument('--save_root', default='/data4/lilin/my_code/realtime/realtime-lstm/saveucf24/', help='Location to save checkpoint models')
    parser.add_argument('--iou_thresh', default=0.5, type=float, help='Evaluation threshold')
    parser.add_argument('--conf_thresh', default=0.01, type=float, help='Confidence threshold for evaluation')
    parser.add_argument('--nms_thresh', default=0.45, type=float, help='NMS threshold')
    parser.add_argument('--topk', default=50, type=int, help='topk for evaluation')
    parser.add_argument('--clip_gradient', default=40, type=float, help='gradients clip')
    parser.add_argument('--resume', default="/data4/lilin/my_code/realtime/realtime-lstm/saveucf24/ucf101_CONV-SSD-ucf24-rgb-bs-32-vgg16-lr-00050_train_ssd_conv_lstm_01-06_epoch_11_checkpoint.pth.tar", type=str, help='Resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, help='start epoch')
    parser.add_argument('--epochs', default=35, type=int, metavar='N', help='number of total epochs to run')
    parser.add_argument('--eval_freq', default=2, type=int, metavar='N', help='evaluation frequency (default: 5)')
    parser.add_argument('--snapshot_pref', type=str, default="ucf101_vgg16_ssd300_")
    parser.add_argument('--lr_milestones', default=[-2, -5], type=float, help='initial learning rate')
    parser.add_argument('--arch', type=str, default="VGG16")
    parser.add_argument('--Finetune_SSD', default=False, type=str)
    parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                        help='evaluate model on validation set')

    # file name (without extension) is folded into the snapshot prefix below
    print(__file__)
    file_name = (__file__).split('/')[-1]
    file_name = file_name.split('.')[0]
    print(file_name)

    ## Parse arguments
    args = parser.parse_args()

    ## set random seeds for reproducibility
    np.random.seed(args.man_seed)
    torch.manual_seed(args.man_seed)
    if args.cuda:
        torch.cuda.manual_seed_all(args.man_seed)

    if args.cuda and torch.cuda.is_available():
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        torch.set_default_tensor_type('torch.FloatTensor')

    args.cfg = v2
    args.train_sets = 'train'
    args.means = (104, 117, 123)
    num_classes = len(CLASSES) + 1  # +1 background
    args.num_classes = num_classes
    args.stepvalues = [int(val) for val in args.stepvalues.split(',')]
    args.loss_reset_step = 30
    args.eval_step = 10000
    args.print_step = 10
    args.data_root += args.dataset + '/'

    ## Define the experiment Name, used to name the save directory/log
    args.snapshot_pref = ('ucf101_CONV-SSD-{}-{}-bs-{}-{}-lr-{:05d}').format(
        args.dataset, args.modality, args.batch_size, args.basenet[:-14],
        int(args.lr * 100000)) + '_' + file_name + '_' + day
    print(args.snapshot_pref)

    if not os.path.isdir(args.save_root):
        os.makedirs(args.save_root)

    net = build_ssd(300, args.num_classes)

    # NOTE(review): --Finetune_SSD is declared type=str, so argparse yields a
    # string and `is True` can never hold when the flag is passed on the CLI;
    # the branch only runs if the default is changed in code. Also, the built
    # model_dict is never loaded into `net` here — confirm intended.
    if args.Finetune_SSD is True:
        print("load snapshot")
        pretrained_weights = "/data4/lilin/my_code/realtime/ucf24/rgb-ssd300_ucf24_120000.pth"
        pretrained_dict = torch.load(pretrained_weights)
        model_dict = net.state_dict()
        # 1. filter out unnecessary keys
        pretrained_dict_2 = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict_2)
        # 3. load the new state dict
    elif args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            xxx = checkpoint['state_dict']
            net.load_state_dict(checkpoint['state_dict'])
            print(("=> loaded checkpoint '{}' (epoch {})"
                   .format(args.evaluate, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))
    elif args.modality == 'fastOF':
        print('Download pretrained brox flow trained model weights and place them at:::=> ', args.data_root + 'ucf24/train_data/brox_wieghts.pth')
        pretrained_weights = args.data_root + 'train_data/brox_wieghts.pth'
        print('Loading base network...')
        net.load_state_dict(torch.load(pretrained_weights))
    else:
        vgg_weights = torch.load(args.data_root + 'train_data/' + args.basenet)
        print('Loading base network...')
        net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    def xavier(param):
        # xavier initializer helper (kept for compatibility with older torch)
        init.xavier_uniform(param)

    def weights_init(m):
        # xavier-init conv layers; currently unused (apply() calls commented out)
        if isinstance(m, nn.Conv2d):
            xavier(m.weight.data)
            m.bias.data.zero_()

    print('Initializing weights for extra layers and HEADs...')

    # Set a doubled learning rate and zero weight decay for bias parameters.
    parameter_dict = dict(net.named_parameters())
    params = []
    for name, param in parameter_dict.items():
        if name.find('bias') > -1:
            print(name, 'layer parameters will be trained @ {}'.format(args.lr * 2))
            params += [{'params': [param], 'lr': args.lr * 2, 'weight_decay': 0}]
        else:
            print(name, 'layer parameters will be trained @ {}'.format(args.lr))
            params += [{'params': [param], 'lr': args.lr, 'weight_decay': args.weight_decay}]

    optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(args.num_classes, 0.5, True, 0, True, 3, 0.5, False, args.cuda)
    scheduler = MultiStepLR(optimizer, milestones=args.stepvalues, gamma=args.gamma)

    print('Loading Dataset...')
    train_dataset = UCF24Detection(args.data_root, args.train_sets,
                                   SSDAugmentation(args.ssd_dim, args.means),
                                   AnnotationTransform(), input_type=args.modality)
    val_dataset = UCF24Detection(args.data_root, 'test',
                                 BaseTransform(args.ssd_dim, args.means),
                                 AnnotationTransform(), input_type=args.modality,
                                 full_test=False)
    train_data_loader = data.DataLoader(train_dataset, args.batch_size,
                                        num_workers=args.num_workers, shuffle=False,
                                        collate_fn=detection_collate, pin_memory=True)
    val_data_loader = data.DataLoader(val_dataset, args.batch_size,
                                      num_workers=args.num_workers, shuffle=False,
                                      collate_fn=detection_collate, pin_memory=True)
    print("train epoch_size: ", len(train_data_loader))
    print('Training SSD on', train_dataset.name)

    # snapshot of the dataset indexing used elsewhere via the globals above
    my_dict = copy.deepcopy(train_data_loader.dataset.train_vid_frame)
    keys = list(my_dict.keys())
    k_len = len(keys)
    arr = np.arange(k_len)
    xxx = copy.deepcopy(train_data_loader.dataset.ids)

    # line-buffered training log; also dump all args and the model description
    log_file = open(args.save_root + args.snapshot_pref + "_training_" + day + ".log", "w", 1)
    log_file.write(args.snapshot_pref + '\n')
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log_file.write(str(arg) + ': ' + str(getattr(args, arg)) + '\n')
    log_file.write(str(net))

    torch.cuda.synchronize()
    for epoch in range(args.start_epoch, args.epochs):
        train(train_data_loader, net, criterion, optimizer, scheduler, epoch)
        print('Saving state, epoch:', epoch)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net.state_dict(),
            'best_prec1': best_prec1,
        }, epoch=epoch)

        # evaluate on validation set
        if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
            torch.cuda.synchronize()
            tvs = time.perf_counter()
            mAP, ap_all, ap_strs = validate(args, net, val_data_loader, val_dataset,
                                            epoch, iou_thresh=args.iou_thresh)
            # remember best prec@1 and save checkpoint
            is_best = mAP > best_prec1
            best_prec1 = max(mAP, best_prec1)
            print('Saving state, epoch:', epoch)
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'best_prec1': best_prec1,
            }, is_best, epoch)
            for ap_str in ap_strs:
                print(ap_str)
                log_file.write(ap_str + '\n')
            ptr_str = '\nMEANAP:::=>' + str(mAP) + '\n'
            print(ptr_str)
            log_file.write(ptr_str)
            torch.cuda.synchronize()
            t0 = time.perf_counter()
            prt_str = '\nValidation TIME::: {:0.3f}\n\n'.format(t0 - tvs)
            print(prt_str)
            # BUG FIX: the original wrote ptr_str (the MEANAP line) here again
            # instead of the validation-time string it just printed.
            log_file.write(prt_str)
    log_file.close()
def evaluate_detections(box_list, output_dir, dataset):
    """Dump VOC-format result files, then score them with the PASCAL evaluator."""
    # The evaluator reads the result files back, so the write must come first.
    write_voc_results_file(box_list, dataset)
    do_python_eval(output_dir)


if __name__ == '__main__':
    # One extra class for the implicit background.
    n_classes = len(labelmap) + 1

    # Build SSD (300x300) in test phase and restore the trained weights.
    net = build_ssd('test', 300, n_classes)
    net.load_state_dict(torch.load(args.trained_model))
    net.eval()
    print('Finished loading model!')

    # 2019 evaluation split with mean-subtraction preprocessing.
    dataset = VOCDetection(args.voc_root, [('2019', set_type)],
                           BaseTransform(300, dataset_mean),
                           VOCAnnotationTransform())

    if args.cuda:
        net = net.cuda()
        cudnn.benchmark = True

    # Full evaluation pass.
    test_net(args.save_folder, net, args.cuda, dataset,
             BaseTransform(net.size, dataset_mean),
             args.top_k, 300,
             thresh=args.confidence_threshold)
# use cuda
if args.cuda:
    print('use cuda')
    cudnn.benchmark = True
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Square input resolution and class count derived from the label map.
input_size = [args.input_size, args.input_size]
num_classes = len(labelmap)

# build model — only the 'yolo' version is supported; anything else aborts.
if args.version != 'yolo':
    print('Unknown Version !!!')
    exit()

from models.yolo import myYOLO
net = myYOLO(device, input_size=input_size, num_classes=num_classes,
             trainable=False)
print('Let us test yolo on the VOC0712 dataset ......')

# load net weights onto the chosen device and freeze for inference
net.load_state_dict(torch.load(args.trained_model, map_location=device))
net.eval()
print('Finished loading model!')

# load data
dataset = VOCDetection(args.voc_root, img_size=None,
                       image_sets=[('2007', set_type)],
                       transform=BaseTransform(net.input_size))

net = net.to(device)

# evaluation
test_net(args.save_folder, net, device, dataset,
         args.top_k, thresh=args.confidence_threshold)