def img_crop_face(root_, pathinfo, mode="train", confidence_threshold=0.05, top_k=10):
    # crop detected faces and resize to 112*112 for arc training
    # root  = "/aidata/dataset/faces/CASIA-FaceV5/train"
    # root_ = "/aidata/dataset/faces/CASIA-FaceV5_Crop"
    resize = 1
    if not os.path.exists(root_):
        os.mkdir(root_)
    if not os.path.exists(root_ + "/" + mode):
        os.mkdir(root_ + "/" + mode)
    for key, imgs in pathinfo.items():
        newdir = os.path.join(root_, mode, os.path.split(key)[1])
        if not os.path.exists(newdir):
            os.mkdir(newdir)
        for img_p in imgs:
            img_raw = cv2.imread(os.path.join(key, img_p), cv2.IMREAD_COLOR)
            img = np.float32(img_raw)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)
            loc, conf = net(img)  # forward pass
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            # ignore low scores
            inds = np.where(scores > confidence_threshold)[0]
            boxes = boxes[inds]
            scores = scores[inds]
            # keep top-K before NMS
            order = scores.argsort()[::-1][:top_k]
            boxes = boxes[order]
            scores = scores[order]
            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
            # keep = py_cpu_nms(dets, args.nms_threshold)
            keep = nms(dets, 0.3, force_cpu=False)  # nms_threshold
            dets = dets[keep, :]
            # keep top-K faster NMS
            dets = dets[:5, :]  # keep_top_k
            for inx, b in enumerate(dets):
                if b[4] < 0.7:
                    continue
                # text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                # boxes are (x1, y1, x2, y2), so crop rows y1:y2 and cols x1:x2
                crop_img = img_raw[b[1]:b[3], b[0]:b[2]]
                crop_resized = cv2.resize(crop_img, (112, 112))
                # cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
                # cx = b[0]
                # cy = b[1] + 12
                # cv2.putText(img_raw, text, (cx, cy),
                #             cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                out_name = '{}_{}.jpg'.format(os.path.splitext(img_p)[0], inx)
                cv2.imwrite(os.path.join(newdir, out_name), crop_resized)
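# img_crop_face() above expects pathinfo to map each identity directory to a list of
# its image file names (each file is later read via os.path.join(key, img_p)). The
# sketch below shows one way such a dict could be built; the directory layout is an
# assumption based on the commented CASIA-FaceV5 paths, not part of the original code.
import os

def build_pathinfo(src_split_dir):
    pathinfo = {}
    for person in sorted(os.listdir(src_split_dir)):
        person_dir = os.path.join(src_split_dir, person)
        if not os.path.isdir(person_dir):
            continue
        pathinfo[person_dir] = [f for f in os.listdir(person_dir)
                                if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))]
    return pathinfo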
def face_detector(frame): img_raw = frame.copy() img = np.float32(img_raw) if resize != 1: img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) loc, conf, landms = net(img) # forward pass priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > CONFIDENCE)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:5000] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, NMS_THRESHOLD) # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:750, :] landms = landms[:750, :] dets = np.concatenate((dets, landms), axis=1) bboxs = [] for b in dets: if b[4] < VIZ_THRESHOLD: continue b = list(map(int, b)) margin = 10 x1, y1, x2, y2 = b[0], b[1], b[2], b[3] img_h, img_w, _ = frame.shape w = x2 - x1 h = y2 - y1 margin = int(min(w, h) * margin / 100) x_a = x1 - margin y_a = y1 - margin x_b = x1 + w + margin y_b = y1 + h + margin if x_a < 0: x_b = min(x_b - x_a, img_w - 1) x_a = 0 if y_a < 0: y_b = min(y_b - y_a, img_h - 1) y_a = 0 if x_b > img_w: x_a = max(x_a - (x_b - img_w), 0) x_b = img_w if y_b > img_h: y_a = max(y_a - (y_b - img_h), 0) y_b = img_h name = "" print(name) face = frame[y_a:y_b, x_a:x_b] rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB) encodings = face_recognition.face_encodings(rgb, [(y_a, x_b, y_b, x_a)]) matches = face_recognition.compare_faces(face_data["encodings"], encodings[0], tolerance=0.55) print(matches) if True in matches: matchedIdxs = [i for (i, b) in enumerate(matches) if b] counts = {} for i in matchedIdxs: name = face_data["names"][i] counts[name] = counts.get(name, 0) + 1 name = max(counts, key=counts.get) print("name1", name) cv2.putText(img_raw, name, (x_a + 10, y_a), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 1, cv2.LINE_AA) cv2.rectangle(img_raw, (x_a, y_a), (x_b, y_b), (255, 0, 0), 1) bboxs.append([x_a, y_a, x_b, y_b]) return img_raw, bboxs
def main(): global args global minmum_loss args.gpu = 0 args.world_size = 1 if args.distributed: args.gpu = args.local_rank % torch.cuda.device_count() torch.cuda.set_device(args.gpu) torch.distributed.init_process_group(backend='nccl', init_method='env://') args.world_size = torch.distributed.get_world_size() args.total_batch_size = args.world_size * args.batch_size ## DATA loading code if args.dataset == 'COCO': if not os.path.exists(cfg['coco_root']): parser.error('Must specify dataset_root if specifying dataset') print("WARNING: Using default COCO dataset_root because " + "--dataset_root was not specified.") cfg = coco dataset = COCODetection(root=cfg['coco_root'], transform=SSDAugmentation( cfg['min_dim'], MEANS)) if args.dataset == 'VOC': cfg = voc dataset = VOCDetection(root=cfg['voc_root'], transform=SSDAugmentation( cfg['min_dim'], MEANS)) print('Training SSD on:', dataset.name) print('Loading the dataset...') train_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) print("Build ssd network") model = build_ssd('train', cfg['min_dim'], cfg['num_classes']) if args.pretrained: vgg_weights = torch.load(args.save_folder + args.basenet) print('Loading base network...') model.vgg.load_state_dict(vgg_weights) model = model.cuda() # optimizer and loss function optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, True) ## get the priorbox of ssd priorbox = PriorBox(cfg) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load( args.resume, map_location=lambda storage, loc: storage.cuda(args.gpu)) args.start_epoch = checkpoint['epoch'] minmum_loss = checkpoint['minmum_loss'] model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) else: print('Initializing weights...') # initialize newly added layers' weights with xavier method model.extras.apply(weights_init) model.loc.apply(weights_init) model.conf.apply(weights_init) print('Using the specified args:') print(args) for epoch in range(args.start_epoch, args.epochs): # train for one epoch end = time.time() loss = train(train_loader, model, priors, criterion, optimizer, epoch) # remember best prec@1 and save checkpoint if args.local_rank == 0: is_best = loss < minmum_loss minmum_loss = min(loss, minmum_loss) save_checkpoint( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'best_prec1': minmum_loss, 'optimizer': optimizer.state_dict(), }, is_best, epoch) epoch_time = time.time() - end print('Epoch %s time cost %f' % (epoch, epoch_time))
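# save_checkpoint() is called at the end of main() above but is not defined in this
# excerpt; a minimal sketch consistent with the call site save_checkpoint({...}, is_best, epoch).
# The file names below are illustrative assumptions, not the project's actual paths.
import shutil
import torch

def save_checkpoint(state, is_best, epoch, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        # keep a separate copy of the best-so-far weights
        shutil.copyfile(filename, 'model_best_epoch_{}.pth.tar'.format(epoch))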
def train(opt, train_dict, device, tb_writer=None):
    log_dir = Path(tb_writer.log_dir) if tb_writer else Path(train_dict['logdir']) / 'logs'
    wdir = str(log_dir / 'weights') + os.sep
    os.makedirs(wdir, exist_ok=True)
    last = wdir + 'last.pt'
    best = wdir + 'best.pt'
    results_file = 'results.txt'
    with open(log_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(train_dict, f, sort_keys=False)
    with open(log_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    cuda = device.type != 'cpu'
    rank = opt.global_rank
    init_seeds(2 + rank)
    train_path = train_dict['train']
    test_path = train_dict['val']
    train_dict['weights'] = last if not train_dict['pretrain'] or (
        train_dict['pretrain'] and not os.path.exists(train_dict['weights'])) else train_dict['weights']

    model = RetinaFace(train_dict, phase='Train')
    pretrained = False
    if os.path.exists(train_dict['weights']):
        pretrained = True
        logger.info('Loading resume network from ====>{}'.format(train_dict['weights']))
        state_dict = torch.load(train_dict['weights'], map_location=device)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict['model'].items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)

    # optimizer parameter groups
    pg0, pg1, pg2 = [], [], []
    for k, v in model.named_parameters():
        v.requires_grad = True
        if '.bias' in k:
            pg2.append(v)  # biases
        elif '.weight' in k and '.bn' not in k:
            pg1.append(v)  # apply weight decay
        else:
            pg0.append(v)  # all else
    if train_dict['adam']:
        optimizer = optim.Adam(pg0, lr=train_dict['lr0'],
                               betas=(train_dict['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0, lr=train_dict['lr0'],
                              momentum=train_dict['momentum'], nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': train_dict['weight_decay']})  # pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    epochs = train_dict['epoch']
    lf = lambda x: (((1 + math.cos(x * math.pi / epochs)) / 2) ** 1.0) * 0.8 + 0.2  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    plot_lr_scheduler(optimizer, scheduler, epochs)

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if state_dict['optimizer'] is not None:
            optimizer.load_state_dict(state_dict['optimizer'])
            best_fitness = state_dict['best_fitness']
        # Results
        if state_dict.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(state_dict['training_results'])  # write results.txt
        # Epochs
        start_epoch = state_dict['epoch'] + 1
        if epochs < start_epoch:
            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.'
                        % (train_dict['weights'], state_dict['epoch'], epochs))
            epochs += state_dict['epoch']  # finetune additional epochs
        del state_dict

    if train_dict['sync_bn'] and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # Exponential moving average
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    batch_size = train_dict['batch_size']
    image_size = train_dict['image_size']
    # dataloader, dataset = create_dataloader(train_path, image_size, batch_size, opt, hyp=train_dict,
    #                                         augment=True, rect=opt.rect, rank=rank,
    #                                         world_size=opt.world_size, workers=train_dict['workers'])
    rgb_mean = (104, 117, 123)  # bgr order
    dataset = WiderFaceDetection(train_path, preproc(image_size, rgb_mean))
    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=8,
                                             sampler=sampler, pin_memory=True,
                                             collate_fn=detection_collate)

    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)
    priorbox = PriorBox(train_dict, image_size=(image_size, image_size))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    # bookkeeping for the progress log below
    epoch_size = len(dataloader)
    max_epoch = epochs
    max_iter = max_epoch * epoch_size

    for epoch in range(start_epoch, epochs):
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        if rank in [-1, 0]:
            pbar = tqdm(pbar)  # progress bar
        optimizer.zero_grad()
        for i, (images, targets) in pbar:  # batch ---------------------------------------
            load_t0 = time.time()
            iteration = epoch * epoch_size + i
            with amp.autocast(enabled=cuda):
                images = images.cuda()
                targets = [anno.cuda() for anno in targets]
                out = model(images)
                optimizer.zero_grad()
                loss_l, loss_c, loss_landm = criterion(out, priors, targets)
                # loc_weight comes from train_dict (which serves as cfg here); scale by
                # world_size so DDP gradient averaging keeps the effective loss
                loss = (train_dict['loc_weight'] * loss_l + loss_c + loss_landm) * opt.world_size
            loss.backward()
            optimizer.step()
            load_t1 = time.time()
            batch_time = load_t1 - load_t0
            eta = int(batch_time * (max_iter - iteration))
            if rank in [-1, 0]:
                lr = optimizer.param_groups[0]['lr']
                print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} '
                      '|| LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
                      .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                              iteration + 1, max_iter, loss_l.item(), loss_c.item(),
                              loss_landm.item(), lr, batch_time,
                              str(datetime.timedelta(seconds=eta))))
        scheduler.step()  # step the cosine schedule once per epoch
    torch.save(model.state_dict(), wdir + '{}_Final.pth'.format(i))
def face_detector(frame): img_raw = frame.copy() img = np.float32(img_raw) if resize != 1: img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) im_height, im_width, _ = img.shape scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) loc, conf, landms = net(img) # forward pass priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > CONFIDENCE)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:5000] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, NMS_THRESHOLD) # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:750, :] landms = landms[:750, :] dets = np.concatenate((dets, landms), axis=1) bboxs = [] for b in dets: if b[4] < VIZ_THRESHOLD: continue b = list(map(int, b)) margin = 10 x1,y1,x2,y2 = b[0], b[1], b[2], b[3] img_h, img_w, _ = frame.shape w = x2-x1 h = y2-y1 margin = int(min(w,h) * margin / 100) x_a = x1 - margin y_a = y1 - margin x_b = x1 + w + margin y_b = y1 + h + margin if x_a < 0: x_b = min(x_b - x_a, img_w-1) x_a = 0 if y_a < 0: y_b = min(y_b - y_a, img_h-1) y_a = 0 if x_b > img_w: x_a = max(x_a - (x_b - img_w), 0) x_b = img_w if y_b > img_h: y_a = max(y_a - (y_b - img_h), 0) y_b = img_h cv2.rectangle(img_raw, (x_a, y_a), (x_b, y_b), (255, 0, 255), 1) bboxs.append([x_a,y_a,x_b,y_b]) return img_raw,bboxs
def train(): net = RetinaFace(cfg=cfg) logger.info("Printing net...") logger.info(net) if args.resume_net is not None: logger.info('Loading resume network...') state_dict = torch.load(args.resume_net) # create new OrderedDict that does not contain `module.` from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): head = k[:7] if head == 'module.': name = k[7:] # remove `module.` else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() net.train() epoch = 0 + args.resume_epoch logger.info('Loading Dataset...') trainset = WiderFaceDetection(training_dataset, preproc=train_preproc(img_dim, rgb_mean), mode='train') validset = WiderFaceDetection(training_dataset, preproc=valid_preproc(img_dim, rgb_mean), mode='valid') # trainset = WiderFaceDetection(training_dataset, transformers=train_transformers(img_dim), mode='train') # validset = WiderFaceDetection(training_dataset, transformers=valid_transformers(img_dim), mode='valid') trainloader = data.DataLoader(trainset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate) validloader = data.DataLoader(validset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate) logger.info(f'Totally {len(trainset)} training samples and {len(validset)} validating samples.') epoch_size = math.ceil(len(trainset) / batch_size) max_iter = max_epoch * epoch_size logger.info(f'max_epoch: {max_epoch:d} epoch_size: {epoch_size:d}, max_iter: {max_iter:d}') # optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) optimizer = optim.Adam(net.parameters(), lr=initial_lr, weight_decay=weight_decay) scheduler = _utils.get_linear_schedule_with_warmup(optimizer, int(0.1 * max_iter), max_iter) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size) step_index = 0 if args.resume_epoch > 0: start_iter = args.resume_epoch * epoch_size else: start_iter = 0 best_loss_val = float('inf') for iteration in range(start_iter, max_iter): if iteration % epoch_size == 0: # create batch iterator # batch_iterator = iter(tqdm(trainloader, total=len(trainloader))) batch_iterator = iter(trainloader) # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']): # torch.save(net.state_dict(), save_folder + cfg['name']+ '_epoch_' + str(epoch) + '.pth') epoch += 1 torch.cuda.empty_cache() if (valid_steps > 0) and (iteration > 0) and (iteration % valid_steps == 0): net.eval() # validation loss_l_val = 0. loss_c_val = 0. loss_landm_val = 0. loss_val = 0. 
# for val_no, (images, targets) in tqdm(enumerate(validloader), total=len(validloader)): for val_no, (images, targets) in enumerate(validloader): # load data images = images.cuda() targets = [anno.cuda() for anno in targets] # forward with torch.no_grad(): out = net(images) loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm loss_l_val += loss_l.item() loss_c_val += loss_c.item() loss_landm_val += loss_landm.item() loss_val += loss.item() loss_l_val /= len(validloader) loss_c_val /= len(validloader) loss_landm_val /= len(validloader) loss_val /= len(validloader) logger.info('[Validating] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f}' .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss_val, loss_l_val, loss_c_val, loss_landm_val)) if loss_val < best_loss_val: best_loss_val = loss_val pth = os.path.join(save_folder, cfg['name'] + '_iter_' + str(iteration) + f'_{loss_val:.4f}_' + '.pth') torch.save(net.state_dict(), pth) logger.info(f'Best validating loss: {best_loss_val:.4f}, model saved as {pth:s})') net.train() load_t0 = time.time() # if iteration in stepvalues: # step_index += 1 # lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size) # load train data images, targets = next(batch_iterator) images = images.cuda() targets = [anno.cuda() for anno in targets] # forward out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c, loss_landm = criterion(out, priors, targets) loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm loss.backward() optimizer.step() scheduler.step() load_t1 = time.time() batch_time = load_t1 - load_t0 eta = int(batch_time * (max_iter - iteration)) if iteration % verbose_steps == 0: logger.info('[Training] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}' .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size, iteration + 1, max_iter, loss.item(), loss_l.item(), loss_c.item(), loss_landm.item(), scheduler.get_last_lr()[-1], batch_time, str(datetime.timedelta(seconds=eta))))
def do_annotation_over_video(args, _dir, device, nets, resize, cfg): exts = ['mp4', 'avi'] _dir = _dir.replace('\n', '') for video_path in glob.glob(_dir + '/*'): video_name = os.path.split(video_path)[-1] ext = os.path.splitext(video_name)[-1].split('.')[-1] if ext not in exts: continue video_name_no_ext = os.path.splitext(video_name)[0] target_dir = f'auto_annotation_labels/{video_name_no_ext}' if os.path.exists(target_dir): shutil.rmtree(target_dir) os.makedirs(target_dir, exist_ok=True) tagfile = os.path.join(target_dir, 'tag.csv') if os.path.exists(tagfile): os.remove(tagfile) cam = cv2.VideoCapture(video_path) frm_num = 0 while True: _, img_raw = cam.read() if img_raw is None: break frm_num += 1 if frm_num % 100 != 0: continue img_name = video_name + '_' + str(format(frm_num, '06d')) + '.jpg' img_raws = [img_raw.copy() for i in range(len(nets))] img = np.float32(img_raw) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) tic = time.time() all_boxes = [] all_scores = [] drawn_images = [] for idx, net in enumerate(nets): loc, conf, landms = net(img) # forward pass # print ('net forward time: {:.4f}'.format (time.time () - tic)) priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > args.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:args.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack( (boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, args.nms_threshold) dets = dets[keep, :] landms = landms[keep] boxes = boxes[keep] all_boxes.append( np.hstack( [boxes, np.full_like(boxes[:, :1], fill_value=idx)])) all_scores.append(scores[keep]) # keep top-K faster NMS dets = dets[:args.keep_top_k, :] landms = landms[:args.keep_top_k, :] dets = np.concatenate((dets, landms), axis=1) # show image if args.save_image: for b in dets: if b[4] < args.vis_thres: continue text = "{:.4f}".format(b[4]) b = list(map(int, b)) cv2.rectangle(img_raws[idx], (b[0], b[1]), (b[2], b[3]), (255, 255, 255), 2) cx = b[0] cy = b[1] + 12 # cv2.putText (img_raws[idx], text, (cx, cy), # cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # landms # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # save image if idx == len(nets) - 1: # last model in list of models # name = os.path.join(os.getcwd (), f"det_results/{img_name}") img_to_draw = np.vstack(img_raws) # cv2.imwrite (name, 
cv2.resize(img_to_draw, dsize=None, fx=0.5, fy=0.5)) all_boxes = np.concatenate(all_boxes, axis=0) all_scores = np.concatenate(all_scores, axis=0) # keep top-K before NMS order = all_scores.argsort()[::-1][:args.top_k] all_boxes = all_boxes[order] all_scores = all_scores[order] # do NMS dets = np.hstack( (all_boxes, all_scores[:, np.newaxis])).astype(np.float32, copy=False) keep, dont_care = customized_cpu_nms(dets, args.nms_threshold, total_models=len(nets)) dets_dc = dets[dont_care, :] dets = dets[keep, :] # keep top-K faster NMS dets = dets[:args.keep_top_k, :] dets_dc = dets_dc[:args.keep_top_k, :] dets = np.delete(dets, [4], axis=1) dets_dc = np.delete(dets_dc, [4], axis=1) bbox = np.hstack( [dets[:, :5], np.full((dets.shape[0], 1), fill_value=4)]) if bbox.shape[0] < 3: continue bbox_dc = np.hstack([ dets_dc[:, :5], np.full((dets_dc.shape[0], 1), fill_value=10) ]) bbox = np.vstack([bbox, bbox_dc]) write_csv(tagfile, bbox, file_name=img_name) imgfile = f'auto_annotation_labels/{video_name_no_ext}/{img_name}' cv2.imwrite(imgfile, img_raw) # show image # if args.save_image: if 0: for b in dets: if b[4] < args.vis_thres: continue text = "{:.4f}".format(b[4]) b = list(map(int, b)) cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (255, 0, 255), 2) cx = b[0] cy = b[1] + 12 cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) for b in dets_dc: if b[4] < args.vis_thres: continue text = "{:.4f}".format(b[4]) b = list(map(int, b)) cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) cx = b[0] cy = b[1] + 12 cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # landms # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # save image name = os.path.join(os.getcwd(), f"det_results/{img_name}") img_to_draw = cv2.resize( img_to_draw, dsize=(int(img_raw.shape[1] * (img_raw.shape[0] / img_to_draw.shape[0])), img_raw.shape[0])) img_to_draw = np.hstack([img_to_draw, img_raw]) cv2.imwrite(name, img_to_draw)
net = torch.nn.DataParallel(net, device_ids=list(range(num_gpu)))
teacher_net = torch.nn.DataParallel(teacher_net, device_ids=list(range(num_gpu)))
device = torch.device('cuda:0' if gpu_train else 'cpu')
cudnn.benchmark = True

net = net.to(device)
teacher_net = teacher_net.to(device)

optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)
tdkd = TDKD()

priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with torch.no_grad():
    priors, priors_by_layer = priorbox.forward()
    priors = priors.to(device)


def train():
    net.train()
    teacher_net.eval()
    for param in teacher_net.parameters():
        param.requires_grad = False
    epoch = 0 + args.resume_epoch

    print('Loading Dataset...')
    dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean))
class SSD(nn.Module): """Single Shot Multibox Architecture The network is composed of a base VGG network followed by the added multibox conv layers. Each multibox layer branches into 1) conv2d for class conf scores 2) conv2d for localization predictions 3) associated priorbox layer to produce default bounding boxes specific to the layer's feature map size. See: https://arxiv.org/pdf/1512.02325.pdf for more details. Args: phase: (string) Can be "test" or "train" base: VGG16 layers for input, size of either 300 or 500 extras: extra layers that feed to multibox loc and conf layers head: "multibox head" consists of loc and conf conv layers """ def __init__(self, phase, base, extras, head, num_classes): super(SSD, self).__init__() self.phase = phase self.num_classes = num_classes # TODO: implement __call__ in PriorBox self.priorbox = PriorBox(v2) self.priors = Variable(self.priorbox.forward(), volatile=True) self.size = 300 # SSD network self.vgg = nn.ModuleList(base) # Layer learns to scale the l2 normalized features from conv4_3 self.L2Norm = L2Norm(512, 20) self.extras = nn.ModuleList(extras) # fused conv4_3 and conv5_3 self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1) self.deconv = nn.ConvTranspose2d(512, 512, 2, 2) self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 1) self.L2Norm5_3 = L2Norm(512, 10) self.fused_relu = nn.ReLU(inplace=True) self.loc = nn.ModuleList(head[0]) self.conf = nn.ModuleList(head[1]) if self.phase == 'test': self.softmax = nn.Softmax() self.detect = Detect(num_classes, 0, 200, 0.01, 0.45) def forward(self, x): """Applies network layers and ops on input image(s) x. Args: x: input image or batch of images. Shape: [batch,3,300,300]. Return: Depending on phase: test: Variable(tensor) of output class label predictions, confidence score, and corresponding location predictions for each object detected. 
                Shape: [batch,topk,7]
            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()

        # Apply vgg up to conv4_3 relu
        # Fused conv4_3, conv5_3
        for k in range(23):
            x = self.vgg[k](x)
        conv4_3 = self.conv4_3(x)
        s4_3 = self.L2Norm(conv4_3)

        for k in range(23, 30):
            x = self.vgg[k](x)
        deconv = self.deconv(x)
        conv5_3 = self.conv5_3(deconv)
        s5_3 = self.L2Norm5_3(conv5_3)
        # print(s4_3.size())
        # print(s5_3.size())
        s = s4_3 + s5_3
        s = self.fused_relu(s)
        sources.append(s)

        # apply vgg up to fc7
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Apply multibox head to source layers
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == 'test':
            conf_preds = conf.view(-1, self.num_classes)
            conf_preds = self.softmax(conf_preds).view(conf.size(0), -1, self.num_classes)
            # TODO: test
            # loc = loc.view(loc.size(0), -1, 4)
            # print(loc.size())
            # print(conf_preds.size())
            # print(self.priors.size())
            output = self.detect(
                loc.view(loc.size(0), -1, 4),        # loc preds
                conf_preds,                          # conf preds
                self.priors.type(type(x.data))       # default boxes
            )
        else:
            output = (loc.view(loc.size(0), -1, 4),
                      conf.view(conf.size(0), -1, self.num_classes),
                      self.priors)
        return output

    def load_weights(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict ...')
            self.load_state_dict(
                torch.load(base_file, map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print("Sorry only .pth or .pkl files supported.")

    def load_weights_fused(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict ...')
            params = torch.load(base_file, map_location=lambda storage, loc: storage)
            own_dict = self.state_dict()
            for k, v in own_dict.items():
                param = params.get(k)
                if param is None:
                    continue
                v.copy_(param)
            print('Finished!')
        else:
            print("Sorry only .pth or .pkl files supported.")

    def load_weights_for_rosd(self, base_file):
        other, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict ...')
            params = torch.load(base_file, map_location=lambda storage, loc: storage)
            own_dict = self.state_dict()
            for k, v in list(own_dict.items())[:-28]:
                param = params.get(k)
                if param is None:
                    continue
                v.copy_(param)
            print('Finished!')
        else:
            print("Sorry only .pth or .pkl files supported.")
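# A minimal usage sketch of the fused-feature SSD above in test phase. build_ssd()
# is the factory used elsewhere in this collection; the class count (21 = VOC classes
# plus background) and the weights path are illustrative assumptions only.
import torch

net = build_ssd('test', 300, 21)
net.load_weights('weights/ssd300_voc.pth')   # illustrative path
net.eval()
with torch.no_grad():
    detections = net(torch.randn(1, 3, 300, 300))   # per the docstring: class/score/box predictions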
def test_begin(img_name): image_path = os.path.join(args.test_dir, img_name) save_path = os.path.join(args.save_dir, img_name) # for i in range(100): # image_path = "./test_img/11008.jpg" img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) img = np.float32(img_raw) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) tic = time.time() loc, conf, landms = net(img) # forward pass print('net forward time: {:.4f}'.format(time.time() - tic)) priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > args.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:args.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, args.nms_threshold) # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:args.keep_top_k, :] landms = landms[:args.keep_top_k, :] dets = np.concatenate((dets, landms), axis=1) # show image if args.save_image: for b in dets: if b[4] < args.vis_thres: continue text = "{:.4f}".format(b[4]) b = list(map(int, b)) cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) cx = b[0] cy = b[1] + 12 cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # landms cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # save image # save_path = "./save_img/test.jpg" cv2.imwrite(save_path, img_raw)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') net = net.to(device) torch.backends.cudnn.benchmark = True # vs = FileVideoStream("/home/mdt/ownCloud/datasets/Face_Recognition/output.avi").start() vs = FileVideoStream( "/home/mdt/Downloads/Captures/ 2020-02-25 14-58-37.mp4").start() # vs = VideoStream("rtsp://*****:*****@192.168.100.64:554").start() # vs = VideoStream("rtsp://*****:*****@322nguyentrai.ddns.net:554").start() # vs = VideoStream("rtsp://*****:*****@118.70.82.46:554").start() # vs = VideoStream("rtsp://*****:*****@192.168.101.65:555").start() # vs = VideoStream("rtsp://*****:*****@113.161.36.165:554").start() fps = FPS().start() net_inshape = (640, 640) # h, w rgb_mean = (104, 117, 123) # bgr order priorbox = PriorBox(cfg, image_size=net_inshape) priors = priorbox.forward() priors = priors.numpy() while True: frame = vs.read() # frame = imutils.rotate_bound(frame, 90) if frame is None: break frame_raw = frame.copy() # image_path = "fail.jpg" # frame = cv2.imread(image_path, cv2.IMREAD_COLOR) # frame_raw = frame.copy() h, w = frame.shape[:2] d = max(h, w) dy = (d - h)
def detect_faces(self, img, return_best=False): """ Computes a list of faces detected in the input image in the form of a list of bounding-boxes, one per each detected face. Arguments: img: The image to be input to the RetinaFace model return_best: boolean indicating whether to return just to best detection or the complete list of detections Returns: A list of arrays. Each array contains the image coordinates of the corners of a bounding-box and the score of the detection in the form [x1,y1,x2,y2,score], where (x1,y1) are the integer coordinates of the top-left corner of the box and (x2,y2) are the coordinates of the bottom-right corner of the box. The score is a floating-point number. When return_best is True, the returned list will contain only one bounding-box """ if numpy.all(img != None): try: im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img = numpy.float32(img) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(self.device) scale = scale.to(self.device) # note below that the landmarks (3rd returned value) are ignored loc, conf, _ = self.net(img) priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(self.device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) boxes = boxes * scale boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] # ignore low scores inds = numpy.where(scores > CONF_THRESH)[0] boxes = boxes[inds] scores = scores[inds] # keep top-K before NMS # order = scores.argsort()[::-1][:args.top_k] order = scores.argsort()[::-1] boxes = boxes[order] scores = scores[order] # do NMS dets = numpy.hstack( (boxes, scores[:, numpy.newaxis])).astype(numpy.float32, copy=False) keep = py_cpu_nms(dets, NMS_THRESH) # keep top-K faster NMS detections = dets[keep, :] if len(detections) > 0: if return_best: # detections is ordered by confidence so the first one is the best det = numpy.squeeze(detections[0, 0:5]) bounding_box = numpy.zeros(5, dtype=numpy.float32) # extend detection extend_factor = self.face_rect_expand_factor width = round(det[2] - det[0] + 1) height = round(det[3] - det[1] + 1) length = (width + height) / 2.0 centrepoint = [ round(det[0]) + width / 2.0, round(det[1]) + height / 2.0 ] bounding_box[0] = centrepoint[0] - round( (1 + extend_factor) * length / 2.0) bounding_box[1] = centrepoint[1] - round( (1 + extend_factor) * length / 2.0) bounding_box[2] = centrepoint[0] + round( (1 + extend_factor) * length / 2.0) bounding_box[3] = centrepoint[1] + round( (1 + extend_factor) * length / 2.0) # prevent going off image bounding_box[0] = int(max(bounding_box[0], 0)) bounding_box[1] = int(max(bounding_box[1], 0)) bounding_box[2] = int( min(bounding_box[2], img.shape[3])) bounding_box[3] = int( min(bounding_box[3], img.shape[2])) bounding_box[4] = det[4] return [bounding_box] else: det_list = [] for j in range(len(detections)): det = numpy.squeeze(detections[j, 0:5]) bounding_box = numpy.zeros(5, dtype=numpy.float32) # extend detection extend_factor = self.face_rect_expand_factor width = round(det[2] - det[0] + 1) height = round(det[3] - det[1] + 1) length = (width + height) / 2.0 centrepoint = [ round(det[0]) + width / 2.0, round(det[1]) + height / 2.0 ] bounding_box[0] = centrepoint[0] - round( (1 + extend_factor) * length / 2.0) bounding_box[1] = centrepoint[1] - round( (1 + extend_factor) * length / 2.0) bounding_box[2] = 
centrepoint[0] + round( (1 + extend_factor) * length / 2.0) bounding_box[3] = centrepoint[1] + round( (1 + extend_factor) * length / 2.0) # prevent going off image bounding_box[0] = int(max(bounding_box[0], 0)) bounding_box[1] = int(max(bounding_box[1], 0)) bounding_box[2] = int( min(bounding_box[2], img.shape[3])) bounding_box[3] = int( min(bounding_box[3], img.shape[2])) bounding_box[4] = det[4] det_list.append(bounding_box) return det_list else: return None except Exception as e: print('Exception in FaceDetectorRetinaFace: ' + str(e)) pass return None
def get_det(self, img): img_raw = cv2.imread(img, cv2.IMREAD_COLOR) img = np.float32(img_raw) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(self.device) scale = scale.to(self.device) tic = time.time() loc, conf, landms = self.net(img) # forward pass print('net forward time: {:.4f}'.format(time.time() - tic)) priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(self.device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) boxes = boxes * scale / self.resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(self.device) landms = landms * scale1 / self.resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > self.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:self.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, self.nms_threshold) # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:self.keep_top_k, :] landms = landms[:self.keep_top_k, :] dets = np.concatenate((dets, landms), axis=1) crops = [] cords = [] for det in dets: if (det[4] < self.vis_thres): continue b = list(map(int, det)) crop = img_raw[b[1]:b[3], b[0]:b[2]].copy() if (len(crops) < 10): crops.append(crop) cords.append(b) return crops, cords
def detect(img_path): torch.set_grad_enabled(False) cfg = None if args.network == "mobile0.25": cfg = cfg_mnet elif args.network == "resnet50": cfg = cfg_re50 # net and model net = RetinaFace(cfg=cfg, phase='test') #net = FaceBoxes(phase='test', size=None, num_classes=2) net = load_model(net, args.trained_model, args.cpu) net.eval() #print('Finished loading model!') #print(net) cudnn.benchmark = True device = torch.device("cpu" if args.cpu else "cuda") net = net.to(device) _t = {'forward_pass': Timer(), 'misc': Timer()} resize = 1 # testing begin # for i, img_name in enumerate(test_dataset): # image_path = testset_folder + img_name + '.jpg' # img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) if type(img_path) is not np.ndarray: img = Image.open(img_path) if img.mode == 'L': img = img.convert('RGB') img_raw = np.array(img) else: img_raw = img_path #img_raw = img_path img = np.float32(img_raw) if resize != 1: img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) _t['forward_pass'].tic() loc, conf, landms = net(img) # forward pass _t['forward_pass'].toc() _t['misc'].tic() priorbox = PriorBox(cfg, image_size=(im_height, im_width)) #priorbox = PriorBox1(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > args.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS # order = scores.argsort()[::-1][:args.top_k] order = scores.argsort()[::-1] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, args.nms_threshold) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS # dets = dets[:args.keep_top_k, :] # landms = landms[:args.keep_top_k, :] dets = np.concatenate((dets, landms), axis=1) _t['misc'].toc() # save dets # if args.dataset == "FDDB": # fw.write('{:s}\n'.format(img_name)) # fw.write('{:.1f}\n'.format(dets.shape[0])) # for k in range(dets.shape[0]): # xmin = dets[k, 0] # ymin = dets[k, 1] # xmax = dets[k, 2] # ymax = dets[k, 3] # score = dets[k, 4] # w = xmax - xmin + 1 # h = ymax - ymin + 1 # # fw.write('{:.3f} {:.3f} {:.3f} {:.3f} {:.10f}\n'.format(xmin, ymin, w, h, score)) # fw.write('{:d} {:d} {:d} {:d} {:.10f}\n'.format(int(xmin), int(ymin), int(w), int(h), score)) print('forward_pass_time: {:.4f}s misc: {:.4f}s'.format( _t['forward_pass'].average_time, _t['misc'].average_time)) # if type(img_path) is not np.ndarray: # img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR) # else: # img_raw = img_path # # show image # if args.save_image: # for b in dets: # if b[4] < args.vis_thres: # continue # text = 
"{:.4f}".format(b[4]) # b = list(map(int, b)) # cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) # cx = b[0] # cy = b[1] + 12 # cv2.putText(img_raw, text, (cx, cy), # cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # # landms # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # save image # if not os.path.exists("./results/"): # os.makedirs("./results/") # name = "./results/" + str(i) + ".jpg" # cv2.imwrite(name, img_raw) return dets, img_path
def detect(self, img, origin_size=True, target_size=480, max_size=2150, confidence_threshold=0.7, nms_threshold=0.35, top_k=5000, keep_top_k=750): img, resize = self._process_image(img, origin_size, target_size, max_size) img = img.to(self.device) _, _, im_height, im_width = img.size() # anchor priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) priors = priorbox.forward().to(self.device) prior_data = priors.data # forward loc, conf, landms = self(img) # decoder output scores = conf.squeeze(0).data.cpu().numpy()[:, 1] boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) scale = torch.Tensor([ im_width, im_height, im_width, im_height, ]).to(self.device) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance']) scale1 = torch.Tensor([ im_width, im_height, im_width, im_height, im_width, im_height, im_width, im_height, im_width, im_height, ]).to(self.device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1] # order = scores.argsort()[::-1][:top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, nms_threshold) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:keep_top_k, :] landms = landms[:keep_top_k, :] # dets = np.concatenate((dets, landms), axis=1) scores = dets[:, -1] # (N,) dets = dets[:, :-1] # (N, 4) landms = landms.reshape(-1, 5, 2) # (N, 5, 2) return scores, dets, landms
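# A minimal usage sketch for the detect() method above. The model construction is
# assumed (any module exposing this detect() works); only the call signature and the
# returned (scores, boxes, landms) shapes come from the code above.
import cv2

def run_detect(model, image_path='sample.jpg'):
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)            # BGR image, as the pipeline expects
    scores, boxes, landms = model.detect(img, confidence_threshold=0.7)
    for score, (x1, y1, x2, y2), pts in zip(scores, boxes, landms):
        # boxes: (N, 4) pixel coordinates; landms: (N, 5, 2) landmark points per face
        print('{:.3f}'.format(score), int(x1), int(y1), int(x2), int(y2), pts.tolist())
    return scores, boxes, landms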
def train(): if args.dataset == 'COCO': cfg = coco dataset = COCODetection(root=cfg['coco_root'], transform=SSDAugmentation(cfg['min_dim'], MEANS)) if args.dataset == 'VOC': cfg = voc dataset = VOCDetection(root=cfg['voc_root'], transform=SSDAugmentation(cfg['min_dim'], MEANS)) if args.visdom: import visdom viz = visdom.Visdom() ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes']) net = ssd_net if args.cuda: net = torch.nn.DataParallel(ssd_net) cudnn.benchmark = True if args.resume: print('Resuming training, loading {}...'.format(args.resume)) ssd_net.load_weights(args.resume) else: vgg_weights = torch.load(args.save_folder + args.basenet) print('Loading base network...') ssd_net.vgg.load_state_dict(vgg_weights) if args.cuda: net = net.cuda() if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method ssd_net.extras.apply(weights_init) ssd_net.loc.apply(weights_init) ssd_net.conf.apply(weights_init) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) priorbox = PriorBox(cfg) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() net.train() # loss counters loc_loss = 0 conf_loss = 0 epoch = 0 print('Loading the dataset...') epoch_size = len(dataset) // args.batch_size print('Training SSD on:', dataset.name) print('Using the specified args:') print(args) step_index = 0 if args.visdom: vis_title = 'SSD.PyTorch on ' + dataset.name vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss'] iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend) epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend) data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) # create batch iterator batch_iterator = iter(data_loader) for iteration in range(args.start_iter, cfg['max_iter']): if args.visdom and iteration != 0 and (iteration % epoch_size == 0): update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None, 'append', epoch_size) # reset epoch loss counters loc_loss = 0 conf_loss = 0 epoch += 1 if iteration in cfg['lr_steps']: step_index += 1 adjust_learning_rate(optimizer, args.gamma, step_index) # load train data try: images, targets = next(batch_iterator) except StopIteration: batch_iterator = iter(data_loader) images, targets = next(batch_iterator) if args.cuda: images = Variable(images.cuda()) targets = [Variable(ann.cuda(), requires_grad=False) for ann in targets] else: images = Variable(images) targets = [Variable(ann, requires_grad=False) for ann in targets] # forward t0 = time.time() out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c = criterion(out, priors, targets) loss = loss_l + loss_c loss.backward() optimizer.step() t1 = time.time() loc_loss += loss_l.item() conf_loss += loss_c.item() if iteration % 10 == 0: print('timer: %.4f sec.' % (t1 - t0)) print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ') if args.visdom: update_vis_plot(iteration, loss_l.item(), loss_c.item(), iter_plot, epoch_plot, 'append') if iteration != 0 and iteration % 5000 == 0: print('Saving state, iter:', iteration) torch.save(ssd_net.state_dict(), 'weights/ssd300_COCO_' + repr(iteration) + '.pth') torch.save(ssd_net.state_dict(), args.save_folder + '' + args.dataset + '.pth')
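# adjust_learning_rate() and weights_init() are called in train() above but not shown
# in this excerpt. The sketch below follows the common ssd.pytorch form and is an
# assumption: the step-decay schedule uses args.lr and args.gamma as at the call site,
# and new conv layers are xavier-initialized.
import torch.nn as nn
import torch.nn.init as init

def adjust_learning_rate(optimizer, gamma, step):
    # decay the base LR by gamma once for each entry of cfg['lr_steps'] passed so far
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def xavier(param):
    init.xavier_uniform_(param)

def weights_init(m):
    # xavier-initialize the newly added conv layers (extras, loc, conf heads)
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        m.bias.data.zero_()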
net.load_state_dict(new_state_dict) if num_gpu > 1 and gpu_train: net = torch.nn.DataParallel(net).cuda() else: net = net.cuda() cudnn.benchmark = True optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() def get_path_dict(train_dir): '''Get path dictionary for image paths with/without mask type. - key: image path without mask type - value: image path with mask type ''' MASK_TYPES = ('cloth', 'surgical_blue', 'surgical', 'KN95') path_dict = {} for img_path in glob.glob(f'{train_dir}/**/*.jpg'): for mtype in MASK_TYPES: if mtype in img_path:
#net.setInput(blob) # Runs the forward pass to get output of the output layers scale = torch.Tensor([ frame.shape[1], frame.shape[0], frame.shape[1], frame.shape[0] ]) frame -= (104, 117, 123) frame = frame.transpose(2, 0, 1) frame = torch.from_numpy(frame).unsqueeze(0) frame = frame.to(device) scale = scale.to(device) _t['forward_pass'].tic() loc, conf = net(frame) # forward pass _t['forward_pass'].toc() _t['misc'].tic() priorbox = PriorBox(cfg, image_size=(IMG_WIDTH, IMG_HEIGHT)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] # Remove the bounding boxes with low confidence # faces = post_process(frame, outs, CONF_THRESHOLD, NMS_THRESHOLD) inds = np.where(scores > args.confidence_threshold)[0] boxes = boxes[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:args.top_k] boxes = boxes[order]
def main(): args = arg_parse() filename, extension = splitext(basename(args.input)) print("Loading file [{}] ....".format(filename)) if not exists(args.input): raise ValueError("File [{}] is not recognized".format(args.input)) if not isfile(args.trained_model): raise ValueError(f'The model {args.trained_model} is not found') if not exists(args.output_dir): print( f'Output directory {args.output_dir} does not exist, Creating one') makedirs(args.output_dir) torch.set_grad_enabled(False) cfg = cfg_mnet if args.network == "mobile0.25" else cfg_re50 resize = 1 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") net = model_cfg(args.trained_model, cfg=cfg, device=device, cpu=args.cpu) if is_video(extension): vdo = cv2.VideoCapture() codec = cv2.VideoWriter_fourcc(*'XVID') output = join(args.output_dir, filename + '.avi') if vdo.open(args.input): property_id = int(cv2.CAP_PROP_FRAME_COUNT) total_frames = int(cv2.VideoCapture.get(vdo, property_id)) width = int(vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = vdo.get(cv2.CAP_PROP_FPS) writer = cv2.VideoWriter(output, codec, fps, (width, height)) print('') print('processing video ...') frame_idx = 0 with tqdm(range(total_frames)) as pbar: while vdo.grab(): frame_idx += 1 pbar.update(1) ret, frame = vdo.retrieve() if not ret: break args.step = 1 if args.step < 1 else args.step if frame_idx % args.step == 0: frame = pipeline(net, frame, args, device, resize, cfg) writer.write(frame) pbar.close() print('process finished Successfully') print('process finished. file is stored as {}'.format(output)) vdo.release() writer.release() elif is_image(extension): frame = cv2.imread(args.input) img = np.float32(frame) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) loc, conf, landms = net(img) # forward pass priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > args.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:args.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, args.nms_threshold) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:args.keep_top_k, :] landms = landms[:args.keep_top_k, :] dets = np.concatenate((dets, landms), axis=1) objects_to_draw = dict(draw_box=True, draw_text=False, draw_landmarks=False) frame = draw(frame, dets, args.vis_thres, **objects_to_draw) output = args.output_dir + filename + '.jpg' cv2.imwrite(output, frame) print('output is stored as {}'.format(output)) else: print('cant read video 
\n{}'.format(args.input))
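# `is_video` and `is_image` are referenced in main() above but are not defined
# in this file. A minimal sketch based on how they are used (called with the
# extension returned by splitext, which keeps the leading dot); the exact
# extension lists are assumptions.
def is_video(extension):
    return extension.lower() in {'.avi', '.mp4', '.mkv', '.mov', '.wmv'}

def is_image(extension):
    return extension.lower() in {'.jpg', '.jpeg', '.png', '.bmp'}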
def detect_faces(self, img_raw, mean=(104, 117, 123)): img = np.float32(img_raw) im_height, im_width, _ = img.shape scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= mean img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(self.device) scale = scale.to(self.device) tic = time.time() loc, conf, landms = self.net(img) # forward pass # print('net forward time: {:.4f}'.format(time.time() - tic)) priorbox = PriorBox(self.cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(self.device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance']) boxes = boxes * scale / self.resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance']) scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]]) scale1 = scale1.to(self.device) landms = landms * scale1 / self.resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > self.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1][:self.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, self.nms_threshold) # keep = nms(dets, self.nms_threshold,force_cpu=self.cpu) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:self.keep_top_k, :] landms = landms[:self.keep_top_k, :] dets = np.concatenate((dets, landms), axis=1) # show image if self.show_image: for b in dets: if b[4] < self.vis_thres: continue text = "{:.4f}".format(b[4]) b = list(map(int, b)) cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) cx = b[0] cy = b[1] + 12 cv2.putText(img_raw, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # landms cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # Show image cv2.imshow('result', img_raw) cv2.waitKey(100) results = [] for det in dets: box = det[:4] score = det[4] keypoints = det[5:] if score < self.vis_thres: continue results.append({'box':box.tolist(), 'score':score.tolist(), 'keypoints':keypoints.tolist()}) return results
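# A usage sketch for detect_faces(): run it on one image and draw the returned
# boxes. The `FaceDetector` class name and its constructor are assumptions --
# only the detect_faces() method itself appears above; it returns a list of
# dicts with 'box' (x1, y1, x2, y2), 'score', and 'keypoints'.
if __name__ == '__main__':
    import cv2
    detector = FaceDetector()  # hypothetical wrapper exposing detect_faces()
    image = cv2.imread('test.jpg', cv2.IMREAD_COLOR)
    for face in detector.detect_faces(image):
        x1, y1, x2, y2 = map(int, face['box'])
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        print(face['score'], face['keypoints'])
    cv2.imwrite('result.jpg', image)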
def do_annotation_over_dir(args, _dir, device, nets, resize, cfg): exts = ['jpeg', 'png', 'jpg', 'bmp'] _dir = _dir.replace('\n', '') last_dir = _dir.split('/')[-1] os.makedirs(f'auto_annotation_labels/{last_dir}', exist_ok=True) tagfile = os.path.join(f'auto_annotation_labels/{last_dir}', 'auto_tag.csv') if os.path.exists(tagfile): os.remove(tagfile) assert os.path.exists(_dir), f'{_dir} does not exist' for image_path in glob.glob(_dir + '/*'): img_name = os.path.split(image_path)[-1] ext = os.path.splitext(img_name)[-1].split('.')[-1] if ext not in exts: continue img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) img_raws = [img_raw.copy(), img_raw.copy()] img = np.float32(img_raw) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) tic = time.time() all_boxes = [] all_landms = [] all_scores = [] drawn_images = [] for idx, net in enumerate(nets): loc, conf, landms = net(img) # forward pass # print ('net forward time: {:.4f}'.format (time.time () - tic)) priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > args.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] all_boxes.append(boxes) all_landms.append(landms) all_scores.append(scores) # keep top-K before NMS order = scores.argsort()[::-1][:args.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, args.nms_threshold) # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:args.keep_top_k, :] landms = landms[:args.keep_top_k, :] dets = np.concatenate((dets, landms), axis=1) # show image if args.save_image: for b in dets: if b[4] < args.vis_thres: continue text = "{:.4f}".format(b[4]) b = list(map(int, b)) cv2.rectangle(img_raws[idx], (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) cx = b[0] cy = b[1] + 12 # cv2.putText (img_raws[idx], text, (cx, cy), # cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # landms # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # save image if idx == len(nets) - 1: # last model in list of models # name = os.path.join(os.getcwd (), f"det_results/{img_name}") img_to_draw = np.vstack(img_raws) # cv2.imwrite (name, cv2.resize(img_to_draw, dsize=None, fx=0.5, fy=0.5)) all_boxes = np.concatenate(all_boxes, axis=0) all_landms = np.concatenate(all_landms, axis=0) all_scores = np.concatenate(all_scores, axis=0) # keep 
top-K before NMS order = all_scores.argsort()[::-1][:args.top_k] all_boxes = all_boxes[order] all_landms = all_landms[order] all_scores = all_scores[order] # do NMS dets = np.hstack( (all_boxes, all_scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, args.nms_threshold) # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) dets = dets[keep, :] all_landms = all_landms[keep] # keep top-K faster NMS dets = dets[:args.keep_top_k, :] all_landms = all_landms[:args.keep_top_k, :] dets = np.concatenate((dets, all_landms), axis=1) bbox = np.hstack([dets[:, :5], np.ones((dets.shape[0], 1))]) write_csv(tagfile, bbox, file_name=img_name) # show image if args.save_image: for b in dets: if b[4] < args.vis_thres: continue text = "{:.4f}".format(b[4]) b = list(map(int, b)) cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (255, 0, 255), 2) cx = b[0] cy = b[1] + 12 # cv2.putText(img_raw, text, (cx, cy), # cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # landms # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # save image name = os.path.join(os.getcwd(), f"det_results/{img_name}") img_to_draw = cv2.resize( img_to_draw, dsize=(int(img_raw.shape[1] * (img_raw.shape[0] / img_to_draw.shape[0])), img_raw.shape[0])) img_to_draw = np.hstack([img_to_draw, img_raw]) cv2.imwrite(name, img_to_draw)
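# `write_csv` is called above as write_csv(tagfile, bbox, file_name=img_name)
# but is not defined in this file. A minimal sketch that appends one row per
# detection (file name, x1, y1, x2, y2, score, class); the exact column layout
# of the original auto_tag.csv is an assumption.
import csv

def write_csv(tagfile, bboxes, file_name=''):
    with open(tagfile, 'a', newline='') as f:
        writer = csv.writer(f)
        for box in bboxes:
            writer.writerow([file_name] + [float(v) for v in box])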
else: name = k new_state_dict[name] = v net.load_state_dict(new_state_dict) if args.ngpu > 1 and gpu_train: net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu))) device = torch.device('cuda:0' if gpu_train else 'cpu') cudnn.benchmark = True net = net.to(device) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) priorbox = PriorBox(cfg) with torch.no_grad(): priors = priorbox.forward() priors = priors.to(device) def train(): net.train() epoch = 0 + args.resume_epoch print('Loading Dataset...') dataset = VOCDetection(args.training_dataset, preproc(img_dim, rgb_means), AnnotationTransform()) epoch_size = math.ceil(len(dataset) / args.batch_size) max_iter = args.max_epoch * epoch_size
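# The `else: name = k` branch at the start of the snippet above is the tail of
# the common pattern for loading a checkpoint that may have been saved from
# nn.DataParallel: keys are stored with a 'module.' prefix that has to be
# stripped before load_state_dict(). A sketch of the full loop, assuming that
# is what the elided lines do; `remap_state_dict` is a hypothetical name.
from collections import OrderedDict

def remap_state_dict(state_dict):
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k.startswith('module.'):
            name = k[len('module.'):]   # strip the DataParallel prefix
        else:
            name = k                    # key is already unprefixed
        new_state_dict[name] = v
    return new_state_dict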
img = np.float32(img_raw) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) tic = time.time() loc, conf, landms = net(img) # forward pass print('net forward time: {:.4f}'.format(time.time() - tic)) priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device)
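# decode() (and decode_landm()) above convert the network's regression offsets
# back to absolute coordinates using the prior boxes and cfg['variance']. For
# reference, a sketch of the standard SSD-style center-size box decoding; the
# project's own utils should be preferred over this reimplementation.
import torch

def decode(loc, priors, variances):
    """loc: (N, 4) predicted offsets; priors: (N, 4) in (cx, cy, w, h) form."""
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), dim=1)
    boxes[:, :2] -= boxes[:, 2:] / 2   # (cx, cy) -> (x1, y1)
    boxes[:, 2:] += boxes[:, :2]       # (w, h)   -> (x2, y2)
    return boxes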
def detect(net, img, cfg, size=(200, 400), confidence_threshold=0.02, nms_threshold=0.4): # testing scale img = np.float32(img) device = torch.device('cuda') target_size = size[0] max_size = size[1] im_shape = img.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) resize = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size: if np.round(resize * im_size_max) > max_size: resize = float(max_size) / float(im_size_max) # if args.origin_size: # resize = 1 if resize != 1: img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) loc, conf, landms = net(img) # forward pass priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() # ignore low scores inds = np.where(scores > confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1] # order = scores.argsort()[::-1][:args.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, nms_threshold) dets = dets[keep, :] landms = landms[keep] dets = np.concatenate((dets, landms), axis=1) return dets
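# A usage sketch for detect(): it expects an already-loaded RetinaFace model in
# eval mode on CUDA (the function hard-codes torch.device('cuda')) and returns
# one row per face: [x1, y1, x2, y2, score, 10 landmark coordinates], scaled
# back to the original image. `net` is assumed to be loaded elsewhere;
# `cfg_mnet` is the MobileNet config used in the surrounding snippets, and the
# 0.5 visualization threshold here is an example value.
import cv2

img = cv2.imread('group_photo.jpg', cv2.IMREAD_COLOR)
dets = detect(net, img, cfg_mnet, size=(200, 400),
              confidence_threshold=0.02, nms_threshold=0.4)
for det in dets:
    x1, y1, x2, y2, score = det[:5]
    if score > 0.5:
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
cv2.imwrite('group_photo_dets.jpg', img)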
def main(): cfg = None if args.network == "mobile0.25": cfg = cfg_mnet elif args.network == "resnet18": cfg = cfg_re18 elif args.network == "resnet34": cfg = cfg_re34 elif args.network == "resnet50": cfg = cfg_re50 elif args.network == "Efficientnet-b0": cfg = cfg_eff_b0 elif args.network == "Efficientnet-b4": cfg = cfg_eff_b4 elif args.network == "resnet34_hsfd": cfg = cfg_re34_hsfd_finetune elif args.network == "resnet34_hsfd_not_finetune": cfg = cfg_re34_hsfd_not_finetune # net and model net = RetinaFace(cfg=cfg, phase='test') net = load_model(net, args.trained_model, args.cpu) net.eval() print('Finished loading model!') print(net) cudnn.benchmark = True device = torch.device("cpu" if args.cpu else "cuda") net = net.to(device) # # testing dataset # testset_folder = args.dataset_folder # # testset_list = args.dataset_folder[:-7] + "wider_val.txt" # # with open(testset_list, 'r') as fr: # # test_dataset = fr.read().split() # test_dataset = [] # for event in os.listdir(testset_folder): # subdir = os.path.join(testset_folder, event) # img_names = os.listdir(subdir) # for img_name in img_names: # test_dataset.append([event, os.path.join(subdir, img_name)]) # num_images = len(test_dataset) used_channels = cfg['used_channels'] img_dim = cfg['image_size'] test_dataset = EcustHsfdDetection(args.dataset_file, used_channels, preproc=valid_preproc(img_dim, None), mode='valid') num_images = len(test_dataset) datadir = '/'.join(args.dataset_file.split('/')[:-1]) pred_file = os.path.join(f'{args.save_folder:s}_pred.txt') gt_file = os.path.join(f'{args.save_folder:s}_gt.txt') fp1 = open(pred_file, 'w') fp2 = open(gt_file, 'w') _t = {'forward_pass': Timer(), 'misc': Timer()} # testing begin for i, img_name in enumerate(test_dataset.imgs_path): if i % 100 == 0: torch.cuda.empty_cache() # image_path = testset_folder + img_name img_raw = load_datacube(img_name)[..., used_channels] # img_raw = cv2.imread(img_name, cv2.IMREAD_COLOR) img = np.float32(img_raw) # testing scale target_size = img_dim max_size = 2150 im_shape = img.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) resize = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size: if np.round(resize * im_size_max) > max_size: resize = float(max_size) / float(im_size_max) if args.origin_size: resize = 1 if resize != 1: img = np.stack([ cv2.resize(img[..., i], None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) \ for i in range(img.shape[-1]) ], axis=-1) im_height, im_width, _ = img.shape scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img = (img - 127.5) / 128.0 # img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) _t['forward_pass'].tic() loc, conf, landms = net(img) # forward pass _t['forward_pass'].toc() _t['misc'].tic() priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2] ]) scale1 = scale1.to(device) landms = landms * scale1 / resize landms = landms.cpu().numpy() 
# ignore low scores inds = np.where(scores > args.confidence_threshold)[0] boxes = boxes[inds] landms = landms[inds] scores = scores[inds] # keep top-K before NMS order = scores.argsort()[::-1] # order = scores.argsort()[::-1][:args.top_k] boxes = boxes[order] landms = landms[order] scores = scores[order] # do NMS dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) keep = py_cpu_nms(dets, args.nms_threshold) # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) dets = dets[keep, :] landms = landms[keep] # keep top-K faster NMS dets = dets[:args.keep_top_k, :] landms = landms[:args.keep_top_k, :] prediction = np.concatenate((dets, landms), axis=1) _t['misc'].toc() # -------------------------------------------------------------------- # save_name = os.path.join(args.save_folder, img_name.split('/')[-1].split('.')[0] + ".txt") # dirname = os.path.dirname(save_name) # if not os.path.isdir(dirname): # os.makedirs(dirname) # with open(save_name, "w") as fd: # bboxs = dets # file_name = os.path.basename(save_name)[:-4] + "\n" # bboxs_num = str(len(bboxs)) + "\n" # fd.write(file_name) # fd.write(bboxs_num) # for box in bboxs: # x = int(box[0]) # y = int(box[1]) # w = int(box[2]) - int(box[0]) # h = int(box[3]) - int(box[1]) # confidence = str(box[4]) # line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n" # fd.write(line) fp1.write(f"# {img_name.lstrip(datadir).lstrip('/')}\n") if dets.shape[0] > 0: dets = prediction[0][:4].astype(np.int).tolist() dets[2] -= dets[0] dets[3] -= dets[1] landms = prediction[0][4:14] scores = prediction[0][14] label = [0. for _ in range(20)] label[-1] = scores label[:4] = dets label[4:-1] = landms label = ' '.join(list(map(str, label))) fp1.write(f'{label}\n') gt_label = ' '.join(list(map(str, test_dataset.words[i][0]))) fp2.write(f"# {img_name.lstrip(datadir).lstrip('/')}\n") fp2.write(f'{gt_label}\n') print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'. format(i + 1, num_images, _t['forward_pass'].average_time, _t['misc'].average_time)) # # save image # if args.save_image: # for b in dets: # if b[4] < args.vis_thres: # continue # text = "{:.4f}".format(b[4]) # b = list(map(int, b)) # cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) # cx = b[0] # cy = b[1] + 12 # cv2.putText(img_raw, text, (cx, cy), # cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) # # landms # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) # # save image # if not os.path.exists("./results/"): # os.makedirs("./results/") # name = "./results/" + str(i) + ".jpg" # cv2.imwrite(name, img_raw) fp1.close()
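# `Timer` (used above via _t['forward_pass'].tic()/.toc() and .average_time)
# comes from the detector's utilities and is not shown in this file. A minimal
# sketch with the same interface, assuming a simple running average over calls:
import time

class Timer:
    def __init__(self):
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.diff = 0.0
        self.average_time = 0.0

    def tic(self):
        self.start_time = time.time()

    def toc(self):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.diff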
def train(): if args.dataset == 'COCO': if args.dataset_root == VOC_ROOT: if not os.path.exists(COCO_ROOT): parser.error('Must specify dataset_root if specifying dataset') print("WARNING: Using default COCO dataset_root because " + "--dataset_root was not specified.") args.dataset_root = COCO_ROOT cfg = coco dataset = COCODetection(root=args.dataset_root, transform=SSDAugmentation( cfg['min_dim'], MEANS)) elif args.dataset == 'VOC': if args.dataset_root == COCO_ROOT: parser.error('Must specify dataset if specifying dataset_root') cfg = voc dataset = VOCDetection(root=args.dataset_root, transform=SSDAugmentation( cfg['min_dim'], MEANS)) ssd_net = build_net('train', cfg['min_dim'], cfg['num_classes']) net = ssd_net if args.cuda: net = torch.nn.DataParallel(ssd_net) cudnn.benchmark = True if args.resume: print('Resuming training, loading {}...'.format(args.resume)) ssd_net.load_weights(args.resume) # else: # vgg_weights = torch.load(args.save_folder + args.basenet) # print('Loading base network...') # ssd_net.vgg.load_state_dict(vgg_weights) if args.cuda: net = net.cuda() if not args.resume: print('Initializing weights...') # initialize newly added layers' weights with xavier method ssd_net.loc.apply(weights_init) ssd_net.conf.apply(weights_init) optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False, args.cuda) priorbox = PriorBox(cfg) with torch.no_grad(): priors = priorbox.forward() priors = priors.cuda() net.train() # loss counters loc_loss = 0 conf_loss = 0 epoch = 0 print('Loading the dataset...') epoch_size = len(dataset) // args.batch_size print('Training SSD on:', dataset.name) print('Using the specified args:') print(args) step_index = 0 data_loader = data.DataLoader(dataset, args.batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=detection_collate, pin_memory=True) # create batch iterator batch_iterator = iter(data_loader) for iteration in range(args.start_iter, cfg['max_iter']): # reset epoch loss counters loc_loss = 0 conf_loss = 0 epoch += 1 if iteration in cfg['lr_steps']: step_index += 1 adjust_learning_rate(optimizer, args.gamma, step_index) # load train data try: images, targets = next(batch_iterator) except StopIteration: batch_iterator = iter(data_loader) images, targets = next(batch_iterator) if args.cuda: images = Variable(images.cuda()) targets = [Variable(ann.cuda(), volatile=True) for ann in targets] else: images = Variable(images) targets = [Variable(ann, volatile=True) for ann in targets] # forward t0 = time.time() out = net(images) # backprop optimizer.zero_grad() loss_l, loss_c = criterion(out, priors, targets) loss = loss_l + loss_c loss.backward() optimizer.step() t1 = time.time() loc_loss += loss_l.item() conf_loss += loss_c.item() if iteration % 10 == 0: print('timer: %.4f sec.' % (t1 - t0)) print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ') if iteration != 0 and iteration % 5000 == 0: print('Saving state, iter:', iteration) torch.save( ssd_net.state_dict(), 'weights/Mobile-Net-ssd300_COCO_' + repr(iteration) + '.pth') torch.save(ssd_net.state_dict(), args.save_folder + '' + args.dataset + '.pth')
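# `adjust_learning_rate` and `weights_init` are called in train() above but not
# defined in this file. Minimal sketches following the usual SSD training
# recipe (step decay by gamma at cfg['lr_steps'], Xavier init for the newly
# added heads); they assume the module-level `args` from the script and may
# differ from the originals in detail.
import torch.nn as nn
import torch.nn.init as init

def adjust_learning_rate(optimizer, gamma, step):
    """Decay the learning rate to args.lr * (gamma ** step)."""
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def xavier(param):
    init.xavier_uniform_(param)

def weights_init(m):
    # applied via ssd_net.loc.apply(weights_init) / ssd_net.conf.apply(weights_init)
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        if m.bias is not None:
            m.bias.data.zero_()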