def inference():
    """Run object detection on a single image and draw the predictions.

    Reads every option from ``parse_args()``: builds the model and decoder
    named by ``args.model`` / ``args.decoder``, optionally loads weights from
    ``args.trained_model_path``, runs one forward pass on the image returned
    by ``load_image_for_detection_inference``, filters the detections by
    ``args.min_score_threshold``, clips the boxes to the original image and
    draws them. The annotated image is written to ``args.save_image_path``
    and/or shown in a window, depending on the arguments.

    Raises:
        AssertionError: if the dataset name, model or decoder is unsupported,
            or if ``args.use_gpu`` is set but CUDA is not available.
    """
    args = parse_args()
    print(f'args: {args}')

    assert args.trained_dataset_name in ['COCO', 'VOC'], 'Unsupported dataset!'
    assert args.model in models.__dict__.keys(), 'Unsupported model!'
    assert args.decoder in decode.__dict__.keys(), 'Unsupported decoder!'

    if args.use_gpu:
        # only use one Graphics card to inference
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        assert torch.cuda.is_available(), 'need gpu to train network!'
        torch.cuda.empty_cache()

    if args.seed:
        seed = args.seed
        os.environ['PYTHONHASHSEED'] = str(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if args.use_gpu:
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # for cudnn
            cudnn.enabled = True
            cudnn.deterministic = True
            cudnn.benchmark = False

    model = models.__dict__[args.model](**{
        'num_classes': args.trained_num_classes,
    })
    decoder = decode.__dict__[args.decoder]()
    if args.use_gpu:
        model = model.cuda()
        decoder = decoder.cuda()

    if args.trained_model_path:
        saved_model = torch.load(args.trained_model_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    model.eval()

    flops, params = compute_flops_and_params(args, model)
    print(f'model: {args.model}, flops: {flops}, params: {params}')

    resized_img, origin_img, scale = load_image_for_detection_inference(args)
    resized_img = torch.tensor(resized_img)
    if args.use_gpu:
        resized_img = resized_img.cuda()

    # Pure inference: no autograd graph is needed, and tensors that require
    # grad could not be converted with .numpy() further down.
    with torch.no_grad():
        # HWC image -> NCHW float batch of size one.
        out_tuples = model(resized_img.permute(2, 0, 1).float().unsqueeze(0))
        scores, classes, boxes = decoder(*out_tuples)
    scores, classes, boxes = scores.cpu(), classes.cpu(), boxes.cpu()

    # Map the boxes from the resized image back to the original image.
    boxes /= scale

    scores = scores.squeeze(0)
    classes = classes.squeeze(0)
    boxes = boxes.squeeze(0)

    # Entries with a class index below zero are dropped.
    valid = classes > -1
    scores, classes, boxes = scores[valid], classes[valid], boxes[valid]

    confident = scores > args.min_score_threshold
    scores, classes, boxes = (scores[confident], classes[confident],
                              boxes[confident])

    # clip boxes
    origin_h, origin_w = origin_img.shape[0], origin_img.shape[1]
    boxes[:, 0] = torch.clamp(boxes[:, 0], min=0)
    boxes[:, 1] = torch.clamp(boxes[:, 1], min=0)
    boxes[:, 2] = torch.clamp(boxes[:, 2], max=origin_w)
    boxes[:, 3] = torch.clamp(boxes[:, 3], max=origin_h)

    if args.trained_dataset_name == 'COCO':
        dataset_classes_name = COCO_CLASSES
        dataset_classes_color = COCO_CLASSES_COLOR
    else:
        dataset_classes_name = VOC_CLASSES
        dataset_classes_color = VOC_CLASSES_COLOR

    # draw all pred boxes
    for per_score, per_class_index, per_box in zip(scores, classes, boxes):
        # Convert to built-in Python numbers: some OpenCV builds reject NumPy
        # scalars as point coordinates.
        per_score = float(per_score.numpy().astype(np.float32))
        per_class_index = int(per_class_index.numpy().astype(np.int32))
        x_min, y_min, x_max, y_max = per_box.numpy().astype(np.int32).tolist()

        class_name = dataset_classes_name[per_class_index]
        class_color = dataset_classes_color[per_class_index]
        left_top, right_bottom = (x_min, y_min), (x_max, y_max)
        cv2.rectangle(origin_img,
                      left_top,
                      right_bottom,
                      color=class_color,
                      thickness=2,
                      lineType=cv2.LINE_AA)

        # Filled label background just above the box, at least as wide as
        # the box itself.
        text = f'{class_name}:{per_score:.3f}'
        text_size = cv2.getTextSize(text, 0, 0.5, thickness=1)[0]
        fill_right_bottom = (max(left_top[0] + text_size[0], right_bottom[0]),
                             left_top[1] - text_size[1] - 3)
        cv2.rectangle(origin_img,
                      left_top,
                      fill_right_bottom,
                      color=class_color,
                      thickness=-1,
                      lineType=cv2.LINE_AA)
        cv2.putText(origin_img,
                    text, (left_top[0], left_top[1] - 2),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    color=(0, 0, 0),
                    thickness=1,
                    lineType=cv2.LINE_AA)

    if args.save_image_path:
        cv2.imwrite(os.path.join(args.save_image_path, 'detection_result.jpg'),
                    origin_img)

    if args.show_image:
        cv2.namedWindow("detection_result", cv2.WINDOW_AUTOSIZE)
        cv2.imshow('detection_result', origin_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def main():
    """Train with ``train_KD`` / ``validate_KD`` and manage checkpoints.

    Configuration comes from ``train_config.config`` inside ``args.work_dir``.
    Training resumes automatically from ``checkpoints/latest.pth`` when that
    file exists. After every epoch the main process saves ``latest.pth`` and,
    when validation top-1 improves, the student weights as
    ``best_student.pth``. Once training finishes the best student checkpoint
    is renamed to include the student name, final epoch and best top-1.

    Raises:
        AssertionError: if CUDA is not available.
    """
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from train_config import config
    log_dir = os.path.join(args.work_dir, 'log')
    checkpoint_dir = os.path.join(args.work_dir, 'checkpoints')
    resume_model = os.path.join(checkpoint_dir, 'latest.pth')
    best_student_path = os.path.join(checkpoint_dir, 'best_student.pth')

    set_seed(config.seed)

    local_rank = args.local_rank
    # start init process
    if config.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        torch.cuda.set_device(local_rank)

    # Only one process logs and writes checkpoints: rank 0 when distributed,
    # otherwise the single process.
    is_main_process = (not config.distributed) or local_rank == 0

    init_fn = functools.partial(worker_seed_init_fn,
                                num_workers=config.num_workers,
                                local_rank=local_rank,
                                seed=config.seed)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        config.train_dataset, shuffle=True) if config.distributed else None
    train_loader = DataLoader(config.train_dataset,
                              batch_size=config.batch_size,
                              shuffle=(train_sampler is None),
                              pin_memory=True,
                              num_workers=config.num_workers,
                              sampler=train_sampler,
                              worker_init_fn=init_fn)
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        config.val_dataset, shuffle=False) if config.distributed else None
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            sampler=val_sampler)

    if is_main_process:
        # automatically create checkpoint folder
        os.makedirs(checkpoint_dir, exist_ok=True)
    # The logger is built on every rank because train_KD receives it
    # unconditionally, so log_dir is created everywhere (exist_ok makes
    # concurrent creation by several ranks harmless).
    os.makedirs(log_dir, exist_ok=True)
    logger = get_logger('train', log_dir)

    if is_main_process:
        for key, value in config.__dict__.items():
            if key.startswith('__') or key in ('model', 'criterion'):
                continue
            logger.info(f'{key}: {value}')

    gpus_type, gpus_num = torch.cuda.get_device_name(
    ), torch.cuda.device_count()
    if is_main_process:
        logger.info(f'gpus_type: {gpus_type}, gpus_num: {gpus_num}')

    model = config.model.cuda()
    criterion = config.criterion
    for name in criterion.keys():
        criterion[name] = criterion[name].cuda()

    if is_main_process:
        # parameters needs to be updated by the optimizer
        # buffers doesn't needs to be updated by the optimizer
        for name, param in model.named_parameters():
            logger.info(f'name: {name}, grad: {param.requires_grad}')
        for name, buffer in model.named_buffers():
            logger.info(f'name: {name}, grad: {buffer.requires_grad}')

    optimizer = build_optimizer(config, model)
    scheduler = build_scheduler(config, optimizer)
    model = build_training_mode(config, model, optimizer)

    start_epoch = 1
    # Initialised before the resume step so that a best_top1 restored from
    # the checkpoint is not reset to zero afterwards.
    best_top1 = 0.0
    # automatically resume model for training if checkpoint model exist
    if os.path.exists(resume_model):
        checkpoint = torch.load(resume_model, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        saved_epoch = checkpoint['epoch']
        start_epoch += saved_epoch
        best_top1, loss, lr = checkpoint['best_top1'], checkpoint[
            'loss'], checkpoint['lr']
        if is_main_process:
            logger.info(
                f'resuming model from {resume_model}. resume_epoch: {saved_epoch}, best_top1: {best_top1:.3f}%, loss: {loss:.4f}, lr: {lr:.6f}'
            )

    # calculate training time
    start_time = time.time()
    # Stays defined even when the loop body never runs (training had already
    # reached config.epochs before this resume).
    final_epoch = start_epoch - 1
    for epoch in range(start_epoch, config.epochs + 1):
        final_epoch = epoch
        torch.cuda.empty_cache()
        if config.distributed:
            train_sampler.set_epoch(epoch)

        top1, top5, loss = train_KD(train_loader, model, criterion, optimizer,
                                    scheduler, epoch, logger, config)
        if is_main_process:
            logger.info(
                f'train: epoch {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
            )

        top1, top5, loss = validate_KD(val_loader, model, criterion)
        if is_main_process:
            logger.info(
                f'eval: epoch: {epoch:0>3d}, top1: {top1:.2f}%, top5: {top5:.2f}%, total_loss: {loss:.2f}'
            )

        if is_main_process:
            # save best top1 model and each epoch checkpoint
            if top1 > best_top1:
                torch.save(model.module.student.state_dict(),
                           best_student_path)
                best_top1 = top1

            torch.save(
                {
                    'epoch': epoch,
                    'best_top1': best_top1,
                    'loss': loss,
                    # NOTE(review): if this is a stock torch scheduler,
                    # get_last_lr() is the documented accessor - confirm.
                    'lr': scheduler.get_lr()[0],
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                }, os.path.join(checkpoint_dir, 'latest.pth'))

    # Rename the file that was actually written ('best_student.pth'), once,
    # from the main process only.
    if is_main_process and os.path.exists(best_student_path):
        os.rename(
            best_student_path,
            os.path.join(
                checkpoint_dir,
                f'{config.student}-epoch{final_epoch}-top1{best_top1:.3f}.pth'
            ))

    training_time = (time.time() - start_time) / 3600
    flops, params = compute_flops_and_params(config, model)
    if is_main_process:
        logger.info(
            f'train done. teacher: {config.teacher}, student: {config.student}, total_flops: {flops}, total_params: {params}, training time: {training_time:.3f} hours, best_top1: {best_top1:.3f}%'
        )
def inference():
    """Run object detection on a video, frame by frame, and draw the results.

    Reads every option from ``parse_args()``: builds the model and decoder,
    optionally loads weights, then processes each frame of
    ``args.test_video_path``. Each annotated frame gets a running-average FPS
    overlay and is written to ``detection_result.avi`` under
    ``args.save_video_path`` and/or displayed, depending on the arguments.
    Pressing ``q`` in the display window stops processing early.

    Raises:
        AssertionError: if the dataset name, model or decoder is unsupported,
            or if ``args.use_gpu`` is set but CUDA is not available.
        IOError: if the input video cannot be opened.
    """
    args = parse_args()
    print(f'args: {args}')

    assert args.trained_dataset_name in ['COCO', 'VOC'], 'Unsupported dataset!'
    assert args.model in models.__dict__.keys(), 'Unsupported model!'
    assert args.decoder in decode.__dict__.keys(), 'Unsupported decoder!'

    if args.use_gpu:
        # only use one Graphics card to inference
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        assert torch.cuda.is_available(), 'need gpu to train network!'
        torch.cuda.empty_cache()

    if args.seed:
        seed = args.seed
        os.environ['PYTHONHASHSEED'] = str(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if args.use_gpu:
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # for cudnn
            cudnn.enabled = True
            cudnn.deterministic = True
            cudnn.benchmark = False

    model = models.__dict__[args.model](**{
        'num_classes': args.trained_num_classes,
    })
    decoder = decode.__dict__[args.decoder]()
    if args.use_gpu:
        model = model.cuda()
        decoder = decoder.cuda()

    if args.trained_model_path:
        saved_model = torch.load(args.trained_model_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    model.eval()

    flops, params = compute_flops_and_params(args, model)
    print(f'model: {args.model}, flops: {flops}, params: {params}')

    # The class tables do not change between frames, so pick them once.
    if args.trained_dataset_name == 'COCO':
        dataset_classes_name = COCO_CLASSES
        dataset_classes_color = COCO_CLASSES_COLOR
    else:
        dataset_classes_name = VOC_CLASSES
        dataset_classes_color = VOC_CLASSES_COLOR

    video = cv2.VideoCapture(args.test_video_path)
    if not video.isOpened():
        raise IOError("Couldn't open video!")

    out_video = None
    try:
        # The output is always encoded as XVID instead of reusing the
        # FOURCC of the input video.
        video_FourCC = cv2.VideoWriter_fourcc(*"XVID")
        video_fps = video.get(cv2.CAP_PROP_FPS)
        video_size = (
            int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        )

        if args.save_video_path:
            print(f'save_video_info:{video_FourCC},{video_fps},{video_size}')
            out_video = cv2.VideoWriter(
                os.path.join(args.save_video_path, 'detection_result.avi'),
                video_FourCC, video_fps, video_size)

        frame_num = 0
        start_time = time.perf_counter()
        while True:
            return_value, frame = video.read()
            if not return_value:
                break

            # NOTE(review): the frame is converted BGR -> RGB here; if the
            # helper returns origin_img in RGB, the frames written/shown
            # below have swapped channels - confirm against the helper.
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            resized_img, origin_img, scale = load_image_for_detection_inference(
                img, args)
            resized_img = torch.tensor(resized_img)
            if args.use_gpu:
                resized_img = resized_img.cuda()

            # Pure inference: no autograd graph is needed.
            with torch.no_grad():
                # HWC image -> NCHW float batch of size one.
                out_tuples = model(
                    resized_img.permute(2, 0, 1).float().unsqueeze(0))
                scores, classes, boxes = decoder(*out_tuples)
            scores, classes, boxes = scores.cpu(), classes.cpu(), boxes.cpu()

            # Map the boxes from the resized frame back to the original.
            boxes /= scale

            scores = scores.squeeze(0)
            classes = classes.squeeze(0)
            boxes = boxes.squeeze(0)

            # Entries with a class index below zero are dropped.
            valid = classes > -1
            scores, classes, boxes = scores[valid], classes[valid], boxes[valid]

            confident = scores > args.min_score_threshold
            scores, classes, boxes = (scores[confident], classes[confident],
                                      boxes[confident])

            # clip boxes
            origin_h, origin_w = origin_img.shape[0], origin_img.shape[1]
            boxes[:, 0] = torch.clamp(boxes[:, 0], min=0)
            boxes[:, 1] = torch.clamp(boxes[:, 1], min=0)
            boxes[:, 2] = torch.clamp(boxes[:, 2], max=origin_w)
            boxes[:, 3] = torch.clamp(boxes[:, 3], max=origin_h)

            # draw all pred boxes
            for per_score, per_class_index, per_box in zip(
                    scores, classes, boxes):
                # Built-in Python numbers: some OpenCV builds reject NumPy
                # scalars as point coordinates.
                per_score = float(per_score.numpy().astype(np.float32))
                per_class_index = int(per_class_index.numpy().astype(np.int32))
                x_min, y_min, x_max, y_max = per_box.numpy().astype(
                    np.int32).tolist()

                class_name = dataset_classes_name[per_class_index]
                class_color = dataset_classes_color[per_class_index]
                left_top, right_bottom = (x_min, y_min), (x_max, y_max)
                cv2.rectangle(origin_img,
                              left_top,
                              right_bottom,
                              color=class_color,
                              thickness=2,
                              lineType=cv2.LINE_AA)

                # Filled label background just above the box, at least as
                # wide as the box itself.
                text = f'{class_name}:{per_score:.3f}'
                text_size = cv2.getTextSize(text, 0, 0.5, thickness=1)[0]
                fill_right_bottom = (max(left_top[0] + text_size[0],
                                         right_bottom[0]),
                                     left_top[1] - text_size[1] - 3)
                cv2.rectangle(origin_img,
                              left_top,
                              fill_right_bottom,
                              color=class_color,
                              thickness=-1,
                              lineType=cv2.LINE_AA)
                cv2.putText(origin_img,
                            text, (left_top[0], left_top[1] - 2),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.5,
                            color=(0, 0, 0),
                            thickness=1,
                            lineType=cv2.LINE_AA)

            # Running average over all processed frames. The elapsed time is
            # measured from a fixed start, so each frame is counted once.
            frame_num += 1
            elapsed = time.perf_counter() - start_time
            fps = frame_num / elapsed if elapsed > 0 else 0.0
            fps_text = f'FPS:{fps:.2f}'
            cv2.putText(origin_img,
                        fps_text, (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.5,
                        color=(51, 204, 51),
                        thickness=1,
                        lineType=cv2.LINE_AA)

            if out_video is not None:
                out_video.write(origin_img)

            if args.show_video:
                cv2.imshow('detection_result', origin_img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release the capture and flush/close the writer even when a frame
        # fails, otherwise the output file may be left unfinalised.
        video.release()
        if out_video is not None:
            out_video.release()
        if args.show_video:
            cv2.destroyAllWindows()
def main():
    """Evaluate a detection model on the validation set and log the result.

    The configuration is imported from ``test_config`` inside
    ``args.work_dir``; log output goes to ``<work_dir>/log``.
    """
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from test_config import config

    log_dir = os.path.join(args.work_dir, 'log')
    set_seed(config.seed)

    collater = DetectionCollater()
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=config.num_workers,
                            collate_fn=collater.next)

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    logger = get_logger('test', log_dir)

    # Dump every public config entry except the heavyweight objects.
    unlogged_keys = [
        'model', 'criterion', 'decoder', 'train_dataset', 'val_dataset'
    ]
    for key, value in config.__dict__.items():
        if key.startswith('__') or key in unlogged_keys:
            continue
        logger.info(f'{key}: {value}')

    gpus_type = torch.cuda.get_device_name()
    gpus_num = torch.cuda.device_count()
    logger.info(f'gpus_type: {gpus_type}, gpus_num: {gpus_num}')

    model, decoder = config.model, config.decoder

    if config.trained_model_path:
        weights_path = os.path.join(BASE_DIR, config.trained_model_path)
        saved_model = torch.load(weights_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    flops, params = compute_flops_and_params(config, model)
    logger.info(
        f'model: {config.network}, flops: {flops}, params: {params}')

    model = model.cuda()
    decoder = decoder.cuda()
    model = nn.DataParallel(model)

    result_dict = validate_detection(config.val_dataset, val_loader, model,
                                     decoder, config)

    # One summary line: either every metric or a "nothing detected" notice.
    summary = 'eval_result: '
    if result_dict:
        summary += ''.join(f'{key}: {value} ,'
                           for key, value in result_dict.items())
    else:
        summary += ', no target detected in testset images!'
    logger.info(summary)
def inference():
    """Classify a single image and draw the predicted class on it.

    Reads every option from ``parse_args()``: builds the backbone named by
    ``args.model``, optionally loads weights from ``args.trained_model_path``,
    preprocesses ``args.test_image_path`` (resize, center crop, normalise),
    runs one forward pass and writes ``<class>:<score>`` onto the original
    image. The annotated image is written to ``args.save_image_path`` and/or
    shown in a window, depending on the arguments.

    Raises:
        AssertionError: if the model is unsupported, or if ``args.use_gpu``
            is set but CUDA is not available.
    """
    args = parse_args()
    print(f'args: {args}')

    assert args.model in backbones.__dict__.keys(), 'Unsupported model!'

    if args.use_gpu:
        # only use one Graphics card to inference
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        assert torch.cuda.is_available(), 'need gpu to train network!'
        torch.cuda.empty_cache()

    if args.seed:
        seed = args.seed
        os.environ['PYTHONHASHSEED'] = str(seed)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if args.use_gpu:
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # for cudnn
            cudnn.enabled = True
            cudnn.deterministic = True
            cudnn.benchmark = False

    model = backbones.__dict__[args.model](**{
        'num_classes': args.trained_num_classes,
    })
    if args.use_gpu:
        model = model.cuda()

    if args.trained_model_path:
        saved_model = torch.load(args.trained_model_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    model.eval()

    flops, params = compute_flops_and_params(args, model)
    print(f'model: {args.model}, flops: {flops}, params: {params}')

    # Force three channels so grayscale / RGBA files work with the 3-channel
    # Normalize and the RGB->BGR conversion below; the context manager closes
    # the underlying file once the pixels have been converted.
    with Image.open(args.test_image_path) as image_file:
        origin_img = image_file.convert('RGB')

    transform = transforms.Compose([
        transforms.Resize(int(args.input_image_size * (256 / 224))),
        transforms.CenterCrop(args.input_image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # ToTensor already yields a tensor, so no extra torch.tensor() copy.
    img = transform(origin_img)
    if args.use_gpu:
        img = img.cuda()

    # OpenCV drawing and saving below work on a BGR array.
    origin_img = cv2.cvtColor(np.asarray(origin_img), cv2.COLOR_RGB2BGR)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        output = model(img.unsqueeze(0))
        output = F.softmax(output, dim=1)
        pred_score, pred_class = output.max(dim=1)
    pred_score, pred_class = pred_score.item(), pred_class.item()

    color = [random.randint(0, 255) for _ in range(3)]
    print(f'score: {pred_score:.3f}, class: {pred_class}, color: {color}')

    text = f'{pred_class}:{pred_score:.3f}'
    cv2.putText(origin_img,
                text, (30, 30),
                cv2.FONT_HERSHEY_PLAIN,
                1.5,
                color=color,
                thickness=1)

    if args.save_image_path:
        cv2.imwrite(
            os.path.join(args.save_image_path, 'classification_result.jpg'),
            origin_img)

    if args.show_image:
        cv2.namedWindow("classification_result", cv2.WINDOW_AUTOSIZE)
        cv2.imshow('classification_result', origin_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
def main():
    """Evaluate a classification model on the validation set and log metrics.

    The configuration is imported from ``test_config`` inside
    ``args.work_dir``. Supports single-process and distributed runs; only
    the main process (rank 0 when distributed) emits log records.
    """
    assert torch.cuda.is_available(), 'need gpu to train network!'
    torch.cuda.empty_cache()

    args = parse_args()
    sys.path.append(args.work_dir)
    from test_config import config

    log_dir = os.path.join(args.work_dir, 'log')
    set_seed(config.seed)

    local_rank = args.local_rank
    # start init process
    if config.distributed:
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        torch.cuda.set_device(local_rank)

    # Same truth table as "(distributed and rank == 0) or not distributed".
    is_main_process = (not config.distributed) or local_rank == 0

    if config.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            config.val_dataset, shuffle=False)
    else:
        val_sampler = None
    val_loader = DataLoader(config.val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            pin_memory=False,
                            num_workers=config.num_workers,
                            sampler=val_sampler)

    if is_main_process:
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)

    global logger
    logger = get_logger('test', log_dir)

    # Dump every public config entry except the model and criterion objects.
    for key, value in config.__dict__.items():
        if key.startswith('__') or key in ['model', 'criterion']:
            continue
        entry = f'{key}: {value}'
        if is_main_process:
            logger.info(entry)

    gpus_type = torch.cuda.get_device_name()
    gpus_num = torch.cuda.device_count()
    if is_main_process:
        logger.info(f'gpus_type: {gpus_type}, gpus_num: {gpus_num}')

    model, criterion = config.model, config.criterion

    if config.trained_model_path:
        weights_path = os.path.join(BASE_DIR, config.trained_model_path)
        saved_model = torch.load(weights_path,
                                 map_location=torch.device('cpu'))
        model.load_state_dict(saved_model)

    flops, params = compute_flops_and_params(config, model)
    model_summary = (
        f'model: {config.network}, flops: {flops}, params: {params}')
    if is_main_process:
        logger.info(model_summary)

    model = model.cuda()
    criterion = criterion.cuda()

    if config.distributed:
        model = nn.parallel.DistributedDataParallel(model,
                                                    device_ids=[local_rank],
                                                    output_device=local_rank)
    else:
        model = nn.DataParallel(model)

    metrics = validate_classification(val_loader, model, criterion, config)
    top1, top5, loss, per_image_load_time, per_image_inference_time = metrics

    report = (
        f'top1: {top1:.3f}%, top5: {top5:.3f}%, loss: {loss:.4f}, '
        f'per_image_load_time: {per_image_load_time:.3f}ms, '
        f'per_image_inference_time: {per_image_inference_time:.3f}ms')
    if is_main_process:
        logger.info(report)