def features(
    self
) -> Tuple[Base.ConvLayers, Base.LateralLayers, Base.DealiasingLayers, int]:
    """Split a torchvision ResNet-50 into stages and create the extra convolutions.

    Returns:
        ``(conv_layers, lateral_layers, dealiasing_layers, num_features_out)``
        where the first three are the ``Base`` containers wrapping the five
        conv stages, the four 1x1 lateral convolutions and the three 3x3
        dealiasing convolutions, and ``num_features_out`` is 256.
    """
    network = torchvision.models.resnet50(pretrained=self._pretrained)

    # list(network.children()) consists of the following modules:
    #   [0] = Conv2d, [1] = BatchNorm2d, [2] = ReLU, [3] = MaxPool2d,
    #   [4] = Sequential(Bottleneck...),
    #   [5] = Sequential(Bottleneck...),
    #   [6] = Sequential(Bottleneck...),
    #   [7] = Sequential(Bottleneck...),
    #   [8] = AvgPool2d, [9] = Linear
    stages = list(network.children())
    conv1 = nn.Sequential(*stages[:4])
    conv2, conv3, conv4, conv5 = stages[4], stages[5], stages[6], stages[7]

    num_features_out = 256

    # 1x1 convolutions mapping each stage's channel count to num_features_out.
    # They are created in c2..c5 order, same as the dealiasing layers below,
    # so the random-initialisation sequence is unchanged.
    lateral_c2, lateral_c3, lateral_c4, lateral_c5 = [
        nn.Conv2d(in_channels=stage_channels,
                  out_channels=num_features_out,
                  kernel_size=1)
        for stage_channels in (256, 512, 1024, 2048)
    ]

    # 3x3 convolutions with padding 1 (spatial size preserved).
    dealiasing_p2, dealiasing_p3, dealiasing_p4 = [
        nn.Conv2d(in_channels=num_features_out,
                  out_channels=num_features_out,
                  kernel_size=3,
                  padding=1)
        for _ in range(3)
    ]

    # The first two stages are excluded from gradient updates.
    for frozen_stage in (conv1, conv2):
        for weight in frozen_stage.parameters():
            weight.requires_grad = False

    return (
        Base.ConvLayers(conv1, conv2, conv3, conv4, conv5),
        Base.LateralLayers(lateral_c2, lateral_c3, lateral_c4, lateral_c5),
        Base.DealiasingLayers(dealiasing_p2, dealiasing_p3, dealiasing_p4),
        num_features_out,
    )
def _eval(path_to_checkpoint: str, dataset_name: str, backbone_name: str,
          path_to_data_dir: str, path_to_results_dir: str):
    """Load a checkpoint and log the evaluator's mean AP and detail report.

    Args:
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        path_to_data_dir: Dataset root, also handed to the evaluator.
        path_to_results_dir: Directory handed to the evaluator.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    dataset = dataset_class(path_to_data_dir, DatasetBase.Mode.EVAL,
                            Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    evaluator = Evaluator(dataset, path_to_data_dir, path_to_results_dir)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone_class = BackboneBase.from_name(backbone_name)
    backbone = backbone_class(pretrained=False)
    network = Model(
        backbone,
        dataset.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    )
    model = network.cuda()
    model.load(path_to_checkpoint)

    Log.i('Start evaluating with 1 GPU (1 batch per GPU)')
    evaluation = evaluator.evaluate(model)
    mean_ap, detail = evaluation
    Log.i('Done')

    Log.i('mean AP = {:.4f}'.format(mean_ap))
    Log.i('\n' + detail)
def __init__(self, backbone: BackboneBase, num_classes: int,
             pooler_mode: Pooler.Mode,
             anchor_ratios: List[Tuple[int, int]], anchor_scales: List[int],
             rpn_pre_nms_top_n: int, rpn_post_nms_top_n: int):
    """Attach the backbone's layers and create the RPN and detection head.

    Args:
        backbone: Provides ``features()``, which returns the conv, lateral
            and dealiasing layer groups plus the feature channel count.
        num_classes: Stored on the instance and passed to the detection head.
        pooler_mode: Passed to ``Model.Detection``.
        anchor_ratios: Passed to the region proposal network.
        anchor_scales: Passed to the region proposal network.
        rpn_pre_nms_top_n: Passed to the region proposal network.
        rpn_post_nms_top_n: Passed to the region proposal network.
    """
    super().__init__()

    use_cuda = torch.cuda.is_available()
    self._device = torch.device("cuda" if use_cuda else "cpu")

    backbone_parts = backbone.features()
    conv_layers, lateral_layers, dealiasing_layers, num_features_out = backbone_parts

    self.conv1, self.conv2, self.conv3, self.conv4, self.conv5 = conv_layers
    self.lateral_c2, self.lateral_c3, self.lateral_c4, self.lateral_c5 = lateral_layers
    self.dealiasing_p2, self.dealiasing_p3, self.dealiasing_p4 = dealiasing_layers

    # Collect every BatchNorm2d into a plain Python list, walking the groups
    # in the same order they were attached above.
    attached_groups = (
        self.conv1, self.conv2, self.conv3, self.conv4, self.conv5,
        self.lateral_c2, self.lateral_c3, self.lateral_c4, self.lateral_c5,
        self.dealiasing_p2, self.dealiasing_p3, self.dealiasing_p4,
    )
    self._bn_modules = [
        layer
        for group in attached_groups
        for layer in group.modules()
        if isinstance(layer, nn.BatchNorm2d)
    ]

    self.num_classes = num_classes

    self.rpn = RegionProposalNetwork(num_features_out, anchor_ratios,
                                     anchor_scales, rpn_pre_nms_top_n,
                                     rpn_post_nms_top_n)
    self.detection = Model.Detection(pooler_mode, self.num_classes)
def _infer(path_to_input_image: str, path_to_output_image: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    """Detect objects in one image and save a copy with the detections drawn.

    Args:
        path_to_input_image: Image file to open.
        path_to_output_image: Where the annotated image is saved.
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not above this are dropped.
    """
    image = transforms.Image.open(path_to_input_image)
    dataset_class = DatasetBase.from_name(dataset_name)
    image_tensor, scale = dataset_class.preprocess(image,
                                                   Config.IMAGE_MIN_SIDE,
                                                   Config.IMAGE_MAX_SIDE)

    backbone_class = BackboneBase.from_name(backbone_name)
    backbone = backbone_class(pretrained=False)
    model = Model(
        backbone,
        dataset_class.num_classes(),
        pooling_mode=Config.POOLING_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    ).cuda()
    model.load(path_to_checkpoint)

    forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
    forward_output: Model.ForwardOutput.Eval = model.eval().forward(forward_input)

    # Divide the boxes by the preprocessing scale before drawing on the
    # original image.
    boxes = forward_output.detection_bboxes / scale
    labels = forward_output.detection_classes
    scores = forward_output.detection_probs

    confident = scores > prob_thresh
    boxes, labels, scores = boxes[confident], labels[confident], scores[confident]

    draw = ImageDraw.Draw(image)
    detections = zip(boxes.tolist(), labels.tolist(), scores.tolist())
    for coords, cls, prob in detections:
        color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=coords[0], top=coords[1], right=coords[2], bottom=coords[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

        top_left = (bbox.left, bbox.top)
        bottom_right = (bbox.right, bbox.bottom)
        draw.rectangle((top_left, bottom_right), outline=color)
        draw.text(top_left, text=f'{category:s} {prob:.3f}', fill=color)

    image.save(path_to_output_image)
    print(f'Output image is saved to {path_to_output_image}')
def __init__(self, backbone: BackboneBase, num_classes: int,
             pooler_mode: Pooler.Mode,
             anchor_ratios: List[Tuple[int, int]], anchor_sizes: List[int],
             rpn_pre_nms_top_n: int, rpn_post_nms_top_n: int,
             anchor_smooth_l1_loss_beta: Optional[float] = None,
             proposal_smooth_l1_loss_beta: Optional[float] = None):
    """Build the model from a backbone, an RPN and a detection head.

    Args:
        backbone: Provides ``features()``, which returns
            ``(features, hidden, num_features_out, num_hidden_out)``.
        num_classes: Passed to the detection head.
        pooler_mode: Passed to the detection head.
        anchor_ratios: Passed to the region proposal network.
        anchor_sizes: Passed to the region proposal network.
        rpn_pre_nms_top_n: Passed to the region proposal network.
        rpn_post_nms_top_n: Passed to the region proposal network.
        anchor_smooth_l1_loss_beta: Passed to the region proposal network.
        proposal_smooth_l1_loss_beta: Passed to the detection head.
    """
    super().__init__()

    self.features, hidden, num_features_out, num_hidden_out = backbone.features()

    batch_norms = []
    for sub_network in (self.features, hidden):
        batch_norms.extend(
            layer for layer in sub_network.modules()
            if isinstance(layer, nn.BatchNorm2d))
    self._bn_modules = nn.ModuleList(batch_norms)

    # NOTE: It's crucial to freeze batch normalization modules for few batches
    # training, which can be done by the following processes
    #   (1) Change mode to `eval`
    #   (2) Disable gradient (we move this process into `forward`)
    for bn_module in self._bn_modules:
        for weight in bn_module.parameters():
            weight.requires_grad = False

    self.rpn = RegionProposalNetwork(num_features_out, anchor_ratios,
                                     anchor_sizes, rpn_pre_nms_top_n,
                                     rpn_post_nms_top_n,
                                     anchor_smooth_l1_loss_beta)
    self.detection = Model.Detection(pooler_mode, hidden, num_hidden_out,
                                     num_classes,
                                     proposal_smooth_l1_loss_beta)
def _infer_stream(path_to_input_stream_endpoint: str, period_of_inference: int,
                  path_to_checkpoint: str, dataset_name: str,
                  backbone_name: str, prob_thresh: float):
    """Run detection on a video stream and show annotated frames in a window.

    Every ``period_of_inference``-th frame is run through the model, annotated
    with boxes, labels and an FPS counter, and displayed. The loop stops when
    ESC (key code 27) is pressed or when the capture stops delivering frames.

    Args:
        path_to_input_stream_endpoint: Source given to ``cv2.VideoCapture``;
            an all-digit string is converted to ``int`` first.
        period_of_inference: Run the model on every N-th frame read.
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not above this are dropped.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    model.eval()

    source = path_to_input_stream_endpoint
    if source.isdigit():
        source = int(source)
    video_capture = cv2.VideoCapture(source)

    palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

    try:
        with torch.no_grad():
            for sn in itertools.count(start=1):
                grabbed, frame = video_capture.read()
                # A failed read (end of file, dropped stream, device that
                # could not be opened) yields no frame; stop instead of
                # handing None to cv2.cvtColor.
                if not grabbed or frame is None:
                    break

                if sn % period_of_inference != 0:
                    continue

                timestamp = time.time()

                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(image)
                image_tensor, scale = dataset_class.preprocess(
                    image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                detection_bboxes, detection_classes, detection_probs, _ = \
                    model.forward(image_tensor.unsqueeze(dim=0).cuda())
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                draw = ImageDraw.Draw(image)
                for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                           detection_classes.tolist(),
                                           detection_probs.tolist()):
                    color = random.choice(palette)
                    bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                    draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)),
                                   outline=color)
                    draw.text((bbox.left, bbox.top),
                              text=f'{category:s} {prob:.3f}', fill=color)

                image = np.array(image)
                frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                elapse = time.time() - timestamp
                # Guard against a zero reading from a coarse clock.
                fps = 1 / elapse if elapse > 0 else float('inf')
                cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1,
                            cv2.LINE_AA)

                cv2.imshow('easy-faster-rcnn.pytorch', frame)
                if cv2.waitKey(10) == 27:  # ESC
                    break
    finally:
        # Release the capture and close windows even if inference raised.
        video_capture.release()
        cv2.destroyAllWindows()
def _infer(path_to_input_dir: str, path_to_output_dir: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    """Detect objects in every ``*.jpg`` of a directory and save annotated copies.

    Each output image keeps the input's file name and is written into
    ``path_to_output_dir``.

    Args:
        path_to_input_dir: Directory searched (non-recursively) for ``*.jpg``.
        path_to_output_dir: Directory the annotated images are saved to; a
            trailing path separator is not required.
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not above this are dropped.
    """
    import os  # local import: the file's top-level imports are not visible here

    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    model.eval()

    image_paths = glob.glob(os.path.join(path_to_input_dir, '*.jpg'))
    palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

    with torch.no_grad():
        for image_path in tqdm(image_paths):
            name = os.path.basename(image_path)
            image = transforms.Image.open(image_path).convert("RGB")
            image_tensor, scale = dataset_class.preprocess(
                image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)
            for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                       detection_classes.tolist(),
                                       detection_probs.tolist()):
                color = random.choice(palette)
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)),
                               outline=color)
                draw.text((bbox.left, bbox.top),
                          text=f'{category:s} {prob:.3f}', fill=color)

            # os.path.join inserts the separator that plain string
            # concatenation dropped when the directory had no trailing slash.
            image.save(os.path.join(path_to_output_dir, name))

    print(f'Output image is saved to {path_to_output_dir}')
def __init__(self, backbone: BackboneBase, num_classes: int,
             anchor_sizes: List[int], model_def,
             image_size: Optional[int] = 416):
    """Create the backbone network and three detection heads.

    Args:
        backbone: Provides ``backbone(model_def)`` and ``features()``.
        num_classes: Passed to each detection head.
        anchor_sizes: Split into three groups, one per detection head.
        model_def: Forwarded to ``backbone.backbone``.
        image_size: Passed to each detection head.
    """
    # TODO: remove model_def
    super(YOLOV3, self).__init__()

    # Initiate model
    network = backbone.backbone(model_def)
    self.backbone = network
    self.backbone.apply(weights_init_normal)
    self.feature = backbone.features()

    # Unpacking into exactly three names keeps the requirement that
    # group_anchors returns three groups.
    first_group, second_group, third_group = group_anchors(anchor_sizes,
                                                           num_groups=3)
    heads = [
        YOLOV3.Detection(anchors, num_classes, image_size)
        for anchors in (first_group, second_group, third_group)
    ]
    self.detections = nn.ModuleList(heads)
def _infer_websocket(path_to_checkpoint: str, dataset_name: str,
                     backbone_name: str, prob_thresh: float):
    """Serve detections over a websocket on port 8765.

    Each received message is interpreted as a raw uint8 buffer reshaped to
    ``(480, 640, 3)``. The reply is a JSON list with one object per kept
    detection (``left``, ``top``, ``right``, ``bottom``, ``category``).

    Args:
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not above this are dropped.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone_class = BackboneBase.from_name(backbone_name)
    backbone = backbone_class(pretrained=False)
    model = Model(
        backbone,
        dataset_class.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    ).cuda()
    model.load(path_to_checkpoint)

    async def handler(websocket, path):
        print('Connection established:', path)

        with torch.no_grad():
            while True:
                raw = await websocket.recv()
                pixels = np.frombuffer(raw, dtype=np.uint8).reshape(480, 640, 3)

                image = Image.fromarray(pixels)
                image_tensor, scale = dataset_class.preprocess(
                    image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                batch = image_tensor.unsqueeze(dim=0).cuda()
                boxes, labels, scores, _ = model.eval().forward(batch)
                boxes /= scale

                confident = scores > prob_thresh
                boxes = boxes[confident]
                labels = labels[confident]
                scores = scores[confident]

                detections = []
                for coords, cls, _prob in zip(boxes.tolist(),
                                              labels.tolist(),
                                              scores.tolist()):
                    bbox = BBox(left=coords[0], top=coords[1],
                                right=coords[2], bottom=coords[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
                    detections.append({
                        'left': int(bbox.left),
                        'top': int(bbox.top),
                        'right': int(bbox.right),
                        'bottom': int(bbox.bottom),
                        'category': category
                    })

                await websocket.send(json.dumps(detections))

    server = websockets.serve(handler, host='*', port=8765,
                              max_size=2 ** 32, compression=None)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server)

    print('Service is ready. Please navigate to http://127.0.0.1:8000/')
    asyncio.get_event_loop().run_forever()
def __init__(self, filename):
    """Create a CPU model for 'voc2007' with a 'resnet101' backbone and load it.

    Args:
        filename: Passed to ``self.load`` once the model has been built.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    self.logger = log

    self.dataset_class = DatasetBase.from_name('voc2007')

    backbone_class = BackboneBase.from_name('resnet101')
    self.backbone = backbone_class(pretrained=False)

    network = Model(
        self.backbone,
        self.dataset_class.num_classes(),
        pooler_mode=Pooler.Mode.ALIGN,
        anchor_ratios=[(1, 2), (1, 1), (2, 1)],
        anchor_sizes=[128, 256, 512],
        rpn_pre_nms_top_n=6000,
        rpn_post_nms_top_n=300,
    )
    self.model = network.cpu()

    self.load(filename)
def __init__(self, backbone: BackboneBase, num_classes: int,
             pooler_mode: Pooler.Mode,
             anchor_ratios: List[Tuple[int, int]], anchor_sizes: List[int],
             rpn_pre_nms_top_n: int, rpn_post_nms_top_n: int,
             anchor_smooth_l1_loss_beta: Optional[float] = None,
             proposal_smooth_l1_loss_beta: Optional[float] = None):
    """Build the model from a backbone and a detection head (no RPN is created).

    The anchor and RPN arguments are accepted but not used by this
    constructor.

    Args:
        backbone: Provides ``features()``, which returns
            ``(features, hidden, num_features_out, num_hidden_out)``.
        num_classes: Passed to the detection head.
        pooler_mode: Passed to the detection head.
        anchor_ratios: Unused here.
        anchor_sizes: Unused here.
        rpn_pre_nms_top_n: Unused here.
        rpn_post_nms_top_n: Unused here.
        anchor_smooth_l1_loss_beta: Unused here.
        proposal_smooth_l1_loss_beta: Passed to the detection head.
    """
    super().__init__()

    backbone_parts = backbone.features()
    # num_features_out is unpacked to validate the 4-tuple shape but is not
    # needed without an RPN.
    self.features, hidden, num_features_out, num_hidden_out = backbone_parts

    detection_head = Model.Detection(pooler_mode, hidden, num_hidden_out,
                                     num_classes,
                                     proposal_smooth_l1_loss_beta)
    self.detection = detection_head
def __init__(self, backbone: BackboneBase, num_classes: int,
             pooler_mode: Pooler.Mode,
             anchor_ratios: List[Tuple[int, int]], anchor_sizes: List[int],
             rpn_pre_nms_top_n: int, rpn_post_nms_top_n: int,
             anchor_smooth_l1_loss_beta: Optional[float] = None,
             proposal_smooth_l1_loss_beta: Optional[float] = None):
    """Build the model from a backbone, an RPN and a detection head.

    Args:
        backbone: Provides ``features()``, which returns
            ``(features, hidden, num_features_out, num_hidden_out)``.
        num_classes: Passed to the detection head.
        pooler_mode: Passed to the detection head.
        anchor_ratios: Passed to the region proposal network.
        anchor_sizes: Passed to the region proposal network.
        rpn_pre_nms_top_n: Passed to the region proposal network.
        rpn_post_nms_top_n: Passed to the region proposal network.
        anchor_smooth_l1_loss_beta: Passed to the region proposal network.
        proposal_smooth_l1_loss_beta: Passed to the detection head.
    """
    super().__init__()

    # Original author's notes (translated):
    #   The backbone is resnet18/50/101.
    #   Every resnet has 10 parts (0 to 9).
    #   `features` is parts 0 to 6 (parameters of parts 0 to 4 are frozen),
    #   which includes the 3 blocks layer1 to layer3.
    #   `hidden` is layer4.
    #   Taking resnet101 as an example:
    #     num_features_out=1024
    #     num_hidden_out=2048
    self.features, hidden, num_features_out, num_hidden_out = backbone.features()

    batch_norms = [
        layer
        for sub_network in (self.features, hidden)
        for layer in sub_network.modules()
        if isinstance(layer, nn.BatchNorm2d)
    ]
    self._bn_modules = nn.ModuleList(batch_norms)

    # NOTE: It's crucial to freeze batch normalization modules for few batches
    # training, which can be done by the following processes
    #   (1) Change mode to `eval`
    #   (2) Disable gradient (we move this process into `forward`)
    for bn_module in self._bn_modules:
        for weight in bn_module.parameters():
            weight.requires_grad = False

    # Original author's notes (translated) - values used for inference:
    #   num_features_out=1024
    #   anchor_ratios=[(1, 2), (1, 1), (2, 1)]
    #   anchor_sizes=[64, 128, 256, 512]
    #   rpn_pre_nms_top_n=6000
    #   rpn_post_nms_top_n=1000
    #   anchor_smooth_l1_loss_beta=None
    self.rpn = RegionProposalNetwork(num_features_out, anchor_ratios,
                                     anchor_sizes, rpn_pre_nms_top_n,
                                     rpn_post_nms_top_n,
                                     anchor_smooth_l1_loss_beta)
    self.detection = Model.Detection(pooler_mode, hidden, num_hidden_out,
                                     num_classes,
                                     proposal_smooth_l1_loss_beta)
def _eval(path_to_checkpoint, backbone_name, path_to_results_dir):
    """Evaluate a checkpoint on ``EvalConfig.VAL_DATA`` and log the mean AP.

    Args:
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        path_to_results_dir: Directory handed to the evaluator.
    """
    dataset = AVA_video(EvalConfig.VAL_DATA)
    evaluator = Evaluator(dataset, path_to_results_dir)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone_class = BackboneBase.from_name(backbone_name)
    backbone = backbone_class()
    # The RPN top-n values are read from TrainConfig, the rest from Config.
    network = Model(
        backbone,
        dataset.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=TrainConfig.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=TrainConfig.RPN_POST_NMS_TOP_N,
    )
    model = network.cuda()
    model.load(path_to_checkpoint)
    print("load from:", path_to_checkpoint)

    Log.i('Start evaluating with 1 GPU (1 batch per GPU)')
    evaluation = evaluator.evaluate(model)
    mean_ap, detail = evaluation
    Log.i('Done')

    Log.i('mean AP = {:.4f}'.format(mean_ap))
    Log.i('\n' + detail)
def tourch_script():
    """Trace the model with ``torch.jit.trace`` and save the traced module.

    Loads ``weights/slowfast_weight.pth``, copies every checkpoint entry whose
    key exists in the model's state dict, switches the model to eval mode,
    traces it with two random example inputs and writes
    ``slowfast_50_eval_three.pt``. The process then exits with status 0.
    """
    weights = 'weights/slowfast_weight.pth'
    # Alternative checkpoint noted by the original author:
    # '/home/ganhaiyang/output/ava/temp_4/model_save/2019-12-26-11-06-33/model-80.pth'
    backbone_name = Config.BACKBONE_NAME
    dataset = AVA_video(Config.TRAIN_DATA)
    backbone = BackboneBase.from_name(backbone_name)()

    os.environ['CUDA_VISIBLE_DEVICES'] = '7'
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    chkpt = torch.load(weights, map_location=device)
    model = Model(
        backbone,
        dataset.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
        anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
        proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA).to(device)

    # Use the wrapped module's state dict when the model has a `.module`
    # attribute, otherwise the model's own.
    try:
        model_dict = model.module.state_dict()
    except AttributeError:
        model_dict = model.state_dict()

    # Read the parameters, dropping checkpoint keys the model does not have.
    chkpt = {k: v for k, v in chkpt.items() if k in model_dict}
    print("load pretrain model")
    model_dict.update(chkpt)
    model.load_state_dict(model_dict)

    # Switch to evaluation mode before tracing.
    model.eval()

    # Example inputs fed through the model to record the trace. They are moved
    # to the same device as the model so the CPU fallback above also works
    # (an unconditional .cuda() fails on hosts without CUDA).
    e1 = torch.rand(1, 3, 64, 300, 400).to(device)
    e2 = torch.rand(1, 3, 4).to(device)
    traced_script_module = torch.jit.trace(model, (e1, e2))
    traced_script_module.save("slowfast_50_eval_three.pt")
    print("out put save")
    exit(0)
def __init__(self, path_to_checkpoint, dataset_name='obstacle',
             backbone_name='resnet101', prob_thresh=0.6):
    """Store the settings, build the CUDA model and load the checkpoint.

    Args:
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Stored on the instance as ``self.prob_thresh``.
    """
    self.path_to_checkpoint = path_to_checkpoint
    self.dataset_name = dataset_name
    self.backbone_name = backbone_name
    self.prob_thresh = prob_thresh

    self.dataset_class = DatasetBase.from_name(dataset_name)
    backbone_class = BackboneBase.from_name(backbone_name)
    self.backbone = backbone_class(pretrained=False)

    # Set up model
    network = Model(
        self.backbone,
        self.dataset_class.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    )
    self.model = network.cuda()
    self.model.load(path_to_checkpoint)

    # Set in evaluation mode
    self.model.eval()
def _train(backbone_name, path_to_checkpoints_dir, path_to_resuming_checkpoint):
    """Train the model on ``Config.TRAIN_DATA`` with ``DataParallel``.

    Writes TensorBoard summaries under
    ``<path_to_checkpoints_dir>/summaries/logdir/<timestamp>`` and checkpoints
    under ``<path_to_checkpoints_dir>/model_save/<timestamp>``. Training stops
    after ``Config.NUM_STEPS_TO_FINISH`` optimizer steps.

    Args:
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        path_to_checkpoints_dir: Root directory for summaries and checkpoints.
        path_to_resuming_checkpoint: Checkpoint to resume from, or ``None``.
    """
    # NOTE(review): `args` and (further down) `log_file` are not defined in
    # this function; presumably module-level globals - confirm. `cfg` is not
    # used below; the call is kept because it may validate the config file.
    cfg = Config1.fromfile(args.config)

    dataset = AVA_video(Config.TRAIN_DATA)
    dataloader = DataLoader(dataset,
                            batch_size=4,
                            num_workers=8,
                            collate_fn=DatasetBase.padding_collate_fn,
                            pin_memory=True,
                            shuffle=False)
    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)()

    os.environ['CUDA_VISIBLE_DEVICES'] = Config.GPU_OPTION
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = Model(backbone,
                  dataset.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
                  anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
                  proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA).cuda()
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])  # multi-GPU
    model.to(device)
    print('Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    optimizer = optim.SGD(model.parameters(),
                          lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM,
                          weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer,
                                  milestones=Config.STEP_LR_SIZES,
                                  gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR,
                                  num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    # A bounded deque works like a list, but once it holds `maxlen` items each
    # append discards the item at the opposite end.
    losses = deque(maxlen=100)

    cur_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    logdir = os.path.join(path_to_checkpoints_dir, 'summaries', 'logdir', cur_time)
    os.makedirs(logdir, exist_ok=True)
    summary_writer = SummaryWriter(logdir)

    # The checkpoint directory does not change during the run, so it is
    # resolved and created once instead of on every iteration.
    model_save_dir = os.path.join(path_to_checkpoints_dir, 'model_save', cur_time)
    os.makedirs(model_save_dir, exist_ok=True)

    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.module.load(path_to_resuming_checkpoint, optimizer, scheduler)
        print("load from:", path_to_resuming_checkpoint)

    device_count = torch.cuda.device_count()
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(
        torch.cuda.device_count(),
        Config.BATCH_SIZE // torch.cuda.device_count()))

    print("loading data ... ")
    while not should_stop:
        for n_iter, (_, image_batch, _, bboxes_batch, labels_batch,
                     detector_bboxes_batch) in enumerate(dataloader):
            batch_size = image_batch.shape[0]
            image_batch = image_batch.cuda()
            bboxes_batch = bboxes_batch.cuda()
            labels_batch = labels_batch.cuda()
            detector_bboxes_batch = detector_bboxes_batch.cuda()

            proposal_class_losses = \
                model.train().forward(image_batch, bboxes_batch, labels_batch, detector_bboxes_batch)
            proposal_class_loss = proposal_class_losses.mean()
            loss = proposal_class_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            losses.append(loss.item())
            summary_writer.add_scalar('train/proposal_class_loss',
                                      proposal_class_loss.item(), step)
            summary_writer.add_scalar('train/loss', loss.item(), step)

            # NOTE(review): the histogram step is n_iter, which restarts from
            # zero on every pass over the dataloader - confirm this is intended.
            if n_iter % 10000 == 0:
                for name, param in model.named_parameters():
                    name = name.replace('.', '/')
                    if name.find("conv") >= 0:
                        summary_writer.add_histogram(
                            name, param.data.cpu().numpy(), global_step=n_iter)
                        # Parameters that received no gradient (for example
                        # frozen ones) have grad == None; skip them instead
                        # of raising AttributeError.
                        if param.grad is not None:
                            summary_writer.add_histogram(
                                name + 'grad',
                                param.grad.data.cpu().numpy(),
                                global_step=n_iter)

            step += 1
            if step == num_steps_to_finish:
                should_stop = True

            if step % num_steps_to_display == 0:
                elapsed_time = time.time() - time_checkpoint
                print("time_checkpoint :", time_checkpoint, "elapsed_time:", elapsed_time)
                time_checkpoint = time.time()
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                lr = scheduler.get_lr()[0]
                print_string = '[Step {0}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.8f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)'\
                    .format(step, avg_loss=avg_loss, lr=lr, samples_per_sec=samples_per_sec, eta=eta)
                print(print_string)
                with open(log_file, 'a') as f:
                    f.writelines(print_string + '\n')

            if step % num_steps_to_snapshot == 0 or should_stop:
                path_to_checkpoint = model.module.save(model_save_dir, step,
                                                       optimizer, scheduler)
                Log.i('Model has been saved to {}'.format(path_to_checkpoint))

            if should_stop:
                break

    Log.i('Done')
def _infer(path_to_input_image: str, path_to_output_dir: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    """Detect objects in one image and write the detections to a JSON file.

    The file is written to ``path_to_output_dir`` and named with the current
    timestamp followed by the image name up to its first dot.

    Args:
        path_to_input_image: Image file to open; the part after the last
            ``/`` is used as the image name.
        path_to_output_dir: Directory the JSON file is written to.
        path_to_checkpoint: Checkpoint file passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not above this are dropped.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone_class = BackboneBase.from_name(backbone_name)
    backbone = backbone_class(pretrained=False)
    model = Model(
        backbone,
        dataset_class.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    ).cuda()
    model.load(path_to_checkpoint)

    with torch.no_grad():
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(image,
                                                       Config.IMAGE_MIN_SIDE,
                                                       Config.IMAGE_MAX_SIDE)

        batch = image_tensor.unsqueeze(dim=0).cuda()
        boxes, labels, scores, _ = model.eval().forward(batch)
        boxes /= scale

        confident = scores > prob_thresh
        boxes = boxes[confident]
        labels = labels[confident]
        scores = scores[confident]

        image_name = path_to_input_image.split('/')[-1]
        stem = image_name.split('.')[0]
        frame_num = stem.split('_')[-1]

        report = OrderedDict()
        report["image_path"] = image_name
        report["modules"] = "Faster_R-CNN_ResNet101"
        report["cam_id"] = "0"
        report["frame_num"] = frame_num

        detections = []
        for coords, cls, prob in zip(boxes.tolist(), labels.tolist(),
                                     scores.tolist()):
            bbox = BBox(left=coords[0], top=coords[1],
                        right=coords[2], bottom=coords[3])
            category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

            entry = OrderedDict()
            entry["label"] = [{
                'description': category,
                'score': prob
            }]
            entry["position"] = {
                'x': bbox.left,
                'y': bbox.top,
                'w': (bbox.right - bbox.left),
                'h': (bbox.bottom - bbox.top)
            }
            detections.append(entry)

        module_result = OrderedDict()
        module_result["module_name"] = "Faster_R-CNN_ResNet101"
        module_result["detection_result"] = detections
        report["results"] = [module_result]

        timestamp = datetime.now().strftime("%Y-%m-%d__%H:%M:%S__")
        output_file_path = path_to_output_dir + "/" + timestamp + stem + ".json"

        with open('{}'.format(output_file_path), 'w', encoding="utf-8") as make_file:
            json.dump(report, make_file, ensure_ascii=False, indent="\t")
def __init__(self, backbone: BackboneBase, num_classes: int,
             pooling_mode: ROIWrapper.Mode,
             anchor_ratios: List[Tuple[int, int]], anchor_sizes: List[int],
             rpn_pre_nms_top_n: int, rpn_post_nms_top_n: int):
    """Build the model from a backbone, an RPN and a detection head.

    Args:
        backbone: Provides ``features()``, which returns ``(features,
            pool_handler, hidden, hidden_handler, num_features_out,
            num_hidden_out)``.
        num_classes: Passed to the detection head.
        pooling_mode: Passed to the detection head.
        anchor_ratios: Passed to the region proposal network.
        anchor_sizes: Passed to the region proposal network.
        rpn_pre_nms_top_n: Passed to the region proposal network.
        rpn_post_nms_top_n: Passed to the region proposal network.
    """
    super().__init__()

    backbone_parts = backbone.features()
    (self.features, pool_handler, hidden, hidden_handler,
     num_features_out, num_hidden_out) = backbone_parts

    # Kept as a plain Python list: BatchNorm2d layers of `features` first,
    # then those of `hidden`.
    self._bn_modules = [
        layer
        for sub_network in (self.features, hidden)
        for layer in sub_network.modules()
        if isinstance(layer, nn.BatchNorm2d)
    ]

    self.rpn = RegionProposalNetwork(num_features_out, anchor_ratios,
                                     anchor_sizes, rpn_pre_nms_top_n,
                                     rpn_post_nms_top_n)
    self.detection = Model.Detection(pooling_mode, pool_handler, hidden,
                                     hidden_handler, num_hidden_out,
                                     num_classes)
def _train(backbone_name, path_to_checkpoints_dir, path_to_resuming_checkpoint):
    """Train the model on ``Config.TRAIN_DATA`` on a single CUDA device.

    Writes TensorBoard summaries under ``<path_to_checkpoints_dir>/summaries``
    and checkpoints into ``path_to_checkpoints_dir``. Training stops after
    ``Config.NUM_STEPS_TO_FINISH`` optimizer steps.

    Args:
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        path_to_checkpoints_dir: Directory for summaries and checkpoints.
        path_to_resuming_checkpoint: Checkpoint to resume from, or ``None``.
    """
    # Not referenced again below; kept because constructing it may have side
    # effects - TODO confirm and remove if it has none.
    logger = Logger('./logs')

    dataset = AVA_video(Config.TRAIN_DATA)
    dataloader = DataLoader(dataset,
                            batch_size=4,
                            num_workers=8,
                            collate_fn=DatasetBase.padding_collate_fn,
                            pin_memory=True,
                            shuffle=True)
    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)()
    model = Model(backbone,
                  dataset.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
                  anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
                  proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA).cuda()

    optimizer = optim.SGD(model.parameters(),
                          lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM,
                          weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer,
                                  milestones=Config.STEP_LR_SIZES,
                                  gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR,
                                  num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)  # rolling window for the displayed average
    summary_writer = SummaryWriter(
        os.path.join(path_to_checkpoints_dir, 'summaries'))
    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.load(path_to_resuming_checkpoint, optimizer, scheduler)
        print("load from:", path_to_resuming_checkpoint)

    device_count = torch.cuda.device_count()
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(
        torch.cuda.device_count(),
        Config.BATCH_SIZE // torch.cuda.device_count()))

    while not should_stop:
        for n_iter, (_, image_batch, _, bboxes_batch, labels_batch,
                     detector_bboxes_batch) in enumerate(dataloader):
            batch_size = image_batch.shape[0]
            image_batch = image_batch.cuda()
            bboxes_batch = bboxes_batch.cuda()
            labels_batch = labels_batch.cuda()
            detector_bboxes_batch = detector_bboxes_batch.cuda()

            # NOTE(review): the forward pass runs with the model in eval mode
            # even though its result is back-propagated - confirm this is
            # intentional.
            proposal_class_losses = \
                model.eval().forward(image_batch, bboxes_batch, labels_batch, detector_bboxes_batch)
            proposal_class_loss = proposal_class_losses.mean()
            loss = proposal_class_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            losses.append(loss.item())
            summary_writer.add_scalar('train/proposal_class_loss',
                                      proposal_class_loss.item(), step)
            summary_writer.add_scalar('train/loss', loss.item(), step)

            # NOTE(review): the histogram step is n_iter, which restarts from
            # zero on every pass over the dataloader - confirm this is intended.
            if n_iter % 10000 == 0:
                for name, param in model.named_parameters():
                    name = name.replace('.', '/')
                    if name.find("conv") >= 0:
                        summary_writer.add_histogram(
                            name, param.data.cpu().numpy(), global_step=n_iter)
                        # Parameters that received no gradient (for example
                        # frozen ones) have grad == None; skip them instead
                        # of raising AttributeError.
                        if param.grad is not None:
                            summary_writer.add_histogram(
                                name + 'grad',
                                param.grad.data.cpu().numpy(),
                                global_step=n_iter)

            step += 1
            if step == num_steps_to_finish:
                should_stop = True

            if step % num_steps_to_display == 0:
                elapsed_time = time.time() - time_checkpoint
                time_checkpoint = time.time()
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                lr = scheduler.get_lr()[0]
                print_string = '[Step {0}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.8f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)'\
                    .format(step, avg_loss=avg_loss, lr=lr, samples_per_sec=samples_per_sec, eta=eta)
                print(print_string)

            if step % num_steps_to_snapshot == 0 or should_stop:
                path_to_checkpoint = model.save(path_to_checkpoints_dir, step,
                                                optimizer, scheduler)
                Log.i('Model has been saved to {}'.format(path_to_checkpoint))

            if should_stop:
                break

    Log.i('Done')
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    """Train the detector with ``nn.DataParallel`` until ``Config.NUM_STEPS_TO_FINISH``.

    Args:
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        path_to_data_dir: Directory handed to the dataset constructor.
        path_to_checkpoints_dir: Directory receiving checkpoints and the
            ``summaries`` sub-directory used by the summary writer.
        path_to_resuming_checkpoint: Optional checkpoint to restore the model,
            optimizer and scheduler from; the step counter resumes from the
            value returned by ``load``.

    A progress line is logged every ``Config.NUM_STEPS_TO_DISPLAY`` steps and a
    checkpoint is written every ``Config.NUM_STEPS_TO_SNAPSHOT`` steps as well
    as on the final step.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    dataset = dataset_class(path_to_data_dir, DatasetBase.Mode.TRAIN,
                            Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    ratio_sampler = DatasetBase.NearestRatioRandomSampler(dataset.image_ratios,
                                                          num_neighbors=Config.BATCH_SIZE)
    dataloader = DataLoader(dataset,
                            batch_size=Config.BATCH_SIZE,
                            sampler=ratio_sampler,
                            num_workers=8,
                            collate_fn=DatasetBase.padding_collate_fn,
                            pin_memory=True)
    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    detector = Model(
        backbone, dataset.num_classes(), pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
        anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
        proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA
    ).cuda()
    model = nn.DataParallel(detector)

    optimizer = optim.SGD(model.parameters(),
                          lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM,
                          weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer,
                                  milestones=Config.STEP_LR_SIZES,
                                  gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR,
                                  num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)  # sliding window behind the logged average loss
    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        # load/save live on the wrapped Model, not on the DataParallel shell.
        step = model.module.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    device_count = torch.cuda.device_count()
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(device_count,
                                                                        Config.BATCH_SIZE // device_count))

    while not should_stop:
        for _, image_batch, _, bboxes_batch, labels_batch in dataloader:
            batch_size = image_batch.shape[0]
            image_batch = image_batch.cuda()
            bboxes_batch = bboxes_batch.cuda()
            labels_batch = labels_batch.cuda()

            per_replica_losses = model.train().forward(image_batch, bboxes_batch, labels_batch)
            # Each component is averaged before the four terms are summed.
            anchor_objectness_loss, anchor_transformer_loss, proposal_class_loss, proposal_transformer_loss = (
                component.mean() for component in per_replica_losses)
            loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            losses.append(loss.item())

            tagged_losses = (
                ('train/anchor_objectness_loss', anchor_objectness_loss),
                ('train/anchor_transformer_loss', anchor_transformer_loss),
                ('train/proposal_class_loss', proposal_class_loss),
                ('train/proposal_transformer_loss', proposal_transformer_loss),
                ('train/loss', loss),
            )
            for tag, value in tagged_losses:
                summary_writer.add_scalar(tag, value.item(), step)

            step += 1
            should_stop = should_stop or step == num_steps_to_finish

            if step % num_steps_to_display == 0:
                now = time.time()
                elapsed_time = now - time_checkpoint
                time_checkpoint = now
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                lr = scheduler.get_lr()[0]
                Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.8f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

            if should_stop or step % num_steps_to_snapshot == 0:
                path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                Log.i(f'Model has been saved to {path_to_checkpoint}')

            if should_stop:
                break

    Log.i('Done')
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    """Run the multi-GPU (``nn.DataParallel``) training loop for the detector.

    Args:
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        path_to_data_dir: Directory handed to the dataset constructor.
        path_to_checkpoints_dir: Directory receiving checkpoints and the
            ``summaries`` sub-directory used by the summary writer.
        path_to_resuming_checkpoint: Optional checkpoint to resume from.

    Besides the regular snapshots (every ``Config.NUM_STEPS_TO_SNAPSHOT`` steps
    and on the last step) this variant also writes a checkpoint at step 10,
    which the original author marked as a test.
    """
    train_set_class = DatasetBase.from_name(dataset_name)
    dataset = train_set_class(path_to_data_dir, DatasetBase.Mode.TRAIN,
                              Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    # num_workers is 0 to make debugging easier (original author's note).
    dataloader = DataLoader(
        dataset,
        batch_size=Config.BATCH_SIZE,
        sampler=DatasetBase.NearestRatioRandomSampler(dataset.image_ratios, num_neighbors=Config.BATCH_SIZE),
        num_workers=0,
        collate_fn=DatasetBase.padding_collate_fn,
        pin_memory=True)
    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    model_kwargs = dict(
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
        anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
        proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA,
    )
    # For debugging, the DataParallel wrapper can be dropped and the bare
    # Model(...).cuda() used directly (original author's note); model.module
    # accesses below would then have to become plain model accesses.
    model = nn.DataParallel(Model(backbone, dataset.num_classes(), **model_kwargs).cuda())

    # Training parameters as recorded by the original author (the values
    # actually used come from Config -- verify there):
    #   IMAGE_MIN_SIDE = 600.0, IMAGE_MAX_SIDE = 1000.0
    #   ANCHOR_RATIOS = [(1, 2), (1, 1), (2, 1)], ANCHOR_SIZES = [128, 256, 512]
    #   POOLER_MODE = Pooler.Mode.ALIGN
    #   RPN_PRE_NMS_TOP_N = 12000, RPN_POST_NMS_TOP_N = 2000
    #   ANCHOR_SMOOTH_L1_LOSS_BETA = 1.0, PROPOSAL_SMOOTH_L1_LOSS_BETA = 1.0
    #   BATCH_SIZE = 1, LEARNING_RATE = 0.001, MOMENTUM = 0.9, WEIGHT_DECAY = 0.0005
    #   STEP_LR_SIZES = [50000, 70000], STEP_LR_GAMMA = 0.1
    #   WARM_UP_FACTOR = 0.3333, WARM_UP_NUM_ITERS = 500
    #   NUM_STEPS_TO_DISPLAY = 20, NUM_STEPS_TO_SNAPSHOT = 10000, NUM_STEPS_TO_FINISH = 90000
    #
    # Why momentum (author's notes):
    #   1. it damps the oscillation caused by an ill-conditioned problem;
    #   2. it reduces the variance of stochastic gradients (weight decay helps here too).
    # Two kinds of decay in the optimisation:
    #   1. weight decay: an L2 norm of the weights added to the total loss;
    #   2. learning-rate decay: the scheduler lowers the rate as steps accumulate.
    optimizer = optim.SGD(model.parameters(), lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM, weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer, milestones=Config.STEP_LR_SIZES, gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR, num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)
    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.module.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    # BATCH_SIZE defaults to 1 according to the author's note.
    device_count = torch.cuda.device_count()
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(
        device_count, Config.BATCH_SIZE // device_count))

    def _snapshot() -> None:
        # Persist model/optimizer/scheduler state and report where it went.
        path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer, scheduler)
        Log.i(f'Model has been saved to {path_to_checkpoint}')

    while not should_stop:
        # The author trained on VOC2007; shapes noted by the author for
        # BATCH_SIZE == 1: images (1, 3, h, w), boxes (1, gt_n, 4), labels (1, gt_n).
        for _, image_batch, _, bboxes_batch, labels_batch in dataloader:
            batch_size = image_batch.shape[0]
            image_batch = image_batch.cuda()
            bboxes_batch = bboxes_batch.cuda()
            labels_batch = labels_batch.cuda()

            anchor_objectness_losses, anchor_transformer_losses, proposal_class_losses, proposal_transformer_losses = \
                model.train().forward(image_batch, bboxes_batch, labels_batch)

            # RPN losses
            anchor_objectness_loss = anchor_objectness_losses.mean()
            anchor_transformer_loss = anchor_transformer_losses.mean()
            # detection-head losses
            proposal_class_loss = proposal_class_losses.mean()
            proposal_transformer_loss = proposal_transformer_losses.mean()
            loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            losses.append(loss.item())

            for tag, value in (('train/anchor_objectness_loss', anchor_objectness_loss),
                               ('train/anchor_transformer_loss', anchor_transformer_loss),
                               ('train/proposal_class_loss', proposal_class_loss),
                               ('train/proposal_transformer_loss', proposal_transformer_loss),
                               ('train/loss', loss)):
                summary_writer.add_scalar(tag, value.item(), step)

            step += 1
            if step == num_steps_to_finish:
                should_stop = True

            if step % num_steps_to_display == 0:
                now = time.time()
                elapsed_time = now - time_checkpoint
                time_checkpoint = now
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                lr = scheduler.get_lr()[0]
                Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

            # Early checkpoint the author added as a test of the save path.
            if step == 10:
                _snapshot()

            if step % num_steps_to_snapshot == 0 or should_stop:
                _snapshot()

            if should_stop:
                break

    Log.i('Done')
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str, dataset_name: str,
           backbone_name: str, prob_thresh: float):
    """Detect objects in one image and save a copy with the detections drawn on it.

    Args:
        path_to_input_image: Image file to run the detector on.
        path_to_output_image: Where the annotated image is written.
        path_to_checkpoint: Checkpoint loaded into the model.
        dataset_name: Name resolved through ``DatasetBase.from_name``; supplies
            the class count, preprocessing and label-to-category mapping.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections with a probability not above this are dropped.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    # Defaults as noted by the original author (verify against Config):
    #   pooler_mode = Pooler.Mode.ALIGN
    #   anchor_ratios = [(1, 2), (1, 1), (2, 1)]
    #   anchor_sizes: inference adds a 64, giving [64, 128, 256, 512]
    #   RPN NMS for evaluation: RPN_PRE_NMS_TOP_N = 6000, RPN_POST_NMS_TOP_N = 300

    palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

    with torch.no_grad():
        # Preprocessing rescales the image so that a side meets the min/max
        # side setting; unlike YOLO no fixed input size is needed (author's note).
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

        # Add a batch dimension, then run forward in eval mode.
        batch = image_tensor.unsqueeze(dim=0).cuda()
        detection_bboxes, detection_classes, detection_probs, _ = model.eval().forward(batch)

        # The network saw the scaled image, so map the boxes back to the
        # original resolution.
        detection_bboxes /= scale

        kept_indices = detection_probs > prob_thresh
        detection_bboxes = detection_bboxes[kept_indices]
        detection_classes = detection_classes[kept_indices]
        detection_probs = detection_probs[kept_indices]

        draw = ImageDraw.Draw(image)
        detections = zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist())
        for coords, cls, prob in detections:
            color = random.choice(palette)
            left, top, right, bottom = coords[0], coords[1], coords[2], coords[3]
            bbox = BBox(left=left, top=top, right=right, bottom=bottom)
            category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

            top_left = (bbox.left, bbox.top)
            draw.rectangle((top_left, (bbox.right, bbox.bottom)), outline=color)
            draw.text(top_left, text=f'{category:s} {prob:.3f}', fill=color)

        image.save(path_to_output_image)
        print(f'Output image is saved to {path_to_output_image}')
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str, dataset_name: str,
           backbone_name: str, prob_thresh: float):
    """Run cuboid detection on one image or on every entry of a folder.

    For each input an annotated copy (bounding box, score and the six cuboid
    faces) is saved under ``path_to_output_image`` using the input's file
    name, and the vertices of the highest-scoring detection, when there is
    one, are stored next to it as ``<file name>.npy``.

    Args:
        path_to_input_image: A single image file, or a directory whose entries
            are all treated as images.
        path_to_output_image: Directory receiving the annotated images and the
            ``.npy`` files.
        path_to_checkpoint: Checkpoint loaded into the model.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections with a probability not above this are dropped.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).to(device)
    model.load(path_to_checkpoint)
    model.eval()

    if os.path.isfile(path_to_input_image):
        # Split into directory + name so the joins below rebuild the real
        # input path and the output lands in the output directory. Joining the
        # full path with itself breaks relative paths and, for absolute paths,
        # makes the output path equal to the input (overwriting the source).
        input_dir, single_name = os.path.split(path_to_input_image)
        files = [single_name]
    else:
        input_dir = path_to_input_image
        files = os.listdir(input_dir)
        print('Running inference on folder:', path_to_input_image)

    palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']
    # Vertex indices of the six cuboid faces; each face is drawn as a closed
    # polyline that returns to its first vertex.
    face_vertex_indices = (
        (0, 1, 3, 2),
        (0, 4, 5, 1),
        (0, 4, 6, 2),
        (1, 5, 7, 3),
        (4, 5, 7, 6),
        (2, 3, 7, 6),
    )

    with torch.no_grad():
        for file_name in tqdm(files):
            image = transforms.Image.open(os.path.join(input_dir, file_name))
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_probs, detection_vertices, _ = \
                model.forward(image_tensor.unsqueeze(dim=0).to(device))
            # Map predictions from the preprocessed scale back to the input image.
            detection_bboxes /= scale
            detection_vertices /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_probs = detection_probs[kept_indices]
            detection_vertices = detection_vertices[kept_indices]

            draw = ImageDraw.Draw(image)
            for bbox, prob, vert in zip(detection_bboxes.tolist(), detection_probs.tolist(),
                                        detection_vertices.tolist()):
                color = random.choice(palette)
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = "cuboid"
                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

                # vert[0] is indexed as the x row and vert[1] as the y row of
                # the eight cuboid vertices.
                corners = [(int(vert[0][i]), int(vert[1][i])) for i in range(8)]
                for face in face_vertex_indices:
                    outline = tuple(corners[i] for i in face + face[:1])
                    draw.line(outline, fill=color)

            output_path = os.path.join(path_to_output_image, file_name)
            image.save(output_path)

            if detection_probs.size()[0] > 0:
                # Persist only the vertices of the most confident detection.
                max_index = torch.argmax(detection_probs)
                best_vertices = detection_vertices[max_index].cpu().numpy()
                with open(os.path.join(path_to_output_image, file_name + '.npy'), 'wb') as f:
                    np.save(f, best_vertices)
def _infer_stream(path_to_input_stream_endpoint: str, path_to_output_dir: str, period_of_inference: int,
                  path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    """Run detection on every ``period_of_inference``-th frame of a stream and dump JSON results.

    One JSON file per inferred frame is written into ``path_to_output_dir``;
    its name is a timestamp followed by the last path component of the
    endpoint (without extension).

    Args:
        path_to_input_stream_endpoint: Stream URL / video path, or a string of
            digits, which is converted to ``int`` before being given to
            ``cv2.VideoCapture``.
        path_to_output_dir: Directory receiving the JSON files.
        period_of_inference: Only frames whose 1-based sequence number is a
            multiple of this value are processed.
        path_to_checkpoint: Checkpoint loaded into the model.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections with a probability not above this are dropped.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    model.eval()

    # Derive the display name while the endpoint is still a string: once a
    # numeric endpoint has been converted to int it has no .split().
    image_name = path_to_input_stream_endpoint.split('/')[-1]
    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)

    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)
    try:
        with torch.no_grad():
            frame_num = 1
            for sn in itertools.count(start=1):
                grabbed, frame = video_capture.read()
                if not grabbed:
                    # No frame was returned (end of stream or read failure);
                    # stop instead of passing None to cvtColor.
                    break
                if sn % period_of_inference != 0:
                    continue

                image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                detection_bboxes, detection_classes, detection_probs, _ = \
                    model.forward(image_tensor.unsqueeze(dim=0).cuda())
                # Map boxes from the preprocessed scale back to the frame.
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                jsonData = OrderedDict()
                resultData = OrderedDict()
                detectionResultDataList = []

                jsonData["image_path"] = image_name
                jsonData["modules"] = "Faster_R-CNN_ResNet101"
                jsonData["cam_id"] = "0"
                jsonData["frame_num"] = frame_num

                for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(),
                                           detection_probs.tolist()):
                    bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                    detectionResultData = OrderedDict()
                    detectionResultData["label"] = [{
                        'description': category,
                        'score': prob
                    }]
                    # Position is reported as top-left corner plus width/height.
                    detectionResultData["position"] = {
                        'x': bbox.left,
                        'y': bbox.top,
                        'w': (bbox.right - bbox.left),
                        'h': (bbox.bottom - bbox.top)
                    }
                    detectionResultDataList.append(detectionResultData)

                resultData["module_name"] = "Faster_R-CNN_ResNet101"
                resultData["detection_result"] = detectionResultDataList
                jsonData["results"] = [resultData]

                output_file_path = path_to_output_dir + "/" + datetime.now().strftime(
                    "%Y-%m-%d__%H:%M:%S.%f__") + image_name.split('.')[0] + ".json"

                with open(output_file_path, 'w', encoding="utf-8") as make_file:
                    json.dump(jsonData, make_file, ensure_ascii=False, indent="\t")

                print(f'Saved JSON File : [NAME] {output_file_path}')
                frame_num += 1
    finally:
        # Release the capture device even when inference raises.
        video_capture.release()
        cv2.destroyAllWindows()
def main():
    """Annotate every image below a fixed frames directory with detections.

    Command-line options only override ``Config`` values; the input root, the
    checkpoint, the dataset/backbone names and the probability threshold are
    fixed inside the function. Each sub-directory of the input root is
    mirrored under ``<input_root>_output`` with one annotated ``.jpg`` per
    input image.
    """
    parser = argparse.ArgumentParser()
    option_table = [
        ('--image_min_side', dict(type=float, help='default: {:g}'.format(Config.IMAGE_MIN_SIDE))),
        ('--image_max_side', dict(type=float, help='default: {:g}'.format(Config.IMAGE_MAX_SIDE))),
        ('--anchor_ratios', dict(type=str, help='default: "{!s}"'.format(Config.ANCHOR_RATIOS))),
        ('--anchor_sizes', dict(type=str, help='default: "{!s}"'.format(Config.ANCHOR_SIZES))),
        ('--pooler_mode', dict(type=str, choices=Pooler.OPTIONS,
                               help='default: {.value:s}'.format(Config.POOLER_MODE))),
        ('--rpn_pre_nms_top_n', dict(type=int, help='default: {:d}'.format(Config.RPN_PRE_NMS_TOP_N))),
        ('--rpn_post_nms_top_n', dict(type=int, help='default: {:d}'.format(Config.RPN_POST_NMS_TOP_N))),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # Fixed run settings.
    input_root = '/home/mmlab/CCTV_Server/models/detectors/FasterRCNN/frames'
    output_root = input_root + '_output'
    path_to_checkpoint = '/home/mmlab/CCTV_Server/models/detectors/FasterRCNN/checkpoints/obstacle/model-90000.pth'
    dataset_name = 'obstacle'
    backbone_name = 'resnet101'
    prob_thresh = 0.6

    Config.setup(image_min_side=args.image_min_side,
                 image_max_side=args.image_max_side,
                 anchor_ratios=args.anchor_ratios,
                 anchor_sizes=args.anchor_sizes,
                 pooler_mode=args.pooler_mode,
                 rpn_pre_nms_top_n=args.rpn_pre_nms_top_n,
                 rpn_post_nms_top_n=args.rpn_post_nms_top_n)

    print('Arguments:')
    for k, v in vars(args).items():
        print(f'\t{k} = {v}')
    print(Config.describe())

    os.makedirs(output_root, exist_ok=True)

    input_sub_dirnames = []
    for entry in os.listdir(input_root):
        if os.path.isdir(os.path.join(input_root, entry)):
            input_sub_dirnames.append(entry)

    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

    for sub_dir in input_sub_dirnames:
        input_sub_dirpath = os.path.join(input_root, sub_dir)
        output_sub_dirpath = os.path.join(output_root, sub_dir)

        filenames = []
        for entry in os.listdir(input_sub_dirpath):
            if is_image(entry):
                filenames.append(image_basename(entry))

        for filename in filenames:
            # e.g. <input_root>/1_360p/1_360p_0001.jpg -> <output_root>/1_360p/1_360p_0001.jpg
            path_to_input_image = image_path(input_sub_dirpath, filename, '.jpg')
            path_to_output_image = image_path(output_sub_dirpath, filename, '.jpg')

            output_dir = os.path.join(os.path.curdir, os.path.dirname(path_to_output_image))
            os.makedirs(output_dir, exist_ok=True)

            with torch.no_grad():
                image = transforms.Image.open(path_to_input_image)
                image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                batch = image_tensor.unsqueeze(dim=0).cuda()
                detection_bboxes, detection_classes, detection_probs, _, _ = model.eval().forward(batch)
                # Undo the preprocessing scale so boxes match the input image.
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                draw = ImageDraw.Draw(image)
                detections = zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist())
                for coords, cls, prob in detections:
                    color = random.choice(palette)
                    bbox = BBox(left=coords[0], top=coords[1], right=coords[2], bottom=coords[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                    top_left = (bbox.left, bbox.top)
                    draw.rectangle((top_left, (bbox.right, bbox.bottom)), outline=color)
                    draw.text(top_left, text=f'{category:s} {prob:.3f}', fill=color)

                image.save(path_to_output_image)
                print(f'Output image is saved to {path_to_output_image}')
type=str, help="path for storing label files in the data set") args = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") os.makedirs("output", exist_ok=True) os.makedirs("checkpoints", exist_ok=True) # Get data configuration config.merge_from_file(args.config) config = setup_config(config, args) config.freeze() # Initiate model backbone = BackboneBase.from_name('darknet53')(pretrained=False, models_dir='') model = YOLOV3(backbone, config.GLOBAL.NUM_CLASSES, config.GLOBAL.ANCHORS, args.model_def, image_size=config.GLOBAL.IMAGE_SIZE[0]) model.load_state_dict(torch.load(args.checkpoint)) if torch.cuda.is_available(): model = model.cuda() # If specified we start from checkpoint # if config.TRAIN.PRETRAINED_WEIGHTS is not None and config.TRAIN.PRETRAINED_WEIGHTS != '': # if config.TRAIN.PRETRAINED_WEIGHTS.endswith(".pth"): # backbone.load_state_dict(torch.load(config.TRAIN.PRETRAINED_WEIGHTS)) # else:
inp_dim = int(model.net_info["height"]) assert inp_dim % 32 == 0 assert inp_dim > 32 if CUDA: model.cuda() # model(get_test_input(inp_dim, CUDA), CUDA) model.eval() #######for sp detec########## #初始化模型 path_to_checkpoint = "/home/aiuser/Downloads/NEW-FRCNN-rewrite (another copy)/v100_model/model-20700-v100.pth" backbone_name = Config.BACKBONE_NAME backbone = BackboneBase.from_name(backbone_name)() model_sf = Model(backbone, 81, pooler_mode=Config.POOLER_MODE, anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES, rpn_pre_nms_top_n=TrainConfig.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=TrainConfig.RPN_POST_NMS_TOP_N).cuda() model_sf.load(path_to_checkpoint) #videofile = "/home/aiuser/ava/ava/preproc_val/clips/rXFlJbXyZyc/948.mkv" videofile = "/home/aiuser/ava/ava/preproc_train/clips/gjdgj04FzR0/1611.mp4" cap = cv2.VideoCapture(videofile) assert cap.isOpened(), 'Cannot capture source'
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    """Train the detector on a single GPU, one image per step.

    Args:
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        path_to_data_dir: Directory handed to the dataset constructor.
        path_to_checkpoints_dir: Directory receiving checkpoints and the
            ``summaries`` sub-directory used by the summary writer.
        path_to_resuming_checkpoint: Optional checkpoint to restore the model,
            optimizer and scheduler from; the step counter resumes from the
            value returned by ``load``.

    Training stops when the step counter reaches ``Config.NUM_STEPS_TO_FINISH``.
    Progress is logged every ``Config.NUM_STEPS_TO_DISPLAY`` steps and a
    checkpoint is written every ``Config.NUM_STEPS_TO_SNAPSHOT`` steps and on
    the final step.
    """
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.TRAIN,
                                                  Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    model = Model(backbone, dataset.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_scales=Config.ANCHOR_SCALES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    optimizer = optim.SGD(model.parameters(), lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM, weight_decay=Config.WEIGHT_DECAY)
    scheduler = MultiStepLR(optimizer, milestones=Config.STEP_LR_SIZES, gamma=Config.STEP_LR_GAMMA)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)  # sliding window behind the logged average loss
    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    should_stop = False

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    Log.i('Start training')

    while not should_stop:
        for _, image_batch, _, bboxes_batch, labels_batch in dataloader:
            assert image_batch.shape[0] == 1, 'only batch size of 1 is supported'

            # Strip the batch dimension: the model is fed a single sample.
            image = image_batch[0].cuda()
            bboxes = bboxes_batch[0].cuda()
            labels = labels_batch[0].cuda()

            forward_input = Model.ForwardInput.Train(image, gt_classes=labels, gt_bboxes=bboxes)
            forward_output: Model.ForwardOutput.Train = model.train().forward(forward_input)

            anchor_objectness_loss, anchor_transformer_loss, proposal_class_loss, proposal_transformer_loss = forward_output
            loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()
            losses.append(loss.item())

            summary_writer.add_scalar('train/anchor_objectness_loss', anchor_objectness_loss.item(), step)
            summary_writer.add_scalar('train/anchor_transformer_loss', anchor_transformer_loss.item(), step)
            summary_writer.add_scalar('train/proposal_class_loss', proposal_class_loss.item(), step)
            summary_writer.add_scalar('train/proposal_transformer_loss', proposal_transformer_loss.item(), step)
            summary_writer.add_scalar('train/loss', loss.item(), step)

            step += 1

            if step == num_steps_to_finish:
                should_stop = True

            if step % num_steps_to_display == 0:
                elapsed_time = time.time() - time_checkpoint
                time_checkpoint = time.time()
                steps_per_sec = num_steps_to_display / elapsed_time
                samples_per_sec = dataloader.batch_size * steps_per_sec
                eta = (num_steps_to_finish - step) / steps_per_sec / 3600
                avg_loss = sum(losses) / len(losses)
                # Read the rate the optimizer is actually using. On recent
                # PyTorch releases MultiStepLR.get_lr() is only meant to be
                # called from within step(); calling it here warns and applies
                # gamma a second time on milestone steps, logging a wrong value.
                lr = optimizer.param_groups[0]['lr']
                Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.6f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

            if step % num_steps_to_snapshot == 0 or should_stop:
                path_to_checkpoint = model.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                Log.i(f'Model has been saved to {path_to_checkpoint}')

            if should_stop:
                break

    Log.i('Done')