def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride, anchor_scales):
    """Compute RPN anchor targets with the numpy layer and wrap them as CUDA variables.

    rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels: (1, 1, HxA, W), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, 4xA, H, W), distances of the anchors to the gt_boxes
        (may contain some transform) that are the regression objectives
    rpn_bbox_inside_weights: (1, 4xA, H, W) weights of each box, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (1, 4xA, H, W) used to balance the fg/bg,
        because the numbers of bgs and fgs may differ significantly
    """
    # The numpy implementation needs a host-side array, not a Variable.
    scores_np = rpn_cls_score.data.cpu().numpy()
    targets = anchor_target_layer_py(scores_np, gt_boxes, gt_ishard, dontcare_areas,
                                     im_info, _feat_stride, anchor_scales)
    labels_np, bbox_targets_np, inside_np, outside_np = targets

    # Labels are converted as LongTensor; the remaining arrays use the helper's default dtype.
    rpn_labels = network.np_to_variable(labels_np, is_cuda=True, dtype=torch.LongTensor)
    rpn_bbox_targets = network.np_to_variable(bbox_targets_np, is_cuda=True)
    rpn_bbox_inside_weights = network.np_to_variable(inside_np, is_cuda=True)
    rpn_bbox_outside_weights = network.np_to_variable(outside_np, is_cuda=True)
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def proposal_target_layer(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes):
    """Sample RoIs and build their training targets, returned as CUDA variables.

    ----------
    rpn_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    gt_boxes: (G, 5) [x1, y1, x2, y2, class] int
    gt_ishard: (G, 1) {0 | 1} 1 indicates hard
    dontcare_areas: (D, 4) [x1, y1, x2, y2]
    num_classes
    ----------
    Returns
    ----------
    rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    labels: (1 x H x W x A, 1) {0, 1, ..., _num_classes-1}
    bbox_targets: (1 x H x W x A, K x 4) [dx1, dy1, dx2, dy2]
    bbox_inside_weights: (1 x H x W x A, K x 4) 0, 1 masks for computing the loss
    bbox_outside_weights: (1 x H x W x A, K x 4) 0, 1 masks for computing the loss
    """
    # The numpy implementation works on a host-side copy of the proposals.
    rois_np = rpn_rois.data.cpu().numpy()
    sampled = proposal_target_layer_py(rois_np, gt_boxes, gt_ishard, dontcare_areas, num_classes)
    rois_arr, labels_arr, targets_arr, inside_arr, outside_arr = sampled

    rois = network.np_to_variable(rois_arr, is_cuda=True)
    # Labels are converted as LongTensor, unlike the other outputs.
    labels = network.np_to_variable(labels_arr, is_cuda=True, dtype=torch.LongTensor)
    bbox_targets = network.np_to_variable(targets_arr, is_cuda=True)
    bbox_inside_weights = network.np_to_variable(inside_arr, is_cuda=True)
    bbox_outside_weights = network.np_to_variable(outside_arr, is_cuda=True)
    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None):
    """Run the detection network and, in training mode, store the loss on ``self.loss``.

    Returns ``(bbox_pred, iou_pred, prob_pred)``. Each is derived from the
    final conv output viewed as (bsize, -1, cfg.num_anchors, cfg.num_classes + 5).
    ``gt_boxes``, ``gt_classes`` and ``dontcare`` are only passed on to
    ``self._build_target`` when ``self.training`` is true.
    """
    # Backbone: conv1s feeds both conv2 -> conv3 and the reorg branch.
    stem = self.conv1s(im_data)
    mid = self.conv2(stem)
    deep = self.conv3(mid)
    stem_reorg = self.reorg(stem)
    merged = torch.cat([stem_reorg, deep], 1)
    head_in = self.conv4(merged)
    head_out = self.conv5(head_in)  # batch_size, out_channels, h, w

    # for detection
    # bsize, c, h, w -> bsize, h, w, c -> bsize, h x w, num_anchors, 5+num_classes
    bsize, _, _h, _w = head_out.size()
    channels_last = head_out.permute(0, 2, 3, 1).contiguous()
    preds = channels_last.view(bsize, -1, cfg.num_anchors, cfg.num_classes + 5)

    # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
    xy_pred = F.sigmoid(preds[:, :, :, 0:2])
    wh_pred = torch.exp(preds[:, :, :, 2:4])
    bbox_pred = torch.cat([xy_pred, wh_pred], 3)
    iou_pred = F.sigmoid(preds[:, :, :, 4:5])

    # Class scores are flattened to 2-D for the softmax and then restored.
    score_pred = preds[:, :, :, 5:].contiguous()
    flat_scores = score_pred.view(-1, score_pred.size()[-1])
    prob_pred = F.softmax(flat_scores).view_as(score_pred)

    if not self.training:
        return bbox_pred, iou_pred, prob_pred

    # for training: targets are built in numpy from the current box predictions
    targets = self._build_target(bbox_pred.data.cpu().numpy(), gt_boxes, gt_classes, dontcare)
    box_t, iou_t, cls_t, mask_t = targets
    box_t = net_utils.np_to_variable(box_t)
    iou_t = net_utils.np_to_variable(iou_t)
    cls_t = net_utils.np_to_variable(cls_t)
    mask_t = net_utils.np_to_variable(mask_t, dtype=torch.FloatTensor)

    num_boxes = torch.sum(mask_t)

    box_mask = mask_t.expand_as(box_t)
    bbox_loss = F.smooth_l1_loss(box_mask * bbox_pred, box_mask * box_t, size_average=False) / num_boxes

    iou_loss = nn.MSELoss()(iou_pred, iou_t)

    cls_mask = mask_t.expand_as(score_pred)
    cls_loss = nn.MSELoss(size_average=True)(prob_pred * cls_mask, cls_t * cls_mask) / num_boxes

    # The box term is weighted 5x relative to the IoU and class terms.
    self.loss = 5. * bbox_loss + iou_loss + cls_loss
    return bbox_pred, iou_pred, prob_pred
def process(self):
    """Grab a frame, run the detector, and return a (1, 2) float32 array.

    The array starts as ones. For each detection whose class index is 14,
    ``out[0][0]`` is set to ``self._difference(...) / self.center[0]`` and
    ``out[0][1]`` to ``self._area(...) / self.max_area``, so the last such
    detection wins.
    """
    # NOTE(review): the source formatting was lost; the nesting below (in
    # particular `return out` sitting directly in the `while True` body, which
    # makes the loop run once) is a reconstruction -- confirm against the
    # original file.
    while True:
        image, im_data = preprocess(self.camera)
        # permute(0, 3, 1, 2) reorders the axes -- presumably NHWC -> NCHW; confirm.
        im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(
            0, 3, 1, 2)
        bbox_pred, iou_pred, prob_pred = self.net(im_data)
        # Move predictions to host numpy arrays for post-processing.
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, self.thresh)
        out = np.ones((1, 2)).astype('float32')
        for x in range(len(bboxes)):
            # Only class index 14 is tracked (presumably "person" in VOC order -- TODO confirm).
            if cls_inds[x] == 14:
                topleft = (bboxes[x][0], bboxes[x][1])
                bottomright = (bboxes[x][2], bboxes[x][3])
                # `conf` and `detect` are assigned but never read in this method.
                conf = scores[x]
                detect = True
                diff = self._difference(topleft, bottomright)
                area = self._area(topleft, bottomright)
                out[0][0] = diff
                out[0][0] /= self.center[0]
                out[0][1] = area
                out[0][1] /= self.max_area
        return out
def train_batch(net, sample_batched, train_loss_epoch, coord_loss_epoch, conf_loss_epoch):
    """Run one optimisation step on a batch and update the running epoch losses.

    Args:
        net: the model; ``net.module.loss`` is used when ``args.mGPUs`` is
            truthy, ``net.loss`` otherwise.
        sample_batched: indexable pair ``(batch, size_index)``, where ``batch``
            provides 'image', 'gt_boxes', 'gt_classes', 'gt_RT' and 'dontcare'.
        train_loss_epoch, coord_loss_epoch, conf_loss_epoch: running sums that
            are incremented by this batch's total, coordinate and
            object-confidence losses.

    Returns:
        An 11-tuple: the bbox, confidence and class-probability predictions
        (numpy, first sample only) of one randomly chosen scale, the four
        individual loss values, the total loss (all numpy), and the three
        updated running sums.

    Relies on the module-level ``args``, ``cfg`` and ``optimizer``.
    """
    net.train()
    batch = sample_batched[0]
    size_index = sample_batched[1]
    im = batch['image']
    gt_boxes = batch['gt_boxes']
    gt_classes = batch['gt_classes']
    gt_RT = batch['gt_RT']
    dontcare = batch['dontcare']

    # forward
    im_data = net_utils.np_to_variable(im, is_cuda=True, volatile=False).permute(0, 3, 1, 2)
    bbox_pred, conf_pred, score_pred = net(im_data)

    # With multi-GPU wrapping the real model (and its loss) lives on `.module`.
    loss_fn = net.module.loss if args.mGPUs else net.loss
    coord_loss_var, conf_objloss_var, conf_noobjloss_var, cls_loss_var = loss_fn(
        bbox_pred, conf_pred, score_pred, gt_boxes, gt_classes, gt_RT, dontcare, size_index)

    loss_var = cfg.lambda_coord * coord_loss_var + cfg.lambda_objconf * conf_objloss_var + \
        cfg.lambda_noobjconf * conf_noobjloss_var + cfg.lambda_class * cls_loss_var

    coord_loss_np = coord_loss_var.data.cpu().numpy()
    conf_objloss_np = conf_objloss_var.data.cpu().numpy()
    conf_noobjloss_np = conf_noobjloss_var.data.cpu().numpy()
    cls_loss_np = cls_loss_var.data.cpu().numpy()
    train_loss_np = loss_var.data.cpu().numpy()

    train_loss_epoch += train_loss_np
    coord_loss_epoch += coord_loss_np
    conf_loss_epoch += conf_objloss_np

    # Class probabilities for each of the three prediction scales.
    # `range` (not the Python-2-only `xrange`) keeps this working on Python 3.
    prob_pred = [
        F.softmax(score_pred[i].view(-1, score_pred[i].size()[-1]), dim=1).view_as(score_pred[i])
        for i in range(3)
    ]

    vis_scaleid = randint(0, 2)
    ### for visualisation of predictions in tensorboard
    # Only the first sample of the chosen scale is exported.
    bbox_pred_np = bbox_pred[vis_scaleid].data[0:1].cpu().numpy()
    conf_pred_np = conf_pred[vis_scaleid].data[0:1].cpu().numpy()
    prob_pred_np = prob_pred[vis_scaleid].data[0:1].cpu().numpy()

    optimizer.zero_grad()
    loss_var.backward()
    # Clip the gradient norm at 5 before stepping.
    torch.nn.utils.clip_grad_norm(net.parameters(), 5)
    optimizer.step()

    return (bbox_pred_np, conf_pred_np, prob_pred_np, coord_loss_np, conf_objloss_np,
            conf_noobjloss_np, cls_loss_np, train_loss_np, train_loss_epoch,
            coord_loss_epoch, conf_loss_epoch)
def main():
    """Detect objects in every image of a fixed directory and show them with OpenCV.

    Loads ``cfg.trained_model`` into a ``Darknet19`` on the GPU, prints
    per-frame timing, and stops early when 'q' is pressed.
    """
    weights_path = cfg.trained_model
    thresh = 0.5
    image_dir = '/home/cory/cedl/vid/videos/vid04'

    net = Darknet19()
    net_utils.load_net(weights_path, net)
    net.eval()
    net.cuda()
    print('load model successfully')
    print(net)

    # Keep files whose last four characters are a known image extension.
    image_extensions = ['.jpg', '.JPG', '.png', '.PNG']
    image_abs_paths = []
    for name in os.listdir(image_dir):
        if name[-4:] in image_extensions:
            image_abs_paths.append(os.path.join(image_dir, name))
    image_abs_paths.sort()

    t_det = Timer()
    t_total = Timer()

    for frame_idx, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True)
        im_data = im_data.permute(0, 3, 1, 2)

        # Time only the network forward pass.
        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)

        # Shrink very tall frames to a height of 1000, keeping the aspect ratio.
        if im2show.shape[0] > 1100:
            new_width = int(1000. * float(im2show.shape[1]) / im2show.shape[0])
            im2show = cv2.resize(im2show, (new_width, 1000))
        cv2.imshow('test', im2show)

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'
        stats = (frame_idx, 1. / det_time, det_time * 1000, 1. / total_time, total_time * 1000)
        print(format_str % stats)

        t_det.clear()
        t_total.clear()

        if cv2.waitKey(1) == ord('q'):
            break
def getCarinfofromPic(self, content, method='nparray'):
    """Detect objects in ``content`` and return the detections plus image crops.

    Args:
        content: the image input handed to ``self.preprocess``.
        method: how ``content`` should be interpreted by ``self.preprocess``.
            It is now forwarded to that call; previously 'nparray' was
            hard-coded there and this argument was silently ignored.

    Returns:
        ``(bboxes, scores, cls_inds, image, roi)`` where ``roi`` is a list
        holding, for each box, the slice ``image[y1:y2, x1:x2]`` taken with
        box elements 1, 3, 0 and 2 respectively.
    """
    image, im_data = self.preprocess(content, method=method)
    im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
    bbox_pred, iou_pred, prob_pred = self.net(im_data)

    # Post-processing works on host numpy arrays.
    bbox_pred = bbox_pred.data.cpu().numpy()
    iou_pred = iou_pred.data.cpu().numpy()
    prob_pred = prob_pred.data.cpu().numpy()
    bboxes, scores, cls_inds = yolo_utils.postprocess(
        bbox_pred, iou_pred, prob_pred, image.shape, cfg, self.thresh, size_index=0)

    # NOTE(review): slicing assumes the box coordinates are integers -- confirm
    # that yolo_utils.postprocess returns an integer dtype.
    roi = [image[box[1]:box[3], box[0]:box[2]] for box in bboxes]
    return bboxes, scores, cls_inds, image, roi
def loss(self, conf_pred, gt_conf):
    """Return the summed squared error between ``conf_pred`` and ``gt_conf``, divided by ``len(gt_conf)``."""
    target = net_utils.np_to_variable(gt_conf, volatile=True)
    # size_average=False gives a sum; the division below normalises it.
    criterion = nn.MSELoss(size_average=False)
    summed_error = criterion(conf_pred, target)
    return summed_error / len(gt_conf)
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchor_scales):
    """Run the numpy proposal layer on RPN outputs and return the result as a CUDA variable viewed as (-1, 5)."""
    # The numpy implementation needs host-side arrays.
    cls_prob_np = rpn_cls_prob_reshape.data.cpu().numpy()
    bbox_pred_np = rpn_bbox_pred.data.cpu().numpy()
    proposals = proposal_layer_py(cls_prob_np, bbox_pred_np, im_info, cfg_key,
                                  _feat_stride, anchor_scales)
    proposals_var = network.np_to_variable(proposals, is_cuda=True)
    return proposals_var.view(-1, 5)
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run the network over every image of ``imdb`` and print its loss terms.

    For each image the batch (with ground truth) is passed through ``net``,
    predictions are post-processed, and ``net.bbox_loss``, ``net.iou_loss`` and
    ``net.cls_loss`` are printed. Nothing is returned or saved.

    ``max_per_image`` and ``vis`` are accepted but not used by this function.
    The image size index is fixed at 0.
    """
    # Modified from the standard evaluation routine: detection collection,
    # timers and the detections file are not used here, only the losses.
    # The leftover `ipdb.set_trace()` breakpoint was removed so the function
    # can run unattended.
    num_images = imdb.num_images

    size_index = 0

    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        im = batch['images']
        gt_boxes = batch['gt_boxes']
        gt_classes = batch['gt_classes']
        dontcare = batch['dontcare']

        im_data = net_utils.np_to_variable(im, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        # Ground truth is passed in so that the network exposes its loss terms.
        bbox_pred, iou_pred, prob_pred = net(im_data, gt_boxes, gt_classes, dontcare, size_index)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh, size_index)

        # Read the loss terms stored on the network by the forward pass above.
        bbox_loss = net.bbox_loss.data.cpu().numpy()[0]
        iou_loss = net.iou_loss.data.cpu().numpy()[0]
        cls_loss = net.cls_loss.data.cpu().numpy()[0]

        print('bbox_loss', bbox_loss)
        print('iou_loss', iou_loss)
        print('cls_loss', cls_loss)
def detect_image(cfg, image_path, net, thresh):
    """Detect objects in one image file.

    The image is preprocessed with ``cfg['inp_size']``, passed through
    ``net.forward`` and post-processed with ``thresh``.

    Returns ``(bboxes, cls_inds, image, scores)`` -- note the order.
    """
    image, im_data = preprocess(image_path, cfg['inp_size'])
    net_input = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True)
    net_input = net_input.permute(0, 3, 1, 2)

    # Convert all three network outputs to host numpy arrays.
    raw_outputs = net.forward(net_input)
    bbox_pred, iou_pred, prob_pred = raw_outputs
    bbox_pred = bbox_pred.data.cpu().numpy()
    iou_pred = iou_pred.data.cpu().numpy()
    prob_pred = prob_pred.data.cpu().numpy()

    detections = yolo_utils.postprocess(bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
    bboxes, scores, cls_inds = detections
    return bboxes, cls_inds, image, scores
def loss_fxn(gt_boxes, gt_classes, dontcare, size_index, bbox_pred, iou_pred, prob_pred):
    """Compute the box, IoU and class losses for one batch.

    Targets and masks come from ``build_target``. Each loss is a masked,
    summed criterion divided by the total number of ground-truth boxes
    (the sum of ``len(boxes)`` over ``gt_boxes``).

    Returns ``(bbox_loss, iou_loss, cls_loss)``.
    """
    # Target construction happens in numpy on host copies of the predictions.
    bbox_np = bbox_pred.data.cpu().numpy()
    iou_np = iou_pred.data.cpu().numpy()
    targets = build_target(bbox_np, gt_boxes, gt_classes, dontcare, iou_np, size_index)
    box_t, iou_t, cls_t, box_m, iou_m, cls_m = targets

    box_t = net_utils.np_to_variable(box_t)
    iou_t = net_utils.np_to_variable(iou_t)
    cls_t = net_utils.np_to_variable(cls_t)
    box_mask = net_utils.np_to_variable(box_m, dtype=torch.FloatTensor)
    iou_mask = net_utils.np_to_variable(iou_m, dtype=torch.FloatTensor)
    class_mask = net_utils.np_to_variable(cls_m, dtype=torch.FloatTensor)

    num_boxes = sum(map(len, gt_boxes))

    # Summed L1 criterion shared by the box and IoU terms.
    l1_sum = nn.L1Loss(size_average=False)

    box_mask = box_mask.expand_as(box_t)
    bbox_loss = l1_sum(bbox_pred * box_mask, box_t * box_mask) / num_boxes
    iou_loss = l1_sum(iou_pred * iou_mask, iou_t * iou_mask) / num_boxes

    class_mask = class_mask.expand_as(prob_pred)
    # NOTE(review): the original author marked this cross-entropy usage as
    # wrong; it is reproduced unchanged here -- revisit before relying on it.
    ce_sum = nn.CrossEntropyLoss(size_average=False)
    cls_loss = ce_sum(prob_pred * class_mask, cls_t * class_mask) / num_boxes

    return bbox_loss, iou_loss, cls_loss
def training_target(cfg, bbox_pred, class_pred, labels, inp_size, iou_pred):
    """Build training targets from ``labels`` and return the three loss terms.

    inp_size = (w, h)

    Ground truth is recovered with ``restore_gt_numpy`` and targets/masks with
    ``_build_target``. Each loss is a masked, summed MSE divided by the total
    number of ground-truth boxes.

    Returns ``(bbox_loss, iou_loss, class_loss)``.
    """
    gt_boxes, gt_classes = restore_gt_numpy(labels)

    # Target construction happens in numpy on host copies of the predictions.
    bbox_np = bbox_pred.data.cpu().numpy()
    iou_np = iou_pred.data.cpu().numpy()
    built = _build_target(cfg, bbox_np, gt_boxes, gt_classes, iou_np, inp_size)
    box_t, iou_t, cls_t, box_m, iou_m, cls_m = built

    box_t = net_utils.np_to_variable(box_t)
    iou_t = net_utils.np_to_variable(iou_t)
    cls_t = net_utils.np_to_variable(cls_t)
    box_mask = net_utils.np_to_variable(box_m, dtype=torch.FloatTensor)
    iou_mask = net_utils.np_to_variable(iou_m, dtype=torch.FloatTensor)
    class_mask = net_utils.np_to_variable(cls_m, dtype=torch.FloatTensor)

    num_boxes = sum(map(len, gt_boxes))

    # Broadcast the masks to the shape of the tensors they gate.
    box_mask = box_mask.expand_as(box_t)
    class_mask = class_mask.expand_as(class_pred)

    # Summed (not averaged) MSE; normalisation is the explicit division.
    mse_sum = nn.MSELoss(size_average=False)
    bbox_loss = mse_sum(bbox_pred * box_mask, box_t * box_mask) / num_boxes
    iou_loss = mse_sum(iou_pred * iou_mask, iou_t * iou_mask) / num_boxes
    class_loss = mse_sum(class_pred * class_mask, cls_t * class_mask) / num_boxes
    return bbox_loss, iou_loss, class_loss
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    """Run the RPN on an image and return ``(features, rois)``.

    In training mode ``gt_boxes`` must be given; the anchor targets are then
    computed and the resulting losses stored on ``self.cross_entropy`` and
    ``self.loss_box``.
    """
    net_input = network.np_to_variable(im_data, is_cuda=True)
    net_input = net_input.permute(0, 3, 1, 2)
    features = self.features(net_input)
    rpn_conv1 = self.conv1(features)

    # rpn score
    rpn_cls_score = self.score_conv(rpn_conv1)
    rpn_cls_score_reshape = self.reshape_layer(rpn_cls_score, 2)
    rpn_cls_prob = F.softmax(rpn_cls_score_reshape)
    prob_channels = len(self.anchor_scales) * 3 * 2
    rpn_cls_prob_reshape = self.reshape_layer(rpn_cls_prob, prob_channels)

    # rpn boxes
    rpn_bbox_pred = self.bbox_conv(rpn_conv1)

    # proposal layer
    if self.training:
        cfg_key = 'TRAIN'
    else:
        cfg_key = 'TEST'
    rois = self.proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                               cfg_key, self._feat_stride, self.anchor_scales)

    # generating training labels and build the rpn loss
    if self.training:
        assert gt_boxes is not None
        rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard,
                                            dontcare_areas, im_info,
                                            self._feat_stride, self.anchor_scales)
        losses = self.build_loss(rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)
        self.cross_entropy, self.loss_box = losses

    return features, rois
# Script fragment: run the detector over every '.jpg' in `im_path`, with image
# preprocessing done in a worker process.
# NOTE(review): this is a partial view -- `net`, `im_path`, `thresh` and `cfg`
# are defined outside the visible lines and the loop body continues past them.
net.cuda()
net.eval()
print('load model succ...')

t_det = Timer()
t_total = Timer()
# Sorted '.jpg' file names (exact, case-sensitive extension match).
im_fnames = sorted((fname for fname in os.listdir(im_path) if os.path.splitext(fname)[-1] == '.jpg'))
# Lazily turn the names into full paths.
im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
# A single worker runs `preprocess`; imap yields results in input order.
pool = Pool(processes=1)

for i, (image, im_data) in enumerate(pool.imap(
        preprocess, im_fnames, chunksize=1)):
    t_total.tic()
    im_data = net_utils.np_to_variable(
        im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
    # Time only the network forward pass.
    t_det.tic()
    bbox_pred, iou_pred, prob_pred = net(im_data)
    det_time = t_det.toc()
    # to numpy
    bbox_pred = bbox_pred.data.cpu().numpy()
    iou_pred = iou_pred.data.cpu().numpy()
    prob_pred = prob_pred.data.cpu().numpy()

    # print bbox_pred.shape, iou_pred.shape, prob_pred.shape
    bboxes, scores, cls_inds = yolo_utils.postprocess(
        bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)

    im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds, cfg)
def test_net(net2, imdb, dataloader, args, output_dir, size_index, batch_size, objpoints3D, corners_3d, vertices, iter_count=0, thresh=0.5, vis=True, verbose=True, summary=None):
    """Evaluate the confidence network ``net2`` over ``dataloader``.

    For every batch the pose-proposal patches are pushed through ``net2`` in
    sub-batches, the predicted confidences are post-processed into detections,
    and the detections are stored per class and image. The collected
    detections are pickled to ``<output_dir>/detections.pkl`` and passed to
    ``imdb.evaluate_detections``.

    Returns:
        ``(accuracy_epoch, twod_dists, threed_dists, mean_loss)`` where the
        first three come from ``imdb.evaluate_detections`` and ``mean_loss``
        is the accumulated confidence loss divided by ``len(dataloader)``.

    Side effects: writes the pickle file; when ``args.confidence_plotlogs`` is
    set, writes a YAML log under ``confidence_plotlogs/``; when ``vis`` is
    true, writes one comparison image per sample; when ``summary`` is given,
    adds one image to it.

    NOTE(review): the source formatting was lost; the nesting below is a
    reconstruction -- confirm against the original file.
    """
    net2.eval()
    cv2.setNumThreads(1)
    test_loss = 0
    # Worker pool used for the per-image post-processing below.
    pool = Pool(processes=2)

    num_images = imdb.__len__()
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb._num_classes)]

    # timers
    _t = {'network': Timer(), 'postpro': Timer()}

    # corners3d = yolo_utils.threed_corners(dataset='linemod')

    # Index of the one batch whose prediction is sent to `summary`.
    # NOTE(review): `/` is true division under Python 3, so randint would get
    # a float upper bound there -- confirm the target Python version.
    batch_num_summary = randint(0, num_images / batch_size - 1)

    if args.confidence_plotlogs:
        confidence_plotlogs = {}
        confidence_plotlogs['gt_conf'] = []
        confidence_plotlogs['cullnet_conf'] = []

    network_time = 0
    postpro_time = 0

    for i_batch, sample_batched in enumerate(dataloader):
        _t['network'].tic()

        rgb_patches, gt_2dconfs, gt_3dconfs, bboxes = sample_batched[
            'pose_proposals']
        ori_im = np.array(sample_batched['origin_im'])

        # Pick which ground-truth confidence the loss is computed against.
        # NOTE(review): `gt_confs` stays unbound for any other setting.
        if cfg.args.cullnet_confidence == 'conf2d':
            gt_confs = gt_2dconfs
        elif cfg.args.cullnet_confidence == 'conf3d':
            gt_confs = gt_3dconfs

        with torch.no_grad():
            if cfg.args.seg_cullnet:
                confidence_new_batch = []
                rgb_patch_np = np.array(rgb_patches)
                gtconf_patch_np = np.array(gt_confs)
                bboxes_batch = bboxes
                current_batch_size = rgb_patch_np.shape[0]

                # Number of patches per forward pass, chosen per architecture
                # and overridable through cfg.args.sub_bs_test.
                subnetwork_batchsize = 128
                if cfg.args.cullnet_type == 'vgg19_bn':
                    subnetwork_batchsize = 128
                if cfg.args.cullnet_type == 'allconvnet':
                    subnetwork_batchsize = 64
                if cfg.args.cullnet_type == 'allconvnet_small':
                    subnetwork_batchsize = 256
                if cfg.args.cullnet_type == 'resnet18':
                    subnetwork_batchsize = 512
                if cfg.args.cullnet_type == 'resnet18_gn' or cfg.args.cullnet_type == 'resnet18concat_gn':
                    subnetwork_batchsize = 320
                if cfg.args.cullnet_type == 'resnet50' or cfg.args.cullnet_type == 'resnet50_gn' or cfg.args.cullnet_type == 'resnet50concat_gn':
                    subnetwork_batchsize = 160

                if cfg.args.sub_bs_test is not None:
                    subnetwork_batchsize = int(cfg.args.sub_bs_test)

                # Number of full sub-batches in this batch, and how many
                # images each sub-batch covers.
                # NOTE(review): under Python 3 `partition_size` is a float and
                # `range(partition_size)` below would raise; the `math.ceil`
                # argument also changes meaning between integer and true
                # division -- confirm the target Python version.
                partition_size = (current_batch_size * cfg.k_proposals_test) / subnetwork_batchsize
                subnetwork_numimages = int(
                    math.ceil(subnetwork_batchsize / cfg.k_proposals_test))

                for i in range(partition_size):
                    rgb_patches_var = net_utils.np_to_variable(
                        rgb_patch_np[i * subnetwork_numimages:(i + 1) * subnetwork_numimages],
                        is_cuda=True,
                        volatile=True).permute(0, 1, 4, 2, 3)

                    # 'concat' inputs carry 4 channels per patch, otherwise 3.
                    if cfg.args.cullnet_inconf == 'concat':
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 4, cfg.args.cullnet_input, cfg.args.cullnet_input))
                    else:
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 3, cfg.args.cullnet_input, cfg.args.cullnet_input))

                    gtconf_patch = gtconf_patch_np[i * subnetwork_numimages:(
                        i + 1) * subnetwork_numimages].reshape(-1, 1)

                    if args.confidence_plotlogs:
                        confidence_plotlogs[
                            'gt_conf'] += gtconf_patch[:, 0].tolist()
                        confidence_plotlogs[
                            'cullnet_conf'] += confidence_new[:, 0].tolist()

                    # With multi-GPU wrapping the loss lives on `.module`.
                    if args.mGPUs:
                        conf_loss_var = net2.module.loss(
                            confidence_new, gtconf_patch)
                    else:
                        conf_loss_var = net2.loss(confidence_new, gtconf_patch)

                    confidence_new_np = confidence_new.data.cpu().numpy()

                    ### debugging purpose
                    # confidence_new_np = gtconf_patch

                    confidence_new_batch.append(confidence_new_np)

                    # bbox_pred.register_hook(extract)
                    conf_loss_np = conf_loss_var.data.cpu().numpy()
                    test_loss += conf_loss_np

                confidence_new_batch = np.array(confidence_new_batch)

                # Patches that did not fill a whole sub-batch.
                left_overpatches = (current_batch_size * cfg.k_proposals_test
                                    ) % subnetwork_batchsize

                # One row of k_proposals_test confidences per image.
                confidence_new_batch = confidence_new_batch.reshape(
                    partition_size * subnetwork_numimages, cfg.k_proposals_test)

                # The remainder is only processed for the final batch of the loader.
                if i_batch == len(dataloader) - 1 and left_overpatches > 0:
                    rgb_patches_var = net_utils.np_to_variable(
                        rgb_patch_np[partition_size * subnetwork_numimages:],
                        is_cuda=True,
                        volatile=True).permute(0, 1, 4, 2, 3)

                    if cfg.args.cullnet_inconf == 'concat':
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 4, cfg.args.cullnet_input, cfg.args.cullnet_input))
                    else:
                        confidence_new = net2(
                            rgb_patches_var.view(-1, 3, cfg.args.cullnet_input, cfg.args.cullnet_input))

                    gtconf_patch = gtconf_patch_np[
                        partition_size * subnetwork_numimages:].reshape(-1, 1)

                    if args.confidence_plotlogs:
                        confidence_plotlogs[
                            'gt_conf'] += gtconf_patch[:, 0].tolist()
                        confidence_plotlogs[
                            'cullnet_conf'] += confidence_new[:, 0].tolist()

                    if args.mGPUs:
                        conf_loss_var = net2.module.loss(
                            confidence_new, gtconf_patch)
                    else:
                        conf_loss_var = net2.loss(confidence_new, gtconf_patch)

                    confidence_new_np = confidence_new.data.cpu().numpy()

                    ### debugging purpose
                    # confidence_new_np = gtconf_patch

                    confidence_new_np = confidence_new_np.reshape(
                        -1, cfg.k_proposals_test)
                    confidence_new_batch = np.concatenate(
                        (confidence_new_batch, confidence_new_np), 0)

                    # bbox_pred.register_hook(extract)
                    conf_loss_np = conf_loss_var.data.cpu().numpy()
                    test_loss += conf_loss_np

        network_time += _t['network'].toc()

        _t['postpro'].tic()

        # NOTE(review): `rgb_patch_np`, `bboxes_batch`, `scores_batch` and
        # `cls_inds_batch` are only bound when cfg.args.seg_cullnet is truthy,
        # yet they are used unconditionally further down.
        if cfg.args.seg_cullnet:
            # Post-process each image's (boxes, confidences) pair in the pool.
            targets = pool.map(
                partial(yolo_utils.seg_cullnet_postprocess,
                        sample_batched['origin_im'][0].shape, size_index),
                ((bboxes_batch[b], confidence_new_batch[b])
                 for b in range(rgb_patch_np.shape[0])))

            bboxes_batch = [row[0] for row in targets]
            scores_batch = [row[1] for row in targets]
            cls_inds_batch = [row[2] for row in targets]

        ###########
        # targets = pool.map(yolo_utils.final_postprocess,
        #                    ((bboxes_batch[b], scores_batch[b], cls_inds_batch[b])
        #                     for b in range(im_data.shape[0])))
        # bboxes_batch = [row[0] for row in targets]
        # scores_batch = [row[1] for row in targets]
        # cls_inds_batch = [row[2] for row in targets]

        # Send one randomly chosen image of the chosen batch to the summary.
        if summary and i_batch == batch_num_summary:
            imnum_summary = randint(0, sample_batched['image'].shape[0] - 1)
            image = sample_batched['origin_im'][imnum_summary]
            # bboxes_sum, scores_sum, cls_inds_sum = yolo_utils.seg_cullnet_postprocess(sample_batched['origin_im'][imnum_summary].shape, size_index, (bboxes_batch[imnum_summary],
            # confidence_new_batch[imnum_summary]))
            im2show = yolo_utils.draw_detection(
                image, bboxes_batch[imnum_summary], scores_batch[imnum_summary],
                cls_inds_batch[imnum_summary], cfg, imdb._classes, 0.5,
                objpoints3D, corners_3d, vertices)
            summary.add_image('predict_' + imdb._image_set,
                              cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB), iter_count)

        for batch_id in range(rgb_patch_np.shape[0]):
            if vis:
                det_im = yolo_utils.draw_detection(
                    ori_im[batch_id].copy(), bboxes_batch[batch_id],
                    scores_batch[batch_id], cls_inds_batch[batch_id], cfg,
                    imdb._classes, thresh, objpoints3D, corners_3d, vertices)

            bboxes = bboxes_batch[batch_id]
            scores = scores_batch[batch_id]
            cls_inds = cls_inds_batch[batch_id]

            # Store this image's detections per class; the image index is
            # computed from the batch index and position within the batch.
            for j in range(imdb._num_classes):
                inds = np.where(cls_inds == j)[0]
                if len(inds) == 0:
                    # No detection of class j: store an empty array with
                    # 2 * num_detection_points + 1 columns.
                    all_boxes[j][i_batch * batch_size + batch_id] = np.empty(
                        [0, 2 * args.num_detection_points + 1], dtype=np.float32)
                    continue

                # bboxes[inds] = yolo_utils.refine_2dboxes(bboxes[inds], corners3d[j])
                ## bboxes_batch[batch_id][inds] = bboxes[inds]
                ##
                c_bboxes = bboxes[inds]
                c_scores = scores[inds]
                # Append the score as the last column.
                c_dets = np.hstack(
                    (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
                all_boxes[j][i_batch * batch_size + batch_id] = c_dets

            if vis:
                # Draw the ground truth; with more than 9 points the corner
                # points are drawn first and the cuboid on top of them.
                if args.num_detection_points > 9:
                    gt_image = yolo_utils.vis_corner_points(
                        ori_im[batch_id].copy(),
                        np.reshape(sample_batched['origin_gt_boxes'][batch_id],
                                   (2, args.num_detection_points), order='F'),
                        objpoints3D, vertices)
                    cuboid_gtimage = yolo_utils.vis_corner_cuboids(
                        gt_image,
                        np.reshape(sample_batched['origin_gt_boxes'][batch_id],
                                   (2, args.num_detection_points), order='F'),
                        objpoints3D, corners_3d)
                else:
                    cuboid_gtimage = yolo_utils.vis_corner_cuboids(
                        ori_im[batch_id].copy(),
                        np.reshape(sample_batched['origin_gt_boxes'][batch_id],
                                   (2, args.num_detection_points), order='F'))

                # Side by side: detections, ground truth, untouched image.
                im2show = np.hstack(
                    (det_im, cuboid_gtimage, ori_im[batch_id].copy()))

                # NOTE(review): `test_output_dir` is not defined in this
                # function -- presumably a module-level name; confirm.
                cv2.imwrite(
                    test_output_dir + '/' +
                    imdb._image_indexes[i_batch * batch_size + batch_id] + '.jpg',
                    im2show)
                # cv2.imshow('test', im2show)
                # cv2.waitKey(0)

        postpro_time += _t['postpro'].toc()

    # print('Culling network time: {:.3f}s ,, Postprocessing time: {:.3f}s'.format(network_time, postpro_time))
    # print('Total Images: {:d}'.format(imdb.__len__()))

    pool.close()
    pool.join()

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    if args.confidence_plotlogs:
        with open(
                'confidence_plotlogs/' + args.class_name + '_cullnet_logs.yml',
                'w') as outfile:
            yaml.dump(confidence_plotlogs, outfile, default_flow_style=False)

    print('Evaluating detections')
    accuracy_epoch, twod_dists, threed_dists = imdb.evaluate_detections(
        all_boxes, output_dir, verbose)
    # The loss is averaged over the number of loader batches.
    return accuracy_epoch, twod_dists, threed_dists, test_loss / len(
        dataloader)
# OG yoloV2 changes scales every 10 epochs # Selecting index first thing than last otherwise one scale gets more trained than others due to multiple start-stops if i % 10 == 0: size_index = randint(0, len(cfg.multi_scale_inp_size) - 1) print('new scale is {}'.format(cfg.multi_scale_inp_size[size_index])) batch = dataset.fetch_parse(batch_of_index, size_index) im = batch['images'] gt_boxes = batch['gt_boxes'] gt_classes = batch['gt_classes'] dontcare = batch['dontcare'] origin_im = ['origin_im'] # sending images onto gpu after turning them into torch variable im = net_utils.np_to_variable(im, is_cuda=True, volatile=False).permute(0, 3, 1, 2) bbox_pred, iou_pred, prob_pred = net(im) bbox_loss_i, iou_loss_i, cls_loss_i = loss(gt_boxes, gt_classes, dontcare, size_index, bbox_pred, iou_pred, prob_pred) # accumulating mini-batch loss loss = bbox_loss_i + iou_loss_i + cls_loss_i bbox_loss += bbox_loss_i.data.cpu().numpy()[0] iou_loss += iou_loss_i.data.cpu().numpy()[0] cls_loss += cls_loss_i.data.cpu().numpy()[0] train_loss += loss.data.cpu().numpy()[0] # clearing grads before calculating new ones and then updating wts optimizer.zero_grad() loss.backward()
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run detection over every image of ``imdb`` and pickle the detections.

    Args:
        net: the detection network, called as ``net(im_data)``.
        imdb: dataset providing ``num_images``, ``num_classes`` and
            ``next_batch(size_index=...)``.
        max_per_image: when positive, keep at most about this many detections
            per image across all classes (ties at the threshold are kept).
        thresh: score threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, show each image with its detections and wait for a key.

    Detections are collected as ``all_boxes[cls][image]``, an N x 5 array of
    (x1, y1, x2, y2, score), and written to ``<output_dir>/detections.pkl``.
    Relies on the module-level ``output_dir``, ``args`` and ``cfg``.
    """
    num_images = imdb.num_images

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    size_index = args.image_size_index
    # helper: 0:320, 1:352, 2:384, 3:416, 4:448, 5:480, 6:512, 7:544, 8:576
    # here val_img sometimes is 5123
    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        im_data = net_utils.np_to_variable(batch['images'], is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        with torch.set_grad_enabled(False):
            bbox_pred, iou_pred, prob_pred = net(im_data)
        # bbox      -> (batch, h*w, prior, 4)
        # iou       -> (batch, h*w, prior, 1)
        # prob_pred -> (batch, h*w, prior, 20)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        # Post-processing returns: bbox_pred, scores, cls_inds
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh, size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Split the predictions per class; each detection keeps its score in
        # the last column, which is read back below.
        for j in range(imdb.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                # Every class is trimmed, including class 0: its scores are
                # part of `image_scores`, so skipping it (as the loop used to,
                # starting at 1) left it uncapped.
                for j in range(imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        # Report timing every 20 images, then reset the timers.
        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im, bboxes, scores, cls_inds, cfg, thr=0.1)
            # Shrink very tall images to a height of 1000, keeping the aspect ratio.
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(im2show, (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
def test_net_img_only(net, img_list, max_per_image=300, thresh=0.5, vis=False):
    """Detect objects in a list of image files and write one XML per image.

    Args:
        net: detection network; ``net(im_data)`` must return
            ``(bbox_pred, iou_pred, prob_pred)``.
        img_list: sequence of image names passed to ``test_only_transform``.
        max_per_image: maximum number of detections kept per image over all
            classes; a value <= 0 disables the cap.
        thresh: score threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, show every image with its detections and wait for a
            key press.

    Reads the module-level ``args`` and ``cfg``. Results are written through
    ``dataTransform.writeXml`` into ``./result``.
    """
    num_images = len(img_list)
    # There is no imdb in this function: the class count comes from cfg,
    # which is also what sizes all_boxes. (The original referenced an
    # undefined ``imdb`` in the per-class loops.)
    num_classes = cfg.num_classes

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    size_index = args.image_size_index
    inp_size = cfg.multi_scale_inp_size
    if not os.path.exists("result"):
        os.mkdir("result")
    dt = dataTransform.dataTransform()

    for i, img_name in enumerate(img_list):
        im, _, __, ___, ori_im = test_only_transform(img_name, inp_size,
                                                     size_index)
        # Add a leading batch axis. The shape is passed positionally because
        # the ``newshape`` keyword is deprecated in recent NumPy releases.
        im = np.reshape(im, (-1, im.shape[0], im.shape[1], im.shape[2]))
        im_data = net_utils.np_to_variable(
            im, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        # The original called toc() without a matching tic().
        _t['im_detect'].tic()
        with torch.set_grad_enabled(False):
            # Original author's shape note:
            #   bbox -> (batch, h*w, prior, 4)
            #   iou  -> (batch, h*w, prior, 1)
            #   prob -> (batch, h*w, prior, 20)
            bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        # postprocess returns (bboxes, scores, cls_inds)
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Group the predictions per class; the score is kept as the last
        # column of every detection row.
        for j in range(num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        # save detect_result to xml
        dt.writeXml(img_name, "./result", ori_im, cfg.label_names,
                    cls_inds.tolist(), bboxes.tolist())

        if vis:
            im2show = yolo_utils.draw_detection(
                ori_im, bboxes, scores, cls_inds, cfg, thr=0.5)
            if im2show.shape[0] > 1100:
                # Scale down to a height of 1000 px, keeping the aspect ratio.
                im2show = cv2.resize(
                    im2show,
                    (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                     1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)
def main():
    """Run a trained Darknet19 over a list of images and dump KITTI results.

    The output directories are recreated from scratch, the model is loaded
    onto the GPU, and every image listed in ``input_file_list`` is detected
    and saved through ``save_as_kitti_format``. With ``vis_enable`` set, each
    frame is also shown and written to ``<output_dir>/detection``; pressing
    ``q`` stops the loop.
    """
    output_dir = '../output'
    output_template_dir = '../output_template'
    kitti_output_dir = '../kitti_det_output'
    input_file_list = '/home/cory/project/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'
    # input_file_list = '/home/cory/project/yolo2-pytorch/flow/w01_imgs.txt'
    vis_enable = False
    thresh = 0.5

    trained_model = '/home/cory/project/yolo2-pytorch/models/training/kitti_new_2_flow_center_ft_flownet2_joint/' \
                    'kitti_new_2_flow_center_ft_flownet2_joint_30.h5'

    # Start from clean output directories.
    shutil.rmtree(output_dir, ignore_errors=True)
    shutil.rmtree(kitti_output_dir, ignore_errors=True)
    shutil.copytree(output_template_dir, output_dir)
    os.makedirs(kitti_output_dir)

    net = Darknet19(cfg)
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print(trained_model)
    print('load model successfully')

    # Read the list inside a context manager so the handle is always closed
    # (the original left the file open).
    with open(input_file_list) as img_files:
        image_abs_paths = [line.strip() for line in img_files]

    t_det = Timer()
    t_total = Timer()

    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_output_dir, src_label='kitti')

        total_time = t_total.toc()
        format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms) %s'
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000, image_path))

        # Reset so the next frame reports its own timing.
        t_det.clear()
        t_total.clear()

        if vis_enable:
            im2show = yolo_utils.draw_detection(image, bboxes, scores,
                                                cls_inds, cfg)
            cv2.imshow('detection', im2show)
            cv2.imwrite(output_dir + '/detection/{:04d}.jpg'.format(i),
                        im2show)
            key = cv2.waitKey(0)
            if key == ord('q'):
                break
def produce_bbnet_patches(net1, sample_batched, objpoints3D, vertices, obj_diameter):
    """Run the first-stage network and turn its outputs into pose proposals.

    Args:
        net1: first-stage network; ``net1(im_data)`` must return three
            indexable collections (bbox, confidence, class score), each
            holding one tensor per feature-map scale (three scales are read).
        sample_batched: pair ``(batch, size_index)``; ``batch`` is a dict
            from which ``'image'``, ``'origin_im'`` and ``'origin_gtboxes'``
            are read.
        objpoints3D, vertices, obj_diameter: forwarded unchanged to
            ``yolo_utils.pose_proposals``.

    Returns:
        ``(Rt_pr_patch, corner_patch, gtconf2d_patch, gtconf3d_patch,
        bboxes_batch)``: one list per field, with one entry per image, taken
        from positions 0..4 of each ``pose_proposals`` result.

    NOTE(review): the nesting was reconstructed from whitespace-collapsed
    source. As in the original, the results only exist when
    ``cfg.args.seg_cullnet`` is set, and ``K`` is only bound for the
    'LINEMOD' and 'YCB' datasets -- confirm the intended behaviour otherwise.
    """
    net1.eval()
    batch = sample_batched[0]
    size_index = sample_batched[1]
    im = batch['image']
    non_nms = cfg.args.non_nms
    thresh = cfg.args.thresh

    # Camera intrinsics depend on the dataset.
    if cfg.args.dataset_name == 'LINEMOD':
        K = cfg.cam_K
    elif cfg.args.dataset_name == 'YCB':
        K = cfg.cam_K1

    pool = Pool(processes=4)
    try:
        if cfg.args.seg_cullnet:
            with torch.no_grad():
                im_data = net_utils.np_to_variable(
                    im, is_cuda=True, volatile=False).permute(0, 3, 1, 2)
                bbox_pred_all, conf_pred_all, score_pred_all = net1(im_data)

                # Softmax over the class dimension (last axis) of each scale.
                # ``range`` replaces the Python-2-only ``xrange``.
                prob_pred_all = []
                for scale in range(3):
                    scores = score_pred_all[scale]
                    prob_pred_all.append(
                        F.softmax(scores.view(-1, scores.size()[-1]),
                                  dim=1).view_as(scores))

            batch_size = im_data.shape[0]

            # Post-process every scale, one image per worker task. This was
            # three copy-pasted stanzas in the original.
            per_scale_targets = []
            for scale in range(3):
                bbox_pred = bbox_pred_all[scale].data.cpu().numpy()
                conf_pred = conf_pred_all[scale].data.cpu().numpy()
                prob_pred = prob_pred_all[scale].data.cpu().numpy()
                per_scale_targets.append(pool.map(
                    partial(yolo_utils.postprocess,
                            batch['origin_im'][0].shape, thresh, size_index,
                            non_nms),
                    ((bbox_pred[[b]], conf_pred[[b]], prob_pred[[b]])
                     for b in range(batch_size))))

            # Concatenate the outputs of the feature-map scales per image.
            per_image = list(zip(*per_scale_targets))
            bboxes_batch = [np.concatenate([row[0] for row in rows])
                            for rows in per_image]
            scores_batch = [np.concatenate([row[1] for row in rows])
                            for rows in per_image]
            cls_inds_batch = [np.concatenate([row[2] for row in rows])
                              for rows in per_image]

            targets = pool.map(
                partial(yolo_utils.pose_proposals, objpoints3D, vertices, K,
                        obj_diameter),
                ((bboxes_batch[b], scores_batch[b], cls_inds_batch[b],
                  batch['origin_gtboxes'][b], batch['origin_im'][b])
                 for b in range(batch_size)))
    finally:
        # Always release the worker processes, even when a stage above fails.
        pool.close()
        pool.join()

    Rt_pr_patch = [row[0] for row in targets]
    corner_patch = [row[1] for row in targets]
    gtconf2d_patch = [row[2] for row in targets]
    gtconf3d_patch = [row[3] for row in targets]
    bboxes_batch = [row[4] for row in targets]

    return Rt_pr_patch, corner_patch, gtconf2d_patch, gtconf3d_patch, bboxes_batch
def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None, size_index=0):
    """Run the detector and, in training mode, compute the three losses.

    Features are extracted through ``conv1s``/``conv2``/``conv3``; the
    reorganised ``conv1s`` output (the fine-grained feature layer, per the
    original author's note) is concatenated with ``conv3`` before
    ``conv4``/``conv5``.

    Args:
        im_data: input image batch fed to ``self.conv1s``.
        gt_boxes, gt_classes, dontcare: ground-truth annotations forwarded to
            ``self._build_target``; only read when ``self.training`` is set.
        size_index: forwarded to ``self._build_target``.

    Returns:
        ``(bbox_pred, iou_pred, prob_pred)`` viewed as
        ``(bsize, -1, num_anchors, D)`` with ``D`` equal to 4, 1 and
        ``num_classes`` respectively.

    Side effects:
        In training mode stores ``self.bbox_loss``, ``self.iou_loss`` and
        ``self.cls_loss``: summed squared errors over the masked entries,
        divided by the number of ground-truth boxes.
    """
    conv1s = self.conv1s(im_data)
    conv2 = self.conv2(conv1s)
    conv3 = self.conv3(conv2)
    conv1s_reorg = self.reorg(conv1s)
    cat_1_3 = torch.cat([conv1s_reorg, conv3], 1)
    conv4 = self.conv4(cat_1_3)
    conv5 = self.conv5(conv4)   # batch_size, out_channels, h, w
    global_average_pool = self.global_average_pool(conv5)

    # for detection
    # bsize, c, h, w -> bsize, h, w, c ->
    #                   bsize, h x w, num_anchors, 5+num_classes
    bsize, _, h, w = global_average_pool.size()
    # assert bsize == 1, 'detection only support one image per batch'
    global_average_pool_reshaped = \
        global_average_pool.permute(0, 2, 3, 1).contiguous().view(
            bsize, -1, cfg.num_anchors, cfg.num_classes + 5)  # noqa

    # Of the last axis, the first 5 channels are tx, ty, tw, th and the
    # objectness/confidence term; the remaining num_classes are class scores
    # (20 for VOC, per the original author's note).
    # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
    # torch.sigmoid replaces the deprecated F.sigmoid (same function).
    xy_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
    wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
    bbox_pred = torch.cat([xy_pred, wh_pred], 3)
    iou_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])

    # Softmax over the class scores of every anchor. The input is flattened
    # to 2-D, so dim=1 is what the implicit-dim call used to pick; spelling
    # it out avoids the deprecation warning.
    score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
    prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1]),
                          dim=1).view_as(score_pred)  # noqa

    # for training
    if self.training:
        bbox_pred_np = bbox_pred.data.cpu().numpy()
        iou_pred_np = iou_pred.data.cpu().numpy()
        # Build regression/classification targets and their masks from the
        # current predictions and the ground truth (the original author
        # describes this as matching anchors as positives/negatives).
        _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
            self._build_target(bbox_pred_np, gt_boxes, gt_classes, dontcare,
                               iou_pred_np, size_index)

        _boxes = net_utils.np_to_variable(_boxes)
        _ious = net_utils.np_to_variable(_ious)
        _classes = net_utils.np_to_variable(_classes)
        box_mask = net_utils.np_to_variable(_box_mask,
                                            dtype=torch.FloatTensor)
        iou_mask = net_utils.np_to_variable(_iou_mask,
                                            dtype=torch.FloatTensor)
        class_mask = net_utils.np_to_variable(_class_mask,
                                              dtype=torch.FloatTensor)

        num_boxes = sum((len(boxes) for boxes in gt_boxes))

        # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
        box_mask = box_mask.expand_as(_boxes)

        self.bbox_loss = nn.MSELoss(size_average=False)(
            bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
        self.iou_loss = nn.MSELoss(size_average=False)(
            iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa

        class_mask = class_mask.expand_as(prob_pred)
        self.cls_loss = nn.MSELoss(size_average=False)(
            prob_pred * class_mask, _classes * class_mask) / num_boxes  # noqa

    return bbox_pred, iou_pred, prob_pred
def home():
    """Detect objects in every ``.jpg`` of a directory and record the results.

    The request body is JSON with a ``dir_path`` key. Each image is run
    through the module-level ``net``, the annotated image is written to
    ``<dir_path>/out``, and one row per image is inserted into
    ``images_det`` (a '0'/'1' flag per entry of ``det_class``). After all
    images, a single row with the per-class detection counts is inserted
    into ``minute_det``.

    Relies on module-level ``request``, ``pool``, ``preprocess``, ``net``,
    ``cfg``, ``thresh``, ``det_class``, ``conn``, ``t_total`` and ``t_det``.
    """
    data = request.body.read()
    body = json.loads(data)
    im_path = body['dir_path']
    # im_path = 'demo'

    im_fnames = sorted(fname for fname in os.listdir(im_path)
                       if os.path.splitext(fname)[-1] == '.jpg')
    im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)

    # Per-class detection counts accumulated over the whole directory.
    min_record_tmp_list = [0] * len(det_class)

    for i, (image, im_data, fname) in enumerate(
            pool.imap(preprocess, im_fnames, chunksize=1)):
        print(fname)
        t_total.tic()
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        # print bbox_pred.shape, iou_pred.shape, prob_pred.shape
        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        im2show = yolo_utils.draw_detection(image, bboxes, scores, cls_inds,
                                            cfg)

        # create the list that is written to the database
        filename = fname.split("/")[-1]
        time_folder = im_path

        # write im2show to the output directory
        im_out_path = os.path.join(time_folder, "out")
        check_path_create(im_out_path)
        cv2.imwrite(os.path.join(im_out_path, filename), im2show)

        tmp_list = ['0'] * len(det_class)
        # A dedicated loop variable is used here: the original reused ``i``
        # and so overwrote the frame index reported below.
        for cls_ind in cls_inds:
            try:
                column = det_class.index(cfg.label_names[cls_ind])
            except (ValueError, IndexError):
                # Label is not one of the tracked classes; skip it.
                continue
            tmp_list[column] = '1'
            min_record_tmp_list[column] += 1
        tmp_list.insert(0, time_folder)
        tmp_list.insert(0, filename)

        # Column names come from det_class (module configuration); the
        # values are bound as parameters.
        conn.execute(
            """insert into images_det (name, time_folder, %s)\
            values (%s)""" % (",".join(det_class),
                              ",".join(['?'] * len(tmp_list))),
            tmp_list)
        conn.commit()

        total_time = t_total.toc()

        format_str = 'frame: %d, (detection: %.1f Hz, %.1f ms) (total: %.1f Hz, %.1f ms)'
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000))

        t_total.clear()
        t_det.clear()

    tmp_list = [im_path]
    tmp_list.extend(str(count) for count in min_record_tmp_list)
    conn.execute(
        """insert into minute_det (time_folder, %s) values (%s)"""
        % (",".join(det_class), ",".join(['?'] * len(tmp_list))),
        tmp_list)
    conn.commit()
if imdb.epoch > prev_epoch:
    # A new epoch has started: save trained weights
    save_name = os.path.join(cfg.train_output_dir,
                             '{}_{}.h5'.format(cfg.exp_name, imdb.epoch))
    net_utils.save_net(save_name, net)
    print('save model: {}'.format(save_name))

    # update check_point file (context manager closes it even on error)
    with open(cfg.check_point_file, 'w') as ckp:
        ckp.write(str(imdb.epoch))

    # prepare optimizer for next epoch
    optimizer = get_optimizer(cfg, net, imdb.epoch)

# forward
im_data = net_utils.np_to_variable(
    batch['images'], is_cuda=True, volatile=False).permute(0, 3, 1, 2)
x = net.forward(im_data, batch['gt_boxes'], batch['gt_classes'],
                batch['dontcare'], network_size)

# loss
# reshape(-1)[0] reads the scalar whether the loss array is 0-dimensional
# or has a single element; a plain [0] raises on 0-dimensional arrays.
bbox_loss += net.bbox_loss.data.cpu().numpy().reshape(-1)[0]
iou_loss += net.iou_loss.data.cpu().numpy().reshape(-1)[0]
cls_loss += net.cls_loss.data.cpu().numpy().reshape(-1)[0]
train_loss += net.loss.data.cpu().numpy().reshape(-1)[0]
cnt += 1
# print('train_loss', net.loss.data.cpu().numpy()[0])

# backward
optimizer.zero_grad()
net.loss.backward()
optimizer.step()
# Use only half of the image since ZED camera has 2 cameras
# (floor division: a slice bound must be an integer)
frame = frame[:, 0:frame.shape[1] // 2, :]

# Crop the middle square of the image
# NOTE(review): properties 3 and 4 are presumably the capture's frame width
# and height -- confirm against the VideoCapture property ids.
frame_centre = cap.get(3) / 4
frame_height = cap.get(4)
# cap.get() returns floats, so the bounds are truncated to int before slicing.
frame = frame[:, int(frame_centre - frame_height / 2):int(frame_centre + frame_height / 2), :]

# Preprocess the image
t_total.tic()
image, im_data = preprocess(frame)
im_data = net_utils.np_to_variable(
    im_data, use_cuda=cfg.use_cuda, volatile=True).permute(0, 3, 1, 2)

# Forward
t_det.tic()
bbox_pred, iou_pred, prob_pred = model(im_data)
det_time = t_det.toc()

# Postprocess the image
# to numpy
bbox_pred = bbox_pred.data.cpu().numpy()
iou_pred = iou_pred.data.cpu().numpy()
prob_pred = prob_pred.data.cpu().numpy()
def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None, size_index=0):
    """Run the detector and, in training mode, compute the three losses.

    Args:
        im_data: input image batch fed to ``self.conv1s``.
        gt_boxes, gt_classes, dontcare: ground-truth annotations forwarded to
            ``self._build_target``; only read when ``self.training`` is set.
        size_index: forwarded to ``self._build_target``.

    Returns:
        ``(bbox_pred, iou_pred, prob_pred)`` viewed as
        ``(bsize, -1, num_anchors, D)`` with ``D`` equal to 4, 1 and
        ``num_classes`` respectively.

    Side effects:
        In training mode stores ``self.bbox_loss``, ``self.iou_loss`` and
        ``self.cls_loss``: summed squared errors over the masked entries,
        divided by the number of ground-truth boxes.
    """
    conv1s = self.conv1s(im_data)
    conv2 = self.conv2(conv1s)
    conv3 = self.conv3(conv2)
    conv1s_reorg = self.reorg(conv1s)
    cat_1_3 = torch.cat([conv1s_reorg, conv3], 1)
    conv4 = self.conv4(cat_1_3)
    conv5 = self.conv5(conv4)   # batch_size, out_channels, h, w
    global_average_pool = self.global_average_pool(conv5)

    # for detection
    # bsize, c, h, w -> bsize, h, w, c ->
    #                   bsize, h x w, num_anchors, 5+num_classes
    bsize, _, h, w = global_average_pool.size()
    # assert bsize == 1, 'detection only support one image per batch'
    global_average_pool_reshaped = \
        global_average_pool.permute(0, 2, 3, 1).contiguous().view(
            bsize, -1, cfg.num_anchors, cfg.num_classes + 5)  # noqa

    # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
    # torch.sigmoid replaces the deprecated F.sigmoid (same function).
    xy_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
    wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
    bbox_pred = torch.cat([xy_pred, wh_pred], 3)
    iou_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])

    # Softmax over the class scores of every anchor. The input is flattened
    # to 2-D, so dim=1 is what the implicit-dim call used to pick; spelling
    # it out avoids the deprecation warning.
    score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
    prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1]),
                          dim=1).view_as(score_pred)  # noqa

    # for training
    if self.training:
        bbox_pred_np = bbox_pred.data.cpu().numpy()
        iou_pred_np = iou_pred.data.cpu().numpy()
        # Targets and masks are built on the CPU from the current
        # predictions and the ground truth.
        _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
            self._build_target(bbox_pred_np, gt_boxes, gt_classes, dontcare,
                               iou_pred_np, size_index)

        _boxes = net_utils.np_to_variable(_boxes)
        _ious = net_utils.np_to_variable(_ious)
        _classes = net_utils.np_to_variable(_classes)
        box_mask = net_utils.np_to_variable(_box_mask,
                                            dtype=torch.FloatTensor)
        iou_mask = net_utils.np_to_variable(_iou_mask,
                                            dtype=torch.FloatTensor)
        class_mask = net_utils.np_to_variable(_class_mask,
                                              dtype=torch.FloatTensor)

        num_boxes = sum((len(boxes) for boxes in gt_boxes))

        # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
        box_mask = box_mask.expand_as(_boxes)

        self.bbox_loss = nn.MSELoss(size_average=False)(
            bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
        self.iou_loss = nn.MSELoss(size_average=False)(
            iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa

        class_mask = class_mask.expand_as(prob_pred)
        self.cls_loss = nn.MSELoss(size_average=False)(
            prob_pred * class_mask, _classes * class_mask) / num_boxes  # noqa

    return bbox_pred, iou_pred, prob_pred
def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None, size_index=0):
    """Run the detector (with one feedback pass) and compute training losses.

    After the first ``conv5`` pass, ``conv5`` is recomputed once from
    ``conv4 + self.conv_back(conv5)`` (marked "IFF" by the original author).

    Args:
        im_data: input image batch fed to ``self.conv1s``.
        gt_boxes, gt_classes, dontcare: ground-truth annotations forwarded to
            ``self._build_target``; only read when ``self.training`` is set.
        size_index: forwarded to ``self._build_target``.

    Returns:
        ``(bbox_pred, iou_pred, prob_pred)`` viewed as
        ``(bsize, -1, num_anchors, D)`` with ``D`` equal to 4, 1 and
        ``num_classes`` respectively.

    Side effects:
        In training mode stores ``self.bbox_loss``, ``self.iou_loss`` and
        ``self.cls_loss``: summed squared errors over the masked entries,
        divided by the number of ground-truth boxes.
    """
    conv1s = self.conv1s(im_data)
    conv2 = self.conv2(conv1s)
    conv3 = self.conv3(conv2)
    conv1s_reorg = self.reorg(conv1s)
    cat_1_3 = torch.cat([conv1s_reorg, conv3], 1)
    conv4 = self.conv4(cat_1_3)
    conv5 = self.conv5(conv4)   # batch_size, out_channels, h, w

    # IFF: feed conv5 back through conv_back and re-run conv5 (one pass)
    for i in range(1):
        conv5 = self.conv5(conv4 + self.conv_back(conv5))
        # conv5 = self.conv5(conv4.mul(self.conv_back(conv5)))

    global_average_pool = self.global_average_pool(conv5)

    # for detection
    # bsize, c, h, w -> bsize, h, w, c ->
    #                   bsize, h x w, num_anchors, 5+num_classes
    bsize, _, h, w = global_average_pool.size()
    # assert bsize == 1, 'detection only support one image per batch'
    global_average_pool_reshaped = \
        global_average_pool.permute(0, 2, 3, 1).contiguous().view(
            bsize, -1, cfg.num_anchors, cfg.num_classes + 5)  # noqa

    # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
    # torch.sigmoid replaces the deprecated F.sigmoid (same function).
    xy_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
    wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
    bbox_pred = torch.cat([xy_pred, wh_pred], 3)
    iou_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])

    # Softmax over the class scores of every anchor. The input is flattened
    # to 2-D, so dim=1 is what the implicit-dim call used to pick; spelling
    # it out avoids the deprecation warning.
    score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
    prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1]),
                          dim=1).view_as(score_pred)  # noqa

    # for training
    if self.training:
        bbox_pred_np = bbox_pred.data.cpu().numpy()
        iou_pred_np = iou_pred.data.cpu().numpy()
        # Targets and masks are built on the CPU from the current
        # predictions and the ground truth.
        _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
            self._build_target(bbox_pred_np, gt_boxes, gt_classes, dontcare,
                               iou_pred_np, size_index)

        _boxes = net_utils.np_to_variable(_boxes)
        _ious = net_utils.np_to_variable(_ious)
        _classes = net_utils.np_to_variable(_classes)
        box_mask = net_utils.np_to_variable(_box_mask,
                                            dtype=torch.FloatTensor)
        iou_mask = net_utils.np_to_variable(_iou_mask,
                                            dtype=torch.FloatTensor)
        class_mask = net_utils.np_to_variable(_class_mask,
                                              dtype=torch.FloatTensor)

        num_boxes = sum((len(boxes) for boxes in gt_boxes))

        # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
        box_mask = box_mask.expand_as(_boxes)

        self.bbox_loss = nn.MSELoss(size_average=False)(
            bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
        self.iou_loss = nn.MSELoss(size_average=False)(
            iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa

        class_mask = class_mask.expand_as(prob_pred)
        self.cls_loss = nn.MSELoss(size_average=False)(
            prob_pred * class_mask, _classes * class_mask) / num_boxes  # noqa

    return bbox_pred, iou_pred, prob_pred
def train_batch(net2, rgb_patch, gtconf_patch, train_loss_epoch, conf_loss_epoch):
    """Train the second-stage network on the patches of one first-stage batch.

    The patches are consumed in sub-batches of ``subnetwork_batchsize``
    patches; one optimizer step is taken per sub-batch, plus one for the
    left-over patches, if any.

    Args:
        net2: second-stage network. It is called on a
            ``(-1, C, cullnet_input, cullnet_input)`` view of the patches
            (``C`` is 4 when ``cfg.args.cullnet_inconf == 'concat'``, else 3)
            and exposes ``loss`` (``module.loss`` when ``args.mGPUs``).
        rgb_patch: per-image patches; converted with ``np.array`` and sliced
            along the first two axes.
        gtconf_patch: ground-truth confidences, indexed like ``rgb_patch``.
        train_loss_epoch, conf_loss_epoch: running totals to which this
            batch's loss is added.

    Returns:
        ``(confidence_new_batch, conf_loss_np_subnetwrk, train_loss_np,
        train_loss_epoch, conf_loss_epoch)``; ``confidence_new_batch`` is
        reshaped to ``(-1, k_proposals)`` rows.

    Relies on the module-level ``cfg``, ``args`` and ``optimizer``.

    NOTE(review): the nesting was reconstructed from whitespace-collapsed
    source. As in the original, results are only produced when
    ``cfg.args.seg_cullnet`` is set -- confirm.
    """
    net2.train()

    if cfg.args.seg_cullnet:
        confidence_new_batch = []
        rgb_patch_np = np.array(rgb_patch)
        gtconf_patch_np = np.array(gtconf_patch)
        current_batch_size = rgb_patch_np.shape[0]
        k_proposals = cfg.args.k_proposals
        patch_side = cfg.args.cullnet_input
        in_channels = 4 if cfg.args.cullnet_inconf == 'concat' else 3

        # Sub-batch size per backbone; 80 for anything not listed. An
        # explicit --sub_bs other than 80 overrides the table.
        batchsize_by_type = {
            'vgg19_bn': 32,
            'allconvnet': 32,
            'allconvnet_small': 128,
            'resnet18': 512,
            'resnet18_gn': 80,
            'resnet18concat_gn': 80,
            'resnet50concat_gn': 80,
            'resnet50_gn': 80,
        }
        subnetwork_batchsize = batchsize_by_type.get(cfg.args.cullnet_type, 80)
        if not cfg.args.sub_bs == 80:
            subnetwork_batchsize = cfg.args.sub_bs

        def _train_step(patches_np, gt_np):
            """Take one optimizer step; return (confidences, loss) as numpy."""
            patches_var = net_utils.np_to_variable(
                patches_np, is_cuda=True, volatile=True).permute(0, 1, 4, 2, 3)
            confidence = net2(
                patches_var.view(-1, in_channels, patch_side, patch_side))
            if args.mGPUs:
                loss_var = net2.module.loss(confidence, gt_np)
            else:
                loss_var = net2.loss(confidence, gt_np)
            confidence_np = confidence.data.cpu().numpy()
            optimizer.zero_grad()
            loss_var.backward()
            optimizer.step()
            return confidence_np, loss_var.data.cpu().numpy()

        # partition_size is the number of network-2 batches obtained from one
        # batched output of network 1. Floor division keeps these values
        # integers (they are used as range/slice/reshape arguments) and
        # matches the integer-division results the original relied on.
        partition_size = (current_batch_size * k_proposals) // subnetwork_batchsize
        subnetwork_numimages = subnetwork_batchsize // k_proposals

        conf_loss_np_subnetwrk = 0
        for i in range(partition_size):
            if subnetwork_batchsize < k_proposals:
                # A sub-batch is a run of proposals inside a single image.
                b_id = (i * subnetwork_batchsize) // k_proposals
                p_id = (i * subnetwork_batchsize) % k_proposals
                patches = rgb_patch_np[b_id:b_id + 1,
                                       p_id:p_id + subnetwork_batchsize]
                gtconf_patch = gtconf_patch_np[
                    b_id:b_id + 1,
                    p_id:p_id + subnetwork_batchsize].reshape(-1, 1)
            else:
                # A sub-batch covers subnetwork_numimages whole images.
                first = i * subnetwork_numimages
                last = (i + 1) * subnetwork_numimages
                patches = rgb_patch_np[first:last]
                gtconf_patch = gtconf_patch_np[first:last].reshape(-1, 1)

            confidence_new_np, conf_loss_np = _train_step(patches, gtconf_patch)
            confidence_new_batch.append(confidence_new_np)
            conf_loss_np_subnetwrk += conf_loss_np

        confidence_new_batch = np.array(confidence_new_batch)
        left_overpatches = (current_batch_size * k_proposals) % subnetwork_batchsize
        if subnetwork_batchsize < k_proposals:
            confidence_new_batch = confidence_new_batch.reshape(
                int(partition_size * (float(subnetwork_batchsize) / k_proposals)),
                k_proposals)
        else:
            confidence_new_batch = confidence_new_batch.reshape(
                partition_size * subnetwork_numimages, k_proposals)

        if left_overpatches > 0:
            # Images that did not fill a whole sub-batch are trained last.
            first = partition_size * subnetwork_numimages
            gtconf_patch = gtconf_patch_np[first:].reshape(-1, 1)
            confidence_new_np, conf_loss_np = _train_step(
                rgb_patch_np[first:], gtconf_patch)
            confidence_new_np = confidence_new_np.reshape(-1, k_proposals)
            confidence_new_batch = np.concatenate(
                (confidence_new_batch, confidence_new_np), 0)
            conf_loss_np_subnetwrk += conf_loss_np

        train_loss_np = conf_loss_np_subnetwrk

    train_loss_epoch += train_loss_np
    conf_loss_epoch += conf_loss_np_subnetwrk

    return confidence_new_batch, conf_loss_np_subnetwrk, train_loss_np, train_loss_epoch, conf_loss_epoch
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run the detector over every image of ``imdb`` and evaluate the result.

    Args:
        net: detection network; ``net(im_data)`` must return
            ``(bbox_pred, iou_pred, prob_pred)``.
        imdb: dataset object providing ``num_images``, ``num_classes``,
            ``next_batch(size_index=...)`` and ``evaluate_detections``.
        max_per_image: maximum number of detections kept per image over all
            classes; a value <= 0 disables the cap.
        thresh: score threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, show every image with its detections and wait for a
            key press.

    Reads the module-level ``output_dir`` and ``cfg``, writes
    ``detections.pkl`` into ``output_dir`` and then calls
    ``imdb.evaluate_detections``.
    """
    # (original author's marker: "chang here for ryan")
    num_images = imdb.num_images
    num_classes = imdb.num_classes
    print('num-images', num_images)
    # num_images = 3

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    # size_index is pinned to 0 here instead of args.image_size_index.
    # size_index = args.image_size_index
    size_index = 0

    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        im_data = net_utils.np_to_variable(
            batch['images'], is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Group the predictions per class; the score becomes column 4.
        for j in range(num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                # Start at class 0: the loop above fills all_boxes[0] with
                # real detections, so it must be trimmed like every other
                # class (the original started at 1 and never capped it).
                for j in range(num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(
                ori_im, bboxes, scores, cls_inds, cfg, thr=0.1)
            if im2show.shape[0] > 1100:
                # Scale down to a height of 1000 px, keeping the aspect ratio.
                im2show = cv2.resize(
                    im2show,
                    (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                     1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    """Run the detector over every image of ``imdb`` and evaluate the result.

    Args:
        net: detection network; ``net(im_data)`` must return
            ``(bbox_pred, iou_pred, prob_pred)``.
        imdb: dataset object providing ``num_images``, ``num_classes``,
            ``next_batch(size_index=...)`` and ``evaluate_detections``.
        max_per_image: maximum number of detections kept per image over all
            classes; a value <= 0 disables the cap.
        thresh: score threshold forwarded to ``yolo_utils.postprocess``.
        vis: when true, show every image with its detections and wait for a
            key press.

    Reads the module-level ``output_dir``, ``args`` and ``cfg``, writes
    ``detections.pkl`` into ``output_dir`` and then calls
    ``imdb.evaluate_detections``.
    """
    num_images = imdb.num_images
    num_classes = imdb.num_classes

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index

    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        im_data = net_utils.np_to_variable(
            batch['images'], is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()
        # Group the predictions per class; the score becomes column 4.
        for j in range(num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                # Start at class 0: the loop above fills all_boxes[0] with
                # real detections, so it must be trimmed like every other
                # class (the original started at 1 and never capped it).
                for j in range(num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(
                ori_im, bboxes, scores, cls_inds, cfg, thr=0.1)
            if im2show.shape[0] > 1100:
                # Scale down to a height of 1000 px, keeping the aspect ratio.
                im2show = cv2.resize(
                    im2show,
                    (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                     1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
def forward(self, im_data, gt_boxes=None, gt_classes=None, dontcare=None, size_index=0):
    """Run the detector on top of ``self.basenet`` and compute training losses.

    Args:
        im_data: input image batch fed to ``self.basenet``.
        gt_boxes, gt_classes, dontcare: ground-truth annotations forwarded to
            ``self._build_target``; only read when ``self.training`` is set.
        size_index: forwarded to ``self._build_target``.

    Returns:
        ``(bbox_pred, iou_pred, prob_pred)`` viewed as
        ``(bsize, -1, num_anchors, D)`` with ``D`` equal to 4, 1 and
        ``num_classes`` respectively.

    Side effects:
        In training mode stores ``self.bbox_loss``, ``self.iou_loss`` and
        ``self.cls_loss``: summed squared errors over the masked entries,
        divided by the number of ground-truth boxes.
    """
    feature = self.basenet(im_data)
    # The original passed an undefined ``conv4`` here (NameError on every
    # call) and never used ``feature``.
    # NOTE(review): assumes basenet's output is the intended conv5 input --
    # confirm the channel count matches.
    conv5 = self.conv5(feature)
    global_average_pool = self.global_average_pool(conv5)

    # for detection
    # bsize, c, h, w -> bsize, h, w, c ->
    #                   bsize, h x w, num_anchors, 5+num_classes
    bsize, _, h, w = global_average_pool.size()
    # assert bsize == 1, 'detection only support one image per batch'
    global_average_pool_reshaped = \
        global_average_pool.permute(0, 2, 3, 1).contiguous().view(
            bsize, -1, cfg.num_anchors, cfg.num_classes + 5)

    # tx, ty, tw, th, to -> sig(tx), sig(ty), exp(tw), exp(th), sig(to)
    # torch.sigmoid replaces the deprecated F.sigmoid (same function).
    xy_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 0:2])
    wh_pred = torch.exp(global_average_pool_reshaped[:, :, :, 2:4])
    bbox_pred = torch.cat([xy_pred, wh_pred], 3)
    iou_pred = torch.sigmoid(global_average_pool_reshaped[:, :, :, 4:5])

    # Softmax over the class scores of every anchor. The input is flattened
    # to 2-D, so dim=1 is what the implicit-dim call used to pick; spelling
    # it out avoids the deprecation warning.
    score_pred = global_average_pool_reshaped[:, :, :, 5:].contiguous()
    prob_pred = F.softmax(score_pred.view(-1, score_pred.size()[-1]),
                          dim=1).view_as(score_pred)

    # for training
    if self.training:
        bbox_pred_np = bbox_pred.data.cpu().numpy()
        iou_pred_np = iou_pred.data.cpu().numpy()
        # Targets and masks are built on the CPU from the current
        # predictions and the ground truth.
        _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask = \
            self._build_target(bbox_pred_np, gt_boxes, gt_classes, dontcare,
                               iou_pred_np, size_index)

        _boxes = net_utils.np_to_variable(_boxes)
        _ious = net_utils.np_to_variable(_ious)
        _classes = net_utils.np_to_variable(_classes)
        box_mask = net_utils.np_to_variable(_box_mask,
                                            dtype=torch.FloatTensor)
        iou_mask = net_utils.np_to_variable(_iou_mask,
                                            dtype=torch.FloatTensor)
        class_mask = net_utils.np_to_variable(_class_mask,
                                              dtype=torch.FloatTensor)

        num_boxes = sum((len(boxes) for boxes in gt_boxes))

        # _boxes[:, :, :, 2:4] = torch.log(_boxes[:, :, :, 2:4])
        box_mask = box_mask.expand_as(_boxes)

        self.bbox_loss = nn.MSELoss(size_average=False)(
            bbox_pred * box_mask, _boxes * box_mask) / num_boxes  # noqa
        self.iou_loss = nn.MSELoss(size_average=False)(
            iou_pred * iou_mask, _ious * iou_mask) / num_boxes  # noqa

        class_mask = class_mask.expand_as(prob_pred)
        self.cls_loss = nn.MSELoss(size_average=False)(
            prob_pred * class_mask, _classes * class_mask) / num_boxes  # noqa

    return bbox_pred, iou_pred, prob_pred
def main():
    """Run a trained Darknet19 detector over the KITTI validation image list.

    Recreates the ``output`` and ``kitti_det_output`` directories, loads the
    trained weights onto the GPU, then for every image path in the list file
    runs detection, post-processes the predictions and hands them to
    ``save_as_kitti_format``. Per-frame detection and total timings are
    printed. When ``vis_enable`` is true, each frame is also shown and saved
    under ``output/detection``, and pressing ``q`` stops the loop.
    """
    # Start from clean output directories.
    shutil.rmtree('output', ignore_errors=True)
    shutil.copytree('output_template', 'output')
    shutil.rmtree('kitti_det_output', ignore_errors=True)
    os.makedirs('kitti_det_output')

    trained_model = '/home/cory/yolo2-pytorch/models/training/kitti_new_2/kitti_new_2_100.h5'
    image_list_path = '/home/cory/yolo2-pytorch/train_data/kitti/kitti_val_images.txt'
    kitti_filename = 'yolo_flow_kitti_det.txt'
    thresh = 0.5
    vis_enable = False

    net = Darknet19()
    net_utils.load_net(trained_model, net)
    net.eval()
    net.cuda()
    print('load model successfully')

    # One image path per line; close the file promptly and skip blank lines
    # so an empty path is never passed to preprocess().
    with open(image_list_path) as img_files:
        image_abs_paths = [line.strip() for line in img_files if line.strip()]

    # Best-effort removal of a stale result file from a previous run.
    try:
        os.remove(kitti_filename)
    except OSError:
        pass

    t_det = Timer()
    t_total = Timer()
    format_str = 'frame: %d, (detection: %.1f fps, %.1f ms) (total: %.1f fps, %.1f ms)'

    for i, image_path in enumerate(image_abs_paths):
        t_total.tic()
        image, im_data = preprocess(image_path)
        # Channels-last -> channels-first before feeding the network.
        im_data = net_utils.np_to_variable(
            im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)

        t_det.tic()
        bbox_pred, iou_pred, prob_pred = net.forward(im_data)
        det_time = t_det.toc()

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
        det_obj = detection_objects(bboxes, scores, cls_inds)
        save_as_kitti_format(i, det_obj, kitti_filename, src_label='kitti')

        if vis_enable:
            im2show = yolo_utils.draw_detection(
                image, bboxes, scores, cls_inds, cfg)
            cv2.imshow('detection', im2show)
            cv2.imwrite('output/detection/{:04d}.jpg'.format(i), im2show)

        total_time = t_total.toc()
        print(format_str % (i, 1. / det_time, det_time * 1000,
                            1. / total_time, total_time * 1000))

        # Reset the timers so each frame is reported on its own.
        t_det.clear()
        t_total.clear()

        if vis_enable:
            key = cv2.waitKey(0)
            if key == ord('q'):
                break