def gen_depth(img):
    # returns dataframe with image bounding box
    df = pd.DataFrame(columns=[
        'filename', 'class', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'
    ])
    img = t.from_numpy(img)[None]
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load(
        '/home/olixu/distance-cnn/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth.701052458187'
    )
    opt.caffe_pretrain = False  # this model was trained from torchvision-pretrained model
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
    box_new = np.asarray(_bboxes)
    label_new = at.tonumpy(_labels[0]).reshape(-1)
    score_new = at.tonumpy(_scores[0]).reshape(-1)
    for i in range(box_new.shape[1]):
        df.at[i, 'filename'] = 'file' + str(i)
        df.at[i, 'class'] = label_new[i]
        df.at[i, 'confidence'] = score_new[i]
        # bbox coordinates (predictions are (ymin, xmin, ymax, xmax))
        df.at[i, 'xmin'] = box_new[0, i, 1]
        df.at[i, 'ymin'] = box_new[0, i, 0]
        df.at[i, 'xmax'] = box_new[0, i, 3]
        df.at[i, 'ymax'] = box_new[0, i, 2]
    return inf.infer(df)
def clip_bboxs_on_image(rois, roi_locs):
    """
    :param rois: Tensor
    :param roi_locs: Tensor
    :return: bbox: Tensor
    """
    loc_normalize_mean = (0., 0., 0., 0.)
    loc_normalize_std = (0.1, 0.1, 0.2, 0.2)
    mean = torch.Tensor(loc_normalize_mean).cuda(). \
        repeat(2)[None]
    std = torch.Tensor(loc_normalize_std).cuda(). \
        repeat(2)[None]
    roi_locs = roi_locs * std + mean
    roi_loc = roi_locs.view(-1, 2, 4)
    rois = at.totensor(rois)
    rois = rois.view(-1, 1, 4).expand_as(roi_loc)
    bbox = loc2bbox(at.tonumpy(rois).reshape((-1, 4)),
                    at.tonumpy(roi_loc).reshape((-1, 4)))
    bbox = at.totensor(bbox)
    box = bbox.view(-1, 8)
    box[:, 0::2] = (box[:, 0::2]).clamp(min=0, max=800)
    box[:, 1::2] = (box[:, 1::2]).clamp(min=0, max=800)
    box = box.reshape((-1, 2, 4))[:, 1, :]
    return box
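# For reference: loc2bbox above is imported from the repo's bbox utilities.
# Below is a minimal numpy sketch of the standard Faster R-CNN decoding it
# implements (a sketch only; the repo's actual helper may differ in details
# such as dtype handling and empty-input shortcuts).
import numpy as np

def loc2bbox_sketch(src_bbox, loc):
    # src_bbox: (R, 4) as (ymin, xmin, ymax, xmax); loc: (R, 4) as (dy, dx, dh, dw)
    h = src_bbox[:, 2] - src_bbox[:, 0]
    w = src_bbox[:, 3] - src_bbox[:, 1]
    cy = src_bbox[:, 0] + 0.5 * h
    cx = src_bbox[:, 1] + 0.5 * w
    # standard R-CNN parameterization: shift centers, scale sizes exponentially
    new_cy = loc[:, 0] * h + cy
    new_cx = loc[:, 1] * w + cx
    new_h = np.exp(loc[:, 2]) * h
    new_w = np.exp(loc[:, 3]) * w
    dst = np.empty_like(loc)
    dst[:, 0] = new_cy - 0.5 * new_h
    dst[:, 1] = new_cx - 0.5 * new_w
    dst[:, 2] = new_cy + 0.5 * new_h
    dst[:, 3] = new_cx + 0.5 * new_w
    return dst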
def draw(dataloader, faster_rcnn, test_num=100):
    for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_,
             id_) in enumerate(dataloader):
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        pred_bboxes_, pred_labels_, pred_scores_, _feature = faster_rcnn.predict(
            imgs, [sizes])
        img_file = opt.voc_data_dir + '/JPEGImages/' + str(id_[0]) + '.jpg'
        image = cv2.imread(img_file)
        # convert to numpy format
        bboxs = at.tonumpy(pred_bboxes_[0])
        name = at.tonumpy(pred_labels_[0]).reshape(-1)
        score = at.tonumpy(pred_scores_[0]).reshape(-1)
        # save test-set predictions for each round; ideally gate this on the
        # epoch and save only every 10 epochs, otherwise it wastes too much time
        for i in range(len(name)):
            xmin = int(round(float(bboxs[i, 1])))
            ymin = int(round(float(bboxs[i, 0])))
            xmax = int(round(float(bboxs[i, 3])))
            ymax = int(round(float(bboxs[i, 2])))
            if score[i] <= opt.threshold:
                continue
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1)
            cv2.putText(image, opt.VOC_BBOX_LABEL_NAMES[name[i]],
                        (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX,
                        1e-3 * image.shape[0], (0, 0, 255), 1)
            cv2.putText(image, str(score[i])[0:3], (xmin + 30, ymin - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0],
                        (0, 0, 255), 1)
        cv2.imwrite('result/' + str(id_[0]) + '.jpg', image)
def train(model, train_loader, criterion, epoch, vis):
    model.train()
    batch_loss = 0
    for batch_idx, sample_batched in enumerate(train_loader):
        data = sample_batched['image']
        target = sample_batched['mask']
        data, target = Variable(data.type(opt.dtype)), Variable(
            target.type(opt.dtype))
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        batch_loss += loss.data[0]
        if (batch_idx + 1) % opt.plot_every == 0:
            ori_img_ = inverse_normalize(at.tonumpy(data[0]))
            target_ = at.tonumpy(target[0])
            pred_ = at.tonumpy(output[0])
            vis.img('gt_img', ori_img_)
            vis.img('gt_mask', target_)
            vis.img('pred_mask', (pred_ >= 0.5).astype(np.float32))
    batch_loss /= (batch_idx + 1)
    print('epoch: ' + str(epoch) + ', train loss: ' + str(batch_loss))
    with open('logs.txt', 'a') as file:
        file.write('epoch: ' + str(epoch) + ', train loss: ' +
                   str(batch_loss) + '\n')
    vis.plot('train loss', batch_loss)
def eval(dataloader, faster_rcnn, vis, test_num=10000):
    pred_bboxes, pred_labels, pred_scores = list(), list(), list()
    gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
    for ii, (imgs, sizes, gt_bboxes_, gt_labels_,
             gt_difficults_) in tqdm(enumerate(dataloader)):
        # plot ground-truth bboxes
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
            imgs, [sizes])
        img = imgs.cuda().float()
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_img = visdom_bbox(ori_img_,
                               at.tonumpy(pred_bboxes_[0]),
                               at.tonumpy(pred_labels_[0]).reshape(-1),
                               at.tonumpy(pred_scores_[0]))
        vis.img('test_pred_img', pred_img)
        gt_bboxes += list(gt_bboxes_.numpy())
        gt_labels += list(gt_labels_.numpy())
        gt_difficults += list(gt_difficults_.numpy())
        pred_bboxes += pred_bboxes_
        pred_labels += pred_labels_
        pred_scores += pred_scores_
        if ii == test_num:
            break
    result = eval_detection_voc(pred_bboxes, pred_labels, pred_scores,
                                gt_bboxes, gt_labels, gt_difficults,
                                use_07_metric=True)
    return result
def predict(self, imgs=[]):
    imglist = []
    for i in imgs:
        img_path = os.path.join(self.imgs_path, i)
        img = read_image(img_path)
        if self.isneed_enhance:
            img = Image_Enhance().api(img)
        img = t.from_numpy(img)[None]
        imglist.append(img)
    for index, img in enumerate(imglist):
        starttime = datetime.datetime.now()
        _bboxes, _labels, _scores = self.trainer.faster_rcnn.predict(
            img, visualize=True)
        endtime = datetime.datetime.now()
        print('predict time consumed=%s' % round(
            (endtime - starttime).microseconds / 1000000 +
            (endtime - starttime).seconds, 6))
        if self.imgs_vis_path:
            img_path = os.path.join(self.imgs_vis_path, imgs[index])
            img = read_image(img_path)
            img = t.from_numpy(img)[None]
        ax = vis_bbox(at.tonumpy(img[0]),
                      at.tonumpy(_bboxes[0]),
                      at.tonumpy(_labels[0]).reshape(-1),
                      at.tonumpy(_scores[0]).reshape(-1))
        fig = ax.get_figure()
        fig.savefig("output.png")
def rpn_loss(self, rpn_loc, rpn_score, bbox, anchor, im_size):
    # Get ground-truth locs and labels
    gt_rpn_loc, gt_rpn_lbl = self.anchor_target_creator(
        at.tonumpy(bbox), anchor, im_size)
    gt_rpn_lbl = at.totensor(gt_rpn_lbl).long()
    gt_rpn_loc = at.totensor(gt_rpn_loc)

    # calculate localization loss for rpn (sigma = 3)
    rpn_loc_loss = self.loc_loss(rpn_loc, gt_rpn_loc, gt_rpn_lbl.data, 3)

    # calculate classification loss for rpn
    rpn_cls_loss = F.cross_entropy(rpn_score.to(self.device),
                                   gt_rpn_lbl.to(self.device),
                                   ignore_index=-1)
    _gt_rpn_lbl = gt_rpn_lbl[gt_rpn_lbl > -1]
    _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_lbl) > -1]
    # self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_lbl.data.long())
    return rpn_cls_loss, rpn_loc_loss
def forward(self, features, img_size, scale, gt_bbox, gt_label):
    n = 1
    h = F.relu(self.rpn_conv(features))
    loc = self.rpn_loc(h)
    score = self.rpn_score(h)
    h, w = loc.shape[2:]
    loc = loc.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
    score = score.permute(0, 2, 3, 1).contiguous()
    softmax_score = F.softmax(score.view(n, h, w, self.n_anchor, 2), dim=4)
    fg_score = softmax_score[:, :, :, :, 1].contiguous().view(n, -1)
    score = score.view(n, -1, 2)
    feat_shape = (h, w)
    feat_stride = img_size[0] / h
    anchor = generate_anchors(self.scales, self.ratios, feat_shape,
                              feat_stride)
    loc = loc[0]
    score = score[0]
    fg_score = fg_score[0]
    roi = self.proposal_layer(loc.cpu().data.numpy(),
                              fg_score.cpu().data.numpy(), anchor, img_size,
                              scale)
    if self.training:
        # if in the training phase, sample RoIs
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_layer(
            roi, at.tonumpy(gt_bbox), at.tonumpy(gt_label),
            self.loc_normalize_mean, self.loc_normalize_std)
        # get gt_loc (offset from anchor to gt_bbox)
        gt_rpn_loc, gt_rpn_label = self.anchor_target_layer(
            at.tonumpy(gt_bbox), anchor, img_size)
        gt_rpn_loc = at.totensor(gt_rpn_loc)
        gt_rpn_label = at.totensor(gt_rpn_label).long()
        # bounding-box regression loss
        rpn_loc_loss = bbox_regression_loss(loc, gt_rpn_loc,
                                            gt_rpn_label.data,
                                            self.rpn_sigma)
        # foreground-background classification loss
        rpn_cls_loss = F.cross_entropy(score, gt_rpn_label.cuda(),
                                       ignore_index=-1)
        return sample_roi, gt_roi_loc, gt_roi_label, rpn_loc_loss, rpn_cls_loss
    return roi
def train(**kwargs):
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # ori_img_ = (at.tonumpy(img[0]))
                losses = trainer.get_meter_data()
                print(losses)
                write_image(ori_img_, at.tonumpy(bbox[0]), 'gt.png')
                # predict returns (bboxes, labels, scores); unpack before indexing
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                _bboxes = at.tonumpy(_bboxes[0])
                # plot predicted bboxes
                write_image(ori_img_, _bboxes, 'pred.png')
                print('saved an image')
        if epoch == 13:
            break
def test(img):
    img = t.from_numpy(img)[None]
    opt.caffe_pretrain = False  # this model was trained from a torchvision-pretrained model, not caffe
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
    # output the coordinates
    bboxes = at.tonumpy(_bboxes[0])
    print(bboxes)  # print the box coordinates (numpy array format)
    test_img = visdom_bbox(at.tonumpy(img[0]),
                           at.tonumpy(_bboxes[0]),
                           at.tonumpy(_labels[0]).reshape(-1),
                           at.tonumpy(_scores[0]).reshape(-1))
    trainer.vis.img('test_img', test_img)
def forward(self, imgs, bboxes, lbls, scale):
    # forward pass, get losses as tuple
    n = bboxes.shape[0]
    _, _, H, W = imgs.shape  # H, W = dimensions of images
    im_size = (H, W)
    features = self.model.extractor(imgs)
    rpn_locs, rpn_scores, rois, roi_ind, anchor = self.model.rpn(
        features, im_size, scale)

    # batch size = 1, therefore make variables singular
    rpn_loc = rpn_locs[0]
    rpn_score = rpn_scores[0]
    roi = rois
    bbox = bboxes[0]
    lbl = lbls[0]

    sample_roi, gt_roi_loc, gt_roi_lbl = self.proposal_target_creator(
        roi, at.tonumpy(bbox), at.tonumpy(lbl),
        self.model.loc_normalize_mean, self.model.loc_normalize_std)
    sample_roi_ind = t.zeros(len(sample_roi))
    roi_cls_loc, roi_score = self.model.head(features, sample_roi,
                                             sample_roi_ind)

    # ----- RPN losses -----
    rpn_cls_loss, rpn_loc_loss = self.rpn_loss(rpn_loc, rpn_score, bbox,
                                               anchor, im_size)
    # ----- ROI losses -----
    roi_cls_loss, roi_loc_loss = self.roi_loss(roi_cls_loc, gt_roi_loc,
                                               gt_roi_lbl)
    total = rpn_loc_loss + rpn_cls_loss + roi_loc_loss + roi_cls_loss

    # not sure if losses should be a dictionary instead, but here's a
    # definition for that just in case
    # losses = {
    #     'rpn_loc_loss': rpn_loc_loss,
    #     'rpn_cls_loss': rpn_cls_loss,
    #     'roi_loc_loss': roi_loc_loss,
    #     'roi_cls_loss': roi_cls_loss,
    #     'total_loss': total
    # }
    losses = [
        rpn_loc_loss.to(self.device),
        rpn_cls_loss.to(self.device),
        roi_loc_loss.to(self.device),
        roi_cls_loss.to(self.device),
        total.to(self.device)
    ]
    return losses
def compute_iou(pred_masks, gt_masks):
    pred_masks = np.squeeze(at.tonumpy(pred_masks))
    gt_masks = np.squeeze(at.tonumpy(gt_masks))
    ious = []
    for i in range(len(pred_masks)):
        pred_mask = pred_masks[i]
        gt_mask = gt_masks[i]
        union = np.sum(np.logical_or(pred_mask, gt_mask))
        intersection = np.sum(np.logical_and(pred_mask, gt_mask))
        iou = intersection / union
        ious.append(iou)
    batch_iou = np.sum(np.array(ious))
    return batch_iou
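# Quick sanity check for compute_iou with toy binary masks (hypothetical
# values, just to illustrate the expected shapes and the summed return value;
# the caller in val() divides by the dataset length to get the mean).
import numpy as np

pred = np.zeros((2, 4, 4), dtype=np.float32)
gt = np.zeros((2, 4, 4), dtype=np.float32)
pred[0, 0, 0:4] = 1  # 4 pixels
gt[0, 0, 2:4] = 1    # 2 pixels, both inside pred: IoU = 2 / 4 = 0.5
pred[1, 1:3, 1:3] = 1
gt[1, 1:3, 1:3] = 1  # identical masks: IoU = 1.0

print(compute_iou(pred, gt))  # 1.5 (the sum over the batch, not the mean)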
def test():
    img_arr = read_image('demo.jpg')
    img = t.from_numpy(img_arr)[None]
    faster_rcnn = FasterRCNN()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load('weights/chainer_best_model_converted_to_pytorch_0.7053.pth')
    opt.caffe_pretrain = True
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
    vis_bbox(at.tonumpy(img[0]),
             at.tonumpy(_bboxes[0]),
             at.tonumpy(_labels[0]).reshape(-1),
             at.tonumpy(_scores[0]).reshape(-1))
def step2(self, imgs, bboxes, labels, scale, epoch):
    self.optimizer.zero_grad()
    _, _, H, W = imgs.shape
    img_size = (H, W)

    ############ EXTRACTOR STEP #################
    features1 = self.faster_rcnn.extractor1(imgs)
    features2 = self.faster_rcnn.extractor2(imgs)

    ############ RPN STEP #######################
    rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
        features1, img_size, scale)
    bbox = bboxes[0]
    label = labels[0]
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    roi = rois

    ############ HEAD STEP #######################
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi, at.tonumpy(bbox), at.tonumpy(label), self.loc_normalize_mean,
        self.loc_normalize_std)
    sample_roi_index = t.zeros(len(sample_roi))
    roi_cls_loc, roi_score = self.faster_rcnn.head(features2, sample_roi,
                                                   sample_roi_index)

    # ------------------ ROI losses (fast rcnn loss) -------------------#
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
    roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                          at.totensor(gt_roi_label).long()]
    gt_roi_label = at.totensor(gt_roi_label).long()
    gt_roi_loc = at.totensor(gt_roi_loc)

    roi_loc_loss = _fast_rcnn_loc_loss(roi_loc.contiguous(), gt_roi_loc,
                                       gt_roi_label.data, self.roi_sigma)
    roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

    # RPN losses are not optimized in this step; keep float zero placeholders
    rpn_loc_loss = t.tensor([0.]).cuda()
    rpn_cls_loss = t.tensor([0.]).cuda()

    losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
    losses = losses + [sum(losses)] + [rpn_loc_loss + rpn_cls_loss] + \
             [roi_loc_loss + roi_cls_loss]
    all_losses = LossTuple(*losses)
    all_losses.total_roi.backward()
    self.optimizer.step()
    self.update_meters(all_losses)
    return all_losses
def single_predict(self, img_path=''):
    starttime = datetime.datetime.now()
    img = read_image(img_path)
    img = t.from_numpy(img)[None]
    _bboxes, _labels, _scores = self.trainer.faster_rcnn.predict(
        img, visualize=True)
    endtime = datetime.datetime.now()
    print('predict time consumed=%s' % round(
        (endtime - starttime).microseconds / 1000000 +
        (endtime - starttime).seconds, 6))
    ax = vis_bbox(at.tonumpy(img[0]),
                  at.tonumpy(_bboxes[0]),
                  at.tonumpy(_labels[0]).reshape(-1),
                  at.tonumpy(_scores[0]).reshape(-1))
    fig = ax.get_figure()
    fig.savefig("output.png")
def Recognize(img):
    '''
    Author: Zuo Jiale
    Date: 2020-07-30
    Purpose: detect the target racing robot
    Input: one frame of ZED data
    Returns: bounding-box coordinates of the car
    '''
    img = t.from_numpy(img)[None]
    opt.caffe_pretrain = False  # this model was trained from a torchvision-pretrained model, not caffe
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
    bboxes = at.tonumpy(_bboxes[0])
    labels = at.tonumpy(_labels[0].reshape(-1))
    return bboxes, labels
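# Minimal call sketch for Recognize. The `frame` variable is hypothetical:
# it stands for one ZED frame already converted to the CHW, RGB, float
# [0, 255] numpy layout that predict() expects elsewhere in this codebase.
bboxes, labels = Recognize(frame)
for (ymin, xmin, ymax, xmax), lbl in zip(bboxes, labels):
    print('class %d at (%.0f, %.0f)-(%.0f, %.0f)' % (lbl, xmin, ymin, xmax, ymax))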
def __call__(self, masks):
    # Earlier variants kept for reference: cropping sub-masks out of each
    # bbox, and loading per-image masks from disk before resizing:
    #
    # mask = mask[0]
    # masks = []
    # for i in range(bboxs.shape[0]):
    #     bbox = bboxs[i]
    #     # TODO: instead of converting to int, use the crop function in ROIAlign
    #     sub_mask = mask[:, int(bbox[0]+1):int(bbox[2]), int(bbox[1]+1):int(bbox[3])]
    #     masks.append(sub_mask)
    # return masks
    #
    # img_id = img_id[0]
    # masks_path = os.path.join(opt.root_dir, img_id, 'masks.npy')
    # temp = []
    # masks = np.load(masks_path)
    # for i in range(masks.shape[0]):
    #     temp.append(transform.resize(masks[i], (14, 14), preserve_range=False,
    #                                  mode='constant') >= 0.5)
    # temp = np.array(temp).astype(np.float32)
    # return temp

    temp = []
    for i in range(len(masks)):
        mask = at.tonumpy(masks[i])[0].copy()
        # print(mask.shape)
        mask = transform.resize(mask, (14, 14), preserve_range=False,
                                mode='constant')
        temp.append(mask.astype(np.float32))
    return np.array(temp).copy()
def apply_mask_bbox(image, masks, bbox, color, alpha=0.5):
    """Apply the given mask to the image."""
    ax = plot.subplot(111)
    ax.imshow(np.transpose(np.squeeze(image / 255.), (1, 2, 0)))
    for i in range(bbox.shape[0]):
        y1, x1, y2, x2 = int(bbox[i][0]), int(bbox[i][1]), int(
            bbox[i][2]), int(bbox[i][3])
        h = y2 - y1
        w = x2 - x1
        rect = patches.Rectangle((x1, y1), w, h, linewidth=1, edgecolor='r',
                                 facecolor='none')
        ax.add_patch(rect)
        mask = at.tonumpy(masks[i])[0]
        mask = transform.resize(mask, (int(h), int(w)),
                                preserve_range=False, mode='constant')
        for c in range(3):
            # blend the mask into each channel, using the alpha argument
            # rather than a hard-coded 0.5
            image[0, c, y1:y1 + mask.shape[0],
                  x1:x1 + mask.shape[1]] = np.where(
                      mask == 1,
                      image[0, c, y1:y1 + mask.shape[0],
                            x1:x1 + mask.shape[1]] * (1 - alpha) +
                      alpha * color[c] * 255,
                      image[0, c, y1:y1 + mask.shape[0],
                            x1:x1 + mask.shape[1]])
    ax.imshow(np.transpose(np.squeeze(image / 255.), (1, 2, 0)))
    plot.show()
def val(model, val_loader, criterion, epoch, vis):
    model.eval()
    batch_loss = 0
    avg_iou = 0
    for batch_idx, sample_batched in enumerate(val_loader):
        data = sample_batched['image']
        target = sample_batched['mask']
        data = Variable(data.type(opt.dtype), volatile=True)
        target = Variable(target.type(opt.dtype), volatile=True)
        output = model(data)
        loss = criterion(output, target)
        batch_loss += loss.data[0]
        avg_iou += compute_iou(
            pred_masks=at.tonumpy(output >= 0.5).astype(np.float32),
            gt_masks=target)
    batch_loss /= (batch_idx + 1)
    avg_iou /= len(val_loader.dataset)
    print('epoch: ' + str(epoch) + ', validation loss: ' + str(batch_loss),
          ', avg_iou: ', avg_iou)
    with open('logs.txt', 'a') as file:
        file.write('epoch: ' + str(epoch) + ', validation loss: ' +
                   str(batch_loss) + ', avg_iou: ' + str(avg_iou) + '\n')
    vis.plot('val loss', batch_loss)
    vis.plot('validation average IOU', avg_iou)
    return avg_iou
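# For context, a minimal driver loop combining train() and val() above with
# best-IoU checkpointing. This is a sketch: it assumes model, optimizer,
# criterion, the loaders, and vis are already constructed as elsewhere in
# this codebase, and opt.num_epochs is a hypothetical config field.
best_iou = 0
for epoch in range(opt.num_epochs):
    train(model, train_loader, criterion, epoch, vis)
    iou = val(model, val_loader, criterion, epoch, vis)
    # keep the checkpoint with the best validation IoU
    if iou > best_iou:
        best_iou = iou
        t.save(model.state_dict(), 'checkpoints/best_model.pth')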
def show_batch_train(sample_batched):
    """Visualize one training image and its corresponding mask"""
    img_id, image, mask = sample_batched['img_id'], sample_batched[
        'image'], sample_batched['mask']
    image = np.squeeze(at.tonumpy(image))
    mask = np.squeeze(at.tonumpy(mask))
    image = inverse_normalize(image)
    combined = np.multiply(image, mask)
    ax1 = plt.subplot(121)
    ax1.imshow(image / 255.)
    ax1.set_title(img_id[0])
    ax2 = plt.subplot(122)
    ax2.imshow(combined / 255.)
    ax2.set_title(img_id[0])
    plt.show()
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
       tuple of lists:
       This method returns a tuple of three lists,
       :obj:`(bboxes, labels, scores)`.

       * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
           where :math:`R` is the number of bounding boxes in an image. \
           Each bounding box is organized by \
           :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
           in the second axis.
       * **labels** : A list of integer arrays of shape :math:`(R,)`. \
           Each value indicates the class of the bounding box. \
           Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
           number of the foreground classes.
       * **scores** : A list of float arrays of shape :math:`(R,)`. \
           Each value indicates how confident the prediction is.
    """
    self.eval()  # disable BN updates and dropout
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))  # preprocessing
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):  # iterate over each image to predict
        img = at.totensor(img[None]).float()  # 1 x C x H x W
        scale = img.shape[3] / size[1]
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)  # forward pass
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale  # RoIs mapped back to the original (pre-resize) image

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]  # mean repeated n_class times
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]  # std repeated n_class times
def test(**kwargs):
    opt._parse(kwargs)
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load(
        'C:/Users/86188/Desktop/simple-faster-rcnn-pytorch-master/checkpoints/fasterrcnn_08042317_0.9090909090909093'
    )
    print('load success!')
    img = read_image('test_img/test.jpg')
    img = t.from_numpy(img)[None]
    opt.caffe_pretrain = False  # this model was trained from a torchvision-pretrained model, not caffe
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
    test_img = visdom_bbox(at.tonumpy(img[0]),
                           at.tonumpy(_bboxes[0]),
                           at.tonumpy(_labels[0]).reshape(-1),
                           at.tonumpy(_scores[0]).reshape(-1))
    trainer.vis.img('test_img', test_img)
def imgflip(img, bbox, x_flip=True, y_flip=True):
    imgs = at.tonumpy(img[0])
    if y_flip:
        imgs = imgs[:, ::-1, :]
    if x_flip:
        imgs = imgs[:, :, ::-1]
    # print imgs
    imgs = np.expand_dims(imgs, axis=0)
    return inverse_normalize(imgs)
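# Usage sketch for imgflip. Here `img` is assumed to be the normalized
# 1 x C x H x W tensor used elsewhere in these snippets; note that y_flip
# reverses the height axis and x_flip the width axis, and the function
# returns the de-normalized image with the batch dim restored.
flipped = imgflip(img, bbox=None, x_flip=True, y_flip=False)
vis.img('flipped_img', flipped[0])  # drop the batch dim for visualization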
def eval_mAP(trainer, val_loader):
    tqdm.monitor_interval = 0
    mAP = []
    for ii, sample in tqdm(enumerate(val_loader)):
        if len(sample.keys()) == 5:
            img_id, img, bbox, scale, label = sample['img_id'], sample[
                'image'], sample['bbox'], sample['scale'], sample['label']
            img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
        else:
            img_id, img, scale = sample['img_id'], sample['image'], sample[
                'scale']
            bbox = np.zeros((1, 0, 4))
            label = np.zeros((1, 0, 1))
            img = img.cuda().float()
            img = Variable(img)
        # if bbox is None:
        #     continue
        scale = at.scalar(scale)
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_boxes, pred_labels, pred_scores = trainer.faster_rcnn.predict(
            [ori_img_], visualize=True)
        pred_boxes = pred_boxes[0]
        pred_labels = pred_labels[0]
        pred_scores = pred_scores[0]
        bbox = at.tonumpy(bbox[0])
        # Rescale back
        C, H, W = ori_img_.shape
        ori_img_ = transform.resize(ori_img_,
                                    (C, H * (1 / scale), W * (1 / scale)),
                                    mode='reflect')
        o_H, o_W = H * (1 / scale), W * (1 / scale)
        pred_boxes = resize_bbox(pred_boxes, (H, W), (o_H, o_W))
        bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
        mAP.append(map_iou(bbox, pred_boxes, pred_scores))
        # if ii >= 100:
        #     break
    mAP = np.array(mAP)
    mAP = mAP[mAP != np.array(None)].astype(np.float32)
    return np.mean(mAP)
def _smooth_l1_loss(x, t, in_weight, sigma):
    sigma2 = sigma ** 2
    diff = in_weight * (x - t)
    abs_diff = diff.abs()
    flag = (abs_diff.data < (1. / sigma2)).float()
    y = (flag * (sigma2 / 2.) * (diff ** 2) +
         (1 - flag) * (abs_diff - 0.5 / sigma2))
    # zero out NaN entries with tensor ops; converting to numpy here (as an
    # earlier version did) would detach the loss from the autograd graph
    y = y.masked_fill(y != y, 0.)
    return y.sum()
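# _fast_rcnn_loc_loss, called by the trainer steps in this section, is not
# shown here. In the usual simple-faster-rcnn-pytorch layout it wraps
# _smooth_l1_loss roughly as below; treat this as a sketch of the standard
# pattern, not necessarily this repo's exact version.
def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    # regress only positive (foreground) examples: build a 0/1 weight mask
    in_weight = t.zeros(gt_loc.shape).cuda()
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight).cuda()] = 1
    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, in_weight.detach(), sigma)
    # normalize by the number of non-ignored (label >= 0) examples
    loc_loss /= ((gt_label >= 0).sum().float())
    return loc_loss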
def run_test(model, test_loader):
    pred_masks = []
    img_ids = []
    images = []
    for batch_idx, sample_batched in tqdm(enumerate(test_loader)):
        data, img_id = sample_batched['image'], sample_batched['img_id']
        data = Variable(data.type(opt.dtype), volatile=True)
        output = model(data)
        # output = (output > 0.5)
        output = at.tonumpy(output)
        for i in range(0, output.shape[0]):
            pred_mask = np.squeeze(output[i])
            id = img_id[i]
            pred_mask = (pred_mask >= 0.5).astype(np.float32)
            pred_masks.append(pred_mask)
            img_ids.append(id)
            ori_img_ = inverse_normalize(at.tonumpy(data[i]))
            images.append(ori_img_)
    return img_ids, images, pred_masks
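# One possible follow-up to persist run_test's predictions. The output
# directory is hypothetical; cv2 and np are assumed available, as elsewhere
# in this codebase.
import cv2
import numpy as np

img_ids, images, pred_masks = run_test(model, test_loader)
for img_id, mask in zip(img_ids, pred_masks):
    # masks come back as float32 in {0, 1}; scale to 8-bit for writing
    cv2.imwrite('predictions/' + str(img_id) + '_mask.png',
                (mask * 255).astype(np.uint8))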
def draw_predict(pred_bboxes_, pred_labels_, pred_scores_):
    pred_bboxes1 = iter(pred_bboxes_)
    pred_labels1 = iter(pred_labels_)
    pred_scores1 = iter(pred_scores_)
    if opt.nms_type == 'soft_nms':
        write_path = 'result/'
    else:
        write_path = 'result_nms/'
    if opt.nms_use_label == True:
        write_path = 'label_' + write_path
    print(write_path)
    f = open('/media/chenli/E/VOCdevkit/VOC2007/ImageSets/Main/test2.txt')
    for pred_bbox, pred_label, pred_score in six.moves.zip(
            pred_bboxes1, pred_labels1, pred_scores1):
        id_ = f.readline()[:-1]
        # print id_
        img_file = '/media/chenli/E/VOCdevkit/VOC2007/JPEGImages/' + \
            str(id_) + '.jpg'
        image = cv2.imread(img_file)
        # convert to numpy format
        bboxs = at.tonumpy(pred_bbox)
        name = at.tonumpy(pred_label).reshape(-1)
        score = at.tonumpy(pred_score).reshape(-1)
        # save test-set predictions for each round; ideally gate this on the
        # epoch and save only every 10 epochs, otherwise it wastes too much time
        for i in range(len(name)):
            xmin = int(round(float(bboxs[i, 1])))
            ymin = int(round(float(bboxs[i, 0])))
            xmax = int(round(float(bboxs[i, 3])))
            ymax = int(round(float(bboxs[i, 2])))
            if score[i] <= opt.threshold:
                continue
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), 1)
            cv2.putText(image, opt.VOC_BBOX_LABEL_NAMES[name[i]],
                        (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX,
                        1e-3 * image.shape[0], (0, 0, 255), 1)
            cv2.putText(image, str(score[i])[0:3], (xmin + 30, ymin - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * image.shape[0],
                        (0, 0, 255), 1)
        cv2.imwrite(write_path + str(id_) + '.jpg', image)
def forward(self, img_size, x, rois, roi_indices):
    """Forward the chain.

    We assume that there are :math:`N` batches.

    Args:
        x (Variable): 4D image variable.
        rois (Tensor): A bounding box array containing coordinates of
            proposal boxes. This is a concatenation of bounding box
            arrays from multiple images in the batch.
            Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
            RoIs from the :math:`i` th image,
            :math:`R' = \\sum _{i=1} ^ N R_i`.
        roi_indices (Tensor): An array containing indices of images to
            which bounding boxes correspond to. Its shape is :math:`(R',)`.
    """
    # in case roi_indices is ndarray
    img_h, img_w = img_size
    # size of rois in the input images, (h, w)
    roi_size = np.concatenate(
        (np.expand_dims(at.tonumpy(rois[:, 2] - rois[:, 0]), axis=1),
         np.expand_dims(at.tonumpy(rois[:, 3] - rois[:, 1]), axis=1)),
        axis=1)
    feature_h, feature_w = x.shape[2], x.shape[3]
    roi_indices = at.totensor(roi_indices).int()
    rois = at.totensor(rois).float()
    # rescale RoI coordinates from image space to feature-map space
    rois[:, 0] = rois[:, 0] / img_h * feature_h
    rois[:, 2] = rois[:, 2] / img_h * feature_h
    rois[:, 1] = rois[:, 1] / img_w * feature_w
    rois[:, 3] = rois[:, 3] / img_w * feature_w
    # pool = self.roi(x, indices_and_rois)
    rois = at.tovariable(rois)
    roi_indices = at.tovariable(roi_indices)
    pool = self.roi(x, rois, roi_indices)  # (128, 512, 7, 7)
    pool = pool.view(pool.size(0), -1)
    fc7 = self.classifier(pool)
    roi_cls_locs = self.cls_loc(fc7)
    roi_scores = self.score(fc7)
    return roi_cls_locs, roi_scores
def predict(self, imgs, sizes=None, visualize=False):
    self.eval()
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(array_tool.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = array_tool.totensor(img[None]).float()  # add a batch dim: (_, C, H, W)
        scale = img.shape[3] / size[1]  # W' / W, ratio of preprocessed image to the original
        roi_cls_locs, roi_scores, rois, roi_indices = self(img, scale=scale)
        # batch size is 1
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_locs.data
        roi = array_tool.totensor(rois) / scale

        mean = torch.Tensor(self.loc_normalize_mean).cuda().repeat(
            self.n_class)[None]  # (1, 84)
        std = torch.Tensor(self.loc_normalize_std).cuda().repeat(
            self.n_class)[None]
        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)  # (R, 21, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)  # expand dims to (R, 21, 4)
        cls_bbox = loc2bbox(
            array_tool.tonumpy(roi).reshape(-1, 4),
            array_tool.tonumpy(roi_cls_loc).reshape(-1, 4))
        cls_bbox = array_tool.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)  # (R, 84)
        # clip predicted bboxes so they do not exceed the original image size
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])
        prob = array_tool.tonumpy(
            functional.softmax(array_tool.totensor(roi_score), dim=1))
        raw_cls_bbox = array_tool.tonumpy(cls_bbox)
        raw_prob = array_tool.tonumpy(prob)
        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)  # stack per-image results together
    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores
def detec_test_pic(pth, pic_test):
    opt.load_path = opt.caffe_pretrain_path
    opt.env = 'detec-tset-pic'
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load(pth)
    opt.caffe_pretrain = True  # this model was trained from caffe-pretrained model
    pic_index = 0
    for pic in tqdm(os.listdir(pic_test)):
        time.sleep(1)
        img = read_image(os.path.join(pic_test, pic))
        img = t.from_numpy(img)[None]
        _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
            img, visualize=True)
        pred_img = visdom_bbox(at.tonumpy(img[0]),
                               at.tonumpy(_bboxes[0]),
                               at.tonumpy(_labels[0]).reshape(-1),
                               at.tonumpy(_scores[0]).reshape(-1))
        trainer.vis.img('pred_img', pred_img)
        pic_index += 1
        if pic_index > 1000:
            break
def forward(self, imgs, bboxes, labels, scale):
    """Forward Faster R-CNN and calculate losses.

    Here are notations used.

    * :math:`N` is the batch size.
    * :math:`R` is the number of bounding boxes per image.

    Currently, only :math:`N=1` is supported.

    Args:
        imgs (~torch.autograd.Variable): A variable with a batch of images.
        bboxes (~torch.autograd.Variable): A batch of bounding boxes.
            Its shape is :math:`(N, R, 4)`.
        labels (~torch.autograd.Variable): A batch of labels.
            Its shape is :math:`(N, R)`. The background is excluded from
            the definition, which means that the range of the value
            is :math:`[0, L - 1]`. :math:`L` is the number of foreground
            classes.
        scale (float): Amount of scaling applied to
            the raw image during preprocessing.

    Returns:
        namedtuple of 5 losses
    """
    n = bboxes.shape[0]
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')

    _, _, H, W = imgs.shape
    img_size = (H, W)

    features = self.faster_rcnn.extractor(imgs)

    rpn_locs, rpn_scores, rois, roi_indices, anchor = \
        self.faster_rcnn.rpn(features, img_size, scale)

    # Since batch size is one, convert variables to singular form
    bbox = bboxes[0]
    label = labels[0]
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    roi = rois

    # Sample RoIs and forward
    # it's fine to break the computation graph of rois,
    # consider them as constant input
    sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
        roi, at.tonumpy(bbox), at.tonumpy(label),
        self.loc_normalize_mean, self.loc_normalize_std)
    # NOTE it's all zero because it only supports batch=1 for now
    sample_roi_index = t.zeros(len(sample_roi))
    roi_cls_loc, roi_score = self.faster_rcnn.head(
        features, sample_roi, sample_roi_index)

    # ------------------ RPN losses -------------------#
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
        at.tonumpy(bbox), anchor, img_size)
    gt_rpn_label = at.tovariable(gt_rpn_label).long()
    gt_rpn_loc = at.tovariable(gt_rpn_loc)
    rpn_loc_loss = _fast_rcnn_loc_loss(
        rpn_loc, gt_rpn_loc, gt_rpn_label.data, self.rpn_sigma)

    # NOTE: default value of ignore_index is -100 ...
    rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(),
                                   ignore_index=-1)
    _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
    _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
    self.rpn_cm.add(at.totensor(_rpn_score, False),
                    _gt_rpn_label.data.long())

    # ------------------ ROI losses (fast rcnn loss) -------------------#
    n_sample = roi_cls_loc.shape[0]
    roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
    roi_loc = roi_cls_loc[t.arange(0, n_sample).long().cuda(),
                          at.totensor(gt_roi_label).long()]
    gt_roi_label = at.tovariable(gt_roi_label).long()
    gt_roi_loc = at.tovariable(gt_roi_loc)

    roi_loc_loss = _fast_rcnn_loc_loss(
        roi_loc.contiguous(), gt_roi_loc, gt_roi_label.data, self.roi_sigma)

    roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

    self.roi_cm.add(at.totensor(roi_score, False),
                    gt_roi_label.data.long())

    losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
    losses = losses + [sum(losses)]
    return LossTuple(*losses)
def train(**kwargs):
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
def predict(self, imgs, sizes=None, visualize=False):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
       tuple of lists:
       This method returns a tuple of three lists,
       :obj:`(bboxes, labels, scores)`.

       * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
           where :math:`R` is the number of bounding boxes in an image. \
           Each bounding box is organized by \
           :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
           in the second axis.
       * **labels** : A list of integer arrays of shape :math:`(R,)`. \
           Each value indicates the class of the bounding box. \
           Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
           number of the foreground classes.
       * **scores** : A list of float arrays of shape :math:`(R,)`. \
           Each value indicates how confident the prediction is.
    """
    self.eval()
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    for img, size in zip(prepared_imgs, sizes):
        img = t.autograd.Variable(at.totensor(img).float()[None],
                                  volatile=True)
        scale = img.shape[3] / size[1]
        roi_cls_loc, roi_scores, rois, _ = self(img, scale=scale)
        # We are assuming that batch size is 1.
        roi_score = roi_scores.data
        roi_cls_loc = roi_cls_loc.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        prob = at.tonumpy(F.softmax(at.tovariable(roi_score), dim=1))

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = at.tonumpy(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    self.use_preset('evaluate')
    self.train()
    return bboxes, labels, scores