Example #1
0
    def train(self):
        """Run one training pass over ``self.train_loader``.

        Every ``opt.plot_every`` steps, pushes the current meter values and
        the RPN/ROI confusion matrices to the trainer's visdom instance.
        """
        self.trainer.reset_meters()
        for ii, (img, bbox_, label_,
                 scale) in tqdm(enumerate(self.train_loader)):
            # scale arrives as a batched pair of tensors; unwrap each to a
            # plain python scalar
            scale[0] = at.scalar(scale[0])
            scale[1] = at.scalar(scale[1])
            img = img.to(self.device)
            bbox = bbox_.to(self.device)
            label = label_.to(self.device)
            self.trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # BUG FIX: the original called os.makedirs when the debug
                # path already existed, which raises FileExistsError; create
                # the directory only when it is missing.
                if not os.path.exists(opt.debug_file):
                    os.makedirs(opt.debug_file)

                self.trainer.vis.plot_many(self.trainer.get_meter_data())

                # plot ground truth bboxes

                # plot predicted bboxes

                # rpn confusion matrix(meter)
                self.trainer.vis.text(str(
                    self.trainer.rpn_cm.value().tolist()),
                                      win='rpn_cm')
                self.trainer.vis.img(
                    'roi_cm',
                    at.totensor(self.trainer.roi_cm.conf, False).float())
Example #2
0
def train(**kwargs):
    """Debugging scaffold: loads one sample, runs it through a decomposed
    VGG16 extractor, and (apparently) intends to feed the features to an RPN.

    NOTE(review): this looks like a truncated/experimental excerpt —
    ``self.feat_stride`` and ``self.faster_rcnn`` are referenced inside a
    module-level function (NameError at runtime), and a ``pdb.set_trace()``
    breakpoint is left in.  Confirm against the original file before use.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
           batch_size=1, \
           shuffle=True, \
           # pin_memory=True,

           num_workers=opt.num_workers)
    print('Loading Model')
    # faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    # trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    lr_ = opt.lr
    extractor, classifier = decom_vgg16()
    # pull a single sample and prepare it as a 1-image batch
    img, bbox_, label_, scale = dataset[1]
    _, H, W = img.shape
    img_size = (H, W)
    img, bbox_, label_ = to_tensor(img), to_tensor(bbox_), to_tensor(label_)
    scale = at.scalar(scale)
    img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
    img, bbox, label = Variable(img), Variable(bbox), Variable(label)
    # NOTE(review): leftover interactive breakpoint
    pdb.set_trace()
    features = extractor(img)

    # NOTE(review): `self.feat_stride` is undefined here (module-level
    # function) — presumably copied from a method; confirm.
    rpn = RegionProposalNetwork(512,
                                512,
                                ratios=ratios,
                                anchor_scales=anchor_scales,
                                feat_stride=self.feat_stride)

    # NOTE(review): `self.faster_rcnn` is likewise undefined here.
    rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale
            )
Example #3
0
    def train_one_epoch(self):
        """
        Run one epoch of training over ``self.dataloader``.

        Every ``self.opt.plot_every`` steps the current batch is also run
        through ``self.faster_rcnn.predict`` and predictions/ground truth
        are accumulated (collected here but not otherwise consumed).

        Return:
            total_loss: the running total loss recorded by the meters
            (the original docstring also promised an mAP "accuracy", but
            only the total loss is actually returned)
        """
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
        self.trainer.reset_meters()
        for ii, (img, sizes, bbox_, label_, scale, gt_difficults_) in \
                tqdm.tqdm(enumerate(self.dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            self.trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % self.opt.plot_every == 0:
                # sizes arrives batched; unwrap to [height, width]
                sizes = [sizes[0][0].item(), sizes[1][0].item()]
                pred_bboxes_, pred_labels_, pred_scores_ = \
                    self.faster_rcnn.predict(img, [sizes])
                pred_bboxes += pred_bboxes_
                pred_labels += pred_labels_
                pred_scores += pred_scores_
                gt_bboxes += list(bbox_.numpy())
                gt_labels += list(label_.numpy())
                gt_difficults += list(gt_difficults_.numpy())

        return self.trainer.get_meter_data()['total_loss']
Example #4
0
 def update_meters(self, losses):
     """Average each individual loss component into its running meter."""
     # train_step returns the losses as a namedtuple, so first convert it
     # into a dict of plain python scalars
     loss_dict = {k: scalar(v) for k, v in losses._asdict().items()}
     # feed every loss kind into its matching averaging meter
     for key, meter in self.meters.items():
         meter.add(loss_dict[key])
def evaluate_coco(data, data_loader, model):
    """Run inference over *data_loader*, dump COCO-format detections to a
    JSON file, and return the dataset's evaluation of that file."""
    total = len(data.img_ids)
    detections = []

    for image_id, (img, bbox, label, scale, size, _) in tqdm(
            zip(data.img_ids, data_loader), total=total):
        scale = at.scalar(scale)
        original_size = [size[0][0].item(), size[1][0].item()]
        pred_bbox, pred_label, pred_score = model(
            img, scale, None, None, original_size)

        # convert each (ymin, xmin, ymax, xmax) box into COCO's
        # [x, y, width, height] record
        for box, cls, conf in zip(pred_bbox, pred_label, pred_score):
            ymin, xmin, ymax, xmax = box
            detections.append(OrderedDict({
                'image_id': image_id,
                'category_id': data.label_to_coco_label(cls),
                'bbox': [xmin, ymin, xmax - xmin, ymax - ymin],
                'score': float(conf)
            }))

    result_path = f'./results/coco/predictions/{opt.model}.json'
    result_dir = os.path.dirname(result_path)
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    with open(result_path, 'w', encoding='utf-8') as fout:
        json.dump(detections, fout, cls=COCOEncoder, ensure_ascii=False)

    return data.evaluate(result_path)
def train(**kwargs):
    """Train a ResNet-50 Faster R-CNN on VOC2007, streaming every loss
    component to TensorBoard and checkpointing on the best validation mAP.
    """
    opt._parse(kwargs)  # parse configuration overrides
    dataset = Dataset(opt)  # training set: VOC2007, 5011 images
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt, split='val')  # validation set, ~2500 images
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNN_ResNet50()  # build the faster-rcnn instance
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()  # move trainer to GPU
    if opt.load_path:  # optionally resume from a pretrained checkpoint
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    # BUG FIX: best_path was previously unbound when epoch 9 arrived without
    # any mAP improvement, making trainer.load(best_path) raise NameError.
    best_path = None
    lr_ = opt.lr
    writer = SummaryWriter('logs', comment='faster-rcnn-vgg16')
    global_step = 0
    for epoch in range(opt.epoch):  # iterate over training epochs

        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            loss = trainer.train_step(img, bbox, label, scale)
            rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss = loss
            # stream each loss component to TensorBoard
            writer.add_scalar('rpn_loc_loss', rpn_loc_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('rpn_cls_loss', rpn_cls_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('roi_loc_loss', roi_loc_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('roi_cls_loss', roi_cls_loss.detach().cpu().numpy(), global_step)
            writer.add_scalar('total_loss', total_loss.detach().cpu().numpy(), global_step)
            global_step += 1
            if (ii + 1) % opt.plot_every == 0:
                pass
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}'.format(str(lr_), str(eval_result['map']))
        print(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint so far (if any) and decay the LR
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
    # flush pending TensorBoard events before returning
    writer.close()
Example #7
0
 def update_meters(self, losses, BR=False):
     """Accumulate each loss component into the matching meter set.

     When *BR* is true the values go into ``self.BR_meters`` instead of
     the default ``self.meters``.
     """
     scalars = {name: at.scalar(value)
                for name, value in losses._asdict().items()}
     target = self.BR_meters if BR else self.meters
     for name, meter in target.items():
         meter.add(scalars[name])
def evaluate_voc(data_loader, model):
    """Evaluate *model* over *data_loader* with COCO-style summary metrics.

    Computes AP averaged over IoU thresholds 0.5:0.05:0.95, AP at 0.5 and
    0.75, and size-restricted APs (small/medium/large).  Prints all results
    and returns the overall averaged AP.
    """
    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels, gt_difficults = [], [], []

    # collect predictions and ground truth for the whole loader
    for img, gt_bbox, gt_label, scale, size, gt_difficult in tqdm(data_loader):
        scale = at.scalar(scale)
        original_size = [size[0][0].item(), size[1][0].item()]
        pred_bbox, pred_label, pred_score = model(img, scale, None, None, original_size)
        gt_bboxes += list(gt_bbox.numpy())
        gt_labels += list(gt_label.numpy())
        gt_difficults += list(gt_difficult.numpy())
        pred_bboxes += [pred_bbox]
        pred_labels += [pred_label]
        pred_scores += [pred_score]

    eval_results = {'AP': 0, 'AP_0.5': 0, 'AP_0.75': 0, 'AP_s': 0, 'AP_m': 0, 'AP_l': 0}
    iou_threshes = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]

    area_names = ['s', 'm', 'l']
    area_ranges = [(0, 32 ** 2), (32 ** 2, 96 ** 2), (96 ** 2, np.inf)]
    # FIX: the loop variable was named `range`, shadowing the builtin
    for name, area_range in zip(area_names, area_ranges):
        # evaluate predictions for multiple iou threshes
        for iou_thresh in iou_threshes:
            result = eval_detection_voc(
                pred_bboxes, pred_labels, pred_scores,
                gt_bboxes, gt_labels, gt_difficults,
                iou_thresh, True, area_range
            )
            # accumulate results
            eval_results[f'AP_{name}'] += result['map']
        # average over the thresholds actually evaluated
        # (FIX: was a hard-coded 10.)
        eval_results[f'AP_{name}'] /= len(iou_threshes)

    # evaluate results regardless of area size
    for iou_thresh in iou_threshes:
        result = eval_detection_voc(
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults,
            iou_thresh, True
        )
        # accumulate results
        eval_results['AP'] += result['map']
        # save map for iou 0.5 & 0.75
        if iou_thresh == 0.5:
            eval_results['AP_0.5'] = result['map']
        elif iou_thresh == 0.75:
            eval_results['AP_0.75'] = result['map']
    eval_results['AP'] /= len(iou_threshes)

    # print results
    eval_log = ''
    for k, v in eval_results.items():
        eval_log += f'{k}: {v * 100:.2f},  '
    print(eval_log)

    return eval_results['AP']
Example #9
0
def train(**kwargs):
    """Debug-oriented training loop: runs train steps and, late in training
    (epoch > 50), renders ground-truth and predicted boxes for inspection."""
    opt._parse(kwargs)

    dataset = Dataset(opt)
    # img, bbox, label, scale = dataset[0]
    # the returned img has been rescaled and may be randomly flipped
    # the returned bbox is ordered as ymin, xmin, ymax, xmax
    #  H, W = size(im)
    # for an image shown on screen, a, b, c, d mark the 4 corners:
    #        a   ...   b     ymin
    #        .         .
    #        c   ...   d     ymax  height H, y ranges over [0, H-1]
    #        xmin    xmax
    #        width W, x ranges over [0, W-1]

    print('load data')
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')

    trainer = FasterRCNNTrainer(faster_rcnn)

    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    for epoch in range(opt.epoch):
        for ii, (img, bbox_, label_, scale) in (enumerate(dataloader)):
            print('step: ', ii)

            scale = at.scalar(scale)
            img, bbox, label = img.float(), bbox_, label_
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if ((ii + 1) % opt.plot_every == 0) and (epoch > 50):
                # only start predicting after many epochs; with too few
                # epochs the predictions are not accurate enough to plot
                #                if os.path.exists(opt.debug_file):
                #                    ipdb.set_trace()

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                # gt_img is a numpy array in [0, 1], shaped 3 x H x W
                # the boxed/labelled gt_img should be saved or displayed here
                # NOTE(review): gt_img and pred_img are built but never used
                # in this excerpt — presumably displayed in the original
                # file; confirm.

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
Example #10
0
def train(train_set,
          val_set,
          load_path=False,
          epochs=1,
          lr=1e-3,
          record_every=300,
          lr_decay=1e-3,
          test_num=500):
    '''
    Build dataloaders from the training and validation sets, construct an
    R-FCN (ResNet-101) model plus its trainer, optionally resume from
    *load_path*, and train for *epochs* epochs, printing per-step losses and
    checkpointing every *record_every* steps.
    '''
    train_dataloader = td.DataLoader(train_set,
                                     batch_size=1,
                                     pin_memory=False,
                                     shuffle=True)
    test_dataloader = td.DataLoader(val_set, batch_size=1, pin_memory=True)
    faster_rcnn = RFCNResnet101().cuda()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    saved_loss = []
    iterations = []
    if load_path:
        # resume: restore trainer weights plus the recorded loss history
        trainer.load(load_path)
        print('load pretrained model from %s' % load_path)
        state_dict = t.load(load_path)
        saved_loss = state_dict['losses']
        iterations = state_dict['iterations']

    # FIX: removed dead locals (best_map, lr_) that were assigned but never
    # read anywhere in this function.
    for epoch in range(epochs):
        trainer.reset_meters()
        for ii, (img, bbox_, label_,
                 scale) in tqdm(enumerate(train_dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            losses = trainer.train_step(img, bbox, label, scale)
            loss_info = 'Iter {}; Losses: RPN loc {}, RPN cls: {}, ROI loc {}, ROI cls {}, Total:{}'.format(
                str(ii), "%.3f" % losses[0].cpu().data.numpy(),
                "%.3f" % losses[1].cpu().data.numpy(),
                "%.3f" % losses[2].cpu().data.numpy(),
                "%.3f" % losses[3].cpu().data.numpy(),
                "%.3f" % losses[4].cpu().data.numpy())
            print(loss_info)
            if (ii + 1) % record_every == 0:
                iterations.append(ii + 1)
                saved_loss.append([
                    losses[0].cpu().item(), losses[1].cpu().item(),
                    losses[2].cpu().item(), losses[3].cpu().item(),
                    losses[4].cpu().item()
                ])
                # FIX: dropped the dead `kwargs` dict that was built here and
                # never used; trainer.save already receives both values.
                trainer.save(saved_loss=saved_loss, iterations=iterations)
                print("new model saved")
 def update_meters(self, losses, BR=False):
     """Push every loss component into its running meter.

     Regular losses go to ``self.meters``; when *BR* is true the values go
     to ``self.BR_meters`` as numpy values instead.
     """
     scalars = {name: at.scalar(value)
                for name, value in losses._asdict().items()}
     if BR:
         for name, meter in self.BR_meters.items():
             meter.add(scalars[name].cpu().numpy())
     else:
         for name, meter in self.meters.items():
             meter.add(scalars[name])
Example #12
0
def train(**kwargs):
    """Train a VGG16 Faster R-CNN, periodically writing ground-truth and
    predicted detection images (gt.png / pred.png) for visual inspection."""
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0  # NOTE(review): assigned but never updated in this excerpt
    lr_ = opt.lr  # NOTE(review): likewise unused here
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)

            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Variable is a no-op wrapper on modern PyTorch (deprecated API)
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger when the debug flag-file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot ground truth bboxes on the de-normalized image
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # ori_img_ = (at.tonumpy(img[0]))
                losses = trainer.get_meter_data()
                print(losses)
                write_image(ori_img_, at.tonumpy(bbox[0]), 'gt.png')
                _bboxes = trainer.faster_rcnn.predict([ori_img_],
                                                      visualize=True)
                _bboxes = at.tonumpy(_bboxes[0])
                # plot predicted bboxes
                write_image(ori_img_, _bboxes, 'pred.png')
                print('saved an image')

        if epoch == 13:
            break
Example #13
0
def train(individual, **kwargs):
    """Train the Faster R-CNN variant encoded by *individual* and record its
    best validation mAP on ``individual.accuracy``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNN_mine(individual)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    best_map = 0
    # BUG FIX: best_path used to be reset to None INSIDE the epoch loop, so
    # the epoch-9 reload could call trainer.load(None) whenever that epoch
    # happened not to improve the mAP.  Track it across epochs instead.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()

        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)

            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger when the debug flag-file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # run prediction on the de-normalized image for inspection
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))

                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint saved so far (if any) and decay LR
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        individual.accuracy = best_map
Example #14
0
    def train(self):
        """
        Main training loop.

        while loss<target loss
            forward
            backward
            record loss
            if loop_n % RECORD_N:
                summary & save_progress

        On KeyboardInterrupt, offers to save the current progress before
        exiting.
        """
        time_start = time.time()
        self.net.train()
        try:
            while self.epoch < self.conf.max_epoch:
                self.epoch_loss = 0
                self.reset_meters()
                for step, (img_name, img, bbox, label, index, relation, scale, _) in tqdm(enumerate(self.train_loader)): #bbox shape is [1, 2, 4] #label is [1,n] label_attr is[1,m,n]
                    # skip samples with fewer than two boxes (relations need
                    # at least a pair)
                    if bbox.size(1) < 2:
                        continue

                    self.optimizer.zero_grad()
                    img, bbox, label, index, relation, scale = \
                        img.to(self.conf.dev), bbox.to(self.conf.dev), label.to(self.conf.dev), index.to(self.conf.dev),\
                        relation.to(self.conf.dev), at.scalar(scale)
                    loss = self.train_forward_net(img_name, img, bbox, label, index, relation, scale)
                    #print("loss {0}".format(loss.total_loss))
                    loss.total_loss.backward()
                    self.optimizer.step()
                    self.update_meters(loss)
                    # accumulate epoch loss weighted by batch size
                    self.epoch_loss += loss.total_loss.detach().cpu().numpy() * img.size(0)

                    #if step == 3500:
                    #    break
                    # if step % 200 == 0:
                    #     print('Step=', step)
                # ['epoch_loss', 'test_loss', 'training_loss']
                # normalize the accumulated loss by dataset size
                self.epoch_loss = self.epoch_loss / len(self.train_loader.dataset)
                # self.valid_loss = self.test(use_validation=True, display=True)
                self.tp.record_data({'epoch_loss': self.epoch_loss})  # 'validation_loss': self.valid_loss})
                self.lr_scheduler.step({'loss': self.epoch_loss, 'epoch': self.epoch})  # , 'torch': self.valid_loss})
                # periodic summary every self.conf.se epochs
                if self.epoch % self.conf.se == 0:
                    print(timeSince(time_start), ': Trainer Summary Epoch=', self.epoch)
                    self.summary()
                self.epoch += 1
            print(timeSince(time_start), ': Trainer Summary Epoch=', self.epoch)
            self.summary(save_optim=True)  # for resume training
        except KeyboardInterrupt:
            save = input('Save Current Progress ? y for yes: ')
            if 'y' in save:
                print('Saving Progress...')
                self.save_progress(save_optim=True, display=True)
Example #15
0
    def train(self, train_set, test_set, num_epoch, B=1, lr=1e-3):
        """Train ``self.model`` on *train_set* for *num_epoch* epochs.

        Builds one dataloader per split (batch size *B*) and delegates each
        optimization step to ``self.step``.  The evaluation / plotting hooks
        are still stubbed out (see the trailing comments); *lr* is currently
        unused for the same reason.
        """
        #device = 'cuda' if torch.cuda.is_available() else 'cpu'

        #model = model.to(device)
        #adam = torch.optim.Adam(model.parameters(), lr=lr)

        train_loader = td.DataLoader(train_set,
                                     batch_size=B,
                                     pin_memory=False,
                                     shuffle=True)
        test_loader = td.DataLoader(test_set,
                                    batch_size=B,
                                    pin_memory=True,
                                    shuffle=False)

        # load stuff here from log file

        best_map = 0

        self.model.zero_grad()

        # set up plots here

        for epoch in range(num_epoch):
            #clear stuff (RFCNtrainer.reset_meters())
            for batch_ind, (image, bbox, bbox_labels,
                            scale) in enumerate(train_loader):
                #move data to device
                scale = at.scalar(scale)
                img = image.to(self.device)
                bbox = bbox.to(self.device)
                lbl = bbox_labels.to(self.device)
                self.step(img, bbox, lbl, scale)

            #plot loss and stuff every 2 epochs
            if (epoch + 1) % 2 == 0:
                # FIX: removed the unused `emptyval` placeholder list.
                # plot stuff (loss, boxes, rpn confusion matrix, etc.) here
                pass

        # test with evaluation data, plot results #-->
        #result = eval(train_loader, self.model) #-->

        # log info to file here #-->

        #plot #-->

        #if (result['map'] > best_map): #-->
        #    best_map = result['map']#-->

        return
Example #16
0
def eval_mAP(trainer, val_loader):
    """Compute the mean IoU-based mAP of *trainer*'s model over *val_loader*.

    Samples exposing 5 keys carry ground-truth boxes/labels; all others are
    treated as containing no objects.  Predicted and ground-truth boxes are
    rescaled back to the original image size before scoring, and images for
    which ``map_iou`` returns ``None`` are excluded from the mean.
    """
    tqdm.monitor_interval = 0
    mAP = []
    for ii, sample in tqdm(enumerate(val_loader)):
        if len(sample.keys()) == 5:
            # annotated sample: unpack ground-truth boxes and labels
            img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                sample['label']
            img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)

        else:
            # unannotated sample: substitute empty bbox/label arrays
            img_id, img, scale = sample['img_id'], sample['image'], sample[
                'scale']
            bbox = np.zeros((1, 0, 4))
            label = np.zeros((1, 0, 1))
            img = img.cuda().float()
            img = Variable(img)
        # if bbox is None:
        #     continue
        scale = at.scalar(scale)
        ori_img_ = inverse_normalize(at.tonumpy(img[0]))
        pred_boxes, pred_labels, pred_scores = trainer.faster_rcnn.predict(
            [ori_img_], visualize=True)
        pred_boxes = pred_boxes[0]
        pred_labels = pred_labels[0]
        pred_scores = pred_scores[0]
        bbox = at.tonumpy(bbox[0])
        # Rescale back to the pre-preprocessing image size
        C, H, W = ori_img_.shape
        ori_img_ = transform.resize(ori_img_,
                                    (C, H * (1 / scale), W * (1 / scale)),
                                    mode='reflect')
        o_H, o_W = H * (1 / scale), W * (1 / scale)
        pred_boxes = resize_bbox(pred_boxes, (H, W), (o_H, o_W))
        bbox = resize_bbox(bbox, (H, W), (o_H, o_W))
        mAP.append(map_iou(bbox, pred_boxes, pred_scores))
        # if ii>=100:
        #     break

    mAP = np.array(mAP)
    # drop entries where map_iou returned None before averaging
    mAP = mAP[mAP != np.array(None)].astype(np.float32)

    return np.mean(mAP)
Example #17
0
def pretrain(embedding_file):
    """Jointly pretrain a Faster R-CNN and an LSTM text classifier on the
    COCO dataset described by *embedding_file*, then save both models and
    optimizers to ``pretrain.pth``."""
    dataset = COCODataset(embedding_file, opt, True)
    opt.n_class = dataset.n_class
    faster_rcnn = FasterRCNNVGG16()
    faster_rcnn_trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    opt.caffe_pretrain = False
    lstm = nn.LSTM(input_size=dataset.word_embedding.vector_size,
                   hidden_size=opt.hidden_size,
                   batch_first=True)
    predict_param = [lstm.hidden_size, opt.n_class]
    text_predictor = PredictNet(predict_param)
    text_predictor.parameters()  # NOTE(review): return value unused — no-op; confirm intent
    lstm_trainer = LSTMTrainer(lstm, text_predictor).cuda()
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers)
    for epoch in range(opt.epoch):
        for ii, (img, bbox_, text, label_,
                 scale) in tqdm(enumerate(dataloader)):
            # train faster rcnn
            scale = at.scalar(scale)
            img, bbox, text, label = img.cuda().float(), bbox_.cuda(
            ), text.cuda(), label_.cuda()
            faster_rcnn_trainer.train_step(img, bbox, label, scale)
            # train lstm
            lstm_trainer.lstm_step(text, label)
    # bundle both models' weights and optimizer states into one checkpoint
    state = {
        'rcnn': {
            'model': faster_rcnn_trainer.state_dict(),
            'optimizer': faster_rcnn_trainer.optimizer.state_dict()
        },
        'lstm': {
            'model': lstm_trainer.state_dict(),
            'optimizer': lstm_trainer.optimizer.state_dict()
        }
    }

    t.save(state, 'pretrain.pth')
 def update_meters(self, losses):
     """Feed every loss component into its running-average meter."""
     values = {name: at.scalar(v) for name, v in losses._asdict().items()}
     for name, meter in self.meters.items():
         meter.add(values[name])
Example #19
0
def train(**kwargs):
    """Alternate-optimization training of a two-branch Faster R-CNN.

    Implements a staged schedule: epochs 0-2 train extractor1+RPN with
    extractor2/head frozen, epochs 3-6 swap which halves are frozen,
    epochs 7-8 retrain the RPN, and epochs 9-10 retrain the head.  Losses
    are appended periodically and the model is checkpointed each epoch.

    NOTE(review): this function relies on module-level names not visible
    here (``append_loss``, ``plot_dir``, ``loss_list``, ``plot_loss``,
    ``rpn_loc_loss`` .. ``total_roi``) — presumably globals maintained by
    ``append_loss``; confirm against the original file.  ``precisions`` and
    ``recall`` are allocated but never used in this excerpt.
    """
    precisions = np.zeros([configurations.epoch, 20])
    recall = np.zeros([configurations.epoch, 20])
    configurations._parse(kwargs)

    dataset = Dataset(configurations)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  pin_memory=True,
                                  num_workers=configurations.num_workers)
    testset = TestDataset(configurations)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=configurations.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNN()
    #faster_rcnn.load_state_dict(torch.load('faster_rcnn_model_0.ckpt'))
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if configurations.load_path:
        trainer.load(configurations.load_path)
        print('load pretrained model from %s' % configurations.load_path)

    best_map = 0
    lr_ = configurations.lr
    for epoch in range(configurations.epoch):
        trainer.reset_meters()
        ########### FREEZING REQD MODEL #####################################
        if epoch == 0:  ##freezing ex2 and head
            for param in trainer.faster_rcnn.extractor2[10:].parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = False
            trainer.faster_rcnn.extractor1.train()
            trainer.faster_rcnn.rpn.train()

        elif epoch == 3:  ##freezing ex1 and rpn, unfreeze ex2 and head
            #unfreeze ex2 and head
            for param in trainer.faster_rcnn.extractor2[10:].parameters():
                param.requires_grad = True
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = True
            #make ex1 and rpn eval and frozen
            for param in trainer.faster_rcnn.extractor1[10:].parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.rpn.parameters():
                param.requires_grad = False
            trainer.faster_rcnn.extractor1.eval()
            trainer.faster_rcnn.rpn.eval()
            trainer.faster_rcnn.head.train()
            trainer.faster_rcnn.extractor2.train()

        elif epoch == 7:
            # retrain the RPN while freezing extractor2 and the head
            trainer.faster_rcnn.rpn.train()
            for param in trainer.faster_rcnn.rpn.parameters():
                param.requires_grad = True
            for param in trainer.faster_rcnn.extractor2[10:].parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = False
            trainer.faster_rcnn.extractor2.eval()
            trainer.faster_rcnn.head.eval()

        elif epoch == 9:
            # final stage: freeze the RPN and retrain only the head
            for param in trainer.faster_rcnn.rpn.parameters():
                param.requires_grad = False
            for param in trainer.faster_rcnn.head.parameters():
                param.requires_grad = True
            trainer.faster_rcnn.rpn.eval()
            trainer.faster_rcnn.head.train()

        #######################################################################

        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # dispatch to the optimization step matching the current stage
            if epoch <= 2:
                trainer.step1(img, bbox, label, scale, epoch)
            elif epoch >= 3 and epoch <= 6:
                trainer.step2(img, bbox, label, scale, epoch)
            elif epoch >= 7 and epoch <= 8:
                trainer.step3(img, bbox, label, scale, epoch)
            elif epoch >= 9 and epoch <= 10:
                trainer.step4(img, bbox, label, scale, epoch)

            if ((ii + 1) % 500 == 0):
                append_loss(trainer.get_meter_data())

            if (ii + 1) % configurations.plot_every == 0:
                if os.path.exists(configurations.debug_file):
                    ipdb.set_trace()

                #plot loss
                if not os.path.exists(plot_dir):
                    os.mkdir(plot_dir)
                plot_loss(loss_list, plot_dir)

            # if ii == 7000:
            #     # plot groud truth bboxes
            #     ori_img_ = inverse_normalize(at.tonumpy(img[0]))
            #     gt_img = fetch_image(ori_img_,
            #                          at.tonumpy(bbox_[0]),
            #                          at.tonumpy(label_[0]))
            #     gt_img = gt_img.transpose(1,2,0)
            #     if not os.path.exists(img_dir):
            #         os.mkdir(img_dir)
            #     plt.imsave('{}/actual_image_{}_{}.jpg'.format(img_dir, epoch, ii), gt_img)

            #     # plot prediction bboxes
            #     _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
            #     pred_img = fetch_image(ori_img_,
            #                            at.tonumpy(_bboxes[0]),
            #                            at.tonumpy(_labels[0]).reshape(-1),
            #                            at.tonumpy(_scores[0]))
            #     pred_img = pred_img.transpose(1,2,0)
            #     plt.imsave('{}/predicted_image_{}_{}.jpg'.format(img_dir,epoch, ii), pred_img)

        torch.save(faster_rcnn.state_dict(),
                   'faster_rcnn_model_{}.ckpt'.format(epoch + 1))
        # NOTE(review): the loss arrays below come from module-level state,
        # presumably filled by append_loss — confirm.
        all_losses = np.zeros((7, len(total_loss)))
        all_losses[0, :] = rpn_loc_loss
        all_losses[1, :] = rpn_cls_loss
        all_losses[2, :] = roi_loc_loss
        all_losses[3, :] = roi_cls_loss
        all_losses[4, :] = total_loss
        all_losses[5, :] = total_rpn
        all_losses[6, :] = total_roi

        save_dir = 'prec_rec_loss/'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        np.save(save_dir + 'all_losses_' + str(epoch) + '.npy', all_losses)
        print("Epoch {} completed".format(epoch + 1))
def train(**kwargs):
    """Adversarial-training / robustness-evaluation loop for Faster R-CNN.

    Parses CLI overrides into ``opt``, builds train/test loaders and a
    ``FasterRCNNTrainer``.  When ``opt.flagadvtrain`` is set, each batch is
    perturbed with a PGD attack before inference.  At ``opt.plot_every``
    intervals the clean and adversarial predictions are rendered to
    ``imgs/`` and compared, counting how many images keep the same
    predicted labels under attack.  Prints the totals and average attack
    time at the end.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from {}'.format(opt.load_path))

    atk = None
    if opt.flagadvtrain:
        print("flagadvtrain turned: Adversarial training!")
        atk = PGD.PGD(trainer, eps=16/255, alpha=3/255, steps=4)

    best_map = 0
    lr_ = opt.lr
    total_time = 0.0   # cumulative wall time spent generating adversarial examples
    total_imgs = 0     # images inspected at plot intervals
    true_imgs = 0      # images whose predicted labels survive the attack
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            temp_img = copy.deepcopy(img).cuda()  # clean copy kept for comparison
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            if opt.flagadvtrain:
                before_time = time.time()
                img = atk(img, bbox, label, scale)
                after_time = time.time()
                # BUGFIX: this accumulation previously sat outside the
                # branch and raised NameError whenever flagadvtrain was off,
                # because before_time/after_time were never assigned.
                total_time += after_time - before_time

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Predictions on the (possibly adversarial) image.
                temp_ori_img_ = inverse_normalize(at.tonumpy(temp_img[0]))
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))

                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)

                fig1 = plt.figure()
                ax1 = fig1.add_subplot(1, 1, 1)
                final1 = (ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax1.imshow(final1)
                gt_img = visdom_bbox(ax1, at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]))
                fig1.savefig("imgs/adv_images/adv_img{}".format(ii))
                plt.close()

                # Predictions on the untouched copy.
                _temp_bboxes, _temp_labels, _temp_scores = trainer.faster_rcnn.predict([temp_ori_img_], visualize=True)

                fig2 = plt.figure()
                ax2 = fig2.add_subplot(1, 1, 1)
                final2 = (temp_ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax2.imshow(final2)
                gt_img = visdom_bbox(ax2, at.tonumpy(_temp_bboxes[0]), at.tonumpy(_temp_labels[0]))
                fig2.savefig("imgs/orig_images/gt_img{}".format(ii))
                plt.close()

                total_imgs += 1
                if len(_temp_labels) == 0:
                    continue
                # BUGFIX: the old code wrote `(...).all() is True`; numpy's
                # all() returns numpy.bool_, whose identity is never the
                # Python singleton True, so the counter never advanced.
                if _labels[0].shape[0] == _temp_labels[0].shape[0] and bool((_labels[0] == _temp_labels[0]).all()):
                    true_imgs += 1

        # NOTE(review): breaking after the first epoch looks like a
        # deliberate single-pass evaluation run — confirm before removing.
        if epoch == 0:
            break

        if epoch == 13:
            break

    print("Total number of images is {}".format(total_imgs))
    print("True images is {}".format(true_imgs))
    print("Total time is {}".format(total_time))
    # Guard: no plot interval reached means total_imgs == 0.
    if total_imgs > 0:
        print("Avg time is {}".format(total_time / total_imgs))
def train(**kwargs):
    """Standard Faster R-CNN training loop with visdom visualization.

    Parses keyword overrides into the global config, trains for
    ``opt.epoch`` epochs (batch size fixed at 1), evaluates mAP after each
    epoch, keeps the best checkpoint, decays the learning rate once at
    epoch 9, and stops after epoch 13.
    """
    opt._parse(
        kwargs
    )  # Interpret the caller's kwargs via config.py's opt._parse(); this also yields the data paths handed to Dataset below.

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        #pin_memory=True
    )  # pin_memory would use page-locked memory for faster host-to-GPU transfer when enabled

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # If opt.load_path is set, restore the pretrained model from it, then
    # show the training label names in the visualizer.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.dataset.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # Train for opt.epoch epochs; opt.epoch (14 by default) is a
    # hyper-parameter predefined in config.py.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # first reset all meters shown in the visualization
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = array_tool.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot the losses in the visualizer
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot the ground-truth bboxes
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, array_tool.tonumpy(bbox_[0]),
                                     array_tool.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot the predicted bboxes
                # Run faster_rcnn's predict(); its results are kept in the
                # underscore-prefixed names below.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_, array_tool.tonumpy(_bboxes[0]),
                    array_tool.tonumpy(_labels[0]).reshape(-1),
                    array_tool.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # Show rpn_cm, the RPN network's confusion matrix, via trainer.vis.text.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # Show roi_cm, the RoI head's confusion matrix, in the visualizer.
                trainer.vis.img(
                    'roi_cm',
                    array_tool.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # push the current lr / mAP / losses to the log display

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # at epoch 9, reload the best model and scale the lr by opt.lr_decay (to one tenth)
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN jointly with a DCGAN-based attacker.

    Builds a ``VictimFasterRCNNTrainer`` in attack mode, optionally
    restoring both the attacker and the detector from checkpoints, then
    trains while periodically visualizing clean vs. adversarially
    perturbed predictions.  Checkpoints are written every 500 plot steps
    and at every even-numbered epoch.
    """
    opt._parse(kwargs)
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    attacker = attacks.DCGAN(train_adv=False)
    if opt.load_attacker:
        attacker.load(opt.load_attacker)
        print('load attacker model from %s' % opt.load_attacker)
    trainer = VictimFasterRCNNTrainer(faster_rcnn, attacker,
                                      attack_mode=True).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters(adv=True)
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            # BUGFIX: an unconditional ipdb.set_trace() here dropped into the
            # debugger on every single batch, stalling training; the
            # opt.debug_file-guarded hook below still provides debugging.
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot both the clean and the adversarial loss meters
                trainer.vis.plot_many(trainer.get_meter_data())
                trainer.vis.plot_many(trainer.get_meter_data(adv=True))

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                if trainer.attacker is not None:
                    # also show predictions on the attacker-perturbed image
                    adv_img = trainer.attacker.perturb(img)
                    adv_img_ = inverse_normalize(at.tonumpy(adv_img[0]))
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [adv_img_], visualize=True)
                    adv_pred_img = visdom_bbox(
                        adv_img_, at.tonumpy(_bboxes[0]),
                        at.tonumpy(_labels[0]).reshape(-1),
                        at.tonumpy(_scores[0]))
                    trainer.vis.img('adv_img', adv_pred_img)
                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

                if (ii) % 500 == 0:
                    best_path = trainer.save(epochs=epoch, save_rcnn=True)

        if epoch % 2 == 0:
            best_path = trainer.save(epochs=epoch)
def train(**kwargs):
    """Train Faster R-CNN (grayscale variant) with file+console logging.

    Sets up a timestamped log file under ``logs/faster_rcnn_train_onGray``,
    trains for ``opt.epoch`` epochs, logs progress every ``opt.print_freq``
    batches, evaluates mAP per epoch, checkpoints the best model under
    ``checkpoints/trainedOnGray`` and decays the learning rate at epoch 9.
    """
    opt._parse(kwargs)

    # One timestamped log file per run, mirrored to the console.
    log_dir = os.path.join("logs", "faster_rcnn_train_onGray")
    os.makedirs(log_dir, exist_ok=True)
    log_path = os.path.join(
        log_dir, time.strftime("%Y-%m-%d-%H%M.log", time.localtime(time.time()))
    )
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_path), logging.StreamHandler()],
    )
    logger = logging.getLogger()

    dataset = Dataset(opt)
    print("load data")
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,  # pin_memory=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )

    faster_rcnn = FasterRCNNVGG16()
    print("model construct completed")
    logger.info(faster_rcnn)
    logger.info("-" * 50)

    # This trainer variant also receives the logger for in-step reporting.
    trainer = FasterRCNNTrainer(faster_rcnn, logger).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print("load pretrained model from %s" % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win="labels")
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        trainer.reset_ave()  # reset running averages alongside the meters
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            # Every print_freq batches log progress and ask train_step to
            # print its per-loss details; otherwise train quietly.
            if (ii + 1) % opt.print_freq == 0:
                logger.info(
                    "[Train] Epoch:{} [{:03d}/{:03d}]({:.0f}%)\t".format(
                        epoch, ii + 1, len(dataloader), (ii + 1) / len(dataloader) * 100
                    )
                )
                trainer.train_step(
                    img, bbox, label, scale, print_epoch=epoch, print_info=True
                )
            else:
                trainer.train_step(
                    img, bbox, label, scale, print_epoch=epoch, print_info=False
                )

            # if (ii + 1) % opt.plot_every == 0:
            #     if os.path.exists(opt.debug_file):
            #         ipdb.set_trace()
            #
            #     # plot loss
            #     trainer.vis.plot_many(trainer.get_meter_data())
            #
            #     # plot groud truth bboxes
            #     ori_img_ = inverse_normalize(at.tonumpy(img[0]))
            #     gt_img = visdom_bbox(
            #         ori_img_, at.tonumpy(bbox_[0]), at.tonumpy(label_[0])
            #     )
            #     trainer.vis.img("gt_img", gt_img)

            # if (ii + 1) % opt.plot_every == 0:
            #     if os.path.exists(opt.debug_file):
            #         ipdb.set_trace()
            #
            #     # plot loss
            #     trainer.vis.plot_many(trainer.get_meter_data())
            #
            #     # plot groud truth bboxes
            #     ori_img_ = inverse_normalize(at.tonumpy(img[0]))
            #     gt_img = visdom_bbox(
            #         ori_img_, at.tonumpy(bbox_[0]), at.tonumpy(label_[0])
            #     )
            #     trainer.vis.img("gt_img", gt_img)
            #
            #     # plot predicti bboxes
            #     _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
            #         [ori_img_], visualize=True
            #     )
            #     pred_img = visdom_bbox(
            #         ori_img_,
            #         at.tonumpy(_bboxes[0]),
            #         at.tonumpy(_labels[0]).reshape(-1),
            #         at.tonumpy(_scores[0]),
            #     )
            #     trainer.vis.img("pred_img", pred_img)
            #
            #     # rpn confusion matrix(meter)
            #     trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win="rpn_cm")
            #     # roi confusion matrix
            #     trainer.vis.img(
            #         "roi_cm", at.totensor(trainer.roi_cm.conf, False).float()
            #     )
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        # trainer.vis.plot("test_map", eval_result["map"])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]["lr"]
        log_info = "lr:{}, map:{},loss:{}".format(
            str(lr_), str(eval_result["map"]), str(trainer.get_meter_data())
        )
        logger.info(log_info)
        # trainer.vis.log(log_info)

        if eval_result["map"] > best_map:
            best_map = eval_result["map"]
            best_path = trainer.save(
                best_map=best_map,
                save_path="checkpoints/trainedOnGray/fasterrcnn_%s"
                % time.strftime("%m%d%H%M"),
            )
        if epoch == 9:
            # reload the best checkpoint and decay the learning rate once
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Train Faster R-CNN, evaluating mAP each epoch and logging via visdom.

    Keeps the best-scoring checkpoint, reloads it with a decayed learning
    rate at epoch 9, and stops after epoch 13.
    """
    opt._parse(kwargs)

    train_set = Dataset(opt)
    print('load data')
    train_loader = data_.DataLoader(train_set,
                                    batch_size=1,
                                    shuffle=True,
                                    num_workers=opt.num_workers)
    val_set = TestDataset(opt)
    val_loader = data_.DataLoader(val_set,
                                  batch_size=1,
                                  num_workers=opt.test_num_workers,
                                  shuffle=False,
                                  pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(train_set.db.label_names, win='labels')
    best_map = 0
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for step, batch in tqdm(enumerate(train_loader)):
            img, bbox_, label_, scale = batch
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            # Visualization only fires every plot_every steps.
            if (step + 1) % opt.plot_every != 0:
                continue
            if os.path.exists(opt.debug_file):
                ipdb.set_trace()

            # loss curves
            trainer.vis.plot_many(trainer.get_meter_data())

            # ground-truth boxes on the de-normalized image
            ori_img_ = inverse_normalize(at.tonumpy(img[0]))
            trainer.vis.img('gt_img',
                            visdom_bbox(ori_img_,
                                        at.tonumpy(bbox_[0]),
                                        at.tonumpy(label_[0])))

            # predicted boxes
            _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
            trainer.vis.img('pred_img',
                            visdom_bbox(ori_img_,
                                        at.tonumpy(_bboxes[0]),
                                        at.tonumpy(_labels[0]).reshape(-1),
                                        at.tonumpy(_scores[0])))

            # RPN / RoI confusion matrices
            trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
            trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(val_loader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN on ``LargeImageDataset``, persisting loss/mAP curves.

    At every ``opt.plot_every`` interval the five meter values are appended
    to lists and written out via ``save_losses``; after each epoch the test
    mAP is appended and written via ``save_map``.  Keeps the best
    checkpoint, decays the learning rate at epoch 9, stops after epoch 13.
    """
    opt._parse(kwargs)

    dataset = LargeImageDataset(opt)
    print('load data')
    # Note: shuffle=False here, unlike the sibling train() variants.
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=False, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    # Per-interval histories of each loss component, saved to disk as they grow.
    rpn_loc_loss = []
    rpn_cls_loss = []
    roi_loc_loss = []
    roi_cls_loss = []
    total_loss = []
    test_map_list = []

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Snapshot the current meter values and persist every series.
                losses_dict = trainer.get_meter_data()
                rpn_loc_loss.append(losses_dict['rpn_loc_loss'])
                roi_loc_loss.append(losses_dict['roi_loc_loss'])
                rpn_cls_loss.append(losses_dict['rpn_cls_loss'])
                roi_cls_loss.append(losses_dict['roi_cls_loss'])
                total_loss.append(losses_dict['total_loss'])

                save_losses('rpn_loc_loss', rpn_loc_loss, epoch)
                save_losses('roi_loc_loss', roi_loc_loss, epoch)
                save_losses('rpn_cls_loss', rpn_cls_loss, epoch)
                save_losses('total_loss', total_loss, epoch)
                save_losses('roi_cls_loss', roi_cls_loss, epoch)

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        test_map_list.append(eval_result['map'])
        save_map(test_map_list, epoch)

        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint and decay the learning rate once
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
Example #26
0
def train(**kwargs):
    """Train Faster R-CNN on VOC-style data, driven by the global config.

    kwargs override fields of ``opt``.  Trains for ``opt.epoch`` epochs
    (batch size fixed at 1), evaluates mAP each epoch, keeps the best
    checkpoint, decays the learning rate at epoch 9 and stops after
    epoch 13.
    """
    opt._parse(kwargs)  # pull the config settings

    dataset = Dataset(opt)  # build the training set from the configured parameters
    print('load data')
    # Training DataLoader; the code only supports batch_size=1.
    # BUGFIX: the original placed '#' comments after '\' line continuations,
    # which is a SyntaxError; inside parentheses no continuation is needed.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)  # build the test set from the configured parameters
    # Test DataLoader; again batch_size=1 only.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )

    faster_rcnn = FasterRCNNVGG16()  # FasterRCNN with a VGG backbone
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()  # wrap the network in the trainer
    if opt.load_path:  # restore pretrained weights when a load_path is given
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0  # best mAP so far; decides when to checkpoint
    lr_ = opt.lr  # initial learning rate from the config
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # bbox_ holds gt boxes as (ymin, xmin, ymax, xmax); label_ indexes
            # VOC_BBOX_LABEL_NAMES; only batch_size=1 is supported.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:  # visualization only
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)  # evaluate once per epoch
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']  # current learning rate
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:  # checkpoint whenever mAP improves
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # reload the best model and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:  # stop after 13 epochs
            break
Example #27
0
 def update_meters(self, losses):
     """Record every loss component of *losses* in its matching meter."""
     raw = losses._asdict()
     converted = dict((name, at.scalar(value)) for name, value in raw.items())
     for name, meter in self.meters.items():
         meter.add(converted[name])
Example #28
0
def train(**kwargs):
    """Train Faster R-CNN, logging metrics to TensorBoard.

    Command-line overrides are applied to the global ``opt`` config.
    Per-batch losses are written under ``Training(batch)`` and per-epoch
    mAP under ``mAP``.  At epoch 9 the best checkpoint so far is
    reloaded and the learning rate decayed; training hard-stops after
    epoch 13.

    Args:
        **kwargs: option overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)
    log = SummaryWriter(log_dir=opt.log_dir)
    try:
        dataset = Dataset(opt)
        print('load data')
        dataloader = data_.DataLoader(dataset,
                                      batch_size=1,
                                      shuffle=True,
                                      pin_memory=True,
                                      num_workers=opt.num_workers)
        testset = TestDataset(opt)
        test_dataloader = data_.DataLoader(testset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True)

        faster_rcnn = FasterRCNNVGG16()
        print('model construct completed')
        trainer = FasterRCNNTrainer(faster_rcnn).cuda()
        if opt.load_path:
            trainer.load(opt.load_path)
            print('load pretrained model from %s' % opt.load_path)

        best_map = 0
        best_path = None  # guards against loading before any checkpoint exists
        lr_ = opt.lr
        idx = 0  # global step counter for per-batch TensorBoard scalars
        for epoch in range(opt.epoch):
            trainer.reset_meters()
            for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                trainer.train_step(img, bbox, label, scale)

                # per-batch loss curves
                losses = trainer.get_meter_data()
                log.add_scalars(main_tag='Training(batch)',
                                tag_scalar_dict=losses,
                                global_step=idx)
                idx += 1

                if (ii + 1) % opt.plot_every == 0:
                    print(trainer.get_meter_data())
                    # de-normalized image, kept for optional visualization hooks
                    ori_img_ = inverse_normalize(at.tonumpy(img[0]))

            # evaluate once per epoch
            eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
            log.add_scalar(tag='mAP', scalar_value=eval_result['map'], global_step=epoch)
            # read back the current learning rate for the log line
            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                      str(eval_result['map']),
                                                      str(trainer.get_meter_data()))
            print(log_info)

            if eval_result['map'] > best_map:
                # keep only the best-mAP checkpoint path
                best_map = eval_result['map']
                best_path = trainer.save(best_map=best_map)
            if epoch == 9 and best_path is not None:
                # reload the best checkpoint and decay the learning rate
                trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay

            if epoch == 13:  # stop after 14 epochs
                break
    finally:
        log.close()  # flush pending events even if training raises
Example #29
0
def train(**kwargs):
    """Train Faster R-CNN with visdom-based visualization.

    Every ``opt.plot_every`` batches the losses, ground-truth boxes,
    predicted boxes and confusion matrices are pushed to visdom.  The
    model is evaluated after each epoch and the best-mAP checkpoint is
    saved; at epoch 9 that checkpoint is reloaded and the learning rate
    decayed.  Training hard-stops after epoch 13.

    Args:
        **kwargs: option overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # pin_memory intentionally left off for the training loader
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # set once the first checkpoint is saved
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9 and best_path is not None:
            # reload the best checkpoint and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:  # stop after 14 epochs
            break
Example #30
0
def train(**kwargs):
    """Train Faster R-CNN on a loader that may yield images without boxes.

    Samples with five keys carry ground-truth boxes/labels; otherwise
    empty placeholder arrays are substituted so training can still step
    on the image.  A checkpoint is saved every 10 epochs.

    Args:
        **kwargs: accepted for interface compatibility; option parsing
            is currently disabled.
    """
    # opt._parse(kwargs)

    print('load data')
    dataloader = get_train_loader(opt.root_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, sample in tqdm(enumerate(dataloader)):
            if len(sample.keys()) == 5:
                # annotated sample: move image, boxes and labels to GPU
                img_id, img, bbox, scale, label = (sample['img_id'],
                                                   sample['image'],
                                                   sample['bbox'],
                                                   sample['scale'],
                                                   sample['label'])
                img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda()
                img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            else:
                # background-only sample: use empty box/label placeholders
                img_id, img, bbox, scale, label = (sample['img_id'],
                                                   sample['image'],
                                                   np.zeros((1, 0, 4)),
                                                   sample['scale'],
                                                   np.zeros((1, 0, 1)))
                img = img.cuda().float()
                img = Variable(img)

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes.
                # NOTE: plot `bbox`/`label` — the previously used
                # `bbox_`/`label_` names were unbound for background-only
                # samples and raised NameError here.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        if epoch % 10 == 0:
            # periodic checkpoint (also fires at epoch 0)
            best_path = trainer.save(best_map=best_map)
Example #31
0
def train(**kwargs):
    """Train Faster R-CNN for 7 epochs, then evaluate once and save.

    The learning rate is decayed a single time after epoch 4; the final
    model is saved together with its test-set mAP.

    Args:
        **kwargs: option overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print("load data")
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,  # pin_memory=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=2,
        shuffle=False,  # pin_memory=True
    )
    faster_rcnn = FasterRCNNVGG16()
    print("model construct completed")
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print("load pretrained model from %s" % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win="labels")
    best_map = 0
    for epoch in range(7):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale,
                 ori_img) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # the meters already accumulate the returned losses, so the
            # return value of train_step is not needed here
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger when the sentinel file exists
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes (undo mean/std normalization)
                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
                gt_img = visdom_bbox(
                    at.tonumpy(ori_img_)[0],
                    at.tonumpy(bbox_)[0], label_[0].numpy())
                trainer.vis.img("gt_img", gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    ori_img, visualize=True)
                pred_img = visdom_bbox(
                    at.tonumpy(ori_img[0]),
                    at.tonumpy(_bboxes[0]),
                    at.tonumpy(_labels[0]).reshape(-1),
                    at.tonumpy(_scores[0]),
                )
                trainer.vis.img("pred_img", pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win="rpn_cm")
                # roi confusion matrix
                trainer.vis.img(
                    "roi_cm",
                    at.totensor(trainer.roi_cm.conf, False).float())
        if epoch == 4:
            # one-time learning-rate decay
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

    eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
    print("eval_result")
    trainer.save(mAP=eval_result["map"])
Example #32
0
def train_val():
    print('load data')
    train_loader, val_loader = get_train_val_loader(
        opt.root_dir,
        batch_size=opt.batch_size,
        val_ratio=0.1,
        shuffle=opt.shuffle,
        num_workers=opt.num_workers,
        pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    # faster_rcnn = FasterRCNNResNet50()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)

    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        tqdm.monitor_interval = 0
        for ii, sample in tqdm(enumerate(train_loader)):
            if len(sample.keys()) == 5:
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                    sample['label']
                img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda(
                )
                img, bbox, label = Variable(img), Variable(bbox), Variable(
                    label)

            else:
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], np.zeros((1, 0, 4)), \
                                                  sample['scale'], np.zeros((1, 0, 1))
                img = img.cuda().float()
                img = Variable(img)

            if bbox.size == 0:
                continue

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, img_id[0], at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))

                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, img_id[0],
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))

                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        mAP = eval_mAP(trainer, val_loader)
        trainer.vis.plot('val_mAP', mAP)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(mAP), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if mAP > best_map:
            best_map = mAP
            best_path = trainer.save(best_map=best_map)
        if epoch == opt.epoch - 1:
            best_path = trainer.save()

        if (epoch + 1) % 10 == 0:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay