def train(**kwargs):
    """Train Faster R-CNN (VGG16) on the configured dataset.

    Keyword overrides are parsed into the global ``opt`` config. Trains for
    7 epochs, plotting losses and sample detections to visdom every
    ``opt.plot_every`` iterations, decays the learning rate once after
    epoch 4, then evaluates on the test set and saves a checkpoint tagged
    with the resulting mAP.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=2,
                                       shuffle=False, \
                                       # pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    for epoch in range(7):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger on demand by touching opt.debug_file
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss meters
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes; undo the normalization
                # (std 0.225, mean 0.45) and rescale to [0, 255]
                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
                gt_img = visdom_bbox(at.tonumpy(ori_img_)[0],
                                     at.tonumpy(bbox_)[0],
                                     label_[0].numpy())
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img, visualize=True)
                pred_img = visdom_bbox(at.tonumpy(ori_img[0]),
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        if epoch == 4:
            # one-shot learning-rate decay after the 5th epoch
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

    eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
    # BUG FIX: previously printed the literal string 'eval_result'
    print(eval_result)
    trainer.save(mAP=eval_result['map'])
Exemple #2
0
from data.dataset import TrainDataset
from torch.utils import data as data_
from tqdm import tqdm
from model.faster_rcnn_vgg16 import decom_vgg16, VGG16RoIHead
import torch
from model.region_proposal_network import RegionProposalNetwork
import numpy as np
from model.faster_rcnn_vgg16 import FasterRCNNVGG16
from trainer import FasterRCNNTrainer

# Smoke test: push the first training sample through FasterRCNNTrainer.forward
# and print the resulting loss.
data_set = TrainDataset()
#data_loader = data_.DataLoader(data_set, batch_size=1, shuffle=False)

# Fetch one sample directly, bypassing a DataLoader: image array,
# ground-truth boxes, class labels and the preprocessing scale factor.
img, bbox, label, scale = data_set.__getitem__(0)
model = FasterRCNNVGG16().cuda()
trainer = FasterRCNNTrainer(model)
# img[None, :] adds a batch dimension before the tensor is moved to the GPU
loss = trainer.forward(
    torch.from_numpy(img[None, :]).cuda(), bbox, label, scale)
print(loss)
"""
roi_locs, roi_scores, rpn_locs, rpn_scores = model.forward(torch.from_numpy(img[None, :]).cuda())
print(roi_locs.shape)
print(roi_scores.shape)
print(rpn_locs.shape)
print(rpn_scores.shape)
"""
"""
extractor, classifier = decom_vgg16()
feature_map = extractor.cuda()(torch.from_numpy(img[None, :]).cuda())
print(img.shape)#3, 600, 800
print(feature_map.shape)#1, 512, 37, 50
Exemple #3
0
def train(**kwargs):
    """Train Faster R-CNN on either a CSV-labeled image set or Pascal VOC.

    The module-level ``VOC`` flag selects the data source. Losses and
    sample detections are plotted to visdom every ``opt.plot_every``
    iterations. After each epoch the model is evaluated and the
    best-mAP-so-far checkpoint is kept. The learning rate is decayed once
    after epoch 9, and training stops after epoch 30 at the latest.
    """
    opt._parse(kwargs)

    if not VOC:
        dataset = CsvDataset('/home/artemlyan/data/avito_intro/images/',
                             'labeled_with_classes.csv')
        print('load data')
        dataloader = data_.DataLoader(dataset, \
                                      batch_size=1, \
                                      shuffle=True, \
                                      # pin_memory=True,

                                      num_workers=opt.num_workers)

        # NOTE(review): validation reuses the same dataset object; it is
        # switched to 'val' mode before evaluation further down.
        test_dataloader = data_.DataLoader(dataset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False, \
                                           pin_memory=True
                                           )
    else:
        dataset = Dataset(opt)
        print('load data for VOC')
        dataloader = data_.DataLoader(dataset, \
                                      batch_size=1, \
                                      shuffle=True, \
                                      # pin_memory=True,

                                      num_workers=opt.num_workers)
        testset = TestDataset(opt)
        test_dataloader = data_.DataLoader(testset,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False, \
                                           pin_memory=True
                                           )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    # BUG FIX: best_path was previously undefined until the first save, so
    # `trainer.load(best_path)` raised NameError if epoch 9 arrived without
    # any mAP improvement.
    best_path = None
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        if not VOC:
            dataset.set_mode('train')
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            print(img.size(), bbox_.size(), label_.size(), scale.size())
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Variable is a no-op wrapper on modern PyTorch; kept for
            # compatibility with the pre-0.4 API this code targets.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger on demand by touching opt.debug_file
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss meters
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                print('pred', _bboxes, 'gt', bbox_[0])
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        if not VOC:
            dataset.set_mode('val')

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        # BUG FIX: message typo ("eval reuslt")
        print("eval result:", eval_result)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # reload the best checkpoint (if any) before decaying the lr
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 30:
            break
import os
import torch as t
from utils.config import opt
from model.faster_rcnn_vgg16 import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import  read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at

from matplotlib import pyplot as plt

# Demo: load a trained Faster R-CNN checkpoint and visualize detections
# on a single image.
img = read_image('/home/fengkai/dog.jpg')
img = t.from_numpy(img)[None]  # add a batch dimension

faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()

trainer.load('/home/fengkai/PycharmProjects/my-faster-rcnn/checkpoints/fasterrcnn_04231732_0.6941460588341642')
opt.caffe_pretrain=False # this model was trained from torchvision-pretrained model
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,visualize=True)
# Draw predicted boxes/labels/scores for the first (only) image in the batch.
vis_bbox(at.tonumpy(img[0]),
         at.tonumpy(_bboxes[0]),
         at.tonumpy(_labels[0]).reshape(-1),
         at.tonumpy(_scores[0]).reshape(-1))
plt.show()




Exemple #5
0
def train(**kwargs):
    """Train Faster R-CNN on a dataset that also yields human/object
    interaction annotations (human box, object box, action).

    NOTE(review): the interaction tensors are moved to the GPU but never
    passed to ``trainer.train_step`` — only img/bbox/label/scale are used.
    Confirm whether that is intentional.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=False, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale, human_box, object_box, action) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            human_box, object_box, action = human_box.cuda(), object_box.cuda(), action.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # drop into the debugger on demand by touching opt.debug_file
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss meters
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     # at.tonumpy(action[0]),
                                     at.tonumpy(label_[0])
                                     )
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                print(_labels[0])
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # one-shot lr decay after epoch 9; reloads the best checkpoint first
        # NOTE(review): best_path is undefined here if mAP never improved
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13: 
            break
def fit_ont_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch):
    """Run one training epoch followed by one validation pass.

    Args:
        net: the Faster R-CNN network being trained.
        epoch: zero-based index of the current epoch (for logging).
        epoch_size: number of training batches to consume from ``gen``.
        epoch_size_val: number of validation batches from ``genval``.
        gen: training batch generator yielding (imgs, boxes, labels).
        genval: validation batch generator yielding (imgs, boxes, labels).
        Epoch: total number of epochs (for logging).

    Side effects: prints running losses per step and saves a checkpoint
    named after the epoch's train/val losses under ``logs/``.
    """
    # NOTE(review): ``optimizer`` comes from module scope — confirm it is
    # the optimizer intended for ``net``.
    train_util = FasterRCNNTrainer(net, optimizer)
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0
    val_total_loss = 0  # renamed from misspelled `val_toal_loss`
    for iteration, batch in enumerate(gen):
        if iteration >= epoch_size:
            break
        start_time = time.time()
        imgs, boxes, labels = batch[0], batch[1], batch[2]

        # inputs are converted under no_grad; gradients are handled inside
        # train_step itself
        with torch.no_grad():
            imgs = Variable(torch.from_numpy(imgs).type(
                torch.FloatTensor)).cuda()
            boxes = [
                Variable(torch.from_numpy(box).type(torch.FloatTensor)).cuda()
                for box in boxes
            ]
            labels = [
                Variable(torch.from_numpy(label).type(
                    torch.FloatTensor)).cuda() for label in labels
            ]
        losses = train_util.train_step(imgs, boxes, labels, 1)
        rpn_loc, rpn_cls, roi_loc, roi_cls, total = losses
        total_loss += total
        rpn_loc_loss += rpn_loc
        rpn_cls_loss += rpn_cls
        roi_loc_loss += roi_loc
        roi_cls_loss += roi_cls

        waste_time = time.time() - start_time
        print('\nEpoch:' + str(epoch + 1) + '/' + str(Epoch))
        print('iter:' + str(iteration) + '/' + str(epoch_size) + ' || total_loss: %.4f|| rpn_loc_loss: %.4f || rpn_cls_loss: %.4f || roi_loc_loss: %.4f || roi_cls_loss: %.4f || %.4fs/step' \
            % (total_loss/(iteration+1), rpn_loc_loss/(iteration+1),rpn_cls_loss/(iteration+1),roi_loc_loss/(iteration+1),roi_cls_loss/(iteration+1),waste_time))

    print('Start Validation')
    for iteration, batch in enumerate(genval):
        if iteration >= epoch_size_val:
            break
        imgs, boxes, labels = batch[0], batch[1], batch[2]
        with torch.no_grad():
            imgs = Variable(torch.from_numpy(imgs).type(
                torch.FloatTensor)).cuda()
            boxes = Variable(torch.from_numpy(boxes).type(
                torch.FloatTensor)).cuda()
            labels = Variable(
                torch.from_numpy(labels).type(torch.FloatTensor)).cuda()

            train_util.optimizer.zero_grad()
            # forward only: accumulate validation loss without stepping
            losses = train_util.forward(imgs, boxes, labels, 1)
            _, _, _, _, val_total = losses
            val_total_loss += val_total
    print('Finish Validation')
    print('\nEpoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' %
          (total_loss / (epoch_size + 1), val_total_loss /
           (epoch_size_val + 1)))

    print('Saving state, iter:', str(epoch + 1))
    # BUG FIX: previously saved the module-level ``model`` instead of the
    # network actually being trained here.
    torch.save(
        net.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' %
        ((epoch + 1), total_loss / (epoch_size + 1), val_total_loss /
         (epoch_size_val + 1)))
Exemple #7
0
            f.close()

    if img.ndim == 2:
        # reshape (H, W) -> (1, H, W)
        return img[np.newaxis]
    else:
        # transpose (H, W, C) -> (C, H, W)
        return img.transpose((2, 0, 1))


if __name__ == '__main__':
    #1.加载网络 load network
    opt.env = 'test'
    opt.caffe_pretrain = True
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load(
        'C:/Users/86188/Desktop/simple-faster-rcnn-pytorch-master/checkpoints/fasterrcnn_04250634_0.6951113529274409')
    print('network loads successs!')

    # 2.Init the camera
    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture('C:/Users/86188/Desktop/simple-faster-rcnn-pytorch-master/test.mp4')
    cap.set(3, 900)
    cap.set(4, 900)

    while 1:
        ret, frame = cap.read()  # 读取每一帧
        cv2.imshow('摄像头', frame)  # 显示每一帧
        k = cv2.waitKey(1)
        #img = read_image('misc/catdog.jpg')
Exemple #8
0
        pred_labels += pred_labels_
        pred_scores += pred_scores_

    result = eval_detection_voc(pred_bboxes,
                                pred_labels,
                                pred_scores,
                                gt_bboxes,
                                gt_labels,
                                gt_difficults,
                                use_07_metric=True)
    return result


if __name__ == "__main__":
    # Evaluate a saved Faster R-CNN checkpoint on the test set and print mAP.
    testset = TestDataset()
    opt = Option()
    args = opt.parser.parse_args()
    # restrict visible GPUs before any CUDA work happens
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    dataloader = data.DataLoader(testset,
                                 batch_size=1,
                                 num_workers=4,
                                 shuffle=False,
                                 pin_memory=True)
    faster_rcnn = FasterRCNN_vgg16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    trainer.load('checkpoints/threat_model.pth')
    eval_result = eval(dataloader, faster_rcnn, test_num=10000)
    #eval_result = attack_eval(dataloader, faster_rcnn)
    print(eval_result['map'])
Exemple #9
0
# This can be removed once PyTorch 0.4.x is out.
# See https://discuss.pytorch.org/t/question-about-rebuild-tensor-v2/14560
import torch._utils

if not hasattr(torch._utils, '_rebuild_tensor_v2'):
    # Older PyTorch builds lack _rebuild_tensor_v2; emulate it on top of the
    # legacy _rebuild_tensor so newer checkpoints still unpickle.
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        tensor = torch._utils._rebuild_tensor(storage, storage_offset, size, stride)
        tensor.requires_grad = requires_grad
        tensor._backward_hooks = backward_hooks
        return tensor

    torch._utils._rebuild_tensor_v2 = _rebuild_tensor_v2

#%%
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()
trainer.load('./checkpoints/fasterrcnn_09031352_0')
opt.caffe_pretrain=True  # this model was trained from caffe-pretrained model
# Plot examples on training set
dataset = RSNADataset(opt.root_dir)
for i in range(0, len(dataset)):
    sample = dataset[i]
    img = sample['image']
    ori_img_ = inverse_normalize(at.tonumpy(img))

    # plot predicti bboxes
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
    pred_img = vis_bbox(ori_img_,
                           at.tonumpy(_bboxes[0]),
                           at.tonumpy(_labels[0]).reshape(-1),
                           at.tonumpy(_scores[0]))
Exemple #10
0
import os
import torch as t
import torch.nn as nn
from utils.config import opt
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at
from utils.average import AverageVal
import matplotlib.pyplot as plt

# Demo: load a converted Chainer checkpoint and dump the backbone feature map
# for a single image.
img = read_image("misc/demo.jpg")
img = t.from_numpy(img)[None]  # add a batch dimension

logger = AverageVal()
faster_rcnn = FasterRCNNVGG16()

trainer = FasterRCNNTrainer(faster_rcnn, logger).cuda()

trainer.load("model/chainer_best_model_converted_to_pytorch_0.7053.pth")
opt.caffe_pretrain = True  # this model was trained from caffe-pretrained model

print(img.shape)

# extract the backbone feature map only (no detection heads)
h = trainer.faster_rcnn.getFeatureMap(img)
print(h)
def train(**kwargs):
    """Train Faster R-CNN (ResNet18 backbone) on the Carrada radar dataset.

    Iterates over annotated sequences, training frame by frame. Losses and
    example detections are logged to TensorBoard. After each epoch the
    model is evaluated on the validation split; whenever validation mAP
    improves, the test split is evaluated too and a checkpoint is saved.
    The learning rate decays exponentially every ``opt.lr_step`` epochs.
    """
    opt._parse(kwargs)

    carrada = download('Carrada')
    train_set = Carrada().get('Train')
    val_set = Carrada().get('Validation')
    test_set = Carrada().get('Test')

    train_seqs = SequenceCarradaDataset(train_set)
    val_seqs = SequenceCarradaDataset(val_set)
    test_seqs = SequenceCarradaDataset(test_set)

    train_seqs_loader = data_.DataLoader(train_seqs, \
                                         batch_size=1, \
                                         shuffle=True, \
                                         # pin_memory=True,
                                         num_workers=opt.num_workers)

    val_seqs_loader = data_.DataLoader(val_seqs,
                                       batch_size=1,
                                       shuffle=False,
                                       # pin_memory=True,
                                       num_workers=opt.num_workers)

    test_seqs_loader = data_.DataLoader(test_seqs,
                                        batch_size=1,
                                        shuffle=False,
                                        # pin_memory=True,
                                        num_workers=opt.num_workers)

    # faster_rcnn = FasterRCNNVGG16(n_fg_class=3)
    # faster_rcnn = FasterRCNNRESNET101(n_fg_class=3)
    faster_rcnn = FasterRCNNRESNET18(n_fg_class=3)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    scheduler = ExponentialLR(trainer.faster_rcnn.optimizer, gamma=0.9)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    writer_path = os.path.join(opt.logs_path, opt.model_name)
    os.makedirs(writer_path, exist_ok=True)
    writer = SummaryWriter(writer_path)
    iteration = 0  # global step across all sequences/epochs (TensorBoard x-axis)
    best_map = 0
    lr_ = opt.lr

    for epoch in range(opt.epoch):
        print('Processing epoch: {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()
        for n_seq, sequence_data in tqdm(enumerate(train_seqs_loader)):
            seq_name, seq = sequence_data
            path_to_frames = os.path.join(carrada, seq_name[0])
            # per-sequence, frame-level dataset and loader
            train_frame_set = CarradaDataset(opt, seq, 'box', opt.signal_type,
                                             path_to_frames)
            train_frame_loader = data_.DataLoader(train_frame_set,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=opt.num_workers)

            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(train_frame_loader)):
                iteration += 1
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                img = normalize(img)

                # optional extra instrumentation every opt.debug_step steps
                if opt.debug_step and (iteration+1) % opt.debug_step == 0:
                    trainer.train_step(img, bbox, label, scale, stop=True)
                else:
                    trainer.train_step(img, bbox, label, scale)

                if (iteration + 1) % opt.plot_every == 0:
                    # drop into the debugger on demand via opt.debug_file
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()

                    # log the running loss meters to TensorBoard
                    train_results = trainer.get_meter_data()
                    writer.add_scalar('Losses/rpn_loc', train_results['rpn_loc_loss'],
                                      iteration)
                    writer.add_scalar('Losses/rpn_cls', train_results['rpn_cls_loss'],
                                      iteration)
                    writer.add_scalar('Losses/roi_loc', train_results['roi_loc_loss'],
                                      iteration)
                    writer.add_scalar('Losses/roi_cls', train_results['roi_cls_loss'],
                                      iteration)
                    writer.add_scalar('Losses/total', train_results['total_loss'],
                                      iteration)

                if (iteration + 1) % opt.img_every == 0:
                    # log ground-truth and predicted boxes as images
                    ori_img_ = at.tonumpy(img[0])
                    gt_img = visdom_bbox(ori_img_,
                                         at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    gt_img_grid = make_grid(torch.from_numpy(gt_img))
                    writer.add_image('Ground_truth_img', gt_img_grid, iteration)

                    # plot predicted bboxes
                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], opt.signal_type,
                                                                            visualize=True)
                    # FLAG: vis
                    pred_img = visdom_bbox(ori_img_,
                                           at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    pred_img_grid = make_grid(torch.from_numpy(pred_img))
                    writer.add_image('Predicted_img', pred_img_grid, iteration)

                    # NOTE(review): this train-set evaluation is nested inside
                    # the img_every branch, so it only fires when both
                    # intervals align — confirm this is intended
                    if opt.train_eval and (iteration + 1) % opt.train_eval == 0:
                        train_eval_result, train_best_iou = eval(train_seqs_loader, faster_rcnn,
                                                                 opt.signal_type)
                        writer.add_scalar('Train/mAP', train_eval_result['map'],
                                          iteration)
                        writer.add_scalar('Train/Best_IoU', train_best_iou,
                                          iteration)

        # end-of-epoch validation
        eval_result, best_val_iou = eval(val_seqs_loader, faster_rcnn, opt.signal_type,
                                         test_num=opt.test_num)
        writer.add_scalar('Validation/mAP', eval_result['map'],
                          iteration)
        writer.add_scalar('Validation/Best_IoU', best_val_iou,
                          iteration)
        # NOTE(review): scheduler.get_lr() is deprecated in newer PyTorch in
        # favor of get_last_lr() — confirm the installed version
        lr_ = scheduler.get_lr()[0]
        writer.add_scalar('learning_rate', lr_, iteration)

        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        print(log_info)
        if eval_result['map'] > best_map:
            # validation improved: also measure test mAP and checkpoint
            test_result, test_best_iou = eval(test_seqs_loader, faster_rcnn, opt.signal_type,
                                              test_num=opt.test_num)
            writer.add_scalar('Test/mAP', test_result['map'],
                              iteration)
            writer.add_scalar('Test/Best_IoU', test_best_iou,
                              iteration)
            best_map = eval_result['map']
            best_test_map = test_result['map']
            best_path = trainer.save(best_val_map=best_map, best_test_map=best_test_map)
            # best_path = trainer.save(best_map=best_map)

        if (epoch + 1) % opt.lr_step == 0:
            scheduler.step()
Exemple #12
0
def predict(**kwargs):
    """Run Faster R-CNN inference on the DSB dataset and write a
    run-length-encoded submission CSV.

    For each test image, predicted boxes carve per-instance ids into the
    externally predicted segmentation mask; the labeled mask is converted
    to RLE rows (one per instance) and dumped to a CSV file.
    """
    dsbpredopt._parse(kwargs)

    dataset = DSBPredictDataset(dsbpredopt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=False, \
                                  pin_memory=True,
                                  num_workers=dsbpredopt.num_workers)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if dsbpredopt.load_path:
        trainer.load(dsbpredopt.load_path)
        print('load checkpoint from %s' % dsbpredopt.load_path)

    new_test_ids = []
    rles = []

    for ii, (imgs, sizes, predicted_mask, id_) in tqdm(enumerate(dataloader)):
        # sizes arrives batched; unwrap to [height, width] for this image
        sizes = [sizes[0][0], sizes[1][0]]
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
            imgs, [sizes])

        predicted_mask_labeled = np.squeeze(
            at.tonumpy(predicted_mask[0]).copy())
        pred_bboxes_ = at.tonumpy(pred_bboxes_[0]).astype(np.uint16)

        if pred_bboxes_.shape[0] == 0:
            # no detections for this image — log its id for inspection
            print(id_[0])

        # sanity check: mask must match the reported image size
        if predicted_mask_labeled.shape[0] != sizes[
                0] or predicted_mask_labeled.shape[1] != sizes[1]:
            print('wtf')

        # Assign a distinct instance id (idx + 1) to the non-zero mask
        # pixels inside each predicted box (boxes are y_min,x_min,y_max,x_max).
        for idx, pred_bbox in enumerate(pred_bboxes_):
            mask = predicted_mask_labeled[pred_bbox[0]:pred_bbox[2],
                                          pred_bbox[1]:pred_bbox[3]]
            if (pred_bbox[2] > sizes[0] or pred_bbox[3] > sizes[1]):
                # box extends outside the image — flag it
                print('wtf')
            mask[mask > 0] = idx + 1
            predicted_mask_labeled[pred_bbox[0]:pred_bbox[2],
                                   pred_bbox[1]:pred_bbox[3]] = mask

        # presumably 255 marks foreground not claimed by any box; zero it
        # out so it is excluded from the RLE — TODO confirm mask encoding
        predicted_mask_labeled[predicted_mask_labeled == 255] = 0
        rle = list(prob_to_rles(predicted_mask_labeled))
        rles.extend(rle)
        new_test_ids.extend([id_[0]] * len(rle))

    # one CSV row per instance: image id + space-separated RLE pairs
    sub = pd.DataFrame()
    sub['ImageId'] = new_test_ids
    sub['EncodedPixels'] = pd.Series(rles).apply(
        lambda x: ' '.join(str(y) for y in x))
    sub.to_csv("predicts/unet__data_0_d6_t_c_lr9_bs4_size256_epoch_74.csv",
               index=False)
Exemple #13
0
def train(epochs,
          img_box_dict,
          pretrained_model=None,
          save_path=None,
          rpn_rois=None,
          train_rpn=True,
          train_rcnn=True,
          validate=False,
          lock_grad_for_rpn=False,
          lock_grad_for_rcnn=False):
    """Train an FPN-ResNet Faster R-CNN on an image->annotation dictionary.

    Args:
        epochs: number of passes over ``img_box_dict``.
        img_box_dict: mapping of image path -> annotation info dict.
        pretrained_model: optional checkpoint to warm-start from.
        save_path: where ``trainer.save`` writes the checkpoint each epoch.
        rpn_rois: optional precomputed RoIs keyed by image path; their
            x-coordinates are mirrored when the image is flipped.
        train_rpn / train_rcnn: which heads ``train_step`` should optimize.
        validate: hold out 5% of the data and report mAP each epoch.
        lock_grad_for_rpn / lock_grad_for_rcnn: freeze the whole network
            except the RPN / RCNN head respectively.
    """
    fpn_resnet = FPNResNet().cuda()

    # lock gradient: freeze everything, then re-enable just the part that
    # should keep training
    if lock_grad_for_rcnn:
        for param in fpn_resnet.parameters():
            param.requires_grad = False
        for param in fpn_resnet.head.parameters():
            param.requires_grad = True

    if lock_grad_for_rpn:
        for param in fpn_resnet.parameters():
            param.requires_grad = False
        for param in fpn_resnet.rpn.parameters():
            param.requires_grad = True

    fpn_resnet.get_optimizer(Config.lr)
    trainer = FasterRCNNTrainer(fpn_resnet).cuda()
    print('model constructed')

    if pretrained_model is not None:
        trainer.load(pretrained_model, load_optimizer=False)

    if validate:
        dict_train, dict_val = generate_train_val_data(img_box_dict,
                                                       p_train=0.95)
    else:
        dict_train = img_box_dict
        dict_val = None

    for epoch in range(epochs):
        print('epoch: ', epoch)
        for i, [img_dir, img_info] in tqdm(enumerate(dict_train.items())):
            img, img_info, flipped = rescale_image(img_dir,
                                                   img_info,
                                                   flip=True)
            img_size = list(img_info['img_size'])
            img_tensor = create_img_tensor(img)
            if rpn_rois:
                img_rois = rpn_rois[img_dir]
                if flipped:
                    # mirror the cached RoI x-coordinates to match the
                    # flipped image (renamed from `max`/`min`, which
                    # shadowed the builtins)
                    x_max = img_size[1] - img_rois[:, 1]
                    x_min = img_size[1] - img_rois[:, 3]
                    img_rois[:, 1] = x_min
                    img_rois[:, 3] = x_max

                img_rois = torch.from_numpy(img_rois).cuda()
                trainer.train_step(img_tensor, img_info, img_rois, train_rpn,
                                   train_rcnn)
            else:
                trainer.train_step(img_tensor, img_info, None, train_rpn,
                                   train_rcnn)

        trainer.save(save_path, save_optimizer=False)
        if validate:
            # renamed from `map`, which shadowed the builtin
            mean_ap = evaluation(dict_val, trainer.fpn_resnet)
            print('mAP: ', mean_ap)

        # lr decay once at 70% of the schedule
        if epoch == int(epochs * 0.7):
            trainer.scale_lr(Config.lr_decay)
def train(**kwargs):
    """Train Faster R-CNN, evaluating test mAP after every epoch.

    Keeps the best checkpoint (by test mAP), plots progress to visdom,
    and decays the learning rate once after epoch 9 (reloading the best
    checkpoint first).  ``kwargs`` are forwarded to ``opt._parse`` to
    override config defaults.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # This code base only supports batch_size=1 (images differ in size).
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # fix: previously unbound if mAP never improved by epoch 9
    best_ap = np.array([0.] * opt.label_number)
    lr_ = opt.lr
    vis = trainer.vis
    starttime = datetime.datetime.now()
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Drop into the debugger when the debug sentinel file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                roi_cm = at.totensor(trainer.roi_cm.conf, False).float()
                trainer.vis.img('roi_cm', roi_cm)

        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           vis=vis,
                           test_num=opt.test_num)
        best_ap = dict(zip(opt.VOC_BBOX_LABEL_NAMES, eval_result['ap']))
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            print('roi_cm=\n', trainer.roi_cm.value())
            plot_confusion_matrix(trainer.roi_cm.value(),
                                  classes=('animal', 'plant', 'rock',
                                           'background'),
                                  normalize=False,
                                  title='Normalized Confusion Matrix')
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map, best_ap=best_ap)
        if epoch == 9:
            # Reload the best checkpoint so far, then decay the lr.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
    endtime = datetime.datetime.now()
    train_consum = (endtime - starttime).seconds
    print("train_consum=", train_consum)
def train(**kwargs):
    """Train Faster R-CNN with verbose per-step debug printing.

    Evaluates on the test set after each epoch, keeps the best checkpoint
    by mAP, reloads it with a decayed lr after epoch 9, and stops after
    epoch 13.  ``kwargs`` override config defaults via ``opt._parse``.
    """
    opt._parse(kwargs)

    print('dataset = Dataset(opt)')
    transform = transforms.Compose([
        # you can add other transformations in this list
        transforms.ToTensor()
    ])
    dataset = Dataset(opt, transform=transform)
    # Only batch_size=1 is supported by this code base.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers,
                                  )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    print('faster_rcnn = FasterRCNNVGG16()')
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s at trian.py line 70' %
              opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # fix: previously unbound if mAP never improved by epoch 9
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            print("tqdm(enumerate(dataloader)):")
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Variable() is a no-op on modern PyTorch; kept for compatibility.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)

            print("train.py trainer.train_step(img, bbox, label, scale)")
            print(img.shape)
            print(bbox.shape)
            print(label.shape)

            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                print("trian.py line94")
                print(trainer.get_meter_data())
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint before decaying the learning rate.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
Exemple #16
0
def fit_ont_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch):
    """Run one training epoch followed by one validation pass, then save.

    NOTE(review): the name typo ('ont' -> 'one') is kept; renaming would
    break callers.  Relies on module-level globals ``optimizer``,
    ``get_lr`` and ``model``.

    Args:
        net: Faster R-CNN network to train.
        epoch: current epoch index (0-based).
        epoch_size: number of training batches to consume from ``gen``.
        epoch_size_val: number of validation batches to consume from ``genval``.
        gen: training batch generator yielding (imgs, boxes, labels) numpy arrays.
        genval: validation batch generator with the same layout.
        Epoch: total epoch count (used only in progress-bar captions).
    """
    # A fresh trainer is built around the global `optimizer` every call.
    train_util = FasterRCNNTrainer(net, optimizer)
    # Running loss accumulators for this epoch.
    total_loss = 0
    rpn_loc_loss = 0
    rpn_cls_loss = 0
    roi_loc_loss = 0
    roi_cls_loss = 0
    val_toal_loss = 0  # (sic) running validation loss
    with tqdm(total=epoch_size,
              desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict,
              mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            imgs, boxes, labels = batch[0], batch[1], batch[2]

            # no_grad only wraps the numpy -> CUDA tensor conversion;
            # gradients are produced inside train_step.
            with torch.no_grad():
                imgs = Variable(
                    torch.from_numpy(imgs).type(torch.FloatTensor)).cuda()
                boxes = [
                    Variable(torch.from_numpy(box).type(
                        torch.FloatTensor)).cuda() for box in boxes
                ]
                labels = [
                    Variable(torch.from_numpy(label).type(
                        torch.FloatTensor)).cuda() for label in labels
                ]
            # Final argument 1 is the image scale factor.
            losses = train_util.train_step(imgs, boxes, labels, 1)
            rpn_loc, rpn_cls, roi_loc, roi_cls, total = losses
            total_loss += total
            rpn_loc_loss += rpn_loc
            rpn_cls_loss += rpn_cls
            roi_loc_loss += roi_loc
            roi_cls_loss += roi_cls

            # Show per-iteration running means in the progress bar.
            pbar.set_postfix(
                **{
                    'total': total_loss.item() / (iteration + 1),
                    'rpn_loc': rpn_loc_loss.item() / (iteration + 1),
                    'rpn_cls': rpn_cls_loss.item() / (iteration + 1),
                    'roi_loc': roi_loc_loss.item() / (iteration + 1),
                    'roi_cls': roi_cls_loss.item() / (iteration + 1),
                    'lr': get_lr(optimizer)
                })
            pbar.update(1)

    print('Start Validation')
    with tqdm(total=epoch_size_val,
              desc=f'Epoch {epoch + 1}/{Epoch}',
              postfix=dict,
              mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            imgs, boxes, labels = batch[0], batch[1], batch[2]
            # NOTE(review): validation converts boxes/labels as whole arrays,
            # unlike the per-item list conversion used above for training —
            # confirm train_util.forward expects this shape.
            with torch.no_grad():
                imgs = Variable(
                    torch.from_numpy(imgs).type(torch.FloatTensor)).cuda()
                boxes = Variable(
                    torch.from_numpy(boxes).type(torch.FloatTensor)).cuda()
                labels = Variable(
                    torch.from_numpy(labels).type(torch.FloatTensor)).cuda()

                train_util.optimizer.zero_grad()
                losses = train_util.forward(imgs, boxes, labels, 1)
                _, _, _, _, val_total = losses
                val_toal_loss += val_total
            pbar.set_postfix(
                **{'total_loss': val_toal_loss.item() / (iteration + 1)})
            pbar.update(1)

    print('Finish Validation')
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    # NOTE(review): divisor (epoch_size + 1) looks off by one versus the
    # number of batches actually consumed — confirm intended.
    print('Total Loss: %.4f || Val Loss: %.4f ' %
          (total_loss / (epoch_size + 1), val_toal_loss /
           (epoch_size_val + 1)))

    print('Saving state, iter:', str(epoch + 1))
    # NOTE(review): saves the global `model`, not the `net` argument —
    # confirm they refer to the same network before trusting this checkpoint.
    torch.save(
        model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' %
        ((epoch + 1), total_loss / (epoch_size + 1), val_toal_loss /
         (epoch_size_val + 1)))
Exemple #17
0
def main():
    """Quantize a trained (possibly pruned) Faster R-CNN checkpoint.

    Loads the checkpoint from ``args.load_path``, forces dense weights
    (aborting if any weight is already sparse), quantizes to ``args.bits``,
    and saves both a masked model at ``args.save_path`` and a SparseDense
    variant alongside it with a ``SparseDense_`` filename prefix.
    """
    faster_rcnn = FasterRCNNVGG16(mask=opt.mask)
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    assert os.path.isfile(
        args.load_path), f"Need valid checkpoint, {args.load_path} not found"
    trainer.load(args.load_path)

    # Check to make sure weights are dense.
    for n, m in trainer.named_modules():
        if hasattr(m, 'sparse'):
            m.sparse = False
    for n, m in trainer.named_modules():
        if hasattr(m, 'weight'):
            if m.weight.is_sparse:
                print("Weights are already sparse")
                return
    print("\n\n=========SIZE BEFORE=============")
    try:
        trainer.faster_rcnn.set_pruned()
    except Exception:  # fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit
        print("No masks.")
    get_size(trainer)
    trainer.quantize(bits=args.bits, verbose=args.verbose)
    print("\n\n=========SIZE AFTER==============")
    get_size(trainer)
    print("Saving a maskedmodel")
    trainer.save(save_path=args.save_path)
    print("Saving a SparseDense Model")
    trainer.replace_with_sparsedense()
    # Prefix the file name component of save_path with "SparseDense_".
    sd_file = args.save_path.split("/")
    sd_file[-1] = "SparseDense_" + sd_file[-1]
    sd_file = "/".join(sd_file)
    trainer.save(save_path=sd_file)
Exemple #18
0
def train(**kwargs):
    """Train Faster R-CNN (VGG16 backbone).

    ``kwargs`` are forwarded to ``opt._parse`` to override config settings.
    Evaluates mAP after every epoch, keeps the best checkpoint, reloads it
    with a decayed lr after epoch 9, and stops after epoch 13.
    """
    opt._parse(kwargs)  # apply config overrides

    dataset = Dataset(opt)  # build the training dataset from config
    print('load data')
    # Training DataLoader; this code base only supports batch_size=1.
    # (fix: the original placed a comment after a line-continuation
    # backslash on the same physical line, which is a SyntaxError)
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)  # test dataset built from the same config
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )

    faster_rcnn = FasterRCNNVGG16()  # Faster R-CNN with a VGG16 backbone
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()  # wrap net in the trainer
    if opt.load_path:
        # Optionally resume from pretrained weights.
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0  # best test mAP so far, used for checkpoint selection
    best_path = None  # fix: previously unbound if mAP never improved by epoch 9
    lr_ = opt.lr  # current learning rate
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # bbox_ holds ground-truth boxes (ymin, xmin, ymax, xmax);
            # label_ holds class indices into VOC_BBOX_LABEL_NAMES;
            # img is the image tensor (batch_size=1 only).
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)  # one optimization step

            if (ii + 1) % opt.plot_every == 0:  # visdom visualization
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # Evaluate once per epoch.
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        if eval_result['map'] > best_map:
            # New best mAP: save a checkpoint.
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best model so far and decay the learning rate.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:  # stop training after 13 epochs
            break
def train(**kwargs):
    """Train Faster R-CNN; ``kwargs`` override config via ``opt._parse``.

    Per-epoch evaluation, best-checkpoint saving, lr decay at epoch 9
    (reloading the best model first), and a hard stop after epoch 13.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # Only batch_size=1 is supported by this code base.
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    best_path = None  # fix: previously unbound if mAP never improved by epoch 9
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # Variable() is a no-op on modern PyTorch; kept for compatibility.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint before decaying the learning rate.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
def train(**kwargs):
    """Train Faster R-CNN on grayscale data with file+console logging.

    ``kwargs`` override config via ``opt._parse``.  Logs to a timestamped
    file under logs/faster_rcnn_train_onGray and to stderr, evaluates mAP
    per epoch, keeps the best checkpoint under checkpoints/trainedOnGray,
    and decays the lr once after epoch 9.
    """
    opt._parse(kwargs)

    # Per-run log file named by start timestamp.
    log_dir = os.path.join("logs", "faster_rcnn_train_onGray")
    os.makedirs(log_dir, exist_ok=True)
    log_path = os.path.join(
        log_dir, time.strftime("%Y-%m-%d-%H%M.log", time.localtime(time.time()))
    )
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_path), logging.StreamHandler()],
    )
    logger = logging.getLogger()

    dataset = Dataset(opt)
    print("load data")
    # Only batch_size=1 is supported by this code base.
    dataloader = data_.DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,  # pin_memory=True,
        num_workers=opt.num_workers,
    )
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        pin_memory=True,
    )

    faster_rcnn = FasterRCNNVGG16()
    print("model construct completed")
    logger.info(faster_rcnn)
    logger.info("-" * 50)

    trainer = FasterRCNNTrainer(faster_rcnn, logger).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print("load pretrained model from %s" % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win="labels")
    best_map = 0
    best_path = None  # fix: previously unbound if mAP never improved by epoch 9
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        trainer.reset_ave()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            # Log progress every print_freq steps; the training step itself
            # is identical either way.
            print_info = (ii + 1) % opt.print_freq == 0
            if print_info:
                logger.info(
                    "[Train] Epoch:{} [{:03d}/{:03d}]({:.0f}%)\t".format(
                        epoch, ii + 1, len(dataloader), (ii + 1) / len(dataloader) * 100
                    )
                )
            trainer.train_step(
                img, bbox, label, scale, print_epoch=epoch, print_info=print_info
            )

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]["lr"]
        log_info = "lr:{}, map:{},loss:{}".format(
            str(lr_), str(eval_result["map"]), str(trainer.get_meter_data())
        )
        logger.info(log_info)

        if eval_result["map"] > best_map:
            best_map = eval_result["map"]
            best_path = trainer.save(
                best_map=best_map,
                save_path="checkpoints/trainedOnGray/fasterrcnn_%s"
                % time.strftime("%m%d%H%M"),
            )
        if epoch == 9:
            # Reload the best checkpoint so far, then decay the lr.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
Exemple #21
0
def train(**kwargs):
    """Train Faster R-CNN with snapshotting, resume-from-epoch, and an
    optional validation-only mode.

    ``kwargs`` override config defaults via ``opt._parse``.  Supports CPU
    or GPU (``opt.use_cuda``), periodic snapshots (``opt.snapshot_every``
    steps), and step lr decay (``opt.lr_decay`` every ``opt.epoch_decay``
    epochs).
    """
    opt._parse(kwargs)

    print('load data')
    dataset = Dataset(opt)
    # NOTE: only batch_size=1 is supported by this code base.
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )

    # Single aspect ratio / anchor scale; class count comes from the dataset.
    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count(),
                                  ratios=[1],
                                  anchor_scales=[1])
    print('model construct completed')

    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=dataset.get_class_count())

    if opt.use_cuda:
        trainer = trainer.cuda()

    if opt.load_path:
        # old_state is the loaded checkpoint dict (may carry 'epoch' for resume).
        old_state = trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    if opt.validate_only:
        # Evaluate on the full test set and return without training.
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           test_num=num_eval_images)
        print('Evaluation finished, obtained {} using {} out of {} images'.
              format(eval_result, num_eval_images, len(testset)))
        return

    if opt.load_path and 'epoch' in old_state.keys():
        # Resume training from the epoch after the checkpoint's.
        starting_epoch = old_state['epoch'] + 1
        print('Model was trained until epoch {}, continuing with epoch {}'.
              format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    #trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    global_step = 0
    for epoch in range(starting_epoch, opt.num_epochs):
        # Step lr decay: lr = base * decay^(epoch // epoch_decay).
        lr_ = opt.lr * (opt.lr_decay**(epoch // opt.epoch_decay))
        trainer.faster_rcnn.set_lr(lr_)

        print('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader),
                                                    total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale)
            if opt.use_cuda:
                img, bbox, label = img.cuda().float(), bbox_.float().cuda(
                ), label_.float().cuda()
            else:
                img, bbox, label = img.float(), bbox_.float(), label_.float()
            # Variable() is a no-op on modern PyTorch; kept for compatibility.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            losses = trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                #trainer.vis.plot_many(trainer.get_meter_data())
                """
                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]),
                                     label_names=dataset.get_class_names()+['BG'])
                trainer.vis.img('gt_img', gt_img)

                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]),
                                       label_names=dataset.get_class_names()+['BG'])
                trainer.vis.img('pred_img', pred_img)

                """
                # rpn confusion matrix(meter)
                #trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                #trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
                #print('Current total loss {}'.format(losses[-1].tolist()))
                trainer.vis.plot('train_total_loss', losses[-1].tolist())

            if (global_step) % opt.snapshot_every == 0:
                snapshot_path = trainer.save(epoch=epoch)
                print("Snapshotted to {}".format(snapshot_path))

        #snapshot_path = trainer.save(epoch=epoch)
        #print("After epoch {}: snapshotted to {}".format(epoch,snapshot_path))

        # Free the last batch's tensors before evaluation to lower GPU peak.
        # NOTE(review): these names are unbound (NameError) if the dataloader
        # yields no batches — confirm the dataset is never empty.
        for lo in losses:
            del lo
        del img, bbox_, label_, scale
        torch.cuda.empty_cache()
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           test_num=min(opt.test_num, len(testset)))
        print(eval_result)
        # TODO: this definitely is not good and will bias evaluation
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'], epoch=epoch)
            print("After epoch {}: snapshotted to {}".format(epoch, best_path))

        trainer.vis.plot('test_map', eval_result['map'])
        del eval_result
        torch.cuda.empty_cache()
Exemple #22
0
def main(**kwargs):
    """Run detection + SE-ResNet classification over the Test700 set.

    Loads a trained Faster R-CNN detector and an SE-ResNet50 classifier,
    predicts boxes for every test image, filters each box with the
    classifier, draws predicted (red) and ground-truth (green) boxes onto
    the image, and appends one result line per image to the result file.
    """
    opt._parse(kwargs)
    # Classifier checkpoint trained elsewhere; 8 tool classes.
    checkpoint = t.load('se_0314_all')
    num_classes = 8
    classifier = t.hub.load(
        'moskomule/senet.pytorch',
        'se_resnet50',
        pretrained=True,
    )
    # Replace the final FC layer to match the task's class count before
    # loading the fine-tuned weights.
    num_ftrs = classifier.fc.in_features
    classifier.fc = nn.Linear(num_ftrs, num_classes)
    classifier.load_state_dict(checkpoint['state_dict'])
    classifier.eval()
    classifier = classifier.cuda()
    save_root = './result/bbox/'
    makeDir()

    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load('checkpoints/fasterrcnn_04081709_0.6626689194895079')

    data_root = '/home/lsm/testSamples700_new/'
    test_file = 'GT707.txt'
    test700 = Test700Dataset(data_root, test_file, opt)
    test_dataloader = data_.DataLoader(test700,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    print('data loaded!')
    # BUGFIX: open the result file with a context manager so it is flushed
    # and closed even if an exception occurs mid-loop (the original only
    # closed it on the happy path).
    with open('result0522.txt', 'w') as result_file:
        for ii, (fn, imgs, sizes, gt_bboxes_) in tqdm(enumerate(test_dataloader)):
            # Ground-truth box is stored (y1, x1, y2, x2); unpack to x/y.
            gt_x1 = int(gt_bboxes_[0][0][1])
            gt_y1 = int(gt_bboxes_[0][0][0])
            gt_x2 = int(gt_bboxes_[0][0][3])
            gt_y2 = int(gt_bboxes_[0][0][2])
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
                imgs, [sizes])
            result_file.write(fn[0])
            img = Image.open(data_root + fn[0]).convert("RGB")
            plt.imshow(img)
            currentAxis = plt.gca()
            for i in range(len(pred_bboxes_[0])):
                bbox = pred_bboxes_[0][i]
                score = pred_scores_[0][i]
                # Predicted boxes are also (y1, x1, y2, x2).
                x1, y1, x2, y2 = bbox[1], bbox[0], bbox[3], bbox[2]
                candidate = img.crop((x1, y1, x2, y2))
                # Second-stage filter: the classifier vetoes a detection by
                # returning class 0.
                decision = decide2(classifier, candidate)
                if decision != 0:
                    rect = patches.Rectangle((x1, y1),
                                             x2 - x1,
                                             y2 - y1,
                                             fill=False,
                                             edgecolor='r',
                                             linewidth=2)
                    currentAxis.add_patch(rect)
                    result_file.write(' ' + toolNameList[decision] + ' ' +
                                      str(x1) + ' ' + str(y1) + ' ' + str(x2) +
                                      ' ' + str(y2))
            # Draw the ground-truth box in green for visual comparison.
            rect = patches.Rectangle((gt_x1, gt_y1),
                                     gt_x2 - gt_x1,
                                     gt_y2 - gt_y1,
                                     fill=False,
                                     edgecolor='g',
                                     linewidth=2)
            currentAxis.add_patch(rect)
            plt.savefig(save_root + fn[0])
            plt.close()
            result_file.write('\n')
# Exemple #23
# 0
def train(**kwargs):
    """Train Faster R-CNN, optionally with knowledge distillation.

    When ``opt.is_distillation`` is on, a first pass over the training set
    stores the teacher's predictions (boxes, labels, features, scores) as
    .npy files; during training these soft targets are reloaded per image
    and handed to ``trainer.train_step`` together with the ground truth.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    testset_all = TestDataset_all(opt, 'test2')
    test_all_dataloader = data_.DataLoader(testset_all,
                                           batch_size=1,
                                           num_workers=opt.test_num_workers,
                                           shuffle=False,
                                           pin_memory=True
                                           )

    tsf = Transform(opt.min_size, opt.max_size)
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    print('model construct completed')

    # Load a previously trained model; the path is configured in config.py.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    # First pass: extract the soft labels needed for distillation.
    if opt.is_distillation == True:
        # NOTE: 'predict_socre' (sic) is the attribute name the project uses.
        opt.predict_socre = 0.3
        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, scale, id_) in tqdm(enumerate(dataloader)):
            if len(gt_bboxes_) == 0:
                continue
            sizes = [sizes[0][0].item(), sizes[1][0].item()]
            pred_bboxes_, pred_labels_, pred_scores_, features_ = trainer.faster_rcnn.predict(imgs, [
                sizes])

            img_file = os.path.join(
                opt.voc_data_dir, 'JPEGImages', id_[0] + '.jpg')
            ori_img = read_image(img_file, color=True)
            img, pred_bboxes_, pred_labels_, scale_ = tsf(
                (ori_img, pred_bboxes_[0], pred_labels_[0]))

            # Drop soft labels that overlap the ground truth too much and
            # discard clearly wrong soft labels.
            pred_bboxes_, pred_labels_, pred_scores_ = py_cpu_nms(
                gt_bboxes_[0], gt_labels_[0], pred_bboxes_, pred_labels_, pred_scores_[0])

            # Persist the soft labels to disk so they don't hog GPU memory.
            np.save('label/' + str(id_[0]) + '.npy', pred_labels_)
            np.save('bbox/' + str(id_[0]) + '.npy', pred_bboxes_)
            np.save('feature/' + str(id_[0]) + '.npy', features_)
            np.save('score/' + str(id_[0]) + '.npy', pred_scores_)

        opt.predict_socre = 0.05
    t.cuda.empty_cache()

    # Show all class label names in visdom.
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr

    for epoch in range(opt.epoch):
        print('epoch=%d' % epoch)

        # Reset the meters / confusion matrices.
        trainer.reset_meters()
        # tqdm wraps the iterator to display a progress bar over the epoch.
        for ii, (img, sizes, bbox_, label_, scale, id_) in tqdm(enumerate(dataloader)):
            if len(bbox_) == 0:
                continue
            scale = at.scalar(scale)
            # Move the batch to the GPU. The actual training happens in
            # train_step below; everything after it is logging/plotting.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            if opt.is_distillation == True:
                # Reload the teacher's stored soft labels for this image.
                teacher_pred_labels = np.load(
                    'label/' + str(id_[0]) + '.npy')
                teacher_pred_bboxes = np.load(
                    'bbox/' + str(id_[0]) + '.npy')
                teacher_pred_features_ = np.load(
                    'feature/' + str(id_[0]) + '.npy')
                teacher_pred_scores = np.load(
                    'score/' + str(id_[0]) + '.npy')
                # Fix the numpy dtypes...
                teacher_pred_bboxes = teacher_pred_bboxes.astype(np.float32)
                teacher_pred_labels = teacher_pred_labels.astype(np.int32)
                teacher_pred_scores = teacher_pred_scores.astype(np.float32)
                # ...convert to torch tensors...
                teacher_pred_bboxes_ = at.totensor(teacher_pred_bboxes)
                teacher_pred_labels_ = at.totensor(teacher_pred_labels)
                teacher_pred_scores_ = at.totensor(teacher_pred_scores)
                teacher_pred_features_ = at.totensor(teacher_pred_features_)
                # ...and move them onto the GPU.
                teacher_pred_bboxes_ = teacher_pred_bboxes_.cuda()
                teacher_pred_labels_ = teacher_pred_labels_.cuda()
                teacher_pred_scores_ = teacher_pred_scores_.cuda()
                teacher_pred_features_ = teacher_pred_features_.cuda()

                # If the Transform in dataset.py flipped the image, flip the
                # soft-label boxes to match (detected by comparing one
                # coordinate against the ground truth).
                if(teacher_pred_bboxes_[0][1] != bbox[0][0][1]):
                    _, o_C, o_H, o_W = img.shape
                    teacher_pred_bboxes_ = flip_bbox(
                        teacher_pred_bboxes_, (o_H, o_W), x_flip=True)

                # NOTE(review): the last argument is the numpy array
                # teacher_pred_scores, not the cuda tensor
                # teacher_pred_scores_ — looks unintentional; confirm.
                losses = trainer.train_step(img, bbox, label, scale, epoch,
                                            teacher_pred_bboxes_, teacher_pred_labels_, teacher_pred_features_, teacher_pred_scores)
            else:
                trainer.train_step(img, bbox, label, scale, epoch)

            # Periodic visdom updates.
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # NOTE(review): teacher_pred_* are only bound when
                # opt.is_distillation is on; this plot would raise NameError
                # otherwise — confirm.
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(teacher_pred_bboxes_),
                                     at.tonumpy(teacher_pred_labels_),
                                     at.tonumpy(teacher_pred_scores_))
                trainer.vis.img('gt_img_all', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores, _ = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # Confusion matrices:
                # rpn confusion matrix(meter)
                trainer.vis.text(
                    str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.text(
                    str(trainer.roi_cm.value().tolist()), win='roi_cm')
                # trainer.vis.img('roi_cm', at.totensor(
                # trainer.roi_cm.value(), False).float())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{},ap:{}, map:{},loss:{}'.format(str(lr_),
                                                        str(eval_result['ap']),
                                                        str(eval_result['map']),
                                                        str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # Save the best result so far and remember its path.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        if epoch == 20:
            trainer.save(best_map='20')
            result = eval(test_all_dataloader,
                          trainer.faster_rcnn, test_num=5000)
            print('20result={}'.format(str(result)))
            # trainer.load(best_path)
            # result=eval(test_all_dataloader,trainer.faster_rcnn,test_num=5000)
            # print('bestmapresult={}'.format(str(result)))
            break

        # Periodically reload the best weights and decay the learning rate
        # (triggers at epoch % 20 == 15).
        # NOTE(review): best_path is unbound if mAP never improved — confirm.
        if epoch % 20 == 15:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
import os
import torch as t
from utils.config import opt
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at
import matplotlib.pyplot as plt
if __name__ == "__main__":
    # Demo: run the converted Chainer model on a sample image and plot boxes.
    img = read_image('misc/demo.jpg')
    # Add a leading batch dimension with [None] before prediction.
    img = t.from_numpy(img)[None]

    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    trainer.load('./chainer_best_model_converted_to_pytorch_0.7053.pth')
    opt.caffe_pretrain = True  # this model was trained from caffe-pretrained model
    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,
                                                            visualize=True)
    # Draw the predicted boxes/labels/scores for the single image in the batch.
    ax = vis_bbox(at.tonumpy(img[0]), at.tonumpy(_bboxes[0]),
                  at.tonumpy(_labels[0]).reshape(-1),
                  at.tonumpy(_scores[0]).reshape(-1))

    # plt.savefig("detection.png")

    print("hoge")
# Exemple #25
# 0
def train(**kwargs):  # **kwargs collects arbitrary keyword arguments into a dict
    """Train Faster R-CNN on the project's Dataset, evaluating each epoch."""
    opt._parse(kwargs)  # parse the passed-in options dict

    dataset = Dataset(opt)  # the project's custom Dataset class
    print('读取数据中...')

    # The DataLoader defines how one batch of data is fetched per step.
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,

                                  num_workers=opt.num_workers) # PyTorch DataLoader: a multi-worker iterator over the dataset, yielding one batch at a time
    testset = TestDataset(opt, split='trainval')

    # Loader for the test split.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()  # build the network
    print('模型构建完毕!')

    trainer = FasterRCNNTrainer(
        faster_rcnn).cuda()  # trainer that computes the losses; .cuda() keeps its tensors on the GPU

    if opt.load_path:  # optionally load a pretrained model
        trainer.load(opt.load_path)
        print('已加载预训练参数 %s' % opt.load_path)
    else:
        print("未引入预训练参数, 随机初始化网络参数")

    trainer.vis.text(dataset.db.label_names, win='labels')  # show the label names in visdom
    best_map = 0  # best mAP seen so far

    for epoch in range(opt.epoch):  # for each epoch

        trainer.reset_meters()  # reset all meters

        # for each sample
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)  # convert to a scalar
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda(
            )  # move to the GPU
            img, bbox, label = Variable(img), Variable(bbox), Variable(
                label)  # wrap in Variables for the (legacy) autograd API
            # TODO
            trainer.train_step(img, bbox, label, scale)  # one training step

            if (ii + 1) % opt.plot_every == 0:  # every `plot_every` iterations
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        # Evaluate on the test loader (runs prediction internally).
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(
                best_map=best_map)  # checkpoint improvements into the checkpoints folder

        if epoch == 9:  # at epoch 9, reload the best model and decay the learning rate
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)

        # if epoch == 13:  # stop training at the 14th epoch
        #     break

    trainer.save(best_map=best_map)
# Exemple #26
# 0
            gen_val = Generator(lines[num_train:],
                                (IMAGE_SHAPE[0], IMAGE_SHAPE[1])).generate()

        epoch_size = num_train
        epoch_size_val = num_val

        #   冻结一定部分训练

        for param in model.extractor.parameters():
            param.requires_grad = False

        #   由于b==0.8所以冻结bn层

        model.freeze_bn()

        train_util = FasterRCNNTrainer(model, optimizer)

        for epoch in range(Init_Epoch, Freeze_Epoch):
            fit_ont_epoch(net, epoch, epoch_size, epoch_size_val, gen, gen_val,
                          Freeze_Epoch, Cuda)
            lr_scheduler.step()

    if True:
        lr = 1e-5
        Freeze_Epoch = 50
        Unfreeze_Epoch = 100

        optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                                 step_size=1,
                                                 gamma=0.95)
# Exemple #27
# 0
def train(**kwargs):
    """Train Faster R-CNN with the custom train loader.

    Supports samples with annotations (dict of 5 keys) and background-only
    samples (no 'bbox'/'label' keys), for which empty box/label arrays are
    synthesized.  Every ``opt.plot_every`` iterations the losses, ground
    truth, predictions and confusion matrices are pushed to visdom, and a
    checkpoint is saved every 10 epochs.
    """
    # opt._parse(kwargs)

    print('load data')
    dataloader = get_train_loader(opt.root_dir,
                                  batch_size=opt.batch_size,
                                  shuffle=opt.shuffle,
                                  num_workers=opt.num_workers,
                                  pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, sample in tqdm(enumerate(dataloader)):
            if len(sample.keys()) == 5:
                # Annotated sample: unpack and move to the GPU.
                img_id, img, bbox_, scale, label_ = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                    sample['label']
                img, bbox, label = img.cuda().float(), bbox_.cuda(
                ), label_.cuda()
                img, bbox, label = Variable(img), Variable(bbox), Variable(
                    label)

            else:
                # Background-only sample: synthesize empty box/label arrays.
                # BUGFIX: also bind bbox_/label_ here — the plotting code
                # below reads bbox_[0] / label_[0], which previously raised
                # NameError when this branch was taken.
                img_id, img, bbox_, scale, label_ = sample['img_id'], sample['image'], np.zeros((1, 0, 4)), \
                                                    sample['scale'], np.zeros((1, 0, 1))
                bbox, label = bbox_, label_
                img = img.cuda().float()
                img = Variable(img)

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        # Periodic checkpoint (every 10 epochs, including epoch 0).
        if epoch % 10 == 0:
            best_path = trainer.save(best_map=best_map)
def train(**kwargs):
    """Train Faster R-CNN for ``opt.epoch`` epochs, evaluating after each."""
    opt._parse(
        kwargs
    )  # parse the kwargs with config.py's opt._parse() to obtain the data paths, which are then handed to Dataset
    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(
        testset,
        batch_size=1,
        num_workers=opt.test_num_workers,
        shuffle=False,
        #pin_memory=True
    )  # pin_memory uses page-locked host memory for faster GPU transfer

    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    # If opt.load_path is set, load the pretrained model from it, then
    # visualize the training labels.
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    # NOTE(review): reads dataset.dataset.label_names here while the other
    # variants use dataset.db — confirm which attribute this Dataset exposes.
    trainer.vis.text(dataset.dataset.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # Training loop; the epoch count (opt.epoch, e.g. 14) is a hyper-parameter
    # predefined in config.py.
    for epoch in range(opt.epoch):
        print('epoch {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()  # first reset all meters in the visualization
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = array_tool.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot the losses
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot the ground-truth bboxes
                ori_img_ = inverse_normalize(array_tool.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, array_tool.tonumpy(bbox_[0]),
                                     array_tool.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot the predicted bboxes
                # call faster_rcnn.predict; results are kept in the
                # underscore-prefixed variables
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(
                    ori_img_, array_tool.tonumpy(_bboxes[0]),
                    array_tool.tonumpy(_labels[0]).reshape(-1),
                    array_tool.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)
                # show rpn_cm, the RPN network's confusion matrix, in visdom
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # show roi_cm, the RoI-head network's confusion matrix, in visdom
                trainer.vis.img(
                    'roi_cm',
                    array_tool.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{}, loss{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)  # push the learning rate and mAP to the log

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:  # at epoch 9 shrink the learning rate to a tenth (opt.lr_decay)
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
# Exemple #29
# 0
def train_val():
    """Train Faster R-CNN with a held-out validation split (10%).

    Evaluates mAP on the validation loader each epoch, checkpoints on
    improvement, and decays the learning rate every 10 epochs.
    """
    print('load data')
    train_loader, val_loader = get_train_val_loader(
        opt.root_dir,
        batch_size=opt.batch_size,
        val_ratio=0.1,
        shuffle=opt.shuffle,
        num_workers=opt.num_workers,
        pin_memory=opt.pin_memory)
    faster_rcnn = FasterRCNNVGG16()
    # faster_rcnn = FasterRCNNResNet50()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    # if opt.load_path:
    #     trainer.load(opt.load_path)
    #     print('load pretrained model from %s' % opt.load_path)

    # trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        tqdm.monitor_interval = 0
        for ii, sample in tqdm(enumerate(train_loader)):
            if len(sample.keys()) == 5:
                # Annotated sample: move image/boxes/labels to the GPU.
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], sample['bbox'], sample['scale'], \
                                                    sample['label']
                img, bbox, label = img.cuda().float(), bbox.cuda(), label.cuda(
                )
                img, bbox, label = Variable(img), Variable(bbox), Variable(
                    label)

            else:
                # Background-only sample: synthesize empty box/label arrays.
                img_id, img, bbox, scale, label = sample['img_id'], sample['image'], np.zeros((1, 0, 4)), \
                                                  sample['scale'], np.zeros((1, 0, 1))
                img = img.cuda().float()
                img = Variable(img)

            # Skip samples without boxes.
            # NOTE(review): for torch tensors, .size is a method, so this
            # comparison only filters the numpy (background) branch — confirm
            # that is intended.
            if bbox.size == 0:
                continue

            scale = at.scalar(scale)
            trainer.train_step(img_id, img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, img_id[0], at.tonumpy(bbox[0]),
                                     at.tonumpy(label[0]))

                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, img_id[0],
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))

                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())

        mAP = eval_mAP(trainer, val_loader)
        trainer.vis.plot('val_mAP', mAP)
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(mAP), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if mAP > best_map:
            best_map = mAP
            best_path = trainer.save(best_map=best_map)
        if epoch == opt.epoch - 1:
            best_path = trainer.save()

        # NOTE(review): best_path may be unbound here if mAP never improved
        # before the first decay epoch — confirm.
        if (epoch + 1) % 10 == 0:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay
def train(**kwargs):
    """Adversarial-robustness run for Faster R-CNN on the training set.

    Iterates over the dataset, optionally perturbing each image with a PGD
    attack (enabled via ``opt.flagadvtrain``), periodically saves side-by-side
    detection visualizations for the clean and adversarial versions of the
    image, and counts how often the two predicted label sets agree exactly.
    Prints aggregate agreement counts and attack timing at the end.

    Args:
        **kwargs: configuration overrides forwarded to ``opt._parse``.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from {}'.format(opt.load_path))

    # trainer.vis.text(dataset.db.label_names, win='labels')
    adversary = None
    if opt.flagadvtrain:
        print("flagadvtrain turned: Adversarial training!")
        # PGD attack driven by the trainer's loss; eps/alpha are in [0, 1]
        # pixel scale (16/255, 3/255), 4 iterations.
        atk = PGD.PGD(trainer, eps=16 / 255, alpha=3 / 255, steps=4)
        # atk = torchattacks.PGD(trainer.faster_rcnn, eps=16, alpha=3, steps=4)
        # adversary = PGDAttack(trainer.faster_rcnn, loss_fn=nn.CrossEntropyLoss(), eps=16, nb_iter=4, eps_iter=3,
        #                       rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False)
    best_map = 0
    lr_ = opt.lr
    normal_total_loss = []
    adv_total_loss = []
    total_time = 0.0  # cumulative wall-clock seconds spent generating attacks
    total_imgs = 0    # images that reached the plot/compare branch
    true_imgs = 0     # images whose clean and adversarial label sets match
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Keep an unperturbed copy for the clean-vs-adversarial comparison.
            temp_img = copy.deepcopy(img).cuda()
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

            if opt.flagadvtrain:
                before_time = time.time()
                img = atk(img, bbox, label, scale)
                after_time = time.time()
                # BUGFIX: accumulate attack time inside the guard. This line
                # previously sat outside the `if`, raising NameError on
                # `after_time` whenever opt.flagadvtrain was off.
                total_time += after_time - before_time
                # with ctx_noparamgrad_and_eval(trainer.faster_rcnn):
                #     img = adversary.perturb(img, label)
                # print("Adversarial training done!")

            # print("Normal training starts\n")
            # trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # adv_total_loss.append(trainer.get_meter_data()["total_loss"])
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                # trainer.vis.plot_many(trainer.get_meter_data())

                # Clean image, de-normalized back to 0-255 RGB for display.
                temp_ori_img_ = inverse_normalize(at.tonumpy(temp_img[0]))
                # img2jpg(temp_ori_img_, "imgs/orig_images/", "gt_img{}".format(ii))

                # Adversarial image, de-normalized the same way.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                # img2jpg(ori_img_, "imgs/adv_images/", "adv_img{}".format(ii))

                # trainer.vis.img('gt_img', gt_img)

                # Predictions on the adversarial image.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)

                fig1 = plt.figure()
                ax1 = fig1.add_subplot(1, 1, 1)
                final1 = (ori_img_.transpose(1, 2, 0).astype(np.uint8))  # CHW -> HWC for imshow
                ax1.imshow(final1)

                gt_img = visdom_bbox(ax1, at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]))
                fig1.savefig("imgs/adv_images/adv_img{}".format(ii))
                plt.close()

                # Predictions on the clean image.
                _temp_bboxes, _temp_labels, _temp_scores = trainer.faster_rcnn.predict([temp_ori_img_], visualize=True)

                fig2 = plt.figure()
                ax2 = fig2.add_subplot(1, 1, 1)
                final2 = (temp_ori_img_.transpose(1, 2, 0).astype(np.uint8))
                ax2.imshow(final2)

                gt_img = visdom_bbox(ax2, at.tonumpy(_temp_bboxes[0]), at.tonumpy(_temp_labels[0]))
                fig2.savefig("imgs/orig_images/gt_img{}".format(ii))
                plt.close()
                # img2jpg(temp_gt_img, "imgs/orig_images/", "gt_img{}".format(ii))

                # print("gt labels is {}, pred_orig_labels is {} and pred_adv_labels is {}".format(label_, _labels, _temp_labels))
                total_imgs += 1
                # BUGFIX: predict() returns one array per input image, so the
                # outer list always has length 1; test the per-image label
                # array for emptiness instead of the list itself.
                if len(_temp_labels[0]) == 0:
                    continue
                # BUGFIX: the original used `(...).all() is True`, which is
                # always False because numpy's .all() returns np.bool_, never
                # the Python `True` singleton — so true_imgs never incremented.
                if (_labels[0].shape[0] == _temp_labels[0].shape[0]
                        and bool((_labels[0] == _temp_labels[0]).all())):
                    true_imgs += 1

                # rpn confusion matrix(meter)
                # trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                # trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())

        # eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num,
        #                    flagadvtrain=opt.flagadvtrain, adversary=atk)# adversary=adversary)

        # trainer.vis.plot('test_map', eval_result['map'])
        # lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        # log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
        #                                           str(eval_result['map']),
        #                                           str(trainer.get_meter_data()))
        # print(log_info)
        # # trainer.vis.log(log_info)
        #
        # if eval_result['map'] > best_map:
        #     best_map = eval_result['map']
        #     best_path = trainer.save(best_map=best_map)
        # if epoch == 9:
        #     trainer.load(best_path)
        #     trainer.faster_rcnn.scale_lr(opt.lr_decay)
        #     lr_ = lr_ * opt.lr_decay

        if epoch == 0:
            # NOTE(review): debug stop after the first epoch; the
            # `epoch == 13` break below is unreachable while this is in place.
            break

        if epoch == 13:
            break

    print("Total number of images is {}".format(total_imgs))
    print("True images is {}".format(true_imgs))
    print("Total time is {}".format(total_time))
    # Guard the average: total_imgs is 0 when no batch index ever hit the
    # plot_every branch, and the original division raised ZeroDivisionError.
    if total_imgs:
        print("Avg time is {}".format(total_time / total_imgs))
import os
import torch as t
from utils.config import opt
from model.faster_rcnn_vgg16 import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import  read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at

from matplotlib import pyplot as plt

# --- Demo script: single-image inference with a pretrained Faster R-CNN ---

# Load the test image and add a leading batch dimension so the tensor is
# batched the way predict() expects.
# NOTE(review): assumes read_image returns a CHW numpy array — confirm
# against data.util.read_image.
img = read_image('/home/fengkai/dog.jpg')
img = t.from_numpy(img)[None]

# Build the detector and wrap it in the trainer, which owns load/save.
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()

# Restore pretrained weights from a hard-coded checkpoint path, then mark
# the normalization scheme to match how that checkpoint was trained.
trainer.load('/home/fengkai/PycharmProjects/simple-faster-rcnn-pytorch-master/simple-faster-rcnn/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth')
opt.caffe_pretrain=False # this model was trained from torchvision-pretrained model

# Run detection on the single-image batch.
# NOTE(review): visualize=True presumably switches the model to its
# evaluation/visualization preset — confirm in FasterRCNN.predict.
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,visualize=True)

# Draw the first (only) image's boxes, labels, and scores, then display.
vis_bbox(at.tonumpy(img[0]),
         at.tonumpy(_bboxes[0]),
         at.tonumpy(_labels[0]).reshape(-1),
         at.tonumpy(_scores[0]).reshape(-1))
plt.show()