コード例 #1
0
def train(**kwargs):
    """Train Faster R-CNN (VGG16 backbone) on the VOC-style dataset from config.py.

    Keyword arguments override fields of the global ``opt`` config object.
    Each epoch evaluates test-set mAP, keeps the best checkpoint, decays the
    learning rate once at epoch 9, and stops after epoch 13.
    """
    # Parse keyword overrides with opt._parse() (config.py); the resulting
    # paths/settings configure the dataset below.
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')

    # VOCBboxDataset reads the images and applies resizing and random flips.
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    # Test loader: fixed order (shuffle=False), page-locked host memory,
    # opt.test_num_workers worker processes.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16() # build the detection model
    print('model construct completed')
    # Wrap FasterRCNNVGG16 in FasterRCNNTrainer (losses, optimizer, meters)
    # and move it to the GPU.
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path: # resume if a checkpoint path was supplied
        trainer.load(opt.load_path) # load pretrained weights
        print('load pretrained model from %s' % opt.load_path)
    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    # Main training loop; opt.epoch is a hyper-parameter preset in config.py.
    for epoch in range(opt.epoch):
        print ("---------------", epoch, " in ", opt.epoch, "-------------")
        trainer.reset_meters() # reset loss meters / confusion matrices for this epoch
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Move the batch to the GPU; bbox_/label_ keep their CPU copies
            # for the plotting code below.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # One optimization step: forward, losses, backward, optimizer.step()
            # (see trainer.train_step in trainer.py).
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Every plot_every batches: if the debug flag-file exists,
                # drop into the ipdb debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # Upload the running loss meters to visdom.
                trainer.vis.plot_many(trainer.get_meter_data())

                # Draw the ground-truth bounding boxes.
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                # The image is de-normalized with inverse_normalize() (dataset
                # module) before being rendered via visdom_bbox.
                trainer.vis.img('ground_truth_img', gt_img)

                # Plot predicted bboxes: show the image with predicted
                # boxes, classes and scores.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('predict_img', pred_img)

                # Show the RPN confusion matrix as text in visdom.
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # Show the RoI confusion matrix as an image.
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        # Plot the per-epoch test-set mAP.
        trainer.vis.plot('test_map', eval_result['map'])
        # Read the current learning rate back from the optimizer.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        # Log lr / mAP / loss meters to the visdom console.
        trainer.vis.log(log_info)

        # Keep the checkpoint with the best mAP seen so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)

        # At epoch 9: reload the best checkpoint and scale the learning rate
        # by opt.lr_decay (i.e. one tenth of its previous value).
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break # stop training
コード例 #2
0
def train(**kwargs):
    """Train Faster R-CNN on the custom dataset under /home/lsm/TrainSet/.

    Keyword arguments override fields of the global ``opt`` config. Each
    epoch evaluates test-set mAP, appends one line to log.txt, keeps the
    best checkpoint, decays the LR at epochs 9 and 19, and stops after
    epoch 50.
    """
    opt._parse(kwargs)
    data_root = "/home/lsm/TrainSet/"
    train_file = "train.txt"
    test_file = "test.txt"
    trainset = MyDataset(data_root, train_file, opt)
    testset = TestDataset(data_root, test_file, opt)
    print('load data')
    dataloader = data_.DataLoader(trainset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    best_map = 0
    lr_ = opt.lr
    # BUGFIX: the log file was opened without a context manager (never closed
    # if training raised) and entries were written without a trailing newline,
    # so all log lines ran together.
    with open('log.txt', 'w') as f:
        for epoch in range(opt.epoch):
            trainer.reset_meters()
            print("epoch " + str(epoch) + " ...")
            for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
                scale = at.scalar(scale)
                # Move the batch to the GPU; bbox_/label_ keep CPU copies.
                img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
                trainer.train_step(img, bbox, label, scale)

            eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

            # Read the current learning rate back from the optimizer.
            lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
            log_info = 'lr:{}, map:{},loss:{}'.format(
                str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
            print(log_info)
            f.write(log_info + '\n')
            f.flush()  # make progress visible while training is still running

            # Keep the checkpoint with the best mAP seen so far.
            if eval_result['map'] > best_map:
                best_map = eval_result['map']
                best_path = trainer.save(best_map=best_map)
            # Decay the learning rate at epochs 9 and 19, restarting each
            # time from the best checkpoint so far.
            if epoch == 9:
                trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay
            if epoch == 19:
                trainer.load(best_path)
                trainer.faster_rcnn.scale_lr(opt.lr_decay)
                lr_ = lr_ * opt.lr_decay

            if epoch == 50:
                break
コード例 #3
0
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with visdom plotting every plot_every steps.

    Keyword arguments override fields of the global ``opt`` config. Evaluates
    test-set mAP each epoch, saves the best checkpoint, decays the learning
    rate once at epoch 9, and stops at epoch 13.
    """
    opt._parse(kwargs)

    data_set = TrainDataset()
    print('load data.')
    data_loader = data_.DataLoader(data_set, batch_size=1, shuffle=True)
    testset = TestDataset()
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16()
    print('model construct.')

    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    lr = opt.lr
    best_map = 0

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox, label, scale) in tqdm(enumerate(data_loader)):
            # NOTE(review): only img is moved to the GPU here; presumably
            # train_step handles bbox/label placement — confirm in trainer.py.
            img = img.cuda()
            trainer.train_step(img, bbox, label, scale)
            if (ii + 1) % opt.plot_every == 0:
                # BUGFIX: ipdb.set_trace() was unconditional, halting training
                # at every plot interval; gate it on the debug flag-file as the
                # sibling training scripts do.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                # plot running losses
                trainer.vis.plot_many(trainer.get_meter_data())
                # plot ground-truth boxes on the de-normalized image
                ori_img = inverse_normalize(img[0].cpu().numpy())
                gt_img = visdom_bbox(ori_img, bbox[0].numpy(),
                                     label[0].numpy())
                trainer.vis.img('gt_img', gt_img)
                # plot predicted boxes
                pred_bbox, pred_label, pred_score = trainer.faster_rcnn.predict(
                    [ori_img], visualize=True)
                pred_img = visdom_bbox(ori_img, pred_bbox[0], pred_label[0],
                                       pred_score[0])
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # RoI confusion matrix
                trainer.vis.img('roi_cm', trainer.roi_cm.conf.float().cpu())

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        # Keep the checkpoint with the best mAP seen so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # At epoch 9, reload the best checkpoint and decay the LR once.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr = lr * opt.lr_decay

        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr), str(eval_result['map']), str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            print('finish!')
            break
コード例 #4
0
# Notebook cell: single-image inference demo for a trained Faster R-CNN.
# NOTE: `%matplotlib inline` is an IPython magic — this block only runs inside
# a Jupyter/Colab notebook, not as a plain Python script.
import os
import torch as t
from utils.config import Config
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import  read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at
%matplotlib inline



# Load the demo image from Google Drive and add a batch dimension
# (read_image returns a numpy array; unsqueeze makes it 1 x C x H x W).
img_name = 'demo.jpg'
raw_img = read_image(f'/content/drive/My Drive/lq_det_hyper/lq_det/misc/{img_name}')
raw_img = t.from_numpy(raw_img).unsqueeze(dim=0)



# Build the model and its trainer wrapper (visdom disabled), on the GPU.
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn, using_visdom=False).cuda()


# Load the checkpoint; the flag tells the pipeline to use caffe-style
# image normalization, since this model was trained from a caffe-pretrained VGG.
trainer.load('/content/drive/My Drive/lq_det_hyper/lq_det/ckpt/fasterrcnn_12222105_0.712649824453_caffe_pretrain.pth')
Config.caffe_vgg=True # this model was trained from caffe-pretrained model
# Predict boxes/labels/scores for the single image and visualize them.
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(raw_img, visualize=True)
img, bbox, label, score = (at.tonumpy(raw_img[0]), at.tonumpy(_bboxes[0]), at.tonumpy(_labels[0]).reshape(-1), at.tonumpy(_scores[0]).reshape(-1))
vis_bbox(img, bbox, label, score)


import matplotlib.pyplot as plt
plt.show()
コード例 #5
0
ファイル: test.py プロジェクト: zuojiale/Fiber-detection-code
def test(**kwargs):
    """Detect fibers in a large image by tiling it through Faster R-CNN.

    Splits the input image into a 12 x 15 grid of tiles, runs detection on
    each tile, converts per-tile box coordinates to absolute coordinates of
    the original image, accumulates them in ``bboxes.txt``, and finally
    visualizes every detected box on the full image via visdom.
    """
    #opt._parse(kwargs)
    opt.env = 'test'
    opt.caffe_pretrain = True
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()

    trainer.load(
        'C:/Users/86188/Desktop/fiber/checkpoints/fasterrcnn_05031937_0.9089769879243565'
    )
    print('成功加载神经网络')

    # 1. Load the image to be detected.
    img2 = Image.open(test_img_path)
    img_end = read_image(test_img_path)
    img_end = t.from_numpy(img_end)[None]

    # 2. Split the image into tiles written to save_path.
    ImageCut.imagecut(img2, 12, 15, save_path)
    # BUGFIX: remove the previous results file only if it exists — the bare
    # os.remove() crashed with FileNotFoundError on the first run.
    if os.path.exists('bboxes.txt'):
        os.remove('bboxes.txt')

    # 3. Run detection on each tile in turn.
    for filename in os.listdir(save_path):
        img = read_image(os.path.join(save_path, filename))
        img = t.from_numpy(img)[None]

        # NOTE(review): caffe_pretrain is set True above and False here —
        # confirm which normalization the checkpoint actually expects.
        opt.caffe_pretrain = False
        _bboxes, _labels, _scores = trainer.faster_rcnn.predict(img,
                                                                visualize=True)
        bboxes = at.tonumpy(_bboxes[0])
        scores = at.tonumpy(_scores[0])
        labels = at.tonumpy(_labels[0])

        # 4. Recover the tile's grid position from its file name,
        #    e.g. "3-7.png" -> row 3, col 7.
        fn = filename.split('-')
        row = fn[0]
        col1 = fn[1]
        col2 = col1.split('.')
        col = col2[0]
        print(row, col)
        # Show the per-tile detection result.
        part_img = visdom_bbox(at.tonumpy(img[0]), bboxes, labels.reshape(-1),
                               scores.reshape(-1))
        trainer.vis.img('part_img', part_img)

        # Convert tile-local coordinates to absolute image coordinates
        # (tile pitch: 500.333333 along rows, 528.0 along columns).
        bboxes[:, 0] = bboxes[:, 0] + int(row) * 500.333333  # x coordinate
        bboxes[:, 1] = bboxes[:, 1] + int(col) * 528.000000  # y coordinate
        bboxes[:, 2] = bboxes[:, 2] + int(row) * 500.333333  # x coordinate
        bboxes[:, 3] = bboxes[:, 3] + int(col) * 528.000000  # y coordinate

        # 5. Append the absolute coordinates to the results file (binary
        #    append so successive np.savetxt calls accumulate).
        with open('bboxes.txt', 'ab') as f:
            np.savetxt(f, bboxes, fmt="%f", delimiter=",")
        print('绝对坐标保存成功')

    # 6. Read back all absolute boxes and show them on the full image.
    # BUGFIX: the file handle passed to np.loadtxt was never closed; use a
    # context manager.
    with open('bboxes.txt', 'rb') as fh:
        all_bboxes = np.loadtxt(fh, delimiter=",")
    test_img = visdom_bbox(
        at.tonumpy(img_end[0]),
        all_bboxes,
    )
    trainer.vis.img('test_img', test_img)
コード例 #6
0
def train(**kwargs):
    """Train Faster R-CNN (VGG16) with both visdom and TensorBoard logging.

    Keyword arguments override fields of the global ``opt`` config. Each
    epoch evaluates test-set mAP, keeps the best checkpoint, and decays the
    learning rate once at epoch 9. Scalars are written to outputs/logs/.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    writer = SummaryWriter('outputs/logs/')
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in enumerate(dataloader):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # NOTE: Variable() is a no-op on PyTorch >= 0.4; kept for
            # compatibility with the environment this was written for.
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot running losses
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground-truth boxes on the de-normalized image
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted boxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # RoI confusion matrix
                trainer.vis.img(
                    'roi_cm',
                    at.totensor(trainer.roi_cm.conf, False).float())
            # Print a console loss summary every 5 iterations.
            if ii % 5 == 4:
                meter_data_trainer = trainer.get_meter_data()
                rpn_loc_loss = meter_data_trainer['rpn_loc_loss']
                rpn_cls_loss = meter_data_trainer['rpn_cls_loss']
                roi_loc_loss = meter_data_trainer['roi_loc_loss']
                roi_cls_loss = meter_data_trainer['roi_cls_loss']
                total_loss = meter_data_trainer['total_loss']
                print(
                    'lr:{:>7.4f}, rpn_loc_loss:{:>7.6f}, rpn_cls_loss:{:>7.6f}, roi_loc_loss:{:>7.6f}, roi_cls_loss:{:>7.6f}, total_loss:{:>7.6f}'
                    .format(lr_, rpn_loc_loss, rpn_cls_loss, roi_loc_loss,
                            roi_cls_loss, total_loss))
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        # Keep the checkpoint with the best mAP seen so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        # At epoch 9, reload the best checkpoint and decay the LR once.
        if epoch == 9:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        trainer.vis.plot('test_map', eval_result['map'])
        log_info = 'lr:{:>10.4f}, map:{}, loss:{}'.format(
            lr_, str(eval_result['map']), str(trainer.get_meter_data()))

        print(log_info)
        meter_data_trainer = trainer.get_meter_data()
        rpn_loc_loss = meter_data_trainer['rpn_loc_loss']
        rpn_cls_loss = meter_data_trainer['rpn_cls_loss']
        roi_loc_loss = meter_data_trainer['roi_loc_loss']
        roi_cls_loss = meter_data_trainer['roi_cls_loss']
        total_loss = meter_data_trainer['total_loss']

        # BUGFIX: pass `epoch` as global_step — without it every epoch was
        # logged at the same step and overwrote the previous point.
        writer.add_scalar("Learning Rate:", lr_, epoch)
        writer.add_scalar("Train map:", eval_result['map'], epoch)
        writer.add_scalar("Rpn Loc Loss:", rpn_loc_loss, epoch)
        writer.add_scalar("Rpn Cls Loss:", rpn_cls_loss, epoch)
        writer.add_scalar("Roi Loc Loss:", roi_loc_loss, epoch)
        writer.add_scalar("Roi Cls Loss:", roi_cls_loss, epoch)
        # BUGFIX: "Total Loss:" previously logged rpn_loc_loss (copy-paste).
        writer.add_scalar("Total Loss:", total_loss, epoch)
        trainer.vis.log(log_info)

    writer.close()
コード例 #7
0
def train(**kwargs):
    """Overfit Faster R-CNN on one Carrada sequence/frame (debug training).

    Deliberately restricts the train and validation sets to the same single
    sequence (and a single frame within it) to check that the model can
    overfit. Logs losses/images/mAP to TensorBoard and decays the learning
    rate with an ExponentialLR scheduler every opt.lr_step epochs.
    """
    opt._parse(kwargs)

    carrada = download('Carrada')
    train_dataset = Carrada().get('Train')
    # Try to overfit a single sequence: train and val use the same data.
    train_set = dict()
    train_set['2019-09-16-13-20-20'] = train_dataset['2019-09-16-13-20-20']
    val_set = train_set
    train_seqs = SequenceCarradaDataset(train_set)
    val_seqs = SequenceCarradaDataset(val_set)

    train_seqs_loader = data_.DataLoader(train_seqs,
                                         batch_size=1,
                                         shuffle=True,
                                         num_workers=opt.num_workers)

    val_seqs_loader = data_.DataLoader(
        val_seqs,
        batch_size=1,
        shuffle=False,
        num_workers=opt.num_workers)

    # Backbone choice (VGG16 / ResNet101 variants left for reference).
    # faster_rcnn = FasterRCNNVGG16(n_fg_class=3)
    # faster_rcnn = FasterRCNNRESNET101(n_fg_class=3)
    faster_rcnn = FasterRCNNRESNET18(n_fg_class=3)
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    scheduler = ExponentialLR(trainer.faster_rcnn.optimizer, gamma=0.9)
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    writer_path = os.path.join(opt.logs_path, opt.model_name)
    os.makedirs(writer_path, exist_ok=True)
    writer = SummaryWriter(writer_path)
    iteration = 0
    best_map = 0
    lr_ = opt.lr

    for epoch in range(opt.epoch):
        print('Processing epoch: {}/{}'.format(epoch, opt.epoch))
        trainer.reset_meters()
        for n_seq, sequence_data in tqdm(enumerate(train_seqs_loader)):
            seq_name, seq = sequence_data
            # Overfit a single frame of the sequence.
            # seq = [seq[155]] # large object
            seq = [seq[115]]  # medium object
            # seq = [seq[28]] # small object
            path_to_frames = os.path.join(carrada, seq_name[0])
            train_frame_set = CarradaDataset(opt, seq, 'box', opt.signal_type,
                                             path_to_frames)
            train_frame_loader = data_.DataLoader(train_frame_set,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=opt.num_workers)

            for ii, (img, bbox_, label_,
                     scale) in tqdm(enumerate(train_frame_loader)):
                iteration += 1
                scale = at.scalar(scale)
                img, bbox, label = img.cuda().float(), bbox_.cuda(
                ), label_.cuda()
                img = normalize(img)

                # Optional debug pass: stop inside train_step every
                # opt.debug_step epochs.
                if opt.debug_step and (epoch + 1) % opt.debug_step == 0:
                    trainer.train_step(img, bbox, label, scale, stop=True)
                else:
                    trainer.train_step(img, bbox, label, scale)

                # Log the running losses every plot_every iterations.
                if (iteration + 1) % opt.plot_every == 0:
                    if os.path.exists(opt.debug_file):
                        ipdb.set_trace()

                    train_results = trainer.get_meter_data()
                    writer.add_scalar('Losses/rpn_loc',
                                      train_results['rpn_loc_loss'], iteration)
                    writer.add_scalar('Losses/rpn_cls',
                                      train_results['rpn_cls_loss'], iteration)
                    writer.add_scalar('Losses/roi_loc',
                                      train_results['roi_loc_loss'], iteration)
                    writer.add_scalar('Losses/roi_cls',
                                      train_results['roi_cls_loss'], iteration)
                    writer.add_scalar('Losses/total',
                                      train_results['total_loss'], iteration)

                # Log ground-truth and predicted images every img_every steps.
                if (iteration + 1) % opt.img_every == 0:
                    ori_img_ = at.tonumpy(img[0])
                    gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]),
                                         at.tonumpy(label_[0]))
                    gt_img_grid = make_grid(torch.from_numpy(gt_img))
                    writer.add_image('Ground_truth_img', gt_img_grid,
                                     iteration)

                    _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                        [ori_img_], opt.signal_type, visualize=True)
                    pred_img = visdom_bbox(ori_img_, at.tonumpy(_bboxes[0]),
                                           at.tonumpy(_labels[0]).reshape(-1),
                                           at.tonumpy(_scores[0]))
                    pred_img_grid = make_grid(torch.from_numpy(pred_img))
                    writer.add_image('Predicted_img', pred_img_grid, iteration)

                # Periodic evaluation on the training sequences.
                if opt.train_eval and (iteration + 1) % opt.train_eval == 0:
                    train_eval_result, train_best_iou = eval(
                        train_seqs_loader, faster_rcnn, opt.signal_type)
                    writer.add_scalar('Train/mAP', train_eval_result['map'],
                                      iteration)
                    writer.add_scalar('Train/Best_IoU', train_best_iou,
                                      iteration)
                if opt.debug_step and (epoch + 1) % opt.debug_step == 0:
                    _, _ = eval(train_seqs_loader,
                                faster_rcnn,
                                opt.signal_type,
                                stop=True)

        eval_result, best_iou = eval(val_seqs_loader,
                                     faster_rcnn,
                                     opt.signal_type,
                                     test_num=opt.test_num)

        writer.add_scalar('Validation/mAP', eval_result['map'], iteration)
        writer.add_scalar('Validation/Best_IoU', best_iou, iteration)
        # NOTE(review): scheduler.get_lr() is deprecated in newer PyTorch in
        # favor of get_last_lr(); kept for compatibility with the original
        # environment — confirm the installed torch version before changing.
        lr_ = scheduler.get_lr()[0]
        writer.add_scalar('learning_rate', lr_, iteration)

        log_info = 'lr:{}, map:{},loss:{}'.format(
            str(lr_), str(eval_result['map']), str(trainer.get_meter_data()))
        print(log_info)
        if eval_result['map'] > best_map:
            # BUGFIX: best_map was never updated before, so the "best"
            # checkpoint was rewritten on every epoch whose mAP exceeded 0.
            best_map = eval_result['map']
            best_path = trainer.save(best_val_map=best_map)

        # Apply the exponential LR decay every lr_step epochs.
        if (epoch + 1) % opt.lr_step == 0:
            scheduler.step()
コード例 #8
0
ファイル: train.py プロジェクト: Drchip61/firsttry
def train(**kwargs):
    """Train Faster R-CNN (VGG16) without visdom (all plotting disabled).

    Keyword arguments override fields of the global ``opt`` config. Each
    epoch evaluates test-set mAP, keeps the best checkpoint, decays the
    learning rate every 2 epochs, and stops after epoch 13.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    # Train loader: batch size 1, shuffled, pinned host memory.
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    # Test loader: fixed order, pinned host memory.
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    # Wrap the model in the trainer (losses, optimizer, meters), on the GPU.
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)
    #trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()  # reset loss meters / confusion matrices
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            # Move the batch to the GPU; one optimization step per image.
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # If the debug flag-file exists, drop into the debugger.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()
                '''
                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot groud truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicti bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict([ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
                '''
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        print(eval_result)
        #trainer.vis.plot('test_map', eval_result['map'])
        # Read the current learning rate back from the optimizer.
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        # NOTE(review): log_info is built but only printed via eval_result
        # above — it is never logged anywhere since vis.log is commented out.
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        #trainer.vis.log(log_info)

        # Keep the checkpoint with the best mAP seen so far.
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)


        # Every 2 epochs: reload the best checkpoint and decay the LR.
        if (epoch+1) % 2 == 0:
            trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
コード例 #9
0
def train(**kwargs):
    """Train Faster R-CNN for a fixed 7 epochs, then evaluate once and save.

    Unlike the sibling scripts, this variant's dataloader yields a 5-tuple
    including the original (un-normalized) image, decays the LR once at
    epoch 4, and only evaluates on the test set after all training finishes.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  # pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=2,
                                       shuffle=False, \
                                       # pin_memory=True
                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    # Fixed 7 epochs (opt.epoch is not consulted here).
    for epoch in range(7):
        trainer.reset_meters()
        # NOTE: this dataloader also yields ori_img, used below for predict().
        for ii, (img, bbox_, label_, scale, ori_img) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            # NOTE(review): the returned losses are captured but never used.
            losses = trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Debugger hook disabled (set_trace commented out).
                if os.path.exists(opt.debug_file):
                    # ipdb.set_trace()
                    pass

                # plot running losses
                trainer.vis.plot_many(trainer.get_meter_data())

                # De-normalize the input back to [0, 255] for display
                # (inverse of the mean/std normalization: x * 0.225 + 0.45).
                # NOTE(review): ori_img_ is computed but unused — the gt-image
                # plotting below is commented out.
                ori_img_ = (img * 0.225 + 0.45).clamp(min=0, max=1) * 255
                # gt_img = visdom_bbox(at.tonumpy(ori_img_)[0], 
                #                     at.tonumpy(bbox_)[0], 
                #                     label_[0].numpy())
                # trainer.vis.img('gt_img', gt_img)

                # Predict on the original (un-normalized) image batch.
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(ori_img,visualize=True)
                # pred_img = visdom_bbox( at.tonumpy(ori_img[0]), 
                #                         at.tonumpy(_bboxes[0]),
                #                         at.tonumpy(_labels[0]).reshape(-1), 
                #                         at.tonumpy(_scores[0]))
                # trainer.vis.img('pred_img', pred_img)

                # RPN confusion matrix (meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()), win='rpn_cm')
                # RoI confusion matrix
                trainer.vis.img('roi_cm', at.totensor(trainer.roi_cm.conf, False).float())
        # Decay the learning rate once, at epoch 4.
        if epoch==4:
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

    # Single final evaluation over the whole test set (test_num=1e100).
    eval_result = eval(test_dataloader, faster_rcnn, test_num=1e100)
    print('eval_result')
    trainer.save(mAP=eval_result['map'])
コード例 #10
0
class Detector:
    """Inference-only wrapper around a trained FasterRCNN-VGG16 model."""

    def __init__(self, model_path, useGPU, n_fg_classes=1):
        '''
        Creates a new detection model using the weights
        stored in the file MODEL_PATH and initializes the GPU
        if USEGPU is set to true.
        MODEL_PATH: path to a trained detection model.
        USEGPU: if true, the GPU will be used for faster computations.
        '''

        torch.set_num_threads(1)
        opt.load_path = model_path
        self.faster_rcnn = FasterRCNNVGG16(n_fg_class=n_fg_classes)
        self.trainer = FasterRCNNTrainer(self.faster_rcnn,
                                         n_fg_class=n_fg_classes)
        if useGPU:
            self.trainer = self.trainer.cuda()
        # Restore the trained weights from the checkpoint file.
        checkpoint = torch.load(opt.load_path)
        self.trainer.load(checkpoint)
        self.transforms = transforms.ToTensor()
        self.useGPU = useGPU

    def predict_image(self, img, topk=1):
        '''
        Detects objects in the provided testing images.
        IMG: PIL image fitting the input of the trained model
        TOPK: the number of bounding boxes to return. We return the
        most confident bounding boxes first.

        RETURNs: (BBOXES, CONFS) where BBOXES is a n x 4 array,
        where each line corresponds to one bounding box. The
        bounding box coordniates are stored in the format
        [x_min, y_min, x_max, y_max], where x corresponds to the width
        and y to the height. CONFS are the confidence values for
        each bounding box and are a n x m array. Each row corresponds
        to the bounding box in the same row of BBOXES and provides
        the scores for the m classes, that the model was trained to detect.
        '''

        boxes, _, confidences = self._run_prediction(img)
        # Model emits [y_min, x_min, y_max, x_max]; reorder to x/y pairs.
        return boxes[:topk, [1, 0, 3, 2]], confidences[:topk]

    def annotate_image(self, img, topk):
        '''
        Detects objects in the provided testing images.
        IMG: PIL image fitting the input of the trained model
        TOPK: the number of bounding boxes to return. We return the
        most confident bounding boxes first.

        RETURNS: IMG: a PIL image with the detected bounding boxes
        annotated as rectangles.
        '''

        boxes, labels, _ = self._run_prediction(img)
        canvas = PIL.ImageDraw.Draw(img)
        palette = [(255, 0, 0), (0, 255, 0)]
        for box, cls in zip(boxes, labels):
            # Swap from (y, x, y, x) to (x, y, x, y) for PIL drawing.
            canvas.rectangle(box[[1, 0, 3, 2]], outline=palette[cls])
        return img

    def _run_prediction(self, img, confidence_threshold=0.7):
        '''
        Prepare an input image for CNN processing and run the detector.
        IMG: PIL image

        RETURN: (bboxes, labels, scores) for detections whose score
        exceeds CONFIDENCE_THRESHOLD; the image is normalized according
        to data.dataset.caffe_normalize before prediction.
        '''

        rgb = img.convert('RGB')
        pixels = np.asarray(rgb, dtype=np.float32)
        if pixels.ndim == 2:
            # Grayscale (H, W) -> (1, H, W)
            pixels = pixels[np.newaxis]
        else:
            # Color (H, W, C) -> (C, H, W)
            pixels = pixels.transpose((2, 0, 1))
        normalized = data.dataset.caffe_normalize(pixels / 255.)
        batch = torch.from_numpy(normalized).unsqueeze(0)
        if self.useGPU:
            batch = batch.cuda()

        # This preset filters bounding boxes with a score < *confidence_threshold*
        # and has to be set everytime before using predict()
        self.faster_rcnn.use_preset('visualize')
        bboxes, labels, scores = self.faster_rcnn.predict(
            batch, [(pixels.shape[1], pixels.shape[2])])
        keep = np.array(scores[0]) > confidence_threshold
        return bboxes[0][keep], labels[0][keep], scores[0][keep]
コード例 #11
0
def train(**kwargs):
    """Train FasterRCNN-VGG16 on the GAN-oversampled dataset variant.

    Keyword arguments override fields of the global ``opt`` config.
    Writes per-epoch evaluation rows to a CSV results file, persists the
    loss curves via ``save_losses``/``save_map`` and saves the checkpoint
    with the best mAP.
    """
    opt._parse(kwargs)

    results_file = 'fasterrcnn-oversampled-gan-class-2.csv'
    # BUGFIX: the previous code opened an existing file with "w+", which
    # truncates it — silently destroying earlier results while also not
    # rewriting the header.  Append to an existing file instead; only a
    # freshly created one needs the CSV header row.
    if os.path.exists(results_file):
        results_fh = open(results_file, "a")
    else:
        results_fh = open(results_file, "w")
        writer = csv.DictWriter(results_fh, fieldnames=init_cols())
        writer.writeheader()

    # Dataset variant: GAN-oversampled, class v2 (earlier experiments used
    # the analogous *Oversampled* id/img/anno directory triples).
    id_file_dir = 'ImageSets/Main/train_oversampled_gan_class_v2.txt'
    img_dir = 'JPEGImagesOversampledGANClassv2'
    anno_dir = 'AnnotationsOversampledGANClassv2'

    dataset = DatasetAugmented(opt, id_file=id_file_dir, img_dir=img_dir,
                               anno_dir=anno_dir)
    print(len(dataset))
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=True,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    # BUGFIX: best_path was previously unbound when mAP never improved,
    # crashing with UnboundLocalError at the epoch-9 reload below.
    best_path = None
    lr_ = opt.lr
    rpn_loc_loss = []
    rpn_cls_loss = []
    roi_loc_loss = []
    roi_cls_loss = []
    total_loss = []
    test_map_list = []

    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Snapshot the running loss meters and persist the curves.
                losses_dict = trainer.get_meter_data()
                rpn_loc_loss.append(losses_dict['rpn_loc_loss'])
                roi_loc_loss.append(losses_dict['roi_loc_loss'])
                rpn_cls_loss.append(losses_dict['rpn_cls_loss'])
                roi_cls_loss.append(losses_dict['roi_cls_loss'])
                total_loss.append(losses_dict['total_loss'])

                save_losses('rpn_loc_loss', rpn_loc_loss, epoch)
                save_losses('roi_loc_loss', roi_loc_loss, epoch)
                save_losses('rpn_cls_loss', rpn_cls_loss, epoch)
                save_losses('total_loss', total_loss, epoch)
                save_losses('roi_cls_loss', roi_cls_loss, epoch)

        # Per-epoch evaluation; rows go into the CSV results file.
        eval_result = eval(test_dataloader, faster_rcnn,
                           test_num=opt.test_num, epoch=epoch, file=results_fh)
        test_map_list.append(eval_result['map'])
        save_map(test_map_list, epoch)

        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))

        print(log_info)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints-gan-class-2/fasterrcnn_%s' % timestr

            best_path = trainer.save(best_map=best_map, save_path=save_path)
        if epoch == 9:
            # Reload the best checkpoint (if any) before decaying the LR.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay

        if epoch == 13:
            break
    results_fh.close()
コード例 #12
0
                                 gt_bboxes,
                                 gt_labels,
                                 gt_difficults,
                                 use_07_metric=True)

    if result['map'] < result2['map']:
        result = result2
        pred_b, pred_l, pred_s = pred_bboxes, pred_labels, pred_scores
    if result['map'] < result3['map']:
        result = result3
        pred_b, pred_l, pred_s = pred_bboxes_, pred_labels_, pred_scores_
    return pred_b, pred_l, pred_s, result


# Load the trained weights (translated from: 加载权重).
# Ensemble evaluation: run up to three checkpoints over the same test
# loader and fuse their detections.
trainer.load('checkpoints/' + model_path)
pred_bboxes1, pred_labels1, pred_scores1, gt_bboxes, gt_labels, gt_difficults, ID = bbox_result(
    test_dataloader, trainer.faster_rcnn)

# Second model: its predictions land in the *_-suffixed variables.
trainer.load('checkpoints/' + model_path2)
pred_bboxes_, pred_labels_, pred_scores_, gt_bboxes, gt_labels, gt_difficults, ID = bbox_result(
    test_dataloader, trainer.faster_rcnn)

# Merge model-1 and model-2 detections and keep whichever combination
# scores the higher mAP.  NOTE(review): the helper name carries a typo
# ("nms_reuslt") — kept as-is since it is defined elsewhere in this file.
pred_bboxes_, pred_labels_, pred_scores_, result = nms_reuslt(
    pred_bboxes1, pred_labels1, pred_scores1, pred_bboxes_, pred_labels_,
    pred_scores_, gt_bboxes, gt_labels, gt_difficults)

print(result)
# Third model: its raw predictions overwrite the model-1 variables —
# presumably for a further fusion step later in the file; confirm.
trainer.load('checkpoints/' + model_path3)
pred_bboxes1, pred_labels1, pred_scores1, gt_bboxes, gt_labels, gt_difficults, ID = bbox_result(
    test_dataloader, trainer.faster_rcnn)
コード例 #13
0
def main(**kwargs):
    """Run Faster R-CNN detection over the 700-sample test set.

    Side effects: writes fused detection results to
    'result0520_fasterrcnn.txt', saves one annotated image per sample
    under ./result/bbox/, and draws the ground-truth box in green.
    """
    opt._parse(kwargs)
    # A ResNet-50 classifier is loaded alongside the detector; it is
    # prepared (eval mode, on GPU) but never invoked in this function —
    # presumably consumed by a helper such as fuseBoxes. TODO confirm.
    # checkpoint = t.load('se_0314_all')
    # classifier = t.hub.load(
    #         'moskomule/senet.pytorch',
    #         'se_resnet50',
    #         pretrained=True, )
    checkpoint = t.load('res50_0314_all')
    classifier = models.resnet50()
    num_classes = 8
    step = [112, 112]

    # Replace the final FC layer to match the 8-class head, then restore
    # the trained weights.
    num_ftrs = classifier.fc.in_features
    classifier.fc = nn.Linear(num_ftrs, num_classes)
    classifier.load_state_dict(checkpoint['state_dict'])
    classifier.eval()
    classifier = classifier.cuda()
    result_file = open('result0520_fasterrcnn.txt', 'w')
    save_root = './result/bbox/'
    makeDir()

    # Detector: Faster R-CNN with VGG16 backbone, restored from checkpoint.
    faster_rcnn = FasterRCNNVGG16()
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    trainer.load('checkpoints/fasterrcnn_04081709_0.6626689194895079')

    data_root = '/home/lsm/testSamples700_new/'
    test_file = 'GT707.txt'
    test700 = Test700Dataset(data_root, test_file, opt)
    test_dataloader = data_.DataLoader(test700,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       pin_memory=True
                                       )
    print('data loaded!')
    for ii, (fn, imgs, sizes, gt_bboxes_) in tqdm(enumerate(test_dataloader)):
        # print(gt_bboxes_)
        # Ground-truth box arrives as (ymin, xmin, ymax, xmax); unpack into
        # x/y corner coordinates for drawing.
        gt_x1 = int(gt_bboxes_[0][0][1])
        gt_y1 = int(gt_bboxes_[0][0][0])
        gt_x2 = int(gt_bboxes_[0][0][3])
        gt_y2 = int(gt_bboxes_[0][0][2])
        # print([gt_x1,gt_y1,gt_x2,gt_y2])
        sizes = [sizes[0][0].item(), sizes[1][0].item()]
        pred_bboxes_, pred_labels_, pred_scores_ = faster_rcnn.predict(
            imgs, [sizes])
        result_file.write(fn[0])
        # drawBbox(data_root,fn[0],pred_bboxes_,save_root)
        img = Image.open(data_root + fn[0]).convert("RGB")
        plt.imshow(img)
        currentAxis = plt.gca()
        # Accumulate "filename label score x1 y1 x2 y2 ..." for this image;
        # predicted boxes are also (y, x, y, x) and get swapped below.
        line = fn[0]
        for i in range(len(pred_bboxes_[0])):
            bbox = pred_bboxes_[0][i]
            score = pred_scores_[0][i]
            label = pred_labels_[0][i]
            x1, y1, x2, y2 = bbox[1], bbox[0], bbox[3], bbox[2]
            # plt.text(x1, y1, toolNameList[decision]+" "+str(score), size=15, color='r')
            line = line + ' ' + toolNameList[label + 1] + ' ' + str(
                score) + ' ' + str(x1) + ' ' + str(y1) + ' ' + str(
                    x2) + ' ' + str(y2)
        words = line.split()
        # result_file.write(words[0])
        # Each detection contributes 6 tokens (label, score, 4 coords);
        # assumes the filename itself contains no spaces — TODO confirm.
        num_box = int((len(words) - 1) / 6)
        if num_box > 0:
            new_boxes = fuseBoxes(words[1:], num_box)
            writeResult(result_file, new_boxes, currentAxis)

        # Draw the ground-truth box in green on the current axes.
        rect = patches.Rectangle((gt_x1, gt_y1),
                                 gt_x2 - gt_x1,
                                 gt_y2 - gt_y1,
                                 fill=False,
                                 edgecolor='g',
                                 linewidth=2)
        currentAxis.add_patch(rect)
        plt.savefig(save_root + fn[0])
        plt.close()
    result_file.close()
コード例 #14
0
# In[3]:


# Notebook-export demo: build the detector and its training wrapper on
# the GPU, then run inference with two different pretrained checkpoints.
# NOTE(review): `img` is expected to be defined in an earlier notebook
# cell that is not part of this excerpt — confirm before running.
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()

# You'll need to download pretrained model from [google dirve](https://drive.google.com/open?id=1cQ27LIn-Rig4-Uayzy_gH5-cW-NRGVzY)
# # 1. model converted from chainer

# In[4]:


# in this machine the cupy isn't install correctly... 
# so it's a little slow
trainer.load('/home/cy/chainer_best_model_converted_to_pytorch_0.7053.pth')
opt.caffe_pretrain = True  # this model was trained from caffe-pretrained model
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)
# Visualize detections for the first (only) image in the batch.
vis_bbox(at.tonumpy(img[0]),
         at.tonumpy(_bboxes[0]),
         at.tonumpy(_labels[0]).reshape(-1),
         at.tonumpy(_scores[0]).reshape(-1))
# it failed to find the dog, but if you set threshold from 0.7 to 0.6, you'll find it


# # 2. model trained with torchvision pretrained model

# In[5]:


trainer.load('/home/cy/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth')
コード例 #15
0
ファイル: train.py プロジェクト: KnightHacks20/KnightHacksAPI
def train(**kwargs):
    """
    The main entry point for training; trains a FasterRCNN-based detector.

    Keyword arguments override fields of the global ``opt`` config.  If
    ``opt.load_path`` points at a checkpoint, class names, best mAP and
    the epoch counter are restored from it so training resumes where it
    left off.  With ``opt.validate_only`` set, a single evaluation pass
    is run and the function returns.
    """

    opt._parse(kwargs)

    # Load the checkpoint up-front: it carries the class names and the
    # best mAP reached so far, which the datasets need before training.
    if opt.load_path:
        old_state = torch.load(opt.load_path)
        class_names = old_state['class_names']
        best_map = old_state['best_map']
    else:
        class_names = []
        best_map = 0
        old_state = None

    print('load data')
    dataset = Dataset(opt, class_names)
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)

    testset = TestDataset(opt, dataset.get_class_names())
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)

    faster_rcnn = FasterRCNNVGG16(n_fg_class=dataset.get_class_count())
    print('Model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn,
                                n_fg_class=dataset.get_class_count())

    if opt.use_cuda:
        trainer = trainer.cuda()

    if opt.load_path:
        trainer.load(old_state)
        print_log('load pretrained model from %s' % opt.load_path)

    # BUGFIX: global_step must exist before the validate-only evaluation
    # below.  Previously it was first assigned only after this branch,
    # so running with --validate_only crashed with a NameError.
    global_step = 0

    if opt.validate_only:
        num_eval_images = len(testset)
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           global_step,
                           test_num=num_eval_images)
        print_log('Evaluation finished, obtained {} using {} out of {} images'.
                  format(eval_result, num_eval_images, len(testset)))
        return

    if old_state and 'epoch' in old_state.keys():
        starting_epoch = old_state['epoch'] + 1
        print_log('Model was trained until epoch {}, continuing with epoch {}'.
                  format(old_state['epoch'], starting_epoch))
    else:
        starting_epoch = 0

    lr_ = opt.lr

    for epoch in range(starting_epoch, opt.num_epochs):

        writer.add_scalar('epoch', epoch, global_step)
        # Step-wise LR schedule: decay once for every boundary already passed.
        lr_ = opt.lr * (opt.lr_decay**
                        np.sum(epoch >= np.array(opt.lr_schedule)))
        trainer.faster_rcnn.set_lr(lr_)

        print_log('Starting epoch {} with learning rate {}'.format(epoch, lr_))
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader),
                                                    total=len(dataset)):
            global_step = global_step + 1
            scale = at.scalar(scale).item()
            # Move tensors to the GPU when requested; empty bbox tensors
            # are passed through unchanged (no boxes in this image).
            if opt.use_cuda:
                img = img.cuda().float()
                label = label_.float().cuda()
                bbox = bbox_.float().cuda() if len(bbox_[0]) > 0 else bbox_
            else:
                img, label = img.float(), label_.float()
                bbox = bbox_.float() if len(bbox_[0]) > 0 else bbox_

            img, label = Variable(img), Variable(label)
            if len(bbox[0]) > 0:
                bbox = Variable(bbox)
            else:
                bbox = np.asarray(bbox)

            losses = trainer.train_step(img, bbox, label, scale)

            writer.add_scalars(
                'training/losses',
                dict(total_loss=losses.total_loss,
                     roi_cls_loss=losses.roi_cls_loss,
                     roi_loc_loss=losses.roi_loc_loss,
                     rpn_cls_loss=losses.rpn_cls_loss,
                     rpn_loc_loss=losses.rpn_loc_loss), global_step)

            if (ii + 1) % opt.plot_every == 0:
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]),
                                     label_names=dataset.get_class_names() +
                                     ['BG'])
                writer.add_image('gt_img', gt_img, global_step)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]),
                                       label_names=dataset.get_class_names() +
                                       ['BG'])
                writer.add_image('pred_img', pred_img, global_step)

            if (global_step) % opt.snapshot_every == 0:
                snapshot_path = trainer.save(
                    epoch=epoch, class_names=testset.get_class_names())
                print_log("Snapshotted to {}".format(snapshot_path))

        # Drop references to the last batch before evaluation so GPU
        # memory can actually be reclaimed (the old ``for lo in losses:
        # del lo`` only deleted the loop variable and released nothing).
        del losses
        del img, bbox_, label_, scale
        torch.cuda.empty_cache()
        eval_result = eval(test_dataloader,
                           faster_rcnn,
                           trainer,
                           testset,
                           global_step,
                           test_num=min(opt.test_num, len(testset)))
        print_log(eval_result)
        # TODO: this definitely is not good and will bias evaluation
        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=eval_result['map'],
                                     epoch=epoch,
                                     class_names=testset.get_class_names())
            print_log("After epoch {}: snapshotted to {}".format(
                epoch, best_path))

        del eval_result
        torch.cuda.empty_cache()
コード例 #16
0
ファイル: train.py プロジェクト: TWSFar/Faster-RCNN
class Trainer(object):
    """Wraps dataset loading, network construction and the training loop
    for a FasterRCNN-VGG16 detector (single- or multi-GPU)."""

    def __init__(self, **kwargs):
        """Parse config overrides, build train/val loaders, construct the
        network, optionally resume from ``opt.load_path``, and set up the
        visdom label panel."""
        opt._parse(kwargs)
        self.opt = opt
        self.test_num = self.opt.test_num
        self.device, self.device_id = select_device(is_head=True)
        # Define Dataloader
        print("load data")
        self.train_dataset = Datasets(opt, mode='train')
        self.train_loader = DataLoader(self.train_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=True,
                                       num_workers=opt.num_workers)
        self.val_dataset = Datasets(opt, mode='val')
        self.val_loader = DataLoader(self.val_dataset,
                                     batch_size=opt.batch_size,
                                     shuffle=False,
                                     pin_memory=True,
                                     num_workers=opt.test_num_workers)
        self.num_batch = len(self.train_loader)

        # Define Network
        # initilize the network here.
        print("define network")
        faster_rcnn = FasterRCNNVGG16()
        self.trainer = FasterRCNNTrainer(faster_rcnn)

        # Resuming Checkpoint
        self.start_epoch = 0
        self.best_map = 0
        self.lr = opt.lr
        if opt.load_path:
            self.trainer.load(opt.load_path)
            self.start_epoch = self.trainer.start_epoch
            self.best_map = self.trainer.best_map
            print('load pretrained model from %s' % opt.load_path)

        # Use multiple GPU
        if opt.use_mgpu and len(self.device_id) > 1:
            self.trainer = torch.nn.DataParallel(self.trainer,
                                                 device_ids=self.device_id)
            print("Using multiple gpu")
        else:
            self.trainer = self.trainer.to(self.device)

        # Visdom
        self.trainer.vis.text(self.train_dataset.classes, win='labels')

    def train(self):
        """Run one epoch over the training loader, periodically pushing
        loss meters and confusion matrices to visdom."""
        self.trainer.reset_meters()
        for ii, (img, bbox_, label_,
                 scale) in tqdm(enumerate(self.train_loader)):
            scale[0] = at.scalar(scale[0])
            scale[1] = at.scalar(scale[1])
            img = img.to(self.device)
            bbox = bbox_.to(self.device)
            label = label_.to(self.device)
            self.trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # BUGFIX: the original called os.makedirs on a path that
                # already existed (raising FileExistsError).  Create the
                # debug directory only when it is missing instead.
                if not os.path.exists(opt.debug_file):
                    os.makedirs(opt.debug_file)

                self.trainer.vis.plot_many(self.trainer.get_meter_data())

                # plot groud truth bboxes

                # plot predicti bboxes

                # rpn confusion matrix(meter)
                self.trainer.vis.text(str(
                    self.trainer.rpn_cm.value().tolist()),
                                      win='rpn_cm')
                self.trainer.vis.img(
                    'roi_cm',
                    at.totensor(self.trainer.roi_cm.conf, False).float())

    def eval(self):
        """Prepare accumulators for evaluation.

        NOTE(review): this method appears truncated in this excerpt — it
        only initializes the prediction/ground-truth lists.
        """
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
コード例 #17
0
import os
import torch as t
from utils.config import opt
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
from data.util import read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at

# Demo script: run a (chainer-converted) pretrained Faster R-CNN over a
# single image and visualize the detections.

# Read the demo image and add a batch dimension (shape becomes 1xCxHxW —
# read_image returns CHW; confirm against data.util.read_image).
img = read_image('misc/demo.jpg')
img = t.from_numpy(img)[None]

faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn).cuda()

# The weights were converted from the chainer implementation, so enable
# caffe-style preprocessing before predicting.
trainer.load('weights/chainer_best_model_converted_to_pytorch_0.7053.pth')
opt.caffe_pretrain = True
_bboxes, _labels, _scores = trainer.faster_rcnn.predict(img, visualize=True)

# Draw boxes, labels and scores for the first (only) image in the batch.
vis_bbox(at.tonumpy(img[0]), at.tonumpy(_bboxes[0]),
         at.tonumpy(_labels[0]).reshape(-1),
         at.tonumpy(_scores[0]).reshape(-1))
コード例 #18
0
def train(**kwargs):
    """Train FasterRCNN-VGG16 on the dataset described by ``opt``.

    Keyword arguments override fields of the global ``opt`` config.
    Pushes loss curves, ground-truth/predicted boxes and confusion
    matrices to visdom every ``opt.plot_every`` iterations, evaluates
    after every epoch and keeps the best-mAP checkpoint.
    """
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print('load data')
    dataloader = data_.DataLoader(dataset,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False,
                                       pin_memory=True)
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    trainer.vis.text(dataset.db.label_names, win='labels')
    best_map = 0
    # BUGFIX: best_path was previously unbound when mAP never improved,
    # crashing with UnboundLocalError at the epoch-9 reload below.
    best_path = None
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, label_, scale) in tqdm(enumerate(dataloader)):
            scale = at.scalar(scale)
            img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()
            img, bbox, label = Variable(img), Variable(bbox), Variable(label)
            trainer.train_step(img, bbox, label, scale)

            if (ii + 1) % opt.plot_every == 0:
                # Deliberate debug hook: drop into ipdb when the marker
                # file exists.
                if os.path.exists(opt.debug_file):
                    ipdb.set_trace()

                # plot loss
                trainer.vis.plot_many(trainer.get_meter_data())

                # plot ground truth bboxes
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_,
                                     at.tonumpy(bbox_[0]),
                                     at.tonumpy(label_[0]))
                trainer.vis.img('gt_img', gt_img)

                # plot predicted bboxes
                _bboxes, _labels, _scores = trainer.faster_rcnn.predict(
                    [ori_img_], visualize=True)
                pred_img = visdom_bbox(ori_img_,
                                       at.tonumpy(_bboxes[0]),
                                       at.tonumpy(_labels[0]).reshape(-1),
                                       at.tonumpy(_scores[0]))
                trainer.vis.img('pred_img', pred_img)

                # rpn confusion matrix(meter)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
                # roi confusion matrix
                trainer.vis.img('roi_cm',
                                at.totensor(trainer.roi_cm.conf, False).float())
        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            best_path = trainer.save(best_map=best_map)
        if epoch == 9:
            # Reload the best checkpoint (if any) before decaying the LR.
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)

        trainer.vis.plot('test_map', eval_result['map'])
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']
        log_info = 'lr:{}, map:{},loss:{}'.format(str(lr_),
                                                  str(eval_result['map']),
                                                  str(trainer.get_meter_data()))
        trainer.vis.log(log_info)
        if epoch == 13:
            break
コード例 #19
0
def train(**kwargs):
    opt._parse(kwargs)

    dataset = Dataset(opt)
    print(f'load data, data:length {len(dataset)}')
    dataloader = data_.DataLoader(dataset, \
                                  batch_size=1, \
                                  shuffle=True, \
                                  pin_memory=True,
                                  num_workers=opt.num_workers)
    testset = TestDataset(opt)
    print(f'load test_data, test_data:length {len(testset)}')
    test_dataloader = data_.DataLoader(testset,
                                       batch_size=1,
                                       num_workers=opt.test_num_workers,
                                       shuffle=False, \
                                       # pin_memory=True

                                       )
    faster_rcnn = FasterRCNNVGG16()
    print('model construct completed')
    trainer = FasterRCNNTrainer(faster_rcnn).cuda()
    if opt.load_path:
        trainer.load(opt.load_path)
        print('load pretrained model from %s' % opt.load_path)

    best_map = 0
    lr_ = opt.lr
    best_path = None
    plot_flag = False
    cutmix_flag = False
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        # index = list(range(len(dataset)))
        # np.random.shuffle(index)
        loss_history = []

        for ii, (imgs, boxes, labels, scale) in tqdm(enumerate(dataloader)):

            scale = at.scalar(scale)
            imgs, boxes, labels = cutout_process(imgs, boxes, labels)
            if plot_flag:
                input_visual(imgs, boxes, labels)
            imgs, boxes, labels = imgs.cuda().float(), boxes.cuda(
            ), labels.cuda()

            # paste_img, paste_bboxes, paste_labels, paste_difficult = dataset.db.get_example(index[ii])
            # paste_img, paste_bboxes, paste_labels, paste_scale = dataset.tsf((paste_img, paste_bboxes, paste_labels))

            copy_cache = [imgs, boxes, labels]
            # paste_cache = [paste_img, paste_bboxes, paste_labels]
            paste_cache = [None] * 3
            losses, info, *_ = trainer.train_step(imgs, scale, None,
                                                  *copy_cache, *paste_cache,
                                                  cutmix_flag, plot_flag)

            # if info["use_cutmix"] == 1:
            #     count += 1
            # #                 count_x.append(ii)
            # #                 count_y.append(losses.total_loss.item())

            loss_history.append(losses.total_loss.item())

            if (ii + 1) % opt.plot_every == 0:
                logging.info(
                    f"[Batch: {epoch}/Iter {ii + 1}] training loss: {np.mean(loss_history):.2f}"
                )

        eval_result = eval(test_dataloader, faster_rcnn, test_num=opt.test_num)
        logging.info(
            f"[Batch: {epoch}] eval loss: {eval_result['map']:.4f} lr:{lr_:.3f}"
        )  # 注意这里是会进行四舍五入
        lr_ = trainer.faster_rcnn.optimizer.param_groups[0]['lr']

        if eval_result['map'] > best_map:
            best_map = eval_result['map']
            if best_map < 0.4:
                best_path = None
            else:
                best_path = trainer.save(best_map=best_map)

        if epoch == 9:
            if best_path is not None:
                trainer.load(best_path)
            trainer.faster_rcnn.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay