Example #1
0
def get_dataset(datasetnames):
    """Build one RoiDataset from a '+'-separated list of imdb names.

    The first name seeds the dataset; every following name is loaded and
    merged into it with ``+=``.
    """
    name_list = datasetnames.split('+')
    combined = RoiDataset(get_imdb(name_list[0]))
    print('load dataset {}'.format(name_list[0]))
    for extra_name in name_list[1:]:
        combined += RoiDataset(get_imdb(extra_name))
        print('load and add dataset {}'.format(extra_name))
    return combined
Example #2
0
def get_dataset(datasetnames):
    """Merge every '+'-separated dataset name into a single RoiDataset."""
    # e.g. 'voc_2007_trainval+voc_2012_trainval'
    parts = datasetnames.split('+')
    first, rest = parts[0], parts[1:]
    merged = RoiDataset(get_imdb(first))
    print('load dataset {}'.format(first))
    for part in rest:
        merged += RoiDataset(get_imdb(part))
        print('load and add dataset {}'.format(part))
    return merged
def combined_roidb(imdb_names, training=True):
    """
    Combine multiple roidbs
    """

    def _prepare(imdb):
        """Return a training-ready roidb for one imdb (optional flip + prepare)."""
        if cfg.TRAIN.USE_FLIPPED:
            print('Appending horizontally-flipped training examples...')
            imdb.append_flipped_images()
            print('done')

        print('Preparing training data...')
        prepare_roidb(imdb)
        print('done')
        return imdb.roidb

    def _load(name):
        """Load one imdb by name and hand back its prepared roidb."""
        db = get_imdb(name)
        print('Loaded dataset `{:s}` for training'.format(db.name))
        return _prepare(db)

    names = imdb_names.split('+')
    all_roidbs = [_load(n) for n in names]

    # Merge every roidb into the first one (loop is a no-op for a single name).
    roidb = all_roidbs[0]
    for extra in all_roidbs[1:]:
        roidb.extend(extra)

    # When several datasets are merged, the first imdb stands in as the
    # representative one (same behavior as the original branch).
    if len(all_roidbs) > 1:
        imdb = get_imdb(names[0])
    else:
        imdb = get_imdb(imdb_names)

    if training:
        roidb = filter_roidb(roidb)

    return imdb, roidb
Example #4
0
def train_model(dataset, trainset, num_classes, net, pad, cachepath):
    """Fine-tune a segmentation net for one (dataset, trainset, net) combination.

    The job is skipped unless the pre-trained TRAIN model exists and the
    fine-tuned target does not; a lock directory serializes concurrent
    workers.  (Python 2 code: note the `print` statement below.)
    """
    # All artefacts live under <cachepath>/<dataset>_<trainset>/<net>.
    cachefolder = osp.join(cachepath, dataset+'_'+trainset, net)
    if not osp.isdir(cachefolder):
        os.makedirs(cachefolder)
    # Tag of the prerequisite (pre-trained) model — built from the segmentation
    # sizes/batch options and options.seg.epoch.
    ptag = 'S' + ('%d_'*len(options.seg.sizes)) % tuple(options.seg.sizes) \
             + 'IB%d_B%d_E%d-uniform' % (options.seg.imbatch,options.seg.batchsize,options.seg.epoch)
    if options.seg.trainflip:
        ptag += '_F'

    # Tag of the fine-tune target — identical except it uses
    # options.seg.ftepochall for the epoch count.
    tag = 'S' + ('%d_'*len(options.seg.sizes)) % tuple(options.seg.sizes) \
             + 'IB%d_B%d_E%d-uniform' % (options.seg.imbatch,options.seg.batchsize,options.seg.ftepochall)
    if options.seg.trainflip:
        tag += '_F'

    trainfolder = osp.join(cachefolder,'TRAIN')
    finetunefolder = osp.join(cachefolder,'FT-TR')
    if not osp.isdir(finetunefolder):
        os.makedirs(finetunefolder)

    prefile = osp.join(trainfolder,ptag+'.caffemodel')
    targetfile = osp.join(finetunefolder,tag+'.caffemodel')
    targetlock = osp.join(finetunefolder,tag+'.lock')
    # Nothing to do if the target already exists or its prerequisite is missing.
    if osp.exists(targetfile) or not osp.exists(prefile):
        return
    # os.mkdir is atomic, so the lock directory doubles as a cross-process
    # mutex; failing to create it means another worker owns this job.
    try:
        os.mkdir(targetlock)
    except Exception as e:
        return
    # looks like it is a tricky task to redirect the stdout/stderr
    # leave it as of now
    # logfolder = osp.join(trainfolder,'logs')
    print '%s_%s<-%s: %s' % (dataset,trainset,net,tag)
    time.sleep(5)
    datasetname = '%s_%s' % (dataset,trainset)
    segdb = get_imdb(datasetname)

    # create the solver and train file
    solverpath, _ = dump_prototxts(dataset, trainset, num_classes, segdb.num_images, net, pad)
    # solverpath = osp.join(options.netpath,net,'pysol-seg-'+dataset+'-'+trainset+'.prototxt')

    # start training
    # caffe.set_random_seed(options.seed)
    np.random.seed(options.seed)
    sw = SolverWrapper(solverpath, segdb, finetunefolder, tag, prefile)
    sw.train_model()
    del sw

    # after training
    # Release the lock so the job can be retried or extended later.
    os.rmdir(targetlock)
Example #5
0
def train():
    """Kick off YOLOv2 training on DATASET via SolverWrapper."""
    weights_npy = os.path.join(cfg.PRETRAINED_DIR, 'npy', 'yolov2.npy')
    assert os.path.exists(weights_npy), \
            'Model path {} does not exist!'.format(weights_npy)

    # NOTE(review): gpu_options is never used inside this function —
    # presumably consumed elsewhere (e.g. a tf.Session built inside
    # SolverWrapper); confirm before removing.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)

    train_name = DATASET + '_train'
    imdb = get_imdb(train_name)

    solver = SolverWrapper(imdb, DATASET,
                           os.path.join(cfg.TRAIN.TRAINED_DIR, DATASET),
                           weights_npy)
    solver.train_net()
def train():
    """Train YOLOv2: build the dataset/optimizer, run the epoch loop, checkpoint.

    Hyper-parameters come from parse_args() plus cfg.  Checkpoints and a text
    log are written to args.output_dir; scalars optionally go to TensorBoard.
    """

    # define the hyper parameters first
    args = parse_args()
    args.steplr_epoch = cfg.steplr_epoch
    args.steplr_factor = cfg.steplr_factor
    args.weight_decay = cfg.weight_decay
    args.momentum = cfg.momentum

    print('Called with args:')
    print(args)

    lr = args.lr

    # initial tensorboardX writer
    # NOTE: `writer` is only bound when args.use_tfboard is set; every later
    # use is guarded by the same flag.
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)

    args.imdb_name = 'trainval'
    args.imdbval_name = 'trainval'

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # load dataset
    print('loading dataset....')
    train_dataset = RoiDataset(get_imdb(args.imdb_name))

    print('dataset loaded.')

    print('training rois number: {}'.format(len(train_dataset)))
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=detection_collate,
                                  drop_last=True)

    # initialize the model
    print('initialize the model')
    tic = time.time()
    model = Yolov2(pretrained=True, arch=args.arch)
    toc = time.time()
    print('model loaded: cost time {:.2f}s'.format(toc - tic))

    # initialize the optimizer
    # The backbone ("trunk") lr is scaled by cfg.former_lr_decay; conv3/conv4
    # fall through to the group default args.lr.
    optimizer = optim.SGD([{
        "params": model.trunk.parameters(),
        "lr": args.lr * cfg.former_lr_decay
    }, {
        "params": model.conv3.parameters()
    }, {
        "params": model.conv4.parameters()
    }],
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=args.steplr_epoch,
                       gamma=args.steplr_factor)

    if args.resume:
        print('resume training enable')
        resume_checkpoint_name = 'yolov2_epoch_{}.pth'.format(
            args.checkpoint_epoch)
        resume_checkpoint_path = os.path.join(output_dir,
                                              resume_checkpoint_name)
        print('resume from {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        args.start_epoch = checkpoint['epoch'] + 1
        lr = checkpoint['lr']
        print('learning rate is {}'.format(lr))
        adjust_learning_rate(optimizer, lr)

    if args.use_cuda:
        model.cuda()

    if args.mGPUs:
        model = nn.DataParallel(model)

    # set the model mode to train because we have some layer whose behaviors are different when in training and testing.
    # such as Batch Normalization Layer.
    model.train()

    iters_per_epoch = int(len(train_dataset) / args.batch_size)

    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)

        # NOTE(review): scheduler.step() runs at the top of the epoch, before
        # any optimizer.step(); recent PyTorch recommends stepping the
        # scheduler after the optimizer — confirm the intended lr schedule.
        scheduler.step()
        lr = get_lr(optimizer)

        # Optionally switch the multi-scale sampling range at configured epochs.
        if cfg.multi_scale and epoch in cfg.epoch_scale:
            cfg.scale_range = cfg.epoch_scale[epoch]
            print('change scale range to {}'.format(cfg.scale_range))

        for step in range(iters_per_epoch):

            # Periodically re-sample the network input size (multi-scale training).
            if cfg.multi_scale and (step + 1) % cfg.scale_step == 0:
                scale_index = np.random.randint(*cfg.scale_range)
                cfg.input_size = cfg.input_sizes[scale_index]
                ##print('change input size {}'.format(cfg.input_size))

            im_data, boxes, gt_classes, num_obj = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                boxes = boxes.cuda()
                gt_classes = gt_classes.cuda()
                num_obj = num_obj.cuda()

            im_data_variable = Variable(im_data)

            box_loss, iou_loss, class_loss = model(im_data_variable,
                                                   boxes,
                                                   gt_classes,
                                                   num_obj,
                                                   training=True)

            # .mean() collapses each loss tensor to a scalar (e.g. per-replica
            # vectors under DataParallel).
            loss = box_loss.mean()+ iou_loss.mean() \
                   + class_loss.mean()

            optimizer.zero_grad()

            loss.backward()
            optimizer.step()

            loss_temp += loss.item()

            # Periodic console / file / TensorBoard logging.
            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                loss_temp /= args.display_interval

                iou_loss_v = iou_loss.mean().item()
                box_loss_v = box_loss.mean().item()
                class_loss_v = class_loss.mean().item()

                log = "[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, iou_loss: %.4f, box_loss: %.4f, cls_loss: %.4f" \
                      % (epoch, step+1, iters_per_epoch, loss_temp, lr, iou_loss_v, box_loss_v, class_loss_v)
                print(log)

                logfile = os.path.join(output_dir, 'training_log.txt')
                with open(logfile, 'a') as f:
                    print(log, file=f)

                if args.use_tfboard:

                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('lr', lr, n_iter)
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/iou_loss', iou_loss_v, n_iter)
                    writer.add_scalar('losses/box_loss', box_loss_v, n_iter)
                    writer.add_scalar('losses/cls_loss', class_loss_v, n_iter)

                loss_temp = 0
                tic = time.time()

        # Periodic checkpointing (unwrap DataParallel before saving).
        if epoch % args.save_interval == 0:
            save_name = os.path.join(output_dir,
                                     'yolov2_epoch_{}.pth'.format(epoch))
            torch.save(
                {
                    'model':
                    model.module.state_dict()
                    if args.mGPUs else model.state_dict(),
                    'epoch':
                    epoch,
                    'lr':
                    lr
                }, save_name)
Example #7
0
def test():
    """Detect over args.dataset, dump JSON + pickle results, then evaluate.

    Outputs (all under args.output_dir):
      - detections.json : per-image records {"ImageID", "labels": [...]}
      - detections.pkl  : raw all_boxes[cls][img] arrays
      - whatever val_imdb.evaluate_detections produces

    Fixes over the original: removes dead computations (det_bbox, score,
    disp_str), drops an unconditional per-image Image.open whose result was
    never used, and stops shadowing the per-image loop index.
    """
    args = parse_args()
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)

    # prepare dataset
    val_imdb = get_imdb(args.dataset)
    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)

    # load model
    model = Yolov2(arch=args.arch)
    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    # map_location lets CUDA-trained checkpoints load on CPU-only machines
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if args.use_cuda:
        model.cuda()

    model.eval()
    print('model loaded')

    dataset_size = len(val_imdb.image_index)
    print('classes: ', val_imdb.num_classes)

    # all_boxes[cls][img] -> (num_det, 5) array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]

    det_file = os.path.join(args.output_dir, 'detections.pkl')

    results = []

    img_id = -1
    with torch.no_grad():
        for im_data, im_infos in val_dataloader:
            if args.use_cuda:
                im_data_variable = Variable(im_data).cuda()
            else:
                im_data_variable = Variable(im_data)

            yolo_outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                # per-image slice of the batched network outputs
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output,
                                       im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)

                if img_id % 100 == 0:
                    print('im detect [{}/{}]'.format(img_id + 1,
                                                     len(val_dataset)))

                if len(detections) > 0:
                    # Per-class detections for the pickle/evaluation path.
                    # The score column combines detection columns 4 and 5
                    # (presumably objectness * class prob — confirm).
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            cls_det[:, 4] = (detections[inds, 4] *
                                             detections[inds, 5])
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()

                    # JSON record for this image.
                    boxes = detections[:, :5].cpu().numpy()
                    classes = detections[:, -1].long().cpu().numpy()
                    class_names = val_imdb.classes

                    labels = []
                    for b in range(boxes.shape[0]):
                        xmin, ymin, xmax, ymax = np.round(
                            boxes[b, :4]).astype(np.int64)
                        labels.append({
                            "box2d": {
                                "x1": str(xmin),
                                "y1": str(ymin),
                                "x2": str(xmax),
                                "y2": str(ymax),
                            },
                            "category": class_names[classes[b]],
                        })

                    results.append({
                        "ImageID":
                        os.path.basename(val_imdb.image_path_at(img_id)),
                        "labels": labels,
                    })

                if args.vis:
                    if len(detections) == 0:
                        continue
                    img = Image.open(val_imdb.image_path_at(img_id))
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()

    print(results)
    results_file = os.path.join(args.output_dir, 'detections.json')
    with open(results_file, 'w') as f:
        json.dump(results,
                  f,
                  ensure_ascii=False,
                  indent=4,
                  sort_keys=True,
                  separators=(',', ': '))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
Example #8
0
import _init_paths
from config.options import options
from dataset.factory import get_imdb
import pdb

# Report dataset statistics for each segmentation imdb.  The original repeated
# the print/get_imdb/compute_stats sequence five times; one loop over the
# names (in the same order) removes the duplication.  print() with a single
# parenthesized argument prints identically under Python 2 (the style used by
# the original `print 'DS: ' + name` statements) and Python 3.
for name in ['aug-voc2012_train', 'aug-voc2012_trainval',
             'context_train', 'context33_train', 'context20_train']:
    print('DS: ' + name)
    imdb = get_imdb(name)
    imdb.compute_stats()
Example #9
0
def test():
    """Run YOLOv2 detection over a VOC trainval/test split and evaluate.

    Writes raw detections to <args.output_dir>/detections.pkl and delegates
    mAP computation to the imdb's own evaluate_detections.
    """
    args = parse_args()
    # Very low threshold for evaluation; raised when visualizing so only
    # confident boxes are drawn.
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)

    # prepare dataset

    if args.dataset == 'voc07trainval':
        args.imdbval_name = 'voc_2007_trainval'

    elif args.dataset == 'voc07test':
        args.imdbval_name = 'voc_2007_test'

    else:
        raise NotImplementedError

    val_imdb = get_imdb(args.imdbval_name)

    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)

    # load model
    model = Yolov2()
    # weight_loader = WeightLoader()
    # weight_loader.load(model, 'yolo-voc.weights')
    # print('loaded')

    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    # map_location lets CUDA-trained checkpoints load on CPU-only machines.
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])

    if args.use_cuda:
        model.cuda()

    model.eval()
    print('model loaded')

    dataset_size = len(val_imdb.image_index)

    # all_boxes[cls][img] -> (num_det, 5) array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]

    det_file = os.path.join(args.output_dir, 'detections.pkl')

    img_id = -1
    with torch.no_grad():
        for batch, (im_data, im_infos) in enumerate(val_dataloader):
            if args.use_cuda:
                im_data_variable = Variable(im_data).cuda()
            else:
                im_data_variable = Variable(im_data)

            yolo_outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                # Per-image slice of the batched network outputs.
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output,
                                       im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)
                print('im detect [{}/{}]'.format(img_id + 1, len(val_dataset)))
                if len(detections) > 0:
                    # Scatter detections into the per-class structure; the
                    # score column combines detection columns 4 and 5
                    # (presumably objectness * class prob — confirm).
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            cls_det[:, 4] = detections[inds,
                                                       4] * detections[inds, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()

                if args.vis:
                    img = Image.open(val_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
 def get_roidb(imdb_name):
     """Load the named imdb and return its training-ready roidb."""
     db = get_imdb(imdb_name)
     print('Loaded dataset `{:s}` for training'.format(db.name))
     return get_training_roidb(db)
Example #11
0
def get_dataset(datasetnames):
    """Wrap the single named imdb in a RoiDataset and return it."""
    return RoiDataset(get_imdb(datasetnames))
Example #12
0
def test():
    """Evaluate a YOLOv2 checkpoint on a VOC test split.

    Dumps raw detections to <args.output_dir>/detections.pkl and delegates
    mAP computation to the imdb's evaluate_detections.

    Fixes over the original: `torch.cuda.is_available` was tested as a bare
    function object (always truthy), so CPU-only machines crashed loading
    CUDA tensors without map_location; it must be called.  Also renames the
    misleading `batch_size` loop variable and the inner class-loop index
    that shadowed the per-image index `i`.
    """
    args = parse_args()

    if args.vis:
        args.conf_thresh = 0.5

    # load test data
    if args.dataset == 'voc07test':
        dataset_name = 'voc_2007_test'
    elif args.dataset == 'voc12test':
        dataset_name = 'voc_2012_test'
    else:
        raise NotImplementedError

    test_imdb = get_imdb(dataset_name)
    test_dataset = RoiDataset(test_imdb, train=False)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 num_workers=args.num_workers,
                                 shuffle=False)

    # load model
    model = YOLOv2()

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    weight_file_path = os.path.join(
        args.output_dir, 'yolov2_epoch_{}.pth'.format(args.check_epoch))

    # BUG FIX: call is_available() — the bare attribute is always truthy.
    if torch.cuda.is_available():
        state_dict = torch.load(weight_file_path)
    else:
        state_dict = torch.load(weight_file_path, map_location='cpu')

    model.load_state_dict(state_dict['model'])

    if args.use_cuda:
        model = model.cuda()

    model.eval()

    num_data = len(test_dataset)

    # all_boxes[cls][img] -> (num_det, 5) array [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_data)]
                 for _ in range(test_imdb.num_classes)]

    img_id = -1

    det_file = os.path.join(args.output_dir, 'detections.pkl')

    with torch.no_grad():
        for im_data, im_infos in test_dataloader:

            if args.use_cuda:
                im_data = im_data.cuda()
                im_infos = im_infos.cuda()

            im_data_variable = Variable(im_data)

            outputs = model(im_data_variable)

            for i in range(im_data.size(0)):
                img_id += 1

                # Per-image slice of the batched network outputs.
                output = [item[i].data for item in outputs]
                im_info = im_infos[i]

                # NOTE(review): `eval` here shadows the builtin — presumably a
                # project-level detection post-processing function; confirm.
                detections = eval(output, im_info, args.conf_thresh,
                                  args.nms_thresh)

                if len(detections) > 0:
                    # Distinct loop variable so the per-image index `i` is not
                    # shadowed.  Score column combines detection columns 4 and
                    # 5 (presumably objectness * class prob — confirm).
                    for cls in range(cfg.CLASS_NUM):
                        idxs = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if idxs.numel() > 0:
                            cls_det = torch.zeros((idxs.numel(), 5))
                            cls_det[:, :4] = detections[idxs, :4]
                            cls_det[:, 4] = (detections[idxs, 4] *
                                             detections[idxs, 5])
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()

                if args.vis:
                    img = Image.open(test_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()

                    imshow = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=test_imdb.classes)

                    plt.figure()
                    plt.imshow(imshow)
                    plt.show()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    test_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)