Code example #1
def evaluate_ssd():
    """Evaluate a SSD network."""

    # Set results directory and solver
    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
    test_solver_file = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME,
                                cfg.METHOD_NAME, cfg.MODEL_NAME,
                                'test_solver.prototxt')

    make_if_not_exist(results_dir)
    check_if_exist('Solver', test_solver_file)

    # Find most recent model
    test_model = get_model_path(cfg.OUTPUT_DIR, '.caffemodel', '_iter_')

    if test_model is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    # Test model
    cmd = ('./frameworks/caffe-rcnn-ssd/build/tools/caffe train '
           '--solver="{}" --weights="{}" --gpu="{}"').format(
               test_solver_file, test_model, cfg.GPU_ID)

    subprocess.call(cmd, shell=True)

    # Set imdb and do evaluation
    imdb_name = '{:s}_val'.format(cfg.DATASET_NAME)
    imdb = get_imdb(imdb_name)
    imdb._do_pascal_voc_eval(results_dir)
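
These snippets lean on a few utility helpers that are never shown. A minimal sketch of what they could look like, inferred from the call sites and assuming get_model_path returns the snapshot whose filename embeds the largest iteration count after the given infix:

import os
import os.path as osp
import re
import sys


def make_if_not_exist(directory):
    """Create the directory if it does not already exist."""
    if not osp.exists(directory):
        os.makedirs(directory)


def check_if_exist(name, path):
    """Exit with a message if the given path is missing."""
    if not osp.exists(path):
        print('{:s} `{:s}` does not exist.'.format(name, path))
        sys.exit()


def get_model_path(model_dir, ext, infix):
    """Return the model file with the highest iteration number, or None."""
    best_iter, best_path = -1, None
    for fname in os.listdir(model_dir):
        match = re.search(re.escape(infix) + r'(\d+)', fname)
        if fname.endswith(ext) and match and int(match.group(1)) > best_iter:
            best_iter = int(match.group(1))
            best_path = osp.join(model_dir, fname)
    return best_path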
Code example #2
    def evaluate_detections(self, all_boxes):

        results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
        make_if_not_exist(results_dir)

        self._write_pascal_voc_results_files(all_boxes, results_dir)
        self._do_pascal_voc_eval(results_dir)
Code example #3
def evaluate_yolov2(conf_thresh, nms_thresh):
    """Evaluate a YOLOv2 network."""

    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
    data_cfg = osp.join(cfg.OUTPUT_DIR, '{}.data'.format(cfg.DATASET_NAME))
    model_cfg = osp.join(cfg.OUTPUT_DIR, '{}.cfg'.format(cfg.DATASET_NAME))

    make_if_not_exist(results_dir)
    check_if_exist('YOLOv2 data config', data_cfg)
    check_if_exist('YOLOv2 model config', model_cfg)

    # Change model config for testing
    with open(model_cfg, 'r') as f:
        data = f.readlines()

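    # Note: the loop below assumes the 'width' line immediately follows the
    # 'height' line in the generated .cfg, since data[i + 1] is overwritten.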
    for i in range(len(data)):
        if 'height' in data[i]:
            data[i] = 'height={:d}\n'.format(cfg.TEST.SCALES[0])
            data[i + 1] = 'width={:d}\n'.format(cfg.TEST.MAX_SIZE)

    with open(model_cfg, 'w') as f:
        f.writelines(data)

    # Find most recent model
    test_model = get_model_path(cfg.OUTPUT_DIR, '.weights', '_batch_')

    if test_model is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    result_file_prefix = '{}_det_test_'.format(cfg.DATASET_NAME)

    # Test model
    cmd = ('./frameworks/darknet/darknet-cpp detector valid {} {} {} -out {} '
           '-gpus {} -nms_thresh {:f}').format(data_cfg, model_cfg, test_model,
                                               result_file_prefix, cfg.GPU_ID,
                                               nms_thresh)

    subprocess.call(cmd, shell=True)

    # Set imdb and evaluate
    imdb_name = '{:s}_val'.format(cfg.DATASET_NAME)
    imdb = get_imdb(imdb_name)
    imdb._do_pascal_voc_eval(results_dir)
Code example #4
BCE_EPOCHS = 32
INTERMEDIATE_EPOCHS = 2
PRETRAINED_COOLDOWN = 2
DROPOUT_COOLDOWN = 2
NUM_EPOCHS = BCE_EPOCHS + INTERMEDIATE_EPOCHS + CYCLES * CYCLE_LENGTH

CLASS_WEIGHT = 0.05
MASKS_WEIGHT = 0.12
PROB = 0.5
PROB_CLASS = 0.8

VAL_METRIC_CRITERION = 'comp_metric'
MODEL_FILE_DIR = './saved_models_' + str(EXP_NAME)
LOGS_DIR = './logs/logs_' + str(EXP_NAME)

make_if_not_exist(MODEL_FILE_DIR)
make_if_not_exist(LOGS_DIR)

MODEL_FILE_PATH = MODEL_FILE_DIR + '/model_{}_{:.4f}'
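# The two placeholders are presumably an epoch index and a validation metric,
# e.g. MODEL_FILE_PATH.format(epoch, val_score) (the names here are guesses).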


def predict(config,
            model,
            data_loader,
            thresholding=True,
            threshold=THRESHOLD,
            tta=True):
    model.set_training(False)

    y_preds = []
    with torch.no_grad():
        # (The inference loop is truncated here in the source.)
        ...
Code example #5
    skip = args.skip

    classnames = []
    if labelmap_file:
        check_if_exist('Label map file', labelmap_file)
        labelmap = cpb2.LabelMap()
        with open(labelmap_file, 'r') as f:
            text_format.Merge(str(f.read()), labelmap)

        for item in labelmap.item:
            classname = str(item.display_name)
            classnames.append(classname)

    if save:
        save_dir = osp.splitext(result_file)[0]
        make_if_not_exist(save_dir)
        print('Saving to directory: {}'.format(save_dir))

    img_results = OrderedDict()
    with open(result_file, "r") as f:
        for line in f.readlines():
            img_path, label, score, xmin, ymin, xmax, ymax = line.split()

            result = dict()
            result["label"] = int(label)
            result["score"] = float(score)
            result["bbox"] = [
                float(xmin),
                float(ymin),
                float(xmax),
Code example #6
        check_if_exist('Config', cfg_file)

        extra_cfg = ('METHOD_NAME {:s} MODEL_NAME {:s} '
                     'DATASET_NAME {:s} GPU_ID {:d}'.format(
                         method_name, model_name, dataset_name, gpu_id))

        set_cfgs = extra_cfg.split()

        # Update config
        cfg_from_file(cfg_file)
        cfg_from_list(set_cfgs)

        # Set and create output dir
        cfg.OUTPUT_DIR = osp.join(cfg.OUTPUT_DIR, cfg.DATASET_NAME,
                                  cfg.METHOD_NAME, cfg.MODEL_NAME)
        make_if_not_exist(cfg.OUTPUT_DIR)

        # Get classes from label map
        label_map_file = osp.join(
            cfg.DATA_DIR, cfg.DATASET_NAME,
            '{}_labelmap.prototxt'.format(cfg.DATASET_NAME))

        cfg.CLASSES = get_classnames_from_labelmap(label_map_file)
        cfg.NUM_CLASSES = len(cfg.CLASSES)

        # Dump full config to output dir
        dst = osp.join(cfg.OUTPUT_DIR, 'config.yml')
        with open(dst, 'w') as f:
            yaml.dump(cfg, f, default_flow_style=False)
    else:
        # Get config from given output directory
Code example #7
def custom_dataset_eval(detpath,
                        annopath,
                        imagesetfile,
                        classname,
                        cachedir,
                        ovthresh=0.5,
                        use_07_metric=False):
    """rec, prec, ap = custom_dataset_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the evaluation.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use voc07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    make_if_not_exist(cachedir)

    cachefile = osp.join(cachedir, 'gt_annotations.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not osp.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            cPickle.dump(recs, f)
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = cPickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {
            'bbox': bbox,
            'difficult': difficult,
            'det': det
        }

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)

    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = custom_dataset_ap(rec, prec, use_07_metric)

    return rec, prec, ap
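
A typical driver calls this function once per class and averages the per-class APs into mAP. A minimal sketch, assuming VOC-style path templates and an existing classnames list (all paths here are hypothetical):

aps = []
for cls in classnames:
    rec, prec, ap = custom_dataset_eval(
        detpath='results/det_test_{:s}.txt',  # one detection file per class
        annopath='Annotations/{:s}.xml',      # one XML annotation per image
        imagesetfile='ImageSets/val.txt',
        classname=cls,
        cachedir='annotations_cache',
        ovthresh=0.5,
        use_07_metric=False)
    aps.append(ap)
print('mAP: {:.4f}'.format(np.mean(aps)))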
Code example #8
    # Remove background class
    if 'background' in classnames:
        classnames.remove('background')

    # Assume all images have same extension
    image_extension = osp.splitext(glob.glob(image_dir + '/*')[0])[1]

    yolov2_annotations_dir = osp.join(dataset_dir, 'yolov2_Annotations')
    yolov2_imageset_dir = osp.join(dataset_dir, 'yolov2_ImageSets')

    # # Remove existing labels
    # if osp.exists(yolov2_annotations_dir):
    #   shutil.rmtree(yolov2_annotations_dir)

    make_if_not_exist(yolov2_annotations_dir)
    make_if_not_exist(yolov2_imageset_dir)

    # Create imagesets for YOLOv2
    for imageset in imagesets:
        print('Creating YOLOv2 Imageset: {:s}'.format(imageset))
        pascal_image_ids = open(osp.join(pascal_imageset_dir,
                                         imageset)).read().strip().split()

        yolov2_imageset_file = open(osp.join(yolov2_imageset_dir, imageset),
                                    'w')

        # Shuffle train file
        if imageset == 'train.txt':
            random.shuffle(pascal_image_ids)
Code example #9
    check_if_exist('Annotation directory', pascal_annotations_dir)
    check_if_exist('Imageset directory', pascal_imageset_dir)

    # src_dir = osp.dirname(osp.dirname(osp.dirname(osp.realpath(__file__))))
    caffe_root = osp.join(src_dir, 'frameworks', 'caffe-rcnn-ssd')

    imagesets = [
        os.path.basename(s) for s in glob.glob(pascal_imageset_dir + '/*.txt')
    ]
    # imagesets = ['train', 'val']

    # Assume all images have same extension
    image_extension = osp.splitext(glob.glob(image_dir + '/*')[0])[1]

    ssd_imageset_dir = osp.join(dataset_dir, 'ssd_ImageSets')
    make_if_not_exist(ssd_imageset_dir)

    # Create imagesets for SSD
    for imageset in imagesets:
        print('Creating SSD Imageset: {:s}'.format(imageset))
        pascal_image_ids = open(osp.join(pascal_imageset_dir,
                                         imageset)).read().strip().split()

        ssd_imageset_filename = osp.join(ssd_imageset_dir, imageset)
        ssd_imageset_file = open(ssd_imageset_filename, 'w')

        # Shuffle train file
        if imageset == 'train.txt':
            random.shuffle(pascal_image_ids)

        for image_id in pascal_image_ids:
Code example #10
def create_ssd_model_definition(max_iters, conf_thresh, nms_thresh):
    """Create SSD network definition files based on config settings."""

    # Training and testing data created by data/data_utils/pascal_voc_to_ssd.py
    train_data = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'train_lmdb')
    test_data = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'val_lmdb')

    models_dir = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                          cfg.MODEL_NAME)

    make_if_not_exist(models_dir)
    check_if_exist('Training data', train_data)
    check_if_exist('Test data', test_data)

    # Directory which stores the detection results
    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')

    # Model definition files.
    train_net_file = osp.join(models_dir, 'train.prototxt')
    test_net_file = osp.join(models_dir, 'test.prototxt')
    deploy_net_file = osp.join(models_dir, 'deploy.prototxt')
    train_solver_file = osp.join(models_dir, 'train_solver.prototxt')
    test_solver_file = osp.join(models_dir, 'test_solver.prototxt')

    # The name of the model
    model_name = '{}_ssd'.format(cfg.MODEL_NAME.lower())

    # Snapshot prefix.
    snapshot_prefix = osp.join(cfg.OUTPUT_DIR, model_name)

    # Stores the test image names and sizes
    name_size_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'ssd_ImageSets',
                              'val_name_size.txt')

    label_map_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                              '{}_labelmap.prototxt'.format(cfg.DATASET_NAME))

    # Specify the batch sampler.
    resize_width = cfg.TRAIN.MAX_SIZE
    resize_height = cfg.TRAIN.MAX_SIZE
    resize = '{}x{}'.format(resize_width, resize_height)
    batch_sampler = [
        {
            'sampler': {},
            'max_trials': 1,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.1,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.3,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.5,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.7,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.9,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'max_jaccard_overlap': 1.0,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
    ]
    train_transform_param = {
        'mirror': True,
        # 'mean_value': [104, 117, 124],
        'mean_value': list(cfg.PIXEL_MEANS[0][0]),
        'force_color': True,
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            # 'resize_mode': P.Resize.FIT_SMALL_SIZE,
            # 'height': resize_height,
            # 'width': resize_width,
            # 'height_scale': resize_height,
            # 'width_scale': resize_width,
            'interp_mode': [
                P.Resize.LINEAR,
                P.Resize.AREA,
                P.Resize.NEAREST,
                P.Resize.CUBIC,
                P.Resize.LANCZOS4,
            ],
        },
        'distort_param': {
            'brightness_prob': 0.5,
            'brightness_delta': 32,
            'contrast_prob': 0.5,
            'contrast_lower': 0.5,
            'contrast_upper': 1.5,
            'hue_prob': 0.5,
            'hue_delta': 18,
            'saturation_prob': 0.5,
            'saturation_lower': 0.5,
            'saturation_upper': 1.5,
            'random_order_prob': 0.0,
        },
        'expand_param': {
            'prob': 0.5,
            'max_expand_ratio': 4.0,
        },
        'emit_constraint': {
            'emit_type': caffe_pb2.EmitConstraint.CENTER,
        }
    }
    test_transform_param = {
        # 'mean_value': [104, 117, 124],
        'mean_value': list(cfg.PIXEL_MEANS[0][0]),
        'force_color': True,
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            # 'resize_mode': P.Resize.FIT_SMALL_SIZE,
            # 'height': resize_height,
            # 'width': resize_width,
            # 'height_scale': resize_height,
            # 'width_scale': resize_height,
            'interp_mode': [P.Resize.LINEAR],
        },
    }

    # If true, use batch norm for all newly added layers.
    # Currently only the non batch norm version has been tested.
    use_batchnorm = False
    lr_mult = 1
    # Use different initial learning rate.
    if use_batchnorm:
        base_lr = 0.0004
    else:
        # A learning rate for batch_size = 1, num_gpus = 1.
        base_lr = 0.00004

    # MultiBoxLoss parameters.
    num_classes = cfg.NUM_CLASSES
    share_location = True
    background_label_id = 0
    output_name_prefix = '{}_det_test_'.format(cfg.DATASET_NAME)
    train_on_diff_gt = False
    normalization_mode = P.Loss.VALID
    code_type = P.PriorBox.CENTER_SIZE
    ignore_cross_boundary_bbox = False
    mining_type = P.MultiBoxLoss.MAX_NEGATIVE
    neg_pos_ratio = 3.
    loc_weight = (neg_pos_ratio + 1.) / 4.
    multibox_loss_param = {
        'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
        'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
        'loc_weight': loc_weight,
        'num_classes': num_classes,
        'share_location': share_location,
        'match_type': P.MultiBoxLoss.PER_PREDICTION,
        'overlap_threshold': 0.5,
        'use_prior_for_matching': True,
        'background_label_id': background_label_id,
        'use_difficult_gt': train_on_diff_gt,
        'mining_type': mining_type,
        'neg_pos_ratio': neg_pos_ratio,
        'neg_overlap': 0.5,
        'code_type': code_type,
        'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }
    loss_param = {
        'normalization': normalization_mode,
    }

    # parameters for generating priors.
    # minimum dimension of input image
    min_dim = cfg.TRAIN.MAX_SIZE
    # conv4_3 ==> 38 x 38 (300x300) ==> 64 x 64 (512x512)  ==> 76 x 76 (608x608)
    # fc7 ==> 19 x 19 (300x300) ==> 32 x 32 (512x512) ==> 38 x 38 (608x608)
    # conv6_2 ==> 10 x 10 (300x300) ==> 16 x 16 (512x512) ==> 19 x 19 (608x608)
    # conv7_2 ==> 5 x 5 (300x300) ==> 8 x 8 (512x512) ==> 10 x 10 (608x608)
    # conv8_2 ==> 3 x 3 (300x300) ==> 4 x 4 (512x512) ==> 5 x 5 (608x608)
    # conv9_2 ==> 1 x 1 (300x300) ==> 2 x 2 (512x512) ==> 3 x 3 (608x608)
    #                    conv10_2 ==> 1 x 1 (512x512) ==> 1 x 1 (608x608)

    if cfg.CUSTOM_ANCHORS:
        anchor_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                               'custom_anchor_boxes', '6_anchor_boxes.txt')
        if not osp.exists(anchor_file):
            print('Custom anchor box file `{:s}` does not exist.'.format(
                anchor_file))
            print('Generate custom anchor boxes with '
                  'data/data_utils/k_means_anchor_boxes.py')
            sys.exit()

        # Read anchor file
        with open(anchor_file, 'r') as f:
            data = f.readlines()

        custom_anchors = []
        # aspect_ratio = []
        for i in range(1, len(data)):
            splt = data[i].split(',')
            anchor_width = float(splt[0]) * min_dim
            anchor_height = float(splt[1]) * min_dim
            # aspect_ratio.append(anchor_height/anchor_width)

            custom_anchors.append([anchor_width, anchor_height])

        custom_anchors = np.asarray(custom_anchors)
        print(custom_anchors)

        min_ratio = int(np.floor(np.min(custom_anchors) / min_dim * 100))
        max_ratio = int(np.ceil(np.amax(custom_anchors) / min_dim * 100))

        nb = 1
    else:
        # in percent %
        min_const = 10
        max_const = 20
        min_ratio = 20
        max_ratio = 90

        if min_dim == 512 or min_dim == 608:
            max_const = 10
            min_ratio = 10
            min_const = 4

        nb = 2

    mbox_source_layers = [
        'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
    ]

    if min_dim == 512 or min_dim == 608:
        mbox_source_layers.append('conv10_2')

    step = int(
        np.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - nb)))

    min_sizes = []
    max_sizes = []
    for ratio in range(min_ratio, max_ratio + 1, step):
        print(ratio)
        min_sizes.append(min_dim * ratio / 100.)
        max_sizes.append(min_dim * (ratio + step) / 100.)
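    # Worked example, assuming min_dim = 300 with the defaults above
    # (min_ratio = 20, max_ratio = 90, nb = 2, six source layers):
    # step = floor(70 / 4) = 17, so the loop visits ratios 20, 37, 54, 71, 88
    # and gives min_sizes = [60, 111, 162, 213, 264] and
    # max_sizes = [111, 162, 213, 264, 315]. Prepending the min_const /
    # max_const sizes below yields the canonical SSD300 priors.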

    steps = [8, 16, 32, 64, 100, 300]
    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
    # L2 normalize conv4_3.
    normalizations = [20, -1, -1, -1, -1, -1]

    if min_dim == 512:
        steps = [8, 16, 32, 64, 128, 256, 512]
        aspect_ratios.insert(2, [2, 3])
        normalizations.append(-1)
    elif min_dim == 608:
        steps = [8, 16, 32, 61, 122, 203, 608]
        aspect_ratios.insert(2, [2, 3])
        normalizations.append(-1)

    print("minsize: ", min_sizes)
    print("maxsize: ", max_sizes)
    if not cfg.CUSTOM_ANCHORS:
        min_sizes = [min_dim * min_const / 100.] + min_sizes
        max_sizes = [min_dim * max_const / 100.] + max_sizes
    print("minsize: ", min_sizes)
    print("maxsize: ", max_sizes)

    if min_dim != 300 and min_dim != 512:
        print('SSD anchor boxes are not optimized for size {}'.format(min_dim))

    # variance used to encode/decode prior bboxes.
    if code_type == P.PriorBox.CENTER_SIZE:
        prior_variance = [0.1, 0.1, 0.2, 0.2]
    else:
        prior_variance = [0.1]

    flip = True
    clip = False

    # The prior box calculations below mirror what the caffe PriorBox layer
    # does internally; they are printed here for inspection only.
    for s in range(0, len(min_sizes)):
        min_size = min_sizes[s]
        # first prior: aspect_ratio = 1, size = min_size
        box_width = min_size
        box_height = min_size
        print('\nfirst: {} X {}'.format(box_width, box_height))

        if len(max_sizes) > 0:
            max_size = max_sizes[s]
            box_width = np.sqrt(min_size * max_size)
            box_height = np.sqrt(min_size * max_size)
            print('second: {} X {}'.format(box_width, box_height))

        for r in range(0, len(aspect_ratios[s])):
            ar = aspect_ratios[s][r]
            if np.fabs(ar - 1.) < 1e-6:
                continue

            box_width = min_size * np.sqrt(ar)
            box_height = min_size / np.sqrt(ar)
            print('rest: {} X {}'.format(box_width, box_height))
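    # Worked example for one scale, assuming min_size = 30, max_size = 60,
    # aspect_ratios = [2]: first prior 30 x 30, second prior
    # sqrt(30 * 60) ~= 42.4 x 42.4, and the ar = 2 pair is
    # 30 * sqrt(2) ~= 42.4 wide by 30 / sqrt(2) ~= 21.2 high.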

    # sys.exit()

    # Solver parameters.
    # Defining which GPUs to use.
    gpus = '{:d}'.format(cfg.GPU_ID)
    gpulist = gpus.split(',')
    num_gpus = len(gpulist)

    # Divide the mini-batch to different GPUs.
    batch_size = cfg.TRAIN.IMS_PER_BATCH
    accum_batch_size = cfg.TRAIN.BATCH_SIZE
    iter_size = accum_batch_size / batch_size
    solver_mode = P.Solver.CPU
    device_id = 0
    batch_size_per_device = batch_size
    if num_gpus > 0:
        batch_size_per_device = int(np.ceil(float(batch_size) / num_gpus))
        iter_size = int(
            np.ceil(
                float(accum_batch_size) / (batch_size_per_device * num_gpus)))
        solver_mode = P.Solver.GPU
        device_id = int(gpulist[0])

    if normalization_mode == P.Loss.NONE:
        base_lr /= batch_size_per_device
    elif normalization_mode == P.Loss.VALID:
        base_lr *= 25. / loc_weight
    elif normalization_mode == P.Loss.FULL:
        # Roughly there are 2000 prior bboxes per image.
        # TODO(weiliu89): Estimate the exact # of priors.
        base_lr *= 2000.

    # Get number of test images from name_size_file
    num_test_image = sum(1 for line in open(name_size_file))
    test_batch_size = 8

    # Ideally num_test_image should be divisible by test_batch_size;
    # otherwise the reported mAP will be slightly off the true value.
    test_iter = int(np.ceil(float(num_test_image) / test_batch_size))

    stepvalue = []
    stepvalue.append(int(np.ceil(max_iters * 0.6667)))
    stepvalue.append(int(np.ceil(max_iters * 0.8333)))
    stepvalue.append(max_iters)

    train_solver_param = {
        # Train parameters
        'base_lr': base_lr,
        'weight_decay': 0.0005,
        'lr_policy': 'multistep',
        'stepvalue': stepvalue,
        'gamma': 0.1,
        'momentum': 0.9,
        'iter_size': iter_size,
        'max_iter': max_iters,
        'snapshot': cfg.TRAIN.SNAPSHOT_ITERS,
        'display': 20,
        'average_loss': 10,
        'type': 'SGD',
        'solver_mode': solver_mode,
        'device_id': device_id,
        'debug_info': False,
        'snapshot_after_train': True,
    }

    test_solver_param = {
        # Test parameters
        'snapshot': 1,
        'snapshot_after_train': False,
        'test_iter': [test_iter],
        'test_interval': 1,
        'eval_type': 'detection',
        'ap_version': 'MaxIntegral',
        'test_initialization': True,
    }
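    # With test_initialization=True and test_interval=1, running `caffe train`
    # on this solver with --weights (as evaluate_ssd() in code example #1
    # does) effectively performs an evaluation pass over the validation set.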

    # Parameters for generating detection output.
    det_out_param = {
        'num_classes': num_classes,
        'share_location': share_location,
        'background_label_id': background_label_id,
        'nms_param': {
            'nms_threshold': nms_thresh,
            'top_k': 200
        },
        'save_output_param': {
            'output_directory': results_dir,
            'output_name_prefix': output_name_prefix,
            'output_format': 'VOC',
            'label_map_file': label_map_file,
            'name_size_file': name_size_file,
            'num_test_image': num_test_image,
        },
        'keep_top_k': 50,
        'confidence_threshold': conf_thresh,
        'code_type': code_type,
    }

    # Parameters for evaluating detection results.
    det_eval_param = {
        'num_classes': num_classes,
        'background_label_id': background_label_id,
        'overlap_threshold': 0.5,
        'evaluate_difficult_gt': False,
        'name_size_file': name_size_file,
    }

    # Create train net.
    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        train_data,
        batch_size=batch_size_per_device,
        train=True,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=train_transform_param,
        batch_sampler=batch_sampler)

    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False)

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

    mbox_layers = CreateMultiBoxHead(net,
                                     data_layer='data',
                                     from_layers=mbox_source_layers,
                                     use_batchnorm=use_batchnorm,
                                     min_sizes=min_sizes,
                                     max_sizes=max_sizes,
                                     aspect_ratios=aspect_ratios,
                                     steps=steps,
                                     normalizations=normalizations,
                                     num_classes=num_classes,
                                     share_location=share_location,
                                     flip=flip,
                                     clip=clip,
                                     prior_variance=prior_variance,
                                     kernel_size=3,
                                     pad=1,
                                     lr_mult=lr_mult)

    # Create the MultiBoxLossLayer.
    name = "mbox_loss"
    mbox_layers.append(net.label)
    net[name] = L.MultiBoxLoss(
        *mbox_layers,
        multibox_loss_param=multibox_loss_param,
        loss_param=loss_param,
        include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
        propagate_down=[True, True, False, False])

    with open(train_net_file, 'w') as f:
        print('name: "{}_train"'.format(model_name), file=f)
        print(net.to_proto(), file=f)

    # Create test net.
    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        test_data,
        batch_size=test_batch_size,
        train=False,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=test_transform_param)

    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False)

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

    mbox_layers = CreateMultiBoxHead(net,
                                     data_layer='data',
                                     from_layers=mbox_source_layers,
                                     use_batchnorm=use_batchnorm,
                                     min_sizes=min_sizes,
                                     max_sizes=max_sizes,
                                     aspect_ratios=aspect_ratios,
                                     steps=steps,
                                     normalizations=normalizations,
                                     num_classes=num_classes,
                                     share_location=share_location,
                                     flip=flip,
                                     clip=clip,
                                     prior_variance=prior_variance,
                                     kernel_size=3,
                                     pad=1,
                                     lr_mult=lr_mult)

    conf_name = 'mbox_conf'
    if multibox_loss_param['conf_loss_type'] == P.MultiBoxLoss.SOFTMAX:
        reshape_name = '{}_reshape'.format(conf_name)
        net[reshape_name] = L.Reshape(net[conf_name],
                                      shape=dict(dim=[0, -1, num_classes]))
        softmax_name = '{}_softmax'.format(conf_name)
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        flatten_name = '{}_flatten'.format(conf_name)
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_layers[1] = net[flatten_name]
    elif multibox_loss_param['conf_loss_type'] == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = '{}_sigmoid'.format(conf_name)
        net[sigmoid_name] = L.Sigmoid(net[conf_name])
        mbox_layers[1] = net[sigmoid_name]

    net.detection_out = L.DetectionOutput(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    net.detection_eval = L.DetectionEvaluate(
        net.detection_out,
        net.label,
        detection_evaluate_param=det_eval_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    with open(test_net_file, 'w') as f:
        print('name: "{}_test"'.format(model_name), file=f)
        print(net.to_proto(), file=f)

    # Create deploy net by removing layers from the test net.
    deploy_net = net
    with open(deploy_net_file, 'w') as f:
        net_param = deploy_net.to_proto()
        # Remove the first (AnnotatedData) and last (DetectionEvaluate) layer from test net.
        del net_param.layer[0]
        del net_param.layer[-1]
        net_param.name = '{}_deploy'.format(model_name)
        net_param.input.extend(['data'])
        net_param.input_shape.extend(
            [caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])])
        print(net_param, file=f)

    # Create training solver.
    train_solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                             snapshot_prefix=snapshot_prefix,
                                             **train_solver_param)

    with open(train_solver_file, 'w') as f:
        print(train_solver, file=f)

    # Create testing solver.
    test_solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                            test_net=[test_net_file],
                                            snapshot_prefix=snapshot_prefix,
                                            **test_solver_param)

    with open(test_solver_file, 'w') as f:
        print(test_solver, file=f)
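
For context, a plausible way these generated files are consumed, mirroring the `caffe train` invocation from code example #1 (the max_iters and threshold values here are illustrative, not taken from the source):

create_ssd_model_definition(max_iters=120000, conf_thresh=0.01, nms_thresh=0.45)

train_solver_file = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                             cfg.MODEL_NAME, 'train_solver.prototxt')
cmd = ('./frameworks/caffe-rcnn-ssd/build/tools/caffe train '
       '--solver="{}" --gpu="{}"').format(train_solver_file, cfg.GPU_ID)
subprocess.call(cmd, shell=True)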
Code example #11
        bb_data[i][0] = 0
        bb_data[i][1] = 0

    # centroids = bb_data[:k]
    centroids = k_init(k, bb_data)
    # centroids = k_init(k, bb_data, n_local_trials=len(bb_data))

    # Start k-means to find best clusters
    anchor_boxes, best_avg_iou = k_means(k, centroids, bb_data)

    # Sort on width
    anchor_boxes = np.asarray(anchor_boxes)
    anchor_boxes = anchor_boxes[anchor_boxes[:, 2].argsort()]

    anchor_dir = osp.join(dataset_dir, 'custom_anchor_boxes')
    make_if_not_exist(anchor_dir)

    anchor_file = osp.join(anchor_dir, '{:d}_anchor_boxes.txt'.format(k))

    # Check whether a better set of anchor boxes already exists before
    # writing to the anchor file
    old_iou = 0
    if osp.exists(anchor_file):
        with open(anchor_file, 'r') as f:
            header = f.readline()
            old_iou = float(header.split(':')[1])

    if old_iou > 0:
        print('Previous IoU: {:f}. New IoU: {:f}'.format(
            old_iou, best_avg_iou))

    if best_avg_iou > old_iou:
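        # (The source snippet ends at the condition above. A hedged sketch of
        # the write step it presumably guards, inferred from the readers in
        # this example and in code example #10: a 'Best avg IoU: <value>'
        # header followed by one normalized 'width,height' pair per line.)
        print('Writing {:d} anchor boxes to {:s}'.format(k, anchor_file))
        with open(anchor_file, 'w') as f:
            f.write('Best avg IoU: {:f}\n'.format(best_avg_iou))
            for box in anchor_boxes:
                f.write('{:f},{:f}\n'.format(box[2], box[3]))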