def evaluate_ssd():
    """Evaluate a SSD network.

    Runs the Caffe tool with the test solver on the most recent
    ``.caffemodel`` snapshot found in ``cfg.OUTPUT_DIR`` and afterwards lets
    the ``<DATASET_NAME>_val`` imdb evaluate the results directory.

    Exits the process with status 1 when no snapshot is found, so calling
    scripts can detect the failure.
    """
    # Set results directory and solver
    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
    test_solver_file = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME,
                                cfg.METHOD_NAME, cfg.MODEL_NAME,
                                'test_solver.prototxt')
    make_if_not_exist(results_dir)
    check_if_exist('Solver', test_solver_file)

    # Find most recent model
    test_model = get_model_path(cfg.OUTPUT_DIR, '.caffemodel', '_iter_')
    if test_model is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        # Non-zero status: a missing model is an error, not a success.
        sys.exit(1)

    # Test model. The command is passed as an argument list (no shell), so
    # paths containing spaces or shell metacharacters reach Caffe unchanged.
    cmd = [
        './frameworks/caffe-rcnn-ssd/build/tools/caffe',
        'train',
        '--solver={}'.format(test_solver_file),
        '--weights={}'.format(test_model),
        '--gpu={}'.format(cfg.GPU_ID),
    ]
    return_code = subprocess.call(cmd)
    if return_code != 0:
        # Evaluation below still runs (as before), but the results it reads
        # may be missing or stale -- make that visible.
        print('Warning: caffe exited with status {}.'.format(return_code))

    # Set imdb and do evaluation
    imdb_name = '{:s}_val'.format(cfg.DATASET_NAME)
    imdb = get_imdb(imdb_name)
    imdb._do_pascal_voc_eval(results_dir)
def evaluate_detections(self, all_boxes):
    """Write `all_boxes` as PASCAL VOC result files and evaluate them.

    The files go to the `results` sub-directory of `cfg.OUTPUT_DIR`, which is
    created first if needed.
    """
    results_dir = '/'.join((cfg.OUTPUT_DIR, 'results'))
    make_if_not_exist(results_dir)

    # Dump the detections first, then score the directory just written.
    self._write_pascal_voc_results_files(all_boxes, results_dir)
    self._do_pascal_voc_eval(results_dir)
def evaluate_yolov2(conf_thresh, nms_thresh):
    """Evaluate a YOLOv2 network.

    Rewrites the input size in the darknet model config for testing, runs
    ``darknet-cpp detector valid`` on the most recent ``.weights`` file found
    in ``cfg.OUTPUT_DIR`` and lets the ``<DATASET_NAME>_val`` imdb evaluate
    the results directory.

    Args:
        conf_thresh: Not used by this function; kept so the signature stays
            compatible with existing callers.
        nms_thresh: Value forwarded to darknet as ``-nms_thresh``.

    Exits the process with status 1 when no weights file is found.
    """
    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
    data_cfg = osp.join(cfg.OUTPUT_DIR, '{}.data'.format(cfg.DATASET_NAME))
    model_cfg = osp.join(cfg.OUTPUT_DIR, '{}.cfg'.format(cfg.DATASET_NAME))
    make_if_not_exist(results_dir)
    check_if_exist('YOLOv2 data config', data_cfg)
    check_if_exist('YOLOv2 model config', model_cfg)

    # Change model config for testing. Each option is matched by its own key,
    # so the rewrite no longer depends on `width` being the line directly
    # after `height` (the old code overwrote whatever line followed `height`
    # and raised IndexError when `height` was the last line).
    with open(model_cfg, 'r') as f:
        data = f.readlines()
    for i, line in enumerate(data):
        key = line.split('=', 1)[0].strip()
        if key == 'height':
            data[i] = 'height={:d}\n'.format(cfg.TEST.SCALES[0])
        elif key == 'width':
            data[i] = 'width={:d}\n'.format(cfg.TEST.MAX_SIZE)
    with open(model_cfg, 'w') as f:
        f.writelines(data)

    # Find most recent model
    test_model = get_model_path(cfg.OUTPUT_DIR, '.weights', '_batch_')
    if test_model is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        # Non-zero status: a missing model is an error, not a success.
        sys.exit(1)

    result_file_prefix = '{}_det_test_'.format(cfg.DATASET_NAME)

    # Test model. Passing an argument list (no shell) keeps paths with spaces
    # or shell metacharacters intact.
    cmd = [
        './frameworks/darknet/darknet-cpp', 'detector', 'valid',
        data_cfg, model_cfg, test_model,
        '-out', result_file_prefix,
        '-gpus', '{}'.format(cfg.GPU_ID),
        '-nms_thresh', '{:f}'.format(nms_thresh),
    ]
    return_code = subprocess.call(cmd)
    if return_code != 0:
        # Evaluation below still runs (as before), but the results it reads
        # may be missing or stale -- make that visible.
        print('Warning: darknet exited with status {}.'.format(return_code))

    # Set imdb and evaluate
    imdb_name = '{:s}_val'.format(cfg.DATASET_NAME)
    imdb = get_imdb(imdb_name)
    imdb._do_pascal_voc_eval(results_dir)
BCE_EPOCHS = 32 INTERMEDIATE_EPOCHS = 2 PRETRAINED_COOLDOWN = 2 DROPOUT_COOLDOWN = 2 NUM_EPOCHS = BCE_EPOCHS + INTERMEDIATE_EPOCHS + CYCLES * CYCLE_LENGTH CLASS_WEIGHT = 0.05 MASKS_WEIGHT = 0.12 PROB = 0.5 PROB_CLASS = 0.8 VAL_METRIC_CRITERION = 'comp_metric' MODEL_FILE_DIR = './saved_models_' + str(EXP_NAME) LOGS_DIR = './logs/logs_' + str(EXP_NAME) make_if_not_exist(MODEL_FILE_DIR) make_if_not_exist(LOGS_DIR) MODEL_FILE_PATH = MODEL_FILE_DIR + '/model_{}_{:.4f}' def predict(config, model, data_loader, thresholding=True, threshold=THRESHOLD, tta=True): model.set_training(False) y_preds = [] with torch.no_grad():
skip = args.skip classnames = [] if labelmap_file: check_if_exist('Label map file', labelmap_file) labelmap = cpb2.LabelMap() with open(labelmap_file, 'r') as f: text_format.Merge(str(f.read()), labelmap) for item in labelmap.item: classname = str(item.display_name) classnames.append(classname) if save: save_dir = osp.splitext(result_file)[0] make_if_not_exist(save_dir) print('Saving to directory: {}'.format(save_dir)) img_results = OrderedDict() with open(result_file, "r") as f: for line in f.readlines(): img_path, label, score, xmin, ymin, xmax, ymax = line.strip( "\n").split() result = dict() result["label"] = int(label) result["score"] = float(score) result["bbox"] = [ float(xmin), float(ymin), float(xmax),
check_if_exist('Config', cfg_file) extra_cfg = ('METHOD_NAME {:s} MODEL_NAME {:s} ' 'DATASET_NAME {:s} GPU_ID {:d}'.format( method_name, model_name, dataset_name, gpu_id)) set_cfgs = extra_cfg.split() # Update config cfg_from_file(cfg_file) cfg_from_list(set_cfgs) # Set and create output dir cfg.OUTPUT_DIR = osp.join(cfg.OUTPUT_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME, cfg.MODEL_NAME) make_if_not_exist(cfg.OUTPUT_DIR) # Get classes from label map label_map_file = osp.join( cfg.DATA_DIR, cfg.DATASET_NAME, '{}_labelmap.prototxt'.format(cfg.DATASET_NAME)) cfg.CLASSES = get_classnames_from_labelmap(label_map_file) cfg.NUM_CLASSES = len(cfg.CLASSES) # Dump full config to output dir dst = osp.join(cfg.OUTPUT_DIR, 'config.yml') with open(dst, 'w') as f: yaml.dump(cfg, f, default_flow_style=False) else: # Get config from given output directory
def custom_dataset_eval(detpath,
                        annopath,
                        imagesetfile,
                        classname,
                        cachedir,
                        ovthresh=0.5,
                        use_07_metric=False):
    """rec, prec, ap = custom_dataset_eval(detpath, annopath, imagesetfile,
                                           classname, cachedir, [ovthresh],
                                           [use_07_metric])

    Top level function that does the evaluation for a single class.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
        Each line is read as `image_id confidence <box coordinates>`,
        separated by single spaces.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name
    cachedir: Directory for caching the annotations (created if missing)
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use voc07's 11 point AP computation
        (default False)

    Returns the tuple (rec, prec, ap): cumulative recall and precision over
    the detections sorted by decreasing confidence, and the value returned by
    custom_dataset_ap(rec, prec, use_07_metric).
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    make_if_not_exist(cachedir)
    cachefile = osp.join(cachedir, 'gt_annotations.pkl')

    # read list of images
    with open(imagesetfile, 'r') as f:
        imagenames = [x.strip() for x in f]

    if not osp.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))
        # save (pickle data is binary: always use a binary file mode)
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            cPickle.dump(recs, f)
    else:
        # load
        # NOTE(review): unpickling executes arbitrary code; the cache
        # directory is assumed to be trusted and written only by this tool.
        with open(cachefile, 'rb') as f:
            recs = cPickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # builtin `bool` instead of the removed `np.bool` alias
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        # difficult objects do not count as positives
        npos = npos + np.sum(~difficult)
        class_recs[imagename] = {
            'bbox': bbox,
            'difficult': difficult,
            'det': det
        }

    # read dets (blank lines are ignored)
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines if x.strip()]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    nd = len(image_ids)
    if nd > 0:
        # sort by confidence (highest first); skipped for an empty detection
        # file, where BB is 1-D and two-axis indexing would raise IndexError
        sorted_ind = np.argsort(-confidence)
        BB = BB[sorted_ind, :]
        image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # a match with a difficult gt is neither a TP nor a FP
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = True
                else:
                    # gt box already claimed by a higher-scoring detection
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = custom_dataset_ap(rec, prec, use_07_metric)

    return rec, prec, ap
# Remove background class if 'background' in classnames: classnames.remove('background') # Assume all images have same extension image_extension = osp.splitext(glob.glob(image_dir + '/*')[0])[1] yolov2_annotations_dir = osp.join(dataset_dir, 'yolov2_Annotations') yolov2_imageset_dir = osp.join(dataset_dir, 'yolov2_ImageSets') # # Remove existing labels # if osp.exists(yolov2_annotations_dir): # shutil.rmtree(yolov2_annotations_dir) make_if_not_exist(yolov2_annotations_dir) make_if_not_exist(yolov2_imageset_dir) # Create imagesets for YOLOv2 for imageset in imagesets: print('Creating YOLOv2 Imageset: {:s}'.format(imageset)) pascal_image_ids = open(osp.join(pascal_imageset_dir, imageset)).read().strip().split() yolov2_imageset_file = open(osp.join(yolov2_imageset_dir, imageset), 'w') # Shuffle train file if imageset == 'train.txt': random.shuffle(pascal_image_ids)
check_if_exist('Annotation directory', pascal_annotations_dir) check_if_exist('Imageset directory', pascal_imageset_dir) # src_dir = osp.dirname(osp.dirname(osp.dirname(osp.realpath(__file__)))) caffe_root = osp.join(src_dir, 'frameworks', 'caffe-rcnn-ssd') imagesets = [ os.path.basename(s) for s in glob.glob(pascal_imageset_dir + '/*.txt') ] # imagesets = ['train', 'val'] # Assume all images have same extension image_extension = osp.splitext(glob.glob(image_dir + '/*')[0])[1] ssd_imageset_dir = osp.join(dataset_dir, 'ssd_ImageSets') make_if_not_exist(ssd_imageset_dir) # Create imagesets for SSD for imageset in imagesets: print('Creating SSD Imageset: {:s}'.format(imageset)) pascal_image_ids = open(osp.join(pascal_imageset_dir, imageset)).read().strip().split() ssd_imageset_filename = osp.join(ssd_imageset_dir, imageset) ssd_imageset_file = open(ssd_imageset_filename, 'w') # Shuffle train file if imageset == 'train.txt': random.shuffle(pascal_image_ids) for image_id in pascal_image_ids:
def create_ssd_model_definition(max_iters, conf_thresh, nms_thresh):
    """Create SSD network definition files based on config settings.

    Writes `train.prototxt`, `test.prototxt`, `deploy.prototxt`,
    `train_solver.prototxt` and `test_solver.prototxt` into
    `<MODELS_DIR>/<DATASET_NAME>/<METHOD_NAME>/<MODEL_NAME>`.

    Args:
        max_iters: Used as the training solver's `max_iter`; the multistep
            learning-rate steps are derived from it.
        conf_thresh: `confidence_threshold` of the detection output layer.
        nms_thresh: `nms_threshold` of the detection output layer.

    Exits the process with status 1 when `cfg.CUSTOM_ANCHORS` is set but the
    custom anchor file does not exist.
    """
    # Training and testing data created by data/data_utils/pascal_voc_to_ssd.py
    train_data = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'train_lmdb')
    test_data = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'val_lmdb')

    models_dir = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                          cfg.MODEL_NAME)
    make_if_not_exist(models_dir)

    check_if_exist('Training data', train_data)
    check_if_exist('Test data', test_data)

    # Directory which stores the detection results
    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')

    # Model definition files.
    train_net_file = osp.join(models_dir, 'train.prototxt')
    test_net_file = osp.join(models_dir, 'test.prototxt')
    deploy_net_file = osp.join(models_dir, 'deploy.prototxt')
    train_solver_file = osp.join(models_dir, 'train_solver.prototxt')
    test_solver_file = osp.join(models_dir, 'test_solver.prototxt')

    # The name of the model
    model_name = '{}_ssd'.format(cfg.MODEL_NAME.lower())

    # Snapshot prefix.
    snapshot_prefix = osp.join(cfg.OUTPUT_DIR, model_name)

    # Stores the test image names and sizes
    name_size_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'ssd_ImageSets',
                              'val_name_size.txt')

    label_map_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                              '{}_labelmap.prototxt'.format(cfg.DATASET_NAME))

    # Network input is square: both sides use cfg.TRAIN.MAX_SIZE.
    resize_width = cfg.TRAIN.MAX_SIZE
    resize_height = cfg.TRAIN.MAX_SIZE

    # Specify the batch sampler: one pass-through entry, five entries with an
    # increasing minimum Jaccard overlap and one with a maximum overlap.
    def sampled_patch(sample_constraint):
        """Return one batch-sampler entry with the given constraint."""
        return {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': sample_constraint,
            'max_trials': 50,
            'max_sample': 1,
        }

    batch_sampler = [{
        'sampler': {},
        'max_trials': 1,
        'max_sample': 1,
    }]
    for min_overlap in (0.1, 0.3, 0.5, 0.7, 0.9):
        batch_sampler.append(
            sampled_patch({'min_jaccard_overlap': min_overlap}))
    batch_sampler.append(sampled_patch({'max_jaccard_overlap': 1.0}))

    train_transform_param = {
        'mirror': True,
        'mean_value': list(cfg.PIXEL_MEANS[0][0]),
        'force_color': True,
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            'interp_mode': [
                P.Resize.LINEAR,
                P.Resize.AREA,
                P.Resize.NEAREST,
                P.Resize.CUBIC,
                P.Resize.LANCZOS4,
            ],
        },
        'distort_param': {
            'brightness_prob': 0.5,
            'brightness_delta': 32,
            'contrast_prob': 0.5,
            'contrast_lower': 0.5,
            'contrast_upper': 1.5,
            'hue_prob': 0.5,
            'hue_delta': 18,
            'saturation_prob': 0.5,
            'saturation_lower': 0.5,
            'saturation_upper': 1.5,
            'random_order_prob': 0.0,
        },
        'expand_param': {
            'prob': 0.5,
            'max_expand_ratio': 4.0,
        },
        'emit_constraint': {
            'emit_type': caffe_pb2.EmitConstraint.CENTER,
        }
    }
    test_transform_param = {
        'mean_value': list(cfg.PIXEL_MEANS[0][0]),
        'force_color': True,
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            'interp_mode': [P.Resize.LINEAR],
        },
    }

    # If true, use batch norm for all newly added layers.
    # Currently only the non batch norm version has been tested.
    use_batchnorm = False
    lr_mult = 1
    # Use different initial learning rate.
    if use_batchnorm:
        base_lr = 0.0004
    else:
        # A learning rate for batch_size = 1, num_gpus = 1.
        base_lr = 0.00004

    # MultiBoxLoss parameters.
    num_classes = cfg.NUM_CLASSES
    share_location = True
    background_label_id = 0
    output_name_prefix = '{}_det_test_'.format(cfg.DATASET_NAME)
    train_on_diff_gt = False
    normalization_mode = P.Loss.VALID
    code_type = P.PriorBox.CENTER_SIZE
    ignore_cross_boundary_bbox = False
    mining_type = P.MultiBoxLoss.MAX_NEGATIVE
    neg_pos_ratio = 3.
    loc_weight = (neg_pos_ratio + 1.) / 4.
    multibox_loss_param = {
        'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
        'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
        'loc_weight': loc_weight,
        'num_classes': num_classes,
        'share_location': share_location,
        'match_type': P.MultiBoxLoss.PER_PREDICTION,
        'overlap_threshold': 0.5,
        'use_prior_for_matching': True,
        'background_label_id': background_label_id,
        'use_difficult_gt': train_on_diff_gt,
        'mining_type': mining_type,
        'neg_pos_ratio': neg_pos_ratio,
        'neg_overlap': 0.5,
        'code_type': code_type,
        'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }
    loss_param = {
        'normalization': normalization_mode,
    }

    # parameters for generating priors.
    # minimum dimension of input image
    min_dim = cfg.TRAIN.MAX_SIZE
    # conv4_3 ==> 38 x 38 (300x300) ==> 64 x 64 (512x512) ==> 76 x 76 (608x608)
    # fc7 ==> 19 x 19 (300x300) ==> 32 x 32 (512x512) ==> 38 x 38 (608x608)
    # conv6_2 ==> 10 x 10 (300x300) ==> 16 x 16 (512x512) ==> 19 x 19 (608x608)
    # conv7_2 ==> 5 x 5 (300x300) ==> 8 x 8 (512x512) ==> 10 x 10 (608x608)
    # conv8_2 ==> 3 x 3 (300x300) ==> 4 x 4 (512x512) ==> 5 x 5 (608x608)
    # conv9_2 ==> 1 x 1 (300x300) ==> 2 x 2 (512x512) ==> 3 x 3 (608x608)
    # conv10_2 ==> 1 x 1 (512x512) ==> 1 x 1 (608x608)
    if cfg.CUSTOM_ANCHORS:
        anchor_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                               'custom_anchor_boxes', '6_anchor_boxes.txt')
        if not osp.exists(anchor_file):
            print('Custom anchor boxes `{:s}` does not exist.'.format(
                anchor_file))
            print('Generate custom anchor boxes with '
                  'data/data_utils/k_means_anchor_boxes.py')
            # Non-zero status: a missing anchor file is an error.
            sys.exit(1)
        # Read anchor file
        with open(anchor_file, 'r') as f:
            data = f.readlines()
        custom_anchors = []
        # The first line is skipped; the remaining lines are read as
        # `width,height` values and scaled by min_dim.
        for anchor_line in data[1:]:
            splt = anchor_line.split(',')
            anchor_width = float(splt[0]) * min_dim
            anchor_height = float(splt[1]) * min_dim
            custom_anchors.append([anchor_width, anchor_height])
        custom_anchors = np.asarray(custom_anchors)
        print(custom_anchors)
        min_ratio = int(np.floor(np.min(custom_anchors) / min_dim * 100))
        max_ratio = int(np.ceil(np.amax(custom_anchors) / min_dim * 100))
        nb = 1
    else:
        # in percent %
        min_const = 10
        max_const = 20
        min_ratio = 20
        max_ratio = 90
        if min_dim == 512 or min_dim == 608:
            max_const = 10
            min_ratio = 10
            min_const = 4
        nb = 2

    mbox_source_layers = [
        'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
    ]
    if min_dim == 512 or min_dim == 608:
        mbox_source_layers.append('conv10_2')

    step = int(
        np.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - nb)))
    min_sizes = []
    max_sizes = []
    # `range` (not the Python 2 only `xrange`) so this runs on 2 and 3.
    for ratio in range(min_ratio, max_ratio + 1, step):
        print(ratio)
        min_sizes.append(min_dim * ratio / 100.)
        max_sizes.append(min_dim * (ratio + step) / 100.)

    steps = [8, 16, 32, 64, 100, 300]
    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
    # L2 normalize conv4_3.
    normalizations = [20, -1, -1, -1, -1, -1]

    if min_dim == 512:
        steps = [8, 16, 32, 64, 128, 256, 512]
        aspect_ratios.insert(2, [2, 3])
        normalizations.append(-1)
    elif min_dim == 608:
        steps = [8, 16, 32, 61, 122, 203, 608]
        aspect_ratios.insert(2, [2, 3])
        normalizations.append(-1)

    print("minsize: ", min_sizes)
    print("maxsize: ", max_sizes)
    if not cfg.CUSTOM_ANCHORS:
        # Prepend the fixed smallest prior for the first source layer.
        min_sizes = [min_dim * min_const / 100.] + min_sizes
        max_sizes = [min_dim * max_const / 100.] + max_sizes
    print("minsize: ", min_sizes)
    print("maxsize: ", max_sizes)

    if min_dim != 300 and min_dim != 512:
        print('SSD anchor boxes are not optimized for size {}'.format(min_dim))

    # variance used to encode/decode prior bboxes.
    if code_type == P.PriorBox.CENTER_SIZE:
        prior_variance = [0.1, 0.1, 0.2, 0.2]
    else:
        prior_variance = [0.1]
    flip = True
    clip = False

    ### PRIOR CALCULATIONS THAT ARE DONE IN CAFFE LAYER
    # Informational only: prints the prior box sizes, changes no parameter.
    for s in range(0, len(min_sizes)):
        min_size = min_sizes[s]
        # first prior: aspect_ratio = 1, size = min_size
        box_width = min_size
        box_height = min_size
        print('\nfirst: {} X {}'.format(box_width, box_height))

        if len(max_sizes) > 0:
            max_size = max_sizes[s]
            box_width = np.sqrt(min_size * max_size)
            box_height = np.sqrt(min_size * max_size)
            print('second: {} X {}'.format(box_width, box_height))

        for r in range(0, len(aspect_ratios[s])):
            ar = aspect_ratios[s][r]
            if np.fabs(ar - 1.) < 1e-6:
                continue
            box_width = min_size * np.sqrt(ar)
            box_height = min_size / np.sqrt(ar)
            print('rest: {} X {}'.format(box_width, box_height))

    # Solver parameters.
    # Defining which GPUs to use.
    gpus = '{:d}'.format(cfg.GPU_ID)
    gpulist = gpus.split(',')
    num_gpus = len(gpulist)

    # Divide the mini-batch to different GPUs.
    batch_size = cfg.TRAIN.IMS_PER_BATCH
    accum_batch_size = cfg.TRAIN.BATCH_SIZE
    iter_size = accum_batch_size / batch_size
    solver_mode = P.Solver.CPU
    device_id = 0
    batch_size_per_device = batch_size
    # gpulist always holds at least one entry, so this branch is always taken.
    if num_gpus > 0:
        batch_size_per_device = int(np.ceil(float(batch_size) / num_gpus))
        iter_size = int(
            np.ceil(
                float(accum_batch_size) / (batch_size_per_device * num_gpus)))
        solver_mode = P.Solver.GPU
        device_id = int(gpulist[0])

    if normalization_mode == P.Loss.NONE:
        base_lr /= batch_size_per_device
    elif normalization_mode == P.Loss.VALID:
        base_lr *= 25. / loc_weight
    elif normalization_mode == P.Loss.FULL:
        # Roughly there are 2000 prior bboxes per image.
        # TODO(weiliu89): Estimate the exact # of priors.
        base_lr *= 2000.

    # Get number of test images from name_size_file (one line per image);
    # the file is closed again instead of leaking the handle.
    with open(name_size_file) as f:
        num_test_image = sum(1 for _ in f)
    test_batch_size = 8
    # Ideally test_batch_size should be divisible by num_test_image,
    test_iter = int(np.ceil(float(num_test_image) / test_batch_size))

    stepvalue = [
        int(np.ceil(max_iters * 0.6667)),
        int(np.ceil(max_iters * 0.8333)),
        max_iters,
    ]

    train_solver_param = {
        # Train parameters
        'base_lr': base_lr,
        'weight_decay': 0.0005,
        'lr_policy': 'multistep',
        'stepvalue': stepvalue,
        'gamma': 0.1,
        'momentum': 0.9,
        'iter_size': iter_size,
        'max_iter': max_iters,
        'snapshot': cfg.TRAIN.SNAPSHOT_ITERS,
        'display': 20,
        'average_loss': 10,
        'type': 'SGD',
        'solver_mode': solver_mode,
        'device_id': device_id,
        'debug_info': False,
        'snapshot_after_train': True,
    }

    test_solver_param = {
        # Test parameters
        'snapshot': 1,
        'snapshot_after_train': False,
        'test_iter': [test_iter],
        'test_interval': 1,
        'eval_type': 'detection',
        'ap_version': 'MaxIntegral',
        'test_initialization': True,
    }

    # Parameters for generating detection output.
    det_out_param = {
        'num_classes': num_classes,
        'share_location': share_location,
        'background_label_id': background_label_id,
        'nms_param': {
            'nms_threshold': nms_thresh,
            'top_k': 200
        },
        'save_output_param': {
            'output_directory': results_dir,
            'output_name_prefix': output_name_prefix,
            'output_format': 'VOC',
            'label_map_file': label_map_file,
            'name_size_file': name_size_file,
            'num_test_image': num_test_image,
        },
        'keep_top_k': 50,
        'confidence_threshold': conf_thresh,
        'code_type': code_type,
    }

    # Parameters for evaluating detection results.
    det_eval_param = {
        'num_classes': num_classes,
        'background_label_id': background_label_id,
        'overlap_threshold': 0.5,
        'evaluate_difficult_gt': False,
        'name_size_file': name_size_file,
    }

    def add_body_and_head(net):
        """Add VGG body, extra layers and multibox head; return the head."""
        VGGNetBody(net,
                   from_layer='data',
                   fully_conv=True,
                   reduced=True,
                   dilated=True,
                   dropout=False)
        AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)
        return CreateMultiBoxHead(net,
                                  data_layer='data',
                                  from_layers=mbox_source_layers,
                                  use_batchnorm=use_batchnorm,
                                  min_sizes=min_sizes,
                                  max_sizes=max_sizes,
                                  aspect_ratios=aspect_ratios,
                                  steps=steps,
                                  normalizations=normalizations,
                                  num_classes=num_classes,
                                  share_location=share_location,
                                  flip=flip,
                                  clip=clip,
                                  prior_variance=prior_variance,
                                  kernel_size=3,
                                  pad=1,
                                  lr_mult=lr_mult)

    # Create train net.
    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        train_data,
        batch_size=batch_size_per_device,
        train=True,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=train_transform_param,
        batch_sampler=batch_sampler)
    mbox_layers = add_body_and_head(net)

    # Create the MultiBoxLossLayer.
    name = "mbox_loss"
    mbox_layers.append(net.label)
    net[name] = L.MultiBoxLoss(
        *mbox_layers,
        multibox_loss_param=multibox_loss_param,
        loss_param=loss_param,
        include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
        propagate_down=[True, True, False, False])

    with open(train_net_file, 'w') as f:
        print('name: "{}_train"'.format(model_name), file=f)
        print(net.to_proto(), file=f)

    # Create test net.
    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        test_data,
        batch_size=test_batch_size,
        train=False,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=test_transform_param)
    mbox_layers = add_body_and_head(net)

    # Replace the raw confidence blob (second head output) by its
    # normalized version before it is fed to the detection output layer.
    conf_name = 'mbox_conf'
    if multibox_loss_param['conf_loss_type'] == P.MultiBoxLoss.SOFTMAX:
        reshape_name = '{}_reshape'.format(conf_name)
        net[reshape_name] = L.Reshape(net[conf_name],
                                      shape=dict(dim=[0, -1, num_classes]))
        softmax_name = '{}_softmax'.format(conf_name)
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        flatten_name = '{}_flatten'.format(conf_name)
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_layers[1] = net[flatten_name]
    elif multibox_loss_param['conf_loss_type'] == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = '{}_sigmoid'.format(conf_name)
        net[sigmoid_name] = L.Sigmoid(net[conf_name])
        mbox_layers[1] = net[sigmoid_name]

    net.detection_out = L.DetectionOutput(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    net.detection_eval = L.DetectionEvaluate(
        net.detection_out,
        net.label,
        detection_evaluate_param=det_eval_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    with open(test_net_file, 'w') as f:
        print('name: "{}_test"'.format(model_name), file=f)
        print(net.to_proto(), file=f)

    # Create deploy net from the test net.
    deploy_net = net
    with open(deploy_net_file, 'w') as f:
        net_param = deploy_net.to_proto()
        # Remove the first (AnnotatedData) and last (DetectionEvaluate) layer
        # from test net.
        del net_param.layer[0]
        del net_param.layer[-1]
        net_param.name = '{}_deploy'.format(model_name)
        net_param.input.extend(['data'])
        net_param.input_shape.extend(
            [caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])])
        print(net_param, file=f)

    # Create training solver.
    train_solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                             snapshot_prefix=snapshot_prefix,
                                             **train_solver_param)
    with open(train_solver_file, 'w') as f:
        print(train_solver, file=f)

    # Create testing solver.
    test_solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                            test_net=[test_net_file],
                                            snapshot_prefix=snapshot_prefix,
                                            **test_solver_param)
    with open(test_solver_file, 'w') as f:
        print(test_solver, file=f)
bb_data[i][0] = 0 bb_data[i][1] = 0 # centroids = bb_data[:k] centroids = k_init(k, bb_data) # centroids = k_init(k, bb_data, n_local_trials=len(bb_data)) # Start k-means to find best clusters anchor_boxes, best_avg_iou = k_means(k, centroids, bb_data) # Sort on width anchor_boxes = np.asarray(anchor_boxes) anchor_boxes = anchor_boxes[anchor_boxes[:, 2].argsort()] anchor_dir = osp.join(dataset_dir, 'custom_anchor_boxes') make_if_not_exist(anchor_dir) anchor_file = osp.join(anchor_dir, '{:d}_anchor_boxes.txt'.format(k)) # Check that better anchor_boxes does not already exist and write to anchor file old_iou = 0 if osp.exists(anchor_file): with open(anchor_file, 'r') as f: header = f.readline() old_iou = float(header.split(':')[1]) if old_iou > 0: print('Previous IoU: {:f}. New IoU: {:f}'.format( old_iou, best_avg_iou)) if best_avg_iou > old_iou: