def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
    """Evaluate ``rpn_model`` once per training image and buffer its ROI proposals.

    The training map/ROI files are read with ``randomize=False`` and
    ``use_flipping=False``, one image per minibatch. While the proposals are
    computed, the TEST pre-/post-NMS top-N settings in ``cfg`` are temporarily
    replaced by the TRAIN values; the original TEST values are restored before
    returning, even if evaluation fails part-way through.

    Args:
        rpn_model: model exposing an output named ``'rpn_rois'``.
        image_input: network input bound to the reader's image stream.
        roi_input: network input bound to the reader's ROI stream.
        dims_input: network input bound to the reader's dims stream.

    Returns:
        A list of length ``cfg["CNTK"].NUM_TRAIN_IMAGES``; entry ``i`` holds the
        rounded ``rpn_rois`` output for the i-th image read, cast to ``np.int16``.
    """
    num_images = cfg["CNTK"].NUM_TRAIN_IMAGES

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        max_images=num_images,
        randomize=False, use_flipping=False)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    # setting pre- and post-nms top N to training values since buffered
    # proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N

    buffered_proposals = [None for _ in range(num_images)]
    try:
        cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

        sample_count = 0
        while sample_count < num_images:
            data = od_minibatch_source.next_minibatch(1, input_map=input_map)
            output = rpn_model.eval(data)
            # eval() results are keyed by output variable; index them by name.
            out_dict = {k.name: k for k in output}
            out_rpn_rois = output[out_dict['rpn_rois']][0]
            # NOTE(review): int16 storage assumes ROI coordinates stay within
            # the int16 range -- confirm for very large padded image sizes.
            buffered_proposals[sample_count] = np.round(out_rpn_rois).astype(np.int16)
            sample_count += 1
            if sample_count % 500 == 0:
                print("Buffered proposals for {} samples".format(sample_count))
    finally:
        # resetting config values to original test values, also on failure,
        # so an exception here cannot leak training settings into later tests
        cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
        cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return buffered_proposals
def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
    """Evaluate ``rpn_model`` once per training image and buffer its ROI proposals.

    The training map/ROI files are read with ``randomize=False`` and
    ``use_flipping=False``, one image per minibatch. While the proposals are
    computed, the TEST pre-/post-NMS top-N settings in ``cfg`` are temporarily
    replaced by the TRAIN values; the original TEST values are restored before
    returning, even if evaluation fails part-way through.

    Args:
        rpn_model: model exposing an output named ``'rpn_rois'``.
        image_input: network input bound to the reader's image stream.
        roi_input: network input bound to the reader's ROI stream.
        dims_input: network input bound to the reader's dims stream.

    Returns:
        A list of length ``cfg["CNTK"].NUM_TRAIN_IMAGES``; entry ``i`` holds the
        rounded ``rpn_rois`` output for the i-th image read, cast to ``np.int16``.
    """
    num_images = cfg["CNTK"].NUM_TRAIN_IMAGES

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        max_images=num_images,
        randomize=False, use_flipping=False)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    # setting pre- and post-nms top N to training values since buffered
    # proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N

    buffered_proposals = [None for _ in range(num_images)]
    try:
        cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

        sample_count = 0
        while sample_count < num_images:
            data = od_minibatch_source.next_minibatch(1, input_map=input_map)
            output = rpn_model.eval(data)
            # eval() results are keyed by output variable; index them by name.
            out_dict = {k.name: k for k in output}
            out_rpn_rois = output[out_dict['rpn_rois']][0]
            # NOTE(review): int16 storage assumes ROI coordinates stay within
            # the int16 range -- confirm for very large padded image sizes.
            buffered_proposals[sample_count] = np.round(out_rpn_rois).astype(np.int16)
            sample_count += 1
            if sample_count % 500 == 0:
                print("Buffered proposals for {} samples".format(sample_count))
    finally:
        # resetting config values to original test values, also on failure,
        # so an exception here cannot leak training settings into later tests
        cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
        cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return buffered_proposals
def eval_faster_rcnn_mAP(eval_model):
    """Evaluate a Faster R-CNN model on the test images and return the mean AP.

    Reads ``num_test_images`` images one at a time from the test map/ROI
    files, collects the ground-truth boxes and the model's regressed
    detections per class, and hands both to ``evaluate_detections``. Per-class
    APs and the mean AP are printed. If ``evaluate_detections`` reports
    false-positive errors they are written via ``log_fp_errors`` to a file in
    ``globalvars['output_path']``.

    Args:
        eval_model: callable applied to ``(image_input, dims_input)`` that
            yields a network with outputs named ``'cls_pred'``, ``'rpn_rois'``
            and ``'bbox_regr'``.

    Returns:
        ``np.nanmean`` of the per-class average precisions (NaN entries are
        ignored by the mean).
    """
    img_map_file = globalvars['test_map_file']
    roi_map_file = globalvars['test_roi_file']
    classes = globalvars['classes']

    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5),
                               dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        img_map_file, roi_map_file,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=False, use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)]
                 for _ in range(globalvars['num_classes'])]

    # evaluate test images and collect network output
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        # Ground truth: one row per annotation slot, last column is the class
        # label; rows with a label <= 0 are dropped (presumably padding).
        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:, -1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            if cls_index == 0:
                # index 0 is never evaluated (presumably the background class)
                continue
            cls_gt_boxes = all_gt_boxes[np.where(all_gt_boxes[:, -1] == cls_index)]
            all_gt_infos[cls_name].append({
                'bbox': np.array(cls_gt_boxes),
                'difficult': [False] * len(cls_gt_boxes),
                'det': [False] * len(cls_gt_boxes)
            })

        output = frcn_eval.eval({
            image_input: mb_data[image_input],
            dims_input: mb_data[dims_input]
        })
        # eval() results are keyed by output variable; index them by name.
        out_dict = {k.name: k for k in output}
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels,
                                      mb_data[dims_input].asarray())

        # Turn the per-ROI vectors into columns so they can be stacked
        # next to the box coordinates.
        labels.shape = labels.shape + (1,)
        scores.shape = scores.shape + (1,)
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, globalvars['num_classes']):
            coords_score_label_for_cls = coords_score_label[
                np.where(coords_score_label[:, -1] == cls_j)]
            # drop the trailing label column, keep coords + score
            all_boxes[cls_j][img_i] = coords_score_label_for_cls[:, :-1].astype(
                np.float32, copy=False)

        if (img_i + 1) % 100 == 0:
            print("Processed {} samples".format(img_i + 1))

    # The confusion map is optional: any ordinary failure to locate or load it
    # (missing config entry, missing file, parse error) falls back to None.
    # Only Exception is caught so Ctrl-C / SystemExit still propagate.
    try:
        conf_file = cfg["CNTK"].CONFUSION_FILE
        conf_file = os.path.join(map_file_path, conf_file)
        confusions = confusions_map(classes, conf_file)
    except Exception:
        confusions = None

    # calculate mAP
    aps, fp_errors = evaluate_detections(
        all_boxes, all_gt_infos, classes,
        nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
        conf_threshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
        soft=cfg["CNTK"].RESULTS_NMS_SOFT,
        confusions=confusions)

    if fp_errors:
        output_file = os.path.join(
            globalvars['output_path'],
            "{}_{}_fps.txt".format(
                cfg["CNTK"].BASE_MODEL,
                "e2e" if globalvars['train_e2e'] else "4stage"))
        log_fp_errors(fp_errors, output_file)

    ap_list = []
    for class_name in aps:
        ap_list.append(aps[class_name])
        print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name]))
    meanAP = np.nanmean(ap_list)
    print('Mean AP = {:.4f}'.format(meanAP))
    return meanAP
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Train the network behind ``loss`` for ``epochs_to_train`` epochs.

    Parameters named ``'b'`` or containing ``'.b'`` are treated as biases and
    get their own momentum-SGD learner whose per-sample learning rates are
    scaled by ``cfg["CNTK"].BIAS_LR_MULT``; all remaining parameters use
    ``lr_per_sample`` unchanged. Minibatches come from the training map/ROI
    files (randomized, optionally flipped).

    When ``buffered_rpn_proposals`` is given, the proposals returned by the
    reader are fed to ``rpn_rois_input`` and the dims entry is removed from the
    minibatch before training.

    Args:
        image_input, roi_input, dims_input: network inputs bound to the
            reader's image, ROI and dims streams.
        loss: training criterion (a bare ``cntk.Variable`` is wrapped with
            ``combine``).
        pred_error: evaluation metric passed to the ``Trainer``.
        lr_per_sample: per-sample learning rates.
        mm_schedule: momentum schedule shared by both learners.
        l2_reg_weight: L2 regularization weight shared by both learners.
        epochs_to_train: number of epochs to run.
        rpn_rois_input: network input receiving buffered proposals (only used
            together with ``buffered_rpn_proposals``).
        buffered_rpn_proposals: precomputed proposals handed to the reader,
            or ``None`` to train without them.
    """
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    # Partition the trainable parameters into biases and everything else.
    all_params = loss.parameters
    bias_params = [prm for prm in all_params if prm.name == 'b' or '.b' in prm.name]
    weight_params = [prm for prm in all_params if prm not in bias_params]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        for title, group in (("biases", bias_params), ("others", weight_params)):
            print(title)
            for prm in group:
                print(prm)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    weight_lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    weight_learner = momentum_sgd(
        weight_params, weight_lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    scaled_bias_lr = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(scaled_bias_lr, unit=UnitType.sample)
    bias_learner = momentum_sgd(
        bias_params, bias_lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    trainer = Trainer(None, (loss, pred_error), [weight_learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    feed_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train,
                                       gen_heartbeat=True)

    for _epoch in range(epochs_to_train):
        processed = 0
        while processed < epoch_size:
            # Never request more samples than remain in this epoch.
            batch_size = min(mb_size, epoch_size - processed)
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(
                batch_size, input_map=input_map)

            if feed_proposals:
                proposal_batch = Value(batch=np.asarray(proposals, dtype=np.float32))
                data[rpn_rois_input] = MinibatchData(proposal_batch, 1, 1, False)
                # remove dims input if no rpn is required to avoid warnings;
                # the entry is located by the '[6]' in its string form
                dims_keys = [key for key in data if '[6]' in str(key)]
                del data[dims_keys[0]]

            trainer.train_minibatch(data)
            processed += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)
            if processed % 100 == 0:
                print("Processed {} samples".format(processed))

        progress_printer.epoch_summary(with_metric=True)
def eval_faster_rcnn_mAP(eval_model):
    """Compute the mean average precision of a Faster R-CNN model on the test set.

    Reads ``num_test_images`` images one at a time, gathers ground-truth boxes
    and regressed detections per class, passes them to
    ``evaluate_detections`` and prints the per-class APs and their mean.

    Args:
        eval_model: callable applied to ``(image_input, dims_input)`` that
            yields a network with outputs named ``'cls_pred'``, ``'rpn_rois'``
            and ``'bbox_regr'``.

    Returns:
        ``np.nanmean`` of the per-class average precisions.
    """
    test_image_map = globalvars['test_map_file']
    test_roi_map = globalvars['test_roi_file']
    classes = globalvars['classes']

    image_input = input_variable(
        (num_channels, image_height, image_width),
        dynamic_axes=[Axis.default_batch_axis()],
        name=feature_node_name)
    roi_input = input_variable(
        (cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5),
        dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Reader over the test set, in file order and without augmentation.
    minibatch_source = ObjectDetectionMinibatchSource(
        test_image_map, test_roi_map,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=False, use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # Bind reader streams to network inputs.
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)]
                 for _ in range(globalvars['num_classes'])]

    # evaluate test images and collect network output
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}

    for image_idx in range(num_test_images):
        minibatch = minibatch_source.next_minibatch(1, input_map=input_map)

        # Ground truth rows; the last column holds the class label and only
        # rows with a positive label are kept.
        annotations = minibatch[roi_input].asarray().reshape(
            (cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
        labelled = annotations[annotations[:, -1] > 0]

        for class_id, class_name in enumerate(classes):
            if class_id == 0:
                continue
            class_gt = labelled[labelled[:, -1] == class_id]
            gt_count = len(class_gt)
            all_gt_infos[class_name].append({
                'bbox': np.array(class_gt),
                'difficult': [False] * gt_count,
                'det': [False] * gt_count,
            })

        net_out = frcn_eval.eval({
            image_input: minibatch[image_input],
            dims_input: minibatch[dims_input],
        })
        by_name = {node.name: node for node in net_out}
        cls_pred = net_out[by_name['cls_pred']][0]
        rpn_rois = net_out[by_name['rpn_rois']][0]
        bbox_regr = net_out[by_name['bbox_regr']][0]

        labels = cls_pred.argmax(axis=1)
        scores = cls_pred.max(axis=1)
        regressed_rois = regress_rois(rpn_rois, bbox_regr, labels,
                                      minibatch[dims_input].asarray())

        # Append score and label as trailing columns next to the coordinates.
        score_col = scores.reshape(scores.shape + (1,))
        label_col = labels.reshape(labels.shape + (1,))
        detections = np.hstack((regressed_rois, score_col, label_col))

        # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for class_id in range(1, globalvars['num_classes']):
            class_rows = detections[detections[:, -1] == class_id]
            all_boxes[class_id][image_idx] = class_rows[:, :-1].astype(
                np.float32, copy=False)

        done = image_idx + 1
        if done % 100 == 0:
            print("Processed {} samples".format(done))

    # calculate mAP
    aps = evaluate_detections(
        all_boxes, all_gt_infos, classes,
        nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
        conf_threshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD)

    ap_values = []
    for class_name in aps:
        class_ap = aps[class_name]
        ap_values.append(class_ap)
        print('AP for {:>15} = {:.4f}'.format(class_name, class_ap))

    mean_ap = np.nanmean(ap_values)
    print('Mean AP = {:.4f}'.format(mean_ap))
    return mean_ap
def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Train the network behind ``loss`` for ``epochs_to_train`` epochs.

    Parameters named ``'b'`` or containing ``'.b'`` are treated as biases and
    get their own momentum-SGD learner whose per-sample learning rates are
    scaled by ``cfg["CNTK"].BIAS_LR_MULT``; all remaining parameters use
    ``lr_per_sample`` unchanged. Minibatches come from the training map/ROI
    files (randomized, optionally flipped).

    When ``buffered_rpn_proposals`` is given, the proposals returned by the
    reader are fed to ``rpn_rois_input`` and the dims entry is removed from the
    minibatch before training.

    Args:
        image_input, roi_input, dims_input: network inputs bound to the
            reader's image, ROI and dims streams.
        loss: training criterion (a bare ``cntk.Variable`` is wrapped with
            ``combine``).
        pred_error: evaluation metric passed to the ``Trainer``.
        lr_per_sample: per-sample learning rates.
        mm_schedule: momentum schedule shared by both learners.
        l2_reg_weight: L2 regularization weight shared by both learners.
        epochs_to_train: number of epochs to run.
        rpn_rois_input: network input receiving buffered proposals (only used
            together with ``buffered_rpn_proposals``).
        buffered_rpn_proposals: precomputed proposals handed to the reader,
            or ``None`` to train without them.
    """
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    # Partition the trainable parameters into biases and everything else.
    all_params = loss.parameters
    bias_params = [prm for prm in all_params if prm.name == 'b' or '.b' in prm.name]
    weight_params = [prm for prm in all_params if prm not in bias_params]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        for title, group in (("biases", bias_params), ("others", weight_params)):
            print(title)
            for prm in group:
                print(prm)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    weight_lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    weight_learner = momentum_sgd(
        weight_params, weight_lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    scaled_bias_lr = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(scaled_bias_lr, unit=UnitType.sample)
    bias_learner = momentum_sgd(
        bias_params, bias_lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    trainer = Trainer(None, (loss, pred_error), [weight_learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    feed_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train,
                                       gen_heartbeat=True)

    for _epoch in range(epochs_to_train):
        processed = 0
        while processed < epoch_size:
            # Never request more samples than remain in this epoch.
            batch_size = min(mb_size, epoch_size - processed)
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(
                batch_size, input_map=input_map)

            if feed_proposals:
                proposal_batch = Value(batch=np.asarray(proposals, dtype=np.float32))
                data[rpn_rois_input] = MinibatchData(proposal_batch, 1, 1, False)
                # remove dims input if no rpn is required to avoid warnings;
                # the entry is located by the '[6]' in its string form
                dims_keys = [key for key in data if '[6]' in str(key)]
                del data[dims_keys[0]]

            trainer.train_minibatch(data)
            processed += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)
            if processed % 100 == 0:
                print("Processed {} samples".format(processed))

        progress_printer.epoch_summary(with_metric=True)
def evalImage(url):
    """Run the detector on the image at ``url`` and return the best score per label.

    The model is loaded from ``model_path`` on every call. A single minibatch
    is read for ``url``, the network is evaluated, and for every entry of
    ``LabelList`` the highest ``cls_pred`` score among the ROIs whose arg-max
    class is that label is recorded (0 if no ROI was assigned the label). The
    numeric result is pretty-printed together with ``url``.

    Args:
        url: image location, passed both to the minibatch source constructor
            and to its ``next_minibatch`` call.

    Returns:
        dict mapping each label in ``LabelList`` to its best score, converted
        to ``str``.
    """
    # NOTE(review): reloading the model per call is kept as-is; cache it at
    # the call site if this runs in a hot path.
    eval_model = load_model(model_path)

    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5),
                               dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        url,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=False, use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # evaluate the image
    mb_data = minibatch_source.next_minibatch(url, 1, input_map=input_map)
    output = frcn_eval.eval({
        image_input: mb_data[image_input],
        dims_input: mb_data[dims_input]
    })
    # eval() results are keyed by output variable; index them by name.
    out_dict = {k.name: k for k in output}
    out_cls_pred = output[out_dict['cls_pred']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1)

    # Best score seen for each label; labels no ROI was assigned stay at 0.
    result = dict.fromkeys(LabelList, 0)
    for label, score in zip(labels, scores):
        label_name = LabelList[int(label)]
        if result[label_name] < score:
            result[label_name] = score

    pp = pprint.PrettyPrinter(indent=4)
    print("---------------------")
    print(url)
    pp.pprint(result)
    print("---------------------")

    # Build a fresh dict for the stringified scores instead of rewriting
    # ``result`` while iterating over it.
    return {label_name: str(score) for label_name, score in result.items()}