Python ObjectDetectionMinibatchSource Exemples, od_mb_source.ObjectDetectionMinibatchSource Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : FasterRCNN.py Projet : TreeLLi/CNTK-Hotel-pictures-classificator

def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
    """Evaluate the RPN on every training image and buffer its proposals.

    Images are read one at a time (no shuffling, no flipping) from the
    training map/ROI files named in ``globalvars``. For each image the
    ``'rpn_rois'`` output of ``rpn_model`` is rounded and stored as
    ``np.int16``.

    While the proposals are computed, the TEST pre-/post-NMS top-N settings
    in ``cfg`` are temporarily replaced by the TRAIN values; the original
    values are restored afterwards, even if evaluation raises.

    Args:
        rpn_model: model whose evaluated outputs include one named
            ``'rpn_rois'``.
        image_input: network input mapped to the reader's image stream.
        roi_input: network input mapped to the reader's ROI stream.
        dims_input: network input mapped to the reader's dims stream.

    Returns:
        A list of ``cfg["CNTK"].NUM_TRAIN_IMAGES`` int16 arrays, one per
        image, in the order the reader delivered them.
    """
    num_images = cfg["CNTK"].NUM_TRAIN_IMAGES
    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'],
        globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        max_images=num_images,
        randomize=False,
        use_flipping=False)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    # Remember the test-time values so they can be restored afterwards.
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N

    buffered_proposals = [None] * num_images
    try:
        # setting pre- and post-nms top N to training values since buffered
        # proposals are used for further training
        cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

        for sample_index in range(num_images):
            data = od_minibatch_source.next_minibatch(1, input_map=input_map)
            output = rpn_model.eval(data)
            # Outputs are keyed by variable; index them by name instead.
            out_dict = {k.name: k for k in output}
            out_rpn_rois = output[out_dict['rpn_rois']][0]
            buffered_proposals[sample_index] = np.round(out_rpn_rois).astype(
                np.int16)
            sample_count = sample_index + 1
            if sample_count % 500 == 0:
                print("Buffered proposals for {} samples".format(sample_count))
    finally:
        # resetting config values to original test values, also on failure,
        # so the shared config is never left in the overridden state
        cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
        cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return buffered_proposals

Exemple #2

0

Afficher le fichier

Fichier : FasterRCNN.py Projet : gzt200361/CNTK

def compute_rpn_proposals(rpn_model, image_input, roi_input, dims_input):
    """Buffer the RPN proposals for all training images.

    Reads the training set sequentially (``randomize=False``,
    ``use_flipping=False``), evaluates ``rpn_model`` one image at a time and
    keeps the rounded ``'rpn_rois'`` output as an ``np.int16`` array.

    The TEST pre-/post-NMS top-N config values are switched to the TRAIN
    values for the duration of the pass and are always restored on exit,
    including when evaluation raises.

    Args:
        rpn_model: model whose evaluated outputs include one named
            ``'rpn_rois'``.
        image_input: network input mapped to the reader's image stream.
        roi_input: network input mapped to the reader's ROI stream.
        dims_input: network input mapped to the reader's dims stream.

    Returns:
        A list with one int16 proposal array per training image
        (``cfg["CNTK"].NUM_TRAIN_IMAGES`` entries).
    """
    num_images = cfg["CNTK"].NUM_TRAIN_IMAGES
    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        max_images=num_images,
        randomize=False, use_flipping=False)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    # Save the test-time values before overriding them.
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N

    buffered_proposals = [None] * num_images
    try:
        # setting pre- and post-nms top N to training values since buffered
        # proposals are used for further training
        cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

        for sample_index in range(num_images):
            data = od_minibatch_source.next_minibatch(1, input_map=input_map)
            output = rpn_model.eval(data)
            # Look the output variable up by name.
            out_dict = {k.name: k for k in output}
            out_rpn_rois = output[out_dict['rpn_rois']][0]
            buffered_proposals[sample_index] = np.round(out_rpn_rois).astype(np.int16)
            sample_count = sample_index + 1
            if sample_count % 500 == 0:
                print("Buffered proposals for {} samples".format(sample_count))
    finally:
        # resetting config values to original test values; done in `finally`
        # so a failed evaluation cannot leave the global config modified
        cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
        cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return buffered_proposals

Exemple #3

0

Afficher le fichier

Fichier : FasterRCNN.py Projet : TreeLLi/CNTK-Hotel-pictures-classificator

def eval_faster_rcnn_mAP(eval_model):
    """Evaluate a Faster R-CNN model on the test set and return its mean AP.

    For each test image the ground-truth boxes are collected per class, the
    model is evaluated, and the regressed ROIs are grouped by predicted
    class. The collected detections and ground truth are then passed to
    ``evaluate_detections``; per-class APs and their mean are printed.

    Args:
        eval_model: callable applied to ``(image_input, dims_input)``; the
            result's ``eval`` must yield outputs named ``'cls_pred'``,
            ``'rpn_rois'`` and ``'bbox_regr'``.

    Returns:
        ``np.nanmean`` over the per-class AP values returned by
        ``evaluate_detections``.
    """
    # NOTE(review): `num_test_images` and `map_file_path` are not defined in
    # this function; presumably module-level globals - confirm. The reader is
    # capped with cfg["CNTK"].NUM_TEST_IMAGES while the loops below use
    # `num_test_images`; verify the two agree.
    img_map_file = globalvars['test_map_file']
    roi_map_file = globalvars['test_roi_file']
    classes = globalvars['classes']
    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5),
                               dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        img_map_file,
        roi_map_file,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=False,
        use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_test_images)]
                 for _ in range(globalvars['num_classes'])]

    # evaluate test images and collect the network output
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(0, num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        # Rows whose last column (the class label) is > 0 are real
        # annotations; the remaining rows are dropped.
        gt_row = mb_data[roi_input].asarray()
        gt_row = gt_row.reshape((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_row[np.where(gt_row[:, -1] > 0)]

        for cls_index, cls_name in enumerate(classes):
            # class index 0 is skipped here and in the detection loop below
            if cls_index == 0:
                continue
            cls_gt_boxes = all_gt_boxes[np.where(
                all_gt_boxes[:, -1] == cls_index)]
            all_gt_infos[cls_name].append({
                'bbox': np.array(cls_gt_boxes),
                'difficult': [False] * len(cls_gt_boxes),
                'det': [False] * len(cls_gt_boxes)
            })

        output = frcn_eval.eval({
            image_input: mb_data[image_input],
            dims_input: mb_data[dims_input]
        })
        # Outputs are keyed by variable; index them by name instead.
        out_dict = {k.name: k for k in output}
        out_cls_pred = output[out_dict['cls_pred']][0]
        out_rpn_rois = output[out_dict['rpn_rois']][0]
        out_bbox_regr = output[out_dict['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels,
                                      mb_data[dims_input].asarray())

        # Turn labels/scores into column vectors so they can be stacked
        # next to the ROI coordinates.
        labels.shape = labels.shape + (1, )
        scores.shape = scores.shape + (1, )
        coords_score_label = np.hstack((regressed_rois, scores, labels))

        #   shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, globalvars['num_classes']):
            coords_score_label_for_cls = coords_score_label[np.where(
                coords_score_label[:, -1] == cls_j)]
            # drop the trailing label column, keep coords + score
            all_boxes[cls_j][
                img_i] = coords_score_label_for_cls[:, :-1].astype(np.float32,
                                                                   copy=False)

        if (img_i + 1) % 100 == 0:
            print("Processed {} samples".format(img_i + 1))

    # Loading the confusion map is best-effort: fall back to None when it is
    # not configured or cannot be loaded. `except Exception` (rather than a
    # bare `except`) lets KeyboardInterrupt/SystemExit propagate.
    try:
        conf_file = cfg["CNTK"].CONFUSION_FILE
        conf_file = os.path.join(map_file_path, conf_file)
        confusions = confusions_map(classes, conf_file)
    except Exception:
        confusions = None

    # calculate mAP
    aps, fp_errors = evaluate_detections(
        all_boxes,
        all_gt_infos,
        classes,
        nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
        conf_threshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
        soft=cfg["CNTK"].RESULTS_NMS_SOFT,
        confusions=confusions)
    if fp_errors:
        output_file = os.path.join(
            globalvars['output_path'], "{}_{}_fps.txt".format(
                cfg["CNTK"].BASE_MODEL,
                "e2e" if globalvars['train_e2e'] else "4stage"))
        log_fp_errors(fp_errors, output_file)

    ap_list = []
    for class_name in aps:
        ap_list += [aps[class_name]]
        print('AP for {:>15} = {:.4f}'.format(class_name, aps[class_name]))
    meanAP = np.nanmean(ap_list)
    print('Mean AP = {:.4f}'.format(meanAP))
    return meanAP

Exemple #4

0

Afficher le fichier

Fichier : FasterRCNN.py Projet : TreeLLi/CNTK-Hotel-pictures-classificator

def train_model(image_input,
                roi_input,
                dims_input,
                loss,
                pred_error,
                lr_per_sample,
                mm_schedule,
                l2_reg_weight,
                epochs_to_train,
                rpn_rois_input=None,
                buffered_rpn_proposals=None):
    """Train ``loss`` with momentum SGD on the training image set.

    Parameters named ``'b'`` or containing ``'.b'`` are treated as biases
    and trained by a second learner whose per-sample learning rates are
    scaled by ``cfg["CNTK"].BIAS_LR_MULT``.

    Args:
        image_input: network input mapped to the reader's image stream.
        roi_input: network input mapped to the reader's ROI stream.
        dims_input: network input mapped to the reader's dims stream.
        loss: training criterion; a ``cntk.Variable`` is wrapped with
            ``combine`` first.
        pred_error: evaluation metric handed to the trainer.
        lr_per_sample: iterable of per-sample learning rates.
        mm_schedule: momentum schedule shared by both learners.
        l2_reg_weight: L2 regularization weight for both learners.
        epochs_to_train: number of epochs to run.
        rpn_rois_input: network input that receives buffered proposals;
            used only when ``buffered_rpn_proposals`` is given.
        buffered_rpn_proposals: optional precomputed proposals passed to
            the reader.
    """
    # NOTE(review): `epoch_size` and `mb_size` are not defined in this
    # function; presumably module-level globals - confirm.
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    # Split the parameters into biases and everything else.
    all_params = loss.parameters
    biases = [p for p in all_params if '.b' in p.name or 'b' == p.name]
    others = [p for p in all_params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        for title, group in (("biases", biases), ("others", others)):
            print(title)
            for param in group:
                print(param)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(
        others, lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    scaled_bias_rates = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(scaled_bias_rates,
                                              unit=UnitType.sample)
    bias_learner = momentum_sgd(
        biases, bias_lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'],
        globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    use_buffered_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(tag='Training',
                                       num_epochs=epochs_to_train,
                                       gen_heartbeat=True)
    for _ in range(epochs_to_train):
        sample_count = 0
        while sample_count < epoch_size:
            # never request more samples than are left in this epoch
            batch_size = min(mb_size, epoch_size - sample_count)
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(
                batch_size, input_map=input_map)
            if use_buffered_proposals:
                proposal_batch = Value(
                    batch=np.asarray(proposals, dtype=np.float32))
                data[rpn_rois_input] = MinibatchData(proposal_batch, 1, 1,
                                                     False)
                # remove dims input if no rpn is required to avoid warnings;
                # the entry is located by the '[6]' in its string form
                dims_keys = [k for k in data if '[6]' in str(k)]
                del data[dims_keys[0]]

            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

Exemple #5

0

Afficher le fichier

Fichier : FasterRCNN.py Projet : gzt200361/CNTK

def eval_faster_rcnn_mAP(eval_model):
    """Compute and print the mean average precision on the test set.

    Each test image is read together with its ground-truth annotations, the
    model is evaluated on it, and the regressed ROIs are sorted into
    per-class detection lists. ``evaluate_detections`` then yields one AP
    per class; these are printed along with their mean.

    Args:
        eval_model: callable applied to ``(image_input, dims_input)``; the
            result's ``eval`` must yield outputs named ``'cls_pred'``,
            ``'rpn_rois'`` and ``'bbox_regr'``.

    Returns:
        ``np.nanmean`` of the per-class AP values.
    """
    # NOTE(review): `num_test_images` is not defined in this function;
    # presumably a module-level global - confirm.
    img_map_file = globalvars['test_map_file']
    roi_map_file = globalvars['test_roi_file']
    classes = globalvars['classes']

    batch_axes = [Axis.default_batch_axis()]
    image_input = input_variable(
        (num_channels, image_height, image_width),
        dynamic_axes=batch_axes,
        name=feature_node_name)
    rois_per_image = cfg["CNTK"].INPUT_ROIS_PER_IMAGE
    roi_input = input_variable((rois_per_image, 5),
                               dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        img_map_file,
        roi_map_file,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=False,
        use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    num_classes = globalvars['num_classes']
    all_boxes = [[[] for _ in range(num_test_images)]
                 for _ in range(num_classes)]

    # evaluate test images and collect the network output
    print("Evaluating Faster R-CNN model for %s images." % num_test_images)
    all_gt_infos = {key: [] for key in classes}
    for img_i in range(num_test_images):
        mb_data = minibatch_source.next_minibatch(1, input_map=input_map)

        # Keep only annotation rows with a positive label (last column).
        gt_rows = mb_data[roi_input].asarray().reshape(
            (cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5))
        all_gt_boxes = gt_rows[gt_rows[:, -1] > 0]

        for cls_index, cls_name in enumerate(classes):
            # class index 0 is skipped here and in the detection loop below
            if cls_index != 0:
                cls_gt_boxes = all_gt_boxes[all_gt_boxes[:, -1] == cls_index]
                num_gt = len(cls_gt_boxes)
                all_gt_infos[cls_name].append({
                    'bbox': np.array(cls_gt_boxes),
                    'difficult': [False] * num_gt,
                    'det': [False] * num_gt,
                })

        eval_args = {
            image_input: mb_data[image_input],
            dims_input: mb_data[dims_input],
        }
        output = frcn_eval.eval(eval_args)
        # Outputs are keyed by variable; index them by name instead.
        by_name = {var.name: var for var in output}
        out_cls_pred = output[by_name['cls_pred']][0]
        out_rpn_rois = output[by_name['rpn_rois']][0]
        out_bbox_regr = output[by_name['bbox_regr']][0]

        labels = out_cls_pred.argmax(axis=1)
        scores = out_cls_pred.max(axis=1)
        regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels,
                                      mb_data[dims_input].asarray())

        # Stack scores and labels as extra columns next to the coordinates.
        coords_score_label = np.hstack(
            (regressed_rois, scores[:, np.newaxis], labels[:, np.newaxis]))

        #   shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
        for cls_j in range(1, num_classes):
            rows_for_cls = coords_score_label[coords_score_label[:, -1] == cls_j]
            # drop the trailing label column, keep coords + score
            all_boxes[cls_j][img_i] = rows_for_cls[:, :-1].astype(
                np.float32, copy=False)

        processed = img_i + 1
        if processed % 100 == 0:
            print("Processed {} samples".format(processed))

    # calculate mAP
    aps = evaluate_detections(
        all_boxes,
        all_gt_infos,
        classes,
        nms_threshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
        conf_threshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD)
    ap_list = []
    for class_name in aps:
        class_ap = aps[class_name]
        ap_list.append(class_ap)
        print('AP for {:>15} = {:.4f}'.format(class_name, class_ap))
    meanAP = np.nanmean(ap_list)
    print('Mean AP = {:.4f}'.format(meanAP))
    return meanAP

Exemple #6

0

Afficher le fichier

Fichier : FasterRCNN.py Projet : gzt200361/CNTK

def train_model(image_input, roi_input, dims_input, loss, pred_error,
                lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train,
                rpn_rois_input=None, buffered_rpn_proposals=None):
    """Run momentum-SGD training of ``loss`` over the training images.

    Two learners are used: one for parameters named ``'b'`` or containing
    ``'.b'`` (biases), whose per-sample learning rates are multiplied by
    ``cfg["CNTK"].BIAS_LR_MULT``, and one for all remaining parameters.

    Args:
        image_input: network input mapped to the reader's image stream.
        roi_input: network input mapped to the reader's ROI stream.
        dims_input: network input mapped to the reader's dims stream.
        loss: training criterion; a ``cntk.Variable`` is wrapped with
            ``combine`` first.
        pred_error: evaluation metric handed to the trainer.
        lr_per_sample: iterable of per-sample learning rates.
        mm_schedule: momentum schedule shared by both learners.
        l2_reg_weight: L2 regularization weight for both learners.
        epochs_to_train: number of epochs to run.
        rpn_rois_input: network input that receives buffered proposals;
            used only when ``buffered_rpn_proposals`` is given.
        buffered_rpn_proposals: optional precomputed proposals passed to
            the reader.
    """
    # NOTE(review): `epoch_size` and `mb_size` are not defined in this
    # function; presumably module-level globals - confirm.
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    # Separate bias parameters from the rest.
    model_params = loss.parameters
    biases = [p for p in model_params if '.b' in p.name or 'b' == p.name]
    others = [p for p in model_params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("biases")
        for bias_param in biases:
            print(bias_param)
        print("others")
        for other_param in others:
            print(other_param)
        print("bias_lr_mult: {}".format(bias_lr_mult))

    # Instantiate the learners and the trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(
        others,
        lr_schedule,
        mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)

    bias_lr_per_sample = [bias_lr_mult * rate for rate in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(
        biases,
        bias_lr_schedule,
        mm_schedule,
        l2_regularization_weight=l2_reg_weight,
        unit_gain=False,
        use_mean_gradient=cfg["CNTK"].USE_MEAN_GRADIENT)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    od_minibatch_source = ObjectDetectionMinibatchSource(
        globalvars['train_map_file'], globalvars['train_roi_file'],
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width, pad_height=image_height, pad_value=img_pad_value,
        randomize=True, use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["CNTK"].NUM_TRAIN_IMAGES,
        buffered_rpn_proposals=buffered_rpn_proposals)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.roi_si: roi_input,
        od_minibatch_source.dims_si: dims_input
    }

    use_buffered_proposals = buffered_rpn_proposals is not None
    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for _ in range(epochs_to_train):
        sample_count = 0
        while sample_count < epoch_size:
            # cap the request at the number of samples left in the epoch
            samples_left = epoch_size - sample_count
            data, proposals = od_minibatch_source.next_minibatch_with_proposals(
                min(mb_size, samples_left), input_map=input_map)
            if use_buffered_proposals:
                proposal_value = Value(batch=np.asarray(proposals, dtype=np.float32))
                data[rpn_rois_input] = MinibatchData(proposal_value, 1, 1, False)
                # remove dims input if no rpn is required to avoid warnings;
                # the entry is found via the '[6]' in its string form
                matching_keys = [k for k in data if '[6]' in str(k)]
                del data[matching_keys[0]]

            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

Exemple #7

0

Afficher le fichier

Fichier : application.py Projet : yuhattor/cntk-fasterrcnn-model-deployment

def evalImage(url):
    """Evaluate the detection model on one image and report per-label scores.

    Loads the model from ``model_path``, reads a single minibatch for
    ``url``, evaluates the network and keeps, for every entry of
    ``LabelList``, the highest ``'cls_pred'`` score among the ROIs whose
    arg-max class is that label. The result is pretty-printed together with
    ``url``.

    The ground-truth bookkeeping and the ``'rpn_rois'``/``'bbox_regr'``
    outputs that were previously computed here were never read, so they have
    been removed.

    Args:
        url: image reference handed to ``ObjectDetectionMinibatchSource``
            and to its ``next_minibatch``.
            NOTE(review): the reader is called with a different signature
            here than in other usages - presumably a project-local variant;
            confirm.

    Returns:
        A dict mapping each label of ``LabelList`` to its best score as a
        string (``'0'`` for labels that were never predicted).
    """
    # set image
    eval_model = load_model(model_path)

    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    # roi_input is never fed to the model, but it is still bound to the
    # reader's ROI stream in input_map below.
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5),
                               dynamic_axes=[Axis.default_batch_axis()])
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    frcn_eval = eval_model(image_input, dims_input)

    # Create the minibatch source
    minibatch_source = ObjectDetectionMinibatchSource(
        url,
        max_annotations_per_image=cfg["CNTK"].INPUT_ROIS_PER_IMAGE,
        pad_width=image_width,
        pad_height=image_height,
        pad_value=img_pad_value,
        randomize=False,
        use_flipping=False,
        max_images=cfg["CNTK"].NUM_TEST_IMAGES)

    # define mapping from reader streams to network inputs
    input_map = {
        minibatch_source.image_si: image_input,
        minibatch_source.roi_si: roi_input,
        minibatch_source.dims_si: dims_input
    }

    # evaluate the image
    mb_data = minibatch_source.next_minibatch(url, 1, input_map=input_map)
    output = frcn_eval.eval({
        image_input: mb_data[image_input],
        dims_input: mb_data[dims_input]
    })

    # Outputs are keyed by variable; index them by name instead.
    out_dict = {k.name: k for k in output}
    out_cls_pred = output[out_dict['cls_pred']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1)

    # Best score seen per label; labels that are never predicted stay at 0.
    result = {label: 0 for label in LabelList}
    for label_index, score in zip(labels, scores):
        label_name = LabelList[int(label_index)]
        if result[label_name] < score:
            result[label_name] = score

    pp = pprint.PrettyPrinter(indent=4)
    print("---------------------")
    print(url)
    pp.pprint(result)
    print("---------------------")

    # Return the scores as strings, as before.
    return {label: str(score) for label, score in result.items()}