Exemple #1
0
def extract_detected_objects(R, C, features):
    """
    Classify the proposed regions and group the confident detections by class.

    The rows of ``R`` are sent through the module-level
    ``model_classifier_only`` in batches of ``C.num_rois``.  A final, shorter
    batch is padded up to ``C.num_rois`` rows by repeating its first ROI.
    A prediction is discarded when its best score is below the module-level
    ``bbox_threshold`` or when its best class is the last class index
    (presumably the background class -- confirm against ``class_mapping``).

    Args:
        R: 2-D array with one ``(x, y, w, h)`` row per region proposal.
        C: config object; ``num_rois``, ``classifier_regr_std`` and
            ``rpn_stride`` are read from it.
        features: passed unchanged as the first input of the classifier
            (presumably the shared feature map -- confirm with the caller).

    Returns:
        A dict mapping each detected class name to a list of plain dicts
        with the keys ``"name"``, ``"score"`` (a float) and ``"frame"``
        (``[x1, y1, x2, y2]``, each multiplied by ``C.rpn_stride``).
        The dict is empty when ``R`` has no rows.
    """
    result = {}
    full_batches = R.shape[0] // C.num_rois

    for jk in range(full_batches + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == full_batches:
            # Last, partial batch: the classifier input has a fixed number of
            # ROI slots, so fill the spare slots with copies of the first ROI.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        P_cls, P_regr = model_classifier_only.predict([features, ROIs])

        # NOTE(review): the padded slots are scored as well, so the first ROI
        # of the last batch may be reported more than once -- confirm that the
        # caller de-duplicates (e.g. with non-max suppression).
        for ii in range(P_cls.shape[1]):
            cls_num = np.argmax(P_cls[0, ii, :])
            score = np.max(P_cls[0, ii, :])

            if score < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]
            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: if the box refinement fails, keep the
                # unrefined ROI.  Unlike a bare ``except`` this does not
                # swallow KeyboardInterrupt / SystemExit.
                pass

            result.setdefault(cls_name, []).append({
                "name": cls_name,
                "score": float(score),
                "frame": [
                    C.rpn_stride * x,
                    C.rpn_stride * y,
                    C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h),
                ],
            })
    return result
def get_img_probas(img_path, P_cls, P_regr, ROIs, C, f):
    """
    Score every ROI crop of an image with the module-level ``img_classifier``.

    For each ROI the box is refined with the regression output of its best
    class, scaled by ``C.rpn_stride``, mapped through
    ``data_generators.get_real_coordinates`` and cut out of the image read
    from ``img_path``.  The crop is resized to 299x299, divided by 255 and
    passed to ``img_classifier.predict``.

    Args:
        img_path: path of the image, read with ``cv2.imread``.
        P_cls: class scores, indexed as ``P_cls[0, roi, class]``.
        P_regr: box regressions, indexed as ``P_regr[0, roi, 4 * class + k]``.
        ROIs: proposals, indexed as ``ROIs[0, roi, :] == (x, y, w, h)``.
        C: config object; ``classifier_regr_std`` and ``rpn_stride`` are read.
        f: forwarded as the first argument of ``get_real_coordinates``
            (presumably the resize ratio -- confirm with the caller).

    Returns:
        An array of shape ``(1, P_cls.shape[1], len(class_mapping))`` with one
        row of ``img_classifier`` outputs per ROI.
    """
    img = cv2.imread(img_path)
    # Side length of the square crop fed to ``img_classifier``.
    target_size = (299, 299)
    img_probas = np.zeros((P_cls.shape[1], len(class_mapping)))

    for ii in range(P_cls.shape[1]):

        (x, y, w, h) = ROIs[0, ii, :]
        cls_num = np.argmax(P_cls[0, ii, :])

        try:
            (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
            tx /= C.classifier_regr_std[0]
            ty /= C.classifier_regr_std[1]
            tw /= C.classifier_regr_std[2]
            th /= C.classifier_regr_std[3]
            x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
        except Exception:
            # Best effort: fall back to the unrefined ROI.  Unlike a bare
            # ``except`` this lets KeyboardInterrupt / SystemExit propagate.
            pass

        # Get the true BB coordinates
        x1 = C.rpn_stride * x
        y1 = C.rpn_stride * y
        x2 = C.rpn_stride * (x + w)
        y2 = C.rpn_stride * (y + h)
        x1, y1, x2, y2 = data_generators.get_real_coordinates(
            f, x1, y1, x2, y2)

        # Get the probabilities from the image classifier
        cropped_img = img[y1:y2, x1:x2, :]
        x_resized = cv2.resize(np.copy(cropped_img),
                               target_size,
                               interpolation=cv2.INTER_CUBIC)
        x_resized = x_resized / 255.
        x_resized = np.expand_dims(x_resized, axis=0)

        img_probas[ii, :] = img_classifier.predict(x_resized)[0]

    # Add a leading batch axis so the result lines up with P_cls.
    return np.expand_dims(img_probas, axis=0)
Exemple #3
0
def main():
    """
    Run the detector over every image in ``img_path`` and save annotated copies.

    Steps, in order: call ``cleanup()``, load the pickled config, build the
    RPN and classifier models and load their weights from ``C.model_path``,
    call ``convert_to_images()``, then for each image file (sorted naturally
    by name) predict boxes, apply per-class non-max suppression, draw boxes
    and labels, save each detection crop under ``Detected Subjects/`` and
    write the annotated image to ``output_path``.  Finally
    ``save_to_video()`` is called.

    Relies on module-level names that are not defined in this function:
    ``num_rois``, ``img_path``, ``output_path``, ``nn``, ``format_img``,
    ``get_file_names``, ``accumulate``, ``cleanup``, ``convert_to_images``
    and ``save_to_video``.
    """
    cleanup()
    cnt = 0  # running index used to name the saved detection crops
    sys.setrecursionlimit(40000)
    config_output_filename = 'config.pickle'

    # A pickle is binary data, so the file has to be opened in binary mode
    # (text mode fails on Python 3 and is unsafe on Windows).
    # NOTE(review): pickle.load can execute arbitrary code -- only load a
    # config file that comes from a trusted source.
    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False
    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # Invert to {class index: class name}.  ``items()`` works on Python 2
    # and 3, whereas ``iteritems()`` exists only on Python 2.
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = num_rois

    # Scale between ROI coordinates and image pixels.  Falls back to the
    # previously hard-coded value when the config has no ``rpn_stride``.
    rpn_stride = getattr(C, 'rpn_stride', 16)

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (1024, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    # Built from the same ``classifier`` outputs as model_classifier_only.
    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    # Minimum class score for a prediction to be kept.
    bbox_threshold = 0.8

    print("Converting video to images..")
    convert_to_images()
    print("anotating...")

    # Natural sort: digit runs compare as integers, so "img2" < "img10".
    list_files = sorted(get_file_names(img_path),
                        key=lambda var: [
                            int(x) if x.isdigit() else x
                            for x in re.findall(r'[^0-9]|[0-9]+', var)
                        ])
    for img_name in list_files:
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        st = time.time()
        filepath = os.path.join(img_path, img_name)
        img = cv2.imread(filepath)
        X = format_img(img, C)

        # Rebuild a drawable image from the network input: reverse the
        # channel order, move channels last and add the per-channel offsets.
        img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :],
                                  (1, 2, 0)).copy()
        img_scaled[:, :, 0] += 123.68
        img_scaled[:, :, 1] += 116.779
        img_scaled[:, :, 2] += 103.939

        img_scaled = img_scaled.astype(np.uint8)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        full_batches = R.shape[0] // C.num_rois

        for jk in range(full_batches + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == full_batches:
                # Last, partial batch: pad up to num_rois rows by repeating
                # the first ROI of the batch.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                cls_num = np.argmax(P_cls[0, ii, :])
                score = np.max(P_cls[0, ii, :])

                # Skip weak predictions and the last class index (the 'bg'
                # entry when it was appended above).
                if score < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI, but let
                    # KeyboardInterrupt / SystemExit propagate.
                    pass
                bboxes[cls_name].append([
                    rpn_stride * x, rpn_stride * y, rpn_stride * (x + w),
                    rpn_stride * (y + h)
                ])
                probs[cls_name].append(score)

        all_dets = []
        all_objects = []

        for key in bboxes:
            bbox = np.array(bboxes[key])
            # Plain Python ints are accepted as a colour by OpenCV on both
            # Python 2 and 3; a numpy integer array is not always accepted.
            color = tuple(int(c) for c in class_to_color[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                cv2.rectangle(img_scaled, (x1, y1), (x2, y2), color, 2)
                crop_img = img_scaled[y1:y2, x1:x2]
                # An empty crop (degenerate box) cannot be encoded; skip the
                # write but keep the numbering of the crop files unchanged.
                if crop_img.size:
                    cv2.imwrite(
                        "Detected Subjects/subject" + str(cnt) + ".png",
                        crop_img)
                cnt += 1

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))
                all_objects.append((key, 1))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (x1, y1)

                # Label background: black outline, then white fill.
                label_top_left = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
                label_bottom_right = (textOrg[0] + retval[0] + 5,
                                      textOrg[1] - retval[1] - 5)
                cv2.rectangle(img_scaled, label_top_left, label_bottom_right,
                              (0, 0, 0), 2)
                cv2.rectangle(img_scaled, label_top_left, label_bottom_right,
                              (255, 255, 255), -1)
                cv2.putText(img_scaled, textLabel, textOrg,
                            cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
        print('Elapsed time = {}'.format(time.time() - st))

        # Black banner across the top of the frame for the object counts.
        width = img_scaled.shape[1]
        cv2.rectangle(img_scaled, (0, 0), (width, 30), (0, 0, 0), -1)

        cv2.putText(img_scaled,
                    "Obj count: " + str(list(accumulate(all_objects))),
                    (5, 19), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 255), 1)
        cv2.imwrite(os.path.join(output_path, img_name), img_scaled)
        print(all_dets)
    print("saving to video..")
    save_to_video()
Exemple #4
0
def upload_file():
    """
    Handle an uploaded image: run the detector and return the annotated image.

    On a POST request the uploaded ``file`` field is saved as ``temp.jpg``
    inside ``UPLOAD_FOLDER``, passed through the RPN and classifier models,
    filtered by per-class non-max suppression and annotated with boxes and
    labels.  The annotated image is written to ``temp_infected.jpg`` and
    returned base64-encoded.

    Relies on module-level names that are not defined in this function:
    ``request``, ``UPLOAD_FOLDER``, ``C``, ``graph``, ``model_rpn``,
    ``model_classifier_only``, ``bbox_threshold``, ``class_mapping``,
    ``class_to_color``, ``format_img`` and ``get_real_coordinates``.

    Returns:
        For POST, ``jsonify`` of a dict with ``count`` (number of detections
        kept after suppression) and ``image`` (base64 text of the annotated
        JPEG).  For any other method, ``None``.
    """
    if request.method != 'POST':
        # NOTE(review): the original also falls through and returns None for
        # non-POST requests; confirm the route only accepts POST.
        return None

    uploaded = request.files['file']
    img_name = os.path.join(UPLOAD_FOLDER, "temp.jpg")
    # The target name is a constant, so no sanitising is needed.  Passing
    # the joined path through secure_filename() flattened the directory
    # separator, so the upload was saved to a different file than the one
    # read back below.
    uploaded.save(img_name)

    print(img_name)
    st = time.time()

    img = cv2.imread(img_name)

    X, ratio = format_img(img, C)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    with graph.as_default():
        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}
    full_batches = R.shape[0] // C.num_rois

    for jk in range(full_batches + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == full_batches:
            # Last, partial batch: pad up to num_rois rows by repeating the
            # first ROI of the batch.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        with graph.as_default():
            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            cls_num = np.argmax(P_cls[0, ii, :])
            score = np.max(P_cls[0, ii, :])

            # Skip weak predictions and the last class index.
            if score < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: keep the unrefined ROI, but let
                # KeyboardInterrupt / SystemExit propagate.
                pass
            bboxes[cls_name].append(
                [C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w), C.rpn_stride * (y + h)])
            probs[cls_name].append(score)

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])
        # OpenCV colour as plain Python ints.
        color = (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2]))

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            # Map the box back onto the original (un-resized) image.
            (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2), color, 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
            textOrg = (real_x1, real_y1)

            # Label background: black outline, then white fill.
            label_top_left = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
            label_bottom_right = (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5)
            cv2.rectangle(img, label_top_left, label_bottom_right, (0, 0, 0), 2)
            cv2.rectangle(img, label_top_left, label_bottom_right, (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    print(len(all_dets))
    cv2.imwrite('temp_infected.jpg', img)

    # len() is already 0 for an empty list, so no branch is needed.
    result = {'count': len(all_dets)}

    with open("temp_infected.jpg", "rb") as image_file:
        # Named ``encoded`` so the builtin ``str`` is not shadowed; the
        # payload is no longer dumped to stdout on every request.
        encoded = base64.b64encode(image_file.read())
    result['image'] = encoded.decode("utf-8")

    return jsonify(result)
Exemple #5
0
def run_prediction(config_filename,
                   model_path,
                   test_path,
                   out_path,
                   network='resnet50',
                   num_rois=32):
    """
    Detect objects in every image of ``test_path`` and save the results.

    For each detection kept after non-max suppression, a copy of the source
    image with that single box drawn on it is written to
    ``out_path/page_<P>_id_<K>.jpg``.  ``P`` is the trailing ``_<number>`` of
    the image file name plus one and ``K`` is the 1-based index of the
    detection within that image.  A table of ``[image name, detections]``
    rows is written to ``out_path/preresults.txt``.

    Args:
        config_filename: path of the pickled training config.
        model_path: weights file; overrides ``C.model_path``.
        test_path: directory containing the images to process.
        out_path: output directory; created if it does not exist.
        network: not read by this function (the backbone is chosen from
            ``C.network``); kept so existing callers keep working.
        num_rois: number of ROIs per classifier batch.

    Raises:
        ValueError: if ``C.network`` is neither ``'resnet50'`` nor ``'vgg'``.
    """
    # NOTE(review): pickle.load can execute arbitrary code -- only load a
    # config file that comes from a trusted source.
    with open(config_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # Pick the backbone module and its feature-map depth together, and fail
    # clearly for an unknown backbone instead of with a NameError later on.
    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
        num_features = 1024
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn
        num_features = 512
    else:
        raise ValueError('Unsupported network: {!r}'.format(C.network))

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # Invert to {class index: class name}.
    class_mapping = {v: k for k, v in class_mapping.items()}
    C.num_rois = int(num_rois)

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    # Built from the same ``classifier`` outputs as model_classifier_only.
    model_classifier = Model([feature_map_input, roi_input], classifier)

    C.model_path = model_path
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.9  # default is 0.8

    # Create the output directory up front: the summary file is written
    # there even when no image produces a detection.
    if not os.path.isdir(out_path):
        os.makedirs(out_path)

    preresults = []

    for img_name in sorted(os.listdir(img_path)):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)  # default is 0.7

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        full_batches = R.shape[0] // C.num_rois

        for jk in range(full_batches + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == full_batches:
                # Last, partial batch: pad up to num_rois rows by repeating
                # the first ROI of the batch.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                cls_num = np.argmax(P_cls[0, ii, :])
                score = np.max(P_cls[0, ii, :])

                # Skip weak predictions and the last class index.
                if score < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI, but let
                    # KeyboardInterrupt / SystemExit propagate.
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(score)

        bbox_results = []

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, _ = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]),
                overlap_thresh=0.4)  # default is 0.5
            for jk in range(new_boxes.shape[0]):
                # Fresh copy so each output file shows a single box.
                img_use = img.copy()
                (x1, y1, x2, y2) = new_boxes[jk, :]

                # Map the box back onto the original (un-resized) image.
                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(img_use, (real_x1, real_y1), (real_x2, real_y2),
                              (0, 0, 255), 4)

                bbox_results.append(
                    (key, (real_x1, real_y1, real_x2, real_y2)))

                # Number the files by position in bbox_results rather than
                # by the per-class index, which restarts for every class and
                # made detections of different classes overwrite each other.
                det_id = len(bbox_results)
                page_id = int(img_name.split('.')[0].split('_')[-1]) + 1
                page_name = 'page_' + str(page_id)
                cv2.imwrite(
                    os.path.join(
                        out_path,
                        '{}.jpg'.format(page_name + '_id_' + str(det_id))),
                    img_use)

        preresults.append([img_name, bbox_results])

    preresults = pd.DataFrame(preresults)
    preresults.to_csv(os.path.join(out_path, 'preresults.txt'),
                      header=None,
                      index=None)
Exemple #6
0
def model():

    sys.setrecursionlimit(40000)

    config_output_filename = "config.pickle"

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)
        K.clear_session()

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = "crops"

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):

        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    #class_mapping = C.class_mapping

    #if 'bg' not in class_mapping:
    #   class_mapping['bg'] = len(class_mapping)

    #class_mapping = {v: k for k, v in class_mapping.items()}
    #print(class_mapping)
    #class_to_color = {class_mapping[v]: np.random.randint(0,255,3) for v in class_mapping}

    C.num_rois = 32

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    #classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=3,
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    disease_bbx_list = []
    picture = None
    ## MULTIPLE MODELS
    cls_map_SPD = {'SPD': 0, 'OK': 1, 'bg': 2}
    cls_map_EPD = {'OK': 0, 'EPD': 1, 'bg': 2}
    cls_map_SS = {'N': 0, 'SS': 1, 'bg': 2}
    cls_map_ADB = {'N': 0, 'ADB': 1, 'bg': 2}

    sve_loc_list = [
        './SPD_result/', './SS_result/', './EPD_result/', './ADB_result/'
    ]

    wght_loc_list = [
        './Weights/SS/model_frcnn.hdf5', './Weights/SS/model_frcnn.hdf5',
        './Weights/SS/model_frcnn.hdf5', './Weights/SS/model_frcnn.hdf5'
    ]
    for w_path, sv_loc in zip(wght_loc_list, sve_loc_list):

        if 'SPD' in sv_loc:
            class_mapping = cls_map_SPD
        elif 'EPD' in sv_loc:
            class_mapping = cls_map_EPD
        elif 'SS' in sv_loc:
            class_mapping = cls_map_SS
        elif 'ADB' in sv_loc:
            class_mapping = cls_map_ADB
        else:
            print("Classes not found in config.pickle")
            break

        if 'bg' not in class_mapping:
            class_mapping['bg'] = len(class_mapping)

        class_mapping = {v: k for k, v in class_mapping.items()}
        print()
        print(class_mapping)
        #class_to_color = {class_mapping[v]: np.random.randint(0,255,3) for v in class_mapping}

        #print('Loading weights from {}'.format(C.model_path))
        print('Loading weights from {}'.format(w_path))
        print()

        #model_rpn.load_weights(C.model_path, by_name=True)
        model_rpn.load_weights(w_path, by_name=True)

        #model_classifier.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(w_path, by_name=True)

        model_rpn.compile(optimizer='sgd', loss='mse')
        model_classifier.compile(optimizer='sgd', loss='mse')

        all_imgs = []

        classes = {}

        bbox_threshold = 0.8

        visualise = True
        img_name_list = []
        lists = []
        thic = 2
        box_color = (0, 0, 255)
        label_color = (255, 255, 255)

        for idx, img_name in enumerate(sorted(os.listdir(img_path))):
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            img_name_list.append(img_name)
            #print(img_name_list)
            st = time.time()
            filepath = os.path.join(img_path, img_name)

            img = cv2.imread(filepath)

            X, ratio = format_img(img, C)

            if K.image_dim_ordering() == 'tf':
                X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            [Y1, Y2, F] = model_rpn.predict(X)

            R = roi_helpers.rpn_to_roi(Y1,
                                       Y2,
                                       C,
                                       K.image_dim_ordering(),
                                       overlap_thresh=0.7)

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # apply the spatial pyramid pooling to the proposed regions
            bboxes = {}
            probs = {}

            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == R.shape[0] // C.num_rois:
                    #pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                for ii in range(P_cls.shape[1]):

                    if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                            P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    cls_num = np.argmax(P_cls[0, ii, :])
                    try:
                        (tx, ty, tw,
                         th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                        tx /= C.classifier_regr_std[0]
                        ty /= C.classifier_regr_std[1]
                        tw /= C.classifier_regr_std[2]
                        th /= C.classifier_regr_std[3]
                        x, y, w, h = roi_helpers.apply_regr(
                            x, y, w, h, tx, ty, tw, th)
                    except:
                        pass
                    bboxes[cls_name].append([
                        C.rpn_stride * x, C.rpn_stride * y,
                        C.rpn_stride * (x + w), C.rpn_stride * (y + h)
                    ])
                    probs[cls_name].append(np.max(P_cls[0, ii, :]))

            all_dets = []

            #i = 0

            for key in bboxes:
                bbox = np.array(bboxes[key])

                new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                    bbox, np.array(probs[key]), overlap_thresh=0.5)
                for jk in range(new_boxes.shape[0]):
                    (x1, y1, x2, y2) = new_boxes[jk, :]

                    (real_x1, real_y1, real_x2,
                     real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
                    #bbx_df = pd.DataFrame((real_x1, real_y1, real_x2, real_y2))

                    #print("X1 ",real_x1)
                    #print("Y1 ",real_y1)
                    #print("X2 ",real_x2)
                    #print("Y2 ",real_y2)

                    ##cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

                    textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                    ##all_dets.append((key,100*new_probs[jk]))

                    lists.append([
                        real_x1, real_x2, real_y1, real_y2, img_name, idx, key,
                        filepath, textLabel
                    ])
                    if (key == 'OK') or (key == 'N') or (key == '0'):
                        continue
                    else:
                        disease_bbx_list.append([
                            real_x1, real_x2, real_y1, real_y2, key, img_name,
                            filepath, textLabel
                        ])
                        img = cv2.rectangle(img, (real_x1, real_y1),
                                            (real_x2, real_y2), box_color,
                                            thic)
                        img = cv2.putText(img, textLabel, (real_x1, real_y1),
                                          cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                          label_color)

                    ##(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,0.4,1)
                    ##textOrg = (real_x1, real_y1-0)

                    #cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), color = None)
                    ##cv2.rectangle(img, (textOrg[0] - 3,textOrg[1]+baseLine - 3), (textOrg[0]+retval[0], textOrg[1]-retval[1]), (255,255,255), -1)
                    ##cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 0.3, (0, 0, 255))

            print('Elapsed time = {}'.format(time.time() - st))
            print(all_dets)
            #cv2.imshow('img', img)
            #cv2.waitKey(0)
            cv2.imwrite(sv_loc + img_name, img)
            #cv2.imwrite('{}{}'.format(sv_loc,img_name),img)
        bbx_df = pd.DataFrame(lists,
                              columns=('x1', 'x2', 'y1', 'y2', 'img_name',
                                       'index', 'class', 'path', 'label'))
        bbx_df.to_csv(sv_loc + 'bbx_df.csv', index=None, sep=',')
    disease_df = pd.DataFrame(disease_bbx_list,
                              columns=('x1', 'x2', 'y1', 'y2', 'class',
                                       'img_name', 'path', 'label'))
    disease_df.to_csv('final_bbx.csv', index=None, sep=',')
    thic = 2
    box_color_SPD = (102, 102, 255)
    box_color_ADB = (102, 255, 255)
    box_color_SS = (102, 255, 102)
    box_color_EPD = (255, 255, 102)
    label_color = (255, 255, 255)
    for i, j in zip(disease_df.img_name.unique(), disease_df.path.unique()):
        img = cv2.imread(j)
        cv2.rectangle(img, (4, 4), (6, 10), box_color_SPD, 2)
        cv2.rectangle(img, (4, 17), (6, 23), box_color_ADB, 2)
        cv2.rectangle(img, (4, 29), (6, 35), box_color_SS, 2)
        cv2.rectangle(img, (4, 41), (6, 47), box_color_EPD, 2)
        cv2.putText(img, "SPD", (10, 11), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_SPD)
        cv2.putText(img, "ADB", (10, 24), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_ADB)
        cv2.putText(img, "SS", (10, 36), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_SS)
        cv2.putText(img, "EPD", (10, 48), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_EPD)
        for _, row in disease_df[disease_df.img_name == i].iterrows():
            if row['class'] == "SPD":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_SPD, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
            elif row['class'] == "ADB":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_ADB, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
            elif row['class'] == "SS":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_SS, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
            elif row['class'] == "EPD":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_EPD, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
        cv2.imwrite('results_imgs/{}'.format(i), img)
    for root, dirnames, filenames in os.walk('results_imgs/'):
        for filename in filenames:
            filename = filename
    return render_template("result.html", user_image=filename)
    #zipf = zipfile.ZipFile('RESULTS.zip','w', zipfile.ZIP_DEFLATED)
    #for root,dirs, files in os.walk('results_imgs/'):
    #   for file in files:
    #      zipf.write('results_imgs/'+file)
    #zipf.close()
    #return send_file('RESULTS.zip',
    #       mimetype = 'zip',
    #      attachment_filename= 'RESULTS.zip',
    #     as_attachment = True)
    #return send_from_directory(app.config['RESULT_FOLDER'],
    #                          filename=filename + '.jpg', as_attachment=True)
    #lIndex=req.rfind(".")
    #global res
    #global domain
    #domain=req[lIndex::]
    #print(domain)
    #res="0"+res[lIndex::]
    #print(res)
    K.clear_session()
Exemple #7
0
def predict_single_image(img_path, model_rpn, model_classifier_only, cfg,
                         class_mapping):
    """Detect objects in one image, then display and save the annotated copy.

    The image is read from ``img_path``, run through the RPN and the
    classifier head, filtered by score and non-max suppression, annotated by
    ``draw_boxes_and_label_on_image_cv2`` and written to
    ``./results_images/<name>.png``, where ``<name>`` is the input file name
    without its extension. The annotated image is also shown in an OpenCV
    window and the call blocks until a key is pressed.

    Args:
        img_path: Path of the image file to process.
        model_rpn: Model whose ``predict`` returns ``[Y1, Y2, F]``.
        model_classifier_only: Model whose ``predict([F, rois])`` returns
            ``[p_cls, p_regr]``.
        cfg: Config object; ``num_rois``, ``classifier_regr_std`` and
            ``rpn_stride`` are read from it here.
        class_mapping: Mapping from class index to class name.

    Raises:
        SystemExit: With status 1 if the image cannot be read.
    """
    st = time.time()
    img = cv2.imread(img_path)
    if img is None:
        print('reading image failed.')
        # An unreadable image is a failure, so exit with a non-zero status
        # instead of reporting success to the caller/shell.
        raise SystemExit(1)

    X, ratio = format_img(img, cfg)
    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))
    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    # region proposals, one row per box as [x1, y1, x2, y2]
    result = roi_helpers.rpn_to_roi(Y1,
                                    Y2,
                                    cfg,
                                    K.image_dim_ordering(),
                                    overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    result[:, 2] -= result[:, 0]
    result[:, 3] -= result[:, 1]
    bbox_threshold = 0.8

    # classify the proposals in batches of cfg.num_rois
    boxes = dict()
    last_batch = result.shape[0] // cfg.num_rois
    for jk in range(last_batch + 1):
        rois = np.expand_dims(
            result[cfg.num_rois * jk:cfg.num_rois * (jk + 1), :], axis=0)
        if rois.shape[1] == 0:
            break
        if jk == last_batch:
            # The final batch is short: pad it up to cfg.num_rois rows by
            # repeating its first ROI.
            curr_shape = rois.shape
            target_shape = (curr_shape[0], cfg.num_rois, curr_shape[2])
            rois_padded = np.zeros(target_shape).astype(rois.dtype)
            rois_padded[:, :curr_shape[1], :] = rois
            rois_padded[0, curr_shape[1]:, :] = rois[0, 0, :]
            rois = rois_padded

        [p_cls, p_regr] = model_classifier_only.predict([F, rois])

        for ii in range(p_cls.shape[1]):
            scores = p_cls[0, ii, :]
            cls_num = np.argmax(scores)
            best_score = np.max(scores)
            # Skip low-confidence ROIs and the last class index
            # (presumably the background class -- confirm against training).
            if best_score < bbox_threshold or cls_num == (p_cls.shape[2] - 1):
                continue

            (x, y, w, h) = rois[0, ii, :]
            try:
                (tx, ty, tw, th) = p_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= cfg.classifier_regr_std[0]
                ty /= cfg.classifier_regr_std[1]
                tw /= cfg.classifier_regr_std[2]
                th /= cfg.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as e:
                # Best effort: report the problem and keep the unrefined ROI.
                print(e)
            boxes.setdefault(cls_num, []).append([
                cfg.rpn_stride * x, cfg.rpn_stride * y,
                cfg.rpn_stride * (x + w), cfg.rpn_stride * (y + h),
                best_score
            ])
    # add some nms to reduce many boxes
    for cls_num, box in boxes.items():
        boxes_nms = roi_helpers.non_max_suppression_fast(box,
                                                         overlap_thresh=0.5)
        boxes[cls_num] = boxes_nms
        print(class_mapping[cls_num] + ":")
        for b in boxes_nms:
            # map the box back to the coordinates of the original image
            b[0], b[1], b[2], b[3] = get_real_coordinates(
                ratio, b[0], b[1], b[2], b[3])
            print('{} prob: {}'.format(b[0:4], b[-1]))
    img = draw_boxes_and_label_on_image_cv2(img, class_mapping, boxes)
    print('Elapsed time = {}'.format(time.time() - st))
    cv2.imshow('image', img)
    # splitext strips only the final extension, so names that contain extra
    # dots (e.g. "scan.v2.jpg") keep their full stem and stay distinct.
    stem = os.path.splitext(os.path.basename(img_path))[0]
    result_path = './results_images/{}.png'.format(stem)
    print('result saved into ', result_path)
    cv2.imwrite(result_path, img)
    cv2.waitKey(0)
Exemple #8
0
def test(image_path, mode='test'):
    """Run Faster R-CNN detection on one image and return the annotated image.

    The RPN and classifier models are rebuilt on every call, weights are
    loaded from ``model_frcnn_cat.h5``, and every detection that survives the
    score threshold and non-max suppression is drawn onto the image together
    with its label.

    Relies on module-level ``C`` (config), ``options`` (parsed command-line
    options) and ``nn`` (network definition module). ``C.num_rois`` is
    overwritten from ``options.num_rois`` and a ``'bg'`` entry is added to
    ``C.class_mapping`` if it is missing.

    Args:
        image_path: Path of the image file to process.
        mode: Unused; kept so existing callers keep working.

    Returns:
        The image read by ``cv2.imread`` with boxes and labels drawn on it.

    Raises:
        IOError: If the image at ``image_path`` cannot be read.
    """
    # invert the label dict so it maps class index -> class name
    class_mapping = C.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    model_path = 'model_frcnn_cat.h5'

    # rebuild the networks: shared base, RPN and classifier head
    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    shared_layers = nn.nn_base(img_input, trainable=True)
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    # only the first two RPN outputs are used as model outputs
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    print('Loading weights from {}'.format(model_path))
    model_rpn.load_weights(model_path, by_name=True)
    model_classifier.load_weights(model_path, by_name=True)
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.7
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of an opaque error further down.
        raise IOError('Could not read image: {}'.format(image_path))

    # resize/normalise the image and keep the resize ratio
    X, ratio = format_img(image, C)
    [Y1, Y2] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1, Y2, C, overlap_thresh=0.7)
    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    bboxes = {}
    probs = {}

    # classify the proposals in batches of C.num_rois
    last_batch = R.shape[0] // C.num_rois
    for jk in range(last_batch + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)

        if ROIs.shape[1] == 0:
            break

        if jk == last_batch:
            # The final batch is short: pad it up to C.num_rois rows by
            # repeating its first ROI.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_reg] = model_classifier.predict([X, ROIs])

        for ii in range(P_cls.shape[1]):
            scores = P_cls[0, ii, :]
            best_score = np.max(scores)
            cls_num = np.argmax(scores)

            print('Max value')
            print(best_score)
            print('label map')
            print(cls_num)

            # skip low-confidence ROIs and the last class index, which is
            # 'bg' when this function had to add it above
            if best_score < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_reg[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: keep the unrefined ROI if the regression step
                # fails, but let KeyboardInterrupt/SystemExit propagate.
                pass
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(best_score)

    all_dets = []

    # draw the surviving detections, one class at a time
    for key in bboxes:
        bbox = np.array(bboxes[key])
        color = tuple(int(c) for c in class_to_color[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.35)

        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(image, (real_x1, real_y1), (real_x2, real_y2),
                          color, 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1)

            # label background: black outline, white fill, then the text
            label_tl = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
            label_br = (textOrg[0] + retval[0] + 5,
                        textOrg[1] - retval[1] - 5)
            cv2.rectangle(image, label_tl, label_br, (0, 0, 0), 2)
            cv2.rectangle(image, label_tl, label_br, (255, 255, 255), -1)
            cv2.putText(image, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    return (image)
def operation():
    """Run Faster R-CNN inference over every image in the test directory.

    Command-line options (``--path``, ``--num_rois``, ``--config_filename``,
    ``--network``) are parsed with ``OptionParser``; the pickled training
    config is loaded; the RPN and classifier models are rebuilt and their
    weights loaded from ``C.model_path``. Every image file in the test
    directory is then annotated and written to ``./results_imgs/<idx>.png``.
    Files that ``cv2.imread`` cannot read are reported and skipped.

    Returns:
        list: One ``[[idx], [item]]`` entry per processed image. ``idx`` is
        the file's position in the sorted directory listing and ``item`` is a
        list of ``[class_name, [x1, y1, x2, y2]]`` for the detections whose
        score is above 95 (percent).

    Raises:
        ValueError: If ``C.network`` is neither ``'resnet50'`` nor ``'vgg'``.
    """
    sys.setrecursionlimit(40000)

    parser = OptionParser()

    parser.add_option("-p",
                      "--path",
                      dest="test_path",
                      default="images",
                      help="Path to test data.")
    parser.add_option(
        "-n",
        "--num_rois",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=32)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    # NOTE: --network is accepted but not read below; the network type comes
    # from the pickled config (C.network).
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')

    (options, _) = parser.parse_args()

    if not options.test_path:  # if filename is not given
        parser.error(
            'Error: path to test data must be specified. Pass --path to command line'
        )

    config_output_filename = options.config_filename

    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # point --config_filename at config files you trust.
    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
        num_features = 1024
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn
        num_features = 512
    else:
        # Fail here with a clear message rather than with an unbound-local
        # error when nn/num_features are first used.
        raise ValueError('Unsupported network in config: {!r}'.format(
            C.network))

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        # scale so that the shorter side becomes C.im_size
        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        # reverse the channel order, subtract the per-channel mean, scale,
        # then move channels first and add a batch axis
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):
        # NOTE(review): floor division makes round() a no-op here; confirm
        # whether true division (round to nearest) was intended.
        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    # invert the label dict so it maps class index -> class name
    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    # Both classifier models are built from the same inputs and outputs;
    # weights are loaded through model_classifier and prediction goes through
    # model_classifier_only.
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.8

    detections = []
    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)
        if img is None:
            # cv2.imread signals failure by returning None; skip this file
            # instead of aborting the whole batch.
            print('Skipping unreadable image: {}'.format(filepath))
            continue

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # classify the proposals in batches of C.num_rois
        bboxes = {}
        probs = {}
        last_batch = R.shape[0] // C.num_rois
        for jk in range(last_batch + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == last_batch:
                # The final batch is short: pad it up to C.num_rois rows by
                # repeating its first ROI.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = np.argmax(scores)
                best_score = np.max(scores)

                # skip low-confidence ROIs and the last class index, which is
                # 'bg' when this function had to add it above
                if best_score < bbox_threshold or cls_num == (
                        P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI if the regression
                    # step fails, but let KeyboardInterrupt/SystemExit
                    # propagate.
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(best_score)

        all_dets = []
        item = []
        for key in bboxes:
            bbox = np.array(bboxes[key])
            color = tuple(int(c) for c in class_to_color[key])
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              color, 2)

                score_pct = 100 * new_probs[jk]
                textLabel = '{}: {}'.format(key, int(score_pct))
                all_dets.append((key, score_pct))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1)

                # label background: black outline, white fill, then the text
                label_tl = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
                label_br = (textOrg[0] + retval[0] + 5,
                            textOrg[1] - retval[1] - 5)
                cv2.rectangle(img, label_tl, label_br, (0, 0, 0), 2)
                cv2.rectangle(img, label_tl, label_br, (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)
                # only high-confidence detections go into the returned list
                if score_pct > 95:
                    item.append([key, [real_x1, real_y1, real_x2, real_y2]])
        print(all_dets)
        detections.append([[idx], [item]])

        cv2.imwrite('./results_imgs/{}.png'.format(idx), img)
    print("=======================")

    return detections
def handler(event, context):
    """Run Faster R-CNN inference for one request and store the result.

    Steps, in order:
      1. download the input array named by ``event['img_process']`` from the
         ``adaproject`` bucket into ``/tmp`` and load it with ``np.load``;
      2. rebuild the RPN and classifier models from ``config.pickle``;
      3. download ``model_frcnn.hdf5`` and load it into both models;
      4. run the RPN, classify the proposed regions, and apply per-class
         non-max suppression;
      5. write the outcome to the DynamoDB item keyed by
         ``event['requestId']`` (table name from ``DYNAMODB_TABLE``).

    Args:
        event: Mapping providing ``'img_process'`` (object key of the input
            array) and ``'requestId'`` (key of the item to update).
        context: Unused.

    Returns:
        dict with ``"statusCode": 200`` and a JSON ``"body"`` built from the
        attributes returned by the DynamoDB update.
    """
    img_name = event['img_process']
    local_input = '/tmp/' + img_name

    client.download_file('adaproject', img_name, local_input)
    # NOTE(review): presumably an already pre-processed image batch saved
    # with np.save (it is fed to the RPN unchanged) -- confirm with producer.
    X = np.load(local_input)

    with open('config.pickle', 'rb') as f_in:
        C = pickle.load(f_in)
    class_mapping = C.class_mapping

    # Channels-last inputs with a 1024-channel feature map are hard-coded.
    num_features = 1024
    img_input = Input(shape=(None, None, 3))
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=(None, None, num_features))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn_layers)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    BUCKET_NAME = 'adaproject'  # replace with your bucket name
    KEY = 'model_frcnn.hdf5'  # replace with your object key
    weights_path = '/tmp/model_frcnn.hdf5'

    s3 = boto3.resource('s3')

    try:
        s3.Bucket(BUCKET_NAME).download_file(KEY, weights_path)
        # Function-call form so the module also parses on Python 3.
        print('File Found')
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            # NOTE(review): execution continues after a 404, so the
            # load_weights calls below read whatever is at weights_path.
            print("The object does not exist.")
        else:
            raise
    model_rpn.load_weights(weights_path, by_name=True)
    model_classifier.load_weights(weights_path, by_name=True)
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    # Starting RPN prediction
    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.7)
    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    bboxes = {}
    probs = {}
    bbox_threshold = 0.8
    # Invert the mapping so a predicted class index yields the class name.
    class_mapping = {v: k for k, v in class_mapping.items()}

    num_rois = C.num_rois
    last_batch = R.shape[0] // num_rois
    for jk in range(last_batch + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == last_batch:
            # pad R: the classifier takes exactly num_rois regions per call,
            # so the short final batch is filled with copies of its first ROI.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier.predict([F, ROIs])
        for ii in range(P_cls.shape[1]):
            scores = P_cls[0, ii, :]
            cls_num = np.argmax(scores)
            best_score = np.max(scores)

            # Skip weak predictions and the last class index.
            if best_score < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]
            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: keep the unrefined ROI if regression fails.
                pass
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(best_score)

    final_data = []
    for key in bboxes:
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            np.array(bboxes[key]), np.array(probs[key]), overlap_thresh=0.5)
        for i in range(new_boxes.shape[0]):
            # A fresh dict per surviving box; reusing one dict per class made
            # every entry alias the values of the last box.
            final_data.append({
                key: {
                    'x': str(new_boxes[i][0]),
                    'y': str(new_boxes[i][1]),
                    'w': str(new_boxes[i][2]),
                    'z': str(new_boxes[i][3]),
                    'prob': str(new_probs[i]),
                }
            })

    output = {'bboxes': bboxes, 'rpn': final_data}
    timestamp = int(time.time() * 1000)
    table = dynamodb.Table(os.environ['DYNAMODB_TABLE'])
    result = table.update_item(
        Key={'requestId': event['requestId']},
        ExpressionAttributeNames={
            '#status': 'status',
            '#result': 'result',
        },
        ExpressionAttributeValues={
            ':status': 'DONE',
            ':result': output,
            ':updatedAt': timestamp,
        },
        UpdateExpression='SET #status = :status, '
        '#result = :result, '
        'updatedAt = :updatedAt',
        ReturnValues='ALL_NEW',
    )
    response = {
        "statusCode": 200,
        "body": json.dumps(result['Attributes'], cls=DecimalEncoder)
    }

    return response
Exemple #11
0
    def detect_known_objects(self, img):
        """Detect known objects in ``img`` and track them across calls.

        Runs the RPN and the classifier head, applies per-class non-max
        suppression, obtains an angle and a distance for every surviving box
        from ``calculate_angle_and_distance``, and matches each box against
        ``self.SHORT_TERM_MEMORY`` so that an object already remembered keeps
        its tracking UUID. The memory is then replaced by the new detections.

        Args:
            img: Image passed to ``self.format_img``; it is drawn on in place
                and displayed when ``self.show_image`` is true.

        Returns:
            The new ``self.SHORT_TERM_MEMORY``: a list of tuples
            ``(name, tracking_uuid, x1, y1, x2, y2, distance, angle)`` where
            ``name`` is ``self.KNOWN_OBJECTS[int(class_key)]``.
        """
        # Leftover debug trace; kept so console output is unchanged.
        print("HELLLOOOOOO")
        X, ratio = self.format_img(img, self.C)
        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = self.model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   self.C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)
        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # classify the proposed regions, num_rois at a time
        bboxes = {}
        probs = {}
        num_rois = self.C.num_rois
        last_batch = R.shape[0] // num_rois
        for jk in range(last_batch + 1):
            ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == last_batch:
                # pad R: the short final batch is filled up to num_rois with
                # copies of its first ROI.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            # The model yields three outputs; the third one is not used here.
            [P_cls, P_regr,
             _] = self.model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = np.argmax(scores)
                best_score = np.max(scores)

                # Skip weak predictions and the last class index.
                if best_score < self.bbox_threshold or cls_num == (
                        P_cls.shape[2] - 1):
                    continue

                cls_name = self.class_mapping[cls_num]
                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= self.C.classifier_regr_std[0]
                    ty /= self.C.classifier_regr_std[1]
                    tw /= self.C.classifier_regr_std[2]
                    th /= self.C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI if regression fails.
                    print("exception")
                bboxes[cls_name].append([
                    self.C.rpn_stride * x, self.C.rpn_stride * y,
                    self.C.rpn_stride * (x + w), self.C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(best_score)

        detected_objects = []

        for key in bboxes:
            new_boxes, _ = roi_helpers.non_max_suppression_fast(
                np.array(bboxes[key]), np.array(probs[key]),
                overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = self.get_real_coordinates(ratio, x1, y1, x2, y2)

                # Only this estimate ever reached the result; the earlier
                # hand-rolled angle/distance formulas were overwritten and
                # their divisions could fail on zero-width/height boxes.
                angle, distance = calculate_angle_and_distance(img,
                                                               real_x1,
                                                               real_x2,
                                                               real_y1,
                                                               real_y2,
                                                               obj_width=16)
                detected_objects.append(
                    (key, "", real_x1, real_y1, real_x2, real_y2, distance,
                     angle))

        print("detected objects", len(detected_objects))

        def centroid(entry):
            # entry layout: (name, uuid, x1, y1, x2, y2, distance, angle)
            x1, y1, x2, y2 = entry[2:6]
            half_width = self.distance((x1, y1), (x2, y1)) / 2.0
            half_height = self.distance((x1, y1), (x1, y2)) / 2.0
            # y is measured from y1 (index 3); using x2 (index 4) was a bug.
            return (x1 + half_width, y1 + half_height)

        temporary_memory = []
        for image_item in detected_objects:
            name = self.KNOWN_OBJECTS[int(image_item[0])]
            seen_item_centroid = centroid(image_item)
            # A remembered object matches when its centroid lies closer than
            # the diagonal of the newly seen box.
            box_diagonal = self.distance([image_item[2], image_item[3]],
                                         [image_item[4], image_item[5]])

            tracking_uuid = None
            for memory_item in self.SHORT_TERM_MEMORY:
                # Memory entries store the KNOWN_OBJECTS name, so compare the
                # mapped name rather than the raw class key. The last
                # matching entry wins, as before.
                if name == memory_item[0] and self.distance(
                        seen_item_centroid,
                        centroid(memory_item)) < box_diagonal:
                    tracking_uuid = memory_item[1]

            if tracking_uuid is None:
                tracking_uuid = uuid.uuid1()

            entry = (name, tracking_uuid, image_item[2], image_item[3],
                     image_item[4], image_item[5], image_item[6],
                     image_item[7])
            temporary_memory.append(entry)

            if self.show_image:
                # Draw each entry once, with the label at its own box instead
                # of redrawing every entry at the current item's position.
                cv2.rectangle(img, (entry[2], entry[3]),
                              (entry[4], entry[5]), (0, 0, 0), 5)
                textLabel = '{}: {}'.format(entry[0], entry[1])
                (retval,
                 baseLine) = cv2.getTextSize(textLabel,
                                             cv2.FONT_HERSHEY_COMPLEX, 1, 1)
                textOrg = (entry[2] - 20, entry[3] - 20)
                label_top_left = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
                label_bottom_right = (textOrg[0] + retval[0] + 5,
                                      textOrg[1] - retval[1] + 5)
                cv2.rectangle(img, label_top_left, label_bottom_right,
                              (0, 0, 0), 2)
                cv2.rectangle(img, label_top_left, label_bottom_right,
                              (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg,
                            cv2.FONT_HERSHEY_DUPLEX, 0.3, (0, 0, 0), 1)

        self.SHORT_TERM_MEMORY = temporary_memory
        if self.show_image:
            cv2.imshow('image', img)
            cv2.waitKey(3000)

        return self.SHORT_TERM_MEMORY
    def predict(self, img_path):
        """Detect objects in the image at ``img_path`` and show the result.

        Runs the RPN and the classifier head, applies per-class non-max
        suppression, draws every surviving box (shrunk by 10 px on each side)
        with a ``class: score`` label, then draws one rectangle enclosing all
        boxes and displays the image until a key is pressed.

        Args:
            img_path: Path handed to ``cv2.imread``.

        Returns:
            None. The elapsed time and the list of ``(class, score * 100)``
            detections are printed.
        """
        bbox_threshold = 0.8

        st = time.time()

        img = cv2.imread(img_path)

        X, ratio = format_img(img, self.C)
        data_format = K.image_data_format()
        print(data_format)
        if data_format in ('tf', 'channels_last'):
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = self.model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   self.C,
                                   data_format,
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # classify the proposed regions, num_rois at a time
        bboxes = {}
        probs = {}
        num_rois = self.C.num_rois
        last_batch = R.shape[0] // num_rois

        for jk in range(last_batch + 1):
            ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == last_batch:
                # pad R: the short final batch is filled up to num_rois with
                # copies of its first ROI.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = np.argmax(scores)
                best_score = np.max(scores)

                # Skip weak predictions and the last class index.
                if best_score < bbox_threshold or cls_num == (
                        P_cls.shape[2] - 1):
                    continue

                cls_name = self.class_mapping[cls_num]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= self.C.classifier_regr_std[0]
                    ty /= self.C.classifier_regr_std[1]
                    tw /= self.C.classifier_regr_std[2]
                    th /= self.C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI if regression fails.
                    pass
                bboxes[cls_name].append([
                    self.C.rpn_stride * x, self.C.rpn_stride * y,
                    self.C.rpn_stride * (x + w), self.C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(best_score)

        all_dets = []

        candidates = []  # collects every candidate object region

        for key in bboxes:
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                np.array(bboxes[key]), np.array(probs[key]),
                overlap_thresh=0.6)

            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                # Each box is shrunk by 10 px on every side.
                candidates.append(
                    TwoPoint(real_x1 + 10, real_y1 + 10, real_x2 - 10,
                             real_y2 - 10))

                cv2.rectangle(img, (real_x1 + 10, real_y1 + 10),
                              (real_x2 - 10, real_y2 - 10), (255, 0, 0), 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1 - 0)

                cv2.rectangle(img, (textOrg[0], textOrg[1]),
                              (textOrg[0] + retval[0], textOrg[1] - retval[1]),
                              (0, 0, 0), 2)
                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (255, 255, 255), -1)

                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)

        # Pull coordinates that fall outside the allowed range back inside.
        for c in candidates:
            if c.x1 <= 0:
                c.x1 = 10

            if c.y1 <= 0:
                c.y1 = 10

            if c.x2 >= IMAGE_MAX_X:
                c.x2 = IMAGE_MAX_X - 10

            if c.y2 >= IMAGE_MAX_Y:
                c.y2 = IMAGE_MAX_Y - 10

        # Merge all object regions into a single enclosing region. The
        # minimum is taken over the candidates themselves: seeding it with 10
        # pinned the merged corner to at most (10, 10). Nothing is drawn when
        # there are no candidates.
        if candidates:
            min_x1 = min(c.x1 for c in candidates)
            min_y1 = min(c.y1 for c in candidates)
            max_x2 = max(0, max(c.x2 for c in candidates))
            max_y2 = max(0, max(c.y2 for c in candidates))

            cv2.rectangle(img, (min_x1, min_y1), (max_x2, max_y2),
                          (255, 0, 255), 2)

        print('Elapsed time = {}'.format(time.time() - st))
        print(all_dets)
        cv2.imshow('img', img)
        cv2.waitKey(0)
Exemple #13
0
def uploadtest(request):
    """Handle an image upload: save it, run detection, return the results.

    The file posted under the ``pic`` field is stored in
    ``<MEDIA_ROOT>/test/``. The Faster R-CNN models are rebuilt from the
    pickled config, detection is run on the stored image, and the annotated
    image is written to ``<MEDIA_ROOT>/result/`` under the same file name.

    Args:
        request: Request object; ``request.FILES`` and ``request.session``
            are read.

    Returns:
        ``JsonResponse`` with ``res`` 1 plus ``result_pic``, ``test_pic``,
        ``user_id`` and the accumulated progress log ``str_info`` on success;
        ``JsonResponse({'res': 0})`` when no file was posted, when no file
        was posted under ``pic``, or when the stored file cannot be read as
        an image.

    Raises:
        ValueError: if the configured network is neither ``'resnet50'`` nor
            ``'vgg'``.
    """
    if not request.FILES:
        return JsonResponse({'res': 0})

    pic = request.FILES.get('pic')
    if pic is None:
        # Files were posted, but none under the expected 'pic' field.
        return JsonResponse({'res': 0})

    str_info = ''
    save_path = '%s/test/%s' % (settings.MEDIA_ROOT, pic.name)
    with open(save_path, 'wb') as f:
        for content in pic.chunks():
            f.write(content)
    print('-->图片上传成功...')
    str_info += '-->图片上传成功...\n'

    # The detection process starts here.
    sys.setrecursionlimit(40000)

    config_output_filename = os.path.join(settings.CONFIG_BISHE, 'bishe/config.pickle')
    print('-->正在检测...')
    str_info += '-->正在检测...\n'
    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)
        print('-->找到配置文件...')

    C.model_path = os.path.join(settings.CONFIG_BISHE, C.model_path)
    print('-->找到模型信息...')
    str_info += '-->找到模型信息...\n'
    print('-->模型路径地址:' + C.model_path)
    str_info += '-->模型路径地址:' + C.model_path + '\n'

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
        num_features = 1024
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn
        num_features = 512
    else:
        # Previously this surfaced later as an unbound-local error on `nn`.
        raise ValueError('Unsupported network: {!r}'.format(C.network))

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False
    C.num_rois = 10

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        # Scale so that the shorter side becomes img_min_side.
        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]  # reverse the channel order
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):
        # NOTE(review): floor division makes round() a no-op here; kept so
        # the resulting pixel coordinates stay unchanged.
        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # Invert the mapping so a predicted class index yields the class name.
    class_mapping = {v: k for k, v in class_mapping.items()}

    print('-->乳腺癌病症种类:' + str(class_mapping))
    str_info += '-->乳腺癌病症种类:' + str(class_mapping) + '\n'

    class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('-->由 {} 加载权重信息...'.format(C.model_path))
    str_info += '-->由 {} 加载权重信息...'.format(C.model_path) + '\n'
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.8

    st = time.time()

    img = cv2.imread(save_path)
    if img is None:
        # The upload could not be decoded as an image.
        return JsonResponse({'res': 0})

    X, ratio = format_img(img, C)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # classify the proposed regions, num_rois at a time
    bboxes = {}
    probs = {}
    last_batch = R.shape[0] // C.num_rois

    for jk in range(last_batch + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == last_batch:
            # pad R: the short final batch is filled up to num_rois with
            # copies of its first ROI.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            scores = P_cls[0, ii, :]
            cls_num = np.argmax(scores)
            best_score = np.max(scores)

            # Skip weak predictions and the last class index ('bg' when it
            # was appended above).
            if best_score < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: keep the unrefined ROI if regression fails.
                pass
            bboxes[cls_name].append(
                [C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w), C.rpn_stride * (y + h)])
            probs[cls_name].append(best_score)

    all_dets = []

    for key in bboxes:
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(np.array(bboxes[key]), np.array(probs[key]),
                                                                    overlap_thresh=0.5)
        box_color = (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2]))
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2), box_color, 2)
            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))
            (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
            textOrg = (real_x1, real_y1 - 0)
            label_corner_a = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
            label_corner_b = (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5)
            cv2.rectangle(img, label_corner_a, label_corner_b, (0, 0, 0), 2)
            cv2.rectangle(img, label_corner_a, label_corner_b, (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

    timeused = (time.time() - st)
    print('-->检测完成,用时: {}...'.format(timeused))
    str_info += '-->检测完成,用时: {}'.format(timeused) + '\n'
    aaa = str(all_dets)
    print('-->检测结果:' + aaa)
    str_info += '-->检测结果:' + aaa + '\n'
    result_path = '%s/result/%s' % (settings.MEDIA_ROOT, pic.name)
    cv2.imwrite(result_path, img)
    print(str_info)

    # Send the image paths back, in preparation for saving them to the database.
    test_pic = '/static/media/test/%s' % (pic.name)
    result_pic = '/static/media/result/%s' % (pic.name)
    user_id = UserInfo.objects.get(username=request.session.get('username')).id
    return JsonResponse(
        {'res': 1, 'result_pic': result_pic, 'test_pic': test_pic, 'user_id': user_id, 'str_info': str_info})
def work(input, output, textlabel, piclabel, primpiclabel):
    """Run Faster R-CNN detection over a folder of images and update the GUI.

    Every image file found in the test folder is passed through the RPN and
    the classifier head, the boxes kept by non-max suppression are drawn on
    the image, and the result is written to ``<output>/<idx>.png`` and shown
    in ``piclabel``.

    Args:
        input: Folder holding the images to analyse (default of ``--path``).
        output: Folder the annotated images are written to.
        textlabel: Object exposing ``append(str)``; receives progress text.
        piclabel: Object exposing ``setPixmap``/``setScaledContents``; shows
            the annotated result.
        primpiclabel: Same kind of object; shows the untouched input image.

    Raises:
        ValueError: If the pickled config names a base network other than
            ``resnet50`` or ``vgg``.
    """
    textlabel.append("Detecting now:")
    test_path = input + '/'
    output_path = output + '/'
    sys.setrecursionlimit(40000)

    # NOTE(review): parse_args() reads sys.argv of the hosting process, so
    # flags given to the application override the defaults set here.
    parser = OptionParser()
    parser.add_option("-p",
                      "--path",
                      dest="test_path",
                      help="Path to test data.",
                      default=test_path)
    parser.add_option(
        "-n",
        "--num_rois",
        type="int",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=32)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="./config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')

    (options, _) = parser.parse_args()

    if not options.test_path:  # if filename is not given
        parser.error(
            'Error: path to test data must be specified. Pass --path to command line'
        )

    # NOTE: unpickling executes arbitrary code; only load trusted configs.
    with open(options.config_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # The network actually used is the one recorded in the training config.
    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
        num_features = 1024
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn
        num_features = 512
    else:
        raise ValueError(
            'Unsupported base network in config: {!r}'.format(C.network))

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    # Invert name -> index into index -> name, making sure 'bg' is present.
    class_mapping = C.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    C.num_rois = int(options.num_rois)

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    # Both classifier models are built from the same `classifier` outputs, so
    # weights loaded through `model_classifier` are the ones used when
    # predicting with `model_classifier_only`.
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    loading_message = 'Loading weights from {}'.format(C.model_path)
    print(loading_message)
    textlabel.append(loading_message)
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.8
    image_extensions = ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')
    # Fixed box colour per known class; any other class is drawn in black.
    box_colors = {
        "airbase": (0, 0, 255),
        "harbour": (21, 159, 235),
        "island": (59, 197, 184),
    }

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(image_extensions):
            continue
        print(img_name)
        filepath = os.path.join(img_path, img_name)

        # Show the untouched input image; load it from the same folder the
        # file list came from.
        textlabel.append(img_name)
        primpiclabel.setPixmap(QtGui.QPixmap(filepath))
        primpiclabel.setScaledContents(True)

        st = time.time()
        img = cv2.imread(filepath)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            skip_message = 'Could not read image {}, skipping'.format(filepath)
            print(skip_message)
            textlabel.append(skip_message)
            continue

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        bboxes = {}
        probs = {}

        # Classify the proposals in batches of exactly C.num_rois.
        for start in range(0, R.shape[0], C.num_rois):
            ROIs = np.expand_dims(R[start:start + C.num_rois, :], axis=0)

            if ROIs.shape[1] < C.num_rois:
                # Last, partial batch: pad with copies of its first ROI.
                n_valid = ROIs.shape[1]
                padded = np.zeros((1, C.num_rois, ROIs.shape[2]),
                                  dtype=ROIs.dtype)
                padded[:, :n_valid, :] = ROIs
                padded[0, n_valid:, :] = ROIs[0, 0, :]
                ROIs = padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = int(np.argmax(scores))

                # Skip weak detections and the background (last) class.
                if (scores[cls_num] < bbox_threshold
                        or cls_num == P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]
                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI if regression fails.
                    pass

                bboxes.setdefault(cls_name, []).append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs.setdefault(cls_name, []).append(scores[cls_num])

        all_dets = []
        for key in bboxes:
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                np.array(bboxes[key]),
                np.array(probs[key]),
                overlap_thresh=0.5)
            color = box_colors.get(key, (0, 0, 0))
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              color, 2)
                # Recorded so the per-image log below lists the detections.
                all_dets.append((key, 100 * new_probs[jk]))

        elapsed_message = 'Elapsed time = {}'.format(time.time() - st)
        print(elapsed_message)
        textlabel.append(elapsed_message)
        print(all_dets)

        result_file = output_path + '{}.png'.format(idx)
        cv2.imwrite(result_file, img)
        piclabel.setPixmap(QtGui.QPixmap(result_file))
        piclabel.setScaledContents(True)
def predict_image(file_path):
    """Detect objects in one image and save an annotated copy.

    Relies on module-level state defined elsewhere in the file: ``graph``,
    ``C``, ``model_rpn``, ``model_classifier_only``, ``class_mapping``,
    ``class_to_color`` and ``bbox_threshold``. The annotated image is written
    to ``./static/uploadImage/<image_name>process.jpg`` and the detections
    are printed; nothing is returned.

    Args:
        file_path: Path of the image to analyse.

    Raises:
        IOError: If the image cannot be read.
    """
    print(file_path)
    st = time.time()
    img = cv2.imread(file_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise IOError('Could not read image: {}'.format(file_path))
    image_name = os.path.split(file_path)[-1]
    X = format_img(img, C)

    # Build a drawable uint8 image from the network input: reverse the
    # channel order, move channels last and add per-channel offsets
    # (presumably the means removed by format_img -- confirm there).
    img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :], (1, 2, 0)).copy()
    img_scaled[:, :, 0] += 123.68
    img_scaled[:, :, 1] += 116.779
    img_scaled[:, :, 2] += 103.939
    img_scaled = img_scaled.astype(np.uint8)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    with graph.as_default():
        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        bboxes = {}
        probs = {}

        # Classify the proposals in batches of exactly C.num_rois.
        for start in range(0, R.shape[0], C.num_rois):
            ROIs = np.expand_dims(R[start:start + C.num_rois, :], axis=0)

            if ROIs.shape[1] < C.num_rois:
                # Last, partial batch: pad with copies of its first ROI.
                n_valid = ROIs.shape[1]
                padded = np.zeros((1, C.num_rois, ROIs.shape[2]),
                                  dtype=ROIs.dtype)
                padded[:, :n_valid, :] = ROIs
                padded[0, n_valid:, :] = ROIs[0, 0, :]
                ROIs = padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = int(np.argmax(scores))

                # Skip weak detections and the background (last) class.
                if (scores[cls_num] < bbox_threshold
                        or cls_num == P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]
                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI if regression fails.
                    pass

                # Scale by the configured stride instead of a literal 16.
                bboxes.setdefault(cls_name, []).append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs.setdefault(cls_name, []).append(scores[cls_num])

        all_dets = []

        for key in bboxes:
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                np.array(bboxes[key]),
                np.array(probs[key]),
                overlap_thresh=0.5)
            # OpenCV expects a plain sequence of ints as colour.
            color = tuple(int(c) for c in class_to_color[key])
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                cv2.rectangle(img_scaled, (x1, y1), (x2, y2), color, 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (x1, y1)
                label_corner_a = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
                label_corner_b = (textOrg[0] + retval[0] + 5,
                                  textOrg[1] - retval[1] - 5)

                # Black outline, white fill, then the label text on top.
                cv2.rectangle(img_scaled, label_corner_a, label_corner_b,
                              (0, 0, 0), 2)
                cv2.rectangle(img_scaled, label_corner_a, label_corner_b,
                              (255, 255, 255), -1)
                cv2.putText(img_scaled, textLabel, textOrg,
                            cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

        print('Elapsed time = {}'.format(time.time() - st))
        cv2.imwrite('./static/uploadImage/{}process.jpg'.format(image_name),
                    img_scaled)
        print(all_dets)
def main():
    """Annotate every frame of a video with Faster R-CNN detections.

    Loads the training config from ``config.pickle``, builds the RPN and
    classifier models, converts the video to images, draws detections and a
    per-frame object count on each image, writes the frames to
    ``output_path`` and finally reassembles them into a video.

    Relies on names defined elsewhere in the file: ``cleanup``,
    ``convert_to_images``, ``save_to_video``, ``get_file_names``,
    ``accumulate``, ``format_img``, ``nn``, ``num_rois``, ``img_path`` and
    ``output_path``.
    """
    cleanup()
    sys.setrecursionlimit(40000)
    config_output_filename = 'config.pickle'

    # Pickle data is binary; text mode breaks on Python 3 and on Windows.
    # NOTE: unpickling executes arbitrary code; only load trusted configs.
    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    # Invert name -> index into index -> name, making sure 'bg' is present.
    class_mapping = C.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = num_rois

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (1024, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    # Both classifier models are built from the same `classifier` outputs, so
    # weights loaded through `model_classifier` are the ones used when
    # predicting with `model_classifier_only`.
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.8
    image_extensions = ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')

    print("Converting video to images..")
    convert_to_images()
    print("anotating...")

    # Natural sort so that e.g. frame2 comes before frame10.
    list_files = sorted(
        get_file_names(img_path),
        key=lambda var: [
            int(x) if x.isdigit() else x
            for x in re.findall(r'[^0-9]|[0-9]+', var)
        ])
    for img_name in list_files:
        if not img_name.lower().endswith(image_extensions):
            continue
        print(img_name)
        st = time.time()
        filepath = os.path.join(img_path, img_name)
        img = cv2.imread(filepath)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Could not read image {}, skipping'.format(filepath))
            continue
        X = format_img(img, C)

        # Build a drawable uint8 image from the network input: reverse the
        # channel order, move channels last and add per-channel offsets
        # (presumably the means removed by format_img -- confirm there).
        img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :],
                                  (1, 2, 0)).copy()
        img_scaled[:, :, 0] += 123.68
        img_scaled[:, :, 1] += 116.779
        img_scaled[:, :, 2] += 103.939
        img_scaled = img_scaled.astype(np.uint8)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        bboxes = {}
        probs = {}

        # Classify the proposals in batches of exactly C.num_rois.
        for start in range(0, R.shape[0], C.num_rois):
            ROIs = np.expand_dims(R[start:start + C.num_rois, :], axis=0)

            if ROIs.shape[1] < C.num_rois:
                # Last, partial batch: pad with copies of its first ROI.
                n_valid = ROIs.shape[1]
                padded = np.zeros((1, C.num_rois, ROIs.shape[2]),
                                  dtype=ROIs.dtype)
                padded[:, :n_valid, :] = ROIs
                padded[0, n_valid:, :] = ROIs[0, 0, :]
                ROIs = padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = int(np.argmax(scores))

                # Skip weak detections and the background (last) class.
                if (scores[cls_num] < bbox_threshold
                        or cls_num == P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]
                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # Best effort: keep the unrefined ROI if regression fails.
                    pass

                # Scale by the configured stride instead of a literal 16.
                bboxes.setdefault(cls_name, []).append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs.setdefault(cls_name, []).append(scores[cls_num])

        all_dets = []
        all_objects = []

        for key in bboxes:
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                np.array(bboxes[key]),
                np.array(probs[key]),
                overlap_thresh=0.5)
            # OpenCV expects a plain sequence of ints as colour.
            color = tuple(int(c) for c in class_to_color[key])
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                cv2.rectangle(img_scaled, (x1, y1), (x2, y2), color, 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))
                all_objects.append((key, 1))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (x1, y1)
                label_corner_a = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
                label_corner_b = (textOrg[0] + retval[0] + 5,
                                  textOrg[1] - retval[1] - 5)

                # Black outline, white fill, then the label text on top.
                cv2.rectangle(img_scaled, label_corner_a, label_corner_b,
                              (0, 0, 0), 2)
                cv2.rectangle(img_scaled, label_corner_a, label_corner_b,
                              (255, 255, 255), -1)
                cv2.putText(img_scaled, textLabel, textOrg,
                            cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

        print('Elapsed time = {}'.format(time.time() - st))

        # Black banner across the top carrying the object-count summary.
        height, width, channels = img_scaled.shape
        cv2.rectangle(img_scaled, (0, 0), (width, 30), (0, 0, 0), -1)
        cv2.putText(img_scaled,
                    "Obj count: " + str(list(accumulate(all_objects))),
                    (5, 19), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 255), 1)
        cv2.imwrite(os.path.join(output_path, img_name), img_scaled)
        print(all_dets)

    print("saving to video..")
    save_to_video()
Exemple #17
0
			cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

			if cls_name not in bboxes:
				bboxes[cls_name] = []
				probs[cls_name] = []

			(x, y, w, h) = ROIs[0, ii, :]

			cls_num = np.argmax(P_cls[0, ii, :])
			try:
				(tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
				tx /= C.classifier_regr_std[0]
				ty /= C.classifier_regr_std[1]
				tw /= C.classifier_regr_std[2]
				th /= C.classifier_regr_std[3]
				x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
			except:
				pass
			bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
			probs[cls_name].append(np.max(P_cls[0, ii, :]))

	all_dets = []

	for key in bboxes:
		bbox = np.array(bboxes[key])

		new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
		for jk in range(new_boxes.shape[0]):
			(x1, y1, x2, y2) = new_boxes[jk,:]

			(real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
def get_bbox(R, C, model_classifier, class_mapping, F, ratio, bbox_threshold = 0.8):
    """Classify RPN proposals and collect the confident detections.

    Args:
        R: Array of proposals, one ``(x1, y1, x2, y2)`` row each. It is
            converted to ``(x, y, w, h)`` IN PLACE, so the caller's array is
            modified.
        C: Config object; ``num_rois``, ``rpn_stride`` and
            ``classifier_regr_std`` are read.
        model_classifier: Object whose ``predict([F, ROIs])`` returns
            ``[P_cls, P_regr]``.
        class_mapping: Mapping from class index to class name.
        F: Feature map forwarded unchanged to ``model_classifier``.
        ratio: Unused; kept so existing callers keep working. The returned
            boxes are NOT rescaled with it.
        bbox_threshold: Minimum class score for a proposal to be kept.

    Returns:
        Tuple ``(all_dets, bboxes, probs)``: ``all_dets`` is a list of
        ``(class_name, score * 100)`` for boxes surviving non-max
        suppression; ``bboxes`` and ``probs`` map each class name to the
        boxes / scores collected BEFORE suppression.
    """
    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    bboxes = {}
    probs = {}

    # Classify the proposals in batches of exactly C.num_rois.
    for start in range(0, R.shape[0], C.num_rois):
        ROIs = np.expand_dims(R[start:start + C.num_rois, :], axis=0)

        if ROIs.shape[1] < C.num_rois:
            # Last, partial batch: pad with copies of its first ROI.
            n_valid = ROIs.shape[1]
            padded = np.zeros((1, C.num_rois, ROIs.shape[2]), dtype=ROIs.dtype)
            padded[:, :n_valid, :] = ROIs
            padded[0, n_valid:, :] = ROIs[0, 0, :]
            ROIs = padded

        [P_cls, P_regr] = model_classifier.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            scores = P_cls[0, ii, :]
            cls_num = int(np.argmax(scores))

            # Skip weak detections and the background (last) class.
            if scores[cls_num] < bbox_threshold or cls_num == P_cls.shape[2] - 1:
                continue

            cls_name = class_mapping[cls_num]
            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: keep the unrefined ROI if regression fails.
                pass

            bboxes.setdefault(cls_name, []).append([
                C.rpn_stride * x, C.rpn_stride * y,
                C.rpn_stride * (x + w), C.rpn_stride * (y + h)
            ])
            probs.setdefault(cls_name, []).append(scores[cls_num])

    all_dets = []
    for key in bboxes:
        # Only the surviving scores are needed here; the suppressed boxes
        # themselves are not part of the return value.
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            np.array(bboxes[key]), np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            all_dets.append((key, 100 * new_probs[jk]))
    return all_dets, bboxes, probs
Exemple #19
0
def predict(model, request: dict) -> dict:
    """Run detection on the image carried by a request and return it annotated.

    Args:
        model: Indexable with ``model[0]`` the RPN model, ``model[1]`` the
            classifier model and ``model[2]`` the config object ``C``.
        request: Dict whose ``"instances"`` entry holds the image pixels in a
            form ``np.array(..., dtype=np.uint8)`` accepts.

    Returns:
        ``{'output': <nested list>}`` with the pixels of the input image
        after boxes and labels have been drawn on it.

    Raises:
        KeyError: If ``request`` has no ``"instances"`` entry.
    """
    model_rpn, model_classifier, C = model[0], model[1], model[2]
    class_mapping = {v: k for k, v in C.class_mapping.items()}
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    # `export_path` is a module-level name defined elsewhere in the file.
    if not tf.io.gfile.exists(export_path):
        tf.io.gfile.makedirs(export_path)
    bbox_threshold = 0.5

    img = np.array(request["instances"], dtype=np.uint8)
    print(img.shape)
    X, ratio = format_img(img, C)
    X = np.transpose(X, (0, 2, 3, 1))
    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1, Y2, C, overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]
    bboxes = {}
    probs = {}

    # Classify the proposals in batches of exactly C.num_rois.
    for start in range(0, R.shape[0], C.num_rois):
        ROIs = np.expand_dims(R[start:start + C.num_rois, :], axis=0)

        if ROIs.shape[1] < C.num_rois:
            # Last, partial batch: pad with copies of its first ROI.
            n_valid = ROIs.shape[1]
            padded = np.zeros((1, C.num_rois, ROIs.shape[2]),
                              dtype=ROIs.dtype)
            padded[:, :n_valid, :] = ROIs
            padded[0, n_valid:, :] = ROIs[0, 0, :]
            ROIs = padded

        [P_cls, P_regr] = model_classifier.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            scores = P_cls[0, ii, :]
            cls_num = int(np.argmax(scores))

            # Skip weak detections and the background (last) class.
            if (scores[cls_num] < bbox_threshold
                    or cls_num == P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]
            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: keep the unrefined ROI if regression fails.
                pass

            bboxes.setdefault(cls_name, []).append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs.setdefault(cls_name, []).append(scores[cls_num])

    all_dets = []
    for key in bboxes:
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            np.array(bboxes[key]), np.array(probs[key]), overlap_thresh=0.5)
        # OpenCV expects a plain sequence of ints as colour.
        color = (int(class_to_color[key][0]), int(class_to_color[key][1]),
                 int(class_to_color[key][2]))
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
            cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2), color,
                          2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))
            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1)
            label_corner_a = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
            label_corner_b = (textOrg[0] + retval[0] + 5,
                              textOrg[1] - retval[1] - 5)

            # Black outline, white fill, then the label text on top.
            cv2.rectangle(img, label_corner_a, label_corner_b, (0, 0, 0), 2)
            cv2.rectangle(img, label_corner_a, label_corner_b,
                          (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    payload = {
        'output': img.tolist(),
    }
    return payload


# model_path = '/home/ritesh/Dkube-Demos/model/'
# export_path = './'
# test_path = 'data/test.png'

# filepath = test_path
# img = cv2.imread(filepath)
# print(img.shape)
# request = {
#     'instances': img.tolist(),
# }
# model, st = load(model_path)
# out = predict(model, request)
# out = np.array(out['output'], dtype= np.uint8)
# cv2.imwrite(export_path + 'result.png',out)
# with open('request.json', 'w') as outfile:
#     json.dump(request, outfile, indent=4)
Exemple #20
0
def calc_roi_siam(Im, R, X, title_id):
    """Classify ROIs and return boxes, scores and azimuths of one class.

    Relies on module-level names defined elsewhere in the file: ``C``,
    ``model_classifier``, ``class_mapping_inv``, ``draw_flag``, ``ratio``,
    ``img_helpers`` and ``roi_helpers``.

    Args:
        Im: Image handed to ``img_helpers.draw_bbox`` when ``draw_flag`` is
            set.
        R: Array of ROIs, one ``(x, y, w, h)`` row each.
        X: First input of ``model_classifier.predict`` (forwarded unchanged).
        title_id: Forwarded to ``img_helpers.display_image``.

    Returns:
        Tuple ``(bbox, prob, azimuth, idx)``. The first three are arrays for
        the class of the LAST accepted ROI; ``idx`` lists the positions in
        ``R`` of every accepted ROI. When no ROI is accepted, empty arrays
        and an empty list are returned.
    """
    bboxes = {}
    probs = {}
    azimuths = {}
    idx = []
    bbox_threshold = 0.7
    # Class of the most recently accepted ROI; stays None if there is none.
    last_cls_name = None

    # Classify the ROIs in batches of exactly C.num_rois.
    for start in range(0, R.shape[0], C.num_rois):
        ROIs = np.expand_dims(R[start:start + C.num_rois, :], axis=0)

        if ROIs.shape[1] < C.num_rois:
            # Last, partial batch: pad with copies of its first ROI.
            n_valid = ROIs.shape[1]
            padded = np.zeros((1, C.num_rois, ROIs.shape[2]),
                              dtype=ROIs.dtype)
            padded[:, :n_valid, :] = ROIs
            padded[0, n_valid:, :] = ROIs[0, 0, :]
            ROIs = padded

        [P_cls, P_regr, P_view] = model_classifier.predict([X, ROIs])
        for ii in range(P_cls.shape[1]):
            scores = P_cls[0, ii, :]
            cls_num = int(np.argmax(scores))

            # Skip weak detections and the background (last) class.
            if (scores[cls_num] < bbox_threshold
                    or cls_num == P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping_inv[cls_num]
            last_cls_name = cls_name
            # 360 view scores are stored per class; pick this class's slice.
            cls_view = P_view[0, ii, 360 * cls_num:360 * (cls_num + 1)]

            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # Best effort: keep the unrefined ROI if regression fails.
                pass

            bboxes.setdefault(cls_name, []).append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs.setdefault(cls_name, []).append(scores[cls_num])
            azimuths.setdefault(cls_name, []).append(
                np.argmax(cls_view, axis=0))
            # NOTE(review): idx collects accepted ROIs of every class (and can
            # point at padded slots), while the arrays returned below cover
            # one class only -- confirm callers expect that.
            idx.append(start + ii)

    if last_cls_name is None:
        # Nothing passed the threshold; previously this path crashed with an
        # UnboundLocalError on `cls_name`.
        return (np.empty((0, 4)), np.empty((0,)), np.empty((0,), dtype=int),
                idx)

    key = last_cls_name
    bbox = np.array(bboxes[key])
    prob = np.array(probs[key])
    azimuth = np.array(azimuths[key])
    if draw_flag:
        img = img_helpers.draw_bbox(Im, bbox, prob, azimuth, ratio,
                                    class_mapping_inv, key)
        img_helpers.display_image(img, title_id)

    return bbox, prob, azimuth, idx
Exemple #21
0
def work(textedit,pic_label,input,model,output):
    """Run tiled Faster R-CNN detection on every image in a folder.

    Each image is cut into overlapping square tiles (side ``2 * strideN``,
    step ``strideN``). Every tile goes through the RPN and the detection
    head; the per-tile detections are drawn on the tile, saved to ``output``
    and shown in ``pic_label``. The detections of all tiles are then mapped
    back to full-image coordinates, merged per class with
    ``non_max_suppression`` and drawn on a copy of the full image, which is
    saved under the original file name in ``output``.

    Args:
        textedit: text widget used as a progress log (its text cursor is
            moved to the end and the tile index / elapsed time appended).
        pic_label: label widget that displays the image being processed
            (``setPixmap`` / ``setScaledContents`` are called on it).
        input: directory containing the images to process.
        model: unused by this function.
        output: directory that receives the annotated tiles and images.

    Note:
        Options are still parsed from ``sys.argv`` through ``OptionParser``;
        ``input`` only provides the default for ``--path``.
    """
    #run test_frcnn.py -p ./testImages/
    sys.setrecursionlimit(40000)
    keras.backend.clear_session()
    test_path=input + "/"
    output = output + "/"

    parser = OptionParser()

    parser.add_option("-p", "--path", dest="test_path", help="Path to test data.", default=test_path)
    parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
                    help="Number of ROIs per iteration. Higher means more memory use.", default=256)
    parser.add_option("--config_filename", dest="config_filename", help=
                    "Location to re ad the metadata related to the training (generated when training).",
                    default="config.pickle")
    parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')

    (options, args) = parser.parse_args()

    if not options.test_path:   # if filename is not given
        parser.error('Error: path to test data must be specified. Pass --path to command line')

    config_output_filename = options.config_filename

    # NOTE: unpickling executes arbitrary code; only load config files you trust.
    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # invert the mapping: class index -> class name
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    # kept although unused below, so the global NumPy RNG state is consumed as before
    class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    # Both classifier models are built from the same output tensors, so the
    # weights loaded into model_classifier are the ones used by
    # model_classifier_only at prediction time.
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.8

    # BGR colours used for the three known classes; anything else is drawn red
    default_color = [0,0,255]
    class_colors = {
        "airbase": [0,0,255],
        "harbour": [0,159,255],
        "island": [0,255,0],
    }

    strideN = 400  # tile step in pixels; every tile is 2 * strideN on a side
    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        print("开始检测:")
        if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)

        filepath = os.path.join(img_path,img_name)
        # Split on the LAST dot only, so "a.b.png" keeps its full stem and its
        # real extension when the tile / result file names are built.
        stem, ext = os.path.splitext(img_name)

        image = QtGui.QPixmap(filepath)
        pic_label.setPixmap(image)
        pic_label.setScaledContents(True)

        imgO = cv2.imread(filepath)
        if imgO is None:
            # cv2.imread signals an unreadable file by returning None instead
            # of raising; skip it rather than abort the whole batch.
            print('Could not read image: {}'.format(filepath))
            continue
        (height,width,_) = imgO.shape
        mH = int((height-strideN)/strideN)
        mW = int((width-strideN)/strideN)

        # split the image into a grid of tiles and search each one;
        # detections are accumulated here in full-image coordinates
        object_key = []
        object_pro = []
        object_x1 = []
        object_y1 = []
        object_x2 = []
        object_y2 = []

        for m in range(mH):
            for n in range (mW):
                tile_idx = m*mW+n
                print(tile_idx)

                # work on a fresh copy so boxes drawn on one tile do not show
                # up in the overlapping neighbours
                imgCopy = imgO.copy()

                img = imgCopy[strideN*m:strideN*(m+2),strideN*n:strideN*(n+2)]  # rows (height), cols (width)

                st = time.time()

                X, ratio = format_img(img, C)
                if K.image_dim_ordering() == 'tf':
                    X = np.transpose(X, (0, 2, 3, 1))

                # get the feature maps and output from the RPN
                [Y1, Y2, F] = model_rpn.predict(X)

                R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

                # convert from (x1,y1,x2,y2) to (x,y,w,h)
                R[:, 2] -= R[:, 0]
                R[:, 3] -= R[:, 1]

                # apply the spatial pyramid pooling to the proposed regions
                bboxes = {}
                probs = {}

                for jk in range(R.shape[0]//C.num_rois + 1):
                    ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
                    if ROIs.shape[1] == 0:
                        break

                    if jk == R.shape[0]//C.num_rois:
                        # pad the last, partial batch up to C.num_rois by
                        # repeating its first ROI
                        curr_shape = ROIs.shape
                        target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
                        ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                        ROIs_padded[:, :curr_shape[1], :] = ROIs
                        ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                        ROIs = ROIs_padded

                    [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                    for ii in range(P_cls.shape[1]):
                        cls_num = np.argmax(P_cls[0, ii, :])
                        cls_prob = np.max(P_cls[0, ii, :])
                        # drop weak detections and the last class ('bg')
                        if cls_prob < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                            continue
                        cls_name = class_mapping[cls_num]

                        if cls_name not in bboxes:
                            bboxes[cls_name] = []
                            probs[cls_name] = []

                        (x, y, w, h) = ROIs[0, ii, :]

                        try:
                            (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
                            tx /= C.classifier_regr_std[0]
                            ty /= C.classifier_regr_std[1]
                            tw /= C.classifier_regr_std[2]
                            th /= C.classifier_regr_std[3]
                            x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
                        except Exception:
                            # best effort: fall back to the unrefined ROI, but
                            # let KeyboardInterrupt / SystemExit propagate
                            pass
                        bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
                        probs[cls_name].append(cls_prob)

                for key in bboxes:
                    print(key)
                    bbox = np.array(bboxes[key])

                    new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
                    color = class_colors.get(key, default_color)
                    for jk in range(new_boxes.shape[0]):
                        print("test")
                        (x1, y1, x2, y2) = new_boxes[jk,:]
                        (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                        print(real_x1)
                        print(real_y1)
                        print(real_x2)
                        print(real_y2)

                        cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), color,2)

                        textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
                        print(textLabel)

                        # shift tile coordinates back into the full image
                        object_key.append(key)
                        object_pro.append(new_probs[jk])
                        object_x1.append(real_x1 + strideN*n)
                        object_y1.append(real_y1 + strideN*m)
                        object_x2.append(real_x2 + strideN*n)
                        object_y2.append(real_y2 + strideN*m)

                print('Elapsed time = {}'.format(time.time() - st))
                cursor = textedit.textCursor()
                cursor.movePosition(QtGui.QTextCursor.End)
                cursor.insertText(str(tile_idx))
                cursor.insertText("\r\n")
                cursor.insertText('Elapsed time = {}'.format(time.time() - st))
                cursor.insertText("\r\n")
                textedit.setTextCursor(cursor)
                textedit.ensureCursorVisible()

                # save the annotated tile and show it
                tile_path = output + stem + '_' + str(tile_idx+1) + ext
                cv2.imwrite(tile_path,img)
                image = QtGui.QPixmap(tile_path)
                pic_label.setPixmap(image)
                pic_label.setScaledContents(True)

        # non-maximum suppression across tiles, one class at a time
        imgCopy2 = imgO.copy()
        object_name = ["airbase","harbour","island"]
        for cls_label in object_name:
            selected = [i for i, k in enumerate(object_key) if k == cls_label]
            if not selected:
                continue

            x1 = np.array([object_x1[i] for i in selected])
            y1 = np.array([object_y1[i] for i in selected])
            x2 = np.array([object_x2[i] for i in selected])
            y2 = np.array([object_y2[i] for i in selected])
            prob = np.array([object_pro[i] for i in selected])

            x1, y1, x2, y2, _ = non_max_suppression(x1,y1,x2,y2, prob, overlap_thresh=0.5, max_boxes=300)

            color = class_colors.get(cls_label, default_color)
            for numLR in range (len(x1)):
                cv2.rectangle(imgCopy2,(x1[numLR], y1[numLR]), (x2[numLR], y2[numLR]), color,2)

        # save the merged result under the original file name and show it
        result_path = output + img_name
        cv2.imwrite(result_path,imgCopy2)
        image = QtGui.QPixmap(result_path)
        pic_label.setPixmap(image)
        pic_label.setScaledContents(True)

        '''
Exemple #22
0
def test_view_func_NN(model_classifier, model_rpn, model_inner, C):
    """Evaluate azimuth (viewpoint) prediction for one class with a 1-NN classifier.

    The function works in two phases:

    1. Build a reference set: for up to three training images per azimuth
       (0..359) of ``test_cls``, run the RPN, keep the ROIs selected from
       ``calc_iou_new`` and store the ``model_inner`` feature of each ROI
       together with the ground-truth azimuth of the image's first box. The
       set is pickled and used to fit a ``KNeighborsClassifier`` with
       ``n_neighbors=1``.
    2. Test: for every image in the test pickle, detect boxes, predict an
       azimuth for each detection with the 1-NN classifier, take the most
       frequent azimuth among the detections overlapping the ground-truth
       box and count a hit when it falls into the same angular bin as the
       ground truth.

    Args:
        model_classifier: model called as ``predict([X, ROIs])`` and
            returning ``[P_cls, P_regr, P_view]``.
        model_rpn: region proposal model. NOTE(review): it is unpacked into
            two outputs here, and the same image tensor ``X`` is fed to
            ``model_classifier`` - presumably the classifier contains the
            base network; confirm against the model definitions.
        model_inner: model called as ``predict([X, ROIs])`` whose per-ROI
            output is used as the nearest-neighbour feature.
        C: configuration object. It is mutated: augmentation flags are
            switched off, ``num_rois`` is set to 32 and ``'bg'`` may be added
            to ``C.class_mapping``.

    Returns:
        Percentage (0-100) of ``test_cls`` objects whose predicted azimuth
        bin matches the ground-truth bin.

    NOTE(review): the pickle files are opened in text mode and the mapping is
    inverted with ``iteritems()``, which suggests this was written for
    Python 2 - confirm before running under Python 3.
    """
    test_cls = 'aeroplane'
    input_train_file = 'pickle_data/train_data_Wflip_all.pickle'

    ## read the test data from the pickle file
    # NOTE(review): if the pickle is missing, all_imgs is never assigned and
    # the test loop further down fails with a NameError.
    test_pickle = 'pickle_data/test_data_{}.pickle'.format(test_cls)
    if os.path.exists(test_pickle):
        with open(test_pickle) as f:
            all_imgs, classes_count, _ = pickle.load(f)

    class_mapping = C.class_mapping
    # index -> name, built BEFORE 'bg' is (possibly) added further down
    inv_class_mapping = {v: k for k, v in class_mapping.iteritems()}
    backend = K.image_dim_ordering()
    gt_cls_num = class_mapping[test_cls]
    print('work on class {}'.format(test_cls))
    base_path = os.getcwd()

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False
    count = 0  # objects whose predicted azimuth bin matched the ground truth
    good_img = 0  # images that could be read
    not_good = 0  # images cv2.imread could not read

    def format_img_size(img, C):
        """ formats the image size based on config """
        # scale so that the SHORTER side equals C.im_size
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        # reverse the channel order (BGR -> RGB for a cv2 image)
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        # to channels-first with a leading batch axis: (1, 3, H, W)
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    def display_image(img):
        # show the image with its channel order reversed, via PIL
        img1 = img[:, :, (2, 1, 0)]
        # img1=img
        im = Image.fromarray(img1.astype('uint8'), 'RGB')
        im.show()

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):
        # NOTE(review): `//` already floors the quotient, so round() has no
        # further effect - confirm that flooring (not rounding) is intended.
        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))
        return (real_x1, real_y1, real_x2, real_y2)

    # Bin edges for comparing azimuths: 0 followed by 24 edges running from
    # 7.5 to 352.5 degrees in 15-degree steps.
    vnum_test = 24
    azimuth_vec = np.concatenate(
        ([0],
         np.linspace((360. / (vnum_test * 2)), 360. -
                     (360. / (vnum_test * 2)), vnum_test)),
        axis=0)

    def find_interval(azimuth, azimuth_vec):
        # index of the first edge strictly greater than `azimuth`; when no
        # edge is greater the loop ends with i at the last index
        for i in range(len(azimuth_vec)):
            if azimuth < azimuth_vec[i]:
                break
        ind = i
        # angles past the last edge wrap around into bin 1, the same bin as
        # angles just above 0
        if azimuth > azimuth_vec[-1]:
            ind = 1
        return ind

    class_mapping = C.class_mapping

    # this adds 'bg' to C.class_mapping itself (same dict object)
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # from here on class_mapping maps index -> name
    class_mapping = {v: k for k, v in class_mapping.items()}
    # print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = 32

    obj_num = 0  # number of ground-truth boxes of test_cls seen
    bbox_threshold_orig = 0.6  # starting class-score threshold
    th_bbox = 0.4  # the threshold is relaxed in 0.1 steps while above this

    ## get GT for all az for single cls
    feature_az = []
    # "<name>.pickle" -> "<name>_sorted_Angles.pickle"
    sorted_path = input_train_file
    tmp_ind = sorted_path.index('.pickle')
    sorted_path = sorted_path[:tmp_ind] + "_sorted_Angles" + sorted_path[
        tmp_ind:]
    if os.path.exists(sorted_path):
        print("loading sorted data")
        with open(sorted_path) as f:
            trip_data = pickle.load(f)
    # collect up to three training images per azimuth degree
    im_file = []
    ind = []
    for ii in range(360):
        for jj in range(3):
            try:
                im_file.append(trip_data[test_cls][ii][jj])
                ind.append(ii)
            except:
                # NOTE(review): the bare except also hides the NameError
                # raised when trip_data was never loaded above.
                if jj == 0:
                    print('no azimuth {}'.format(ii))
    data_gen_train = data_generators.get_anchor_gt(im_file, [],
                                                   C,
                                                   K.image_dim_ordering(),
                                                   mode='test')
    azimuth_dict = []  # ground-truth azimuth of each stored feature
    inner_NN = []  # model_inner feature of each stored ROI
    azimuths = []
    for tt in range(len(ind)):
        try:
            if tt % 100 == 0:
                print('worked on {}/{}'.format(tt, len(ind)))
            # print ('im num {}'.format(good_img))
            X, Y, img_data = next(data_gen_train)

            P_rpn = model_rpn.predict_on_batch(X)

            R = roi_helpers.rpn_to_roi(P_rpn[0],
                                       P_rpn[1],
                                       C,
                                       K.image_dim_ordering(),
                                       use_regr=True,
                                       overlap_thresh=0.7,
                                       max_boxes=300)

            X2, Y1, Y2, Y_view = roi_helpers.calc_iou_new(
                R, img_data, C, C.class_mapping)

            # keep ROIs whose last class column is 0 - presumably the
            # non-background (positive) samples; verify against calc_iou_new
            pos_samples = np.where(Y1[0, :, -1] == 0)
            sel_samples = pos_samples[0].tolist()
            R = X2[0, sel_samples, :]
            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == R.shape[0] // C.num_rois:
                    # pad the last, partial batch up to C.num_rois by
                    # repeating its first ROI
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr, P_view] = model_classifier.predict([X, ROIs])
                iner_f = model_inner.predict([X, ROIs])
                # oo = model_classifier_only.predict([F, ROIs])

                # NOTE(review): ii runs over ALL selected samples but indexes
                # the current batch only; with more than C.num_rois samples
                # this presumably raises IndexError, which the outer except
                # reports as a failure - confirm.
                for ii in range(len(sel_samples)):

                    if np.max(P_cls[0,
                                    ii, :]) < bbox_threshold_orig or np.argmax(
                                        P_cls[0,
                                              ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    ## get class from the net
                    # cls_num = np.argmax(P_cls[0, ii, :])

                    ## use gt class
                    cls_num = gt_cls_num

                    cls_name = inv_class_mapping[cls_num]
                    cls_view = P_view[0, ii, 360 * cls_num:360 * (cls_num + 1)]

                    # azimuths[cls_name].append(np.argmax(cls_view, axis=0))
                    inner_NN.append(iner_f[0, ii, :])
                    azimuth_dict.append(img_data['bboxes'][0]['azimuth'])
        except:
            print('failed on az {}'.format(img_data['bboxes'][0]['azimuth']))
    ## save the reference features, reload them and fit the 1-NN classifier
    with open('pickle_data/{}_NN.pickle'.format(C.weight_name), 'w') as f:
        pickle.dump([inner_NN, azimuth_dict], f)
        print('saved PICKLE')

    with open('pickle_data/{}_NN.pickle'.format(C.weight_name)) as f:
        inner_NN, azimuth_dict = pickle.load(f)
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(inner_NN, azimuth_dict)

    jj = 0
    for im_file in all_imgs:
        jj += 1
        if jj % 50 == 0:
            print(jj)
        filepath = im_file['filepath']
        img = cv2.imread(filepath)
        img_gt = np.copy(img)
        if img is None:
            not_good += 1
            continue
        else:
            good_img += 1
            # print ('im num {}'.format(good_img))
        X, ratio = format_img(img, C)

        if backend == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        Y1, Y2 = model_rpn.predict(X)
        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)
        # # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        width, height = int(im_file["width"]), int(im_file["height"])
        resized_width, resized_height = data_generators.get_new_img_size(
            width, height, C.im_size)
        # [_,_, F] = model_rpn.predict(X)
        # An empty list marks "this image has not been classified yet"; the
        # detection pass below rebinds ROIs to an array, so it runs at most
        # once per image.
        ROIs = []
        ## pass on all the labels in the image, some of them are not equal to test_cls
        for bbox_gt in im_file['bboxes']:
            no_bbox_flag = 1
            bbox_threshold = bbox_threshold_orig
            if not bbox_gt['class'] == test_cls:
                continue
            if bbox_gt[
                    'class'] == test_cls and bbox_threshold == bbox_threshold_orig:
                obj_num += 1
            # retry with a lower threshold until an azimuth is found or the
            # threshold reaches th_bbox
            # NOTE(review): detections are computed only on the first pass
            # (see the len(ROIs) guard), so lowering bbox_threshold later
            # does not re-filter them - confirm this is intended.
            while no_bbox_flag and bbox_threshold > th_bbox:
                cls_gt = bbox_gt['class']
                az_gt = bbox_gt['azimuth']
                el_gt = bbox_gt['elevation']
                t_gt = bbox_gt['tilt']
                if len(ROIs) == 0:
                    # apply the spatial pyramid pooling to the proposed regions
                    bboxes = {}
                    probs = {}
                    azimuths = {}
                    inner_res = {}
                    # print ('obj num {}'.format(obj_num))

                    for jk in range(R.shape[0] // C.num_rois + 1):
                        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                                (jk + 1), :],
                                              axis=0)
                        if ROIs.shape[1] == 0:
                            break

                        if jk == R.shape[0] // C.num_rois:
                            # pad the last, partial batch up to C.num_rois
                            # by repeating its first ROI
                            curr_shape = ROIs.shape
                            target_shape = (curr_shape[0], C.num_rois,
                                            curr_shape[2])
                            ROIs_padded = np.zeros(target_shape).astype(
                                ROIs.dtype)
                            ROIs_padded[:, :curr_shape[1], :] = ROIs
                            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                            ROIs = ROIs_padded

                        [P_cls, P_regr,
                         P_view] = model_classifier.predict([X, ROIs])
                        inner_out = model_inner.predict([X, ROIs])
                        # oo = model_classifier_only.predict([F, ROIs])

                        for ii in range(P_cls.shape[1]):

                            # skip weak detections and those whose best class
                            # is the last index
                            if np.max(P_cls[
                                    0, ii, :]) < bbox_threshold or np.argmax(
                                        P_cls[0,
                                              ii, :]) == (P_cls.shape[2] - 1):
                                continue

                            ## get class from the net
                            # cls_num = np.argmax(P_cls[0, ii, :])

                            ## use gt class
                            cls_num = gt_cls_num

                            cls_name = inv_class_mapping[cls_num]
                            cls_view = P_view[0, ii, 360 * cls_num:360 *
                                              (cls_num + 1)]

                            if cls_name not in bboxes:
                                bboxes[cls_name] = []
                                probs[cls_name] = []
                                azimuths[cls_name] = []
                                inner_res[cls_name] = []

                            (x, y, w, h) = ROIs[0, ii, :]

                            # refine the ROI with the class-specific
                            # regression output; on any failure keep the
                            # unrefined ROI
                            try:
                                (tx, ty, tw,
                                 th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                                tx /= C.classifier_regr_std[0]
                                ty /= C.classifier_regr_std[1]
                                tw /= C.classifier_regr_std[2]
                                th /= C.classifier_regr_std[3]
                                x, y, w, h = roi_helpers.apply_regr(
                                    x, y, w, h, tx, ty, tw, th)
                            except:
                                pass
                            bboxes[cls_name].append([
                                C.rpn_stride * x, C.rpn_stride * y,
                                C.rpn_stride * (x + w), C.rpn_stride * (y + h)
                            ])
                            probs[cls_name].append(np.max(P_cls[0, ii, :]))
                            azimuths[cls_name].append(
                                np.argmax(cls_view, axis=0))
                            inner_res[cls_name].append(inner_out[0, ii, :])

                # cv2.rectangle(img_gt, (bbox_gt['x1'], bbox_gt['y1']), (bbox_gt['x2'], bbox_gt['y2']), (int(class_to_color[test_cls][0]), int(class_to_color[test_cls][1]), int(class_to_color[test_cls][2])), 2)
                for key in bboxes:
                    # if 1:
                    if key == test_cls and bbox_gt['class'] == test_cls:
                        bbox = np.array(bboxes[key])
                        prob = np.array(probs[key])
                        azimuth = np.array(azimuths[key])
                        inner_result = np.array(inner_res[key])
                        # img = draw_bbox(img,bbox, prob, azimuth, ratio)
                        # the network's own azimuth is replaced by the
                        # nearest-neighbour prediction
                        azimuth = neigh.predict(inner_result)
                        ## get the azimuth from bbox that have more than 'overlap_thresh' overlap with gt_bbox
                        az = []
                        overlap_thresh = 0.5
                        try:
                            # relax the overlap requirement in 0.1 steps
                            # until at least one box matches
                            while np.size(az) == 0 and overlap_thresh > 0:
                                _, prob_bbox, az = roi_helpers.overlap_with_gt(
                                    bbox,
                                    prob,
                                    azimuth,
                                    bbox_gt,
                                    ratio=ratio,
                                    overlap_thresh=overlap_thresh,
                                    max_boxes=300,
                                    use_az=True)
                                overlap_thresh -= 0.1
                            # NOTE(review): exact float comparison; repeated
                            # subtraction of 0.1 is unlikely to land exactly
                            # on 0, so this message may never print.
                            if overlap_thresh == 0:
                                print("No good Bbox was found")
                            counts = np.bincount(az)
                        except:
                            az = []
                            counts = []
                        try:
                            # majority vote over the matching boxes, then
                            # compare angular bins
                            az_fin = np.argmax(counts)
                            true_bin = find_interval(az_gt, azimuth_vec)
                            prob_bin = find_interval(az_fin, azimuth_vec)
                            no_bbox_flag = 0
                            if true_bin == prob_bin:
                                count += 1
                                # leaves the `for key` loop; the while loop
                                # ends because no_bbox_flag is now 0
                                break
                        except:
                            # print('here')
                            # no usable azimuth (e.g. argmax of an empty
                            # list): try again with a lower threshold
                            no_bbox_flag = 1
                            bbox_threshold -= 0.1

                    ## azimuth calculations

                    ## display

                bbox_threshold -= 0.1

    # NOTE(review): raises ZeroDivisionError when no test_cls object was seen.
    succ = float(count) / float(obj_num) * 100.
    print(
        'for class {} -true count is {} out of {} from {} images . {} success'.
        format(test_cls, count, obj_num, good_img, succ))
    return succ
Exemple #23
0
def detect_image(img, image_id="", is_map=False):
    """Detect objects in ``img``, draw labelled boxes on it and return it.

    The image is preprocessed with ``format_img``, passed through the RPN
    (``model_rpn``) and then, ``C.num_rois`` proposals at a time, through the
    detection head (``model_classifier_only``). A proposal is dropped when
    its best score is below ``bbox_threshold`` or its best class is the last
    class index (presumably background). The remaining boxes go through
    per-class non-maximum suppression and are drawn onto ``img`` in place.

    Args:
        img: image array accepted by ``format_img`` and the ``cv2`` drawing
            functions; it is modified in place.
        image_id: stem of the per-image detections file; only used when
            ``is_map`` is true.
        is_map: when true, every kept detection is also written as
            ``<class> <prob> <x1> <y1> <x2> <y2>`` to
            ``/content/VOC2007TestFRCNNVgg16/<image_id>.txt``. The file is
            created even when nothing is detected.

    Returns:
        The same ``img`` object with the detections drawn on it.
    """
    st = time.time()
    X, ratio = format_img(img, C)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // C.num_rois:
            # pad the last, partial batch up to C.num_rois by repeating its
            # first ROI
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            # best class and its score, computed once per ROI
            cls_num = np.argmax(P_cls[0, ii, :])
            cls_prob = np.max(P_cls[0, ii, :])

            if cls_prob < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # best effort: keep the unrefined ROI, but let
                # KeyboardInterrupt / SystemExit propagate
                pass
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(cls_prob)

    all_dets = []

    image_file = None
    if is_map:
        image_file = open('/content/VOC2007TestFRCNNVgg16/%s.txt' % image_id,
                          'w')

    # try/finally so the detections file is closed even when NMS or drawing
    # raises part-way through
    try:
        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            box_color = (int(class_to_color[key][0]),
                         int(class_to_color[key][1]),
                         int(class_to_color[key][2]))
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                # map from the resized image back to the original image
                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              box_color, 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1 - 0)

                # label background: black outline, then a white filled box
                label_tl = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
                label_br = (textOrg[0] + retval[0] + 5,
                            textOrg[1] - retval[1] - 5)
                cv2.rectangle(img, label_tl, label_br, (0, 0, 0), 2)
                cv2.rectangle(img, label_tl, label_br, (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)

                if image_file is not None:
                    image_file.write(key + " " + str(new_probs[jk]) + " " +
                                     str(real_x1) + " " + str(real_y1) + " " +
                                     str(real_x2) + " " + str(real_y2) + '\n')
    finally:
        if image_file is not None:
            image_file.close()

    print('Elapsed time = {}'.format(time.time() - st))
    print(all_dets)
    #cv2.imshow('img', img)
    #cv2.waitKey(0)
    return img
def upload_file():
    """Run the Faster R-CNN detector on an uploaded image and return the result.

    The submitted form must contain:

    * ``file`` -- a data URL; the part after the first comma is
      base64-decoded and opened as an image.
    * ``imgDimensions`` -- ``"<width>,<height>"``, used to reshape the decoded
      pixels into a ``(height, width, 3)`` array.

    The RPN and classifier models are rebuilt and their weights reloaded on
    every call. Detections that survive non-max suppression are drawn onto
    the image, the pixel buffer is stored through ``insertBLOB`` and the
    Keras session is cleared before responding.

    Returns:
        The ``jsonify`` response ``{"Data": {"count": ..., "image": ...}}``
        where ``count`` is the number of boxes kept after non-max suppression
        and ``image`` is built from the base64-encoded annotated JPEG.

    Raises:
        ValueError: if the pickled config names a network other than
            ``'resnet50'`` or ``'vgg'``.
    """
    print("request is ", request.files)
    content_length = request.content_length
    print(f"Content_length : {content_length}")
    print("data type is ", type(request))
    print("data type of request files  ", type(request.files))
    data_dict = request.form.to_dict()

    # 'file' is a data URL; keep only the base64 payload after the comma.
    encoded = data_dict['file'].split(',')[1]
    width, height = data_dict['imgDimensions'].split(',')
    width = int(width)
    height = int(height)
    print('width of image', width)
    print('type of l ', type(width))
    print('height of image', height)

    imgdata = base64.b64decode(encoded)
    print("imagedata type is", type(imgdata))
    img2 = Image.open(io.BytesIO(imgdata))
    print(type(img2))
    # The reshape below needs exactly three values per pixel. Uploads with an
    # alpha channel, a palette or a single grey channel would otherwise fail.
    if img2.mode != 'RGB':
        img2 = img2.convert('RGB')
    a = np.array(img2.getdata()).astype(np.float64)
    print('type of data to model ', type(a))
    print('shape of data from frontend', a.shape)
    data = a.reshape(height, width, 3)

    st = time.time()

    parser = OptionParser()

    parser.add_option(
        "-n",
        "--num_rois",
        type="int",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=64)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')

    # NOTE(review): parse_args() reads the server process's sys.argv, so an
    # option unknown to this parser makes optparse exit the process.
    options, _ = parser.parse_args()

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # The backbone module and its feature depth are chosen together so an
    # unsupported value fails here instead of as an unbound name later on.
    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
        num_features = 1024
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn
        num_features = 512
    else:
        raise ValueError('Unsupported network in config: {!r}'.format(
            C.network))

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # Invert name -> index into index -> name for decoding predictions.
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.6

    # `img` aliases `data`: the boxes drawn below also end up in the buffer
    # that is stored through insertBLOB.
    img = data

    X, ratio = format_img(img, C)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.6)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // C.num_rois:
            # The last batch is short: pad it to num_rois by repeating the
            # first ROI of the batch.
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):
            scores = P_cls[0, ii, :]
            cls_num = np.argmax(scores)

            # Skip weak predictions and the last class index (background).
            if np.max(scores) < bbox_threshold or cls_num == (
                    P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[cls_num]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception as exc:
                # Best effort: keep the unrefined proposal, but say why.
                print('Box regression skipped:', exc)
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(scores))

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.6)
        box_color = tuple(int(c) for c in class_to_color[key])
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                          box_color, 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1)

            # Black outline, then a white filled box behind the label text.
            label_top_left = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
            label_bottom_right = (textOrg[0] + retval[0] + 5,
                                  textOrg[1] - retval[1] - 5)
            cv2.rectangle(img, label_top_left, label_bottom_right, (0, 0, 0),
                          2)
            cv2.rectangle(img, label_top_left, label_bottom_right,
                          (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print('number of windoiws detected', len(all_dets))
    print(all_dets)
    count = len(all_dets)
    # The result is not used; the call is kept in case it has side effects.
    normalize(img)

    K.clear_session()
    im2 = Image.fromarray(img.astype("uint8"), "RGB")
    print("im2 data type is ", type(im2))
    db = data.tobytes()
    print('type of data to database :', type(db))
    insertBLOB('Image007', db)
    print('final data shape fed to model : ', data.shape)

    rawBytes = io.BytesIO()
    print(rawBytes)
    im2.save(rawBytes, "jpeg")
    print('type of im2 is ', type(im2))
    rawBytes.seek(0)  # return to the start of the file
    # NOTE(review): on Python 3 str(bytes) yields "b'...'" so the value is
    # not a clean data URL. Left as is because the front end may rely on the
    # current format -- confirm before switching to .decode('ascii').
    response_obj = {
        'count': count,
        'image':
        "data:image/jpeg;base64," + str(base64.b64encode(rawBytes.read()))
    }
    return jsonify(Data=response_obj)
	def predict(self, img=None, filepath=None, img_name=None):
		"""Detect objects in an image and return the boxes kept by non-max suppression.

		Args:
			img: image array; when None the image is read from ``filepath``
				with ``cv2.imread``.
			filepath: path of the image, only used when ``img`` is None.
			img_name: not used by the active code.

		Returns:
			A list with one ``[probability, x1, y1, x2, y2]`` entry per kept
			box. No rescaling to the original image size is applied here.
		"""
		if img is None:
			img = cv2.imread(filepath)

		X = self.format_img(img, self.C)

		# Rebuild a drawable image from the network input: reverse the channel
		# order, move channels last and add the per-channel offsets back.
		reversed_planes = X.copy()[0, (2, 1, 0), :, :]
		canvas = np.transpose(reversed_planes, (1, 2, 0)).copy()
		for channel, offset in enumerate((123.68, 116.779, 103.939)):
			canvas[:, :, channel] += offset
		canvas = canvas.astype(np.uint8)

		if K.image_dim_ordering() == 'tf':
			X = np.transpose(X, (0, 2, 3, 1))

		# Feature maps and RPN outputs.
		Y1, Y2, F = self.model_rpn.predict(X)

		R = roi_helpers.rpn_to_roi(Y1, Y2, self.C, K.image_dim_ordering(), overlap_thresh=0.7)

		# (x1, y1, x2, y2) -> (x, y, w, h)
		R[:, 2] -= R[:, 0]
		R[:, 3] -= R[:, 1]

		bboxes = {}
		probs = {}

		for batch_idx in range(R.shape[0]//self.C.num_rois + 1):
			first = self.C.num_rois*batch_idx
			last = self.C.num_rois*(batch_idx+1)
			ROIs = np.expand_dims(R[first:last, :], axis=0)
			if ROIs.shape[1] == 0:
				break

			if batch_idx == R.shape[0]//self.C.num_rois:
				# Short final batch: pad to num_rois by repeating its first ROI.
				filled = ROIs.shape[1]
				padded_shape = (ROIs.shape[0], self.C.num_rois, ROIs.shape[2])
				padded = np.zeros(padded_shape).astype(ROIs.dtype)
				padded[:, :filled, :] = ROIs
				padded[0, filled:, :] = ROIs[0, 0, :]
				ROIs = padded

			P_cls, P_regr = self.model_classifier_only.predict([F, ROIs])

			for ii in range(P_cls.shape[1]):
				scores = P_cls[0, ii, :]
				cls_num = np.argmax(scores)

				# Drop weak predictions and the last class index.
				if np.max(scores) < self.bbox_threshold or cls_num == (P_cls.shape[2] - 1):
					continue

				cls_name = self.class_mapping[cls_num]

				if cls_name not in bboxes:
					bboxes[cls_name] = []
					probs[cls_name] = []

				x, y, w, h = ROIs[0, ii, :]

				try:
					tx, ty, tw, th = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
					tx /= self.C.classifier_regr_std[0]
					ty /= self.C.classifier_regr_std[1]
					tw /= self.C.classifier_regr_std[2]
					th /= self.C.classifier_regr_std[3]
					x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
				except:
					pass
				bboxes[cls_name].append([16*x, 16*y, 16*(x+w), 16*(y+h)])
				probs[cls_name].append(np.max(scores))

		all_dets = []

		for key in bboxes:
			class_boxes = np.array(bboxes[key])
			class_probs = np.array(probs[key])

			new_boxes, new_probs = roi_helpers.non_max_suppression_fast(class_boxes, class_probs, overlap_thresh=0.5)
			for idx, (x1, y1, x2, y2) in enumerate(new_boxes):
				cv2.rectangle(canvas,(x1, y1), (x2, y2), np.array([0, 255, 255]), 2)

				all_dets.append([new_probs[idx], x1, y1, x2, y2])

		return all_dets
Exemple #26
0
def Test_frcnn(test_path,
               config_filename,
               num_rois=32,
               network="vgg",
               terminal_flag=False):
    """
    Test the object detection network on every image in a folder.

    test_path --str: Full Path to the folder containing the test images (No default)
    config_filename --str: Full path to the config_file.pickle, generated while training (No default)
    num_rois --int: number of ROIs to process at once (Default 32)
    network --str: The base network to use (One of 'vgg','resnet50','mobilenet') (Default 'vgg')
    terminal_flag --bool: Flag to test if accessing from terminal do not pass anything to it while calling this function

    OUTPUT:
    When terminal_flag is True, every image with at least one detection is
    written to '<img_name>_new.png' (BGR, as loaded by opencv) and nothing is returned.
    When called as a function returns the images, dets as 2 lists (images are in RGB format).
    Files that opencv cannot read are skipped.

    Raises ValueError when `network` or the network stored in the config is
    not one of the supported names.
    """

    config_output_filename = config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif network == 'vgg':
        import keras_frcnn.vgg as nn
    elif network == "mobilenet":
        import keras_frcnn.mobilenet as nn
    else:
        # Previously this fell through and failed later on an unbound `nn`.
        raise ValueError('Unsupported network: {!r}'.format(network))
    # The weights file is fixed here and overrides the path stored in the config.
    C.model_path = 'epoch-176.hdf5'
    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = test_path

    def format_img_size(img, C):  # utility function 1
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        # Scale so that the shorter side becomes C.im_size.
        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):  #utility function 2
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):  # utility function 3
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):  #utility function 4

        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # Invert name -> index into index -> name for decoding predictions.
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(num_rois)

    # NOTE(review): the backbone module follows the `network` argument while
    # the feature depth follows C.network -- confirm the two always agree.
    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512
    elif C.network == 'mobilenet':
        num_features = 512
    else:
        raise ValueError('Unsupported network in config: {!r}'.format(
            C.network))

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.8

    list_of_all_images = []
    list_of_all_dets = []

    for img_name in sorted(os.listdir(img_path)):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        st = time.time()
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)
        if img is None:
            # imread signals an unreadable file with None; skip it instead
            # of crashing on `img.shape` in format_img_size.
            print('Skipping unreadable image: {}'.format(filepath))
            continue

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # The last batch is short: pad it to num_rois by repeating
                # the first ROI of the batch.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = np.argmax(scores)

                # Skip weak predictions and the last class index (background).
                if np.max(scores) < bbox_threshold or cls_num == (
                        P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception as exc:
                    # Best effort: keep the unrefined proposal, but say why.
                    print('Box regression skipped:', exc)
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(scores))

        all_dets = []

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            box_color = tuple(int(c) for c in class_to_color[key])
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              box_color, 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1)

                # Black outline, then a white filled box behind the label.
                label_top_left = (textOrg[0] - 5, textOrg[1] + baseLine - 5)
                label_bottom_right = (textOrg[0] + retval[0] + 5,
                                      textOrg[1] - retval[1] - 5)
                cv2.rectangle(img, label_top_left, label_bottom_right,
                              (0, 0, 0), 2)
                cv2.rectangle(img, label_top_left, label_bottom_right,
                              (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)

        if terminal_flag:
            print('Elapsed time = {}'.format(time.time() - st))
            print(all_dets)
            if len(all_dets) > 0:
                cv2.imwrite(img_name + '_new.png', img)
                cv2.waitKey(0)
        else:
            list_of_all_images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            list_of_all_dets.append(all_dets)

    if not terminal_flag:
        return (list_of_all_images, list_of_all_dets)
    return None
Exemple #27
0
    def predict(self, img):
        """Detect objects in ``img`` and return the surviving boxes.

        The image is formatted with ``self.format_img``, passed through the
        RPN and the classifier head, and the classifier boxes are filtered
        with non-max suppression.

        Args:
            img: image accepted by ``self.format_img``.

        Returns:
            ``(boxes, ratio)``. ``boxes`` is the array returned by
            ``roi_helpers.non_max_suppression_fast`` for the first detected
            class, or an empty ``(0, 4)`` array when nothing passes
            ``self.bbox_threshold``. ``ratio`` is the second value returned
            by ``self.format_img``.
        """
        X, ratio = self.format_img(img)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = self.model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   self.C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // self.C.num_rois + 1):
            ROIs = np.expand_dims(R[self.C.num_rois * jk:self.C.num_rois *
                                    (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // self.C.num_rois:
                # The last batch is short: pad it to num_rois by repeating
                # the first ROI of the batch.
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], self.C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = self.model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                scores = P_cls[0, ii, :]
                cls_num = np.argmax(scores)

                # Skip weak predictions and the last class index.
                if np.max(scores) < self.bbox_threshold or cls_num == (
                        P_cls.shape[2] - 1):
                    continue

                cls_name = self.class_mapping[cls_num]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= self.C.classifier_regr_std[0]
                    ty /= self.C.classifier_regr_std[1]
                    tw /= self.C.classifier_regr_std[2]
                    th /= self.C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception as exc:
                    # Best effort: keep the unrefined proposal, but say why.
                    print('Box regression skipped:', exc)
                bboxes[cls_name].append([
                    self.C.rpn_stride * x, self.C.rpn_stride * y,
                    self.C.rpn_stride * (x + w), self.C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(scores))

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, _ = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            # NOTE(review): only the first detected class is returned, as in
            # the original code -- confirm whether boxes of the remaining
            # classes should be included as well.
            return new_boxes, ratio

        # Nothing passed the threshold. Return an empty box array so callers
        # that unpack the result do not fail on an implicit None.
        return np.empty((0, 4), dtype=int), ratio
Exemple #28
0
    def detect_on_image(self, img):
        """Run the RPN and the classifier on ``img`` and group boxes by class.

        Args:
            img: image passed to ``format_img`` together with ``self.cfg``.

        Returns:
            dict mapping the predicted class index to a list of
            ``[x1, y1, x2, y2, score]`` entries, each coordinate scaled by
            ``self.cfg.rpn_stride``. Predictions scoring below 0.7 or falling
            on the last class index are left out. No non-max suppression is
            applied.
        """
        started_at = time.time()

        X, ratio = format_img(img, self.cfg)
        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))
        # Feature maps and RPN outputs.
        Y1, Y2, F = self.model_rpn.predict(X)

        # Proposals arrive as [x1, y1, x2, y2] ...
        proposals = roi_helpers.rpn_to_roi(Y1,
                                           Y2,
                                           self.cfg,
                                           K.image_dim_ordering(),
                                           overlap_thresh=0.7)

        # ... and are turned into (x, y, w, h) in place.
        proposals[:, 2] -= proposals[:, 0]
        proposals[:, 3] -= proposals[:, 1]
        min_score = 0.7

        detections = {}
        for batch_idx in range(proposals.shape[0] // self.cfg.num_rois + 1):
            first = self.cfg.num_rois * batch_idx
            last = self.cfg.num_rois * (batch_idx + 1)
            batch = np.expand_dims(proposals[first:last, :], axis=0)
            if batch.shape[1] == 0:
                break
            if batch_idx == proposals.shape[0] // self.cfg.num_rois:
                # Short final batch: pad to num_rois by repeating its first ROI.
                filled = batch.shape[1]
                padded_shape = (batch.shape[0], self.cfg.num_rois,
                                batch.shape[2])
                padded = np.zeros(padded_shape).astype(batch.dtype)
                padded[:, :filled, :] = batch
                padded[0, filled:, :] = batch[0, 0, :]
                batch = padded

            p_cls, p_regr = self.model_classifier.predict([F, batch])

            for ii in range(p_cls.shape[1]):
                scores = p_cls[0, ii, :]
                cls_num = np.argmax(scores)
                if np.max(scores) < min_score or cls_num == (
                        p_cls.shape[2] - 1):
                    continue

                detections.setdefault(cls_num, [])
                x, y, w, h = batch[0, ii, :]
                try:
                    tx, ty, tw, th = p_regr[0, ii,
                                            4 * cls_num:4 * (cls_num + 1)]
                    tx /= self.cfg.classifier_regr_std[0]
                    ty /= self.cfg.classifier_regr_std[1]
                    tw /= self.cfg.classifier_regr_std[2]
                    th /= self.cfg.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception as e:
                    # On failure the unrefined proposal is used.
                    print(e)
                stride = self.cfg.rpn_stride
                detections[cls_num].append([
                    stride * x,
                    stride * y,
                    stride * (x + w),
                    stride * (y + h),
                    np.max(scores),
                ])

        return detections
Exemple #29
0
    def predict(self, img, filename):
        with self.graph.as_default():
            print(self.class_mapping)
            bbox_threshold = 0.8
            X, ratio = ult.format_img(img, self.config)

            if K.image_dim_ordering() == 'tf':
                X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            # print(X)
            [Y1, Y2, F] = self.model.predict(X)

            R = roi_helpers.rpn_to_roi(Y1,
                                       Y2,
                                       self.config,
                                       K.image_dim_ordering(),
                                       overlap_thresh=0.7)

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # apply the spatial pyramid pooling to the proposed regions
            bboxes = {}
            probs = {}

            for jk in range(R.shape[0] // self.config.num_rois + 1):
                ROIs = np.expand_dims(R[self.config.num_rois *
                                        jk:self.config.num_rois * (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == R.shape[0] // self.config.num_rois:
                    # pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], self.config.num_rois,
                                    curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = self.modelClassify.predict([F, ROIs])

                for ii in range(P_cls.shape[1]):

                    if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                            P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    cls_name = self.class_mapping[np.argmax(P_cls[0, ii, :])]

                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    cls_num = np.argmax(P_cls[0, ii, :])
                    try:
                        (tx, ty, tw,
                         th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                        tx /= self.config.classifier_regr_std[0]
                        ty /= self.config.classifier_regr_std[1]
                        tw /= self.config.classifier_regr_std[2]
                        th /= self.config.classifier_regr_std[3]
                        x, y, w, h = roi_helpers.apply_regr(
                            x, y, w, h, tx, ty, tw, th)
                    except:
                        pass
                    bboxes[cls_name].append([
                        self.config.rpn_stride * x, self.config.rpn_stride * y,
                        self.config.rpn_stride * (x + w),
                        self.config.rpn_stride * (y + h)
                    ])
                    probs[cls_name].append(np.max(P_cls[0, ii, :]))

            all_dets = []
            detect_imgs = []

            for key in bboxes:
                bbox = np.array(bboxes[key])
                count = 0
                newPic_name = "box_{}.jpg".format(
                    str(filename[:-4] + str(count)))
                count += 1
                detect_imgs.append(newPic_name)
                original_img = cv2.imread('./static/tmp_pic/' + filename)
                height, width, _ = original_img.shape
                (resized_width,
                 resized_height) = ult.get_new_img_size(width, height, 300)
                resize_img = cv2.resize(original_img,
                                        (resized_width, resized_height),
                                        interpolation=cv2.INTER_CUBIC)
                cv2.imwrite("./static/img/doc/" + filename, resize_img)
                new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                    bbox, np.array(probs[key]), overlap_thresh=0.5)
                for jk in range(new_boxes.shape[0]):
                    (x1, y1, x2, y2) = new_boxes[jk, :]
                    (real_x1, real_y1, real_x2,
                     real_y2) = ult.get_real_coordinates(
                         ratio, x1, y1, x2, y2)
                    gt_x1, gt_x2 = real_x1 * (
                        resized_width / width), real_x2 * (resized_width /
                                                           width)
                    gt_y1, gt_y2 = real_y1 * (
                        resized_height / height), real_y2 * (resized_height /
                                                             height)
                    gt_x1, gt_y1, gt_x2, gt_y2 = int(gt_x1), int(gt_y1), int(
                        gt_x2), int(gt_y2)
                    color = (0, 255, 0)
                    result_img = cv2.rectangle(resize_img, (gt_x1, gt_y1),
                                               (gt_x2, gt_y2), color, 2)

                    cv2.imwrite("./static/img/doc/" + newPic_name, result_img)
                    textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                    all_dets.append((key, 100 * new_probs[jk]))

                    (retval,
                     baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
                    textOrg = (real_x1, real_y1 - 0)

                    cv2.rectangle(img,
                                  (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5), (0, 0, 0), 2)
                    cv2.rectangle(img,
                                  (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5),
                                  (255, 255, 255), -1)
                    cv2.putText(img, textLabel, textOrg,
                                cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
            print(all_dets)
            if len(detect_imgs) > 0:
                print(detect_imgs[0])
                return detect_imgs[0], all_dets
            else:
                return "Can not detect"
Exemple #30
0
def Test_frcnn(test_images_list="./test_samples/",
               network_arch=arch,
               config_filename=None,
               preprocessing_function=None,
               num_rois=None,
               final_classification_threshold=0.8):
    """
    Run a trained Faster R-CNN on a list of images and collect the detections.

    test_images_list --list: paths of the images to test. The default value
                    is a plain string and is rejected by the type check
                    below, so a list (or tuple) must always be supplied.
    network_arch --object: module/object exposing nn_base, rpn and classifier
                    (the network definition used for training)
    config_filename --str: path to the config pickle generated while training.
                    Required; the None default only exists so the signature
                    is valid Python.
    preprocessing_function --function: optional image preprocessing function,
                    applied after the BGR -> RGB swap (Default None)
    num_rois --int: (optional) number of ROIs processed at once by the final
                    classifier. If not given, the value stored in the config
                    is used.
    final_classification_threshold --float: (0,1) minimum class probability
                    for a ROI to be accepted as a detection (Default 0.8)

    OUTPUT:
    returns (list_of_all_images, final_df): the images (converted to RGB) with
    the detected boxes drawn on them, and a dataframe with one row per
    detection (columns Image_name, classes, pred_prob, x1_y1_x2_y2).
    Images that cannot be read are skipped and do not appear in the output.

    RAISES:
    ValueError if config_filename is not given.
    TypeError if test_images_list is not a list or tuple.
    """
    if config_filename is None:
        raise ValueError("config_filename is required: pass the path to the "
                         "config pickle generated while training")
    if not isinstance(test_images_list, (list, tuple)):
        raise TypeError(
            "test_images_list must be a list of paths to the test images")

    nn = network_arch

    # NOTE: pickle.load can execute arbitrary code, only load trusted configs
    with open(config_filename, 'rb') as f_in:
        C = pickle.load(f_in)
    if num_rois:
        C.num_rois = int(num_rois)

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    def format_img_size(img, C):  # utility function 1
        """ resizes the image so that its shorter side equals C.im_size """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def preprocess_img(img, preprocessing_function):  # utility function 2
        """ swaps the channels, preprocesses and adds the batch axis """
        img = img[:, :, (2, 1, 0)]  # bgr to rgb
        if preprocessing_function:
            img = preprocessing_function(img)
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C, preprocessing_function):  # utility function 3
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = preprocess_img(img, preprocessing_function)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):  # utility function 4
        # true division: with floor division round() was a no-op and every
        # coordinate was biased towards the origin
        real_x1 = int(round(x1 / ratio))
        real_y1 = int(round(y1 / ratio))
        real_x2 = int(round(x2 / ratio))
        real_y2 = int(round(y2 / ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    # invert the mapping: class index -> class name
    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    # one random drawing colour per class name
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    # load the models
    input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    shared_layers = nn.nn_base(img_input)

    num_features = shared_layers.get_shape().as_list()[3]  # 512 for vgg-16
    feature_map_input = Input(shape=(None, None, num_features))
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois,
                               len(class_mapping))
    # create a keras model
    model_rpn = Model(img_input, rpn)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    # Note: The model_classifier in training and testing are different.
    # In training model_classifier and model_rpn both have the base_nn,
    # while in testing only model_rpn has the base_nn and it returns the
    # feature map (FM) of base_nn.
    # Thus the model_classifier has the FM and ROI as input.
    # This is done to increase the testing speed.

    print('Loading weights from {}'.format(C.weights_all_path))
    model_rpn.load_weights(C.weights_all_path, by_name=True)
    model_classifier.load_weights(C.weights_all_path, by_name=True)

    df_columns = ["Image_name", "classes", "pred_prob", "x1_y1_x2_y2"]
    list_of_all_images = []
    df_list = []

    for filepath in sorted(test_images_list):
        print(os.path.basename(filepath))

        img = cv2.imread(filepath)
        if img is None:
            # cv2.imread returns None instead of raising on a bad path/file
            print('Could not read image {}, skipping'.format(filepath))
            continue

        X, ratio = format_img(img, C, preprocessing_function)

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=C.rpn_nms_threshold,
                                   flag="test")

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # classify the proposed regions, C.num_rois at a time
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # last, incomplete batch: pad it up to C.num_rois entries by
                # repeating its first ROI
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):
                cls_scores = P_cls[0, ii, :]
                cls_num = np.argmax(cls_scores)
                cls_prob = np.max(cls_scores)

                # drop low-confidence ROIs and the last class index
                # ('bg' when it was appended above)
                if (cls_prob < final_classification_threshold
                        or cls_num == (P_cls.shape[2] - 1)):
                    continue

                cls_name = class_mapping[cls_num]

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # best effort: keep the unrefined ROI when the
                    # regression cannot be applied
                    pass
                bboxes.setdefault(cls_name, []).append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs.setdefault(cls_name, []).append(cls_prob)

        probs_list = []  # new list for every image
        coor_list = []  # new list for every image
        classes_list = []  # new list for every image
        img_name_list = []  # new list for every image
        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox,
                np.array(probs[key]),
                overlap_thresh=C.test_roi_nms_threshold,
                max_boxes=C.TEST_RPN_POST_NMS_TOP_N
            )  # 0.3 default threshold from original implementation
            color = (int(class_to_color[key][0]), int(class_to_color[key][1]),
                     int(class_to_color[key][2]))
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              color, 2)

                coor_list.append([real_x1, real_y1, real_x2,
                                  real_y2])  # get the coordinates
                classes_list.append(key)
                probs_list.append(100 * new_probs[jk])
                img_name_list.append(filepath)

        df = pd.DataFrame({
            "Image_name": img_name_list,
            "classes": classes_list,
            "pred_prob": probs_list,
            "x1_y1_x2_y2": coor_list
        })

        list_of_all_images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        df_list.append(df)

    if not df_list:
        # pd.concat raises ValueError on an empty list
        return (list_of_all_images, pd.DataFrame(columns=df_columns))

    final_df = pd.concat(df_list, ignore_index=True)

    return (list_of_all_images, final_df)
Exemple #31
0
            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except:
                pass
            bboxes[cls_name].append(
                [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]
Exemple #32
0
def test_view_func(C, model_rpn, model_classifier):
    """
    Evaluate the azimuth (viewpoint) prediction for a fixed set of classes.

    For each class in ('aeroplane', 'bus', 'motorbike') the annotated test
    images are read from 'pickle_data/test_data_<class>.pickle' under the
    current working directory. Every readable image is passed through the RPN
    and the classifier; for each ground-truth box of the class the predicted
    azimuths of the detections selected by roi_helpers.overlap_with_gt are
    collected, and the most frequent one is compared with the ground-truth
    azimuth after both have been mapped to a bin of azimuth_vec.

    C -- config object (reads class_mapping, im_size, img_channel_mean,
         img_scaling_factor, num_rois, classifier_regr_std, rpn_stride)
    model_rpn -- model whose predict(X) returns [Y1, Y2]
    model_classifier -- model whose predict([X, ROIs]) returns
         [P_cls, P_regr, P_view]

    OUTPUT:
    returns (succ, mAP): succ is a list with, per class, the percentage of
    ground-truth objects whose azimuth bin was predicted correctly; mAP is
    the percentage of ground-truth objects of the LAST class for which
    overlap_with_gt returned a match at overlap threshold 0.5. Both are 0
    for a class without annotated objects (or without a pickle file).
    """
    base_dir = os.getcwd()
    test_cls_all = ['aeroplane', 'bus', 'motorbike']
    class_mapping = C.class_mapping
    # items() works on Python 2 and 3 (iteritems() exists on Python 2 only)
    inv_class_mapping = {v: k for k, v in class_mapping.items()}
    backend = K.image_dim_ordering()

    def format_img_size(img, C):
        """ resizes the image so that its shorter side equals C.im_size """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    vnum_test = 24
    # bin edges: 0 followed by vnum_test evenly spaced edges that are offset
    # by half a bin width
    azimuth_vec = np.concatenate(
        ([0],
         np.linspace((360. / (vnum_test * 2)), 360. -
                     (360. / (vnum_test * 2)), vnum_test)),
        axis=0)

    def find_interval(azimuth, azimuth_vec):
        """ returns the index of the first edge greater than azimuth;
        azimuths beyond the last edge wrap around to bin 1 """
        for i in range(len(azimuth_vec)):
            if azimuth < azimuth_vec[i]:
                break
        ind = i
        if azimuth > azimuth_vec[-1]:
            ind = 1
        return ind

    def classify_rois(X, R, cls_num, bbox_threshold):
        """ runs the classifier over all proposals in R and returns the boxes,
        probabilities and predicted azimuths of the confident ones, each as a
        dict keyed by the name of class cls_num """
        bboxes = {}
        probs = {}
        azimuths = {}
        cls_name = inv_class_mapping[cls_num]

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # last, incomplete batch: pad it up to C.num_rois entries by
                # repeating its first ROI
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr, P_view] = model_classifier.predict([X, ROIs])

            for ii in range(P_cls.shape[1]):
                cls_scores = P_cls[0, ii, :]

                # drop low-confidence ROIs and ROIs whose best class is the
                # last index
                if (np.max(cls_scores) < bbox_threshold
                        or np.argmax(cls_scores) == (P_cls.shape[2] - 1)):
                    continue

                # the ground-truth class (cls_num) is used instead of the
                # class predicted by the net
                cls_view = P_view[0, ii, 360 * cls_num:360 * (cls_num + 1)]

                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # best effort: keep the unrefined ROI when the
                    # regression cannot be applied
                    pass
                bboxes.setdefault(cls_name, []).append([
                    C.rpn_stride * x, C.rpn_stride * y,
                    C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs.setdefault(cls_name, []).append(np.max(cls_scores))
                azimuths.setdefault(cls_name, []).append(
                    np.argmax(cls_view, axis=0))

        return bboxes, probs, azimuths

    bbox_threshold_orig = 0.6
    th_bbox = 0.3

    succ = []
    mAP = 0
    for test_cls in test_cls_all:
        good_img = 0
        count = 0
        obj_num = 0
        # ground-truth objects matched at overlap 0.5; reset for every class
        # so the percentage below is not polluted by the previous classes
        overlap_hits = 0
        gt_cls_num = class_mapping[test_cls]
        print('work on class {}'.format(test_cls))
        test_pickle = os.path.join(
            base_dir, 'pickle_data/test_data_{}.pickle'.format(test_cls))

        # without a pickle file the class is evaluated on no images instead
        # of silently reusing the images of the previous class
        all_imgs = []
        if os.path.exists(test_pickle):
            # NOTE: pickle.load can execute arbitrary code, only load
            # trusted files. Pickles must be opened in binary mode.
            with open(test_pickle, 'rb') as f:
                all_imgs, _, _ = pickle.load(f)
        else:
            print('test data {} not found, skipping class {}'.format(
                test_pickle, test_cls))

        for im_file in all_imgs:
            filepath = im_file['filepath']
            img = cv2.imread(filepath)
            if img is None:
                # unreadable image
                continue
            good_img += 1
            if good_img % 50 == 0:
                print("worked on {} images".format(good_img))
            X, ratio = format_img(img, C)

            if backend == 'tf':
                X = np.transpose(X, (0, 2, 3, 1))

            # get the output from the RPN
            [Y1, Y2] = model_rpn.predict(X)
            R = roi_helpers.rpn_to_roi(Y1,
                                       Y2,
                                       C,
                                       backend,
                                       overlap_thresh=0.7)
            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # the classifier runs once per image, on the first ground-truth
            # box of the tested class
            detections = None
            # pass on all the labels in the image, some of them are not
            # equal to test_cls
            for bbox_gt in im_file['bboxes']:
                if not bbox_gt['class'] == test_cls:
                    continue
                az_gt = bbox_gt['azimuth']
                obj_num += 1
                no_bbox_flag = 1
                bbox_threshold = bbox_threshold_orig

                while no_bbox_flag and bbox_threshold > th_bbox:
                    if detections is None:
                        detections = classify_rois(X, R, gt_cls_num,
                                                   bbox_threshold)
                    # NOTE(review): the detections are not recomputed when
                    # bbox_threshold is lowered below, so the lower threshold
                    # only bounds the number of retries -- confirm intent
                    bboxes, probs, azimuths = detections

                    if len(bboxes) == 0:
                        bbox_threshold -= 0.1
                    for key in bboxes:
                        if key != test_cls:
                            continue
                        bbox = np.array(bboxes[key])
                        prob = np.array(probs[key])
                        azimuth = np.array(azimuths[key])

                        # get the azimuths returned by overlap_with_gt for
                        # this gt box, relaxing overlap_thresh until some
                        # are found
                        az = []
                        overlap_thresh = 0.5
                        try:
                            while np.size(az) == 0 and overlap_thresh > 0.3:
                                _, _, az = roi_helpers.overlap_with_gt(
                                    bbox,
                                    prob,
                                    azimuth,
                                    bbox_gt,
                                    ratio=ratio,
                                    overlap_thresh=overlap_thresh,
                                    max_boxes=300,
                                    use_az=True)
                                if np.size(az) != 0 and overlap_thresh == 0.5:
                                    overlap_hits += 1
                                overlap_thresh -= 0.1
                            if np.size(az) == 0:
                                print("No good Bbox was found")
                            counts = np.bincount(az)
                        except Exception:
                            az = []
                            counts = []
                        try:
                            # np.argmax raises on empty counts, which is
                            # how "no usable detection" is signalled here
                            az_fin = np.argmax(counts)
                            true_bin = find_interval(az_gt, azimuth_vec)
                            prob_bin = find_interval(az_fin, azimuth_vec)
                            no_bbox_flag = 0
                            if true_bin == prob_bin:
                                count += 1
                                break
                        except Exception:
                            no_bbox_flag = 1
                            bbox_threshold -= 0.1

                    bbox_threshold -= 0.1

        if obj_num:
            success_rate = float(count) / float(obj_num) * 100.
            mAP = float(overlap_hits) / float(obj_num) * 100.
        else:
            # no annotated object of this class: avoid dividing by zero
            success_rate = 0.
            mAP = 0.
        succ.append(success_rate)
        string = 'for class {} -true count is {} out of {} from {} images . {} success'.format(
            test_cls, count, obj_num, good_img, success_rate)
        print(string)
        print("MAP is {}".format(mAP))
    return succ, mAP