Example #1
def extract_detections(frame, min_confidence=0.6, labels=None):
    """Extract detections from frame.

    frame: numpy array WxHx3
    returns: numpy int array Cx5 [[label_id, xmin, ymin, xmax, ymax]]
    """
    # Save the original frame size, then convert the frame to float and resize to 300x300
    w, h = np.shape(frame)[:2]
    frame = resize(frame.astype(float), (300, 300, 3))
    # Run preprocess_input, model.predict and bbox_util.detection_out
    x = preprocess_input(frame)
    x = np.expand_dims(x, axis=0)
    y = model.predict(x)
    results = bbox_util.detection_out(y)[0]
    # Select detections with confidence > min_confidence
    results = results[results[:, 1] > min_confidence]
    # If label set is known, use it
    if labels is not None:
        result_labels = results[:, 0].astype(np.int32)
        indices = [
            i for i, l in enumerate(result_labels)
            if VOC_CLASSES[l - 1] in labels
        ]
        results = results[indices]
    else:
        results[:, 0] = -1
    # Remove confidence column from result
    results = np.delete(results, 1, 1)
    # Rescale detection coords back to the original image shape
    # ((w, h) was saved before the resize above).
    results[:, 1] *= h
    results[:, 2] *= w
    results[:, 3] *= h
    results[:, 4] *= w
    # Return result
    return detection_cast(results.astype(int))
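A minimal driver for this helper might look like the sketch below; model, bbox_util, VOC_CLASSES, resize, and detection_cast are assumed to come from the SSD setup in the surrounding assignment, and the file name and label filter are illustrative only.

from skimage.io import imread

# Hypothetical input: an HxWx3 uint8 frame read from disk.
frame = imread('test_frame.jpg')
detections = extract_detections(frame, min_confidence=0.6, labels=['person', 'car'])
for label_id, xmin, ymin, xmax, ymax in detections:
    print(label_id, (xmin, ymin), (xmax, ymax))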
Example #2
def predict_result(model, file_path):
    global cnt
    img = image.load_img(file_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    preds = model.predict(x)
    label = decode_predictions(preds)[0][0][1]
    if label == 'toilet_tissue':
        print('Predicted:', decode_predictions(preds))
        print(file_path)
        cnt += 1
    else:
        print("+++++++++++++++++++++++++++++++++++++++")
        print('Predicted:', decode_predictions(preds))
Example #3
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(150, 150))

    # Preprocessing the image
    x = image.img_to_array(img)
    # x = np.true_divide(x, 255)
    # Scaling
    # x = x / 255
    x = np.expand_dims(x, axis=0)

    # Be careful how your trained model deals with the input,
    # otherwise it won't make a correct prediction!
    x = preprocess_input(x)

    preds = model.predict(x)
    # preds = np.argmax(preds, axis=0)

    if preds[0][0] == 0:
        preds = "This is Defective Insulator"
    else:
        preds = "This is Normal Insulator"

    return preds
Example #4
def model_predict(img_path):
    img = image.load_img(img_path, target_size=(200, 200))
    model = load_model('MobileNet_model.h5')

    # Preprocessing the image
    x = image.img_to_array(img)
    # Scaling
    x = x / 255
    x = np.expand_dims(x, axis=0)

    # Be careful how your trained model deals with the input,
    # otherwise it won't make a correct prediction!
    x = preprocess_input(x)

    preds = model.predict(x)
    preds = np.argmax(preds, axis=1)
    if preds == 0:
        preds = "The Person is not Infected With Pneumonia"
    else:
        preds = "The Person is Infected With Pneumonia"

    return preds
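Several of these snippets (this one and the following car and Covid classifiers) both divide by 255 and call preprocess_input, which double-scales the input; whether that matches training is model-specific, exactly as the "be careful" comment warns. For reference, the generic keras.applications imagenet_utils.preprocess_input supports three documented modes; a sketch:

import numpy as np
from tensorflow.keras.applications.imagenet_utils import preprocess_input

x = np.random.uniform(0, 255, (1, 224, 224, 3)).astype('float32')
x_caffe = preprocess_input(x.copy(), mode='caffe')  # RGB->BGR, ImageNet means subtracted (default)
x_tf = preprocess_input(x.copy(), mode='tf')        # scaled to [-1, 1]
x_torch = preprocess_input(x.copy(), mode='torch')  # scaled to [0, 1], then normalized per channel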
Example #5
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224))

    x = image.img_to_array(img)

    x = x / 255

    x = np.expand_dims(x, axis=0)

    x = preprocess_input(x)

    preds = model.predict(x)

    preds = np.argmax(preds, axis=1)

    if preds == 0:
        preds = "The Car is Audi !!!"
    elif preds == 1:
        preds = "The Car is Lamborghini !!!"
    else:
        preds = "The Car is Mercedes !!!"

    return preds
Example #6
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224))

    # Preprocessing the image
    x = image.img_to_array(img)
    # Scaling
    x = x / 255
    x = np.expand_dims(x, axis=0)

    # Be careful how your trained model deals with the input,
    # otherwise it won't make a correct prediction!
    x = preprocess_input(x)

    preds = model.predict(x)
    preds = np.argmax(preds, axis=1)
    if preds == 0:
        preds = "Normal"
    else:
        preds = "Covid"

    return preds
Example #7
def prepare_train_data(vgg_face):
    # Prepare Train Data
    x_train = []
    y_train = []
    person_rep = dict()
    person_folders = os.listdir('./faces/')
    for i, person in enumerate(person_folders):
        print("preparing train set for person: " + person)
        person_rep[i] = person
        image_names = os.listdir('./faces/' + person + '/')
        for image_name in image_names:
            print("\tphoto: " + image_name)
            img = load_img('./faces/' + person + '/' + image_name,
                           target_size=(224, 224))
            img = img_to_array(img)

            img = np.expand_dims(img, axis=0)
            img = preprocess_input(img)
            img_encode = vgg_face(img)
            x_train.append(np.squeeze(K.eval(img_encode)).tolist())
            y_train.append(i)

    return person_rep, np.array(x_train), np.array(y_train)
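The (person_rep, x_train, y_train) triple returned above is typically fed to a small downstream classifier over the face embeddings; a minimal sketch, assuming scikit-learn is available and vgg_face is the embedding model from this example:

from sklearn.linear_model import LogisticRegression

person_rep, x_train, y_train = prepare_train_data(vgg_face)
clf = LogisticRegression(max_iter=1000).fit(x_train, y_train)
# Sanity check on a training sample: map the predicted id back to a name.
print(person_rep[clf.predict(x_train[:1])[0]])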
Example #8
def model_predict(img_path, model):
    img = image.load_img(img_path, target_size=(224, 224))

    # Preprocessing the image
    x = image.img_to_array(img)
    # Scaling
    x = x / 255
    x = np.expand_dims(x, axis=0)

    x = preprocess_input(x)

    preds = model.predict(x)
    preds = np.argmax(preds, axis=1)
    if preds == 0:
        preds = "The Car is Audi"
    elif preds == 1:
        preds = "The Car is Lamborghini"
    else:
        preds = "The Car is Mercedes"

    return preds
Example #9
def get_integrated_gradients(model,
                             img_input,
                             top_pred_idx,
                             baseline=None,
                             num_steps=50):
    if baseline is None:
        baseline = np.zeros(img_size).astype(np.float32)
    else:
        baseline = baseline.astype(np.float32)

    # 1. Do interpolation.
    img_input = img_input.astype(np.float32)
    interpolated_image = [
        baseline + (step / num_steps) * (img_input - baseline)
        for step in range(num_steps + 1)
    ]
    interpolated_image = np.array(interpolated_image).astype(np.float32)

    # 2. Preprocess the interpolated images
    # interpolated_image = xception.preprocess_input(interpolated_image)
    interpolated_image = preprocess_input(interpolated_image, mode="tf")

    # 3. Get the gradients
    grads = []
    for i, img in enumerate(interpolated_image):
        img = tf.expand_dims(img, axis=0)
        grad = compute_gradients(model, img, top_pred_idx=top_pred_idx)
        grads.append(grad[0])
    grads = tf.convert_to_tensor(grads, dtype=tf.float32)

    # 4. Approximate the integral using the trapezoidal rule
    grads = (grads[:-1] + grads[1:]) / 2.0
    avg_grads = tf.reduce_mean(grads, axis=0)

    # 5. Calculate integrated gradients and return
    integrated_grads = (img_input - baseline) * avg_grads
    return integrated_grads
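This example calls a compute_gradients helper that is not shown. A minimal sketch, assuming a TF2 model that outputs class probabilities (the helper's behavior is inferred from the call site above):

import tensorflow as tf

def compute_gradients(model, img, top_pred_idx):
    # Gradient of the top predicted class score w.r.t. the input pixels.
    with tf.GradientTape() as tape:
        tape.watch(img)
        preds = model(img)
        top_class = preds[:, top_pred_idx]
    return tape.gradient(top_class, img)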
Example #10
    def generate(self, train=True):
        while True:
            if train:
                # Shuffle the training set
                shuffle(self.train_lines)
                lines = self.train_lines
            else:
                shuffle(self.val_lines)
                lines = self.val_lines
            inputs = []
            targets = []
            for annotation_line in lines:
                img, y = self.get_random_data(annotation_line, self.image_size[0:2])
                if len(y) != 0:
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    boxes[:, 0] = boxes[:, 0] / self.image_size[1]
                    boxes[:, 1] = boxes[:, 1] / self.image_size[0]
                    boxes[:, 2] = boxes[:, 2] / self.image_size[1]
                    boxes[:, 3] = boxes[:, 3] / self.image_size[0]
                    one_hot_label = np.eye(self.num_classes)[np.array(y[:, 4], np.int32)]
                    if ((boxes[:, 3] - boxes[:, 1]) <= 0).any() and ((boxes[:, 2] - boxes[:, 0]) <= 0).any():
                        continue

                    y = np.concatenate([boxes, one_hot_label], axis=-1)
                    # print('y', tf.shape(y))

                y = self.bbox_util.assign_boxes(y)
                # print('y', tf.shape(y))
                inputs.append(img)               
                targets.append(y)
                if len(targets) == self.batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = np.array(targets)
                    inputs = []
                    targets = []
                    # print(tf.shape(preprocess_input(tmp_inp)), tf.shape(tmp_targets))
                    yield preprocess_input(tmp_inp), tmp_targets
Example #11
def predict_with_gradient(dirs):
    '''
    Classify image then create heatmap (gradient). Save combination of image, gradient and gradient on image.
    :param dirs: list of paths to all data items
    :return: nothing
    '''
    # loop through all photos in dirs and predict
    for item in dirs:
        orig = cv2.imread(path + item)
        image = load_img(path + item)
        image = img_to_array(image)
        image = np.expand_dims(image, axis=0)
        image = imagenet_utils.preprocess_input(image)

        # use the network to make predictions
        preds = model.predict(image)
        i = np.argmax(preds[0])

        label = "Gradient"
        # initialize our gradient class activation map and build the heatmap
        cam = GradCAM(model, i)
        heatmap = cam.compute_heatmap(image)
        # resize the resulting heatmap to the original input image dimensions
        # and then overlay heatmap on top of the image
        heatmap = cv2.resize(heatmap, (orig.shape[1], orig.shape[0]))
        (heatmap, output) = cam.overlay_heatmap(heatmap, orig, alpha=0.5)
        # draw the predicted label on the output image
        cv2.rectangle(output, (0, 0), (340, 40), (0, 0, 0), -1)
        cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                    (255, 255, 255), 2)
        # display the original image and resulting heatmap and output image
        # to our screen
        output = np.vstack([orig, heatmap, output])
        output = imutils.resize(output, height=700)
        # cv2.imshow("Output", output)
        cv2.waitKey(0)
        cv2.imwrite(my_path_results_pos + item, output)
Example #12
def recognize(img, outputs, class_names, vgg_face):
    person_rep = dict()
    person_names = ["angelamerkel", "jinping", "trump"]
    for person in person_names:
        embed = np.loadtxt(person + ".txt")
        person_rep[person] = embed

    boxes, objectness, classes, nums = outputs
    boxes, objectness, classes, nums = boxes[0], objectness[0], classes[
        0], nums[0]
    wh = np.flip(img.shape[0:2])
    for i in range(nums):
        x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
        x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
        if class_names[int(classes[i])] == "face":

            img_crop = img[x1y1[1]:x2y2[1], x1y1[0]:x2y2[0]]

            crop_img = img_to_array(img_crop)
            crop_img = np.expand_dims(crop_img, axis=0)
            crop_img = preprocess_input(crop_img)

            img_encode = vgg_face(transform_images(crop_img, 224))
            embed = K.eval(img_encode)
            name, score = get_match(person_rep, embed, 0.3)

            img = cv2.rectangle(img, x1y1, x2y2, (205, 0, 0), 2)
            img = cv2.putText(img, '{} {:.4f}'.format(name, score), x1y1,
                              cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 255),
                              2)
        else:
            img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
            img = cv2.putText(
                img, '{} {:.4f}'.format(class_names[int(classes[i])],
                                        objectness[i]), x1y1,
                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
    return img
Example #13
def main():
    parser = argparser()
    args = parser.parse_args()
    image_path = args.image
    layer_name = args.layer_name
    feature_to_visualize = args.feature
    visualize_mode = args.mode

    model = vgg16.VGG16(weights='imagenet', include_top=True)
    layer_dict = dict([(layer.name, layer) for layer in model.layers])
    if layer_name not in layer_dict:
        print('Wrong layer name')
        sys.exit()

    # Load data and preprocess
    img = Image.open(image_path)
    img = img.resize((224, 224))
    img_array = np.array(img)
    img_array = img_array[np.newaxis, :]
    img_array = img_array.astype(float)
    img_array = imagenet_utils.preprocess_input(img_array)

    deconv = visualize(model, img_array, layer_name, feature_to_visualize,
                       visualize_mode)

    # postprocess and save image
    deconv = deconv - deconv.min()
    deconv *= 1.0 / (deconv.max() + 1e-8)
    deconv = deconv[:, :, ::-1]
    uint8_deconv = (deconv * 255).astype(np.uint8)
    img = Image.fromarray(uint8_deconv, 'RGB')
    img.save('results/{}_{}_{}.png'.format(layer_name, feature_to_visualize,
                                           visualize_mode))

    print('Saved: results/{}_{}_{}.png'.format(layer_name,
                                               feature_to_visualize,
                                               visualize_mode))
Example #14
    def get_model_in_blocks(self, model_function, include_top=True):
        # Load the vanilla model
        model = model_function(pretrained=True)

        # Instantiate consecutive blocks
        blocks = []

        # Add preprocess_input
        blocks.append(
            Lambda(
                lambda x: preprocess_input(x, mode=self.preprocess_input_mode),
                name="preprocess_input"))

        # Discard the last pooling layer
        blocks += model.features.children[:-1]

        if include_top:
            # Add a GlobalAveragePooling2D layer
            blocks.append(GlobalAveragePooling2D())

            # Add the dense layer with softmax
            blocks += [model.output1, Softmax()]

        return blocks
Example #15
    def recognize(self, draw):
        height, width, _ = np.shape(draw)
        draw_rgb = cv.cvtColor(draw, cv.COLOR_BGR2RGB)
        rectangles = self.mtcnn_model.detectFace(draw_rgb, self.threshold)
        if len(rectangles) == 0:
            return

        rectangles = np.array(rectangles, dtype=np.int32)
        rectangles[:, 0] = np.clip(rectangles[:, 0], 0, width)
        rectangles[:, 1] = np.clip(rectangles[:, 1], 0, height)
        rectangles[:, 2] = np.clip(rectangles[:, 2], 0, width)
        rectangles[:, 3] = np.clip(rectangles[:, 3], 0, height)

        rectangles_temp = utils.rect2square(np.array(rectangles, dtype=np.int32))
        rectangles_temp[:, 0] = np.clip(rectangles_temp[:, 0], 0, width)
        rectangles_temp[:, 1] = np.clip(rectangles_temp[:, 1], 0, height)
        rectangles_temp[:, 2] = np.clip(rectangles_temp[:, 2], 0, width)
        rectangles_temp[:, 3] = np.clip(rectangles_temp[:, 3], 0, height)

        for rectangle in rectangles_temp:
            # Landmark coordinates within the cropped face patch
            landmark = (np.reshape(rectangle[5:15], (5, 2)) - np.array([int(rectangle[0]), int(rectangle[1])])) / (rectangle[3] - rectangle[1]) * 160
            # Crop the face region
            crop_img = draw_rgb[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
            crop_img = cv.resize(crop_img, (self.Crop_HEIGHT, self.Crop_WIDTH))
            # Align the face
            new_img, _ = utils.Alignment_1(crop_img, landmark)
            # Normalize
            new_img = preprocess_input(np.reshape(np.array(new_img, np.float64), [1, self.Crop_HEIGHT, self.Crop_WIDTH, 3]))

        rectangles = rectangles[:, 0:4]

        for (left, top, right, bottom) in rectangles:
            cv.rectangle(draw, (left, top), (right, bottom), (255, 0, 0), 2)
        return draw
Example #16
 def generate(self, train=True):
     while True:
         if train:
             shuffle(self.train_keys)
             keys = self.train_keys
         else:
             shuffle(self.val_keys)
             keys = self.val_keys
         inputs = []
         targets = []
         for key in keys:
             img_path = self.path_prefix + key
             img = imread(img_path).astype('float32')
             y = self.gt[key].copy()
             if train and self.do_crop:
                 img, y = self.random_sized_crop(img, y)
             img = imresize(img, self.image_size).astype('float32')
             if train:
                 shuffle(self.color_jitter)
                 for jitter in self.color_jitter:
                     img = jitter(img)
                 if self.lighting_std:
                     img = self.lighting(img)
                 if self.hflip_prob > 0:
                     img, y = self.horizontal_flip(img, y)
                 if self.vflip_prob > 0:
                     img, y = self.vertical_flip(img, y)
             y = self.bbox_util.assign_boxes(y)
             inputs.append(img)
             targets.append(y)
             if len(targets) == self.batch_size:
                 tmp_inp = np.array(inputs)
                 tmp_targets = np.array(targets)
                 inputs = []
                 targets = []
                 yield preprocess_input(tmp_inp), tmp_targets
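A generator like this is normally handed straight to the Keras fit loop. A sketch, where gen is an instance of the surrounding class and the step and epoch counts are illustrative (with older standalone Keras the equivalent entry point is model.fit_generator):

model.fit(gen.generate(train=True),
          steps_per_epoch=len(gen.train_keys) // gen.batch_size,
          validation_data=gen.generate(train=False),
          validation_steps=len(gen.val_keys) // gen.batch_size,
          epochs=30)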
Example #17
def load_paired_img_wrd(folder):
    class_names = [fold for fold in os.listdir(folder) if ".DS" not in fold]
    image_list = []
    labels_list = []
    paths_list = []
    for cl in class_names:
        splits = cl.split("_")
        subfiles = [f for f in os.listdir(folder + "/" + cl) if ".DS" not in f]

        for subf in subfiles:
            full_path = os.path.join(folder, cl, subf)
            # resize to 229 x 229 (note: the stock ResNet50 actually expects 224 x 224)
            img = image_utils.load_img(full_path, target_size=(229, 229))
            x_raw = image_utils.img_to_array(img)
            x_expand = np.expand_dims(
                x_raw, axis=0)  # add a column for the index of the image
            x = preprocess_input(x_expand)  # normalize to [-1, 1]
            image_list.append(x)
            paths_list.append(full_path)
    img_data = np.array(image_list)
    img_data = np.rollaxis(img_data, 1, 0)
    img_data = img_data[0]

    return img_data, np.array(labels_list), paths_list
Example #18
    def generate(self):
        while True:
            #-----------------------------------#
            #   Shuffle the training set
            #-----------------------------------#
            shuffle_index = np.arange(len(self.imgs_path))
            shuffle(shuffle_index)
            self.imgs_path = np.array(self.imgs_path, dtype=object)[shuffle_index]
            self.words = np.array(self.words, dtype=object)[shuffle_index]

            inputs = []
            target0 = []
            target1 = []
            target2 = []
            for i, image_path in enumerate(self.imgs_path):  
                #-----------------------------------#
                #   Open the image and fetch its labels
                #-----------------------------------#
                img = Image.open(image_path)
                labels = self.words[i]
                annotations = np.zeros((0, 15))

                for idx, label in enumerate(labels):
                    annotation = np.zeros((1, 15))
                    #-----------------------------------#
                    #   bbox: ground-truth box coordinates
                    #-----------------------------------#
                    annotation[0, 0] = label[0]  # x1
                    annotation[0, 1] = label[1]  # y1
                    annotation[0, 2] = label[0] + label[2]  # x2
                    annotation[0, 3] = label[1] + label[3]  # y2

                    #-----------------------------------#
                    #   landmarks: face keypoint coordinates
                    #-----------------------------------#
                    annotation[0, 4] = label[4]    # l0_x
                    annotation[0, 5] = label[5]    # l0_y
                    annotation[0, 6] = label[7]    # l1_x
                    annotation[0, 7] = label[8]    # l1_y
                    annotation[0, 8] = label[10]   # l2_x
                    annotation[0, 9] = label[11]   # l2_y
                    annotation[0, 10] = label[13]  # l3_x
                    annotation[0, 11] = label[14]  # l3_y
                    annotation[0, 12] = label[16]  # l4_x
                    annotation[0, 13] = label[17]  # l4_y
                    if (annotation[0, 4]<0):
                        annotation[0, 14] = -1
                    else:
                        annotation[0, 14] = 1
                    annotations = np.append(annotations, annotation, axis=0)

                target = np.array(annotations)
                img, target = get_random_data(img, target, [self.img_size,self.img_size])

                # Match ground-truth boxes to priors and compute the targets each prior should predict
                assignment = self.bbox_util.assign_boxes(target)

                regression = assignment[:,:5]
                classification = assignment[:,5:8]

                landms = assignment[:,8:]
                
                inputs.append(img)     
                target0.append(np.reshape(regression,[-1,5]))
                target1.append(np.reshape(classification,[-1,3]))
                target2.append(np.reshape(landms,[-1,10+1]))
                if len(target0) == self.batch_size:
                    tmp_inp = np.array(inputs)

                    yield preprocess_input(tmp_inp), np.array(target0, dtype=np.float32), np.array(target1, dtype=np.float32), np.array(target2, dtype=np.float32)
                    inputs = []
                    target0 = []
                    target1 = []
                    target2 = []
Example #19
    def detect_image(self, image_id, image, bgr_img, write_down=False):
        # img_ori = image
        if write_down:
            self.detect_txtfile = open(
                "./input/detection-results/" + image_id + ".txt", "w")
        image_shape = np.array(np.shape(image)[0:2])
        crop_img, x_offset, y_offset = self.letterbox_image(
            image, (self.model_image_size[1], self.model_image_size[0]))
        photo = np.array(crop_img, dtype=np.float64)

        # Image preprocessing
        photo = preprocess_input(
            np.reshape(
                photo,
                [1, self.model_image_size[0], self.model_image_size[1], 3]))
        start = time.time()
        preds = self.ssd_model.predict(photo)
        end = time.time()
        ti = np.round((end - start) * 1000)
        print('Execution time: {} ms'.format(ti))

        # Decode the predictions
        self.bbox_util = BBoxUtility(
            self.num_classes,
            overlap_threshold=self.IOU_thresh,
            nms_thresh=self.nms_thresh
        )  # (self, num_classes, priors=None, overlap_threshold=0.5, nms_thresh=0.3, top_k=400)
        results = self.bbox_util.detection_out(
            preds,
            background_label_id=0,
            keep_top_k=200,
            confidence_threshold=self.confidence)

        if len(results[0]) <= 0:
            if write_down:
                self.detect_txtfile.close()
            return bgr_img

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin, det_ymin, det_xmax, det_ymax = results[0][:, 2], results[
            0][:, 3], results[0][:, 4], results[0][:, 5]
        top_indices = [
            i for i, conf in enumerate(det_conf) if conf >= self.confidence
        ]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            det_xmin[top_indices],
            -1), np.expand_dims(det_ymin[top_indices], -1), np.expand_dims(
                det_xmax[top_indices],
                -1), np.expand_dims(det_ymax[top_indices], -1)

        # Remove the gray column
        boxes = self.ssd_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        # img_ori = np.array(img_ori)
        # img_ori = cv2.cvtColor(bgr_img, cv2.COLOR_RGB2BGR)
        img_ori = bgr_img
        for i, c in enumerate(top_label_indices):
            predicted_class = self.class_names[int(c) - 1]
            score = str(top_conf[i])

            top, left, bottom, right = boxes[i]
            if write_down:
                self.detect_txtfile.write(
                    "%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))
            self.plot_one_box(
                img_ori,
                [int(left), int(top),
                 int(right), int(bottom)],
                label=predicted_class +
                ', {:.2f}%'.format(np.round(float(score) * 100, 2)),
                color=self.colors[int(c) - 1])
        cv2.putText(bgr_img,
                    'FPS: {0} Execution time: {1}ms'.format(
                        str(int(1000 / ti)), str(int(ti))), (20, 50),
                    0,
                    0.5, [20, 150, 255],
                    thickness=1,
                    lineType=cv2.LINE_AA)

        cv2.imshow('draw', img_ori)
        cv2.waitKey(self.gap_time)

        if write_down:
            self.detect_txtfile.close()
        return bgr_img
Example #20
def preprocess_image(image_path):
    img = load_img(image_path, target_size=(224, 224))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img
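A usage sketch for this helper, assuming the load_img/img_to_array/preprocess_input names it uses come from a matching keras.applications model such as VGG16, whose 224x224 input matches the target_size above (the image path is illustrative):

from tensorflow.keras.applications.vgg16 import VGG16, decode_predictions

model = VGG16(weights='imagenet')
preds = model.predict(preprocess_image('elephant.jpg'))
print(decode_predictions(preds, top=3)[0])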
Example #21
# load the pre-trained CNN from disk
print("[INFO] loading model...")
model = Model(weights="imagenet")

# load the original image from disk (in OpenCV format) and then
# resize the image to its target dimensions
orig = cv2.imread(args["image"])
resized = cv2.resize(orig, (224, 224))

# load the input image from disk (in Keras/TensorFlow format) and
# preprocess it
image = load_img(args["image"], target_size=(224, 224))
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
image = imagenet_utils.preprocess_input(image)

# use the network to make predictions on the input image and find
# the class label index with the largest corresponding probability
preds = model.predict(image)
i = np.argmax(preds[0])

# decode the ImageNet predictions to obtain the human-readable label
decoded = imagenet_utils.decode_predictions(preds)
(imagenetID, label, prob) = decoded[0][0]
label = "{}: {:.2f}%".format(label, prob * 100)
print("[INFO] {}".format(label))

# initialize our gradient class activation map and build the heatmap
cam = GradCAM(model, i)
heatmap = cam.compute_heatmap(image)
Example #22
def predict_quad(model, img, pixel_threshold=cfg.pixel_threshold, quiet=False, img_name=None):
    """
    Args:
        model: 检测模型,要load_weights的
        img:  image 图片,文件类型
        pixel_threshold: 阈值
        quiet:
        img_name: 图片的名字

    Returns:
        text_recs_all:一个列表,每个元素是检测边界的quad值
        text_recs_len:text_recs_all的长度,一共检测到多少个区域
        img_all: 一个四维数组,img_all[0] 是img_to_array的结果

    """

    if not os.path.exists(root_temp):
        os.makedirs(root_temp)
    if not os.path.exists(root_predict):
        os.makedirs(root_predict)

    # Compute the post-resize image width and height
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    # Resize the image to make prediction easier
    img = img.resize((d_wight, d_height), Image.BILINEAR).convert('RGB')
    img = image.img_to_array(img)
    num_img = 1
    # A 4-D tensor containing a single 3-D image tensor
    img_all = np.zeros((num_img, d_height, d_wight, 3))
    img_all[0] = img

    # Scale the tensor values into [-1, 1]
    img_ori = imagenet_utils.preprocess_input(img, mode='tf')  # suit tf tensor

    # Build a second, identically shaped batch tensor
    x = np.zeros((num_img, d_height, d_wight, 3))
    x[0] = img_ori

    # (sample, h, w, channels)
    y_pred = model.predict(x)

    text_recs_all = []
    text_recs_len = []
    for n in range(num_img):
        # (sample, rows, cols, 7_points_pred)
        y = y_pred[n]
        y[:, :, :3] = sigmoid(y[:, :, :3])
        cond = np.greater_equal(y[:, :, 0], pixel_threshold)
        activation_pixels = np.where(cond)  # fixme: returns a tuple; a[0] holds the row (y) coords, a[1] the column (x) coords
        quad_scores, quad_after_nms = nms(y, activation_pixels)

        text_recs = []
        x[n] = np.uint8(x[n])
        with image.array_to_img(img_all[n]) as im:     # Image.fromarray(x[n]) error ?
            im_array = x[n]

            # fixme note: the image sent to CRNN for recognition is the rescaled one
            scale_ratio_w = 1
            scale_ratio_h = 1

            quad_im = im.copy()
            draw = ImageDraw.Draw(im)
            # Copy the original image and, on the copy, draw lines wherever text is found
            for i, j in zip(activation_pixels[0], activation_pixels[1]):
                px = (j + 0.5) * cfg.pixel_size
                py = (i + 0.5) * cfg.pixel_size
                line_width, line_color = 1, 'blue'
                if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                    if y[i, j, 2] < cfg.trunc_threshold:
                        line_width, line_color = 2, 'yellow'
                    elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                        line_width, line_color = 2, 'green'
                draw.line([(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                           (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                           (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                           (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                           (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)],
                          width=line_width, fill=line_color)

            if img_name is not None:
                im.save(root_temp + img_name + '.jpg')

            quad_draw = ImageDraw.Draw(quad_im)
            for score, geo, s in zip(quad_scores, quad_after_nms,
                                     range(len(quad_scores))):
                if np.amin(score) > 0:
                    quad_draw.line([tuple(geo[0]),
                                    tuple(geo[1]),
                                    tuple(geo[2]),
                                    tuple(geo[3]),
                                    tuple(geo[0])], width=2, fill='blue')

                    if cfg.predict_cut_text_line:
                        cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array,
                                      img_name, s)

                    rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                    text_rec = np.reshape(rescaled_geo, (8,)).tolist()
                    text_recs.append(text_rec)
                elif not quiet:
                    print('quad invalid with vertex num less than 4.')

            if img_name is not None:
                quad_im.save(root_predict + img_name + '.jpg')

        for t in range(len(text_recs)):
            text_recs_all.append(text_recs[t])

        text_recs_len.append(len(text_recs))

    return text_recs_all, text_recs_len, img_all
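Examples #22 and #23 both call a sigmoid helper that is not shown; a one-line NumPy sketch of the assumed implementation:

import numpy as np

def sigmoid(x):
    # Elementwise logistic function; squashes the score channels into (0, 1).
    return 1.0 / (1.0 + np.exp(-x))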
Example #23
def predict(east_detect, img_path, pixel_threshold, quiet=False):
    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    # Scale the tensor values to [-1, 1]
    img = imagenet_utils.preprocess_input(img, mode='tf')
    # Expand to a 4-D tensor
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)

    y = np.squeeze(y, axis=0)
    y[:, :, :3] = sigmoid(y[:, :, :3])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)
    with Image.open(img_path) as im:
        im_array = image.img_to_array(im.convert('RGB'))
        d_wight, d_height = resize_image(im, cfg.max_predict_img_size)
        scale_ratio_w = d_wight / im.width
        scale_ratio_h = d_height / im.height
        im = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
        quad_im = im.copy()
        draw = ImageDraw.Draw(im)
        for i, j in zip(activation_pixels[0], activation_pixels[1]):
            px = (j + 0.5) * cfg.pixel_size
            py = (i + 0.5) * cfg.pixel_size
            line_width, line_color = 1, 'red'
            if y[i, j, 1] >= cfg.side_vertex_pixel_threshold:
                if y[i, j, 2] < cfg.trunc_threshold:
                    line_width, line_color = 2, 'yellow'
                elif y[i, j, 2] >= 1 - cfg.trunc_threshold:
                    line_width, line_color = 2, 'green'
            draw.line([(px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                       (px + 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size),
                       (px + 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                       (px - 0.5 * cfg.pixel_size, py + 0.5 * cfg.pixel_size),
                       (px - 0.5 * cfg.pixel_size, py - 0.5 * cfg.pixel_size)],
                      width=line_width, fill=line_color)
        im.save(img_path + '_act.jpg')
        quad_draw = ImageDraw.Draw(quad_im)
        txt_items = []
        for score, geo, s in zip(quad_scores, quad_after_nms,
                                 range(len(quad_scores))):
            if np.amin(score) > 0:
                quad_draw.line([tuple(geo[0]),
                                tuple(geo[1]),
                                tuple(geo[2]),
                                tuple(geo[3]),
                                tuple(geo[0])], width=2, fill='red')
                if cfg.predict_cut_text_line:
                    cut_text_line(geo, scale_ratio_w, scale_ratio_h, im_array,
                                  img_path, s)
                rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
                rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist()
                txt_item = ','.join(map(str, rescaled_geo_list))
                txt_items.append(txt_item + '\n')
            elif not quiet:
                print('quad invalid with vertex num less than 4.')
        quad_im.save(img_path + '_predict.jpg')
        if cfg.predict_write2txt and len(txt_items) > 0:
            with open(img_path[:-4] + '.txt', 'w') as f_txt:
                f_txt.writelines(txt_items)
Example #24
    def detect_image(self, image):
        old_image = image.copy()

        image = np.array(image, np.float32)
        im_height, im_width, _ = np.shape(image)

        scale = [im_width, im_height, im_width, im_height]
        scale_for_landmarks = [
            im_width, im_height, im_width, im_height, im_width, im_height,
            im_width, im_height, im_width, im_height
        ]

        # Preprocess the image and normalize it
        photo = np.expand_dims(preprocess_input(image), 0)
        anchors = Anchors(self.cfg,
                          image_size=(im_height, im_width)).get_anchors()

        preds = self.get_pred(photo)
        preds = [pred.numpy() for pred in preds]

        # Decode the predictions and apply non-maximum suppression
        results = self.bbox_util.detection_out(
            preds, anchors, confidence_threshold=self.confidence)

        if len(results) <= 0:
            return old_image, 0, 0
        results = np.array(results)
        results[:, :4] = results[:, :4] * scale
        results[:, 5:] = results[:, 5:] * scale_for_landmarks

        for b in results:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)

            #####################
            global cnt, t0, t1
            t1 = time.time()
            image_clip = old_image
            # if t1 - t0 > 1:
            #     t0 = t1
            # image_clip = old_image[b[1]-20:b[3]+20, b[0]-20:b[2]+20]
            image_clip = old_image[b[1]:b[3], b[0]:b[2]]
            image_clip = cv2.cvtColor(image_clip, cv2.COLOR_RGB2BGR)

            # Save the cropped image
            # cv2.imshow("clip", image_clip)
            # cv2.imwrite("savedImg/wang/" + str(t1) + ".png", image_clip)
            # cnt += 1
            # print(cnt)
            #####################

            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            # landms
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)

        return old_image, image_clip, len(results)
Example #25
    print(folder, z)
    pathn = path + folder
    os.chdir(pathn)
    filenames = os.listdir(pathn)
    for f in filenames:
        x = cv2.imread(f)
        x = cv2.resize(x, (224, 224), interpolation=cv2.INTER_CUBIC)
        if z == 1 or z == 2:
            D = d_aug(x)
        else:
            D = [x]
            # D=D[:3]

        for x in D:
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            x = x / 255
            img_data.append(x)
            if z == 2:
                labels.append(1)
            else:
                labels.append(z)

img_data = np.array(img_data)
img_data = img_data.astype('float32')
print(img_data.shape)
img_data = np.rollaxis(img_data, 1, 0)
print(img_data.shape)
img_data = img_data[0]
print(img_data.shape)
# labels = np.expand_dims(labels, axis=0)
Example #26
input_images = []  # Raw image arrays for the network go here.
pil_input_images = []  # Store resized versions of the images here.

# We'll only load one image in this example.
img_dir = './example_images'
write_out_dir = './results'

os.makedirs(write_out_dir, exist_ok=True)

for image_path in glob.glob(os.path.join(img_dir, '*.jpg')):
    pil_image = image.load_img(image_path,
                               target_size=(config.height, config.width))
    pil_input_images.append(pil_image)
    img = np.array(pil_image)
    input_images.append(img)

input_images = preprocess_input(np.array(input_images))

bboxes, scores, labels = prediction_model.predict(input_images)

confidence_threshold = 0.75

for index in range(input_images.shape[0]):

    bbox = bboxes[index]
    confidence = scores[index]
    label = labels[index]

    print(bbox.shape)
    # confidence

    annotated_image = image_annotator(image=pil_input_images[index],
Example #27
def upload_file():
    #print('here')
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)

        file = request.files['file']

        if file.filename == '':
            flash('No file selected for uploading')
            return redirect(request.url)

        if file and allowed_file(file.filename):
            global filename
            filename = secure_filename(file.filename)
            img_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(img_path)

            # Get features of the uploaded image
            # Check if we already have this file (by name) in our database
            have_file = database_df[database_df.loc[:, 'Filename'] == filename]
            if have_file.shape[0] > 0:  # we have the features in the database
                img_features = have_file.image_features.tolist()

            else:
                img_data = image.load_img(img_path,
                                          target_size=(pwidth, pheight))
                img_vector = image.img_to_array(img_data)
                img_vector = np.expand_dims(img_vector, axis=0)
                # Problem here: Keras appears to modify the array in place (pass by reference?)
                img_vector = preprocess_input(img_vector)
                img_vector = imagenet_utils.preprocess_input(img_vector)
                img_features = network_model.predict(img_vector)
                #print(type(img_features))

            # get 9 nearest neighbors
            # n_neighs = 9
            # nn_index, neighbors,distance = nearest_neighbor_image_finder(img_features, n_neighs, database_df)

            dist, nn_index = neighs.kneighbors(img_features,
                                               return_distance=True)
            distance = dist.tolist()[0]

            # fix path to the database...
            neighbors = database_df.iloc[nn_index.tolist()[0]].copy()
            neighbors.loc[:, 'db_path'] = neighbors.loc[:, 'path'].astype(
                str).copy()

            #neighbors = neighbors_db # this line is where the "filtering" should occur if we add handles on website

    #image_name = os.path.join('images',filename)

    # print(f'saved: {img_path}')
    # print(f'<upload path>: {UPLOAD_FOLDER}')
    # print(f'image: {filename}')

    npath = neighbors['db_path'][0]
    # print(f'saved: {npath}')
    # print(f'<media path>: {MEDIA_FOLDER}')

    header_copy = 'your example:'
    return render_template('album.html',
                           header_copy=header_copy,
                           image_name=filename,
                           neighbors=neighbors,
                           dist=distance)
Example #28
vgg_face = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)

x_train = []
y_train = []
person_rep = dict()
person_folders = os.listdir('Images_crop')
for i, person in enumerate(person_folders):
    person_rep[i] = person
    image_names = os.listdir('Images_crop/' + person + '/')
    for image_name in image_names:
        img = load_img('Images_crop/' + person + '/' + image_name,
                       target_size=(224, 224))
        img = img_to_array(img)
        img = np.expand_dims(img, axis=0)
        img = preprocess_input(img)
        img_encode = vgg_face(img)
        x_train.append(np.squeeze(K.eval(img_encode)).tolist())
        y_train.append(i)

        # Prepare Test Data
        x_test = []
        y_test = []
        test_image_names = os.listdir('Test_Images_crop/' + person + '/')
        for image_name in test_image_names:
            img = load_img('Test_Images_crop/' + person + '/' + image_name,
                           target_size=(224, 224))
            img = img_to_array(img)
            img = np.expand_dims(img, axis=0)
            img = preprocess_input(img)
            img_encode = vgg_face(img)
Example #29
    def get_FPS(self, image, test_interval):
        #-------------------------------------#
        #   Convert to an RGB image so that
        #   grayscale inputs can also be predicted.
        #-------------------------------------#
        image = image.convert("RGB")

        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
    
        #---------------------------------------------------------#
        #   Resize the original image so its short side is 600.
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)
        photo = np.array(image, dtype=np.float64)

        #-----------------------------------------------------------#
        #   Preprocess and normalize the image.
        #-----------------------------------------------------------#
        photo = preprocess_input(np.expand_dims(photo, 0))
        rpn_pred = self.model_rpn.predict(photo)

        #-----------------------------------------------------------#
        #   Decode the region proposal network's predictions.
        #-----------------------------------------------------------#
        base_feature_width, base_feature_height = self.get_img_output_length(width, height)
        anchors = get_anchors([base_feature_width, base_feature_height], width, height)
        rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
        
        #-------------------------------------------------------------#
        #   With the proposals and the shared feature map in hand,
        #   pass both into the classifier for prediction.
        #-------------------------------------------------------------#
        base_layer = rpn_pred[2]
        proposal_box = np.array(rpn_results)[:, :, 1:]
        temp_ROIs = np.zeros_like(proposal_box)
        temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
        classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])
        
        #-------------------------------------------------------------#
        #   Decode the proposals with the classifier's predictions
        #   to obtain the final prediction boxes.
        #-------------------------------------------------------------#
        results = self.bbox_util.detection_out_classifier(classifier_pred, proposal_box, self.config, self.confidence)

        if len(results[0]) > 0:
            results = np.array(results[0])
            boxes = results[:, :4]
            top_conf = results[:, 4]
            top_label_indices = results[:, 5]
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        t1 = time.time()
        for _ in range(test_interval):
            rpn_pred = self.model_rpn.predict(photo)

            #-----------------------------------------------------------#
            #   Decode the region proposal network's predictions.
            #-----------------------------------------------------------#
            base_feature_width, base_feature_height = self.get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)
            rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
            
            #-------------------------------------------------------------#
            #   With the proposals and the shared feature map in hand,
            #   pass both into the classifier for prediction.
            #-------------------------------------------------------------#
            base_layer = rpn_pred[2]
            proposal_box = np.array(rpn_results)[:, :, 1:]
            temp_ROIs = np.zeros_like(proposal_box)
            temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
            classifier_pred = self.model_classifier.predict([base_layer, temp_ROIs])
            
            #-------------------------------------------------------------#
            #   Decode the proposals with the classifier's predictions
            #   to obtain the final prediction boxes.
            #-------------------------------------------------------------#
            results = self.bbox_util.detection_out_classifier(classifier_pred, proposal_box, self.config, self.confidence)

            if len(results[0]) > 0:
                results = np.array(results[0])
                boxes = results[:, :4]
                top_conf = results[:, 4]
                top_label_indices = results[:, 5]
                boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
                boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
Example #30
def prepare_input(file_path, keypoints, image_size=IMAGE_SIZE):
    img = tf.io.read_file(file_path)
    img = decode_img(img, image_size)
    img = preprocess_input(img, mode='torch')
    group_keypoints = tf.cast(tf.reshape(keypoints, shape=(n_keypoints, 2)), tf.float32)
    return img, group_keypoints
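The (img, group_keypoints) pair returned here is shaped for a tf.data pipeline; a sketch of the assumed wiring, where file_paths and keypoints_array are placeholder tensors of image paths and flattened keypoints:

import tensorflow as tf

dataset = (tf.data.Dataset.from_tensor_slices((file_paths, keypoints_array))
           .map(prepare_input, num_parallel_calls=tf.data.AUTOTUNE)
           .batch(32)
           .prefetch(tf.data.AUTOTUNE))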