Example #1
    def save(self):
        self.full_url = '/' + '/'.join([parent.slug for parent in self.parent_categories]) + '/'

        super(Category, self).save()

        if self.background:
            image.resize_image(self.background, (284, 300))
Example #2
    def save(self):
        super(Promotion, self).save()

        if self.background:
            size = (142, 300)
            if self.category.skyscraper_mode:
                size = (142, 612)
            image.resize_image(self.background, size)
Example #3
def load_image_test(image_file, height, width):
    input_image, real_image = vertical_split(load_image(image_file))
    input_image = resize_image(input_image, height, width)
    real_image = resize_image(real_image, height, width)
    input_image = normalize_image(input_image)
    real_image = normalize_image(real_image)

    return input_image, real_image
Example #4
 def resize_image(self, image):
     """!@brief
     Resize an image using image_min_side and image_max_side.
     """
     return resize_image(image,
                         min_side=self.image_min_side,
                         max_side=self.image_max_side)
Example #5
 def resize_image(self, image):
     """ Resize an image using image_min_side and image_max_side.
     """
     if self.no_resize:
         return image, 1
     else:
         return resize_image(image,
                             min_side=self.image_min_side,
                             max_side=self.image_max_side)
Example #6
def random_jitter(input_image, real_image, height, width,
                  resize_ratio_before_crop):
    # resize images
    resize_height = int(resize_ratio_before_crop * height)
    resize_width = int(resize_ratio_before_crop * width)
    input_image = resize_image(input_image, resize_height, resize_width)
    real_image = resize_image(real_image, resize_height, resize_width)

    # randomly cropping
    input_image, real_image = random_crop(input_image, real_image, height,
                                          width)

    if tf.random.uniform(()) > 0.5:
        # random mirroring
        input_image = tf.image.flip_left_right(input_image)
        real_image = tf.image.flip_left_right(real_image)

    return input_image, real_image
Example #7
    def __getitem__(self, index):
        possible_indices = np.delete(np.arange(self.__len__()), index).tolist()
        indices = random.sample(possible_indices, self.images_per_grid - 1)
        indices.append(index)

        images = []
        labels = []
        for index in indices:
            image, label = super().__getitem__(index)
            image = resize_image(image, (96, 96))
            images.append(image)
            labels.append(label)

        unique_labels, unique_counts = np.unique(labels, return_counts=True)
        counts = np.zeros(self.n_classes, dtype=np.float32)
        counts[unique_labels] = unique_counts
        counts = counts.reshape((counts.shape[0], 1))

        image_grid = create_image_grid(images, self.image_grid_distribution)

        if self.transformations is not None:
            image_grid = self.transformations(image_grid)

        templates = []
        for class_name in self.class_names:
            template = self.template_dict[class_name]
            template = resize_image(template, (96, 96))
            # Make template have the same shape as the image grid
            if self.template_view == 'resize':
                template = resize_image(template, self.image_grid_shape[-2:])
            elif self.template_view == 'padding':
                template = pad_image(template, self.image_grid_shape[-2:])
            elif self.template_view == 'repeat':
                template = repeat_image(template, self.image_grid_shape[-2:])
            elif self.template_view == 'raw':
                pass

            if self.transformations is not None:
                template = self.transformations(template)

            templates.append(template)

        return image_grid, templates, counts
Example #8
 def resize_image(self, image):
     """
     Resize an image using image_min_side and image_max_side.
     """
     # random_side_index = random.randint(0, 4)
     # return resize_image(image,
     #                     min_side=self.image_min_sides[random_side_index],
     #                     max_side=self.image_max_sides[random_side_index])
     return resize_image(image,
                         min_side=self.image_min_side,
                         max_side=self.image_max_side)
Example #9
def visualize(inputs, outputs = None, size = 256):
    if isinstance(inputs, list) and len(inputs) == 2:
        inputs = inputs[0][-1]
    else:
        inputs = inputs[-1]

    if outputs is None:
        outputs = np.zeros(inputs.size())

    images = []
    for input, output in zip(inputs, outputs):
        input, output = to_np(input), to_np(output)
        input = resize_image(input, output.shape[-1], channel_first = True)

        image = input + output / 128.
        image = np.maximum(image, 0)
        image = np.minimum(image, 1)

        image = resize_image(image, size, channel_first = True)
        images.append(image)
    return images
Example #10
    def __getitem__(self, index):
        m_inputs = []
        for k in range(2):
            m_inputs.append(
                load_image(os.path.join(
                    self.data_path,
                    '{0}_im{1}.png'.format(self.data[index], k + 1)),
                           size=self.input_size,
                           channel_first=True))

        i_inputs = []
        for input_scale in self.input_scales:
            i_inputs.append(
                resize_image(m_inputs[0],
                             size=int(self.input_size * input_scale),
                             channel_first=True))

        inputs = (i_inputs, m_inputs)
        targets = resize_image(m_inputs[1], size = self.target_size, channel_first = True) - \
                  resize_image(m_inputs[0], size = self.target_size, channel_first = True)

        return inputs, targets * 128.
Example #11
    def __getitem__(self, index):
        file = self.files[index]

        image_file_path = join_path(self.image_root, file + '.png')
        anno_file_path = join_path(self.anno_root, file + '.txt')

        im = Image.open(image_file_path)
        annotations = Annotation(anno_file_path)
        if self.image_shape is None:
            size = im.size
        else:
            size = self.image_shape
        size = im.size
        # size = (size[0] // 4, size[1] // 4)
        ground_truth = Image.new('L', size)
        template_box = None
        max_area = 0
        for gaussian_center, bounding_box in zip(annotations.centers,
                                                 annotations.bounding_boxes):
            current_area = ((bounding_box[1][0] - bounding_box[0][0]) *
                            (bounding_box[1][1] - bounding_box[0][1]))
            if max_area < current_area:
                max_area = current_area
                template_box = bounding_box
            box = [[
                int(bounding_box[0][0] * size[0] / im.size[0]),
                int(bounding_box[0][1] * size[1] / im.size[1])
            ],
                   [
                       int(bounding_box[1][0] * size[0] / im.size[0]),
                       int(bounding_box[1][1] * size[1] / im.size[1])
                   ]]
            shape = [(box[1][0] - box[0][0]), (box[1][1] - box[0][1])]
            gaussian = gkern(shape)
            gaussian *= 255
            gaussian = gaussian.astype(np.uint8)
            gaussian = Image.fromarray(gaussian, mode='L')
            gaussian_center = (
                int((gaussian_center[0] * size[0] / im.size[0]) -
                    gaussian.size[0] // 2),
                int((gaussian_center[1] * size[1] / im.size[1]) -
                    gaussian.size[1] // 2))
            ground_truth = paste_image(ground_truth, gaussian, gaussian_center)
        # ground_truth.show()
        if template_box is not None:
            coords = self.square_template(template_box, im)

            template = im.crop(tuple(coords))
            template = thumbnail_image(template, (63, 63))
        else:
            template = Image.new('RGB', (63, 63))

        if self.image_shape is not None:
            im = thumbnail_image(im, self.image_shape)
            padding = Image.new('RGBA', self.image_shape)
            x = int(padding.size[0] / 2 - im.size[0] / 2)
            y = int(padding.size[1] / 2 - im.size[1] / 2)
            im = im.convert('RGBA')
            im = paste_image(padding, im, (x, y)).convert('RGB')

            size = (self.image_shape[0] // 4, self.image_shape[1] // 4)
            ground_truth = thumbnail_image(ground_truth, size)
            padding = Image.new('RGBA', size)
            x = int(padding.size[0] / 2 - ground_truth.size[0] / 2)
            y = int(padding.size[1] / 2 - ground_truth.size[1] / 2)
            ground_truth = ground_truth.convert('RGBA')
            ground_truth = paste_image(padding, ground_truth,
                                       (x, y)).convert('L')
        # im.show()
        # template.show()
        count = len(annotations.centers)
        resized_template = resize_image(template, (96, 96))
        if self.transform is not None:
            im = self.transform(im)
            template = self.transform(template)
            ground_truth = self.transform(ground_truth)
            resized_template = self.transform(resized_template)
        return im, template, ground_truth, count, resized_template
Example #12
    'tvmonitor': 19
}
labels_to_names = {}
for key, value in voc_classes.items():
    labels_to_names[value] = key
# load image
image_paths = glob.glob('datasets/voc_test/VOC2007/JPEGImages/*.jpg')
for image_path in image_paths:
    image = read_image_bgr(image_path)

    # copy to draw on
    draw = image.copy()

    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)

    # process image
    start = time.time()
    # locations, feature_shapes = model.predict_on_batch(np.expand_dims(image, axis=0))
    boxes, scores, labels = model.predict_on_batch(
        np.expand_dims(image, axis=0))
    print("processing time: ", time.time() - start)

    # correct for image scale
    boxes /= scale
    labels_to_locations = {}
    # visualize detections
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted so we can break
        if score < 0.5:
Example #13
 def resize_image(self, image):
     return resize_image(image, min_side=self.image_min_side, max_side=self.image_max_side)
Example #14
def main(argv=None):

    print('Mode :%s' % FLAGS.detect_mode)

    sys.path.append(os.getcwd())

    from utils.text_connector.detectors import TextDetector
    from nets import model_train as model
    from utils.rpn_msr.proposal_layer import proposal_layer

    if FLAGS.output_path:
        # should the existing output be overridden? probably not needed for testing
        # shutil.rmtree(FLAGS.output_path)

        if not os.path.exists(FLAGS.output_path):
            os.makedirs(FLAGS.output_path)

        image_path = os.path.join(FLAGS.output_path, "image")
        label_path = os.path.join(FLAGS.output_path, "label")
        if not os.path.exists(image_path):
            os.makedirs(image_path)
        if not os.path.exists(label_path):
            os.makedirs(label_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')

        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            # print(im_fn_list)

            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                im = cv2.imread(im_fn)  # [:, :, ::-1]
                if im is None:
                    # cv2.imread returns None instead of raising on an unreadable file
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im, FLAGS.image_size)
                img = cv2.detailEnhance(img)

                # process image
                start = time.time()
                h, w, c = img.shape
                # print(h, w, rh, rw)
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                thickness = max(1, int(im.shape[0] / 400))
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE=FLAGS.detect_mode)
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                boxes = np.array(boxes, dtype=np.float64)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # map the detected boxes back to the original image scale
                for i, box in enumerate(boxes):
                    box[:8][::2] /= rw
                    box[1:8][::2] /= rh

                basename = os.path.basename(im_fn)
                if FLAGS.output_path:

                    bfn, ext = os.path.splitext(basename)
                    gt_path = os.path.join(FLAGS.output_path, "label",
                                           'gt_' + bfn + '.txt')
                    img_path = os.path.join(FLAGS.output_path, "image",
                                            basename)
                    # save image and coordination, may be resize image
                    # cv2.imwrite(img_path, im)
                    shutil.copyfile(im_fn, img_path)
                    with open(gt_path, "w") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(int(box[k])) for k in range(8))
                            # line += "," + str(scores[i]) + "\r\n"
                            # store label as 0-9 for simple
                            line += "," + str(i % 10) + "\r\n"
                            f.writelines(line)
                else:
                    # cv2.namedWindow(basename, cv2.WND_PROP_FULLSCREEN)
                    # cv2.setWindowProperty(
                    #     basename, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

                    # draw polyline and show
                    for i, box in enumerate(boxes):
                        points = [box[:8].astype(np.int32).reshape((-1, 1, 2))]
                        cv2.polylines(im,
                                      points,
                                      True,
                                      color=(0, 255, 0),
                                      thickness=thickness,
                                      lineType=cv2.LINE_AA)
                    cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                    cv2.resizeWindow(basename, w, h)
                    cv2.imshow(basename, im)
                    cv2.waitKey(0)
Example #15
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Produce image detection predictions.

        Parameters
        ----------
            inputs  : numpy ndarray of size (n_images, dimension) containing the d3m Index, image name, 
                      and bounding box for each image.

        Returns
        -------
            outputs : A d3m dataframe container with the d3m index, image name, bounding boxes as 
                      a string (8 coordinate format), and confidence scores.
        """
        iou_threshold = 0.5     # Bounding box overlap threshold for false positive or true positive
        score_threshold = 0.05  # The score confidence threshold to use for detections
        max_detections = 100    # Maximum number of detections to use per image

        # Convert training model to inference model
        inference_model = models.convert_model(self.training_model)

        # Generate image paths
        image_cols = inputs.metadata.get_columns_with_semantic_type('https://metadata.datadrivendiscovery.org/types/FileName')
        self.base_dir = [inputs.metadata.query((metadata_base.ALL_ELEMENTS, t))['location_base_uris'][0].replace('file:///', '/') for t in image_cols]
        self.image_paths = np.array([[os.path.join(self.base_dir, filename) for filename in inputs.iloc[:,col]] for self.base_dir, col in zip(self.base_dir, image_cols)]).flatten()
        self.image_paths = pd.Series(self.image_paths)

        # Initialize output objects
        box_list = []
        score_list = []
        image_name_list = []

        # Predict bounding boxes and confidence scores for each image
        image_list = [x for i, x in enumerate(self.image_paths.tolist()) if self.image_paths.tolist().index(x) == i]

        start_time = time.time()
        print('Starting testing...', file = sys.__stdout__)

        for i in image_list:
            image = read_image_bgr(i)

            # preprocess image for network
            image = preprocess_image(image)
            image, scale = resize_image(image)

            boxes, scores, labels = inference_model.predict_on_batch(np.expand_dims(image, axis = 0))

            # correct for image scale
            boxes /= scale

            for box, score in zip(boxes[0], scores[0]):
                if score < 0.5:
                    break
    
                b = box.astype(int)
                box_list.append(b)
                score_list.append(score)
                image_name_list.append(i)  # one entry (the image path) per detected box

        print(f'Testing complete. Testing took {time.time()-start_time} seconds.', file = sys.__stdout__)
        
        ## Convert predicted boxes from a list of arrays to a list of strings
        boxes = np.array(box_list).tolist()
        boxes = list(map(lambda x : [x[0], x[1], x[0], x[3], x[2], x[3], x[2], x[1]], boxes))  # Convert to 8 coordinate format for D3M            
        boxes = list(map(lambda x : ",".join(map(str, x)), boxes))

        # Create mapping between image names and D3M index
        input_df = pd.DataFrame({
            'd3mIndex': inputs.d3mIndex,
            'image': [os.path.basename(list) for list in self.image_paths]
        })

        d3mIdx_image_mapping = input_df.set_index('image').T.to_dict('list')

        # Extract values for image name keys and get missing image predictions (if they exist)
        image_name_list = [os.path.basename(list) for list in image_name_list]
        d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
        empty_predictions_image_names = [k for k,v in d3mIdx_image_mapping.items() if v not in d3mIdx]
        d3mIdx = [item for sublist in d3mIdx for item in sublist]   # Flatten list of lists

        ## Assemble in a Pandas DataFrame
        results = pd.DataFrame({
            'd3mIndex': d3mIdx,
            'bounding_box': boxes,
            'confidence': score_list
        })

        # The D3M metrics evaluator needs at least one prediction per image. If RetinaNet does not return
        # predictions for an image, create a dummy empty prediction row to add to results_df for that
        # missing image (a hedged sketch of _fill_empty_predictions follows this method).
        if len(empty_predictions_image_names) != 0:
            # Create a data frame of empty predictions for each missing image and concat with results.
            # Sort results_df.
            empty_predictions_df = self._fill_empty_predictions(empty_predictions_image_names, d3mIdx_image_mapping)
            results_df = pd.concat([results, empty_predictions_df]).sort_values('d3mIndex')
        else:
            results_df = results.sort_values('d3mIndex')

        # Convert to DataFrame container
        results_df = d3m_DataFrame(results_df)
        
        ## Assemble first output column ('d3mIndex)
        col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
        col_dict['structural_type'] = type("1")
        col_dict['name'] = 'd3mIndex'
        col_dict['semantic_types'] = ('http://schema.org/Integer', 
                                      'https://metadata.datadrivendiscovery.org/types/PrimaryKey')
        results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 0), col_dict)

        ## Assemble second output column ('bounding_box')
        col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
        col_dict['structural_type'] = type("1")
        col_dict['name'] = 'bounding_box'
        col_dict['semantic_types'] = ('http://schema.org/Text', 
                                      'https://metadata.datadrivendiscovery.org/types/PredictedTarget', 
                                      'https://metadata.datadrivendiscovery.org/types/BoundingPolygon')
        results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 1), col_dict)

        ## Assemble third output column ('confidence')
        col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
        col_dict['structural_type'] = type("1")
        col_dict['name'] = 'confidence'
        col_dict['semantic_types'] = ('http://schema.org/Integer', 
                                      'https://metadata.datadrivendiscovery.org/types/Score')
        results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 2), col_dict) 
        
        return CallResult(results_df)
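The produce() method above calls a private helper, _fill_empty_predictions, that is not included in this listing. The following is only a minimal sketch of what such a helper might look like, assuming it receives the missing image names plus the image-name-to-d3mIndex mapping built above and returns a frame with the same three columns as results; the placeholder box string and zero confidence are assumptions, not taken from the source.

import pandas as pd

def _fill_empty_predictions(self, empty_predictions_image_names, d3mIdx_image_mapping):
    # Hypothetical sketch: emit one dummy row per missing image so the D3M metrics
    # evaluator always sees at least one prediction per image.
    rows = []
    for name in empty_predictions_image_names:
        for idx in d3mIdx_image_mapping.get(name, []):
            rows.append({
                'd3mIndex': idx,
                'bounding_box': '0,0,0,0,0,0,0,0',  # placeholder 8-coordinate polygon
                'confidence': 0,
            })
    return pd.DataFrame(rows, columns=['d3mIndex', 'bounding_box', 'confidence'])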
Example #16
    def detectObjectsFromImage_Me(self, input_image="", output_image_path="", input_type="file", output_type="file",
                               extract_detected_objects=False, minimum_percentage_probability=50,
                               display_percentage_probability=True, display_object_name=True,
                               display_box=True, thread_safe=False, custom_objects=None):
        """
            'detectObjectsFromImage()' function is used to detect objects observable in the given image path:
                    * input_image , which can be a filepath, image numpy array or image file stream
                    * output_image_path (only if output_type = file) , file path to the output image that will contain the detection boxes and label, if output_type="file"
                    * input_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file", "array" and "stream"
                    * output_type (optional) , file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
                    * extract_detected_objects (optional) , option to save each object detected individually as an image and return an array of the objects' image path.
                    * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
                    * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
                    * display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
                    * thread_safe (optional, False by default), enforce the loaded detection model works across all threads if set to true, made possible by forcing all Tensorflow inference to run on the default graph.
            The values returned by this function depend on the parameters passed. The possible return values
            are stated below:
            - If extract_detected_objects = False or at its default value and output_type = 'file' or
                at its default value, you must pass in the 'output_image_path' as a string with the path where you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
            - If extract_detected_objects = False or at its default value and output_type = 'array' ,
              Then the function will return:
                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
            - If extract_detected_objects = True and output_type = 'file' or
                at its default value, you must pass in the 'output_image_path' as a string with the path where you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                2. an array of string paths to the image of each object extracted from the image
            - If extract_detected_objects = True and output_type = 'array', then the function will return:
                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to the objects
                    detected in the image. Each dictionary contains the following property:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                3. an array of numpy arrays of each object detected in the image
            :param input_image:
            :param output_image_path:
            :param input_type:
            :param output_type:
            :param extract_detected_objects:
            :param minimum_percentage_probability:
            :param display_percentage_probability:
            :param display_object_name:
            :param thread_safe:
            :return image_frame:
            :return output_objects_array:
            :return detected_objects_image_array:
        """

        if (self.__modelLoaded == False):
            raise ValueError("You must call the loadModel() function before making object detection.")
        elif (self.__modelLoaded == True):
            try:

                model_detections = list()
                detections = list()
                image_copy = None

                detected_objects_image_array = []
                min_probability = minimum_percentage_probability / 100

                if (input_type == "file"):
                    input_image = cv2.imread(input_image)
                elif (input_type == "array"):
                    input_image = np.array(input_image)

                detected_copy = input_image
                image_copy = input_image

                if (self.__modelType == "yolov3" or self.__modelType == "tinyyolov3"):

                    image_h, image_w, _ = detected_copy.shape
                    detected_copy = preprocess_input(detected_copy, self.__yolo_model_image_size)

                    model = self.__model_collection[0]
                    yolo_result = model.predict(detected_copy)

                    model_detections = retrieve_yolo_detections(yolo_result,
                            self.__yolo_anchors,
                            min_probability,
                            self.__nms_thresh,
                            self.__yolo_model_image_size,
                            (image_w, image_h),
                            self.numbers_to_names)
                            
                elif (self.__modelType == "retinanet"):
                    detected_copy = preprocess_image(detected_copy)
                    detected_copy, scale = resize_image(detected_copy)

                    model = self.__model_collection[0]
                    boxes, scores, labels = model.predict_on_batch(np.expand_dims(detected_copy, axis=0))

                    
                    boxes /= scale

                    for box, score, label in zip(boxes[0], scores[0], labels[0]):
                        # scores are sorted so we can break
                        if score < min_probability:
                            break

                        detection_dict = dict()
                        detection_dict["name"] = self.numbers_to_names[label]
                        detection_dict["percentage_probability"] = score * 100
                        detection_dict["box_points"] = box.astype(int).tolist()
                        model_detections.append(detection_dict)

                counting = 0
                objects_dir = output_image_path + "-objects"

                for detection in model_detections:
                    counting += 1
                    label = detection["name"]
                    percentage_probability = detection["percentage_probability"]
                    box_points = detection["box_points"]

                    if (custom_objects is not None):
                        if (custom_objects[label] != "valid"):
                            continue
                    
                    detections.append(detection)

                    if display_object_name == False:
                        label = None

                    if display_percentage_probability == False:
                        percentage_probability = None

                    
                    image_copy = draw_boxes(image_copy, 
                                    box_points,
                                    display_box,
                                    label, 
                                    percentage_probability, 
                                    self.__box_color)
                    
                    

                    if (extract_detected_objects == True):
                        splitted_copy = image_copy.copy()[box_points[1]:box_points[3],
                                        box_points[0]:box_points[2]]
                        if (output_type == "file"):
                            if (os.path.exists(objects_dir) == False):
                                os.mkdir(objects_dir)
                            splitted_image_path = os.path.join(objects_dir,
                                                                detection["name"] + "-" + str(
                                                                    counting) + ".jpg")
                            cv2.imwrite(splitted_image_path, splitted_copy)
                            detected_objects_image_array.append(splitted_image_path)
                        elif (output_type == "array"):
                            detected_objects_image_array.append(splitted_copy)

                image_copy = padded_fragment(image_copy)
                
                if (output_type == "file"):
                    cv2.imwrite(output_image_path, image_copy)

                if (extract_detected_objects == True):
                    if (output_type == "file"):
                        return detections, detected_objects_image_array
                    elif (output_type == "array"):
                        return image_copy, detections, detected_objects_image_array

                else:
                    if (output_type == "file"):
                        return detections
                    elif (output_type == "array"):
                        return image_copy, detections

            except:
                raise ValueError(
                    "Ensure you specified correct input image, input type, output type and/or output image path ")
Example #17
def analyze_fmaps(size=256):
    fmaps_path = os.path.join('exp', args.exp, 'fmaps')
    mkdir(fmaps_path, clean=True)

    images_path = os.path.join(fmaps_path, 'images')
    mkdir(images_path, clean=True)

    # feature maps
    for split in ['train', 'test']:
        inputs, targets = next(iter(loaders[split]))
        inputs, targets = to_var(inputs, volatile=True), to_var(targets,
                                                                volatile=True)

        outputs, features = model.forward(inputs, returns='features')
        num_scales, num_channels = len(features), features[0].size(1)

        for s in trange(num_scales):
            input, feature = inputs[0][-1], features[s]

            for b in trange(args.batch, leave=False):
                image = resize_image(to_np(input[b]),
                                     size=size,
                                     channel_first=True)

                for c in trange(num_channels, leave=False):
                    fmap = resize_image(to_np(feature[b, c]),
                                        size=size,
                                        channel_first=True)

                    if np.min(fmap) < np.max(fmap):
                        fmap = (fmap - np.min(fmap)) / (np.max(fmap) -
                                                        np.min(fmap))

                    image_path = os.path.join(
                        images_path,
                        '{0}-{1}-{2}-{3}.gif'.format(split, s, c, b))
                    save_images([image, fmap], image_path, channel_first=True)

    # visualization
    with open(os.path.join(fmaps_path, 'index.html'), 'w') as fp:
        for s in range(num_scales):
            for c in range(num_channels):
                print('<h3>scale [{0}] - channel [{1}]</h3>'.format(
                    s + 1, c + 1),
                      file=fp)
                print('<table border="1" style="table-layout: fixed;">',
                      file=fp)
                for split in ['train', 'test']:
                    print('<tr>', file=fp)
                    for b in range(args.batch):
                        image_path = os.path.join(
                            'images',
                            '{0}-{1}-{2}-{3}.gif'.format(split, s, c, b))
                        print(
                            '<td halign="center" style="word-wrap: break-word;" valign="top">',
                            file=fp)
                        print('<img src="{0}" style="width:128px;">'.format(
                            image_path),
                              file=fp)
                        print('</td>', file=fp)
                    print('</tr>', file=fp)
                print('</table>', file=fp)
Example #18
def process(input, output, size):

    im_fns = os.listdir(os.path.join(input, "image"))
    im_fns.sort()

    if not os.path.exists(os.path.join(output, "image")):
        os.makedirs(os.path.join(output, "image"))
    if not os.path.exists(os.path.join(output, "label")):
        os.makedirs(os.path.join(output, "label"))

    for im_fn in tqdm(im_fns):
        try:
            _, fn = os.path.split(im_fn)
            bfn, ext = os.path.splitext(fn)
            if ext.lower() not in ['.jpg', '.png']:
                continue

            gt_path = os.path.join(input, "label", 'gt_' + bfn + '.txt')
            img_path = os.path.join(input, "image", im_fn)

            img = cv2.imread(img_path)
            h, w, _ = img.shape
            re_im, _ = resize_image(img, size)
            re_size = re_im.shape

            polys = []
            with open(gt_path, 'r') as f:
                lines = f.readlines()
            for line in lines:
                splitted_line = line.strip().lower().split(',')
                x1, y1, x2, y2, x3, y3, x4, y4 = map(float, splitted_line[:8])
                poly = np.array([x1, y1, x2, y2, x3, y3, x4,
                                 y4]).reshape([4, 2])
                poly[:, 0] = poly[:, 0] / w * re_size[1]
                poly[:, 1] = poly[:, 1] / h * re_size[0]
                poly = orderConvex(poly)
                polys.append(poly)

                # cv2.polylines(re_im, [poly.astype(np.int32).reshape((-1, 1, 2))], True,color=(0, 255, 0), thickness=2)

            res_polys = []
            for poly in polys:
                # delete polys with width less than 10 pixel
                if np.linalg.norm(poly[0] - poly[1]) < 10 or np.linalg.norm(
                        poly[3] - poly[0]) < 10:
                    continue

                res = shrink_poly(poly)
                # for p in res:
                #     cv2.polylines(re_im, [p.astype(np.int32).reshape(
                #         (-1, 1, 2))], True, color=(0, 255, 0), thickness=1)

                res = res.reshape([-1, 4, 2])
                for r in res:
                    x_min = np.min(r[:, 0])
                    y_min = np.min(r[:, 1])
                    x_max = np.max(r[:, 0])
                    y_max = np.max(r[:, 1])

                    res_polys.append([x_min, y_min, x_max, y_max])

            cv2.imwrite(os.path.join(output, "image", fn), re_im)
            with open(os.path.join(output, "label", bfn) + ".txt", "w") as f:
                for p in res_polys:
                    line = ",".join(str(p[i]) for i in range(4))
                    f.writelines(line + "\r\n")
                    # for p in res_polys:
                    #     cv2.rectangle(re_im, (p[0], p[1]), (p[2], p[3]), color=(
                    #         0, 0, 255), thickness=1, lineType=cv2.LINE_AA)

                    # cv2.imshow("demo", re_im)
                    # cv2.waitKey(0)
        except:
            print("Error processing {}".format(im_fn))
Example #19
def main(config):
    killer = GracefullKiller()

    # open config file
    try:
        file = open(config)
        cfg = json.load(file)
        file.close()
    except Exception as error:
        logger.critical(str(error), exc_info=1)
        return

    # give meaningful names to each sub config
    source_cfg = cfg["video_source"]
    broadcast_cfg = cfg["broadcaster"]
    pool_cfg = cfg["inferencing_pool"]
    worker_cfg = cfg["inferencing_worker"]
    flusher_cfg = cfg["flusher"]
    gps_cfg = cfg["gps"]
    gen_cfg = cfg["general"]

    # bind requests module to use a given network interface
    try:
        socket.inet_aton(gen_cfg["bind_ip"])
        session.mount("http://", SourceAddressAdapter(gen_cfg["bind_ip"]))
        logger.info("binding requests module to {} IP".format(gen_cfg["bind_ip"]))
    except OSError as e:
        logger.error("bind IP is invalid, resorting to default interface", exc_info=True)

    # start polling the GPS
    if gps_cfg["use_gps"]:
        wport = gps_cfg["write_port"]
        rport = gps_cfg["read_port"]
        br = gps_cfg["baudrate"]
        gps = ReadGPSData(wport, rport, br)
        gps.start()
    else:
        gps = None

    # workers on a separate process to run inference on the data
    logger.info("initializing pool w/ " + str(pool_cfg["workers"]) + " workers")
    output = DistributeFramesAndInfer(pool_cfg, worker_cfg)
    frames_queue, bc_queue, predicts_queue = output.get_queues()
    logger.info("initialized worker pool")

    # a single worker in a separate process to reassemble the data
    reassembler = BroadcastReassembled(bc_queue, broadcast_cfg, name="BroadcastReassembled")
    reassembler.start()

    # a single thread to flush the producing queue
    # when there are too many frames in the pipe
    flusher = Flusher(frames_queue, threshold=flusher_cfg["frame_count_threshold"], name="Flusher")
    flusher.start()

    # data aggregator to write things to disk
    def results_writer():
        if len(gen_cfg["saved_data"]) > 0:
            df = pd.DataFrame(columns=["Date", "License Plate", "Coordinates"])
            while not killer.kill_now:
                time.sleep(0.01)
                try:
                    data = predicts_queue.get_nowait()
                except queue.Empty:
                    continue
                predicts = data["predicts"]
                date = data["date"]
                for lp in predicts:
                    if len(lp) > 0:
                        lp = " ".join(lp)
                        entry = {"Date": date, "License Plate": lp, "Coordinates": ""}
                        if gps:
                            entry["Coordinates"] = "{}, {}".format(
                                gps.latitude, gps.longitude
                            ).upper()
                        df = df.append(entry, ignore_index=True)

            logger.info("dumping results to csv file {}".format(gen_cfg["saved_data"]))
            if os.path.isfile(gen_cfg["saved_data"]):
                header = False
            else:
                header = True
            with open(gen_cfg["saved_data"], "a") as f:
                df.to_csv(f, header=header)

    # data aggregator thread
    results_thread = td.Thread(target=results_writer)
    results_thread.start()

    if source_cfg["type"] == "camera":
        # import module
        import picamera

        # start the pi camera
        with picamera.PiCamera() as camera:
            # configure the camera
            camera.sensor_mode = source_cfg["sensor_mode"]
            camera.resolution = source_cfg["resolution"]
            camera.framerate = source_cfg["framerate"]
            logger.info(
                "picamera initialized w/ mode={} resolution={} framerate={}".format(
                    camera.sensor_mode, camera.resolution, camera.framerate
                )
            )

            # start recording both to disk and to the queue
            camera.start_recording(
                output=source_cfg["output_file"], format="h264", splitter_port=0, bitrate=10000000,
            )
            camera.start_recording(
                output=output, format="mjpeg", splitter_port=1, bitrate=10000000, quality=95,
            )
            logger.info("started recording to file and to queue")

            # wait until SIGINT is detected
            while not killer.kill_now:
                camera.wait_recording(timeout=0.5, splitter_port=0)
                camera.wait_recording(timeout=0.5, splitter_port=1)
                logger.info(
                    "frames qsize: {}, broadcast qsize: {}, predicts qsize: {}".format(
                        frames_queue.qsize(), bc_queue.qsize(), predicts_queue.qsize()
                    )
                )

            # stop recording
            logger.info("gracefully exiting")
            camera.stop_recording(splitter_port=0)
            camera.stop_recording(splitter_port=1)
            output.stop()

    elif source_cfg["type"] == "file":
        # open video file
        video_reader = cv2.VideoCapture(source_cfg["input"])
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, source_cfg["frames_to_skip"])

        # get # of frames and determine target width
        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        target_h = int(frame_h * source_cfg["scale_video"])
        target_w = int(frame_w * source_cfg["scale_video"])
        period = 1.0 / source_cfg["framerate"]

        logger.info(
            "file-based video stream initialized w/ resolution={} framerate={} and {} skipped frames".format(
                (target_w, target_h), source_cfg["framerate"], source_cfg["frames_to_skip"],
            )
        )

        # serve each frame to the workers iteratively
        last_log = time.time()
        for i in range(nb_frames):
            start = time.time()
            try:
                # write frame to queue
                _, frame = video_reader.read()
                if target_w != frame_w:
                    frame = resize_image(frame, target_w)
                jpeg = image_to_jpeg_bytes(frame)
                output.write(jpeg)
            except Exception as error:
                logger.error("unexpected error occurred", exc_info=True)
                break
            end = time.time()
            spent = end - start
            left = period - spent
            if left > 0:
                # sleep for the remaining time to maintain the target framerate
                time.sleep(left)

            # check if SIGINT has been sent
            if killer.kill_now:
                break

            # do logs every second
            current = time.time()
            if current - last_log >= 1.0:
                logger.info(
                    "frames qsize: {}, broadcast qsize: {}, predicts qsize: {}".format(
                        frames_queue.qsize(), bc_queue.qsize(), predicts_queue.qsize()
                    )
                )
                last_log = current

        logger.info("gracefully exiting")
        video_reader.release()
        output.stop()

    if gps_cfg["use_gps"]:
        gps.stop()

    reassembler.stop()
    flusher.stop()
Example #20
    def cloud_infer(self):
        """
        Main method that runs in the loop.
        """
        try:
            data = self.in_queue.get_nowait()
        except queue.Empty:
            # logger.warning("no data available for worker")
            return

        #############################

        # extract frame
        frame_num = data["frame_num"]
        img = data["jpeg"]
        # preprocess/compress the image
        image = image_from_bytes(img)
        reduced = compress_image(image)
        byte_im = image_to_jpeg_bytes(reduced)
        # encode image
        img_enc = base64.b64encode(byte_im).decode("utf-8")
        img_dump = json.dumps({"img": img_enc})

        # make inference request
        resp = self.yolov3_api_request(img_dump)
        if not resp:
            return

        #############################

        # parse response
        r_dict = resp.json()
        boxes_raw = r_dict["boxes"]
        boxes = []
        for b in boxes_raw:
            box = BoundBox(*b)
            boxes.append(box)

        # purge bounding boxes with a low confidence score
        aux = []
        for b in boxes:
            label = -1
            for i in range(len(b.classes)):
                if b.classes[i] > self.yolov3_obj_thresh:
                    label = i
            if label >= 0:
                aux.append(b)
        boxes = aux
        del aux

        # also scale the boxes for later uses
        camera_source_width = image.shape[1]
        boxes640 = self.scale_bbox(boxes, self.yolov3_input_size_px,
                                   self.bounding_boxes_upscale_px)
        boxes_source = self.scale_bbox(boxes, self.yolov3_input_size_px,
                                       camera_source_width)

        #############################

        # recognize the license plates in case
        # any bounding boxes have been detected
        dec_words = []
        if len(boxes) > 0 and len(self.api_endpoint_crnn) > 0:
            # create set of images of the detected license plates
            lps = []
            try:
                for b in boxes_source:
                    lp = image[b.ymin:b.ymax, b.xmin:b.xmax]
                    jpeg = image_to_jpeg_nparray(
                        lp, [int(cv2.IMWRITE_JPEG_QUALITY), self.crnn_quality])
                    lps.append(jpeg)
            except:
                logger.warning("encountered error while converting to jpeg")
                pass

            lps = pickle.dumps(lps, protocol=0)
            lps_enc = base64.b64encode(lps).decode("utf-8")
            lps_dump = json.dumps({"imgs": lps_enc})

            # make request to rcnn API
            dec_lps = self.rcnn_api_request(lps_dump)
            dec_lps = self.reorder_recognized_words(dec_lps)
            for dec_lp in dec_lps:
                dec_words.append([word[0] for word in dec_lp])

        if len(dec_words) > 0:
            logger.info("Detected the following words: {}".format(dec_words))
        else:
            dec_words = [[] for i in range(len(boxes))]

        #############################

        # draw detections
        upscaled = resize_image(image, self.bounding_boxes_upscale_px)
        draw_image = draw_boxes(
            upscaled,
            boxes640,
            overlay_text=dec_words,
            labels=["LP"],
            obj_thresh=self.yolov3_obj_thresh,
        )
        draw_byte_im = image_to_jpeg_bytes(
            draw_image,
            [int(cv2.IMWRITE_JPEG_QUALITY), self.broadcast_quality])

        #############################

        # push data for further processing in the queue
        output = {
            "boxes": boxes,
            "frame_num": frame_num,
            "avg_yolo3_rtt": self.rtt_yolo3_ms,
            "avg_crnn_rtt": self.rtt_crnn_ms,
            "image": draw_byte_im,
        }
        self.bc_queue.put(output)

        # push predictions to write to disk
        if len(dec_words) > 0:
            timestamp = time.time()
            literal_time = time.ctime(timestamp)
            predicts = {"predicts": dec_words, "date": literal_time}
            self.predicts_queue.put(predicts)

        logger.info(
            "Frame Count: {} - Avg YOLO3 RTT: {}ms - Avg CRNN RTT: {}ms - Detected: {}"
            .format(frame_num, int(self.rtt_yolo3_ms), int(self.rtt_crnn_ms),
                    len(boxes)))