def save(self):
    self.full_url = '/' + '/'.join([parent.slug for parent in self.parent_categories]) + '/'
    super(Category, self).save()
    if self.background:
        image.resize_image(self.background, (284, 300))
def save(self):
    super(Promotion, self).save()
    if self.background:
        size = (142, 300)
        if self.category.skyscraper_mode:
            size = (142, 612)
        image.resize_image(self.background, size)
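# A minimal, hypothetical sketch of the image.resize_image helper the two
# save() overrides above assume: open the stored file with Pillow, fit it
# inside `size`, and write it back in place. The Pillow calls are real; the
# helper's name and in-place behavior are assumptions, not the project's code.
from PIL import Image

def resize_image(image_field, size):
    img = Image.open(image_field.path)
    img.thumbnail(size, Image.LANCZOS)  # shrink to fit inside size, keeping the aspect ratio
    img.save(image_field.path)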
def load_image_test(image_file, height, width):
    input_image, real_image = vertical_split(load_image(image_file))
    input_image = resize_image(input_image, height, width)
    real_image = resize_image(real_image, height, width)
    input_image = normalize_image(input_image)
    real_image = normalize_image(real_image)
    return input_image, real_image
def resize_image(self, image):
    """!@brief Resize an image using image_min_side and image_max_side."""
    return resize_image(image,
                        min_side=self.image_min_side,
                        max_side=self.image_max_side)
def resize_image(self, image):
    """Resize an image using image_min_side and image_max_side."""
    if self.no_resize:
        return image, 1
    else:
        return resize_image(image,
                            min_side=self.image_min_side,
                            max_side=self.image_max_side)
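# A hedged sketch of the min_side/max_side resize these wrappers delegate to.
# The scale logic follows the common keras-retinanet recipe; the original
# utility may differ in detail. It returns the resized image plus the scale,
# matching the (image, scale) pair the no_resize branch above returns.
import cv2

def resize_image(img, min_side=800, max_side=1333):
    rows, cols = img.shape[:2]
    scale = min_side / min(rows, cols)       # grow the smallest side to min_side
    if max(rows, cols) * scale > max_side:   # but cap the largest side at max_side
        scale = max_side / max(rows, cols)
    img = cv2.resize(img, None, fx=scale, fy=scale)
    return img, scale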
def random_jitter(input_image, real_image, height, width, resize_ratio_before_crop):
    # resize images
    resize_height = int(resize_ratio_before_crop * height)
    resize_width = int(resize_ratio_before_crop * width)
    input_image = resize_image(input_image, resize_height, resize_width)
    real_image = resize_image(real_image, resize_height, resize_width)

    # randomly cropping
    input_image, real_image = random_crop(input_image, real_image, height, width)

    if tf.random.uniform(()) > 0.5:
        # random mirroring
        input_image = tf.image.flip_left_right(input_image)
        real_image = tf.image.flip_left_right(real_image)

    return input_image, real_image
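# A minimal sketch of the random_crop helper assumed by random_jitter,
# following the standard pix2pix recipe: stack both images so a single random
# crop window is applied identically to each. Assumes 3-channel images; this
# is an illustration, not necessarily the original helper.
import tensorflow as tf

def random_crop(input_image, real_image, height, width):
    stacked = tf.stack([input_image, real_image], axis=0)
    cropped = tf.image.random_crop(stacked, size=[2, height, width, 3])
    return cropped[0], cropped[1]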
def __getitem__(self, index):
    # pick (images_per_grid - 1) other samples, then include this one
    possible_indices = np.delete(np.arange(self.__len__()), index).tolist()
    indices = random.sample(possible_indices, self.images_per_grid - 1)
    indices.append(index)

    images = []
    labels = []
    for idx in indices:
        image, label = super().__getitem__(idx)
        image = resize_image(image, (96, 96))
        images.append(image)
        labels.append(label)

    # per-class counts over the sampled grid
    unique_labels, unique_counts = np.unique(labels, return_counts=True)
    counts = np.zeros(self.n_classes, dtype=np.float32)
    counts[unique_labels] = unique_counts
    counts = counts.reshape((counts.shape[0], 1))

    image_grid = create_image_grid(images, self.image_grid_distribution)
    if self.transformations is not None:
        image_grid = self.transformations(image_grid)

    templates = []
    for class_name in self.class_names:
        template = self.template_dict[class_name]
        template = resize_image(template, (96, 96))
        # Make template have the same shape as the image grid
        if self.template_view == 'resize':
            template = resize_image(template, self.image_grid_shape[-2:])
        elif self.template_view == 'padding':
            template = pad_image(template, self.image_grid_shape[-2:])
        elif self.template_view == 'repeat':
            template = repeat_image(template, self.image_grid_shape[-2:])
        elif self.template_view == 'raw':
            pass
        if self.transformations is not None:
            template = self.transformations(template)
        templates.append(template)

    return image_grid, templates, counts
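# A hedged sketch of the create_image_grid helper used above, assuming HxWxC
# numpy images of equal size and distribution = (rows, cols); the original
# utility may differ.
import numpy as np

def create_image_grid(images, distribution):
    rows, cols = distribution
    row_images = [np.concatenate(images[r * cols:(r + 1) * cols], axis=1)
                  for r in range(rows)]
    return np.concatenate(row_images, axis=0)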
def resize_image(self, image):
    """Resize an image using image_min_side and image_max_side."""
    # random_side_index = random.randint(0, 4)
    # return resize_image(image,
    #                     min_side=self.image_min_sides[random_side_index],
    #                     max_side=self.image_max_sides[random_side_index])
    return resize_image(image,
                        min_side=self.image_min_side,
                        max_side=self.image_max_side)
def visualize(inputs, outputs=None, size=256):
    if isinstance(inputs, list) and len(inputs) == 2:
        inputs = inputs[0][-1]
    else:
        inputs = inputs[-1]
    if outputs is None:
        outputs = np.zeros(inputs.size())

    images = []
    for input, output in zip(inputs, outputs):
        input, output = to_np(input), to_np(output)
        input = resize_image(input, output.shape[-1], channel_first=True)
        image = input + output / 128.
        image = np.clip(image, 0, 1)  # clamp to the displayable range
        image = resize_image(image, size, channel_first=True)
        images.append(image)
    return images
def __getitem__(self, index):
    m_inputs = []
    for k in range(2):
        m_inputs.append(load_image(
            os.path.join(self.data_path, '{0}_im{1}.png'.format(self.data[index], k + 1)),
            size=self.input_size, channel_first=True))

    i_inputs = []
    for input_scale in self.input_scales:
        i_inputs.append(resize_image(m_inputs[0],
                                     size=int(self.input_size * input_scale),
                                     channel_first=True))

    inputs = (i_inputs, m_inputs)
    targets = resize_image(m_inputs[1], size=self.target_size, channel_first=True) - \
              resize_image(m_inputs[0], size=self.target_size, channel_first=True)
    return inputs, targets * 128.
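# A hypothetical sketch of the numpy resize_image helper these snippets pass
# channel_first to: OpenCV expects HxWxC, so transpose around the resize.
# Accepts a square `size` or an (h, w) pair; an assumption, not the original.
import numpy as np
import cv2

def resize_image(image, size, channel_first=False):
    if isinstance(size, int):
        size = (size, size)
    if channel_first and image.ndim == 3:
        image = np.transpose(image, (1, 2, 0))  # CHW -> HWC
    image = cv2.resize(image, (size[1], size[0]), interpolation=cv2.INTER_LINEAR)
    if channel_first and image.ndim == 3:
        image = np.transpose(image, (2, 0, 1))  # HWC -> CHW
    return image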
def __getitem__(self, index):
    file = self.files[index]
    image_file_path = join_path(self.image_root, file + '.png')
    anno_file_path = join_path(self.anno_root, file + '.txt')
    im = Image.open(image_file_path)
    annotations = Annotation(anno_file_path)

    if self.image_shape is None:
        size = im.size
    else:
        size = self.image_shape
    size = im.size  # NOTE: overrides the branch above; the ground truth is built at the original size
    # size = (size[0] // 4, size[1] // 4)

    ground_truth = Image.new('L', size)
    template_box = None
    max_area = 0
    for gaussian_center, bounding_box in zip(annotations.centers, annotations.bounding_boxes):
        # keep the largest box as the template crop
        current_area = ((bounding_box[1][0] - bounding_box[0][0]) *
                        (bounding_box[1][1] - bounding_box[0][1]))
        if max_area < current_area:
            max_area = current_area
            template_box = bounding_box

        # scale the box into ground-truth coordinates
        box = [[int(bounding_box[0][0] * size[0] / im.size[0]),
                int(bounding_box[0][1] * size[1] / im.size[1])],
               [int(bounding_box[1][0] * size[0] / im.size[0]),
                int(bounding_box[1][1] * size[1] / im.size[1])]]
        shape = [(box[1][0] - box[0][0]), (box[1][1] - box[0][1])]

        # paste a Gaussian blob centered on the annotation
        gaussian = gkern(shape)
        gaussian *= 255
        gaussian = gaussian.astype(np.uint8)
        gaussian = Image.fromarray(gaussian, mode='L')
        gaussian_center = (
            int((gaussian_center[0] * size[0] / im.size[0]) - gaussian.size[0] // 2),
            int((gaussian_center[1] * size[1] / im.size[1]) - gaussian.size[1] // 2))
        ground_truth = paste_image(ground_truth, gaussian, gaussian_center)
    # ground_truth.show()

    if template_box is not None:
        coords = self.square_template(template_box, im)
        template = im.crop(tuple(coords))
        template = thumbnail_image(template, (63, 63))
    else:
        template = Image.new('RGB', (63, 63))

    if self.image_shape is not None:
        # letterbox the image into image_shape
        im = thumbnail_image(im, self.image_shape)
        padding = Image.new('RGBA', self.image_shape)
        x = int(padding.size[0] / 2 - im.size[0] / 2)
        y = int(padding.size[1] / 2 - im.size[1] / 2)
        im = im.convert('RGBA')
        im = paste_image(padding, im, (x, y)).convert('RGB')

        # letterbox the ground truth at quarter resolution
        size = (self.image_shape[0] // 4, self.image_shape[1] // 4)
        ground_truth = thumbnail_image(ground_truth, size)
        padding = Image.new('RGBA', size)
        x = int(padding.size[0] / 2 - ground_truth.size[0] / 2)
        y = int(padding.size[1] / 2 - ground_truth.size[1] / 2)
        ground_truth = ground_truth.convert('RGBA')
        ground_truth = paste_image(padding, ground_truth, (x, y)).convert('L')
    # im.show()
    # template.show()

    count = len(annotations.centers)
    resized_template = resize_image(template, (96, 96))

    if self.transform is not None:
        im = self.transform(im)
        template = self.transform(template)
        ground_truth = self.transform(ground_truth)
        resized_template = self.transform(resized_template)

    return im, template, ground_truth, count, resized_template
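# A hedged sketch of the gkern helper assumed above: given a [width, height]
# box shape, return an (h, w) Gaussian bump with values in [0, 1] peaking at
# the center. The sigma choice here is illustrative.
import numpy as np

def gkern(shape, sigma=0.4):
    w, h = max(int(shape[0]), 1), max(int(shape[1]), 1)
    xx, yy = np.meshgrid(np.linspace(-1, 1, w), np.linspace(-1, 1, h))
    return np.exp(-(xx ** 2 + yy ** 2) / (2 * sigma ** 2))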
    'tvmonitor': 19
}

labels_to_names = {}
for key, value in voc_classes.items():
    labels_to_names[value] = key

# load images
image_paths = glob.glob('datasets/voc_test/VOC2007/JPEGImages/*.jpg')
for image_path in image_paths:
    image = read_image_bgr(image_path)

    # copy to draw on
    draw = image.copy()

    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)

    # process image
    start = time.time()
    # locations, feature_shapes = model.predict_on_batch(np.expand_dims(image, axis=0))
    boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
    print("processing time: ", time.time() - start)

    # correct for image scale
    boxes /= scale

    labels_to_locations = {}
    # visualize detections
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted so we can break
        if score < 0.5:
            break
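# The visualization body is truncated above; a hedged sketch of a typical
# drawing step using plain OpenCV (draw_detection is an illustrative name,
# not from the original snippet):
import cv2

def draw_detection(draw, box, score, label, labels_to_names):
    b = box.astype(int)
    cv2.rectangle(draw, (b[0], b[1]), (b[2], b[3]), (0, 255, 0), 2)
    caption = "{} {:.3f}".format(labels_to_names[label], score)
    cv2.putText(draw, caption, (b[0], max(b[1] - 10, 0)),
                cv2.FONT_HERSHEY_PLAIN, 1.5, (0, 255, 0), 2)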
def resize_image(self, image):
    return resize_image(image,
                        min_side=self.image_min_side,
                        max_side=self.image_max_side)
def main(argv=None):
    print('Mode: %s' % FLAGS.detect_mode)
    sys.path.append(os.getcwd())
    from utils.text_connector.detectors import TextDetector
    from nets import model_train as model
    from utils.rpn_msr.proposal_layer import proposal_layer

    if FLAGS.output_path:
        # if we need to override the output? maybe no need for testing
        # shutil.rmtree(FLAGS.output_path)
        if not os.path.exists(FLAGS.output_path):
            os.makedirs(FLAGS.output_path)
        image_path = os.path.join(FLAGS.output_path, "image")
        label_path = os.path.join(FLAGS.output_path, "label")
        if not os.path.exists(image_path):
            os.makedirs(image_path)
        if not os.path.exists(label_path):
            os.makedirs(label_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.compat.v1.get_variable('global_step', [],
                                                initializer=tf.constant_initializer(0),
                                                trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step)
        saver = tf.compat.v1.train.Saver(variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path,
                                      os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            # print(im_fn_list)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                im = cv2.imread(im_fn)  # [:, :, ::-1]
                if im is None:  # cv2.imread returns None on failure instead of raising
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im, FLAGS.image_size)
                img = cv2.detailEnhance(img)

                # process image
                start = time.time()
                h, w, c = img.shape
                # print(h, w, rh, rw)
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                thickness = max(1, int(im.shape[0] / 400))
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE=FLAGS.detect_mode)
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                boxes = np.array(boxes, dtype=np.float64)

                cost_time = time.time() - start
                print("cost time: {:.2f}s".format(cost_time))

                # map the detected boxes back to the original image scale
                for i, box in enumerate(boxes):
                    box[:8:2] /= rw   # x coordinates by the width ratio
                    box[1:8:2] /= rh  # y coordinates by the height ratio

                basename = os.path.basename(im_fn)
                if FLAGS.output_path:
                    bfn, ext = os.path.splitext(basename)
                    gt_path = os.path.join(FLAGS.output_path, "label", 'gt_' + bfn + '.txt')
                    img_path = os.path.join(FLAGS.output_path, "image", basename)

                    # save the image and its coordinates; maybe resize the image
                    # cv2.imwrite(img_path, im)
                    shutil.copyfile(im_fn, img_path)
                    with open(gt_path, "w") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(int(box[k])) for k in range(8))
                            # line += "," + str(scores[i]) + "\r\n"
                            # store the label as 0-9 for simplicity
                            line += "," + str(i % 10) + "\r\n"
                            f.writelines(line)
                else:
                    # cv2.namedWindow(basename, cv2.WND_PROP_FULLSCREEN)
                    # cv2.setWindowProperty(basename, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
                    # draw the polylines and show the result
                    for i, box in enumerate(boxes):
                        points = [box[:8].astype(np.int32).reshape((-1, 1, 2))]
                        cv2.polylines(im, points, True, color=(0, 255, 0),
                                      thickness=thickness, lineType=cv2.LINE_AA)
                    cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                    cv2.resizeWindow(basename, w, h)
                    cv2.imshow(basename, im)
                    cv2.waitKey(0)
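# A hedged sketch of the resize_image(im, size) helper these CTPN-style
# snippets assume: scale the image toward `size`, snap both dimensions to
# multiples of 16 (as the network expects), and return the resized image with
# its (height, width) ratios, consumed above as (rh, rw). The exact policy of
# the original utility may differ.
import cv2

def resize_image(img, size=600):
    h, w = img.shape[:2]
    scale = float(size) / max(h, w)
    new_h = max(int(h * scale / 16) * 16, 16)
    new_w = max(int(w * scale / 16) * 16, 16)
    re_im = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    return re_im, (new_h / float(h), new_w / float(w))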
def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
    """
    Produce image detection predictions.

    Parameters
    ----------
    inputs : numpy ndarray of size (n_images, dimension) containing the d3m Index, image name,
        and bounding box for each image.

    Returns
    -------
    outputs : A d3m dataframe container with the d3m index, image name, bounding boxes as a string
        (8-coordinate format), and confidence scores.
    """
    iou_threshold = 0.5     # Bounding box overlap threshold for false positive or true positive
    score_threshold = 0.05  # The score confidence threshold to use for detections
    max_detections = 100    # Maximum number of detections to use per image

    # Convert training model to inference model
    inference_model = models.convert_model(self.training_model)

    # Generate image paths
    image_cols = inputs.metadata.get_columns_with_semantic_type('https://metadata.datadrivendiscovery.org/types/FileName')
    self.base_dir = [inputs.metadata.query((metadata_base.ALL_ELEMENTS, t))['location_base_uris'][0].replace('file:///', '/') for t in image_cols]
    self.image_paths = np.array([[os.path.join(base_dir, filename) for filename in inputs.iloc[:, col]]
                                 for base_dir, col in zip(self.base_dir, image_cols)]).flatten()
    self.image_paths = pd.Series(self.image_paths)

    # Initialize output objects
    box_list = []
    score_list = []
    image_name_list = []

    # Predict bounding boxes and confidence scores for each unique image (order preserved)
    image_list = list(dict.fromkeys(self.image_paths.tolist()))

    start_time = time.time()
    print('Starting testing...', file=sys.__stdout__)

    for i in image_list:
        image = read_image_bgr(i)

        # preprocess image for network
        image = preprocess_image(image)
        image, scale = resize_image(image)

        boxes, scores, labels = inference_model.predict_on_batch(np.expand_dims(image, axis=0))

        # correct for image scale
        boxes /= scale

        for box, score in zip(boxes[0], scores[0]):
            if score < 0.5:
                break
            b = box.astype(int)
            box_list.append(b)
            score_list.append(score)
            image_name_list.append(i)  # one image name per detected box

    print(f'Testing complete. Testing took {time.time() - start_time} seconds.', file=sys.__stdout__)

    # Convert predicted boxes from a list of arrays to a list of strings
    boxes = np.array(box_list).tolist()
    boxes = list(map(lambda x: [x[0], x[1], x[0], x[3], x[2], x[3], x[2], x[1]], boxes))  # Convert to 8-coordinate format for D3M
    boxes = list(map(lambda x: ",".join(map(str, x)), boxes))

    # Create mapping between image names and D3M index
    input_df = pd.DataFrame({
        'd3mIndex': inputs.d3mIndex,
        'image': [os.path.basename(path) for path in self.image_paths]
    })
    d3mIdx_image_mapping = input_df.set_index('image').T.to_dict('list')

    # Extract values for image name keys and get missing image predictions (if they exist)
    image_name_list = [os.path.basename(path) for path in image_name_list]
    d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
    empty_predictions_image_names = [k for k, v in d3mIdx_image_mapping.items() if v not in d3mIdx]
    d3mIdx = [item for sublist in d3mIdx for item in sublist]  # Flatten list of lists

    # Assemble in a Pandas DataFrame
    results = pd.DataFrame({
        'd3mIndex': d3mIdx,
        'bounding_box': boxes,
        'confidence': score_list
    })

    # The D3M metrics evaluator needs at least one prediction per image. If RetinaNet does not
    # return predictions for an image, create a dummy empty prediction row to add to results_df
    # for that missing image.
    if len(empty_predictions_image_names) != 0:
        # Create a data frame of empty predictions for each missing image, concat with results,
        # and sort results_df.
        empty_predictions_df = self._fill_empty_predictions(empty_predictions_image_names, d3mIdx_image_mapping)
        results_df = pd.concat([results, empty_predictions_df]).sort_values('d3mIndex')
    else:
        results_df = results

    # Convert to DataFrame container
    results_df = d3m_DataFrame(results_df)

    # Assemble first output column ('d3mIndex')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
    col_dict['structural_type'] = type("1")
    col_dict['name'] = 'd3mIndex'
    col_dict['semantic_types'] = ('http://schema.org/Integer',
                                  'https://metadata.datadrivendiscovery.org/types/PrimaryKey')
    results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 0), col_dict)

    # Assemble second output column ('bounding_box')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
    col_dict['structural_type'] = type("1")
    col_dict['name'] = 'bounding_box'
    col_dict['semantic_types'] = ('http://schema.org/Text',
                                  'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
                                  'https://metadata.datadrivendiscovery.org/types/BoundingPolygon')
    results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 1), col_dict)

    # Assemble third output column ('confidence')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
    col_dict['structural_type'] = type("1")
    col_dict['name'] = 'confidence'
    col_dict['semantic_types'] = ('http://schema.org/Integer',
                                  'https://metadata.datadrivendiscovery.org/types/Score')
    results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 2), col_dict)

    return CallResult(results_df)
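# A hedged sketch of the _fill_empty_predictions helper referenced above: one
# zero-confidence dummy row per image without detections. The row shape
# follows the results frame built above; the original implementation may differ.
def _fill_empty_predictions(self, empty_image_names, d3mIdx_image_mapping):
    d3m_indices = [d3mIdx_image_mapping[name][0] for name in empty_image_names]
    return pd.DataFrame({
        'd3mIndex': d3m_indices,
        'bounding_box': ['0,0,0,0,0,0,0,0'] * len(d3m_indices),
        'confidence': [0] * len(d3m_indices),
    })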
def detectObjectsFromImage_Me(self, input_image="", output_image_path="", input_type="file",
                              output_type="file", extract_detected_objects=False,
                              minimum_percentage_probability=50, display_percentage_probability=True,
                              display_object_name=True, display_box=True, thread_safe=False,
                              custom_objects=None):
    """
    The 'detectObjectsFromImage()' function is used to detect objects observable in the given image:

        * input_image, which can be a file path, an image numpy array or an image file stream
        * output_image_path (only if output_type = "file"), file path to the output image that will contain the detection boxes and labels
        * input_type (optional), file path/numpy array/image file stream of the image. Acceptable values are "file", "array" and "stream"
        * output_type (optional), file path/numpy array/image file stream of the image. Acceptable values are "file" and "array"
        * extract_detected_objects (optional), option to save each detected object individually as an image and return an array of the objects' image paths
        * minimum_percentage_probability (optional, 50 by default), option to set the minimum percentage probability for nominating a detected object for output
        * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
        * display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
        * thread_safe (optional, False by default), enforce that the loaded detection model works across all threads if set to True, made possible by forcing all Tensorflow inference to run on the default graph

    The values returned by this function depend on the parameters passed in. The possible return values are stated below:

    - If extract_detected_objects = False (default) and output_type = "file" (default), you must pass 'output_image_path'
      as a string to the path where you want the detected image to be saved. The function will then return:
        1. an array of dictionaries, one per object detected in the image, each with the properties:
            * name (string)
            * percentage_probability (float)
            * box_points (list of x1, y1, x2 and y2 coordinates)

    - If extract_detected_objects = False (default) and output_type = "array", the function will return:
        1. a numpy array of the detected image
        2. an array of dictionaries as described above

    - If extract_detected_objects = True and output_type = "file" (default), you must pass 'output_image_path'
      as described above. The function will then return:
        1. an array of dictionaries as described above
        2. an array of string paths to the image of each object extracted from the image

    - If extract_detected_objects = True and output_type = "array", the function will return:
        1. a numpy array of the detected image
        2. an array of dictionaries as described above
        3. an array of numpy arrays of each object detected in the image

    :param input_image:
    :param output_image_path:
    :param input_type:
    :param output_type:
    :param extract_detected_objects:
    :param minimum_percentage_probability:
    :param display_percentage_probability:
    :param display_object_name:
    :param thread_safe:
    :return image_frame:
    :return output_objects_array:
    :return detected_objects_image_array:
    """
    if not self.__modelLoaded:
        raise ValueError("You must call the loadModel() function before making object detection.")

    try:
        model_detections = list()
        detections = list()
        image_copy = None
        detected_objects_image_array = []
        min_probability = minimum_percentage_probability / 100

        if input_type == "file":
            input_image = cv2.imread(input_image)
        elif input_type == "array":
            input_image = np.array(input_image)

        detected_copy = input_image
        image_copy = input_image

        if self.__modelType == "yolov3" or self.__modelType == "tinyyolov3":
            image_h, image_w, _ = detected_copy.shape
            detected_copy = preprocess_input(detected_copy, self.__yolo_model_image_size)
            model = self.__model_collection[0]
            yolo_result = model.predict(detected_copy)
            model_detections = retrieve_yolo_detections(yolo_result,
                                                        self.__yolo_anchors,
                                                        min_probability,
                                                        self.__nms_thresh,
                                                        self.__yolo_model_image_size,
                                                        (image_w, image_h),
                                                        self.numbers_to_names)
        elif self.__modelType == "retinanet":
            detected_copy = preprocess_image(detected_copy)
            detected_copy, scale = resize_image(detected_copy)
            model = self.__model_collection[0]
            boxes, scores, labels = model.predict_on_batch(np.expand_dims(detected_copy, axis=0))

            # correct for image scale
            boxes /= scale
            for box, score, label in zip(boxes[0], scores[0], labels[0]):
                # scores are sorted, so we can break
                if score < min_probability:
                    break
                detection_dict = dict()
                detection_dict["name"] = self.numbers_to_names[label]
                detection_dict["percentage_probability"] = score * 100
                detection_dict["box_points"] = box.astype(int).tolist()
                model_detections.append(detection_dict)

        counting = 0
        objects_dir = output_image_path + "-objects"
        for detection in model_detections:
            counting += 1
            label = detection["name"]
            percentage_probability = detection["percentage_probability"]
            box_points = detection["box_points"]

            if custom_objects is not None:
                if custom_objects[label] != "valid":
                    continue
            detections.append(detection)

            if not display_object_name:
                label = None
            if not display_percentage_probability:
                percentage_probability = None

            image_copy = draw_boxes(image_copy, box_points, display_box, label,
                                    percentage_probability, self.__box_color)

            if extract_detected_objects:
                splitted_copy = image_copy.copy()[box_points[1]:box_points[3],
                                                  box_points[0]:box_points[2]]
                if output_type == "file":
                    if not os.path.exists(objects_dir):
                        os.mkdir(objects_dir)
                    splitted_image_path = os.path.join(objects_dir,
                                                       detection["name"] + "-" + str(counting) + ".jpg")
                    cv2.imwrite(splitted_image_path, splitted_copy)
                    detected_objects_image_array.append(splitted_image_path)
                elif output_type == "array":
                    detected_objects_image_array.append(splitted_copy)

        image_copy = padded_fragment(image_copy)
        if output_type == "file":
            cv2.imwrite(output_image_path, image_copy)

        if extract_detected_objects:
            if output_type == "file":
                return detections, detected_objects_image_array
            elif output_type == "array":
                return image_copy, detections, detected_objects_image_array
        else:
            if output_type == "file":
                return detections
            elif output_type == "array":
                return image_copy, detections
    except:
        raise ValueError("Ensure you specified a correct input image, input type, output type and/or output image path")
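# A hedged usage sketch for the method above (assumes a detector instance
# prepared via ImageAI's usual setModelTypeAsRetinaNet()/setModelPath()/
# loadModel() flow; all paths here are illustrative):
detections = detector.detectObjectsFromImage_Me(
    input_image="images/street.jpg",
    output_image_path="images/street_detected.jpg",
    minimum_percentage_probability=40)
for d in detections:
    print(d["name"], d["percentage_probability"], d["box_points"])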
def analyze_fmaps(size=256):
    fmaps_path = os.path.join('exp', args.exp, 'fmaps')
    mkdir(fmaps_path, clean=True)

    images_path = os.path.join(fmaps_path, 'images')
    mkdir(images_path, clean=True)

    # feature maps
    for split in ['train', 'test']:
        inputs, targets = next(iter(loaders[split]))
        inputs, targets = to_var(inputs, volatile=True), to_var(targets, volatile=True)
        outputs, features = model.forward(inputs, returns='features')

        num_scales, num_channels = len(features), features[0].size(1)
        for s in trange(num_scales):
            input, feature = inputs[0][-1], features[s]
            for b in trange(args.batch, leave=False):
                image = resize_image(to_np(input[b]), size=size, channel_first=True)
                for c in trange(num_channels, leave=False):
                    fmap = resize_image(to_np(feature[b, c]), size=size, channel_first=True)
                    if np.min(fmap) < np.max(fmap):
                        # normalize the feature map to [0, 1]
                        fmap = (fmap - np.min(fmap)) / (np.max(fmap) - np.min(fmap))
                    image_path = os.path.join(images_path, '{0}-{1}-{2}-{3}.gif'.format(split, s, c, b))
                    save_images([image, fmap], image_path, channel_first=True)

    # visualization
    with open(os.path.join(fmaps_path, 'index.html'), 'w') as fp:
        for s in range(num_scales):
            for c in range(num_channels):
                print('<h3>scale [{0}] - channel [{1}]</h3>'.format(s + 1, c + 1), file=fp)
                print('<table border="1" style="table-layout: fixed;">', file=fp)
                for split in ['train', 'test']:
                    print('<tr>', file=fp)
                    for b in range(args.batch):
                        image_path = os.path.join('images', '{0}-{1}-{2}-{3}.gif'.format(split, s, c, b))
                        print('<td halign="center" style="word-wrap: break-word;" valign="top">', file=fp)
                        print('<img src="{0}" style="width:128px;">'.format(image_path), file=fp)
                        print('</td>', file=fp)
                    print('</tr>', file=fp)
                print('</table>', file=fp)
def process(input, output, size):
    im_fns = os.listdir(os.path.join(input, "image"))
    im_fns.sort()

    if not os.path.exists(os.path.join(output, "image")):
        os.makedirs(os.path.join(output, "image"))
    if not os.path.exists(os.path.join(output, "label")):
        os.makedirs(os.path.join(output, "label"))

    for im_fn in tqdm(im_fns):
        try:
            _, fn = os.path.split(im_fn)
            bfn, ext = os.path.splitext(fn)
            if ext.lower() not in ['.jpg', '.png']:
                continue

            gt_path = os.path.join(input, "label", 'gt_' + bfn + '.txt')
            img_path = os.path.join(input, "image", im_fn)

            img = cv2.imread(img_path)
            h, w, _ = img.shape
            re_im, _ = resize_image(img, size)
            re_size = re_im.shape

            polys = []
            with open(gt_path, 'r') as f:
                lines = f.readlines()
            for line in lines:
                splitted_line = line.strip().lower().split(',')
                x1, y1, x2, y2, x3, y3, x4, y4 = map(float, splitted_line[:8])
                poly = np.array([x1, y1, x2, y2, x3, y3, x4, y4]).reshape([4, 2])
                # scale polygon coordinates into the resized image
                poly[:, 0] = poly[:, 0] / w * re_size[1]
                poly[:, 1] = poly[:, 1] / h * re_size[0]
                poly = orderConvex(poly)
                polys.append(poly)
                # cv2.polylines(re_im, [poly.astype(np.int32).reshape((-1, 1, 2))],
                #               True, color=(0, 255, 0), thickness=2)

            res_polys = []
            for poly in polys:
                # delete polys with width less than 10 pixels
                if np.linalg.norm(poly[0] - poly[1]) < 10 or np.linalg.norm(poly[3] - poly[0]) < 10:
                    continue

                res = shrink_poly(poly)
                # for p in res:
                #     cv2.polylines(re_im, [p.astype(np.int32).reshape((-1, 1, 2))],
                #                   True, color=(0, 255, 0), thickness=1)

                res = res.reshape([-1, 4, 2])
                for r in res:
                    x_min = np.min(r[:, 0])
                    y_min = np.min(r[:, 1])
                    x_max = np.max(r[:, 0])
                    y_max = np.max(r[:, 1])
                    res_polys.append([x_min, y_min, x_max, y_max])

            cv2.imwrite(os.path.join(output, "image", fn), re_im)
            with open(os.path.join(output, "label", bfn) + ".txt", "w") as f:
                for p in res_polys:
                    line = ",".join(str(p[i]) for i in range(4))
                    f.writelines(line + "\r\n")

            # for p in res_polys:
            #     cv2.rectangle(re_im, (p[0], p[1]), (p[2], p[3]),
            #                   color=(0, 0, 255), thickness=1, lineType=cv2.LINE_AA)
            # cv2.imshow("demo", re_im)
            # cv2.waitKey(0)
        except:
            print("Error processing {}".format(im_fn))
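# A hedged sketch of the orderConvex helper assumed above: take the convex
# hull of the four points and rotate the order so it starts at the top-left
# corner. This approximates the original utility, which may differ in detail.
import numpy as np
import cv2

def orderConvex(p):
    hull = cv2.convexHull(p.astype(np.float32), clockwise=True).reshape(-1, 2)
    start = np.argmin(hull.sum(axis=1))  # top-left corner has the smallest x + y
    return np.roll(hull, -start, axis=0)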
def main(config):
    killer = GracefullKiller()

    # open config file
    try:
        file = open(config)
        cfg = json.load(file)
        file.close()
    except Exception as error:
        logger.critical(str(error), exc_info=1)
        return

    # give meaningful names to each sub config
    source_cfg = cfg["video_source"]
    broadcast_cfg = cfg["broadcaster"]
    pool_cfg = cfg["inferencing_pool"]
    worker_cfg = cfg["inferencing_worker"]
    flusher_cfg = cfg["flusher"]
    gps_cfg = cfg["gps"]
    gen_cfg = cfg["general"]

    # bind the requests module to a given network interface
    try:
        socket.inet_aton(gen_cfg["bind_ip"])
        session.mount("http://", SourceAddressAdapter(gen_cfg["bind_ip"]))
        logger.info("binding requests module to {} IP".format(gen_cfg["bind_ip"]))
    except OSError:
        logger.error("bind IP is invalid, resorting to default interface", exc_info=True)

    # start polling the GPS
    if gps_cfg["use_gps"]:
        wport = gps_cfg["write_port"]
        rport = gps_cfg["read_port"]
        br = gps_cfg["baudrate"]
        gps = ReadGPSData(wport, rport, br)
        gps.start()
    else:
        gps = None

    # workers on a separate process to run inference on the data
    logger.info("initializing pool w/ " + str(pool_cfg["workers"]) + " workers")
    output = DistributeFramesAndInfer(pool_cfg, worker_cfg)
    frames_queue, bc_queue, predicts_queue = output.get_queues()
    logger.info("initialized worker pool")

    # a single worker in a separate process to reassemble the data
    reassembler = BroadcastReassembled(bc_queue, broadcast_cfg, name="BroadcastReassembled")
    reassembler.start()

    # a single thread to flush the producing queue
    # when there are too many frames in the pipe
    flusher = Flusher(frames_queue, threshold=flusher_cfg["frame_count_threshold"], name="Flusher")
    flusher.start()

    # data aggregator that writes results to disk
    def results_writer():
        if len(gen_cfg["saved_data"]) > 0:
            df = pd.DataFrame(columns=["Date", "License Plate", "Coordinates"])
            while not killer.kill_now:
                time.sleep(0.01)
                try:
                    data = predicts_queue.get_nowait()
                except queue.Empty:
                    continue
                predicts = data["predicts"]
                date = data["date"]
                for lp in predicts:
                    if len(lp) > 0:
                        lp = " ".join(lp)
                        entry = {"Date": date, "License Plate": lp, "Coordinates": ""}
                        if gps:
                            entry["Coordinates"] = "{}, {}".format(gps.latitude, gps.longitude).upper()
                        df = df.append(entry, ignore_index=True)

            logger.info("dumping results to csv file {}".format(gen_cfg["saved_data"]))
            header = not os.path.isfile(gen_cfg["saved_data"])  # write the header only once
            with open(gen_cfg["saved_data"], "a") as f:
                df.to_csv(f, header=header)

    # data aggregator thread
    results_thread = td.Thread(target=results_writer)
    results_thread.start()

    if source_cfg["type"] == "camera":
        # import the module only when a camera is actually used
        import picamera

        # start the pi camera
        with picamera.PiCamera() as camera:
            # configure the camera
            camera.sensor_mode = source_cfg["sensor_mode"]
            camera.resolution = source_cfg["resolution"]
            camera.framerate = source_cfg["framerate"]
            logger.info("picamera initialized w/ mode={} resolution={} framerate={}".format(
                camera.sensor_mode, camera.resolution, camera.framerate))

            # start recording both to disk and to the queue
            camera.start_recording(
                output=source_cfg["output_file"],
                format="h264",
                splitter_port=0,
                bitrate=10000000,
            )
            camera.start_recording(
                output=output,
                format="mjpeg",
                splitter_port=1,
                bitrate=10000000,
                quality=95,
            )
            logger.info("started recording to file and to queue")

            # wait until SIGINT is detected
            while not killer.kill_now:
                camera.wait_recording(timeout=0.5, splitter_port=0)
                camera.wait_recording(timeout=0.5, splitter_port=1)
                logger.info("frames qsize: {}, broadcast qsize: {}, predicts qsize: {}".format(
                    frames_queue.qsize(), bc_queue.qsize(), predicts_queue.qsize()))

            # stop recording
            logger.info("gracefully exiting")
            camera.stop_recording(splitter_port=0)
            camera.stop_recording(splitter_port=1)
            output.stop()

    elif source_cfg["type"] == "file":
        # open video file
        video_reader = cv2.VideoCapture(source_cfg["input"])
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, source_cfg["frames_to_skip"])

        # get the number of frames and determine the target width
        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
        target_h = int(frame_h * source_cfg["scale_video"])
        target_w = int(frame_w * source_cfg["scale_video"])
        period = 1.0 / source_cfg["framerate"]
        logger.info("file-based video stream initialized w/ resolution={} framerate={} and {} skipped frames".format(
            (target_w, target_h), source_cfg["framerate"], source_cfg["frames_to_skip"]))

        # serve each frame to the workers iteratively
        last_log = time.time()
        for i in range(nb_frames):
            start = time.time()
            try:
                # write frame to queue
                _, frame = video_reader.read()
                if target_w != frame_w:
                    frame = resize_image(frame, target_w)
                jpeg = image_to_jpeg_bytes(frame)
                output.write(jpeg)
            except Exception:
                logger.error("unexpected error occurred", exc_info=True)
                break

            end = time.time()
            spent = end - start
            left = period - spent
            if left > 0:
                # sleep only for the remainder of the frame period to maintain the framerate
                time.sleep(left)

            # check if SIGINT has been sent
            if killer.kill_now:
                break

            # log once per second
            current = time.time()
            if current - last_log >= 1.0:
                logger.info("frames qsize: {}, broadcast qsize: {}, predicts qsize: {}".format(
                    frames_queue.qsize(), bc_queue.qsize(), predicts_queue.qsize()))
                last_log = current

        logger.info("gracefully exiting")
        video_reader.release()
        output.stop()

    if gps_cfg["use_gps"]:
        gps.stop()
    reassembler.stop()
    flusher.stop()
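# A hedged sketch of the image_to_jpeg_bytes helper these snippets assume,
# built on cv2.imencode (the helper's name matches the calls above; the body
# itself is an assumption):
import cv2

def image_to_jpeg_bytes(image, params=None):
    ok, buf = cv2.imencode(".jpg", image, params or [])
    if not ok:
        raise ValueError("JPEG encoding failed")
    return buf.tobytes()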
def cloud_infer(self):
    """Main method that runs in the loop."""
    try:
        data = self.in_queue.get_nowait()
    except queue.Empty:
        # logger.warning("no data available for worker")
        return

    #############################

    # extract frame
    frame_num = data["frame_num"]
    img = data["jpeg"]

    # preprocess/compress the image
    image = image_from_bytes(img)
    reduced = compress_image(image)
    byte_im = image_to_jpeg_bytes(reduced)

    # encode image
    img_enc = base64.b64encode(byte_im).decode("utf-8")
    img_dump = json.dumps({"img": img_enc})

    # make inference request
    resp = self.yolov3_api_request(img_dump)
    if not resp:
        return

    #############################

    # parse response
    r_dict = resp.json()
    boxes_raw = r_dict["boxes"]
    boxes = []
    for b in boxes_raw:
        box = BoundBox(*b)
        boxes.append(box)

    # purge bounding boxes with a low confidence score
    aux = []
    for b in boxes:
        label = -1
        for i in range(len(b.classes)):
            if b.classes[i] > self.yolov3_obj_thresh:
                label = i
        if label >= 0:
            aux.append(b)
    boxes = aux
    del aux

    # also scale the boxes for later use
    camera_source_width = image.shape[1]
    boxes640 = self.scale_bbox(boxes, self.yolov3_input_size_px, self.bounding_boxes_upscale_px)
    boxes_source = self.scale_bbox(boxes, self.yolov3_input_size_px, camera_source_width)

    #############################

    # recognize the license plates in case
    # any bounding boxes have been detected
    dec_words = []
    if len(boxes) > 0 and len(self.api_endpoint_crnn) > 0:
        # create a set of images of the detected license plates
        lps = []
        try:
            for b in boxes_source:
                lp = image[b.ymin:b.ymax, b.xmin:b.xmax]
                jpeg = image_to_jpeg_nparray(lp, [int(cv2.IMWRITE_JPEG_QUALITY), self.crnn_quality])
                lps.append(jpeg)
        except:
            logger.warning("encountered error while converting to jpeg")

        lps = pickle.dumps(lps, protocol=0)
        lps_enc = base64.b64encode(lps).decode("utf-8")
        lps_dump = json.dumps({"imgs": lps_enc})

        # make request to the CRNN API
        dec_lps = self.rcnn_api_request(lps_dump)
        dec_lps = self.reorder_recognized_words(dec_lps)
        for dec_lp in dec_lps:
            dec_words.append([word[0] for word in dec_lp])

    if len(dec_words) > 0:
        logger.info("Detected the following words: {}".format(dec_words))
    else:
        dec_words = [[] for i in range(len(boxes))]

    #############################

    # draw detections
    upscaled = resize_image(image, self.bounding_boxes_upscale_px)
    draw_image = draw_boxes(
        upscaled,
        boxes640,
        overlay_text=dec_words,
        labels=["LP"],
        obj_thresh=self.yolov3_obj_thresh,
    )
    draw_byte_im = image_to_jpeg_bytes(draw_image, [int(cv2.IMWRITE_JPEG_QUALITY), self.broadcast_quality])

    #############################

    # push data for further processing in the queue
    output = {
        "boxes": boxes,
        "frame_num": frame_num,
        "avg_yolo3_rtt": self.rtt_yolo3_ms,
        "avg_crnn_rtt": self.rtt_crnn_ms,
        "image": draw_byte_im,
    }
    self.bc_queue.put(output)

    # push predictions to write to disk
    if len(dec_words) > 0:
        timestamp = time.time()
        literal_time = time.ctime(timestamp)
        predicts = {"predicts": dec_words, "date": literal_time}
        self.predicts_queue.put(predicts)

    logger.info("Frame Count: {} - Avg YOLO3 RTT: {}ms - Avg CRNN RTT: {}ms - Detected: {}".format(
        frame_num, int(self.rtt_yolo3_ms), int(self.rtt_crnn_ms), len(boxes)))
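# A hedged sketch of the scale_bbox helper used above: rescale BoundBox corner
# coordinates from one reference width to another. The BoundBox attributes
# follow the b.xmin/b.ymin usage in the snippet; the body is an assumption,
# not the original implementation.
import copy

def scale_bbox(self, boxes, from_px, to_px):
    ratio = float(to_px) / float(from_px)
    scaled = []
    for b in boxes:
        sb = copy.copy(b)
        sb.xmin, sb.xmax = int(b.xmin * ratio), int(b.xmax * ratio)
        sb.ymin, sb.ymax = int(b.ymin * ratio), int(b.ymax * ratio)
        scaled.append(sb)
    return scaled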