def check_on_train_clip(video_id, weights, suffix, is_test=False):
    """Run the SSD model over one clip and save an annotated JPEG per frame.

    The model architecture is chosen from the weights file name: a path
    containing ``'resnet'`` selects ``build_resnet``, anything else selects
    ``build_model``.  Each predicted frame is drawn with
    ``display_img_with_rects`` and written to
    ``../output/predictions_ssd/<video_id><suffix>/NNNN.jpg`` where ``NNNN``
    is a 1-based running frame counter.

    Args:
        video_id: Identifier of the clip, passed to
            ``SSDDataset.generate_x_for_train_video_id``.
        weights: Path of the weights file to load into the model.
        suffix: String appended to ``video_id`` to form the output directory.
        is_test: Forwarded to ``SSDDataset``.
    """
    if 'resnet' in weights:
        model = build_resnet(input_shape)
    else:
        model = build_model(input_shape)

    model.compile(
        loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
        optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util, is_test=is_test)

    outdir = '../output/predictions_ssd/' + video_id + suffix
    os.makedirs(outdir, exist_ok=True)
    batch_size = 4

    frame_id = 0
    # Scope the worker pool so its threads are released when the clip is done
    # or when prediction fails part-way through.
    # NOTE(review): assumes ThreadPool is multiprocessing.pool.ThreadPool
    # (the ``processes=`` keyword matches it) - confirm against the imports.
    with ThreadPool(processes=8) as pool:
        batches = dataset.generate_x_for_train_video_id(video_id=video_id,
                                                        batch_size=batch_size,
                                                        pool=pool)
        for x_batch, _frames in batches:
            predictions = model.predict(x_batch)
            results = bbox_util.detection_out(predictions)
            # The final batch may be shorter than batch_size, so size the loop
            # from the predictions actually returned.
            for batch_id in range(predictions.shape[0]):
                print(results[batch_id])
                display_img_with_rects(
                    img=utils.preprocessed_input_to_img_resnet(x_batch[batch_id]) * 255,
                    results=results,
                    res_idx=batch_id)
                plt.savefig('{}/{:04}.jpg'.format(outdir, frame_id+1))
                plt.clf()
                frame_id += 1
                print(frame_id)
def generate_predictions_on_train_clips(weights, suffix, from_idx, count, use_requested_frames=False, is_test=False):
    """Predict SSD detections for a slice of clips and save them as ``.npy``.

    Clips are taken from ``dataset.video_clips`` in sorted key order,
    restricted to ``items[from_idx:from_idx + count]``.  For every frame the
    detections are stored at ``<outdir>/NNNN.npy`` (``NNNN`` is the 1-based
    frame number).  Frames whose output file already exists are skipped, so
    an interrupted run can be resumed.

    Args:
        weights: Path of the weights file; a name containing ``'resnet'``
            selects ``build_resnet``, otherwise ``build_model`` is used.
        suffix: Sub-directory name inserted into the output path.
        from_idx: Index of the first clip (in sorted order) to process.
        count: Number of clips to process.
        use_requested_frames: When true, the frames to predict for each clip
            are read from ``../output/used_frames.pkl`` (indexed by video id)
            instead of using every frame of the clip.
        is_test: Forwarded to ``SSDDataset``; also switches the output root
            to ``predictions_ssd_roi2_test``.
    """
    if 'resnet' in weights:
        model = build_resnet(input_shape)
    else:
        model = build_model(input_shape)

    model.compile(
        loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
        optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util, is_test=is_test)

    items = sorted(dataset.video_clips.keys())

    # Loaded lazily on first use and then reused for every clip; the file is
    # not touched at all when no clip needs it.
    requested_frames = None

    # Both worker pools are context-managed so their threads are released on
    # normal completion and on error.
    # NOTE(review): assumes ThreadPool is multiprocessing.pool.ThreadPool.
    with ThreadPool(processes=4) as pool, \
            concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        for video_id in items[from_idx: from_idx+count]:
            print(video_id)
            if is_test:
                outdir = '../output/predictions_ssd_roi2_test/{}/{}'.format(suffix, video_id)
            else:
                outdir = '../output/predictions_ssd_roi2/{}/{}'.format(suffix, video_id)
            os.makedirs(outdir, exist_ok=True)
            batch_size = 4

            if use_requested_frames:
                if requested_frames is None:
                    # NOTE: pickle executes code on load; this file must only
                    # ever come from a trusted pipeline step.
                    with open('../output/used_frames.pkl', 'rb') as frames_file:
                        requested_frames = pickle.load(frames_file)
                frames = requested_frames[video_id]
            else:
                frames = range(len(dataset.video_clips[video_id]))

            # Only predict frames that have no saved result yet.
            new_frames = [
                frame for frame in frames
                if not os.path.exists('{}/{:04}.npy'.format(outdir, frame+1))
            ]
            if not new_frames:
                continue

            batches = utils.parallel_generator(
                dataset.generate_x_for_train_video_id(video_id=video_id,
                                                      batch_size=batch_size,
                                                      frames=new_frames,
                                                      pool=pool),
                executor=executor)
            for x_batch, used_frames in batches:
                predictions = model.predict(x_batch)
                results = bbox_util.detection_out(predictions)
                for batch_id in range(predictions.shape[0]):
                    np.save('{}/{:04}.npy'.format(outdir, used_frames[batch_id]+1),
                            results[batch_id])
                    print(used_frames[batch_id])
def check(weights):
    """Interactively display SSD detections on batches from ``generate_ssd``.

    Builds the model with ``build_model``, loads ``weights`` and then, for
    every batch produced by ``SSDDataset.generate_ssd`` (non-training mode,
    always shuffled), shows each image of the batch with its detections via
    ``display_img_with_rects`` followed by ``plt.show()``.

    Args:
        weights: Path of the weights file to load into the model.
    """
    model = build_model(input_shape)
    model.compile(
        loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0, pos_cost_multiplier=1.0).compute_loss,
        optimizer=Adam(lr=1e-3))
    model.load_weights(weights)
    model.summary()

    priors = priors_from_model(model)
    bbox_util = BBoxUtility(NUM_CLASSES, priors)
    dataset = SSDDataset(bbox_util=bbox_util)

    batches = dataset.generate_ssd(batch_size=4, is_training=False, verbose=True, always_shuffle=True)
    for x_batch, _y_batch in batches:
        predictions = model.predict(x_batch)
        results = bbox_util.detection_out(predictions)
        # Iterate over the images actually predicted rather than a fixed 4,
        # so a short batch cannot index past the end of x_batch / results.
        for batch_id in range(predictions.shape[0]):
            display_img_with_rects(
                img=utils.preprocessed_input_to_img_resnet(x_batch[batch_id])*255,
                results=results,
                res_idx=batch_id)
            plt.show()
def _main():
    """Detect VOC objects in a profile's images and save annotated PNGs.

    Command-line options ``--login.user`` and ``--login.password`` are
    parsed; only the user name is used here (it is handed to
    ``_profile_images``).  The images are run through the SSD model returned
    by ``_load_model`` and, for every input image ``i``, a file ``<i>.png``
    is written to the current directory with a rectangle and a
    ``"<score>, <class>"`` caption for each detection whose confidence is at
    least 0.6.

    Each image is rendered once and saved once, with all of its kept
    detections drawn on the same picture.  An image without any detections
    is saved un-annotated instead of raising while indexing an empty result.
    """
    parser = argparse.ArgumentParser(description='instagram post describer')
    parser.add_argument('--login.user', dest='username', help='instagram username')
    parser.add_argument('--login.password', dest='password', help='instagram password')
    # parser.add_argument('--post_id', dest='post_id',
    #                     help='post id (click post then see after the url "https://www.instagram.com/p/")')
    args = parser.parse_args()

    image_paths = _profile_images(args.username)
    images, inputs = _load_images(image_paths)

    voc_classes = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat',
        'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor',
    ]
    # One extra class on top of the named ones; labels are looked up with
    # ``label - 1`` below, so label 0 is presumably the background class.
    num_classes = len(voc_classes) + 1
    bbox_util = BBoxUtility(num_classes)
    model = _load_model(num_classes)
    model.summary()

    preds = model.predict(inputs, batch_size=1, verbose=1)
    results = bbox_util.detection_out(preds)

    # Keep only detections with confidence of at least this value.
    conf_threshold = 0.6
    # One colour per label index; identical for every image, so build it once.
    colors = plt.cm.hsv(np.linspace(0, 1, num_classes)).tolist()

    for i, img in enumerate(images):
        annotated = PIL.Image.fromarray(np.uint8(img))
        draw = ImageDraw.Draw(annotated)
        img_height, img_width = img.shape[0], img.shape[1]

        # Columns used below: label, confidence, xmin, ymin, xmax, ymax.
        # An image with no detections may come back as an empty sequence,
        # which cannot be sliced by column.
        detections = np.asarray(results[i])
        if detections.size:
            kept = detections[detections[:, 1] >= conf_threshold]
        else:
            kept = ()

        for det in kept:
            label = int(det[0])
            score = det[1]
            # Box coordinates are multiplied by the image size to get pixels.
            xmin = int(round(det[2] * img_width))
            ymin = int(round(det[3] * img_height))
            xmax = int(round(det[4] * img_width))
            ymax = int(round(det[5] * img_height))

            label_name = voc_classes[label - 1]
            display_txt = '{:0.2f}, {}'.format(score, label_name)
            color = colors[label]
            color = (int(255 * color[0]), int(255 * color[1]), int(255 * color[2]))

            draw.rectangle(((xmin, ymin), (xmax, ymax)), outline=color)
            draw.text((xmin, ymin), display_txt)

        annotated.save('{}.png'.format(i))
class DetectCars(object):
    """
    Class for testing a trained SSD model and return boxes containing cars

    Arguments:
        class_names: A list of strings, each containing the name of a class.
                     The first name should be that of the background class
                     which is not used.

        model: An SSD model. It should already be trained .

        model_input_shape: The shape that the model expects for its input,
                           as a tuple, for example (300, 300, 3)

        image_shape: The shape of input image, as a tuple,
                     for example (720, 1280, 3), i.e. (height, width, channels)
    """

    # Number of most recent hit windows remembered across processed frames.
    _MAX_HIT_WINDOWS = 10

    def __init__(self, class_names, model, model_input_shape, image_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.bbox_util = BBoxUtility(self.num_classes)
        self.model_input_shape = model_input_shape
        # image_shape is documented as (height, width, channels), so the
        # height is element 0 and the width is element 1.
        self.image_height = image_shape[0]
        self.image_width = image_shape[1]
        # Rolling history of [xmin, ymin, xmax, ymax] boxes from recent frames.
        self.hit_window_list = []

    def search_cars(self, rgb_image, conf_thresh=0.6):
        """
        search cars on rgb image and return boxes contain cars and
        confidence larger than conf_thresh.

        # Arguments
            rgb_image: Image on which, ssd search cars.
            conf_thresh: Threshold of confidence. Any boxes with
                lower confidence are not visualized.

        # Returns
            A list of [xmin, ymin, xmax, ymax] pixel boxes, one for each
            detection labelled 'car' with confidence >= conf_thresh.
        """
        # Resize input image to model image.  cv2.resize takes the target
        # size as (width, height) while model_input_shape is
        # (height, width, channels), hence the swapped indices.
        model_im_size = (self.model_input_shape[1], self.model_input_shape[0])
        model_image = cv2.resize(rgb_image, model_im_size)

        # Use model to predict
        inputs = [image.img_to_array(model_image)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)
        y = self.model.predict(x)

        # Get boxes result
        results = self.bbox_util.detection_out(y)

        boxes_list = []
        if len(results) > 0 and len(results[0]) > 0:
            # Interpret output, only one frame is used.
            # Columns: label, confidence, xmin, ymin, xmax, ymax.
            detections = np.asarray(results[0])
            confident = detections[detections[:, 1] >= conf_thresh]

            for det in confident:
                class_num = int(det[0])
                if self.class_names[class_num] == 'car':
                    xmin = int(round(det[2] * self.image_width))
                    ymin = int(round(det[3] * self.image_height))
                    xmax = int(round(det[4] * self.image_width))
                    ymax = int(round(det[5] * self.image_height))
                    boxes_list.append([xmin, ymin, xmax, ymax])

        return boxes_list

    ############################### Augmented Display ###############################
    def add_heat(self, orignal_image, hit_windows):
        '''
        Loop over hit_windows to create a heatmap

        The heatmap has the height and width of orignal_image and counts,
        for every pixel, how many of the hit windows cover it.
        '''
        # ``float`` is what the removed ``np.float`` alias used to stand for.
        heatmap = np.zeros_like(orignal_image[:, :, 0]).astype(float)
        # Iterate through list of bboxes
        for xmin, ymin, xmax, ymax in hit_windows:
            # Add += 1 for all pixels inside each bbox
            # Each box takes the form [xmin, ymin, xmax, ymax]
            heatmap[int(ymin):int(ymax), int(xmin):int(xmax)] += 1

        # Return updated heatmap
        return heatmap

    def filter_heatmap(self, heatmap, threshold):
        '''
        apply threshold to heatmap to get rid of the interference

        Pixels strictly above threshold become 255, all others 0.
        '''
        filtered_heatmap = np.zeros_like(heatmap).astype(float)
        # Zero out pixels below the threshold
        filtered_heatmap[heatmap > threshold] = 255
        filtered_heatmap[heatmap <= threshold] = 0
        # Return thresholded map
        return filtered_heatmap

    def label_bboxes(self, filtered_heatmap):
        '''
        draw a box to indicate there is a car

        Returns one ((xmin, ymin), (xmax, ymax)) box per labelled region of
        filtered_heatmap, with plain Python int coordinates.
        '''
        labels = label(filtered_heatmap)
        # Iterate through all detected cars
        bbox_list = []
        for car_number in range(1, labels[1] + 1):
            # Find pixels with each car_number label value
            nonzero = (labels[0] == car_number).nonzero()
            # Identify x and y values of those pixels
            nonzeroy = np.array(nonzero[0])
            nonzerox = np.array(nonzero[1])
            # Define a bounding box based on min/max x and y.  Convert to
            # built-in ints: the corners are handed to cv2.rectangle, which
            # is not guaranteed to accept NumPy integer scalars.
            bbox = ((int(np.min(nonzerox)), int(np.min(nonzeroy))),
                    (int(np.max(nonzerox)), int(np.max(nonzeroy))))
            bbox_list.append(bbox)
        # Return the box
        return bbox_list

    def augmented_display(self, orignal_image, hit_windows):
        '''
        augmented display pipeline

        Returns a tuple (image copy with boxes drawn, heatmap,
        filtered heatmap).
        '''
        # Add heat to each box in box list
        heatmap = self.add_heat(orignal_image, hit_windows)

        # Apply threshold to help remove false positives
        filtered_heatmap = self.filter_heatmap(heatmap, threshold=1)

        # Find final boxes from heatmap using label function
        # Indicate the cars on image
        bbox_list = self.label_bboxes(filtered_heatmap)
        draw_img = np.copy(orignal_image)
        for bbox in bbox_list:
            cv2.rectangle(draw_img, bbox[0], bbox[1], (0, 255, 0), 3)

        return draw_img, heatmap, filtered_heatmap

    def image_prcess(self, rgb_image):
        '''
        process a single images

        Detects cars on rgb_image, pushes the resulting boxes into the
        rolling hit-window history and, when the history is not empty,
        returns a copy of the image with the boxes derived from it drawn
        on; otherwise returns rgb_image unchanged.
        '''
        hit_windows = self.search_cars(rgb_image)

        for xmin, ymin, xmax, ymax in hit_windows:
            if len(self.hit_window_list) >= self._MAX_HIT_WINDOWS:
                # History is full: drop the oldest box before adding the new one.
                self.hit_window_list = self.hit_window_list[1:]
            self.hit_window_list.append([xmin, ymin, xmax, ymax])

        if len(self.hit_window_list) > 0:
            rgb_image, _heatmap, _filtered_heatmap = self.augmented_display(
                rgb_image, self.hit_window_list)

        return rgb_image
inputs = [] images = [] image_names = os.listdir(base_path) for image_name in image_names: image = cv2.imread(base_path + image_name) images.append(image[:, :, ::-1]) image = image.astype(np.float32) image = cv2.resize(image, (img_height, img_width)) image[:, :, 0] -= 104.0 image[:, :, 1] -= 117.0 image[:, :, 2] -= 124.0 inputs.append(image) inputs = np.asarray(inputs) predictions = model.predict(inputs, batch_size=1, verbose=1) results = bbox_util.detection_out(predictions) for i, img in enumerate(images): det_label = results[i][:, 0] det_conf = results[i][:, 1] det_xmin = results[i][:, 2] det_ymin = results[i][:, 3] det_xmax = results[i][:, 4] det_ymax = results[i][:, 5] top_indices = [i for i, conf in enumerate(det_conf) if conf >= confidence] top_conf = det_conf[top_indices] top_label_indices = det_label[top_indices].tolist() top_xmin = det_xmin[top_indices] top_ymin = det_ymin[top_indices] top_xmax = det_xmax[top_indices]