def get_classification(self, image, img_width=64, img_height=64):
    """Determine the color of the traffic light in the image.

    Candidate regions come from ``self.cascade`` and are filtered with
    ``non_max_suppression_fast``. A candidate is classified only when the
    pixel values along its vertical centre line vary enough; the scan stops
    at the first candidate classified as red. If no candidate is classified,
    ``TrafficLight.UNKNOWN`` is returned.

    Args:
        image (cv::Mat): image containing the traffic light
        img_width (int): width each candidate is resized to before it is
            passed to the classifier
        img_height (int): height each candidate is resized to before it is
            passed to the classifier

    Returns:
        int: ID of traffic light color (specified in styx_msgs/TrafficLight)
    """
    detections = self.cascade.detectMultiScale(image, 1.4)
    detections = non_max_suppression_fast(detections, 0.2)

    state = TrafficLight.UNKNOWN
    for (x, y, w, h) in detections:
        # Sample the vertical centre line of the box, trimmed by 10% of the
        # box height at the top and at the bottom.
        margin = int(round(h * 0.1))
        centre_x = int(round(x + w / 2))
        centre_line = image[(y + margin):(y + h - margin), centre_x, :]

        # Skip candidates whose centre line is too uniform. Written as
        # ``not (... > 32)`` so a NaN deviation is skipped as well.
        if not (np.std(centre_line) > 32):
            continue

        patch = cv2.resize(image[y:(y + h), x:(x + w)],
                           (img_width, img_height))
        batch = np.expand_dims(patch, axis=0)
        with tf.get_default_graph().as_default():
            state = self.classifier.predict_classes(batch, verbose=False)

        # A red light settles the answer; no need to look at more candidates.
        if int(state) == TrafficLight.RED:
            break

    return int(state)
def generate_object_detections(predictions, perc_thresh=0.6):
    """Turn per-pixel prediction maps into bounding boxes.

    For every map in the batch, square windows of growing side length are
    slid over the map. A window becomes a candidate box when the sum of the
    values inside it, divided by the window area, reaches ``perc_thresh``.
    The candidates of each map are then passed through
    ``utils.non_max_suppression_fast`` with an overlap threshold of 0.3.

    Args:
        predictions: array indexed as ``predictions[index, :, :, :]``; the
            first axis enumerates the maps to process.
        perc_thresh: minimum (window sum / window area) for a window to be
            kept as a candidate box.

    Returns:
        list: one entry per map, each being whatever
        ``utils.non_max_suppression_fast`` returned for that map's candidates
        (rows are ``[i, j, i + size, j + size]`` in map index coordinates).
    """
    max_mosquito_size = 10
    all_boxes = []
    for index in range(predictions.shape[0]):
        print("generating prediction for ", index)
        # go through the pixel by pixel predictions generated by the net
        prediction = predictions[index, :, :, :]
        width, height = prediction.shape[0], prediction.shape[1]

        # Collect candidates in a plain list and build the array once;
        # stacking onto a NumPy array per hit copies the whole array each
        # time.
        candidates = []
        # Window side grows from 1 up to (but excluding) the smallest of the
        # map width, map height and the maximum object size.
        for window_size in range(1, min(width, height, max_mosquito_size)):
            area = window_size ** 2
            for i in range(0, width - window_size):
                for j in range(0, height - window_size):
                    box = prediction[i: i + window_size, j: j + window_size]
                    if (np.sum(box) / area) >= perc_thresh:
                        candidates.append(
                            [i, j, i + window_size, j + window_size])

        # float64 with shape (k, 4), including k == 0, so the suppression
        # helper always receives a 2-D array.
        boxes = np.array(candidates, dtype=float).reshape(-1, 4)
        boxes_after_suppression = utils.non_max_suppression_fast(boxes, 0.3)
        all_boxes.append(boxes_after_suppression)
    return all_boxes
def NMS(rects):
    """Apply non-maximum suppression to a list of detections.

    Args:
        rects: list of dicts in the format
            ``[{'bbox': [x1, y1, x2, y2], 'conf': 1}, ...]``.

    Returns:
        list: the entries of ``rects`` whose position in the list is
        contained in the result of ``utils.non_max_suppression_fast``
        (called with an overlap threshold of 0.5), in their original order.
    """
    bboxes = np.array([rect['bbox'] for rect in rects])
    kept = utils.non_max_suppression_fast(bboxes, 0.5)

    survivors = []
    for position, rect in enumerate(rects):
        if position in kept:
            survivors.append(rect)
    return survivors
def analize_frame(frame, hog, overlapThresh, winStride, padding, scale,
                  hitThreshold, finalThreshold, useMeanshiftGrouping,
                  tkline_size, resize, frame_debug_dir, detection_box_dir, ii,
                  detect_info_mx):
    """Detect people in one frame and save the crops and an annotated frame.

    Runs ``hog.detectMultiScale`` on a copy of the frame, filters the
    detections with ``non_max_suppression_fast`` and, for every remaining
    box, writes a resized crop to ``detection_box_dir``, draws the box on a
    second copy of the frame and appends one row to ``detect_info_mx``. The
    annotated copy is finally written to ``frame_debug_dir``.

    Args:
        frame: image to analyse; it is not modified.
        hog: detector exposing ``detectMultiScale``.
        overlapThresh: overlap threshold passed to the suppression helper.
        winStride, padding, scale, hitThreshold, finalThreshold,
        useMeanshiftGrouping: forwarded unchanged to ``hog.detectMultiScale``.
        tkline_size: thickness of the drawn rectangles; each crop is also
            shrunk by this many pixels on every side.
        resize: target size passed to ``cv2.resize`` for every crop.
        frame_debug_dir: directory receiving ``frame_<ii>.jpg``.
        detection_box_dir: directory receiving ``frame_<ii>_crop_<n>.jpg``.
        ii: frame number, used in file names and in the recorded rows.
        detect_info_mx: list extended in place with one row per detection:
            ``[frame_num, frame_file_path, crop_file_path, crop_num,
            xA, yA, xB, yB, id_tag, info]``.

    Returns:
        None.
    """
    detection_input = frame.copy()
    annotated = frame.copy()

    # detect people in the image
    (rects, _weights) = hog.detectMultiScale(
        detection_input,
        winStride=winStride,
        padding=padding,
        scale=scale,
        hitThreshold=hitThreshold,
        finalThreshold=finalThreshold,
        useMeanshiftGrouping=useMeanshiftGrouping)

    # Convert (x, y, w, h) to corner form (x1, y1, x2, y2) for suppression.
    rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
    pick = non_max_suppression_fast(rects, overlapThresh)

    frame_file_path = frame_debug_dir + '/frame_' + str(ii) + '.jpg'

    # draw the final bounding boxes and crop
    for jj, (xA, yA, xB, yB) in enumerate(pick):
        crop_file_path = (detection_box_dir + '/frame_' + str(ii) +
                          '_crop_' + str(jj) + '.jpg')
        id_tag = ''
        info = ''

        # The crop is taken from the untouched frame, inset by the line
        # thickness on every side.
        crop = frame[(yA + tkline_size):(yB - tkline_size),
                     (xA + tkline_size):(xB - tkline_size)]
        resized_crop = cv2.resize(crop, resize)
        cv2.imwrite(crop_file_path, resized_crop)

        cv2.rectangle(annotated, (xA, yA), (xB, yB), (0, 255, 0),
                      tkline_size)

        detect_info_mx.append([
            ii, frame_file_path, crop_file_path, jj, xA, yA, xB, yB, id_tag,
            info
        ])

    # show some information on the number of bounding boxes; the second
    # count is the number of boxes that survived suppression.
    logging.debug("[INFO] frame %s: %s boxes, %s kept after suppression",
                  ii, len(rects), len(pick))

    cv2.imwrite(frame_file_path, annotated)
def predict_boxes(self, model_name="trained-model"):
    """Show the objects detected in one randomly chosen training image.

    Loads the training image list from ``<self.dirname>/train.pkl``, picks
    one entry at random, runs inference on it, prints debugging information
    about the raw predictions, filters the decoded boxes with
    ``non_max_suppression_fast`` (threshold 0.3) and displays the image with
    the remaining boxes drawn on it.

    Args:
        model_name: name of the model passed to ``self.run_inference``.

    Returns:
        Nothing. Shows the pic with detections.
    """
    images_path = self.cfg.g("images_path")

    # NOTE: unpickling can execute arbitrary code; train.pkl must come from
    # a trusted training run.
    with open(self.dirname + "/train.pkl", "rb") as train_file:
        train_imgs = pickle.load(train_file)

    image_info = random.choice(list(train_imgs))
    image_name = image_info['img_name']
    p_conf, p_loc, p_probs = self.run_inference(image_name, model_name)
    non_zero_indices = np.where(p_conf > 0)[1]

    # DEBUGGING
    print("p_conf={} p_loc={} p_probs={}".format(p_conf.shape, p_loc.shape,
                                                 p_probs.shape))
    print("Non zero indices", non_zero_indices)
    for i in non_zero_indices:
        # Four consecutive values of p_loc are printed per index.
        print(i, ") location", p_loc[0][i * 4:i * 4 + 4], "probs",
              p_probs[0][i], "conf", p_conf[0][i])

    boxes, confs = self.convert_coordinates_to_boxes(p_loc, p_conf, p_probs)
    print("Boxes BEFORE NMS")
    for i, a in enumerate(zip(boxes, confs)):
        print(i, a)

    boxes = non_max_suppression_fast(boxes, 0.3)
    print("Boxes AFTER NMS")
    print(boxes)

    img = mpimg.imread(images_path + "/" + image_name)
    self.debug_draw_boxes(img, boxes, (0, 255, 0), 2)
    plt.figure(figsize=(8, 8))
    plt.imshow(img)
    plt.show()
def main():
    """Detect pedestrians in one image with a sliding window and an SVM.

    Most of the code was taken from
    https://www.pyimagesearch.com/2015/03/23/sliding-windows-for-object-detection-with-python-and-opencv/

    The image at ``settings.img_path`` is converted to grayscale and
    normalized, then scanned at every level of an image pyramid with a
    sliding window. The HOG descriptor of each window is classified by the
    SVM loaded from ``settings.checkpoint_path``; positive windows are
    collected, filtered with ``utils.non_max_suppression_fast`` and drawn on
    the image, which is shown in a window until a key is pressed. Timing
    statistics for the HOG computation and for the whole run are printed.
    """
    # Load and prepare the image
    image = utils.load_image_from_path(settings.img_path)

    cv2.startWindowThread()
    cv2.namedWindow("Window final")

    image = utils.to_grayscale(image)
    image = utils.normalize_image_max(image)

    count = 0  # number of windows whose HOG descriptor was computed
    total = 0  # accumulated HOG computation time in seconds
    final_bounding_boxes = []

    # Load the trained SVM
    svm = utils.load_checkpoint(settings.checkpoint_path)

    print("Time started ...")
    start = time.time()

    # Loop over the different scales
    # NOTE(review): boxes are stored in the coordinates of the pyramid level
    # they were found in and are drawn on the full-size image without being
    # rescaled - confirm this is intended.
    for resized_image in utils.get_pyramid(image, scale=settings.EPSILON):
        # Loop over the sliding window at the different positions
        for (x, y, window) in utils.get_sliding_window(
                resized_image,
                stepSize=(32, 64),
                windowSize=(settings.win_w, settings.win_h)):
            # Windows that do not match the configured window size are
            # ignored
            if (window.shape[0] != settings.win_h
                    or window.shape[1] != settings.win_w):
                continue

            cropped_image = utils.resize(window, [96, 48])  # rescale

            # Time only the HOG computation
            startParcial = time.time()
            cropped_image_hog = utils.get_hog_from_image(cropped_image,
                                                         normalize=False)
            parcial = time.time() - startParcial
            print(parcial)
            count = count + 1
            total = total + parcial

            # Classify the window
            prediction = svm.predict([cropped_image_hog])[0]
            if prediction == 1:
                # It is a pedestrian: keep its bounding box, recorded once
                bounding_box = (x, y, (x + settings.win_w),
                                (y + settings.win_h))
                final_bounding_boxes.append(bounding_box)
                print(bounding_box)

    final_bounding_boxes = utils.non_max_suppression_fast(
        final_bounding_boxes, 0.25)

    for (startX, startY, endX, endY) in final_bounding_boxes:
        try:
            # Green rectangle
            cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0),
                          2)
        except Exception as exc:
            # Best effort: report the box that could not be drawn and go on
            print(type(exc))
            print(startX)
            print(startY)
            print(endX)
            print(endY)

    final = time.time() - start

    print("Cantidad de bounding boxes despues de NMS --> {}".format(
        len(final_bounding_boxes)))
    cv2.imshow("Window final", image)
    cv2.waitKey(0)
    cv2.destroyWindow("Window final")

    print(count)
    print(total)
    print("Promedio: ")
    # No window may have been evaluated (e.g. image smaller than the
    # window); avoid dividing by zero in that case.
    print(total / count if count else 0.0)
    print("Time Finished ...")
    print(final)
config.rpn_stride * x, config.rpn_stride * y, config.rpn_stride * (x + w), config.rpn_stride * (y + h) ]) probs[cls_name].append(np.max(P_cls[0, ii, :])) bar.finish() all_dets = [] #开始画图 print("Begin to draw the result!") for key in bboxes: #对每个类别 bbox = np.array(bboxes[key]) #做一下非最大值抑制,对多个重叠的框保留最好的一个https://www.cnblogs.com/makefile/p/nms.html。感觉这里还可以优化并加速 new_boxes, new_probs = non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5) for jk in range(new_boxes.shape[0]): (x1, y1, x2, y2) = new_boxes[jk, :] #从特征图映射回原真实坐标,用rpn和roi的转换也估计是映射使用以及论文中偏移计算使用 (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2) print(real_x1, real_y1, real_x2, real_y2) #画框框 draw.rectangle(real_x1, real_y1, real_x2, real_y2, outline=(int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])))