def detect_pedestrians(self, img_path):
    """
    Detects pedestrians in an image.

    1) Slides a bounding-box window over the image
    2) Computes a detection score using weights from the boosted tree classifier
    3) Keeps the bounding box if the score is above a certain threshold
    4) Runs non-maximum suppression (NMS) on the bounding boxes

    Input:  img_path - path to image file
    Output: list of bounding boxes and scores
    """
    candidate_bbs = self._get_bounding_boxes(img_path)
    bbs = None
    if len(candidate_bbs) > 1:
        bbs = nms.non_max_suppression(np.asarray(candidate_bbs),
                                      overlapThresh=self.nms)
    else:
        # zero or one candidate needs no suppression (the original handled
        # only the empty case here and left a single candidate as None,
        # which looks like an oversight)
        bbs = candidate_bbs
    return candidate_bbs, bbs
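# The function above delegates the overlap test to
# nms.non_max_suppression(boxes, overlapThresh=...). That module is not shown
# in this section; the following is a minimal sketch of a greedy
# overlap-based NMS with the same signature, in the style of the widely used
# Malisiewicz/imutils implementation. The actual `nms` module used above may
# differ in details such as the returned dtype.
import numpy as np


def non_max_suppression_sketch(boxes, overlapThresh):
    """Greedy NMS: repeatedly keep one box and drop every remaining box
    whose overlap with it exceeds overlapThresh. Boxes are [x1, y1, x2, y2]."""
    if len(boxes) == 0:
        return boxes
    boxes = boxes.astype("float")
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)  # process boxes by their bottom edge
    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # intersection of the picked box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        # drop the picked box and every box that overlaps it too much
        idxs = np.delete(idxs, np.concatenate(
            ([last], np.where(overlap > overlapThresh)[0])))
    return boxes[pick].astype("int")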
def detect_lines(img):
    print('detecting lines')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 10, 50)
    # Earlier contour-based approach, kept for reference:
    # ret, thresh = cv2.threshold(gray, 50, 255, 0)
    # contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # contours_formatted = []
    # for cont in contours:
    #     cont = np.squeeze(cont, axis=1)
    #     max_x = max(cont[:, 0])
    #     max_y = max(cont[:, 1])
    #     min_x = min(cont[:, 0])
    #     min_y = min(cont[:, 1])
    #     contours_formatted.append([[min_x, min_y], [max_x, max_y]])
    # return contours_formatted
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 30, maxLineGap=250)
    lines_formatted = []
    try:
        lines = np.squeeze(lines, axis=1)
        lines_nms = non_max_suppression(lines, 0.9)
        for line in lines_nms:
            lines_formatted.append([[line[0], line[1]], [line[2], line[3]]])
    except Exception:
        # HoughLinesP returns None when nothing is found, so squeeze raises
        print('no lines detected')
    return lines_formatted
def nms_parsed(self, parsed):
    box = np.vstack(parsed['box'])
    cls = np.hstack(parsed['cls'])
    pts = np.vstack(parsed['pts'])
    # append the class scores as a fifth column so NMS can rank the boxes
    boxes = np.hstack([box, np.array(cls, ndmin=2).T])
    boxes, idx = nms.non_max_suppression(boxes, self.nms_thres)
    f_idx = boxes[:, 4] > self.nms_score
    return cls[idx[f_idx]], box[idx[f_idx]], pts[idx[f_idx]]
def display(img_in, img_out, y_pred, obj_threshold=0.3):
    y_pred[..., 4] = _sigmoid(y_pred[..., 4])
    y_pred[..., 5:] = y_pred[..., 4][..., np.newaxis] * _softmax(y_pred[..., 5:])
    y_pred[..., 5:] *= y_pred[..., 5:] > obj_threshold
    img = cv2.imread(img_in)
    cell_size = target_size[0] / S
    boxes = []
    for row, cell_row in enumerate(y_pred):
        for col, cell in enumerate(cell_row):
            # find bbox with object
            for b, obj_abox in enumerate(cell):
                if obj_abox[4] < obj_threshold:
                    continue
                # TODO:
                # x = (col + obj_abox[C + 1]) * cell_size
                # y = (row + obj_abox[C + 2]) * cell_size
                # w = obj_abox[C + 3] * target_size[0]
                # h = obj_abox[C + 4] * target_size[0]
                x = (col + _sigmoid(obj_abox[0])) * cell_size
                y = (row + _sigmoid(obj_abox[1])) * cell_size
                w = anchor_boxes[b][0] * np.exp(obj_abox[2]) * cell_size  # unit: image width
                h = anchor_boxes[b][1] * np.exp(obj_abox[3]) * cell_size  # unit: image height
                # print('x: {0} y: {1} w: {2} h: {3} row: {4} col: {5}'.format(x, y, w, h, row, col))
                x1 = x - w / 2
                y1 = y - h / 2
                x2 = x + w / 2
                y2 = y + h / 2
                # rescale to the original image size
                x1 = int(x1 * scale_x)
                x2 = int(x2 * scale_x)
                y1 = int(y1 * scale_y)
                y2 = int(y2 * scale_y)
                # print('({0}, {1}) ({2}, {3})'.format(x1, y1, x2, y2))
                boxes.append([x1, y1, x2, y2,
                              np.max(obj_abox[5:]),
                              int(np.argmax(obj_abox[5:]))])
    boxes = non_max_suppression(np.array(boxes), 0.3)
    for box in boxes:
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 4)
        font = cv2.FONT_HERSHEY_SIMPLEX
        cls = box[5]  # class index
        cv2.putText(img, CLASSES[cls], (box[0], box[1]), font, 0.5,
                    (0, 0, 255), 1)
    cv2.imwrite(img_out, img)
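# display() above relies on _sigmoid and _softmax helpers plus globals
# (target_size, S, anchor_boxes, scale_x, scale_y, CLASSES) defined elsewhere
# in the original file. Minimal definitions of the two helpers, assuming
# they behave like the standard logistic and softmax functions:
import numpy as np


def _sigmoid(x):
    # standard logistic function
    return 1.0 / (1.0 + np.exp(-x))


def _softmax(x, axis=-1):
    # subtract the max before exponentiating for numerical stability
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e / np.sum(e, axis=axis, keepdims=True)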
def run_detector(model, image, image_scale, rect_size,
                 threshold=0.05, nms=True, cuda=None):
    """
    :param model: Detector model to run
    :param image: Input image
    :param image_scale: (input_image_size / original_image_size);
        detections will be scaled by this factor
    :param rect_size: Detection rect size
    :param threshold: Detection score threshold
    :param nms: If true, apply non-max-suppression
    :return: Array of detection rects [x1, y1, x2, y2, score]
    """
    softmax_2d = nn.Softmax2d()
    image = image.transpose(2, 0, 1).astype(np.float32) / 255.
    image = torch.Tensor(image)
    if cuda:
        image = image.cuda()
    # `volatile` is deprecated in modern PyTorch; torch.no_grad() is the
    # current way to disable autograd during inference
    out = model(Variable(image, volatile=True).unsqueeze(0))
    out = softmax_2d(out)
    scores = out.data.cpu().numpy()[0, 1, ...]
    ys, xs = np.where(scores > threshold)
    rects = []
    for y, x in zip(ys, xs):
        score = scores[y, x]
        # multiply x, y by 2 because the output map is half the input resolution
        x, y = 2 * x, 2 * y
        rects.append([x, y, x + rect_size, y + rect_size, score])
    rects = np.array(rects).reshape(-1, 5)
    # scale rects back to the original image size
    rects[:, :4] /= image_scale
    if nms:
        rects = non_max_suppression(rects, overlap_thresh=0.4)
    return rects
def detect_faster(img, feature_type, downscale=1.5, visualize=False,
                  apply_nms=True, jobs=2):
    """Use a sliding window and image pyramid to detect objects, with
    batched sliding windows and batched classification."""
    detections = []  # detected candidates
    min_window_size = (int(config.img_width * 1.5),
                       int(config.img_height * 1.5))
    min_ws = (config.img_width, config.img_height)
    classifier = joblib.load(config.model_path)
    scale_level = 0
    for scaled_img in helper.pyramid(img, downscale, min_window_size):
        # detections at the current scale, used for visualization
        curr_scale_dets = []
        x_vec, y_vec, windows = helper.sliding_window_faster(
            scaled_img, min_ws, config.step_size)
        # note: the `jobs` parameter is unused; the pool size comes from config
        pool = Pool(processes=config.jobs)
        partial_compute_feature = partial(compute_feature,
                                          feature_type=feature_type)
        features = pool.map(partial_compute_feature, windows)
        features = np.array(features)
        pool.close()
        pool.join()
        preds = classifier.predict(features)
        confidence = classifier.decision_function(features)
        idxs = np.where(preds == 1)[0]
        print('Detected {} candidates with scale level {}'.format(
            len(idxs), scale_level))
        expand_rate = downscale ** scale_level
        for i in idxs:
            attr_vec = np.array([x_vec[i], y_vec[i],
                                 min_ws[0] + x_vec[i], min_ws[1] + y_vec[i]])
            curr_scale_dets.append((attr_vec, confidence[i]))
            # map the coordinates back to the original image scale
            attr_vec = np.around(attr_vec * expand_rate).astype('int')
            detections.append((attr_vec, confidence[i]))
        if visualize:
            im_copy = scaled_img.copy()
            for det, _ in curr_scale_dets:
                cv2.rectangle(im_copy, (det[0], det[1]), (det[2], det[3]),
                              color=(0, 0, 0), thickness=2)
            cv2.imshow('sliding window', im_copy)
            cv2.waitKey(20)
        scale_level += 1
    if not apply_nms:
        # without non-maximum suppression, return detections with confidence;
        # can be used for hard-negative mining and graph-cut segmentation
        return detections
    # apply non-maximum suppression
    dets = np.array([i[0] for i in detections])
    detections = non_max_suppression(dets, only_one=True)
    if visualize:
        im_copy = img.copy()
        helper.draw_detections(im_copy, detections)
    return detections
def detect(img, feature_type, downscale=1.5, visualize=False, apply_nms=True):
    """Use a sliding window and image pyramid to detect objects."""
    detections = []  # detected candidates
    min_window_size = (int(config.img_width * 1.5),
                       int(config.img_height * 1.5))
    min_ws = (config.img_width, config.img_height)
    classifier = joblib.load(config.model_path)
    scale_level = 0
    for scaled_img in helper.pyramid(img, downscale, min_window_size):
        # detections at the current scale, used for visualization
        curr_scale_dets = []
        for (x, y, im_window) in helper.sliding_window(scaled_img, min_ws,
                                                       config.step_size):
            # filter out windows that are not the standard size
            if im_window.shape[0] != config.img_height or \
                    im_window.shape[1] != config.img_width:
                continue
            # compute feature
            feature = compute_feature(im_window, feature_type)
            # prediction
            pred = classifier.predict([feature])[0]
            confidence = classifier.decision_function([feature])[0]
            if pred == 1:
                print('Detection at location ({}, {})'.format(x, y))
                print('scale level: {}, confidence: {}'.format(
                    scale_level, confidence))
                # TODO: potential bug for coordinate restore
                # attr_vec: [x1, y1, x2, y2]
                attr_vec = np.array([x, y, min_ws[0] + x, min_ws[1] + y])
                curr_scale_dets.append((attr_vec, confidence))
                expand_rate = downscale ** scale_level
                attr_vec = np.around(attr_vec * expand_rate).astype('int')
                # detection: ([x1, y1, x2, y2], confidence)
                detections.append((attr_vec, confidence))
            # visualize: draw the current sliding window
            # and the detections at this scale
            # TODO: show confidence on the bounding box
            if visualize:
                im_copy = scaled_img.copy()
                for det, _ in curr_scale_dets:
                    cv2.rectangle(im_copy, (det[0], det[1]), (det[2], det[3]),
                                  color=(0, 0, 0), thickness=2)
                cv2.rectangle(im_copy, (x, y),
                              (x + im_window.shape[1], y + im_window.shape[0]),
                              color=(255, 255, 255), thickness=2)
                cv2.imshow('sliding window', im_copy)
                cv2.waitKey(20)
        scale_level += 1
    if not apply_nms:
        # without non-maximum suppression, return detections with confidence;
        # can be used for hard-negative mining and graph-cut segmentation
        return detections
    # apply non-maximum suppression
    dets = np.array([i[0] for i in detections])
    detections = non_max_suppression(dets, only_one=True)
    if visualize:
        im_copy = img.copy()
        helper.draw_detections(im_copy, detections)
    return detections
def canny_edge(img, sigma=0.4, threshold=0):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    mag, *_, ori = get_derivatives(gray, sigma=sigma)
    edge = non_max_suppression(mag, ori, threshold=threshold)
    linked_edge = edge_link(edge, mag, ori)
    return linked_edge, edge
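# Note that non_max_suppression here is the edge-thinning variant used in
# Canny edge detection, not the bounding-box variant used elsewhere in this
# section: it suppresses every pixel whose gradient magnitude is not a local
# maximum along its gradient direction. A minimal sketch, assuming `ori`
# holds gradient orientations in radians:
import numpy as np


def non_max_suppression_edges_sketch(mag, ori, threshold=0):
    h, w = mag.shape
    edge = np.zeros_like(mag)
    # quantize orientations into 4 directions: 0, 45, 90, 135 degrees
    angle = np.rad2deg(ori) % 180.0
    for i in range(1, h - 1):
        for j in range(1, w - 1):
            a = angle[i, j]
            if a < 22.5 or a >= 157.5:    # roughly horizontal gradient
                n1, n2 = mag[i, j - 1], mag[i, j + 1]
            elif a < 67.5:                # 45-degree diagonal
                n1, n2 = mag[i - 1, j + 1], mag[i + 1, j - 1]
            elif a < 112.5:               # roughly vertical gradient
                n1, n2 = mag[i - 1, j], mag[i + 1, j]
            else:                         # 135-degree diagonal
                n1, n2 = mag[i - 1, j - 1], mag[i + 1, j + 1]
            # keep the pixel only if it is a local maximum above threshold
            if mag[i, j] >= n1 and mag[i, j] >= n2 and mag[i, j] > threshold:
                edge[i, j] = mag[i, j]
    return edge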
file = file.lower()
if file not in rects:
    continue
print("Now getting hard examples from " + file)
rect = rects[file]
video_path = path + file
file = file[0:2] + file[3:]
capture = cv2.VideoCapture(video_path)
# cv2.cv.CV_CAP_PROP_* constants are from the old OpenCV 2.x bindings;
# the modern equivalents are cv2.CAP_PROP_*
frame_count = capture.get(cv2.CAP_PROP_FRAME_COUNT)
pos = 15
while capture.isOpened():
    capture.set(cv2.CAP_PROP_POS_FRAMES, pos)
    ret, frame = capture.read()
    # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    objs, weights = hog.detectMultiScale(frame, scale=1.1)
    objs = non_max_suppression(objs)
    if len(objs) == 0:
        hoop = cv2.resize(
            frame[rect[1]:rect[1] + rect[3], rect[0]:rect[0] + rect[2]],
            (40, 40))
        cv2.imwrite('./pos_hard2/%s-%d.jpg' % (file, pos // step), hoop)
    else:
        for (x, y, w, h) in objs:
            # skip detections that overlap the annotated hoop location
            if rect[0] - 15 <= x <= rect[0] + 15 and \
                    rect[1] - 15 <= y <= rect[1] + 15:
                continue
            nohoop = cv2.resize(frame[y:y + h, x:x + w], (40, 40))
            count += 1
            cv2.imwrite('./neg_hard2/%s-%d.jpg' % (file, count), nohoop)
    if pos <= frame_count - step:
        pos += step  # assumed continuation: advance to the next sampled frame
    else:
        break
image_lists = glob.glob(
    '/home/chenxp/Documents/hitachi/paper1/sceneData/ICDAR2011/train-textloc/*.jpg')
image_filenames = []
for item in image_lists:
    image_filenames.append(item)
t = time.time()
# [boxes: ymin, xmin, ymax, xmax, score]
boxes = get_windows(image_filenames)
# do non-maximum suppression
boxes2 = []  # stores the bounding boxes after NMS
for i in range(len(boxes)):
    boxes2.append(nms.non_max_suppression(boxes[i], 0.8))
# test: plot the bounding boxes on the images
for img in image_filenames:
    im = Image.open(img)
    draw = ImageDraw.Draw(im)
    idx = image_filenames.index(img)
    for i in range(len(boxes2[idx])):
        # boxes are stored as (ymin, xmin, ymax, xmax); swap to (x, y) order
        x1, y1, x2, y2 = (int(boxes2[idx][i][1]), int(boxes2[idx][i][0]),
                          int(boxes2[idx][i][3]), int(boxes2[idx][i][2]))
        draw.rectangle((x1, y1, x2, y2), outline='red')
    im.save(os.path.join(
        '/home/chenxp/Documents/hitachi/paper1/sceneData/ICDAR2011/bbox2/',
        os.path.basename(img)))
def ciratefi(image, template, radii, scales, angles,
             t1=245, t2=245, t3=0.8, overlap_thresh=0.3):
    """
    Finds a grayscale query template 'template' in another grayscale image
    to analyze 'image', invariant to rotation, scale, translation,
    brightness and contrast.

    :param image: path of the image to analyze
    :param template: path of the query template
    :param radii: set of radii
    :param scales: set of scales to which the template is resized
    :param angles: set of rotation angles [deg]
    :param t1: threshold for the first step Cifi, range [0-255]
    :param t2: threshold for the second step Rafi, range [0-255]
    :param t3: threshold for the third step Tefi, range [0-1]
    :param overlap_thresh: threshold for the non-maximum suppression step
    :return: final pixel(s)
    """
    img = cv2.imread(image, 0)  # grayscale image
    tmp = cv2.imread(template, 0)
    img_y, img_x = img.shape[:2]
    tmp_y, tmp_x = tmp.shape[:2]

    # compute the CA matrix: the average grayscale of the pixels of A (img)
    # on the circle ring with radius rk centered at (x, y)
    kernel_ca = np.zeros((11, 11), np.float32)
    results = []
    for radius in radii:
        # (5, 5) is the center of the 11x11 kernel
        cv2.circle(kernel_ca, (5, 5), radius, 1, 1)
        pk = np.sum(kernel_ca)
        conv = convolve2d(img, kernel_ca, mode='same', fillvalue=0)
        value = conv / pk
        results.append(value)
        kernel_ca.fill(0)
    ca = np.asarray(results)

    # compute the CT matrix: the average grayscale of the pixels of the
    # template (T) at scale si on the circle ring rk
    # CT[i, k] | i = scales si | k = radii rk
    ct = np.zeros((len(scales), len(radii)), np.float32)
    for scale in scales:
        # the template is resized for each scale
        tmp_x_scale = round(tmp_x * scale)
        tmp_y_scale = round(tmp_y * scale)
        tmp_resize = cv2.resize(tmp, None, fy=scale, fx=scale,
                                interpolation=cv2.INTER_CUBIC)
        kernel_cq = np.zeros((tmp_y_scale, tmp_x_scale), np.float32)
        for radius in radii:
            # x0, y0: central pixel of T (template)
            x0 = round(tmp_x_scale / 2)
            y0 = round(tmp_y_scale / 2)
            cv2.circle(kernel_cq, (x0, y0), radius, 1, 1)
            pk = np.sum(kernel_cq)
            multi = np.multiply(tmp_resize, kernel_cq)
            value = np.sum(multi) / pk
            ct[scales.index(scale)][radii.index(radius)] = value
            kernel_cq.fill(0)

    # compute the circular sampling correlation CisCorr
    # (np.float / np.int in the original were removed in NumPy 1.24)
    cis_corr = np.zeros((img_y, img_x), float)  # circular sampling correlation CisCorr
    cis_ps = np.zeros((img_y, img_x), int)  # the probable scale of a pixel (x, y) -> best matching scale
    ct_norm = (ct - np.mean(ct)) / (np.std(ct) * len(ct))
    for y in range(0, img_y):
        for x in range(0, img_x):
            # normalize the vectors so the correlation results fall in [-1, 1]
            ca_norm = (ca[:, y, x] - np.mean(ca[:, y, x])) / (
                np.std(ca[:, y, x]) * len(ca[:, y, x]))
            results = []
            for scale in scales:
                results.append(np.abs(np.correlate(
                    ct_norm[scales.index(scale)], ca_norm, 'full')))
            results = np.asarray(results)
            cis_corr[y][x] = np.amax(results)
            # index of the scale that maximizes the correlation
            cis_ps[y][x] = np.unravel_index(np.argmax(results, axis=None),
                                            results.shape)[0]

    # image resulting from the first filter 'Cifi'
    cifi_img = np.zeros((img_y, img_x))
    cifi_img = cv2.normalize(cis_corr, cifi_img, 0, 255, cv2.NORM_MINMAX)
    cifi_img[cifi_img < t1] = 0
    first_pixels_y, first_pixels_x = np.nonzero(cifi_img >= t1)
    print("Number of first-stage pixels =", len(first_pixels_y))
    img_color = cv2.imread(image, cv2.IMREAD_COLOR)
    img_color[np.nonzero(cifi_img >= t1)] = [0, 33, 166]
    cv2.imwrite('first.jpg', img_color)

    # compute the RT vector: T is radially sampled, yielding a vector RT
    # with m (angles) features
    length = max(radii)
    p1_tmp = (round(tmp_x / 2), round(tmp_y / 2))  # center point of the template
    kernel_rq = np.zeros((tmp_y, tmp_x), np.float32)
    rt = np.zeros(len(angles), np.float32)
    for angle in angles:
        theta = angle * math.pi / 180.0
        p2_tmp = (round(p1_tmp[0] + length * math.cos(theta)),
                  round(p1_tmp[1] + length * math.sin(theta)))
        cv2.line(kernel_rq, p1_tmp, p2_tmp, 1, 1)
        multi = np.multiply(tmp, kernel_rq)
        value = np.sum(multi) / length
        rt[angles.index(angle)] = value
        kernel_rq.fill(0)

    # SLOW!
    # compute RA: the length of the radial lines is calculated according to
    # the largest circle radius and the probable scale si computed by Cifi
    kernel_ra = np.zeros((img_y, img_x), np.float32)
    ra = np.zeros((img_y, img_x, len(angles)), np.float32)
    cos = []
    sin = []
    for angle in angles:
        cos.append(math.cos(angle * math.pi / 180.0))
        sin.append(math.sin(angle * math.pi / 180.0))
    # process image A (tqdm draws a progress bar)
    for (x1, y1) in tqdm(zip(first_pixels_x, first_pixels_y),
                         total=len(first_pixels_x)):
        for angle in angles:
            index_ps = cis_ps[y1][x1]  # ps = probable scale
            length_ = scales[index_ps] * length
            # theta = angle * math.pi / 180.0
            x2 = round(x1 + length_ * cos[angles.index(angle)])
            y2 = round(y1 + length_ * sin[angles.index(angle)])
            cv2.line(kernel_ra, (x1, y1), (int(x2), int(y2)), 1, 1)
            multi = np.multiply(img, kernel_ra)
            value = np.sum(multi) / length_
            ra[y1][x1][angles.index(angle)] = value
            kernel_ra.fill(0)

    # compute the radial sampling correlation RasCorr
    ras_corr = np.zeros((img_y, img_x), float)  # RasCorr at the best matching angle
    ras_ang = np.zeros((img_y, img_x), int)  # the probable angle of a pixel (x, y) -> best matching angle
    rt_norm = (rt - np.mean(rt)) / (np.std(rt) * len(rt))
    for (x, y) in zip(first_pixels_x, first_pixels_y):
        ra_norm = (ra[y, x] - np.mean(ra[y, x])) / (
            np.std(ra[y, x]) * len(ra[y, x]))
        results = []
        for angle in angles:
            cshift = np.roll(rt_norm, angles.index(angle))
            results.append(np.abs(np.correlate(cshift, ra_norm, 'full')))
        results = np.asarray(results)
        ras_corr[y][x] = np.amax(results)
        # index of the angle that maximizes the correlation
        ras_ang[y][x] = np.unravel_index(np.argmax(results, axis=None),
                                         results.shape)[0]

    # image resulting from the second filter 'Rafi'
    rafi_img = np.zeros((img_y, img_x))
    rafi_img = cv2.normalize(ras_corr, rafi_img, 0, 255, cv2.NORM_MINMAX)
    rafi_img[rafi_img < t2] = 0
    second_pixels_y, second_pixels_x = np.nonzero(rafi_img >= t2)
    print("Number of second-stage pixels =", len(second_pixels_y))
    img_color = cv2.imread(image, cv2.IMREAD_COLOR)
    for (x, y) in zip(second_pixels_x, second_pixels_y):
        top_left = x, y
        bottom_right = (top_left[0] + 2, top_left[1] + 2)
        cv2.rectangle(img_color, top_left, bottom_right, (120, 200, 100), 2)
        # print(x, y, scales[cis_ps[y][x]], angles[ras_ang[y][x]])
    cv2.imwrite('second.jpg', img_color)

    # rotate and scale the image, then run template matching
    final_pixelX = []
    final_pixelY = []
    # half the template size; clamped so the template never ends up larger
    # than the cropped image
    delta_x = round(tmp_x / 2)
    delta_y = round(tmp_y / 2)
    for (x, y) in zip(second_pixels_x, second_pixels_y):
        ang = angles[ras_ang[y][x]]
        sca = scales[cis_ps[y][x]]
        M = cv2.getRotationMatrix2D((x, y), ang, sca)
        affine = cv2.warpAffine(img.copy(), M, (img_x, img_y))
        a = y - delta_y
        b = x - delta_x
        if a < 0:
            a = 0
        if b < 0:
            b = 0
        cropped = affine[a:(a + tmp_y), b:(b + tmp_x)]
        res = cv2.matchTemplate(cropped, tmp, cv2.TM_CCORR_NORMED)
        # print("res:", np.mean(res))
        if np.mean(res) > t3:
            final_pixelX.append(x)
            final_pixelY.append(y)

    # process the results: choose the best rectangle
    boxes = []
    for (x, y) in zip(final_pixelX, final_pixelY):
        sca = scales[cis_ps[y][x]]
        # rescale the deltas to the best-matching scale si for the crop
        delta_x_scale = round(delta_x * sca)
        delta_y_scale = round(delta_y * sca)
        top_left = (x - delta_x_scale, y - delta_y_scale)
        bottom_right = (x + delta_x_scale, y + delta_y_scale)
        boxes.append([top_left[0], top_left[1],
                      bottom_right[0], bottom_right[1]])
    boxes = np.asarray(boxes)
    pick = nms.non_max_suppression(boxes, overlap_thresh)
    print("Number of final pixels =", len(pick))
    img_color = cv2.imread(image, cv2.IMREAD_COLOR)
    for (startX, startY, endX, endY) in pick:
        cv2.rectangle(img_color, (startX, startY), (endX, endY),
                      (120, 200, 100), 1)
    cv2.imwrite('final.jpg', img_color)
    return pick
def process_frame(frame, car_count, car_count_up, detected_points_dwn,
                  detected_points_up, frame_cnt):
    # Read and preprocess an image.
    img = frame.copy()
    rows = img.shape[0]
    cols = img.shape[1]
    inp = cv.resize(img, (300, 300))
    inp = inp[:, :, [2, 1, 0]]  # BGR2RGB

    # Run the model
    out = sess.run([
        sess.graph.get_tensor_by_name('num_detections:0'),
        sess.graph.get_tensor_by_name('detection_scores:0'),
        sess.graph.get_tensor_by_name('detection_boxes:0'),
        sess.graph.get_tensor_by_name('detection_classes:0')
    ], feed_dict={
        'image_tensor:0': inp.reshape(1, inp.shape[0], inp.shape[1], 3)
    })

    if nms == "False":  # `nms` is a string flag, e.g. parsed from the command line
        boxes = out[2][0]
        scores = out[1][0]
        classes = out[3][0]
    else:
        boxes, scores, classes = non_max_suppression(out[2][0], out[1][0],
                                                     out[3][0], cols, rows)

    # Visualize detected bounding boxes.
    num_detections = len(classes)
    for i in range(num_detections):
        classId = int(classes[i])
        if classId is not None:
            score = float(scores[i])
            bbox = [float(v) for v in boxes[i]]
            if score > 0.4:
                x = bbox[1] * cols
                y = bbox[0] * rows
                right = bbox[3] * cols
                bottom = bbox[2] * rows
                x_center = int((x + right) / 2)
                y_center = int((y + bottom) / 2)
                # counting lines
                cv.line(img, (210, 250), (590, 250), (0, 0, 255), 1)  # for downstream
                cv.line(img, (400, 200), (620, 200), (255, 0, 0), 1)  # for upstream
                if (bottom - y) / (right - x) > 0.8:
                    clr = find_clr(frame, y_center, x_center)
                else:
                    clr = ""
                # for upstream
                if y_center > 200 and x_center > 400:
                    img = edit_frame(img, x_center, y_center, x, y, right,
                                     bottom, COCO_CLASSES_LIST[classId], clr,
                                     box_clr="green")
                elif y_center == 199 and x_center > 400:
                    print("true")
                    if len(detected_points_up) != 0:
                        d = distance.euclidean(detected_points_up[0],
                                               (x_center, y_center))
                        print(d)
                        if d > 5 \
                                and frame_cnt - detected_points_up[1] > 10:
                            detected_points_up = []
                            detected_points_up.append((x_center, y_center))
                            detected_points_up.append(frame_cnt)
                            car_count_up += 1
                    else:
                        detected_points_up.append((x_center, y_center))
                        detected_points_up.append(frame_cnt)
                        car_count_up += 1
                    img = edit_frame(img, x_center, y_center, x, y, right,
                                     bottom, COCO_CLASSES_LIST[classId], clr,
                                     box_clr="red")
                # for downstream
                if y_center <= 250:
                    img = edit_frame(img, x_center, y_center, x, y, right,
                                     bottom, COCO_CLASSES_LIST[classId], clr,
                                     box_clr="green")
                elif y_center == 251:
                    if len(detected_points_dwn) != 0:
                        d = distance.euclidean((x_center, y_center),
                                               detected_points_dwn[0])
                        if d > 3 and (right < 630 and bottom < 350) \
                                and frame_cnt - detected_points_dwn[1] > 10:
                            detected_points_dwn = []
                            detected_points_dwn.append((x_center, y_center))
                            detected_points_dwn.append(frame_cnt)
                            car_count += 1
                    else:
                        detected_points_dwn.append((x_center, y_center))
                        detected_points_dwn.append(frame_cnt)
                        car_count += 1
                    img = edit_frame(img, x_center, y_center, x, y, right,
                                     bottom, COCO_CLASSES_LIST[classId], clr,
                                     box_clr="red")

    cv.putText(img, "vehicles crossing red line (down): " + str(car_count),
               (280, 25), cv.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), 1)
    cv.putText(img, "vehicles crossing blue line (up): " + str(car_count_up),
               (280, 35), cv.FONT_HERSHEY_PLAIN, 1.0, (255, 0, 0), 1)
    cv.imwrite("out_img.jpg", img)
    return img, car_count, car_count_up, detected_points_dwn, detected_points_up
# from nms import non_max_suppression_slow as non_max_suppression
from nms import non_max_suppression_fast as non_max_suppression

with open('locations.json', 'r') as f:
    locations = json.load(f)


def convertToNumPy(l):
    return {'name': l[0], 'value': numpy.array(l[1])}


locationsNP = map(convertToNumPy, locations.items())

results = []
totalTime = 0
for location in locationsNP:
    name = location['name']
    val = location['value']
    beforeSize = val[:, 0].size
    start_time = time.time()
    pick = non_max_suppression(val, 0.3)  # make sure parameters match the Node version
    totalTime += (time.time() - start_time)
    numberSuppressed = beforeSize - pick[:, 0].size
    print(f'file {name} number of boxes suppressed by: {numberSuppressed}')
    results.append({'file': name, 'suppressed': numberSuppressed})

print(f'total non-maximum suppression processing time: '
      f'{totalTime * NS_PER_SEC} nanoseconds')
    ('images/sarah4.jpg', np.array([
        (66, 100, 244, 278), (83, 100, 261, 278),
        (66, 117, 244, 295), (83, 117, 261, 295),
        (66, 133, 244, 311), (83, 133, 261, 311)])),
]

# loop over the images
for (fn, boxes) in images:
    # load the image and clone it
    print(f'[x] {len(boxes)} initial bounding boxes')
    image = cv2.imread(fn)
    orig = image.copy()

    # loop over the bounding boxes for each image and draw them
    for (start_x, start_y, end_x, end_y) in boxes:
        cv2.rectangle(orig, (start_x, start_y), (end_x, end_y), (0, 0, 255), 2)

    # perform non-maximum suppression on the bounding boxes
    pick = non_max_suppression(boxes, np.arange(len(boxes)))
    print(f'[x] after applying non-maximum suppression, {len(pick)} bounding boxes')

    # loop over the picked bounding boxes and draw them
    for (start_x, start_y, end_x, end_y) in boxes[pick]:
        cv2.rectangle(image, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)

    # display the images
    cv2.imshow('Original', orig)
    cv2.imshow('After NMS', image)
    cv2.waitKey(0)
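# Unlike the overlap-threshold variants elsewhere in this section, the
# non_max_suppression called above takes per-box scores as its second
# argument (np.arange(len(boxes)) is used as a stand-in) and returns the
# *indices* of the kept boxes, which is why the result is used as
# boxes[pick]. A sketch of that score-ordered, index-returning shape,
# assuming the same greedy strategy as the earlier sketch:
import numpy as np


def non_max_suppression_indices_sketch(boxes, scores, overlap_thresh=0.3):
    if len(boxes) == 0:
        return []
    boxes = boxes.astype("float")
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(scores)  # lowest score first; pick from the end
    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        # overlap of the highest-scoring remaining box with the others
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        idxs = np.delete(idxs, np.concatenate(
            ([last], np.where(overlap > overlap_thresh)[0])))
    return pick  # indices into boxes, usable as boxes[pick]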
                if is_weak[i + 1, j + 1]:
                    linked_edge[i + 1, j + 1] = 1
                if is_weak[i - 1, j - 1]:
                    linked_edge[i - 1, j - 1] = 1
            elif ori[i, j] == math.pi * 3 / 4:
                if is_weak[i + 1, j + 1]:
                    linked_edge[i + 1, j + 1] = 1
                if is_weak[i - 1, j - 1]:
                    linked_edge[i - 1, j - 1] = 1
            elif ori[i, j] > math.pi * 3 / 4:
                # note: this branch repeats the diagonal checks above and adds
                # the vertical neighbors; the duplicated diagonal cases may be
                # a copy-paste artifact in the original
                if is_weak[i + 1, j + 1]:
                    linked_edge[i + 1, j + 1] = 1
                if is_weak[i - 1, j - 1]:
                    linked_edge[i - 1, j - 1] = 1
                if is_weak[i + 1, j]:
                    linked_edge[i + 1, j] = 1
                if is_weak[i - 1, j]:
                    linked_edge[i - 1, j] = 1
    return linked_edge


if __name__ == '__main__':
    from PIL import Image
    from derivatives import get_derivatives
    from nms import non_max_suppression

    gray = Image.open("../Madison.png").convert('L')
    mag, *_, ori = get_derivatives(gray, sigma=1.0)
    edge = non_max_suppression(mag, ori, threshold=3)
    linked_edge = edge_link(edge, mag, ori)
def main():
    net12 = Net12()
    net12.load_state_dict(torch.load(
        args.net12_checkpoint, map_location=lambda storage, loc: storage))
    net12.eval()
    if args.cuda:
        net12.cuda()

    net24 = None
    if args.net24_checkpoint:
        net24 = Net24()
        net24.load_state_dict(torch.load(
            args.net24_checkpoint, map_location=lambda storage, loc: storage))
        net24.eval()
        if args.cuda:
            net24.cuda()

    with open(os.path.join(args.fddb_dir, 'FDDB-folds/FDDB-fold-01.txt')) as f:
        file_list = f.read().split('\n')[:-1]

    gt = read_gt(os.path.join(args.fddb_dir,
                              'FDDB-folds/FDDB-fold-01-ellipseList.txt'))

    output_lines = []
    n_rects = []
    for image_path in tqdm(file_list):
        # scipy.misc.imread is deprecated (removed in SciPy 1.2);
        # imageio.imread is the modern replacement
        image = scipy.misc.imread(
            os.path.join(args.fddb_dir, 'images', image_path) + '.jpg',
            mode='RGB')
        rects = run_detector_pyramid(net12, image, 12, min_face_size=24,
                                     threshold=0.05, pyramid_factor=0.8,
                                     cuda=args.cuda)
        if args.net24_checkpoint:
            rects = filter_rects_with_24net(net24, image, rects,
                                            threshold=0.05)
        rects = non_max_suppression(rects, overlap_thresh=0.7)
        n_rects.append(len(rects))
        output_lines.append(image_path)
        output_lines.append(str(len(rects)))
        ellipses = []
        for x1, y1, x2, y2, score in rects:
            # convert the detection rect to the FDDB ellipse format
            major_axis_radius = (x2 - x1) * 0.5
            minor_axis_radius = (y2 - y1) * 0.5 * 1.2
            angle = 0.0
            center_x = (x1 + x2) * 0.5
            center_y = (y1 + y2) * 0.5 - minor_axis_radius * 0.2
            output_lines.append('{} {} {} {} {} {}'.format(
                major_axis_radius, minor_axis_radius, angle,
                center_x, center_y, score))
            ellipses.append(Ellipse([center_x, center_y],
                                    major_axis_radius * 2,
                                    minor_axis_radius * 2,
                                    angle, fc='none',
                                    lw=int(2 ** (2 * score)), ec='b'))
        if args.debug:
            fig, ax = plt.subplots(1)
            ax.imshow(image)
            for e in ellipses:
                ax.add_patch(e)
            for center_x, center_y, major_axis_radius, minor_axis_radius, \
                    angle in gt[image_path]:
                ax.add_patch(Ellipse([center_x, center_y],
                                     major_axis_radius * 2,
                                     minor_axis_radius * 2,
                                     angle, fc='none', lw=3, ec='r'))
            plt.show()

    print('falses per image: ', sum(n_rects) / len(n_rects))
    with open(os.path.join(args.output_dir, 'fold-01-out.txt'), 'w') as f:
        f.write('\n'.join(output_lines))
    call([os.path.join(args.fddb_dir, 'evaluation/runEvaluate.pl'),
          args.output_dir])
# detect people in the image
(rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
                                        padding=(8, 8), scale=1.05)

# draw the original bounding boxes
for (x, y, w, h) in rects:
    cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

# apply non-maxima suppression to the bounding boxes using a
# fairly large overlap threshold to try to maintain overlapping
# boxes that are still people
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
pick = non_max_suppression(rects, overlapThresh=0.65)

# draw the final bounding boxes
for (xA, yA, xB, yB) in pick:
    cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

# show some information on the number of bounding boxes
print("[INFO] {}: {} original boxes, {} after suppression".format(
    filename, len(rects), len(pick)))

# show the output images
f1 = plt.figure(1)
plt.imshow(orig)
plt.title('Original')
f2 = plt.figure(2)
plt.imshow(image)
def detect(self, frame):
    original_frame_shape = frame.shape
    aspect_ratio = self.shots["aspect_ratio"]
    c = min(frame.shape[0], frame.shape[1] / aspect_ratio)
    slice_h_shift = r((frame.shape[0] - c) / 2)
    slice_w_shift = r((frame.shape[1] - c * aspect_ratio) / 2)
    # crop the frame to the target aspect ratio
    if slice_w_shift != 0 and slice_h_shift == 0:
        frame = frame[:, slice_w_shift:-slice_w_shift]
    elif slice_w_shift == 0 and slice_h_shift != 0:
        frame = frame[slice_h_shift:-slice_h_shift, :]
    elif slice_w_shift != 0 and slice_h_shift != 0:
        raise ErrorSignal(math_is_wrong_error)

    frames = []
    for s in self.shots["shots"]:
        frames.append(cv2.resize(
            frame[r(s[1] * frame.shape[0]):r((s[1] + s[3]) * frame.shape[0]),
                  r(s[0] * frame.shape[1]):r((s[0] + s[2]) * frame.shape[1])],
            (self.image_size, self.image_size),
            interpolation=cv2.INTER_NEAREST))
    frames = np.array(frames)

    predictions = self.model.predict(frames,
                                     batch_size=min(len(frames), batch_size),
                                     verbose=0)

    boxes = []
    prob = []
    shots = self.shots['shots']
    for i in range(len(shots)):
        slice_boxes = []
        slice_prob = []
        for j in range(predictions.shape[1]):
            for k in range(predictions.shape[2]):
                p = sigmoid(predictions[i][j][k][4])
                if p is not None and p > self.prob_threshold:
                    px = sigmoid(predictions[i][j][k][0])
                    py = sigmoid(predictions[i][j][k][1])
                    pw = min(math.exp(predictions[i][j][k][2] / self.grids),
                             self.grids)
                    ph = min(math.exp(predictions[i][j][k][3] / self.grids),
                             self.grids)
                    if px is not None and py is not None and pw is not None \
                            and ph is not None and pw > eps and ph > eps:
                        # decode the cell-relative prediction into box corners
                        cx = (px + j) / self.grids
                        cy = (py + k) / self.grids
                        wx = pw / self.grids
                        wy = ph / self.grids
                        if wx <= shots[i][4] and wy <= shots[i][4]:
                            lx = min(max(cx - wx / 2, 0), 1)
                            ly = min(max(cy - wy / 2, 0), 1)
                            rx = min(max(cx + wx / 2, 0), 1)
                            ry = min(max(cy + wy / 2, 0), 1)
                            # map from shot-relative to frame-relative coordinates
                            lx = lx * shots[i][2] + shots[i][0]
                            ly = ly * shots[i][3] + shots[i][1]
                            rx = rx * shots[i][2] + shots[i][0]
                            ry = ry * shots[i][3] + shots[i][1]
                            slice_boxes.append([lx, ly, rx, ry])
                            slice_prob.append(p)
        slice_boxes = np.array(slice_boxes)
        slice_prob = np.array(slice_prob)
        slice_boxes = non_max_suppression(slice_boxes, slice_prob,
                                          self.iou_threshold)
        for sb in slice_boxes:
            boxes.append(sb)

    boxes = np.array(boxes)
    boxes = union_suppression(boxes, self.union_threshold)

    # convert back to coordinates relative to the original (uncropped) frame
    for i in range(len(boxes)):
        boxes[i][0] /= original_frame_shape[1] / frame.shape[1]
        boxes[i][1] /= original_frame_shape[0] / frame.shape[0]
        boxes[i][2] /= original_frame_shape[1] / frame.shape[1]
        boxes[i][3] /= original_frame_shape[0] / frame.shape[0]
        boxes[i][0] += slice_w_shift / original_frame_shape[1]
        boxes[i][1] += slice_h_shift / original_frame_shape[0]
        boxes[i][2] += slice_w_shift / original_frame_shape[1]
        boxes[i][3] += slice_h_shift / original_frame_shape[0]

    return list(boxes)