def main():
    """Display the intermediate stages of an edge-detection pipeline.

    Reads the image named by the module-level ``filename`` and opens one
    window per stage: the original, a blurred copy, the Sobel result, the
    non-max-suppressed result, and OpenCV's own ``cv2.Canny`` output for
    comparison. Blocks until a key is pressed, then closes every window.

    Raises:
        IOError: if ``cv2.imread`` could not load ``filename``.
    """

    def show(title, img):
        # Create the window explicitly with WINDOW_NORMAL before drawing,
        # exactly as every stage of this demo needs.
        cv2.namedWindow(title, cv2.WINDOW_NORMAL)
        cv2.imshow(title, img)

    def to_displayable(img):
        # imshow displays 8-bit unsigned data as-is. Casting to a signed
        # 8-bit type would wrap every value above 127 to a negative number,
        # so clamp to the displayable range and use uint8 instead.
        # NOTE(review): assumes the helpers return magnitudes on a 0-255
        # scale -- confirm against sobel_filters / non_max_suppression.
        return np.clip(img, 0, 255).astype(np.uint8)

    image = cv2.imread(filename)
    if image is None:
        # imread signals failure by returning None rather than raising.
        raise IOError('Could not read image: {}'.format(filename))
    show('Selfie Original', image)

    # presumably a 9x9 kernel with sigma 2 -- confirm against gaussian_kernel
    gk = gaussian_kernel(9, 2)
    blur = cv2.filter2D(image, -1, gk)
    show('Selfie Blurred', blur)

    # imread returns channels in BGR order, so the BGR conversion code is
    # the one that applies the right per-channel weights.
    # NOTE(review): the gradient stages run on the unblurred image, as in
    # the original code -- confirm that is intended.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sobel, theta = sobel_filters(gray)
    show('Selfie Sobel', to_displayable(sobel))

    nonmax = non_max_suppression(sobel, theta)
    show('Selfie Nonmax', to_displayable(nonmax))

    # Reference result from OpenCV's built-in detector.
    edges = cv2.Canny(image, 100, 200)
    show('Selfie Canny', edges)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
def canny(img):
    """Run the edge-detection pipeline on ``img`` and return the result.

    The image is passed, in order, through ``monochrome_img``,
    ``gauss_filter``, ``sobel``, ``non_max_suppression`` and
    ``double_tresholding``; the output of the last step is returned.
    """
    smoothed = gauss_filter(monochrome_img(img))
    # sobel() yields two values which are forwarded together to the
    # suppression step (presumably gradient magnitude and direction).
    magnitude, direction = sobel(smoothed)
    thinned = non_max_suppression(magnitude, direction)
    # 0.5 and 0.7 are the two threshold arguments (presumably low / high).
    return double_tresholding(thinned, 0.5, 0.7)
def harris_corner(image):
    """Return the (row, col) coordinates of Harris corners in ``image``.

    ``image`` must be a 2-D array (it is fed to ``signal.convolve2d``).
    The image is zero-padded, filtered with a 1-D derivative kernel applied
    once as a column vector and once as a row vector, and the products of
    the two responses are summed over a ``window_size`` x ``window_size``
    neighbourhood of every pixel. The corner response
    ``det - k * trace**2`` is kept where it exceeds ``threshold``, passed
    through ``non_max_suppression``, and every remaining non-zero position
    is reported.

    Returns:
        list of ``(row, col)`` tuples of Python ints, in row-major order.
    """
    window_size = 5
    sigma = 1
    radius = (window_size - 1) // 2
    k = 0.06
    threshold = 15000000

    height = image.shape[0]
    width = image.shape[1]
    padded_img = np.pad(image, radius, 'constant')

    # The same 1-D kernel is used in both orientations.
    gx = gaussian_derivative1d(window_size, sigma)
    gx = gx.reshape(gx.shape[0], 1)
    gy = gx.reshape(1, gx.shape[0])
    conv_im_x = signal.convolve2d(padded_img, gx, mode="same", boundary="symm")
    conv_im_y = signal.convolve2d(padded_img, gy, mode="same", boundary="symm")

    ix2 = conv_im_x ** 2
    ixy = conv_im_x * conv_im_y
    iy2 = conv_im_y ** 2

    response = np.zeros((height, width))
    for i in range(height):
        for j in range(width):
            # Pixel (i, j) of the input sits at (i + radius, j + radius) in
            # the padded arrays, so the window centred on it spans
            # [i, i + window_size) along each axis. Indexing with
            # i - radius instead would produce empty slices near the
            # top/left border and off-centre windows everywhere else.
            rows = slice(i, i + window_size)
            cols = slice(j, j + window_size)
            sumx2 = ix2[rows, cols].sum()
            sumxy = ixy[rows, cols].sum()
            sumy2 = iy2[rows, cols].sum()

            det = (sumx2 * sumy2) - (sumxy ** 2)
            trace = sumx2 + sumy2
            r = det - k * (trace ** 2)
            if r > threshold:
                response[i][j] = r

    response = non_max_suppression(response)

    # argwhere walks the array in row-major order, matching a nested
    # row/column scan; int() keeps the coordinates plain Python ints.
    return [(int(i), int(j)) for i, j in np.argwhere(np.asarray(response) != 0)]
def canny(image):
    """Detect edges in ``image`` and return what ``getContours`` produces.

    Steps, in order: blur, Sobel-style gradient, non-max suppression,
    thresholding, hysteresis, conversion to ``uint8``, one dilation pass
    with a 5x5 kernel of ones, and finally ``getContours`` on the dilated
    map.
    """
    weak = 50  # value shared by the thresholding and hysteresis helpers
    sobel_kernel = np.array([[-1, 0, 1],
                             [-2, 0, 2],
                             [-1, 0, 1]])

    smoothed = gaussian_blur(image, kernel_size=9)
    magnitude, direction = sobel_edge_detection(smoothed, sobel_kernel)

    edge_map = non_max_suppression(magnitude, direction)
    # 5 and 20 are presumably the low / high thresholds -- confirm against
    # threshold().
    edge_map = threshold(edge_map, 5, 20, weak)
    edge_map = hysteresis(edge_map, weak).astype(np.uint8)

    dilated = cv2.dilate(edge_map, np.ones((5, 5)), iterations=1)

    # The input image is passed twice, as the second and third arguments.
    return getContours(dilated, image, image)
def process_img(img, detector):
    """Search for pedestrians using a sliding window approach.

    Args:
        img: 2-D (single-channel) image; it is unpacked as ``rows, cols``
            and converted with ``COLOR_GRAY2RGB`` for drawing.
        detector: object whose ``test(image)`` returns a sequence whose
            first element equals 1 for a positive detection.

    Returns:
        A 3-channel copy of ``img`` with green rectangles drawn on it:
        one thick rectangle around the whole image if the image as a whole
        tests positive, otherwise one thin rectangle per window that
        survives ``non_max_suppression``.
    """
    boxes = []  # (left, top, right, bottom) of every positive window
    print('processing...')
    out = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    rows, cols = img.shape

    # Test the image as a whole first; a hit short-circuits the scan.
    if detector.test(img)[0] == 1:
        print('here')
        cv2.rectangle(out, (0, 0), (cols, rows), (0, 255, 0), 3)
        return out

    # Scan at three window sizes: the base (height, width) times 2, 3, 4.
    win_size = np.array([160, 90])
    for scale in (2, 3, 4):
        win_h, win_w = (int(side) for side in win_size * scale)
        # Stride is a quarter of the window, i.e. neighbouring windows
        # overlap by 75%. Floor division keeps the step an integer on both
        # Python 2 and 3; range() rejects the float that "/" yields on 3.
        step_h = win_h // 4
        step_w = win_w // 4
        for i in range(0, rows - win_h, step_h):
            for j in range(0, cols - win_w, step_w):
                roi = img[i:i + win_h, j:j + win_w]
                if detector.test(roi)[0] == 1:
                    boxes.append((j, i, j + win_w, i + win_h))

    # Collapse overlapping hits, then draw what is left.
    boxes = non_max_suppression(boxes, .2)
    for box in boxes:
        cv2.rectangle(out, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1)
    return out
def process_img(img, detector):
    """Search for pedestrians using a sliding window approach.

    Args:
        img: 2-D (single-channel) image; it is unpacked as ``rows, cols``
            and converted with ``COLOR_GRAY2RGB`` for drawing.
        detector: object whose ``test(image)`` returns a sequence whose
            first element equals 1 for a positive detection.

    Returns:
        A 3-channel copy of ``img`` with green rectangles drawn on it:
        one thick rectangle around the whole image if the image as a whole
        tests positive, otherwise one thin rectangle per window that
        survives ``non_max_suppression``.
    """
    boxes = []  # (left, top, right, bottom) of every positive window
    print('processing...')
    out = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    rows, cols = img.shape

    # Test the image as a whole first; a hit short-circuits the scan.
    if detector.test(img)[0] == 1:
        print('here')
        cv2.rectangle(out, (0, 0), (cols, rows), (0, 255, 0), 3)
        return out

    # Scan at three window sizes: the base (height, width) times 2, 3, 4.
    win_size = np.array([160, 90])
    for scale in (2, 3, 4):
        win_h, win_w = (int(side) for side in win_size * scale)
        # Stride is a quarter of the window, i.e. neighbouring windows
        # overlap by 75%. Floor division keeps the step an integer on both
        # Python 2 and 3; range() rejects the float that "/" yields on 3.
        step_h = win_h // 4
        step_w = win_w // 4
        for i in range(0, rows - win_h, step_h):
            for j in range(0, cols - win_w, step_w):
                roi = img[i:i + win_h, j:j + win_w]
                if detector.test(roi)[0] == 1:
                    boxes.append((j, i, j + win_w, i + win_h))

    # Collapse overlapping hits, then draw what is left.
    boxes = non_max_suppression(boxes, .2)
    for box in boxes:
        cv2.rectangle(out, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1)
    return out
# Main loop: keep pulling frames from the threaded video stream.
while True:
    # Fetch the next frame and resize it to a width of 600 pixels.
    frame = imutils.resize(vs.read(), width=600)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Run the HOG detector on the colour frame.
    rects, weights = hog.detectMultiScale(
        frame, winStride=(8, 8), padding=(32, 32), scale=1.05)

    # Detections arrive as (x, y, w, h); turn them into corner form
    # (x1, y1, x2, y2) before suppressing overlapping boxes.
    rects = np.array([[x, y, x + w, y + h] for x, y, w, h in rects])
    pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

    # Draw each surviving box and report its centre point.
    for xA, yA, xB, yB in pick:
        i = i + 1
        cv2.rectangle(frame, (xA, yA), (xB, yB), (0, 255, 0), 2)
        centerX = (xB + xA) / 2
        centerY = (yB + yA) / 2
        onPed(centerX, centerY)
# ======================================================
# This script tests the non_max_suppression function
#
# Author: Robert Pham
#
# ======================================================
from non_max_suppression import non_max_suppression

# Box-in-box case: the second box lies entirely inside the first, and the
# third box is far away from both.
boxes = [
    (1, 1, 20, 20),
    (5, 5, 10, 10),
    (100, 100, 130, 130),
]

# print(...) with a single argument prints the same text on Python 2 and
# also parses on Python 3, unlike the bare print statement.
print(boxes)
picked = non_max_suppression(boxes)
print(picked)
def detect_image(self, image):
    """Run the detector on ``image`` and draw labelled boxes onto it.

    The image is letterboxed to ``self.model_image_size``, scaled to
    [0, 1], moved to channels-first layout and fed through ``self.net``.
    The three network outputs are decoded, concatenated and filtered with
    ``non_max_suppression``; surviving boxes are mapped back to the
    original image size and drawn with their class name and score.

    Args:
        image: the picture to annotate; it is drawn on in place via
            ``ImageDraw.Draw`` (so presumably a PIL image).

    Returns:
        The same ``image`` object, annotated. It is returned unchanged
        when no detections could be extracted from the suppression result.
    """
    # Query the image size once; the values are reused for clamping and for
    # the font / outline sizes below.
    image_height, image_width = np.shape(image)[0:2]
    image_shape = np.array([image_height, image_width])
    model_h = self.model_image_size[0]
    model_w = self.model_image_size[1]

    crop_img = np.array(letterbox_image(image, (model_h, model_w)))
    photo = np.array(crop_img, dtype=np.float32)
    photo /= 255.0
    photo = np.transpose(photo, (2, 0, 1))  # move the channel axis first

    # Wrap the single image in a batch of one.
    images = torch.from_numpy(np.asarray([photo]))
    if self.cuda:
        images = images.cuda()

    with torch.no_grad():
        outputs = self.net(images)
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        # Put all candidate boxes into one tensor; per the original note
        # its size is (number of images, 10647, classes).
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(
            output, self.config["yolo"]["classes"],
            conf_thres=self.confidence, nms_thres=0.3)

    try:
        # Strip the outer batch dimension.
        batch_detections = batch_detections[0].cpu().numpy()
    except (AttributeError, TypeError, IndexError):
        # Nothing usable came back for this image (missing or empty
        # result): hand the picture back untouched.
        return image

    # batch_detections has seven columns: four coordinates, confidence,
    # class confidence, and class.
    combined_conf = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = combined_conf > self.confidence
    top_conf = combined_conf[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Rescale the boxes to the original image size, undoing the grey bars
    # added by letterboxing. model_image_size is the size the model
    # expects; image_shape is the size of the input image.
    boxes = yolo_correct_boxes(
        top_ymin, top_xmin, top_ymax, top_xmax,
        np.array([model_h, model_w]), image_shape)

    font = ImageFont.truetype(
        font='model_data/simhei.ttf',
        size=np.floor(3e-2 * image_width + 0.5).astype('int32'))
    thickness = (image_height + image_width) // model_h

    # One drawing context serves every box.
    draw = ImageDraw.Draw(image)

    for i, c in enumerate(top_label):
        predicted_class = self.class_names[c]
        score = top_conf[i]
        color = self.colors[self.class_names.index(predicted_class)]

        # Grow the box by 5 pixels on each side, round to the nearest
        # integer, and clamp to the image bounds.
        top, left, bottom, right = boxes[i]
        top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
        left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
        bottom = min(image_height, np.floor(bottom + 5 + 0.5).astype('int32'))
        right = min(image_width, np.floor(right + 5 + 0.5).astype('int32'))

        label = '{} {:.2f}'.format(predicted_class, score)
        if hasattr(draw, 'textsize'):
            label_size = draw.textsize(label, font)
        else:
            # Newer Pillow releases dropped textsize(); measure the extent
            # from the text origin with textbbox() instead.
            _, _, text_right, text_bottom = draw.textbbox(
                (0, 0), label, font=font)
            label_size = (text_right, text_bottom)
        # The console log shows the UTF-8 encoded form, as before.
        print(label.encode('utf-8'))

        # Put the label above the box when there is room, otherwise just
        # inside its top edge.
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # Nested 1-pixel rectangles produce an outline of the wanted
        # thickness. The loop variable is deliberately not named ``i`` so
        # it cannot clobber the detection index above.
        for offset in range(thickness):
            draw.rectangle(
                [left + offset, top + offset, right - offset, bottom - offset],
                outline=color)
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)

    del draw
    return image