def detect_ships(self, image):
    """Scan ``image`` with a sliding window and save an overlay of the hits.

    Each 80x80 window (step 40) is standardised with ``self.X_mean`` and
    ``self.X_std`` and passed to ``self.predict``. The origins of windows
    whose rounded prediction is truthy are collected, outlined with
    ``draw_rois`` and the annotated image is written to ``outputs/cnn.png``.

    Args:
        image: Image array handed to ``sliding_window`` and ``draw_rois``.

    Returns:
        None. The result is the image file written to disk.
    """
    win_w, win_h = 80, 80
    x_to_keep = []
    y_to_keep = []

    for x, y, window in sliding_window(image, stepSize=40,
                                       windowSize=(win_w, win_h)):
        # Ignore windows that are not the full size (presumably the ones
        # clipped by the image border -- confirm against sliding_window).
        if window.shape[0] != win_h or window.shape[1] != win_w:
            continue

        window = (window - self.X_mean) / self.X_std
        pred = np.round(self.predict(window))
        if pred:
            print('Ship detected!')
            x_to_keep.append(x)
            y_to_keep.append(y)

    img_draw = draw_rois(image, win_w, win_h,
                         np.asarray([x_to_keep, y_to_keep]),
                         color=(255, 0, 0))
    # plt.imsave writes the array straight to disk, so no pyplot figure is
    # created here (the previous plt.figure() call was never used or closed).
    plt.imsave('outputs/cnn.png', img_draw)
def detect_ships(self, image):
    """Scan ``image`` with a sliding window and save an overlay of the hits.

    Each 80x80 window (step 40) is flattened channel by channel (all of
    channel 0, then channel 1, then channel 2) into a single row, scaled with
    ``self.scaler.transform`` and passed to ``self.predict`` with a trailing
    singleton axis. The origins of windows whose rounded prediction is truthy
    are outlined with ``draw_rois`` and the annotated image is written to
    ``outputs/fullyconnected.png``.

    Args:
        image: Image array handed to ``sliding_window`` and ``draw_rois``;
            windows are indexed as ``window[:, :, channel]``.

    Returns:
        None. The result is the image file written to disk.
    """
    win_w, win_h = 80, 80
    x_to_keep = []
    y_to_keep = []

    for x, y, window in sliding_window(image, stepSize=40,
                                       windowSize=(win_w, win_h)):
        # Ignore windows that are not the full size (presumably the ones
        # clipped by the image border -- confirm against sliding_window).
        if window.shape[0] != win_h or window.shape[1] != win_w:
            continue

        # Channel-major flattening of the first three channels: equivalent to
        # stacking each channel's flattened pixels one after another.
        flat = np.asarray(window)[:, :, :3].transpose(2, 0, 1).reshape(1, -1)
        scaled = self.scaler.transform(flat)
        # Shape (1, n_features, 1), as the original passed to predict.
        model_input = np.expand_dims(np.asarray(scaled).reshape(1, -1),
                                     axis=-1)

        pred = np.round(self.predict(model_input))
        if pred:
            print('Ship detected!')
            x_to_keep.append(x)
            y_to_keep.append(y)

    img_draw = draw_rois(image, win_w, win_h,
                         np.asarray([x_to_keep, y_to_keep]),
                         color=(255, 0, 0))
    # plt.imsave writes the array straight to disk, so no pyplot figure is
    # created here (the previous plt.figure() call was never used or closed).
    plt.imsave('outputs/fullyconnected.png', img_draw)
def test_sliding_window(self):
    """Check the first two fields of every item from ``hp.sliding_window``.

    The generator is run over a 200x200 zero array with the arguments
    ``(50, 50, 50, 50)``, ``25`` and ``(10, 10)`` (presumably search window,
    stride and template size -- confirm against the helper), and the leading
    pair of each yielded item must match the four expected coordinates, in
    order.
    """
    blank = np.zeros((200, 200))
    windows = hp.sliding_window(blank, (50, 50, 50, 50), 25, (10, 10))
    actual_window_coords = [[item[0], item[1]] for item in windows]

    expected_window_coords = [[50, 50], [75, 50], [50, 75], [75, 75]]
    self.assertEqual(actual_window_coords, expected_window_coords)
def extract_slices(gt_num, gt_img, savepath, windowSize=(224, 224), stepSize=200, counter=0):
    """Write every sliding-window slice of ``gt_img`` to ``savepath`` as JPEG.

    Files are named ``SL_<gt_num>_<counter>_x_<x>_y_<y>.jpg`` where ``x`` and
    ``y`` are the values yielded by ``sliding_window`` for that slice.

    Args:
        gt_num: Identifier embedded in the file name. Any value that can be
            formatted as text is accepted (not only ``str``).
        gt_img: Source image array; a copy of it is sliced.
        savepath: Directory the slices are written into.
        windowSize: Window size forwarded to ``sliding_window``.
        stepSize: Step forwarded to ``sliding_window``.
        counter: Running index of the first slice, so numbering can continue
            across several calls.

    Returns:
        The counter value after the last slice, i.e. ``counter`` plus the
        number of windows processed.
    """
    clone = gt_img.copy()
    # task: think about adding the pyramid in here,
    # or maybe generating slices at different sizes of gt images: from small
    # to very big sizes, always extracting with the same window size.
    for (x, y, window) in sliding_window(clone, stepSize, windowSize):
        # str.format accepts any gt_num type; plain "+" concatenation raised
        # TypeError when the identifier was passed as an int.
        filename = "SL_{}_{}_x_{}_y_{}.jpg".format(gt_num, counter, x, y)
        cv.imwrite(os.path.join(savepath, filename), window)
        counter += 1
    return counter
def main():
    """Main driver: follow the first-frame box through the remaining frames.

    The frames are the sorted ``data/*.jpg`` files. A reference histogram is
    built from the hard-coded box in the first frame; in every later frame
    the candidate windows produced by ``helper.sliding_window`` inside an
    expanded search region are scored against it with ``helper.ncc`` and the
    best-scoring window becomes the box for the next frame. Each frame is
    written to ``RESULT_DIR`` with its box drawn.

    Raises:
        IndexError: If no ``data/*.jpg`` file exists.
    """
    images = sorted(glob.glob('data/*.jpg'))
    first_img = cv2.imread(images[0])

    # ground truth box for the first frame; entries 2 and 3 are used as the
    # template size below (presumably x, y, width, height -- confirm).
    bbox = [6, 166, 43, 27]
    helper.draw_bbox(os.path.join(RESULT_DIR, "00000001.jpg"), first_img, bbox)

    # quantize the image by integer division by 32
    quantized_img = first_img // 32

    # reference histogram of the box in the first frame
    car_hist = generate_histogram(quantized_img, bbox)

    for img in images[1:]:
        test_img = cv2.imread(img)

        # quantize image
        quantized_img = test_img // 32

        # search space for sliding window
        search_window = helper.expand_search_window(test_img, bbox,
                                                    search_window_size=60)

        # -inf rather than -1 so that even the lowest possible score can win.
        max_score = float('-inf')
        best_window = None

        # iterate on each candidate window
        for (x, y, window) in helper.sliding_window(test_img, search_window, stride=8, template_size=(bbox[2], bbox[3])):  # noqa
            candidate = (x, y, bbox[2], bbox[3])

            # generate histogram for candidate window
            candidate_histogram = generate_histogram(quantized_img, candidate,
                                                     use_kernel=False)

            # compute normalized cross correlation score
            ncc_score = helper.ncc(candidate_histogram, car_hist)
            if ncc_score > max_score:
                max_score = ncc_score
                best_window = candidate

        # No candidate was selected (empty search region or no comparable
        # score): keep the previous box instead of propagating None, which
        # would crash on bbox[2] in the next frame.
        if best_window is None:
            best_window = tuple(bbox)
        bbox = best_window

        # draw best bbox and write to file; basename is separator-agnostic,
        # unlike splitting on '/'.
        helper.draw_bbox(os.path.join(RESULT_DIR, os.path.basename(img)),
                         test_img, best_window)
def detect(self, image, windim, winstep=2, pyramidscale=1.5, minprob=0.7):
    """Run the classifier over every window of every pyramid layer.

    Args:
        image: Input image; its row count is compared with each layer's to
            recover the layer scale.
        windim: Window dimensions, indexed as ``(width, height)``.
        winstep: Step forwarded to ``helper.sliding_window``.
        pyramidscale: Scale forwarded to ``helper.pyramid``.
        minprob: A window is kept only when its class-1 probability is
            strictly greater than this value.

    Returns:
        A tuple ``(boxes, probs)``: ``boxes`` holds ``(startx, starty, endx,
        endy)`` tuples scaled by the layer's scale factor and ``probs`` the
        matching probabilities, in detection order.
    """
    boxes = []
    probs = []

    for layer in helper.pyramid(image, scale=pyramidscale, minSize=windim):
        # ratio between the input image height and this layer's height
        scale = image.shape[0] / float(layer.shape[0])

        for (x, y, window) in helper.sliding_window(layer, winstep, windim):
            (winh, winw) = window.shape[:2]

            # skip windows whose size differs from the requested dimensions
            if winh != windim[1] or winw != windim[0]:
                continue

            # describe the window and take the probability of class 1
            features = self.desc.describe(window).reshape(1, -1)
            prob = self.model.predict_proba(features)[0][1]
            if not prob > minprob:
                continue

            # scale the window corners by the layer's scale factor
            startx = int(scale * x)
            starty = int(scale * y)
            endx = int(startx + (scale * winw))
            endy = int(starty + (scale * winh))

            boxes.append((startx, starty, endx, endy))
            probs.append(prob)

    return (boxes, probs)
winS = param.dmin # loop over the image pyramid for (i, resized) in enumerate(pyramid_gaussian(img, downscale=1.5)): if resized.shape[0] < 31: break print("Resized shape: %d, Window size: %d, i: %d" % (resized.shape[0], winS, i)) # loop over the sliding window for each layer of the pyramid # this process takes about 7 hours. To do quick test, we may try stepSize # to be large (60) and see if code runs OK for (x, y, window) in sliding_window(resized, stepSize=8, windowSize=(winS, winS)): # apply a circular mask ot the window here. Think about where you should apply this mask. Before, resizing or after it. crop_img = cv.resize(window, (50, 50)) cv.normalize(crop_img, crop_img, 0, 255, cv.NORM_MINMAX) crop_img = crop_img.flatten() p_non, p_crater = cnn.predict([crop_img])[0] scale_factor = 1.5**i x_c = int((x + 0.5 * winS) * scale_factor) y_c = int((y + 0.5 * winS) * scale_factor) crater_r = int(winS * scale_factor / 2) # add its probability to a score combined thresholded image and normal iamges.
def detect(img, feature_type, downscale=1.5, visualize=False, apply_nms=True):
    """Use a sliding window and an image pyramid to detect objects.

    Args:
        img: Image to search.
        feature_type: Feature selector forwarded to ``compute_feature``.
        downscale: Pyramid factor; also used to rescale detections found on
            layer ``k`` by ``downscale ** k``.
        visualize: When true, show every window and the current layer's
            detections with OpenCV while scanning, and draw the final
            detections on a copy of ``img``.
        apply_nms: When false, return the raw candidates instead of running
            non-maximum suppression.

    Returns:
        With ``apply_nms=False``: a list of ``(box, confidence)`` tuples where
        ``box`` is an integer array ``[x1, y1, x2, y2]``. Otherwise whatever
        ``non_max_suppression(boxes, only_one=True)`` returns.
    """
    detections = []  # detected candidates
    min_window_size = (int(config.img_width * 1.5),
                       int(config.img_height * 1.5))
    min_ws = (config.img_width, config.img_height)
    classifier = joblib.load(config.model_path)

    pyramid_layers = helper.pyramid(img, downscale, min_window_size)
    for scale_level, scaled_img in enumerate(pyramid_layers):
        # detections at current scale, used for visualization only
        curr_scale_dets = []
        for (x, y, im_window) in helper.sliding_window(scaled_img, min_ws,
                                                       config.step_size):
            # filter out windows that do not have the standard size
            if im_window.shape[0] != config.img_height or \
                    im_window.shape[1] != config.img_width:
                continue

            # compute feature
            feature = compute_feature(im_window, feature_type)

            # prediction
            pred = classifier.predict([feature])[0]
            if pred == 1:
                # The confidence is only needed for positive windows, so the
                # second classifier pass is skipped for all the others.
                confidence = classifier.decision_function([feature])[0]
                print('Detection at location ({}, {})'.format(x, y))
                print('scale level: {}, confidence: {}'.format(scale_level, confidence))

                # TODO: potential bug for coordinate restore
                # attr_vec: [x1, y1, x2, y2]
                attr_vec = np.array([x, y, min_ws[0] + x, min_ws[1] + y])
                curr_scale_dets.append((attr_vec, confidence))

                expand_rate = downscale ** scale_level
                attr_vec = np.around(attr_vec * expand_rate).astype('int')
                # detection: ([x1, y1, x2, y2], confidence)
                detections.append((attr_vec, confidence))

            # visualize: draw the current sliding window
            # and the detections at this scale
            # TODO: show confidence on the bounding box
            if visualize:
                im_copy = scaled_img.copy()
                for det, _ in curr_scale_dets:
                    cv2.rectangle(im_copy, (det[0], det[1]), (det[2], det[3]),
                                  color=(0, 0, 0), thickness=2)
                cv2.rectangle(im_copy, (x, y),
                              (x + im_window.shape[1], y + im_window.shape[0]),
                              color=(255, 255, 255), thickness=2)
                cv2.imshow('sliding window', im_copy)
                cv2.waitKey(20)

    if not apply_nms:
        # without non-maximum suppression, return with confidence;
        # can be used for hard-negative mining and graphcut segmentation
        return detections

    # apply non-maximum suppression
    dets = np.array([i[0] for i in detections])
    detections = non_max_suppression(dets, only_one=True)

    if visualize:
        im_copy = img.copy()
        helper.draw_detections(im_copy, detections)

    return detections
# Labels for the training faces.
# NOTE: when testing age prediction, load 'label_target_face.npy' here
# instead (together with 'training_image.npy' for faces_image).
#faces_target = np.load('label_target_face.npy')
faces_target = np.load('olivetti_faces_target.npy')
import time
#faces_image = np.load('training_image.npy')

# load the image and define the window width and height
image = cv2.imread('physics.png')
(winW, winH) = (64, 64)
result = {}

# Train the nearest-neighbour classifier once, before scanning: the training
# data does not depend on the window, so rebuilding and retraining the model
# for every window only repeated the same work.
# NOTE(review): faces_image must already be defined before this point.
nn = NearestNeighbor()
faces_image = faces_image.reshape(400, 64 * 64)
print(faces_target.shape)
nn.train(faces_image, faces_target)

for resized in pyramid(image, scale=1.5):
    # loop over the sliding window for each layer of the pyramid
    for (x, y, window) in sliding_window(resized, stepSize=19, windowSize=(winW, winH)):
        # if the window does not meet our desired window size, ignore it
        if window.shape[0] != winH or window.shape[1] != winW:
            continue

        # convert the window to grayscale and flatten it to one 64*64 row,
        # matching the layout of the training rows
        window = cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
        window = window.reshape(1, 64 * 64)

        # classify the window; the second argument is 1 (presumably k, the
        # number of neighbours -- confirm against NearestNeighbor.predict)
        result = nn.predict(window, 1)
        print(result)
import cv2

# Construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to the image")
args = vars(ap.parse_args())

# Load the image and define the window width and height
image = cv2.imread(args["image"])
(width, height) = (128, 128)

# Loop over the image pyramid
for resized in pyramid(image, scale=1.5):
    # Loop over the sliding window for each layer of the pyramid
    for (x, y, window) in sliding_window(resized, stepSize=32, windowSize=(width, height)):
        # If the window does not meet our desired window size, ignore it.
        # An image array's shape is (rows, columns, ...), i.e. height first,
        # so shape[0] is checked against height and shape[1] against width.
        if window.shape[0] != height or window.shape[1] != width:
            continue

        # This is where the window would be processed, e.g. by applying a
        # machine learning classifier to classify its contents.

        # Draw the window on a copy of the current layer and show it
        clone = resized.copy()
        cv2.rectangle(clone, (x, y), (x + width, y + height), (0, 255, 0), 2)
        cv2.imshow("Window", clone)
        cv2.waitKey()
        time.sleep(0.025)
# initialize the image pyramid pyramid = image_pyramid(origImage.copy(), scale=PYR_SCALE, minsize=ROI_SIZE) # list to store the roi generated from the image pyramid and sliding window rois = [] # list to store the x, y corrs of the generated roi in the original image locs = [] startTime = time.time() for image in pyramid: # detemine the scale of the roi in the original image # scale is used to upscale the values acc to the original image scale = W / float(image.shape[1]) for (x, y, roi) in sliding_window(image, WIN_STEP, ROI_SIZE): # scale the corrs with respect to the original image x = int(x * scale) y = int(y * scale) w = int(ROI_SIZE[0] * scale) h = int(ROI_SIZE[1] * scale) # if roi.shape[0] != 0 and roi.shape[1] != 0: roi = cv2.resize(roi, INPUT_SIZE) roi = img_to_array(roi) roi = preprocess_input(roi) rois.append(roi) locs.append((x, y, x + w, y + h)) if args["visualize"] > 0: