def testImg():
    """Run the saved SVM over each positive test image and display the result.

    For every PNG matched by the test glob, a 64x128 window is slid over the
    image with a 32 pixel step. Each full-size window is described with the
    module-level ``hog`` descriptor and classified by the SVM loaded from
    ``svm.xml``. Windows whose predicted label equals 0 are collected, reduced
    with ``nms.non_max_suppression_slow`` (overlap threshold 0.3) and the first
    surviving box is drawn in green. The image is then shown; pressing ESC
    stops the loop over files.

    Images that cannot be read are skipped, and images without any detection
    are displayed unannotated instead of raising.
    """
    # Raw strings keep the exact same path text while avoiding the invalid
    # escape sequences the plain literals contained.
    svm = cv2.ml.SVM_load(r".\svm.xml")
    for file in glob.glob(r".\/test\pos\*.png"):
        img = cv2.imread(file)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            continue

        detections = []
        for (x, y, window) in helpers.sliding_window(img, stepSize=32, windowSize=(64, 128)):
            # Skip truncated windows at the image border.
            if window.shape[0] != 128 or window.shape[1] != 64:
                continue
            winDescriptor = hog.compute(window)
            rows, cols = winDescriptor.shape
            # Swap the two dimensions of the descriptor before prediction.
            winDescriptor = winDescriptor.reshape((cols, rows))
            prediction = svm.predict(winDescriptor)
            # NOTE(review): label 0 is treated as a hit here; confirm this
            # matches the label assignment used during training.
            if prediction[1] == 0:
                detections.append((x, y, x + 64, y + 128))

        if detections:
            pick = nms.non_max_suppression_slow(np.array(detections), 0.3)
            if len(pick) > 0:
                x1, y1, x2, y2 = pick[0]
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        cv2.imshow("image", img)
        k = cv2.waitKey(30) & 0xff
        if k == 27:
            break
def detect(self, image, winDim, winStep=4, pyramidScale=1.5, minProb=0.7):
    """Scan an image pyramid with a sliding window and collect confident hits.

    Args:
        image: Input image; only ``image.shape[0]`` is read directly here.
        winDim: Window size, compared as ``(width, height)`` against each
            window and also passed as ``minSize`` to ``helpers.pyramid``.
        winStep: Step passed to ``helpers.sliding_window``.
        pyramidScale: Scale factor passed to ``helpers.pyramid``.
        minProb: A window is kept only if its score is strictly greater.

    Returns:
        Tuple ``(boxes, probs)``: ``boxes`` holds ``(startX, startY, endX,
        endY)`` integer tuples rescaled by the layer's height ratio, and
        ``probs`` holds the matching scores.
    """
    found_boxes = []
    found_probs = []

    for level in helpers.pyramid(image, scale=pyramidScale, minSize=winDim):
        # Ratio of the original height to this layer's height; used to map
        # layer coordinates back onto the original image.
        ratio = image.shape[0] / float(level.shape[0])

        for (col, row, patch) in helpers.sliding_window(level, winStep, winDim):
            patch_h, patch_w = patch.shape[:2]

            # Ignore windows whose size differs from the requested one.
            if not (patch_h == winDim[1] and patch_w == winDim[0]):
                continue

            # Describe the patch and flatten it to a single-row feature vector.
            vector = self.desc.describe(patch).reshape(1, -1)
            score = self.model.predict_proba(vector)[0][1]

            if score > minProb:
                left = int(ratio * col)
                top = int(ratio * row)
                right = int(left + (ratio * patch_w))
                bottom = int(top + (ratio * patch_h))
                found_boxes.append((left, top, right, bottom))
                found_probs.append(score)

    return (found_boxes, found_probs)
def detect(self, image, winDim, winStep=4, pyramidScale=1.5, minProb=0.7):
    """Slide a window over an image pyramid and compute HOG features.

    Args:
        image: Input image; ``image.shape[0]`` is used for the layer scale.
        winDim: Window size, compared as ``(width, height)`` against each
            window and passed as ``minSize`` to ``helpers.pyramid``.
        winStep: Step passed to ``helpers.sliding_window``.
        pyramidScale: Scale factor passed to ``helpers.pyramid``.
        minProb: Not used by the visible part of the body.

    NOTE(review): the body visible here ends after the HOG features are
    computed - nothing is classified, appended to ``boxes``/``probs`` or
    returned. It looks truncated; confirm against the full implementation.
    """
    boxes = []
    probs = []

    # loop over the image pyramid
    for layer in helpers.pyramid(image, scale=pyramidScale, minSize=winDim):
        scale = image.shape[0] / float(layer.shape[0])

        # loop over the sliding window
        for (x, y, window) in helpers.sliding_window(layer, winStep, winDim):
            (winH, winW) = window.shape[:2]

            # Only full-size windows are processed. The width must be checked
            # against winDim[0]; the previous code compared the height twice,
            # which let truncated windows through for square winDim and
            # rejected every window otherwise.
            if winH == winDim[1] and winW == winDim[0]:
                # NOTE(review): this first result is immediately overwritten
                # by the feature.hog call below; kept in case describe() has
                # side effects.
                features = self.desc.describe(window)
                features = feature.hog(
                    window,
                    orientations=conf["orientations"],
                    pixels_per_cell=tuple(conf["pixels_per_cell"]),
                    cells_per_block=tuple(conf["cells_per_block"]),
                    transform_sqrt=conf["normalize"])
def slide(input_img, output):
    """Pixelate regions of ``output`` whose crop scores below 0.9 in ``emoji_AI``.

    Each layer of ``helpers.pyramid(input_img, scale=2)`` is scanned with a
    square ``WIN_SIZE`` window stepped by ``STEP_SIZE``. For every full-size
    window a shifted region is cropped from the layer, resized to 200x200 and
    scored. Regions scoring below 0.9 are replaced in ``output`` by
    ``pixelate`` of the same region. The number of pixelations is printed.

    Args:
        input_img: Image that is scanned.
        output: Image modified in place and returned.

    Returns:
        ``output`` after all pixelations.
    """
    hits = 0

    for layer in helpers.pyramid(input_img, scale=2):
        for (x, y, window) in helpers.sliding_window(layer, stepSize=STEP_SIZE,
                                                     windowSize=(WIN_SIZE, WIN_SIZE)):
            # Only full-size windows are considered.
            if not (window.shape[0] == WIN_SIZE and window.shape[1] == WIN_SIZE):
                continue

            # Shift the window by the size difference between the input and
            # the current layer.
            # NOTE(review): the row offset uses shape[1] and the column offset
            # uses shape[0], exactly as in the original; confirm the axes are
            # not swapped.
            top = y + (input_img.shape[1] - layer.shape[1])
            left = x + (input_img.shape[0] - layer.shape[0])
            region = (slice(top, top + WIN_SIZE), slice(left, left + WIN_SIZE))

            try:
                crop_img = cv2.resize(layer[region], (200, 200))
            except Exception:
                # A crop that cannot be resized is skipped.
                continue

            # Per the original author's note, scores below 0.9 count as emoji.
            if emoji_AI(crop_img) < 0.9:
                hits += 1
                output[region] = pixelate(output[region])

    print("Number of pixelation:", hits)
    return output
def scan_image(path):
    """Classify every sliding window of an image pyramid and save annotated results.

    The image at ``path`` is read in colour. Every full-size window of every
    pyramid layer is cropped, normalised with
    ``pre_processing_data.process_single_file`` and collected; all windows are
    then classified in one ``predict_multi`` call. Positive windows are mapped
    back with ``reverse_window``, drawn on one copy of the image and appended
    to ``lstRect``. ``run_NMS`` selects the rectangles drawn on a second copy.
    Both copies are written as PNG files under ``stored_path``.

    Reads the module-level names ``scale``, ``stepSize``, ``winW``, ``winH``,
    ``model_path`` and ``stored_path``.

    Args:
        path: Path of the image to scan.

    Returns:
        Tuple ``(result_path, count)``: the path of ``result.png`` and the
        number of rectangles selected by ``run_NMS``.
    """
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    ori_clone = image.copy()
    overlapImg = image.copy()

    lst_img = []
    lst_imgDetail = []

    # Collect every full-size window together with where it came from.
    for level, resized_image in enumerate(pyramid(image, scale)):
        for (x, y, window) in sliding_window(resized_image, stepSize=stepSize,
                                             windowSize=(winW, winH)):
            if not (window.shape[0] == winH and window.shape[1] == winW):
                continue

            crop_box = (x, y, x + winW, y + winH)
            subImage = utils.to_image(resized_image).crop(crop_box)
            lst_img.append(pre_processing_data.process_single_file(subImage))
            lst_imgDetail.append((x, y, level, resized_image))

    # Predict all windows at once.
    lst_indexPositive, positive_scores = predict_multi(lst_img, model_path)

    time_now("fusing")
    green = (0, 255, 0)
    for i in lst_indexPositive:
        subX, subY, subLevel, subImg = lst_imgDetail[i]
        ori_x, ori_y, new_winW, new_winH = reverse_window(
            subX, subY, subImg.shape[1], subImg.shape[0], scale ** subLevel,
            image.shape[1], image.shape[0], winW, winH)

        right = ori_x + new_winW
        bottom = ori_y + new_winH
        cv2.rectangle(ori_clone, (ori_x, ori_y), (right, bottom), green, 2)
        # NOTE(review): lstRect is not created in this function; presumably a
        # module-level list - verify it is reset between calls.
        lstRect.append((ori_x, ori_y, right, bottom))

    overlappedLst = run_NMS(lstRect, positive_scores, 0.05)

    time_now("draw rect")
    for i in overlappedLst:
        x1, y1, x2, y2 = lstRect[i]
        cv2.rectangle(overlapImg, (x1, y1), (x2, y2), green, 2)

    result_path = os.path.join(stored_path, 'result.png')
    not_overlap = os.path.join(stored_path, 'be4.png')
    utils.to_image(ori_clone).save(not_overlap, 'png')
    utils.to_image(overlapImg).save(result_path, 'png')

    return result_path, len(overlappedLst)
def cropNegatives(path):
    """Cut 64x128 (width x height) negative samples out of every image in ``path``.

    Each file in ``path`` is read and scanned with a 512 pixel step. Every
    position that yields a complete 128-row by 64-column crop is written to
    ``neg_128x64_2000/neg<i>_<j>.png``, where ``i`` is the file index and
    ``j`` counts crops within that file starting at 1.

    Args:
        path: Directory containing the source images.
    """
    for i, file in enumerate(os.listdir(path)):
        # os.path.join instead of a hard-coded backslash separator.
        current_path = os.path.join(str(path), file)
        img = cv2.imread(current_path)
        if img is None:
            # Not an image (or unreadable): cv2.imread returns None.
            continue

        j = 1
        # Window given as (width, height) = (64, 128) so it agrees with the
        # 128x64 crop below; it was previously passed as (128, 64).
        # NOTE(review): assumes sliding_window takes (width, height) like the
        # other callers of this helper - confirm.
        for (x, y, window) in sliding_window(img, stepSize=512, windowSize=(64, 128)):
            crop_img = img[y:y + 128, x:x + 64]
            # Validate the crop that is actually written, so truncated
            # regions at the image border are skipped.
            if crop_img.shape[0] != 128 or crop_img.shape[1] != 64:
                continue

            out_name = 'neg' + str(i) + '_' + str(j) + '.png'
            # Only report success when OpenCV actually wrote the file.
            if cv2.imwrite(os.path.join('neg_128x64_2000', out_name), crop_img):
                print('Saved: neg{}_{}.png'.format(i, j))
            j += 1

            # ESC aborts the scan of the current image.
            if cv2.waitKey(1) == 0x1b:
                break
def process_input(i):
    """Collect HOG descriptors of background windows accepted by both classifiers.

    The background image named on line ``i`` of ``backgroundLines`` (relative
    to ``splitPath[0]``) is read with flag 0 and scanned with a 15 pixel step.
    A window's descriptor is kept when ``bayes.predict_proba`` gives the
    second class a value above 0.40 and ``model.predict`` equals ``[1]``.

    Reads the module-level names ``splitPath``, ``backgroundLines``, ``winW``,
    ``winH``, ``orientation``, ``pixelsPerCell``, ``cellsPerBlock``, ``bayes``
    and ``model``.

    Args:
        i: Index into ``backgroundLines``.

    Returns:
        List of HOG feature vectors for the accepted windows.
    """
    image_path = splitPath[0] + "/" + backgroundLines[i].rstrip("\r\n")
    background_image = cv2.imread(image_path, 0)

    windows = []
    for (x, y, window) in sliding_window(background_image, stepSize=15,
                                         windowSize=(winW, winH)):
        # Only full-size windows are described.
        if not (window.shape[0] == winH and window.shape[1] == winW):
            continue

        check_hog = hog(window, orientations=orientation,
                        pixels_per_cell=pixelsPerCell,
                        cells_per_block=cellsPerBlock,
                        transform_sqrt=True, feature_vector=True)
        sample = check_hog.reshape(1, -1)

        # The second classifier is consulted only when the first one passes.
        c = bayes.predict_proba(sample)
        if c[0][1] > 0.40 and model.predict(sample) == [1]:
            windows.append(check_hog)

    return windows
def createTrainImages():
    """Build the positive and negative training image sets on disk.

    Positive PNGs are resized to 64x128 and written as numbered JPEGs.
    Negative PNGs are scanned with a 64x128 window and a 64 pixel step; every
    full-size window is written as a numbered JPEG. Numbering restarts at 0
    for the negative set. Files that cannot be read are skipped.
    """
    # Raw strings keep the exact same path text while avoiding the invalid
    # escape sequences the plain literals contained.
    count = 0
    for file in glob.glob(r".\/train\/pos\*.png"):
        img = cv2.imread(file)
        if img is None:
            # cv2.imread returns None for unreadable files; resize would fail.
            continue
        resized = cv2.resize(img, (64, 128))
        cv2.imwrite(r".\/train\positive\/img%d.jpg" % count, resized)
        count += 1

    count = 0
    for neg_img in glob.glob(r".\/train\/neg\*.png"):
        negative = cv2.imread(neg_img)
        if negative is None:
            continue
        for (x, y, window) in helpers.sliding_window(negative, stepSize=64, windowSize=(64, 128)):
            # Skip truncated windows at the image border.
            if window.shape[0] != 128 or window.shape[1] != 64:
                continue
            cv2.imwrite(r".\/train\/negative\/img%d.jpg" % count, window)
            count += 1
def detect(self, image, winDim, winStep=4, pyramidScale=1.5, minProb=0.7):
    """Return bounding boxes and scores for windows the model accepts.

    Args:
        image: Input image; only ``image.shape[0]`` is read directly here.
        winDim: Window size, compared as ``(width, height)`` against each
            window and also passed as ``minSize`` to ``helpers.pyramid``.
        winStep: Step passed to ``helpers.sliding_window``.
        pyramidScale: Scale factor passed to ``helpers.pyramid``.
        minProb: Score a window must strictly exceed to be reported.

    Returns:
        Tuple ``(boxes, probs)`` of equal-length lists; each box is
        ``(startX, startY, endX, endY)`` rescaled by the layer's height ratio.
    """

    def _confident_windows():
        """Yield ``(box, prob)`` for each full-size window above ``minProb``."""
        for layer in helpers.pyramid(image, scale=pyramidScale, minSize=winDim):
            # height ratio between the original image and this layer
            scale = image.shape[0] / float(layer.shape[0])

            for (x, y, window) in helpers.sliding_window(layer, winStep, winDim):
                winH, winW = window.shape[:2]
                if winH == winDim[1] and winW == winDim[0]:
                    # describe the window and score the flattened features
                    features = self.desc.describe(window).reshape(1, -1)
                    prob = self.model.predict_proba(features)[0][1]

                    if prob > minProb:
                        # map the window back onto the original image
                        startX = int(scale * x)
                        startY = int(scale * y)
                        endX = int(startX + (scale * winW))
                        endY = int(startY + (scale * winH))
                        yield (startX, startY, endX, endY), prob

    boxes = []
    probs = []
    for box, prob in _confident_windows():
        boxes.append(box)
        probs.append(prob)

    return (boxes, probs)
def hardNegativeMining():
    """Print the SVM prediction for every window of every negative test image.

    Each PNG matched by the negative test glob is expanded into a pyramid
    (scale 1.5). Every full-size 64x128 window (32 pixel step) is described
    with the module-level ``hog`` descriptor, classified by the SVM loaded
    from ``svm.xml``, and the raw prediction is printed. Files that cannot be
    read are skipped.
    """
    # Raw strings keep the exact same path text while avoiding the invalid
    # escape sequences the plain literals contained.
    svm = cv2.ml.SVM_load(r".\svm.xml")
    (winW, winH) = (64, 128)

    for image_file in glob.glob(r".\/test\/neg\*.png"):
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None for unreadable files.
            continue

        for resized in helpers.pyramid(image, scale=1.5):
            for (x, y, window) in helpers.sliding_window(resized, stepSize=32, windowSize=(winW, winH)):
                # if the window does not meet our desired window size, ignore it
                if window.shape[0] != winH or window.shape[1] != winW:
                    continue
                winDescriptor = hog.compute(window)
                rows, cols = winDescriptor.shape
                # Swap the two dimensions of the descriptor before prediction.
                winDescriptor = winDescriptor.reshape((cols, rows))
                prediction = svm.predict(winDescriptor)
                print(prediction)
images = numpy.empty(len(onlyfiles), dtype=object) for n in range(0, len(onlyfiles)): images[n] = cv2.imread(join(mypath, onlyfiles[n])) elif (cv2.imread(args["image"]) != None): images = numpy.empty(1, dtype=object) images[1] = cv2.imread(args["image"]) else: raise ValueError( 'Invalid arguments; image or directory path reference needed') for image in images: win_size = 16 (winW, winH) = (win_size, win_size) for resized in pyramid(image, scale=1.5, minSize=(20, 20)): for (x, y, window) in sliding_window(resized, stepSize=win_size, windowSize=(winW, winH)): # if the window does not meet our desired window size, ignore it if window.shape[0] != winH or window.shape[1] != winW: continue #CLASSIFY SHIT HERE # since we do not have a classifier... ahem... we'll just draw the window clone = resized.copy() cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2) cv2.imshow("Window", clone) cv2.waitKey(1) # time.sleep(0.0001)
# import the necessary packages
import helpers
import argparse
import time
import cv2

# load the image and define the window width and height
image = cv2.imread('E:/python/myInceptionProject/detected_image.jpg')
(winW, winH) = (60, 48)
# running index used to name the saved crops
i = 1

# loop over the image pyramid
for resized in helpers.pyramid(image, scale=1.5):
    # loop over the sliding window for each layer of the pyramid
    for (x, y, window) in helpers.sliding_window(resized, stepSize=16, windowSize=(winW, winH)):
        # if the window does not meet our desired window size, ignore it
        if window.shape[0] != winH or window.shape[1] != winW:
            continue

        # Crop winH rows by winW columns. The column range previously used
        # winH, producing 48x48 crops instead of the 60x48 window. Slicing
        # the layer directly avoids copying the whole layer for every window.
        cut_img = resized[y:y + winH, x:x + winW]
        cv2.imwrite("E:/python/myInceptionProject/cut_img/" + str(i) + '.jpg', cut_img)
        # Advance the counter so each crop gets its own file instead of
        # overwriting 1.jpg.
        i += 1
rois, locs = [], []

# visit each layer of the image pyramid
for image in pyramid:
    # ratio between the *original* width W and the width of this layer
    scale = W / float(image.shape[1])

    # visit each sliding-window location on this layer
    for (x, y, roiOrig) in sliding_window(image, WIN_STEP, ROI_SIZE):
        # express the window origin and size in original-image coordinates
        x, y = int(x * scale), int(y * scale)
        w, h = int(ROI_SIZE[0] * scale), int(ROI_SIZE[1] * scale)

        # resize the ROI to the network input size, convert it to an array
        # and apply the model-specific preprocessing
        roi = preprocess_input(img_to_array(cv2.resize(roiOrig, INPUT_SIZE)))

        # remember the preprocessed ROI
        # NOTE(review): x, y, w, h are computed but not stored in the visible
        # code; locs is presumably filled in a part that is not shown.
        rois.append(roi)
from helpers import sliding_window
import cv2
from pathlib import Path
from PIL import Image

# every JPEG below ./ResizedData3/, searched recursively
pathlist = Path('./ResizedData3/').glob('**/*.jpg')
# running index used to name the saved tiles
j = 0
for path in pathlist:
    path_in_str = str(path)
    image = cv2.imread(path_in_str)
    if image is None:
        # cv2.imread returns None for unreadable files.
        continue

    # cut the image into non-overlapping 40x40 tiles
    for (x, y, window) in sliding_window(image, stepSize=40, windowSize=(40, 40)):
        # skip truncated tiles at the image border
        if window.shape[0] != 40 or window.shape[1] != 40:
            continue
        # OpenCV loads colour images in BGR order while PIL expects RGB;
        # convert first so the saved tiles keep their original colours.
        im = Image.fromarray(cv2.cvtColor(window, cv2.COLOR_BGR2RGB))
        im.save('./cropdata/6/' + str(j) + '.jpg')
        j += 1
def run_sliding_window(self, image, win_size, step_size):
    """Yield the ``(x, y)`` origin of each window accepted by ``self.predict``.

    Args:
        image: Image handed to ``sliding_window``.
        win_size: Passed as the second positional argument of
            ``sliding_window``.
        step_size: Passed as the third positional argument of
            ``sliding_window``.

    Yields:
        ``(x, y)`` for every window whose ``self.predict(window)`` is truthy.
    """
    # NOTE(review): arguments are forwarded positionally as
    # (image, win_size, step_size); confirm this matches the helper's
    # parameter order.
    for position in sliding_window(image, win_size, step_size):
        x, y, window = position
        if not self.predict(window):
            continue
        yield (x, y)
from helpers import pyramid
from helpers import sliding_window
import cv2
import time

if __name__ == "__main__":
    # Visualise a 128x128 sliding window over every pyramid layer.
    image = cv2.imread('images/pedestrian.jpg')
    print(image.shape)

    win_height = win_width = 128
    box_color = (0, 255, 0)

    for layer in pyramid(image):
        for (x, y, window) in sliding_window(layer, step_size=32,
                                             window_size=(win_width, win_height)):
            # draw only full-size windows
            if window.shape[0] == win_height and window.shape[1] == win_width:
                # draw on a copy so the layer itself stays clean
                clone = layer.copy()
                bottom_right = (x + win_width, y + win_height)
                cv2.rectangle(clone, (x, y), bottom_right, box_color, 2)
                cv2.imshow("Window", clone)
                cv2.waitKey(1)
                # short pause so each window position is visible
                time.sleep(0.025)
import helpers
import cv2
import time

# Visualise a 128x128 sliding window (35 pixel step) over each pyramid layer.
image = cv2.imread("images/flowers.png")
winW, winH = 128, 128

# NOTE(review): this calls helpers.pyramids (plural); confirm the helper
# module really exposes that name.
for resized in helpers.pyramids(image, 1.5):
    for (x, y, window) in helpers.sliding_window(resized, 35, (winW, winH)):
        full_size = window.shape[0] == winH and window.shape[1] == winW
        if not full_size:
            # truncated window at the border of the layer
            continue

        # draw the current window on a copy of the layer and show it
        clone = resized.copy()
        top_left = (x, y)
        bottom_right = (x + winW, y + winH)
        cv2.rectangle(clone, top_left, bottom_right, (0, 255, 0), 2)
        cv2.imshow("Window", clone)
        cv2.waitKey(1)
        time.sleep(0.025)
# (winW, winH) = (128, 128)
start_time = datetime.datetime.now()

# bookkeeping for the pyramid scan
count, level = 0, 0
scale = 1.25
lst_img = []
lst_imgDetail = []
ori_clone = image.copy()

for resized_image in pyramid(image, scale):
    # slide a (winW, winH) window over this layer in 16 pixel steps
    for (x, y, window) in sliding_window(resized_image, stepSize=16, windowSize=(winW, winH)):
        # truncated windows at the border are ignored
        if not (window.shape[0] == winH and window.shape[1] == winW):
            continue

        # crop the window from the layer, normalise it and queue it
        curWindow = (x, y, x + winW, y + winH)
        subImage = utils.to_image(resized_image).crop(curWindow)
        normalized_img = pre_processing_data.process_single_file(subImage)
        lst_img.append(normalized_img)

        # remember where the window came from
        # NOTE(review): the visible code builds imgDetail but does not store
        # it or advance level; presumably done in a part that is not shown.
        imgDetail = (x, y, level, resized_image)
ap.add_argument("-s", "--scale", type=float, default=1.5, help="scale factor size")
args = vars(ap.parse_args())

# read the input image and the window size from the command line arguments
image = cv2.imread(args["image"])
winW, winH = args["width"], args["height"]

# walk every layer of the image pyramid
for layer in pyramid(image, scale=args["scale"]):
    # slide the window over the layer in 32 pixel steps
    for (x, y, window) in sliding_window(layer, stepSize=32, windowSize=(winW, winH)):
        # truncated windows at the border are ignored
        if not (window.shape[0] == winH and window.shape[1] == winW):
            continue

        # This is where the window would be processed (HOG features plus a
        # classifier); for now the window is only drawn on a copy of the layer.
        clone = layer.copy()
        cv2.rectangle(clone, (x, y), (x + winW, y + winH), (0, 255, 0), 2)
        cv2.imshow("Window", clone)

        # NOTE(review): the original comment here announces a pause for
        # visualisation, but no such call is visible in this part of the file.
def detect_pedestrian(image, model, verbose=True, slidingWindow_parameters={'height':int(275),'width':int(100),'step_w':50,'step_h':50},imagePyramid_parameters=(224,224),scalefactor=1.5):
    '''
    Detect pedestrians in a given input image.

    params:
        image: PIL input image in which pedestrians should be detected
        model: pytorch model used for prediction
        verbose: Bool to print the index of every roi in which a pedestrian was detected
        slidingWindow_parameters: Dictionary that contains information about the sliding
            window (height, width, step size in vertical and horizontal direction).
            The dictionary is only read, never modified.
        imagePyramid_parameters: Tuple that contains the smallest size that the image
            should be scaled to
        scalefactor: scale factor passed to the image pyramid
    return:
        picks: bounding box coordinates from the prediction after non-maxima
            suppression; an empty (0, 4) integer array when there is nothing to
            classify or nothing was detected
    '''
    locs = []
    rois = []
    ImgWidth, _ = image.size
    window_size = (slidingWindow_parameters['width'], slidingWindow_parameters['height'])

    for img in image_pyramid(image, minSize=imagePyramid_parameters, scale=scalefactor):
        # ratio between the original width and this layer, used to rescale rois
        scale = ImgWidth / float(img.size[0])
        # The step shrinks with the layer; clamp it to 1 so int() truncation
        # on small layers can never produce a zero step.
        step_w = max(1, int(slidingWindow_parameters['step_w'] / scale))
        step_h = max(1, int(slidingWindow_parameters['step_h'] / scale))

        for slide_img in sliding_window(img, window_size=window_size, step_w=step_w, step_h=step_h):
            # window coordinates mapped back onto the original image
            locs.append(tuple(int(x * scale) for x in slide_img[0]))
            # prepare region of interest for input into the classifier
            roi = slide_img[1].resize((224, int(224 * 2.5)))
            rois.append(prediction_transforms(roi))

    if not rois:
        # torch.stack raises on an empty list; nothing to classify.
        return np.empty((0, 4), dtype=int)

    # classify the rois
    model.eval()
    with torch.no_grad():
        batch = torch.stack(rois, dim=0)
        # split rois into batches of 8 to prevent memory overload
        predLoader = torch.utils.data.DataLoader(batch, batch_size=8)
        outputs = torch.cat([model(inputs) for inputs in predLoader], dim=0)
        _, preds = torch.max(outputs.data, 1)
        probs = torch.nn.Softmax(dim=1)(outputs)

    listOfBoundningBoxes = []
    listOfProbs = []
    # every roi whose predicted class index is non-zero counts as a pedestrian
    for index in preds.cpu().numpy().nonzero()[0]:
        if verbose:
            print(f"Detected pedestrian at index {index}.")
        listOfBoundningBoxes.append(locs[index])
        # keep the class-1 probability as a plain float for the NMS step
        listOfProbs.append(float(probs[index][1]))

    if not listOfBoundningBoxes:
        return np.empty((0, 4), dtype=int)

    # apply non-maxima suppression to the bounding boxes
    rects = np.array([[xmin, ymin, xmax, ymax] for (xmin, ymin, xmax, ymax) in listOfBoundningBoxes])
    picks = non_max_suppression(rects, probs=listOfProbs, overlapThresh=0.2)
    return picks
image = resize(image, width=800) else: image = resize(image, width=550) # define initial window size (winW, winH) = (32, 64) detect_list = [] #nb_size = 0 #detect_num = 0 # loop while winW < 75: # loop over the sliding window for each windowSize print 'winW=', winW, 'winH=', winH list_tmp = [] for (x, y, window) in sliding_window(image, stepSize=stepsize, windowSize=(winW, winH)): # if the window does not meet the desired window size, ignore it if window.shape[0] != winH or window.shape[1] != winW: continue # THIS IS WHERE TO PROCESS THE WINDOW # APPLYING CNN MODEL CLASSIFIER TO CLASSIFY THE OBJECT OF THE WINDOW window_resize = cv2.resize(window, (32, 64), interpolation=cv2.INTER_CUBIC) data = np.empty((1, 3, 64, 32), dtype="float32") #cv2.imwrite('/home/cad/tmp.png',window_resize) #img = Image.open('/home/cad/tmp.png') #arr = np.asarray(img,dtype="float32") window_resize_RGB = cv2.cvtColor(window_resize, cv2.COLOR_BGR2RGB) arr = np.asarray(window_resize_RGB, dtype="float32")
(8, 8), 9, False) origin_image = cv2.imread('data/Train/pos/crop001048.png') origin_r, origin_c, _ = origin_image.shape ratio = origin_c / origin_r image = cv2.resize(origin_image, None, fx=250 / origin_c, fy=250 / (ratio * origin_r)) r, c, _ = image.shape print(r, c) bounding_box = [] prob = [] for (i, layer) in enumerate(pyramid(image, 1.05, (64, 128))): m, n, _ = layer.shape for (x, y, window) in sliding_window(layer, 8, (64, 128)): if window.shape[0] != 128 or window.shape[1] != 64: continue normalized = cv2.resize(window, (64, 128)) data = np.array([hog.compute(normalized)]).squeeze() data = data.reshape(1, -1) resp = model.predict_proba(data) x_begin = int((x / n) * c) y_begin = int((y / m) * r) x_end = int(((x + 63) / n) * c) y_end = int(((y + 127) / m) * r) if resp[0][1] > resp[0][0]: bounding_box.append((x_begin, y_begin, x_end, y_end)) prob.append(resp[0][1])