def extract_face(self, frame, person):
    """Cut and return the face image, based on the neck and nose points."""
    Nose = person[0]
    Neck = person[1]
    RShoulder = person[2]
    LShoulder = person[5]
    # Use the largest of the neck-to-nose and neck-to-shoulder distances
    # as the crop radius around the nose.
    Radio1 = math.sqrt(math.pow(Neck[1] - Nose[1], 2) + math.pow(Neck[0] - Nose[0], 2))
    Radio2 = math.sqrt(math.pow(Neck[1] - RShoulder[1], 2) + math.pow(Neck[0] - RShoulder[0], 2))
    Radio3 = math.sqrt(math.pow(Neck[1] - LShoulder[1], 2) + math.pow(Neck[0] - LShoulder[0], 2))
    Radio = int(max(Radio1, Radio2, Radio3))
    x = max(Nose[0] - Radio, 0)
    y = max(Nose[1] - Radio, 0)
    Face = frame[y:Nose[1] + Radio, x:Nose[0] + Radio]
    Face = cv2.resize(Face, (64, 64))
    return Face
def select_image():
    global panelA, panelB, imageInit
    path = askopenfilename()
    if len(path) > 0:
        image = cv2.imread(path)
        image = cv2.resize(image, (640, 480))
        imageInit = image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Img.fromarray(image)
        image = ImageTk.PhotoImage(image)
        if panelA is None or panelB is None:
            panelA = Label(image=image)
            panelA.image = image
            panelA.pack(side="left", padx=10, pady=10)
        else:
            panelA.configure(image=image)
            panelA.image = image
def predict_ActionLabel_to_NewVideo_new(video_path):
    frames_before_resize = get_frames(video_path, divide=1, show=False)
    frames = []
    for i in range(len(frames_before_resize)):
        image = cv2.resize(frames_before_resize[i], (960, 540))
        frames.append(image)
    frames = np.array(frames) / 255.0
    # Stack each frame with four temporal-difference images (t-1, t+1, t-2, t+2)
    x_test = []
    for i in range(2, len(frames) - 3):
        motion_1 = frames[i - 1] - frames[i]
        motion_2 = frames[i + 1] - frames[i]
        motion_3 = frames[i - 2] - frames[i]
        motion_4 = frames[i + 2] - frames[i]
        x_test.append(np.concatenate((frames[i], motion_1, motion_2, motion_3, motion_4), axis=2))
    x_test = np.array(x_test)
    print('input shape:', x_test.shape)
    print('frames shape:', frames.shape)
    model = get_resnet50_model()
    results = np.array(model.predict(x_test))
    labels = []
    action = {0: 'idle', 1: 'pick', 2: 'push'}
    for result in results:
        index = result.argmax()
        labels.append(action[index])
    extract_actions(labels)
    video_name = video_path.split('/')[-1]
    video = add_ActionLabel_to_frames(labels, frames_before_resize[2:-3])
    frames_to_video(video, video_name)
def cropSinglePage(imageName: str, dimensionsDict: dict, folderName: str):
    """
    This function crops a single page from the scan (by its dimensions).

    Parameters:
        imageName (str): The name of the scanned image.
        dimensionsDict (dict): The dimensions of the scanned image.
        folderName (str): The name of the folder that the scan is saved in.
    """
    # Read the image from the folder in grayscale mode
    img = cv2.imread(os.path.join(INPUT_PATH, folderName, imageName), cv2.IMREAD_GRAYSCALE)
    originalName = imageName  # For the log
    x1 = dimensionsDict["x1"]
    x2 = dimensionsDict["x2"]
    y1 = dimensionsDict["y1"]
    y2 = dimensionsDict["y2"]
    # The offset to move to the next page of the scan
    delta = dimensionsDict["x2"] - dimensionsDict["x1"]
    for _ in range(dimensionsDict["pageNum"]):
        croppedImage = img[y1:y2, x1:x2]  # Crop the margins of the image
        # Resize the image to work with the same size as the manuscript
        croppedImage = cv2.resize(croppedImage, (RESIZE_UNITS["width"], RESIZE_UNITS["height"]))
        # Get the name of the image without the extension (e.g. without '.jpg')
        saveName = os.path.splitext(imageName)[0]
        cropToPatches(croppedImage, saveName, RESIZE_UNITS["width"], RESIZE_UNITS["height"], folderName)
        x1 += delta + dimensionsDict["margin"]  # Move the X1 axis to the next page
        x2 += delta + dimensionsDict["margin"]  # Move the X2 axis to the next page
        # Give the second page a different save name
        imageName = os.path.splitext(imageName)[0] + "2" + os.path.splitext(imageName)[1]
    global NUM_OF_CROPPED_IMAGES
    NUM_OF_CROPPED_IMAGES += 1
    logging.info("[" + inspect.stack()[0][3] + "] - " + "Image " + originalName + " Cropped successfully.")
def load_data(data_dir):
    """
    Load image data from directory `data_dir`.

    Assume `data_dir` has one directory named after each category, numbered
    0 through NUM_CATEGORIES - 1. Inside each category directory will be some
    number of image files.

    Return tuple `(images, labels)`. `images` should be a list of all of the
    images in the data directory, where each image is formatted as a numpy
    ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should be a
    list of integer labels, representing the categories for each of the
    corresponding `images`.
    """
    img_list = []
    label_list = []
    for root, dirs, files in os.walk(data_dir):
        for fname in files:
            if not fname.endswith('.ppm'):
                continue
            new_path = os.path.join(root, fname)
            # the category label is the name of the directory containing the file
            num = int(os.path.basename(root))
            # load a color image, setting flag to 1
            new_img = cv2.imread(new_path, 1)
            # resize image
            img_output = cv2.resize(new_img, (IMG_WIDTH, IMG_HEIGHT))
            # add the img array and integer label into the lists
            img_list.append(img_output)
            label_list.append(num)
    return (img_list, label_list)
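# A minimal usage sketch for load_data, assuming scikit-learn is available;
# the data directory name below is a placeholder.
# from sklearn.model_selection import train_test_split
# images, labels = load_data("gtsrb")
# x_train, x_test, y_train, y_test = train_test_split(
#     np.array(images), np.array(labels), test_size=0.4)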
def prepare_image(image: np.ndarray, new_width, new_height):
    curr_height, curr_width, _ = image.shape
    if curr_height > curr_width:
        diff = curr_height - curr_width
        offset = int(diff / 2)
        if curr_height > new_height:
            # crop a centered square out of the taller dimension
            image = image[offset:offset + curr_width, 0:curr_width]
        else:
            # pad the width symmetrically to make the image square
            image = np.pad(image, pad_width=((0, 0), (offset, offset), (0, 0)),
                           mode='constant', constant_values=0)
    else:
        diff = curr_width - curr_height
        offset = int(diff / 2)
        if curr_width > new_width:
            image = image[0:curr_height, offset:offset + curr_height]
        else:
            image = np.pad(image, pad_width=((offset, offset), (0, 0), (0, 0)),
                           mode='constant', constant_values=0)
    image = cv2.resize(image, (new_width, new_height))
    return image
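# A minimal sketch of what prepare_image does, using a synthetic input:
# a tall image is center-cropped (when larger than the target) or padded to a
# square before resizing, so the content is not distorted.
# tall = np.zeros((300, 100, 3), dtype=np.uint8)
# out = prepare_image(tall, 64, 64)  # cropped to a 100x100 square, then resized
# assert out.shape == (64, 64, 3)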
def cartoonizer(img, num_down=2, num_bi=5):
    # num_down: number of downsampling steps
    # num_bi: number of bilateral filtering steps
    img_c = img
    for ix in range(num_down):
        img_c = cv2.pyrDown(img_c)  # pyramid down: downsampling (the original re-downsampled `img` each pass)
    for iy in range(num_bi):
        img_c = cv2.bilateralFilter(img_c, d=9, sigmaColor=9, sigmaSpace=7)  # edge-preserving smoothing
    for ix in range(num_down):
        img_c = cv2.pyrUp(img_c)  # pyramid up: upsampling
    # Blur and threshold to get an edge mask
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    img_blur = cv2.medianBlur(img_gray, 7)
    img_edge = cv2.adaptiveThreshold(img_blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                     cv2.THRESH_BINARY, blockSize=9, C=2)
    # pyrDown/pyrUp rounding can change the size, so resize back to match the
    # edge mask (the original hard-coded (800, 800), which only works for
    # 800x800 inputs)
    img_c = cv2.resize(img_c, (img_edge.shape[1], img_edge.shape[0]))
    # Convert the mask to 3 channels and combine it with the smoothed image
    img_edge = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2RGB)
    img_cartoon = cv2.bitwise_and(img_c, img_edge)
    stack = np.hstack([img, img_cartoon])
    return stack
def detect_and_predict_mask(frame, faceNet, maskNet):
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224), (104.0, 177.0, 123.0))
    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)
    faces = []
    locs = []
    preds = []
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > 0.5:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))
            face = frame[startY:endY, startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)
            faces.append(face)
            locs.append((startX, startY, endX, endY))
    if len(faces) > 0:
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)
    return (locs, preds)
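# A hedged usage sketch for detect_and_predict_mask inside a capture loop;
# the model file names are placeholders, not the original project's paths.
# faceNet = cv2.dnn.readNet("deploy.prototxt", "res10_300x300_ssd.caffemodel")
# maskNet = load_model("mask_detector.model")
# cap = cv2.VideoCapture(0)
# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break
#     locs, preds = detect_and_predict_mask(frame, faceNet, maskNet)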
def get_frames_locally(video_path: str, output_frame_resolution=(640, 360)) -> list:
    cap = cv.VideoCapture(video_path)
    frames_read = 0
    frames = []
    print("[INFO]: Getting Frames from the video")
    with tqdm(total=float("inf")) as pbar:
        while cap.isOpened():
            try:
                ret, frame = cap.read()
                # Check for proper read
                if not ret:
                    break
                # Process the frame
                frame = cv.resize(frame, output_frame_resolution, interpolation=cv.INTER_CUBIC)
                frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
                frames.append(frame)
                # frames processed
                frames_read += 1
                pbar.set_postfix_str(f"Frames Processed: {frames_read}")
                pbar.update(1)
            except Exception as err:
                print(f"[ERROR]: {err}")
    print(f"[INFO]: Processed {frames_read} frames from {video_path}")
    return frames
def get_frame(self):
    ret, img = self.video.read()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    for (x, y, w, h) in faces:
        # draw a rectangle around the face on the main image
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # crop the detected face
        detected_face = img[int(y):int(y + h), int(x):int(x + w)]
        # transform to grayscale and resize to 48x48
        detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY)
        detected_face = cv2.resize(detected_face, (48, 48))
        img_pixels = image.img_to_array(detected_face)
        img_pixels = np.expand_dims(img_pixels, axis=0)
        # pixels are in the range [0, 255]; normalize to [0, 1]
        img_pixels /= 255
        with graph.as_default():
            # probabilities of the 7 expressions
            predictions = model.predict(img_pixels)
        # index of the max value: 0 angry, 1 disgust, 2 fear, 3 happy, 4 sad, 5 surprise, 6 neutral
        max_index = np.argmax(predictions[0])
        emotion = emotions[max_index]
        # write the emotion text above the rectangle
        cv2.putText(img, emotion, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        if emotion != "neutral":  # `is not` compares identity, not string equality
            global emotion_main
            emotion_main = emotion
    ret, jpeg = cv2.imencode('.jpg', img)
    jpeg_tobytes = jpeg.tobytes()
    return jpeg_tobytes, emotion_main
def style_transfer(pathIn='', model='', width=None):
    """
    pathIn: path to the original image
    model: path to the pre-trained model
    width: width of the stylized image; defaults to None, i.e. the
           original image size
    """
    # Read the original image, resize it to the requested size, then get its
    # width and height
    img = cv2.imread(pathIn)
    (h, w) = img.shape[:2]
    if width is not None:
        img = cv2.resize(img, (width, round(width * h / w)), interpolation=cv2.INTER_CUBIC)
        (h, w) = img.shape[:2]
    # Load the pre-trained model from disk
    print('Loading the pre-trained model......%s' % model)
    net = cv2.dnn.readNetFromTorch(model)
    # Build a blob from the image: set the size and subtract the per-channel
    # means (e.g. the channel means over the ImageNet training set), then run
    # one forward pass and report how long it took
    blob = cv2.dnn.blobFromImage(img, 1.0, (w, h), (103.939, 116.779, 123.680), swapRB=False, crop=False)
    net.setInput(blob)
    start = time.time()
    output = net.forward()
    end = time.time()
    print("Style transfer took: {:.2f} seconds".format(end - start))
    # Reshape the output, add the subtracted means back, and reorder the axes
    # to channel-last
    output = output.reshape((3, output.shape[2], output.shape[3]))
    output[0] += 103.939
    output[1] += 116.779
    output[2] += 123.680
    output = output.transpose(1, 2, 0)
    return output
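# A hedged usage sketch: the returned array is float-valued in BGR channel
# order, so clip it before writing; the file names here are placeholders.
# out = style_transfer('input.jpg', model='starry_night.t7', width=600)
# cv2.imwrite('stylized.jpg', np.clip(out, 0, 255).astype('uint8'))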
def findlines(board, showImage=True):
    img = np.array(Image.open(board).convert("L"))
    original_img = img.copy()
    # Images that are too big yield far too many lines
    if img.shape[0] * img.shape[1] > 300000:
        # Keep width and height proportional
        scale_ratio = img.shape[1] / 512
        new_height = int(np.round(img.shape[0] / scale_ratio))
        img = cv2.resize(img, (512, new_height))  # cv2.resize takes (width, height)
    else:
        scale_ratio = 1
    blur_gray = cv2.GaussianBlur(img, (5, 5), 0)
    edges = cv2.Canny(blur_gray, 60, 110)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 135)
    lines = lines.reshape((lines.shape[0], lines.shape[2]))
    vertical, horizontal = preProcessLines(lines, img)
    y_corners, x_corners = findintersect(vertical, horizontal)
    # Map the corner coordinates back to the original image scale
    x_corners = np.round(x_corners * scale_ratio).astype(int)
    y_corners = np.round(y_corners * scale_ratio).astype(int)
    if showImage:
        for i in range(9):
            for j in range(9):
                x = x_corners[i, j]
                y = y_corners[i, j]
                original_img = cv2.circle(original_img, (x, y), 3, [255, 0, 0], 2)
        cv2.imshow('hough', original_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    return (x_corners, y_corners)
def apply(self, image: np.ndarray, **params) -> np.ndarray:
    """
    Applies the augmentation to real data.

    :param image: spectrogram
    :param params: additional params for use with albumentations
    :return: speed-shifted spectrogram
    """
    image_width = image.shape[1]
    speed_rate = np.random.uniform(*self.speed_rate_range)
    # Stretch or squeeze the time axis by the sampled speed rate
    audio_speed_tune = cv2.resize(image, (int(image_width * speed_rate), image.shape[0]))
    audio_speed_tune_width = audio_speed_tune.shape[1]
    if audio_speed_tune_width < image_width:
        # Pad both sides with near-silence noise to restore the original width
        pad_length = image_width - audio_speed_tune_width
        pad_left = pad_length // 2
        pad_right = pad_length - pad_left  # handles odd pad lengths
        audio_speed_tune = np.r_[
            np.random.uniform(low=-80, high=-79, size=(pad_left, image.shape[0])),
            audio_speed_tune.transpose(1, 0),
            np.random.uniform(low=-80, high=-79, size=(pad_right, image.shape[0]))]
        audio_speed_tune = audio_speed_tune.transpose(1, 0)
    elif audio_speed_tune_width > image_width:
        # Cut a random window of the original width
        cut_len = audio_speed_tune_width - image_width
        start_idx = np.random.randint(0, cut_len)
        audio_speed_tune = audio_speed_tune[:, start_idx:start_idx + image_width]
    return audio_speed_tune
def input_preprocessing(input_image_path: str, model: tf.keras.Model) -> tuple:
    # Open the image
    input_image = cv.imread(input_image_path)
    # Pre-process it
    input_image = cv.cvtColor(input_image, cv.COLOR_BGR2RGB)
    # Run MTCNN
    detector = MTCNN()
    faces = detector.detect_faces(input_image)
    if len(faces) > 0:
        # Use the first sufficiently confident face
        for face in faces:
            if face["confidence"] > 0.9:
                # Crop it out
                x, y, width, height = face["box"]
                face_image = np.asarray(input_image[y:y + height, x:x + width], dtype=np.uint8)
                key = face["keypoints"]
                face_image = fau.face_alignment(face_image, key["left_eye"], key["right_eye"])
                # Interpolate
                face_image = cv.resize(face_image, (112, 112), interpolation=cv.INTER_CUBIC)
                # Normalize
                face_image = np.asarray(face_image / 255.0, dtype="float64")
                return face_image, model(face_image[None, ...]).numpy()
            else:
                continue
        raise Exception("[ERROR]: Proper Face not found!")
    else:
        raise Exception("[ERROR]: NO FACE FOUND IN IMAGE")
def start(self):
    print("start function")
    try:
        ret, image = self.cap.read()
        image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
        print("completed image capture")
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        color = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        print("changed the color")
        barcodes = pyzbar.decode(gray)
        print(barcodes)
        for barcode in barcodes:
            (x, y, w, h) = barcode.rect
            cv2.rectangle(color, (x, y), (x + w, y + h), (0, 0, 255), 2)
            text = barcode.data.decode('utf-8')
            cv2.putText(color, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (51, 51, 255), 2)
        # Convert the OpenCV image to a tkinter image
        self.image_array = Image.fromarray(color)
        image_tk = ImageTk.PhotoImage(self.image_array)
        # Update the image in the tkinter window
        self.label.config(image=image_tk)
        self.label.image = image_tk
        # Refresh the label with a recursive call
        if not self.stop:
            self.label.after(10, self.start)
        else:
            self.label.image = None
    except Exception as err:
        print(f"Some error: {err}")
def aHash(img):
    # Read the image (in color, so the BGR-to-gray conversion below works;
    # the original read with flag 0, which already returns grayscale and
    # makes cvtColor fail) and scale it to 8x8
    img = cv.imread(img.getim())
    img = cv.resize(img, (8, 8), interpolation=cv.INTER_CUBIC)
    # Convert to grayscale
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # s accumulates the pixel sum; hash_str accumulates the hash bits
    s = 0
    hash_str = ''
    # Sum all pixels
    for i in range(8):
        for j in range(8):
            s = s + gray[i, j]
    # Mean gray level
    avg = s / 64
    # A pixel above the mean contributes '1', otherwise '0'
    for i in range(8):
        for j in range(8):
            if gray[i, j] > avg:
                hash_str = hash_str + '1'
            else:
                hash_str = hash_str + '0'
    return hash_str
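# A hedged companion sketch: two aHash strings are typically compared with a
# Hamming distance; this helper is illustrative and not part of the original.
def hash_distance(hash1, hash2):
    """Count differing bits between two equal-length hash strings."""
    if len(hash1) != len(hash2):
        return -1  # not comparable
    return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))
# A distance of roughly 5 or fewer (out of 64 bits) is a common rule of thumb
# for "visually similar" images.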
def panoFolder(path, type):
    # Take only files of the specific type
    vidImagesList = [file for file in os.listdir(path) if file.endswith(type)]
    print(vidImagesList)
    # List of images
    vidImages = []
    for image in vidImagesList:
        file = path + "/" + image
        print(file)
        img = cv2.imread(file, cv2.IMREAD_UNCHANGED)
        ratio = 0.7  # percent of original size
        height = int(img.shape[0] * ratio)
        width = int(img.shape[1] * ratio)
        resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA)
        # Panorama.displayPano(resized)
        vidImages.append(resized)
    panoImg = ImageProcessor.panoImageList(vidImages)
    return panoImg
def img_btn(self, event):
    self.IMAGE_STREAM = True
    dialog = wx.FileDialog(self, u"Select an image to detect", os.getcwd(), '',
                           wildcard="(*.jpg)|*.jpg|(*.png)|*.png",
                           style=wx.FD_OPEN | wx.FD_CHANGE_DIR)
    if dialog.ShowModal() == wx.ID_OK:
        # Once a file is chosen, write its path to the tips control
        self.tips.SetValue(u"File path: " + dialog.GetPath() + "\n")
        # Update the global image path
        self.orgin_img_show = cv2.imread(str(dialog.GetPath()))
        dialog.Destroy()  # the original was missing the call parentheses
        # Convert from cv2 to wxPython
        self.orgin_img_show = cv2.resize(self.orgin_img_show, (600, 500))
        height, width = self.orgin_img_show.shape[:2]
        image1 = cv2.cvtColor(self.orgin_img_show, cv2.COLOR_BGR2RGB)
        pic = wx.Bitmap.FromBuffer(width, height, image1)
        # Display the image on the panel
        self.orgin_img.SetBitmap(pic)
def transition(self, frame, rng=0):
    # Second-order autoregressive update of position and scale, plus Gaussian noise
    up_x = A1 * (self.x - self.x0) + A2 * (self.xp - self.x0) + B0 * add_guassnoise(rng, TRANS_X_STD) + self.x0
    up_x = max(0.0, min(image_w - 1.0, up_x))
    up_y = A1 * (self.y - self.y0) + A2 * (self.yp - self.y0) + B0 * add_guassnoise(rng, TRANS_Y_STD) + self.y0
    up_y = max(0.0, min(image_h - 1.0, up_y))
    up_s = A1 * (self.s - 1.0) + A2 * (self.sp - 1.0) + B0 * add_guassnoise(rng, TRANS_S_STD) + 1.0
    up_s = max(0.1, up_s)
    # Shift the current state into the previous state
    self.xp = self.x
    self.yp = self.y
    self.sp = self.s
    self.x = up_x
    self.y = up_y
    self.s = up_s
    # Clip the scaled window to the frame and score it against the template
    y0 = max(0, int(self.y - h * self.s * 0.5))
    y1 = max(0, int(self.y + h * self.s * 0.5))
    x0 = max(0, int(self.x - w * self.s * 0.5))
    x1 = max(0, int(self.x + w * self.s * 0.5))
    noisy_sub = frame[y0:y1, x0:x1]
    noisy_sub = cv2.resize(noisy_sub, (cropped.shape[1], cropped.shape[0]), interpolation=cv2.INTER_CUBIC)
    self.weight = max(0.0, ssim(cropped, noisy_sub, multichannel=True))
def predict_photo(self):
    json_file = open('/home/pratz/Downloads/dataset/results/model28000.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = keras.models.model_from_json(loaded_model_json)
    # load weights into new model
    loaded_model.load_weights("/home/pratz/Downloads/dataset/results/model28000.h5")
    print("Loaded model from disk")
    im = cv2.imread(self.file_name, 0)
    plt.imshow(im)
    plt.show()
    im = cv2.resize(im, (64, 64))
    im = im.reshape((1, 4096))
    img = loaded_model.predict(im)[0]
    gen_img = (1 / 2.5) * img + 0.5
    self.predicted_filename = "/home/pratz/predicted_photo.jpg"
    plt.imsave(self.predicted_filename, gen_img)
    plt.imshow(gen_img)
    plt.show()
def send_in_thread(socket_connected, address_connected):
    """
    Video-sending worker thread.

    Sends frame data through the connected socket.
    """
    while True:
        time.sleep(0.01)
        frame = video_reader.read()
        frame = cv2.resize(frame, (640, 360))
        frame = frame[..., ::-1]  # BGR -> RGB
        # JPEG-encode the frame, then send a 4-byte length prefix followed by
        # the payload (the original overwrote the encoded bytes with the raw
        # frame, leaving the imencode call dead)
        retval, imgencode = cv2.imencode('.jpg', frame, encode_param)
        stringData = np.array(imgencode).tobytes()
        try:
            socket_connected.send(len(stringData).to_bytes(4, byteorder='little'))
            socket_connected.send(stringData)
        except Exception:
            socket_connected.close()
            logger.info(f'disconnect from: {address_connected}')
            break
def img_to_text(self, stream):
    img = cv2.cvtColor(stream, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, None, fx=10, fy=10, interpolation=cv2.INTER_CUBIC)
    # Clean the image up with a dilate/erode pass, then binarize with Otsu
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    img = cv2.threshold(cv2.GaussianBlur(img, (5, 5), 0), 0, 255,
                        cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    text = pytesseract.image_to_string(img, config='--psm 11')
    return text
def apply_classifier(x, model, img, im0):
    # applies a second stage classifier to yolo outputs
    im0 = [im0] if isinstance(im0, np.ndarray) else im0
    for i, d in enumerate(x):  # per image
        if d is not None and len(d):
            d = d.clone()
            # Reshape and pad cutouts
            b = xyxy2xywh(d[:, :4])  # boxes
            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
            d[:, :4] = xywh2xyxy(b).long()
            # Rescale boxes from img_size to im0 size
            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)
            # Classes
            pred_cls1 = d[:, 5].long()
            ims = []
            for j, a in enumerate(d):  # per item
                cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
                im = cv2.resize(cutout, (224, 224))  # BGR
                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x224x224
                im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
                im /= 255.0  # 0 - 255 to 0.0 - 1.0
                ims.append(im)
            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
            x[i] = x[i][pred_cls1 == pred_cls2]  # retain matching class detections
    return x
def make_video(images, outvid=None, fps=5, size=None, is_color=True, format="XVID"):
    """
    Create a video from a list of images.

    @param outvid      output video path
    @param images      list of images to use in the video
    @param fps         frames per second
    @param size        size of each frame
    @param is_color    color
    @param format      see http://www.fourcc.org/codecs.php
    @return            see http://opencv-python-tutroals.readthedocs.org/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html

    The function relies on http://opencv-python-tutroals.readthedocs.org/en/latest/.
    By default, the video will have the size of the first image.
    It will resize every image to this size before adding them to the video.
    """
    from cv2 import VideoWriter, VideoWriter_fourcc, imread, resize
    fourcc = VideoWriter_fourcc(*format)
    vid = None
    for image in images:
        if not os.path.exists(image):
            raise FileNotFoundError(image)
        img = imread(image)
        if vid is None:
            if size is None:
                size = img.shape[1], img.shape[0]
            vid = VideoWriter(outvid, fourcc, float(fps), size, is_color)
        # Resize if either dimension differs (the original used `and`, which
        # skipped frames with only one mismatched dimension)
        if size[0] != img.shape[1] or size[1] != img.shape[0]:
            img = resize(img, size)
        vid.write(img)
    vid.release()
    return vid
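# A hedged usage sketch for make_video; the frame file names are placeholders.
# import glob
# frames = sorted(glob.glob("frames/*.jpg"))
# make_video(frames, outvid="out.avi", fps=5)  # size defaults to the first frame's size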
def q2(image, sift):
    # a) Enlarge the given image by a scale percentage of 115.
    scale = 115 / 100
    width = int(image.shape[1] * scale)
    height = int(image.shape[0] * scale)
    new_dim = (width, height)
    resized = cv2.resize(image, new_dim)
    # b) Extract the SIFT features and show the keypoints on the scaled image
    # using the same parameter setting as for Task 1 (for the reduced number
    # of keypoints).
    # Find the keypoints and descriptors using the SIFT detector
    keyPoints1, des1 = sift.detector.detectAndCompute(image, None)
    keyPoints2, des2 = sift.detector.detectAndCompute(resized, None)
    img2 = cv2.drawKeypoints(image, keyPoints1, image)
    # Hint: brute-force matching is available in OpenCV for feature matching.
    bf_matcher = cv2.BFMatcher()
    # Use Matcher.match() to get the best matches between the two images
    matches = bf_matcher.match(des1, des2)
    # c) The keypoints in both images are similar, which shows that they share
    # the same common features.
    # d) Match the SIFT descriptors of the keypoints of the scaled image with
    # those of the original image using the nearest-neighbour distance ratio
    # method. Sort the matches in ascending order of distance so that the best
    # matches (lowest distance) come first.
    matches = sorted(matches, key=lambda x: x.distance)
    # Show the keypoints of the best-matching descriptors on both the original
    # and the scaled image.
    img_q2 = cv2.drawMatches(image, keyPoints1, resized, keyPoints2, matches[:6], None,
                             flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    plt.imshow(img_q2), plt.show()
    cv2.imwrite('d2.jpg', img_q2)
    cv2.imwrite('b2.jpg', img2)
def load_images(path, image_size):
    """
    Read images from a directory, shuffle them randomly,
    and label each image according to its category folder.

    :param path: directory to read from
    :param image_size: target image size
    :return: image data and labels
    """
    print("[INFO] loading images...")
    data = []
    labels = []
    # Collect the image paths and shuffle them
    imagePaths = sorted(list(paths.list_images(path)))
    lists = sorted(os.listdir(path + "/"))
    random.seed(42)
    random.shuffle(imagePaths)
    # Loop over the input images
    for imagePath in imagePaths:
        # Load the image, preprocess it, and store it in the data list
        image = cv2.imread(imagePath)
        image = cv2.resize(image, image_size)
        image = img_to_array(image)
        data.append(image)
        # Extract the class label from the image path and update the
        # labels list
        label = int(lists.index(imagePath.split(os.path.sep)[-2]))
        labels.append(label)
    # Scale the raw pixel intensities to the range [0, 1]
    data = np.array(data, dtype="float") / 255.0
    labels = np.array(labels)
    # Convert the labels from integers to one-hot vectors
    labels = to_categorical(labels, num_classes=len(os.listdir(path)))
    return data, labels
def predict(self, face):
    size = 80
    final_image = cv2.resize(face, (size, size))
    final_image = np.expand_dims(final_image, 0)
    self.model.allocate_tensors()
    # Get input and output tensors.
    input_details = self.model.get_input_details()
    output_details = self.model.get_output_details()
    input_data = np.array(final_image, dtype=np.float32)
    self.model.set_tensor(input_details[0]['index'], input_data)
    self.model.invoke()
    # `get_tensor()` returns a copy of the tensor data;
    # use `tensor()` to get a pointer to the tensor.
    result = self.model.get_tensor(output_details[0]['index'])
    # Every class whose score clears the 0.3 threshold is appended to the label
    self.emo = ""
    if result[0][0] > 0.3:
        self.emo += "anger_disgust"
    if result[0][1] > 0.3:
        self.emo += "joy"
    if result[0][2] > 0.3:
        self.emo += "neutral"
    if result[0][3] > 0.3:
        self.emo += "sadness"
    if result[0][4] > 0.3:
        self.emo += "surprise_fear"
    if self.emo == "":
        self.emo = "No result"
    return result[0]
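# A hedged setup sketch for the interpreter used above: the allocate_tensors /
# get_input_details / invoke calls match the TensorFlow Lite Interpreter API,
# so self.model is presumably created like this (the model path is a placeholder).
# import tensorflow as tf
# model = tf.lite.Interpreter(model_path="emotion_model.tflite")  # then assigned to self.model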
def face_rec(img_path):
    names = ['rabbit']
    [X, y] = read_images(img_path)
    y = np.asarray(y, dtype=np.int32)
    # Eigenfaces face-recognition algorithm
    model = cv2.face.EigenFaceRecognizer_create()
    # Train on the feature data and their labels
    model.train(np.asarray(X), np.asarray(y))
    # Argument 0 opens the default camera
    camera = cv2.VideoCapture(0)
    face_cascade = cv2.CascadeClassifier(r"D:/Anaconda/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml")
    while (True):
        read, img = camera.read()
        faces = face_cascade.detectMultiScale(img, 1.3, 5)
        for (x, y, w, h) in faces:
            img = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # NumPy indexes rows (y) first, then columns (x); the original
            # sliced gray[x:x+w, y:y+h], which grabs the wrong region
            roi = gray[y: y + h, x: x + w]
            try:
                # Resize the face region used for recognition
                roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_LINEAR)
                # Predict; returns a label and a confidence value
                params = model.predict(roi)
                print("Label: %s, Confidence: %.2f" % (params[0], params[1]))
                # Draw the recognized name above the rectangle
                cv2.putText(img, names[params[0]], (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)
            except:
                continue
        cv2.imshow("camera", img)
        if cv2.waitKey(1000 // 12) & 0xff == ord('q'):
            break
    cv2.destroyAllWindows()
def inference(self, cap):
    result_frames = []
    with torch.no_grad():
        # sometimes fails on the first call, so retry once
        try:
            sF = self.vgg(self.style)
        except:
            sF = self.vgg(self.style)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = resize(frame, (512, 256), interpolation=INTER_CUBIC)
        frame = frame.transpose((2, 0, 1))
        frame = frame[::-1, :, :]  # BGR to RGB
        frame = frame / 255.0
        frame = torch.from_numpy(frame.copy()).unsqueeze(0)
        self.content.data.resize_(frame.size()).copy_(frame)
        with torch.no_grad():
            cF = self.vgg(self.content)
        if self.layer == 'r41':
            feature, transmatrix = self.matrix(cF[self.layer], sF[self.layer])
        else:
            feature, transmatrix = self.matrix(cF, sF)
        transfer = self.dec(feature)
        transfer = transfer.clamp(0, 1).squeeze(0).data.cpu().numpy()
        transfer = transfer.transpose((1, 2, 0))
        transfer = transfer[..., ::-1]  # RGB back to BGR
        result_frames.append(transfer * 255)
    cap.release()
    return result_frames
def __get_image_chunk__(self, x_idx: int, y_idx: int) -> np.ndarray:
    """
    Reads an image chunk by index.

    :param x_idx: horizontal index of the chunk
    :param y_idx: vertical index of the chunk
    :return: image chunk at that index
    """
    y_window, x_window = self.slide_window
    max_x = math.ceil(self.image.shape[1] / x_window)
    max_y = math.ceil(self.image.shape[0] / y_window)
    assert x_idx < max_x, 'X_idx is more than max X'
    assert x_idx >= 0, 'X_idx is less than zero'
    assert y_idx < max_y, 'Y_idx is more than max Y'
    assert y_idx >= 0, 'Y_idx is less than zero'
    x_idx = x_idx * x_window
    y_idx = y_idx * y_window
    # Clamp the last window so it stays inside the image
    if y_idx + y_window > self.image.shape[0]:
        y_idx = self.image.shape[0] - y_window
    if x_idx + x_window > self.image.shape[1]:
        x_idx = self.image.shape[1] - x_window
    image_chunk = self.image[y_idx:y_idx + y_window, x_idx:x_idx + x_window]
    # Defensive check; note cv2.resize expects dsize as (width, height),
    # whereas slide_window is (height, width)
    if image_chunk.shape[:2] != self.slide_window:
        image_chunk = cv2.resize(image_chunk, dsize=(x_window, y_window))
    return image_chunk
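# A hedged usage sketch: walking every chunk of the wrapped image. The class
# name and constructor below are placeholders for whatever wraps this method.
# reader = ChunkReader(image, slide_window=(256, 256))  # hypothetical wrapper
# max_y = math.ceil(image.shape[0] / 256)
# max_x = math.ceil(image.shape[1] / 256)
# for y_idx in range(max_y):
#     for x_idx in range(max_x):
#         chunk = reader.__get_image_chunk__(x_idx, y_idx)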