Example No. 1
    def extract_face(self, frame, person):
        """ Cut and return image of face base in neck and nose points """
        Nose = person[0]
        Neck = person[1]
        RShoulder = person[2]
        LShoulder = person[5]
        Radio1 = math.sqrt(
            math.pow(Neck[1] - Nose[1], 2) + math.pow(Neck[0] - Nose[0], 2))
        Radio2 = math.sqrt(
            math.pow(Neck[1] - RShoulder[1], 2) +
            math.pow(Neck[0] - RShoulder[0], 2))
        Radio3 = math.sqrt(
            math.pow(Neck[1] - LShoulder[1], 2) +
            math.pow(Neck[0] - LShoulder[0], 2))
        Radio = max(Radio1, Radio2, Radio3)
        Radio = int(Radio)
        x = max(Nose[0] - Radio, 0)
        y = max(Nose[1] - Radio, 0)

        Face = frame[y:Nose[1] + Radio, x:Nose[0] + Radio]
        Face = cv2.resize(Face, (64, 64))
        return Face
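A minimal usage sketch, assuming person is an OpenPose-style keypoint array indexed as in the method; the instance name, frame, and coordinate values below are hypothetical:

import numpy as np

# Hypothetical 18-keypoint pose; only the points the method reads are filled in
person = np.zeros((18, 2), dtype=int)
person[0] = (320, 180)  # Nose
person[1] = (320, 260)  # Neck
person[2] = (250, 270)  # RShoulder
person[5] = (390, 270)  # LShoulder

face = extractor.extract_face(frame, person)  # 64x64 crop around the nose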
Example No. 2
def select_image():
    global panelA, panelB, imageInit
    path = askopenfilename()

    if len(path) > 0:

        image = cv2.imread(path)
        image = cv2.resize(image, (640, 480))

        imageInit = image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Img.fromarray(image)
        image = ImageTk.PhotoImage(image)

        if panelA is None or panelB is None:
            panelA = Label(image=image)
            panelA.image = image
            panelA.pack(side="left", padx=10, pady=10)

        else:
            panelA.configure(image=image)
            panelA.image = image
Example No. 3
def predict_ActionLabel_to_NewVideo_new(video_path):
    frames_before_resize = get_frames(video_path, divide=1, show=False)

    frames = []
    for i in range(len(frames_before_resize)):

        image = cv2.resize(frames_before_resize[i], (960, 540))
        # print(image.shape)
        frames.append(image)

    frames = np.array(frames) / 255.0

    x_test = []
    for i in range(2, len(frames) - 3):
        motion_1 = frames[i - 1] - frames[i]
        motion_2 = frames[i + 1] - frames[i]
        motion_3 = frames[i - 2] - frames[i]
        motion_4 = frames[i + 2] - frames[i]
        x_test.append(
            np.concatenate((frames[i], motion_1, motion_2, motion_3, motion_4),
                           axis=2))

    x_test = np.array(x_test)

    print('input shape:', x_test.shape)
    print('frames shape:', frames.shape)
    model = get_resnet50_model()
    results = np.array(model.predict(x_test))
    # print(results)
    labels = []
    action = {0: 'idle', 1: 'pick', 2: 'push'}
    for result in results:
        index = result.argmax()
        labels.append(action[index])
    extract_actions(labels)

    video_name = video_path.split('/')[-1]
    video = add_ActionLabel_to_frames(labels, frames_before_resize[2:-3])
    frames_to_video(video, video_name)
Example No. 4
def cropSinglePage(imageName: str, dimensionsDict: dict, folderName: str):
    """
    This function crops a single page from the scan (by its dimensions).

    Parameters:
    imageName (str): The name of the scanned image.
    dimensionsDict (dict): The crop coordinates (x1, x2, y1, y2), the page count, and the margin.
    folderName (str): The name of the folder that the scan is saved in.
    """
    img = cv2.imread(os.path.join(INPUT_PATH, folderName,
                                  imageName), cv2.IMREAD_GRAYSCALE
                     )  # Read the image from the folder with grayscale mode
    originalName = imageName  # For the log
    x1 = dimensionsDict["x1"]
    x2 = dimensionsDict["x2"]
    y1 = dimensionsDict["y1"]
    y2 = dimensionsDict["y2"]
    delta = dimensionsDict["x2"] - dimensionsDict[
        "x1"]  # The offset to move to the next page of the scan
    for _ in range(dimensionsDict["pageNum"]):
        croppedImage = img[y1:y2, x1:x2]  # Crop the margins of the image
        croppedImage = cv2.resize(
            croppedImage, (RESIZE_UNITS["width"], RESIZE_UNITS["height"])
        )  # Resize the image to work with the same size of the manuscript
        saveName = os.path.splitext(
            imageName
        )[0]  # Get the name of the image without the extension (e.g. without '.jpg')
        cropToPatches(croppedImage, saveName, RESIZE_UNITS["width"],
                      RESIZE_UNITS["height"], folderName)
        x1 += delta + dimensionsDict[
            "margin"]  # Move the X1 axis to the next page
        x2 += delta + dimensionsDict[
            "margin"]  # Move the X2 axis to the next page
        imageName = os.path.splitext(imageName)[0] + "2" + os.path.splitext(
            imageName)[1]  # give the second page a different save name
    global NUM_OF_CROPPED_IMAGES
    NUM_OF_CROPPED_IMAGES += 1
    logging.info("[" + inspect.stack()[0][3] + "] - " + "Image " +
                 originalName + " Cropped successfully.")
Example No. 5
def load_data(data_dir):
    """
    Load image data from directory `data_dir`.

    Assume `data_dir` has one directory named after each category, numbered
    0 through NUM_CATEGORIES - 1. Inside each category directory will be some
    number of image files.

    Return tuple `(images, labels)`. `images` should be a list of all
    of the images in the data directory, where each image is formatted as a
    numpy ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should
    be a list of integer labels, representing the categories for each of the
    corresponding `images`.
    """

    path = os.walk(data_dir)
    img_list = []
    label_list = []

    for root, dirs, files in path:
        for fname in files:
            if fname.endswith('.ppm'):
                new_path = os.path.join(root, fname)
            else:
                continue
            # the category directory name is the integer label
            num = int(os.path.basename(root))

            # load a color image, setting flag to 1
            new_img = cv2.imread(new_path, 1)

            # resize image
            img_output = cv2.resize(new_img, (IMG_WIDTH, IMG_HEIGHT))

            # add img array and labels as tuples into a list
            img_list.append(img_output)
            label_list.append(num)

    return (img_list, label_list)
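A short usage sketch under the docstring's assumptions; the data directory name is hypothetical:

images, labels = load_data("gtsrb")
print(f"loaded {len(images)} images across {len(set(labels))} categories")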
Example No. 6
def prepare_image(image: np.ndarray, new_width, new_height):
    curr_height, curr_width, _ = image.shape
    if curr_height > curr_width:
        diff = curr_height - curr_width
        offset = int(diff / 2)
        if curr_height > new_height:
            
            image = image[offset:offset+curr_width, 0:curr_width]
        else:
            # pad the width on both sides to make the image square
            image = np.pad(image, pad_width=((0, 0), (offset, offset), (0, 0)), mode='constant', constant_values=0)
    else:
        diff = curr_width - curr_height
        offset = int(diff / 2)
        if curr_width > new_width:
            image = image[0:curr_height, offset:offset+curr_height]
        else:
            image = np.pad(image, pad_width=((offset, offset), (0, 0), (0, 0)), mode='constant', constant_values=0)

    image = cv2.resize(image, (new_width, new_height))
    return image
Example No. 7
def cartoonizer(img, num_down=2, num_bi=5):
    #Params
    #num_down = 2 #DOWNSAMPLE STEPS
    #num_bi = 5 # BILATERAL FILTERING STEPS
    img_c = img
    for ix in range(num_down):
        img_c = cv2.pyrDown(img_c)  # Pyramid Down : Downsampling
    # print(img_c.shape)

    for iy in range(num_bi):
        img_c = cv2.bilateralFilter(img_c, d=9, sigmaColor=9,
                                    sigmaSpace=7)  #Filtering
    # print(img_c.shape)

    #UPSAMPLING
    for ix in range(num_down):
        img_c = cv2.pyrUp(img_c)  # Pyramid Up : Upsampling
    # print(img_c.shape)

    #BLUR and Threshold
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)  # GRAY SCALE
    img_blur = cv2.medianBlur(img_gray, 7)  #MEDIAN BLUR
    img_edge = cv2.adaptiveThreshold(img_blur,
                                     255,
                                     cv2.ADAPTIVE_THRESH_MEAN_C,
                                     cv2.THRESH_BINARY,
                                     blockSize=9,
                                     C=2)

    img_c = cv2.resize(img_c, (img.shape[1], img.shape[0]))  # match the original size for the bitwise AND
    #RGB CONVERSION + BITWISE &
    img_edge = cv2.cvtColor(img_edge, cv2.COLOR_GRAY2RGB)
    # print(img_c.shape)
    # print(img_edge.shape)
    img_cartoon = cv2.bitwise_and(img_c, img_edge)

    stack = np.hstack([img, img_cartoon])
    return stack
Example No. 8
def detect_and_predict_mask(frame, faceNet, maskNet):

    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (224, 224), (104.0, 177.0, 123.0))

    faceNet.setInput(blob)
    detections = faceNet.forward()
    print(detections.shape)

    faces = []
    locs = []
    preds = []

    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]

        if confidence > 0.5:

            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

            face = frame[startY:endY, startX:endX]
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)

            faces.append(face)
            locs.append((startX, startY, endX, endY))

    if len(faces) > 0:
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)

    return (locs, preds)
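A typical call site for this helper, sketched with hypothetical faceNet (an OpenCV DNN face detector) and maskNet (a two-class Keras classifier) objects:

(locs, preds) = detect_and_predict_mask(frame, faceNet, maskNet)
for (box, pred) in zip(locs, preds):
    (startX, startY, endX, endY) = box
    (mask, withoutMask) = pred  # assumes a [mask, no-mask] softmax output
    label = "Mask" if mask > withoutMask else "No Mask"
    cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)
    cv2.putText(frame, label, (startX, startY - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 0), 2)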
Example No. 9
def get_frames_locally(
    video_path: str, output_frame_resolution=(640, 360)) -> list:

    cap = cv.VideoCapture(video_path)
    frames_read = 0

    frames = []
    print("[INFO]: Getting Frames from the video")
    with tqdm(total=float("inf")) as pbar:
        while cap.isOpened():

            try:
                ret, frame = cap.read()

                # Check for proper read
                if not ret:
                    break

                # Process the frame
                frame = cv.resize(frame,
                                  output_frame_resolution,
                                  interpolation=cv.INTER_CUBIC)
                frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
                frames.append(frame)

                # frames processed
                frames_read += 1
                pbar.set_postfix_str(f"Frames Processed: {frames_read}")
                pbar.update(1)

            except Exception as err:
                print(f"[ERROR]: {err}")
                break

    print(f"""
    [INFO]: Processed {frames_read} frames from the {video_path}
    """)

    return frames
Example No. 10
    def get_frame(self):
        ret, img = self.video.read()

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)  # draw rectangle to main image

            detected_face = img[int(y):int(y + h), int(x):int(x + w)]  # crop detected face
            detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY)  # transform to gray scale
            detected_face = cv2.resize(detected_face, (48, 48))  # resize to 48x48

            img_pixels = image.img_to_array(detected_face)
            img_pixels = np.expand_dims(img_pixels, axis=0)

            img_pixels /= 255  # normalize pixels from the [0, 255] range to [0, 1]

            with graph.as_default():
                predictions = model.predict(img_pixels)  # store probabilities of 7 expressions

            # find max indexed array 0: angry, 1:disgust, 2:fear, 3:happy, 4:sad, 5:surprise, 6:neutral
            max_index = np.argmax(predictions[0])

            emotion = emotions[max_index]

            # img = cv2.flip(img, 1)
            # write emotion text above rectangle
            cv2.putText(img, emotion, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            if emotion != "neutral":
                global emotion_main
                emotion_main = emotion

        # frame_flip = cv2.flip(img, 1)
        ret, jpeg = cv2.imencode('.jpg', img)
        jpeg_tobytes = jpeg.tobytes()
        return jpeg_tobytes, emotion_main
Example No. 11
def style_transfer(pathIn='',model='',width=None):
 
    '''
    pathIn: path to the source image
    model: path to the pre-trained model
    width: width of the stylized output; defaults to None, i.e. the source image size
    '''
 
    ## Read the source image, resize it as requested, then record its width and height
    img = cv2.imread(pathIn)
    (h, w) = img.shape[:2]
    if width is not None:
        img = cv2.resize(img, (width, round(width*h/w)), interpolation=cv2.INTER_CUBIC)
        (h, w) = img.shape[:2]
    
    ## Load the pre-trained model from disk
    print('Loading the pre-trained model......%s' % model)
    net = cv2.dnn.readNetFromTorch(model)
    
    ## Build a blob from the image: set the size and subtract the per-channel means (e.g. the ImageNet training-set means),
    ## then run one forward pass and report the elapsed time
    blob = cv2.dnn.blobFromImage(img, 1.0, (w, h), (103.939, 116.779, 123.680), swapRB=False, crop=False)
    net.setInput(blob)
    start = time.time()
    output = net.forward()
    end = time.time()
    print("风格迁移花费:{:.2f}秒".format(end - start))
 
    ## Reshape the output, add the subtracted means back, and reorder the color channels
    output = output.reshape((3, output.shape[2], output.shape[3]))
    output[0] += 103.939
    output[1] += 116.779
    output[2] += 123.680
    output = output.transpose(1, 2, 0)
    
    return output
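The network output is a float array that can fall outside [0, 255], so it should be clipped before saving; a minimal usage sketch (the file and model paths are hypothetical):

output = style_transfer(pathIn='input.jpg',
                        model='models/starry_night.t7',
                        width=600)
output = np.clip(output, 0, 255).astype('uint8')
cv2.imwrite('stylized.jpg', output, [cv2.IMWRITE_JPEG_QUALITY, 80])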
Example No. 12
def findlines(board, showImage=True):
    img = np.array(Image.open(board).convert("L"))
    original_img = img.copy()

    # Images that are too big yield far too many lines
    if img.shape[0] * img.shape[1] > 300000:
        # Keep width and height proportional
        scale_ratio = img.shape[1] / 512
        new_height = int(np.round(img.shape[0] / scale_ratio))

        img = cv2.resize(img, (512, new_height))
    else:
        scale_ratio = 1

    blur_gray = cv2.GaussianBlur(img, (5, 5), 0)
    edges = cv2.Canny(blur_gray, 60, 110)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 135)
    lines = lines.reshape((lines.shape[0], lines.shape[2]))

    vertical, horizontal = preProcessLines(lines, img)

    y_corners, x_corners = findintersect(vertical, horizontal)

    x_corners = np.round(x_corners * scale_ratio).astype(int)
    y_corners = np.round(y_corners * scale_ratio).astype(int)

    if showImage:
        for i in range(9):
            for j in range(9):
                x = x_corners[i, j]
                y = y_corners[i, j]
                original_img = cv2.circle(original_img, (x, y), 3, [255, 0, 0],
                                          2)
        cv2.imshow('hough', original_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return (x_corners, y_corners)
Example No. 13
    def apply(self, image: np.ndarray, **params) -> np.ndarray:
        """
        Applies augmentation on real data

        :param image: spectrogram
        :param params: additional params for use of albumentations
        :return: shifted spectrogram
        """

        image_width = image.shape[1]

        speed_rate = np.random.uniform(*self.speed_rate_range)
        audio_speed_tune = cv2.resize(
            image, (int(image_width * speed_rate), image.shape[0]))

        audio_speed_tune_width = audio_speed_tune.shape[1]

        if audio_speed_tune_width < image_width:
            pad_length = image_width - audio_speed_tune_width
            pad_left = pad_length // 2
            pad_right = pad_length - pad_left  # keeps the total width exact when pad_length is odd
            audio_speed_tune = np.r_[
                np.random.uniform(low=-80, high=-79,
                                  size=(pad_left, image.shape[0])),
                audio_speed_tune.transpose(1, 0),
                np.random.uniform(low=-80, high=-79,
                                  size=(pad_right, image.shape[0]))]

            audio_speed_tune = audio_speed_tune.transpose(1, 0)
        elif audio_speed_tune_width > image_width:
            cut_len = audio_speed_tune_width - image_width

            start_idx = np.random.randint(0, cut_len)

            audio_speed_tune = audio_speed_tune[:, start_idx:start_idx +
                                                image_width]

        return audio_speed_tune
Example No. 14
def input_preprocessing(input_image_path: str, model: tf.keras.Model) -> tuple:

    # Open the image
    input_image = cv.imread(input_image_path)

    # Pre-process it out
    input_image = cv.cvtColor(input_image, cv.COLOR_BGR2RGB)

    # Run MTCNN
    detector = MTCNN()
    faces = detector.detect_faces(input_image)

    if (len(faces) > 0):
        # Get the first face
        for face in faces:
            if face["confidence"] > 0.9:
                # Crop it out
                x, y, width, height = face["box"]
                face_image = np.asarray(input_image[y:y + height, x:x + width],
                                        dtype=np.uint8)

                key = face["keypoints"]
                face_image = fau.face_alignment(face_image, key["left_eye"],
                                                key["right_eye"])

                # Interpolate
                face_image = cv.resize(face_image, (112, 112),
                                       interpolation=cv.INTER_CUBIC)

                # Normalize
                face_image = np.asarray(face_image / 255.0, dtype="float64")

                return face_image, model(face_image[None, ...]).numpy()
            else:
                continue
        raise Exception("[ERROR]: Proper Face not found!")
    else:
        raise Exception("[ERROR]: NO FACE FOUND IN IMAGE")
Example No. 15
    def start(self):
        print("start function")
        try:
            ret, image = self.cap.read()
            image = cv2.resize(image,
                               None,
                               fx=0.5,
                               fy=0.5,
                               interpolation=cv2.INTER_AREA)
            print("completed image capture")
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            color = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            print("changed the color")
            barcodes = pyzbar.decode(gray)
            print(barcodes)

            for barcode in barcodes:
                (x, y, w, h) = barcode.rect
                cv2.rectangle(color, (x, y), (x + w, y + h), (0, 0, 255), 2)
                text = barcode.data.decode('utf-8')
                cv2.putText(color, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.5, (51, 51, 255), 2)

            # Convert the opencv image to tkinter image
            self.image_array = Image.fromarray(color)
            image_tk = ImageTk.PhotoImage(self.image_array)

            # update the image in the tkinter window
            self.label.config(image=image_tk)
            self.label.image = image_tk

            # refresh the label with recursive call
            if not self.stop:
                self.label.after(10, self.start)
            else:
                self.label.image = None
        except Exception as err:
            print(f"capture error: {err}")
Example No. 16
def aHash(img):
    # Read the image and shrink it to 8x8
    img = cv.imread(img.getim())
    img = cv.resize(img, (8, 8), interpolation=cv.INTER_CUBIC)
    # Convert to grayscale
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # s holds the running pixel sum (initially 0); hash_str holds the hash string (initially '')
    s = 0
    hash_str = ''
    # Accumulate the pixel sum
    for i in range(8):
        for j in range(8):
            s = s + gray[i, j]
    # Compute the average gray level
    avg = s / 64
    # Pixels brighter than the average map to '1', others to '0', forming the image hash
    for i in range(8):
        for j in range(8):
            if gray[i, j] > avg:
                hash_str = hash_str + '1'
            else:
                hash_str = hash_str + '0'
    return hash_str
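Average hashes like this one are normally compared by Hamming distance; a small companion sketch, not part of the original snippet:

def hamming_distance(hash1, hash2):
    # Hashes of different lengths cannot be compared
    if len(hash1) != len(hash2):
        return -1
    # Count differing positions; a smaller count means more similar images
    return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))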
Example No. 17
    def panoFolder(path, type):

        vidImagesList = [file for file in os.listdir(path) if file.endswith(type)]
        print(vidImagesList)
        

        # list of images
        vidImages=[]
        for image in vidImagesList:
            # build the full path for this file
            file = path + "/" + image
            print(file)
            img = cv2.imread(file,cv2.IMREAD_UNCHANGED)
            ratio = 0.7  # fraction of the original size
            height = int(img.shape[0] * ratio)
            width = int(img.shape[1] * ratio)
            resized = cv2.resize(img,(width,height),interpolation=cv2.INTER_AREA)
            # Panorama.displayPano(resized)
            vidImages.append(resized)


        panoImg = ImageProcessor.panoImageList(vidImages)
        return panoImg
Example No. 18
 def img_btn(self, event):
     self.IMAGE_STREAM = True
     dialog = wx.FileDialog(self,
                            u"选择图片检测",
                            os.getcwd(),
                            '',
                            wildcard="(*.jpg)|*.jpg|(*.png)|*.png",
                            style=wx.FD_OPEN | wx.FD_CHANGE_DIR)
     if dialog.ShowModal() == wx.ID_OK:
         # Once a file is confirmed, write its path to the tips control
         self.tips.SetValue(u"File path: " + dialog.GetPath() + "\n")
         self.orgin_img_show = cv2.imread(str(dialog.GetPath()))  # update the global image
         dialog.Destroy()
     # Convert the cv2 image for wxPython
     self.orgin_img_show = cv2.resize(
         self.orgin_img_show,
         (600, 500),
     )
     height, width = self.orgin_img_show.shape[:2]
     image1 = cv2.cvtColor(self.orgin_img_show, cv2.COLOR_BGR2RGB)
     pic = wx.Bitmap.FromBuffer(width, height, image1)
     # Show the image on the panel:
     self.orgin_img.SetBitmap(pic)
Example No. 19
 def transition(self,frame,rng = 0):
     up_x = A1 * (self.x - self.x0) + A2 * (self.xp - self.x0) + B0 * add_guassnoise(rng,TRANS_X_STD) + self.x0
     up_x = max(0.0, min(image_w-1.0,up_x))
     up_y = A1 * (self.y - self.y0) +  A2 * (self.yp - self.y0) + B0 * add_guassnoise(rng,TRANS_Y_STD) + self.y0
     up_y = max(0.0, min(image_h-1.0,up_y))
     up_s = A1 * (self.s - 1.0) + A2 * (self.sp - 1.0) + B0 * add_guassnoise(rng,TRANS_S_STD) + 1.0
     # print(up_s,self.s)
     up_s = max(0.1,up_s)
     self.xp = self.x
     self.yp = self.y
     self.sp = self.s
     self.x = up_x
     self.y = up_y
     self.s = up_s
     y0 = max(0,int(self.y - h * self.s * 0.5))
     y1 = max(0,int(self.y + h * self.s * 0.5))
     x0 = max(0,int(self.x - w * self.s * 0.5))
     x1 = max(0,int(self.x + w * self.s * 0.5))
     # print(self.x,self.y)
     #print(y0,y1,x0,x1)
     noisy_sub = frame[y0: y1,x0:x1]
     noisy_sub = cv2.resize(noisy_sub, (cropped.shape[1], cropped.shape[0]), interpolation=cv2.INTER_CUBIC)
     self.weight = max(0.0, ssim(cropped, noisy_sub, multichannel=True))
Example No. 20
 def predict_photo(self):
     json_file = open('/home/pratz/Downloads/dataset/results/model28000.json', 'r')
     loaded_model_json = json_file.read()
     json_file.close()
     loaded_model = keras.models.model_from_json(loaded_model_json)
     # load weights into new model
     loaded_model.load_weights("/home/pratz/Downloads/dataset/results/model28000.h5")
     print("Loaded model from disk")
     im = cv2.imread(self.file_name,0)
     plt.imshow(im)
     plt.show()    
     im = cv2.resize(im,(64,64))
     im = im.reshape((1,4096))
     img = loaded_model.predict(im)[0]
     #img=cv2.GaussianBlur(img, (3,3), 0)
     #img = cv2.addWeighted(blur,1.5,img,-0.5,0)
     gen_img = (1/2.5) * img + 0.5 
     #cv2.imwrite("/home/pratz/predicted_photo.jpg",gen_img)
     self.predicted_filename="/home/pratz/predicted_photo.jpg"
     
     plt.imsave(self.predicted_filename,gen_img)       
     plt.imshow(gen_img)
     plt.show()
Example No. 21
    def send_in_thread(socket_connected, address_connected):
        """
        发送视频子线程
        通过连接socket对象发送数据
        """
        while True:
            time.sleep(0.01)
            frame = video_reader.read()
            frame = cv2.resize(frame, (640, 360))
            frame = frame[..., ::-1]
            retval, imgencode = cv2.imencode('.jpg', frame, encode_param)
            data = np.array(imgencode)
            stringData = data.tobytes()

            try:
                socket_connected.send(
                    len(stringData).to_bytes(4, byteorder='little'))
                socket_connected.send(stringData)
            except Exception:
                socket_connected.close()
                logger.info(f'disconnect from: {address_connected}')
                break
Example No. 22
    def img_to_text(self, stream):

        img = cv2.cvtColor(stream, cv2.COLOR_BGR2GRAY)

        img = cv2.resize(img,
                         None,
                         fx=10,
                         fy=10,
                         interpolation=cv2.INTER_CUBIC)

        kernel = np.ones((1, 1), np.uint8)

        img = cv2.dilate(img, kernel, iterations=1)
        img = cv2.erode(img, kernel, iterations=1)

        img = cv2.threshold(cv2.GaussianBlur(img, (5, 5), 0), 0, 255,
                            cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

        text = pytesseract.image_to_string(img, config='--psm 11')

        return text
Example No. 23
def apply_classifier(x, model, img, im0):
    # applies a second stage classifier to yolo outputs
    im0 = [im0] if isinstance(im0, np.ndarray) else im0
    for i, d in enumerate(x):  # per image
        if d is not None and len(d):
            d = d.clone()

            # Reshape and pad cutouts
            b = xyxy2xywh(d[:, :4])  # boxes
            b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # rectangle to square
            b[:, 2:] = b[:, 2:] * 1.3 + 30  # pad
            d[:, :4] = xywh2xyxy(b).long()

            # Rescale boxes from img_size to im0 size
            scale_coords(img.shape[2:], d[:, :4], im0[i].shape)

            # Classes
            pred_cls1 = d[:, 5].long()
            ims = []
            for j, a in enumerate(d):  # per item
                cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])]
                im = cv2.resize(cutout, (224, 224))  # BGR
                # cv2.imwrite('test%i.jpg' % j, cutout)

                im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x224x224
                im = np.ascontiguousarray(im,
                                          dtype=np.float32)  # uint8 to float32
                im /= 255.0  # 0 - 255 to 0.0 - 1.0
                ims.append(im)

            pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(
                1)  # classifier prediction
            x[i] = x[i][pred_cls1 ==
                        pred_cls2]  # retain matching class detections

    return x
Example No. 24
def make_video(images,
               outvid=None,
               fps=5,
               size=None,
               is_color=True,
               format="XVID"):
    """
	Create a video from a list of images.

	@param      outvid      output video
	@param      images      list of images to use in the video
	@param      fps         frame per second
	@param      size        size of each frame
	@param      is_color    color
	@param      format      see http://www.fourcc.org/codecs.php
	@return                 see http://opencv-python-tutroals.readthedocs.org/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html

	The function relies on http://opencv-python-tutroals.readthedocs.org/en/latest/.
	By default, the video will have the size of the first image.
	It will resize every image to this size before adding them to the video.
	"""
    from cv2 import VideoWriter, VideoWriter_fourcc, imread, resize
    fourcc = VideoWriter_fourcc(*format)
    vid = None
    for image in images:
        if not os.path.exists(image):
            raise FileNotFoundError(image)
        img = imread(image)
        if vid is None:
            if size is None:
                size = img.shape[1], img.shape[0]
            vid = VideoWriter(outvid, fourcc, float(fps), size, is_color)
        if size[0] != img.shape[1] or size[1] != img.shape[0]:
            img = resize(img, size)
        vid.write(img)
    vid.release()
    return vid
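A minimal usage sketch, assuming a directory of numbered frame images (the paths and codec choice are hypothetical):

import glob

frames = sorted(glob.glob("frames/*.jpg"))  # hypothetical frame directory
make_video(frames, outvid="out.avi", fps=24, format="XVID")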
Example No. 25
def q2(image, sift):
    # a)Enlarge the given image by a scale percentage of 115.
    scale = 115
    scale = scale/100

    width = int(image.shape[1] * scale)
    height = int(image.shape[0] * scale)
    new_dim = (width, height)
    resized = cv2.resize(image, new_dim)
    

    # b) Extract the SIFT features and show the keypoints on the scaled image using the same
    # parameter setting as for Task 1 (for the reduced number of keypoints).
    ## find the keypoints and descriptors using sift detector
    keyPoints1, des1 = sift.detector.detectAndCompute(image, None)
    # Detect keypoints and compute descriptors on the scaled image with the same detector
    keyPoints2, des2 = sift.detector.detectAndCompute(resized, None)
    img2 = cv2.drawKeypoints(image, keyPoints1, image)
    # Hint: Brute-force matching is available in OpenCV for feature matching.
    bf_matcher = cv2.BFMatcher()
    #use Matcher.match() method to get the best matches in two images
    matches = bf_matcher.match(des1, des2)
    #matches = bf_matcher.knnMatch(des1, des2, k=2)
    # c) The keypoints in both images are similar, which shows that they share the same common features.

    # d) Match the SIFT descriptors of the keypoints of the scaled image with those of the original image 
    # using the nearest-neighbour distance ratio method
    # Sort the matches in ascending order of distance so that the best matches (lowest distance) come first
    matches = sorted(matches, key=lambda x: x.distance)

    # Show the keypoints of the 5 best-matching descriptors on both the original and the scaled image.
    img_q2 = cv2.drawMatches(image, keyPoints1, resized, keyPoints2,
                             matches[:5], None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
    plt.imshow(img_q2), plt.show()
    cv2.imwrite('d2.jpg', img_q2)
    cv2.imwrite('b2.jpg', img2)
Example No. 26
def load_images(path, image_size):
    """
    从文件中读取图片,并将图片随机乱序排序
    根据分类文件夹打上标签
    :param path: 需要读取的文件路径
    :param image_size:图像张量
    :return:图片数据和标签
    """
    print("[INFO] loading images...")
    data = []
    labels = []
    # Collect the image paths and shuffle them randomly
    imagePaths = sorted(list(paths.list_images(path)))
    lists = sorted(os.listdir(path + "/"))
    random.seed(42)
    random.shuffle(imagePaths)

    # Loop over the input images
    for imagePath in imagePaths:
        # Load the image, preprocess it, and store it in the data list
        image = cv2.imread(imagePath)
        image = cv2.resize(image, image_size)
        image = img_to_array(image)
        data.append(image)

        # Extract the class label from the image path and update
        # the labels list
        label = int(lists.index(imagePath.split(os.path.sep)[-2]))
        labels.append(label)

    # Scale the raw pixel intensities to the range [0, 1]
    data = np.array(data, dtype="float") / 255.0
    labels = np.array(labels)

    # Convert the labels from integers to one-hot vectors
    labels = to_categorical(labels, num_classes=len(os.listdir(path)))
    return data, labels
Example No. 27
    def predict(self, face):
        size = 80
        final_image = cv2.resize(face, (size, size))
        final_image = np.expand_dims(final_image, 0)

        self.model.allocate_tensors()

        # Get input and output tensors.
        input_details = self.model.get_input_details()
        output_details = self.model.get_output_details()

        # Test the model on random input data.
        # input_shape = input_details[0]['shape']
        input_data = np.array(final_image, dtype=np.float32)
        self.model.set_tensor(input_details[0]['index'], input_data)

        self.model.invoke()

        # The function `get_tensor()` returns a copy of the tensor data.
        # Use `tensor()` in order to get a pointer to the tensor.
        result = self.model.get_tensor(output_details[0]['index'])

        self.emo = ""
        if result[0][0] > 0.3:
            self.emo += "anger_disgust"
        if result[0][1] > 0.3:
            self.emo += "joy"
        if result[0][2] > 0.3:
            self.emo += "neutral"
        if result[0][3] > 0.3:
            self.emo += "sadness"
        if result[0][4] > 0.3:
            self.emo += "surprise_fear"
        if self.emo == "":
            self.emo = "No result"

        return result[0]
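The snippet assumes self.model is a TensorFlow Lite interpreter; a minimal construction sketch (the model path is hypothetical):

import tensorflow as tf

# Load a converted .tflite model from disk
model = tf.lite.Interpreter(model_path="emotion_model.tflite")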
Example No. 28
def face_rec(img_path):
    names = ['rabbit']
    [X,y] = read_images(img_path)
    y = np.asarray(y, dtype=np.int32)

    # Eigenfaces face-recognition algorithm
    model = cv2.face.EigenFaceRecognizer_create()
    # Train on the feature data and their labels
    model.train(np.asarray(X), np.asarray(y))

    # Argument 0 opens the default camera
    camera = cv2.VideoCapture(0)

    face_cascade = cv2.CascadeClassifier(r"D:/Anaconda/Lib/site-packages/cv2/data/haarcascade_frontalface_default.xml")
    while True:
        read, img = camera.read()
        faces = face_cascade.detectMultiScale(img, 1.3, 5)
        for (x, y, w, h) in faces:
            img = cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            roi = gray[y: y+h, x: x+w]  # NumPy slicing is [row (y), column (x)]
            try:
                # Read in the face region to recognize
                roi = cv2.resize(roi, (200, 200), interpolation=cv2.INTER_LINEAR)
                # Run recognition; returns the label and the confidence
                params = model.predict(roi)
                print("Label: %s, Confidence: %.2f" % (params[0], params[1]))

                # Draw the recognized name above the face
                cv2.putText(img, names[params[0]], (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)

            except:
                continue
        cv2.imshow("camera", img)
        if cv2.waitKey(1000 // 12) & 0xff == ord('q'):
            break
    cv2.destroyAllWindows()
Example No. 29
    def inference(self, cap):
        result_frames = []

        with torch.no_grad():
            # the first forward pass occasionally fails, so retry once
            try:
                sF = self.vgg(self.style)
            except Exception:
                sF = self.vgg(self.style)

        while True:

            ret, frame = cap.read()
            if not ret:
                break
            frame = resize(frame, (512, 256), interpolation=INTER_CUBIC)
            frame = frame.transpose((2, 0, 1))
            frame = frame[::-1, :, :]
            frame = frame / 255.0

            frame = torch.from_numpy(frame.copy()).unsqueeze(0)
            self.content.data.resize_(frame.size()).copy_(frame)
            with torch.no_grad():
                cF = self.vgg(self.content)
                if (self.layer == 'r41'):
                    feature, transmatrix = self.matrix(cF[self.layer],
                                                       sF[self.layer])
                else:
                    feature, transmatrix = self.matrix(cF, sF)
                transfer = self.dec(feature)
            transfer = transfer.clamp(0, 1).squeeze(0).data.cpu().numpy()
            transfer = transfer.transpose((1, 2, 0))
            transfer = transfer[..., ::-1]
            result_frames.append(transfer * 255)

        cap.release()
        return result_frames
Example No. 30
    def __get_image_chunk__(self, x_idx: int, y_idx: int) -> np.ndarray:
        """
        Reads image chunk by idx

        :param x_idx: vertical index of chunk
        :param y_idx: horizontal index of chunk
        :return: image chunk by idx
        """

        y_window, x_window = self.slide_window

        max_x = math.ceil(self.image.shape[1] / x_window)
        max_y = math.ceil(self.image.shape[0] / y_window)

        assert x_idx < max_x, 'X_idx is more than max X'
        assert x_idx >= 0, 'X_idx is less than zero'

        assert y_idx < max_y, 'Y_idx is more than max Y'
        assert y_idx >= 0, 'Y_idx is less than zero'

        x_idx = x_idx * x_window
        y_idx = y_idx * y_window

        if y_idx + y_window > self.image.shape[0]:
            y_idx = self.image.shape[0] - y_window

        if x_idx + x_window > self.image.shape[1]:
            x_idx = self.image.shape[1] - x_window

        image_chunk = self.image[y_idx:y_idx + y_window,
                                 x_idx:x_idx + x_window]

        # Not strictly necessary, but it guarantees the expected chunk size
        if image_chunk.shape[:2] != self.slide_window:
            # cv2.resize expects dsize as (width, height)
            image_chunk = cv2.resize(image_chunk, dsize=(x_window, y_window))

        return image_chunk