Python image_compressionの例、albumentations.augmentations.functional.image_compression Pythonの例

コード例 #1

0

ファイルを表示

ファイル: custum_aug.py プロジェクト: zhang405744522/RankPose

    def apply(self, img, blur_type, **params):
        """Degrade ``img`` with the blur/noise family selected by ``blur_type``.

        ``blur_type`` is compared against ascending thresholds:

        * ``< 10``  motion blur using ``self.get_motion_kernel()``
        * ``< 20``  box blur, odd kernel size drawn from 3..15
        * ``< 30``  median blur, odd kernel size drawn from 3..15
        * ``< 40``  Gaussian blur with a size drawn from ``self.blur_limit``
        * ``< 50``  ``ghosting`` with random strength, angle and scale
        * ``< 60``  filtering with a kernel from ``genaratePsf``
        * ``< 70``  radial blur (``clib`` on POSIX, Python fallback otherwise)
        * ``< 80``  JPEG compression at quality 0..15, preceded by a small
          Gaussian blur when ``blur_type < 75``
        * ``< 85``  ``gasuss_noise``
        * ``< 90``  ``apply_resize``
        * otherwise Gaussian blur with a size drawn from ``self.blur_limit``

        Afterwards one more random draw decides whether an extra small
        Gaussian blur (draw < 0.05) or an extra PSF filter (draw < 0.1) is
        stacked on top.

        Args:
            img: image array to degrade.
            blur_type: numeric selector compared against the thresholds above.
            **params: accepted and ignored.

        Returns:
            The degraded image.
        """
        # Drawn unconditionally, exactly like the original, so the random
        # stream is consumed identically whichever branch runs.
        size_candidates = np.arange(self.blur_limit[0],
                                    self.blur_limit[1] + 1, 2)
        ksize = random.choice(size_candidates)

        if blur_type < 10:
            motion_kernel = self.get_motion_kernel()
            img = F.motion_blur(img, kernel=motion_kernel)
        elif blur_type < 20:
            box_size = random.choice(range(3, 16, 2))
            img = F.blur(img, box_size)
        elif blur_type < 30:
            median_size = random.choice(range(3, 16, 2))
            img = F.median_blur(img, median_size)
        elif blur_type < 40:
            img = F.gaussian_blur(img, ksize)
        elif blur_type < 50:
            ghost_strength = random.uniform(0.01, 0.1)
            ghost_angle = random.randint(-7, 7)
            ghost_scale = random.uniform(0., 0.1)
            img = ghosting(img,
                           ghost_strength,
                           angle=ghost_angle,
                           scale=ghost_scale)
        elif blur_type < 60:
            psf_length = random.randint(5, 30)
            psf_angle = random.randint(-90, 90)
            kernel, anchor = genaratePsf(psf_length, psf_angle)
            img = cv2.filter2D(img, -1, kernel, anchor=anchor)
        elif blur_type < 70:  # this branch is very slow
            use_angle = random.random() < 0.5
            if use_angle:
                radial_num = random.randint(8, 30)
            else:
                radial_num = random.randint(5, 13)
            if os.name == 'posix':
                # The return value of the C routine is not used; presumably it
                # writes its result into the image buffer in place.
                h, w, _ = img.shape
                buffer_ptr = img.ctypes.data_as(c_char_p)
                clib.apply_radial_blur(buffer_ptr, h, w, radial_num,
                                       0 if use_angle else 1)
            else:
                img = apply_radial_blur(
                    img,
                    num=radial_num,
                    type='angle' if use_angle else 'direction')
        elif blur_type < 80:
            if blur_type < 75:
                pre_blur_size = random.choice(range(3, 9, 2))
                img = F.gaussian_blur(img, pre_blur_size)
            jpeg_quality = random.randint(0, 15)
            img = F.image_compression(img, jpeg_quality, '.jpg')
        elif blur_type < 85:
            noise_level = random.uniform(0.05, 0.15)
            img = gasuss_noise(img, 0, noise_level)
        elif blur_type < 90:
            resize_factor = random.randint(2, 7)
            img = apply_resize(img, resize_factor)
        else:
            img = F.gaussian_blur(img, ksize)

        # Occasionally stack a second, mild degradation on top.
        extra_draw = random.random()
        if extra_draw < 0.05:
            extra_size = random.choice(range(3, 9, 2))
            img = F.gaussian_blur(img, extra_size)
        elif extra_draw < 0.1:
            extra_length = random.randint(3, 10)
            extra_angle = random.randint(-90, 90)
            kernel, anchor = genaratePsf(extra_length, extra_angle)
            img = cv2.filter2D(img, -1, kernel, anchor=anchor)

        return img

コード例 #2

0

ファイルを表示

ファイル: kernel_utils.py プロジェクト: simonasdev/dfdc_deepfake_challenge

def predict_on_video(face_extractor,
                     video_path,
                     batch_size,
                     input_size,
                     models,
                     strategy=np.mean,
                     apply_compression=False):
    """Score a video by running every model on the faces extracted from it.

    Args:
        face_extractor: object whose ``process_video(video_path)`` returns a
            sequence of per-frame dicts, each with a ``"faces"`` list.
        video_path: handed to the extractor and echoed in the error message.
        batch_size: base size; the face buffer is allocated with
            ``4 * batch_size`` rows.
        input_size: side length of the square image fed to the models.
        models: iterable of callables with a ``name`` attribute; each is
            called on the half-precision face batch.
        strategy: reduces the per-face sigmoid scores of one model to a
            single prediction (default ``np.mean``).
        apply_compression: when true, every face is passed through
            ``image_compression(..., quality=90, image_type=".jpg")``.

    Returns:
        The mean of the per-model predictions. ``0.5`` is returned when the
        extractor yields nothing, when no face was buffered, or when any
        exception is raised (the exception is printed, not propagated).
    """
    batch_size *= 4
    try:
        faces = face_extractor.process_video(video_path)
        if len(faces) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3),
                         dtype=np.uint8)
            n = 0
            for frame_data in faces:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face,
                                                         quality=90,
                                                         image_type=".jpg")
                    # NOTE(review): `n + 1 <` leaves the last buffer row
                    # unused; kept as-is so the set of scored faces does not
                    # change. Faces beyond the capacity are dropped.
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1
            if n > 0:
                # Move and normalize only the filled rows; the zero padding
                # behind them never reaches the models.
                x = torch.tensor(x[:n], device="cuda").float()
                # Preprocess the images (NHWC -> NCHW, then normalize).
                x = x.permute((0, 3, 1, 2))
                for i in range(n):
                    x[i] = normalize_transform(x[i] / 255.)
                # Make a prediction, then take the average.
                with torch.no_grad():
                    x_half = x.half()  # same input for every model
                    preds = []
                    for model in models:
                        y_pred = model(x_half)
                        y_pred = torch.sigmoid(y_pred.squeeze())
                        # A single-face batch squeezes down to a 0-d tensor,
                        # which cannot be sliced; restore the batch axis.
                        if y_pred.dim() == 0:
                            y_pred = y_pred.unsqueeze(0)
                        bpred = y_pred[:n].cpu().numpy()

                        prediction = strategy(bpred)
                        print("%s: %s" % (model.name, prediction))
                        preds.append(prediction)
                    return np.mean(preds)
    except Exception as e:
        # Deliberate best-effort: report and fall back to the neutral score.
        print("Prediction error on video %s: %s" % (video_path, str(e)))

    return 0.5

コード例 #3

0

ファイルを表示

def predict_on_video(face_extractor, video_path, batch_size, input_size, models, strategy=np.mean,
                     apply_compression=False):
    """Score a video with every model, signalling failures via sentinel values.

    Args:
        face_extractor: object whose ``process_video(video_path)`` returns a
            sequence of per-frame dicts, each with a ``"faces"`` list.
        video_path: handed to the extractor.
        batch_size: base size; the face buffer is allocated with
            ``2 * batch_size`` rows.
        input_size: side length of the square image fed to the models.
        models: iterable of callables applied to the half-precision batch.
        strategy: reduces the per-face sigmoid scores of one model to a
            single prediction (default ``np.mean``).
        apply_compression: when true, every face is passed through
            ``image_compression(..., quality=90, image_type=".jpg")``.

    Returns:
        * the mean of the per-model predictions on success;
        * ``50`` when the extractor returns an empty sequence;
        * ``100`` when an exception occurs and the extractor had returned at
          least two entries, ``200`` for any other exception;
        * ``0.5`` when entries were returned but no face was buffered.
    """
    batch_size *= 2
    face_num = 0
    try:
        print('Starting face extraction')
        faces = face_extractor.process_video(video_path)
        face_num = len(faces)
        print('Starting deepfake classifier')
        if len(faces) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3), dtype=np.uint8)
            n = 0
            for frame_data in faces:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face, quality=90, image_type=".jpg")
                    # NOTE(review): `n + 1 <` leaves the last buffer row
                    # unused; kept as-is so the set of scored faces does not
                    # change. Faces beyond the capacity are dropped.
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1
            if n > 0:
                # Move and normalize only the filled rows; the zero padding
                # behind them never reaches the models.
                x = torch.tensor(x[:n], device="cuda").float()
                x = x.permute((0, 3, 1, 2))
                for i in range(n):
                    x[i] = normalize_transform(x[i] / 255.)
                # Make a prediction, then take the average.
                with torch.no_grad():
                    x_half = x.half()  # same input for every model
                    preds = []
                    for model in models:
                        y_pred = model(x_half)
                        y_pred = torch.sigmoid(y_pred.squeeze())
                        # A single-face batch squeezes down to a 0-d tensor,
                        # which cannot be sliced; restore the batch axis.
                        if y_pred.dim() == 0:
                            y_pred = y_pred.unsqueeze(0)
                        bpred = y_pred[:n].cpu().numpy()
                        preds.append(strategy(bpred))
                    return np.mean(preds)
        else:
            return 50
    except Exception:
        # Failures are reported to the caller through sentinel values only.
        if face_num >= 2:
            return 100
        else:
            return 200
    return 0.5

コード例 #4

0

ファイルを表示

 def __call__(self, img):
     """Return ``img`` compressed via ``F.image_compression``, or unchanged.

     Compression runs when ``self.always_apply`` is truthy or when
     ``self.apply < self.p``. Inputs that are not ``np.ndarray`` are
     converted with ``np.array`` before compression; when compression is
     skipped the input is returned as received.
     """
     should_compress = self.always_apply or self.apply < self.p
     if not should_compress:
         return img

     image_array = img if isinstance(img, np.ndarray) else np.array(img)
     return F.image_compression(image_array, self.quality, self.image_type)

コード例 #5

0

ファイルを表示

def predict_on_video_with_trustnet_api(face_extractor,
                                       video_path,
                                       batch_size,
                                       input_size,
                                       trustnet_client,
                                       model_name,
                                       strategy=np.mean,
                                       apply_compression=False):
    """Score the faces of a video through a remote inference client.

    Args:
        face_extractor: object whose ``process_video(video_path)`` returns a
            sequence of per-frame dicts, each with a ``"faces"`` list.
        video_path: handed to the extractor and echoed in the error message.
        batch_size: base size; the face buffer is allocated with
            ``4 * batch_size`` rows.
        input_size: side length of the square image sent for inference.
        trustnet_client: client whose ``infer(model_name=..., inputs=...,
            outputs=...)`` result exposes ``as_numpy('output0')``.
        model_name: forwarded to ``trustnet_client.infer``.
        strategy: accepted for signature compatibility; not used here.
        apply_compression: when true, every face is passed through
            ``image_compression(..., quality=90, image_type=".jpg")``.

    Returns:
        ``sigmoid`` of the squeezed ``'output0'`` array (per-face scores, not
        aggregated). ``0.5`` is returned when no face was buffered or when any
        exception is raised (the exception is printed, not propagated).
    """
    batch_size *= 4

    try:
        s_time = time.time()
        faces = face_extractor.process_video(video_path)
        if len(faces) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3),
                         dtype=np.uint8)
            n = 0
            for frame_data in faces:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face,
                                                         quality=90,
                                                         image_type=".jpg")
                    # NOTE(review): `n + 1 <` leaves the last buffer row
                    # unused; kept as-is so the set of scored faces does not
                    # change. Faces beyond the capacity are dropped.
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1
            if n > 0:
                # Convert and normalize only the filled rows instead of
                # normalizing the zero padding and discarding it afterwards.
                x = torch.tensor(x[:n]).float()
                # Preprocess the images (NHWC -> NCHW, then normalize).
                x = x.permute((0, 3, 1, 2))
                for i in range(n):
                    x[i] = normalize_transform(x[i] / 255.)
                # The permuted tensor is a strided view; the client is given
                # a contiguous NumPy array.
                x = np.ascontiguousarray(x)
                print("Get X : ", time.time() - s_time)

                inputs = [grpcclient.InferInput('input0', x.shape, "FP32")]
                inputs[0].set_data_from_numpy(x)
                outputs = [grpcclient.InferRequestedOutput('output0')]

                results = trustnet_client.infer(model_name=model_name,
                                                inputs=inputs,
                                                outputs=outputs)
                output0_data = results.as_numpy('output0')
                output0_data = sigmoid(output0_data.squeeze())
                return output0_data

    except Exception as e:
        # Deliberate best-effort: report and fall back to the neutral score.
        print("Prediction error on video %s: %s" % (video_path, str(e)))

    return 0.5

コード例 #6

0

ファイルを表示

def predict_on_video_with_trt(face_extractor,
                              video_path,
                              batch_size,
                              input_size,
                              engine,
                              strategy=np.mean,
                              apply_compression=False):
    """Score the faces of a video with a TensorRT engine.

    Args:
        face_extractor: object whose ``process_video(video_path)`` returns a
            sequence of per-frame dicts, each with a ``"faces"`` list.
        video_path: handed to the extractor and echoed in the error message.
        batch_size: base size; the face buffer is allocated with
            ``4 * batch_size`` rows.
        input_size: side length of the square image fed to the engine.
        engine: object providing ``create_execution_context()``.
        strategy: accepted for signature compatibility; not used here.
        apply_compression: when true, every face is passed through
            ``image_compression(..., quality=90, image_type=".jpg")``.

    Returns:
        A tensor holding the sigmoid of the squeezed engine output (per-face
        scores, not aggregated). ``0.5`` is returned when no face was
        buffered or when any exception is raised (the exception is printed,
        not propagated).
    """
    batch_size *= 4

    # Stays None until make_context() succeeds, so cleanup never touches an
    # unbound name when the failure happens earlier.
    ctx = None
    try:
        faces = face_extractor.process_video(video_path)
        if len(faces) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3),
                         dtype=np.uint8)
            n = 0
            for frame_data in faces:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face,
                                                         quality=90,
                                                         image_type=".jpg")
                    # NOTE(review): `n + 1 <` leaves the last buffer row
                    # unused; kept as-is so the set of scored faces does not
                    # change. Faces beyond the capacity are dropped.
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1
            if n > 0:
                # Convert and normalize only the filled rows.
                x = torch.tensor(x[:n]).float()
                # Preprocess the images (NHWC -> NCHW, then normalize).
                x = x.permute((0, 3, 1, 2))
                for i in range(n):
                    x[i] = normalize_transform(x[i] / 255.)
                x = np.ascontiguousarray(x)
                output = np.empty(x.shape[0], dtype=np.float32)

                # The GPU is only touched once there is a batch to run;
                # without faces the function falls through to 0.5.
                cuda.init()
                device = cuda.Device(0)
                ctx = device.make_context()

                d_input = cuda.mem_alloc(x.nbytes)
                d_output = cuda.mem_alloc(output.nbytes)
                bindings = [int(d_input), int(d_output)]

                stream = cuda.Stream()
                with engine.create_execution_context() as context:
                    context.set_binding_shape(0, x.shape)
                    cuda.memcpy_htod_async(d_input, x, stream)
                    context.execute_async_v2(bindings=bindings,
                                             stream_handle=stream.handle)
                    cuda.memcpy_dtoh_async(output, d_output, stream)
                    stream.synchronize()
                    print(output)
                    output = torch.FloatTensor(output)
                    output = torch.sigmoid(output.squeeze())
                    print(output)
                    return output
    except Exception as e:
        # Deliberate best-effort: report and fall back to the neutral score.
        print("Prediction error on video %s: %s" % (video_path, str(e)))
    finally:
        # Pop the context on success and failure alike, but only if it was
        # actually created.
        if ctx is not None:
            ctx.pop()

    return 0.5

コード例 #7

0

ファイルを表示

ファイル: kernel_utils.py プロジェクト: Parag0506/SIH-TheSentinels

def predict_on_video(face_extractor,
                     video_path,
                     batch_size,
                     input_size,
                     models,
                     strategy=np.mean,
                     apply_compression=False):
    """Score a video with ``models[0]`` and write an annotated ``output.mp4``.

    The buffered faces are scored in chunks of at most 180. The video is
    then re-read frame by frame, every box in ``frame_data["boxes"]`` is
    drawn with its score and a FAKE/REAL label (FAKE when score > 0.5), and
    ``ffmpeg`` is used to combine the annotated frames with the audio of the
    source video into ``output.mp4``. Intermediate files are removed.

    Args:
        face_extractor: object whose ``process_video(video_path)`` returns a
            sequence of per-frame dicts with ``"faces"`` and ``"boxes"``.
        video_path: path of the video; also used to derive the names of the
            intermediate files.
        batch_size: base size; the face buffer is allocated with
            ``4 * batch_size`` rows.
        input_size: side length of the square image fed to the model.
        models: sequence of callables; only ``models[0]`` is used.
        strategy: reduces the per-face scores to the returned prediction
            (default ``np.mean``).
        apply_compression: when true, every face is passed through
            ``image_compression(..., quality=90, image_type=".jpg")``.

    Returns:
        ``np.mean(strategy(per_face_scores))``. ``0.5`` is returned when no
        face was buffered or when any exception is raised (the exception is
        printed, not propagated).
    """
    batch_size *= 4
    try:
        processed_frames = face_extractor.process_video(video_path)
        if len(processed_frames) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3),
                         dtype=np.uint8)

            n = 0
            for frame_data in processed_frames:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face,
                                                         quality=90,
                                                         image_type=".jpg")
                    # NOTE(review): `n + 1 <` leaves the last buffer row
                    # unused; kept as-is so the set of scored faces does not
                    # change. Faces beyond the capacity are dropped.
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1

            if n > 0:
                limit = 180  # maximum number of faces per model call
                faces_pred = []
                model = models[0]
                # range() yields only non-empty chunks, so the model is never
                # called on an empty batch (which the previous hand-rolled
                # loop did whenever n <= limit or n was a multiple of limit).
                for start in range(0, n, limit):
                    end = min(start + limit, n)
                    print(str(start) + " - " + str(end))
                    ak = torch.tensor(x[start:end], device="cuda").float()
                    # Preprocess the images (NHWC -> NCHW, then normalize).
                    ak = ak.permute((0, 3, 1, 2))
                    for i in range(len(ak)):
                        ak[i] = normalize_transform(ak[i] / 255.)
                    with torch.no_grad():
                        y_pred = model(ak.half())
                        y_pred = torch.sigmoid(y_pred.squeeze())
                        # A one-face chunk squeezes down to a 0-d tensor,
                        # which cannot be iterated; restore the batch axis.
                        if y_pred.dim() == 0:
                            y_pred = y_pred.unsqueeze(0)
                        faces_pred.extend(y_pred.cpu().numpy())

                file_name = video_path.split('/')[-1].split('.')[0]
                video_result_path = file_name + '.mp4'
                fourcc = cv2.VideoWriter_fourcc(*'avc1')
                font_face = cv2.FONT_HERSHEY_SIMPLEX
                thickness = 2
                font_scale = 1

                face_idx = 0  # position in faces_pred of the next box
                writer = None
                reader = cv2.VideoCapture(video_path)
                try:
                    fps = reader.get(cv2.CAP_PROP_FPS)
                    for frame_data in processed_frames:
                        ok, image = reader.read()
                        if not ok:
                            # No more decodable frames: stop annotating but
                            # still report the score computed above.
                            break
                        height, width = image.shape[:2]
                        if writer is None:
                            writer = cv2.VideoWriter(video_result_path,
                                                     fourcc, fps,
                                                     (width, height))
                        for bbox in frame_data["boxes"]:
                            xmin, ymin, xmax, ymax = [
                                int(b * 2) for b in bbox
                            ]
                            h = ymax - ymin
                            score = faces_pred[face_idx]
                            prediction = 1 if score > 0.5 else 0
                            label = 'FAKE' if prediction else 'REAL'
                            color = (0, 255,
                                     0) if prediction == 0 else (0, 0, 255)
                            output_list = [score, 1.0 - score]
                            cv2.putText(image,
                                        str(output_list) + '=>' + label,
                                        (xmin, ymin + h + 30), font_face,
                                        font_scale, color, thickness, 2)
                            cv2.rectangle(image, (xmin, ymin), (xmax, ymax),
                                          color, 2)
                            face_idx += 1

                        writer.write(image)
                finally:
                    # Release the capture and the writer even when
                    # annotation fails part-way through.
                    reader.release()
                    if writer is not None:
                        writer.release()

                print(len(faces_pred), face_idx)

                # Extract the audio track of the source video.
                subprocess.call([
                    'ffmpeg', '-y', '-i', f'{video_path}', f'{file_name}.mp3'
                ])

                # Combine the annotated frames with that audio track.
                subprocess.call([
                    'ffmpeg', '-y', '-i', f'{video_result_path}', '-i',
                    f'{file_name}.mp3', '-c:v', 'copy', '-c:a', 'aac',
                    'output.mp4'
                ])

                subprocess.call(['rm', f'{file_name}.mp3'])

                subprocess.call(['rm', f'{video_result_path}'])

                return np.mean(strategy(faces_pred))
    except Exception as e:
        # Deliberate best-effort: report and fall back to the neutral score.
        print("Prediction error on video %s: %s" % (video_path, str(e)))

    return 0.5

コード例 #8

0

ファイルを表示

ファイル: custum_aug.py プロジェクト: zhang405744522/RankPose

def applyBlur(img):
    """Build two differently degraded versions of ``img`` plus a ranking label.

    One of nine degradation families is drawn at random, together with two
    distinct levels from 0..5. Each level indexes a per-family strength
    table; level 0 means "leave untouched" (the first result is then a copy
    of ``img``, the second is ``img`` itself).

    NOTE(review): on POSIX the radial-blur family passes the second image's
    buffer to ``clib.apply_radial_blur`` while it still aliases ``img``;
    presumably the C routine writes in place, which would modify the
    caller's array - confirm.

    Args:
        img: image array to degrade.

    Returns:
        ``(img_first, img_second, label)`` where ``label`` is ``1`` when the
        first level is greater than the second and ``-1`` otherwise.
    """
    blur_type = random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9])
    level = np.random.choice([0, 1, 2, 3, 4, 5], size=2, replace=False)
    first_level, second_level = level[0], level[1]

    def degrade_pair(strengths, degrade):
        # Shared pattern: skip the operation at level 0, otherwise apply it
        # with the strength looked up for that level (first image first).
        first = img.copy() if first_level == 0 else degrade(
            img, strengths[first_level])
        second = img if second_level == 0 else degrade(
            img, strengths[second_level])
        return first, second

    if blur_type == 1:
        img_first, img_second = degrade_pair(
            [0, 3, 7, 9, 11, 13],
            lambda im, size: F.motion_blur(im,
                                           kernel=get_motion_kernel(size)))
    elif blur_type == 2:
        img_first, img_second = degrade_pair(
            [0, 3, 5, 7, 9, 11], lambda im, size: F.blur(im, size))
    elif blur_type == 3:
        img_first, img_second = degrade_pair(
            [0, 3, 5, 7, 9, 11], lambda im, size: F.median_blur(im, size))
    elif blur_type == 4:
        img_first, img_second = degrade_pair(
            [0, 5, 9, 11, 15, 17],
            lambda im, size: F.gaussian_blur(im, size))
    elif blur_type == 5:
        psf_lengths = [0, 5, 11, 17, 23, 25]
        # Both kernels are generated up front, even for level 0, so the
        # random angles are drawn in the same order as before.
        kernel_a, anchor_a = genaratePsf(psf_lengths[first_level],
                                         random.randint(-90, 90))
        kernel_b, anchor_b = genaratePsf(psf_lengths[second_level],
                                         random.randint(-90, 90))
        if first_level == 0:
            img_first = img.copy()
        else:
            img_first = cv2.filter2D(img, -1, kernel_a, anchor=anchor_a)
        if second_level == 0:
            img_second = img
        else:
            img_second = cv2.filter2D(img, -1, kernel_b, anchor=anchor_b)
    elif blur_type == 6:
        img_first, img_second = degrade_pair(
            [0, 20, 15, 10, 5, 0],
            lambda im, quality: F.image_compression(im, quality, '.jpg'))
    elif blur_type == 7:
        img_first, img_second = degrade_pair(
            [0, 0.04, 0.05, 0.06, 0.07, 0.08],
            lambda im, amount: ghosting(im, amount))
    elif blur_type == 8:
        if random.random() < 0.3:
            radial_counts, radial_type = [0, 8, 12, 15, 20, 25], 'angle'
        else:
            radial_counts, radial_type = [0, 5, 7, 9, 11, 12], 'direction'
        if os.name == 'posix':
            h, w, _ = img.shape
            img_first = img.copy()
            img_second = img
            mode_flag = 0 if radial_type == 'angle' else 1
            for target, lvl in ((img_first, first_level),
                                (img_second, second_level)):
                if lvl != 0:
                    buffer_ptr = target.ctypes.data_as(c_char_p)
                    clib.apply_radial_blur(buffer_ptr, h, w,
                                           radial_counts[lvl], mode_flag)
        else:
            if first_level == 0:
                img_first = img.copy()
            else:
                img_first = apply_radial_blur(img.copy(),
                                              num=radial_counts[first_level],
                                              type=radial_type)
            if second_level == 0:
                img_second = img
            else:
                img_second = apply_radial_blur(
                    img, num=radial_counts[second_level], type=radial_type)
    else:
        img_first, img_second = degrade_pair(
            [0, 0.025, 0.05, 0.075, 0.1, 0.125],
            lambda im, sigma: gasuss_noise(im, 0, sigma))

    label = 1 if level[0] > level[1] else -1
    return img_first, img_second, label