from typing import List, Tuple, Union

import cv2
import numpy as np

from dg_util.python_utils import misc_util

SHOW_FLOW = False  # set True to visualize the optical flow interactively


def filter_using_flow(
    frames0: np.ndarray, frames1: np.ndarray, return_inds=False
) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray, List[int]]]:
    # Keep frame pairs whose Farneback optical flow shows a moderate amount of motion.
    assert isinstance(frames0, np.ndarray)
    assert frames0.shape == frames1.shape
    original_shape = frames0.shape
    num_frames = len(frames0)
    # Downsample to 256x256 so the flow computation is cheap and resolution-independent.
    small_frames0 = misc_util.resize(frames0, (256, 256), height_channel=1, width_channel=2)
    small_frames1 = misc_util.resize(frames1, (256, 256), height_channel=1, width_channel=2)

    # Convert to grayscale in one batched cvtColor call by stacking the frames vertically.
    small_frames0 = cv2.cvtColor(small_frames0.reshape(num_frames * 256, 256, 3), cv2.COLOR_RGB2GRAY).reshape(
        num_frames, 256, 256
    )
    small_frames1 = cv2.cvtColor(small_frames1.reshape(num_frames * 256, 256, 3), cv2.COLOR_RGB2GRAY).reshape(
        num_frames, 256, 256
    )
    masks = []
    inds = []
    large_masks = []
    if SHOW_FLOW:
        hsv = np.zeros((256, 256, 3), dtype=np.uint8)
        hsv[..., 1] = 255
    for ff in range(num_frames):
        # Dense Farneback flow between the grayscale frame pair, then per-pixel magnitude/angle.
        flow = cv2.calcOpticalFlowFarneback(small_frames0[ff], small_frames1[ff], None, 0.5, 3, 15, 3, 5, 1.2, 0)
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        if SHOW_FLOW:
            hsv[..., 0] = ang * 180 / np.pi / 2
            hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
            rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
            cv2.imshow("image0", small_frames0[ff])
            cv2.imshow("image1", small_frames1[ff])
            cv2.imshow("flow", rgb)
            cv2.waitKey(0)
        mask = mag > 2  # pixels moving more than 2px at 256x256 resolution
        # Keep the pair only if a moderate fraction of the image is moving.
        if 0.25 < np.mean(mask) < 0.6:
            inds.append(ff)
            masks.append(mask)
            large_masks.append(
                cv2.resize(
                    mask.astype(np.uint8) * 255, (original_shape[2], original_shape[1]), interpolation=cv2.INTER_NEAREST
                )
            )
    frames = frames1[inds]

    large_masks = np.array(large_masks)
    if return_inds:
        return frames, large_masks, inds
    else:
        return frames, large_masks
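
A minimal usage sketch (hypothetical; random frames stand in for a real video array): pair each frame with its successor and keep the pairs that show moderate motion.

# Hypothetical usage sketch for filter_using_flow.
frames = np.random.randint(0, 256, size=(10, 480, 640, 3), dtype=np.uint8)
moving, motion_masks, kept = filter_using_flow(frames[:-1], frames[1:], return_inds=True)
print("kept", len(kept), "of", len(frames) - 1, "frame pairs")
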
def filter_using_laplacian_opencv(
    frames: np.ndarray, return_inds=False
) -> Union[np.ndarray, Tuple[np.ndarray, List[int]]]:
    assert isinstance(frames, np.ndarray)
    assert len(frames.shape) == 4 and frames.shape[-1] == 3
    # Move the frame axis inward so all frames' RGB channels stack along one channel axis.
    small_frames = frames.transpose(1, 2, 0, 3)
    small_frames = misc_util.resize(small_frames, (256, 256), height_channel=0, width_channel=1)
    small_frames = small_frames.reshape(256, 256, -1)
    small_frames_dim = small_frames.shape[-1]

    if small_frames_dim > (512 // 3) * 3:
        # cv2.Laplacian cannot handle more than 512 channels, so process the frames in
        # chunks of 510 channels (the largest multiple of 3, keeping RGB triples intact).
        laplacian = [
            np.max(
                np.abs(
                    cv2.Laplacian(small_frames[:, :, start : start + (512 // 3) * 3], cv2.CV_16S).reshape(
                        256, 256, -1, 3
                    )
                ),
                axis=3,
            )
            for start in range(0, small_frames_dim, (512 // 3) * 3)
        ]
        laplacian = [(lap > 3).mean(axis=(0, 1)) for lap in laplacian]
        laplacian = np.concatenate(laplacian, axis=-1)

    else:
        laplacian = np.max(np.abs(cv2.Laplacian(small_frames, cv2.CV_16S).reshape(256, 256, -1, 3)), axis=3)
        laplacian = (laplacian > 3).mean(axis=(0, 1))
    # Keep frames where more than 10% of pixels have a strong edge response (drops blurry/blank frames).
    new_frames = np.where(laplacian > 0.1)[0]
    if return_inds:
        return frames[new_frames], new_frames
    else:
        return frames[new_frames]
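
A matching sketch for the Laplacian filter, which keeps only frames with enough high-frequency detail (again with stand-in random frames):

# Hypothetical usage sketch for filter_using_laplacian_opencv.
frames = np.random.randint(0, 256, size=(20, 480, 640, 3), dtype=np.uint8)
sharp, sharp_inds = filter_using_laplacian_opencv(frames, return_inds=True)
print("kept", len(sharp_inds), "of", len(frames), "frames")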
Example #3

import cv2
import numpy as np

from dg_util.python_utils import misc_util, pt_util
def draw_attention(image, attention, height_channel=0, width_channel=1):
    # Overlay an attention map on an image by blending toward white where attention is high.
    dtype = image.dtype
    image = pt_util.to_numpy(image)
    attention = pt_util.to_numpy(attention)
    attention = np.clip(attention, 0, 1)
    im_width = image.shape[width_channel]
    im_height = image.shape[height_channel]
    attention = misc_util.resize(
        attention,
        (im_width, im_height),
        interpolation=cv2.INTER_LINEAR,
        height_channel=height_channel,
        width_channel=width_channel,
    )
    image, attention = pt_util.fix_broadcast(image, attention)
    # Unattended pixels keep their color; fully attended pixels become white.
    image = (image * (1 - attention) + 255 * attention).astype(dtype)
    return image
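
A short sketch of how draw_attention might be called; the HWC layout and the synthetic low-resolution attention map are assumptions for illustration, and pt_util.fix_broadcast is assumed to align the map with the image's channel axis.

# Hypothetical usage sketch: overlay a 14x14 attention map on a 224x224 HWC image.
image = np.random.randint(0, 256, size=(224, 224, 3), dtype=np.uint8)
attention = np.zeros((14, 14), dtype=np.float32)
attention[5:9, 5:9] = 1.0  # attend to a square near the center
highlighted = draw_attention(image, attention)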
Example #4
    # NPZDataset constructor: loads "data" and "labels" arrays from an .npz file,
    # optionally subsamples them, resizes, and converts to NCHW torch tensors.
    def __init__(self,
                 args,
                 path,
                 data_subset,
                 num_data_points=None,
                 contiguous=True):
        with torch.no_grad():
            self.args = args
            self.data_subset = data_subset
            npz_dataset = np.load(path.format(data_subset=data_subset))
            data = npz_dataset["data"]
            labels = pt_util.from_numpy(npz_dataset["labels"])
            if num_data_points is None:
                num_data_points = len(data)

            if num_data_points < len(data):
                # Fixed seed so the subsample is deterministic across runs.
                np.random.seed(0)
                rand_inds = np.random.choice(len(data),
                                             num_data_points,
                                             replace=False)
                data = data[rand_inds]
                labels = labels[rand_inds]

            assert len(data.shape) == 4

            if data.shape[1] == 3:
                # NCHW -> NHWC so the resize operates on spatial axes 1 and 2.
                data = data.transpose(0, 2, 3, 1)

            data = misc_util.resize(data,
                                    (args.input_width, args.input_height),
                                    height_channel=1,
                                    width_channel=2)

            data = pt_util.from_numpy(data).permute(0, 3, 1, 2)  # back to NCHW for PyTorch
            if contiguous:
                data = data.contiguous()
            super(NPZDataset, self).__init__(data, labels,
                                             self.args.batch_size)
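
How this constructor might be invoked; the argparse fields and the path template are inferred from the code above rather than from a documented API, and the base class is assumed to accept (data, labels, batch_size).

# Hypothetical usage sketch: the .npz file is assumed to hold "data" and "labels".
import argparse

args = argparse.Namespace(input_width=64, input_height=64, batch_size=32)
train_set = NPZDataset(args, "my_dataset_{data_subset}.npz", "train", num_data_points=1000)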
Example #5

import pickle
import random

import cv2
import tqdm

from dg_util.python_utils import misc_util, video_utils, youtube_utils

subset = "val"
SCALE = 1
WIDTH = 224 // 2**(SCALE - 1)
HEIGHT = 224 // 2**(SCALE - 1)
SEED = random.randint(0, 2**31)

dataset = pickle.load(open("parsed_dataset_renamed_%s.pkl" % subset, "rb"))

cv2.namedWindow("im", cv2.WINDOW_NORMAL)
for video, label in tqdm.tqdm(dataset.items()):
    if video == "vocabulary":  # the pickle also stores the label-name table under this key
        continue
    labels_on = set(label)
    video_path = youtube_utils.download_video(video)
    if video_path is None:
        continue
    frames = video_utils.get_frames(video_path,
                                    remove_video=True,
                                    max_frames=100,
                                    sample_rate=10)
    for lab in label:
        print("id:", video, "type:", dataset["vocabulary"][lab])
    print("num frames", len(frames))
    frames = video_utils.remove_border(frames)
    for frame in frames:
        frame = misc_util.resize(frame, (WIDTH, HEIGHT))
        cv2.imshow("im", frame[:, :, ::-1])
        cv2.waitKey(0)