import argparse
import glob

import torch
from torchvision.transforms import transforms as T
from tqdm import tqdm

from inpainting.load import VideoDataset, MergeDataset
from inpainting.visualize import save_frames
# Import path assumed; the original excerpt uses this class without importing it.
from inpainting.external import MyDeepFlowGuidedVideoInpaintingAlgorithm

parser = argparse.ArgumentParser()
parser.add_argument('--input-images-dir', type=str, default='data/processed/demo/InputImages')
parser.add_argument('--input-masks-dir', type=str, default='data/processed/demo/Masks')
parser.add_argument('--results-dir', type=str, default='results/demo/Inpainter')
opt = parser.parse_args()

# Pair each input frame sequence with its mask sequence.
input_images_dataset = VideoDataset(list(glob.glob(f'{opt.input_images_dir}/*')), 'image')
input_masks_dataset = VideoDataset(list(glob.glob(f'{opt.input_masks_dir}/*')), 'mask')
dataset = MergeDataset([input_images_dataset, input_masks_dataset], transform=T.ToTensor())

with torch.no_grad():
    inpainting_algorithm = MyDeepFlowGuidedVideoInpaintingAlgorithm()
    for i, (input_images, input_masks) in enumerate(tqdm(dataset)):
        output_images = []
        inpainting_algorithm.initialize()
        for input_image, input_mask in zip(input_images, input_masks):
            input_image = input_image.unsqueeze(0).cuda()
            input_mask = input_mask.unsqueeze(0).cuda()
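
The script above stops mid-loop. A minimal sketch of how the per-frame pass might finish, assuming a hypothetical `inpaint_online` method on the algorithm object; the method name and output handling are illustrative, and only `save_frames` comes from the script's own imports:

            # Hypothetical per-frame call; the algorithm's real method name is not shown here.
            output_image = inpainting_algorithm.inpaint_online(input_image, input_mask)
            output_images.append(output_image.squeeze(0).cpu())
        # Persist the inpainted sequence alongside the other demo results.
        save_frames(output_images, f'{opt.results_dir}/OutputImages/{i:05d}', 'image')
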
import argparse
import glob

import pandas as pd
import torch
from torchvision.transforms import transforms as T
from tqdm import tqdm

from inpainting.evaluate import evaluate_tracking, save_stats, save_results
from inpainting.load import VideoDataset, DynamicMaskVideoDataset

parser = argparse.ArgumentParser()
parser.add_argument('--output-masks-dir', type=str, default='results/demo/Tracker/OutputMasks')
parser.add_argument('--target-masks-dir', type=str, default='data/processed/demo/Masks')
parser.add_argument('--results-dir', type=str, default='results/demo/Tracker')
opt = parser.parse_args()

output_masks_dataset = VideoDataset(list(glob.glob(f'{opt.output_masks_dir}/*')), 'mask')
target_masks_dataset = VideoDataset(list(glob.glob(f'{opt.target_masks_dir}/*')), 'mask')
dataset = DynamicMaskVideoDataset(output_masks_dataset, target_masks_dataset, transform=T.ToTensor())

with torch.no_grad():
    sample_dfs = []
    for i, (output_masks, target_masks) in enumerate(tqdm(dataset)):
        # Compare predicted masks against the ground-truth masks and save per-video stats.
        sample_df = evaluate_tracking(target_masks, output_masks)
        save_stats(sample_df.drop(columns=['t']), f'{opt.results_dir}/Misc/{i:05d}')
        sample_df['video'] = i
        sample_dfs.append(sample_df)
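
The excerpt ends before the per-video frames are combined. A plausible ending, modeled on the companion inpainting-evaluation script below (which builds `df = pd.concat(sample_dfs)`) and on the imported but so-far-unused `save_results`; the exact argument shapes are assumptions:

    # Aggregate per-video statistics into one frame and write overall tracking results.
    df = pd.concat(sample_dfs)
    save_stats(df.drop(columns=['t', 'video']), opt.results_dir)  # assumed call shape
    save_results(df, opt.results_dir)  # hypothetical arguments; the real signature is not shown here
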
            return str(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), str(
                cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        cap = cv2.VideoCapture(0)
        set_res(cap, *opt.size[::-1])
        while True:
            # Grab a frame from the webcam and hand it to the tracker as a tensor.
            _, image = cap.read()
            image = cv2.resize(image, opt.size[::-1])
            yield cv_image_to_tensor(image)

    image_sequence = camera_generator()
else:
    images_dataset = VideoDataset([opt.images_dir], 'image',
                                  transform=T.Compose([T.Resize(opt.size[::-1]), T.ToTensor()]))
    image_sequence = iter(images_dataset[0])

# Select ROI
cv2.namedWindow('Demo', cv2.WND_PROP_FULLSCREEN)
init_image = next(image_sequence)
x, y, w, h = cv2.selectROI('Demo', tensor_to_cv_image(init_image), False, False)
init_rect = ((x, y), (x + w, y + h))

with torch.no_grad():
    tracking_algorithm = SiamMaskVideoTrackingAlgorithm(mask_type='segmentation')
    tracking_algorithm.initialize(init_image, init_rect)
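
The excerpt stops right after tracker initialization. A sketch of the loop that would plausibly follow, assuming a hypothetical `find_mask` method on the tracker; the per-frame API and the overlay blend are illustrative, while `tensor_to_cv_image` is the helper already used above:

    for image in image_sequence:
        # Hypothetical call; the tracker's per-frame method name is not shown here.
        mask = tracking_algorithm.find_mask(image)
        # Blend the predicted mask over the frame and display it.
        cv2.imshow('Demo', tensor_to_cv_image(image * 0.5 + mask * 0.5))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
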
import glob

import torch
from PIL import Image
from torch.utils.data import DataLoader
from torchvision.transforms import transforms

from inpainting.external import FlowAndFillInpaintingAlgorithm
from inpainting.load import VideoDataset, DynamicMaskVideoDataset
from scripts.train import InpaintingModel

batch_size = 1
sizes = [
    (256, 256),
    (512, 512),
    (1024, 1024)
]

# Benchmark the pipeline at several resolutions on a single DAVIS sequence.
for size in sizes:
    frame_dataset = VideoDataset(
        list(glob.glob('data/raw/video/DAVIS/JPEGImages/480p/flamingo')),
        frame_type='image',
        transform=transforms.Compose([
            transforms.Resize(size, interpolation=Image.BILINEAR),
            transforms.ToTensor()
        ]))
    mask_dataset = VideoDataset(
        list(glob.glob('data/processed/video/DAVIS/Annotations_dilated/480p/flamingo')),
        frame_type='mask',
        transform=transforms.Compose([
            transforms.Resize(size, interpolation=Image.NEAREST),
            transforms.ToTensor()
        ]))
    dataset = DynamicMaskVideoDataset(frame_dataset, mask_dataset)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    with torch.no_grad():
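
The benchmark loop is cut off at the `no_grad` context. A sketch of how each resolution might be timed, assuming `import time` at the top of the script and a hypothetical `inpaint` method on `FlowAndFillInpaintingAlgorithm`; both are illustrative, not the repository's confirmed API:

        algorithm = FlowAndFillInpaintingAlgorithm()
        start = time.perf_counter()
        for frames, masks in data_loader:
            # Hypothetical call; the algorithm's real method name is not shown here.
            algorithm.inpaint(frames.cuda(), masks.cuda())
        # Report wall-clock time per resolution.
        print(f'{size}: {time.perf_counter() - start:.2f} s')
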
import argparse
import glob

import pandas as pd
import torch
from torchvision.transforms import transforms as T
from tqdm import tqdm

from inpainting.evaluate import save_stats, save_results, evaluate_inpainting
from inpainting.load import VideoDataset, MergeDataset

parser = argparse.ArgumentParser()
parser.add_argument('--output-images-dir', type=str, default='results/demo/Inpainter/OutputImages')
parser.add_argument('--target-images-dir', type=str, default='data/processed/demo/TargetImages')
parser.add_argument('--results-dir', type=str, default='results/demo/Inpainter')
opt = parser.parse_args()

output_images_dataset = VideoDataset(list(glob.glob(f'{opt.output_images_dir}/*')), 'image')
target_images_dataset = VideoDataset(list(glob.glob(f'{opt.target_images_dir}/*')), 'image')
dataset = MergeDataset([output_images_dataset, target_images_dataset], transform=T.ToTensor())

with torch.no_grad():
    sample_dfs = []
    for i, (output_images, target_images) in enumerate(tqdm(dataset)):
        # Score the inpainted frames against the ground-truth frames.
        sample_df = evaluate_inpainting(target_images, output_images)
        save_stats(sample_df.drop(columns=['t']), f'{opt.results_dir}/Misc/{i:05d}')
        sample_df['video'] = i
        sample_dfs.append(sample_df)
    df = pd.concat(sample_dfs)
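
`df` is assembled but never written in the excerpt. A minimal sketch of the ending, reusing the script's own `save_stats` and `save_results` imports; the argument shapes are assumptions:

    # Write aggregate statistics for the whole demo set.
    save_stats(df.drop(columns=['t', 'video']), opt.results_dir)  # assumed call shape
    save_results(df, opt.results_dir)  # hypothetical arguments; the real signature is not shown here
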
import argparse
import glob
import json
from os.path import basename

import numpy as np
from tqdm import tqdm

from inpainting.load import VideoDataset

parser = argparse.ArgumentParser()
parser.add_argument('--annotations-dir', type=str, default='data/raw/demo/Annotations')
parser.add_argument('--output-path', type=str, default='data/processed/demo/object_stats.json')
opt = parser.parse_args()

# Load annotations
annotation_dirs = list(glob.glob(f'{opt.annotations_dir}/*'))
annotation_dataset = VideoDataset(annotation_dirs, frame_type='annotation')

# Calculate object stats for each sequence
dataset_stats = {'videos': {}}
for sequence_dir, sequence in tqdm(zip(annotation_dirs, annotation_dataset),
                                   desc='Calculating object stats', unit='sequence',
                                   total=len(annotation_dirs)):
    sequence_name = basename(sequence_dir)
    sequence_stats = []
    for annotation_path, annotation in zip(glob.glob(f'{sequence_dir}/*.png'), sequence):
        annotation_array = np.array(annotation)
        annotation_pixel_count = annotation_array.size
        # Count pixels per object ID in this frame's annotation.
        for object_id, object_pixel_count in zip(*np.unique(annotation_array, return_counts=True)):
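
The innermost loop is truncated. A plausible completion that records each object's pixel coverage per frame and dumps everything to the JSON path from the CLI arguments; the exact record fields are assumptions:

            # Hypothetical record layout; the fields actually stored are not shown here.
            sequence_stats.append({
                'frame': basename(annotation_path),
                'object_id': int(object_id),
                'coverage': float(object_pixel_count / annotation_pixel_count)
            })
    dataset_stats['videos'][sequence_name] = sequence_stats

with open(opt.output_path, 'w') as f:
    json.dump(dataset_stats, f, indent=2)
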
import argparse
import glob
from os import makedirs
from os.path import basename

from PIL import Image
from torchvision.transforms import transforms
from tqdm import tqdm

from inpainting.load import VideoDataset
# Import path assumed; the original excerpt uses save_image without importing it.
from inpainting.visualize import save_image

parser = argparse.ArgumentParser()
parser.add_argument('--input-dir', type=str, default='data/raw/DAVIS/JPEGImages/480p')
parser.add_argument('--output-dir', type=str, default='data/interim/DAVIS/JPEGImages')
parser.add_argument('--size', type=int, nargs=2, default=(256, 256))
parser.add_argument('--type', type=str, default='image')
opt = parser.parse_args()

# Load and resize frames
frame_dirs = list(glob.glob(f'{opt.input_dir}/*'))
interpolation = Image.BILINEAR if opt.type == 'image' else Image.NEAREST
frame_dataset = VideoDataset(frame_dirs, frame_type=opt.type,
                             transform=transforms.Resize(opt.size[::-1], interpolation))

# Save resized frames
for sequence_dir, sequence in tqdm(zip(frame_dirs, frame_dataset),
                                   desc='Resizing frames', unit='sequence',
                                   total=len(frame_dirs)):
    sequence_name = basename(sequence_dir)
    image_o_dir = f'{opt.output_dir}/{sequence_name}'
    makedirs(image_o_dir, exist_ok=True)
    for frame_path, frame in zip(glob.glob(f'{sequence_dir}/*'), sequence):
        frame_name = basename(frame_path)
        save_image(frame, f'{image_o_dir}/{frame_name}', opt.type)
import argparse
import glob
from os.path import basename

from PIL import Image
from torchvision.transforms import transforms as T

from inpainting.load import VideoDataset
from inpainting.visualize import save_video

parser = argparse.ArgumentParser()
parser.add_argument('--frames-pattern', type=str, default='results/demo/Inpainter/OutputImages/')
parser.add_argument('--video-dir', type=str, default='results/demo/Inpainter/Misc')
parser.add_argument('--video-name', type=str, default='output_images.mp4')
parser.add_argument('--frame-type', type=str, default='image')
parser.add_argument('--frame-rate', type=int, default=24)
opt = parser.parse_args()

frames_dirs = list(glob.glob(opt.frames_pattern))
frames_dataset = VideoDataset(frames_dirs, opt.frame_type, transform=T.Compose([T.ToTensor()]))

# Render each frame sequence into a video file.
for frame_dir, frames in zip(frames_dirs, frames_dataset):
    save_video(frames, f'{opt.video_dir}/{basename(frame_dir)}/{opt.video_name}',
               opt.frame_type, opt.frame_rate)
import glob

from torchvision.transforms import transforms

from inpainting.load import VideoDataset
from inpainting.visualize import show_image

time = 8
batch_size = 8
epochs = 100
learning_rate = 1e-3
size = (256, 256)

transform = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor()
])
video_dataset = VideoDataset(
    list(glob.glob('../data/raw/video/DAVIS/JPEGImages/480p/*')),
    sequence_length=time,
    transform=transform)

# Preview the first few frames of the first sequence.
frames, frame_dir = video_dataset[0]
show_image(frames[0:5])
import argparse
import glob

from PIL import Image
from torchvision.transforms import transforms as T
from tqdm import tqdm

from inpainting.load import VideoDataset, VideoObjectRemovalDataset
from inpainting.visualize import save_frames

parser = argparse.ArgumentParser()
parser.add_argument('--images-dir', type=str, default='data/raw/demo/JPEGImages')
parser.add_argument('--masks-dir', type=str, default='data/interim/demo/Masks')
parser.add_argument('--output-dir', type=str, default='data/processed/demo')
opt = parser.parse_args()

image_dataset = VideoDataset(glob.glob(f'{opt.images_dir}/*'), frame_type='image')
annotation_dataset = VideoDataset(glob.glob(f'{opt.masks_dir}/*'), frame_type='mask')
dataset = VideoObjectRemovalDataset(image_dataset, annotation_dataset, transform=T.ToTensor())

# Write out the inputs, masks, and targets for each demo sequence.
for i, (input_images, masks, target_images) in enumerate(tqdm(dataset)):
    save_frames(input_images, f'{opt.output_dir}/InputImages/{i:05d}', 'image')
    save_frames(masks, f'{opt.output_dir}/Masks/{i:05d}', 'mask')
    save_frames(target_images, f'{opt.output_dir}/TargetImages/{i:05d}', 'image')