def get_optical_flow(frame_list, output_dir):
    # Obtain the necessary args to construct the FlowNet2 framework and
    # initialise the network.
    print("In function right now!")
    net = FlowNet2(argparse.Namespace(fp16=False, rgb_max=255.0)).cuda()

    # Load the state_dict.
    checkpoint = torch.load("./FlowNet2_checkpoint.pth.tar")
    net.load_state_dict(checkpoint["state_dict"])

    for idx, f in enumerate(frame_list):
        if idx == len(frame_list) - 1:
            break
        print("Generating flow for %d and %d" % (idx, idx + 1))

        # Load the image pair; you can find this operation in dataset.py.
        img1 = read_gen(f)
        img2 = read_gen(frame_list[idx + 1])
        images = [img1, img2]
        images = np.array(images).transpose(3, 0, 1, 2)
        im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda()

        # Process the image pair to obtain the flow.
        result = net(im).squeeze()
        data = result.data.cpu().numpy().transpose(1, 2, 0)

        # Write the .flo file.
        output_file = '%s%06d.flo' % (output_dir, idx + 1)
        print("Writing to file: %s" % output_file)
        writeFlow(output_file, data)

    return 'Finish'
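# A minimal usage sketch (the frame paths and output directory here are
# illustrative, not from the original repo). Note that output_dir is
# string-concatenated rather than os.path.join-ed, so it needs a trailing
# slash:
import glob

frames = sorted(glob.glob('frames/*.png'))
get_optical_flow(frames, 'flow_out/')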
def infer(args):
    assert args.data_list is not None or args.frame_dir is not None

    if args.frame_dir is not None:
        args.data_list = generate_flow_list(args.frame_dir)

    with open(args.data_list) as fin:
        frame_paths = [line.split()[0] for line in fin]

    device = torch.device("cuda:0")

    Flownet = FlowNet2(args, requires_grad=False)
    print("====> Loading", args.pretrained_model_flownet2)
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2)
    Flownet.load_state_dict(flownet2_ckpt["state_dict"])
    Flownet.to(device)
    Flownet.eval()

    dataset_ = FlowInfer(args.data_list, size=args.img_size)
    dataloader_ = DataLoader(dataset_, batch_size=1, shuffle=False)

    # NOTE: `save_dest` is assumed to be defined elsewhere (e.g. at module
    # level); the original snippet never defines it.
    for i, (f1, f2, output_path_) in enumerate(dataloader_):
        f1 = f1.to(device)
        f2 = f2.to(device)
        flow = Flownet(f1, f2)

        flow_numpy = flow[0].permute(1, 2, 0).data.cpu().numpy()
        f_save = os.path.join(save_dest, os.path.basename(frame_paths[i])) + '.npy'
        print(flow_numpy.shape)
        print(flow_numpy.mean())
        np.save(f_save, flow_numpy)

    print("FlowNet2 inference has finished!")
    print("Extracted flow has been saved in", save_dest)
    return
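# Reading one of the saved flows back (a sketch; the file name below is
# hypothetical and simply mirrors the f_save pattern above):
flow = np.load(os.path.join(save_dest, '00000.png.npy'))  # H x W x 2
u, v = flow[..., 0], flow[..., 1]
print(u.shape, v.shape)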
def infer(args):
    assert args.data_list is not None or args.frame_dir is not None

    if args.frame_dir is not None:
        args.data_list = generate_flow_list(args.frame_dir)

    device = torch.device('cuda:0')

    Flownet = FlowNet2(args, requires_grad=False)
    print('====> Loading', args.pretrained_model_flownet2)
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2)
    Flownet.load_state_dict(flownet2_ckpt['state_dict'])
    Flownet.to(device)
    Flownet.eval()

    dataset_ = FlowInfer(args.data_list, size=args.img_size)
    dataloader_ = DataLoader(dataset_, batch_size=1, shuffle=False)
    task_bar = ProgressBar(len(dataset_))

    for i, (f1, f2, output_path_) in enumerate(dataloader_):
        f1 = f1.to(device)
        f2 = f2.to(device)
        flow = Flownet(f1, f2)

        output_path = output_path_[0]
        output_file = os.path.dirname(output_path)
        if not os.path.exists(output_file):
            os.makedirs(output_file)

        flow_numpy = flow[0].permute(1, 2, 0).data.cpu().numpy()
        cvb.write_flow(flow_numpy, output_path)
        task_bar.update()

    sys.stdout.write('\n')
    print('FlowNet2 inference has finished!')
    print('Extracted flow has been saved in', output_file)
    return output_file
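# Reading a written .flo file back for inspection (a sketch, assuming `cvb`
# is the cvbase package, whose read_flow mirrors the write_flow call above;
# the path is hypothetical):
flow = cvb.read_flow('video/00001.flo')  # H x W x 2
print(flow.shape, flow.mean())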
def infer(args):
    assert args.data_list is not None or args.frame_dir is not None

    if args.frame_dir is not None:
        args.data_list = generate_flow_list(args.frame_dir)

    device = torch.device('cuda:0')

    Flownet = FlowNet2(args, requires_grad=False)
    print('====> Loading', args.pretrained_model_flownet2)
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2)
    Flownet.load_state_dict(flownet2_ckpt['state_dict'])
    Flownet.to(device)
    Flownet.eval()

    dataset_ = FlowInfer(args.data_list, size=args.img_size)
    dataloader_ = DataLoader(dataset_, batch_size=1, shuffle=False)
    task_bar = ProgressBar(len(dataset_))

    for i, (f1, f2, f3, f4, f5, output_path_1, output_path_2, output_path_3,
            output_path_4) in enumerate(dataloader_):
        f1, f2, f3, f4, f5 = (f.to(device) for f in (f1, f2, f3, f4, f5))

        # An '.rflo' extension marks reverse flow: estimate from the centre
        # frame f3 to each neighbour instead of the other way around.
        if output_path_1[0][-4:] == 'rflo':
            flows = [Flownet(f3, f) for f in (f1, f2, f4, f5)]
        else:
            flows = [Flownet(f, f3) for f in (f1, f2, f4, f5)]

        output_paths = [p[0] for p in (output_path_1, output_path_2,
                                       output_path_3, output_path_4)]

        for flow, output_path in zip(flows, output_paths):
            output_file = os.path.dirname(output_path)
            if not os.path.exists(output_file):
                os.makedirs(output_file)
            flow_numpy = flow[0].permute(1, 2, 0).data.cpu().numpy()
            cvb.write_flow(flow_numpy, output_path)

        task_bar.update()

    sys.stdout.write('\n')
    print('FlowNet2 inference has finished!')
    print('Extracted flow has been saved in', output_file)
    return output_file
im1 = imresize(imread('...'),  # the im1 source path is truncated in the original
               (256, 256))
# im2 = imread('FlowNet2_src/example/0img1.ppm')
im2 = imresize(imread('../s3-drive/abbey/images/sun_aakbdcgfpksytcwj.jpg'),
               (256, 256))

# B x 3(RGB) x 2(pair) x H x W
# ims = np.array([[im1, im2]])
print(f'images shapes: {im1.shape}, {im2.shape}')
ims = np.expand_dims(np.stack([im1, im2]), 0)
print('ims array shape: ', ims.shape)
ims = ims.transpose((0, 4, 1, 2, 3)).astype(np.float32)
ims = torch.from_numpy(ims)
print(ims.size())
ims_v = Variable(ims.cuda(), requires_grad=False)

# Build the model and load only the weights that match its state_dict.
flownet2 = FlowNet2(args)
path = '../s3-drive/flownet/FlowNet2_checkpoint.pth.tar'
pretrained_dict = torch.load(path)['state_dict']
model_dict = flownet2.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
flownet2.load_state_dict(model_dict)
flownet2.cuda()

pred_flow = flownet2(ims_v).cpu().data
pred_flow = pred_flow[0].numpy().transpose((1, 2, 0))
print('pred_flow shape', pred_flow.shape)
# flow_im = flow_to_image(pred_flow)
parser = argparse.ArgumentParser()
parser.add_argument('--fp16', action='store_true',
                    help='Run model in pseudo-fp16 mode (fp16 storage, fp32 math).')
parser.add_argument('--rgb_max', type=float, default=255.)
args = parser.parse_args()

# Prepare the image pair.
im1 = imread('/mnt/data/FlyingChairs_examples/0000000-img0.ppm')
im2 = imread('/mnt/data/FlyingChairs_examples/0000000-img1.ppm')

# B x 3(RGB) x 2(pair) x H x W
ims = np.array([[im1, im2]]).transpose((0, 4, 1, 2, 3)).astype(np.float32)
ims = torch.from_numpy(ims)
print(ims.size())
ims_v = Variable(ims.cuda(), requires_grad=False)

# Build the model and load only the weights that match its state_dict.
flownet2 = FlowNet2(args).cuda()
# path = '/mnt/data/flownet2-pytorch/FlowNet2_checkpoint.pth.tar'
path = '/home/tung/flownet2-pytorch/work/FlowNet2_model_best.pth.tar'
pretrained_dict = torch.load(path)['state_dict']
model_dict = flownet2.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
flownet2.load_state_dict(model_dict)

pred_flow = flownet2(ims_v).cpu().data
pred_flow = pred_flow[0].numpy().transpose((1, 2, 0))
flow_im = flow_to_image(pred_flow)

# Visualization
plt.imshow(flow_im)
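# A shape walk-through of the B x 3 x 2 x H x W layout built above, using
# dummy data (the frame size is illustrative):
a = np.zeros((384, 512, 3), dtype=np.uint8)   # H x W x 3
b = np.zeros((384, 512, 3), dtype=np.uint8)
batch = np.array([[a, b]])                    # 1 x 2 x H x W x 3
batch = batch.transpose((0, 4, 1, 2, 3))      # 1 x 3 x 2 x H x W
assert batch.shape == (1, 3, 2, 384, 512)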
def just4pair(name, currentpath, nextpath, flowpath, esti_curr, error, flag):
    flow_path = flowpath + str(flag) + 'flow\\' + name
    estimated_pim2pathP = esti_curr + str(flag) + 'estimate\\' + name
    diff_pim2pathP = error + str(flag) + 'error\\' + name
    pim1path = currentpath + name
    pim2path = nextpath + name

    frame1 = cv2.imread(pim1path)
    frame2 = cv2.imread(pim2path)
    # crop_size = (512, 384)
    hsv_mask = np.zeros_like(frame1)

    i = 1
    while i:
        prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
        next_gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
        start = time.time()

        if flag == 'GF':
            flow = cv2.calcOpticalFlowFarneback(prvs, next_gray, None,
                                                pyr_scale=0.5, levels=7,
                                                winsize=7, iterations=5,
                                                poly_n=9, poly_sigma=1.1,
                                                flags=0)
            end = time.time()
            print('GF computing time: ' + str(end - start))
        elif flag == 'HS':
            # These three lines are exclusive to HS: assemble the flow array
            # from the separate U and V component images.
            U, V = HornSchunck(prvs, next_gray, alpha=20, Niter=15)
            end = time.time()
            print('HS computing time: ' + str(end - start))
            flow = np.array([U.astype('float32'), V.astype('float32')])
            flow = np.transpose(flow, (1, 2, 0))
        else:
            # This branch computes the flow with FlowNet2.
            parser = argparse.ArgumentParser()
            parser.add_argument('--fp16', action='store_true',
                                help='Run model in pseudo-fp16 mode '
                                     '(fp16 storage, fp32 math).')
            parser.add_argument('--rgb_max', type=float, default=255.)
            args = parser.parse_args()

            start = time.time()
            net = FlowNet2(args).cuda()
            checkpoint = torch.load('checkpoints/FlowNet2_checkpoint.pth.tar')
            net.load_state_dict(checkpoint['state_dict'])
            end = time.time()
            print('NN loading time: ' + str(end - start))

            images = [frame1, frame2]
            images = np.array(images).transpose(3, 0, 1, 2)
            im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda()
            start = time.time()
            flow = net(im)
            end = time.time()
            flow = flow.squeeze()
            print('NN computing time: ' + str(end - start))
            flow = flow.data.cpu().numpy().transpose(1, 2, 0)

        start = time.time()
        # cartToPolar returns the angle in radians, not degrees.
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1],
                                   angleInDegrees=False)
        hsv_mask[..., 0] = ang * 180 / np.pi / 2  # H: 0-180
        hsv_mask[..., 1] = 255
        # A nonlinear transformation of the value channel (sigmoid?) could be
        # considered here.
        hsv_mask[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
        end = time.time()
        print('vis time: ' + str(end - start))
        rgb_representation = cv2.cvtColor(hsv_mask, cv2.COLOR_HSV2BGR)
        # cv2.imshow('result_window', rgb_representation)
        cv2.imwrite(flow_path, rgb_representation)

        # estimated_pim2N = warp_flow(frame1, flow)  # this warp maps frame 1 onto frame 2
        estimated_pim2P = image_warp(frame2, flow, mode='nearest')  # this warp maps frame 2 back onto frame 1
        # cv2.imwrite(estimated_pim2pathN, estimated_pim2N)
        cv2.imwrite(estimated_pim2pathP, estimated_pim2P)
        estimated_pim2P = cv2.cvtColor(estimated_pim2P, cv2.COLOR_BGR2GRAY)
        # estimated_pim2N = cv2.cvtColor(estimated_pim2N, cv2.COLOR_BGR2GRAY)

        diff_P = np.abs(prvs.astype(np.int32) - estimated_pim2P.astype(np.int32))
        # diff_N = np.abs(next_gray.astype(np.int32) - estimated_pim2N.astype(np.int32))
        cv2.imwrite(diff_pim2pathP, diff_P)
        # cv2.imwrite(diff_pim2pathN, diff_N)

        kk = cv2.waitKey(20) & 0xff
        if kk == ord('e'):
            # Press 'e' to exit.
            break
        elif kk == ord('s'):
            # Press 's' to save the current frame and its flow visualisation.
            cv2.imwrite('Optical_image.png', frame2)
            cv2.imwrite('HSV_converted_image.png', rgb_representation)
        i = 0

    cv2.destroyAllWindows()
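# The HSV colour-coding used above, factored into a standalone helper (a
# sketch; `flow` is an H x W x 2 float32 array as produced by any of the
# three methods):
def flow_to_bgr(flow):
    hsv = np.zeros((flow.shape[0], flow.shape[1], 3), dtype=np.uint8)
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])  # angle in radians
    hsv[..., 0] = ang * 180 / np.pi / 2                     # hue: 0-180
    hsv[..., 1] = 255                                       # full saturation
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)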
import numpy as np
import argparse
import torch

# from networks.FlowNet2 import FlowNet2  # the path depends on where you create this module
from models import FlowNet2
# from frame_utils import read_gen  # the path depends on where you create this module

if __name__ == '__main__':
    # Obtain the necessary args to construct the FlowNet2 framework.
    parser = argparse.ArgumentParser()
    parser.add_argument('--fp16', action='store_true',
                        help='Run model in pseudo-fp16 mode (fp16 storage, fp32 math).')
    parser.add_argument('--rgb_max', type=float, default=255.)
    args = parser.parse_args()

    # Initialise the network.
    net = FlowNet2(args).cuda()

    # Load the state_dict.
    checkpoint = torch.load('models/FlowNet2_checkpoint.pth.tar')
    net.load_state_dict(checkpoint['state_dict'])

    # Load the image pair; you can find this operation in dataset.py.
    # pim1 = read_gen("/home/xyliu/2D_pose/flowtrack.pytorch/samples/img0.ppm")
    # pim2 = read_gen("/home/xyliu/2D_pose/flowtrack.pytorch/samples/img1.ppm")
    from scipy.misc import imread

    net.eval()

    # Prepare the image pair: H x W x 3 (RGB).
    aaa = '/home/xyliu/2D_pose/flowtrack.pytorch/samples/img0.ppm'
    bbb = '/home/xyliu/2D_pose/flowtrack.pytorch/samples/img1.ppm'
    im1 = imread(aaa)
parser.add_argument("--rgb_max", type=float, default=255.) parser.add_argument("--in_path", type=str, default=None) parser.add_argument("--out_file", type=str, default=None) parser.add_argument("--recalc", type=int, default=1) parser.add_argument("--batch", type=int, default=4) parser.add_argument("--height", type=int, default=128) parser.add_argument("--width", type=int, default=192) parser.add_argument("--scale", type=int, default=3) parser.add_argument("--visualize", type=int, default=0) args = parser.parse_args() args.fp16 = False # initial FlowNet2 and load pretrained weights device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") net = FlowNet2(args) net.to(device) # load the state_dict dict = torch.load("./models/FlowNet2_checkpoint.pth") net.load_state_dict(dict["state_dict"]) # load the image pair scale = args.scale base_size = (args.width, args.height) upscaled_size = (scale * base_size[0], scale * base_size[1]) if args.in_path is None: img1, img2 = load_image_pair("./test/img1.tif", "./test/img2.tif", dest_size=base_size)
import torch
from PIL import Image
import time
import numpy as np
import cv2 as cv
from torchvision import transforms

from models import FlowNet2
from utils import flow_utils

model = FlowNet2().cpu()
model.morphize()
# model.load_state_dict(torch.load('checkpoint/FlowNet2_checkpoint.pth.tar')['state_dict'])
# model.load_state_dict(torch.load('checkpoint/checkpoint3.pth.tar')['state_dict'])
# model.load_state_dict(torch.load('checkpoint/checkpoint8.pth.tar')['state_dict'])
model.load_state_dict(
    torch.load('checkpoint/checkpoint9.pth.tar')['state_dict'])
model.demorphize()
model.eval()
model.cuda()


def padding(image):
    """Zero-pad a 3-channel tensor so both spatial dims are multiples of 64,
    centring the original content in the padded canvas."""
    _, w, h = image.size()
    w_pad = (64 - w % 64) % 64
    h_pad = (64 - h % 64) % 64
    new_image = torch.zeros(3, w + w_pad, h + h_pad)
    new_image[:, w_pad // 2:w_pad // 2 + w, h_pad // 2:h_pad // 2 + h] = image
    return new_image
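# A quick usage sketch for `padding` (the image file name is hypothetical,
# and scaling to 0-255 assumes the model is fed rgb_max=255 inputs); both
# spatial dims come out as multiples of 64, as FlowNet2 requires:
img = Image.open('frame_0001.png').convert('RGB')
tensor = transforms.ToTensor()(img) * 255.0  # C x H x W, 0-255 range
padded = padding(tensor)
print(tuple(tensor.size()), '->', tuple(padded.size()))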
        self.th, self.tw = crop_size
        self.h, self.w = image_size

    def __call__(self, img):
        return img[(self.h - self.th) // 2:(self.h + self.th) // 2,
                   (self.w - self.tw) // 2:(self.w + self.tw) // 2, :]


def visualize_flow(flow):
    img = flow2img(flow)
    plt.imsave('result.png', img)


now = time.time()
net = FlowNet2().cuda()
checkpoint = torch.load('pretrained/FlowNet2_checkpoint.pth.tar')
net.load_state_dict(checkpoint['state_dict'])

# Load the image pair; you can find this operation in dataset.py.
img1 = read_gen('data/00000.jpg')
img2 = read_gen('data/00001.jpg')
images = [img1, img2]
print(img1.shape)

image_size = img1.shape[:2]
frame_size = img1.shape
# FlowNet2 needs spatial dims divisible by 64, so crop each frame to the
# largest centred window whose sides are multiples of 64.
render_size = [(frame_size[0] // 64) * 64, (frame_size[1] // 64) * 64]
cropper = StaticCenterCrop(image_size, render_size)
images = list(map(cropper, images))
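# A worked example of the 64-multiple crop above: a 720 x 1280 frame keeps
# a centred 704 x 1280 window:
assert (720 // 64) * 64 == 704
assert (1280 // 64) * 64 == 1280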
def __init__(self):
    args = Args()
    self.net = FlowNet2(args).cuda()
    checkpoint = torch.load("../vs_final/FlowNet2_checkpoint.pth.tar")
    self.net.load_state_dict(checkpoint["state_dict"])
def create_optical_flow_dataset(video_data, frame_data_path,
                                colmap_workspace_path, mask_data_path,
                                sparse_depth_map_path, dnn_depth_map_path,
                                relative_depth_scale_path, dataset_path,
                                depth_estimation_model_path,
                                optical_flow_model_path, logger,
                                inference_fn, batch_size):
    run_colmap(video_data, frame_data_path, mask_data_path,
               colmap_workspace_path, logger)
    camera, images_by_id, points3d_by_id = get_colmap_output(
        colmap_workspace_path, logger)

    relative_depth_scale = calculate_global_scale_adjustment_factor(
        video_data, camera, images_by_id, points3d_by_id,
        depth_estimation_model_path, dnn_depth_map_path,
        sparse_depth_map_path, relative_depth_scale_path, logger,
        inference_fn, batch_size)

    for image in images_by_id.values():
        image.camera_pose.t = relative_depth_scale * image.camera_pose.t

    logger.log("Scaled the translation component of the camera poses by the "
               "relative scale factor of {:.2f}".format(relative_depth_scale))

    frame_pair_indexes = sample_frame_pairs(video_data.num_frames)
    logger.log("Sampled frame pairs")

    args = argparse.Namespace(fp16=False, rgb_max=255)

    t = Compose([
        wrap_MiDaS_transform,
        Resize(video_data.width, video_data.height, resize_target=None,
               keep_aspect_ratio=True, ensure_multiple_of=64,
               resize_method="upper_bound",
               image_interpolation_method=cv2.INTER_CUBIC),
        unwrap_MiDaS_transform
    ])

    flow_net = FlowNet2(args).cuda()
    flow_net.load_state_dict(torch.load(optical_flow_model_path)["state_dict"])
    logger.log("Loaded optical flow model from {}.".format(optical_flow_model_path))

    os.makedirs(dataset_path)
    num_filtered = 0

    def calculate_optical_flow(frame_i, frame_j):
        images = list(map(t, (frame_i, frame_j)))
        images = np.array(images).transpose((3, 0, 1, 2))
        images_tensor = torch.from_numpy(images).unsqueeze(0).to(torch.float32).cuda()

        optical_flow = flow_net(images_tensor)
        optical_flow = F.interpolate(optical_flow,
                                     size=(video_data.height, video_data.width),
                                     mode='bilinear', align_corners=True)
        optical_flow = optical_flow.squeeze().cpu().numpy()

        return optical_flow

    with torch.no_grad(), OpticalFlowDatasetBuilder(dataset_path, camera) as dataset_builder:
        for pair_index, (i, j) in enumerate(frame_pair_indexes):
            frame_i = video_data.frames[i]
            frame_j = video_data.frames[j]

            try:
                frame_j_aligned = align_images(frame_i, frame_j)
            except ValueError:
                warnings.warn("Could not align frame #{} and #{}.".format(i, j))
                continue

            try:
                frame_i_aligned = align_images(video_data.frames[j],
                                               video_data.frames[i])
            except ValueError:
                warnings.warn("Could not align frame #{} and #{}.".format(j, i))
                continue

            optical_flow_forwards = calculate_optical_flow(frame_i, frame_j_aligned)
            optical_flow_backwards = calculate_optical_flow(frame_i_aligned,
                                                            video_data.frames[j])

            delta = np.abs(optical_flow_forwards - optical_flow_backwards)
            valid_mask = (delta <= 1).astype(bool)
            # Up to this point `valid_mask` indicates whether each of the u
            # and v components of each optical flow vector is within 1px
            # error. We need a per-pixel validity map, so combine the binary
            # maps of the u and v components.
            valid_mask = valid_mask[0, :, :] & valid_mask[1, :, :]
            # Ensure the valid mask is a CHW tensor, following PyTorch's
            # dimension-ordering convention.
            valid_mask = np.expand_dims(valid_mask, axis=0)
            # TODO: Check if `delta = np.sum(np.abs(a - b), axis=-1) <= 1` would do the same thing as above.
            should_keep_frame = np.mean(valid_mask) >= 0.8

            if should_keep_frame:
                dataset_builder.add(i, j, frame_i, frame_j,
                                    optical_flow_forwards, valid_mask,
                                    images_by_id)
            else:
                num_filtered += 1

            logger.log("Processed {} frame pairs out of {} ({} kept, {} filtered out).\r"
                       .format(pair_index + 1, len(frame_pair_indexes),
                               pair_index + 1 - num_filtered, num_filtered),
                       end="")

    print()
    logger.log("Saved dataset to {}.".format(dataset_path))
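# The forward-backward consistency test used above, as a standalone sketch
# (names are illustrative; the flows are 2 x H x W numpy arrays, matching
# the squeezed FlowNet2 output in calculate_optical_flow):
def consistency_mask(flow_fw, flow_bw, threshold=1.0):
    delta = np.abs(flow_fw - flow_bw)     # per-component error, 2 x H x W
    within = delta <= threshold           # u and v each within the threshold
    mask = within[0] & within[1]          # combine into a per-pixel map
    return np.expand_dims(mask, axis=0)   # 1 x H x W, CHW convention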