Example #1
def get_optical_flow(frame_list, output_dir):
    # build the args needed to construct the FlowNet2 framework
    # and initialize the network
    print("In function right now!")
    net = FlowNet2(argparse.Namespace(fp16=False, rgb_max=255.0)).cuda()
    # load the pretrained weights
    checkpoint = torch.load("./FlowNet2_checkpoint.pth.tar")
    net.load_state_dict(checkpoint["state_dict"])

    for idx, f in enumerate(frame_list):
        if idx == len(frame_list) - 1:
            break
        print("Generating flow for %d and %d" % (idx, idx + 1))
        # load the image pair; see dataset.py for this operation
        img1 = read_gen(f)
        img2 = read_gen(frame_list[idx + 1])
        images = [img1, img2]
        images = np.array(images).transpose(3, 0, 1, 2)
        im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda()

        # run the pair through the network to obtain the flow
        result = net(im).squeeze()
        data = result.data.cpu().numpy().transpose(1, 2, 0)

        # write file
        output_file = '%s%06d.flo' % (output_dir, idx + 1)
        print("Writing to file: %s" % output_file)
        writeFlow(output_file, data)

    return 'Finish'
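# The .flo files written by writeFlow above use the Middlebury format: a
# float32 magic number (202021.25), int32 width and height, then row-major
# float32 (u, v) pairs. A minimal reader sketch for loading them back,
# assuming writeFlow emits that standard layout:
import numpy as np

def read_flo(path):
    with open(path, 'rb') as f:
        magic = np.fromfile(f, np.float32, count=1)[0]
        assert magic == 202021.25, 'invalid .flo file'
        w = int(np.fromfile(f, np.int32, count=1)[0])
        h = int(np.fromfile(f, np.int32, count=1)[0])
        data = np.fromfile(f, np.float32, count=2 * w * h)
    return data.reshape(h, w, 2)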
Example #2
def infer(args):
    assert args.data_list is not None or args.frame_dir is not None

    if args.frame_dir is not None:
        data_list = generate_flow_list(args.frame_dir)
        args.data_list = data_list

    with open(args.data_list) as fin:
        frame_paths = [line.split()[0] for line in fin]

    # save_dest is not defined in the original snippet; assume it names the
    # directory that receives the .npy flow files
    save_dest = getattr(args, 'save_dest', './flow_output')
    os.makedirs(save_dest, exist_ok=True)

    device = torch.device("cuda:0")

    Flownet = FlowNet2(args, requires_grad=False)
    print("====> Loading", args.pretrained_model_flownet2)
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2)
    Flownet.load_state_dict(flownet2_ckpt["state_dict"])
    Flownet.to(device)
    Flownet.eval()

    dataset_ = FlowInfer(args.data_list, size=args.img_size)
    dataloader_ = DataLoader(dataset_, batch_size=1, shuffle=False)
    # task_bar = ProgressBar(dataset_.__len__())

    for i, (f1, f2, output_path_) in enumerate(dataloader_):
        f1 = f1.to(device)
        f2 = f2.to(device)

        flow = Flownet(f1, f2)
        '''
        output_path = output_path_[0]

        output_file = os.path.dirname(output_path)
        if not os.path.exists(output_file):
            os.makedirs(output_file)
        '''

        flow_numpy = flow[0].permute(1, 2, 0).data.cpu().numpy()
        f_save = os.path.join(save_dest, os.path.basename(frame_paths[i])) + '.npy'

        print(flow_numpy.shape)
        print(flow_numpy.mean())

        np.save(f_save, flow_numpy)

        #cvb.write_flow(flow_numpy, output_path)
        # task_bar.update()

    print("FlowNet2 Inference has been finished~!")
    print("Extracted Flow has been save in", output_file)

    return
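# Flows saved as .npy above can be reloaded directly with NumPy; a quick
# usage sketch (the file name below is hypothetical):
import numpy as np

flow = np.load('000001.jpg.npy')  # hypothetical output of the loop above
print(flow.shape)                 # (H, W, 2): per-pixel (u, v) displacements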
Example #3
def infer(args):
    assert args.data_list is not None or args.frame_dir is not None

    if args.frame_dir is not None:
        data_list = generate_flow_list(args.frame_dir)
        args.data_list = data_list

    device = torch.device('cuda:0')

    Flownet = FlowNet2(args, requires_grad=False)
    print('====> Loading', args.pretrained_model_flownet2)
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2)
    Flownet.load_state_dict(flownet2_ckpt['state_dict'])
    Flownet.to(device)
    Flownet.eval()

    dataset_ = FlowInfer(args.data_list, size=args.img_size)
    dataloader_ = DataLoader(dataset_, batch_size=1, shuffle=False)
    task_bar = ProgressBar(len(dataset_))

    for i, (f1, f2, output_path_) in enumerate(dataloader_):
        f1 = f1.to(device)
        f2 = f2.to(device)

        flow = Flownet(f1, f2)

        output_path = output_path_[0]

        output_file = os.path.dirname(output_path)
        os.makedirs(output_file, exist_ok=True)

        flow_numpy = flow[0].permute(1, 2, 0).data.cpu().numpy()
        cvb.write_flow(flow_numpy, output_path)
        task_bar.update()
    sys.stdout.write('\n')
    print('FlowNet2 inference finished.')
    print('Extracted flow has been saved in', output_file)

    return output_file
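# The FlowInfer dataset used above is not shown. A minimal sketch of what it
# plausibly does, assuming each line of the list file holds
# "frame1_path frame2_path output_path" and that size is (height, width);
# this is a hypothetical stand-in, not the actual implementation:
import cv2
import torch
from torch.utils.data import Dataset

class FlowInferSketch(Dataset):
    def __init__(self, data_list, size=(512, 1024)):
        with open(data_list) as fin:
            self.samples = [line.split()[:3] for line in fin]
        self.size = size

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        p1, p2, out_path = self.samples[idx]
        f1 = cv2.resize(cv2.imread(p1), (self.size[1], self.size[0]))
        f2 = cv2.resize(cv2.imread(p2), (self.size[1], self.size[0]))
        to_tensor = lambda im: torch.from_numpy(im).permute(2, 0, 1).float()
        return to_tensor(f1), to_tensor(f2), out_path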
Example #4
def infer(args):
    assert args.data_list is not None or args.frame_dir is not None

    if args.frame_dir is not None:
        data_list = generate_flow_list(args.frame_dir)
        args.data_list = data_list

    device = torch.device('cuda:0')

    Flownet = FlowNet2(args, requires_grad=False)
    print('====> Loading', args.pretrained_model_flownet2)
    flownet2_ckpt = torch.load(args.pretrained_model_flownet2)
    Flownet.load_state_dict(flownet2_ckpt['state_dict'])
    Flownet.to(device)
    Flownet.eval()

    dataset_ = FlowInfer(args.data_list, size=args.img_size)
    dataloader_ = DataLoader(dataset_, batch_size=1, shuffle=False)
    task_bar = ProgressBar(len(dataset_))

    for i, (f1, f2, f3, f4, f5, output_path_1, output_path_2, output_path_3,
            output_path_4) in enumerate(dataloader_):
        f1 = f1.to(device)
        f2 = f2.to(device)
        f3 = f3.to(device)
        f4 = f4.to(device)
        f5 = f5.to(device)
        if output_path_1[0].endswith('rflo'):
            # '.rflo' outputs hold reverse flow, so the frame order is swapped
            flow_1 = Flownet(f3, f1)
            flow_2 = Flownet(f3, f2)
            flow_3 = Flownet(f3, f4)
            flow_4 = Flownet(f3, f5)
        else:
            flow_1 = Flownet(f1, f3)
            flow_2 = Flownet(f2, f3)
            flow_3 = Flownet(f4, f3)
            flow_4 = Flownet(f5, f3)
        # write each of the four flows, creating output directories as needed
        flows = [flow_1, flow_2, flow_3, flow_4]
        output_paths = [output_path_1[0], output_path_2[0],
                        output_path_3[0], output_path_4[0]]
        for flow, output_path in zip(flows, output_paths):
            output_file = os.path.dirname(output_path)
            os.makedirs(output_file, exist_ok=True)
            flow_numpy = flow[0].permute(1, 2, 0).data.cpu().numpy()
            cvb.write_flow(flow_numpy, output_path)

        task_bar.update()
    sys.stdout.write('\n')
    print('FlowNet2 inference finished.')
    print('Extracted flow has been saved in', output_file)

    return output_file
Example #5
                   (256, 256))
    #im2 = imread('FlowNet2_src/example/0img1.ppm')
    im2 = imresize(imread('../s3-drive/abbey/images/sun_aakbdcgfpksytcwj.jpg'),
                   (256, 256))
    # B x 3(RGB) x 2(pair) x H x W
    #ims = np.array([[im1, im2]])
    print(f'images shapes: {im1.shape}, {im2.shape}')
    ims = np.expand_dims(np.stack([im1, im2]), 0)
    print('ims array shape: ', ims.shape)
    ims = ims.transpose((0, 4, 1, 2, 3)).astype(np.float32)
    ims = torch.from_numpy(ims)
    print(ims.size())
    ims_v = Variable(ims.cuda(), requires_grad=False)

    # Build model
    flownet2 = FlowNet2(args)
    path = '../s3-drive/flownet/FlowNet2_checkpoint.pth.tar'
    pretrained_dict = torch.load(path)['state_dict']
    model_dict = flownet2.state_dict()
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in model_dict
    }
    model_dict.update(pretrained_dict)
    flownet2.load_state_dict(model_dict)
    flownet2.cuda()

    pred_flow = flownet2(ims_v).cpu().data
    pred_flow = pred_flow[0].numpy().transpose((1, 2, 0))
    print('pred_flow shape', pred_flow.shape)
    #flow_im = flow_to_image(pred_flow)
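    # flow_to_image is commented out above; a quick alternative visualization
    # sketch using matplotlib's quiver on a subsampled grid (assumes
    # matplotlib is available; not part of the original snippet):
    import matplotlib.pyplot as plt
    step = 16
    h, w = pred_flow.shape[:2]
    ys, xs = np.mgrid[0:h:step, 0:w:step]
    plt.quiver(xs, ys, pred_flow[::step, ::step, 0],
               -pred_flow[::step, ::step, 1])  # negate v: image y points down
    plt.gca().invert_yaxis()
    plt.show()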
Example #6
  parser = argparse.ArgumentParser()
  parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
  parser.add_argument("--rgb_max", type=float, default=255.)
  args = parser.parse_args()

  # Prepare img pair
  im1 = imread('/mnt/data/FlyingChairs_examples/0000000-img0.ppm')
  im2 = imread('/mnt/data/FlyingChairs_examples/0000000-img1.ppm')
  # B x 3(RGB) x 2(pair) x H x W
  ims = np.array([[im1, im2]]).transpose((0, 4, 1, 2, 3)).astype(np.float32)
  ims = torch.from_numpy(ims)
  print(ims.size())
  ims_v = Variable(ims.cuda(), requires_grad=False)

  # Build model
  flownet2 = FlowNet2(args).cuda()
  #path = '/mnt/data/flownet2-pytorch/FlowNet2_checkpoint.pth.tar'
  path = '/home/tung/flownet2-pytorch/work/FlowNet2_model_best.pth.tar'
  pretrained_dict = torch.load(path)['state_dict']
  model_dict = flownet2.state_dict()
  pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
  model_dict.update(pretrained_dict)
  flownet2.load_state_dict(model_dict)
  flownet2.cuda()

  pred_flow = flownet2(ims_v).cpu().data
  pred_flow = pred_flow[0].numpy().transpose((1,2,0))
  flow_im = flow_to_image(pred_flow)

  # Visualization
  plt.imshow(flow_im)
  plt.show()
Example #7
def just4pair(name, currentpath, nextpath, flowpath, esti_curr, error, flag):

    flow_path = flowpath + str(flag) + 'flow\\' + name
    estimated_pim2pathP = esti_curr + str(flag) + 'estimate\\' + name
    diff_pim2pathP = error + str(flag) + 'error\\' + name
    pim1path = currentpath + name
    pim2path = nextpath + name
    # estimated_pim2pathP = esti_curr
    # diff_pim2pathP = error
    # flow_path = flowpath

    frame1 = cv2.imread(pim1path)
    frame2 = cv2.imread(pim2path)
    # crop_size = (512,384)
    hsv_mask = np.zeros_like(frame1)
    i = 1
    while i:  # the body runs once; i is set to 0 at the end
        prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
        nxt = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
        start = time.time()

        if flag == 'GF':
            flow = cv2.calcOpticalFlowFarneback(prvs,
                                                nxt,
                                                None,
                                                pyr_scale=0.5,
                                                levels=7,
                                                winsize=7,
                                                iterations=5,
                                                poly_n=9,
                                                poly_sigma=1.1,
                                                flags=0)
            end = time.time()
            print('GFcomputing time : ' + str(end - start))

        elif flag == 'HS':
            # these three lines are specific to Horn-Schunck and produce the flow differently
            U, V = HornSchunck(prvs, nxt, alpha=20, Niter=15)
            end = time.time()
            print('HScomputing time : ' + str(end - start))
            flow = np.array([U.astype('float32'), V.astype('float32')])
            flow = np.transpose(flow, (1, 2, 0))
        else:
            # these lines compute the flow with FlowNet2
            parser = argparse.ArgumentParser()
            parser.add_argument(
                '--fp16',
                action='store_true',
                help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
            parser.add_argument("--rgb_max", type=float, default=255.)
            args = parser.parse_args()
            start = time.time()

            net = FlowNet2(args).cuda()
            checkpoint = torch.load("checkpoints/FlowNet2_checkpoint.pth.tar")
            net.load_state_dict(checkpoint["state_dict"])
            end = time.time()
            print('NNloading time : ' + str(end - start))
            images = [frame1, frame2]
            images = np.array(images).transpose(3, 0, 1, 2)
            im = torch.from_numpy(images.astype(
                np.float32)).unsqueeze(0).cuda()

            start = time.time()
            flow = net(im)
            end = time.time()
            flow = flow.squeeze()
            print('NNcomputing time : ' + str(end - start))
            flow = flow.data.cpu().numpy().transpose(1, 2, 0)

        start = time.time()
        mag, ang = cv2.cartToPolar(
            flow[..., 0], flow[..., 1],
            angleInDegrees=False)  # in radians not degrees
        hsv_mask[..., 0] = ang * 180 / np.pi / 2  # H : 0-180
        hsv_mask[..., 1] = 255
        hsv_mask[..., 2] = cv2.normalize(
            mag, None, 0, 255,
            cv2.NORM_MINMAX)  # a nonlinear mapping (e.g. sigmoid) could be used instead
        end = time.time()
        print('vis time : ' + str(end - start))
        rgb_representation = cv2.cvtColor(hsv_mask, cv2.COLOR_HSV2BGR)

        # cv2.imshow('result_window', double_representation)
        # cv2.imshow('result_window', rgb_representation)
        cv2.imwrite(flow_path, rgb_representation)

        # estimated_pim2N = warp_flow(frame1, flow)  # this warp maps frame 1 onto frame 2
        estimated_pim2P = image_warp(frame2, flow,
                                     mode='nearest')  # this warp maps frame 2 back onto frame 1
        # cv2.imwrite(estimated_pim2pathN, estimated_pim2N)
        cv2.imwrite(estimated_pim2pathP, estimated_pim2P)

        estimated_pim2P = cv2.cvtColor(estimated_pim2P, cv2.COLOR_BGR2GRAY)
        # estimated_pim2N = cv2.cvtColor(estimated_pim2N, cv2.COLOR_BGR2GRAY)
        # diff_P = estimated_pim2P - prvs
        diff_P = np.abs(prvs.astype(np.int32) -
                        estimated_pim2P.astype(np.int32))
        # diff_N = -estimated_pim2N + next
        # diff_N = np.abs(-estimated_pim2N.astype(np.int32) + next.astype(np.int32))  #

        cv2.imwrite(diff_pim2pathP, diff_P)
        # cv2.imwrite(diff_pim2pathN, diff_N)

        kk = cv2.waitKey(20) & 0xff
        # Press 'e' to exit the video
        if kk == ord('e'):
            break
        # Press 's' to save the video
        elif kk == ord('s'):
            cv2.imwrite('Optical_image.png', frame2)
            cv2.imwrite('HSV_converted_image.png', rgb_representation)
        i = 0
    cv2.destroyAllWindows()
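# The HSV encoding above (direction as hue, magnitude as value) is a common
# way to render dense flow. The same steps pulled out as a small helper
# sketch, independent of the original snippet:
import cv2
import numpy as np

def flow_to_bgr(flow):
    h, w = flow.shape[:2]
    hsv = np.zeros((h, w, 3), dtype=np.uint8)
    mag, ang = cv2.cartToPolar(flow[..., 0].astype(np.float32),
                               flow[..., 1].astype(np.float32))
    hsv[..., 0] = ang * 180 / np.pi / 2  # hue encodes direction, 0-180
    hsv[..., 1] = 255                    # full saturation
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)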
Example #8
import numpy as np
import argparse
import torch

#  from networks.FlowNet2 import FlowNet2  # the path depends on where you create this module
from models import FlowNet2
#  from frame_utils import read_gen  # the path depends on where you create this module

if __name__ == '__main__':
    # obtain the args needed to construct the FlowNet2 framework
    parser = argparse.ArgumentParser()
    parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
    parser.add_argument("--rgb_max", type=float, default=255.)
    args = parser.parse_args()

    # initialize the network
    net = FlowNet2(args).cuda()
    # load the pretrained weights
    checkpoint = torch.load("models/FlowNet2_checkpoint.pth.tar")
    net.load_state_dict(checkpoint["state_dict"])

    # load the image pair; see dataset.py for this operation
    #  pim1 = read_gen("/home/xyliu/2D_pose/flowtrack.pytorch/samples/img0.ppm")
    #  pim2 = read_gen("/home/xyliu/2D_pose/flowtrack.pytorch/samples/img1.ppm")

    from scipy.misc import imread
    net.eval()
    # Prepare img pair
    # H x W x 3(RGB)
    img0_path = '/home/xyliu/2D_pose/flowtrack.pytorch/samples/img0.ppm'
    img1_path = '/home/xyliu/2D_pose/flowtrack.pytorch/samples/img1.ppm'
    im1 = imread(img0_path)
Example #9
    parser = argparse.ArgumentParser()  # reconstructed; the snippet's opening lines were lost
    parser.add_argument("--rgb_max", type=float, default=255.)
    parser.add_argument("--in_path", type=str, default=None)
    parser.add_argument("--out_file", type=str, default=None)
    parser.add_argument("--recalc", type=int, default=1)
    parser.add_argument("--batch", type=int, default=4)
    parser.add_argument("--height", type=int, default=128)
    parser.add_argument("--width", type=int, default=192)
    parser.add_argument("--scale", type=int, default=3)
    parser.add_argument("--visualize", type=int, default=0)

    args = parser.parse_args()
    args.fp16 = False

    # initial FlowNet2 and load pretrained weights
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = FlowNet2(args)
    net.to(device)
    # load the pretrained weights
    checkpoint = torch.load("./models/FlowNet2_checkpoint.pth")
    net.load_state_dict(checkpoint["state_dict"])

    # load the image pair
    scale = args.scale
    base_size = (args.width, args.height)
    upscaled_size = (scale * base_size[0], scale * base_size[1])

    if args.in_path is None:
        img1, img2 = load_image_pair("./test/img1.tif",
                                     "./test/img2.tif",
                                     dest_size=base_size)
Example #10
import torch
from PIL import Image
import time

import numpy as np
import cv2 as cv

from torchvision import transforms
from models import FlowNet2
from utils import flow_utils

model = FlowNet2().cpu()
model.morphize()
#model.load_state_dict(torch.load('checkpoint/FlowNet2_checkpoint.pth.tar')['state_dict'])
#model.load_state_dict(torch.load('checkpoint/checkpoint3.pth.tar')['state_dict'])
#model.load_state_dict(torch.load('checkpoint/checkpoint8.pth.tar')['state_dict'])
model.load_state_dict(
    torch.load('checkpoint/checkpoint9.pth.tar')['state_dict'])
model.demorphize()
model.eval()
model.cuda()


def padding(image):
    # zero-pad so both spatial dims become multiples of 64 (a FlowNet2 requirement)
    _, w, h = image.size()
    w_pad = (64 - w % 64) % 64
    h_pad = (64 - h % 64) % 64
    new_image = torch.zeros(3, w + w_pad, h + h_pad)
    new_image[:, w_pad // 2:w_pad // 2 + w, h_pad // 2:h_pad // 2 + h] = image
    return new_image
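
# FlowNet2 downsamples its input six times, hence the padding to multiples of
# 64 above. A matching crop to undo the padding after inference might look
# like this (a sketch, not part of the original snippet; it keeps padding()'s
# dimension naming):
def unpadding(padded, orig_size):
    _, w, h = orig_size          # same (C, dim1, dim2) order as in padding()
    _, pw, ph = padded.size()
    w_off = (pw - w) // 2        # padding() centers the image, so crop
    h_off = (ph - h) // 2        # symmetrically
    return padded[:, w_off:w_off + w, h_off:h_off + h]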


class StaticCenterCrop(object):
    # class header reconstructed; it was lost from the snippet (see the usage
    # with (image_size, render_size) further down)
    def __init__(self, image_size, crop_size):
        self.th, self.tw = crop_size
        self.h, self.w = image_size

    def __call__(self, img):
        return img[(self.h - self.th) // 2:(self.h + self.th) // 2,
                   (self.w - self.tw) // 2:(self.w + self.tw) // 2, :]


def visualize_flow(flow):
    img = flow2img(flow)
    plt.imsave("result.png", img)


now = time.time()
net = FlowNet2().cuda()
checkpoint = torch.load("pretrained/FlowNet2_checkpoint.pth.tar")
net.load_state_dict(checkpoint["state_dict"])

# load the image pair; see dataset.py for this operation
img1 = read_gen("data/00000.jpg")
img2 = read_gen("data/00001.jpg")
images = [img1, img2]
print(img1.shape)
image_size = img1.shape[:2]

frame_size = img1.shape
render_size = [((frame_size[0]) // 64) * 64, ((frame_size[1]) // 64) * 64]

cropper = StaticCenterCrop(image_size, render_size)
images = list(map(cropper, images))
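
# A typical continuation (a sketch modeled on Example #1, not the original
# code): stack the cropped pair into the 1 x 3 x 2 x H x W layout FlowNet2
# expects and run the network.
images = np.array(images).transpose(3, 0, 1, 2)            # -> (3, 2, H, W)
im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda()
with torch.no_grad():
    flow = net(im).squeeze()                               # (2, H, W)
flow_numpy = flow.data.cpu().numpy().transpose(1, 2, 0)    # (H, W, 2)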
Example #12
    def __init__(self):
        args = Args()
        self.net = FlowNet2(args).cuda()
        checkpoint = torch.load("../vs_final/FlowNet2_checkpoint.pth.tar")
        self.net.load_state_dict(checkpoint["state_dict"])
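
    # The rest of the wrapper class is not shown; a hypothetical inference
    # method it might expose (a sketch following Example #1's tensor layout):
    def compute_flow(self, frame1, frame2):
        images = np.array([frame1, frame2]).transpose(3, 0, 1, 2)
        im = torch.from_numpy(images.astype(np.float32)).unsqueeze(0).cuda()
        with torch.no_grad():
            flow = self.net(im).squeeze()
        return flow.data.cpu().numpy().transpose(1, 2, 0)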
Example #13
def create_optical_flow_dataset(
        video_data, frame_data_path, colmap_workspace_path, mask_data_path,
        sparse_depth_map_path, dnn_depth_map_path, relative_depth_scale_path,
        dataset_path, depth_estimation_model_path, optical_flow_model_path,
        logger, inference_fn, batch_size):
    run_colmap(video_data, frame_data_path, mask_data_path,
               colmap_workspace_path, logger)
    camera, images_by_id, points3d_by_id = get_colmap_output(
        colmap_workspace_path, logger)

    relative_depth_scale = calculate_global_scale_adjustment_factor(
        video_data, camera, images_by_id, points3d_by_id,
        depth_estimation_model_path, dnn_depth_map_path, sparse_depth_map_path,
        relative_depth_scale_path, logger, inference_fn, batch_size)

    for image in images_by_id.values():
        image.camera_pose.t = relative_depth_scale * image.camera_pose.t

    logger.log(
        "Scaled the translation component of the camera poses by the relative scale factor of {:.2f}"
        .format(relative_depth_scale))

    frame_pair_indexes = sample_frame_pairs(video_data.num_frames)
    logger.log("Sampled frame pairs")

    args = argparse.Namespace(fp16=False, rgb_max=255)

    t = Compose([
        wrap_MiDaS_transform,
        Resize(
            video_data.width,
            video_data.height,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=64,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ), unwrap_MiDaS_transform
    ])

    flow_net = FlowNet2(args).cuda()
    flow_net.load_state_dict(torch.load(optical_flow_model_path)["state_dict"])
    logger.log(
        "Loaded optical flow model from {}.".format(optical_flow_model_path))

    os.makedirs(dataset_path)
    num_filtered = 0

    def calculate_optical_flow(frame_i, frame_j):
        images = list(map(t, (frame_i, frame_j)))
        images = np.array(images).transpose((3, 0, 1, 2))
        images_tensor = torch.from_numpy(images).unsqueeze(0).to(
            torch.float32).cuda()

        optical_flow = flow_net(images_tensor)
        optical_flow = F.interpolate(optical_flow,
                                     size=(video_data.height,
                                           video_data.width),
                                     mode='bilinear',
                                     align_corners=True)
        optical_flow = optical_flow.squeeze().cpu().numpy()

        return optical_flow

    with torch.no_grad(), OpticalFlowDatasetBuilder(dataset_path,
                                                    camera) as dataset_builder:
        for pair_index, (i, j) in enumerate(frame_pair_indexes):
            frame_i = video_data.frames[i]
            frame_j = video_data.frames[j]
            frame_i_aligned = None
            frame_j_aligned = None

            try:
                frame_j_aligned = align_images(frame_i, frame_j)
            except ValueError:
                warnings.warn("Could not align frame #{} and #{}.".format(
                    i, j))
                continue

            try:
                frame_i_aligned = align_images(video_data.frames[j],
                                               video_data.frames[i])
            except ValueError:
                warnings.warn("Could not align frame #{} and #{}.".format(
                    j, i))
                continue

            optical_flow_forwards = calculate_optical_flow(
                frame_i, frame_j_aligned)
            optical_flow_backwards = calculate_optical_flow(
                frame_i_aligned, video_data.frames[j])

            delta = np.abs(optical_flow_forwards - optical_flow_backwards)
            valid_mask = (delta <= 1).astype(bool)  # np.bool was removed in NumPy 1.24+
            # `valid_mask` is up to this point, indicating if each of the u and v components of each optical
            # flow vector are within 1px error.
            # However, we need it to indicate this on a per-pixel basis, so we combine the binary maps of the u
            # and v components to give us the validity of the optical flow at the given pixel.
            valid_mask = valid_mask[0, :, :] & valid_mask[1, :, :]
            # Ensure that valid mask is a CHW tensor to follow with PyTorch's conventions of dimension ordering.
            valid_mask = np.expand_dims(valid_mask, axis=0)

            # TODO: Check if `delta = np.sum(np.abs(a - b), axis=-1) <= 1` would do the same thing as above.
            should_keep_frame = np.mean(valid_mask) >= 0.8

            if should_keep_frame:
                dataset_builder.add(i, j, frame_i, frame_j,
                                    optical_flow_forwards, valid_mask,
                                    images_by_id)
            else:
                num_filtered += 1

            logger.log(
                "Processed {} frame pairs out of {} ({} kept, {} filtered out).\r"
                .format(pair_index + 1, len(frame_pair_indexes),
                        pair_index + 1 - num_filtered, num_filtered),
                end="")
    print()
    logger.log("Saved dataset to {}.".format(dataset_path))