Example #1
def flow_images(args, path):
    flowNetPath = "flownet2/FlowNet2_checkpoint.pth.tar"
    flownet2 = FlowNet2(args)

    if args.cuda:
        flownet2.cuda()

    flowNetCheckpoint = torch.load(flowNetPath)
    state_dict = flowNetCheckpoint["state_dict"]
    flownet2.load_state_dict(state_dict)
    flownet2.eval()

    testset = FlowVideoFromFolder(args, False, path)
    test_loader = DataLoader(testset, batch_size=1, shuffle=False)

    flow_images = []
    for batch_idx, (data, target) in enumerate(test_loader):
        if args.cuda:
            data = [d.cuda(non_blocking=True) for d in data]
            target = [t.cuda(non_blocking=True) for t in target]
        with torch.no_grad():
            inp = data[0]
            print(inp.shape)
            outputs = flownet2(inp)[0].cpu().numpy()
            flow_images.append(outputs)
            if args.flow_vis:
                img = flow2img(outputs.transpose(1, 2, 0)).astype(np.uint8)
                cv2.imshow("image", img)
                cv2.waitKey(0)
    return flow_images
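
flow2img is not defined in this excerpt. A common HSV-based visualization that is consistent with the (H, W, 2) array passed to it above is sketched below (the actual flow2img may differ):

import cv2
import numpy as np

def flow_to_color(flow):
    # Map a (H, W, 2) flow field to a BGR image:
    # hue encodes direction, brightness encodes magnitude.
    fx = np.ascontiguousarray(flow[..., 0])
    fy = np.ascontiguousarray(flow[..., 1])
    mag, ang = cv2.cartToPolar(fx, fy)
    hsv = np.zeros((*flow.shape[:2], 3), dtype=np.uint8)
    hsv[..., 0] = (ang * 180 / np.pi / 2).astype(np.uint8)  # hue in [0, 180)
    hsv[..., 1] = 255                                       # full saturation
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)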
Example #2
    def __init__(self, args, output_stride=16):
        super(FGPLG, self).__init__()
        self.flownet = FlowNet2(args)
        self.warp = Resample2d()
        channels = 7

        self.backbone = RCRNet(n_classes=1,
                               output_stride=output_stride,
                               pretrained=False,
                               input_channels=channels)

        self.freeze_bn()
        self.freeze_layer()
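
freeze_bn and freeze_layer are not shown in this excerpt. A typical freeze_bn, assuming the common pattern of fixing BatchNorm statistics while fine-tuning, would be:

import torch.nn as nn

def freeze_bn(self):
    # Keep the BatchNorm running statistics fixed during fine-tuning
    # by switching those layers to eval mode.
    for m in self.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()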
Example #3
def load_model():
    class parsers():
        # Run model in pseudo-fp16 mode (fp16 storage, fp32 math):
        #  fp16 = True
        fp16 = False
        rgb_max = 255.0

    args = parsers()

    # initialize the network
    net = FlowNet2(args).cuda()
    # load the state_dict
    checkpoint = torch.load("/home/xyliu/2D_pose/deep-high-resolution-net.pytorch/flow_net2/models/FlowNet2_checkpoint.pth.tar")
    #  checkpoint = torch.load("/home/xyliu/2D_pose/deep-high-resolution-net.pytorch/flow_net/models/FlowNet2-S_checkpoint.pth.tar")
    net.load_state_dict(checkpoint["state_dict"])
    net.eval()
    return net
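
FlowNet2's forward pass expects a 5-D tensor of shape (N, 3, 2, H, W), i.e. two RGB frames stacked along the third dimension, as Example #7 below also shows. A minimal inference sketch with the net returned above (the random frames and the 384x512 size, which keeps both dimensions divisible by 64, are placeholders):

import torch

net = load_model()

# Two RGB frames as float tensors in [0, 255], shape (3, H, W).
frame1 = torch.rand(3, 384, 512) * 255
frame2 = torch.rand(3, 384, 512) * 255

inp = torch.stack([frame1, frame2], dim=1).unsqueeze(0).cuda()  # (1, 3, 2, H, W)
with torch.no_grad():
    flow = net(inp)[0].cpu().numpy()  # (2, H, W): x and y flow components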
Example #4
    def __init__(self,
                 video_folder,
                 transform,
                 resize_height,
                 resize_width,
                 dataset='',
                 time_step=4,
                 num_pred=1,
                 bbox_folder=None,
                 device=None,
                 flow_folder=None):
        self.dir = video_folder
        self.transform = transform
        self.videos = OrderedDict()
        self._resize_height = resize_height
        self._resize_width = resize_width
        self._time_step = time_step
        self._num_pred = num_pred

        self.dataset = dataset  # ped2, avenue, or ShanghaiTech

        # If the bounding boxes were precomputed, read them from the .npy files;
        # otherwise they are computed on the fly in __getitem__.
        self.bbox_folder = bbox_folder
        if bbox_folder is None:  # load the YOLO model
            self.yolo_weights = 'yolov5/weights/yolov5s.pt'
            self.yolo_device = device
            self.yolo_model = attempt_load(
                self.yolo_weights,
                map_location=self.yolo_device)  # load FP32 model

        self.flow_folder = flow_folder
        if self.flow_folder is None:  # load FlowNet2
            parser = argparse.ArgumentParser()
            parser.add_argument(
                '--fp16',
                action='store_true',
                help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
            parser.add_argument("--rgb_max", type=float, default=255.)
            args = parser.parse_args([])  # parse defaults only; don't consume the host script's argv

            self.device = device
            self.flownet = FlowNet2(args).to(self.device)
            dict_ = torch.load("flownet2/FlowNet2_checkpoint.pth.tar")
            self.flownet.load_state_dict(dict_["state_dict"])

        self.setup()
        self.samples = self.get_all_samples()
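
Building an ArgumentParser inside a dataset constructor just to read two defaults is fragile; a plain namespace avoids touching sys.argv entirely (a sketch, not the original code):

from types import SimpleNamespace

# Equivalent to the two argparse defaults above.
flownet_args = SimpleNamespace(fp16=False, rgb_max=255.0)
flownet = FlowNet2(flownet_args).to(device)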
Example #5
    def __init__(self, args):
        super(ActionS, self).__init__()
        self.args = args

        flowNetPath = "flownet2/FlowNet2_checkpoint.pth.tar"
        flowNetCheckpoint = torch.load(flowNetPath)
        state_dict = flowNetCheckpoint["state_dict"]
        flownet2 = FlowNet2(args)

        self.scn = scn.model(args)
        if args.cuda:
            flownet2 = flownet2.cuda()
            self.scn.cuda()

        flownet2.load_state_dict(state_dict)
        self.flownet2 = flownet2
        self.flownet2.eval()
        self.scn.eval()
Example #6
def model(args):
    flowNetPath = "flownet2/FlowNet2_checkpoint.pth.tar"
    flownet2 = FlowNet2(args)
    return flownet2
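
This factory only constructs the network; the caller still has to load the checkpoint, as Example #1 does:

flownet2 = model(args)
checkpoint = torch.load("flownet2/FlowNet2_checkpoint.pth.tar")
flownet2.load_state_dict(checkpoint["state_dict"])
flownet2.eval()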
Example #7
def main(log_dir, batch_size, num_workers, flownet_ckpt, test_start_frame, test_interval):
    arguments = copy.deepcopy(locals())

    if not torch.cuda.is_available():
        raise RuntimeError('At least 1 GPU is needed by FlowNet2.')
    device_main = torch.device('cuda:0')

    # Viewport alignment on an 8K frame needs more than 6 GB of GPU memory,
    # so it runs on a second GPU when available and falls back to the CPU otherwise.
    if torch.cuda.device_count() > 1:
        device_alignment = torch.device('cuda:1')
    else:
        device_alignment = torch.device('cpu')
    torch.backends.cudnn.benchmark = True

    logger = logging.getLogger("test")
    logger.handlers = []
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)

    logger.info("%s", repr(arguments))

    bandwidth = 128
    test_set = DS_VQA_ODV(root=os.path.join(log_dir, "VQA_ODV"), dataset_type='test', tr_te_file='tr_te_VQA_ODV.txt',
                          ds_list_file='VQA_ODV.txt', test_interval=test_interval, test_start_frame=test_start_frame,
                          transform=VQA_ODV_Transform(bandwidth=bandwidth, down_resolution=(1024, 2048), to_rgb=True))

    anchor_shape = (16, 16)
    anchors = torch.tensor(generate_anchors(np.array(anchor_shape)))

    # Gaussian center bias
    cb = np.load(os.path.join(log_dir, 'cb256.npy')).astype(np.float32)[np.newaxis, np.newaxis, ...]
    cb = torch.tensor(cb).to(device_main)
    # Mask for anchors
    anchor_mask = np.load(os.path.join(log_dir, 'anchor_mask.npy')).astype(np.int64)
    anchor_mask = torch.tensor(anchor_mask)

    vpnet = VP_net.Model()
    vpnet.to(device_main)
    vpnet.load_state_dict(torch.load(os.path.join(log_dir, 'vp_state.pkl')))
    logger.info("Successfully loaded VP-net pre-trained model.")

    vqnet = VQ_net.Model()
    vqnet.to(device_main)
    vqnet.load_state_dict(torch.load(os.path.join(log_dir, 'vq_state.pkl')))
    logger.info("Successfully loaded VQ-net pre-trained model.")

    class FlowNetParams:
        rgb_max = 255.0
        fp16 = False

    flownet = FlowNet2(args=FlowNetParams())
    flownet.to(device_main)

    if isinstance(flownet_ckpt, str):
        flownet_ckpt = torch.load(flownet_ckpt)
    flownet.load_state_dict(flownet_ckpt['state_dict'])
    logger.info("Successfully loaded FlowNet2 pre-trained model.")
    flownet.eval()

    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, num_workers=num_workers,
                                              shuffle=False, pin_memory=True, drop_last=False)

    pred = []
    targets = []

    vpnet.eval()
    vqnet.eval()

    for batch_idx, img_tuple in enumerate(tqdm(test_loader)):
        with torch.no_grad():
            img_s2, img_original, img_down, img_gap_s2, gap_down, ref_original, target = img_tuple

            gap_down = gap_down.to(device_main)
            img_down = img_down.to(device_main)
            gap_down = gap_down.view((-1, *gap_down.shape[-3:]))
            img_down = img_down.view((-1, *img_down.shape[-3:]))

            # Optical flow
            flow = torch.stack((gap_down, img_down), dim=0).permute(1, 2, 0, 3, 4)
            flow = flownet(flow)
            flow = flow.cpu().numpy().transpose((2, 3, 1, 0))
            flow = skimage.transform.resize(flow, (bandwidth * 2, bandwidth * 2) + flow.shape[-2:], order=1,
                                            anti_aliasing=True, mode='reflect', preserve_range=True).astype(np.float32)
            flow_s2 = torch.tensor(flow.transpose((3, 2, 0, 1)))
            flow_s2 = flow_s2.to(device_main)

            # VP net
            img_s2 = img_s2.to(device_main)
            img_gap_s2 = img_gap_s2.to(device_main)
            img_s2 = img_s2.view((-1, *img_s2.shape[-3:]))
            img_gap_s2 = img_gap_s2.view((-1, *img_gap_s2.shape[-3:]))

            vp_hm_weight, vp_hm_offset, _ = vpnet(img_s2, flow_s2, cb)

            # Viewport softer NMS
            hm_after_nms, hm_weight = proposal_layer(vp_hm_weight, vp_hm_offset, 20, 7.5, anchors.to(vp_hm_offset),
                                                     mask=anchor_mask)

            # Viewport alignment
            hm_after_nms = hm_after_nms.to(device_alignment)

            img_original = img_original.to(device_alignment)
            img_original = img_original.view((-1, *img_original.shape[-3:]))
            img_viewport = viewport_alignment(img_original, hm_after_nms[:, 0], hm_after_nms[:, 1])
            del img_original
            img_viewport = img_viewport.to(device_main)

            ref_original = ref_original.to(device_alignment)
            ref_original = ref_original.view((-1, *ref_original.shape[-3:]))
            ref_viewport = viewport_alignment(ref_original, hm_after_nms[:, 0], hm_after_nms[:, 1])
            del ref_original
            ref_viewport = ref_viewport.to(device_main)

            # VQ net
            vq_score, _ = vqnet(img_viewport, ref_viewport - img_viewport)
            vq_score = vq_score.flatten()
            vq_score = (vq_score * hm_weight).sum(dim=0, keepdim=True)

            pred.append(float(vq_score))

            target = target.mean(dim=1).reshape((-1,))
            targets.append(target.numpy())

    pred = np.array(pred)
    targets = np.concatenate(targets, 0)
    video_cnt = len(test_set.cum_frame_num)
    pred = [pred[test_set.cum_frame_num_prev[i]:test_set.cum_frame_num[i]].mean() for i in range(video_cnt)]
    targets = [targets[test_set.cum_frame_num_prev[i]:test_set.cum_frame_num[i]].mean() for i in range(video_cnt)]
    np.savetxt(os.path.join(log_dir, 'test_pred_scores.txt'), np.array(pred))
    np.savetxt(os.path.join(log_dir, 'test_targets.txt'), np.array(targets))
    srocc, _ = scipy.stats.spearmanr(pred, targets)

    logger.info("SROCC:{:.4}".format(srocc))
Example #8
    return result.data



if __name__ == '__main__':
    # obtain the args needed to construct the FlowNet2 model
    parser = argparse.ArgumentParser()
    parser.add_argument('--fp16', action='store_true', help='Run model in pseudo-fp16 mode (fp16 storage fp32 math).')
    parser.add_argument("--rgb_max", type=float, default=255.)
    
    args = parser.parse_args()
    # print(args)

    # initialize the network
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = FlowNet2(args).to(device)
    # load the state_dict
    dict_ = torch.load("flownet2/FlowNet2_checkpoint.pth.tar")
    net.load_state_dict(dict_["state_dict"])


    # # test 
    # img1 = "/data0/lyx/VAD_datasets/avenue/training/frames/01/0001.jpg"
    # img2 = "/data0/lyx/VAD_datasets/avenue/training/frames/01/0002.jpg"

    # flow = get_frame_flow(img1, img2, net, device, 512, 384)
    # writeFlow('flow/flow1.npy',flow)
    # # print( readFlow('flow/flow1.npy') )

    # compute optical flow for all frames
    videos = "../AllDatasets/avenue/testing/frames"
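
get_frame_flow is referenced in the commented-out test above but not defined in this excerpt. A minimal version consistent with its call signature (img1, img2, net, device, width, height) could look like this sketch (the BGR-to-RGB conversion and the resize are assumptions):

def get_frame_flow(img1_path, img2_path, net, device, width, height):
    # Read both frames and resize them; FlowNet2 expects the
    # spatial dimensions to be multiples of 64.
    frames = []
    for p in (img1_path, img2_path):
        img = cv2.imread(p)
        img = cv2.resize(img, (width, height))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        frames.append(torch.from_numpy(img.transpose(2, 0, 1)).float())
    inp = torch.stack(frames, dim=1).unsqueeze(0).to(device)  # (1, 3, 2, H, W)
    with torch.no_grad():
        flow = net(inp)[0].cpu().numpy().transpose(1, 2, 0)   # (H, W, 2)
    return flow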