def flow_images(args, path):
    """Compute FlowNet2 optical flow for the frame pairs in a folder."""
    flowNetPath = "flownet2/FlowNet2_checkpoint.pth.tar"
    flownet2 = FlowNet2(args)
    if args.cuda:
        flownet2.cuda()
    flowNetCheckpoint = torch.load(flowNetPath)
    state_dict = flowNetCheckpoint["state_dict"]
    flownet2.load_state_dict(state_dict)
    flownet2.eval()

    testset = FlowVideoFromFolder(args, False, path)
    test_loader = DataLoader(testset, batch_size=1, shuffle=False)

    flow_images = []
    for batch_idx, (data, target) in enumerate(test_loader):
        if args.cuda:
            data = [d.cuda(non_blocking=True) for d in data]
            target = [t.cuda(non_blocking=True) for t in target]
        data, target = [Variable(d) for d in data], [Variable(t) for t in target]
        with torch.no_grad():
            inp = data[0]
            # print(inp.shape)  # debug
            outputs = flownet2(inp)[0].cpu().numpy()
        flow_images.append(outputs)
        if args.flow_vis:
            # Convert the 2-channel flow field (C, H, W) to an RGB visualization.
            img = flow2img(outputs.transpose(1, 2, 0)).astype(np.uint8)
            cv2.imshow("image", img)
            cv2.waitKey(0)
    return flow_images
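# The snippets in this file build the `args` namespace FlowNet2 expects in
# several different ways. Below is a minimal, hedged sketch of hand-building
# that namespace and calling flow_images() above; the attribute names (fp16,
# rgb_max, cuda, flow_vis) are taken from the usages in this file, while
# _make_flow_args and the folder path are illustrative placeholders.
import argparse
import torch

def _make_flow_args():
    return argparse.Namespace(
        fp16=False,            # pseudo-fp16 mode off (fp16 storage, fp32 math)
        rgb_max=255.0,         # pixel range FlowNet2 normalizes against
        cuda=torch.cuda.is_available(),
        flow_vis=False,        # set True to show each flow map with cv2.imshow
    )

# flows = flow_images(_make_flow_args(), "path/to/frames")  # placeholder path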
def __init__(self, args, output_stride=16):
    super(FGPLG, self).__init__()
    self.flownet = FlowNet2(args)
    self.warp = Resample2d()  # flow-guided warping layer
    channels = 7  # number of input channels expected by the backbone
    self.backbone = RCRNet(
        n_classes=1,
        output_stride=output_stride,
        pretrained=False,
        input_channels=channels,
    )
    self.freeze_bn()
    self.freeze_layer()
def load_model():
    # Minimal stand-in for the argparse namespace FlowNet2 expects.
    class parsers():
        # Run model in pseudo-fp16 mode (fp16 storage, fp32 math).
        # fp16 = True
        fp16 = False
        rgb_max = 255.0

    args = parsers()
    # Initialize the network.
    net = FlowNet2(args).cuda()
    # Load the checkpoint (renamed from `dict` to avoid shadowing the built-in).
    checkpoint = torch.load("/home/xyliu/2D_pose/deep-high-resolution-net.pytorch/flow_net2/models/FlowNet2_checkpoint.pth.tar")
    # checkpoint = torch.load("/home/xyliu/2D_pose/deep-high-resolution-net.pytorch/flow_net/models/FlowNet2-S_checkpoint.pth.tar")
    net.load_state_dict(checkpoint["state_dict"])
    net.eval()
    return net
def __init__(self, video_folder, transform, resize_height, resize_width, dataset='',
             time_step=4, num_pred=1, bbox_folder=None, device=None, flow_folder=None):
    self.dir = video_folder
    self.transform = transform
    self.videos = OrderedDict()
    self._resize_height = resize_height
    self._resize_width = resize_width
    self._time_step = time_step
    self._num_pred = num_pred
    self.dataset = dataset  # ped2, avenue, or ShanghaiTech

    # If the bounding boxes were precomputed, read them directly from .npy
    # files; otherwise compute them in __getitem__.
    self.bbox_folder = bbox_folder
    if bbox_folder is None:
        # Load the YOLO model.
        self.yolo_weights = 'yolov5/weights/yolov5s.pt'
        self.yolo_device = device
        self.yolo_model = attempt_load(self.yolo_weights, map_location=self.yolo_device)  # load FP32 model

    self.flow_folder = flow_folder
    if self.flow_folder is None:
        # Load FlowNet2.
        parser = argparse.ArgumentParser()
        parser.add_argument('--fp16', action='store_true',
                            help='Run model in pseudo-fp16 mode (fp16 storage, fp32 math).')
        parser.add_argument("--rgb_max", type=float, default=255.)
        args = parser.parse_args()
        self.device = device
        self.flownet = FlowNet2(args).to(self.device)
        dict_ = torch.load("flownet2/FlowNet2_checkpoint.pth.tar")
        self.flownet.load_state_dict(dict_["state_dict"])

    self.setup()
    self.samples = self.get_all_samples()
def __init__(self, args):
    super(ActionS, self).__init__()
    self.args = args
    flowNetPath = "flownet2/FlowNet2_checkpoint.pth.tar"
    flowNetCheckpoint = torch.load(flowNetPath)
    state_dict = flowNetCheckpoint["state_dict"]
    flownet2 = FlowNet2(args)
    self.scn = scn.model(args)
    if args.cuda:
        flownet2 = flownet2.cuda()
        self.scn.cuda()
    flownet2.load_state_dict(state_dict)
    self.flownet2 = flownet2
    self.flownet2.eval()
    self.scn.eval()
def model(args):
    flowNetPath = "flownet2/FlowNet2_checkpoint.pth.tar"
    flownet2 = FlowNet2(args)
    # The checkpoint path was defined but never used; load the pretrained
    # weights as the other snippets in this file do.
    checkpoint = torch.load(flowNetPath)
    flownet2.load_state_dict(checkpoint["state_dict"])
    return flownet2
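# Hedged usage sketch for model(): FlowNet2 takes a pair of frames stacked
# along a new "time" dimension, i.e. an input of shape (B, 3, 2, H, W), as the
# torch.stack(...).permute(1, 2, 0, 3, 4) call in main() below also shows.
# Tensor names here are illustrative; FlowNet2 typically requires H and W to
# be multiples of 64.
# flownet2 = model(args)
# frame_pair = torch.stack((frame1, frame2), dim=2)  # two (B, 3, H, W) frames -> (B, 3, 2, H, W)
# with torch.no_grad():
#     flow = flownet2(frame_pair)                    # (B, 2, H, W) flow field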
def main(log_dir, batch_size, num_workers, flownet_ckpt, test_start_frame, test_interval):
    arguments = copy.deepcopy(locals())

    if not torch.cuda.is_available():
        raise RuntimeError('At least 1 GPU is needed by FlowNet2.')
    device_main = torch.device('cuda:0')
    # Viewport alignment on an 8K frame needs more than 6 GB of GPU memory,
    # so it runs on a second GPU when available and falls back to the CPU otherwise.
    if torch.cuda.device_count() > 1:
        device_alignment = torch.device('cuda:1')
    else:
        device_alignment = torch.device('cpu')
    torch.backends.cudnn.benchmark = True

    logger = logging.getLogger("test")
    logger.handlers = []
    logger.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    logger.addHandler(ch)
    logger.info("%s", repr(arguments))

    bandwidth = 128
    test_set = DS_VQA_ODV(root=os.path.join(log_dir, "VQA_ODV"),
                          dataset_type='test',
                          tr_te_file='tr_te_VQA_ODV.txt',
                          ds_list_file='VQA_ODV.txt',
                          test_interval=test_interval,
                          test_start_frame=test_start_frame,
                          transform=VQA_ODV_Transform(bandwidth=bandwidth,
                                                      down_resolution=(1024, 2048),
                                                      to_rgb=True))
    anchor_shape = (16, 16)
    anchors = torch.tensor(generate_anchors(np.array(anchor_shape)))
    # Gaussian center bias
    cb = np.load(os.path.join(log_dir, 'cb256.npy')).astype(np.float32)[np.newaxis, np.newaxis, ...]
    cb = torch.tensor(cb).to(device_main)
    # Mask for anchors
    anchor_mask = np.load(os.path.join(log_dir, 'anchor_mask.npy')).astype(np.int64)
    anchor_mask = torch.tensor(anchor_mask)

    vpnet = VP_net.Model()
    vpnet.to(device_main)
    vpnet.load_state_dict(torch.load(os.path.join(log_dir, 'vp_state.pkl')))
    logger.info("Successfully loaded VP-net pre-trained model.")

    vqnet = VQ_net.Model()
    vqnet.to(device_main)
    vqnet.load_state_dict(torch.load(os.path.join(log_dir, 'vq_state.pkl')))
    logger.info("Successfully loaded VQ-net pre-trained model.")

    class FlowNetParams:
        rgb_max = 255.0
        fp16 = False

    flownet = FlowNet2(args=FlowNetParams())
    flownet.to(device_main)
    if isinstance(flownet_ckpt, str):
        flownet_ckpt = torch.load(flownet_ckpt)
    flownet.load_state_dict(flownet_ckpt['state_dict'])
    logger.info("Successfully loaded FlowNet2 pre-trained model.")
    flownet.eval()

    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, num_workers=num_workers,
                                              shuffle=False, pin_memory=True, drop_last=False)

    pred = []
    targets = []
    vpnet.eval()
    vqnet.eval()
    for batch_idx, img_tuple in enumerate(tqdm(test_loader)):
        with torch.no_grad():
            img_s2, img_original, img_down, img_gap_s2, gap_down, ref_original, target = img_tuple
            gap_down = gap_down.to(device_main)
            img_down = img_down.to(device_main)
            gap_down = gap_down.view((-1, *gap_down.shape[-3:]))
            img_down = img_down.view((-1, *img_down.shape[-3:]))

            # Optical flow: stack the two frames into FlowNet2's (B, C, 2, H, W) input.
            flow = torch.stack((gap_down, img_down), dim=0).permute(1, 2, 0, 3, 4)
            flow = flownet(flow)
            flow = flow.cpu().numpy().transpose((2, 3, 1, 0))
            flow = skimage.transform.resize(flow, (bandwidth * 2, bandwidth * 2) + flow.shape[-2:],
                                            order=1, anti_aliasing=True, mode='reflect',
                                            preserve_range=True).astype(np.float32)
            flow_s2 = torch.tensor(flow.transpose((3, 2, 0, 1)))
            flow_s2 = flow_s2.to(device_main)

            # VP net
            img_s2 = img_s2.to(device_main)
            img_gap_s2 = img_gap_s2.to(device_main)
            img_s2 = img_s2.view((-1, *img_s2.shape[-3:]))
            img_gap_s2 = img_gap_s2.view((-1, *img_gap_s2.shape[-3:]))
            vp_hm_weight, vp_hm_offset, _ = vpnet(img_s2, flow_s2, cb)

            # Viewport softer NMS
            hm_after_nms, hm_weight = proposal_layer(vp_hm_weight, vp_hm_offset, 20, 7.5,
                                                     anchors.to(vp_hm_offset), mask=anchor_mask)

            # Viewport alignment
            hm_after_nms = hm_after_nms.to(device_alignment)
            img_original = img_original.to(device_alignment)
            img_original = img_original.view((-1, *img_original.shape[-3:]))
            img_viewport = viewport_alignment(img_original, hm_after_nms[:, 0], hm_after_nms[:, 1])
            del img_original
            img_viewport = img_viewport.to(device_main)
            ref_original = ref_original.to(device_alignment)
            ref_original = ref_original.view((-1, *ref_original.shape[-3:]))
            ref_viewport = viewport_alignment(ref_original, hm_after_nms[:, 0], hm_after_nms[:, 1])
            del ref_original
            ref_viewport = ref_viewport.to(device_main)

            # VQ net: weight per-viewport scores by the NMS heatmap weights.
            vq_score, _ = vqnet(img_viewport, ref_viewport - img_viewport)
            vq_score = vq_score.flatten()
            vq_score = (vq_score * hm_weight).sum(dim=0, keepdim=True)

            pred.append(float(vq_score))
            target = target.mean(dim=1).reshape((-1,))
            targets.append(target.numpy())

    pred = np.array(pred)
    targets = np.concatenate(targets, 0)
    video_cnt = len(test_set.cum_frame_num)
    # Average per-frame predictions and targets over each video.
    pred = [pred[test_set.cum_frame_num_prev[i]:test_set.cum_frame_num[i]].mean() for i in range(video_cnt)]
    targets = [targets[test_set.cum_frame_num_prev[i]:test_set.cum_frame_num[i]].mean() for i in range(video_cnt)]
    np.savetxt(os.path.join(log_dir, 'test_pred_scores.txt'), np.array(pred))
    np.savetxt(os.path.join(log_dir, 'test_targets.txt'), np.array(targets))
    srocc, _ = scipy.stats.spearmanr(pred, targets)
    logger.info("SROCC: {:.4}".format(srocc))
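# A minimal sketch of invoking main() above; the argument values are
# illustrative placeholders, and the checkpoint path is an assumption based on
# the other snippets in this file.
# if __name__ == '__main__':
#     main(log_dir='logs',
#          batch_size=1,
#          num_workers=4,
#          flownet_ckpt='flownet2/FlowNet2_checkpoint.pth.tar',
#          test_start_frame=0,
#          test_interval=15)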
    return result.data  # tail of the preceding (truncated) function


if __name__ == '__main__':
    # Obtain the args needed to construct the FlowNet2 model.
    parser = argparse.ArgumentParser()
    parser.add_argument('--fp16', action='store_true',
                        help='Run model in pseudo-fp16 mode (fp16 storage, fp32 math).')
    parser.add_argument("--rgb_max", type=float, default=255.)
    args = parser.parse_args()
    # print(args)

    # Initialize the network.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = FlowNet2(args).to(device)
    # Load the state_dict.
    dict_ = torch.load("flownet2/FlowNet2_checkpoint.pth.tar")
    net.load_state_dict(dict_["state_dict"])

    # # Test on a single frame pair:
    # img1 = "/data0/lyx/VAD_datasets/avenue/training/frames/01/0001.jpg"
    # img2 = "/data0/lyx/VAD_datasets/avenue/training/frames/01/0002.jpg"
    # flow = get_frame_flow(img1, img2, net, device, 512, 384)
    # writeFlow('flow/flow1.npy', flow)
    # # print(readFlow('flow/flow1.npy'))

    # Compute optical flow for all frames.
    videos = "../AllDatasets/avenue/testing/frames"