# Assumed imports, mirroring the module layout of 3D-ResNets-PyTorch-style
# projects (spatial_transforms.py, temporal_transforms.py, dataset.py,
# utils.py); get_normalize_method is assumed to be defined elsewhere in this
# module and to return a Normalize transform honoring the no-norm flags.
import torch
import torch.utils.data

from spatial_transforms import (CenterCrop, Compose, Normalize,
                                PickFirstChannels, Resize, ScaleValue,
                                ToTensor)
from temporal_transforms import Compose as TemporalCompose
from temporal_transforms import SlidingWindow, TemporalSubsampling
from dataset import get_inference_data
from utils import worker_init_fn


# Variant: baseline PyTorch inference setup; builds the spatial and temporal
# transform pipelines, then wraps the inference dataset in a DataLoader.
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.extend(
        [ToTensor(), ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.file_type,
        opt.inference_subset, spatial_transform, temporal_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
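
# Usage sketch for the variant above. All values below are illustrative
# assumptions (the real `opt` comes from the project's option parser); only
# the fields actually read by get_inference_utils are set, and the paths are
# hypothetical placeholders.
from types import SimpleNamespace
from pathlib import Path

opt = SimpleNamespace(
    inference_crop='center',
    mean=[0.4345, 0.4051, 0.3775],  # example RGB stats; use your dataset's
    std=[0.2768, 0.2713, 0.2737],
    no_mean_norm=False, no_std_norm=False,
    sample_size=112, value_scale=1,
    sample_t_stride=1, sample_duration=16, inference_stride=16,
    video_path=Path('data/kinetics_videos/jpg'),  # hypothetical path
    annotation_path=Path('data/kinetics.json'),   # hypothetical path
    dataset='kinetics', file_type='jpg', inference_subset='val',
    inference_batch_size=1, n_threads=4)

inference_loader, class_names = get_inference_utils(opt)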
# Variant: PaddlePaddle port; uses ToArray() instead of ToTensor() (assumed to
# live in this fork's spatial transform module) and the legacy paddle.batch
# reader API instead of a torch DataLoader.
import paddle

def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToArray())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data = get_inference_data(opt.video_path, opt.annotation_path,
                                        opt.dataset, opt.input_type,
                                        opt.file_type, opt.inference_subset,
                                        spatial_transform, temporal_transform)

    inference_loader = paddle.batch(inference_data.reader,
                                    batch_size=opt.inference_batch_size)

    return inference_loader, inference_data.class_names
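
# Usage sketch for the Paddle variant, under the assumption that
# paddle.batch follows the legacy reader API: it wraps a sample generator
# and returns a callable whose iterator yields lists of samples.
inference_loader, class_names = get_inference_utils(opt)
for mini_batch in inference_loader():
    clips = [sample[0] for sample in mini_batch]  # sample layout depends on the reader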
# Variant: frame-directory dataset with a different get_inference_data
# signature. Note that the spatial and temporal transforms are built but never
# passed to get_inference_data here, and the returned collate_fn is left
# disabled, so the DataLoader falls back to default collation.
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(
        opt.inference_label_path, opt.video_id_path, 'test',
        opt.inference_frame_dir, opt.image_size,
        window_size=opt.window_size)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False,
        worker_init_fn=worker_init_fn)
    # collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
# Variant: caches the constructed dataset and collate_fn to disk with pickle
# so repeated runs can skip dataset construction.
import os
import pickle

def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inf_data_checkpoint_path = opt.result_path / Path(
        'inf_data_' + opt.dataset + '.data')
    inf_collate_checkpoint_path = opt.result_path / Path(
        'inf_coll_' + opt.dataset + '.data')

    if (os.path.exists(inf_data_checkpoint_path)
            and os.path.exists(inf_collate_checkpoint_path)
            and opt.save_load_data_checkpoint):
        with open(inf_data_checkpoint_path, 'rb') as filehandle:
            inference_data = pickle.load(filehandle)
        with open(inf_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        inference_data, collate_fn = get_inference_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, opt.inference_subset, spatial_transform,
            temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(inf_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(inference_data, filehandle)
            with open(inf_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
def compute_saliency_maps(model, opt):
    # Build a tiny data loader and loop through it to generate saliency maps.
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    # Hard-coded paths to a small debugging subset of NTU RGB+D.
    tiny_video_path = Path('/home/ruta/teeny_data/nturgb/jpg')
    tiny_annotation_path = Path('/home/ruta/teeny_data/ntu_01.json')

    tiny_data, collate_fn = get_inference_data(
        tiny_video_path, tiny_annotation_path, opt.dataset, opt.input_type,
        opt.file_type, opt.inference_subset, spatial_transform,
        temporal_transform)

    tiny_loader = torch.utils.data.DataLoader(
        tiny_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=None,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    saliency_maps = []
    for i, (inputs, targets) in enumerate(tiny_loader):
        sal_map = get_saliency_map(inputs, targets, model, opt)
        # Plot the saliency map with matplotlib and save it to a file.
        plot_saliency(sal_map, i, inputs, targets)
        saliency_maps.append(sal_map)

    return saliency_maps
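
# `get_saliency_map` and `plot_saliency` are not defined in this section. As
# an assumption about get_saliency_map's behavior (not the actual
# implementation), a minimal gradient-based "vanilla" saliency sketch is
# given below: backprop the target-class score to the input and take the max
# absolute gradient over the channel dimension. It assumes `targets` holds
# integer class indices and `inputs` is a (batch, C, T, H, W) clip tensor.
def get_saliency_map(inputs, targets, model, opt):
    inputs = inputs.clone().requires_grad_(True)
    scores = model(inputs)  # (batch, n_classes)
    picked = scores.gather(1, targets.view(-1, 1)).squeeze(1)
    picked.sum().backward()  # gradients of class scores w.r.t. the input
    # Max |gradient| over channels: (batch, C, T, H, W) -> (batch, T, H, W).
    # Note: model parameters also accumulate gradients here; zero them if
    # the model is reused for training afterwards.
    return inputs.grad.abs().amax(dim=1)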
# Variant: class names are read from a Kinetics-700 label CSV instead of
# being taken from the dataset object.
import pandas as pd

def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(opt.video_path,
                                                    opt.input_type,
                                                    opt.file_type,
                                                    spatial_transform,
                                                    temporal_transform)
    # inference_data, collate_fn = get_inference_data(
    #     opt.video_path, opt.input_type, opt.file_type, spatial_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    # Map the first CSV column (label id) to the second (class name).
    df = pd.read_csv('kinetics_700_labels.csv')
    class_names = dict(zip(df.iloc[:, 0], df.iloc[:, 1]))

    return inference_loader, class_names
# Script variant: module-level transform setup followed by model loading
# (comments translated from Chinese).
#normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm)
normalize = Normalize(mean, std)

spatial_transform = [Resize(sample_size)]
if inference_crop == 'center':
    spatial_transform.append(CenterCrop(sample_size))
if input_type == 'flow':
    spatial_transform.append(PickFirstChannels(n=2))
spatial_transform.append(ToTensor())
spatial_transform.extend([ScaleValue(value_scale), normalize])
spatial_transform = Compose(spatial_transform)

temporal_transform = []
if sample_t_stride > 1:
    temporal_transform.append(TemporalSubsampling(sample_t_stride))
temporal_transform.append(SlidingWindow(sample_duration, inference_stride))
temporal_transform = TemporalCompose(temporal_transform)

# Load the model.
#print('load model begin!')
model = generate_model_resnet(1)  # build the ResNet model
#model = torch.load('./save_200.pth')
checkpoint = torch.load('./save_200.pth', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
#print(model)
model.eval()  # fix batchnorm, dropout, etc.; this is essential for inference
model = model.to(device)
#print('load model done!')

count = 0
# Test a single video.
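
# The original script is truncated here. As a hedged sketch (not the author's
# code): assuming `temporal_transform` maps a list of frame indices to a list
# of sliding-window index lists, and `spatial_transform` maps one PIL frame to
# a (C, H, W) tensor, clips for a single video could be built like this.
def clips_from_frames(frames):
    # `clips_from_frames` is a hypothetical helper; `frames` is assumed to be
    # a list of per-frame PIL images for one video.
    windows = temporal_transform(list(range(len(frames))))
    clips = []
    for window in windows:
        clip = [spatial_transform(frames[i]) for i in window]  # T x (C, H, W)
        clips.append(torch.stack(clip, 1))  # (C, T, H, W)
    return torch.stack(clips)  # (n_windows, C, T, H, W)

# with torch.no_grad():
#     outputs = model(clips_from_frames(frames).to(device))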