def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToArray())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data = get_inference_data(opt.video_path, opt.annotation_path,
                                        opt.dataset, opt.input_type,
                                        opt.file_type, opt.inference_subset,
                                        spatial_transform, temporal_transform)

    inference_loader = paddle.batch(inference_data.reader,
                                    batch_size=opt.inference_batch_size)

    return inference_loader, inference_data.class_names
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.extend(
        [ToTensor(), ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.file_type,
        opt.inference_subset, spatial_transform, temporal_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
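# Added usage sketch (not from the original snippets): one plausible way to
# consume the loader and class names returned by the PyTorch
# get_inference_utils above. `model` and `opt.output_topk` are assumed to be
# available; `run_inference_example` itself is a hypothetical helper name.
def run_inference_example(model, opt):
    import torch
    import torch.nn.functional as F

    inference_loader, class_names = get_inference_utils(opt)
    model.eval()
    results = []
    with torch.no_grad():
        for inputs, targets in inference_loader:
            probs = F.softmax(model(inputs), dim=1)  # per-clip class probabilities
            top_probs, top_idx = probs.topk(opt.output_topk, dim=1)
            results.append((targets, top_probs.cpu(), top_idx.cpu()))
    return results, class_names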
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(opt.inference_label_path,
                                                    opt.video_id_path,
                                                    'test',
                                                    opt.inference_frame_dir,
                                                    opt.image_size,
                                                    window_size=opt.window_size)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False,
        worker_init_fn=worker_init_fn)
        # collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToArray(),
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data = get_validation_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)

    val_loader = paddle.batch(val_data.reader, batch_size=opt.batch_size)

    val_logger = Logger(opt.result_path / 'val.log', ['epoch', 'loss', 'acc'])

    return val_loader, val_logger
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    if opt.train_crop == 'other':
        spatial_transform = [
            Resize((opt.scale_h, opt.scale_w)),
            RandomCrop(opt.sample_size),
            ToTensor()
        ]
    else:
        spatial_transform = [
            Resize(opt.sample_size),
            CenterCrop(opt.sample_size),
            ToTensor()
        ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data, collate_fn = get_validation_data(
        opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
        opt.file_type, spatial_transform, temporal_transform)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=(opt.batch_size // opt.n_val_samples),
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=val_sampler,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc', 'acc_num'])
    else:
        val_logger = None

    return val_loader, val_logger
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inf_data_checkpoint_path = opt.result_path / Path('inf_data_' +
                                                      opt.dataset + '.data')
    inf_collate_checkpoint_path = opt.result_path / Path('inf_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(inf_data_checkpoint_path) and os.path.exists(
            inf_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(inf_data_checkpoint_path, 'rb') as filehandle:
            inference_data = pickle.load(filehandle)
        with open(inf_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        inference_data, collate_fn = get_inference_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, opt.inference_subset, spatial_transform,
            temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(inf_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(inference_data, filehandle)
            with open(inf_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    return inference_loader, inference_data.class_names
def compute_saliency_maps(model, opt):
    # Generate tiny data loader
    # Loop through it to generate saliency maps
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    tiny_video_path = Path('/home/ruta/teeny_data/nturgb/jpg')
    tiny_annotation_path = Path('/home/ruta/teeny_data/ntu_01.json')
    tiny_data, collate_fn = get_inference_data(
        tiny_video_path, tiny_annotation_path, opt.dataset, opt.input_type,
        opt.file_type, opt.inference_subset, spatial_transform,
        temporal_transform)

    tiny_loader = torch.utils.data.DataLoader(
        tiny_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=None,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    saliency_maps = []
    for i, (inputs, targets) in enumerate(tiny_loader):
        sal_map = get_saliency_map(inputs, targets, model, opt)
        # Plot the saliency map using matplotlib and save to a file
        plot_saliency(sal_map, i, inputs, targets)
        saliency_maps.append(sal_map)

    return saliency_maps
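# Added sketch (not from the original snippets): compute_saliency_maps above
# calls a get_saliency_map helper that is not shown here. The version below is
# only a vanilla-gradient illustration, under the assumption that `targets` is
# a tensor of class indices; the project's actual helper may differ.
def get_saliency_map_example(inputs, targets, model, opt):
    import torch

    model.eval()
    inputs = inputs.clone().requires_grad_(True)
    outputs = model(inputs)                                # (batch, n_classes)
    scores = outputs.gather(1, targets.view(-1, 1)).sum()  # target-class scores
    scores.backward()
    # Saliency: maximum absolute input gradient over the channel dimension,
    # giving a (batch, T, H, W) map for video inputs of shape (batch, C, T, H, W).
    saliency, _ = inputs.grad.abs().max(dim=1)
    return saliency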
def get_inference_utils(opt):
    assert opt.inference_crop in ['center', 'nocrop']

    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [Resize(opt.sample_size)]
    if opt.inference_crop == 'center':
        spatial_transform.append(CenterCrop(opt.sample_size))
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        SlidingWindow(opt.sample_duration, opt.inference_stride))
    temporal_transform = TemporalCompose(temporal_transform)

    inference_data, collate_fn = get_inference_data(opt.video_path,
                                                    opt.input_type,
                                                    opt.file_type,
                                                    spatial_transform,
                                                    temporal_transform)
    # inference_data, collate_fn = get_inference_data(
    #     opt.video_path, opt.input_type, opt.file_type,
    #     spatial_transform)

    inference_loader = torch.utils.data.DataLoader(
        inference_data,
        batch_size=opt.inference_batch_size,
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    # Build the index-to-name mapping from the Kinetics-700 label file.
    df = pd.read_csv('kinetics_700_labels.csv')
    class_names = {}
    for i in range(df.shape[0]):
        row = df.iloc[i]
        class_names[row[0]] = row[1]

    return inference_loader, class_names
def retrieve_spatial_temporal_transforms(opt):
    opt.mean, opt.std = get_mean_std(opt.value_scale, dataset=opt.mean_dataset)
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        ScaleValue(opt.value_scale),
        normalize
    ]
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalNonOverlappingWindow(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    return spatial_transform, temporal_transform
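# Added usage sketch (not from the original snippets): applying the transforms
# returned by retrieve_spatial_temporal_transforms to one video, following the
# VideoLoader / frame-window pattern used in the score() snippet further below.
# `video_dir` and the helper name are hypothetical; VideoLoader and
# get_n_frames are assumed to come from the surrounding codebase.
def load_transformed_clips_example(opt, video_dir):
    import torch

    spatial_transform, temporal_transform = retrieve_spatial_temporal_transforms(opt)
    loader = VideoLoader(lambda x: f'image_{x:05d}.jpg')

    # The temporal transform turns the full index range into a list of windows.
    frame_windows = temporal_transform(list(range(get_n_frames(video_dir))))
    clips = []
    for window in frame_windows:
        frames = [spatial_transform(img) for img in loader(video_dir, window)]
        clips.append(torch.stack(frames, 0).permute(1, 0, 2, 3))  # (C, T, H, W)
    return clips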
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data, collate_fn = get_validation_data(opt.label_path,
                                               opt.video_id_path,
                                               'val',
                                               opt.frame_dir,
                                               opt.image_size,
                                               window_size=opt.window_size)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=(opt.batch_size // opt.n_val_samples),
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=False,
        sampler=val_sampler,
        worker_init_fn=worker_init_fn)
        # collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(
            opt.result_path / 'val.log',
            ['epoch', 'loss', 'acc', 'precision', 'recall', 'f1', 'tiou'])
    else:
        val_logger = None

    return val_loader, val_logger
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    spatial_transform.append(ToTensor())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)
    if opt.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_data)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=opt.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=opt.n_threads,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               worker_init_fn=worker_init_fn)

    if opt.is_master_node:
        train_logger = Logger(opt.result_path / 'train.log',
                              ['epoch', 'loss', 'acc', 'lr'])
        train_batch_logger = Logger(
            opt.result_path / 'train_batch.log',
            ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])
    else:
        train_logger = None
        train_batch_logger = None

    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = SGD(model_parameters,
                    lr=opt.learning_rate,
                    momentum=opt.momentum,
                    dampening=dampening,
                    weight_decay=opt.weight_decay,
                    nesterov=opt.nesterov)

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(
            optimizer, 'min', patience=opt.plateau_patience)
    else:
        scheduler = lr_scheduler.MultiStepLR(optimizer,
                                             opt.multistep_milestones)

    return (train_loader, train_sampler, train_logger, train_batch_logger,
            optimizer, scheduler)
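# Added usage sketch (not from the original snippets): one way the objects
# returned by the PyTorch get_train_utils above might drive an epoch loop.
# `train_epoch` and `val_epoch` are hypothetical stand-ins for the project's
# training/validation routines, and `opt.begin_epoch` / `opt.n_epochs` are
# assumed options.
def training_loop_example(opt, model):
    (train_loader, train_sampler, train_logger, train_batch_logger,
     optimizer, scheduler) = get_train_utils(opt, model.parameters())

    for epoch in range(opt.begin_epoch, opt.n_epochs + 1):
        if opt.distributed and train_sampler is not None:
            train_sampler.set_epoch(epoch)       # reshuffle shards each epoch

        train_epoch(epoch, train_loader, model, optimizer,
                    train_logger, train_batch_logger)

        if opt.lr_scheduler == 'multistep':
            scheduler.step()                     # milestone-based decay
        else:
            val_loss = val_epoch(epoch, model)   # plateau scheduler needs a metric
            scheduler.step(val_loss)
    return model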
inference_stride = 16
# normalize = get_normalize_method(mean, std, no_mean_norm, no_std_norm)
normalize = Normalize(mean, std)
spatial_transform = [Resize(sample_size)]
if inference_crop == 'center':
    spatial_transform.append(CenterCrop(sample_size))
if input_type == 'flow':
    spatial_transform.append(PickFirstChannels(n=2))
spatial_transform.append(ToTensor())
spatial_transform.extend([ScaleValue(value_scale), normalize])
spatial_transform = Compose(spatial_transform)

temporal_transform = []
if sample_t_stride > 1:
    temporal_transform.append(TemporalSubsampling(sample_t_stride))
temporal_transform.append(SlidingWindow(sample_duration, inference_stride))
temporal_transform = TemporalCompose(temporal_transform)

# Load the model.
# print('load model begin!')
model = generate_model_resnet(1)  # build the ResNet model
# model = torch.load('./save_200.pth')
checkpoint = torch.load('./save_200.pth', map_location='cpu')
model.load_state_dict(checkpoint['state_dict'])
# print(model)
model.eval()  # required: freezes batchnorm, dropout, etc.
model = model.to(device)
# print('load model done!')
count = 0
def get_val_utils(opt):
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)

    spatial_transform = [
        Resize(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor()
    ]
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.extend([ScaleValue(opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(opt.sample_duration, opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    val_data_checkpoint_path = opt.result_path / Path('val_data_' +
                                                      opt.dataset + '.data')
    val_collate_checkpoint_path = opt.result_path / Path('val_coll_' +
                                                         opt.dataset + '.data')
    if os.path.exists(val_data_checkpoint_path) and os.path.exists(
            val_collate_checkpoint_path) and opt.save_load_data_checkpoint:
        with open(val_data_checkpoint_path, 'rb') as filehandle:
            val_data = pickle.load(filehandle)
        with open(val_collate_checkpoint_path, 'rb') as filehandle:
            collate_fn = pickle.load(filehandle)
    else:
        val_data, collate_fn = get_validation_data(
            opt.video_path, opt.annotation_path, opt.dataset, opt.input_type,
            opt.file_type, spatial_transform, temporal_transform)
        if opt.save_load_data_checkpoint:
            with open(val_data_checkpoint_path, 'wb') as filehandle:
                pickle.dump(val_data, filehandle)
            with open(val_collate_checkpoint_path, 'wb') as filehandle:
                pickle.dump(collate_fn, filehandle)

    if opt.distributed:
        val_sampler = torch.utils.data.distributed.DistributedSampler(
            val_data, shuffle=False)
    else:
        val_sampler = None
    val_loader = torch.utils.data.DataLoader(
        val_data,
        batch_size=(opt.batch_size // opt.n_val_samples),
        shuffle=False,
        num_workers=opt.n_threads,
        pin_memory=True,
        sampler=val_sampler,
        worker_init_fn=worker_init_fn,
        collate_fn=collate_fn)

    if opt.is_master_node:
        val_logger = Logger(opt.result_path / 'val.log',
                            ['epoch', 'loss', 'acc'])
    else:
        val_logger = None

    return val_loader, val_logger
def get_train_utils(opt, model_parameters):
    assert opt.train_crop in ['random', 'corner', 'center']
    spatial_transform = []
    if opt.train_crop == 'random':
        spatial_transform.append(
            RandomResizedCrop(
                opt.sample_size, (opt.train_crop_min_scale, 1.0),
                (opt.train_crop_min_ratio, 1.0 / opt.train_crop_min_ratio)))
    elif opt.train_crop == 'corner':
        scales = [1.0]
        scale_step = 1 / (2**(1 / 4))
        for _ in range(1, 5):
            scales.append(scales[-1] * scale_step)
        spatial_transform.append(MultiScaleCornerCrop(opt.sample_size, scales))
    elif opt.train_crop == 'center':
        spatial_transform.append(Resize(opt.sample_size))
        spatial_transform.append(CenterCrop(opt.sample_size))
    normalize = get_normalize_method(opt.mean, opt.std, opt.no_mean_norm,
                                     opt.no_std_norm)
    if not opt.no_hflip:
        spatial_transform.append(RandomHorizontalFlip())
    spatial_transform.append(ToArray())
    if opt.colorjitter:
        spatial_transform.append(ColorJitter())
    if opt.input_type == 'flow':
        spatial_transform.append(PickFirstChannels(n=2))
    spatial_transform.append(ScaleValue(opt.value_scale))
    spatial_transform.append(normalize)
    spatial_transform = Compose(spatial_transform)

    assert opt.train_t_crop in ['random', 'center']
    temporal_transform = []
    if opt.sample_t_stride > 1:
        temporal_transform.append(TemporalSubsampling(opt.sample_t_stride))
    if opt.train_t_crop == 'random':
        temporal_transform.append(TemporalRandomCrop(opt.sample_duration))
    elif opt.train_t_crop == 'center':
        temporal_transform.append(TemporalCenterCrop(opt.sample_duration))
    temporal_transform = TemporalCompose(temporal_transform)

    train_data = get_training_data(opt.video_path, opt.annotation_path,
                                   opt.dataset, opt.input_type, opt.file_type,
                                   spatial_transform, temporal_transform)

    train_loader = paddle.batch(train_data.reader, batch_size=opt.batch_size)

    train_logger = Logger(opt.result_path / 'train.log',
                          ['epoch', 'loss', 'acc', 'lr'])
    train_batch_logger = Logger(
        opt.result_path / 'train_batch.log',
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'lr'])

    assert opt.lr_scheduler in ['plateau', 'multistep']
    assert not (opt.lr_scheduler == 'plateau' and opt.no_val)
    if opt.lr_scheduler == 'plateau':
        scheduler = ReduceLROnPlateau(learning_rate=opt.learning_rate,
                                      mode='min',
                                      patience=opt.plateau_patience)
    else:
        scheduler = MultiStepDecay(learning_rate=opt.learning_rate,
                                   milestones=opt.multistep_milestones)

    optimizer = fluid.optimizer.MomentumOptimizer(
        learning_rate=scheduler,
        momentum=opt.momentum,
        parameter_list=model_parameters,
        use_nesterov=opt.nesterov,
        regularization=fluid.regularizer.L2Decay(
            regularization_coeff=opt.weight_decay))

    return (train_loader, train_logger, train_batch_logger, optimizer,
            scheduler)
def score(self):
    normalize = get_normalize_method(self.opt.mean, self.opt.std,
                                     self.opt.no_mean_norm,
                                     self.opt.no_std_norm)

    spatial_transform = [
        Resize(self.opt.sample_size),
        CenterCrop(self.opt.sample_size),
        ToTensor()
    ]
    spatial_transform.extend([ScaleValue(self.opt.value_scale), normalize])
    spatial_transform = Compose(spatial_transform)

    temporal_transform = []
    if self.opt.sample_t_stride > 1:
        temporal_transform.append(
            TemporalSubsampling(self.opt.sample_t_stride))
    temporal_transform.append(
        TemporalEvenCrop(self.opt.sample_duration, self.opt.n_val_samples))
    temporal_transform = TemporalCompose(temporal_transform)

    frame_count = get_n_frames(self.opt.video_jpgs_dir_path)
    frame_indices = list(range(0, frame_count))
    frame_indices = temporal_transform(frame_indices)

    spatial_transform.randomize_parameters()

    image_name_formatter = lambda x: f'image_{x:05d}.jpg'
    loader = VideoLoader(image_name_formatter)
    print('frame_indices', frame_indices)

    # clips = []
    video_outputs = []
    model = generate_model(self.opt)
    model = load_pretrained_model(model, self.opt.pretrain_path,
                                  self.opt.model,
                                  self.opt.n_finetune_classes)

    i = 0
    for frame_indice in frame_indices:
        print("%d indice: %s" % (i, str(frame_indice)))
        i += 1
        clip = loader(self.opt.video_jpgs_dir_path, frame_indice)
        clip = [spatial_transform(img) for img in clip]
        clip = torch.stack(clip, 0).permute(1, 0, 2, 3)
        # parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
        # print('clips:', clips)
        # for clip in clips:
        with torch.no_grad():
            print(clip.shape)
            output = model(torch.unsqueeze(clip, 0))
            output = F.softmax(output, dim=1).cpu()
            # print(output)
        video_outputs.append(output[0])
        del clip

    video_outputs = torch.stack(video_outputs)
    average_scores = torch.mean(video_outputs, dim=0)

    # inference_loader, inference_class_names = main.get_inference_utils(self.opt)
    with self.opt.annotation_path.open('r') as f:
        data = json.load(f)
    class_to_idx = get_class_labels(data)
    idx_to_class = {}
    for name, label in class_to_idx.items():
        idx_to_class[label] = name
    print(idx_to_class)

    inference_result = inference.get_video_results(average_scores,
                                                   idx_to_class,
                                                   self.opt.output_topk)
    print(inference_result)