def __init__(self, root_dir, spatial_transform=None, seqLen=20, train=True,
             mulSeg=False, numSeg=1, fmt='.png', regression=True,
             numOrdClass=12):
    """Video dataset of RGB frames plus motion maps.

    Args:
        root_dir: dataset root passed to gen_split.
        spatial_transform: data-augmentation transform applied to each frame.
        seqLen: number of frames sampled per sequence.
        train: split selector forwarded to gen_split.
        mulSeg / numSeg: multi-segment sampling options.
        fmt: frame-file extension.
        regression: when True the motion maps stay continuous; when False
            they are binarised with threshold 0.4.
        numOrdClass: number of ordinal classes (stored for later use).
    """
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    # see above (gen_split builds the frame/label index for the split)
    self.images, self.labels, self.numFrames = gen_split(root_dir, 5, train)
    # data-augmentation transform shared by the RGB and map pipelines
    self.main_spatial_transform = spatial_transform
    self.spatial_transform_rgb = Compose(
        [self.main_spatial_transform, ToTensor(), normalize])
    if not regression:  # fixed: avoid `== False` comparison
        # classification targets: binarise the 7x7 motion maps
        self.spatial_transform_mmaps = Compose([
            self.main_spatial_transform,
            Scale(7),
            ToTensor(),
            Binary(0.4)
        ])
    else:
        # regression targets: keep the 7x7 maps continuous
        self.spatial_transform_mmaps = Compose(
            [self.main_spatial_transform, Scale(7), ToTensor()])
    self.train = train
    self.mulSeg = mulSeg
    self.numSeg = numSeg
    self.seqLen = seqLen
    self.fmt = fmt
    self.numOrdClass = numOrdClass
def main_run(dataset, model_state_dict, dataset_dir, seqLen, memSize, stackSize):
    """Evaluate a trained attention model on the test split of a dataset.

    Args:
        dataset: one of 'gtea61', 'gtea71', 'gtea_gaze', 'egtea'.
        model_state_dict: path to the saved state dict.
        dataset_dir: root of the test data.
        seqLen: frames per test sequence.
        memSize: ConvLSTM memory size.
        stackSize: flow-stack size forwarded to the dataset.

    Raises:
        ValueError: if `dataset` is not one of the known names.
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        # fixed: an unknown dataset previously fell through and later raised
        # a confusing NameError on num_classes
        raise ValueError('Unknown dataset: {}'.format(dataset))
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose([Scale(256), CenterCrop(224)])
    spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])  # fixed typo: transorm
    vid_seq_test = makeDataset(dataset_dir, spatial_transform2,
                               spatial_transform=spatial_transform,
                               stackSize=stackSize, fmt='.png', phase='Test',
                               seqLen=seqLen)
    test_loader = torch.utils.data.DataLoader(vid_seq_test, batch_size=1,
                                              shuffle=False, num_workers=2,
                                              pin_memory=True)
    model = attentionModel(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))
    # freeze everything: inference only
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.cuda()
    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for flowX, flowY, inputs, targets in test_loader:
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W) expected by the model
            inputVariable = inputs.permute(1, 0, 2, 3, 4).cuda()
            output_label, _, flowXprediction, flowYprediction = model(inputVariable)
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum()
            true_labels.append(targets)
            predicted_labels.append(predicted.cpu())
    test_accuracy = torch.true_divide(numCorr, test_samples) * 100
    test_accuracy = 'Test Accuracy = {}%'.format(test_accuracy)
    print(test_accuracy)
def get_cam_visualisation(self, resnet, weight_softmax, input_pil_image,
                          preprocess_for_viz=None, preprocess_for_model=None):
    """Build a class-activation-map (CAM) overlay for one PIL image.

    Args:
        resnet: backbone returning (logits, conv feature map, _).
        weight_softmax: classifier weights used to project features to a CAM.
        input_pil_image: input PIL image.
        preprocess_for_viz: transform for the displayed image
            (defaults to Scale(256) + CenterCrop(224)).
        preprocess_for_model: transform for the network input
            (defaults to the standard ImageNet pipeline).

    Returns:
        PIL.Image with the JET heatmap blended over the image (40/60 mix).
    """
    if preprocess_for_viz is None:  # fixed: compare to None with `is`
        preprocess_for_viz = Compose([
            Scale(256),
            CenterCrop(224),
        ])
    if preprocess_for_model is None:  # fixed: compare to None with `is`
        normalize = Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])
        preprocess_for_model = Compose(
            [Scale(256), CenterCrop(224), ToTensor(), normalize])
    tensor_image = preprocess_for_model(input_pil_image)
    pil_image = preprocess_for_viz(input_pil_image)
    logit, feature_conv, _ = resnet(tensor_image.unsqueeze(0).cuda())
    bz, nc, h, w = feature_conv.size()
    feature_conv = feature_conv.view(bz, nc, h * w)
    h_x = F.softmax(logit, dim=1).data
    probs, idx = h_x.sort(1, True)
    # project the conv features with the top-1 class weights
    cam_img = torch.bmm(weight_softmax[idx[:, 0]].unsqueeze(1),
                        feature_conv).squeeze(1)
    cam_img = F.softmax(cam_img, 1).data
    cam_img = cam_img.cpu()
    cam_img = cam_img.reshape(h, w)
    # min-max normalise to [0, 255] for colormapping
    cam_img = cam_img - torch.min(cam_img)
    cam_img = cam_img / torch.max(cam_img)
    cam_img = np.uint8(255 * cam_img)
    img = np.uint8(pil_image)
    output_cam = cv2.resize(cam_img, pil_image.size)
    heatmap = cv2.applyColorMap(output_cam, cv2.COLORMAP_JET)
    # OpenCV works in BGR; convert, blend, then convert back to RGB
    img = cv2.cvtColor(np.uint8(img), cv2.COLOR_RGB2BGR)
    result = heatmap * 0.4 + img * 0.6
    result = cv2.cvtColor(np.uint8(result), cv2.COLOR_BGR2RGB)
    return Image.fromarray(result)
def extract_feature(opt, video_dir, C3D_model):
    """Extract C3D features for every clip of one video.

    Args:
        opt: options namespace (mode, sample_size, sample_duration, mean,
            batch_size, n_threads, device).
        video_dir: directory holding the video's frames/clips.
        C3D_model: the 3-D conv network used as feature extractor.

    Returns:
        numpy array with the concatenated per-clip C3D features.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    # NOTE(review): Video receives no image loader here — confirm the
    # Video class tolerates load_image_fn=None in this code path.
    load_image_fn = None
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    c3d_features = []
    for i, clip in enumerate(data_loader):
        print(clip.mean())
        ## c3d feats
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)
        # aggregate (per-batch features, e.g. torch.Size([8, 512, 14, 14]))
        c3d_features.append(c3d_outputs.cpu().data)
    c3d_features = torch.cat(c3d_features, 0)  # c3d feature of one video
    return c3d_features.cpu().numpy()
def classify_video(video_dir, video_name, class_names, model, opt):
    """Run the model over all clips of a video and return stacked outputs.

    Args:
        video_dir: directory of the video's frames.
        video_name: video identifier (unused here, kept for API parity).
        class_names: class-name list (unused here, kept for API parity).
        model: network applied to each clip.
        opt: options namespace (mode, sample_size, sample_duration, mean,
            batch_size, n_threads).

    Returns:
        numpy array of per-clip outputs, or None when no clip was produced.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    # fixed: Variable(..., volatile=True) was removed in PyTorch >= 0.4;
    # torch.no_grad() is the supported inference mode
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
    if len(video_outputs) != 0:
        video_outputs = torch.cat(video_outputs)
        return video_outputs.numpy()
    else:
        return None
def predict(clip, model):
    """Classify one clip (list of PIL frames) and return confident top-1 ids.

    NOTE(review): reads the module-level `opt` for normalisation settings —
    confirm `opt` is initialised before this is called.

    Args:
        clip: iterable of frames; each is transformed then stacked.
        model: network taking a (1, T, C, H, W) batch.

    Returns:
        Tensor of predicted class indices whose softmax score exceeds 0.6
        (possibly empty).
    """
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    spatial_transform = Compose([
        Scale((150, 150)),
        ToTensor(opt.norm_value),
        norm_method
    ])
    # spatial_transform is always constructed above, so no None-guard needed
    clip = [spatial_transform(img) for img in clip]
    clip = torch.stack(clip, dim=0)
    clip = clip.unsqueeze(0)  # add batch dimension
    with torch.no_grad():
        print(clip.shape)
        outputs = model(clip)
        # fixed: implicit-dim softmax is deprecated; class dim is 1
        outputs = F.softmax(outputs, dim=1)
        print(outputs)
    scores, idx = torch.topk(outputs, k=1)
    # keep only confident predictions
    mask = scores > 0.6
    preds = idx[mask]
    return preds
def get_dataloader(opt):
    """Build the test-set DataLoader for surgical phase recognition.

    Frames are scaled to 112, center-cropped, tensorised (norm value 255)
    and mean-normalised; clips are loop-padded to 16 frames.
    """
    rgb_mean = [110.63666788 / 255, 103.16065604 / 255, 96.29023126 / 255]
    rgb_std = [1, 1, 1]
    pixel_norm = Normalize(rgb_mean, rgb_std)
    frame_pipeline = Compose([
        Scale(112),
        CornerCrop(112, 'c'),
        ToTensor(255),
        pixel_norm,
    ])
    test_data = SurgicalDataset(
        os.path.abspath(opt.frames_path),
        os.path.abspath(opt.video_phase_annotation_path),
        opt.class_names,
        spatial_transform=frame_pipeline,
        temporal_transform=LoopPadding(16),
        target_transform=ClassLabel(),
        sample_duration=16,
    )
    return torch.utils.data.DataLoader(
        test_data,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
def pre_process_frame(frame, opt):
    """Prepare one OpenCV frame for the detector and the classifier.

    Args:
        frame: BGR image as produced by OpenCV.
        opt: options namespace with normalisation and size settings.

    Returns:
        (det_frame, clf_frame): the detector tensor and the classifier tensor.
    """
    # OpenCV gives BGR; convert to RGB and wrap as PIL for the transforms
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_frame = Image.fromarray(rgb)

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    det_pipeline = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(opt.norm_value),
        norm_method,
    ])
    # torchvision transforms keep compatibility with the SSAR model
    clf_pipeline = transforms.Compose([
        transforms.Resize(opt.sample_size_clf),
        transforms.ToTensor(),
        transforms.Normalize(opt.mean_clf, opt.std_clf),
    ])

    det_frame = det_pipeline(pil_frame)
    clf_frame = clf_pipeline(pil_frame)
    return det_frame, clf_frame
def get_loaders(opt): """ Make dataloaders for train and validation sets """ # train loader norm_method = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) spatial_transform = Compose([ Scale((opt.sample_size, opt.sample_size)), Resize(256), CenterCrop(224), ToTensor(), norm_method ]) temporal_transform = TemporalRandomCrop(25) target_transform = ClassLabel() training_data = get_training_set(opt, spatial_transform, temporal_transform, target_transform) train_loader = torch.utils.data.DataLoader(training_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers, pin_memory=True) # validation loader target_transform = ClassLabel() temporal_transform = LoopPadding(25) validation_data = get_validation_set(opt, spatial_transform, temporal_transform, target_transform) val_loader = torch.utils.data.DataLoader(validation_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, pin_memory=True) return train_loader, val_loader
def main():
    """Entry point: stream RGB camera frames through a 112x112 pipeline."""
    pipeline = Compose([Scale(112), CenterCrop(112)])
    stream_camera_rgb(pipeline)
def get_loaders(opt):
    """Build the train and validation DataLoaders.

    Sets opt.mean from the configured mean dataset, then applies the same
    scale + tensor + normalise pipeline to both splits; training uses a
    random 16-frame temporal crop, validation uses loop padding.
    """
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    # training pipeline
    train_spatial = Compose([
        Scale((opt.sample_size, opt.sample_size)),
        ToTensor(opt.norm_value),
        norm_method,
    ])
    training_data = get_training_set(opt, train_spatial,
                                     TemporalRandomCrop(16), ClassLabel())
    train_loader = torch.utils.data.DataLoader(
        training_data,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
    )

    # validation pipeline (deterministic temporal padding, no shuffling)
    val_spatial = Compose([
        Scale((opt.sample_size, opt.sample_size)),
        ToTensor(opt.norm_value),
        norm_method,
    ])
    validation_data = get_validation_set(opt, val_spatial,
                                         LoopPadding(16), ClassLabel())
    val_loader = torch.utils.data.DataLoader(
        validation_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True,
    )
    return train_loader, val_loader
def model_process(count, model):
    """Resolve paths/scales in the parsed options, persist them, and run the
    test loop on the given model.

    Side effects: mutates the global option namespace returned by
    parse_opts(), writes opts.json into opt.result_path, seeds torch, and
    (when opt.test) runs tester.test over the test set.

    Args:
        count: identifier forwarded to tester.test (semantics defined there).
        model: the network to evaluate.
    """
    opt = parse_opts()
    if opt.root_path != '':
        # make all configured paths absolute relative to the root
        opt.video_path = os.path.join(opt.root_path, opt.video_path)
        opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path)
        opt.result_path = os.path.join(opt.root_path, opt.result_path)
        if opt.resume_path:
            opt.resume_path = os.path.join(opt.root_path, opt.resume_path)
        if opt.pretrain_path:
            opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path)
    # geometric progression of crop scales
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    # persist the effective options for reproducibility
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)
    torch.manual_seed(opt.manual_seed)
    # NOTE(review): criterion is built but never used in this function —
    # confirm whether it was meant to be passed to tester.test.
    criterion = nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()
    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)
    print('testing is run')
    if opt.test:
        spatial_transform = Compose([
            Scale(int(opt.sample_size / opt.scale_in_test)),
            CornerCrop(opt.sample_size, opt.crop_position_in_test),
            ToTensor(opt.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(opt.sample_duration)
        target_transform = VideoID()
        test_data = get_test_set(opt, spatial_transform, temporal_transform,
                                 target_transform)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=opt.batch_size,
                                                  shuffle=False,
                                                  num_workers=opt.n_threads,
                                                  pin_memory=True)
        tester.test(count, test_loader, model, opt, test_data.class_names)
def classify_video(video_dir, video_name, class_names, model, opt,
                   annotation_digit=5):
    """Score or featurize every clip of a video.

    Args:
        video_dir: directory of the video's frames.
        video_name: identifier stored in the result dict.
        class_names: index -> label mapping used in 'score' mode.
        model: network applied to each clip.
        opt: options namespace (mode, sample_size, sample_duration, mean,
            batch_size, n_threads).
        annotation_digit: ground-truth annotation attached to every clip.

    Returns:
        dict with 'video' and a 'clips' list of per-clip results.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    print('reading file from: ', video_dir, 'file name: ', video_name)
    video_outputs = []
    video_segments = []
    # fixed: removed unused `shit_lol = enumerate(data_loader)` and replaced
    # Variable(..., volatile=True) — removed in PyTorch >= 0.4 — with no_grad
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
        # key typo ('annotaion') kept intentionally: callers read this key
        clip_results['ground_truth_annotaion'] = annotation_digit
        results['clips'].append(clip_results)
    return results
def classify_video(video_dir, video_name, class_names, model, opt):
    """Score or featurize every clip of a video, with progress logging.

    Args:
        video_dir: directory of the video's frames.
        video_name: identifier stored in the result dict.
        class_names: index -> label mapping used in 'score' mode.
        model: network applied to each clip.
        opt: options namespace (mode, sample_size, sample_duration, mean,
            batch_size, n_threads).

    Returns:
        dict with 'video' and a 'clips' list (empty when no clip produced).
    """
    assert opt.mode in ['score', 'feature']
    print('video_name, class_names', video_name)
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    print('Running on video', video_dir)
    # fixed: Variable(..., volatile=True) was removed in PyTorch >= 0.4
    with torch.no_grad():
        for i, (inputs, segments) in enumerate(data_loader):
            print(i, inputs.size(), segments.shape)
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    results = {'video': video_name, 'clips': []}
    if len(video_outputs) > 0:
        print('Video outputs and segments: ', video_outputs[0].shape)
        video_outputs = torch.cat(video_outputs)
        video_segments = torch.cat(video_segments)
        _, max_indices = video_outputs.max(dim=1)
        print('Video outputs', video_outputs.size())
        for i in range(video_outputs.size(0)):
            clip_results = {
                'segment': video_segments[i].tolist(),
            }
            if opt.mode == 'score':
                clip_results['label'] = class_names[max_indices[i]]
                clip_results['scores'] = video_outputs[i].tolist()
            elif opt.mode == 'feature':
                clip_results['features'] = video_outputs[i].tolist()
            results['clips'].append(clip_results)
    return results
def extract_feature(opt, video_dir, C3D_model, load_image_fn, C2D_model,
                    c2d_shape, duration):
    """Extract C3D (per-clip) and C2D (per-frame) features for one video.

    Args:
        opt: options namespace; note opt.num_segments is mutated here.
        video_dir: directory holding the video's frames.
        C3D_model: 3-D conv feature extractor.
        load_image_fn: image loader forwarded to Video.
        C2D_model: 2-D conv feature extractor.
        c2d_shape: (C, H, W) of the 2-D model input.
        duration: video duration used to derive the segment count.

    Returns:
        (c3d_features, c2d_features) numpy arrays, or (None, None) when
        nothing could be concatenated.
    """
    assert opt.mode in ['score', 'feature']
    C, H, W = c2d_shape
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    opt.num_segments = max(int(duration / opt.clip_len), 1)
    data = Video(opt, video_dir, load_image_fn,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=True)
    c3d_features = []
    c2d_features = []
    for clip, frames_npy_data in data_loader:
        # 3-D features over the whole clip
        clip = clip.to(opt.device)
        with torch.no_grad():
            c3d_outputs = C3D_model(clip)
        # 2-D features per frame
        frames = frames_npy_data.to(opt.device)
        with torch.no_grad():
            c2d_outputs = C2D_model(frames).squeeze()
        if len(c2d_outputs.shape) == 1:
            # squeeze() collapsed a batch of one — restore the batch dim
            c2d_outputs = c2d_outputs.unsqueeze(0)
        # aggregate
        c3d_features.append(c3d_outputs.cpu().data)
        c2d_features.append(c2d_outputs.cpu().data)
    try:
        c3d_features = torch.cat(c3d_features)  # features of one video
        c2d_features = torch.cat(c2d_features)
    except (RuntimeError, ValueError):
        # fixed: a bare `except:` here also swallowed KeyboardInterrupt and
        # SystemExit; torch.cat fails on an empty list or mismatched shapes
        return None, None
    return c3d_features.cpu().numpy(), c2d_features.cpu().numpy()
def classify_video(video_dir, video_name, class_names, model, opt):
    """Score or featurize every clip of a video (strided sampling).

    Videos that produce no clips are appended to 'error.list' and an empty
    dict is returned.

    Args:
        video_dir: directory of the video's frames.
        video_name: identifier stored in the result dict.
        class_names: index -> label mapping used in 'score' mode.
        model: network applied to each clip.
        opt: options namespace (mode, sample_size, sample_duration, mean,
            batch_size, n_threads, stride).

    Returns:
        dict with 'video' and a 'clips' list, or {} on an empty video.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([Scale(opt.sample_size),
                                 CenterCrop(opt.sample_size),
                                 ToTensor(),
                                 Normalize(opt.mean, [1, 1, 1])])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration,
                 stride=opt.stride)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    # fixed: Variable(..., volatile=True) was removed in PyTorch >= 0.4
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    if len(video_outputs) == 0:
        # record videos that yielded no clips for later inspection
        with open("error.list", 'a') as fout:
            fout.write("{}\n".format(video_name))
        return {}
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {
        'video': video_name,
        'clips': []
    }
    _, max_indices = video_outputs.max(dim=1)
    for i in range(video_outputs.size(0)):
        clip_results = {
            'segment': video_segments[i].tolist(),
        }
        if opt.mode == 'score':
            clip_results['label'] = class_names[max_indices[i]]
            clip_results['scores'] = video_outputs[i].tolist()
        elif opt.mode == 'feature':
            clip_results['features'] = video_outputs[i].tolist()
        results['clips'].append(clip_results)
    return results
def __init__(self, root_dir, spatial_transform=None, seqLen=20, train=True,
             mulSeg=False, numSeg=1, fmt='.png', phase='train',
             regressor=False):
    """Dataset of RGB frames plus 7x7 attention maps.

    When `regressor` is False the maps are binarised (threshold 0.4) for
    classification; otherwise they stay continuous for regression.
    """
    self.images, self.maps, self.labels, self.numFrames = gen_split(
        root_dir, 5, phase)
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    # shared augmentation applied to both the RGB and the map pipelines
    self.spatial_transform0 = spatial_transform
    self.spatial_rgb = Compose(
        [self.spatial_transform0, ToTensor(), normalize])
    map_steps = [self.spatial_transform0, Scale(7), ToTensor()]
    if not regressor:
        # classification targets: binarise the maps
        map_steps.append(Binary(0.4))
    self.spatial_transform_map = Compose(map_steps)
    self.train = train
    self.mulSeg = mulSeg
    self.numSeg = numSeg
    self.seqLen = seqLen
    self.fmt = fmt
def classify_video(video_dir, video_name, class_names, model, opt):
    """Run the model over every clip of a video and dump per-clip outputs
    to 'features/<video>/<i>.txt'.

    Args:
        video_dir: directory of the video's frames.
        video_name: used to name the output directory (extension stripped).
        class_names: unused here, kept for API parity.
        model: network applied to each clip.
        opt: options namespace (mode, sample_size, sample_duration, mean,
            batch_size, n_threads).

    Returns:
        dict with 'video' and an (empty) 'clips' list — outputs go to disk.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    results = {'video': video_name, 'clips': []}
    out_dir = os.path.join('features', video_name.split('.')[0])
    # fixed: os.mkdir crashed with FileExistsError when re-running a video
    os.makedirs(out_dir, exist_ok=True)
    for i in range(video_outputs.size(0)):
        with open(os.path.join(out_dir, str(i) + '.txt'), 'w+') as f:
            f.write(' '.join(map(str, video_outputs[i].tolist())))
    return results
def extract_feats(file_path, net, filenames, frame_num, batch_size, save_path):
    """Extract 3D features (saved as .npy) for a list of videos.

    For each video: read all frames, sample `frame_num` clip centers evenly,
    run 16-frame clips through `net` in batches and save the concatenated
    features to save_path/<name>.npy. Videos with an existing feature file
    are skipped.

    NOTE(review): start_idx / end_idx are not parameters — presumably
    module-level globals set by the caller; verify before refactoring.
    """
    net.eval()
    mean = get_mean(255, dataset='kinetics')
    std = get_std(255)
    transform = Compose([
        trn.ToPILImage(),
        Scale(112),
        CornerCrop(112, 'c'),
        ToTensor(),
        Normalize(mean, std)
    ])
    print("Network loaded")
    # Read videos and extract features in batches
    for file in filenames[start_idx:end_idx]:
        feat_file = os.path.join(save_path, file[:-4] + '.npy')
        if os.path.exists(feat_file):
            continue
        vid = imageio.get_reader(os.path.join(file_path, file), 'ffmpeg')
        curr_frames = []
        for frame in vid:
            if len(frame.shape) < 3:
                # fixed: np.repeat(frame, 3) flattened a grayscale frame to
                # 1-D; stack it into an HxWx3 image instead
                frame = np.stack((frame,) * 3, axis=-1)
            curr_frames.append(transform(frame).unsqueeze(0))
        curr_frames = torch.cat(curr_frames, dim=0)
        print("Shape of frames: {0}".format(curr_frames.shape))
        # evenly spaced clip centers across the whole video
        idx = np.linspace(0, len(curr_frames) - 1, frame_num).astype(int)
        print("Captured {} clips: {}".format(len(idx), curr_frames.shape))
        curr_feats = []
        for i in range(0, len(idx), batch_size):
            # 16-frame window (8 before, 8 after) around each sampled center
            curr_batch = [
                curr_frames[x - 8:x + 8, ...].unsqueeze(0)
                for x in idx[i:i + batch_size]
            ]
            curr_batch = torch.cat(curr_batch, dim=0).cuda()
            out = net(curr_batch.transpose(1, 2).cuda())
            curr_feats.append(out.detach().cpu())
            print("Appended {} features {}".format(i + 1, out.shape))
        curr_feats = torch.cat(curr_feats, 0)
        del out  # release GPU-derived tensor before the next video
        np.save(feat_file, curr_feats.numpy())
        print("Saved file {}\nExiting".format(file[:-4] + '.npy'))
def classify_video(video_dir, video_name, model, opt):
    """Extract per-clip features for a video.

    Args:
        video_dir: directory of the video's frames.
        video_name: identifier returned alongside the clip list.
        model: network applied to each clip.
        opt: options namespace (mode, sample_size, sample_duration, mean,
            batch_size, n_threads).

    Returns:
        (video_name, clips) where clips is a list of dicts with 'segment'
        and 'features' entries.
    """
    assert opt.mode in ['score', 'feature']
    spatial_transform = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1])
    ])
    temporal_transform = LoopPadding(opt.sample_duration)
    data = Video(video_dir,
                 spatial_transform=spatial_transform,
                 temporal_transform=temporal_transform,
                 sample_duration=opt.sample_duration)
    data_loader = torch.utils.data.DataLoader(data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    video_outputs = []
    video_segments = []
    # fixed: Variable(..., volatile=True) was removed in PyTorch >= 0.4
    with torch.no_grad():
        for inputs, segments in data_loader:
            outputs = model(inputs)
            video_outputs.append(outputs.cpu().data)
            video_segments.append(segments)
    video_outputs = torch.cat(video_outputs)
    video_segments = torch.cat(video_segments)
    clips = []
    for i in range(video_outputs.size(0)):
        clips.append({
            'segment': video_segments[i].tolist(),
            'features': video_outputs[i].tolist(),
        })
    return video_name, clips
def classify_video(video_dir, video_name, model, opt):
    """Collect per-clip model outputs and segment indices for one video.

    Returns a dict with 'video', 'features' (stacked outputs tensor) and
    'clips' (stacked segments tensor); implicitly returns None when the
    loader yields no clips.
    """
    assert opt.mode in ['score', 'feature']
    pipeline = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1]),
    ])
    dataset = Video(video_dir,
                    spatial_transform=pipeline,
                    temporal_transform=LoopPadding(opt.sample_duration),
                    sample_duration=opt.sample_duration)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=opt.batch_size,
                                         shuffle=False,
                                         num_workers=opt.n_threads,
                                         pin_memory=False)
    all_outputs = []
    all_segments = []
    with torch.no_grad():
        for inputs, segments in loader:
            all_outputs.append(model(inputs).cpu().data)
            all_segments.append(segments)
    if all_outputs:
        results = dict()
        results['video'] = video_name
        results['features'] = torch.cat(all_outputs)
        results['clips'] = torch.cat(all_segments)
        return results
def classify_video(video_dir, video_name, class_names, model, opt):
    """Extract per-clip features for one video.

    Only feature mode is supported; returns a (num_clips, feat_dim)
    numpy array of model outputs.
    """
    assert opt.mode == 'feature'
    pipeline = Compose([
        Scale(opt.sample_size),
        CenterCrop(opt.sample_size),
        ToTensor(),
        Normalize(opt.mean, [1, 1, 1]),
    ])
    dataset = Video(video_dir,
                    spatial_transform=pipeline,
                    temporal_transform=LoopPadding(opt.sample_duration),
                    sample_duration=opt.sample_duration)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=opt.batch_size,
                                         shuffle=False,
                                         num_workers=opt.n_threads,
                                         pin_memory=True)
    clip_outputs = []
    clip_segments = []
    with torch.no_grad():
        for inputs, segments in loader:
            clip_outputs.append(model(inputs).cpu().data)
            clip_segments.append(segments)
    stacked = torch.cat(clip_outputs)
    per_clip = [np.expand_dims(stacked[i].numpy(), axis=0)
                for i in range(stacked.size(0))]
    return np.concatenate(per_clip, axis=0)
if not os.path.exists(os.path.join(opt.save_path, vid)): os.makedirs(os.path.join(opt.save_path, vid)) cmd = 'cp -r "{}" "{}"'.format(image_path, target_image_path) subprocess.call(cmd, shell=True) print(cmd) # print(vid) if __name__ == '__main__': opt = opts.parse_opts() opt.sample_size = 112 spatial_transform = Compose([Scale(opt.sample_size), CenterCrop(opt.sample_size)]) loader = get_default_video_loader() base_dir = "/userhome/dataset/MSVD/Video-Description-with-Spatial-Temporal-Attention/youtube-frames/*" videos_dir = glob.glob(base_dir) opt.save_path = "/userhome/dataset/MSVD/Video-Description-with-Spatial-Temporal-Attention/28frames-msvd/" if not os.path.exists(opt.save_path): os.makedirs(opt.save_path) # for video_path in videos_dir: # Video(video_path) pool = ThreadPool(8) # 创建4个容量的线程池并发执行
def main_run(numEpochs, lr, stepSize, decayRate, trainBatchSize, seqLen,
             memSize, evalInterval, evalMode, numWorkers, outDir,
             fightsDir_train, noFightsDir_train, fightsDir_test,
             noFightsDir_test):
    """Train the ViolenceModel and periodically evaluate it.

    Logs losses/accuracies to text files and TensorBoard under
    './experiments_<outDir>' and saves the best model by test accuracy.
    Exits if the experiment folder already exists.

    Returns:
        True on completion.
    """
    train_dataset_dir_fights = fightsDir_train
    train_dataset_dir_noFights = noFightsDir_train
    test_dataset_dir_fights = fightsDir_test
    test_dataset_dir_noFights = noFightsDir_test
    trainDataset, trainLabels, trainNumFrames = make_split(
        train_dataset_dir_fights, train_dataset_dir_noFights)
    testDataset, testLabels, testNumFrames = make_split(
        test_dataset_dir_fights, test_dataset_dir_noFights)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    # training-time augmentation
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224),
        ToTensor(),
        normalize
    ])
    vidSeqTrain = VideoDataset(trainDataset, trainLabels, trainNumFrames,
                               spatial_transform=spatial_transform,
                               seqLen=seqLen)
    trainLoader = torch.utils.data.DataLoader(vidSeqTrain,
                                              batch_size=trainBatchSize,
                                              shuffle=True,
                                              num_workers=numWorkers,
                                              pin_memory=True,
                                              drop_last=True)
    # test-time augmentation choice
    if evalMode == 'centerCrop':
        test_spatial_transform = Compose(
            [Scale(256), CenterCrop(224), ToTensor(), normalize])
        testBatchSize = 1
    elif evalMode == 'tenCrops':
        test_spatial_transform = Compose(
            [Scale(256), TenCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'fiveCrops':
        test_spatial_transform = Compose(
            [Scale(256), FiveCrops(size=224, mean=mean, std=std)])
        testBatchSize = 1
    elif evalMode == 'horFlip':
        test_spatial_transform = Compose([
            Scale(256),
            CenterCrop(224),
            FlippedImagesTest(mean=mean, std=std)
        ])
        testBatchSize = 1
    vidSeqTest = VideoDataset(testDataset, testLabels, testNumFrames,
                              seqLen=seqLen,
                              spatial_transform=test_spatial_transform)
    testLoader = torch.utils.data.DataLoader(vidSeqTest,
                                             batch_size=testBatchSize,
                                             shuffle=False,
                                             num_workers=int(numWorkers / 2),
                                             pin_memory=True)
    numTrainInstances = vidSeqTrain.__len__()
    numTestInstances = vidSeqTest.__len__()
    print('Number of training samples = {}'.format(numTrainInstances))
    print('Number of testing samples = {}'.format(numTestInstances))
    modelFolder = './experiments_' + outDir  # dir for models and log files
    # Create the dir; never overwrite an existing experiment
    if os.path.exists(modelFolder):
        print(modelFolder + ' exists!!!')
        sys.exit()
    else:
        os.makedirs(modelFolder)
    # Log files
    writer = SummaryWriter(modelFolder)
    trainLogLoss = open((modelFolder + '/trainLogLoss.txt'), 'w')
    trainLogAcc = open((modelFolder + '/trainLogAcc.txt'), 'w')
    testLogLoss = open((modelFolder + '/testLogLoss.txt'), 'w')
    testLogAcc = open((modelFolder + '/testLogAcc.txt'), 'w')
    model = ViolenceModel(mem_size=memSize)
    trainParams = []
    for params in model.parameters():
        params.requires_grad = True
        trainParams += [params]
    model.train(True)
    model.cuda()
    lossFn = nn.CrossEntropyLoss()
    optimizerFn = torch.optim.RMSprop(trainParams, lr=lr)
    optimScheduler = torch.optim.lr_scheduler.StepLR(optimizerFn, stepSize,
                                                     decayRate)
    minAccuracy = 50  # save threshold; tracks the best test accuracy so far
    for epoch in range(numEpochs):
        epochLoss = 0
        numCorrTrain = 0
        iterPerEpoch = 0
        model.train(True)
        print('Epoch = {}'.format(epoch + 1))
        writer.add_scalar('lr', optimizerFn.param_groups[0]['lr'], epoch + 1)
        for i, (inputs, targets) in enumerate(trainLoader):
            iterPerEpoch += 1
            optimizerFn.zero_grad()
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W)
            inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
            labelVariable = targets.cuda()
            outputLabel = model(inputVariable1)
            loss = lossFn(outputLabel, labelVariable)
            loss.backward()
            optimizerFn.step()
            outputProb = torch.nn.Softmax(dim=1)(outputLabel)
            _, predicted = torch.max(outputProb.data, 1)
            # fixed: .item() — tensor accumulation caused integer-tensor
            # division when computing the accuracy below
            numCorrTrain += (predicted == targets.cuda()).sum().item()
            # fixed: loss.data[0] fails on 0-dim tensors (PyTorch >= 0.4)
            epochLoss += loss.item()
        # fixed: scheduler must step after the optimizer (PyTorch >= 1.1);
        # it previously ran at the top of the epoch loop
        optimScheduler.step()
        avgLoss = epochLoss / iterPerEpoch
        trainAccuracy = (numCorrTrain / numTrainInstances) * 100
        print('Training: Loss = {} | Accuracy = {}% '.format(
            avgLoss, trainAccuracy))
        writer.add_scalar('train/epochLoss', avgLoss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        trainLogLoss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avgLoss))
        trainLogAcc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))
        if (epoch + 1) % evalInterval == 0:
            model.train(False)
            print('Evaluating...')
            testLossEpoch = 0
            testIter = 0
            numCorrTest = 0
            # fixed: Variable(..., volatile=True) was removed from PyTorch
            with torch.no_grad():
                for j, (inputs, targets) in enumerate(testLoader):
                    testIter += 1
                    if evalMode == 'centerCrop':
                        inputVariable1 = inputs.permute(1, 0, 2, 3, 4).cuda()
                    else:
                        # multi-crop modes deliver (1, crops, ...) batches
                        inputVariable1 = inputs[0].cuda()
                    # fixed: `async=True` is a SyntaxError on Python >= 3.7
                    labelVariable = targets.cuda(non_blocking=True)
                    outputLabel = model(inputVariable1)
                    # average predictions over the crops
                    outputLabel_mean = torch.mean(outputLabel, 0, True)
                    testLoss = lossFn(outputLabel_mean, labelVariable)
                    testLossEpoch += testLoss.item()
                    _, predicted = torch.max(outputLabel_mean.data, 1)
                    numCorrTest += (predicted == targets[0]).sum().item()
            testAccuracy = (numCorrTest / numTestInstances) * 100
            avgTestLoss = testLossEpoch / testIter
            print('Testing: Loss = {} | Accuracy = {}% '.format(
                avgTestLoss, testAccuracy))
            writer.add_scalar('test/epochloss', avgTestLoss, epoch + 1)
            writer.add_scalar('test/accuracy', testAccuracy, epoch + 1)
            testLogLoss.write('Test Loss after {} epochs = {}\n'.format(
                epoch + 1, avgTestLoss))
            testLogAcc.write('Test Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, testAccuracy))
            if testAccuracy > minAccuracy:
                savePathClassifier = (modelFolder + '/bestModel.pth')
                torch.save(model, savePathClassifier)
                minAccuracy = testAccuracy
    trainLogAcc.close()
    testLogAcc.close()
    trainLogLoss.close()
    testLogLoss.close()
    writer.export_scalars_to_json(modelFolder + "/all_scalars.json")
    writer.close()
    return True
if opt.nesterov: dampening = 0 else: dampening = opt.dampening optimizer = optim.SGD(parameters, lr=opt.learning_rate, momentum=opt.momentum, dampening=dampening, weight_decay=opt.weight_decay, nesterov=opt.nesterov) scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=opt.lr_patience) if not opt.no_val: spatial_transform = Compose([ Scale(opt.sample_size), CenterCrop(opt.sample_size), ToTensor(opt.norm_value), norm_method ]) temporal_transform = LoopPadding(opt.sample_duration) target_transform = ClassLabel() validation_data = get_validation_set(opt, spatial_transform, temporal_transform, target_transform) val_loader = torch.utils.data.DataLoader(validation_data, batch_size=opt.batch_size, shuffle=False, num_workers=opt.n_threads, pin_memory=True) val_logger = Logger(os.path.join(opt.result_path, 'val.log'), ['epoch', 'loss', 'acc'])
['epoch', 'batch', 'iter', 'loss', 'acc', 'lr']) if opt.nesterov: dampening = 0 else: dampening = opt.momentum optimizer = optim.SGD(model.parameters(), lr=opt.learning_rate, momentum=opt.momentum, dampening=dampening, weight_decay=opt.weight_decay, nesterov=opt.nesterov) scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min') if not opt.no_val: spatial_transform = Compose([ Scale(opt.sample_size), CenterCrop(opt.sample_size), ToTensor(opt.norm_value), Normalize(opt.mean, [1, 1, 1]) ]) temporal_transform = LoopPadding(opt.sample_duration) target_transform = ClassLabel() if opt.dataset == 'kinetics': validation_data = Kinetics(opt.video_path, opt.annotation_path, 'validation', opt.n_val_samples, spatial_transform, temporal_transform, target_transform, sample_duration=opt.sample_duration)
def main_run(dataset, stage, trainDatasetDir, valDatasetDir, stage1_dict,
             stackSize, out_dir, seqLen, trainBatchSize, valBatchSize,
             numEpochs, lr1, decay_factor, decay_step, memSize, alphaX,
             alphaY):
    """Train the attention ConvLSTM with a self-supervised optical-flow
    regression side task (x- and y-flow heads regressed with MSE).

    Stage 1 trains only the ConvLSTM cell and the classifier; stage 2 loads
    the stage-1 weights and additionally fine-tunes the convolutions of the
    last ResNet block and the fc layer. The classification cross-entropy is
    summed with the alphaX/alphaY-weighted flow losses. Checkpoints,
    TensorBoard scalars and text logs are written under ``out_dir``.

    Fixes vs. previous revision:
      * validation Y-flow loss used the X prediction/target (copy-paste bug);
      * ``targets.cuda(async=True)`` is a SyntaxError on Python >= 3.7 —
        replaced by ``non_blocking=True``;
      * X/Y training-loss log lines were missing their newline.
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Dir for saving models and log files; refuse to clobber an old run.
    model_folder = os.path.join('./', out_dir, 'attConvLSTM', str(seqLen),
                                'stage' + str(stage))
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)
    ])
    # 7x7 maps fed to the self-supervised flow branch.
    spatial_transform2 = Compose([Scale((7, 7)), ToTensor()])

    vid_seq_train = makeDataset(trainDatasetDir,
                                spatial_transform2,
                                spatial_transform=spatial_transform,
                                sequence=False,
                                numSeg=1,
                                stackSize=stackSize,
                                fmt='.png',
                                seqLen=seqLen)
    trainInstances = vid_seq_train.__len__()
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    if valDatasetDir is not None:
        vid_seq_val = makeDataset(valDatasetDir,
                                  spatial_transform2,
                                  spatial_transform=Compose(
                                      [Scale(256), CenterCrop(224)]),
                                  sequence=False,
                                  numSeg=1,
                                  stackSize=stackSize,
                                  fmt='.png',
                                  phase='Test',
                                  seqLen=seqLen)
        valInstances = vid_seq_val.__len__()
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)

    train_params = []
    if stage == 1:
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2: warm-start from stage 1 and unfreeze the top layers
        model = attentionModel(num_classes=num_classes, mem_size=memSize)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        # Fine-tune the convolutions of the last ResNet block...
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        # ...and the final fully connected layer.
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    # The ConvLSTM cell and the classifier are trained in both stages.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
    model.lstm_cell.train(True)
    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_fn_regression = nn.MSELoss()  # loss for the flow regression task
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1,
                                    weight_decay=4e-5, eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0  # best validation accuracy seen so far

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        x_loss = 0
        y_loss = 0
        trainSamples = 0
        iterPerEpoch = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 2:
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for flowX, flowY, inputs, targets in train_loader:
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            flowX = flowX.cuda()
            flowY = flowY.cuda()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, _, flowXprediction, flowYprediction = model(
                inputVariable)
            # Flatten predictions and targets so MSE regresses elementwise.
            flowXprediction = flowXprediction.view(-1)
            flowX = torch.reshape(flowX, (-1, )).float()
            flowYprediction = flowYprediction.view(-1)
            flowY = torch.reshape(flowY, (-1, )).float()
            lossX = alphaX * loss_fn_regression(flowXprediction, flowX)
            lossY = alphaY * loss_fn_regression(flowYprediction, flowY)
            loss = loss_fn(output_label, labelVariable)
            # Classification loss plus alpha-weighted self-supervised losses.
            total_loss = loss + lossX + lossY
            total_loss.backward()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += (predicted == targets.cuda()).sum()
            x_loss += lossX.item()
            y_loss += lossY.item()
            epoch_loss += loss.item()
        optim_scheduler.step()

        avg_x_loss = x_loss / iterPerEpoch
        avg_y_loss = y_loss / iterPerEpoch
        avg_loss = epoch_loss / iterPerEpoch
        trainAccuracy = torch.true_divide(numCorrTrain, trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        print('X loss after {} epoch = {}% '.format(epoch + 1, avg_x_loss))
        print('Y loss after {} epoch = {}% '.format(epoch + 1, avg_y_loss))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        writer.add_scalar('x_train_loss', avg_x_loss, epoch + 1)
        writer.add_scalar('y_train_loss', avg_y_loss, epoch + 1)
        # '\n' added so each entry sits on its own log line.
        train_log_loss.write('Training X loss after {} epoch= {}\n'.format(
            epoch + 1, avg_x_loss))
        train_log_loss.write('Training Y loss after {} epoch= {}\n'.format(
            epoch + 1, avg_y_loss))
        train_log_loss.write('Training loss after {} epoch = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Training accuracy after {} epoch = {}\n'.format(
            epoch + 1, trainAccuracy))

        if valDatasetDir is not None:
            model.train(False)
            val_loss_epoch = 0
            val_iter = 0
            val_x_loss = 0
            val_y_loss = 0
            val_samples = 0
            numCorr = 0
            with torch.no_grad():
                for flowX, flowY, inputs, targets in val_loader:
                    val_iter += 1
                    val_samples += inputs.size(0)
                    flowX = flowX.cuda()
                    flowY = flowY.cuda()
                    inputVariable = Variable(
                        inputs.permute(1, 0, 2, 3, 4).cuda())
                    # non_blocking replaces the removed 'async' keyword arg.
                    labelVariable = Variable(
                        targets.cuda(non_blocking=True))
                    output_label, _, flowXprediction, flowYprediction = model(
                        inputVariable)
                    flowXprediction = flowXprediction.view(-1)
                    flowX = torch.reshape(flowX, (-1, )).float()
                    # BUG FIX: the Y branch previously reused the X tensors,
                    # so the validation Y-flow loss duplicated the X loss.
                    flowYprediction = flowYprediction.view(-1)
                    flowY = torch.reshape(flowY, (-1, )).float()
                    lossX = alphaX * loss_fn_regression(flowXprediction,
                                                        flowX)
                    lossY = alphaY * loss_fn_regression(flowYprediction,
                                                        flowY)
                    val_loss = loss_fn(output_label, labelVariable)
                    val_loss_epoch += val_loss.item()
                    val_x_loss += lossX.item()
                    val_y_loss += lossY.item()
                    _, predicted = torch.max(output_label.data, 1)
                    numCorr += (predicted == targets.cuda()).sum()
            avg_x_val_loss = val_x_loss / val_iter
            avg_y_val_loss = val_y_loss / val_iter
            val_accuracy = torch.true_divide(numCorr, val_samples) * 100
            avg_val_loss = val_loss_epoch / val_iter
            print('Val X Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_x_val_loss))
            print('Val Y Loss after {} epochs, loss = {}'.format(
                epoch + 1, avg_y_val_loss))
            print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                epoch + 1, avg_val_loss, val_accuracy))
            writer.add_scalar('val x/epoch_loss', avg_x_val_loss, epoch + 1)
            writer.add_scalar('val y/epoch_loss', avg_y_val_loss, epoch + 1)
            writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
            writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
            val_log_loss.write('Val X Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_x_val_loss))
            val_log_loss.write('Val Y Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_y_val_loss))
            val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                epoch + 1, avg_val_loss))
            val_log_acc.write('Val Accuracy after {} epochs = {}%\n'.format(
                epoch + 1, val_accuracy))
            # Keep only the best-performing checkpoint.
            if val_accuracy > min_accuracy:
                save_path_model = (model_folder + '/model_rgb_state_dict.pth')
                torch.save(model.state_dict(), save_path_model)
                min_accuracy = val_accuracy

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
args.n_classes = 174 img_prefix = '' whole_model, parameters = generate_model(args) print(whole_model) # input('...') if args.no_mean_norm and not args.std_norm: norm_method = Normalize([0, 0, 0], [1, 1, 1]) elif not args.std_norm: norm_method = Normalize(args.mean, [1, 1, 1]) else: norm_method = Normalize(args.mean, args.std) spatial_transform = Compose([ Scale(args.sample_size), CenterCrop(args.sample_size), ToTensor(args.norm_value), norm_method ]) # if not args.test_temp_crop == 'sparse': if args.compared_temp_transform == 'shuffle': temp_transform = ShuffleFrames(args.sample_duration) else: temp_transform = ReverseFrames(args.sample_duration) temp_crop_method = TemporalRandomCrop(args.sample_duration) # if args.compared_temp_transform == 'reverse': # temp_transform = Compose([ # ReverseFrames(args.sample_duration), # temp_crop_method # ])
def main_run(dataset, stage, train_data_dir, val_data_dir, stage1_dict,
             out_dir, seqLen, trainBatchSize, valBatchSize, numEpochs, lr1,
             decay_factor, decay_step, memSize, regressor):
    """Train attentionModel_ml with a motion-segmentation (MS) side task.

    ``regressor == 0`` treats the MS maps as per-pixel 2-way classification
    (cross entropy on a (N, 2) view); ``regressor == 1`` regresses the maps
    with MSE. Stage 1 trains only the ConvLSTM cell and classifier; stage 2
    loads the stage-1 weights and also fine-tunes the MS head (conv/clas),
    the last ResNet block and fc. Checkpoints and logs go under ``out_dir``.

    Fixes vs. previous revision:
      * ``targets.cuda(async=True)`` is a SyntaxError on Python >= 3.7 —
        replaced by ``non_blocking=True``;
      * validation now runs under ``torch.no_grad()`` (the old
        ``volatile=True`` flag is a deprecated no-op in modern PyTorch);
      * ``optim_scheduler.step()`` was executed once after the whole training
        run, so the LR milestones never fired — it now steps every epoch,
        consistent with the sibling flow-regression trainer;
      * the two ``*_acc_ms`` log files were never closed;
      * unused ``loss_fms = nn.NLLLoss()`` removed (the MS classification
        path uses the cross-entropy loss).
    """
    if dataset == 'gtea61':
        num_classes = 61
    elif dataset == 'gtea71':
        num_classes = 71
    elif dataset == 'gtea_gaze':
        num_classes = 44
    elif dataset == 'egtea':
        num_classes = 106
    else:
        print('Dataset not found')
        sys.exit()

    # Dir for saving models and log files; refuse to clobber an old run.
    model_folder = os.path.join('./', out_dir, dataset, 'MS', str(stage))
    if os.path.exists(model_folder):
        print('Directory {} exists!'.format(model_folder))
        sys.exit()
    os.makedirs(model_folder)

    # Log files
    writer = SummaryWriter(model_folder)
    train_log_loss = open((model_folder + '/train_log_loss.txt'), 'w')
    train_log_acc = open((model_folder + '/train_log_acc.txt'), 'w')
    val_log_loss = open((model_folder + '/val_log_loss.txt'), 'w')
    val_log_acc = open((model_folder + '/val_log_acc.txt'), 'w')
    train_log_loss_ms = open((model_folder + '/train_log_loss_ms.txt'), 'w')
    val_log_loss_ms = open((model_folder + '/val_log_loss_ms.txt'), 'w')
    train_log_acc_ms = open((model_folder + '/train_log_acc_ms.txt'), 'w')
    val_log_acc_ms = open((model_folder + '/val_log_acc_ms.txt'), 'w')

    # Data loader
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    spatial_transform = Compose([
        Scale(256),
        RandomHorizontalFlip(),
        MultiScaleCornerCrop([1, 0.875, 0.75, 0.65625], 224)
    ])

    vid_seq_train = makeDataset(train_data_dir,
                                spatial_transform=spatial_transform,
                                seqLen=seqLen,
                                fmt='.png',
                                phase='train',
                                regressor=regressor)
    train_loader = torch.utils.data.DataLoader(vid_seq_train,
                                               batch_size=trainBatchSize,
                                               shuffle=True,
                                               num_workers=4,
                                               pin_memory=True)
    if val_data_dir is not None:
        vid_seq_val = makeDataset(val_data_dir,
                                  spatial_transform=Compose(
                                      [Scale(256), CenterCrop(224)]),
                                  seqLen=seqLen,
                                  fmt='.png',
                                  phase='test',
                                  regressor=regressor)
        val_loader = torch.utils.data.DataLoader(vid_seq_val,
                                                 batch_size=valBatchSize,
                                                 shuffle=False,
                                                 num_workers=2,
                                                 pin_memory=True)
        valInstances = vid_seq_val.__len__()
    trainInstances = vid_seq_train.__len__()

    train_params = []
    if stage == 1:
        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize,
                                  regressor=regressor)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
    else:  # stage == 2: warm-start from stage 1 and unfreeze the top layers
        model = attentionModel_ml(num_classes=num_classes, mem_size=memSize,
                                  regressor=regressor)
        model.load_state_dict(torch.load(stage1_dict), strict=False)
        model.train(False)
        for params in model.parameters():
            params.requires_grad = False
        # Fine-tune the convolutions of the last ResNet block...
        for params in model.resNet.layer4[0].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[0].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[1].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[1].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[2].conv1.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.resNet.layer4[2].conv2.parameters():
            params.requires_grad = True
            train_params += [params]
        # ...the final fully connected layer...
        for params in model.resNet.fc.parameters():
            params.requires_grad = True
            train_params += [params]
        # ...and the motion-segmentation head.
        for params in model.conv.parameters():
            params.requires_grad = True
            train_params += [params]
        for params in model.clas.parameters():
            params.requires_grad = True
            train_params += [params]
        model.conv.train(True)
        model.clas.train(True)
        model.resNet.layer4[0].conv1.train(True)
        model.resNet.layer4[0].conv2.train(True)
        model.resNet.layer4[1].conv1.train(True)
        model.resNet.layer4[1].conv2.train(True)
        model.resNet.layer4[2].conv1.train(True)
        model.resNet.layer4[2].conv2.train(True)
        model.resNet.fc.train(True)

    # The ConvLSTM cell and the classifier are trained in both stages.
    for params in model.lstm_cell.parameters():
        params.requires_grad = True
        train_params += [params]
    for params in model.classifier.parameters():
        params.requires_grad = True
        train_params += [params]
    model.lstm_cell.train(True)
    model.classifier.train(True)
    model.cuda()

    loss_fn = nn.CrossEntropyLoss()
    loss_reg = nn.MSELoss()  # MS loss when regressor == 1
    optimizer_fn = torch.optim.Adam(train_params, lr=lr1,
                                    weight_decay=4e-5, eps=1e-4)
    optim_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer_fn, milestones=decay_step, gamma=decay_factor)

    train_iter = 0
    min_accuracy = 0  # best validation accuracy seen so far

    for epoch in range(numEpochs):
        epoch_loss = 0
        numCorrTrain = 0
        numCorrTrain_ms = 0
        trainSamples = 0
        iterPerEpoch = 0
        epoch_loss_ms = 0
        model.lstm_cell.train(True)
        model.classifier.train(True)
        writer.add_scalar('lr', optimizer_fn.param_groups[0]['lr'], epoch + 1)
        if stage == 2:
            model.conv.train(True)
            model.clas.train(True)
            model.resNet.layer4[0].conv1.train(True)
            model.resNet.layer4[0].conv2.train(True)
            model.resNet.layer4[1].conv1.train(True)
            model.resNet.layer4[1].conv2.train(True)
            model.resNet.layer4[2].conv1.train(True)
            model.resNet.layer4[2].conv2.train(True)
            model.resNet.fc.train(True)
        for i, (inputs, binary_map, targets) in enumerate(train_loader):
            train_iter += 1
            iterPerEpoch += 1
            optimizer_fn.zero_grad()
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            labelVariable = Variable(targets.cuda())
            trainSamples += inputs.size(0)
            output_label, output_ms = model(inputVariable)
            loss = loss_fn(output_label, labelVariable)
            if stage == 2:
                # Keep the graph: the MS loss backward below reuses it.
                loss.backward(retain_graph=True)
            else:
                loss.backward()
            if regressor == 0:
                # Per-pixel classification: long targets, (N, 2) logits.
                binary_map = Variable(
                    binary_map.permute(1, 0, 2, 3, 4).type(
                        torch.LongTensor).cuda())
                output_ms = output_ms.view(-1, 2)
            elif regressor == 1:
                # Per-pixel regression: flat float targets and predictions.
                binary_map = Variable(
                    binary_map.permute(1, 0, 2, 3, 4).cuda())
                output_ms = output_ms.view(-1)
            binary_map = binary_map.contiguous().view(-1)
            if stage == 2:
                if regressor == 1:
                    loss_ms = loss_reg(output_ms, binary_map)
                    loss_ms.backward()
                    epoch_loss_ms += loss_ms.item()
                elif regressor == 0:
                    loss_ms = loss_fn(output_ms, binary_map)
                    loss_ms.backward()
                    _, predicted = torch.max(output_ms.data, 1)
                    numCorrTrain_ms += torch.sum(
                        predicted == binary_map.data).data.item()
                    epoch_loss_ms += loss_ms.item()
            optimizer_fn.step()
            _, predicted = torch.max(output_label.data, 1)
            numCorrTrain += torch.sum(
                predicted == labelVariable.data).data.item()
            epoch_loss += loss.item()
        # FIX: step once per epoch so the milestone decay actually fires
        # (was previously a single call after the whole training run).
        optim_scheduler.step()

        avg_loss = epoch_loss / iterPerEpoch
        if stage == 2:
            # Pixel-level MS accuracy (correct pixels / number of videos, as
            # in the original logging).
            trainAccuracy = (numCorrTrain_ms / trainSamples) * 100
            avg_loss_ms = epoch_loss_ms / iterPerEpoch
            train_log_loss_ms.write(
                'Train Loss MS after {} epochs = {}\n'.format(
                    epoch + 1, avg_loss_ms))
            if regressor == 0:
                train_log_acc_ms.write(
                    'Train Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, trainAccuracy))
        trainAccuracy = (numCorrTrain / trainSamples) * 100
        print('Train: Epoch = {} | Loss = {} | Accuracy = {}'.format(
            epoch + 1, avg_loss, trainAccuracy))
        writer.add_scalar('train/epoch_loss', avg_loss, epoch + 1)
        writer.add_scalar('train/accuracy', trainAccuracy, epoch + 1)
        train_log_loss.write('Train Loss after {} epochs = {}\n'.format(
            epoch + 1, avg_loss))
        train_log_acc.write('Train Accuracy after {} epochs = {}%\n'.format(
            epoch + 1, trainAccuracy))

        if val_data_dir is not None:
            if (epoch + 1) % 1 == 0:  # validate every epoch
                model.train(False)
                val_loss_epoch = 0
                val_iter = 0
                val_samples = 0
                numCorr = 0
                numCorr_ms = 0
                epoch_loss_ms_val = 0
                with torch.no_grad():  # replaces deprecated volatile=True
                    for j, (inputs, binary_map,
                            targets) in enumerate(val_loader):
                        val_iter += 1
                        val_samples += inputs.size(0)
                        inputVariable = Variable(
                            inputs.permute(1, 0, 2, 3, 4).cuda())
                        # non_blocking replaces the removed 'async' kwarg.
                        labelVariable = Variable(
                            targets.cuda(non_blocking=True))
                        output_label, output_ms = model(inputVariable)
                        val_loss = loss_fn(output_label, labelVariable)
                        val_loss_epoch += val_loss.item()
                        if regressor == 0:
                            binary_map = Variable(
                                binary_map.permute(1, 0, 2, 3, 4).type(
                                    torch.LongTensor).cuda())
                            output_ms = output_ms.view(-1, 2)
                        elif regressor == 1:
                            binary_map = Variable(
                                binary_map.permute(1, 0, 2, 3, 4).cuda())
                            output_ms = output_ms.view(-1)
                        binary_map = binary_map.contiguous().view(-1)
                        if stage == 2:
                            if regressor == 1:
                                loss_ms = loss_reg(output_ms, binary_map)
                                epoch_loss_ms_val += loss_ms.item()
                            elif regressor == 0:
                                loss_ms = loss_fn(output_ms, binary_map)
                                _, predicted = torch.max(output_ms.data, 1)
                                numCorr_ms += torch.sum(
                                    predicted == binary_map.data).data.item()
                                epoch_loss_ms_val += loss_ms.item()
                        _, predicted = torch.max(output_label.data, 1)
                        numCorr += torch.sum(
                            predicted == labelVariable.data).data.item()
                avg_val_loss = val_loss_epoch / val_iter
                if stage == 2:
                    avg_loss_ms = epoch_loss_ms_val / val_iter
                    val_accuracy = (numCorr_ms / val_samples) * 100
                    val_log_loss_ms.write(
                        'Val Loss MS after {} epochs = {}\n'.format(
                            epoch + 1, avg_loss_ms))
                    if regressor == 0:
                        val_log_acc_ms.write(
                            'Val Accuracy after {} epochs = {}%\n'.format(
                                epoch + 1, val_accuracy))
                val_accuracy = (numCorr / val_samples) * 100
                print('Val: Epoch = {} | Loss {} | Accuracy = {}'.format(
                    epoch + 1, avg_val_loss, val_accuracy))
                writer.add_scalar('val/epoch_loss', avg_val_loss, epoch + 1)
                writer.add_scalar('val/accuracy', val_accuracy, epoch + 1)
                val_log_loss.write('Val Loss after {} epochs = {}\n'.format(
                    epoch + 1, avg_val_loss))
                val_log_acc.write(
                    'Val Accuracy after {} epochs = {}%\n'.format(
                        epoch + 1, val_accuracy))
                # Keep only the best-performing checkpoint.
                if val_accuracy > min_accuracy:
                    save_path_model = (model_folder +
                                       '/model_ms_state_dict.pth')
                    torch.save(model.state_dict(), save_path_model)
                    min_accuracy = val_accuracy
        else:
            # No validation set: save a periodic checkpoint instead.
            if (epoch + 1) % 10 == 0:
                save_path_model = (model_folder +
                                   '/model_ms_state_dict_epoch' +
                                   str(epoch + 1) + '.pth')
                torch.save(model.state_dict(), save_path_model)

    train_log_loss.close()
    train_log_acc.close()
    val_log_acc.close()
    val_log_loss.close()
    train_log_loss_ms.close()
    val_log_loss_ms.close()
    # FIX: these two log files were previously never closed.
    train_log_acc_ms.close()
    val_log_acc_ms.close()
    writer.export_scalars_to_json(model_folder + "/all_scalars.json")
    writer.close()
def main_run(model_state_dict, dataset_dir, seqLen, memSize, out_dir):
    """Evaluate a trained attentionDoubleResnet on GTEA61 user S2.

    Loads the checkpoint at ``model_state_dict``, runs the test split with
    batch size 1, prints/saves the accuracy, and writes a row-normalized
    confusion-matrix plot under ``out_dir``.

    Fixes vs. previous revision:
      * labels are collected as plain ints (``.item()``-style lists) instead
        of lists of tensors;
      * ``confusion_matrix`` gets ``labels=range(num_classes)`` so the matrix
        is always 61x61 and matches the fixed tick marks even when some
        classes are absent from the test split;
      * row normalization no longer divides by zero for empty rows;
      * the output directory is created if missing and the accuracy is
        logged as a float rather than a tensor repr.
    """
    model_folder = os.path.join('./', out_dir, 'attConvLSTMDoubleResnet',
                                str(seqLen))
    # Evaluation may run standalone; make sure the log/plot dir exists.
    os.makedirs(model_folder, exist_ok=True)

    num_classes = 61  # gtea61
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = Normalize(mean=mean, std=std)
    spatial_transform = Compose(
        [Scale(256), CenterCrop(224), ToTensor(), normalize])

    vid_seq_test = makeDataset(dataset_dir,
                               seqLen=seqLen,
                               fmt='.png',
                               train=False,
                               spatial_transform=spatial_transform,
                               users=['S2'])
    test_loader = torch.utils.data.DataLoader(vid_seq_test,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2,
                                              pin_memory=True)

    model = attentionDoubleResnet(num_classes=num_classes, mem_size=memSize)
    model.load_state_dict(torch.load(model_state_dict))
    for params in model.parameters():
        params.requires_grad = False
    model.train(False)
    model.cuda()

    test_samples = vid_seq_test.__len__()
    print('Number of samples = {}'.format(test_samples))
    print('Evaluating...')
    numCorr = 0
    true_labels = []
    predicted_labels = []
    with torch.no_grad():
        for inputs, inputsSN, targets in test_loader:
            # (batch, seq, C, H, W) -> (seq, batch, C, H, W) for the ConvLSTM.
            inputVariable = Variable(inputs.permute(1, 0, 2, 3, 4).cuda())
            inputSNVariable = Variable(
                inputsSN.permute(1, 0, 2, 3, 4).cuda())
            output_label, _ = model(inputVariable, inputSNVariable)
            _, predicted = torch.max(output_label.data, 1)
            numCorr += (predicted == targets.cuda()).sum().item()
            # Store plain ints so sklearn gets flat 1-D label lists.
            true_labels.extend(targets.tolist())
            predicted_labels.extend(predicted.cpu().tolist())

    test_accuracy = (numCorr / test_samples) * 100
    test_accuracy = 'Test Accuracy = {}%'.format(test_accuracy)
    print(test_accuracy)
    fil = open(model_folder + "/test_log_acc.txt", "w")
    fil.write(test_accuracy)
    fil.close()

    # Fix the label set so the matrix is num_classes x num_classes even when
    # some classes never appear in the test split.
    cnf_matrix = confusion_matrix(
        true_labels, predicted_labels,
        labels=list(range(num_classes))).astype(float)
    # Row-normalize; rows with no ground-truth samples stay all-zero instead
    # of turning into NaN from a 0/0 division.
    row_sums = cnf_matrix.sum(axis=1)[:, np.newaxis]
    cnf_matrix_normalized = np.divide(cnf_matrix,
                                      row_sums,
                                      out=np.zeros_like(cnf_matrix),
                                      where=row_sums != 0)

    ticks = np.linspace(0, 60, num=61)
    plt.figure(1, figsize=(12, 12), dpi=100.0)
    plt.imshow(cnf_matrix_normalized, interpolation='none', cmap='binary')
    plt.colorbar()
    plt.xticks(ticks, fontsize=6)
    plt.yticks(ticks, fontsize=6)
    plt.grid(True)
    plt.clim(0, 1)
    xy = np.arange(start=0, stop=61)
    plt.plot(xy, xy)  # diagonal reference line
    plt.savefig(model_folder + '/cnf_matrix_normalized.png',
                bbox_inches='tight')
    plt.show()