def Extract_features(mode='train'):
    extractor = models.vgg16(pretrained=True).features.cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if mode == 'train':
        csv = getVideoList(gt_train)
    else:  # validation
        csv = getVideoList(gt_valid)
    vdo_features = None
    for clip_id in range(len(csv['Video_index'])):
        # print(mode, clip_id)
        if mode == 'train':
            frames = readShortVideo(train_vdo_dir, csv['Video_category'][clip_id],
                                    csv['Video_name'][clip_id], 5)
        else:  # validation
            frames = readShortVideo(valid_vdo_dir, csv['Video_category'][clip_id],
                                    csv['Video_name'][clip_id], 5)
        skip_num = frames.shape[0] // 4  # 4 is the number of frames kept after sampling
        frame_sample = np.expand_dims(frames[0], axis=0)
        for i in range(1, 4):
            frame_sample = np.vstack((frame_sample, np.expand_dims(frames[i * skip_num], axis=0)))
        # frame_sample is now of shape (4, 240, 320, 3)
        frame_sample = np.transpose(frame_sample, (0, 3, 1, 2))
        frame_sample = torch.tensor(frame_sample, dtype=torch.float).cuda()
        # frame_sample is now a tensor of shape (4, 3, 240, 320)
        # normalize
        frame_sample = frame_sample / 255
        for i in range(4):
            frame_sample[i] = normalize(frame_sample[i])
        frame_features = extractor(frame_sample)             # shape (4, 512, 7, 10)
        frame_features = frame_features.view(1, -1, 7, 10)   # shape (1, 2048, 7, 10) --> input to ActionPredictor for one clip
        if clip_id == 0:
            vdo_features = frame_features.cpu().detach().numpy()
        else:
            vdo_features = np.vstack((vdo_features, frame_features.cpu().detach().numpy()))
    np.save(mode + '_feature.npy', vdo_features)
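# --- Hedged usage sketch (added for illustration; not from the original repo) ---
# Assumes Extract_features() above has already been run, so the cached .npy file
# exists. Reloads the VGG16 feature maps and checks the layout expected by the
# downstream ActionPredictor: (num_clips, 2048, 7, 10).
def _demo_load_extracted_features(mode='train'):
    import numpy as np
    features = np.load(mode + '_feature.npy')
    assert features.ndim == 4 and features.shape[1:] == (2048, 7, 10)
    print('{} clips, per-clip feature shape {}'.format(features.shape[0], features.shape[1:]))
    return features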
def extract(folder, csvpath, load, num_class, batch_size, name):
    print("extract frames...")
    frames = []
    labels = []
    video_list = getVideoList(csvpath)
    if load == 0:
        for i in range(len(video_list["Video_name"])):
            frame = readShortVideo(folder, video_list["Video_category"][i],
                                   video_list["Video_name"][i])
            frame = np.mean(frame, axis=0, keepdims=True)
            # print(frame.shape)
            for j in range(len(frame)):
                frames.append(np.moveaxis(frame[j], -1, 0))
            label = np.zeros(num_class)
            label[int(video_list["Action_labels"][i])] = 1
            labels.append(label)
        frames = np.array(frames, dtype=np.uint8)
        labels = np.array(labels, dtype=np.uint8)
        # np.save("./" + name + "_frames.npy", frames)
        # np.save("./" + name + "_labels.npy", labels)
    elif load == 1:
        frames = np.load("./" + name + "_frames.npy")
        labels = np.load("./" + name + "_labels.npy")
    print(frames.shape, labels.shape)
    data = [(frames[i], labels[i]) for i in range(len(frames))]
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    return dataloader
def get_features():
    extractor = models.vgg16(pretrained=True).features.cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    csv = getVideoList(csv_file)
    vdo_num = len(csv['Video_index'])
    vdo_features = torch.zeros((vdo_num, 2048, 7, 10), dtype=torch.float)
    for clip_id in range(vdo_num):
        frames = readShortVideo(vdo_dir, csv['Video_category'][clip_id],
                                csv['Video_name'][clip_id], 5)
        skip_num = frames.shape[0] // 4  # 4 is the number of frames kept after sampling
        frame_sample = np.expand_dims(frames[0], axis=0)
        for i in range(1, 4):
            frame_sample = np.vstack((frame_sample, np.expand_dims(frames[i * skip_num], axis=0)))
        # frame_sample is now of shape (4, 240, 320, 3)
        frame_sample = np.transpose(frame_sample, (0, 3, 1, 2))
        frame_sample = torch.tensor(frame_sample, dtype=torch.float).cuda()
        # frame_sample is now a tensor of shape (4, 3, 240, 320)
        # normalize
        frame_sample = frame_sample / 255
        for i in range(4):
            frame_sample[i] = normalize(frame_sample[i])
        frame_features = extractor(frame_sample)             # shape (4, 512, 7, 10)
        frame_features = frame_features.view(1, -1, 7, 10)   # shape (1, 2048, 7, 10) --> input to ActionPredictor for one clip
        vdo_features[clip_id] = frame_features.detach().cpu()
    print('finish extracting features')
    return vdo_features
def extract_frames(opt):
    label_dir = opt.val_label_dir
    video_dir = opt.val_video_dir

    # Read CSV label file
    video_dict = reader.getVideoList(label_dir)

    # Initialize return list: one sublist per video, each holding
    # numpy arrays of individual frames with shape (240, 320, 3)
    all_frames = []

    # For length of the csv file:
    for i in range(len(video_dict["Video_index"])):
        print("Extracting frames from video %d..." % (i + 1))
        frame_list = []
        # Take video category and video name from current dict entry
        folder_name = video_dict["Video_category"][i]
        file_name = video_dict["Video_name"][i]
        # Present to helper function
        current_frames = reader.readShortVideo(video_dir, folder_name, file_name)
        # Separate each frame in returned array and put into a list
        for j in range(current_frames.shape[0]):
            frame_list.append(current_frames[j, :, :, :])
        # Append the list of individual frames onto the output list
        all_frames.append(frame_list)
    return all_frames
def convert_videos_to_np(mode, labels_fp, videos_fp, save_fp, limit):
    batch_max = 1000
    l = getVideoList(labels_fp)
    videos_output, labels_output = [], []
    data_num = limit if limit is not None else len(l["Video_category"])

    for i in range(data_num):
        print("Convert videos into numpy: {}/{} \r".format(i + 1, data_num), end="")
        cat = l["Video_category"][i]
        name = l["Video_name"][i]
        label = l["Action_labels"][i]
        data = readShortVideo(videos_fp, cat, name, downsample_factor=12).astype(np.int8)
        videos_output.append(data)
        labels_output.append(int(label))

        # Flush to disk every batch_max videos to bound memory usage
        if (i + 1) % batch_max == 0:
            videos_output, labels_output = np.array(videos_output), np.array(labels_output)
            np.save(os.path.join(save_fp, "videos_{}_{}.npy".format(mode, i // batch_max)), videos_output)
            np.save(os.path.join(save_fp, "labels_{}_{}.npy".format(mode, i // batch_max)), labels_output)
            videos_output = []
            labels_output = []

    # Save the remaining videos that did not fill a complete batch
    if (i + 1) % batch_max != 0:
        videos_output, labels_output = np.array(videos_output), np.array(labels_output)
        np.save(os.path.join(save_fp, "videos_{}_{}.npy".format(mode, i // batch_max)), videos_output)
        np.save(os.path.join(save_fp, "labels_{}_{}.npy".format(mode, i // batch_max)), labels_output)

    print("\nDone !")
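# --- Hedged usage sketch (added for illustration) ---
# Counterpart to convert_videos_to_np() above: reload every "videos_<mode>_<k>.npy"
# chunk that function writes and concatenate the label chunks. The file-name pattern
# follows the format strings used above; everything else here is an assumption.
def _demo_load_converted_chunks(mode, save_fp):
    import os
    import numpy as np
    videos, labels = [], []
    k = 0
    while os.path.exists(os.path.join(save_fp, "videos_{}_{}.npy".format(mode, k))):
        # the video chunks are object arrays of variable-length clips, so allow_pickle is required
        videos.append(np.load(os.path.join(save_fp, "videos_{}_{}.npy".format(mode, k)), allow_pickle=True))
        labels.append(np.load(os.path.join(save_fp, "labels_{}_{}.npy".format(mode, k))))
        k += 1
    labels = np.concatenate(labels) if labels else np.array([])
    return videos, labels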
def import_test_trimmed():
    path = sys.argv[2]
    print('path =', path)
    od = reader.getVideoList(path)
    print('len(od) =', len(od))
    path = sys.argv[1]
    num = len(od['Video_name'])
    print('num of videos =', num)
    df = 12
    count = 0
    leng_idx = np.zeros([num, ], np.uint32)
    for i in range(num):
        if i % 100 == 0 and i > 0:
            print(i)
        video = reader.readShortVideo(path, od['Video_category'][i], od['Video_name'][i],
                                      downsample_factor=df, rescale_factor=1)
        if i == 0:
            videos = video
        else:
            videos = np.concatenate([videos, video])
        count += video.shape[0]
        leng_idx[i] = video.shape[0]
    print("count =", count, np.sum(leng_idx))
    print('videos.shape =', videos.shape)
    return videos, leng_idx
def load_extract_video(video_path, df, model, filename):
    print("===== read video =====")
    codes = list()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(df.shape[0]):
            print(i, end="\r")
            video = readShortVideo(video_path=video_path,
                                   video_category=df.iloc[i]['Video_category'],
                                   video_name=df.iloc[i]['Video_name'],
                                   downsample_factor=12,
                                   rescale_factor=1)
            # extract features batch-wise
            if video.shape[0] < 50:
                tmp = sess.run(model.output, feed_dict={model.x: video})
            else:
                tmp = list()
                for j in range(int(video.shape[0] / 50) + 1):
                    st = 50 * j
                    ed = min(50 * j + 50, video.shape[0])
                    tmp_video = video[st:ed, :]
                    tmp.append(sess.run(model.output, feed_dict={model.x: tmp_video}))
                tmp = np.concatenate(tmp, axis=0)
            codes.append(tmp)
    print('Done')
    print("===== save into %s =====" % filename)
    with open(filename, 'wb') as f:
        pickle.dump(codes, f)
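# --- Hedged sketch (added for illustration) of the chunking used in load_extract_video() above ---
# Splits a (T, ...) frame array into pieces of at most `chunk` frames, equivalent to the
# st/ed indexing above but with range stepping and no TensorFlow session; pure numpy slicing.
def _demo_chunk_frames(frames, chunk=50):
    chunks = []
    for start in range(0, frames.shape[0], chunk):
        chunks.append(frames[start:start + chunk])
    return chunks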
def get_data(video_path, tag_path, model):
    if torch.cuda.is_available():
        model.cuda()
    file_dict = getVideoList(tag_path)
    x, y = [], []
    print(len(file_dict['Video_index']))
    with torch.no_grad():
        for i in range(len(file_dict['Video_index'])):
            frames = readShortVideo(video_path, file_dict['Video_category'][i],
                                    file_dict['Video_name'][i])
            if frames.shape[0] > 120:
                output_1 = model(torch.from_numpy(frames[0:120, :, :, :]).cuda()).detach().cpu().reshape(-1, 512 * 7 * 7)
                output_2 = model(torch.from_numpy(frames[120:, :, :, :]).cuda()).detach().cpu().reshape(-1, 512 * 7 * 7)
                output = torch.cat((output_1, output_2), 0)
            else:
                output = model(torch.from_numpy(frames).cuda()).detach().cpu().reshape(-1, 512 * 7 * 7)
            output = torch.mean(output, 0).numpy()
            x.append(output)
            y.append(int(file_dict['Action_labels'][i]))
            print('\rreading image from {}...{}'.format(video_path, i), end='')
    print('\rreading image from {}...finished'.format(video_path))
    return np.array(x).astype(np.float32), np.array(y).astype(np.uint8)
def Video2Seq(video_path, video_category, video_name):
    features = torch.Tensor()
    seq_length = []
    for i in range(len(video_name)):
        frames = readShortVideo(video_path, video_category[i], video_name[i])
        ts_frames = torch.from_numpy(frames.transpose((0, 3, 1, 2))).float() / 255.
        sys.stdout.write('\rReading the Video... : {:}'.format(i))
        sys.stdout.flush()
        set = Data.TensorDataset(ts_frames)
        dataloader = Data.DataLoader(dataset=set, batch_size=3)
        seq_length.append(0)
        for batch_idx, b_frame in enumerate(dataloader):
            features = torch.cat([features, resnet50(b_frame[0].cuda()).detach().cpu()])
            seq_length[i] += len(b_frame[0])
    max_length = max(seq_length)
    seq = torch.zeros(len(seq_length), max_length, features.shape[1])
    start = 0
    for i in range(len(seq_length)):
        seq[i, 0:seq_length[i], :] = features[start:start + seq_length[i], :]
        start += seq_length[i]
    sys.stdout.write('... Done\n')
    sys.stdout.flush()
    return seq, seq_length
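# --- Hedged sketch (added for illustration) of the zero-padding step in Video2Seq() above ---
# Packs a list of per-video feature tensors, each of shape (T_i, D), into one zero-padded
# (num_videos, max_T, D) tensor plus the true lengths, the same layout the function above
# builds from its concatenated `features` buffer.
def _demo_pad_sequences(feature_list):
    import torch
    lengths = [f.shape[0] for f in feature_list]
    padded = torch.zeros(len(feature_list), max(lengths), feature_list[0].shape[1])
    for i, f in enumerate(feature_list):
        padded[i, :f.shape[0], :] = f
    return padded, lengths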
def load_test_pred(video_path, gt_path, model_path):
    feature_size = 512 * 7 * 7
    CNN_pre_model = torchvision.models.vgg16(pretrained=True).features
    model = RNN_model(feature_size)

    # GPU enable
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print('Device used:', device)
    if torch.cuda.is_available():
        CNN_pre_model = CNN_pre_model.to(device)
        model = model.to(device)
    load_checkpoint(model_path, model)
    CNN_pre_model.eval()

    # -> label loading
    # test_label = pd.read_csv(gt_path)["Action_labels"]
    test_features = []
    category_path = sorted(os.listdir(video_path))
    with torch.no_grad():
        for category in category_path:
            mask = pd.read_csv(gt_path)["Video_category"] == category
            test_name = pd.read_csv(gt_path)[mask]["Video_name"]
            for i, video_name in enumerate(test_name):
                print("\r%d/%d" % (i, len(test_name)), end="")
                frames = readShortVideo(video_path, category, video_name,
                                        downsample_factor=12, rescale_factor=1)
                frames = Variable(torch.from_numpy(frames)).to(device)
                tmp = CNN_pre_model(frames).cpu().view(-1, feature_size)
                test_features.append(tmp)
            print("")
            print("Processing [%s] finished!" % category)
    print("Pre-train finished!")

    with torch.no_grad():
        RNN_feature = []
        preds = []
        model.eval()
        for i in range(0, len(test_features)):
            padded_feature, lengths = test_features[i], [test_features[i].shape[0]]  # padded_label, test_label[i]
            padded_feature = Variable(padded_feature).to(device).unsqueeze(1)
            lengths = torch.LongTensor(lengths)
            # print(padded_feature.shape)
            # print(padded_label)
            lengths = Variable(lengths).to(device)
            output, hidden = model(padded_feature, lengths)
            pred = torch.argmax(output, 1).cpu()
            preds.append(pred)
            RNN_feature.append(hidden.cpu().data.numpy().reshape(-1))
        RNN_feature = np.array(RNN_feature)
        preds = np.array(preds)
        print(pred.shape)
    return RNN_feature, preds  # , test_label
def __getitem__(self, idx):
    label = torch.LongTensor(np.array([float(self.vid_label_list[idx])]))  # (2, 3, 240, 320)
    frames = readShortVideo(self.vid_dir, self.vid_categ_list[idx], self.vid_name_list[idx])
    t, h, w, c = frames.shape
    if self.model_type == 'cnn':
        frames_tensor = torch.zeros([self.num_sample, c, h, w], dtype=torch.float)
        rand_frame_idx = torch.randint(0, t, (self.num_sample,))
        for i in range(self.num_sample):
            frames_tensor[i] = self.transform(Image.fromarray(frames[rand_frame_idx[i]]))
    elif self.model_type == 'rnn':
        frames_tensor = []
        if t > 10:
            scale = round(t / self.num_sample)
            for i in range(t):
                if i % scale == 0:
                    frames_tensor.append(self.transform(Image.fromarray(frames[i])))
            if len(frames_tensor) > self.num_sample:
                frames_tensor = frames_tensor[:10]
            frames_tensor = torch.stack(frames_tensor)
        else:
            frames_tensor = torch.zeros([t, c, h, w], dtype=torch.float)
            for i in range(t):
                frames_tensor[i] = self.transform(Image.fromarray(frames[i]))
    # label = torch.LongTensor(np.array([float(self.vid_label_list[idx])]))  # (2, 3, 240, 320)
    return frames_tensor, label
def __getitem__(self, idx):
    # read video
    video = reader.readShortVideo(self.dir, self.vid_cat[idx], self.vid_name[idx])
    return video
def Get_data(video_path, tag_path):
    model = torchvision.models.vgg16(pretrained=True).features
    if torch.cuda.is_available():
        model.cuda()
    file_dict = getVideoList(tag_path)
    feature_size = 512 * 7 * 7
    x, y = [], []
    print(len(file_dict['Video_index']))
    with torch.no_grad():
        for i in range(len(file_dict['Video_index'])):
            frames = readShortVideo(video_path, file_dict['Video_category'][i],
                                    file_dict['Video_name'][i])
            if frames.shape[0] > 120:
                output_1 = model(torch.from_numpy(frames[0:120, :, :, :]).cuda()).detach().cpu().reshape(-1, feature_size)
                output_2 = model(torch.from_numpy(frames[120:, :, :, :]).cuda()).detach().cpu().reshape(-1, feature_size)
                output = torch.cat((output_1, output_2), 0)
            else:
                output = model(torch.from_numpy(frames).cuda()).detach().cpu().reshape(-1, feature_size)
            x.append(output)
            y.append(int(file_dict['Action_labels'][i]))
            print('\rreading image from {}...{}'.format(video_path, i), end='')
    print('\rreading image from {}...finished'.format(video_path))
    return x, y
def __getitem__(self, idx):
    ''' get data '''
    video_category = self.video_category_list[idx]
    video_name = self.video_name_list[idx]
    label = self.label_list[idx]
    frames = reader.readShortVideo(self.video_path, video_category, video_name,
                                   downsample_factor=12,
                                   rescale_factor=self.rescale_factor)
    # print("frames.shape: ", frames.shape)
    frames_list = []
    for f in range(frames.shape[0]):
        frame = frames[f, :, :, :]
        frame = self.transform(frame)
        frames_list.append(frame)
    """
    # handle every sample, then concat
    imgs = []
    for frame in frames:
        imgs.append(self.transform(frame))
    imgs = np.array(imgs)
    print("imgs shape ", imgs.shape)
    """
    return torch.stack(frames_list), torch.tensor(int(label))
def __getitem__(self, index):
    current_video_imgs = reader.readShortVideo(self.root_videos,
                                               self.csv_dict['Video_category'][index],
                                               self.csv_dict['Video_name'][index],
                                               downsample_factor=downsample_factor,
                                               rescale_factor=(224, 224))
    videos = torch.zeros([current_video_imgs.shape[0], current_video_imgs.shape[3],
                          current_video_imgs.shape[1], current_video_imgs.shape[2]],
                         dtype=torch.float32)
    for i in range(len(current_video_imgs)):
        current_img = Image.fromarray(current_video_imgs[i])
        if self.transform is not None:
            current_img = self.transform(current_img)
        videos[i] = current_img
    return videos, int(self.labels[index])
def __getitem__(self, idx):
    # read video
    video = reader.readShortVideo(self.dir, self.vid_cat[idx], self.vid_name[idx])
    # get action label
    act = self.act_label[idx]
    return video, act
def prediction(model_fp, data_fp, label_fp, output_fp, limit):
    model = tor.load(model_fp)
    model.cuda()

    ### Load data
    l = getVideoList(label_fp)
    videos_output, labels_output = [], []
    total = len(l["Video_category"]) if not limit else limit
    for i in range(total):
        print("Convert videos into numpy: {}/{} \r".format(i + 1, len(l["Video_category"])), end="")
        cat = l["Video_category"][i]
        name = l["Video_name"][i]
        label = l["Action_labels"][i]
        data = readShortVideo(data_fp, cat, name, downsample_factor=12).astype(np.int8)
        videos_output.append(data.astype(np.int16))
        labels_output.append(int(label))
    videos, labels = np.array(videos_output), np.array(labels_output).astype(np.uint8)

    ### Prediction
    correct, total = 0, len(labels)
    preds = []
    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)
    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, len(videos)))
        x = tor.Tensor(x).permute(0, 3, 1, 2).cuda()
        out = model(x)
        out = out.mean(dim=0).unsqueeze(0)
        pred = model.pred(out)
        y = tor.max(pred, 1)[1]
        pred = int(y[0].data)
        if pred == label:
            correct += 1
        preds.append(pred)
    acc = correct / total
    print(acc)

    with open(os.path.join(output_fp, "p1_valid.txt"), "w") as f:
        for i, item in enumerate(preds):
            if i != len(preds) - 1:
                f.write(str(item) + "\n")
            else:
                f.write(str(item))
def Extract_features(mode='train'):
    extractor = models.vgg16(pretrained=True).features.cuda()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if mode == 'train':
        csv = getVideoList(gt_train)
    else:  # validation
        csv = getVideoList(gt_valid)
    vdo_num = len(csv['Video_index'])
    frames_num = 10  # number of frames sampled per clip
    vdo_features = np.zeros((vdo_num, frames_num, 512 * 3 * 5), dtype=float)
    for clip_id in range(vdo_num):
        print(mode, clip_id)
        if mode == 'train':
            frames = readShortVideo(train_vdo_dir, csv['Video_category'][clip_id],
                                    csv['Video_name'][clip_id], 2, 0.5)
        else:  # validation
            frames = readShortVideo(valid_vdo_dir, csv['Video_category'][clip_id],
                                    csv['Video_name'][clip_id], 2, 0.5)
        # frames has shape (num, 120, 160, 3)
        vdo_size = frames.shape[0]  # clip length; sample frames_num frames from it
        skip = vdo_size / frames_num
        frame_sample = np.zeros((frames_num, 120, 160, 3))
        for i in range(frames_num):
            frame_sample[i] = frames[int(i * skip)]
        frame_sample = np.transpose(frame_sample, (0, 3, 1, 2))
        frame_sample = torch.tensor(frame_sample, dtype=torch.float).cuda()
        # sampling done --> (frames_num, 3, 120, 160) per clip
        # now normalize it
        frame_sample = frame_sample / 255
        for i in range(frames_num):
            frame_sample[i] = normalize(frame_sample[i])
        frame_features = extractor(frame_sample)               # (frames_num, 512, 3, 5)
        frame_features = frame_features.view(frames_num, -1)   # (frames_num, 512*3*5)
        vdo_features[clip_id] = frame_features.cpu().detach().numpy()
    np.save('rnn_' + mode + '_feature.npy', vdo_features)  # shape = (vdo_num, seq = frames_num, 512*h*w)
def evaluate(feature_stractor, rnn, data_loader, batch_size):
    ''' set model to evaluate mode '''
    rnn.eval()
    feature_stractor.eval()
    iters = 0
    gts = []
    preds = []
    with torch.no_grad():
        for idx, (video, video_path) in enumerate(data_loader):
            # print(iters)
            iters += 1
            batch_img = []
            batch_gt = []
            for i in range(len(video_path)):
                frames = readShortVideo(video_path[i],
                                        video.get('Video_category')[i],
                                        video.get('Video_name')[i])
                vid = []
                for j in range(frames.shape[0]):
                    im = transforms_array(frames[j])
                    vid.append(im)
                vid = torch.stack(vid).cuda()
                with torch.no_grad():
                    feature = feature_stractor(vid)
                batch_img.append(feature)
                gt = int(video.get('Action_labels')[i])
                batch_gt.append(gt)
            sequence, label, n_frames = batch_padding(batch_img, batch_gt)
            # print(sequence.shape)
            _, pred = rnn(sequence, n_frames)
            _, pred = torch.max(pred, dim=1)
            batch_gt = torch.from_numpy(np.asarray(batch_gt))
            # print(batch_gt.shape)
            pred = pred.cpu().numpy().squeeze()
            batch_gt = batch_gt.numpy().squeeze()
            preds.append(pred)
            gts.append(batch_gt)
    if batch_size != 1:
        gts = np.concatenate(gts)
        preds = np.concatenate(preds)
    print(preds)
    return accuracy_score(gts, preds)
def __getitem__(self, index):
    video_name = self.video_list['Video_name'][index]
    video_category = self.video_list['Video_category'][index]

    video_label = None
    if 'Action_labels' in self.video_list:
        video_label = torch.LongTensor([self.video_list['Action_labels'][index]])

    # ---------------------------------------------------------------
    # Sample for HW4.1: pick a fixed number of frames
    # Downsample for HW4.2: pick the frames with the downsampling rate
    # ---------------------------------------------------------------
    if self.feature_path is not None:
        video = reader.readShortFeature(self.feature_path, video_category, video_name,
                                        downsample_factor=self.downsample)
    elif self.video_path is not None:
        video = reader.readShortVideo(self.video_path, video_category, video_name,
                                      downsample_factor=self.downsample,
                                      rescale_factor=self.rescale)

    if self.sample:
        step = video.shape[0] / self.sample
        frame = np.around(np.arange(0, video.shape[0], step), decimals=0).astype(int)
        video = video[frame]

    # ---------------------------------------------------
    # Features output dimension:   (frames, 2048)
    # Full video output dimension: (frames, channel, height, width)
    # ---------------------------------------------------
    if self.transform:
        if self.feature_path is not None:
            tensor = self.transform(video)
            return tensor.squeeze(0), video_label
        if self.video_path is not None:
            tensor = torch.zeros(video.shape[0], 3, 240, 320).type(torch.float32)
            for i in range(video.shape[0]):
                tensor[i] = self.transform(video[i])
            return tensor, video_label
    return video, video_label
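# --- Hedged sketch (added for illustration) of the fixed-count sampling in __getitem__ above ---
# Picks roughly evenly spaced frame indices from a clip of length t, using the same
# step / np.around arithmetic as the dataset class; the clip guards against a boundary
# index produced by float rounding.
def _demo_sample_indices(t, sample):
    import numpy as np
    step = t / sample
    idx = np.around(np.arange(0, t, step), decimals=0).astype(int)
    return np.clip(idx, 0, t - 1)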
def output_features(rnn, feature_stractor, data_loader, json_dir):
    ''' set model to evaluate mode '''
    rnn.eval()
    feature_stractor.eval()
    iters = 0
    with torch.no_grad():  # do not need to calculate information for gradient during eval
        data = []
        for idx, (video, video_path) in enumerate(data_loader):
            print(iters)
            iters += 1
            batch_img = []
            batch_gt = []
            for i in range(len(video_path)):
                frames = readShortVideo(video_path[i],
                                        video.get('Video_category')[i],
                                        video.get('Video_name')[i])
                vid = []
                for j in range(frames.shape[0]):
                    im = transforms_array(frames[j])
                    vid.append(im)
                vid = torch.stack(vid).cuda()
                with torch.no_grad():
                    feature = feature_stractor(vid)
                batch_img.append(feature)
                gt = int(video.get('Action_labels')[i])
                batch_gt.append(gt)
            sequence, label, n_frames = batch_padding(batch_img, batch_gt)
            # print(sequence.shape)
            feat, _ = rnn(sequence, n_frames)

            features_flt = []
            for imgs in feat:
                imgs_feature = []
                for fea in imgs:
                    imgs_feature.append(float(fea))
                features_flt.append(list(imgs_feature))

            # store the values of the prediction
            for i in range(0, len(features_flt)):
                data.append([list(features_flt[i]), batch_gt[i]])
        data = list(data)
    with open(json_dir, 'w') as outfile:
        json.dump(data, outfile)
def test():
    # test Reader
    # input = video_path, video_category, video_name, downsample_factor=12, rescale_factor=1
    frames = readShortVideo("hw4_data/TrimmedVideos/video/train/",
                            "OP01-R01-PastaSalad",
                            "OP01-R01-PastaSalad-66680-68130-F001597-F001639.mp4",
                            downsample_factor=12,
                            rescale_factor=1)
    cc = frames[0]
    cc = cc.transpose(1, 2, 0)
    print(cc.shape)
    print(cc)
def store(feature_stractor, rnn, data_loader, batch_size):
    ''' set model to evaluate mode '''
    rnn.eval()
    feature_stractor.eval()
    iters = 0
    gts = []
    preds = []
    with torch.no_grad():
        for idx, (video, video_path) in enumerate(data_loader):
            # print(iters)
            iters += 1
            batch_img = []
            for i in range(len(video_path)):
                frames = readShortVideo(video_path[i],
                                        video.get('Video_category')[i],
                                        video.get('Video_name')[i])
                vid = []
                for j in range(frames.shape[0]):
                    im = transforms_array(frames[j])
                    vid.append(im)
                vid = torch.stack(vid).cuda()
                print('working in video ', video.get('Video_index')[i], ' with size ', vid.shape)
                feature = feature_stractor(vid)
                batch_img.append(feature)
            # print(batch_img[0].shape)
            # print(batch_img[1].shape)
            sequence, n_frames = batch_padding(batch_fea=batch_img)
            # print(sequence.shape)
            # print(n_frames)
            _, pred = rnn(sequence, n_frames)
            # print(pred.shape)
            _, pred = torch.max(pred, dim=1)
            pred = pred.cpu().numpy().squeeze()
            preds.append(pred)
    if batch_size != 1:
        preds = np.concatenate(preds)
    # print(preds.shape)
    print(preds)
    f = open("p2_result.txt", "w+")
    for pred in preds:
        f.write("%d\n" % pred)
    f.close()
def __getitem__(self, index):
    """ Get a sample from the dataset """
    frames = readShortVideo(self.video_root,
                            self.all_video_frames[index][0],
                            self.all_video_frames[index][1],
                            downsample_factor=12, rescale_factor=1)
    # image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    # label = self.landmarks_frame['label'][index]
    # label = torch.FloatTensor([label])
    # readShortVideo returns a numpy array of frames, so convert it to a tensor here
    # (torch.stack expects a sequence of tensors and would fail on the raw array)
    frames = torch.from_numpy(frames)
    # output = torch.mean(frames, 0)
    # if self.transform:
    #     frames = self.transform(frames)
    return frames, self.label[index]
def __getitem__(self, index):
    frames = reader.readShortVideo(video_path=self.video_path,
                                   video_category=self.gt["Video_category"][index],
                                   video_name=self.gt["Video_name"][index])
    images = []
    for i in range(len(frames)):
        images.append(self.transform(frames[i]).unsqueeze(0))
    images = torch.cat(images)
    if self.is_train:
        label = int(self.gt["Action_labels"][index])
        return images, label
    return images
def import_four_trimmed(data='valid'):
    print('\ndata type =', data)
    path = '../HW5_data/TrimmedVideos/label/gt_' + data + '.csv'
    od = reader.getVideoList(path)
    print('len(od) =', len(od))
    print("len(od['Video_name']) =", len(od['Video_name']))
    print("len(od['Action_labels']) =", len(od['Action_labels']))
    path = '../HW5_data/TrimmedVideos/video/' + data + '/'
    num = len(od['Video_name'])
    print('num of videos =', num)
    videos = np.empty([num, 4, 240, 320, 3], np.uint8)
    labels = np.zeros([num, ], np.uint8)
    count = 0
    leng_idx = np.zeros([num, ], np.uint32)
    for i in range(num):
        # keys: 'Action_labels', 'Nouns', 'End_times', 'Start_times',
        #       'Video_category', 'Video_index', 'Video_name'
        # readShortVideo(path, video_category, video_name, downsample_factor=12, rescale_factor=1)
        if i % 100 == 0 and i > 0:
            print(i)
        # choose the downsample factor so that roughly four frames are returned
        df = int(((int(od['End_times'][i]) - int(od['Start_times'][i])) / 35.0 - 1) / 3.0)
        video = reader.readShortVideo(path, od['Video_category'][i], od['Video_name'][i],
                                      downsample_factor=df, rescale_factor=1)
        print('video =', video.shape)
        count += video.shape[0]
        assert video.shape[0] >= 4
        videos[i] = video[:4]
        labels[i] = od['Action_labels'][i]
        leng_idx[i] = video.shape[0]
    print('videos.shape =', videos.shape)
    print('labels.shape =', labels.shape)
    print('labels =', labels[:13])
    return videos, labels, leng_idx
def import_trimmed(data='valid'):
    print('\ndata type =', data)
    path = '../HW5_data/TrimmedVideos/label/gt_' + data + '.csv'
    od = reader.getVideoList(path)
    print('len(od) =', len(od))
    print("len(od['Video_name']) =", len(od['Video_name']))
    print("len(od['Action_labels']) =", len(od['Action_labels']))
    path = '../HW5_data/TrimmedVideos/video/' + data + '/'
    num = len(od['Video_name'])
    print('num of videos =', num)
    labels = np.zeros([num, ], np.uint8)
    df = 12
    count = 0
    leng_idx = np.zeros([num, ], np.uint32)
    for i in range(num):
        if i % 100 == 0 and i > 0:
            print(i)
        video = reader.readShortVideo(path, od['Video_category'][i], od['Video_name'][i],
                                      downsample_factor=df, rescale_factor=1)
        # print('video =', video.shape)
        if i == 0:
            videos = video
        else:
            videos = np.concatenate([videos, video])
        count += video.shape[0]
        labels[i] = od['Action_labels'][i]
        leng_idx[i] = video.shape[0]
    print("count =", count, np.sum(leng_idx))
    print('videos.shape =', videos.shape)
    print('labels.shape =', labels.shape)
    print('labels =', labels[:13])
    return videos, labels, leng_idx
def output_features(classi, feaStract, data_loader, json_dir):
    ''' set model to evaluate mode '''
    classi.eval()
    feaStract.eval()
    with torch.no_grad():  # do not need to calculate information for gradient during eval
        data = []
        for idx, (video, video_path) in enumerate(data_loader):
            features = []
            clss = []
            print('Preprocessing the data')
            for i in range(len(video_path)):
                print('working ', i)
                frames = readShortVideo(video_path[i],
                                        video.get('Video_category')[i],
                                        video.get('Video_name')[i])
                frames_res = torch.from_numpy(frames)
                frames_res.resize_(len(frames), 3, 240, 240)
                frames_res = frames_res.float().cuda()
                print(feaStract(frames_res).shape)  # , end="\r")
                features.append(torch.mean(feaStract(frames_res), 0).cpu().detach().numpy())
                clss.append(int(video.get('Action_labels')[i]))
            features = torch.from_numpy(np.asarray(features))
            clss = torch.from_numpy(np.asarray(clss))

            # FC
            print('Classifier')
            features = features.cuda()
            feat, _ = classi(features)

            features_flt = []
            for imgs in feat:
                imgs_feature = []
                for fea in imgs:
                    imgs_feature.append(float(fea))
                features_flt.append(list(imgs_feature))

            # store the values of the prediction
            # (cast the label to int so the list is JSON serializable)
            for i in range(0, len(features_flt)):
                data.append([list(features_flt[i]), int(clss[i])])
        data = list(data)
    with open(json_dir, 'w') as outfile:
        json.dump(data, outfile)
def extract_frames(opt, mode):
    if mode == "train":
        label_dir = opt.train_label_dir
        video_dir = opt.train_video_dir
        save_dir = opt.save_train_frames_dir
        end_video_index = opt.num_videos_train
    elif mode == "val":
        label_dir = opt.val_label_dir
        video_dir = opt.val_video_dir
        save_dir = opt.save_val_frames_dir
        end_video_index = opt.num_videos_val
    else:
        print("ERROR: invalid mode in frame generator")

    # Read CSV label file
    video_dict = reader.getVideoList(label_dir)

    # For length of the csv file:
    for i in range(end_video_index):
        # Clear dict and list for each new video
        data_dict = {}
        frame_list = []
        # Take video category and video name from current dict entry
        folder_name = video_dict["Video_category"][i]
        file_name = video_dict["Video_name"][i]
        # Present to helper function
        frames = reader.readShortVideo(video_dir, folder_name, file_name)
        # Separate each frame in returned array and put into a list
        for j in range(frames.shape[0]):
            frame_list.append(frames[j, :, :, :])
        # Populate a dictionary with the list of individual frames, and the corresponding label
        data_dict["frame_list"] = frame_list
        data_dict["label"] = video_dict["Action_labels"][i]
        # Save dict of frames/label for current video
        print("Saving frames from video %d..." % (i + 1))
        with open(os.path.join(save_dir, "{}.pk".format(i + 1)), "wb") as f:
            pk.dump(data_dict, f)
def Video2Tensor(video_path, video_category, video_name):
    features = torch.Tensor()
    for i in range(len(video_name)):
        frames = readShortVideo(video_path, video_category[i], video_name[i])
        ts_frames = torch.from_numpy(frames.transpose((0, 3, 1, 2))).float() / 255.
        sys.stdout.write('\rReading the Video... Frame: {:}'.format(i))
        sys.stdout.flush()
        set = Data.TensorDataset(ts_frames)
        dataloader = Data.DataLoader(dataset=set, batch_size=1)
        feature = torch.zeros(1, 1000).cuda()
        for batch_idx, b_frame in enumerate(dataloader):
            feature += resnet50(b_frame[0].cuda()).detach()
        features = torch.cat([features, (feature / len(set)).cpu()])
    sys.stdout.write('... Done\n')
    sys.stdout.flush()
    return features
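# --- Hedged sketch (added for illustration) of the averaging done in Video2Tensor() above ---
# The running sum over batches divided by the clip length is simply the mean of the
# per-frame feature vectors; demonstrated here on random stand-in data with plain torch ops.
def _demo_clip_average(num_frames=7, feat_dim=1000):
    import torch
    frame_feats = torch.randn(num_frames, feat_dim)   # stand-in for per-frame resnet50 outputs
    running = torch.zeros(1, feat_dim)
    for f in frame_feats:
        running += f
    clip_feat = running / num_frames
    assert torch.allclose(clip_feat, frame_feats.mean(dim=0, keepdim=True), atol=1e-5)
    return clip_feat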