def main(official_split_path, csv_path, videos_path, frames_path, json_path, args):
    # Labeling
    if args.dataset_name == "UCF101":
        train_csv_path, val_csv_path, test_csv_path = Labeler.UCF101.run(
            official_split_path=official_split_path, save_path=csv_path, id=args.split_id)
    elif args.dataset_name == "HMDB51":
        train_csv_path, val_csv_path, test_csv_path = Labeler.HMDB51.run(
            official_split_path=official_split_path, save_path=csv_path, id=args.split_id)
    elif args.dataset_name == "ActivityNet":
        train_csv_path, val_csv_path, test_csv_path = Labeler.ActivityNet.run(
            official_split_path=official_split_path, save_path=csv_path, id=args.split_id)
    else:
        print(f"'{args.dataset_name}' is not supported :(")
        return

    # Frame Extraction
    FrameExtractor.run(videos_path=videos_path, save_path=frames_path,
                       frame_size=args.frame_size_extractor, qscale=args.qscale,
                       workers=args.workers, original_size=args.original_size)

    # Frame Sampling
    if path_manager(json_path, raise_error=False, path_exist=True):
        print(f"{json_path} path already exists, skipping this step...")
        return
    path_manager(json_path, create_new=True)
    # `split_csv_path` avoids shadowing the `csv_path` argument above
    for split_csv_path in [train_csv_path, val_csv_path, test_csv_path]:
        if split_csv_path:
            FrameSampler.run(frames_path=frames_path, csv_path=split_csv_path,
                             save_path=json_path, frame_batch_size=args.frame_batch_size,
                             frame_size=args.frame_size_sampler, only_cpu=args.only_cpu,
                             gpu_number=args.gpu_number)
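
# A minimal sketch of an argparse entry point that could drive main() above.
# The attribute names mirror the `args.*` fields used in this file; the flag
# spellings, defaults, and directory wiring are assumptions, not the
# project's actual CLI.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_name", type=str, default="UCF101")      # UCF101 | HMDB51 | ActivityNet
    parser.add_argument("--split_id", type=int, default=1)                 # official split/version id
    parser.add_argument("--frame_size_extractor", type=int, default=256)   # assumed default
    parser.add_argument("--qscale", type=float, default=2.0)               # assumed default
    parser.add_argument("--workers", type=int, default=4)                  # assumed default
    parser.add_argument("--original_size", action="store_true")
    parser.add_argument("--frame_batch_size", type=int, default=64)        # assumed default
    parser.add_argument("--frame_size_sampler", type=int, default=224)     # assumed default
    parser.add_argument("--only_cpu", action="store_true")
    parser.add_argument("--gpu_number", type=int, default=0)               # assumed default
    args = parser.parse_args()

    # hypothetical directory layout rooted at ./data/<dataset>
    root = os.path.join("data", args.dataset_name)
    main(official_split_path=os.path.join(root, "splits"),
         csv_path=os.path.join(root, "csv"),
         videos_path=os.path.join(root, "videos"),
         frames_path=os.path.join(root, "frames"),
         json_path=os.path.join(root, "json"),
         args=args)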
def run(videos_path: str, save_path: str, frame_size: int, qscale: float, workers: int, original_size: bool):
    # path checking
    path_manager(videos_path, raise_error=True, path_exist=True)
    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists, skipping this step...")
        return

    # get video paths (the start point of the path is used for flexible path
    # parsing; "hello" is a dummy component that only measures the depth of `videos_path`)
    start_point_of_path = len(os.path.join(videos_path, "hello").split("/")) - 1
    video_paths = glob(os.path.join(videos_path, "**/*.*"), recursive=True)

    # extract frames from every video in parallel
    Parallel(n_jobs=workers, backend="threading")(delayed(frame_extractor)(
        [i, len(video_paths)], video_path, start_point_of_path, save_path,
        frame_size, qscale, original_size) for i, video_path in enumerate(video_paths))
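
# The per-video worker `frame_extractor` is defined elsewhere in the project.
# Below is a plausible sketch matching the call signature above, assuming the
# frames are dumped with ffmpeg as JPEGs. The output layout and the ffmpeg
# options are assumptions, not the project's confirmed implementation.
import subprocess

def frame_extractor(progress, video_path, start_point_of_path, save_path,
                    frame_size, qscale, original_size):
    # rebuild "<category>/<video_name>" from the tail of the video path
    sub_path = "/".join(video_path.split("/")[start_point_of_path:])
    frame_dir = os.path.join(save_path, os.path.splitext(sub_path)[0])
    os.makedirs(frame_dir, exist_ok=True)

    command = ["ffmpeg", "-i", video_path, "-qscale:v", str(qscale)]
    if not original_size:
        # scale the short side to `frame_size`, keeping the aspect ratio
        command += ["-vf", f"scale={frame_size}:{frame_size}:force_original_aspect_ratio=increase"]
    command.append(os.path.join(frame_dir, "%05d.jpg"))
    subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    print(f"{progress[0] + 1}/{progress[1]} {sub_path} done")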
def run(official_split_path: str, save_path: str, id: int = 3):
    # path checking (1)
    path_manager(official_split_path, raise_error=True, path_exist=True)

    # version id => 2 (v1.2), 3 (v1.3)
    assert id in [2, 3], f"'{id}' is not a supported version id on ActivityNet :("

    json_path = os.path.join(official_split_path, f"activity_net.v1-{id}.min.json")
    train_csv_path = os.path.join(save_path, f"train_{id}.csv")
    val_csv_path = os.path.join(save_path, f"val_{id}.csv")

    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists, skipping this step...")
        return train_csv_path, val_csv_path, None
    path_manager(save_path, create_new=True)

    # path checking (2)
    path_manager(train_csv_path, val_csv_path, remove_response=True)

    # load and read the json
    # key list => [database, taxonomy, version]
    # see more => http://activity-net.org/download.html
    with open(json_path, "r") as f:
        database = json.load(f)["database"]

    trains = []
    vals = []
    categories = []
    for vid in database:
        subset = database[vid]["subset"]
        if subset == "testing":
            continue
        category = database[vid]["annotations"][0]["label"]
        if category not in categories:
            categories.append(category)
        label = f"v_{vid},{categories.index(category)},{category}"
        # train
        if subset == "training":
            trains.append(label)
        # validation
        if subset == "validation":
            vals.append(label)

    with open(train_csv_path, "w") as f:
        f.write("\n".join(trains))
    with open(val_csv_path, "w") as f:
        f.write("\n".join(vals))

    return train_csv_path, val_csv_path, None
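
# `path_manager` is imported from elsewhere in the project. From its call
# sites above, one can infer roughly the contract sketched below; the exact
# semantics (especially `remove_response`) are an assumption, not the
# helper's confirmed code.
import shutil

def path_manager(*paths, raise_error=False, path_exist=False,
                 create_new=False, remove_response=False):
    for path in paths:
        exists = os.path.exists(path)
        if path_exist and raise_error and not exists:
            raise FileNotFoundError(path)     # hard requirement on the input path
        if path_exist and not raise_error:
            return exists                     # soft existence probe
        if create_new:
            os.makedirs(path, exist_ok=True)  # ensure the directory exists
        if remove_response and exists:
            # clear stale outputs before they are rewritten
            shutil.rmtree(path) if os.path.isdir(path) else os.remove(path)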
def run(official_split_path: str, save_path: str, id: int = 1):
    # path checking (1)
    path_manager(official_split_path, raise_error=True, path_exist=True)

    # split id => 1, 2, 3
    assert id in [1, 2, 3], f"'{id}' is not a supported split id on UCF101 :("

    train_csv_path = os.path.join(save_path, f"train_{id}.csv")
    test_csv_path = os.path.join(save_path, f"test_{id}.csv")

    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists, skipping this step...")
        return train_csv_path, None, test_csv_path
    path_manager(save_path, create_new=True)

    # path checking (2)
    path_manager(train_csv_path, test_csv_path, remove_response=True)

    categories = {}

    # train
    with open(train_csv_path, "w") as f1:
        # trainlist01.txt, trainlist02.txt, trainlist03.txt
        with open(os.path.join(official_split_path, f"trainlist0{id}.txt"), "r") as f2:
            for line in f2.read().splitlines():
                split_line = line.split(" ")
                category, filename = split_line[0].split("/")
                label = int(split_line[1]) - 1  # labels are 1-indexed in the official split
                # remember the category -> label mapping for the test split
                if category not in categories:
                    categories[category] = label
                # save ("[:-4]" strips the ".avi" extension)
                f1.write(f"{split_line[0][:-4]},{label},{category}\n")

    # test (the official test lists carry no labels, so reuse the mapping above)
    with open(test_csv_path, "w") as f1:
        # testlist01.txt, testlist02.txt, testlist03.txt
        with open(os.path.join(official_split_path, f"testlist0{id}.txt"), "r") as f2:
            for line in f2.read().splitlines():
                category, filename = line.split("/")
                label = categories[category]
                # save
                f1.write(f"{line[:-4]},{label},{category}\n")

    return train_csv_path, None, test_csv_path
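
# For reference, a line from trainlist01.txt such as
#     ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi 1
# becomes the CSV row
#     ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01,0,ApplyEyeMakeup
# that is, "<relative frame dir>,<0-indexed label>,<category>", the same
# three-column format all three labelers emit.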
def run(official_split_path: str, save_path: str, id: int = 1):
    # path checking (1)
    path_manager(official_split_path, raise_error=True, path_exist=True)

    # split id => 1, 2, 3
    assert id in [1, 2, 3], f"'{id}' is not a supported split id on HMDB51 :("

    train_csv_path = os.path.join(save_path, f"train_{id}.csv")
    val_csv_path = os.path.join(save_path, f"val_{id}.csv")
    test_csv_path = os.path.join(save_path, f"test_{id}.csv")

    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists, skipping this step...")
        return train_csv_path, val_csv_path, test_csv_path
    path_manager(save_path, create_new=True)

    # path checking (2)
    path_manager(train_csv_path, val_csv_path, test_csv_path, remove_response=True)

    # ready for writing
    train_csv = open(train_csv_path, "w")
    val_csv = open(val_csv_path, "w")
    test_csv = open(test_csv_path, "w")

    # for indexing
    categories = []
    label = 0
    for text_filename in glob(os.path.join(official_split_path, "*")):
        # split filenames look like "<category>_test_split<id>.txt"
        split_filename = (text_filename.split("/")[-1]).split("_")
        category = "_".join(split_filename[:-2])
        text_id = split_filename[-1][5:-4]  # get the split id from the text filename
        # get information from each file with the same split id and a new category
        if category not in categories and int(text_id) == id:
            with open(text_filename, "r") as f:
                for line in f.read().splitlines():
                    # drop the empty token produced by the trailing space
                    video_filename, video_id = line.split(" ")[:-1]
                    # category/video_filename ("[:-4]" strips the ".avi" extension)
                    video_file_path = os.path.join(category, video_filename[:-4])
                    # https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/split_readme.txt
                    # train
                    if int(video_id) == 1:
                        train_csv.write(f"{video_file_path},{label},{category}\n")
                    # test
                    if int(video_id) == 2:
                        test_csv.write(f"{video_file_path},{label},{category}\n")
                    # validation
                    if int(video_id) == 0:
                        val_csv.write(f"{video_file_path},{label},{category}\n")
            categories.append(category)
            label += 1

    # close
    train_csv.close()
    val_csv.close()
    test_csv.close()

    return train_csv_path, val_csv_path, test_csv_path
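
# `read_csv` (used by FrameSampler.run below) is defined elsewhere. Given the
# "<path>,<label>,<category>" rows the labelers write, a compatible sketch
# (an assumption about the real helper, not its confirmed code) could be:
def read_csv(csv_path: str):
    labels = []      # [(sub_file_path, label), ...]
    categories = {}  # {label: category}
    with open(csv_path, "r") as f:
        for line in f.read().splitlines():
            if not line:
                continue
            sub_file_path, label, category = line.split(",")
            labels.append((sub_file_path, int(label)))
            categories[int(label)] = category
    return labels, categories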
def run(frames_path: str, csv_path: str, save_path: str, frame_batch_size: int,
        frame_size: int, only_cpu: bool, gpu_number: int):
    # path checking
    path_manager(frames_path, raise_error=True, path_exist=True)

    # for saving the json file
    json_path = os.path.join(save_path, csv_path.split("/")[-1].split(".")[0] + ".json")

    # get a device
    device = get_device(only_cpu=only_cpu, gpu_number=gpu_number, cudnn_benchmark=True)

    # 2D CNN (VGG16) pretrained on ImageNet; drop the last three classifier
    # layers so the model outputs 4096-d features instead of class scores
    model = models.vgg16(pretrained=True)
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-3])
    model.to(device)

    # image (frame) transformer (ImageNet statistics)
    transform = transforms.Compose([
        transforms.Resize((frame_size, frame_size)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])

    model.eval()
    with torch.no_grad():
        labels, categories = read_csv(csv_path)
        json_dict = {}
        for i, (sub_file_path, label) in enumerate(labels):
            datas = []
            # HMDB51 has some filenames containing "]", which glob treats as a
            # special character, so replace it with the single-character wildcard "?"
            replaced_sub_file_path = sub_file_path.replace("]", "?")
            # collect the frame image paths in order
            frame_paths = sorted(glob(os.path.join(frames_path, replaced_sub_file_path, "*")))

            # frame sampler mini-batches
            for j in range(math.ceil(len(frame_paths) / frame_batch_size)):
                batch_paths = frame_paths[j * frame_batch_size:(j + 1) * frame_batch_size]
                # transform the images to a tensor
                data = torch.stack([transform(Image.open(image_path))
                                    for image_path in batch_paths], dim=0).to(device)
                # extract features
                data = model(data)
                # detach from the current graph and move back to the CPU
                datas.append(data.detach().cpu())

            # index ranking: frames most similar to the mean feature come first
            datas = torch.cat(datas)
            indices = torch.argsort(F.cosine_similarity(datas, datas.mean(dim=0, keepdim=True)), descending=True)

            # save into the json dict
            json_dict[sub_file_path] = {
                "label": label,
                "category": categories[label],
                "index": indices.numpy().tolist()
            }
            print(f"{i+1}/{len(labels)} Frame Path: {sub_file_path} "
                  f"Number of Frames: {len(frame_paths)} Frame Sampling Complete !!")

    with open(json_path, "w") as f:
        json.dump(json_dict, f)
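
# The json written above maps each clip to its frames ranked by cosine
# similarity to the clip's mean VGG16 feature. A downstream loader might pick
# the k most representative frames like this (hypothetical usage; the helper
# name and k are placeholders, not part of the project):
def top_k_frames(json_path: str, sub_file_path: str, frames_path: str, k: int = 8):
    with open(json_path, "r") as f:
        entry = json.load(f)[sub_file_path]
    # same "]" -> "?" glob workaround as in FrameSampler.run above
    frame_paths = sorted(glob(os.path.join(frames_path, sub_file_path.replace("]", "?"), "*")))
    # "index" holds frame positions sorted from most to least representative
    return [frame_paths[idx] for idx in entry["index"][:k]], entry["label"]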