Example #1
def main(official_split_path, csv_path, videos_path, frames_path, json_path,
         args):
    # Labeler, FrameExtractor, FrameSampler and path_manager are
    # project-internal helpers imported elsewhere in the repo.
    # Labeling
    if args.dataset_name == "UCF101":
        train_csv_path, val_csv_path, test_csv_path = Labeler.UCF101.run(
            official_split_path=official_split_path,
            save_path=csv_path,
            id=args.split_id)
    elif args.dataset_name == "HMDB51":
        train_csv_path, val_csv_path, test_csv_path = Labeler.HMDB51.run(
            official_split_path=official_split_path,
            save_path=csv_path,
            id=args.split_id)
    elif args.dataset_name == "ActivityNet":
        train_csv_path, val_csv_path, test_csv_path = Labeler.ActivityNet.run(
            official_split_path=official_split_path,
            save_path=csv_path,
            id=args.split_id)
    else:
        print(f"'{args.dataset_name}' is not supported :(")
        return

    # Frame Extraction
    FrameExtractor.run(videos_path=videos_path,
                       save_path=frames_path,
                       frame_size=args.frame_size_extractor,
                       qscale=args.qscale,
                       workers=args.workers,
                       original_size=args.original_size)

    # Frame Sampling
    if path_manager(json_path, raise_error=False, path_exist=True):
        print(f"{json_path} path already exists skip this step...")
        return
    else:
        path_manager(json_path, create_new=True)
    for split_csv_path in [train_csv_path, val_csv_path, test_csv_path]:
        if split_csv_path:
            FrameSampler.run(frames_path=frames_path,
                             csv_path=split_csv_path,
                             save_path=json_path,
                             frame_batch_size=args.frame_batch_size,
                             frame_size=args.frame_size_sampler,
                             only_cpu=args.only_cpu,
                             gpu_number=args.gpu_number)
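This driver only reads attributes off `args`, so it can be wired to `argparse` directly. A minimal sketch of such an entry point; the flag names mirror the `args.*` attributes used above, while the default values and the five path arguments are placeholders, not the repo's actual settings:

# Hypothetical CLI wiring; defaults and paths below are assumptions.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_name", default="UCF101")
    parser.add_argument("--split_id", type=int, default=1)
    parser.add_argument("--frame_size_extractor", type=int, default=256)
    parser.add_argument("--qscale", type=float, default=2.0)
    parser.add_argument("--workers", type=int, default=4)
    parser.add_argument("--original_size", action="store_true")
    parser.add_argument("--frame_batch_size", type=int, default=64)
    parser.add_argument("--frame_size_sampler", type=int, default=224)
    parser.add_argument("--only_cpu", action="store_true")
    parser.add_argument("--gpu_number", type=int, default=0)

    main(official_split_path="data/splits",
         csv_path="data/csv",
         videos_path="data/videos",
         frames_path="data/frames",
         json_path="data/json",
         args=parser.parse_args())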
Example #2
import os
from glob import glob

from joblib import Parallel, delayed


# path_manager and frame_extractor are project-internal helpers
def run(videos_path: str, save_path: str, frame_size: int, qscale: float,
        workers: int, original_size: bool):
    # path checking
    path_manager(videos_path, raise_error=True, path_exist=True)
    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists skip this step...")
        return

    # Get video paths. start_point_of_path marks where the dataset-relative
    # part of each path begins ("hello" is a dummy segment; only the count matters).
    start_point_of_path = len(os.path.join(videos_path,
                                           "hello").split("/")) - 1
    video_paths = glob(os.path.join(videos_path, "**/*.*"), recursive=True)

    # extract frames from every video in parallel
    Parallel(n_jobs=workers,
             backend="threading")(delayed(frame_extractor)(
                 [i, len(video_paths)], video_path, start_point_of_path,
                 save_path, frame_size, qscale, original_size)
                                  for i, video_path in enumerate(video_paths))
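The per-video worker `frame_extractor` is not shown on this page. A plausible sketch under stated assumptions: ffmpeg is on PATH, `qscale` maps to JPEG quality (`-qscale:v`), and `frame_size` scales the shorter side unless `original_size` is set; the real worker in the repo may differ.

# Hypothetical worker, not the repo's actual implementation.
import os
import subprocess


def frame_extractor(progress, video_path, start_point_of_path, save_path,
                    frame_size, qscale, original_size):
    # recover the dataset-relative part, e.g. "category/video_name"
    sub_path = "/".join(video_path.split("/")[start_point_of_path:])
    frame_dir = os.path.join(save_path, os.path.splitext(sub_path)[0])
    os.makedirs(frame_dir, exist_ok=True)

    command = ["ffmpeg", "-i", video_path, "-qscale:v", str(qscale)]
    if not original_size:
        # scale the shorter side to frame_size, preserving aspect ratio
        command += ["-vf",
                    f"scale='if(gt(iw,ih),-2,{frame_size})':'if(gt(iw,ih),{frame_size},-2)'"]
    command.append(os.path.join(frame_dir, "%05d.jpg"))

    subprocess.run(command, check=True, capture_output=True)
    print(f"{progress[0] + 1}/{progress[1]} {sub_path} extracted")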
Example #3
import json
import os


def run(official_split_path: str, save_path: str, id: int = 3):
    # path checking(1)
    path_manager(official_split_path, raise_error=True, path_exist=True)

    # version id => 2(1.2), 3(1.3)
    assert id in [2,
                  3], f"'{id}' is not supported version id on ActivityNet :("
    json_path = os.path.join(official_split_path,
                             f"activity_net.v1-{id}.min.json")
    train_csv_path = os.path.join(save_path, f"train_{id}.csv")
    val_csv_path = os.path.join(save_path, f"val_{id}.csv")

    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists skip this step...")
        return train_csv_path, val_csv_path, None
    else:
        path_manager(save_path, create_new=True)

    # path checking(2)
    path_manager(train_csv_path, val_csv_path, remove_response=True)

    # load and read json
    # keylist => [database, taxonomy, version]
    # see more => http://activity-net.org/download.html
    with open(json_path, "r") as f:
        database = json.load(f)["database"]

    trains = []
    vals = []
    categories = []
    for vid in database:
        subset = database[vid]["subset"]
        if subset == "testing":
            continue

        category = database[vid]["annotations"][0]["label"]

        if category not in categories:
            categories.append(category)

        label = f"v_{vid},{categories.index(category)},{category}"

        # train
        if subset == "training":
            trains.append(label)

        # validation
        if subset == "validation":
            vals.append(label)

    with open(train_csv_path, "w") as f:
        f.writelines("\n".join(trains))

    with open(val_csv_path, "w") as f:
        f.writelines("\n".join(vals))

    return train_csv_path, val_csv_path, None
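Each row written above has the form `v_<video_id>,<label>,<category>`, and the third return value is `None` because the testing subset is skipped (its annotations are not public). A quick sanity check of the output; the paths and the printed row are illustrative, not real entries:

# Illustrative read-back; paths and the example row are made up.
train_csv_path, val_csv_path, _ = run("activitynet_splits/", "activitynet_csv/", id=3)
with open(train_csv_path, "r") as f:
    first_row = f.readline().strip()
video, label, category = first_row.split(",", 2)
print(video, label, category)  # e.g. "v_abc123 0 Drinking coffee"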
Example #4
import os


def run(official_split_path: str, save_path: str, id: int = 1):
    # path checking
    path_manager(official_split_path, raise_error=True, path_exist=True)

    # split id => 1, 2, 3
    assert id in [1, 2, 3], f"'{id}' is not supported split id on UCF101 :("
    train_csv_path = os.path.join(save_path, f"train_{id}.csv")
    test_csv_path = os.path.join(save_path, f"test_{id}.csv")

    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists skip this step...")
        return train_csv_path, None, test_csv_path
    else:
        path_manager(save_path, create_new=True)

    # path checking
    path_manager(train_csv_path, test_csv_path, remove_response=True)

    categories = {}
    # train
    with open(f"{train_csv_path}", "w") as f1:
        # trainlist01, trainlist02, trainlist03
        with open(os.path.join(official_split_path, f"trainlist0{id}.txt"),
                  "r") as f2:
            for line in f2.read().splitlines():
                splited_line = line.split(" ")
                category, filename = splited_line[0].split("/")
                label = int(splited_line[1]) - 1

                # indexing for test
                if category not in categories:
                    categories[category] = label

                # save
                f1.writelines(f"{splited_line[0][:-4]},{label},{category}\n")

    # test
    with open(f"{test_csv_path}", "w") as f1:
        # testlist01, testlist02, testlist03
        with open(os.path.join(official_split_path, f"testlist0{id}.txt"),
                  "r") as f2:
            for line in f2.read().splitlines():
                category, filename = line.split("/")
                label = categories[category]

                # save
                f1.writelines(f"{line[:-4]},{label},{category}\n")

    return train_csv_path, None, test_csv_path
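For reference, the trainlist files pair each relative video path with a 1-based class index, while the testlist files contain only the path, which is why the `categories` dict built during the train pass is reused. One line before and after (a real UCF101 naming pattern):

# trainlist01.txt line -> CSV row produced above
line = "ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01.avi 1"
path, index = line.split(" ")
print(f"{path[:-4]},{int(index) - 1},{path.split('/')[0]}")
# ApplyEyeMakeup/v_ApplyEyeMakeup_g08_c01,0,ApplyEyeMakeup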
Example #5
import os
from glob import glob


def run(official_split_path: str, save_path: str, id: int = 1):
    # path checking(1)
    path_manager(official_split_path, raise_error=True, path_exist=True)

    # split id => 1, 2, 3
    assert id in [1, 2, 3], f"'{id}' is not supprted split id on HMDB51 :("
    train_csv_path = os.path.join(save_path, f"train_{id}.csv")
    val_csv_path = os.path.join(save_path, f"val_{id}.csv")
    test_csv_path = os.path.join(save_path, f"test_{id}.csv")

    if path_manager(save_path, raise_error=False, path_exist=True):
        print(f"{save_path} path already exists skip this step...")
        return train_csv_path, val_csv_path, test_csv_path
    else:
        path_manager(save_path, create_new=True)

    # path checking(2)
    path_manager(train_csv_path,
                 val_csv_path,
                 test_csv_path,
                 remove_response=True)

    # ready for writing
    train_csv = open(f"{train_csv_path}", "w")
    val_csv = open(f"{val_csv_path}", "w")
    test_csv = open(f"{test_csv_path}", "w")

    # for indexing
    categories = []
    label = 0

    for text_filename in glob(os.path.join(official_split_path, "*")):
        split_filename = (text_filename.split("/")[-1]).split("_")
        category = "_".join(split_filename[:-2])
        # filenames look like "<category>_test_split<id>.txt";
        # slice between "split" and ".txt" to get the split id
        text_id = split_filename[-1][5:-4]

        # get information from each same split id and different categories
        if category not in categories and int(text_id) == id:
            with open(text_filename, "r") as f:
                for line in f.read().splitlines():
                    video_filename, video_id = line.split(" ")[:-1]

                    # category/video_filename
                    video_file_path = os.path.join(category,
                                                   video_filename[:-4])

                    # https://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/split_readme.txt
                    # train
                    if int(video_id) == 1:
                        train_csv.writelines(
                            f"{video_file_path},{label},{category}\n")

                    # test
                    if int(video_id) == 2:
                        test_csv.writelines(
                            f"{video_file_path},{label},{category}\n")

                    # validation
                    if int(video_id) == 0:
                        val_csv.writelines(
                            f"{video_file_path},{label},{category}\n")

            categories.append(category)
            label += 1

    # close
    train_csv.close()
    val_csv.close()
    test_csv.close()

    return train_csv_path, val_csv_path, test_csv_path
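Per the split_readme linked above, the per-line id in each split file means 1 = training and 2 = testing, with 0 marked as unused; this labeler repurposes the 0 entries as a validation split. The filename parsing can be traced on a real HMDB51 split-file name:

# filename parsing, step by step
name = "brush_hair_test_split1.txt"
parts = name.split("_")      # ["brush", "hair", "test", "split1.txt"]
print("_".join(parts[:-2]))  # brush_hair  (category)
print(parts[-1][5:-4])       # 1           (split id)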
Example #6
import json
import math
import os
from glob import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchvision import models, transforms


# path_manager, get_device and read_csv are project-internal helpers
def run(frames_path: str, csv_path: str, save_path: str, frame_batch_size: int,
        frame_size: int, only_cpu: bool, gpu_number: int):
    # path checking
    path_manager(frames_path, raise_error=True, path_exist=True)

    # For saving the json file
    json_path = os.path.join(save_path, csv_path.split("/")[-1].split(".")[0] + ".json")

    # get a device
    device = get_device(only_cpu=only_cpu, gpu_number=gpu_number, cudnn_benchmark=True)

    # 2D CNN (VGG16), pretrained on ImageNet
    model = models.vgg16(pretrained=True)
    # drop the last ReLU/Dropout/Linear so the model outputs 4096-d features
    model.classifier = nn.Sequential(*list(model.classifier.children())[:-3])
    model.to(device)

    # image (frame) transforms with ImageNet mean/std normalization
    transform = transforms.Compose([
        transforms.Resize((frame_size, frame_size)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])

    model.eval()
    with torch.no_grad():
        labels, categories = read_csv(csv_path)
        json_dict = {}
        for i, (sub_file_path, label) in enumerate(labels):
            datas = []
            
            # Some HMDB51 filenames contain glob special characters (e.g. "]"),
            # so replace them with the single-character wildcard "?"
            replaced_sub_file_path = sub_file_path.replace("]", "?")
            
            # collect the video's frame image paths in order
            sorted_replaced_sub_file_path = sorted(glob(os.path.join(frames_path, replaced_sub_file_path, "*")))
            # Frame Sampler mini-batches (ceil keeps the final partial batch)
            for j in range(math.ceil(len(sorted_replaced_sub_file_path) / frame_batch_size)):
                sliced_sorted_replaced_sub_file_path = sorted_replaced_sub_file_path[j*frame_batch_size:(j+1)*frame_batch_size]
                data = torch.stack([transform(Image.open(image_path)) for image_path in sliced_sorted_replaced_sub_file_path], dim=0).to(device)

                # Extract features
                data = model(data)

                # Detach from the current graph and change the device
                datas.append(data.detach().cpu())

            # rank frame indices by cosine similarity to the mean feature
            # (most representative frames first)
            datas = torch.cat(datas)
            indices = torch.argsort(F.cosine_similarity(datas, datas.mean(dim=0, keepdim=True)), descending=True)

            # Save the json file
            json_dict[sub_file_path] = {
                "label": label,
                "category": categories[label],
                "index": indices.numpy().tolist()
            }
            
            print(f"{i+1}/{len(labels)} Frame Path: {sub_file_path} Numbef of Frames: {len(sorted_replaced_sub_file_path)} Frame Sampling Complete !!")
            
        with open(json_path, "w") as f:
            json.dump(json_dict, f)
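`read_csv` is a project-internal helper not shown on this page. Given the `path,label,category` rows the labelers above write, it plausibly returns a list of `(sub_file_path, label)` pairs plus a label-to-category mapping; a minimal sketch, with the name and return shape as assumptions:

# Hypothetical reconstruction of the read_csv helper used above.
def read_csv(csv_path):
    labels = []       # [(sub_file_path, label), ...]
    categories = {}   # {label: category_name}
    with open(csv_path, "r") as f:
        for line in f.read().splitlines():
            sub_file_path, label, category = line.split(",", 2)
            labels.append((sub_file_path, int(label)))
            categories[int(label)] = category
    return labels, categories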