Example #1
0
def download_test_set(num_workers, failed_log, compress, verbose, skip,
                      log_file):
    """
    Download every video listed in the test-set metadata.

    The metadata is read as JSON from ``config.TEST_METADATA_PATH`` and handed
    to a ``parallel.Pool`` that saves into ``config.TEST_ROOT``. ``None`` is
    passed in place of a class list.

    :param num_workers:   Number of downloads in parallel.
    :param failed_log:    Where to save failed video ids.
    :param compress:      Decides if the videos should be compressed.
    :param verbose:       Print status.
    :param skip:          Skip classes that already have folders (i.e. at least one video was downloaded).
    :param log_file:      Path to log file for youtube-dl.
    :return:              None.
    """
    with open(config.TEST_METADATA_PATH) as metadata_file:
        test_metadata = json.load(metadata_file)

    downloader = parallel.Pool(
        None,
        test_metadata,
        config.TEST_ROOT,
        num_workers,
        failed_log,
        compress,
        verbose,
        skip,
        log_file=log_file,
    )

    # Pool lifecycle: start the workers, feed them the videos, then stop them.
    downloader.start_workers()
    downloader.feed_videos()
    downloader.stop_workers()
Example #2
0
def download_classes(classes, num_workers, failed_save_file, compress, verbose,
                     skip, log_file):
    """
    Download the videos of the given classes for the train and validation splits.

    The splits are processed one after the other: each split's metadata JSON
    is loaded and passed to its own ``parallel.Pool``, which saves into that
    split's root directory.

    :param classes:           List of classes to download.
    :param num_workers:       Number of downloads in parallel.
    :param failed_save_file:  Where to save failed video ids.
    :param compress:          Decides if the videos should be compressed.
    :param verbose:           Print status.
    :param skip:              Skip classes that already have folders (i.e. at least one video was downloaded).
    :param log_file:          Path to log file for youtube-dl.
    :return:                  None.
    """
    # (metadata path, output root) for each split, train first.
    splits = (
        (config.TRAIN_METADATA_PATH, config.TRAIN_ROOT),
        (config.VAL_METADATA_PATH, config.VALID_ROOT),
    )

    for metadata_path, output_root in splits:
        with open(metadata_path) as metadata_file:
            split_metadata = json.load(metadata_file)

        downloader = parallel.Pool(
            classes,
            split_metadata,
            output_root,
            num_workers,
            failed_save_file,
            compress,
            verbose,
            skip,
            log_file=log_file,
        )

        # Finish this split completely before moving on to the next one.
        downloader.start_workers()
        downloader.feed_videos()
        downloader.stop_workers()
Example #3
0
def download_classes(classes, num_workers, failed_save_file, compress, verbose,
                     skip, log_file):
    """
    Download the videos of the given classes, skipping ids already in cloud storage.

    For the train and validation splits in turn, the split's metadata JSON is
    loaded and every entry whose key already appears in the
    ``"kinetics700-image"`` storage container is dropped. The remaining
    entries are passed to a ``parallel.Pool`` that saves into the split's root
    directory.

    :param classes:           List of classes to download.
    :param num_workers:       Number of downloads in parallel.
    :param failed_save_file:  Where to save failed video ids.
    :param compress:          Decides if the videos should be compressed.
    :param verbose:           Print status.
    :param skip:              Skip classes that already have folders (i.e. at least one video was downloaded).
    :param log_file:          Path to log file for youtube-dl.
    :return:                  None.
    """

    for list_path, save_root in zip(
        [config.TRAIN_METADATA_PATH, config.VAL_METADATA_PATH],
        [config.TRAIN_ROOT, config.VALID_ROOT]):
        with open(list_path) as file:
            data = json.load(file)

        # The container is listed again for every split, so the second split
        # sees whatever was stored while the first one was being processed.
        blob_image = CloudStorage(config.STORAGE_ACCOUNT_NAME,
                                  "kinetics700-image",
                                  config.CONNECTION_STRING, config.SAS_TOKEN)

        # Each blob name is reduced to the part of its basename that precedes
        # "_frame"; presumably that prefix is the video id (verify against the
        # code that writes the blobs).
        image_stored = {
            os.path.basename(file_name).split("_frame")[0]
            for file_name in blob_image.list_blob_names()
        }

        # Keep only the metadata entries that have no stored image yet.
        data_to_process = {
            key: val
            for key, val in data.items() if key not in image_stored
        }

        pool = parallel.Pool(classes,
                             data_to_process,
                             save_root,
                             num_workers,
                             failed_save_file,
                             compress,
                             verbose,
                             skip,
                             log_file=log_file)
        pool.start_workers()
        pool.feed_videos()
        pool.stop_workers()
Example #4
0
def download_classes_from_file(classes_file,
                               num_workers,
                               failed_save_file,
                               compress,
                               verbose,
                               skip,
                               log_file,
                               stats_file=None):
    """
    Download every video listed in the test-set metadata.

    As written, the function parses ``config.SUB_CLASS_PATH`` but does not use
    the result. It then loads ``config.TEST_METADATA_PATH`` and passes all of
    it, with ``None`` in place of a class list, to a ``parallel.Pool`` that
    saves into ``config.TEST_ROOT``.

    :param classes_file:      Not used by the current implementation.
    :param num_workers:       Number of downloads in parallel.
    :param failed_save_file:  Where to save failed video ids.
    :param compress:          Decides if the videos should be compressed.
    :param verbose:           Print status.
    :param skip:              Skip classes that already have folders (i.e. at least one video was downloaded).
    :param log_file:          Path to log file for youtube-dl.
    :param stats_file:        Forwarded to ``parallel.Pool`` as ``stats_file``.
    :return:                  None.
    """
    # NOTE(review): the parsed class data is never read, and `classes_file` is
    # ignored in favour of config.SUB_CLASS_PATH. The load is kept so that a
    # missing or malformed file still raises here. Confirm the intended
    # filtering before changing this.
    with open(config.SUB_CLASS_PATH) as classes_fh:
        json.load(classes_fh)

    splits = zip([config.TEST_METADATA_PATH], [config.TEST_ROOT])
    for metadata_path, output_root in tqdm(splits):
        with open(metadata_path) as metadata_fh:
            metadata = json.load(metadata_fh)

        # No filtering is applied: the whole metadata set is downloaded.
        print('DOWNLOADING {} of {}'.format(len(metadata), output_root))

        downloader = parallel.Pool(
            None,
            metadata,
            output_root,
            num_workers,
            failed_save_file,
            compress,
            verbose,
            skip,
            log_file=log_file,
            stats_file=stats_file,
        )
        downloader.start_workers()
        downloader.feed_videos()
        downloader.stop_workers()
Example #5
0
def download_missing(data,
                     save_root,
                     num_workers,
                     failed_save_file,
                     compress,
                     verbose,
                     skip,
                     log_file,
                     stats_file=None):
    """
    Download the videos described by an already-loaded metadata mapping.

    Prints how many entries will be processed, then runs a ``parallel.Pool``
    over ``data`` with ``None`` in place of a class list.

    :param data:              Mapping of entries to download; must provide ``keys()``.
    :param save_root:         Passed to ``parallel.Pool`` as the save root.
    :param num_workers:       Forwarded to ``parallel.Pool``.
    :param failed_save_file:  Forwarded to ``parallel.Pool``.
    :param compress:          Forwarded to ``parallel.Pool``.
    :param verbose:           Forwarded to ``parallel.Pool``.
    :param skip:              Forwarded to ``parallel.Pool``.
    :param log_file:          Forwarded to ``parallel.Pool`` as ``log_file``.
    :param stats_file:        Forwarded to ``parallel.Pool`` as ``stats_file``.
    :return:                  None.
    """
    entry_count = len(data.keys())
    print('DOWNLOADING {} of {}'.format(entry_count, save_root))

    downloader = parallel.Pool(
        None,
        data,
        save_root,
        num_workers,
        failed_save_file,
        compress,
        verbose,
        skip,
        log_file=log_file,
        stats_file=stats_file,
    )

    # Pool lifecycle: start the workers, feed them the videos, then stop them.
    downloader.start_workers()
    downloader.feed_videos()
    downloader.stop_workers()