Esempio n. 1
0
    def _process(self, input_pack: DataPack):
        """Write ``input_pack`` below the configured output directory.

        The relative location comes from ``self.sub_output_path``; the
        output directory and the ``indent``, ``zip_pack`` and ``overwrite``
        options are read from ``self.configs`` and forwarded to
        ``write_pack``.

        Args:
            input_pack: The pack to be written.

        Raises:
            ValueError: If ``sub_output_path`` yields an empty string.
        """
        relative_path = self.sub_output_path(input_pack)
        if relative_path == '':
            raise ValueError("No concrete path provided from sub_output_path.")

        cfg = self.configs
        maybe_create_dir(cfg.output_dir)
        write_pack(
            input_pack,
            cfg.output_dir,
            relative_path,
            cfg.indent,
            cfg.zip_pack,
            cfg.overwrite,
        )
Esempio n. 2
0
 def _process(self, input_pack: DataPack):
     """Write ``input_pack`` unless no sub path is available for it.

     The relative location comes from ``self.sub_output_path``. When that
     is ``None`` or an empty string, the pack is silently skipped.
     Otherwise the output directory and the ``indent``, ``zip_pack``,
     ``overwrite`` and ``drop_record`` options are read from
     ``self.configs`` and forwarded to ``write_pack``.

     Args:
         input_pack: The pack to be written.
     """
     target = self.sub_output_path(input_pack)
     if target is None or target == "":
         # An empty sub path means there is nothing to write for this pack.
         return

     cfg = self.configs
     maybe_create_dir(cfg.output_dir)
     write_pack(
         input_pack,
         cfg.output_dir,
         target,
         cfg.indent,
         cfg.zip_pack,
         cfg.overwrite,
         cfg.drop_record,
     )
Esempio n. 3
0
    def _process(self, input_pack: PackType):
        """Serialize ``input_pack`` into a file under ``self.root_output_dir``.

        The file location is ``root_output_dir`` joined with the value
        returned by ``self.sub_output_path``. When ``self.zip_pack`` is
        truthy the text is written gzip-compressed to that path with a
        ``.gz`` suffix appended; otherwise it is written as plain text.

        Args:
            input_pack: The pack to serialize and store.

        Raises:
            ValueError: If ``sub_output_path`` yields an empty string.
        """
        relative_path = self.sub_output_path(input_pack)
        if relative_path == '':
            raise ValueError(
                "No concrete path provided from sub_output_path.")

        maybe_create_dir(self.root_output_dir)
        target = os.path.join(self.root_output_dir, relative_path)

        # Pick the writer first, then share a single write statement.
        if self.zip_pack:
            sink = gzip.open(target + '.gz', 'wt')
        else:
            sink = open(target, 'w')

        with sink:
            sink.write(input_pack.serialize())
Esempio n. 4
0
def maybe_download(urls, path, filenames=None, extract=False):
    r"""Downloads a set of files.

    A file is only downloaded (and, if requested, extracted) when its
    destination does not exist yet; existing files are left untouched.

    Args:
        urls: A (list of) URLs to download files.
        path (str): The destination path to save the files.
        filenames: A (list of) strings of the file names. If given,
            must have the same length with :attr:`urls`. If `None`,
            filenames are extracted from :attr:`urls`.
        extract (bool): Whether to extract compressed files.

    Returns:
        A list of paths to the downloaded files if :attr:`urls` is a list
        or tuple; otherwise a single path string. Each path is
        :attr:`path` joined with the resolved file name.

    Raises:
        ValueError: If :attr:`filenames` is given and its length differs
            from the number of :attr:`urls`.
    """
    maybe_create_dir(path)

    # Remember the shape of the input so the output can mirror it.
    is_list = isinstance(urls, (list, tuple))
    if not is_list:
        urls = [urls]
    if filenames is not None:
        if not isinstance(filenames, (list, tuple)):
            filenames = [filenames]
        if len(urls) != len(filenames):
            raise ValueError(
                '`filenames` must have the same number of elements as `urls`.')

    raw_suffix = "?raw=true"
    result = []
    for i, url in enumerate(urls):
        is_google_drive = 'drive.google.com' in url
        if filenames is not None:
            filename = filenames[i]
        elif is_google_drive:
            filename = _extract_google_drive_file_id(url)
        else:
            filename = url.split('/')[-1]
            # If downloading from GitHub, remove suffix ?raw=true
            # from local filename
            if filename.endswith(raw_suffix):
                filename = filename[:-len(raw_suffix)]

        filepath = os.path.join(path, filename)
        result.append(filepath)

        if os.path.exists(filepath):
            # Already present: skip both the download and the extraction.
            continue

        if is_google_drive:
            filepath = _download_from_google_drive(url, filename, path)
        else:
            filepath = _download(url, filename, path)

        if not extract:
            continue

        logging.info('Extract %s', filepath)
        if tarfile.is_tarfile(filepath):
            # NOTE(review): extractall() trusts member paths inside the
            # archive; only use with archives from trusted sources.
            # The context manager ensures the archive handle is closed.
            with tarfile.open(filepath, 'r') as tfile:
                tfile.extractall(path)
        elif zipfile.is_zipfile(filepath):
            with zipfile.ZipFile(filepath) as zfile:
                zfile.extractall(path)
        else:
            logging.info("Unknown compression type. Only .tar.gz, "
                         ".tar.bz2, .tar, and .zip are supported")

    if not is_list:
        return result[0]
    return result