Beispiel #1
0
    def from_json(cls, local_path, header=None, line_delimited=False):
        """
        Create a ``parsons table`` from a json file

        `Args:`
            local_path: str
                A JSON formatted local path, url or ftp. If this is a
                file path that ends in ".gz", the file will be decompressed first.
            header: list
                List of columns to use for the destination table. If omitted, columns will
                be inferred from the initial data in the file. Only used when
                ``line_delimited`` is ``False``.
            line_delimited: bool
                Whether the file is line-delimited JSON (with a row on each line), or a proper
                JSON file. Blank lines in a line-delimited file are skipped.
        `Returns:`
            Parsons Table
                See :ref:`parsons-table` for output options.
        """

        if not line_delimited:
            return cls(petl.fromjson(local_path, header=header))

        open_fn = gzip.open if files.is_gzip_path(local_path) else open

        # gzip.open() in 'r' mode yields bytes while open() yields str; json.loads()
        # accepts either, and strip() behaves the same way on both.
        with open_fn(local_path, 'r') as file:
            # Skip blank / whitespace-only lines (e.g. stray empty lines at the end of
            # the file), which json.loads() would reject with a JSONDecodeError.
            rows = [json.loads(line) for line in file if line.strip()]
        return cls(rows)
Beispiel #2
0
    def to_json(self, local_path=None, temp_file_compression=None, line_delimited=False):
        """
        Write the table to a JSON file and return the path that was written.

        .. warning::
                Any file that already exists at the target location is
                overwritten.

        `Args:`
            local_path: str
                Where to write the JSON locally. A path ending in ".gz" is written
                gzip-compressed. When omitted, a temporary file is created and returned,
                and that file will be removed automatically when the script is done running.
            temp_file_compression: str
                Compression type for the temporary file, used only when no ``local_path``
                is given. Currently "None" and "gzip" are supported.
                Ignored when a ``local_path`` is specified.
            line_delimited: bool
                If true, write line-delimited JSON (one row per line); otherwise write a
                single JSON array containing every row.

        `Returns:`
            str
                The path of the new file
        """

        if not local_path:
            compression_suffix = files.suffix_for_compression_type(temp_file_compression)
            local_path = files.create_temp_file(suffix='.json' + compression_suffix)

        # Rows are streamed out one at a time instead of calling the much simpler
        # petl.tojson(), because that method reads the whole table into memory
        # before writing to file.

        if files.is_gzip_path(local_path):
            opener, open_mode = gzip.open, 'w+t'
        else:
            opener, open_mode = open, 'w'

        # In array form rows are separated by a comma and a newline; in line-delimited
        # form by a newline only.
        row_separator = '\n' if line_delimited else ',\n'

        with opener(local_path, open_mode) as out:
            if not line_delimited:
                out.write('[')

            for position, row in enumerate(self):
                # The separator goes before every row except the first, so the
                # output never ends with a dangling separator.
                if position:
                    out.write(row_separator)
                json.dump(row, out)

            if not line_delimited:
                out.write(']')

        return local_path
Beispiel #3
0
def test_is_gzip_path():
    """Check that ``files.is_gzip_path`` accepts a ".gz" path and rejects the others."""
    gz_detected = files.is_gzip_path('some/file.gz')
    assert gz_detected

    # Neither an extension-less path nor a non-gzip extension should match.
    for plain_path in ('some/file', 'some/file.csv'):
        assert not files.is_gzip_path(plain_path)