Ejemplo n.º 1
0
 def test_xopen_xz(self):
     """
     Check that util.xopen transparently writes and reads XZ-compressed text.
     """
     import os
     text = "test åäö"
     # mkstemp returns an already-open low-level file descriptor in addition
     # to the path; close it immediately so the descriptor isn't leaked.
     handle, path = tempfile.mkstemp(".xz")
     os.close(handle)
     try:
         with util.xopen(path, "wt") as f:
             f.write(text)
         with util.xopen(path, "rt") as f:
             assert f.read() == text
     finally:
         # Remove the temporary file even if the assertions fail.
         os.remove(path)
Ejemplo n.º 2
0
    def read(cls, path, *, encoding="utf-8", columns=None, dtypes=None, **kwargs):
        """
        Return data from GeoJSON file `path`.

        Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

        `columns` is an optional list of columns to limit to. `dtypes` is an
        optional dict mapping column names to NumPy datatypes. `kwargs` are
        passed to ``json.load``.
        """
        # Use None sentinels instead of mutable defaults ([] and {}), which
        # are shared across calls; passing [] or {} still works as before.
        columns = columns or []
        dtypes = dtypes or {}
        with util.xopen(path, "rt", encoding=encoding) as f:
            raw = AttributeDict(json.load(f, **kwargs))
        cls._check_raw_data(raw)
        # Collect the union of all property keys, since individual features
        # need not define identical properties.
        data = {}
        for feature in raw.features:
            for key in feature.properties:
                data.setdefault(key, [])
        if columns:
            data = {k: v for k, v in data.items() if k in columns}
        # Pad missing properties with None so all columns stay equal length.
        for feature in raw.features:
            for key in data:
                value = feature.properties.get(key, None)
                data[key].append(value)
        data["geometry"] = [x.geometry for x in raw.features]
        for name, dtype in dtypes.items():
            data[name] = DataFrameColumn(data[name], dtype)
        data = cls(**data)
        # Keep everything but the features around as metadata so the file
        # can be round-tripped by write().
        del raw.features
        data.metadata = raw
        return data
Ejemplo n.º 3
0
    def write(self, path, *, encoding="utf-8", **kwargs):
        """
        Write data to GeoJSON file `path`.

        Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.

        `kwargs` are passed to ``json.dumps``.

        Raises ``ValueError`` if the data has no "geometry" column.
        """
        # Keep non-ASCII text as-is and fall back to str() for objects json
        # can't serialize; callers can override both via kwargs.
        kwargs.setdefault("default", str)
        kwargs.setdefault("ensure_ascii", False)
        # "indent" is applied manually below so each feature stays on a
        # single line; indent=None is coerced to zero-width indentation.
        indent_width = kwargs.pop("indent", 2) or 0
        indent1 = " " * indent_width * 1
        indent2 = " " * indent_width * 2
        if "geometry" not in self:
            raise ValueError("Geometry missing")
        data = self.to_list_of_dicts()
        util.makedirs_for_file(path)
        # The document is assembled by hand rather than with one json.dumps
        # call so that metadata comes first and features are one per line.
        with util.xopen(path, "wt", encoding=encoding) as f:
            f.write("{\n")
            for key, value in self.metadata.items():
                blob = json.dumps(value, **kwargs)
                f.write(f'{indent1}"{key}": {blob},\n')
            f.write(f'{indent1}"features": [\n')
            for i, item in enumerate(data):
                # Each item becomes a GeoJSON Feature: geometry is pulled
                # out and the remaining keys become its "properties".
                geometry = item.pop("geometry")
                blob = {
                    "type": "Feature",
                    "properties": item,
                    "geometry": geometry
                }
                blob = json.dumps(blob, **kwargs)
                # JSON forbids a trailing comma after the last element.
                comma = "," if i < len(data) - 1 else ""
                f.write(f"{indent2}{blob}{comma}\n")
            f.write(f"{indent1}]\n")
            f.write("}\n")
Ejemplo n.º 4
0
    def read_pickle(cls, path):
        """
        Read Pickle file `path` and return its contents as a new data frame.

        Compressed files are decompressed automatically based on a
        ``.bz2``, ``.gz`` or ``.xz`` extension of `path`.
        """
        with util.xopen(path, "rb") as f:
            obj = pickle.load(f)
        return cls(obj)
Ejemplo n.º 5
0
    def write_pickle(self, path):
        """
        Serialize the data frame to Pickle file `path`.

        Compression is applied automatically if `path` ends in
        ``.bz2``, ``.gz`` or ``.xz``.
        """
        util.makedirs_for_file(path)
        with util.xopen(path, "wb") as f:
            # Store columns as plain NumPy arrays, not project column types.
            payload = {key: np.array(column, column.dtype)
                       for key, column in self.items()}
            pickle.dump(payload, f, pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 6
0
    def write_pickle(self, path):
        """
        Serialize the list to Pickle file `path`.

        Compression is applied automatically if `path` ends in
        ``.bz2``, ``.gz`` or ``.xz``.
        """
        util.makedirs_for_file(path)
        with util.xopen(path, "wb") as f:
            # Store items as plain dicts, not project item types.
            plain = list(map(dict, self))
            pickle.dump(plain, f, pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 7
0
    def read_json(cls, path, *, encoding="utf-8", keys=None, types=None, **kwargs):
        """
        Return a new list from JSON file `path`.

        Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

        `keys` is an optional list of keys to limit to. `types` is an optional
        dict mapping keys to datatypes. `kwargs` are passed to ``json.load``.
        """
        # Use None sentinels instead of mutable defaults ([] and {}), which
        # are shared across calls; passing [] or {} still works as before.
        keys = keys or []
        types = types or {}
        with util.xopen(path, "rt", encoding=encoding) as f:
            return cls.from_json(f.read(), keys=keys, types=types, **kwargs)
Ejemplo n.º 8
0
    def read_json(cls, path, *, encoding="utf-8", columns=None, dtypes=None, **kwargs):
        """
        Return a new data frame from JSON file `path`.

        Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

        `columns` is an optional list of columns to limit to. `dtypes` is an
        optional dict mapping column names to NumPy datatypes. `kwargs` are
        passed to ``json.load``.
        """
        # Use None sentinels instead of mutable defaults ([] and {}), which
        # are shared across calls; passing [] or {} still works as before.
        columns = columns or []
        dtypes = dtypes or {}
        with util.xopen(path, "rt", encoding=encoding) as f:
            return cls.from_json(f.read(), columns=columns, dtypes=dtypes, **kwargs)
Ejemplo n.º 9
0
    def write_json(self, path, *, encoding="utf-8", **kwargs):
        """
        Serialize the list to JSON file `path`.

        Compression is applied automatically if `path` ends in
        ``.bz2``, ``.gz`` or ``.xz``.

        `kwargs` are passed to ``json.JSONEncoder``.
        """
        # Defaults are only filled in where the caller gave nothing.
        defaults = {"default": str, "ensure_ascii": False, "indent": 2}
        for name, value in defaults.items():
            kwargs.setdefault(name, value)
        util.makedirs_for_file(path)
        with util.xopen(path, "wt", encoding=encoding) as f:
            # Stream the encoding chunk by chunk instead of building the
            # whole document in memory first.
            for chunk in json.JSONEncoder(**kwargs).iterencode(self):
                f.write(chunk)
            f.write("\n")
Ejemplo n.º 10
0
    def write_csv(self, path, *, encoding="utf-8", header=True, sep=","):
        """
        Write list to CSV file `path`.

        Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.

        `header` controls whether a header row of keys is written. `sep` is
        the field delimiter.

        Raises ``ValueError`` if the list is empty.
        """
        if not self:
            raise ValueError("Cannot write empty CSV file")
        # Take a superset of all keys, since items need not be identical.
        keys = util.unique_keys(itertools.chain(*self))
        util.makedirs_for_file(path)
        with util.xopen(path, "wt", encoding=encoding) as f:
            writer = csv.DictWriter(f,
                                    keys,
                                    dialect="unix",
                                    delimiter=sep,
                                    quoting=csv.QUOTE_MINIMAL)
            # Plain statement for the side effect, not the
            # "expr if cond else None" antipattern.
            if header:
                writer.writeheader()
            for item in self:
                # Fill in missing keys as None.
                item = {**dict.fromkeys(keys), **item}
                writer.writerow(item)
Ejemplo n.º 11
0
    def read_csv(cls,
                 path,
                 *,
                 encoding="utf-8",
                 sep=",",
                 header=True,
                 keys=[],
                 types={}):
        """
        Return a new list from CSV file `path`.

        Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

        `keys` is an optional list of keys to limit to. `types` is an optional
        dict mapping keys to datatypes.
        """
        with util.xopen(path, "rt", encoding=encoding) as f:
            rows = list(csv.reader(f, dialect="unix", delimiter=sep))
        if not rows:
            return cls([])
        if header:
            colnames = rows.pop(0)
        else:
            colnames = util.generate_colnames(len(rows[0]))
        if keys:
            # Drop all columns except the requested ones.
            drop = [i for i in range(len(rows[0])) if colnames[i] not in keys]
            for row in rows:
                for i in reversed(drop):
                    del row[i]
            # Relabel the surviving columns in the FILE's order. Assigning
            # `keys` directly (as before) mislabeled columns whenever `keys`
            # was ordered differently from the file's columns.
            colnames = [name for name in colnames if name in keys]
        data = cls(dict(zip(colnames, x)) for x in rows)
        # `datatype`, not `type`, to avoid shadowing the builtin.
        for key, datatype in types.items():
            for item in data:
                if key in item:
                    item[key] = datatype(item[key])
        return data