def test_xopen_xz(self):
    """Round-trip non-ASCII text through an ``.xz``-compressed file."""
    import os
    text = "test åäö"
    # mkstemp returns an already-open OS-level descriptor: close it so we
    # don't leak it, and remove the file afterwards so the test is tidy.
    handle, path = tempfile.mkstemp(".xz")
    os.close(handle)
    try:
        with util.xopen(path, "wt") as f:
            f.write(text)
        with util.xopen(path, "rt") as f:
            assert f.read() == text
    finally:
        os.remove(path)
def read(cls, path, *, encoding="utf-8", columns=None, dtypes=None, **kwargs):
    """
    Return data from GeoJSON file `path`.

    Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

    `columns` is an optional list of columns to limit to.

    `dtypes` is an optional dict mapping column names to NumPy datatypes.

    `kwargs` are passed to ``json.load``.
    """
    # None sentinels instead of mutable defaults ([] / {}), which would be
    # shared across calls.
    columns = columns or []
    dtypes = dtypes or {}
    with util.xopen(path, "rt", encoding=encoding) as f:
        raw = AttributeDict(json.load(f, **kwargs))
    cls._check_raw_data(raw)
    data = {}
    # First pass: collect the union of all property keys across features.
    for feature in raw.features:
        for key in feature.properties:
            data.setdefault(key, [])
    if columns:
        data = {k: v for k, v in data.items() if k in columns}
    # Second pass: fill in values, None where a feature lacks the key.
    for feature in raw.features:
        for key in data:
            value = feature.properties.get(key, None)
            data[key].append(value)
    data["geometry"] = [x.geometry for x in raw.features]
    for name, dtype in dtypes.items():
        data[name] = DataFrameColumn(data[name], dtype)
    data = cls(**data)
    # Keep the remaining top-level GeoJSON members (everything but the
    # features themselves) available as metadata.
    del raw.features
    data.metadata = raw
    return data
def write(self, path, *, encoding="utf-8", **kwargs):
    """
    Write data to GeoJSON file `path`.

    Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.

    `kwargs` are passed to ``json.dump``.
    """
    kwargs.setdefault("default", str)
    kwargs.setdefault("ensure_ascii", False)
    # "indent" controls the file-level layout, not json.dumps: metadata
    # entries and features are each written as a single indented line.
    width = kwargs.pop("indent", 2) or 0
    pad = " " * width
    pad2 = pad * 2
    if "geometry" not in self:
        raise ValueError("Geometry missing")
    records = self.to_list_of_dicts()
    last = len(records) - 1
    util.makedirs_for_file(path)
    with util.xopen(path, "wt", encoding=encoding) as f:
        f.write("{\n")
        # Top-level GeoJSON members first (e.g. "type", "crs").
        for key, value in self.metadata.items():
            f.write(f'{pad}"{key}": {json.dumps(value, **kwargs)},\n')
        f.write(f'{pad}"features": [\n')
        for i, record in enumerate(records):
            geometry = record.pop("geometry")
            feature = {
                "type": "Feature",
                "properties": record,
                "geometry": geometry,
            }
            text = json.dumps(feature, **kwargs)
            suffix = "" if i == last else ","
            f.write(f"{pad2}{text}{suffix}\n")
        f.write(f"{pad}]\n")
        f.write("}\n")
def read_pickle(cls, path):
    """
    Return a new data frame from Pickle file `path`.

    Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.
    """
    # NOTE: pickle.load executes arbitrary code on malicious input;
    # only read Pickle files from trusted sources.
    with util.xopen(path, "rb") as f:
        obj = pickle.load(f)
    return cls(obj)
def write_pickle(self, path):
    """
    Write data frame to Pickle file `path`.

    Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.
    """
    util.makedirs_for_file(path)
    # Serialize columns as plain NumPy arrays, preserving each dtype.
    payload = {name: np.array(column, column.dtype)
               for name, column in self.items()}
    with util.xopen(path, "wb") as f:
        pickle.dump(payload, f, pickle.HIGHEST_PROTOCOL)
def write_pickle(self, path):
    """
    Write list to Pickle file `path`.

    Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.
    """
    util.makedirs_for_file(path)
    # Serialize items as plain dicts rather than the list's item type.
    payload = [dict(item) for item in self]
    with util.xopen(path, "wb") as f:
        pickle.dump(payload, f, pickle.HIGHEST_PROTOCOL)
def read_json(cls, path, *, encoding="utf-8", keys=None, types=None, **kwargs):
    """
    Return a new list from JSON file `path`.

    Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

    `keys` is an optional list of keys to limit to.

    `types` is an optional dict mapping keys to datatypes.

    `kwargs` are passed to ``json.load``.
    """
    # None sentinels instead of mutable defaults ([] / {}), which would be
    # shared across calls.
    keys = keys or []
    types = types or {}
    with util.xopen(path, "rt", encoding=encoding) as f:
        return cls.from_json(f.read(), keys=keys, types=types, **kwargs)
def read_json(cls, path, *, encoding="utf-8", columns=None, dtypes=None,
              **kwargs):
    """
    Return a new data frame from JSON file `path`.

    Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

    `columns` is an optional list of columns to limit to.

    `dtypes` is an optional dict mapping column names to NumPy datatypes.

    `kwargs` are passed to ``json.load``.
    """
    # None sentinels instead of mutable defaults ([] / {}), which would be
    # shared across calls.
    columns = columns or []
    dtypes = dtypes or {}
    with util.xopen(path, "rt", encoding=encoding) as f:
        return cls.from_json(f.read(), columns=columns, dtypes=dtypes, **kwargs)
def write_json(self, path, *, encoding="utf-8", **kwargs):
    """
    Write list to JSON file `path`.

    Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.

    `kwargs` are passed to ``json.JSONEncoder``.
    """
    kwargs.setdefault("default", str)
    kwargs.setdefault("ensure_ascii", False)
    kwargs.setdefault("indent", 2)
    util.makedirs_for_file(path)
    encoder = json.JSONEncoder(**kwargs)
    with util.xopen(path, "wt", encoding=encoding) as f:
        # Stream the encoded output chunk by chunk instead of building
        # the whole document in memory first.
        for piece in encoder.iterencode(self):
            f.write(piece)
        f.write("\n")
def write_csv(self, path, *, encoding="utf-8", header=True, sep=","):
    """
    Write list to CSV file `path`.

    Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.

    `header` controls whether a header row of keys is written.

    `sep` is the field delimiter.

    Raises ``ValueError`` if the list is empty.
    """
    if not self:
        raise ValueError("Cannot write empty CSV file")
    # Take a superset of all keys across items, since items may differ.
    keys = util.unique_keys(itertools.chain(*self))
    util.makedirs_for_file(path)
    with util.xopen(path, "wt", encoding=encoding) as f:
        writer = csv.DictWriter(f,
                                keys,
                                dialect="unix",
                                delimiter=sep,
                                quoting=csv.QUOTE_MINIMAL)
        # Plain statement instead of abusing a conditional expression
        # for its side effect.
        if header:
            writer.writeheader()
        for item in self:
            # Fill in missing keys as None.
            writer.writerow({**dict.fromkeys(keys), **item})
def read_csv(cls, path, *, encoding="utf-8", sep=",", header=True, keys=None,
             types=None):
    """
    Return a new list from CSV file `path`.

    Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

    `keys` is an optional list of keys to limit to.

    `types` is an optional dict mapping keys to datatypes.
    """
    # None sentinels instead of mutable defaults ([] / {}), which would be
    # shared across calls.
    keys = keys or []
    types = types or {}
    with util.xopen(path, "rt", encoding=encoding) as f:
        rows = list(csv.reader(f, dialect="unix", delimiter=sep))
    if not rows:
        return cls([])
    if header:
        colnames = rows.pop(0)
    else:
        colnames = util.generate_colnames(len(rows[0]))
    if keys:
        # Drop all columns except the requested ones.
        drop = [i for i in range(len(rows[0])) if colnames[i] not in keys]
        for row in rows:
            for i in reversed(drop):
                del row[i]
        # Keep the remaining names in file column order so they still line
        # up with the row values regardless of the order `keys` was given in.
        colnames = [x for x in colnames if x in keys]
    data = cls(dict(zip(colnames, x)) for x in rows)
    for key, datatype in types.items():
        for item in data:
            if key in item:
                item[key] = datatype(item[key])
    return data