def __init__(self, *args, **kwargs):
    """
    Return a new GeoJSON object.

    `args` and `kwargs` are like for ``dict``.

    https://docs.python.org/3/library/stdtypes.html#dict
    """
    super().__init__(*args, **kwargs)
    # Non-feature, top-level GeoJSON keys live in `metadata`, not as
    # columns; start with the mandatory top-level "type" key.
    self.metadata = AttributeDict(type="FeatureCollection")
def read(cls, path, *, encoding="utf-8", columns=None, dtypes=None, **kwargs):
    """
    Return data from GeoJSON file `path`.

    Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

    `columns` is an optional list of columns to limit to.
    `dtypes` is an optional dict mapping column names to NumPy datatypes.
    `kwargs` are passed to ``json.load``.
    """
    # None instead of mutable defaults ([], {}): although the originals
    # were never mutated here, mutable default arguments are a shared-state
    # pitfall. Falsy values normalize to the same behavior as before.
    columns = columns or []
    dtypes = dtypes or {}
    with util.xopen(path, "rt", encoding=encoding) as f:
        raw = AttributeDict(json.load(f, **kwargs))
    cls._check_raw_data(raw)
    # First pass: collect the union of all property keys, since
    # individual features can have differing property sets.
    data = {}
    for feature in raw.features:
        for key in feature.properties:
            data.setdefault(key, [])
    if columns:
        data = {k: v for k, v in data.items() if k in columns}
    # Second pass: fill columns, padding missing properties with None.
    for feature in raw.features:
        for key in data:
            value = feature.properties.get(key, None)
            data[key].append(value)
    data["geometry"] = [x.geometry for x in raw.features]
    for name, dtype in dtypes.items():
        data[name] = DataFrameColumn(data[name], dtype)
    data = cls(**data)
    # Keep all remaining top-level keys (type, crs, ...) as metadata
    # so that `write` can round-trip them.
    del raw.features
    data.metadata = raw
    return data
def select(self, *keys):
    """
    Return items, keeping only `keys`.

    >>> data = di.read_json("data/listings.json")
    >>> data.select("id", "hood", "zipcode")
    """
    for item in self:
        # Keys absent from an item are simply skipped.
        subset = {key: item[key] for key in keys if key in item}
        yield AttributeDict(subset)
def rename(self, **to_from_pairs):
    """
    Return items with keys renamed.

    >>> data = di.read_json("data/listings.json")
    >>> data.rename(listing_id="id")
    """
    # Arguments are given as new_name=old_name; invert to map old to new.
    from_to = {old: new for new, old in to_from_pairs.items()}
    for item in self:
        yield AttributeDict(
            (from_to.get(key, key), value)
            for key, value in item.items())
def append(self, item):
    """
    Return list with `item` added to the end.

    >>> data = di.read_json("data/listings.json")
    >>> data = data.append(dict.fromkeys(data[0].keys()))
    >>> data.tail()
    """
    # Coerce plain dicts so all yielded items are AttributeDicts.
    if not isinstance(item, AttributeDict):
        item = AttributeDict(item)
    yield from self
    yield item
def test_deepcopy(self):
    # Populate with a nested AttributeDict to exercise deep copying.
    self.ad.a = 1
    self.ad.b = "test"
    self.ad.c = AttributeDict()
    self.ad.c.d = 1
    self.ad.c.e = "test"
    clone = copy.deepcopy(self.ad)
    # Types preserved at every level.
    assert isinstance(clone, AttributeDict)
    assert isinstance(clone.c, AttributeDict)
    # Equal in value, but distinct objects all the way down.
    assert clone == self.ad
    assert clone is not self.ad
    assert clone.c == self.ad.c
    assert clone.c is not self.ad.c
def insert(self, index, item):
    """
    Return list with `item` inserted at `index`.

    `index` follows ``list.insert`` semantics: negative values count
    from the end and out-of-range values clamp, so `item` is always
    included in the output.

    >>> data = di.read_json("data/listings.json")
    >>> data = data.insert(0, dict.fromkeys(data[0].keys()))
    >>> data.head()
    """
    if not isinstance(item, AttributeDict):
        item = AttributeDict(item)
    # Normalize the index like list.insert does. The previous
    # implementation silently dropped item for index >= len(self)
    # (insert-at-end) and for negative indices.
    pos = index
    if pos < 0:
        pos = max(0, pos + len(self))
    pos = min(pos, len(self))
    for i, existing in enumerate(self):
        if i == pos:
            yield item
        yield existing
    if pos == len(self):
        yield item
def setup_method(self, method):
    # Give each test a fresh, empty AttributeDict to work with.
    self.ad = AttributeDict()
class TestAttributeDict:

    """Tests for attribute-style access on ``AttributeDict``."""

    def setup_method(self, method):
        # Fresh, empty instance for every test.
        self.ad = AttributeDict()

    def test___delattr__(self):
        # Deleting as an attribute removes the dict key too.
        self.ad.test = 1
        assert "test" in self.ad
        assert hasattr(self.ad, "test")
        del self.ad.test
        assert "test" not in self.ad
        assert not hasattr(self.ad, "test")

    def test___getattr__(self):
        # Attribute access returns the exact same object as item access.
        self.ad.test = 1
        assert self.ad.test == 1
        assert self.ad.test is self.ad["test"]

    def test___setattr__(self):
        # Setting as an attribute sets the dict key too.
        self.ad.test = 1
        assert self.ad.test == 1
        assert self.ad.test is self.ad["test"]

    def test___setattr____conflict(self):
        # A key colliding with a dict method name must not shadow
        # the method on attribute access.
        self.ad.items = []
        assert callable(self.ad.items)

    def test___setattr____nested(self):
        # Nested plain dicts are coerced so chained attribute
        # access works all the way down.
        self.ad.test = {"test": {"nested": 1}}
        assert self.ad.test.test.nested == 1

    def test___setitem__(self):
        # Item assignment is mirrored by attribute access.
        self.ad["test"] = 1
        assert self.ad["test"] == 1
        assert self.ad.test is self.ad["test"]

    def test_copy(self):
        self.ad.a = 1
        self.ad.b = "test"
        self.ad.c = AttributeDict()
        self.ad.c.d = 1
        self.ad.c.e = "test"
        adc = self.ad.copy()
        assert isinstance(adc, AttributeDict)
        assert isinstance(adc.c, AttributeDict)
        assert adc == self.ad
        assert adc is not self.ad
        # copy() is shallow: the nested dict is shared.
        assert adc.c == self.ad.c
        assert adc.c is self.ad.c

    def test_copy_copy(self):
        # copy.copy behaves like the copy() method (shallow).
        self.ad.a = 1
        self.ad.b = "test"
        self.ad.c = AttributeDict()
        self.ad.c.d = 1
        self.ad.c.e = "test"
        adc = copy.copy(self.ad)
        assert isinstance(adc, AttributeDict)
        assert isinstance(adc.c, AttributeDict)
        assert adc == self.ad
        assert adc is not self.ad
        assert adc.c == self.ad.c
        assert adc.c is self.ad.c

    def test_deepcopy(self):
        self.ad.a = 1
        self.ad.b = "test"
        self.ad.c = AttributeDict()
        self.ad.c.d = 1
        self.ad.c.e = "test"
        adc = copy.deepcopy(self.ad)
        assert isinstance(adc, AttributeDict)
        assert isinstance(adc.c, AttributeDict)
        assert adc == self.ad
        assert adc is not self.ad
        # deepcopy duplicates nested values as well.
        assert adc.c == self.ad.c
        assert adc.c is not self.ad.c

    def test_setdefault(self):
        self.ad.setdefault("test", 1)
        assert self.ad["test"] == 1
        assert self.ad.test is self.ad["test"]

    def test_setdefault__nested(self):
        # Values set via setdefault-adjacent paths still coerce
        # nested dicts for attribute access.
        self.ad.test = {"test": {"nested": 1}}
        assert self.ad.test.test.nested == 1

    def test_update(self):
        self.ad.update({"test": 1})
        assert self.ad["test"] == 1
        assert self.ad.test is self.ad["test"]

    def test_update__nested(self):
        # update() coerces nested plain dicts too.
        self.ad.update({"test": {"nested": 1}})
        assert self.ad.test.nested == 1

    def test_from_json(self):
        test = AttributeDict.from_json('{"test": 1}')
        assert isinstance(test, AttributeDict)
        assert test == {"test": 1}

    def test_to_json(self):
        # to_json/from_json round-trip preserves equality.
        self.ad.test = 1
        test = self.ad.to_json()
        test = AttributeDict.from_json(test)
        assert test == self.ad
def test_to_json(self):
    # Serializing and parsing back should round-trip to an equal dict.
    self.ad.test = 1
    blob = self.ad.to_json()
    roundtripped = AttributeDict.from_json(blob)
    assert roundtripped == self.ad
def test_from_json(self):
    # Parsing JSON should yield an AttributeDict equal to the plain dict.
    parsed = AttributeDict.from_json('{"test": 1}')
    assert isinstance(parsed, AttributeDict)
    assert parsed == {"test": 1}
def __setitem__(self, index, value):
    """Set item at `index` to `value`, coerced to an ``AttributeDict``."""
    coerced = value if isinstance(value, AttributeDict) else AttributeDict(value)
    return super().__setitem__(index, coerced)
class GeoJSON(DataFrame):

    """
    A class for GeoJSON data.

    GeoJSON is a simple wrapper class that reads GeoJSON features into a
    :class:`.DataFrame`. Any operations on the data are thus done with
    methods provided by the data frame class. Geometry is available in the
    "geometry" column, but no special geometric operations are supported.
    All other data is available in the "metadata" attribute as an
    ``attd.AttributeDict``.
    """

    # List of names that are actual attributes, not columns
    ATTRIBUTES = DataFrame.ATTRIBUTES + ["metadata"]

    # Lists of supported GeoJSON keys and types
    FEATURE_KEYS = ["type", "properties", "geometry"]
    FEATURE_TYPES = ["Feature"]
    PROPERTY_TYPES = [bool, int, float, str, type(None)]
    TOP_LEVEL_TYPES = ["FeatureCollection"]

    def __init__(self, *args, **kwargs):
        """
        Return a new GeoJSON object.

        `args` and `kwargs` are like for ``dict``.

        https://docs.python.org/3/library/stdtypes.html#dict
        """
        super().__init__(*args, **kwargs)
        # Non-feature, top-level keys live in `metadata`, not as columns.
        self.metadata = AttributeDict(type="FeatureCollection")

    @classmethod
    def _check_raw_data(cls, data):
        # Validate the top-level object, then every feature.
        # Raises TypeError for unsupported types.
        if data.type not in cls.TOP_LEVEL_TYPES:
            raise TypeError(f"Top-level type {data.type!r} not supported")
        warned_feature_keys = []
        for feature in data.features:
            cls._check_raw_feature(feature, warned_feature_keys)

    @classmethod
    def _check_raw_feature(cls, feature, warned_feature_keys):
        # Validate one feature; warn only once per ignored key name,
        # tracked via the shared `warned_feature_keys` list.
        if feature.type not in cls.FEATURE_TYPES:
            raise TypeError(f"Feature type {feature.type!r} not supported")
        for key in set(feature) - set(cls.FEATURE_KEYS):
            if key in warned_feature_keys: continue
            print(f"Warning: Ignoring feature key {key!r}")
            warned_feature_keys.append(key)
        for key, value in feature.properties.items():
            if isinstance(value, tuple(cls.PROPERTY_TYPES)): continue
            raise TypeError(
                f"Property type {type(value)} of {key!r} not supported")

    @classmethod
    def read(cls, path, *, encoding="utf-8", columns=None, dtypes=None, **kwargs):
        """
        Return data from GeoJSON file `path`.

        Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

        `columns` is an optional list of columns to limit to.
        `dtypes` is an optional dict mapping column names to NumPy datatypes.
        `kwargs` are passed to ``json.load``.
        """
        # None instead of mutable defaults ([], {}): although the originals
        # were never mutated here, mutable default arguments are a
        # shared-state pitfall. Falsy values normalize identically.
        columns = columns or []
        dtypes = dtypes or {}
        with util.xopen(path, "rt", encoding=encoding) as f:
            raw = AttributeDict(json.load(f, **kwargs))
        cls._check_raw_data(raw)
        # First pass: collect the union of all property keys, since
        # individual features can have differing property sets.
        data = {}
        for feature in raw.features:
            for key in feature.properties:
                data.setdefault(key, [])
        if columns:
            data = {k: v for k, v in data.items() if k in columns}
        # Second pass: fill columns, padding missing properties with None.
        for feature in raw.features:
            for key in data:
                value = feature.properties.get(key, None)
                data[key].append(value)
        data["geometry"] = [x.geometry for x in raw.features]
        for name, dtype in dtypes.items():
            data[name] = DataFrameColumn(data[name], dtype)
        data = cls(**data)
        # Keep remaining top-level keys (type, crs, ...) as metadata
        # so that `write` can round-trip them.
        del raw.features
        data.metadata = raw
        return data

    def to_string(self, *, max_rows=None, max_width=None):
        # Geometry objects are unwieldy to print; show only their types.
        geometry = [f"<{x['type']}>" for x in self.geometry]
        data = self.modify(geometry=Vector.fast(geometry, object))
        # Pass by keyword: max_rows and max_width are keyword-only in
        # the mirrored DataFrame.to_string signature, so forwarding them
        # positionally would raise TypeError.
        return DataFrame.to_string(data, max_rows=max_rows, max_width=max_width)

    def write(self, path, *, encoding="utf-8", **kwargs):
        """
        Write data to GeoJSON file `path`.

        Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.

        `kwargs` are passed to ``json.dump``.
        """
        # default=str stringifies anything json can't serialize directly.
        kwargs.setdefault("default", str)
        kwargs.setdefault("ensure_ascii", False)
        # Indentation is handled manually line-by-line below so that
        # each feature stays on a single line; don't pass it to dumps.
        indent_width = kwargs.pop("indent", 2) or 0
        indent1 = " " * indent_width * 1
        indent2 = " " * indent_width * 2
        if "geometry" not in self:
            raise ValueError("Geometry missing")
        data = self.to_list_of_dicts()
        util.makedirs_for_file(path)
        with util.xopen(path, "wt", encoding=encoding) as f:
            f.write("{\n")
            # Top-level metadata keys first, then the features array.
            for key, value in self.metadata.items():
                blob = json.dumps(value, **kwargs)
                f.write(f'{indent1}"{key}": {blob},\n')
            f.write(f'{indent1}"features": [\n')
            for i, item in enumerate(data):
                geometry = item.pop("geometry")
                blob = {
                    "type": "Feature",
                    "properties": item,
                    "geometry": geometry,
                }
                blob = json.dumps(blob, **kwargs)
                # No trailing comma after the last feature (JSON forbids it).
                comma = "," if i < len(data) - 1 else ""
                f.write(f"{indent2}{blob}{comma}\n")
            f.write(f"{indent1}]\n")
            f.write("}\n")