예제 #1
0
    def __init__(self, *args, **kwargs):
        """
        Initialize a new GeoJSON object.

        `args` and `kwargs` are forwarded unchanged to the parent
        initializer and are like for ``dict``.

        https://docs.python.org/3/library/stdtypes.html#dict
        """
        super().__init__(*args, **kwargs)
        # Start out with minimal metadata: only the top-level "type" is set.
        self.metadata = AttributeDict({"type": "FeatureCollection"})
예제 #2
0
    def read(cls, path, *, encoding="utf-8", columns=[], dtypes={}, **kwargs):
        """
        Read GeoJSON file `path` and return its features as data.

        Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

        `columns` is an optional list of property names to limit to; if
        empty, all properties found are kept. `dtypes` is an optional dict
        mapping column names to NumPy datatypes. `kwargs` are passed to
        ``json.load``.

        Feature geometries are placed in the "geometry" column. Whatever is
        left of the parsed file after removing "features" is stored in the
        ``metadata`` attribute of the returned object.
        """
        with util.xopen(path, "rt", encoding=encoding) as fobj:
            raw = AttributeDict(json.load(fobj, **kwargs))
        cls._check_raw_data(raw)
        features = raw.features
        # Property names in order of first appearance across all features.
        names = dict.fromkeys(
            key for feature in features for key in feature.properties)
        if columns:
            names = [name for name in names if name in columns]
        # A feature lacking a property gets None in that column.
        table = {
            name: [feature.properties.get(name, None) for feature in features]
            for name in names
        }
        table["geometry"] = [feature.geometry for feature in features]
        for name, dtype in dtypes.items():
            table[name] = DataFrameColumn(table[name], dtype)
        frame = cls(**table)
        # Features now live in the columns; keep only the rest as metadata.
        del raw.features
        frame.metadata = raw
        return frame
예제 #3
0
    def select(self, *keys):
        """
        Return items, keeping only `keys`.

        Keys that an item does not have are skipped for that item. Kept
        keys appear in the order given in `keys`.

        >>> data = di.read_json("data/listings.json")
        >>> data.select("id", "hood", "zipcode")
        """
        for item in self:
            picked = {}
            for key in keys:
                if key in item:
                    picked[key] = item[key]
            yield AttributeDict(picked)
예제 #4
0
    def rename(self, **to_from_pairs):
        """
        Return items with keys renamed.

        Each keyword argument is given as ``new_name="old_name"``. Keys not
        mentioned are kept as they are.

        >>> data = di.read_json("data/listings.json")
        >>> data.rename(listing_id="id")
        """
        # Invert the arguments into a lookup from current name to new name.
        new_names = {old: new for new, old in to_from_pairs.items()}
        for item in self:
            pairs = [(new_names.get(key, key), value)
                     for key, value in item.items()]
            yield AttributeDict(pairs)
예제 #5
0
    def append(self, item):
        """
        Return list with `item` added to the end.

        `item` is converted to an ``AttributeDict`` if it is not one
        already.

        >>> data = di.read_json("data/listings.json")
        >>> data = data.append(dict.fromkeys(data[0].keys()))
        >>> data.tail()
        """
        if isinstance(item, AttributeDict):
            new_item = item
        else:
            new_item = AttributeDict(item)
        # Existing items first, then the new one.
        yield from self
        yield new_item
예제 #6
0
파일: test_attd.py 프로젝트: otsaloma/attd
 def test_deepcopy(self):
     # Populate with scalars plus a nested AttributeDict.
     self.ad.a = 1
     self.ad.b = "test"
     self.ad.c = AttributeDict()
     self.ad.c.d = 1
     self.ad.c.e = "test"
     clone = copy.deepcopy(self.ad)
     # The type must survive the copy at both levels.
     for obj in (clone, clone.c):
         assert isinstance(obj, AttributeDict)
     # A deep copy is equal in value but shares no object, not even nested.
     assert clone is not self.ad and clone == self.ad
     assert clone.c is not self.ad.c and clone.c == self.ad.c
예제 #7
0
    def insert(self, index, item):
        """
        Return list with `item` inserted at `index`.

        `index` is interpreted like for ``list.insert``: a negative index
        counts from the end and an out-of-range index is clamped, so that
        ``index >= len(self)`` adds `item` to the end and a too small
        negative index adds it to the beginning. `item` is converted to an
        ``AttributeDict`` if it is not one already.

        >>> data = di.read_json("data/listings.json")
        >>> data = data.insert(0, dict.fromkeys(data[0].keys()))
        >>> data.head()
        """
        if not isinstance(item, AttributeDict):
            item = AttributeDict(item)
        size = len(self)
        # Normalize the index up front. Without this, an index that never
        # equals a loop position (negative, or >= size, which includes any
        # index on an empty list) would silently drop the item.
        if index < 0:
            index = max(index + size, 0)
        index = min(index, size)
        for i in range(size):
            if i == index:
                yield item
            yield self[i]
        if index == size:
            # Insertion point is past the last existing item.
            yield item
예제 #8
0
파일: test_attd.py 프로젝트: otsaloma/attd
 def setup_method(self, method):
     # Start from a fresh, empty AttributeDict stored as `self.ad`.
     # `method` is not used here.
     self.ad = AttributeDict()
예제 #9
0
파일: test_attd.py 프로젝트: otsaloma/attd
class TestAttributeDict:
    """
    Tests for ``AttributeDict``.

    Each test works on `self.ad`, an empty ``AttributeDict`` created in
    `setup_method`.
    """

    def setup_method(self, method):
        self.ad = AttributeDict()

    def test___delattr__(self):
        self.ad.test = 1
        assert "test" in self.ad
        assert hasattr(self.ad, "test")
        # Deleting the attribute must remove the key as well.
        del self.ad.test
        assert "test" not in self.ad
        assert not hasattr(self.ad, "test")

    def test___getattr__(self):
        self.ad.test = 1
        assert self.ad.test == 1
        assert self.ad.test is self.ad["test"]

    def test___setattr__(self):
        self.ad.test = 1
        assert self.ad.test == 1
        assert self.ad.test is self.ad["test"]

    def test___setattr____conflict(self):
        # Assigning to a name that is also a dict method must leave
        # attribute access resolving to something callable.
        self.ad.items = []
        assert callable(self.ad.items)

    def test___setattr____nested(self):
        # Nested plain dicts must be reachable by attribute access too.
        self.ad.test = {"test": {"nested": 1}}
        assert self.ad.test.test.nested == 1

    def test___setitem__(self):
        self.ad["test"] = 1
        assert self.ad["test"] == 1
        assert self.ad.test is self.ad["test"]

    def test_copy(self):
        self.ad.a = 1
        self.ad.b = "test"
        self.ad.c = AttributeDict()
        self.ad.c.d = 1
        self.ad.c.e = "test"
        adc = self.ad.copy()
        assert isinstance(adc, AttributeDict)
        assert isinstance(adc.c, AttributeDict)
        assert adc == self.ad
        assert adc is not self.ad
        assert adc.c == self.ad.c
        # Shallow copy: the nested object is shared.
        assert adc.c is self.ad.c

    def test_copy_copy(self):
        self.ad.a = 1
        self.ad.b = "test"
        self.ad.c = AttributeDict()
        self.ad.c.d = 1
        self.ad.c.e = "test"
        adc = copy.copy(self.ad)
        assert isinstance(adc, AttributeDict)
        assert isinstance(adc.c, AttributeDict)
        assert adc == self.ad
        assert adc is not self.ad
        assert adc.c == self.ad.c
        # Shallow copy: the nested object is shared.
        assert adc.c is self.ad.c

    def test_deepcopy(self):
        self.ad.a = 1
        self.ad.b = "test"
        self.ad.c = AttributeDict()
        self.ad.c.d = 1
        self.ad.c.e = "test"
        adc = copy.deepcopy(self.ad)
        assert isinstance(adc, AttributeDict)
        assert isinstance(adc.c, AttributeDict)
        assert adc == self.ad
        assert adc is not self.ad
        assert adc.c == self.ad.c
        # Deep copy: the nested object is copied as well.
        assert adc.c is not self.ad.c

    def test_setdefault(self):
        self.ad.setdefault("test", 1)
        assert self.ad["test"] == 1
        assert self.ad.test is self.ad["test"]

    def test_setdefault__nested(self):
        # Go through setdefault itself; plain attribute assignment is
        # already covered by test___setattr____nested.
        self.ad.setdefault("test", {"test": {"nested": 1}})
        assert self.ad.test.test.nested == 1

    def test_update(self):
        self.ad.update({"test": 1})
        assert self.ad["test"] == 1
        assert self.ad.test is self.ad["test"]

    def test_update__nested(self):
        self.ad.update({"test": {"nested": 1}})
        assert self.ad.test.nested == 1

    def test_from_json(self):
        test = AttributeDict.from_json('{"test": 1}')
        assert isinstance(test, AttributeDict)
        assert test == {"test": 1}

    def test_to_json(self):
        # Round trip: encoding and decoding must give back an equal object.
        self.ad.test = 1
        test = self.ad.to_json()
        test = AttributeDict.from_json(test)
        assert test == self.ad
예제 #10
0
파일: test_attd.py 프로젝트: otsaloma/attd
 def test_to_json(self):
     # Round trip: encoding and decoding must give back an equal object.
     self.ad.test = 1
     encoded = self.ad.to_json()
     decoded = AttributeDict.from_json(encoded)
     assert decoded == self.ad
예제 #11
0
파일: test_attd.py 프로젝트: otsaloma/attd
 def test_from_json(self):
     # Parsing must give an AttributeDict that equals the plain dict.
     parsed = AttributeDict.from_json('{"test": 1}')
     assert isinstance(parsed, AttributeDict)
     assert parsed == {"test": 1}
예제 #12
0
 def __setitem__(self, index, value):
     # Store every value as an AttributeDict, converting if needed.
     coerced = value if isinstance(value, AttributeDict) else AttributeDict(value)
     return super().__setitem__(index, coerced)
예제 #13
0
class GeoJSON(DataFrame):
    """
    A class for GeoJSON data.

    GeoJSON is a simple wrapper class that reads GeoJSON features into a
    :class:`.DataFrame`. Any operations on the data are thus done with
    methods provided by the data frame class. Geometry is available in the
    "geometry" column, but no special geometric operations are supported.

    All other data is available in the "metadata" attribute as an
    ``attd.AttributeDict``.
    """

    # List of names that are actual attributes, not columns
    ATTRIBUTES = DataFrame.ATTRIBUTES + ["metadata"]

    # Lists of supported GeoJSON keys and types
    FEATURE_KEYS = ["type", "properties", "geometry"]
    FEATURE_TYPES = ["Feature"]
    PROPERTY_TYPES = [bool, int, float, str, type(None)]
    TOP_LEVEL_TYPES = ["FeatureCollection"]

    def __init__(self, *args, **kwargs):
        """
        Return a new GeoJSON object.

        `args` and `kwargs` are like for ``dict``.

        https://docs.python.org/3/library/stdtypes.html#dict
        """
        super().__init__(*args, **kwargs)
        self.metadata = AttributeDict(type="FeatureCollection")

    @classmethod
    def _check_raw_data(cls, data):
        """
        Check that parsed GeoJSON `data` is of a supported kind.

        Raise :class:`TypeError` if the top-level type, a feature type or
        a property value type is not supported.
        """
        if data.type not in cls.TOP_LEVEL_TYPES:
            raise TypeError(f"Top-level type {data.type!r} not supported")
        # Shared across features so that each unknown key is warned
        # about only once per file.
        warned_feature_keys = []
        for feature in data.features:
            cls._check_raw_feature(feature, warned_feature_keys)

    @classmethod
    def _check_raw_feature(cls, feature, warned_feature_keys):
        """
        Check that a single parsed `feature` is of a supported kind.

        Unknown feature keys are only warned about, printing once per key;
        keys already warned about are tracked in `warned_feature_keys`,
        which is appended to in place. Raise :class:`TypeError` for an
        unsupported feature type or property value type.
        """
        if feature.type not in cls.FEATURE_TYPES:
            raise TypeError(f"Feature type {feature.type!r} not supported")
        for key in set(feature) - set(cls.FEATURE_KEYS):
            if key in warned_feature_keys:
                continue
            print(f"Warning: Ignoring feature key {key!r}")
            warned_feature_keys.append(key)
        supported = tuple(cls.PROPERTY_TYPES)
        # GeoJSON allows "properties" to be null, treat that as no properties.
        properties = feature.properties or {}
        for key, value in properties.items():
            if not isinstance(value, supported):
                raise TypeError(
                    f"Property type {type(value)} of {key!r} not supported")

    @classmethod
    def read(cls, path, *, encoding="utf-8", columns=[], dtypes={}, **kwargs):
        """
        Return data from GeoJSON file `path`.

        Will automatically decompress if `path` ends in ``.bz2|.gz|.xz``.

        `columns` is an optional list of columns to limit to. `dtypes` is an
        optional dict mapping column names to NumPy datatypes. `kwargs` are
        passed to ``json.load``.

        Features whose "properties" is null are read as having no
        properties, i.e. they get ``None`` in every column.
        """
        # NOTE: The mutable defaults above are only read, never mutated.
        with util.xopen(path, "rt", encoding=encoding) as f:
            raw = AttributeDict(json.load(f, **kwargs))
        cls._check_raw_data(raw)
        # Collect column names in order of first appearance.
        data = {}
        for feature in raw.features:
            for key in feature.properties or {}:
                data.setdefault(key, [])
        if columns:
            data = {k: v for k, v in data.items() if k in columns}
        for feature in raw.features:
            properties = feature.properties or {}
            for key in data:
                # Features lacking a property get None in that column.
                data[key].append(properties.get(key, None))
        data["geometry"] = [x.geometry for x in raw.features]
        for name, dtype in dtypes.items():
            data[name] = DataFrameColumn(data[name], dtype)
        data = cls(**data)
        # Features are now in the columns, keep everything else as metadata.
        del raw.features
        data.metadata = raw
        return data

    def to_string(self, *, max_rows=None, max_width=None):
        """
        Return the result of ``DataFrame.to_string`` for this data, with
        each geometry abbreviated to its type in angle brackets.

        `max_rows` and `max_width` are passed on to
        ``DataFrame.to_string``.
        """
        geometry = [f"<{x['type']}>" for x in self.geometry]
        data = self.modify(geometry=Vector.fast(geometry, object))
        return DataFrame.to_string(data, max_rows, max_width)

    def write(self, path, *, encoding="utf-8", **kwargs):
        """
        Write data to GeoJSON file `path`.

        Will automatically compress if `path` ends in ``.bz2|.gz|.xz``.

        `kwargs` are passed to ``json.dumps``, except for `indent`, which
        only sets the indentation width of the surrounding structure
        (default 2); each metadata value and each feature is written on a
        single line. `default` defaults to ``str`` and `ensure_ascii` to
        ``False``.

        Raise :class:`ValueError` if there is no "geometry" column.
        """
        kwargs.setdefault("default", str)
        kwargs.setdefault("ensure_ascii", False)
        indent_width = kwargs.pop("indent", 2) or 0
        indent1 = " " * indent_width * 1
        indent2 = " " * indent_width * 2
        if "geometry" not in self:
            raise ValueError("Geometry missing")
        data = self.to_list_of_dicts()
        last = len(data) - 1
        util.makedirs_for_file(path)
        with util.xopen(path, "wt", encoding=encoding) as f:
            f.write("{\n")
            for key, value in self.metadata.items():
                # Encode the key as a JSON string so that quotes and
                # backslashes in it cannot break the output.
                name = json.dumps(
                    str(key), ensure_ascii=kwargs["ensure_ascii"])
                blob = json.dumps(value, **kwargs)
                f.write(f"{indent1}{name}: {blob},\n")
            f.write(f'{indent1}"features": [\n')
            for i, item in enumerate(data):
                # What remains of the row after taking out the geometry
                # are the properties of the feature.
                geometry = item.pop("geometry")
                blob = {
                    "type": "Feature",
                    "properties": item,
                    "geometry": geometry
                }
                blob = json.dumps(blob, **kwargs)
                comma = "," if i < last else ""
                f.write(f"{indent2}{blob}{comma}\n")
            f.write(f"{indent1}]\n")
            f.write("}\n")