def _to_geojson(df, d, **kw):
    """Write the dataframe *df* to the GeoJSON file *d*.

    Uses :mod:`geopandas` when available; otherwise falls back on a plain
    JSON dump of the frame serialised through ``Frame.to_json``.
    """
    # NOTE(review): the guard flag is named _is_geojson_imported while the
    # branch actually requires geopandas (gpd) - confirm the intended flag.
    if _is_geojson_imported is True:
        # fix: the original referenced the undefined name 'kwargs' (the
        # parameter is **kw) and the non-existent gpd.to_file
        nkw = Struct.inspect_kwargs(kw, gpd.GeoDataFrame.to_file)
        df.to_file(d, driver='GeoJSON', **nkw)
    else:
        # fix: 'encoding' was an undefined name; accept it as an option
        enc = kw.pop('encoding', 'utf-8')
        nkw = Struct.inspect_kwargs(kw, Frame.to_json)
        res = Frame.to_json(df, **nkw)
        with open(d, 'w', encoding=enc) as f:
            json.dump(res, f, ensure_ascii=False)
def _read_geojson(s, **kw):
    """Load the GeoJSON source *s*, preferring geopandas when installed."""
    if _is_geopandas_imported is True:
        opts = Struct.inspect_kwargs(kw, gpd.read_file)
        opts['driver'] = 'GeoJSON'
        return gpd.read_file(s, **opts)
    # geojson.load is a wrapper around the core json.load function with the
    # same name, and will pass through any additional arguments
    opts = Struct.inspect_kwargs(kw, geojson.load)
    return geojson.load(s, **opts)
def _read_shapefile(s, **kw):
    """Read the shapefile source *s* through geopandas, after best-effort
    checks that the path exists on disk and that the companion sidecar files
    (.shx/.prj/.dbf, upper- or lower-case) sit next to it."""
    # best-effort existence check: only warns, never aborts
    try:
        assert osp.exists(s) is True # Misc.File.file_exists(s)
    except:
        warnings.warn(
            "\n! GeoPandas reads URLs and files on disk only - set flags on_disk=True and ignore_buffer=True when loading sourc" )
    # split into directory and extension-less basename; silently skipped when
    # s is not path-like (p/f then stay undefined and the next bare except
    # swallows the resulting NameError)
    try:
        p, f = osp.dirname(s), osp.basename(osp.splitext(s)[0])
    except:
        pass
    # require all three companion files in either case variant
    try:
        assert (osp.exists(osp.join(p,'%s.shx' % f)) or osp.exists(osp.join(p,'%s.SHX' % f))) \
            and (osp.exists(osp.join(p,'%s.prj' % f)) or osp.exists(osp.join(p,'%s.PRJ' % f))) \
            and (osp.exists(osp.join(p,'%s.dbf' % f)) or osp.exists(osp.join(p,'%s.DBF' % f)))
    except AssertionError:
        warnings.warn(
            "\n! Companion files [.dbf, .shx, .prj] are required together with shapefile source"
            " - add companion files to path, e.g. set flags fmt='csv' and 'infer_fmt'=False when loading source" )
    except:
        pass
    # keep only the keyword arguments accepted by gpd.read_file
    nkw = Struct.inspect_kwargs(kw, gpd.read_file)
    # NOTE(review): GDAL/fiona name this driver 'ESRI Shapefile', not
    # 'shapefile' - confirm this value is actually honoured downstream
    nkw.update({'driver': 'shapefile'})
    return gpd.read_file(s, **nkw)
def loads(cls, s, **kwargs):
    """Deserialise the JSON string *s*.

    The keyword flag ``serialize`` (default ``False``) enables the
    class-specific ``cls.restore`` object hook; any remaining keyword
    arguments accepted by :func:`json.loads` are passed through.
    """
    serialize = kwargs.pop('serialize', False)
    # keep only the keyword arguments json.loads actually accepts
    nkwargs = Struct.inspect_kwargs(kwargs, json.loads)
    if serialize is True:
        return json.loads(s, object_hook=cls.restore, **nkwargs)
    # fix: this branch previously forwarded the raw **kwargs, which could
    # pass unsupported keywords on to json.loads (cf. sibling dumps/dump,
    # which filter in both branches)
    return json.loads(s, **nkwargs)
def dumps(cls, data, **kwargs):
    """Serialise *data* to a JSON string, optionally passing it through
    ``cls.serialize`` first (keyword flag ``serialize``, default False)."""
    do_serialize = kwargs.pop('serialize', False)
    opts = Struct.inspect_kwargs(kwargs, json.dumps)
    payload = cls.serialize(data) if do_serialize is True else data
    return json.dumps(payload, **opts)
def dump(cls, data, f, **kwargs):
    """Dump *data* as JSON into the open file *f*, optionally passing it
    through ``cls.serialize`` first (keyword flag ``serialize``).

    NOTE: when is_order_preserved is False, this entire class can actually
    be ignored since the dump/load methods are exactly equivalent to the
    original dump/load method of the json package.
    """
    do_serialize = kwargs.pop('serialize', False)
    opts = Struct.inspect_kwargs(kwargs, json.dump)
    payload = cls.serialize(data) if do_serialize is True else data
    json.dump(payload, f, **opts)
def check_format(fmt, infer_fmt=False):
    """Validate and normalise the FMT parameter into a list of format
    strings, optionally extended with the formats to infer.

        >>> fmt = File.check_format(fmt, infer_fmt=False)
    """
    # fmt must be None, a single string, or a sequence of strings
    try:
        assert fmt is None or isinstance(fmt, string_types) \
            or (isinstance(fmt, Sequence) and all([isinstance(f, string_types) for f in fmt]))
    except:
        raise IOError("Wrong format for FMT parameter: '%s'" % fmt)
    if fmt is None:
        fmt = list(File.FORMATS.values())
    elif isinstance(fmt, string_types):
        fmt = [ fmt, ]
    # infer_fmt must be a bool, a single string, or a sequence of strings
    try:
        assert isinstance(infer_fmt, (bool, string_types)) \
            or (isinstance(infer_fmt, Sequence) and all([isinstance(f, string_types) for f in infer_fmt]))
    except:
        raise IOError("Wrong format for INFER_FMT flag: '%s'" % infer_fmt)
    if infer_fmt is True: # extend... with all besides those parsed
        # NOTE(review): this takes FORMATS.keys() while the fmt=None default
        # above takes FORMATS.values() - confirm the asymmetry is intended
        infer_fmt = File.FORMATS.keys() # default
    elif isinstance(infer_fmt, string_types):
        infer_fmt = [ infer_fmt, ]
    if not infer_fmt is False: # extend... with all besides those parsed
        fmt.extend(infer_fmt) # test all!
    # replace 'xlsx' with 'xls' in place: insert 'xls' just before it, then
    # remove 'xlsx' (insert returns None so the `or` always runs remove);
    # no-op when 'xlsx' is absent thanks to the swallowed ValueError
    try:
        fmt.insert(fmt.index('xlsx'), 'xls') or fmt.remove('xlsx')
    except:
        pass
    # deduplicate while restricting to the formats known to File
    fmt = Struct.uniq_items(fmt, items=File.FORMATS)
    try:
        assert fmt not in (None, [], '')
    except:
        raise IOError("Data format FMT not recognised: '%s'" % fmt)
    # uniq_items may collapse a singleton to a bare string - re-wrap it
    if isinstance(fmt, string_types):
        fmt = [ fmt, ]
    return fmt
def _read_topojson(s, **kw):
    """Read the TopoJSON source *s* through geopandas."""
    opts = Struct.inspect_kwargs(kw, gpd.read_file)
    opts['driver'] = 'TopoJSON'
    # alternative (kept for reference): wrap s in fiona.MemoryFile /
    # fiona.ZipMemoryFile and build via gpd.GeoDataFrame.from_features
    return gpd.read_file(s, **opts)
def _read_table(s, **kw):
    """Thin wrapper around :func:`pandas.read_table` with keyword filtering."""
    return pd.read_table(s, **Struct.inspect_kwargs(kw, pd.read_table))
def _read_html(s, **kw):
    """Thin wrapper around :func:`pandas.read_html` with keyword filtering."""
    return pd.read_html(s, **Struct.inspect_kwargs(kw, pd.read_html))
def _read_sas(s, **kw):
    """Thin wrapper around :func:`pandas.read_sas` with keyword filtering."""
    return pd.read_sas(s, **Struct.inspect_kwargs(kw, pd.read_sas))
def unzip(file, **kwargs):
    """Unzip file on-disk.

        >>> res = File.unzip(file, **kwargs)

    Exactly one operation among ['open', 'extract', 'extractall', 'getinfo',
    'namelist', 'read', 'infolist'] may be requested per call through the
    keyword arguments; it defaults to 'extractall'. The 'path' keyword
    overrides the default extraction directory.
    """
    # fix: validation now raises explicitly instead of going through assert
    # statements, which are stripped under python -O
    if not zipfile.is_zipfile(file):
        raise IOError("Zip file '%s' not recognised" % file)
    path = kwargs.pop('path') if 'path' in kwargs else SysEnv.default_cache()
    operators = [op for op in ['open', 'extract', 'extractall', 'getinfo',
                               'namelist', 'read', 'infolist']
                 if op in kwargs.keys()]
    if not (operators in ([], [None]) or len(operators) == 1):
        raise IOError("Only one operation supported per call")
    operator = 'extractall' if operators in ([], [None]) else operators[0]
    # fix: members was previously undefined (NameError) on the
    # 'infolist'/'namelist' path when reaching the isinstance check below
    members = None
    if operator in ('infolist', 'namelist'):
        if kwargs.get(operator) in (False, None):
            raise IOError("No operation parsed")
    else:
        members = kwargs.pop(operator, None)
    #if operator.startswith('extract'):
    #    warn("\n! Data extracted from zip file will be physically stored on local disk !")
    if isinstance(members, string_types):
        members = [ members, ]
    with zipfile.ZipFile(file) as zf:
        namelist, infolist = zf.namelist(), zf.infolist()
        if operator == 'namelist':
            return namelist if len(namelist) > 1 else namelist[0]
        elif operator == 'infolist':
            return infolist if len(infolist) > 1 else infolist[0]
        elif operator == 'extractall':
            if members in (None, True):
                members = namelist
            return zf.extractall(path=path, members=members)
        # now: operator in ('open', 'extract', 'getinfo', 'read')
        if members is None and len(namelist) == 1:
            members = namelist
        elif members is not None:
            # resolve the requested members against the archive content,
            # walking backwards so unresolved entries can be dropped in place
            for i in reversed(range(len(members))):
                m = members[i]
                if m in namelist:
                    continue
                # fall back on a unique suffix match (e.g. bare file name)
                _mem = [n for n in namelist if n.endswith(m)]
                if len(_mem) == 1:
                    members[i] = _mem[0]
                    continue
                if len(_mem) > 1:
                    # fix: the original message lacked a '%s' placeholder,
                    # so the % m interpolation raised a TypeError
                    warn("\n! Multiple files matching '%s' in zip source - ambiguity not resolved !" % m)
                else: # len(_mem) == 0
                    warn("\n! File '%s' not found in zip source !" % m)
                members.pop(i)
        if members in ([], None):
            raise IOError("Impossible to retrieve member file(s) from zipped data")
        # keep only the keyword arguments accepted by the selected ZipFile method
        nkw = Struct.inspect_kwargs(kwargs, getattr(zf, operator))
        if operator == 'extract':
            nkw.update({'path': path})
        return {m: getattr(zf, operator)(m, **nkw) for m in members}
def _to_json(df, d, **kw):
    """Write the dataframe *df* to the JSON file *d* through ``Frame.to_json``."""
    # fix: the original referenced the undefined names 'kwargs', 'cls' and
    # 'encoding'; use the actual **kw parameter, the Frame helper the
    # keywords are inspected against, and an explicit encoding option
    enc = kw.pop('encoding', 'utf-8')
    nkw = Struct.inspect_kwargs(kw, Frame.to_json)
    res = Frame.to_json(df, **nkw)
    with open(d, 'w', encoding=enc) as f:
        json.dump(res, f, ensure_ascii=False)
def _to_excel(df, d, **kw):
    """Write the dataframe *df* to the Excel file *d*."""
    # fix: 'kwargs' was an undefined name (the parameter is **kw) and pandas
    # has no module-level to_excel - inspect the DataFrame method instead
    nkw = Struct.inspect_kwargs(kw, pd.DataFrame.to_excel)
    df.to_excel(d, **nkw)
def _to_csv(df, d, **kw):
    """Write the dataframe *df* to the CSV file *d*."""
    # fix: pandas has no module-level to_csv - inspect the keyword arguments
    # against the DataFrame.to_csv method instead
    nkw = Struct.inspect_kwargs(kw, pd.DataFrame.to_csv)
    df.to_csv(d, **nkw)
def _read_geopackage(s, **kw):
    """Read the GeoPackage source *s* through geopandas."""
    opts = Struct.inspect_kwargs(kw, gpd.read_file)
    opts['driver'] = 'GPKG'
    return gpd.read_file(s, **opts)
def _read_csv(s, **kw):
    """Read the CSV source *s*, retrying with charset detection on failure."""
    nkw = Struct.inspect_kwargs(kw, pd.read_csv)
    try:
        return pd.read_csv(s, **nkw)
    except Exception:
        # fix: narrowed from a bare except so KeyboardInterrupt/SystemExit
        # are no longer swallowed; any read error triggers the charset-
        # detecting fallback
        return SysEnv.chardet_decorate(pd.read_csv)(s, **nkw)
def _to_geopackage(df, d, **kw):
    """Write the dataframe *df* to the GeoPackage file *d*."""
    # fix: 'kwargs' was an undefined name (the parameter is **kw) and
    # geopandas has no module-level to_file - inspect the GeoDataFrame
    # method instead
    nkw = Struct.inspect_kwargs(kw, gpd.GeoDataFrame.to_file)
    df.to_file(d, driver='GPKG', **nkw)
def _read_json(s, **kw):
    """Thin wrapper around :func:`pandas.read_json` with keyword filtering."""
    return pd.read_json(s, **Struct.inspect_kwargs(kw, pd.read_json))