def _process_changes(self, ilayer, olayer, column='gml_id'):
    """Build the action map (update/add) for features of a change file.

    Every feature of *ilayer* is matched against *olayer* by the value of
    *column* (default 'gml_id'). Features already present in the output are
    marked for update (keeping the first matching FID), unseen features are
    marked for addition. Duplicate matches in the output layer are removed.

    :param ilayer: input OGR layer (change file)
    :param olayer: output OGR layer to be synchronized
    :param column: attribute used to pair input and output features

    :return: dict mapping input FID -> (Action, output FID or -1)
    """
    changes_list = {}

    ilayer.ResetReading()
    ifeature = ilayer.GetNextFeature()
    while ifeature:
        fcode = ifeature.GetField(column)

        # collect FIDs of output features carrying the same code
        found = []
        olayer.SetAttributeFilter("%s = '%s'" % (column, fcode))
        for feature in olayer:
            found.append(feature.GetFID())

        if found:
            # feature already exists in output layer -> update first match
            changes_list[ifeature.GetFID()] = (Action.update, found[0])
        else:
            # new feature -> add (output FID not known yet)
            changes_list[ifeature.GetFID()] = (Action.add, -1)

        if len(found) > 1:
            # TODO: how to handle correctly?
            VfrLogger.warning("Layer '%s': %d features '%s' found. Duplicated features will be deleted." % \
                                  (olayer.GetName(), len(found), fcode))
            # delete duplicates
            for fid in found[1:]:
                olayer.DeleteFeature(fid)

        ifeature = ilayer.GetNextFeature()

    # unset attribute filter
    olayer.SetAttributeFilter(None)

    return changes_list
def _error_handler(self, err_level, err_no, err_msg):
    """GDAL error handler routing messages to the application logger.

    Anything above warning severity aborts processing via RuntimeError;
    debug messages and warnings are only logged.
    """
    if err_level > gdal.CE_Warning:
        # errors and fatal errors abort processing
        raise RuntimeError(err_msg)
    if err_level == gdal.CE_Debug:
        VfrLogger.debug(err_msg + os.linesep)
        return
    VfrLogger.warning(err_msg)
def _update_fid_seq(self, table, fid, column='ogc_fid'):
    """Synchronize the PostgreSQL FID sequence of *table* with *fid*.

    A best-effort operation: without an open DB connection, or when the
    sequence update fails, only a warning is emitted.

    :param table: table name (optionally schema-qualified)
    :param fid: value the sequence should be set to
    :param column: FID column name (sequence is '<table>_<column>_seq')
    """
    if not self._conn:
        VfrLogger.warning("Unable to update FID sequence for table '%s'" % table)
        return

    cursor = self._conn.cursor()
    try:
        stmt = "SELECT setval('%s_%s_seq', %d)" % (table, column, fid)
        cursor.execute(stmt)
    except StandardError as e:
        VfrLogger.warning("Unable to update FID sequence for table '%s': %s" % (table, e))
    cursor.close()
def _modify_feature(self, feature, geom_idx, ofeature, suppress=False): # set requested geometry if geom_idx > -1: geom = feature.GetGeomFieldRef(geom_idx) if geom: ofeature.SetGeometry(geom.Clone()) else: ofeature.SetGeometry(None) if not suppress: VfrLogger.warning("Feature %d has no geometry (geometry column: %d)" % \ (feature.GetFID(), geom_idx)) return geom_idx
def open_file(self, filename, force_date=None):
    """Collect the list of VFR files to be processed.

    *filename* can be:
      * a string containing several file names separated by os.linesep
        (an already-expanded date range) -> returned split, nothing stored,
      * a single VFR file ('*.xml' / '*.xml.gz'),
      * a text file listing VFR files one per line ('#' starts a comment);
        missing dates and the '.xml.gz' extension are filled in and
        non-local files are downloaded.

    :param filename: input file name or file-list name
    :param force_date: date string to use instead of the computed one

    :return: list of VFR files to process
    :raises VfrError: when the file list cannot be read
    """
    self._file_list = list()

    if os.linesep in filename:
        # already list of files (date range)
        return filename.split(os.linesep)

    mtype = mimetypes.guess_type(filename)[0]
    if mtype is None or 'xml' not in mtype:
        # assuming text file containing list of VFR files;
        # 'with' guarantees the handle is closed even on error (the
        # original closed it after the try/except, leaking on raise)
        try:
            with open(filename) as f:
                lines = f.read().splitlines()
        except IOError:
            raise VfrError("Unable to read '%s'" % filename)

        for line in lines:
            if len(line) < 1 or line.startswith('#'):
                continue # skip empty or commented lines

            if not line.startswith('http://') and \
                    not line.startswith('20'):
                # determine date if missing
                if not force_date:
                    if line.startswith('ST_Z'):
                        date = yesterday()
                    else:
                        date = last_day_of_month()
                else:
                    date = force_date
                line = date + '_' + line

            if not line.endswith('.xml.gz'):
                # add extension if missing
                line += '.xml.gz'

            if not os.path.exists(line):
                # file not available locally -> download it
                if not line.startswith('http://'):
                    line = 'http://vdp.cuzk.cz/vymenny_format/soucasna/' + line
                line = download_vfr(line)

            self._file_list.append(line)

        VfrLogger.msg("%d VFR files will be processed..." % len(self._file_list))
    else:
        # single VFR file
        self._file_list.append(filename)

    return self._file_list
def __init__(self, frmt, dsn, geom_name=None, layers=None, nogeomskip=False,
             overwrite=False, lco_options=None):
    """Initialize the VFR -> OGR converter.

    :param frmt: output OGR format name (e.g. 'PostgreSQL', 'SQLite')
    :param dsn: output OGR datasource name (falsy -> listing mode only)
    :param geom_name: geometry column to export (None -> all columns)
    :param layers: list of layer names to process (None -> all layers)
    :param nogeomskip: True to skip features without geometry
    :param overwrite: True to overwrite existing output layers
    :param lco_options: OGR layer creation options

    :raises VfrError: when a required OGR driver is missing or the output
                      datasource cannot be opened/created
    """
    self._check_ogr()
    self.frmt = frmt
    self._geom_name = geom_name
    self._overwrite = overwrite
    # BUG fixed: the defaults were mutable lists ([]), shared between all
    # instances -- and _lco_options was even appended to below, so the
    # shared default accumulated options across instances
    self._layer_list = layers if layers is not None else []
    self._nogeomskip = nogeomskip
    self._lco_options = lco_options if lco_options is not None else []
    self._file_list = []

    # input
    self._idrv = ogr.GetDriverByName("GML")
    if self._idrv is None:
        raise VfrError("Unable to select GML driver")
    self._ids = None

    # output
    self.odsn = dsn
    if not self.odsn:
        # listing mode: no output datasource
        self._odrv = self._ods = None
        return

    self._odrv = ogr.GetDriverByName(frmt)
    if self._odrv is None:
        raise VfrError("Format '%s' is not supported" % frmt)

    # try to open datasource
    self._ods = self._odrv.Open(self.odsn, True)
    if self._ods is None:
        # if fails, try to create new datasource
        self._ods = self._odrv.CreateDataSource(self.odsn)
    if self._ods is None:
        raise VfrError("Unable to open or create new datasource '%s'" % self.odsn)

    self._create_geom = self._ods.TestCapability(ogr.ODsCCreateGeomFieldAfterCreateLayer)
    if not self._geom_name and \
            not self._create_geom:
        VfrLogger.warning("Driver '%s' doesn't support multiple geometry columns. "
                          "Only first will be used." % self._odrv.GetName())

    # OVERWRITE is not supported by Esri Shapefile
    if self._overwrite:
        if self.frmt != 'ESRI Shapefile':
            self._lco_options.append("OVERWRITE=YES")
def _check_epsg(self): if not self._conn: return cursor = self._conn.cursor() try: cursor.execute("SELECT srid FROM spatial_ref_sys WHERE srid = 5514") except StandardError as e: raise VfrError("PostGIS doesn't seems to be activated. %s" % e) epsg_exists = bool(cursor.fetchall()) if not epsg_exists: stmt = """INSERT INTO spatial_ref_sys (srid, auth_name, auth_srid, proj4text, srtext) VALUES ( 5514, 'EPSG', 5514, '+proj=krovak +lat_0=49.5 +lon_0=24.83333333333333 +alpha=30.28813972222222 +k=0.9999 +x_0=0 +y_0=0 +ellps=bessel +towgs84=570.8,85.7,462.8,4.998,1.587,5.261,3.56 +units=m +no_defs ', 'PROJCS["S-JTSK / Krovak East North",GEOGCS["S-JTSK",DATUM["System_Jednotne_Trigonometricke_Site_Katastralni",SPHEROID["Bessel 1841",6377397.155,299.1528128,AUTHORITY["EPSG","7004"]],TOWGS84[570.8,85.7,462.8,4.998,1.587,5.261,3.56],AUTHORITY["EPSG","6156"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4156"]],PROJECTION["Krovak"],PARAMETER["latitude_of_center",49.5],PARAMETER["longitude_of_center",24.83333333333333],PARAMETER["azimuth",30.28813972222222],PARAMETER["pseudo_standard_parallel_1",78.5],PARAMETER["scale_factor",0.9999],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["X",EAST],AXIS["Y",NORTH],AUTHORITY["EPSG","5514"]]')""" cursor.execute(stmt) self._conn.commit() VfrLogger.msg("EPSG 5514 defined in DB") cursor.close()
def print_summary(self):
    """Print the feature count of each processed layer and the time spent."""
    stime = time.time()

    layer_list = copy.deepcopy(self._layer_list)
    if not layer_list:
        # no explicit selection -> summarize every layer of the datasource
        layer_list = [self._ods.GetLayer(idx).GetName()
                      for idx in range(self._ods.GetLayerCount())]

    VfrLogger.msg("Summary")
    for layer_name in layer_list:
        layer = self._ods.GetLayerByName(layer_name)
        if not layer:
            continue
        sys.stdout.write("Layer %-20s ... %10d features\n" % \
                             (layer_name, layer.GetFeatureCount()))

    nsec = time.time() - stime
    etime = str(datetime.timedelta(seconds=nsec))
    VfrLogger.msg("Time elapsed: %s" % str(etime))
def _get_fid_max(self, table, column='ogc_fid'):
    """Return the maximum FID currently stored in *table*.

    :param table: table name (optionally schema-qualified)
    :param column: FID column name

    :return: maximum FID, -1 when the query fails or the table is empty,
             or None when no DB connection is defined
    """
    if not self._conn:
        # BUG fixed: the message had no placeholder yet was %-formatted
        # with the table name, raising TypeError instead of warning
        VfrLogger.warning("No DB connection defined for table '%s'." % table)
        return

    cursor = self._conn.cursor()
    try:
        cursor.execute("SELECT max(%s) FROM %s" % (column, table))
    except StandardError:
        # query failed (table probably missing) -> abort the transaction
        cursor.execute('ROLLBACK')
        cursor.close()
        return -1

    try:
        fid_max = int(cursor.fetchall()[0][0])
    except TypeError:
        # max() returned NULL -> table is empty
        fid_max = -1

    cursor.close()
    return fid_max
def create_indices(self):
    """Create a (gml_id) index on every imported table that lacks one.

    Iterates over all target schemas and selected layers; existing indices
    are detected via pg_indexes and left untouched. Does nothing without
    an open DB connection.
    """
    if not self._conn:
        return

    if not self.schema_list:
        self.schema_list = ['public']

    col = "gml_id"
    cur = self._conn.cursor()
    for schema in self.schema_list:
        for layer in self._layer_list:
            if layer == 'ZaniklePrvky':
                # skip deleted features
                continue

            if '.' in layer:
                # NOTE(review): this rebinds the loop variable 'schema'
                # for the remaining layers of this pass -- confirm that
                # this is intended
                schema, table = map(lambda x: x.lower(), layer.split('.', 1))
            else:
                table = layer.lower()

            indexname = "%s_%s_idx" % (table, col)
            cur.execute("SELECT COUNT(*) FROM pg_indexes WHERE "
                        "tablename = '%s' and schemaname = '%s' and "
                        "indexname = '%s'" % (table, schema, indexname))
            if cur.fetchall()[0][0] > 0:
                continue # indices for specified table already exists

            cur.execute('BEGIN')
            try:
                cur.execute("CREATE INDEX %s ON %s.%s (%s)" % \
                                (indexname, schema, table, col))
                cur.execute('COMMIT')
            except StandardError as e:
                VfrLogger.warning("Unable to create index %s_%s: %s" % (table, col, e))
                cur.execute('ROLLBACK')

    cur.close()
def run(self, append=False, extended=False):
    """Process all collected VFR files and convert them to the output DSN.

    Without an output datasource the available layers are only listed.
    For PostgreSQL the datasource string may be rebuilt per file when a
    schema per file (or a fixed schema) is requested.

    :param append: True to append features to existing output layers
    :param extended: True for extended layer listing (listing mode only)

    :raises VfrError: when a datasource cannot be (re)opened or a file
                      cannot be read
    """
    ipass = 0
    stime = time.time()
    layer_list = copy.deepcopy(self._layer_list)

    pg = hasattr(self, "_conn")
    if pg:
        self.schema_list = []
    epsg_checked = False

    for fname in self._file_list:
        VfrLogger.msg("Processing %s (%d out of %d)..." % \
                          (fname, ipass+1, len(self._file_list)))

        # open OGR datasource
        ids = self._open_ds(fname)
        if ids is None:
            ipass += 1
            continue # unable to open - skip

        if not self.odsn:
            # no output datasource given -> list available layers and exit
            layer_list = self._list_layers(extended, sys.stdout)
            # BUG fixed: referenced undefined name 'filename' (NameError);
            # the current input file is 'fname'
            if extended and os.path.exists(fname):
                compare_list(layer_list, parse_xml_gz(fname))
        else:
            if self.odsn is None:
                self.odsn = '.' # current directory

            if pg and not epsg_checked:
                # check if EPSG 5514 exists in output DB (only first pass)
                self._check_epsg()
                epsg_checked = True

            if not layer_list:
                for l in self._list_layers(fd=None):
                    if l not in layer_list:
                        layer_list.append(l)

            schema_name = None
            if pg:
                # build datasource string per file
                odsn_reset = self.odsn
                if self._schema_per_file or self._schema:
                    if self._schema_per_file:
                        # set schema per file
                        # BUG fixed: rstrip('.xml.gz') strips a *character
                        # set*, not a suffix, and could eat trailing
                        # characters of the base name
                        schema_name = os.path.basename(fname)
                        if schema_name.endswith('.xml.gz'):
                            schema_name = schema_name[:-len('.xml.gz')]
                        schema_name = schema_name.lower()
                        if schema_name[0].isdigit():
                            schema_name = 'vfr_' + schema_name
                    else:
                        schema_name = self._schema.lower()

                    # create schema in output DB if needed
                    self._create_schema(schema_name)
                    self.odsn += ' active_schema=%s' % schema_name
                    if schema_name not in self.schema_list:
                        self.schema_list.append(schema_name)
                    self._ods.Destroy() # TODO: do it better
                    self._ods = self._odrv.Open(self.odsn, True)
                    if self._ods is None:
                        raise VfrError("Unable to open or create new datasource '%s'" % self.odsn)

            # check mode - process changes or append
            mode = Mode.write
            if fname.split('_')[-1][0] == 'Z':
                mode = Mode.change
                if pg:
                    # insert required
                    os.environ['PG_USE_COPY'] = 'NO'
            elif append:
                mode = Mode.append
                if pg:
                    # force copy over insert
                    os.environ['PG_USE_COPY'] = 'YES'

            # do the conversion
            try:
                nfeat = self._convert_vfr(mode, schema_name)
            except RuntimeError as e:
                raise VfrError("Unable to read %s: %s" % (fname, e))

            if pg:
                # reset datasource string per file
                if self._schema_per_file or self._schema:
                    self.odsn = odsn_reset
                    self._ods.Destroy()
                    self._ods = self._odrv.Open(self.odsn, True)
                    if self._ods is None:
                        raise VfrError("Unable to open or create new datasource '%s'" % self.odsn)

            if nfeat > 0:
                append = True # append on next passes

        ids.Destroy()
        self._ids = None
        ipass += 1
def _process_deleted_features(self, layer):
    """Collect FIDs of output features listed for deletion in 'ZaniklePrvky'.

    Each record of *layer* carries a layer code and element id; matching
    features are looked up in the corresponding output layer by gml_id.

    :param layer: input OGR layer 'ZaniklePrvky' (deleted elements)

    :return: dict mapping layer name -> {output FID: (Action.delete, FID)}
    """
    lcode2lname = {
        'ST' : 'Staty',
        'RS' : 'RegionySoudrznosti',
        'KR' : 'Kraje',
        'VC' : 'Vusc',
        'OK' : 'Okresy',
        'OP' : 'Orp',
        'PU' : 'Pou',
        'OB' : 'Obce',
        'SP' : 'SpravniObvody',
        'MP' : 'Mop',
        'MC' : 'Momc',
        'CO' : 'CastiObci',
        'KU' : 'KatastralniUzemi',
        'ZJ' : 'Zsj',
        'UL' : 'Ulice',
        'PA' : 'Parcely',
        'SO' : 'StavebniObjekty',
        'AD' : 'AdresniMista',
    }
    column = 'gml_id'
    dlist = dict((lname, {}) for lname in lcode2lname.itervalues())

    layer.ResetReading()
    feature = layer.GetNextFeature()
    prev_lname = None
    while feature:
        # determine layer and attribute filter for given feature
        lcode = feature.GetField("TypPrvkuKod")
        lname = lcode2lname.get(lcode, None)
        if not lname:
            error("Unknown layer code '%s'" % lcode)
            feature = layer.GetNextFeature()
            continue

        if self._layer_list and lname not in self._layer_list:
            # layer not selected for processing
            feature = layer.GetNextFeature()
            continue

        fcode = "%s.%s" % (lcode, feature.GetField("PrvekId"))

        if not prev_lname or prev_lname != lname:
            # target layer changed -> look it up again
            dlayer = self._ods.GetLayerByName('%s' % lname)
            if dlayer is None:
                error("Layer '%s' not found" % lname)
                feature = layer.GetNextFeature()
                continue

        # find features to be deleted (collect their FIDs)
        matched = 0
        dlayer.SetAttributeFilter("%s = '%s'" % (column, fcode))
        for dfeature in dlayer:
            fid = dfeature.GetFID()
            dlist[lname][fid] = (Action.delete, fid)
            matched += 1
        dlayer.SetAttributeFilter(None)

        # check for consistency (one feature should be found)
        if matched == 0:
            VfrLogger.warning("Layer '%s': no feature '%s' found. "
                              "Nothing to delete." % \
                                  (lname, fcode))
        elif matched > 1:
            VfrLogger.warning("Layer '%s': %d features '%s' found. "
                              "All of them will be deleted." %
                                  (lname, matched, fcode))

        prev_lname = lname
        feature = layer.GetNextFeature()

    # return statistics
    return dlist
def _convert_vfr(self, mode = Mode.write, schema=None):
    """Convert all selected layers of the open input VFR datasource.

    Depending on *mode* features are written, appended, or synchronized
    (change files: delete/update/add per feature as computed by
    _process_changes / _process_deleted_features).

    :param mode: Mode.write, Mode.append or Mode.change
    :param schema: output schema name (PostgreSQL only) or None

    :return: total number of features processed
    :raises VfrError: when an output layer cannot be created or a change
                      record cannot be matched
    """
    if self._overwrite and mode == Mode.write:
        # delete also layers which are not part of ST_UKSH
        for layer in ("ulice", "parcely", "stavebniobjekty", "adresnimista"):
            if self._ods.GetLayerByName(layer) is not None:
                self._ods.DeleteLayer(layer)

    # process features marked for deletion first
    dlist = None # statistics
    if mode == Mode.change:
        dlayer = self._ids.GetLayerByName('ZaniklePrvky')
        if dlayer:
            dlist = self._process_deleted_features(dlayer)

    # process layers
    start = time.time()
    nlayers = self._ids.GetLayerCount()
    nfeat = 0
    for iLayer in range(nlayers):
        layer = self._ids.GetLayer(iLayer)
        layer_name = layer.GetName()
        ### force lower case for output layers, some drivers are doing
        ### that automatically anyway
        layer_name_lower = layer_name.lower()

        if self._layer_list and layer_name not in self._layer_list:
            # process only selected layers
            continue

        if layer_name == 'ZaniklePrvky':
            # skip deleted features (already done)
            continue

        olayer = self._ods.GetLayerByName('%s' % layer_name_lower)
        sys.stdout.write("Processing layer %-20s ..." % layer_name)
        if not self._overwrite and (olayer and mode == Mode.write):
            # layer exists and neither overwrite nor append was requested
            sys.stdout.write(" already exists (use --overwrite or --append to modify existing data)\n")
            continue

        ### TODO: fix output drivers not to use default geometry
        ### names
        if self.frmt in ('PostgreSQL', 'OCI') and not self._geom_name:
            # streets use a line geometry, everything else a point
            if layer_name_lower == 'ulice':
                self._remove_option('GEOMETRY_NAME')
                self._lco_options.append('GEOMETRY_NAME=definicnicara')
            else:
                self._remove_option('GEOMETRY_NAME')
                self._lco_options.append('GEOMETRY_NAME=definicnibod')

        # delete layer if exists and append is not True
        if olayer and mode == Mode.write:
            if self._delete_layer(layer_name_lower):
                olayer = None

        # create new output layer if not exists
        if not olayer:
            olayer = self._create_layer(layer_name_lower, layer)
        if olayer is None:
            raise VfrError("Unable to export layer '%s'. Exiting..." % layer_name)

        # pre-process changes
        if mode == Mode.change:
            change_list = self._process_changes(layer, olayer)
            if dlist and layer_name in dlist: # add features to be deleted
                change_list.update(dlist[layer_name])

        ifeat = n_nogeom = 0
        geom_idx = -1  # cached index of the requested geometry column

        # make sure that PG sequence is up-to-date (important for fid == -1)
        fid = -1
        if hasattr(self, "_conn"): # TODO (do it better)
            if schema:
                table_name = '%s.%s' % (schema, layer_name_lower)
            else:
                table_name = layer_name_lower
            fid = self._get_fid_max(table_name)
            if fid > 0:
                self._update_fid_seq(table_name, fid)
        if fid is None or fid == -1:
            # fall back to the current output feature count
            fid = olayer.GetFeatureCount()

        # start transaction in output layer
        if olayer.TestCapability(ogr.OLCTransactions):
            olayer.StartTransaction()

        # delete marked features first (changes only)
        # NOTE(review): this loop rebinds 'fid' (the running sequence
        # value above) to the last deleted FID -- confirm intended
        if mode == Mode.change and dlist and layer_name in dlist:
            for fid in dlist[layer_name].keys():
                olayer.DeleteFeature(fid)

        # do mapping for fields (needed for Esri Shapefile when
        # field names are truncated)
        field_map = [i for i in range(0, layer.GetLayerDefn().GetFieldCount())]

        # copy features from source to destination layer
        layer.ResetReading()
        feature = layer.GetNextFeature()
        while feature:
            # check for changes first (delete/update/add)
            if mode == Mode.change:
                c_fid = feature.GetFID()
                action, o_fid = change_list.get(c_fid, (None, None))
                if action is None:
                    raise VfrError("Layer %s: unable to find feature %d" % (layer_name, c_fid))

                # feature marked to be changed (delete first)
                if action in (Action.delete, Action.update):
                    olayer.DeleteFeature(o_fid)

                # determine fid for new feature
                if action == Action.add:
                    fid = -1
                else:
                    fid = o_fid

                if action == Action.delete:
                    # do nothing and continue
                    feature = layer.GetNextFeature()
                    ifeat += 1
                    continue
            else:
                fid += 1

            # clone feature
            ### ofeature = feature.Clone() # replace by SetFrom()
            ofeature = ogr.Feature(olayer.GetLayerDefn())
            ofeature.SetFromWithMap(feature, True, field_map)

            # modify geometry columns if requested
            if self._geom_name:
                if geom_idx < 0:
                    geom_idx = feature.GetGeomFieldIndex(self._geom_name)

                # delete remaining geometry columns
                ### not needed - see SetFrom()
                ### odefn = ofeature.GetDefnRef()
                ### for i in range(odefn.GetGeomFieldCount()):
                ###     if i == geom_idx:
                ###         continue
                ###     odefn.DeleteGeomFieldDefn(i)

                self._modify_feature(feature, geom_idx, ofeature)

            if ofeature.GetGeometryRef() is None:
                n_nogeom += 1
                if self._nogeomskip:
                    # skip feature without geometry
                    feature = layer.GetNextFeature()
                    ofeature.Destroy()
                    continue

            # set feature id
            # NOTE(review): 'fid >= -1' is always true for the values
            # assigned above; 'fid > -1' may have been intended -- confirm
            if fid >= -1:
                # fid == -1 -> unknown fid
                ofeature.SetFID(fid)

            # add new feature to output layer
            olayer.CreateFeature(ofeature)

            feature = layer.GetNextFeature()
            ifeat += 1

        # commit transaction in output layer
        if olayer.TestCapability(ogr.OLCTransactions):
            olayer.CommitTransaction()

        # print statistics per layer
        sys.stdout.write(" %10d features" % ifeat)
        if mode == Mode.change:
            n_added = n_updated = n_deleted = 0
            for action, unused in change_list.itervalues():
                if action == Action.update:
                    n_updated += 1
                elif action == Action.add:
                    n_added += 1
                else: # Action.delete:
                    n_deleted += 1
            sys.stdout.write(" (%5d added, %5d updated, %5d deleted)" % \
                                 (n_added, n_updated, n_deleted))
        else:
            sys.stdout.write(" added")
        if n_nogeom > 0:
            if self._nogeomskip:
                sys.stdout.write(" (%d without geometry skipped)" % n_nogeom)
            else:
                sys.stdout.write(" (%d without geometry)" % n_nogeom)
        sys.stdout.write("\n")
        nfeat += ifeat

        # update sequence for PG
        if hasattr(self, "_conn"):
            ### fid = get_fid_max(userdata['pgconn'], layer_name_lower)
            if fid > 0:
                if schema:
                    table_name = '%s.%s' % (schema, layer_name_lower)
                else:
                    table_name = layer_name_lower
                self._update_fid_seq(table_name, fid)

    # final statistics (time elapsed)
    VfrLogger.msg("Time elapsed: %d sec" % (time.time() - start))

    return nfeat