class Analyser_Sax(Analyser):
    """Analyser that feeds the elements of an OSM file to the loaded plugins.

    ``_run_analyse`` hands this object to the parser through ``CopyTo``; the
    ``Node*``/``Way*``/``Relation*`` ``Create``/``Update``/``Delete`` methods
    are presumably the callbacks the parser invokes for each element.
    Issues returned by the plugins are written to ``self.error_file``.
    """

    def __init__(self, config, logger=OsmoseLog.logger()):
        Analyser.__init__(self, config, logger)
        # Set by analyser_resume(); a falsy value means a regular run.
        self.resume_from_timestamp = None

    def __enter__(self):
        """Open the random-access reader and the sequential parser."""
        Analyser.__enter__(self)
        # open database connections
        self._load_reader()
        self._load_parser()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Drop the parser and reader, then delegate to the base class."""
        # close database connections
        self._log(u"Closing reader and parser")
        del self.parser
        del self._reader
        Analyser.__exit__(self, exc_type, exc_value, traceback)

    def timestamp(self):
        """Return the timestamp reported by the parser."""
        return self.parser.timestamp()

    def analyser(self):
        """Run a full analysis: load plugins, parse the source, close output."""
        plugins = self._load_all_plugins()
        self._init_plugins(plugins)
        self._load_output(change=self.parsing_change_file)
        try:
            self._run_analyse()
            self._close_plugins()
        finally:
            self._close_output()

    def analyser_resume(self, timestamp, already_issued_objects):
        """Run an analysis restricted to elements newer than ``timestamp``.

        ``already_issued_objects`` maps ``'N'``, ``'W'`` and ``'R'`` to
        mutable collections of element ids that already carry issues.  Ids
        seen during parsing are removed from those collections by the
        ``*Create`` methods; the ids still listed once parsing is over have
        their issues deleted from the output.
        """
        self.resume_from_timestamp = timestamp
        self.already_issued_objects = already_issued_objects

        self.config.timestamp = self.timestamp()

        plugins = self._load_all_plugins()
        self._init_plugins(plugins)
        self._load_output(change=True)
        try:
            self._run_analyse()
            if self.resume_from_timestamp:
                for kind, key in (('node', 'N'), ('way', 'W'), ('relation', 'R')):
                    for obj_id in self.already_issued_objects[key]:
                        self.error_file.delete(kind, obj_id)
        finally:
            # Always finalise the output, as analyser() does, even on failure.
            self._close_output()

    ################################################################################
    #### Useful functions

    def ToolsGetFilePath(self, filename):
        """Return ``filename`` joined to ``modules.config.dir_osmose``."""
        return os.path.join(modules.config.dir_osmose, filename)

    def ToolsOpenFile(self, filename, mode):
        """Open ``filename`` (relative to the osmose dir) as UTF-8 text."""
        return open(self.ToolsGetFilePath(filename), mode, encoding="utf-8")

    def ToolsListDir(self, dirname):
        """List the entries of ``dirname`` (relative to the osmose dir)."""
        return os.listdir(self.ToolsGetFilePath(dirname))

    def ToolsReadList(self, filename):
        """Return the stripped, non-empty lines of a file not starting with ``#``."""
        entries = []
        # The context manager closes the file even if reading fails.
        with self.ToolsOpenFile(filename, "r") as f:
            for line in f:
                line = line.strip()
                if not line or line[0] == "#":
                    continue
                entries.append(line)
        return entries

    def ToolsReadDict(self, filename, separator):
        """Read ``key<separator>value`` lines of a file into a dict.

        Lines without the separator are ignored; only the first two fields
        of each line are used.
        """
        mapping = {}
        with self.ToolsOpenFile(filename, "r") as f:
            for line in f:
                line = line.strip()
                if line and separator in line:
                    fields = line.split(separator)
                    mapping[fields[0]] = fields[1]
        return mapping

    ################################################################################
    #### Reader

    def NodeGet(self, NodeId):
        """Fetch a node from the reader."""
        return self._reader.NodeGet(NodeId)

    def WayGet(self, WayId):
        """Fetch a way from the reader, asking for its sub elements."""
        return self._reader.WayGet(WayId, dump_sub_elements=True)

    def RelationGet(self, RelationId):
        """Fetch a relation from the reader, asking for its sub elements."""
        return self._reader.RelationGet(RelationId, dump_sub_elements=True)

    def UserGet(self, UserId):
        """Fetch a user name from the reader."""
        return self._reader.UserGet(UserId)

    def ExtendData(self, data):
        """Add a ``"user"`` entry to ``data`` when only ``"uid"`` is known.

        ``data`` is modified in place and returned.
        """
        if "uid" in data and "user" not in data:
            user = self.UserGet(data["uid"])
            if user:
                data["user"] = user
        return data

    ################################################################################
    #### Logs

    def _log(self, txt):
        self.logger.log(txt)

    def _sublog(self, txt):
        self.logger.sub().log(txt)

    def _err(self, txt):
        self.logger.err(txt)

    ################################################################################
    #### Shared helpers for element parsing

    def _resume_skip(self, data, key, kind):
        """Apply the resume bookkeeping; return True if ``data`` must be skipped.

        ``key`` is the ``already_issued_objects`` key (``'N'``/``'W'``/``'R'``)
        and ``kind`` the matching element type name.  Must only be called
        while ``resume_from_timestamp`` is set.
        """
        issued = self.already_issued_objects[key]
        already_issued = data["id"] in issued
        if already_issued:
            # Seen again: it must not be purged at the end of the resume.
            issued.remove(data["id"])
        if "timestamp" in data and data["timestamp"] <= self.resume_from_timestamp:
            return True
        if already_issued:
            # The element changed since the last run: drop its old issues.
            self.error_file.delete(kind, data["id"])
        return False

    def _run_plugins(self, methods, *args):
        """Call every plugin method and gather the issues they return.

        A plugin may return a single issue dict or an iterable of issues.
        """
        err = []
        for meth in methods:
            res = meth(*args)
            if res:
                if isinstance(res, dict):
                    err.append(res)
                else:
                    err += res
        return err

    def _write_issues(self, err, data, kind, position):
        """Write the issues ``err`` found on element ``data`` to the output."""
        for e in err:
            try:
                self.error_file.error(
                    e["class"],
                    e.get("subclass", 0),
                    e.get("text", {}),
                    [data["id"]],
                    [kind],
                    e.get("fix"),
                    {"position": [position], kind: [data]},
                    allow_override=e.get('allow_fix_override'))
            except Exception:
                self._err("Error on error %s from %s" % (str(e), str(err)))
                raise

    ################################################################################
    #### Node parsing

    def NodeCreate(self, data):
        """Run the node plugins on ``data`` and record the issues found."""
        if self.resume_from_timestamp and self._resume_skip(data, 'N', "node"):
            return

        tags = data[u"tag"]
        if tags == {}:
            return

        # Running jobs
        err = self._run_plugins(self.pluginsNodeMethodes, data, tags)
        if not err:
            return

        # Write the issues
        if "uid" not in data and "user" not in data:
            data = self.NodeGet(data["id"]) or data
        data = self.ExtendData(data)
        self._write_issues(err, data, "node", data)

    def NodeUpdate(self, data):
        self.NodeDelete(data)
        self.NodeCreate(data)

    def NodeDelete(self, data):
        self.error_file.delete("node", data["id"])

    ################################################################################
    #### Way parsing

    def WayCreate(self, data):
        """Run the way plugins on ``data`` and record the issues found."""
        if self.resume_from_timestamp and self._resume_skip(data, 'W', "way"):
            return

        tags = data[u"tag"]
        nds = data[u"nd"]

        # Run jobs
        err = self._run_plugins(self.pluginsWayMethodes, data, tags, nds)
        if not err:
            return

        # Write the issues
        if "uid" not in data and "user" not in data:
            # way from reader can be None if there is only one node on it
            data = self.WayGet(data["id"]) or data
        # Locate the issue on the middle node; a way without nodes has none.
        node = self.NodeGet(nds[len(nds) // 2]) if nds else None
        if not node:
            node = {u"lat": 0, u"lon": 0}
        data = self.ExtendData(data)
        self._write_issues(err, data, "way", node)

    def WayUpdate(self, data):
        self.WayDelete(data)
        self.WayCreate(data)

    def WayDelete(self, data):
        self.error_file.delete("way", data["id"])

    ################################################################################
    #### Relation parsing

    def locateRelation(self, data, recur_control=None):
        """Return a node usable as position for relation ``data``, or None.

        Node and way members are tried first; only if none yields a node are
        member relations explored recursively.  ``recur_control`` lists the
        relation ids already on the recursion path, to avoid cycles.
        """
        if recur_control is None:
            recur_control = []

        node = None
        for memb in data[u"member"]:
            if memb[u"type"] == u"node":
                node = self.NodeGet(memb[u"ref"])
            elif memb[u"type"] == "way":
                way = self.WayGet(memb[u"ref"])
                # Skip ways without nodes instead of failing on them.
                if way and way[u"nd"]:
                    node = self.NodeGet(way[u"nd"][0])
            if node:
                break

        if not node:
            for memb in data[u"member"]:
                if memb[u"type"] == u"relation":
                    ref = memb[u"ref"]
                    if ref == data["id"] or ref in recur_control:
                        # don't reread the same relation
                        continue
                    rel = self.RelationGet(ref)
                    if rel:
                        node = self.locateRelation(
                            rel, recur_control=recur_control + [data["id"]])
                if node:
                    break
        return node

    def RelationCreate(self, data):
        """Run the relation plugins on ``data`` and record the issues found."""
        if self.resume_from_timestamp and self._resume_skip(data, 'R', "relation"):
            return

        tags = data[u"tag"]
        members = data[u"member"]

        # Run jobs
        err = self._run_plugins(self.pluginsRelationMethodes, data, tags, members)

        # Write the issues; relations without members are not reported.
        if not (err and data[u"member"]):
            return
        if "uid" not in data and "user" not in data:
            data = self.RelationGet(data["id"]) or data
        node = self.locateRelation(data)
        if not node:
            node = {u"lat": 0, u"lon": 0}
        data = self.ExtendData(data)
        self._write_issues(err, data, "relation", node)

    def RelationUpdate(self, data):
        self.RelationDelete(data)
        self.RelationCreate(data)

    def RelationDelete(self, data):
        self.error_file.delete("relation", data["id"])

    ################################################################################

    def _load_reader(self):
        """Select the reader used for random access to elements.

        Tried in order: the configured osmosis manager, the OsmBin store,
        ``config.reader``, and finally a reader over the source file itself.
        """
        if getattr(self.config, 'osmosis_manager', None):
            self._reader = self.config.osmosis_manager.osmosis()
            return

        try:
            from modules import OsmBin
            self._reader = OsmBin.OsmBin("/data/work/osmbin/data")
            return
        except IOError:
            # OsmBin store not usable: fall through to the next reader.
            pass

        if hasattr(self.config, "reader"):
            self._reader = self.config.reader
        else:
            from modules import OsmSaxAlea
            self._reader = OsmSaxAlea.OsmSaxReader(self.config.src, self.config.src_state)

    ################################################################################

    def _load_parser(self):
        """Create the parser matching the extension of ``config.src``.

        Also sets ``parsing_change_file``, True only for ``.osc`` sources.

        Raises:
            Exception: when the file extension is not recognised.
        """
        src = self.config.src
        src_state = getattr(self.config, 'src_state', None)

        if src.endswith(".pbf"):
            from modules.OsmPbf import OsmPbfReader
            self.parser = OsmPbfReader(src, src_state, self.logger.sub())
            self.parsing_change_file = False
        elif src.endswith((".osc", ".osc.gz", ".osc.bz2")):
            from modules.OsmSax import OscSaxReader
            self.parser = OscSaxReader(src, src_state, self.logger.sub())
            self.parsing_change_file = True
        elif src.endswith((".osm", ".osm.gz", ".osm.bz2")):
            from modules.OsmSax import OsmSaxReader
            self.parser = OsmSaxReader(src, src_state, self.logger.sub())
            self.parsing_change_file = False
        else:
            raise Exception("File extension '%s' is not recognized" % self.config.src)

    ################################################################################

    def _load_plugin(self, plugin):
        """Import ``plugins.<plugin>`` and return its class named ``plugin``.

        Returns None when the module exposes a truthy ``P_<plugin>``
        attribute; such modules are not loaded by this analyser.
        """
        module = importlib.import_module('plugins.' + plugin)
        if getattr(module, 'P_' + plugin, None):
            return None
        return getattr(module, plugin)

    def _load_all_plugins(self):
        """Return the plugin classes found in the ``plugins`` directory."""
        self._log(u"Loading plugins")
        available_plugins = []
        for plugin in sorted(self.ToolsListDir(u"plugins")):
            if not plugin.endswith(".py") or plugin in ("__init__.py", "Plugin.py"):
                continue
            clazz = self._load_plugin(plugin[:-3])
            if clazz:
                available_plugins.append(clazz)
        return available_plugins

    def _init_plugins(self, available_plugin_classes):
        """Instantiate the applicable plugins and register their callbacks.

        Plugins are filtered on their ``only_for``/``not_for`` prefixes
        against the configured country and language, then initialised.

        Raises:
            Exception: when two plugins declare the same issue class.
        """
        self._Err = {}
        self.plugins = {}
        self.pluginsNodeMethodes = []
        self.pluginsWayMethodes = []
        self.pluginsRelationMethodes = []

        options = self.config.options
        conf_limit = set()
        for key in ("country", "language"):
            if key in options and isinstance(options[key], str):
                conf_limit.add(options[key])

        def matches(prefixes):
            # True if any configured value starts with any of the prefixes.
            return any(co.startswith(of) for of in prefixes for co in conf_limit)

        for pluginClazz in available_plugin_classes:
            name = pluginClazz.__name__
            if hasattr(pluginClazz, "only_for") and not matches(pluginClazz.only_for):
                self._sublog(u"skip " + name)
                continue
            if hasattr(pluginClazz, "not_for") and matches(pluginClazz.not_for):
                self._sublog(u"skip " + name)
                continue

            # Plugin Initialisation
            pluginInstance = pluginClazz(self)
            if pluginInstance.init(self.logger.sub().sub()) is False:
                self._sublog(u"self-disabled " + name)
                continue

            pluginAvailableMethodes = pluginInstance.availableMethodes()
            self._sublog(u"init " + name + " (" + ", ".join(pluginAvailableMethodes) + ")")
            self.plugins[name] = pluginInstance

            # Fetch functions to call
            if "node" in pluginAvailableMethodes:
                self.pluginsNodeMethodes.append(pluginInstance.node)
            if "way" in pluginAvailableMethodes:
                self.pluginsWayMethodes.append(pluginInstance.way)
            if "relation" in pluginAvailableMethodes:
                self.pluginsRelationMethodes.append(pluginInstance.relation)

            # List generated issues
            for (cl, v) in pluginInstance.errors.items():
                if cl in self._Err:
                    raise Exception("class %d already present as item %d" % (cl, self._Err[cl]['item']))
                self._Err[cl] = v

    ################################################################################

    def _load_output(self, change):
        """Open the analyser section of the output and declare issue classes."""
        self.error_file.analyser(self.timestamp(), self.analyser_version(), change=change)

        # Create classes in issues file
        for (cl, item) in sorted(self._Err.items()):
            self.error_file.classs(
                id=cl,
                item=item['item'],
                level=item['level'],
                # 'tag' and 'desc' are accepted as fallbacks for 'tags' and 'title'.
                tags=item.get('tags', item.get('tag')),
                title=item.get('title', item.get('desc')),
                detail=item.get('detail'),
                fix=item.get('fix'),
                trap=item.get('trap'),
                example=item.get('example'),
                source=item.get('source'),
                resource=item.get('resource'),
            )

    ################################################################################

    def _run_analyse(self):
        """Let the parser push the content of the source file to this object."""
        self._log(u"Analysing file " + self.config.src)
        self.parser.CopyTo(self)
        self._log(u"Analyse finished")

    ################################################################################

    def _close_plugins(self):
        """Call ``end`` on every initialised plugin, in name order."""
        # Close plugins
        self._log(u"Unloading plugins")
        for name in sorted(self.plugins.keys()):
            self._sublog(u"end " + name)
            self.plugins[name].end(self.logger.sub().sub())

    def _close_output(self):
        self.error_file.analyser_end()
class Analyser_Sax(Analyser):
    """Analyser that feeds the elements of an OSM file to the loaded plugins.

    ``_run_analyse`` hands this object to the parser through ``CopyTo``; the
    ``Node*``/``Way*``/``Relation*`` ``Create``/``Update``/``Delete`` methods
    are presumably the callbacks the parser invokes for each element.
    Issues returned by the plugins are written to ``self.error_file``.
    """

    def __init__(self, config, logger=OsmoseLog.logger()):
        Analyser.__init__(self, config, logger)

    def __enter__(self):
        """Open the random-access reader and the sequential parser."""
        Analyser.__enter__(self)
        # open database connections
        self._load_reader()
        self._load_parser()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Drop the parser and reader, then delegate to the base class."""
        # close database connections
        self._log(u"Closing reader and parser")
        del self.parser
        del self._reader
        Analyser.__exit__(self, exc_type, exc_value, traceback)

    def analyser(self):
        """Run the analysis: load plugins, parse the source, close output."""
        self._load_plugins()
        self._load_output()
        try:
            self._run_analyse()
            self._close_plugins()
        finally:
            # Finalise the output even when parsing or a plugin fails.
            self._close_output()

    ################################################################################
    #### Useful functions

    @staticmethod
    def _to_text(value):
        """Decode ``value`` as UTF-8 if it is bytes; return text unchanged."""
        if isinstance(value, bytes):
            return value.decode("utf-8")
        return value

    def ToolsGetFilePath(self, filename):
        """Return ``filename`` joined to ``config.dir_scripts``."""
        return os.path.join(self.config.dir_scripts, filename)

    def ToolsOpenFile(self, filename, mode):
        """Open ``filename`` (relative to the scripts dir) with ``mode``."""
        return open(self.ToolsGetFilePath(filename).encode("utf8"), mode)

    def ToolsListDir(self, dirname):
        """List the entries of ``dirname`` as text.

        Names are decoded from UTF-8 only when the interpreter returns them
        as bytes; names that are already text are passed through.
        """
        return [self._to_text(x) for x in os.listdir(self.ToolsGetFilePath(dirname))]

    def ToolsReadList(self, filename):
        """Return the stripped, non-empty lines of a file not starting with ``#``."""
        entries = []
        # Binary mode yields bytes on every Python version, so the explicit
        # UTF-8 decoding below is always valid; ``with`` closes on error.
        with self.ToolsOpenFile(filename, "rb") as f:
            for raw in f:
                line = self._to_text(raw.strip())
                if not line or line[0] == "#":
                    continue
                entries.append(line)
        return entries

    def ToolsReadDict(self, filename, separator):
        """Read ``key<separator>value`` lines of a file into a dict.

        Lines without the separator are ignored; only the first two fields
        of each line are used.
        """
        mapping = {}
        with self.ToolsOpenFile(filename, "rb") as f:
            for raw in f:
                line = self._to_text(raw.strip())
                if line and separator in line:
                    fields = line.split(separator)
                    mapping[fields[0]] = fields[1]
        return mapping

    ################################################################################
    #### Reader

    def NodeGet(self, NodeId):
        """Fetch a node from the reader."""
        return self._reader.NodeGet(NodeId)

    def WayGet(self, WayId):
        """Fetch a way from the reader."""
        return self._reader.WayGet(WayId)

    def RelationGet(self, RelationId):
        """Fetch a relation from the reader."""
        return self._reader.RelationGet(RelationId)

    def UserGet(self, UserId):
        """Fetch a user name from the reader."""
        return self._reader.UserGet(UserId)

    def ExtendData(self, data):
        """Add a ``"user"`` entry to ``data`` when only ``"uid"`` is known.

        ``data`` is modified in place and returned.
        """
        if "uid" in data and "user" not in data:
            user = self.UserGet(data["uid"])
            if user:
                data["user"] = user
        return data

    ################################################################################
    #### Logs

    def _log(self, txt):
        self.logger.log(txt)

    def _sublog(self, txt):
        self.logger.sub().log(txt)

    def _cpt(self, txt):
        self.logger.cpt(txt)

    def _subcpt(self, txt):
        self.logger.sub().cpt(txt)

    ################################################################################
    #### Shared helpers for element parsing

    def _run_plugins(self, methods, *args):
        """Call every plugin method and gather the issues they return.

        A plugin may return a single issue dict or an iterable of issues.
        """
        err = []
        for meth in methods:
            res = meth(*args)
            if res:
                if isinstance(res, dict):
                    err.append(res)
                else:
                    err += res
        return err

    def _write_issues(self, err, data, kind, position):
        """Write the issues ``err`` found on element ``data`` to the output."""
        for e in err:
            try:
                self.error_file.error(
                    e["class"],
                    e.get("subclass", 0),
                    e.get("text", {}),
                    [data["id"]],
                    [kind],
                    e.get("fix"),
                    {"position": [position], kind: [data]})
            except Exception:
                print("Error on error", e, "from", err)
                raise

    ################################################################################
    #### Node parsing

    def NodeCreate(self, data):
        """Run the node plugins on ``data`` and record the issues found."""
        tags = data[u"tag"]
        if tags == {}:
            return

        # Run the jobs
        err = self._run_plugins(self.pluginsNodeMethodes, data, tags)
        if not err:
            return

        # Record the issues
        if "uid" not in data and "user" not in data:
            # Keep the parsed element when the reader does not know the node.
            data = self.NodeGet(data["id"]) or data
        data = self.ExtendData(data)
        self._write_issues(err, data, "node", data)

    def NodeUpdate(self, data):
        self.NodeDelete(data)
        self.NodeCreate(data)

    def NodeDelete(self, data):
        self.error_file.node_delete(data["id"])

    ################################################################################
    #### Way parsing

    def WayCreate(self, data):
        """Run the way plugins on ``data`` and record the issues found."""
        tags = data[u"tag"]
        nds = data[u"nd"]

        # Run the jobs
        err = self._run_plugins(self.pluginsWayMethodes, data, tags, nds)
        if not err:
            return

        # Record the issues
        if "uid" not in data and "user" not in data:
            # way from reader can be None if there is only one node on it
            data = self.WayGet(data["id"]) or data
        # Floor division keeps the index an int on every Python version;
        # a way without nodes has no middle node to locate the issue on.
        node = self.NodeGet(nds[len(nds) // 2]) if nds else None
        if not node:
            node = {u"lat": 0, u"lon": 0}
        data = self.ExtendData(data)
        self._write_issues(err, data, "way", node)

    def WayUpdate(self, data):
        self.WayDelete(data)
        self.WayCreate(data)

    def WayDelete(self, data):
        self.error_file.way_delete(data["id"])

    ################################################################################
    #### Relation parsing

    def locateRelation(self, data, recur_control=None):
        """Return a node usable as position for relation ``data``, or None.

        Node and way members are tried first; only if none yields a node are
        member relations explored recursively.  ``recur_control`` lists the
        relation ids already on the recursion path, to avoid cycles.
        """
        if recur_control is None:
            recur_control = []

        node = None
        for memb in data[u"member"]:
            if memb[u"type"] == u"node":
                node = self.NodeGet(memb[u"ref"])
            elif memb[u"type"] == "way":
                way = self.WayGet(memb[u"ref"])
                # Skip ways without nodes instead of failing on them.
                if way and way[u"nd"]:
                    node = self.NodeGet(way[u"nd"][0])
            if node:
                break

        if not node:
            for memb in data[u"member"]:
                if memb[u"type"] == u"relation":
                    ref = memb[u"ref"]
                    if ref == data["id"] or ref in recur_control:
                        # don't reread the same relation
                        continue
                    rel = self.RelationGet(ref)
                    if rel:
                        node = self.locateRelation(
                            rel, recur_control=recur_control + [data["id"]])
                if node:
                    break
        return node

    def RelationCreate(self, data):
        """Run the relation plugins on ``data`` and record the issues found."""
        tags = data[u"tag"]
        members = data[u"member"]

        # Run the jobs
        err = self._run_plugins(self.pluginsRelationMethodes, data, tags, members)

        # Record the issues; relations without members are not reported.
        if not (err and data[u"member"]):
            return
        if "uid" not in data and "user" not in data:
            # Keep the parsed element when the reader does not know the relation.
            data = self.RelationGet(data["id"]) or data
        node = self.locateRelation(data)
        if not node:
            node = {u"lat": 0, u"lon": 0}
        data = self.ExtendData(data)
        self._write_issues(err, data, "relation", node)

    def RelationUpdate(self, data):
        self.RelationDelete(data)
        self.RelationCreate(data)

    def RelationDelete(self, data):
        self.error_file.relation_delete(data["id"])

    ################################################################################

    def _load_reader(self):
        """Select the reader used for random access to elements.

        Tried in order: the osmosis database when ``config.db_string`` is
        set, the OsmBin store, ``config.reader``, and finally a reader over
        the source file itself.
        """
        if getattr(self.config, 'db_string', None):
            from modules import OsmOsis
            self._reader = OsmOsis.OsmOsis(self.config.db_string, self.config.db_schema)
            return

        try:
            from modules import OsmBin
            self._reader = OsmBin.OsmBin("/data/work/osmbin/data")
            return
        except IOError:
            # OsmBin store not usable: fall through to the next reader.
            pass

        if hasattr(self.config, "reader"):
            self._reader = self.config.reader
        else:
            from modules import OsmSaxAlea
            self._reader = OsmSaxAlea.OsmSaxReader(self.config.src)

    ################################################################################

    def _load_parser(self):
        """Create the parser matching the extension of ``config.src``.

        Also sets ``parsing_change_file``, True only for ``.osc`` sources.

        Raises:
            Exception: when the file extension is not recognised.
        """
        src = self.config.src

        if src.endswith(".pbf"):
            from modules.OsmPbf import OsmPbfReader
            self.parser = OsmPbfReader(src, self.logger.sub())
            self.parsing_change_file = False
        elif src.endswith((".osc", ".osc.gz", ".osc.bz2")):
            from modules.OsmSax import OscSaxReader
            self.parser = OscSaxReader(src, self.logger.sub())
            self.parsing_change_file = True
        elif src.endswith((".osm", ".osm.gz", ".osm.bz2")):
            from modules.OsmSax import OsmSaxReader
            self.parser = OsmSaxReader(src, self.logger.sub())
            self.parsing_change_file = False
        else:
            raise Exception("File extension '%s' is not recognized" % self.config.src)

    ################################################################################

    def _load_plugins(self):
        """Import, filter and initialise the plugins of the ``plugins`` dir.

        A plugin module provides the classes named in its
        ``available_plugin_classes`` attribute, or by default one class named
        like the module.  Classes are filtered on ``only_for``/``not_for``
        against the configured country and language, then initialised.

        Raises:
            Exception: when two plugins declare the same issue class.
        """
        self._log(u"Loading plugins")
        self._Err = {}
        self.plugins = {}
        self.pluginsNodeMethodes = []
        self.pluginsWayMethodes = []
        self.pluginsRelationMethodes = []

        try:
            string_types = basestring  # Python 2 text base class
        except NameError:
            string_types = str  # the name does not exist on Python 3

        conf_limit = set()
        for key in ("country", "language"):
            if key in self.config.options:
                if isinstance(self.config.options[key], string_types):
                    conf_limit.add(self.config.options[key])

        # load plugins
        for plugin in sorted(self.ToolsListDir("plugins")):
            if not plugin.endswith(".py") or plugin in ("__init__.py", "Plugin.py"):
                continue
            moduleName = plugin[:-3]
            pluginModule = importlib.import_module("plugins." + moduleName)
            available_classes = getattr(pluginModule, "available_plugin_classes", [moduleName])

            for pluginName in available_classes:
                pluginClazz = getattr(pluginModule, pluginName)

                if hasattr(pluginClazz, "only_for"):
                    if conf_limit.isdisjoint(set(pluginClazz.only_for)):
                        self._sublog(u"skip " + moduleName)
                        continue

                if hasattr(pluginClazz, "not_for"):
                    if not conf_limit.isdisjoint(set(pluginClazz.not_for)):
                        self._sublog(u"skip " + moduleName)
                        continue

                # Plugin initialisation
                pluginInstance = pluginClazz(self)
                self._sublog(u"init " + pluginName + " (" + ", ".join(pluginInstance.availableMethodes()) + ")")
                # Only a False-equal result disables the plugin; init()
                # returning None counts as success.
                if not (pluginInstance.init(self.logger.sub().sub()) != False):
                    continue

                pluginAvailableMethodes = pluginInstance.availableMethodes()
                self.plugins[pluginName] = pluginInstance

                # Fetch the functions to call
                if "node" in pluginAvailableMethodes:
                    self.pluginsNodeMethodes.append(pluginInstance.node)
                if "way" in pluginAvailableMethodes:
                    self.pluginsWayMethodes.append(pluginInstance.way)
                if "relation" in pluginAvailableMethodes:
                    self.pluginsRelationMethodes.append(pluginInstance.relation)

                # List the generated issues
                for (cl, v) in pluginInstance.errors.items():
                    if cl in self._Err:
                        raise Exception("class %d already present as item %d" % (cl, self._Err[cl]['item']))
                    self._Err[cl] = v

    ################################################################################

    def _load_output(self):
        """Open the analyser section of the output and declare issue classes."""
        self.error_file.analyser(change=self.parsing_change_file)

        # Create the classes in the issues file
        for (cl, item) in sorted(self._Err.items()):
            self.error_file.classs(cl, item["item"], item.get("level"), item.get("tag"), item['desc'])

    ################################################################################

    def _run_analyse(self):
        """Let the parser push the content of the source file to this object."""
        self._log(u"Analysing file " + self.config.src)
        self.parser.CopyTo(self)
        self._log(u"Analyse finished")

    ################################################################################

    def _close_plugins(self):
        """Call ``end`` on every initialised plugin, in name order."""
        # Close the plugins
        self._log(u"Unloading plugins")
        for name in sorted(self.plugins.keys()):
            self._sublog(u"end " + name)
            self.plugins[name].end(self.logger.sub().sub())

    def _close_output(self):
        self.error_file.analyser_end()