def load_osm_boundary(filename):
    """Load a boundary relation from an OSM file.

    The file must also contain all the nodes and ways used by the
    boundary.  Only the first relation found in the file is read.

    Returns the OSM_Boundary built from the relation element and all the
    way and node elements of the file.

    Raises ValueError when the file contains no relation element.
    """
    reporting = Reporting()
    # One progress tick per node, per way, and one for the relation itself.
    elem_count = count_elements(filename, "node")
    elem_count += count_elements(filename, "way")
    elem_count += 1
    reporting.progress_start(u"Ładuję %s" % (filename,), elem_count)
    nodes = []
    ways = []
    relation = None
    for event, elem in ElementTree.iterparse(filename):
        if event != 'end':
            continue
        if elem.tag == 'node':
            nodes.append(elem)
        elif elem.tag == 'way':
            ways.append(elem)
        elif elem.tag == 'relation' and relation is None:
            # Compare with None explicitly: an Element without children
            # is falsy, so a truth test would misjudge an empty relation.
            relation = elem
        else:
            continue
        reporting.progress()
    reporting.progress_stop()
    if relation is None:
        reporting.output_msg("errors", u"Nie znaleziono relacji")
        raise ValueError("Relation not found")
    reporting.output_msg("stats", u"Załadowano relację, %i dróg i %i węzłów."
                         % (len(ways), len(nodes)))
    return OSM_Boundary(relation, ways, nodes)
def load_terc():
    """Stream data/TERC.xml and feed every <row> to load_terc_object().

    Progress is reported per row; a summary with the Wojewodztwo, Powiat
    and Gmina counts is written to the "stats" channel at the end.
    """
    reporting = Reporting()
    total_rows = count_elements("data/TERC.xml", "row")
    reporting.progress_start(u"Ładowanie data/TERC.xml", total_rows)
    for event, elem in ElementTree.iterparse("data/TERC.xml"):
        # Only completely parsed <row> elements are of interest.
        if event != 'end' or elem.tag != 'row':
            continue
        load_terc_object(elem)
        reporting.progress()
    reporting.progress_stop()
    summary = u"Załadowano %i województw, %i powiatów i %i gmin" % (
        Wojewodztwo.count(), Powiat.count(), Gmina.count())
    reporting.output_msg("stats", summary)
def load_simc():
    """Load the WMRODZ dictionary, then every <row> of data/SIMC.xml.

    Each row is turned into a place by SIMC_Place.from_element().  Once the
    whole file has been read the place count is reported on the "stats"
    channel and SIMC_Place.link_parents() is called.
    """
    load_wmrodz()
    reporting = Reporting()
    total_rows = count_elements("data/SIMC.xml", "row")
    reporting.progress_start(u"Ładowanie data/SIMC.xml", total_rows)
    for event, elem in ElementTree.iterparse("data/SIMC.xml"):
        # Only completely parsed <row> elements are of interest.
        if event != 'end' or elem.tag != 'row':
            continue
        SIMC_Place.from_element(elem)
        reporting.progress()
    reporting.progress_stop()
    summary = u"Załadowano %i miejscowości" % (SIMC_Place.count(),)
    reporting.output_msg("stats", summary)
    SIMC_Place.link_parents()
def load_osm():
    """Create an OSM_Place for every <node> of data/data.osm.

    The created objects are not kept here; OSM_Place is instantiated for
    its side effects only.  When the file has been read, the number of
    places and of matched administrative units (taken from the OSM_Place
    class counters) is reported on the "stats" channel.
    """
    reporting = Reporting()
    row_count = count_elements("data/data.osm", "node")
    reporting.progress_start(u"Ładuję data/data.osm", row_count)
    for event, elem in ElementTree.iterparse("data/data.osm"):
        if event == 'end' and elem.tag == 'node':
            OSM_Place(elem)
            reporting.progress()
    reporting.progress_stop()
    # The two literals are concatenated before formatting; the trailing
    # space keeps the two sentences apart in the output.
    reporting.output_msg("stats", u"Załadowano %i miejsc. "
            u"Dopasowano %i województw, %i powiatów i %i gmin." % (
                OSM_Place.count(), OSM_Place.woj_matched,
                OSM_Place.pow_matched, OSM_Place.gmi_matched))
def load_wmrodz():
    """Load the RM -> NAZWA_RM dictionary from data/WMRODZ.xml.

    Every <row> of the <catalog> element that carries both an "RM" and a
    non-empty "NAZWA_RM" column is stored in the module-level `wmrodz`
    mapping.  When the name is a key of `simc2place_mapping`, the mapped
    value is also stored in `rm2place_mapping` under the RM code.
    """
    reporting = Reporting()
    reporting.output_msg("info", u"Ładowanie data/WMRODZ.xml")
    tree = ElementTree.parse("data/WMRODZ.xml")
    catalog = tree.find("catalog")
    for row in catalog:
        if row.tag != "row":
            continue
        rm = None
        nazwa = None
        for col in row:
            if col.tag != 'col':
                continue
            key = col.attrib["name"]
            if key == "RM":
                rm = col.text
            elif key == "NAZWA_RM":
                # An empty <col/> has text None; treat it as a missing name
                # instead of failing on None.strip().
                nazwa = (col.text or u"").strip()
        if rm and nazwa:
            wmrodz[rm] = nazwa
            if nazwa in simc2place_mapping:
                rm2place_mapping[rm] = simc2place_mapping[nazwa]
def __init__(self, places, width, height):
    """Build a width x height grid of cells and distribute places over it.

    The bounding box of all places (using their `lon` / `lat` attributes)
    is computed first and stored as left/right/top/bottom together with
    the per-cell longitude and latitude ratios.  Then one Cell is created
    for every (x, y) pair and each place is added to the cell returned by
    self.get_cell().
    """
    self.width = width
    self.height = height
    reporting = Reporting()
    # Two passes over the places: bounding box, then cell assignment.
    reporting.progress_start("Creating grid %ix%i" % (width, height),
                             2 * len(places))

    west, east, north, south = 180, -180, -90, 90
    for spot in places:
        reporting.progress()
        west = min(west, spot.lon)
        east = max(east, spot.lon)
        north = max(north, spot.lat)
        south = min(south, spot.lat)
    reporting.output_msg("info", "Bounding box: (%r,%r,%r,%r)" % (
        west, south, east, north))
    self.left, self.right = west, east
    self.top, self.bottom = north, south

    # NOTE(review): the 1.01 factor presumably keeps places lying exactly
    # on the far edge inside the last cell -- confirm against get_cell().
    self.lon_ratio = (east - west) * 1.01 / width
    self.lat_ratio = (north - south) * 1.01 / height
    reporting.output_msg("info", "lon_ratio: %r, lat_ratio: %r" % (
        self.lon_ratio, self.lat_ratio))

    self.cells = dict(((col, row), Cell(col, row))
                      for col in range(width)
                      for row in range(height))
    for spot in places:
        reporting.progress()
        self.get_cell(spot).add_place(spot)
    reporting.progress_stop()
def __init__(self, relation_element, way_elements, node_elements):
    """Build the boundary polygons from an OSM relation element.

    relation_element -- the <relation> element; its "id", "version" and
        "changeset" attributes, its <tag> children and its way members
        are read.
    way_elements -- the <way> elements the relation may refer to.
    node_elements -- the <node> elements the ways may refer to.

    The member ways are chained end to end into rings; every ring is
    stored in self.polygons as a list of (lat, lon) tuples.  self.open is
    True when at least one ring could not be closed.

    Raises NotImplementedError for a way member with a non-empty role and
    ValueError for an incomplete way, a missing way or a relation with no
    way members.
    """
    self.polygons = []
    self.ways = {}
    self.relation = relation_element
    self.open = True
    reporting = Reporting()
    self.id = relation_element.attrib["id"]
    self.version = relation_element.attrib["version"]
    self.changeset = relation_element.attrib["changeset"]
    self.tags = {}

    nodes = {}
    for element in node_elements:
        node = OSM_Node(element)
        nodes[node.id] = node
    ways = {}
    for element in way_elements:
        way = OSM_Way(element)
        way.add_nodes(nodes)
        ways[way.id] = way

    for sub in relation_element:
        if sub.tag == 'tag':
            self.tags[sub.attrib["k"]] = sub.attrib["v"]
        elif sub.tag == 'member' and sub.attrib["type"] == 'way':
            role = sub.attrib.get("role", "")
            if role:
                raise NotImplementedError(
                    "Role %r for relation way members not supported" % (role,))
            way_id = sub.attrib["ref"]
            way = ways.get(way_id)
            if way:
                if not way.complete:
                    raise ValueError("Incomplete way: %r" % (way,))
                self.ways[way_id] = way
            else:
                raise ValueError("Way not found: %r" % (way_id,))
    self.name = self.tags.get("name")
    if not self.ways:
        raise ValueError("No ways")

    self.open = False
    # A real list is needed: ways are popped and removed while chaining.
    ways_left = list(self.ways.values())
    while ways_left:
        segment_start = ways_left.pop(0)
        polygon = [(node.lat, node.lon) for node in segment_start.nodes]
        last_end = segment_start.end_node
        # Keep attaching ways until the ring returns to its first node.
        while ways_left and last_end is not segment_start.start_node:
            for way in ways_left:
                if way.start_node is last_end:
                    # Forward: skip the first node, it is already present.
                    chained = way.nodes[1:]
                    last_end = way.end_node
                    break
                if way.end_node is last_end:
                    # Backward: skip the last node (already present) and
                    # walk the remaining ones down to the way's start.
                    chained = reversed(way.nodes[:-1])
                    last_end = way.start_node
                    break
            else:
                # No remaining way continues this segment.
                break
            polygon.extend((node.lat, node.lon) for node in chained)
            ways_left.remove(way)
        if last_end is not segment_start.start_node:
            # Covers both "no continuation found" and "ran out of ways".
            self.open = True
        self.polygons.append(polygon)

    if HAVE_SHAPELY:
        reporting.output_msg("info", "Using Shapely for 'point in polygon' checks")
        self.multi_polygon = MultiPolygon([(p, ()) for p in self.polygons])
        self._contains_impl = self._contains_shapely_impl
    else:
        reporting.output_msg("info", "Using Python function for the 'point in polygon' checks")
        self._contains_impl = self._contains_python_impl
def __init__(self, woj_name, pow_name, gmi_name):
    """Wrap every non-empty name in a Location; empty names become None."""
    self.wojewodztwo = Location(woj_name) if woj_name else None
    self.powiat = Location(pow_name) if pow_name else None
    self.gmina = Location(gmi_name) if gmi_name else None


# NOTE(review): the statements below follow the method in the mangled
# source with no visible guard; they look like a manual demo that mixes
# progress ticks and messages from two Reporting() objects -- confirm where
# they belong.
setup_locale()
reporting = Reporting()
reporting.progress_start("progress", 10)
time.sleep(0.2)
reporting.progress()
time.sleep(0.2)
reporting.output_msg("debug", u"Cośtam cośtam")
time.sleep(0.2)
reporting2 = Reporting()
reporting.progress()
time.sleep(0.2)
reporting2.output_msg("debug", u"Cośtam cośtam")
time.sleep(0.2)
reporting2.output_msg("debug", u"Cośtam cośtam")
reporting.progress()
reporting.output_msg("debug", u"Cośtam cośtam")
reporting.output_msg("debug", u"Cośtam cośtam")
def __init__(self, element):
    """Create a place from an OSM node element and locate it.

    The wojewodztwo / powiat / gmina of the place are resolved from the
    "is_in:province", "is_in:county" and "is_in:municipality" tags (or,
    failing those, from the comma separated parts of "is_in"), then
    cross-checked against "teryt:simc" and "teryt:terc" when present.
    Inconsistencies are reported on the "errors" channel.  Finally the
    place is registered in the class-level _by_id, _by_name and _by_type
    indexes (and _by_simc_id when it carries a SIMC code).

    NOTE(review): block nesting was reconstructed from a whitespace-mangled
    source; confirm the placement of the "preassigned" message and of the
    final _by_type registration.
    """
    OSM_Node.__init__(self, element)
    reporting = Reporting()
    self.element = element
    self.wojewodztwo = None
    self.powiat = None
    self.gmina = None
    self.simc_id = None
    self.terc_id = None
    self.simc_place = None
    tags = self.tags
    if "place" in tags:
        self.type = tags["place"]
    else:
        self.type = None
    self.normalized_type = place_aliases.get(self.type, self.type)
    if "is_in" in tags:
        is_in_parts = [s.strip() for s in tags["is_in"].split(",")]
        self.is_in = ", ".join(is_in_parts)
    else:
        is_in_parts = []
        self.is_in = None

    # --- wojewodztwo ---
    if "is_in:province" in tags:
        woj = tags["is_in:province"]
        self.wojewodztwo = Wojewodztwo.try_by_name(woj, True)
        OSM_Place.woj_matched += 1
    elif is_in_parts:
        for part in is_in_parts:
            woj = Wojewodztwo.try_by_name(part, False)
            if woj:
                self.wojewodztwo = woj
                OSM_Place.woj_matched += 1
                break
    if self.wojewodztwo:
        reporting.output_msg("woj_set", u"%s (%s) jest w %s" % (self.name,
            self.id, self.wojewodztwo.full_name()), self)

    # --- powiat ---
    if "is_in:county" in tags:
        powiat_name = tags["is_in:county"]
        self.powiat = Powiat.try_by_name(powiat_name, True, self.wojewodztwo)
        OSM_Place.pow_matched += 1
    elif is_in_parts:
        for part in is_in_parts:
            powiat = Powiat.try_by_name(part, False, self.wojewodztwo)
            if powiat:
                self.powiat = powiat
                OSM_Place.pow_matched += 1
                break
    if self.powiat:
        reporting.output_msg("pow_set", u"%s jest w %s" % (self.name,
            self.powiat.full_name()), self)
        if self.wojewodztwo:
            if self.powiat.wojewodztwo != self.wojewodztwo:
                reporting.output_msg("errors", u"%s: Powiat nie pasuje do województwa"
                        % (self,))
        else:
            # No wojewodztwo known yet: take it from the powiat.
            self.wojewodztwo = self.powiat.wojewodztwo

    # --- gmina ---
    if "is_in:municipality" in tags:
        gmi = tags["is_in:municipality"]
        self.gmina = Gmina.try_by_name(gmi, True,
                powiat = self.powiat, place_name = self.name)
        OSM_Place.gmi_matched += 1
    elif is_in_parts:
        for part in is_in_parts:
            gmi = Gmina.try_by_name(part, False,
                    powiat = self.powiat, place_name = self.name)
            if gmi:
                # Store under `gmina`, the attribute every later check reads.
                self.gmina = gmi
                OSM_Place.gmi_matched += 1
                break
    if self.gmina:
        reporting.output_msg("gmi_set", u"%s jest w %s"
                % (self.name, self.gmina.full_name()), self)
        if self.powiat:
            if self.gmina.powiat != self.powiat:
                reporting.output_msg("errors", u"%s: Gmina nie pasuje do powiatu"
                        % (self,))
                self.gmina = None
        else:
            # No powiat known yet: take it from the gmina.
            self.powiat = self.gmina.powiat

    # --- teryt:simc ---
    if "teryt:simc" in tags:
        try:
            self.simc_id = tags["teryt:simc"]
        except ValueError:
            reporting.output_msg("errors", u"Nieprawidłowa wartość teryt:simc: %r" % (
                tags["teryt:simc"],))
        if self.simc_id:
            try:
                self.simc_place = SIMC_Place.by_id(self.simc_id)
            except KeyError:
                reporting.output_msg("errors", u"wartość teryt:simc nie istnieje w bazie SIMC")
            if self.simc_id in self._by_simc_id:
                reporting.output_msg("errors", u"Powtórzony kod SIMC w danych OSM: %r (%r and %r)" % (
                    self.simc_id, self, self._by_simc_id[self.simc_id]), self)
            else:
                self._by_simc_id[self.simc_id] = self
        if self.simc_place:
            gmina = self.simc_place.gmina
            if (self.gmina and gmina != self.gmina
                    or self.powiat and gmina.powiat != self.powiat
                    or self.wojewodztwo and gmina.wojewodztwo != self.wojewodztwo):
                reporting.output_msg("errors", u"%s: teryt:simc nie zgadza się z położeniem wynikającym z innych tagów"
                        u" (%r != %r | %r != %r | %r != %r)" % (self,
                            self.gmina, gmina, self.powiat, gmina.powiat,
                            self.wojewodztwo, gmina.wojewodztwo))
            else:
                self.gmina = self.simc_place.gmina
                self.powiat = self.simc_place.powiat
                self.wojewodztwo = self.simc_place.wojewodztwo
            reporting.output_msg("preassigned", u"%r ma już przypisany rekord SIMC: %r"
                    % (self, self.simc_place), self)

    # --- teryt:terc ---
    if "teryt:terc" in tags:
        try:
            self.terc_id = tags["teryt:terc"]
        except Exception:
            # Narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            reporting.output_msg("errors", u"Błędny kod teryt:terc: %r" % (tags["teryt:terc"],))
        if self.terc_id:
            if self.simc_place and self.terc_id != self.simc_place.terc_id:
                reporting.output_msg("errors", u"teryt:terc nie zgadza się z teryt:simc")
            else:
                try:
                    gmina = Gmina.by_code(self.terc_id)
                    if (self.gmina and gmina != self.gmina
                            or self.powiat and gmina.powiat != self.powiat
                            or self.wojewodztwo and gmina.wojewodztwo != self.wojewodztwo):
                        reporting.output_msg("errors", u"%s: teryt:terc nie zgadza się"
                                u" z położeniem wynikającym z innych tagów"
                                u" (%r != %r | %r != %r | %r != %r)" % (self,
                                    self.gmina, gmina, self.powiat, gmina.powiat,
                                    self.wojewodztwo, gmina.wojewodztwo))
                    if gmina and not self.gmina:
                        self.gmina = gmina
                        self.powiat = gmina.powiat
                        self.wojewodztwo = gmina.wojewodztwo
                except KeyError:
                    # An unknown TERC code is ignored, as before.
                    pass

    # --- class-level indexes ---
    self._by_id[self.id] = self
    if self.name:
        add_to_list_dict(self._by_name, self.name.lower(), self)
    add_to_list_dict(self._by_type, self.type, self)
def update(self):
    """Bring the teryt:* and is_in* tags in line with the matched SIMC place.

    Returns False immediately when the place has no SIMC record assigned.
    Otherwise the tags in self.tags are updated in place, the names of the
    changed tags are reported on the "info" channel, and True is returned
    when at least one tag was set or removed.

    An existing "is_in" value is only overwritten when every one of its
    parts also appears in the newly computed value; otherwise it is left
    alone and a warning is reported.

    NOTE(review): block nesting was reconstructed from a whitespace-mangled
    source; confirm that the powiat part of is_in is independent of the
    gmina condition.
    """
    if not self.simc_place:
        return False
    reporting = Reporting()
    updated = []
    tags = self.tags

    def set_tag(key, value):
        # Set the tag unless it already holds this value.
        if key not in tags or tags[key] != value:
            updated.append(key)
            tags[key] = value

    def drop_tag(key):
        # Remove the tag if it is present.
        if key in tags:
            updated.append(key)
            del tags[key]

    set_tag("teryt:simc", self.simc_place.id)
    set_tag("teryt:terc", self.gmina.code)
    set_tag("teryt:rm", self.simc_place.rm)
    drop_tag("teryt:date")
    set_tag("teryt:stan_na", self.simc_place.date)
    if self.simc_place.parent:
        set_tag("teryt:sympod", self.simc_place.parent.id)
    else:
        drop_tag("teryt:sympod")

    # Build the new is_in value, innermost container first.
    is_in = []
    is_in_places = {}
    if self.powiat.is_capital and self.name != "Warszawa":
        is_in_places["city"] = "Warszawa"
    parent = self.simc_place.parent
    while parent:
        is_in.append(parent.name)
        parent_place = self._by_simc_id.get(parent.id)
        if parent_place:
            is_in_places[parent_place.type] = parent_place.name
        parent = parent.parent
    if self.simc_place.rm not in ("00", "99", "95") and not self.gmina.is_city(self.name):
        is_in.append(self.gmina.full_name())
    if not self.powiat.is_city:
        is_in.append(self.powiat.full_name())
    elif self.powiat.is_capital and self.name != self.powiat.name:
        is_in.append(self.powiat.full_name())
    is_in += [self.wojewodztwo.full_name(), u"Poland"]
    is_in_tags = set([unicode(tag).lower() for tag in is_in])
    is_in = u", ".join(is_in)

    if "is_in" in tags:
        orig_is_in = tags['is_in'].replace(u";", u",")
        orig_is_in_tags = set([unicode(t).strip().lower()
                               for t in orig_is_in.split(u",")])
        if self.powiat.is_capital:
            orig_is_in_tags.discard("powiat st. warszawa")
    else:
        orig_is_in = None
        orig_is_in_tags = set()
    if not orig_is_in:
        updated.append("is_in")
        tags['is_in'] = is_in
    elif orig_is_in != is_in:
        if not (orig_is_in_tags - is_in_tags):
            # original is_in is a subset of new is_in
            updated.append("is_in")
            tags['is_in'] = is_in
        else:
            reporting.output_msg("warnings", u"Uwaga: nie zmienione"
                    u" is_in='%s' dla %r (nasze: %r, istniejące: %r, różnica: %r)"
                    % (tags['is_in'], self, is_in_tags, orig_is_in_tags,
                        (orig_is_in_tags - is_in_tags)))

    set_tag("is_in:country", "Poland")
    set_tag("is_in:province", self.wojewodztwo.full_name())
    set_tag("is_in:county", self.powiat.full_name())
    set_tag("is_in:municipality", self.gmina.full_name())
    for parent_type in ("village", "suburb", "city", "town"):
        parent_name = is_in_places.get(parent_type)
        tag_name = "is_in:" + parent_type
        # The real tag name is recorded, so the change report lists it.
        if parent_name:
            set_tag(tag_name, parent_name)
        else:
            drop_tag(tag_name)

    if updated:
        reporting.output_msg("info", u"%s: zmieniono: %s" % (
            self, u", ".join(updated)))
    return len(updated) > 0
def match_names(pass_no, places_to_match, grid = None):
    """Match OSM places to SIMC places by name.

    pass_no -- number of the matching pass; used in progress messages and
        in the name of the "ambigous<N>" reporting channel.
    places_to_match -- collection of OSM places; matched places and places
        whose name is unknown to SIMC are removed from it in place.
    grid -- optional grid; when given, candidates are narrowed down to the
        powiaty (and, if still ambiguous, gminy) of the cell the OSM place
        falls into.

    A match is accepted only when exactly one unassigned SIMC place of the
    right type remains and no other OSM place of the same name competes
    for it.  Returns a tuple (osm_matched, simc_matched) of sets.

    NOTE(review): block nesting was reconstructed from a whitespace-mangled
    source.
    """
    reporting = Reporting()
    places_count = len(places_to_match)
    if grid:
        reporting.progress_start(
            u"Dopasowywanie nazw %i miejsc, przebieg %i, z siatką %s"
                    % (places_count, pass_no, grid), places_count)
    else:
        reporting.progress_start(
            u"Dopasowywanie nazw %i miejsc, przebieg %i"
                    % (places_count, pass_no), places_count)
    osm_matched = set()
    simc_matched = set()
    ambiguous_channel = "ambigous%i" % (pass_no,)

    # Iterate over a snapshot: places_to_match is modified inside the loop.
    for osm_place in list(places_to_match):
        reporting.progress()
        if osm_place.name is None:
            reporting.output_msg("errors", u"%r: brak nazwy" % (osm_place,), osm_place)
            continue

        # Find matching entry in SIMC
        try:
            matching_simc_places = SIMC_Place.by_name(osm_place.name)
        except KeyError:
            reporting.output_msg("not_found", u"%s: nie znaleziono w TERYT" % (osm_place,),
                    osm_place)
            places_to_match.remove(osm_place)
            continue
        simc_places = [place for place in matching_simc_places
                       if place.type == osm_place.normalized_type
                       and place.osm_place is None]
        if not simc_places:
            types_found = [ place.type for place in matching_simc_places ]
            reporting.output_msg("bad_type", u"%s: nie znalezionow w TERYT"
                    u" obiektu właściwego typu (%r, znaleziono: %r)" % (
                        osm_place, osm_place.type, types_found), osm_place)
            continue

        cell = None
        if grid:
            try:
                cell = grid.get_cell(osm_place)
            except KeyError:
                pass
        if cell:
            simc_places = [ p for p in simc_places if p.powiat in cell.powiaty ]
            if len(simc_places) > 1:
                simc_places = [ p for p in simc_places if p.gmina in cell.gminy ]
            if not simc_places:
                reporting.output_msg("not_found", u"%s: nie znaleziono w TERYT miejsca"
                        u" pasującego do komórki %s" % (osm_place, cell), osm_place)
                continue
        if len(simc_places) > 1:
            if grid:
                reporting.output_msg(ambiguous_channel,
                        u"%s z OSM pasuje do wielu obiektów"
                        u" SIMC w komórce %s: %s" % (osm_place, cell,
                            u", ".join([str(p) for p in simc_places])), osm_place)
            else:
                reporting.output_msg(ambiguous_channel,
                        u"%s z OSM pasuje do wielu obiektów w SIMC: %s" % (osm_place,
                            u", ".join([str(p) for p in simc_places])), osm_place)
            continue
        simc_place = simc_places[0]

        # now check if reverse assignment is not ambigous
        matching_osm_places = OSM_Place.by_name(simc_place.name)
        confl_osm_places = []
        for place in matching_osm_places:
            if place is osm_place:
                continue
            if cell:
                try:
                    g_cell = grid.get_cell(place)
                except KeyError:
                    g_cell = None
                if g_cell is not cell:
                    continue
            if place.gmina and place.gmina != simc_place.gmina:
                continue
            if place.powiat and place.powiat != simc_place.powiat:
                continue
            if place.wojewodztwo and place.wojewodztwo != simc_place.wojewodztwo:
                continue
            confl_osm_places.append(place)
        if confl_osm_places:
            reporting.output_msg(ambiguous_channel,
                    u"%s z SIMC pasuje do wielu obiektów w OMS: %s" % (simc_place,
                        ", ".join([str(p) for p in confl_osm_places])), osm_place)
            continue
        if simc_place.osm_place:
            reporting.output_msg(ambiguous_channel,
                    u"%s z SIMC ma już przypisany obiekt OSM: %s" % (
                        simc_place, simc_place.osm_place), osm_place)
            # Do not overwrite an existing assignment after reporting it.
            continue

        # good match
        osm_place.assign_simc(simc_place)
        simc_place.assign_osm(osm_place)
        reporting.output_msg("match", u"%s w OSM to %s w SIMC" % (osm_place, simc_place),
                osm_place)
        osm_matched.add(osm_place)
        simc_matched.add(simc_place)
        places_to_match.remove(osm_place)

    reporting.progress_stop()
    reporting.output_msg("stats",
            u"Przebieg %i: znaleziono w SIMC %i z %i miejscowości OSM" % (
                pass_no, len(osm_matched), places_count))
    return osm_matched, simc_matched
def write_changes(updated_places, created_by):
    """Write one osmChange file and one comment file per wojewodztwo.

    updated_places -- places to export; each contributes a <node> with all
        its tags to the <modify> section of its wojewodztwo's document.
    created_by -- generator name; stored as the "generator" attribute and
        as the value of the "teryt:updated_by" tag of every node.

    All existing output/*.osc and output/*.comment files are deleted
    first.  The results are written to output/<wojewodztwo>.osc and
    output/<wojewodztwo>.comment.
    """
    for filename in glob.glob("output/*.osc") + glob.glob("output/*.comment"):
        os.unlink(filename)

    reporting = Reporting()
    reporting.progress_start(u"Preparing osmChange files", len(updated_places))
    woj_trees = {}
    for place in updated_places:
        woj_name = place.wojewodztwo.name
        if woj_name not in woj_trees:
            root = ElementTree.Element(u"osmChange", version = u"0.3",
                    generator = created_by)
            modify = ElementTree.SubElement(root, u"modify", version = u"0.3",
                    generator = created_by)
            woj_trees[woj_name] = ElementTree.ElementTree(root)
        else:
            # The <modify> element is the only child of the root.
            modify = woj_trees[woj_name].getroot()[0]
        node = ElementTree.SubElement(modify, u"node", id = place.id,
                lon = str(place.lon), lat = str(place.lat),
                version = place.version, changeset = place.changeset)
        for k, v in place.tags.items():
            # Any existing marker is replaced by the one added below.
            if k == 'teryt:updated_by':
                continue
            ElementTree.SubElement(node, u"tag", k = k, v = v)
        ElementTree.SubElement(node, u"tag", k = u"teryt:updated_by",
                v = created_by)
        reporting.progress()
    reporting.progress_stop()

    reporting = Reporting()
    reporting.progress_start(u"Writting osmChange files", len(woj_trees))
    for woj_name, tree in woj_trees.items():
        basename = os.path.join("output", woj_name.encode("utf-8"))
        tree.write(basename + ".osc", "utf-8")
        comment_file = codecs.open(basename + ".comment", "w", "utf-8")
        try:
            comment_file.write(
                u"TERYT import, województwo %s, prepared by %s" % (
                    woj_name, created_by) + u"\n")
        finally:
            # Close the handle even when the write fails.
            comment_file.close()
        reporting.progress()
    reporting.progress_stop()
for woj_name, tree in woj_trees.items(): basename = os.path.join("output", woj_name.encode("utf-8")) tree.write(basename + ".osc", "utf-8") comment_file = codecs.open(basename + ".comment", "w", "utf-8") print >> comment_file, u"TERYT import, województwo %s, prepared by %s" % ( woj_name, created_by) comment_file.close() reporting.progress() reporting.progress_stop() try: this_dir = os.path.dirname(__file__) version = subprocess.Popen(["svnversion", this_dir], stdout = subprocess.PIPE).communicate()[0].strip() setup_locale() reporting = Reporting() reporting.output_msg("info", u"teryt2osm combine.py version: %s" % (version,)) reporting.config_channel("errors", split_level = 1, mapping = True) reporting.config_channel("bad_type", split_level = 1, mapping = True, quiet = True) reporting.config_channel("not_found", split_level = 1, quiet = True, mapping = True) reporting.config_channel("ambigous1", split_level = 1, quiet = True, mapping = True) reporting.config_channel("ambigous2", split_level = 1, quiet = True, mapping = True) reporting.config_channel("ambigous3", split_level = 1, mapping = True) reporting.config_channel("match", split_level = 2, quiet = True, mapping = True) reporting.config_channel("bad_match", split_level = 1, quiet = True, mapping = True) reporting.config_channel("really_bad_match", split_level = 1, quiet = True, mapping = True) reporting.config_channel("gmi_set", quiet = True) reporting.config_channel("pow_set", quiet = True) reporting.config_channel("woj_set", quiet = True) reporting.config_channel("preassigned", quiet = True) for filename in ("data.osm", "SIMC.xml", "TERC.xml", "WMRODZ.xml"):