def import_data(self, metadata, output):
    """Synchronise this source's entities with the BBC TPEG feed.

    Fetches and parses the XML document at ``self._tpeg_url``, then creates
    or updates one ``Entity`` for every ``tpeg_message`` that carries at
    least one WGS84 coordinate. Entities of this source that were not saved
    during the run are deleted afterwards.

    ``metadata`` and ``output`` are accepted but not used here.
    """
    source, entity_type = self._get_source(), self._get_entity_type()

    parser = etree.XMLParser(load_dtd=True)
    parser.resolvers.add(BBCTPEGResolver())

    # Close the HTTP response explicitly instead of leaking the socket.
    response = urllib.urlopen(self._tpeg_url)
    try:
        xml = etree.parse(response, parser=parser)
    finally:
        response.close()

    # Index the entities we already hold by their TPEG message id.
    entities, seen = {}, set()
    for entity in Entity.objects.filter(source=source):
        if 'bbc-tpeg' in entity.identifiers:
            entities[entity.identifiers['bbc-tpeg']] = entity

    for message in xml.getroot().findall('tpeg_message'):
        road_traffic_message = message.find('road_traffic_message')
        message_id = road_traffic_message.attrib['message_id']

        try:
            entity = entities[message_id]
        except KeyError:
            entity = Entity()
            entities[message_id] = entity

        entity.source = source
        entity.primary_type = entity_type

        # Build a real list: it is measured, indexed and iterated twice.
        locs = [
            self._wgs84_to_point(wgs84)
            for wgs84 in road_traffic_message.findall(
                'location_container/location_coordinates/WGS84')
        ]
        if not locs:
            # Nothing to place on a map; the message is skipped unsaved.
            continue

        if len(locs) > 1:
            entity.geometry = LineString(*locs)
        else:
            entity.geometry = locs[0]

        # The representative location is the mean of all the points.
        entity.location = Point(
            sum(p.x for p in locs) / len(locs),
            sum(p.y for p in locs) / len(locs),
            srid=4326,
        )

        entity.metadata['bbc_tpeg'] = {
            'xml': etree.tostring(message),
            'severity': road_traffic_message.attrib['severity_factor'],
            'generated': road_traffic_message.attrib['message_generation_time'],
            'version': int(road_traffic_message.attrib['version_number']),
        }
        entity.save(identifiers={'bbc-tpeg': message_id})

        set_name_in_language(entity, 'en', title=message.find('summary').text)

        entity.all_types = [entity_type]
        entity.update_all_types_completion()
        seen.add(entity.pk)

    # Anything of ours that was not saved in this run is removed.
    for entity in Entity.objects.filter(source=source):
        if entity.pk not in seen:
            entity.delete()
def _load_from_csv(self, reader, entity_type, source):
    """Create or update one postcode ``Entity`` per usable row of ``reader``.

    Each row is expected to hold the postcode in column 0 and the easting
    and northing in columns 10 and 11. Rows whose coordinates are not
    integers are skipped. The entity is looked up by the space-free
    postcode under the ``postcode`` identifier scheme and is created when
    no match exists.

    :param reader: iterable of rows (sequences of strings).
    :param entity_type: type stored as the primary type and added to
        ``all_types``.
    :param source: source the entities belong to.
    """
    # Outward code (letter(s), digit, optional digit/letter) followed by the
    # inward code (digit + two letters). This covers the A9 9AA, A99 9AA,
    # AA9 9AA, AA99 9AA, A9A 9AA and AA9A 9AA layouts, and is anchored at
    # the end so trailing junk is not mistaken for a postcode.
    postcode_re = re.compile(r'([A-Z]{1,2}[0-9][0-9A-Z]?)([0-9][A-Z]{2})\Z')

    for line in reader:
        postcode_abbrev, (easting, northing) = line[0], line[10:12]
        postcode_abbrev = postcode_abbrev.replace(' ', '')

        # Now try to figure out where to put the space in
        match = postcode_re.match(postcode_abbrev)
        if match:
            postcode = '%s %s' % match.groups()
        else:
            postcode = postcode_abbrev

        try:
            easting, northing = int(easting), int(northing)
        except ValueError:
            # No usable coordinates on this row.
            continue

        try:
            entity = Entity.objects.get(source=source,
                                        _identifiers__scheme='postcode',
                                        _identifiers__value=postcode_abbrev)
        except Entity.DoesNotExist:
            entity = Entity(source=source)

        # Integer eastings/northings are British National Grid coordinates,
        # i.e. EPSG:27700, not Web Mercator (EPSG:3857).
        entity.location = Point(easting, northing, srid=27700)
        entity.geometry = entity.location
        entity.primary_type = entity_type

        identifiers = {
            'postcode': postcode_abbrev,
            'postcode-canonical': postcode,
        }
        entity.save(identifiers=identifiers)
        set_name_in_language(entity, 'en', title=postcode)
        entity.all_types.add(entity_type)
        entity.update_all_types_completion()
def endElement(self, name):
    """Import the OSM node or way whose closing tag has just been read.

    Elements other than ``node``/``way``, and elements seen while
    ``self.valid`` is false, are ignored. An element is also skipped when:

    * ``self.find_types`` raises ``ValueError`` (counted in
      ``ignore_count``);
    * it is a way with a node missing from ``self.node_locations``;
    * it is a University of Oxford library;
    * its tags mark it as disused or not in use.

    Otherwise the matching ``Entity`` is fetched or created. It is saved
    only when it has no stored OSM metadata or its stored timestamp is older
    than the element's; ``create_count``, ``modify_count`` and
    ``unchanged_count`` record the outcome.
    """
    if not (name in ('node', 'way') and self.valid):
        return

    try:
        types = self.find_types(self.tags)
    except ValueError:
        self.ignore_count += 1
        return

    # Ignore ways that lay partly outside our bounding box
    if name == 'way' and not all(node_id in self.node_locations
                                 for node_id in self.nodes):
        return

    # We already have these from OxPoints, so leave them alone.
    if (self.tags.get('amenity') == 'library'
            and self.tags.get('operator') == 'University of Oxford'):
        return

    # Ignore disused and under-construction entities
    if (self.tags.get('life_cycle', 'in_use') != 'in_use'
            or self.tags.get('disused') in ('1', 'yes', 'true')):
        return

    reset_queries()

    try:
        entity = Entity.objects.get(source=self.source,
                                    _identifiers__scheme='osm',
                                    _identifiers__value=self.id)
        created = False  # found an existing record
    except Entity.DoesNotExist:
        entity = Entity(source=self.source)
        created = True   # brand new record

    # Skip the write when what we hold is at least as recent.
    if 'osm' in entity.metadata:
        stored_timestamp = entity.metadata['osm'].get('attrs', {}).get('timestamp', '')
        if not stored_timestamp < self.attrs['timestamp']:
            self.unchanged_count += 1
            return

    if created:
        self.create_count += 1
    else:
        self.modify_count += 1

    if name == 'node':
        entity.location = Point(self.node_location, srid=4326)
        entity.geometry = entity.location
    elif name == 'way':
        points = [self.node_locations[n] for n in self.nodes]
        # A way that ends where it started is a closed ring.
        cls = LinearRing if self.nodes[0] == self.nodes[-1] else LineString
        entity.geometry = cls(points, srid=4326)
        # The location is the centre of the way's bounding box.
        lons = [lon for lon, lat in points]
        lats = [lat for lon, lat in points]
        entity.location = Point((min(lons) + max(lons)) / 2,
                                (min(lats) + max(lats)) / 2,
                                srid=4326)
    else:
        raise AssertionError("There should be no other types of entity we're to deal with.")

    names = dict()
    for lang_code, lang_name in settings.LANGUAGES:
        with override(lang_code):
            if '-' in lang_code:
                tags_to_try = ('name:%s' % lang_code,
                               'name:%s' % lang_code.split('-')[0],
                               'name', 'operator')
            else:
                tags_to_try = ('name:%s' % lang_code, 'name', 'operator')

            # A separate local keeps the ``name`` argument intact.
            title = None
            for tag_to_try in tags_to_try:
                if self.tags.get(tag_to_try):
                    title = self.tags.get(tag_to_try)
                    break

            if title is None:
                try:
                    title = reverse_geocode(*entity.location)[0]['name']
                    if not title:
                        raise IndexError
                    title = u"↝ %s" % title
                except IndexError:
                    # Use the entity's own location: for a way,
                    # self.node_location is not the way's position.
                    title = u"↝ %f, %f" % (entity.location.y, entity.location.x)

            names[lang_code] = title

    entity.metadata['osm'] = {
        'attrs': dict(self.attrs),
        # Tag keys have ':' replaced with '_' before being stored.
        'tags': dict((key.replace(':', '_'), value)
                     for key, value in self.tags.items()),
    }
    entity.primary_type = self.entity_types[types[0]]

    entity.save(identifiers={'osm': self.id})

    for lang_code, title in names.items():
        set_name_in_language(entity, lang_code, title=title)

    entity.all_types = [self.entity_types[et] for et in types]
    entity.update_all_types_completion()
def add_stop(self, meta, entity_type, source):
    """Create or update the ``Entity`` for one NaPTAN stop.

    :param meta: dict of stop attributes; ``atco-code``, ``longitude`` and
        ``latitude`` are required, the other keys read here are optional.
        Its ``naptan-code`` value, when present, is rewritten in place.
    :param entity_type: type stored as the primary and only type.
    :param source: source the entity (and its stop-area groups) belong to.
    :returns: the saved entity, or ``None`` when the stop lies outside
        ``self.areas`` or is marked as unused.
    """
    atco_code = meta["atco-code"]

    # Check this entity is in an area
    if self.areas is not None:
        if not any(atco_code.startswith(area) for area in self.areas):
            return

    # See if we're updating an existing object, or creating a new one
    lookup = dict(source=source,
                  _identifiers__scheme="atco",
                  _identifiers__value=atco_code)
    try:
        entity = Entity.objects.get(**lookup)
    except Entity.DoesNotExist:
        entity = Entity(source=source)
    except Entity.MultipleObjectsReturned:
        # Handle clashes
        Entity.objects.filter(**lookup).delete()
        entity = Entity(source=source)

    common_name, indicator, locality, street = [
        meta.get(k) for k in ("common-name", "indicator", "locality-ref", "street")
    ]

    if (
        (common_name or "").endswith(" DEL")
        or (indicator or "").lower() == "not in use"
        or "to define route" in (common_name or "")
        or "to def rte" in (common_name or "")
        or "to def route" in (common_name or "")
        or "def.rte" in (common_name or "")
    ):
        # In the NaPTAN list, but indicates it's an unused stop
        return

    if self.meta["stop-type"] in ("MET", "GAT", "FER", "RLY"):
        names = self.names
    else:
        names = dict()
        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):
                # Try and find one in our preferred order
                for lang in (lang_code, "en", None):
                    if lang in self.names:
                        common_name = self.names[lang]
                        break

                # Expand abbreviations in indicators
                # NOTE(review): ``indicator`` (and ``street`` below) are
                # rebound here, so later languages start from the previous
                # language's expansion and the "stop" identifier check
                # further down sees the last one. Kept as-is to preserve
                # behaviour; confirm whether this is intended.
                if indicator is not None:
                    # Built once per language, inside the override, so the
                    # translations match the active language.
                    expansions = {
                        # Translators: This is referring to bus stop location descriptions
                        "op": ugettext("Opposite"),
                        "opp": ugettext("Opposite"),
                        "opposite": ugettext("Opposite"),
                        # Translators: This is referring to bus stop location descriptions
                        "adj": ugettext("Adjacent"),
                        # Translators: This is referring to bus stop location descriptions
                        "outside": ugettext("Outside"),
                        "o/s": ugettext("Outside"),
                        # Translators: This is referring to bus stop location descriptions
                        "nr": ugettext("Near"),
                        # Translators: This is referring to bus stop location descriptions
                        "inside": ugettext("Inside"),
                        # Translators: This is referring to bus stop location descriptions
                        "stp": ugettext("Stop"),
                    }
                    indicator = " ".join(
                        expansions.get(part.lower(), part)
                        for part in indicator.split()
                    )

                stop_type = self.meta["stop-type"]
                if indicator is None and stop_type in ("AIR", "FTD", "RSE", "TMU", "BCE"):
                    # Translators: This is referring to public transport entities
                    title = ugettext("Entrance to %s") % common_name
                elif indicator is None and stop_type in ("FBT",):
                    # Translators: This is referring to ferry ports
                    title = ugettext("Berth at %s") % common_name
                elif indicator is None and stop_type in ("RPL", "PLT"):
                    # Translators: This is referring to rail and metro stations
                    title = ugettext("Platform at %s") % common_name
                elif (indicator is not None and indicator.lower() != "none"
                        and indicator not in common_name):
                    title = indicator + " " + common_name
                else:
                    title = common_name

                if street is not None and street != "-":
                    # Deal with all-caps street names
                    if street.upper() == street:
                        # Keep the first character after each single space
                        # (and the very first one); lower-case the rest.
                        fixed = []
                        word_start = True
                        for letter in street:
                            if word_start:
                                word_start = False
                                fixed.append(letter)
                            elif letter == " ":
                                word_start = True
                                fixed.append(letter)
                            else:
                                fixed.append(letter.lower())
                        street = "".join(fixed)

                    if street not in title:
                        title += ", " + street

                locality_lang = self.nptg_localities.get(locality)
                if locality_lang is not None:
                    for lang in (lang_code, "en", "cy"):
                        if lang in locality_lang:
                            if locality_lang[lang] != street:
                                title += ", " + locality_lang[lang]
                            break

                names[lang_code] = title

    entity.primary_type = entity_type

    if not entity.metadata:
        entity.metadata = {}
    entity.metadata["naptan"] = meta
    entity.location = Point(float(meta["longitude"]), float(meta["latitude"]), srid=4326)
    entity.geometry = entity.location

    if atco_code in self.tube_references:
        entity.metadata["london-underground-identifiers"] = self.tube_references[atco_code]

    identifiers = {"atco": atco_code}
    if "naptan-code" in meta:
        # Each character is passed through self.naptan_dial and the result
        # is written back into ``meta``.
        meta["naptan-code"] = "".join(map(self.naptan_dial, meta["naptan-code"]))
        identifiers["naptan"] = meta["naptan-code"]
    if "plate-code" in meta:
        identifiers["plate"] = meta["plate-code"]
    if "crs" in meta:
        identifiers["crs"] = meta["crs"]
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    if indicator is not None and re.match(r"Stop [A-Z]\d\d?", indicator):
        identifiers["stop"] = indicator[5:]

    entity.save(identifiers=identifiers)

    for lang_code, name in names.items():
        # This is the NaPTAN, so default to English
        if lang_code is None:
            lang_code = "en"
        set_name_in_language(entity, lang_code, title=name)

    entity.all_types = (entity_type,)
    entity.update_all_types_completion()

    # Rebuild group membership from the current stop areas.
    entity.groups.clear()
    for stop_area in self.stop_areas:
        sa, created = EntityGroup.objects.get_or_create(source=source, ref_code=stop_area)
        entity.groups.add(sa)
    entity.save()

    return entity