def _load_from_csv(self, reader, entity_type, source):
    """Create or refresh one postcode Entity per usable row of ``reader``.

    Column 0 of each row is read as the postcode and columns 10 and 11 as
    the easting and northing.  Rows whose coordinates cannot be parsed as
    integers are skipped.
    """
    for row in reader:
        raw_postcode = row[0]
        easting, northing = row[10:12]

        # Display form: keep the value as supplied when it already has a
        # space in front of the last three characters, otherwise insert one.
        if raw_postcode[-4] == ' ':
            postcode = raw_postcode
        else:
            postcode = '%s %s' % (raw_postcode[:-3], raw_postcode[-3:])

        # Lookup form: no spaces at all.
        compact_postcode = raw_postcode.replace(' ', '')

        try:
            grid_x = int(easting)
            grid_y = int(northing)
        except ValueError:
            continue

        try:
            entity = Entity.objects.get(
                source=source,
                _identifiers__scheme='postcode',
                _identifiers__value=compact_postcode)
        except Entity.DoesNotExist:
            entity = Entity(source=source)

        entity.title = postcode
        entity.primary_type = entity_type
        entity.location = Point(grid_x, grid_y, srid=27700)
        entity.geometry = entity.location

        entity.save(identifiers={
            'postcode': compact_postcode,
            'postcode-canonical': postcode,
        })
        entity.all_types.add(entity_type)
        entity.update_all_types_completion()
def _get_entity(self, stop_code, stop_name, source, entity_type):
    """Finds a bus stop entity or creates one if it cannot be found.

    The stop is looked up by its 'naptan' identifier using ``get_entity``.
    If that lookup raises, a direct query is tried instead; when the query
    finds nothing a fresh Entity is built, and when it finds several they
    are all deleted and replaced by a fresh one.  Whatever the fallback
    produced is then given ``entity_type``, ``source``, the NaPTAN
    identifier and ``stop_name`` as its English title.

    Returns the Entity.
    """
    scheme = 'naptan'
    try:
        entity = get_entity(scheme, stop_code)
    except Exception:
        # Deliberately broad (any lookup failure triggers the fallback), but
        # unlike a bare ``except:`` it lets KeyboardInterrupt and SystemExit
        # propagate.
        try:
            entity = Entity.objects.get(_identifiers__scheme=scheme,
                                        _identifiers__value=stop_code)
            logger.debug("Found Entity: %s", entity)
        except Entity.DoesNotExist:
            logger.debug("Entity does not exist: %s-%s",
                         stop_code, stop_name)
            entity = Entity()
        except Entity.MultipleObjectsReturned:
            logger.warning("Multiple Entities found for : %s-%s",
                           stop_code, stop_name)
            Entity.objects.filter(_identifiers__scheme=scheme,
                                  _identifiers__value=stop_code).delete()
            entity = Entity()

        entity.primary_type = entity_type
        entity.source = source
        # Save before touching related objects: a new Entity has no primary
        # key yet, and the name and all_types relations need one.
        entity.save(identifiers={scheme: stop_code})
        set_name_in_language(entity, 'en', title=stop_name)
        entity.all_types = (entity_type,)
    return entity
def _load_from_csv(self, reader, entity_type, source):
    """Create or refresh one postcode Entity per usable row of ``reader``.

    Column 0 of each row is read as the postcode and columns 10 and 11 as
    the easting and northing.  Rows whose coordinates cannot be parsed as
    integers are skipped.  Each entity is saved with the space-free
    postcode as its 'postcode' identifier and the spaced form as
    'postcode-canonical', and the spaced form becomes its English title.
    """
    # Outward part (A9, A99, AA9, AA99, A9A or AA9A) followed by the inward
    # part (9AA).  Anchored at the end so that a valid-looking prefix
    # followed by extra characters is left untouched instead of being split.
    postcode_pattern = re.compile(
        r'([A-Z]{1,2}[0-9][0-9A-Z]?)([0-9][A-Z]{2})$')

    for line in reader:
        postcode_abbrev, (easting, northing) = line[0], line[10:12]
        postcode_abbrev = postcode_abbrev.replace(' ', '')

        # Work out where the space belongs; unrecognised values are kept
        # as they are.
        match = postcode_pattern.match(postcode_abbrev)
        if match:
            postcode = '%s %s' % match.groups()
        else:
            postcode = postcode_abbrev

        try:
            easting, northing = int(easting), int(northing)
        except ValueError:
            continue

        try:
            entity = Entity.objects.get(
                source=source,
                _identifiers__scheme='postcode',
                _identifiers__value=postcode_abbrev)
        except Entity.DoesNotExist:
            entity = Entity(source=source)

        entity.location = Point(easting, northing, srid=27700)
        entity.geometry = entity.location
        entity.primary_type = entity_type

        identifiers = {
            'postcode': postcode_abbrev,
            'postcode-canonical': postcode,
        }
        entity.save(identifiers=identifiers)
        set_name_in_language(entity, 'en', title=postcode)
        entity.all_types.add(entity_type)
        entity.update_all_types_completion()
def _get_entity(self, stop_code, stop_name, source, entity_type):
    """Finds a bus stop entity or creates one if it cannot be found.

    The stop is looked up by its 'naptan' identifier using ``get_entity``.
    If that lookup raises, a direct query is tried instead; when the query
    finds nothing a fresh Entity is built, and when it finds several they
    are all deleted and replaced by a fresh one.  Whatever the fallback
    produced is then given ``entity_type``, ``source``, the NaPTAN
    identifier and ``stop_name`` as its English title.

    Returns the Entity.
    """
    scheme = 'naptan'
    try:
        entity = get_entity(scheme, stop_code)
    except Exception:
        # Deliberately broad (any lookup failure triggers the fallback), but
        # unlike a bare ``except:`` it lets KeyboardInterrupt and SystemExit
        # propagate.
        try:
            entity = Entity.objects.get(_identifiers__scheme=scheme,
                                        _identifiers__value=stop_code)
            logger.debug("Found Entity: %s", entity)
        except Entity.DoesNotExist:
            logger.debug("Entity does not exist: %s-%s",
                         stop_code, stop_name)
            entity = Entity()
        except Entity.MultipleObjectsReturned:
            logger.warning("Multiple Entities found for : %s-%s",
                           stop_code, stop_name)
            Entity.objects.filter(_identifiers__scheme=scheme,
                                  _identifiers__value=stop_code).delete()
            entity = Entity()

        entity.primary_type = entity_type
        entity.source = source
        # Save before touching related objects: a new Entity has no primary
        # key yet, and the name and all_types relations need one.
        entity.save(identifiers={scheme: stop_code})
        set_name_in_language(entity, 'en', title=stop_name)
        entity.all_types = (entity_type,)
    return entity
def _load_from_csv(self, reader, entity_type, source):
    """Create or refresh one postcode Entity per usable row of ``reader``.

    Column 0 of each row is read as the postcode and columns 10 and 11 as
    the easting and northing.  Rows whose coordinates cannot be parsed as
    integers are skipped.  Each entity is saved with the space-free
    postcode as its 'postcode' identifier and the spaced form as
    'postcode-canonical', and the spaced form becomes its English title.
    """
    # Outward part (A9, A99, AA9, AA99, A9A or AA9A) followed by the inward
    # part (9AA).  Anchored at the end so that a valid-looking prefix
    # followed by extra characters is left untouched instead of being split.
    postcode_pattern = re.compile(
        r'([A-Z]{1,2}[0-9][0-9A-Z]?)([0-9][A-Z]{2})$')

    for line in reader:
        postcode_abbrev, (easting, northing) = line[0], line[10:12]
        postcode_abbrev = postcode_abbrev.replace(' ', '')

        # Work out where the space belongs; unrecognised values are kept
        # as they are.
        match = postcode_pattern.match(postcode_abbrev)
        if match:
            postcode = '%s %s' % match.groups()
        else:
            postcode = postcode_abbrev

        try:
            easting, northing = int(easting), int(northing)
        except ValueError:
            continue

        try:
            entity = Entity.objects.get(
                source=source,
                _identifiers__scheme='postcode',
                _identifiers__value=postcode_abbrev)
        except Entity.DoesNotExist:
            entity = Entity(source=source)

        entity.location = Point(easting, northing, srid=27700)
        entity.geometry = entity.location
        entity.primary_type = entity_type

        identifiers = {
            'postcode': postcode_abbrev,
            'postcode-canonical': postcode,
        }
        entity.save(identifiers=identifiers)
        set_name_in_language(entity, 'en', title=postcode)
        entity.all_types.add(entity_type)
        entity.update_all_types_completion()
def import_data(self, metadata, output):
    """Imports places data from OxPoints.

    Downloads the JSON document at ``self.ALL_OXPOINTS`` and creates or
    updates one Entity per item whose type is listed in
    ``self.OXPOINTS_TYPES``.  Parent links that point at an item not yet
    processed are resolved in a second pass once every entity exists.

    ``output`` is not used.  Returns ``metadata`` unchanged.
    """
    self.entity_types = self._get_entity_types()

    # Close the HTTP response explicitly instead of leaving it to the
    # garbage collector.
    response = urllib.urlopen(self.ALL_OXPOINTS)
    try:
        data = simplejson.load(response)
    finally:
        response.close()

    source = self._get_source()
    entities, parents = {}, []

    for datum in data:
        # The identifier is the last path segment of the URI and the type
        # is the fragment of the type URI.
        oxpoints_id = datum['uri'].rsplit('/')[-1]
        oxpoints_type = datum['type'].rsplit('#')[-1]
        if oxpoints_type not in self.OXPOINTS_TYPES:
            continue

        try:
            entity = Entity.objects.get(source=source,
                                        _identifiers__scheme='oxpoints',
                                        _identifiers__value=oxpoints_id)
        except Entity.DoesNotExist:
            entity = Entity(source=source)
        except Entity.MultipleObjectsReturned:
            # Duplicates: drop them all and start again from scratch.
            Entity.objects.filter(source=source,
                                  _identifiers__scheme='oxpoints',
                                  _identifiers__value=oxpoints_id).delete()
            entity = Entity(source=source)

        entity.title = datum.get('oxp_fullyQualifiedTitle',
                                 datum.get('dc_title', ''))
        entity.primary_type = self.entity_types[
            self.OXPOINTS_TYPES[oxpoints_type][0]]

        if 'geo_lat' in datum and 'geo_long' in datum:
            entity.location = Point(datum['geo_long'], datum['geo_lat'],
                                    srid=4326)
        else:
            entity.location = None

        if 'dct_isPartOf' in datum:
            parent_id = datum['dct_isPartOf']['uri'].rsplit('/')[-1]
            if parent_id in entities:
                entity.parent = entities[parent_id]
            else:
                # Parent not processed yet; link it up after the main loop.
                parents.append((oxpoints_id, parent_id))
        else:
            entity.parent = None

        entity.metadata['oxpoints'] = datum

        identifiers = {
            'oxpoints': oxpoints_id,
            'uri': datum['uri'],
        }
        if 'oxp_hasOUCSCode' in datum:
            identifiers['oucs'] = datum['oxp_hasOUCSCode']
        if 'oxp_hasOLISCode' in datum:
            identifiers['olis'] = datum['oxp_hasOLISCode']

        entity.save(identifiers=identifiers)
        entity.all_types = [self.entity_types[t]
                            for t in self.OXPOINTS_TYPES[oxpoints_type]]
        entity.update_all_types_completion()

        entities[oxpoints_id] = entity

    for oxpoints_id, parent_id in parents:
        # Only a missing child or parent is tolerated here; an error raised
        # while saving is no longer silently discarded.
        try:
            child, parent = entities[oxpoints_id], entities[parent_id]
        except KeyError:
            continue
        child.parent = parent
        child.save()

    return metadata
def endElement(self, name):
    """Handle the end of an element, importing finished nodes and ways.

    Only 'node' and 'way' elements are processed, and only while
    ``self.valid`` is set.  The element is skipped when
    ``self.find_types`` rejects its tags, when a way references a node with
    no recorded location, when it is a University of Oxford library, or
    when it is tagged as disused / not in use.  Otherwise the matching
    Entity is created or, if the element's timestamp is newer than the
    stored one, updated; ``ignore_count``, ``create_count``,
    ``modify_count`` and ``unchanged_count`` are maintained accordingly.
    """
    if not (name in ('node', 'way') and self.valid):
        return

    try:
        types = self.find_types(self.tags)
    except ValueError:
        self.ignore_count += 1
        return

    # Ignore ways that lie partly outside our bounding box
    if name == 'way' and not all(id in self.node_locations
                                 for id in self.nodes):
        return

    # We already have these from OxPoints, so leave them alone.
    if self.tags.get('amenity') == 'library' and \
            self.tags.get('operator') == 'University of Oxford':
        return

    # Ignore disused and under-construction entities
    if self.tags.get('life_cycle', 'in_use') != 'in_use' or \
            self.tags.get('disused') in ('1', 'yes', 'true'):
        return

    # An entity that was found is an update; only one we had to build is
    # counted as created.
    try:
        entity = Entity.objects.get(source=self.source,
                                    _identifiers__scheme='osm',
                                    _identifiers__value=self.id)
        created = False
    except Entity.DoesNotExist:
        entity = Entity(source=self.source)
        created = True

    if 'osm' not in entity.metadata or \
            entity.metadata['osm'].get('attrs', {}).get('timestamp', '') \
            < self.attrs['timestamp']:

        if created:
            self.create_count += 1
        else:
            self.modify_count += 1

        if name == 'node':
            entity.location = Point(self.node_location, srid=4326)
            entity.geometry = entity.location
        elif name == 'way':
            coords = [self.node_locations[n] for n in self.nodes]
            # A way that ends where it starts is a closed ring.
            cls = LinearRing if self.nodes[0] == self.nodes[-1] \
                else LineString
            entity.geometry = cls(coords, srid=4326)
            # The location of a way is the centre of its bounding box.
            lons = [coord[0] for coord in coords]
            lats = [coord[1] for coord in coords]
            entity.location = Point((min(lons) + max(lons)) / 2,
                                    (min(lats) + max(lats)) / 2,
                                    srid=4326)
        else:
            raise AssertionError("There should be no other types of entity we're to deal with.")

        # Work out a title for every configured language.
        names = dict()
        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):
                if '-' in lang_code:
                    # Regional variant: also try the bare language code.
                    tags_to_try = ('name:%s' % lang_code,
                                   'name:%s' % lang_code.split('-')[0],
                                   'name', 'operator')
                else:
                    tags_to_try = ('name:%s' % lang_code, 'name',
                                   'operator')

                title = None
                for tag_to_try in tags_to_try:
                    if self.tags.get(tag_to_try):
                        title = self.tags.get(tag_to_try)
                        break

                if title is None:
                    # No usable tag: fall back to a reverse-geocoded name,
                    # or failing that to the raw coordinates.
                    try:
                        title = reverse_geocode(*entity.location)[0]['name']
                        if not title:
                            raise IndexError
                        title = u"↝ %s" % title
                    except IndexError:
                        # NOTE(review): this reads self.node_location even
                        # for ways - confirm it holds a sensible value then.
                        title = u"↝ %f, %f" % (self.node_location[1],
                                               self.node_location[0])

                names[lang_code] = title

        entity.metadata['osm'] = {
            'attrs': dict(self.attrs),
            # Colons in tag names are replaced with underscores.
            'tags': dict((key.replace(':', '_'), value)
                         for key, value in self.tags.items()),
        }
        entity.primary_type = self.entity_types[types[0]]

        entity.save(identifiers={'osm': self.id})

        for lang_code, title in names.items():
            set_name_in_language(entity, lang_code, title=title)

        entity.all_types = [self.entity_types[et] for et in types]
        entity.update_all_types_completion()
    else:
        self.unchanged_count += 1
def add_stop(self, meta, entity_type, source, is_entrance):
    """Create or update the Entity for the stop described by ``meta``.

    ``meta`` is a dict of stop attributes; 'atco-code', 'longitude' and
    'latitude' are required, the other keys read here are optional.  A title
    is built for every language in ``settings.LANGUAGES`` from the common
    name, indicator, street and locality, except for stop types 'MET',
    'GAT', 'FER' and 'RLY', which use ``self.names`` as-is.

    Returns the saved Entity, or None when the stop is outside
    ``self.areas`` or is marked as unused.
    """

    # Check this entity is in an area
    if self.areas != None:
        in_area = False
        for area in self.areas:
            if meta['atco-code'].startswith(area):
                in_area = True
        if not in_area:
            return

    # See if we're updating an existing object, or creating a new one
    try:
        entity = Entity.objects.get(source=source,
                                    _identifiers__scheme='atco',
                                    _identifiers__value=meta['atco-code'])
    except Entity.DoesNotExist:
        entity = Entity(source=source)
    except Entity.MultipleObjectsReturned:
        # Handle clashes
        Entity.objects.filter(source=source,
                              _identifiers__scheme='atco',
                              _identifiers__value=meta['atco-code']).delete()
        entity = Entity(source=source)

    # Any of these may be None when the key is absent from meta.
    common_name, indicator, locality, street = [meta.get(k) for k in
                ('common-name', 'indicator', 'locality-ref', 'street')]

    if (common_name or '').endswith(' DEL') or \
       (indicator or '').lower() == 'not in use' or \
       'to define route' in (common_name or '') or \
       'to def rte' in (common_name or '') or \
       'to def route' in (common_name or '') or \
       'def.rte' in (common_name or ''):
        # In the NaPTAN list, but indicates it's an unused stop
        return

    # NOTE(review): the stop type is read from self.meta, not from the meta
    # argument - presumably the same data; confirm against the caller.
    if self.meta['stop-type'] in ('MET','GAT','FER', 'RLY'):
        names = self.names
    else:

        names = dict()

        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):

                # Try and find one in our preferred order
                for lang in (lang_code, 'en', None):
                    if lang in self.names:
                        common_name = self.names[lang]
                        break

                # Expand abbreviations in indicators
                # (indicator is reassigned here, so later languages start
                # from the already-expanded text)
                if indicator is not None:
                    parts = []
                    for part in indicator.split():
                        parts.append({
                            # Translators: This is referring to bus stop location descriptions
                            'op': ugettext('Opposite'),
                            'opp': ugettext('Opposite'),
                            'opposite': ugettext('Opposite'),
                            # Translators: This is referring to bus stop location descriptions
                            'adj': ugettext('Adjacent'),
                            # Translators: This is referring to bus stop location descriptions
                            'outside': ugettext('Outside'),
                            'o/s': ugettext('Outside'),
                            # Translators: This is referring to bus stop location descriptions
                            'nr': ugettext('Near'),
                            # Translators: This is referring to bus stop location descriptions
                            'inside': ugettext('Inside'),
                            # Translators: This is referring to bus stop location descriptions
                            'stp': ugettext('Stop'),
                        }.get(part.lower(), part))
                    indicator = ' '.join(parts)

                # Without an indicator, some stop types get a descriptive
                # prefix; otherwise the indicator (if useful) is prepended.
                if indicator is None and self.meta['stop-type'] in ('AIR', 'FTD', 'RSE', 'TMU', 'BCE'):
                    # Translators: This is referring to public transport entities
                    title = ugettext('Entrance to %s') % common_name

                elif indicator is None and self.meta['stop-type'] in ('FBT',):
                    # Translators: This is referring to ferry ports
                    title = ugettext('Berth at %s') % common_name

                elif indicator is None and self.meta['stop-type'] in ('RPL','PLT'):
                    # Translators: This is referring to rail and metro stations
                    title = ugettext('Platform at %s') % common_name

                elif indicator is not None and indicator.lower() != 'none' \
                     and indicator not in common_name:
                    title = indicator + ' ' + common_name

                else:
                    title = common_name

                if street not in (None, '-', '---'):
                    # Deal with all-caps street names
                    # (keep the first letter of each word, lower-case the rest)
                    if street.upper() == street:
                        fixedstreet = ''
                        wordstart = True
                        for letter in street:
                            if wordstart:
                                wordstart = False
                                fixedstreet += letter
                                continue
                            elif letter == ' ':
                                wordstart = True
                                fixedstreet += letter
                                continue
                            else:
                                fixedstreet += letter.lower()
                        street = fixedstreet

                    if street not in title:
                        title += ', ' + street

                # Append the locality name in the first available language,
                # unless it merely repeats the street.
                locality_lang = self.nptg_localities.get(locality)
                if locality_lang != None:
                    for lang in (lang_code, 'en', 'cy'):
                        if lang in locality_lang:
                            if locality_lang[lang] != street:
                                title += ', ' + locality_lang[lang]
                            break

                names[lang_code] = title

    entity.primary_type = entity_type
    entity.is_entrance = is_entrance

    if not entity.metadata:
        entity.metadata = {}
    entity.metadata['naptan'] = meta
    entity.location = Point(float(meta['longitude']), float(meta['latitude']), srid=4326)
    entity.geometry = entity.location

    if meta['atco-code'] in self.tube_references:
        entity.metadata['london-underground-identifiers'] = self.tube_references[meta['atco-code']]

    identifiers = {
        'atco': meta['atco-code'],
    }
    if 'naptan-code' in meta:
        # Every character of the code is passed through self.naptan_dial;
        # note that meta itself is updated with the converted code.
        meta['naptan-code'] = ''.join(map(self.naptan_dial, meta['naptan-code']))
        identifiers['naptan'] = meta['naptan-code']
    if 'plate-code' in meta:
        identifiers['plate'] = meta['plate-code']
    if 'crs' in meta:
        identifiers['crs'] = meta['crs']
    if 'tiploc' in meta:
        identifiers['tiploc'] = meta['tiploc']
    # An indicator such as "Stop A1" also yields a 'stop' identifier made of
    # everything after the "Stop " prefix.
    if indicator != None and re.match('Stop [A-Z]\d\d?', indicator):
        identifiers['stop'] = indicator[5:]

    entity.save(identifiers=identifiers)

    for lang_code, name in names.items():
        # This is the NaPTAN, so default to English
        if lang_code is None:
            lang_code = 'en'
        set_name_in_language(entity, lang_code, title=name)

    entity.all_types = (entity_type,)
    entity.update_all_types_completion()

    # Rebuild group membership from scratch using self.stop_areas.
    entity.groups.clear()
    for stop_area in self.stop_areas:
        sa, created = EntityGroup.objects.get_or_create(source=source, ref_code=stop_area)
        entity.groups.add(sa)
    entity.save()

    return entity
def _scrape(self, route, url, output):
    """Rebuild the list of stops on ``route`` from the HTML page at ``url``.

    The page is fetched with '&showall=1' appended and every row after the
    first of its first table is treated as one stop.  The route's existing
    stops are cleared and a StopOnRoute is created per row, in page order.
    Rows without a stop code are matched or created by stop name under the
    'acisroute' identifier; other rows are looked up by NaPTAN code, and an
    entity owned by this provider is created or refreshed when the lookup
    fails or returns one of our own entities.
    """
    # NOTE(review): this writes to self._output rather than the ``output``
    # argument, and passes the route object itself - confirm intended.
    self._output.write(route)

    url += '&showall=1'
    # Close the HTTP response once parsed instead of leaking it.
    response = urlopen(url)
    try:
        service = etree.parse(response, parser=etree.HTMLParser())
    finally:
        response.close()

    route.stops.clear()
    for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):

        try:
            stop_code = tr[1][0].text
        except IndexError:

            # Stops on ACIS Live that don't have codes, e.g., out of county
            # stops
            stop_name = tr[3][0].text
            try:
                entity = Entity.objects.get(source=self._get_source(),
                                            _identifiers__scheme='acisroute',
                                            _identifiers__value=stop_name)
            except Entity.DoesNotExist:
                entity = Entity(source=self._get_source())

            entity_type = self._get_entity_type()
            entity.primary_type = entity_type
            identifiers = {'acisroute': stop_name}
            entity.save(identifiers=identifiers)
            set_name_in_language(entity, 'en', title=stop_name)
            entity.all_types = (entity_type,)
            entity.update_all_types_completion()

        else:
            # TODO: Change identifier lookup based on ACIS region
            try:
                entity = get_entity('naptan', stop_code)
                if entity.source == self._get_source():
                    # Raise Http404 if this is a bus stop we came up with,
                    # so any name changes, etc, get processed
                    raise Http404()
            except Http404:
                # Out of zone bus stops with NaPTAN codes - alternatively,
                # the fake bus stops Oxontime made up for the TUBE route
                try:
                    entity = Entity.objects.get(source=self._get_source(),
                                                _identifiers__scheme='naptan',
                                                _identifiers__value=stop_code)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())
                identifiers = {'naptan': stop_code}
                entity_type = self._get_entity_type()
                entity.primary_type = entity_type
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=tr[3][0].text)
                entity.all_types = (entity_type,)
                entity.update_all_types_completion()
                entity.save()

        StopOnRoute.objects.create(route=route, entity=entity, order=i)
def add_stop(self, meta, entity_type, source):
    """Create or update the Entity for the stop described by ``meta``.

    ``meta`` is a dict of stop attributes; 'atco-code', 'stop-type',
    'longitude' and 'latitude' are required, while 'common-name',
    'landmark', 'indicator', 'street' and 'naptan-code' are optional.  The
    entity title is assembled from the common name, landmark, indicator and
    street.

    Returns the saved Entity, or None when the stop is flagged as deleted
    (common name ending in ' DEL') or not in use.
    """
    try:
        entity = Entity.objects.get(source=source,
                                    _identifiers__scheme='atco',
                                    _identifiers__value=meta['atco-code'])
    except Entity.DoesNotExist:
        entity = Entity(source=source)

    cnm, lmk, ind, street = [meta.get(k) for k in
                             ['common-name', 'landmark', 'indicator', 'street']]

    # .lower() must be called: comparing the bound method itself to a string
    # is always False, which let "not in use" stops through.
    if (cnm or '').endswith(' DEL') or (ind or '').lower() == 'not in use':
        return

    # Drop the landmark from the end of the indicator when it is repeated
    # there.
    if lmk and ind and ind.endswith(lmk) and len(ind) > len(lmk):
        ind = ind[:-len(lmk)]

    # Expand abbreviated indicators.
    # NOTE(review): the lookup is case- and whitespace-sensitive, so e.g.
    # "Opp" or an indicator left with a trailing space by the trimming above
    # is not expanded - confirm whether that is intended.
    ind = {
        'opp': 'Opposite',
        'opposite': 'Opposite',
        'adj': 'Adjacent to',
        'outside': 'Outside',
        'o/s': 'Outside',
        'nr': 'Near',
        'inside': 'Inside',
    }.get(ind, ind)

    if meta['stop-type'] == 'RSE':
        title = cnm
    elif (ind or '').lower() == 'corner':
        title = "Corner of %s and %s" % (street, lmk)
    elif cnm == street:
        if ind in ('Opposite', 'Adjacent to', 'Outside', 'Near'):
            title = "%s %s" % (ind, cnm)
        else:
            title = "%s, %s" % (ind, cnm)
    elif ind == lmk:
        title = "%s, %s" % (lmk, street)
    elif lmk != street:
        title = "%s %s, on %s" % (ind, lmk, street)
    else:
        title = "%s %s, %s" % (ind, lmk, cnm)

    entity.title = title
    entity.primary_type = entity_type

    if not entity.metadata:
        entity.metadata = {}
    entity.metadata['naptan'] = meta
    entity.location = Point(float(meta['longitude']),
                            float(meta['latitude']), srid=4326)
    entity.geometry = entity.location

    identifiers = {
        'atco': meta['atco-code'],
    }
    if 'naptan-code' in meta:
        # Every character of the code is passed through self.naptan_dial;
        # note that meta itself is updated with the converted code.
        meta['naptan-code'] = ''.join(map(self.naptan_dial,
                                          meta['naptan-code']))
        identifiers['naptan'] = meta['naptan-code']
    # An indicator such as "Stop A1" also yields a 'stop' identifier made of
    # everything after the "Stop " prefix.
    if ind and re.match(r'Stop [A-Z]\d\d?', ind):
        identifiers['stop'] = ind[5:]

    entity.save(identifiers=identifiers)
    entity.all_types.add(entity_type)
    entity.update_all_types_completion()

    return entity
def endElement(self, name):
    """Handle the end of an element, importing finished nodes and ways.

    Only 'node' and 'way' elements are processed, and only while
    ``self.valid`` is set.  Elements listed in ``self.identities`` are
    attached to the entity named there; all others are created, or updated
    when their timestamp is newer than the stored one.  ``ignore_count``,
    ``create_count``, ``modify_count`` and ``unchanged_count`` are
    maintained along the way.
    """
    if name in ('node', 'way') and self.valid:
        try:
            types = self.find_types(self.tags)
        except ValueError:
            self.ignore_count += 1
            return

        # Ignore ways that lie partly outside our bounding box
        if name == 'way' and not all(id in self.node_locations
                                     for id in self.nodes):
            return

        # Ignore disused and under-construction entities
        if self.tags.get('life_cycle', 'in_use') != 'in_use' or self.tags.get(
                'disused') in ('1', 'yes', 'true'):
            return

        # Memory management in debug mode
        reset_queries()

        if self.id in self.identities:
            # The identity is stored as a single colon-separated string whose
            # parts become the arguments to get_entity.
            entity = get_entity(*self.identities[self.id].split(':'))

            entity.metadata['osm'] = {
                'attrs': dict(self.attrs),
                # Colons in tag names are replaced with underscores.
                'tags': dict(
                    zip((k.replace(':', '_') for k in self.tags.keys()),
                        self.tags.values()))
            }

            # Keep the identifiers the entity already has and add the OSM one.
            identifiers = entity.identifiers
            identifiers.update({'osm': self.id})
            entity.save(identifiers=identifiers)
            # Add the OSM-derived types to whatever types are already set.
            entity.all_types = set(entity.all_types.all()) | set(
                self.entity_types[et] for et in types)
            entity.update_all_types_completion()
            self.ids.remove(self.id)

        else:
            try:
                entity = Entity.objects.get(source=self.source,
                                            _identifiers__scheme='osm',
                                            _identifiers__value=self.id)
                created = False
            except Entity.DoesNotExist:
                entity = Entity(source=self.source)
                created = True

            # Only (re)write the entity when it has no OSM metadata yet or the
            # element carries a later timestamp than the stored one.
            if not 'osm' in entity.metadata or \
                    entity.metadata['osm'].get('attrs', {}).get('timestamp', '') < self.attrs['timestamp']:

                if created:
                    self.create_count += 1
                else:
                    self.modify_count += 1

                if name == 'node':
                    entity.location = Point(self.node_location, srid=4326)
                    entity.geometry = entity.location
                elif name == 'way':
                    # A way that ends where it starts is a closed ring.
                    cls = LinearRing if self.nodes[0] == self.nodes[
                        -1] else LineString
                    entity.geometry = cls(
                        [self.node_locations[n] for n in self.nodes],
                        srid=4326)
                    # The location of a way is the centre of its bounding box.
                    min_, max_ = (float('inf'),
                                  float('inf')), (float('-inf'),
                                                  float('-inf'))
                    for lon, lat in [
                            self.node_locations[n] for n in self.nodes
                    ]:
                        min_ = min(min_[0], lon), min(min_[1], lat)
                        max_ = max(max_[0], lon), max(max_[1], lat)
                    entity.location = Point((min_[0] + max_[0]) / 2,
                                            (min_[1] + max_[1]) / 2,
                                            srid=4326)
                else:
                    raise AssertionError(
                        "There should be no other types of entity we're to deal with."
                    )

                # Work out a title for every configured language.
                names = dict()

                for lang_code, lang_name in settings.LANGUAGES:
                    with override(lang_code):

                        if '-' in lang_code:
                            # Regional variant: also try the bare language code.
                            tags_to_try = ('name:%s' % lang_code,
                                           'name:%s' % lang_code.split('-')[0],
                                           'name', 'operator')
                        else:
                            tags_to_try = ('name:%s' % lang_code, 'name',
                                           'operator')
                        # From here on ``name`` holds the title, no longer the
                        # element name passed in.
                        name = None
                        for tag_to_try in tags_to_try:
                            if self.tags.get(tag_to_try):
                                name = self.tags.get(tag_to_try)
                                break

                        if name is None:
                            # No usable tag: fall back to a reverse-geocoded
                            # name, or failing that to the raw coordinates.
                            try:
                                name = reverse_geocode(
                                    *entity.location)[0]['name']
                                if not name:
                                    raise IndexError
                                name = u"↝ %s" % name
                            except IndexError:
                                name = u"↝ %f, %f" % (
                                    self.node_location[1],
                                    self.node_location[0])

                        names[lang_code] = name

                entity.metadata['osm'] = {
                    'attrs': dict(self.attrs),
                    # Colons in tag names are replaced with underscores.
                    'tags': dict(
                        zip((k.replace(':', '_') for k in self.tags.keys()),
                            self.tags.values()))
                }
                entity.primary_type = self.entity_types[types[0]]

                identifiers = entity.identifiers
                identifiers.update({'osm': self.id})
                entity.save(identifiers=identifiers)

                for lang_code, name in names.items():
                    set_name_in_language(entity, lang_code, title=name)

                entity.all_types = [self.entity_types[et] for et in types]
                entity.update_all_types_completion()

            else:
                self.unchanged_count += 1
def endElement(self, name):
    """Handle the end of an element, importing finished nodes and ways.

    Only 'node' and 'way' elements are processed, and only while
    ``self.valid`` is set.  The element is skipped when
    ``self.find_types`` rejects its tags, when a way references a node with
    no recorded location, when it is a University of Oxford library, or
    when it is tagged as disused / not in use.  Otherwise the matching
    Entity is created or, if the element's timestamp is newer than the
    stored one, updated; ``ignore_count``, ``create_count``,
    ``modify_count`` and ``unchanged_count`` are maintained accordingly.
    """
    if not (name in ('node', 'way') and self.valid):
        return

    try:
        types = self.find_types(self.tags)
    except ValueError:
        self.ignore_count += 1
        return

    # Ignore ways that lie partly outside our bounding box
    if name == 'way' and not all(id in self.node_locations
                                 for id in self.nodes):
        return

    # We already have these from OxPoints, so leave them alone.
    if self.tags.get('amenity') == 'library' and \
            self.tags.get('operator') == 'University of Oxford':
        return

    # Ignore disused and under-construction entities
    if self.tags.get('life_cycle', 'in_use') != 'in_use' or \
            self.tags.get('disused') in ('1', 'yes', 'true'):
        return

    # An entity that was found is an update; only one we had to build is
    # counted as created.
    try:
        entity = Entity.objects.get(source=self.source,
                                    _identifiers__scheme='osm',
                                    _identifiers__value=self.id)
        created = False
    except Entity.DoesNotExist:
        entity = Entity(source=self.source)
        created = True

    if 'osm' not in entity.metadata or \
            entity.metadata['osm'].get('attrs', {}).get('timestamp', '') \
            < self.attrs['timestamp']:

        if created:
            self.create_count += 1
        else:
            self.modify_count += 1

        if name == 'node':
            entity.location = Point(self.node_location, srid=4326)
            entity.geometry = entity.location
        elif name == 'way':
            coords = [self.node_locations[n] for n in self.nodes]
            # A way that ends where it starts is a closed ring.
            cls = LinearRing if self.nodes[0] == self.nodes[-1] \
                else LineString
            entity.geometry = cls(coords, srid=4326)
            # The location of a way is the centre of its bounding box.
            lons = [coord[0] for coord in coords]
            lats = [coord[1] for coord in coords]
            entity.location = Point((min(lons) + max(lons)) / 2,
                                    (min(lats) + max(lats)) / 2,
                                    srid=4326)
        else:
            raise AssertionError(
                "There should be no other types of entity we're to deal with."
            )

        # Prefer the 'name' tag, then 'operator'; with neither, fall back to
        # a reverse-geocoded name or, failing that, the raw coordinates.
        try:
            title = self.tags.get('name') or self.tags['operator']
        except (KeyError, AssertionError):
            try:
                title = reverse_geocode(*entity.location)[0]['name']
                if not title:
                    raise IndexError
                title = u"↝ %s" % title
            except IndexError:
                # NOTE(review): this reads self.node_location even for
                # ways - confirm it holds a sensible value then.
                title = u"↝ %f, %f" % (self.node_location[1],
                                       self.node_location[0])

        entity.title = title
        entity.metadata['osm'] = {
            'attrs': dict(self.attrs),
            'tags': self.tags
        }
        entity.primary_type = self.entity_types[types[0]]

        # The post code is stored without spaces, or empty when untagged.
        if 'addr:postcode' in self.tags:
            entity.post_code = self.tags['addr:postcode'].replace(' ', '')
        else:
            entity.post_code = ""

        entity.save(identifiers={'osm': self.id})

        entity.all_types = [self.entity_types[et] for et in types]
        entity.update_all_types_completion()
    else:
        self.unchanged_count += 1
def _scrape(self, route, url, output):
    """Rebuild the list of stops on ``route`` from the HTML page at ``url``.

    The page is fetched with '&showall=1' appended and every row after the
    first of its first table is treated as one stop.  The route's existing
    stops are cleared and a StopOnRoute is created per row, in page order.
    Rows without a stop code are matched or created by stop name under the
    'acisroute' identifier.  Other rows are looked up under an identifier
    scheme chosen from the code's prefix, and an entity owned by this
    provider is created or refreshed when the lookup fails or returns one
    of our own entities.

    ``output`` is not used.
    """
    url += '&showall=1'
    # Close the HTTP response once parsed instead of leaking it.
    response = urlopen(url)
    try:
        service = etree.parse(response, parser=etree.HTMLParser())
    finally:
        response.close()

    route.stops.clear()
    for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):

        try:
            stop_code = tr[1][0].text
        except IndexError:

            # Stops on ACIS Live that don't have codes, e.g., out of county
            # stops
            stop_name = tr[3][0].text
            try:
                entity = Entity.objects.get(
                    source=self._get_source(),
                    _identifiers__scheme='acisroute',
                    _identifiers__value=stop_name)
            except Entity.DoesNotExist:
                entity = Entity(source=self._get_source())

            entity_type = self._get_entity_type()
            entity.primary_type = entity_type
            identifiers = {'acisroute': stop_name}
            entity.save(identifiers=identifiers)
            set_name_in_language(entity, 'en', title=stop_name)
            entity.all_types = (entity_type,)
            entity.update_all_types_completion()

        else:
            if stop_code.startswith(('693', '272', '734', '282')):
                # Oxontime uses NaPTAN code
                scheme = 'naptan'
            elif stop_code.startswith('450'):
                # West Yorkshire uses plate code
                scheme = 'plate'
            else:
                # Everyone else uses ATCO
                scheme = 'atco'
                if stop_code.startswith('370'):
                    # Except South Yorkshire, which mangles the code
                    stop_code = '3700%s' % stop_code[3:]

            try:
                entity = get_entity(scheme, stop_code)
                if entity.source == self._get_source():
                    # Raise Http404 if this is a bus stop we came up with,
                    # so any name changes, etc, get processed
                    raise Http404()
            except Http404:
                # Out of zone bus stops with NaPTAN codes - alternatively,
                # the fake bus stops Oxontime made up for the TUBE route
                try:
                    entity = Entity.objects.get(
                        source=self._get_source(),
                        _identifiers__scheme=scheme,
                        _identifiers__value=stop_code)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())
                identifiers = {scheme: stop_code}
                entity_type = self._get_entity_type()
                entity.primary_type = entity_type
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=tr[3][0].text)
                entity.all_types = (entity_type,)
                entity.update_all_types_completion()
                entity.save()

        StopOnRoute.objects.create(route=route, entity=entity, order=i)
def _scrape(self, route, url, output):
    """Rebuild the list of stops on ``route`` from the HTML page at ``url``.

    The page is fetched with '&showall=1' appended and every row after the
    first of its first table is treated as one stop.  The route's existing
    stops are cleared and a StopOnRoute is created per row, in page order.
    Rows without a stop code are matched or created by stop name under the
    'acisroute' identifier.  Other rows are looked up under an identifier
    scheme chosen from the code's prefix, and an entity owned by this
    provider is created or refreshed when the lookup fails or returns one
    of our own entities.

    ``output`` is not used.
    """
    url += '&showall=1'
    # Close the HTTP response once parsed instead of leaking it.
    response = urlopen(url)
    try:
        service = etree.parse(response, parser=etree.HTMLParser())
    finally:
        response.close()

    route.stops.clear()
    for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):

        try:
            stop_code = tr[1][0].text
        except IndexError:

            # Stops on ACIS Live that don't have codes, e.g., out of county
            # stops
            stop_name = tr[3][0].text
            try:
                entity = Entity.objects.get(
                    source=self._get_source(),
                    _identifiers__scheme='acisroute',
                    _identifiers__value=stop_name)
            except Entity.DoesNotExist:
                entity = Entity(source=self._get_source())

            entity_type = self._get_entity_type()
            entity.primary_type = entity_type
            identifiers = {'acisroute': stop_name}
            entity.save(identifiers=identifiers)
            set_name_in_language(entity, 'en', title=stop_name)
            entity.all_types = (entity_type,)
            entity.update_all_types_completion()

        else:
            if stop_code.startswith(('693', '272', '734', '282')):
                # Oxontime uses NaPTAN code
                scheme = 'naptan'
            elif stop_code.startswith('450'):
                # West Yorkshire uses plate code
                scheme = 'plate'
            else:
                # Everyone else uses ATCO
                scheme = 'atco'
                if stop_code.startswith('370'):
                    # Except South Yorkshire, which mangles the code
                    stop_code = '3700%s' % stop_code[3:]

            try:
                entity = get_entity(scheme, stop_code)
                if entity.source == self._get_source():
                    # Raise Http404 if this is a bus stop we came up with,
                    # so any name changes, etc, get processed
                    raise Http404()
            except Http404:
                # Out of zone bus stops with NaPTAN codes - alternatively,
                # the fake bus stops Oxontime made up for the TUBE route
                try:
                    entity = Entity.objects.get(
                        source=self._get_source(),
                        _identifiers__scheme=scheme,
                        _identifiers__value=stop_code)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())
                identifiers = {scheme: stop_code}
                entity_type = self._get_entity_type()
                entity.primary_type = entity_type
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=tr[3][0].text)
                entity.all_types = (entity_type,)
                entity.update_all_types_completion()
                entity.save()

        StopOnRoute.objects.create(route=route, entity=entity, order=i)
def import_data(self, metadata, output):
    """Synchronise this source's entities with the TPEG feed.

    Downloads and parses the XML at ``self._tpeg_url``, then creates or
    updates one Entity per ``tpeg_message`` that carries at least one WGS84
    coordinate.  Entities of this source that were not saved during this
    run are deleted afterwards.

    ``metadata`` and ``output`` are not used; nothing is returned.
    """
    source, entity_type = self._get_source(), self._get_entity_type()

    parser = etree.XMLParser(load_dtd=True)
    parser.resolvers.add(BBCTPEGResolver())
    # Close the HTTP response once parsed instead of leaking it.
    response = urllib.urlopen(self._tpeg_url)
    try:
        xml = etree.parse(response, parser=parser)
    finally:
        response.close()

    # Index the entities we already hold by their message identifier.
    entities, seen = {}, set()
    for entity in Entity.objects.filter(source=source):
        if 'bbc-tpeg' in entity.identifiers:
            entities[entity.identifiers['bbc-tpeg']] = entity

    for message in xml.getroot().findall('tpeg_message'):
        road_traffic_message = message.find('road_traffic_message')
        message_id = road_traffic_message.attrib['message_id']

        try:
            entity = entities[message_id]
        except KeyError:
            entity = Entity()
            entities[message_id] = entity

        entity.source = source
        entity.title = message.find('summary').text
        entity.primary_type = entity_type

        # Built as a real list (not map()) because it is measured with
        # len() and iterated more than once below.
        locs = [self._wgs84_to_point(coordinates)
                for coordinates in road_traffic_message.findall(
                    'location_container/location_coordinates/WGS84')]
        if len(locs) > 1:
            entity.geometry = LineString(*locs)
        elif len(locs) == 1:
            entity.geometry = locs[0]
        else:
            # No coordinates: not saved, hence not marked as seen.
            continue

        # The location is the mean of all the points.
        entity.location = Point(
            sum(p.x for p in locs) / len(locs),
            sum(p.y for p in locs) / len(locs),
            srid=4326,
        )

        entity.metadata['bbc_tpeg'] = {
            'xml': etree.tostring(message),
            'severity': road_traffic_message.attrib['severity_factor'],
            'generated':
                road_traffic_message.attrib['message_generation_time'],
            'version': int(road_traffic_message.attrib['version_number']),
        }

        entity.save(identifiers={'bbc-tpeg': message_id})
        entity.all_types = [entity_type]
        entity.update_all_types_completion()
        seen.add(entity.pk)

    # Remove everything from this source that was not saved above.
    for entity in Entity.objects.filter(source=source):
        if entity.pk not in seen:
            entity.delete()
def _import_cif(self, cif):
    """
    Parse a CIF file

    ``cif`` is iterated line by line; the first two characters of each line
    select the record type handled below. Returns a list of route dicts
    with the keys 'id', 'number', 'description', 'stops' (entities from ZA
    records) and 'journies' (journey dicts built from the QS, QN/ZN, QO, QI
    and QT records).

    A journey is attached to the most recent route when the next journey
    header or route record is seen, or at end of input. Stop records whose
    entity lookup raises Http404 are left out of the journey.
    """
    # Clear cache once per file - avoid high memory usage
    self._cache = EntityCache()

    # Also reset SQL queries log
    reset_queries()

    routes = []
    this_journey = None

    for line in cif:
        record = line[:2]

        if record == 'QS':
            # Journey header
            if this_journey is not None:
                routes[-1]['journies'].append(this_journey)
            if line[2] == 'D':
                # Deleted journey: nothing to collect until the next header
                this_journey = None
                continue
            this_journey = {
                'operator-code': line[3:7],
                'id': line[7:13],
                'start-date': self._parse_cif_date(line[13:21]),
                'end-date': self._parse_cif_date(line[21:29]),
                # Columns 29-35 are the Monday..Sunday flags, in order
                'days': weekbool(*[line[col] == '1' for col in range(29, 36)]),
                'school-holidays': {
                    'S': 'term-time',
                    'H': 'holidays',
                }.get(line[36], 'all'),
                'bank-holidays': {
                    'A': 'additional',
                    'B': 'holidays',
                    'X': 'non-holidays',
                }.get(line[37], 'all'),
                'route': line[38:42],
                'vehicle': line[48:56].strip(),
                'direction': line[64],
                'notes': [],
                'stops': [],
            }

        elif (this_journey is None
                and record in ('QN', 'ZN', 'QO', 'QI', 'QT')):
            # Detail record with no open journey (e.g. it belongs to a
            # deleted journey): skip it instead of subscripting None.
            continue

        elif record in ('QN', 'ZN'):
            # Notes
            this_journey['notes'].append(line[7:])

        elif record in ('QO', 'QI', 'QT'):
            try:
                entity = self._cache['atco:%s' % line[2:14].strip()]
            except Http404:
                # Unknown stop: leave it out of the journey
                continue
            if record == 'QO':
                # Journey start
                stop = {
                    'entity': entity,
                    'sta': None,
                    'std': self._parse_cif_time(line[14:18]),
                    'activity': 'O',
                    'estimated': line[22] == '0',
                    'fare-stage': line[24] == '1',
                }
            elif record == 'QI':
                # Journey intermediate stop
                stop = {
                    'entity': entity,
                    'sta': self._parse_cif_time(line[14:18]),
                    'std': self._parse_cif_time(line[18:22]),
                    'activity': line[22],
                    'estimated': line[27] == '0',
                    'fare-stage': line[29] == '1',
                }
            else:
                # Journey complete
                stop = {
                    'entity': entity,
                    'sta': self._parse_cif_time(line[14:18]),
                    'std': None,
                    'activity': 'F',
                    'estimated': line[22] == '0',
                    'fare-stage': line[24] == '1',
                }
            this_journey['stops'].append(stop)

        elif record == 'ZL':
            # Route ID
            route_id = line[2:]

        elif record == 'ZD':
            # Days route ID
            route_id += line[18:-1]

        elif record == 'ZS':
            # Route
            if this_journey is not None:
                routes[-1]['journies'].append(this_journey)
                # The journey now belongs to the previous route; forget it
                # so the next journey header does not append it again to
                # the route created below.
                this_journey = None
            routes.append({
                'id': route_id,
                'number': line[10:14].strip(),
                'description': line[14:-1],
                'stops': [],
                'journies': [],
            })

        elif record == 'ZA':
            stop_code = line[3:15].strip()
            try:
                entity = self._cache['atco:%s' % stop_code]
                if entity.source == self._get_source():
                    # Raise Http404 if this is a bus stop we came up with,
                    # so any name changes, etc, get processed
                    raise Http404()
            except Http404:
                # Out of zone bus stops with NaPTAN codes
                try:
                    entity = Entity.objects.get(
                        source=self._get_source(),
                        _identifiers__scheme='atco',
                        _identifiers__value=stop_code)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())
                identifiers = {'atco': stop_code}
                entity_type = self._entity_type
                entity.primary_type = entity_type
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=line[15:63].strip())
                entity.all_types = (entity_type,)
                entity.update_all_types_completion()
                entity.save()
            routes[-1]['stops'].append(entity)

    # Flush the journey still open at end of input
    if this_journey is not None:
        routes[-1]['journies'].append(this_journey)

    return routes
def _import_cif(self, cif):
    """
    Parse a CIF file

    ``cif`` is iterated line by line; the first two characters of each line
    select the record type handled below. Returns a list of route dicts
    with the keys 'id', 'number', 'description', 'stops' (entities from ZA
    records) and 'journies' (journey dicts built from the QS, QN/ZN, QO, QI
    and QT records).

    A journey is attached to the most recent route when the next journey
    header or route record is seen, or at end of input. Stop records whose
    entity lookup raises Http404 are left out of the journey.
    """
    # Clear cache once per file - avoid high memory usage
    self._cache = EntityCache()

    # Also reset SQL queries log
    reset_queries()

    routes = []
    this_journey = None

    for line in cif:
        record = line[:2]

        if record == 'QS':
            # Journey header
            if this_journey is not None:
                routes[-1]['journies'].append(this_journey)
            if line[2] == 'D':
                # Deleted journey: nothing to collect until the next header
                this_journey = None
                continue
            this_journey = {
                'operator-code': line[3:7],
                'id': line[7:13],
                'start-date': self._parse_cif_date(line[13:21]),
                'end-date': self._parse_cif_date(line[21:29]),
                # Columns 29-35 are the Monday..Sunday flags, in order
                'days': weekbool(*[line[col] == '1' for col in range(29, 36)]),
                'school-holidays': {
                    'S': 'term-time',
                    'H': 'holidays',
                }.get(line[36], 'all'),
                'bank-holidays': {
                    'A': 'additional',
                    'B': 'holidays',
                    'X': 'non-holidays',
                }.get(line[37], 'all'),
                'route': line[38:42],
                'vehicle': line[48:56].strip(),
                'direction': line[64],
                'notes': [],
                'stops': [],
            }

        elif (this_journey is None
                and record in ('QN', 'ZN', 'QO', 'QI', 'QT')):
            # Detail record with no open journey (e.g. it belongs to a
            # deleted journey): skip it instead of subscripting None.
            continue

        elif record in ('QN', 'ZN'):
            # Notes
            this_journey['notes'].append(line[7:])

        elif record in ('QO', 'QI', 'QT'):
            try:
                entity = self._cache['atco:%s' % line[2:14].strip()]
            except Http404:
                # Unknown stop: leave it out of the journey
                continue
            if record == 'QO':
                # Journey start
                stop = {
                    'entity': entity,
                    'sta': None,
                    'std': self._parse_cif_time(line[14:18]),
                    'activity': 'O',
                    'estimated': line[22] == '0',
                    'fare-stage': line[24] == '1',
                }
            elif record == 'QI':
                # Journey intermediate stop
                stop = {
                    'entity': entity,
                    'sta': self._parse_cif_time(line[14:18]),
                    'std': self._parse_cif_time(line[18:22]),
                    'activity': line[22],
                    'estimated': line[27] == '0',
                    'fare-stage': line[29] == '1',
                }
            else:
                # Journey complete
                stop = {
                    'entity': entity,
                    'sta': self._parse_cif_time(line[14:18]),
                    'std': None,
                    'activity': 'F',
                    'estimated': line[22] == '0',
                    'fare-stage': line[24] == '1',
                }
            this_journey['stops'].append(stop)

        elif record == 'ZL':
            # Route ID
            route_id = line[2:]

        elif record == 'ZD':
            # Days route ID
            route_id += line[18:-1]

        elif record == 'ZS':
            # Route
            if this_journey is not None:
                routes[-1]['journies'].append(this_journey)
                # The journey now belongs to the previous route; forget it
                # so the next journey header does not append it again to
                # the route created below.
                this_journey = None
            routes.append({
                'id': route_id,
                'number': line[10:14].strip(),
                'description': line[14:-1],
                'stops': [],
                'journies': [],
            })

        elif record == 'ZA':
            stop_code = line[3:15].strip()
            try:
                entity = self._cache['atco:%s' % stop_code]
                if entity.source == self._get_source():
                    # Raise Http404 if this is a bus stop we came up with,
                    # so any name changes, etc, get processed
                    raise Http404()
            except Http404:
                # Out of zone bus stops with NaPTAN codes
                try:
                    entity = Entity.objects.get(
                        source=self._get_source(),
                        _identifiers__scheme='atco',
                        _identifiers__value=stop_code)
                except Entity.DoesNotExist:
                    entity = Entity(source=self._get_source())
                identifiers = {'atco': stop_code}
                entity_type = self._entity_type
                entity.primary_type = entity_type
                entity.save(identifiers=identifiers)
                set_name_in_language(entity, 'en', title=line[15:63].strip())
                entity.all_types = (entity_type,)
                entity.update_all_types_completion()
                entity.save()
            routes[-1]['stops'].append(entity)

    # Flush the journey still open at end of input
    if this_journey is not None:
        routes[-1]['journies'].append(this_journey)

    return routes
def add_stop(self, meta, entity_type, source):
    """Create or update the stop entity described by ``meta``.

    ``meta`` is a dict of stop fields; the keys read here are 'atco-code',
    'common-name', 'indicator', 'locality-ref', 'street', 'longitude',
    'latitude' and, optionally, 'naptan-code' and 'plate-code'.

    Returns the saved entity, or None when the stop is skipped because it
    is outside ``self.areas`` or is marked as unused.
    """
    # Check this entity is in an area
    if self.areas is not None:
        in_area = any(
            meta['atco-code'].startswith(area) for area in self.areas)
        if not in_area:
            return

    # See if we're updating an existing object, or creating a new one
    try:
        entity = Entity.objects.get(source=source,
                                    _identifiers__scheme='atco',
                                    _identifiers__value=meta['atco-code'])
    except Entity.DoesNotExist:
        entity = Entity(source=source)
    except Entity.MultipleObjectsReturned:
        # Handle clashes
        Entity.objects.filter(source=source,
                              _identifiers__scheme='atco',
                              _identifiers__value=meta['atco-code']).delete()
        entity = Entity(source=source)

    common_name, indicator, locality, street = [
        meta.get(k)
        for k in ('common-name', 'indicator', 'locality-ref', 'street')
    ]

    if (common_name or '').endswith(' DEL') or \
            (indicator or '').lower() == 'not in use':
        # In the NaPTAN list, but indicates it's an unused stop
        return

    # Convert indicator to a friendlier format
    indicator = {
        'opp': 'Opposite',
        'opposite': 'Opposite',
        'adj': 'Adjacent',
        'outside': 'Outside',
        'o/s': 'Outside',
        'nr': 'Near',
        'inside': 'Inside',
    }.get(indicator, indicator)

    # Title is "<indicator> <common name>[, <street>][, <locality>]"
    title = ''
    if indicator is not None:
        title += indicator + ' '
    title += common_name

    if street is not None and not common_name.startswith(street):
        title += ', ' + street

    locality = self.nptg_localities.get(locality)
    if locality is not None:
        title += ', ' + locality

    entity.title = title
    entity.primary_type = entity_type

    if not entity.metadata:
        entity.metadata = {}
    entity.metadata['naptan'] = meta
    entity.location = Point(float(meta['longitude']),
                            float(meta['latitude']),
                            srid=4326)
    entity.geometry = entity.location

    identifiers = {
        'atco': meta['atco-code'],
    }
    if 'naptan-code' in meta:
        # Each character of the code is passed through self.naptan_dial;
        # the converted code is also written back into ``meta``.
        meta['naptan-code'] = ''.join(
            map(self.naptan_dial, meta['naptan-code']))
        identifiers['naptan'] = meta['naptan-code']
    if 'plate-code' in meta:
        identifiers['plate'] = meta['plate-code']
    # Raw string so that \d is a regex escape, not a string escape
    if indicator is not None and re.match(r'Stop [A-Z]\d\d?', indicator):
        # Drop the leading "Stop " to keep just the stop letter/number
        identifiers['stop'] = indicator[5:]

    entity.save(identifiers=identifiers)
    entity.all_types.add(entity_type)
    entity.update_all_types_completion()

    return entity
def add_stop(self, meta, entity_type, source):
    """Create or update the stop entity described by ``meta``.

    ``meta`` is a dict of stop fields; the keys read here are 'atco-code',
    'common-name', 'indicator', 'locality-ref', 'street', 'longitude',
    'latitude' and, optionally, 'naptan-code', 'plate-code' and 'crs'.

    For stop types MET, GAT, FER and RLY the names in ``self.names`` are
    used as they are. For every other type a title is built once per
    language in ``settings.LANGUAGES`` from the indicator, common name,
    street and locality.

    Returns the saved entity, or None when the stop is skipped because it
    is outside ``self.areas`` or is marked as unused.
    """
    # NOTE(review): the stop type is read from self.meta while every other
    # field comes from the ``meta`` argument - confirm that callers keep
    # the two in step.

    # Check this entity is in an area
    if self.areas is not None:
        in_area = any(
            meta['atco-code'].startswith(area) for area in self.areas)
        if not in_area:
            return

    # See if we're updating an existing object, or creating a new one
    try:
        entity = Entity.objects.get(source=source,
                                    _identifiers__scheme='atco',
                                    _identifiers__value=meta['atco-code'])
    except Entity.DoesNotExist:
        entity = Entity(source=source)
    except Entity.MultipleObjectsReturned:
        # Handle clashes
        Entity.objects.filter(
            source=source,
            _identifiers__scheme='atco',
            _identifiers__value=meta['atco-code']).delete()
        entity = Entity(source=source)

    common_name, indicator, locality, street = [
        meta.get(k)
        for k in ('common-name', 'indicator', 'locality-ref', 'street')
    ]

    raw_name = common_name or ''
    if raw_name.endswith(' DEL') or \
            (indicator or '').lower() == 'not in use' or \
            any(marker in raw_name for marker in (
                'to define route', 'to def rte', 'to def route', 'def.rte')):
        # In the NaPTAN list, but indicates it's an unused stop
        return

    if self.meta['stop-type'] in ('MET', 'GAT', 'FER', 'RLY'):
        names = self.names
    else:
        names = dict()
        for lang_code, lang_name in settings.LANGUAGES:
            with override(lang_code):

                # Try and find one in our preferred order
                for lang in (lang_code, 'en', None):
                    if lang in self.names:
                        common_name = self.names[lang]
                        break

                # Expand abbreviations in indicators
                if indicator is not None:
                    # Built once per language (inside the override block so
                    # the translations use the active language) instead of
                    # once per word of the indicator.
                    expansions = {
                        # Translators: This is referring to bus stop location descriptions
                        'op': ugettext('Opposite'),
                        'opp': ugettext('Opposite'),
                        'opposite': ugettext('Opposite'),
                        # Translators: This is referring to bus stop location descriptions
                        'adj': ugettext('Adjacent'),
                        # Translators: This is referring to bus stop location descriptions
                        'outside': ugettext('Outside'),
                        'o/s': ugettext('Outside'),
                        # Translators: This is referring to bus stop location descriptions
                        'nr': ugettext('Near'),
                        # Translators: This is referring to bus stop location descriptions
                        'inside': ugettext('Inside'),
                        # Translators: This is referring to bus stop location descriptions
                        'stp': ugettext('Stop'),
                    }
                    indicator = ' '.join(
                        expansions.get(part.lower(), part)
                        for part in indicator.split())

                stop_type = self.meta['stop-type']
                if indicator is None and stop_type in (
                        'AIR', 'FTD', 'RSE', 'TMU', 'BCE'):
                    # Translators: This is referring to public transport entities
                    title = ugettext('Entrance to %s') % common_name

                elif indicator is None and stop_type in ('FBT',):
                    # Translators: This is referring to ferry ports
                    title = ugettext('Berth at %s') % common_name

                elif indicator is None and stop_type in ('RPL', 'PLT'):
                    # Translators: This is referring to rail and metro stations
                    title = ugettext('Platform at %s') % common_name

                elif indicator is not None \
                        and indicator.lower() != 'none' \
                        and indicator not in common_name:
                    title = indicator + ' ' + common_name

                else:
                    title = common_name

                if street is not None and street != '-':
                    # Deal with all-caps street names
                    if street.upper() == street:
                        # Keep the first character of each space-separated
                        # word and lower-case the rest.
                        recased = []
                        wordstart = True
                        for letter in street:
                            if wordstart:
                                wordstart = False
                                recased.append(letter)
                            elif letter == ' ':
                                wordstart = True
                                recased.append(letter)
                            else:
                                recased.append(letter.lower())
                        street = ''.join(recased)

                    if street not in title:
                        title += ', ' + street

                locality_lang = self.nptg_localities.get(locality)
                if locality_lang is not None:
                    # Use the first available language; omit the locality
                    # when it merely repeats the street.
                    for lang in (lang_code, 'en', 'cy'):
                        if lang in locality_lang:
                            if locality_lang[lang] != street:
                                title += ', ' + locality_lang[lang]
                            break

                names[lang_code] = title

    entity.primary_type = entity_type

    if not entity.metadata:
        entity.metadata = {}
    entity.metadata['naptan'] = meta
    entity.location = Point(float(meta['longitude']),
                            float(meta['latitude']),
                            srid=4326)
    entity.geometry = entity.location

    if meta['atco-code'] in self.tube_references:
        entity.metadata['london-underground-identifiers'] = \
            self.tube_references[meta['atco-code']]

    identifiers = {
        'atco': meta['atco-code'],
    }
    if 'naptan-code' in meta:
        # Each character of the code is passed through self.naptan_dial;
        # the converted code is also written back into ``meta``.
        meta['naptan-code'] = ''.join(
            map(self.naptan_dial, meta['naptan-code']))
        identifiers['naptan'] = meta['naptan-code']
    if 'plate-code' in meta:
        identifiers['plate'] = meta['plate-code']
    if 'crs' in meta:
        identifiers['crs'] = meta['crs']
    # Raw string so that \d is a regex escape, not a string escape
    if indicator is not None and re.match(r'Stop [A-Z]\d\d?', indicator):
        # Drop the leading "Stop " to keep just the stop letter/number
        identifiers['stop'] = indicator[5:]

    entity.save(identifiers=identifiers)

    for lang_code, name in names.items():
        # This is the NaPTAN, so default to English
        if lang_code is None:
            lang_code = 'en'
        set_name_in_language(entity, lang_code, title=name)

    entity.all_types = (entity_type,)
    entity.update_all_types_completion()

    # Rebuild group membership from the current stop areas
    entity.groups.clear()
    for stop_area in self.stop_areas:
        sa, created = EntityGroup.objects.get_or_create(source=source,
                                                        ref_code=stop_area)
        entity.groups.add(sa)
    entity.save()

    return entity
def import_data(self, metadata, output):
    """Synchronise this source's entities with the TPEG feed.

    The XML at ``self._tpeg_url`` is parsed, and for every ``tpeg_message``
    element an entity keyed by the ``bbc-tpeg`` identifier (the message id)
    is created or updated with its title, geometry, location and a
    ``bbc_tpeg`` metadata record. Messages without any WGS84 coordinates
    are skipped. Finally, every entity of this source that was not saved
    during this run is deleted.

    ``metadata`` and ``output`` are accepted but not used.
    """
    source, entity_type = self._get_source(), self._get_entity_type()

    parser = etree.XMLParser(load_dtd=True)
    parser.resolvers.add(BBCTPEGResolver())
    # Close the feed explicitly once parsed rather than leaking the handle.
    feed = urllib.urlopen(self._tpeg_url)
    try:
        xml = etree.parse(feed, parser=parser)
    finally:
        feed.close()

    # Index the entities we already hold by their feed message id.
    entities, seen = {}, set()
    for entity in Entity.objects.filter(source=source):
        if 'bbc-tpeg' in entity.identifiers:
            entities[entity.identifiers['bbc-tpeg']] = entity

    for message in xml.getroot().findall('tpeg_message'):
        road_traffic_message = message.find('road_traffic_message')
        message_id = road_traffic_message.attrib['message_id']
        try:
            entity = entities[message_id]
        except KeyError:
            entity = Entity()
            entities[message_id] = entity

        entity.source = source
        entity.title = message.find('summary').text
        entity.primary_type = entity_type

        # Build a real list: the length is needed below, and the result of
        # map() has no len() on Python 3.
        locs = [
            self._wgs84_to_point(node)
            for node in road_traffic_message.findall(
                'location_container/location_coordinates/WGS84')
        ]
        if not locs:
            # Nothing to plot; the entity is left unsaved (and, if it
            # already existed, is removed by the clean-up pass below).
            continue
        if len(locs) > 1:
            entity.geometry = LineString(*locs)
        else:
            entity.geometry = locs[0]

        # The location is the arithmetic mean of all points.
        entity.location = Point(
            sum(p.x for p in locs) / len(locs),
            sum(p.y for p in locs) / len(locs),
            srid=4326,
        )

        entity.metadata['bbc_tpeg'] = {
            'xml': etree.tostring(message),
            'severity': road_traffic_message.attrib['severity_factor'],
            'generated': road_traffic_message.attrib['message_generation_time'],
            'version': int(road_traffic_message.attrib['version_number']),
        }

        entity.save(identifiers={'bbc-tpeg': message_id})
        entity.all_types = [entity_type]
        entity.update_all_types_completion()
        seen.add(entity.pk)

    # Remove entities whose message was not saved in this run.
    for entity in Entity.objects.filter(source=source):
        if entity.pk not in seen:
            entity.delete()