Example #1
0
 def import_data(self, metadata, output):
     """
     Synchronise the BBC TPEG road traffic feed into Entity records.

     Downloads and parses the feed at ``self._tpeg_url``, then creates or
     updates one Entity per ``tpeg_message`` that carries at least one WGS84
     coordinate. Finally, every Entity belonging to this provider's source
     that was not saved during this run is deleted.

     ``metadata`` and ``output`` are accepted but not used by this method.
     """
     source, entity_type = self._get_source(), self._get_entity_type()

     parser = etree.XMLParser(load_dtd=True)
     parser.resolvers.add(BBCTPEGResolver())
     # Close the HTTP response as soon as the document has been parsed
     # instead of leaving the connection to the garbage collector.
     response = urllib.urlopen(self._tpeg_url)
     try:
         xml = etree.parse(response, parser=parser)
     finally:
         response.close()

     # Existing entities for this source, keyed by TPEG message ID
     entities, seen = {}, set()
     for entity in Entity.objects.filter(source=source):
         if 'bbc-tpeg' in entity.identifiers:
             entities[entity.identifiers['bbc-tpeg']] = entity

     for message in xml.getroot().findall('tpeg_message'):
         road_traffic_message = message.find('road_traffic_message')
         message_id = road_traffic_message.attrib['message_id']

         try:
             entity = entities[message_id]
         except KeyError:
             entity = Entity()
             entities[message_id] = entity

         entity.source = source
         entity.primary_type = entity_type

         # A real list (not a lazy map object) so it can be measured,
         # indexed and iterated more than once.
         locs = [
             self._wgs84_to_point(wgs84)
             for wgs84 in road_traffic_message.findall(
                 'location_container/location_coordinates/WGS84')
         ]
         if not locs:
             # Messages without coordinates are not saved; an existing
             # entity for such a message is removed by the clean-up below.
             continue
         if len(locs) > 1:
             entity.geometry = LineString(*locs)
         else:
             entity.geometry = locs[0]
         # The representative location is the mean of all the points
         entity.location = Point(
             sum(p.x for p in locs) / len(locs),
             sum(p.y for p in locs) / len(locs),
             srid=4326,
         )

         entity.metadata['bbc_tpeg'] = {
             'xml': etree.tostring(message),
             'severity': road_traffic_message.attrib['severity_factor'],
             'generated': road_traffic_message.attrib['message_generation_time'],
             'version': int(road_traffic_message.attrib['version_number']),
         }

         entity.save(identifiers={'bbc-tpeg': message_id})
         set_name_in_language(entity, 'en',
                              title=message.find('summary').text)
         entity.all_types = [entity_type]
         entity.update_all_types_completion()
         seen.add(entity.pk)

     # Anything from this source that was not in the feed is now stale
     for entity in Entity.objects.filter(source=source):
         if entity.pk not in seen:
             entity.delete()
Example #2
0
 def endElement(self, name):
     """
     Handle the end of an XML element, importing 'node' and 'way' elements.

     Elements are skipped when they are not flagged valid, when
     ``find_types`` rejects their tags (counted in ``ignore_count``), when a
     way references nodes with no known location, when they are University
     of Oxford libraries, or when they are tagged as disused.

     Otherwise the matching Entity for this source and OSM ID is fetched or
     created. It is only rewritten when it has no stored OSM metadata or its
     stored timestamp is older than the element's; ``create_count``,
     ``modify_count`` and ``unchanged_count`` record the outcome.
     """
     if name in ('node', 'way') and self.valid:
         try:
             types = self.find_types(self.tags)
         except ValueError:
             self.ignore_count += 1
             return

         # Ignore ways that lay partly outside our bounding box
         if name == 'way' and not all(node_id in self.node_locations for node_id in self.nodes):
             return

         # We already have these from OxPoints, so leave them alone.
         if self.tags.get('amenity') == 'library' and self.tags.get('operator') == 'University of Oxford':
             return

         # Ignore disused and under-construction entities
         if self.tags.get('life_cycle', 'in_use') != 'in_use' or self.tags.get('disused') in ('1', 'yes', 'true'):
             return

         reset_queries()

         try:
             entity = Entity.objects.get(source=self.source,
                                         _identifiers__scheme='osm',
                                         _identifiers__value=self.id)
             # Found an existing record, so any write below is a modification
             created = False
         except Entity.DoesNotExist:
             entity = Entity(source=self.source)
             created = True

         if 'osm' not in entity.metadata or \
           entity.metadata['osm'].get('attrs', {}).get('timestamp', '') < self.attrs['timestamp']:

             if created:
                 self.create_count += 1
             else:
                 self.modify_count += 1

             if name == 'node':
                 entity.location = Point(self.node_location, srid=4326)
                 entity.geometry = entity.location
             elif name == 'way':
                 coords = [self.node_locations[n] for n in self.nodes]
                 # A way that ends where it starts is a closed ring
                 cls = LinearRing if self.nodes[0] == self.nodes[-1] else LineString
                 entity.geometry = cls(coords, srid=4326)
                 # The way's location is the centre of its bounding box
                 lons = [lon for lon, lat in coords]
                 lats = [lat for lon, lat in coords]
                 entity.location = Point((min(lons) + max(lons)) / 2,
                                         (min(lats) + max(lats)) / 2,
                                         srid=4326)
             else:
                 raise AssertionError("There should be no other types of entity we're to deal with.")

             names = dict()

             for lang_code, lang_name in settings.LANGUAGES:
                 with override(lang_code):

                     if '-' in lang_code:
                         tags_to_try = ('name:%s' % lang_code, 'name:%s' % lang_code.split('-')[0], 'name', 'operator')
                     else:
                         tags_to_try = ('name:%s' % lang_code, 'name', 'operator')

                     # Start afresh for every language (including regional
                     # codes such as 'en-gb') so that neither the element
                     # name nor a previous language's title leaks through.
                     title = None
                     for tag_to_try in tags_to_try:
                         if self.tags.get(tag_to_try):
                             title = self.tags.get(tag_to_try)
                             break

                     if title is None:
                         try:
                             title = reverse_geocode(*entity.location)[0]['name']
                             if not title:
                                 raise IndexError
                             title = u"↝ %s" % title
                         except IndexError:
                             # No usable geocoder result: fall back to the
                             # entity's own coordinates (lat, lon), which is
                             # also well defined for ways.
                             title = u"↝ %f, %f" % (entity.location.y, entity.location.x)

                     names[lang_code] = title

             entity.metadata['osm'] = {
                 'attrs': dict(self.attrs),
                 # Colons in tag keys are replaced with underscores
                 'tags': dict((k.replace(':', '_'), v) for k, v in self.tags.items())
             }
             entity.primary_type = self.entity_types[types[0]]

             entity.save(identifiers={'osm': self.id})

             for lang_code, title in names.items():
                 set_name_in_language(entity, lang_code, title=title)

             entity.all_types = [self.entity_types[et] for et in types]
             entity.update_all_types_completion()

         else:
             self.unchanged_count += 1
Example #3
0
 def _import_cif(self, cif):
     """
     Parse a CIF file

     ``cif`` is iterated line by line; the first two characters of each line
     select the record type. Returns a list of route dictionaries with the
     keys 'id', 'number', 'description', 'stops' (entities from ZA records)
     and 'journies' (journey dictionaries built from QS/QN/ZN/QO/QI/QT
     records).

     Stop records whose ATCO code cannot be resolved through the entity
     cache (Http404) are left out of the journey. Journey sub-records that
     arrive while no journey is open (for example after a deleted journey
     header) are skipped.
     """

     # Clear cache once per file - avoid high memory usage
     self._cache = EntityCache()

     # Also reset SQL queries log
     reset_queries()

     routes = []

     this_journey = None

     for line in cif:
         record = line[:2]

         if record == 'QS':
             # Journey header
             if this_journey is not None:
                 routes[-1]['journies'].append(this_journey)
             if line[2] == 'D':
                 # Deleted journey: nothing to collect until the next header
                 this_journey = None
                 continue
             this_journey = {
                 'operator-code': line[3:7],
                 'id': line[7:13],
                 'start-date': self._parse_cif_date(line[13:21]),
                 'end-date': self._parse_cif_date(line[21:29]),
                 'days': weekbool(
                     line[29] == '1', # Monday
                     line[30] == '1', # Tuesday
                     line[31] == '1', # Wednesday
                     line[32] == '1', # Thursday
                     line[33] == '1', # Friday
                     line[34] == '1', # Saturday
                     line[35] == '1', # Sunday
                 ),
                 'school-holidays': {
                     'S': 'term-time',
                     'H': 'holidays'
                 }.get(line[36], 'all'),
                 'bank-holidays': {
                     'A': 'additional',
                     'B': 'holidays',
                     'X': 'non-holidays'
                 }.get(line[37], 'all'),
                 'route': line[38:42],
                 'vehicle': line[48:56].strip(),
                 'direction': line[64],
                 'notes': [],
                 'stops': [],
             }

         elif record in ('QN', 'ZN'):
             # Notes - only meaningful while a journey is open
             if this_journey is not None:
                 this_journey['notes'].append(line[7:])

         elif record in ('QO', 'QI', 'QT'):
             # Journey start / intermediate stop / journey complete
             if this_journey is None:
                 # Stop record with no open journey to attach it to
                 continue
             try:
                 stop_entity = self._cache['atco:%s' % line[2:14].strip()]
             except Http404:
                 # Unknown stop: leave it out of the journey
                 continue

             if record == 'QO':
                 stop = {
                     'entity': stop_entity,
                     'sta': None,
                     'std': self._parse_cif_time(line[14:18]),
                     'activity': 'O',
                     'estimated': line[22] == '0',
                     'fare-stage': line[24] == '1'
                 }
             elif record == 'QI':
                 stop = {
                     'entity': stop_entity,
                     'sta': self._parse_cif_time(line[14:18]),
                     'std': self._parse_cif_time(line[18:22]),
                     'activity': line[22],
                     'estimated': line[27] == '0',
                     'fare-stage': line[29] == '1'
                 }
             else:
                 stop = {
                     'entity': stop_entity,
                     'sta': self._parse_cif_time(line[14:18]),
                     'std': None,
                     'activity': 'F',
                     'estimated': line[22] == '0',
                     'fare-stage': line[24] == '1'
                 }
             this_journey['stops'].append(stop)

         elif record == 'ZL':
             # Route ID
             route_id = line[2:]

         elif record == 'ZD':
             # Days route ID
             route_id += line[18:-1]

         elif record == 'ZS':
             # Route

             if this_journey is not None:
                 routes[-1]['journies'].append(this_journey)
                 # The journey now belongs to the previous route; clear it
                 # so it is not appended a second time to the new route.
                 this_journey = None

             routes.append({
                 'id': route_id,
                 'number': line[10:14].strip(),
                 'description': line[14:-1],
                 'stops': [],
                 'journies': []
             })

         elif record == 'ZA':

             stop_code = line[3:15].strip()
             source = self._get_source()

             try:
                 entity = self._cache['atco:%s' % stop_code]
             except Http404:
                 entity = None

             # (Re)write the stop when it is unknown (out of zone bus stops
             # with NaPTAN codes) or when it is a bus stop we came up with,
             # so any name changes, etc, get processed
             if entity is None or entity.source == source:
                 try:
                     entity = Entity.objects.get(source=source,
                                                 _identifiers__scheme='atco',
                                                 _identifiers__value=stop_code)
                 except Entity.DoesNotExist:
                     entity = Entity(source=source)
                 identifiers = { 'atco': stop_code }
                 entity_type = self._entity_type
                 entity.primary_type = entity_type
                 entity.save(identifiers=identifiers)
                 set_name_in_language(entity, 'en', title=line[15:63].strip())
                 entity.all_types = (entity_type,)
                 entity.update_all_types_completion()
                 entity.save()
             routes[-1]['stops'].append(entity)

     if this_journey is not None:
         routes[-1]['journies'].append(this_journey)

     return routes
Example #4
0
    def add_stop(self, meta, entity_type, source):
        """
        Create or update the Entity for one NaPTAN stop.

        ``meta`` is the stop's metadata dictionary (read keys include
        "atco-code", "longitude", "latitude" and, optionally, "common-name",
        "indicator", "locality-ref", "street", "naptan-code", "plate-code"
        and "crs"). ``entity_type`` becomes the entity's primary and only
        type, and ``source`` is the owning source.

        Returns the saved Entity, or None when the stop lies outside
        ``self.areas`` or is marked as unused in the NaPTAN data.

        Note that the stop type, names and stop areas are read from
        ``self.meta``, ``self.names`` and ``self.stop_areas`` rather than
        from the ``meta`` argument.
        """

        # Check this entity is in an area
        if self.areas is not None:
            if not any(meta["atco-code"].startswith(area) for area in self.areas):
                return

        # See if we're updating an existing object, or creating a new one
        try:
            entity = Entity.objects.get(
                source=source, _identifiers__scheme="atco", _identifiers__value=meta["atco-code"]
            )
        except Entity.DoesNotExist:
            entity = Entity(source=source)
        except Entity.MultipleObjectsReturned:
            # Handle clashes
            Entity.objects.filter(
                source=source, _identifiers__scheme="atco", _identifiers__value=meta["atco-code"]
            ).delete()
            entity = Entity(source=source)

        common_name, indicator, locality, street = [
            meta.get(k) for k in ("common-name", "indicator", "locality-ref", "street")
        ]

        raw_common_name = common_name or ""
        if (
            raw_common_name.endswith(" DEL")
            or (indicator or "").lower() == "not in use"
            or "to define route" in raw_common_name
            or "to def rte" in raw_common_name
            or "to def route" in raw_common_name
            or "def.rte" in raw_common_name
        ):
            # In the NaPTAN list, but indicates it's an unused stop
            return

        if self.meta["stop-type"] in ("MET", "GAT", "FER", "RLY"):
            names = self.names
        else:

            names = dict()

            # NOTE(review): common_name, indicator and street are rebound
            # inside this loop, so values computed for one language carry
            # over into the next one, and the expanded indicator is what the
            # "Stop X" identifier match further down sees. Confirm whether
            # that carry-over is intended before changing it.
            for lang_code, lang_name in settings.LANGUAGES:
                with override(lang_code):

                    # Try and find one in our preferred order
                    for lang in (lang_code, "en", None):
                        if lang in self.names:
                            common_name = self.names[lang]
                            break

                    # Expand abbreviations in indicators
                    if indicator is not None:
                        # Built once per language: the translations depend
                        # on the language activated by override() above.
                        abbreviations = {
                            # Translators: This is referring to bus stop location descriptions
                            "op": ugettext("Opposite"),
                            "opp": ugettext("Opposite"),
                            "opposite": ugettext("Opposite"),
                            # Translators: This is referring to bus stop location descriptions
                            "adj": ugettext("Adjacent"),
                            # Translators: This is referring to bus stop location descriptions
                            "outside": ugettext("Outside"),
                            "o/s": ugettext("Outside"),
                            # Translators: This is referring to bus stop location descriptions
                            "nr": ugettext("Near"),
                            # Translators: This is referring to bus stop location descriptions
                            "inside": ugettext("Inside"),
                            # Translators: This is referring to bus stop location descriptions
                            "stp": ugettext("Stop"),
                        }
                        indicator = " ".join(
                            abbreviations.get(part.lower(), part) for part in indicator.split()
                        )

                    if indicator is None and self.meta["stop-type"] in ("AIR", "FTD", "RSE", "TMU", "BCE"):
                        # Translators: This is referring to public transport entities
                        title = ugettext("Entrance to %s") % common_name

                    elif indicator is None and self.meta["stop-type"] in ("FBT",):
                        # Translators: This is referring to ferry ports
                        title = ugettext("Berth at %s") % common_name

                    elif indicator is None and self.meta["stop-type"] in ("RPL", "PLT"):
                        # Translators: This is referring to rail and metro stations
                        title = ugettext("Platform at %s") % common_name

                    elif indicator is not None and indicator.lower() != "none" and indicator not in common_name:
                        title = indicator + " " + common_name

                    else:
                        title = common_name

                    if street is not None and street != "-":
                        # Deal with all-caps street names
                        if street.upper() == street:
                            # Keep the first character of each word as it is
                            # and lower-case the rest; a word starts at the
                            # beginning and after a space.
                            fixed = []
                            wordstart = True
                            for letter in street:
                                if wordstart:
                                    wordstart = False
                                    fixed.append(letter)
                                elif letter == " ":
                                    wordstart = True
                                    fixed.append(letter)
                                else:
                                    fixed.append(letter.lower())
                            street = "".join(fixed)

                        if street not in title:
                            title += ", " + street

                    locality_lang = self.nptg_localities.get(locality)
                    if locality_lang is not None:
                        for lang in (lang_code, "en", "cy"):
                            if lang in locality_lang:
                                # Don't repeat the locality when it is the
                                # same as the street
                                if locality_lang[lang] != street:
                                    title += ", " + locality_lang[lang]
                                break

                    names[lang_code] = title

        entity.primary_type = entity_type

        if not entity.metadata:
            entity.metadata = {}
        entity.metadata["naptan"] = meta
        entity.location = Point(float(meta["longitude"]), float(meta["latitude"]), srid=4326)
        entity.geometry = entity.location

        if meta["atco-code"] in self.tube_references:
            entity.metadata["london-underground-identifiers"] = self.tube_references[meta["atco-code"]]

        identifiers = {"atco": meta["atco-code"]}
        if "naptan-code" in meta:
            # The code is rewritten in place, character by character, via
            # self.naptan_dial
            meta["naptan-code"] = "".join(map(self.naptan_dial, meta["naptan-code"]))
            identifiers["naptan"] = meta["naptan-code"]
        if "plate-code" in meta:
            identifiers["plate"] = meta["plate-code"]
        if "crs" in meta:
            identifiers["crs"] = meta["crs"]
        # Raw string so that \d is a regex escape, not a string escape
        if indicator is not None and re.match(r"Stop [A-Z]\d\d?", indicator):
            identifiers["stop"] = indicator[5:]

        entity.save(identifiers=identifiers)

        for lang_code, name in names.items():
            # This is the NaPTAN, so default to English
            if lang_code is None:
                lang_code = "en"
            set_name_in_language(entity, lang_code, title=name)

        entity.all_types = (entity_type,)
        entity.update_all_types_completion()
        # Rebuild group membership from the current stop areas
        entity.groups.clear()
        for stop_area in self.stop_areas:
            sa, created = EntityGroup.objects.get_or_create(source=source, ref_code=stop_area)
            entity.groups.add(sa)
        entity.save()

        return entity
Example #5
0
 def _scrape(self, route, url, output):
     """
     Rebuild the stops of ``route`` from its service page.

     Fetches ``url`` (with '&showall=1' appended), clears the route's
     existing stops and then walks every row after the first of the first
     table on the page, creating one StopOnRoute per row in page order.

     Rows with a stop code are matched to an existing entity via the scheme
     implied by the code's prefix; stops that are unknown, or that belong to
     this provider's own source, are created or refreshed here. Rows without
     a stop code are stored under the 'acisroute' scheme, keyed by name.

     ``output`` is accepted but not used by this method.
     """
     url += '&showall=1'
     # Close the HTTP response once the page has been parsed
     response = urlopen(url)
     try:
         service = etree.parse(response, parser=etree.HTMLParser())
     finally:
         response.close()
     route.stops.clear()
     for i, tr in enumerate(service.find('.//table').findall('tr')[1:]):

         try:
             stop_code = tr[1][0].text
         except IndexError:

             # Stops on ACIS Live that don't have codes, e.g., out of county
             # stops
             stop_name = tr[3][0].text
             try:
                 entity = Entity.objects.get(source=self._get_source(),
                                             _identifiers__scheme='acisroute',
                                             _identifiers__value=stop_name)
             except Entity.DoesNotExist:
                 entity = Entity(source=self._get_source())

             entity_type = self._get_entity_type()
             entity.primary_type = entity_type
             identifiers = { 'acisroute': stop_name }
             entity.save(identifiers=identifiers)
             set_name_in_language(entity, 'en', title=stop_name)
             entity.all_types = (entity_type,)
             entity.update_all_types_completion()

         else:
             if stop_code.startswith(('693', '272', '734', '282')):
                 # Oxontime uses NaPTAN code
                 scheme = 'naptan'
             elif stop_code.startswith('450'):
                 # West Yorkshire uses plate code
                 scheme = 'plate'
             else:
                 # Everyone else uses ATCO
                 scheme = 'atco'
                 if stop_code.startswith('370'):
                     # Except South Yorkshire, which mangles the code
                     stop_code = '3700%s' % stop_code[3:]

             try:
                 entity = get_entity(scheme, stop_code)
             except Http404:
                 entity = None

             # (Re)write the stop when it is unknown - out of zone bus stops
             # with NaPTAN codes, or the fake bus stops Oxontime made up for
             # the TUBE route - or when it is a bus stop we came up with, so
             # any name changes, etc, get processed
             if entity is None or entity.source == self._get_source():
                 try:
                     entity = Entity.objects.get(source=self._get_source(),
                                                 _identifiers__scheme=scheme,
                                                 _identifiers__value=stop_code)
                 except Entity.DoesNotExist:
                     entity = Entity(source=self._get_source())
                 identifiers = {scheme: stop_code}
                 entity_type = self._get_entity_type()
                 entity.primary_type = entity_type
                 entity.save(identifiers=identifiers)
                 set_name_in_language(entity, 'en', title=tr[3][0].text)
                 entity.all_types = (entity_type,)
                 entity.update_all_types_completion()
                 entity.save()

         StopOnRoute.objects.create(route=route, entity=entity, order=i)