예제 #1
0
 def address_to_block(self, *args, **kwargs):
     """
     Forward every positional and keyword argument, unchanged, to
     text_utils.address_to_block and return whatever it returns.
     """
     block_level_name = text_utils.address_to_block(*args, **kwargs)
     return block_level_name
예제 #2
0
    def create_newsitem(self, attributes, **kwargs):
        """
        Creates and saves a NewsItem with the given kwargs. Returns the new
        NewsItem.

        kwargs MUST have the following keys:
            title
            item_date
            location_name

        kwargs may optionally contain: description, url, pub_date, location
        and schema. When they are missing, this uses '' for description and
        url, self.start_time for pub_date, the geocoded point for location
        and self.schema for schema.

        kwargs may optionally contain a 'convert_to_block' boolean. If True,
        this will convert the given kwargs['location_name'] to a block level
        but will still prefer the real (non-block-level) address for
        geocoding. Only when that address could not be geocoded is the
        block-level name geocoded instead.

        kwargs may optionally contain a 'location_name_geocoder' string. If
        this exists, it is geocoded instead of kwargs['location_name'].

        attributes is a dictionary to use to populate this NewsItem's
        attributes. NOTE: it is modified in place -- when it has no
        'source_id' key, 'source_name' and 'source_id' are filled in from
        the Lookup whose id is attributes['source'] or, when there is no
        'source' key, from the schema the NewsItem is saved with.

        Side effects: increments self.num_added, logs the creation and
        records the new id in self.created_newsitem_ids under schema.id.
        """
        geocoded_location = place = None
        string_to_geocode = (
            kwargs['location_name_geocoder']
            if 'location_name_geocoder' in kwargs
            else kwargs['location_name']
        )
        result = self.geocode(string_to_geocode)
        if result:
            geocoded_location = result['point']
            # NOTE(review): `place` is never read after this point (the
            # NewsItem is always saved with spot=None). The lookups are kept
            # because Spot.objects.from_geocoder may have side effects --
            # confirm before removing.
            if result['point_type'] == 'address':
                _, place = Spot.objects.from_geocoder(result)
            elif result['point_type'] == 'intersection':
                place = result['intersection']
            else:
                place = result['block']

        if kwargs.pop('convert_to_block', False):
            kwargs['location_name'] = address_to_block(kwargs['location_name'])
            # If the exact address couldn't be geocoded, try using the
            # normalized location name.
            if geocoded_location is None:
                # self.geocode returns a result mapping, not a point, so the
                # point has to be extracted exactly as for the first lookup.
                # (This previously read an undefined name and raised
                # NameError whenever the fallback geocode succeeded.)
                block_result = self.geocode(kwargs['location_name'])
                if block_result:
                    geocoded_location = block_result['point']

        # Normally we'd just use "schema = kwargs.get('schema', self.schema)",
        # but self.schema will be evaluated even if the key is found in
        # kwargs, which raises an error when using multiple schemas.
        schema = kwargs.get('schema', None) or self.schema

        # Some schemas don't have source_name or source_id schema fields.
        # TODO: Perhaps change to get or create lookup
        if 'source_id' not in attributes:
            if 'source' in attributes:
                source = Lookup.objects.get(id=attributes['source'])
            else:
                # Use the already-resolved schema rather than self.schema,
                # for the same multiple-schema reason explained above.
                source = schema
            attributes['source_name'] = source.name
            attributes['source_id'] = source.id

        ni = NewsItem.objects.create(
            schema=schema,
            title=kwargs['title'],
            description=kwargs.get('description', ''),
            url=kwargs.get('url', ''),
            pub_date=kwargs.get('pub_date', self.start_time),
            item_date=kwargs['item_date'],
            location=kwargs.get('location', geocoded_location),
            location_name=kwargs['location_name'],
            spot=None,
            location_id=None,  # Scrapers should never save a location_id.
            # 'source_name' can still be absent when the caller supplied
            # 'source_id' on its own; fall back to the raw 'source' value.
            source_name=(
                attributes['source_name']
                if 'source_name' in attributes
                else attributes['source']
            ),
            # 'source_id' is always present here: it was either supplied by
            # the caller or filled in above.
            source_id=attributes['source_id'],
        )
        ni.attributes = attributes
        self.num_added += 1
        self.logger.info(
            u'Created NewsItem %s (total created in this scrape: %s)', ni.id,
            self.num_added)
        self.created_newsitem_ids.setdefault(schema.id, []).append(ni.id)
        return ni