def address_to_block(self, *args, **kwargs):
    """
    Thin pass-through to text_utils.address_to_block.

    All positional and keyword arguments are forwarded unchanged and the
    helper's return value is handed back as-is. ``self`` is not used.
    """
    block_name = text_utils.address_to_block(*args, **kwargs)
    return block_name
def create_newsitem(self, attributes, **kwargs):
    """
    Creates and saves a NewsItem with the given kwargs. Returns the new
    NewsItem.

    kwargs MUST have the following keys (a KeyError is raised otherwise):
        title
        item_date
        location_name

    For any other kwargs whose values aren't provided, this will use
    these defaults:
        schema      -- self.schema
        description -- ''
        url         -- ''
        pub_date    -- self.start_time
        location    -- the point returned by self.geocode(), or None

    kwargs may optionally contain a 'convert_to_block' boolean. If True,
    this will convert the given kwargs['location_name'] to a block level
    but will use the real (non-block-level) address for geocoding. If the
    real address could not be geocoded, the block-level name is geocoded
    as a fallback.

    kwargs may optionally contain a 'location_name_geocoder' string. If
    this exists, it will be used to geocode the NewsItem instead of
    kwargs['location_name'].

    attributes is a dictionary to use to populate this NewsItem's
    Attribute object. NOTE: it is modified in place -- when it has no
    'source_id' key, 'source_name' and 'source_id' are filled in from the
    Lookup named by attributes['source'] or, failing that, from the schema.

    Side effects: increments self.num_added, logs the creation, and records
    the new id in self.created_newsitem_ids under the schema's id.
    """
    geocoded_location = place = None
    if 'location_name_geocoder' in kwargs:
        string_to_geocode = kwargs['location_name_geocoder']
    else:
        string_to_geocode = kwargs['location_name']

    result = self.geocode(string_to_geocode)
    if result:
        geocoded_location = result['point']
        # NOTE(review): `place` is resolved here but never stored -- the
        # NewsItem below is created with spot=None. The lookups are kept
        # because Spot.objects.from_geocoder() may have side effects that
        # callers rely on; confirm before removing.
        if result['point_type'] == 'address':
            _, place = Spot.objects.from_geocoder(result)
        elif result['point_type'] == 'intersection':
            place = result['intersection']
        else:
            place = result['block']

    if kwargs.pop('convert_to_block', False):
        # Go through the method on this class so the call does not depend
        # on a module-level `address_to_block` name being importable.
        kwargs['location_name'] = self.address_to_block(kwargs['location_name'])
        # If the exact address couldn't be geocoded, try using the
        # normalized location name.
        if geocoded_location is None:
            fallback_result = self.geocode(kwargs['location_name'])
            if fallback_result:
                # Previously this read from an undefined name `location`,
                # raising NameError whenever the fallback geocode succeeded.
                geocoded_location = fallback_result['point']

    # Normally we'd just use "schema = kwargs.get('schema', self.schema)",
    # but self.schema will be evaluated even if the key is found in
    # kwargs, which raises an error when using multiple schemas.
    schema = kwargs.get('schema', None)
    schema = schema or self.schema

    # Some schemas don't have source_name or source_id schema fields.
    # TODO: Perhaps change to get or create lookup
    if 'source_id' not in attributes:
        if 'source' in attributes:
            source = Lookup.objects.get(id=attributes['source'])
        else:
            # Use the schema resolved above rather than self.schema, for
            # the same multiple-schema reason explained in the comment on
            # the schema lookup.
            source = schema
        attributes['source_name'] = source.name
        attributes['source_id'] = source.id

    # Conditional expressions (not dict.get) so attributes['source'] is only
    # looked up when the preferred key is actually missing.
    if 'source_name' in attributes:
        source_name = attributes['source_name']
    else:
        source_name = attributes['source']
    if 'source_id' in attributes:
        source_id = attributes['source_id']
    else:
        source_id = attributes['source']

    ni = NewsItem.objects.create(
        schema=schema,
        title=kwargs['title'],
        description=kwargs.get('description', ''),
        url=kwargs.get('url', ''),
        pub_date=kwargs.get('pub_date', self.start_time),
        item_date=kwargs['item_date'],
        location=kwargs.get('location', geocoded_location),
        location_name=kwargs['location_name'],
        spot=None,
        location_id=None,  # Scrapers should never save a location_id.
        source_name=source_name,
        source_id=source_id,
    )
    ni.attributes = attributes

    self.num_added += 1
    self.logger.info(
        u'Created NewsItem %s (total created in this scrape: %s)',
        ni.id, self.num_added)
    self.created_newsitem_ids.setdefault(schema.id, []).append(ni.id)
    return ni