Esempio n. 1
0
    def save(self, old_record, list_record, detail_record):
        """Create or update a news item from one parsed feed entry.

        The feed provides no geographic data, so candidate location names
        are extracted from the entry's summary text (falling back to its
        title) and looked up through LocationSynonym / Location. The first
        candidate that resolves supplies the item's location; when nothing
        resolves, the entry is logged and skipped.

        Args:
            old_record: Passed through unchanged to create_or_update().
            list_record: Parsed feed entry; the 'updated_parsed', 'summary',
                'title' and 'link' keys are read.
            detail_record: Ignored here (has_detail is False for this
                scraper, so there is no detail page).
        """
        date = datetime.date(*list_record['updated_parsed'][:3])
        # Throw away HTML tags so the grabber sees plain text.
        description = text_from_html(list_record['summary'])

        grabber = places.location_grabber()
        # Prefer names found in the summary; fall back to the title.
        addrs = grabber(description) or grabber(list_record['title'])
        if not addrs:
            self.logger.info("no addresses found")
            return

        location = None
        location_name = u''
        # There may be several candidate names, so the lookup cannot be
        # delegated to create_or_update(); stop at the first one that
        # resolves.
        for _l, _r, name in addrs:
            try:
                synonym = LocationSynonym.objects.get(pretty_name=name)
                location = Location.objects.get(name=synonym.location).location
            except (GeocodingException,
                    LocationSynonym.DoesNotExist,
                    Location.DoesNotExist):
                # A candidate with no matching row is expected; without
                # catching DoesNotExist the first unknown name would abort
                # the whole entry instead of trying the next candidate.
                log_exception(level=logging.DEBUG)
                continue
            location_name = name
            break

        if location is None:
            self.logger.info("no addresses geocoded in %r" % list_record['title'])
            return

        kwargs = dict(item_date=date,
                      location=location,
                      location_name=location_name,
                      description=description,
                      title=list_record['title'],
                      url=list_record['link'],
                      )
        attributes = None
        self.create_or_update(old_record, attributes, **kwargs)
Esempio n. 2
0
    def save(self, old_record, list_record, detail_record):
        """Create or update a news item from one parsed feed entry.

        Place names are extracted from the entry's summary text and matched
        first against Place, then against PlaceSynonym. The first match
        supplies the item's location. When the grabber finds no place names
        at all, the item is assigned the "Kent State" default location.
        When names are found but none of them matches, the entry is logged
        and skipped.

        Args:
            old_record: Passed through unchanged to create_or_update().
            list_record: Parsed feed entry; the 'updated_parsed', 'summary',
                'title' and 'link' keys are read.
            detail_record: Ignored here (has_detail is False for this
                scraper, so there is no detail page).

        Raises:
            LocationSynonym.DoesNotExist, Location.DoesNotExist: if the
                "Kent State" default is needed but not present in the
                database.
        """
        date = datetime.date(*list_record['updated_parsed'][:3])
        # Throw away HTML tags so the grabber sees plain text.
        description = text_from_html(list_record['summary'])

        grabber = places.place_grabber()
        addrs = grabber(description)

        if not addrs:
            # If no match is found the article is assigned the location
            # of Kent State.
            location_name = "Kent State"
            synonym = LocationSynonym.objects.get(pretty_name=location_name)
            location = Location.objects.get(name=synonym.location).location
            self.logger.info("no matches for place found. Using Kent State default")
        else:
            location = None
            location_name = u''

            # Check the place grabber's results for matches in the
            # database: Places first, then PlaceSynonyms.
            for _l, _r, name in addrs:
                self.logger.debug("trying place name %r" % name)
                try:
                    location = Place.objects.get(pretty_name=name).location
                except Place.DoesNotExist:
                    try:
                        synonym = PlaceSynonym.objects.get(pretty_name=name)
                    except PlaceSynonym.DoesNotExist:
                        self.logger.info("no addresses geocoded in %r" % list_record['title'])
                        continue
                    location = synonym.place.location
                location_name = name
                break

            if location is None:
                self.logger.info("no addresses geocoded in %r" % list_record['title'])
                return

        kwargs = dict(item_date=date,
                      location=location,
                      location_name=location_name,
                      description=description,
                      title=list_record['title'],
                      url=list_record['link'],
                      )
        attributes = None
        self.create_or_update(old_record, attributes, **kwargs)
 def __init__(self, *args, **kwargs):
     """Initialise the base scraper, then attach a location grabber.

     All positional and keyword arguments are forwarded unchanged to the
     parent class. The grabber is built with an empty
     ``ignore_location_types`` list.
     """
     parent = super(WhitevilleNewsScraper, self)
     parent.__init__(*args, **kwargs)
     self.grabber = places.location_grabber(ignore_location_types=[])