def test_yahoo_geocode_zero_result(self):
    """geocode_yahoo yields None when the response reports ``Found: 0``."""
    address = '1821 Pacific Coast Hwy, Hermosa Beach, California 90254'
    expected_url = (
        'http://where.yahooapis.com/geocode?'
        'location=1821+Pacific+Coast+Hwy%2C+Hermosa+Beach%2C+California+90254'
        '&flags=J&appid=foo'
    )
    # Canned response body carrying no "Results" entry.
    payload = """{"ResultSet":{"version":"1.0","Error":0,"ErrorMessage":"No error","Locale":"us_US","Quality":0,"Found":0}}"""

    # Stand-in for urllib2 that expects exactly one urlopen() call with
    # the URL above and answers with the canned payload.
    urllib2_stub = fudge.Fake('urllib2')
    urllib2_stub.remember_order()
    (urllib2_stub
        .expects('urlopen')
        .with_args(expected_url)
        .returns(StringIO(payload)))

    data = geocoder.geocode_yahoo(address, 'foo', _urllib2=urllib2_stub)

    eq(None, data)
def test_yahoo_geocode_no_app_id(self):
    """Without an app id the request URL carries no ``appid`` parameter.

    The single result in the canned response is expected to come back as
    an ordered ``lat``/``lng`` mapping of floats.
    """
    address = '1821 Pacific Coast Hwy, Hermosa Beach, California 90254'
    expected_url = (
        'http://where.yahooapis.com/geocode?'
        'location=1821+Pacific+Coast+Hwy%2C+Hermosa+Beach%2C+California+90254'
        '&flags=J'
    )
    # Canned response body with exactly one match.
    payload = """{"ResultSet":{"version":"1.0","Error":0,"ErrorMessage":"No error","Locale":"us_US","Quality":87,"Found":1,"Results":[{"quality":87,"latitude":"33.86829","longitude":"-118.394024","offsetlat":"33.868267","offsetlon":"-118.394174","radius":500,"name":"","line1":"1821 Pacific Coast Hwy, #11","line2":"Hermosa Beach, CA 90254-3125","line3":"","line4":"United States","house":"1821","street":"Pacific Coast Hwy","xstreet":"","unittype":"","unit":"#11","postal":"90254-3125","neighborhood":"","city":"Hermosa Beach","county":"Los Angeles County","state":"California","country":"United States","countrycode":"US","statecode":"CA","countycode":"","uzip":"90254","hash":"0F843448232C6E64","woeid":12795734,"woetype":11}]}}"""
    expected = OrderedDict([
        ('lat', 33.868290),
        ('lng', -118.394024),
    ])

    # Stand-in for urllib2 that expects exactly one urlopen() call with
    # the URL above and answers with the canned payload.
    urllib2_stub = fudge.Fake('urllib2')
    urllib2_stub.remember_order()
    (urllib2_stub
        .expects('urlopen')
        .with_args(expected_url)
        .returns(StringIO(payload)))

    data = geocoder.geocode_yahoo(address, _urllib2=urllib2_stub)

    eq(data, expected)
def update_coordinate(events_coll, yahoo_id, process_all):
    """Geocode event venues through Yahoo and store the coordinates.

    Each selected event is turned into an ``address,city`` request for
    ``geocoder.geocode_yahoo``.  On success the latitude/longitude are
    written back with ``mongo.save_no_replace`` together with a
    ``ubernear.geocoding_completed`` timestamp and a
    ``ubernear.geocoding_source`` of ``'yahoo'``.  Events that cannot be
    geocoded are handed to ``_mark_as_failed`` with a reason.

    :param events_coll: collection object; ``find()`` is called on it and
        it is passed on to ``_mark_as_failed`` and
        ``mongo.save_no_replace``.
    :param yahoo_id: forwarded to ``geocoder.geocode_yahoo`` as its second
        positional argument.
    :param process_all: when true every document returned by
        ``events_coll.find()`` is processed; otherwise only the documents
        matched by ``_pending_geocoding_query()``, sorted ascending by
        ``ubernear.fetched``.
    :returns: ``None``.
    """
    # One timestamp is shared by every write made during this run.
    now = datetime.utcnow()

    if process_all:
        events = events_coll.find()
    else:
        events = events_coll.find(
            _pending_geocoding_query(),
            sort=[('ubernear.fetched', pymongo.ASCENDING)],
        )

    count = events.count()
    if count != 0:
        log.info(
            'Geocoding {count} event{s}'.format(
                count=count,
                s='' if count == 1 else 's',
            )
        )

    # NOTE(review): this mapping is populated below but is not returned
    # by the code visible here -- confirm whether a ``return found_work``
    # was intended before relying on it.
    found_work = OrderedDict([
        ('found_work', False),
        ('sleep', None),
    ])
    # TODO This cursor may timeout if there are too many results
    for event in events:
        found_work['found_work'] = True

        # Check for missing values here instead of in the query
        # so it is explicitly known which events are not
        # eligible for geocoding
        if 'venue' not in event['facebook']:
            _mark_geocoding_failed(events_coll, event, now, 'No venue')
            continue

        venue = event['facebook']['venue']
        # The minimal requirements for geocoding: a normalized record
        # takes precedence, otherwise the venue must carry both a street
        # and a city.
        if 'normalized' in event:
            address = event['normalized']['address']
            city = event['normalized']['city']
        elif 'street' not in venue or 'city' not in venue:
            _mark_geocoding_failed(
                events_coll, event, now, 'No street or city',
            )
            continue
        else:
            address = venue['street']
            city = venue['city']

        # NOTE(review): presumably encoded to UTF-8 so the byte-string
        # template accepts non-ASCII text on Python 2 -- confirm.
        request = '{address},{city}'.format(
            address=address.encode('utf-8'),
            city=city.encode('utf-8'),
        )

        try:
            # TODO figure out which error corresponds to the
            # rate limit reached and return the number of hours
            # to sleep
            response = geocoder.geocode_yahoo(request, yahoo_id)
        except geocoder.GeocoderAmbiguousResultError as e:
            _mark_geocoding_failed(events_coll, event, now, str(e))
            continue

        if response is None:
            _mark_geocoding_failed(events_coll, event, now, 'Null response')
            continue

        save = OrderedDict([
            ('facebook.venue.latitude', response['lat']),
            ('facebook.venue.longitude', response['lng']),
            ('ubernear.geocoding_completed', now),
            ('ubernear.geocoding_source', 'yahoo'),
        ])
        log.debug(
            'Storing coordinates for {event_id}'.format(
                event_id=event['_id'],
            )
        )
        mongo.save_no_replace(
            events_coll,
            _id=event['_id'],
            save=save,
        )


def _pending_geocoding_query():
    """Build the query selecting events that still need geocoding.

    Matches documents that lack a venue latitude or longitude, have
    neither ``ubernear.geocoding_failed`` nor
    ``ubernear.geocoding_completed`` set, and do have
    ``ubernear.lookup_completed``.
    """
    def field_exists(field, present):
        return OrderedDict([
            (field, OrderedDict([('$exists', present)])),
        ])

    missing_coordinate = OrderedDict([
        ('$or', [
            field_exists('facebook.venue.latitude', False),
            field_exists('facebook.venue.longitude', False),
        ]),
    ])
    return OrderedDict([
        ('$and', [
            missing_coordinate,
            field_exists('ubernear.geocoding_failed', False),
            field_exists('ubernear.geocoding_completed', False),
            field_exists('ubernear.lookup_completed', True),
        ]),
    ])


def _mark_geocoding_failed(events_coll, event, now, reason):
    """Hand *event* to ``_mark_as_failed`` under ``geocoding_failed``."""
    _mark_as_failed(
        events_coll=events_coll,
        event_id=event['_id'],
        now=now,
        field='geocoding_failed',
        reason=reason,
    )
def main(): parser = optparse.OptionParser( usage='%prog [OPTS]', ) parser.add_option( '-v', '--verbose', help='Verbose mode [default %default]', action="store_true", dest="verbose" ) parser.add_option( '--db-config', help=('Path to the file with information on how to ' 'retrieve and store data in the database' ), metavar='PATH', ) parser.add_option( '--config', help=('Path to the file with information on application ' 'ids for different services' ), metavar='PATH', ) parser.set_defaults( verbose=False, ) options, args = parser.parse_args() if args: parser.error('Wrong number of arguments.') if options.db_config is None: parser.error('Missing option --db-config=.') if options.config is None: parser.error('Missing option --config=.') logging.basicConfig( level=logging.DEBUG if options.verbose else logging.INFO, format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s', datefmt='%Y-%m-%dT%H:%M:%S', ) log.info('Start...') config = config_parser(options.config) yahoo_appid = config.get('yahoo','appid') coll = collections(options.db_config) places_coll = coll['places-collection'] found_work = False with _places_cursor(places_coll=places_coll) as cursor: for place in cursor: found_work = True address_tmpl = ( '{address} {extended}, {locality}, {region} ' '{postcode}, {country}' ) info = place['info'] address = address_tmpl.format( address=info.get('address', ''), extended=info.get('address_extended', ''), locality=info.get('locality', ''), region=info.get('region', ''), postcode=info.get('postcode', ''), country=info.get('country', ''), ) log.debug( 'Geocoding {address}'.format( address=address, ) ) try: location = geocoder.geocode_yahoo( address=address, yahoo_appid=yahoo_appid, ) except geocoder.GeocoderRateLimitError, e: log.info( '{msg}. Sleeping 24 hours...'.format( msg=str(e), ) ) time.sleep(60*60*24) except geocoder.GeocoderError, e: log.error( '{msg}. Skipping place {_id}'.format( msg=str(e), _id=place['_id'], ) ) else: