Example #1
0
    def test_yahoo_geocode_zero_result(self):
        # A Yahoo response reporting "Found": 0 must make geocode_yahoo
        # return None.
        street_address = (
            '1821 Pacific Coast Hwy, Hermosa Beach, California 90254'
            )
        expected_url = (
            'http://where.yahooapis.com/geocode?'
            'location=1821+Pacific+Coast+Hwy%2C+Hermosa+Beach%2C+California+90254'
            '&flags=J&appid=foo'
            )
        # Canned response body handed back by the fake urlopen.
        body = """{"ResultSet":{"version":"1.0","Error":0,"ErrorMessage":"No error","Locale":"us_US","Quality":0,"Found":0}}"""

        urllib2_stub = fudge.Fake('urllib2')
        urllib2_stub.remember_order()

        # Exactly one urlopen call is expected, with the fully encoded URL.
        urlopen_call = urllib2_stub.expects('urlopen')
        urlopen_call.with_args(expected_url)
        urlopen_call.returns(StringIO(body))

        result = geocoder.geocode_yahoo(
            street_address,
            'foo',
            _urllib2=urllib2_stub,
            )
        eq(None, result)
Example #2
0
    def test_yahoo_geocode_no_app_id(self):
        # When no app id is passed, the request URL carries no "appid"
        # parameter and the single result's coordinates are returned.
        street_address = (
            '1821 Pacific Coast Hwy, Hermosa Beach, California 90254'
            )
        expected_url = (
            'http://where.yahooapis.com/geocode?'
            'location=1821+Pacific+Coast+Hwy%2C+Hermosa+Beach%2C+California+90254'
            '&flags=J'
            )
        # Canned response body handed back by the fake urlopen.
        body = """{"ResultSet":{"version":"1.0","Error":0,"ErrorMessage":"No error","Locale":"us_US","Quality":87,"Found":1,"Results":[{"quality":87,"latitude":"33.86829","longitude":"-118.394024","offsetlat":"33.868267","offsetlon":"-118.394174","radius":500,"name":"","line1":"1821 Pacific Coast Hwy, #11","line2":"Hermosa Beach, CA  90254-3125","line3":"","line4":"United States","house":"1821","street":"Pacific Coast Hwy","xstreet":"","unittype":"","unit":"#11","postal":"90254-3125","neighborhood":"","city":"Hermosa Beach","county":"Los Angeles County","state":"California","country":"United States","countrycode":"US","statecode":"CA","countycode":"","uzip":"90254","hash":"0F843448232C6E64","woeid":12795734,"woetype":11}]}}"""

        urllib2_stub = fudge.Fake('urllib2')
        urllib2_stub.remember_order()

        # Exactly one urlopen call is expected, with the fully encoded URL.
        urlopen_call = urllib2_stub.expects('urlopen')
        urlopen_call.with_args(expected_url)
        urlopen_call.returns(StringIO(body))

        data = geocoder.geocode_yahoo(
            street_address,
            _urllib2=urllib2_stub,
            )

        # Coordinates come back as floats parsed from the string fields
        # "latitude" / "longitude" of the first result.
        expected = OrderedDict([
            ('lat', 33.868290),
            ('lng', -118.394024),
            ])
        eq(data, expected)
Example #3
0
def _field_exists_clause(field, exists):
    """Return the MongoDB clause ``{field: {'$exists': exists}}``."""
    return OrderedDict([
            (field,
             OrderedDict([
                        ('$exists', exists),
                        ]),
             ),
            ])


def _ungeocoded_events_query():
    """Build the query selecting events that still need geocoding.

    An event matches when it lacks ``facebook.venue.latitude`` or
    ``facebook.venue.longitude``, has neither
    ``ubernear.geocoding_failed`` nor ``ubernear.geocoding_completed``
    set, and has ``ubernear.lookup_completed`` set.
    """
    missing_coordinate = OrderedDict([
            ('$or',
             [_field_exists_clause('facebook.venue.latitude', False),
              _field_exists_clause('facebook.venue.longitude', False),
              ]
             ),
            ])
    return OrderedDict([
            ('$and',
             [missing_coordinate,
              _field_exists_clause('ubernear.geocoding_failed', False),
              _field_exists_clause('ubernear.geocoding_completed', False),
              _field_exists_clause('ubernear.lookup_completed', True),
              ]
             ),
            ])


def update_coordinate(
    events_coll,
    yahoo_id,
    process_all,
    ):
    """Geocode events through Yahoo and store the resulting coordinates.

    For every selected event the address and city are taken from
    ``event['normalized']`` when that key is present, otherwise from the
    ``street`` and ``city`` of ``event['facebook']['venue']``.  Events
    that cannot be geocoded are flagged through ``_mark_as_failed`` with
    the field ``geocoding_failed``; successful lookups are written with
    ``mongo.save_no_replace``.

    :param events_coll: collection of events; must provide ``find`` and
        is handed on to ``_mark_as_failed`` and ``mongo.save_no_replace``.
    :param yahoo_id: passed as the second argument to
        ``geocoder.geocode_yahoo``.
    :param process_all: when true every event in the collection is
        processed; otherwise only the events matched by
        ``_ungeocoded_events_query``, sorted ascending by
        ``ubernear.fetched``.
    :returns: ``None``.
    """
    now = datetime.utcnow()

    def mark_failed(event_id, reason):
        # Every failure in this function is recorded the same way; only
        # the event and the reason differ.
        _mark_as_failed(
            events_coll=events_coll,
            event_id=event_id,
            now=now,
            field='geocoding_failed',
            reason=reason,
            )

    if process_all:
        events = events_coll.find()
    else:
        events = events_coll.find(
            _ungeocoded_events_query(),
            sort=[('ubernear.fetched', pymongo.ASCENDING)],
            )

    count = events.count()
    if count != 0:
        log.info(
            'Geocoding {count} event{s}'.format(
                count=count,
                s='' if count == 1 else 's',
                ),
            )
    # NOTE(review): found_work is filled in below but never read or
    # returned by this function -- confirm whether a
    # ``return found_work`` is missing before removing it.
    found_work = OrderedDict([
            ('found_work', False),
            ('sleep', None),
            ])
    # TODO This cursor may timeout if there are too many results
    for event in events:
        found_work['found_work'] = True
        # Check for missing values here instead of in the query
        # so it is explicitly known which events are not
        # eligible for geocoding
        if 'venue' not in event['facebook']:
            mark_failed(event['_id'], 'No venue')
            continue
        venue = event['facebook']['venue']
        # The minimal requirements for geocoding
        if 'normalized' in event:
            address = event['normalized']['address']
            city = event['normalized']['city']
        elif 'street' not in venue or 'city' not in venue:
            mark_failed(event['_id'], 'No street or city')
            continue
        else:
            address = venue['street']
            city = venue['city']
        request = '{address},{city}'.format(
            address=address.encode('utf-8'),
            city=city.encode('utf-8'),
            )
        try:
            # TODO figure out which error corresponds to the
            # rate limit reached and return the number of hours
            # to sleep
            response = geocoder.geocode_yahoo(request, yahoo_id)
        except geocoder.GeocoderAmbiguousResultError as e:
            # ``as`` form: identical behaviour on Python 2.6+, and also
            # valid syntax on Python 3 (the comma form is not).
            mark_failed(event['_id'], str(e))
            continue
        if response is None:
            mark_failed(event['_id'], 'Null response')
            continue

        save = OrderedDict([
            ('facebook.venue.latitude', response['lat']),
            ('facebook.venue.longitude', response['lng']),
            ('ubernear.geocoding_completed', now),
            ('ubernear.geocoding_source', 'yahoo'),
            ])
        log.debug(
            'Storing coordinates for {event_id}'.format(
                event_id=event['_id'],
                )
            )
        mongo.save_no_replace(
            events_coll,
            _id=event['_id'],
            save=save,
            )
Example #4
0
def main():
    parser = optparse.OptionParser(
        usage='%prog [OPTS]',
        )
    parser.add_option(
        '-v', '--verbose',
        help='Verbose mode [default %default]',
        action="store_true", dest="verbose"
        )
    parser.add_option(
        '--db-config',
        help=('Path to the file with information on how to '
              'retrieve and store data in the database'
              ),
        metavar='PATH',
        )
    parser.add_option(
        '--config',
        help=('Path to the file with information on application '
              'ids for different services'
              ),
        metavar='PATH',
        )
    parser.set_defaults(
        verbose=False,
        )

    options, args = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments.')

    if options.db_config is None:
        parser.error('Missing option --db-config=.')

    if options.config is None:
        parser.error('Missing option --config=.')

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
        )

    log.info('Start...')

    config = config_parser(options.config)
    yahoo_appid = config.get('yahoo','appid')

    coll = collections(options.db_config)
    places_coll = coll['places-collection']

    found_work = False
    with _places_cursor(places_coll=places_coll) as cursor:
        for place in cursor:
            found_work = True
            address_tmpl = (
                '{address} {extended}, {locality}, {region} '
                '{postcode}, {country}'
                )
            info = place['info']
            address = address_tmpl.format(
                address=info.get('address', ''),
                extended=info.get('address_extended', ''),
                locality=info.get('locality', ''),
                region=info.get('region', ''),
                postcode=info.get('postcode', ''),
                country=info.get('country', ''),
                )
            log.debug(
                'Geocoding {address}'.format(
                    address=address,
                    )
                )
            try:
                location = geocoder.geocode_yahoo(
                    address=address,
                    yahoo_appid=yahoo_appid,
                    )
            except geocoder.GeocoderRateLimitError, e:
                log.info(
                    '{msg}. Sleeping 24 hours...'.format(
                        msg=str(e),
                        )
                    )
                time.sleep(60*60*24)
            except geocoder.GeocoderError, e:
                log.error(
                    '{msg}. Skipping place {_id}'.format(
                        msg=str(e),
                        _id=place['_id'],
                        )
                    )
            else: