def main():
    """Entry point for the facebook-event worker.

    Parses command-line options, configures logging, connects to the
    Facebook Graph API and the MongoDB collections, then runs one
    maintenance pass: moves expired events aside and refreshes event,
    venue and coordinate data.  Sleeps when the geocoder is
    rate-limited or when no work was found, so a supervisor can run
    this in a loop.
    """
    parser = optparse.OptionParser(
        usage='%prog [OPTS]',
    )
    parser.add_option(
        '-v', '--verbose',
        help='Verbose mode [default %default]',
        action="store_true", dest="verbose"
    )
    parser.add_option(
        '--config',
        help=('Path to the file with information on how to '
              'configure facebook-event'
        ),
        metavar='PATH',
    )
    parser.add_option(
        '--db-config',
        help=('Path to the file with information on how to '
              'retrieve and store data in the database'
        ),
        metavar='PATH',
    )
    parser.add_option(
        '-a', '--process-all',
        help=('Process all events that have not expired '
              'instead of just those that have not been '
              'processed [default %default]'
        ),
        action="store_true", dest="process_all"
    )
    parser.set_defaults(
        verbose=False,
        process_all=False,
    )
    options, args = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments.')
    if options.config is None:
        parser.error('Missing option --config=.')
    if options.db_config is None:
        parser.error('Missing option --db-config=.')

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    config = config_parser(options.config)
    access_token = config.get('facebook', 'access_token')
    graph = GraphAPI(access_token)
    usps_id = config.get('usps', 'user_id')
    yahoo_id = config.get('yahoo', 'app_id')

    coll = collections(options.db_config)
    events_coll = coll['events-collection']
    expired_coll = coll['expired-collection']

    # Indices backing the expire/update queries below.
    indices = [
        {'facebook.end_time': pymongo.ASCENDING},
        {'ubernear.fetched': pymongo.ASCENDING},
    ]
    mongo.create_indices(
        collection=events_coll,
        indices=indices,
    )
    indices = [
        {'facebook.end_time': pymongo.ASCENDING},
    ]
    mongo.create_indices(
        collection=expired_coll,
        indices=indices,
    )

    log.info('Start...')
    log.info('Moving expired events...')
    facebook_event.expire(
        events_coll=events_coll,
        expired_coll=expired_coll,
    )

    log.info('Updating event data...')
    facebook_work = facebook_event.update_facebook(
        events_coll=events_coll,
        graph=graph,
        process_all=options.process_all,
    )
    log.info('Updating venue data...')
    venue_work = facebook_event.update_venue(
        events_coll=events_coll,
        usps_id=usps_id,
        process_all=options.process_all,
    )
    log.info('Updating coordinate data...')
    coord_work = facebook_event.update_coordinate(
        events_coll=events_coll,
        yahoo_id=yahoo_id,
        process_all=options.process_all,
    )

    if coord_work['sleep'] is not None:
        delay = coord_work['sleep']
        # BUG FIX: the format string used the placeholder {sleep} but the
        # keyword argument was named delay, so this log call raised
        # KeyError at runtime.  The value is passed to time.sleep(),
        # which takes seconds, so report it as seconds.
        log.info(
            'Geocoding rate limit reached. '
            'Sleeping {delay} seconds...'.format(
                delay=delay,
            )
        )
        time.sleep(delay)
    else:
        found_work = (
            facebook_work
            or venue_work
            or coord_work['found_work']
        )
        if not found_work:
            hours = 24
            # randint adds up to one second on top of the 24h sleep.
            delay = random.randint(60*60*hours, 60*60*hours+1)
            log.info(
                'Did not find any work. '
                'Sleeping about {hours} hours...'.format(
                    hours=hours,
                )
            )
            time.sleep(delay)

    log.info('End')
def main():
    """Command-line entry point for the google-get API server.

    Reads the service and database configuration from the paths given
    on the command line, ensures the event index exists, mounts the
    event API and serves it over HTTP until interrupted.
    """
    opt_parser = optparse.OptionParser(usage='%prog [OPTS]')
    opt_parser.add_option(
        '-v', '--verbose',
        action="store_true",
        dest="verbose",
        help='Verbose mode [default %default]',
    )
    opt_parser.add_option(
        '--config',
        metavar='PATH',
        help=('Path to the file with information on how to '
              'configure google-get'
        ),
    )
    opt_parser.add_option(
        '--db-config',
        metavar='PATH',
        help=('Path to the file with information on how to '
              'retrieve and store data in the database'
        ),
    )
    opt_parser.set_defaults(verbose=False)

    (opts, leftover) = opt_parser.parse_args()
    if leftover:
        opt_parser.error('Wrong number of arguments.')
    if opts.config is None:
        opt_parser.error('Missing option --config=.')
    if opts.db_config is None:
        opt_parser.error('Missing option --db-config=.')

    cfg = config_parser(opts.config)
    host = cfg.get('connection', 'host')
    port = cfg.get('connection', 'port')

    # Database reads use the SECONDARY read preference.
    colls = collections(
        config=opts.db_config,
        read_preference=pymongo.ReadPreference.SECONDARY,
    )
    events_coll = colls['events-collection']
    keys_coll = colls['keys-collection']

    log_level = logging.DEBUG if opts.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    mongo.create_indices(
        collection=events_coll,
        indices=[{'facebook.start_time': pymongo.ASCENDING}],
    )

    install(EventAPI01(
        keys_coll=keys_coll,
        events_coll=events_coll,
    ))

    log.info(
        'Starting server http://{host}:{port}'.format(
            host=host,
            port=port,
        )
    )
    wrapped_app = middleware(default_app())
    run(
        app=wrapped_app,
        host=host,
        port=port,
        server=APIServer,
        quiet=True,
    )
def main():
    """Import places from a CSV dump into the MongoDB places collection.

    For every CSV row: drop empty fields, validate the coordinates,
    normalize the address through the USPS API when possible, and
    upsert the record.  Finally create the geospatial and maintenance
    indices on the collection.
    """
    parser = optparse.OptionParser(
        usage='%prog [OPTS]',
    )
    parser.add_option(
        '-v', '--verbose',
        help='Verbose mode [default %default]',
        action="store_true", dest="verbose"
    )
    parser.add_option(
        '--csv',
        help='Path to the CSV file containing the places to import',
        metavar='PATH',
    )
    parser.add_option(
        '--config',
        help=('Path to the config file with information on how to '
              'import places'
        ),
        metavar='PATH',
    )
    parser.add_option(
        '--db-config',
        help=('Path the to file with information on how to '
              'retrieve and store data in the database'
        ),
        metavar='PATH',
    )
    parser.set_defaults(
        verbose=False,
    )
    options, args = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments.')
    if options.csv is None:
        parser.error('Missing option --csv=.')
    if options.config is None:
        parser.error('Missing option --config=.')
    if options.db_config is None:
        parser.error('Missing option --db-config=.')

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    places_csv = absolute_path(options.csv)
    config = config_parser(options.config)
    coll = collections(options.db_config)
    places_coll = coll['places-collection']
    usps_id = config.get('usps', 'user_id')
    delimiter = config.get('csv', 'delimiter')
    # The delimiter is stored escaped in the config (e.g. '\t');
    # interpret the escape sequence.
    delimiter = delimiter.decode('string-escape')

    # Column layout of the CSV export (it has no header row).
    fieldnames = [
        'id',
        'name',
        'address',
        'address_extended',
        'po_box',
        'locality',
        'region',
        'country',
        'postcode',
        'tel',
        'fax',
        'category',
        'website',
        'email',
        'latitude',
        'longitude',
        'status',
    ]

    log.info('Start...')
    with open(places_csv, 'rb') as places_fp:
        places = csv.DictReader(
            places_fp,
            delimiter=delimiter,
            fieldnames=fieldnames,
        )
        for place in places:
            # Don't store empty fields
            save = defaultdict(dict)
            for k, v in place.iteritems():
                if v != '':
                    save['info'][k] = v

            try:
                lat = float(save['info']['latitude'])
                lng = float(save['info']['longitude'])
            except (KeyError, ValueError):
                log.debug(
                    'Did not find a valid '
                    'latitude and longitude for place '
                    '{_id}'.format(
                        _id=save['info']['id'],
                    )
                )
            else:
                save['info']['latitude'] = lat
                save['info']['longitude'] = lng
                # Coordinates are always stored in the form [lng,lat],
                # in that order. Anything else might result in incorrect
                # MongoDB Geospatial queries.
                save['ubernear.location'] = [lng, lat]
                error_msg = ('Bad coordinates (lng,lat) {coord} for id '
                             '{_id}'
                )
                error_msg = error_msg.format(
                    coord=(lng, lat),
                    _id=save['info']['id']
                )
                if (lng < -180 or lng >= 180) or (lat < -90 or lat > 90):
                    # Out-of-range coordinates: keep the place but drop
                    # the location data so geo queries stay correct.
                    log.error(error_msg)
                    del save['info']['latitude']
                    del save['info']['longitude']
                    del save['ubernear.location']

            if 'address' not in save['info']:
                log.error(
                    'Found place {_id} with no address information. '
                    'Skipping'.format(
                        _id=save['info']['id'],
                    )
                )
                continue

            # BUG FIX: empty CSV fields are intentionally not stored in
            # save['info'], so locality/region/postcode may be absent.
            # Using direct indexing raised KeyError and aborted the
            # whole import; default to '' and let USPS verification
            # decide whether the address is usable.
            match = dict([
                ('address', save['info']['address']),
                ('city', save['info'].get('locality', '')),
                ('state', save['info'].get('region', '')),
                ('zipcode', save['info'].get('postcode', '')),
            ])
            if 'address_extended' in save['info']:
                match['address_extended'] = save['info']['address_extended']

            try:
                norm = address_information.verify(usps_id, match)
            # BUG FIX: a bare except: also swallowed SystemExit and
            # KeyboardInterrupt; catch Exception instead.  The place is
            # still saved below, just without normalized address data.
            except Exception:
                log.error(
                    'The USPS API could not find an address for place '
                    '{_id}'.format(
                        _id=save['info']['id'],
                    )
                )
            else:
                norm['name'] = save['info']['name'].upper()
                norm['country'] = 'US'
                save['normalized'] = norm
                save['ubernear.normalization_source'] = 'usps'

            save['ubernear.source'] = 'factual'
            mongo.save_no_replace(
                places_coll,
                _id=save['info']['id'],
                save=save,
            )

    indices = [
        {'ubernear.location': pymongo.GEO2D},
        {'ubernear.last_checked': pymongo.ASCENDING},
    ]
    mongo.create_indices(
        collection=places_coll,
        indices=indices,
    )
    log.info('End')
def main():
    """Command-line entry point for the facebook-put worker.

    Connects to the Facebook Graph API and MongoDB with the configured
    credentials, refreshes event-owner records and then the events of
    those owners, and sleeps roughly a day when no work was found.
    """
    opt_parser = optparse.OptionParser(usage='%prog [OPTS]')
    opt_parser.add_option(
        '-v', '--verbose',
        action="store_true",
        dest="verbose",
        help='Verbose mode [default %default]',
    )
    opt_parser.add_option(
        '--config',
        metavar='PATH',
        help=('Path to the file with information on how to '
              'configure facebook-put'
        ),
    )
    opt_parser.add_option(
        '--db-config',
        metavar='PATH',
        help=('Path to the file with information on how to '
              'retrieve and store data in the database'
        ),
    )
    opt_parser.set_defaults(verbose=False)

    (opts, leftover) = opt_parser.parse_args()
    if leftover:
        opt_parser.error('Wrong number of arguments.')
    if opts.config is None:
        opt_parser.error('Missing option --config=.')
    if opts.db_config is None:
        opt_parser.error('Missing option --db-config=.')

    log_level = logging.DEBUG if opts.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    cfg = config_parser(opts.config)
    graph = GraphAPI(cfg.get('facebook', 'access_token'))

    db_colls = collections(opts.db_config)
    events_coll = db_colls['events-collection']
    expired_coll = db_colls['expired-collection']
    owners_coll = db_colls['owners-collection']

    # Make sure the last-lookup index on the owners collection exists.
    mongo.create_indices(
        collection=owners_coll,
        indices=[{'ubernear.last_lookup': pymongo.ASCENDING}],
    )

    log.info('Start...')
    log.info('Updating owners...')
    update_owners(
        events_coll=events_coll,
        expired_coll=expired_coll,
        owners_coll=owners_coll,
    )

    log.info("Updating owners' events...")
    found_work = update_owner_events(
        events_coll=events_coll,
        expired_coll=expired_coll,
        owners_coll=owners_coll,
        graph=graph,
    )

    if not found_work:
        hours = 24
        # randint adds up to one second on top of the 24h sleep.
        delay = random.randint(60*60*hours, 60*60*hours+1)
        log.info(
            'Did not find any work. '
            'Sleeping about {hours} hours...'.format(
                hours=hours,
            )
        )
        time.sleep(delay)

    log.info('End')
def main(): parser = optparse.OptionParser( usage='%prog [OPTS]', ) parser.add_option( '-v', '--verbose', help='Verbose mode [default %default]', action="store_true", dest="verbose" ) parser.add_option( '--db-config', help=('Path to the file with information on how to ' 'retrieve and store data in the database' ), metavar='PATH', ) parser.add_option( '--config', help=('Path to the file with information on application ' 'ids for different services' ), metavar='PATH', ) parser.set_defaults( verbose=False, ) options, args = parser.parse_args() if args: parser.error('Wrong number of arguments.') if options.db_config is None: parser.error('Missing option --db-config=.') if options.config is None: parser.error('Missing option --config=.') logging.basicConfig( level=logging.DEBUG if options.verbose else logging.INFO, format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s', datefmt='%Y-%m-%dT%H:%M:%S', ) log.info('Start...') config = config_parser(options.config) yahoo_appid = config.get('yahoo','appid') coll = collections(options.db_config) places_coll = coll['places-collection'] found_work = False with _places_cursor(places_coll=places_coll) as cursor: for place in cursor: found_work = True address_tmpl = ( '{address} {extended}, {locality}, {region} ' '{postcode}, {country}' ) info = place['info'] address = address_tmpl.format( address=info.get('address', ''), extended=info.get('address_extended', ''), locality=info.get('locality', ''), region=info.get('region', ''), postcode=info.get('postcode', ''), country=info.get('country', ''), ) log.debug( 'Geocoding {address}'.format( address=address, ) ) try: location = geocoder.geocode_yahoo( address=address, yahoo_appid=yahoo_appid, ) except geocoder.GeocoderRateLimitError, e: log.info( '{msg}. Sleeping 24 hours...'.format( msg=str(e), ) ) time.sleep(60*60*24) except geocoder.GeocoderError, e: log.error( '{msg}. Skipping place {_id}'.format( msg=str(e), _id=place['_id'], ) ) else: