Example #1
0
def test_create_indices_simple():
    """create_indices issues one ensure_index call per index, in order.

    Each index is given as an OrderedDict; the fake collection expects to
    receive the corresponding list of (key, value) pairs.
    """
    single_key = [('foo', 'bar')]
    compound_key = [('sna', 'foo'), ('fee', 'fi')]

    fake_coll = fudge.Fake('collection')
    fake_coll.remember_order()

    # First expected call, then the call that must follow it.
    fake_coll.expects('ensure_index').with_args(single_key)
    fake_coll.next_call('ensure_index').with_args(compound_key)

    mongo.create_indices(
        collection=fake_coll,
        indices=[
            OrderedDict(single_key),
            OrderedDict(compound_key),
        ],
    )
Example #2
0
def main():
    """Run one pass of the facebook-event update job.

    Parses the command line (``--config`` and ``--db-config`` are
    required; positional arguments are rejected), configures logging,
    makes sure the needed indices exist on the events and expired
    collections and then, in order: moves expired events, updates
    Facebook event data, updates venue data and updates coordinate data.

    If the coordinate step reports a ``sleep`` value the process sleeps
    for that long. Otherwise, when none of the update steps found any
    work, it sleeps for about 24 hours before returning.
    """
    parser = optparse.OptionParser(
        usage='%prog [OPTS]',
        )
    parser.add_option(
        '-v', '--verbose',
        help='Verbose mode [default %default]',
        action="store_true", dest="verbose"
        )
    parser.add_option(
        '--config',
        help=('Path to the file with information on how to '
              'configure facebook-event'
              ),
        metavar='PATH',
        )
    parser.add_option(
        '--db-config',
        help=('Path to the file with information on how to '
              'retrieve and store data in the database'
              ),
        metavar='PATH',
        )
    parser.add_option(
        '-a', '--process-all',
        help=('Process all events that have not expired '
              'instead of just those that have not been '
              'processed [default %default]'
              ),
        action="store_true", dest="process_all"
        )
    parser.set_defaults(
        verbose=False,
        process_all=False,
        )

    options, args = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments.')

    # Both configuration files are mandatory; parser.error() exits.
    if options.config is None:
        parser.error('Missing option --config=.')
    if options.db_config is None:
        parser.error('Missing option --db-config=.')

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
        )

    config = config_parser(options.config)
    access_token = config.get('facebook', 'access_token')
    graph = GraphAPI(access_token)

    usps_id = config.get('usps', 'user_id')
    yahoo_id = config.get('yahoo', 'app_id')

    coll = collections(options.db_config)
    events_coll = coll['events-collection']
    expired_coll = coll['expired-collection']

    # Make sure the indices used by the update steps exist.
    indices = [
        {'facebook.end_time': pymongo.ASCENDING},
        {'ubernear.fetched': pymongo.ASCENDING},
        ]
    mongo.create_indices(
        collection=events_coll,
        indices=indices,
        )
    indices = [
        {'facebook.end_time': pymongo.ASCENDING},
        ]
    mongo.create_indices(
        collection=expired_coll,
        indices=indices,
        )

    log.info('Start...')

    log.info('Moving expired events...')
    facebook_event.expire(
        events_coll=events_coll,
        expired_coll=expired_coll,
        )

    log.info('Updating event data...')
    facebook_work = facebook_event.update_facebook(
        events_coll=events_coll,
        graph=graph,
        process_all=options.process_all,
        )

    log.info('Updating venue data...')
    venue_work = facebook_event.update_venue(
        events_coll=events_coll,
        usps_id=usps_id,
        process_all=options.process_all,
        )

    log.info('Updating coordinate data...')
    coord_work = facebook_event.update_coordinate(
        events_coll=events_coll,
        yahoo_id=yahoo_id,
        process_all=options.process_all,
        )
    if coord_work['sleep'] is not None:
        delay = coord_work['sleep']
        # The placeholder in the message is named ``sleep``, so the value
        # must be passed under that name; passing it as ``delay`` raised
        # KeyError and killed the process instead of sleeping.
        # NOTE(review): the message says "hours" but the same value is
        # handed to time.sleep(), which takes seconds -- confirm the unit
        # of coord_work['sleep'].
        log.info(
            'Geocoding rate limit reached. '
            'Sleeping {sleep} hours...'.format(
                sleep=delay,
                )
            )
        time.sleep(delay)
    else:
        found_work = (
            facebook_work
            or
            venue_work
            or
            coord_work['found_work']
            )
        if not found_work:
            hours = 24
            # Either exactly ``hours`` hours or one second more.
            delay = random.randint(60*60*hours, 60*60*hours+1)
            log.info(
                'Did not find any work. '
                'Sleeping about {hours} hours...'.format(
                    hours=hours,
                    )
                )
            time.sleep(delay)

    log.info('End')
Example #3
0
def main():
    """Start the HTTP server exposing the event API.

    Requires ``--config`` (read for the ``connection`` host and port) and
    ``--db-config`` (used to open the events and keys collections with a
    SECONDARY read preference). Ensures the ``facebook.start_time`` index
    exists, registers an ``EventAPI01`` instance through ``install`` and
    then runs the wrapped default application on ``APIServer``.
    """
    cli = optparse.OptionParser(usage='%prog [OPTS]')
    cli.add_option(
        '-v', '--verbose',
        dest="verbose",
        action="store_true",
        help='Verbose mode [default %default]',
    )
    cli.add_option(
        '--config',
        metavar='PATH',
        help=(
            'Path to the file with information on how to '
            'configure google-get'
        ),
    )
    cli.add_option(
        '--db-config',
        metavar='PATH',
        help=(
            'Path to the file with information on how to '
            'retrieve and store data in the database'
        ),
    )
    cli.set_defaults(verbose=False)

    opts, positional = cli.parse_args()
    if positional:
        cli.error('Wrong number of arguments.')

    # cli.error() terminates the program, so these act as guards.
    if opts.config is None:
        cli.error('Missing option --config=.')
    if opts.db_config is None:
        cli.error('Missing option --db-config=.')

    settings = config_parser(opts.config)
    host = settings.get('connection', 'host')
    port = settings.get('connection', 'port')

    db_colls = collections(
        config=opts.db_config,
        read_preference=pymongo.ReadPreference.SECONDARY,
    )
    events_coll = db_colls['events-collection']
    keys_coll = db_colls['keys-collection']

    if opts.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    mongo.create_indices(
        collection=events_coll,
        indices=[{'facebook.start_time': pymongo.ASCENDING}],
    )

    install(EventAPI01(keys_coll=keys_coll, events_coll=events_coll))

    start_msg = 'Starting server http://{host}:{port}'.format(
        host=host,
        port=port,
    )
    log.info(start_msg)

    wrapped_app = middleware(default_app())
    run(
        app=wrapped_app,
        host=host,
        port=port,
        server=APIServer,
        quiet=True,
    )
Example #4
0
def main():
    """Run one pass of the owners update job.

    Requires ``--config`` (read for the Facebook access token) and
    ``--db-config`` (used to open the events, expired and owners
    collections). Ensures the ``ubernear.last_lookup`` index exists on the
    owners collection, updates the owners, then updates their events.
    When the events step reports no work the process sleeps for about
    24 hours before returning.
    """
    cli = optparse.OptionParser(usage='%prog [OPTS]')
    cli.add_option(
        '-v', '--verbose',
        dest="verbose",
        action="store_true",
        help='Verbose mode [default %default]',
    )
    cli.add_option(
        '--config',
        metavar='PATH',
        help=(
            'Path to the file with information on how to '
            'configure facebook-put'
        ),
    )
    cli.add_option(
        '--db-config',
        metavar='PATH',
        help=(
            'Path to the file with information on how to '
            'retrieve and store data in the database'
        ),
    )
    cli.set_defaults(verbose=False)

    opts, positional = cli.parse_args()
    if positional:
        cli.error('Wrong number of arguments.')

    # cli.error() terminates the program, so these act as guards.
    if opts.config is None:
        cli.error('Missing option --config=.')
    if opts.db_config is None:
        cli.error('Missing option --db-config=.')

    if opts.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    settings = config_parser(opts.config)
    graph = GraphAPI(settings.get('facebook', 'access_token'))

    db_colls = collections(opts.db_config)
    events_coll = db_colls['events-collection']
    expired_coll = db_colls['expired-collection']
    owners_coll = db_colls['owners-collection']

    mongo.create_indices(
        collection=owners_coll,
        indices=[{'ubernear.last_lookup': pymongo.ASCENDING}],
    )

    log.info('Start...')

    log.info('Updating owners...')
    update_owners(
        events_coll=events_coll,
        expired_coll=expired_coll,
        owners_coll=owners_coll,
    )

    log.info('Updating owners\' events...')
    had_work = update_owner_events(
        events_coll=events_coll,
        expired_coll=expired_coll,
        owners_coll=owners_coll,
        graph=graph,
    )

    if not had_work:
        hours = 24
        # Either exactly ``hours`` hours or one second more.
        base_seconds = 60 * 60 * hours
        pause = random.randint(base_seconds, base_seconds + 1)
        idle_msg = (
            'Did not find any work. '
            'Sleeping about {hours} hours...'
        ).format(hours=hours)
        log.info(idle_msg)
        time.sleep(pause)

    log.info('End')
Example #5
0
def main():
    """Import places from a CSV file into the places collection.

    Requires ``--csv``, ``--config`` and ``--db-config``; positional
    arguments are rejected. The config file supplies the USPS user id and
    the CSV delimiter. For every CSV row:

    * non-empty fields are stored under ``info``;
    * a valid latitude/longitude pair is converted to floats and also
      stored as ``ubernear.location`` in ``[lng, lat]`` order;
    * rows without an ``address`` field are logged and skipped;
    * the address is sent to ``address_information.verify`` and, on
      success, the result is stored under ``normalized``;
    * the document is written with ``mongo.save_no_replace``.

    After the import the indices on ``ubernear.location`` and
    ``ubernear.last_checked`` are created.
    """
    parser = optparse.OptionParser(
        usage='%prog [OPTS]',
        )
    parser.add_option(
        '-v', '--verbose',
        help='Verbose mode [default %default]',
        action="store_true", dest="verbose"
        )
    parser.add_option(
        '--csv',
        help='Path to the CSV file containing the places to import',
        metavar='PATH',
        )
    parser.add_option(
        '--config',
        help=('Path to the config file with information on how to '
              'import places'
              ),
        metavar='PATH',
        )
    parser.add_option(
        '--db-config',
        help=('Path to the file with information on how to '
              'retrieve and store data in the database'
              ),
        metavar='PATH',
        )
    parser.set_defaults(
        verbose=False,
        )

    options, args = parser.parse_args()
    if args:
        parser.error('Wrong number of arguments.')

    # All three options are mandatory; parser.error() exits.
    if options.csv is None:
        parser.error('Missing option --csv=.')
    if options.config is None:
        parser.error('Missing option --config=.')
    if options.db_config is None:
        parser.error('Missing option --db-config=.')

    logging.basicConfig(
        level=logging.DEBUG if options.verbose else logging.INFO,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
        )

    places_csv = absolute_path(options.csv)
    config = config_parser(options.config)
    coll = collections(options.db_config)
    places_coll = coll['places-collection']

    usps_id = config.get('usps', 'user_id')

    # The delimiter is written escaped in the config file (e.g. "\t"),
    # so turn the escape sequence into the actual character.
    delimiter = config.get('csv', 'delimiter')
    delimiter = delimiter.decode('string-escape')
    fieldnames = [
        'id',
        'name',
        'address',
        'address_extended',
        'po_box',
        'locality',
        'region',
        'country',
        'postcode',
        'tel',
        'fax',
        'category',
        'website',
        'email',
        'latitude',
        'longitude',
        'status',
        ]

    log.info('Start...')

    with open(places_csv, 'rb') as places_fp:
        places = csv.DictReader(
            places_fp,
            delimiter=delimiter,
            fieldnames=fieldnames,
            )
        for place in places:
            # Don't store empty fields
            save = defaultdict(dict)
            for k, v in place.iteritems():
                if v != '':
                    save['info'][k] = v

            try:
                lat = float(save['info']['latitude'])
                lng = float(save['info']['longitude'])
            except (KeyError, ValueError):
                log.debug(
                    'Did not find a valid latitude and longitude for place '
                    '{_id}'.format(
                        _id=save['info']['id'],
                        )
                    )
            else:
                save['info']['latitude'] = lat
                save['info']['longitude'] = lng
                # Coordinates are always stored in the form [lng,lat],
                # in that order. Anything else might result in incorrect
                # MongoDB Geospatial queries.
                save['ubernear.location'] = [lng, lat]

                # NOTE(review): longitude 180 itself is rejected while
                # latitude 90 is accepted; this looks intended to match
                # the default bounds of a MongoDB 2d index -- confirm.
                if (lng < -180 or lng >= 180) or (lat < -90 or lat > 90):
                    error_msg = ('Bad coordinates (lng,lat) {coord} for id '
                                 '{_id}'
                                 )
                    log.error(
                        error_msg.format(
                            coord=(lng, lat),
                            _id=save['info']['id'],
                            )
                        )
                    # Drop the unusable coordinates but keep the place.
                    del save['info']['latitude']
                    del save['info']['longitude']
                    del save['ubernear.location']

            if 'address' not in save['info']:
                log.error(
                    'Found place {_id} with no address information. '
                    'Skipping'.format(
                        _id=save['info']['id'],
                        )
                    )
                continue
            match = {
                'address': save['info']['address'],
                'city': save['info']['locality'],
                'state': save['info']['region'],
                'zipcode': save['info']['postcode'],
                }
            if 'address_extended' in save['info']:
                match['address_extended'] = save['info']['address_extended']
            try:
                norm = address_information.verify(usps_id, match)
            except Exception:
                # Best effort: a failed lookup must not abort the import,
                # but Ctrl-C / SystemExit are no longer swallowed as they
                # were by the former bare ``except:``.
                log.error(
                    'The USPS API could not find an address for place '
                    '{_id}'.format(
                        _id=save['info']['id'],
                        )
                    )
            else:
                norm['name'] = save['info']['name'].upper()
                norm['country'] = 'US'
                save['normalized'] = norm
                save['ubernear.normalization_source'] = 'usps'

            save['ubernear.source'] = 'factual'
            mongo.save_no_replace(
                places_coll,
                _id=save['info']['id'],
                save=save,
                )

    indices = [
        {'ubernear.location': pymongo.GEO2D},
        {'ubernear.last_checked': pymongo.ASCENDING},
        ]
    mongo.create_indices(
        collection=places_coll,
        indices=indices,
        )

    log.info('End')
Example #6
0
def main():
    """Run one pass of the event location job.

    Requires ``--db-config``, which is used to open the events and places
    collections and the database handle. Ensures the
    ``match.ubernear.location`` 2d index exists on the events collection
    and calls ``event_location.locate``. When that reports no work the
    process sleeps for about 15 minutes before returning.
    """
    cli = optparse.OptionParser(usage='%prog [OPTS]')
    cli.add_option(
        '-v', '--verbose',
        dest="verbose",
        action="store_true",
        help='Verbose mode [default %default]',
    )
    cli.add_option(
        '--db-config',
        metavar='PATH',
        help=(
            'Path to the file with information on how to '
            'retrieve and store data in the database'
        ),
    )
    cli.add_option(
        '-a', '--process-all',
        dest="process_all",
        action="store_true",
        help=(
            'Process all events that have not expired '
            'instead of just those that have not been '
            'processed [default %default]'
        ),
    )
    cli.set_defaults(verbose=False, process_all=False)

    opts, positional = cli.parse_args()
    if positional:
        cli.error('Wrong number of arguments.')

    # cli.error() terminates the program, so this acts as a guard.
    if opts.db_config is None:
        cli.error('Missing option --db-config=.')

    if opts.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s.%(msecs)03d %(name)s: %(levelname)s: %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    db_colls = collections(opts.db_config)
    events_coll = db_colls['events-collection']
    places_coll = db_colls['places-collection']
    database = db_colls['database']

    mongo.create_indices(
        collection=events_coll,
        indices=[{'match.ubernear.location': pymongo.GEO2D}],
    )

    log.info('Start...')

    log.info('Setting events\' locations...')
    had_work = event_location.locate(
        events_coll=events_coll,
        places_coll=places_coll,
        database=database,
        process_all=opts.process_all,
    )

    if not had_work:
        minutes = 15
        # Jitter of one second either side of ``minutes`` minutes.
        base_seconds = 60 * minutes
        pause = random.randint(base_seconds - 1, base_seconds + 1)
        idle_msg = (
            'Did not find any work. '
            'Sleeping about {minutes} minutes...'
        ).format(minutes=minutes)
        log.info(idle_msg)
        time.sleep(pause)

    log.info('End')