Example #1
0
def test_normalize_state_simple():
    """'CALIFORNIA' is expected to normalize to 'ca'."""
    normalized = address.normalize_state('CALIFORNIA')

    eq(normalized, 'ca')
Example #2
0
def test_normalize_state_invalid():
    """'FOO BAR' (an invalid state, per the test name) comes back as 'foo bar'."""
    normalized = address.normalize_state('FOO BAR')

    eq(normalized, 'foo bar')
Example #3
0
def test_normalize_state_empty():
    """The punctuation-only input '.,' is expected to normalize to ''."""
    normalized = address.normalize_state('.,')

    eq(normalized, '')
Example #4
0
def test_normalize_state_other():
    """'Quebec' is expected to normalize to 'quebec'."""
    normalized = address.normalize_state('Quebec')

    eq(normalized, 'quebec')
Example #5
0
def test_normalize_state_multiple():
    """'CA California' is expected to normalize to 'ca california'."""
    normalized = address.normalize_state('CA California')

    eq(normalized, 'ca california')
Example #6
0
def _match_with_venue(event, _log=None):
    """Build a place-match document from an event's Facebook venue.

    Args:
        event: Event mapping. Reads ``event["_id"]`` and
            ``event["facebook"]``. The Facebook data must contain a
            ``venue`` mapping with ``street``, ``city``, ``latitude`` and
            ``longitude``; ``state`` and ``country`` are optional. The
            place name is ``facebook["location"]`` when present, otherwise
            ``facebook["owner"]["name"]``.
        _log: Logger that receives the debug "Skipping" messages. Defaults
            to the module-level ``log``.

    Returns:
        An ``OrderedDict`` with an ``"ubernear"`` entry (place id, source
        and ``[longitude, latitude]`` location) and a ``"place"`` entry
        (title-cased address and locality, name, coordinates, plus
        upper-cased region/country when non-empty), or ``None`` when the
        venue is rejected: empty normalized street or city, coordinates
        that are not exactly ``float``, non-finite coordinates, or
        coordinates whose decimal exponent is greater than -5.

    Raises:
        KeyError: If one of the required keys listed above is missing.
    """
    if _log is None:
        _log = log

    facebook = event["facebook"]
    name = facebook.get("location")
    if name is None:
        name = facebook["owner"]["name"]

    venue = facebook["venue"]
    address = normalize_street(venue["street"])
    locality = normalize_city(venue["city"])
    region = normalize_state(venue.get("state", ""))
    country = normalize_country(venue.get("country", ""))

    if not address or not locality:
        _log.debug("Event {event_id} has invalid address or locality. " "Skipping.".format(event_id=event["_id"]))
        return None

    address = address.title()
    locality = locality.title()

    latitude = venue["latitude"]
    longitude = venue["longitude"]
    # Coordinates of type int are too ambiguous to be considered good, so
    # an exact type check is used on purpose (no int, no subclasses).
    if type(latitude) is not float or type(longitude) is not float:
        _log.debug("Event {event_id} has invalid latitude or longitude. " "Skipping.".format(event_id=event["_id"]))
        return None

    # The decimal exponent of the float's repr tells how many fractional
    # digits the coordinate carries (e.g. 12.345678 -> -6).
    lat_exponent = Decimal(repr(latitude)).as_tuple().exponent
    lng_exponent = Decimal(repr(longitude)).as_tuple().exponent

    # NaN and infinity yield a string marker ('n', 'N' or 'F') instead of
    # an integer exponent; comparing that marker with an int raises
    # TypeError on Python 3, so reject such coordinates explicitly.
    if not isinstance(lat_exponent, int) or not isinstance(lng_exponent, int):
        _log.debug("Event {event_id} has invalid latitude or longitude. " "Skipping.".format(event_id=event["_id"]))
        return None

    # Coordinates with little precision are too ambiguous to be
    # considered good.
    if lat_exponent > -5 or lng_exponent > -5:
        _log.debug(
            "Event {event_id} has latitude or longitude with "
            "little precision. Skipping.".format(event_id=event["_id"])
        )
        return None

    match = OrderedDict(
        [
            (
                "ubernear",
                OrderedDict([("place_id", event["_id"]), ("source", "facebook"), ("location", [longitude, latitude])]),
            ),
            (
                "place",
                OrderedDict(
                    [
                        ("address", address),
                        ("locality", locality),
                        ("name", name),
                        ("latitude", latitude),
                        ("longitude", longitude),
                    ]
                ),
            ),
        ]
    )

    # Region and country are optional and only stored when non-empty.
    if region:
        match["place"]["region"] = region.upper()
    if country:
        match["place"]["country"] = country.upper()

    return match
Example #7
0
def update_venue(
    events_coll,
    usps_id,
    process_all,
    ):
    """Validate event venues and hand the usable ones to ``_save_venues``.

    When ``process_all`` is true every document in ``events_coll`` is
    visited. Otherwise only events that have ``ubernear.lookup_completed``
    and lack both ``ubernear.normalization_completed`` and
    ``ubernear.normalization_failed`` are selected, sorted ascending by
    ``ubernear.fetched``.

    Events without a venue, without street/city/state, or whose
    normalized state is not a key of ``addr_util.state_abbrev`` are
    reported through ``_mark_as_failed``. The remaining events are passed
    to ``_save_venues`` in batches of ``usps_batch_size``, followed by one
    final call with whatever is left over (possibly an empty list).

    Note that ``venue['state']`` of each visited event is overwritten in
    place with its normalized form.

    Returns ``True`` if the query yielded at least one event, ``False``
    otherwise.
    """
    now = datetime.utcnow()

    def exists_clause(field, present):
        # Single-field ``$exists`` filter, e.g. {field: {'$exists': present}}
        return OrderedDict([
                (field, OrderedDict([('$exists', present)])),
                ])

    if process_all:
        events = events_coll.find()
    else:
        pending_filter = OrderedDict([
                ('$and',
                 [exists_clause('ubernear.normalization_completed', False),
                  exists_clause('ubernear.normalization_failed', False),
                  exists_clause('ubernear.lookup_completed', True),
                  ]
                 ),
                ])
        events = events_coll.find(
            pending_filter,
            sort=[('ubernear.fetched', pymongo.ASCENDING)],
            )

    count = events.count()
    if count != 0:
        plural = '' if count == 1 else 's'
        log.info(
            'Normalizing {count} event{s}'.format(count=count, s=plural),
            )

    pending = []
    found_work = False
    # TODO This cursor may timeout if there are too many results
    for event in events:
        found_work = True

        # Don't send venues in the batch that can't be used.
        # Missing values are checked here rather than in the query so
        # it is explicitly known which events are not eligible for
        # normalization.
        failure = None
        facebook = event['facebook']
        if 'venue' not in facebook:
            failure = 'No venue'
        else:
            venue = facebook['venue']
            # The minimal requirements for the USPS API
            if any(key not in venue for key in ('street', 'city', 'state')):
                failure = 'No street, city or state'
            else:
                # USPS doesn't take long names for states
                venue['state'] = addr_util.normalize_state(venue['state'])
                # Make sure it's a valid state abbreviation
                if venue['state'] not in addr_util.state_abbrev.keys():
                    failure = 'Invalid state'

        if failure is not None:
            _mark_as_failed(
                events_coll=events_coll,
                event_id=event['_id'],
                now=now,
                field='normalization_failed',
                reason=failure,
                )
            continue

        pending.append(event)
        if len(pending) == usps_batch_size:
            # Flush a full batch and start collecting a fresh one
            _save_venues(
                events=pending,
                events_coll=events_coll,
                usps_id=usps_id,
                now=now,
                )
            pending = []

    # Flush whatever is left over after the cursor is exhausted
    _save_venues(
        events=pending,
        events_coll=events_coll,
        usps_id=usps_id,
        now=now,
        )

    return found_work