Beispiel #1
0
def test_normalize_simple():
    # A padded, mixed-case, punctuated sample and the string
    # normalize_string is expected to produce for it.
    raw = '  foo: BAR  ,sna  '
    expected = 'foo bar sna'

    eq(address.normalize_string(raw), expected)
Beispiel #2
0
def _match_with_place(event, place_ids, places_coll, database, _log=None):
    """Score candidate places against an event's venue and return the best one.

    Candidates are drawn, in order, from ``_nearby_places``,
    ``_place_ids_places`` and ``_normalized_places``. Each place id is
    scored only the first time it is encountered. Every candidate gets a
    per-field ``matched`` table and a score built from pairs of matched
    fields (100 points per satisfied rule).

    :param event: event document; ``event["_id"]`` is read here and the
        event is passed to ``_event_venue`` and ``_normalized_places``.
    :param place_ids: forwarded to ``_place_ids_places``.
    :param places_coll: forwarded to every candidate source.
    :param database: forwarded to ``_nearby_places``.
    :param _log: logger forwarded to the candidate sources; falls back to
        the module-level ``log`` when ``None``.
    :returns: the entry selected by ``_highest_match``: a mapping with an
        ``"ubernear"`` section (score, place_id, source, location,
        search_type, matched and, when available, distance) and a
        ``"place"`` section (the place address). Its ``"matched"`` value
        is reduced to the list of field names that matched. ``None`` when
        ``_highest_match`` returns ``None``.
    """
    if _log is None:
        _log = log

    venue = _event_venue(event)
    matches = DefaultOrderedDict(OrderedDict)
    # Place ids are used as dictionary keys below, so they are hashable
    # and a set gives constant-time duplicate checks.
    seen = set()
    places = itertools.chain(
        # Always check nearby places first so that coord information
        # is not skipped if the same place is found twice
        _nearby_places(venue=venue, event_id=event["_id"], places_coll=places_coll, database=database, _log=_log),
        _place_ids_places(place_ids=place_ids, places_coll=places_coll, _log=_log),
        _normalized_places(event=event, places_coll=places_coll, _log=_log),
    )
    for place in places:
        place_info = place["info"]
        place_id = place_info["_id"]
        search_type = place["type"]
        if place_id in seen:
            continue
        seen.add(place_id)

        place_address = place_info["address"]
        matched = OrderedDict([(item, False) for item in match_fields])
        distance = None
        venue_name = None
        score = 0

        if "distance" in place:
            distance = place["distance"]
            matched["coord"] = True

        if venue["address"]:
            matched["address"] = streets_equal(venue["address"], place_address["address"])

        if venue["locality"]:
            matched["locality"] = cities_equal(venue["locality"], place_address["locality"])

        if venue["names"]:
            # The place name is the same for every venue name, so it is
            # normalized once instead of on each iteration.
            address_name = normalize_string(place_address["name"])
            for name in venue["names"]:
                name_ = normalize_string(name)
                # An empty string is a substring of every string; without
                # this guard an empty normalized name would match anything.
                if not name_ or not address_name:
                    continue
                if address_name in name_ or name_ in address_name:
                    venue_name = name
                    matched["name"] = True
                    break

        if venue["region"] and "region" in place_address:
            matched["region"] = states_equal(venue["region"], place_address["region"])

        if venue["country"] and "country" in place_address:
            matched["country"] = countries_equal(venue["country"], place_address["country"])

        if venue["postcode"] and "postcode" in place_address:
            matched["postcode"] = zipcodes_equal(venue["postcode"], place_address["postcode"])

        if place_info["page_ids"] and venue["page_id"]:
            if venue["page_id"] in place_info["page_ids"]:
                matched["page"] = True

        # A score of 100 or more denotes a successful match
        # TODO places from place_ids should be probably be
        # treated differently since there is a higher chance
        # they will be a match
        if matched["coord"] and matched["name"]:
            score += 100
        if matched["coord"] and matched["address"]:
            score += 100
        if matched["address"] and matched["name"]:
            score += 100
        if matched["address"] and matched["locality"]:
            score += 100
        if matched["postcode"] and matched["address"]:
            score += 100
        if matched["page"]:
            score += 100

        current = matches[place_id]
        current["ubernear"] = OrderedDict(
            [
                ("score", score),
                ("place_id", place_id),
                ("source", place_info["source"]),
                ("location", place_info["coords"]),
                ("search_type", search_type),
            ]
        )
        current["ubernear"]["matched"] = matched
        if distance is not None:
            current["ubernear"]["distance"] = distance

        # Store address information in the match so no places_coll
        # look-ups are necessary when serving the event.
        # NOTE: this is the same object as place_info["address"], so the
        # name assignment below also changes that mapping.
        current["place"] = place_address

        # Some places will match on the address but the name
        # of the places are actually different, e.g., two
        # businesses with the same address but different unit
        # numbers. So, it's safer to use the facebook venue name
        if venue_name:
            current["place"]["name"] = venue_name
        elif venue["names"]:
            current["place"]["name"] = venue["names"][0]

    highest = _highest_match(matches)

    match = None
    if highest is not None:
        match = matches[highest]
        # Store a list of only those parts that have matched.
        # items() is available on both Python 2 and Python 3 mappings.
        highest_matched = [k for k, v in match["ubernear"]["matched"].items() if v]
        match["ubernear"]["matched"] = highest_matched

    return match