# Place type tags that are dropped before the remaining tags are joined into
# a business-type description. Kept as a frozenset so it is built once and
# membership tests are O(1).
_IGNORED_PLACE_TYPES = frozenset([
    'point_of_interest', 'establishment', 'sublocality', 'route',
    'real', 'political', 'of', 'or', 'local', 'locality', 'intersection',
    '1',
])


def get_all_business_types():
    """Look up and persist a type description for every untyped business.

    Loads the challenge set, the id -> (lat, lon) mapping and the already
    known business types through ``loader``. For each business whose
    ``unique_id`` has no entry yet, the places returned by
    ``get_places(lat, lon)`` are compared by name with ``cosine_sim`` and the
    most similar one (similarity strictly greater than 0) is selected. Its
    ``types`` tags, minus the ignored ones, are joined with spaces (with
    underscores replaced by spaces) and stored; ``None`` is stored when no
    place matched.

    The dictionary is dumped via ``loader.dump_business_dict`` after every
    newly resolved business, so progress survives an interrupted run.

    Raises:
        KeyError: if a business that still needs a type has no entry in the
            id -> location mapping.
    """
    businesses = loader.get_challengeset()
    idtoloc = loader.get_idtoloc()
    business_types_dict = loader.get_business_types()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Done {} of {}".format(len(business_types_dict), len(businesses)))

    for business in businesses:
        unique_id = business['unique_id']
        # Direct dict membership: no temporary key list, O(1) lookup.
        if unique_id in business_types_dict:
            continue

        print(business['name'])
        lat, lon = idtoloc[unique_id]

        # Pick the nearby place whose name is most similar to the business.
        closest_place, best_sim = None, 0
        for place in get_places(lat, lon):
            sim = cosine_sim(place['name'], business['name'])
            if sim > best_sim:
                closest_place, best_sim = place, sim

        if closest_place:
            kept_types = [tag for tag in closest_place['types']
                          if tag not in _IGNORED_PLACE_TYPES]
            types = " ".join(kept_types).replace("_", " ")
        else:
            types = None

        print(types)
        business_types_dict[unique_id] = types
        # Checkpoint after each lookup so completed work is not lost.
        loader.dump_business_dict(business_types_dict)
# Example no. 2
# 0
def get_business_lat_lon():
    """Geocode every business that has no cached location and persist it.

    Loads the id -> location mapping and the challenge set through
    ``loader``. For each business whose ``unique_id`` is missing from the
    mapping (or mapped to ``None``), its ``address`` is resolved with
    ``GoogleGeocodingApi.decode_address`` and the resulting ``(lat, lng)``
    tuple is stored.

    The mapping is pickled to ``../../data/id_to_loc.pickle`` after each new
    lookup, so progress survives an interrupted run. The file is not
    rewritten for businesses that were already cached.
    """
    gapi = GoogleGeocodingApi()
    id_to_loc = loader.get_idtoloc()
    businesses = loader.get_challengeset()

    for business in businesses:
        unique_id = business['unique_id']
        address = business['address']
        if id_to_loc.get(unique_id) is not None:
            continue

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(business['name'])
        lat, lng = gapi.decode_address(address)
        id_to_loc[unique_id] = (lat, lng)

        # Binary mode is what pickle expects, and the context manager
        # closes the handle instead of leaking one per iteration.
        with open('../../data/id_to_loc.pickle', 'wb') as cache_file:
            pickle.dump(id_to_loc, cache_file)