# Substrings marking aggregator, review, directory and social sites; a search
# result whose URL contains any of them is never stored as a place's website.
_EXCLUDED_SITE_FRAGMENTS = (
    'plus.google', 'yelp', 'facebook', 'urbanspoon', 'foursquare',
    'zomato', 'tripadvisor', 'allmenus', 'thestranger', 'seattlemet',
    'thrillist', 'seattle.eater', 'yahoo', 'capitolhillseattle',
    'eventful', 'groupon', 'clubplanet', 'postfastr', 'opentable',
    'menupix', 'menuism', 'letzgoout', 'barmano', '2findlocal',
    'whitepages', 'manta', 'gigsounds', 'mapquest', 'www.restaurant.com',
    'nochelatina',
)

# Only this many leading search results are considered for each place.
_MAX_RESULTS_TO_CHECK = 3


def _first_acceptable_website(result_links):
    """Return the first usable target URL among the leading links, or None.

    A link is usable when its ``href`` carries a ``q`` query parameter whose
    value contains ``http`` and none of ``_EXCLUDED_SITE_FRAGMENTS``.
    """
    for link in result_links[:_MAX_RESULTS_TO_CHECK]:
        href = link.get('href') or ''
        targets = urlparse.parse_qs(urlparse.urlparse(href).query).get('q')
        if not targets:
            # No 'q' parameter on this link: skip it rather than abandoning
            # the remaining candidates for this place.
            continue
        url = targets[0]
        if 'http' not in url:
            continue
        if any(fragment in url for fragment in _EXCLUDED_SITE_FRAGMENTS):
            continue
        return url
    return None


def scrape_websites(latitude, longitude, radius, search_suffix="seattle wa"):
    """Look up and store a website for each nearby place that has none.

    Places are fetched with ``get_places_in_radius(latitude, longitude,
    radius, False)`` and narrowed to those whose ``website`` is null. For each
    one, the place name (passed through ``remove_non_ascii``) is submitted to
    the search form at google.com via a ``mechanize`` browser, and the first
    acceptable URL among the leading results is saved on the place.

    Args:
        latitude: Forwarded to ``get_places_in_radius``.
        longitude: Forwarded to ``get_places_in_radius``.
        radius: Forwarded to ``get_places_in_radius``.
        search_suffix: Text appended to the place name, separated by a space,
            to narrow the search. Defaults to ``"seattle wa"``; pass an empty
            string to search on the name alone.

    Returns:
        None. Progress numbers and a final message are printed to stdout.

    An exception raised while handling one place is printed with
    ``traceback.print_exc()`` and the loop moves on to the next place.
    """
    places = get_places_in_radius(latitude, longitude, radius, False)
    places = places.filter(website__isnull=True)

    br = mechanize.Browser(factory=mechanize.RobustFactory())
    br.set_handle_robots(False)
    br.set_handle_equiv(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0')]

    # enumerate keeps the progress number advancing even when a place fails.
    for count, place in enumerate(places, 1):
        try:
            print(count)
            # Random 1-2 second pause between searches.
            time.sleep(random.uniform(1, 2))

            query = remove_non_ascii(place.name)
            if search_suffix:
                # The separating space keeps the name and the suffix from
                # fusing into a single search term.
                query = query + " " + search_suffix

            br.open("http://google.com")
            br.select_form('f')
            br.form['q'] = query
            data = br.submit()
            soup = BeautifulSoup(data.read())

            # Select the result links once; a short result list simply yields
            # fewer candidates instead of raising IndexError.
            website = _first_acceptable_website(soup.select('.r a'))
            if website is not None:
                place.website = website
                place.save()
        except Exception:
            # Best effort: report the failure and continue with the next place.
            traceback.print_exc()
    print("Scrape websites successful")
def add_facebook_extras_for_new_city(latitude, longitude, radius):
    """Run ``add_facebook_extras`` over the places around a point.

    The places are obtained from ``get_places_in_radius`` using the given
    coordinates and radius, with ``False`` as its fourth argument.
    """
    nearby_places = get_places_in_radius(
        latitude,
        longitude,
        radius,
        False,
    )
    add_facebook_extras(nearby_places)
def add_yelp_ratings_for_city(latitude, longitude, radius):
    """Run ``get_yelp_ratings`` over the places around a point.

    The places are obtained from ``get_places_in_radius`` using the given
    coordinates and radius, with ``False`` as its fourth argument.
    """
    nearby_places = get_places_in_radius(
        latitude,
        longitude,
        radius,
        False,
    )
    get_yelp_ratings(nearby_places)
def auto_approve_new_deal_places_for_city(latitude, longitude, radius):
    """Run ``auto_approve_new_deal_places`` on typed places around a point.

    The places are obtained from ``get_places_in_radius`` (fourth argument
    ``True``) and narrowed to those whose ``place_type`` is not null before
    being handed on.
    """
    nearby_places = get_places_in_radius(
        latitude,
        longitude,
        radius,
        True,
    )
    # Keep only the places that have a place_type set.
    typed_places = nearby_places.filter(place_type__isnull=False)
    auto_approve_new_deal_places(typed_places)
def delete_duplicate_places_for_new_city(latitude, longitude, radius):
    """Run ``delete_duplicate_places`` over the places around a point.

    The places are obtained from ``get_places_in_radius`` using the given
    coordinates and radius, with ``True`` as its fourth argument.
    """
    nearby_places = get_places_in_radius(
        latitude,
        longitude,
        radius,
        True,
    )
    delete_duplicate_places(nearby_places)
def add_foursquare_extras_for_new_city(latitude, longitude, radius):
    """Run ``add_foursquare_extras`` over the places around a point.

    The places are obtained from ``get_places_in_radius`` using the given
    coordinates and radius, with ``True`` as its fourth argument.
    """
    nearby_places = get_places_in_radius(
        latitude,
        longitude,
        radius,
        True,
    )
    add_foursquare_extras(nearby_places)