class YelpScraper:
    """
    Thin convenience layer over a ``YelpAPI`` client.

    Example
        y = YelpScraper('API KEY')
        res = y.get_pois(34.140940, -118.127974, 5000)  # Search near Pasadena
        biz_id = res['businesses'][0]['id']
        y.get_poi_features(biz_id)
    """

    def __init__(self, api_key):
        # A single client is created here and reused by every query method.
        self.yelp = YelpAPI(api_key)

    def get_pois(self, latitude, longitude, radius):
        """
        Search Yelp for points of interest around a coordinate.

        https://www.yelp.com/developers/documentation/v3/business_search

        :param latitude: Decimal latitude.
        :param longitude: Decimal longitude.
        :param radius: Search radius in *meters*.
        :return: Whatever ``YelpAPI.search_query`` returns for that area.
        """
        search_area = {
            'latitude': latitude,
            'longitude': longitude,
            'radius': radius,
        }
        return self.yelp.search_query(**search_area)

    def get_poi_features(self, yelpid):
        """
        Fetch the details of one point of interest by its Yelp ID.

        :param yelpid: Yelp business ID, passed straight to ``business_query``.
        :return: Whatever ``YelpAPI.business_query`` returns for that ID.
        """
        client = self.yelp
        return client.business_query(yelpid)
def post(self, request):
    """
    Query Yelp for the business named in the request and return its reviews.

    The ``name`` entry of ``request.DATA`` is passed to ``business_query`` as
    the business identifier; the ``reviews`` entry of the result (``None``
    when absent) is returned under the ``results`` key.
    """
    # NOTE(review): these credentials are hard-coded in source; consider
    # moving them to configuration.
    credentials = (
        '4eeWvgRSP72tOmZaghWTvQ',
        'K88plDw1UnwjACzBophf6Du578c',
        'uKfMmBjcu1_JXCj5aS1o9foFOB8VpN4q',
        'DRi5uRtvXpyXSRUxhf_8SaQ52LI',
    )
    client = YelpAPI(*credentials)
    business_name = request.DATA.get('name')
    business = client.business_query(business_name)
    return Response({'results': business.get('reviews')})
def get_categories_from_name_and_address(name, address):
    """
    Return the Yelp category aliases for the business at *name* / *address*.

    The business alias is resolved with ``get_alias_from_name_and_address``;
    an empty alias yields an empty list without querying the business.
    """
    client = YelpAPI(MY_API_KEY)
    business_alias = get_alias_from_name_and_address(name, address)

    # Guard clause: nothing to look up when no alias was resolved.
    if business_alias == "":
        return []

    details = client.business_query(id=business_alias)
    return [category['alias'] for category in details['categories']]
def yelpJSON(label):
    """
    Return a JSON response with a business's details and its reviews.

    The business details come from ``business_query``; the result of
    ``reviews_query`` is attached under the ``review_list`` key. The response
    carries an ``Access-Control-Allow-Origin`` header taken from the
    ``ALLOWED_DOMAIN`` application setting.
    """
    business_id = label.encode('utf-8')
    client = YelpAPI(application.config['YELP_API_KEY'])

    payload = client.business_query(business_id)
    payload['review_list'] = client.reviews_query(business_id)

    reply = jsonify(payload)
    allowed_origin = application.config['ALLOWED_DOMAIN']
    reply.headers.add('Access-Control-Allow-Origin', allowed_origin)
    return reply
def _mark_not_listed(place):
    """Fill the Yelp fields of *place* with the 'not found on Yelp' defaults."""
    place['yelp_rating'] = ""
    place['yelp_review_count'] = ""
    place['yelp_url'] = ""
    place['is_listed_yelp'] = "0"
    place['is_claimed_yelp'] = "0"


def parseYelpData(places, heading="tel"):
    """
    Look up each business by phone number on Yelp Fusion and annotate it.

    Every dictionary in *places* is updated in place with the keys
    'yelp_rating', 'yelp_review_count', 'yelp_url', 'is_listed_yelp' and
    'is_claimed_yelp'. Businesses without a usable phone number, or whose
    phone number has no Yelp match, get empty strings and "0" flags.

    param: places   list of business dictionaries (scraped by the GoogleParser)
    param: heading  key holding the phone number in each business,
                    defaults to "tel"
    return: the same list of businesses, with the Yelp fields filled in
    """
    client = YelpAPI(APIKEY)

    for place in places:
        phone = changeFormatTel(place[heading])
        if phone == "":
            # No phone number to search with: no request is made for this one.
            _mark_not_listed(place)
            continue

        lookup = client.phone_search_query(phone=str(phone))
        if lookup['total'] == 0:
            _mark_not_listed(place)
        else:
            # Only the first business returned for the number is used.
            match = lookup['businesses'][0]
            place['yelp_rating'] = strip(str(match['rating']))
            place['yelp_review_count'] = strip(str(match['review_count']))
            place['yelp_url'] = strip(str(match['url']))
            company_id = strip(str(match['id']))
            # The claimed flag comes from a second, per-business query.
            details = client.business_query(id=company_id)
            place['is_listed_yelp'] = "1"
            place['is_claimed_yelp'] = "1" if details['is_claimed'] == True else "0"  # noqa: E712

        # to avoid Yelp's error messages of too many queries per second
        time.sleep(1)

    return places
def link_restaurant(request, event_id):
    """
    Attach the restaurant given by a Yelp URL in the POST form to an event.

    The form field ``rest`` is expected to hold a
    ``https://www.yelp.com/biz/<alias>`` URL. The alias is looked up among
    the stored ``Restaurant`` rows; if it is not there yet, the business is
    fetched from the Yelp API and a new row is created. A missing, empty or
    unparsable URL leaves the event's restaurants untouched.

    :param request: incoming request; only ``request.POST`` is read.
    :param event_id: primary key of the ``Event`` to update.
    :return: a redirect to ``/dashboard``.
    """
    form = request.POST
    url_pattern = r'https://www.yelp.com/biz/(.+)'
    event = Event.objects.get(id=event_id)

    alias = ""
    submitted = form.get('rest')  # tolerate a form without the field
    if submitted:
        # Drop the query string before extracting the business alias.
        match = re.search(url_pattern, submitted.split("?")[0])
        if match is None:
            print("url not found.. should have been caught by validator")
        else:
            alias = match.group(1)

    if alias:
        try:
            new_rest = Restaurant.objects.get(alias=alias)
        except Restaurant.DoesNotExist:
            print(f'Querying API for rest1 = {alias}')
            yelp_api = YelpAPI(yelp_key)
            r = yelp_api.business_query(id=alias)
            pprint.pprint(r)
            # Always end up with exactly three photo slots, padded with "".
            photos = (list(r.get('photos') or []) + ["", "", ""])[:3]
            new_rest = Restaurant.objects.create(
                alias=r['alias'],
                name=r['name'],
                image_url=r['image_url'],
                url=r['url'],
                display_phone=r['display_phone'],
                review_count=r['review_count'],
                rating=r['rating'],
                photo1_url=photos[0],
                photo2_url=photos[1],
                photo3_url=photos[2],
                # price=r['price']
            )
        # Only reached with a real Restaurant, never with None.
        event.restaurants.add(new_rest)

    event.save()
    return redirect('/dashboard')
def yelp_reviews(request):
    """
    Return a JSON summary of a business and its first review.

    The business is identified by the ``yelp_id`` query parameter. The
    response contains the first review's excerpt and author, plus the
    business's rating image, review count and URL, under the ``response``
    key.
    """
    business_id = request.GET.get('yelp_id')
    client = YelpAPI(
        settings.YELP_CONSUMER_KEY,
        settings.YELP_CONSUMER_SECRET,
        settings.YELP_TOKEN,
        settings.YELP_TOKEN_SECRET,
    )
    business = client.business_query(id=business_id)

    # Only the first review in the result is summarised.
    top_review = business['reviews'][0]
    summary = {
        "excerpt": top_review['excerpt'],
        "username": top_review['user']['name'],
        "user_img": top_review['user']['image_url'],
        "overall_rating_img": business['rating_img_url'],
        "review_count": business['review_count'],
        "url": business['url'],
    }
    return JsonResponse({'response': summary})
def yelp_extras(self):
    """
    Refresh this object's Yelp-derived fields and save it.

    Queries the business identified by ``self.yelp_id`` and copies each of
    the rating image, phone, rating, review count and location fields that
    is present in the result onto the instance, then calls ``self.save()``.
    """
    client = YelpAPI(YELP_CONSUMER_KEY, YELP_CONSUMER_SECRET, YELP_TOKEN, YELP_TOKEN_SECRET)
    details = client.business_query(id=self.yelp_id)

    if 'rating_img_url_large' in details:
        self.rating_image_url = details['rating_img_url_large']

    if 'phone' in details:
        self.phone = prettify_phone_number(details['phone'])

    # These two are copied verbatim onto attributes of the same name.
    for field in ('rating', 'review_count'):
        if field in details:
            setattr(self, field, details[field])

    if 'location' in details:
        location = details['location']
        self.zip_code = location['postal_code']
        # Only the first address line is stored.
        self.street_address = location['address'][0]

    self.save()
class Yelp(object):
    """
    Connect to the Yelp API and pull basic information about one business.
    """

    def __init__(self, bus_id=def_id):
        self.bus_id = bus_id
        self.yelp_id = ""
        self.yelp_secret = ""
        self.response = {}
        self.name = ""
        self.city = ""
        self.country = ""

        # Credentials are read from "key,value" rows of data/pvt.csv.
        credentials_path = os.path.join(cwd, 'data/pvt.csv')
        with open(credentials_path) as f:
            for line in f:
                fields = line.strip().split(',')
                label = fields[0]
                if label == "yelp_id":
                    self.yelp_id = fields[1]
                elif label == "yelp_secret":
                    self.yelp_secret = fields[1]

        self.yelp_api = YelpAPI(self.yelp_id, self.yelp_secret)
        self.pull_info()

    def pull_info(self):
        """
        Query the business stored in ``self.bus_id``.

        Input: none (uses ``self.bus_id``)
        Output: None
        Side-Effects: stores the raw result in ``self.response`` and sets the
        ``name``, ``city`` and ``country`` attributes from it
        """
        details = self.yelp_api.business_query(self.bus_id)
        self.response = details
        self.name = details['name']
        self.city = details['location']['city']
        self.country = details['location']['country']
# Enrich the businesses listed in data.json with their Yelp rating and review
# count, and write the result to final.json. Every business is matched in
# Denver, CO.
stateRes = 'CO'
cityRes = 'Denver'
output_json = []

with open('data.json') as json_file:
    data = json.load(json_file)

for p in data:
    nameRes = p['name']
    addressRes = p['address']
    response = yelp_api.business_match_query(name=nameRes,
                                             address1=addressRes,
                                             city=cityRes,
                                             state=stateRes,
                                             country='US')
    biz_info = response['businesses']
    idRes = ""
    if not biz_info:
        # No Yelp match: there is no ID to query, so skip this business
        # instead of calling business_query with an empty ID.
        continue

    # Only the first match is used.
    idRes = biz_info[0]['id']
    response = yelp_api.business_query(id=idRes)
    output_json.append({
        'name': nameRes,
        'address': addressRes,
        'website': p['website'],
        'rating': response['rating'],
        'review_count': response['review_count'],
    })

with open('final.json', 'w') as outfile:
    json.dump(output_json, outfile)
"query(transaction_type=" "'delivery', location='dallas, " "tx')")) response = yelp_api.transaction_search_query(transaction_type='delivery', location='dallas, tx') pprint(response) print('\n-------------------------------------------------------------------------\n') """ Example business query. Business API: https://www.yelp.com/developers/documentation/v3/business """ print("***** business information for Amy's on 6th St. *****\n{}\n".format("yelp_api.business_query(id='amys-ice-" "creams-austin-3')")) response = yelp_api.business_query(id='amys-ice-creams-austin-3') pprint(response) print('\n-------------------------------------------------------------------------\n') """ Example reviews query. Reviews API: https://www.yelp.com/developers/documentation/v3/business_reviews """ print("***** selected reviews for Amy's on 6th St. *****\n{}\n".format("yelp_api.reviews_query(id='amys-ice-" "creams-austin-3')")) response = yelp_api.reviews_query(id='amys-ice-creams-austin-3') pprint(response) print('\n-------------------------------------------------------------------------\n')
class scrappers:
    """Collect raw Twitter and Yelp data and store it as JSON / CSV files."""

    data_path = "././data/raw"

    # Browser-like User-Agent sent with the requests made to yelp.com pages.
    _SCRAPE_HEADERS = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
    }

    def __init__(self):
        __dir_path = os.path.dirname(os.path.realpath(__file__))
        credentials = get_credidentials()
        self.twitter_premium_api = load_credentials(
            filename="{}/{}".format(__dir_path, "twitter_keys.yaml"),
            yaml_key="search_tweets_api_30day")
        self.twitter_api = Twitter(auth=OAuth(
            consumer_key=credentials['twitter']['consumer_key'],
            consumer_secret=credentials['twitter']['consumer_secret'],
            token=credentials['twitter']['access_token_key'],
            token_secret=credentials['twitter']['access_token_secret']))
        self.yelp_api = YelpAPI(credentials['yelp']['api_key'])
        self.__data_path = "../data/raw"
        logger.info("initiation started.")

    @staticmethod
    def _write_json_array(f, key, items):
        """Write ``{"<key>": [item,item,...]}`` to *f*.

        *items* are already-serialized JSON strings. Joining them avoids the
        trailing comma that has to be patched out when items are streamed one
        by one, so the file is valid JSON even when *items* is empty.
        """
        f.write('{"%s": [' % key)
        f.write(",".join(items))
        f.write("]}")

    def tw_verify_credentials(self):
        """Print the result of the Twitter credential check as indented JSON."""
        obj = self.twitter_api.VerifyCredentials()
        print(json.dumps(obj._json, indent=4, sort_keys=True))

    def tw_get_statuses(self, user_list):
        """Save each user's timeline to ``datasets/tw_<user>_statuses.json``.

        Pages are requested until an empty page, or a page holding only the
        status already seen, comes back. Whatever was collected is written
        even if a request fails; the error is then propagated.
        """
        for username in user_list:
            collected = []
            with open(f'datasets/tw_{username}_statuses.json', 'w') as f:
                try:
                    max_id = 0
                    while True:
                        # status scheme available at: https://developer.twitter.com/en/docs/tweets/timelines/api-reference/get-statuses-user_timeline.html
                        statuses = self.twitter_api.GetUserTimeline(
                            screen_name=username, count=100, max_id=max_id)
                        if not statuses or (len(statuses) == 1
                                            and statuses[0].id == max_id):
                            break
                        # max_id is inclusive, so the boundary status repeats.
                        collected.extend(
                            json.dumps(status._json) for status in statuses
                            if status.id != max_id)
                        max_id = statuses[-1].id
                finally:
                    self._write_json_array(f, "statuses", collected)

    def tw_get_search(self, user_list):
        """Save search results per user to ``datasets/tw_<user>_searches.json``.

        *user_list* maps a screen name to a list of extra keywords. The search
        term is those keywords plus the user's display name, screen name,
        ``#name`` and ``@name``, joined with `` OR ``. The caller's keyword
        lists are not modified.
        """
        for user_name, keyword_list in user_list.items():
            collected = []
            with open(f'datasets/tw_{user_name}_searches.json', 'w') as f:
                try:
                    max_id = 0
                    user = self.twitter_api.GetUser(screen_name=user_name)
                    # Work on a copy so the caller's list is left untouched.
                    keywords = list(keyword_list) + [
                        f'{user.name}', f'{user_name}', f'#{user_name}',
                        f'@{user_name}'
                    ]
                    term = ' OR '.join(keywords)
                    while True:
                        # status scheme available at: https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets.html
                        statuses = self.twitter_api.GetSearch(
                            term=term.encode('utf-8'),
                            geocode=None,
                            count=100,
                            max_id=max_id)
                        if not statuses or (len(statuses) == 1
                                            and statuses[0].id == max_id):
                            break
                        collected.extend(
                            json.dumps(status._json) for status in statuses
                            if status.id != max_id)
                        max_id = statuses[-1].id
                finally:
                    self._write_json_array(f, "statuses", collected)

    def tw_get_premium_search(self, keyword: str):
        """Save premium search results for the last 31 days.

        Results go to ``datasets/tw_<keyword>_searches_premium.json``.
        NOTE(review): *keyword* only names the output file; the search rule
        itself is fixed to ``place:"New York, NY"`` - confirm this is intended.
        """
        collected = []
        with open(f'datasets/tw_{keyword.lower()}_searches_premium.json',
                  'w') as f:
            try:
                # A previous rule was built here and immediately replaced:
                #   pt_rule="near:\"New York, NY\" within:50mi",
                #   from_date="2018-07-01", to_date="2018-10-01"
                today = datetime.date.today()
                rule = gen_rule_payload(
                    pt_rule="place:\"New York, NY\"",
                    results_per_call=100,
                    from_date=(today - datetime.timedelta(31)).isoformat(),
                    to_date=today.isoformat())
                next_token = None
                while True:
                    results = ResultStream(rule_payload=rule,
                                           **self.twitter_premium_api)
                    results.next_token = next_token
                    tweets = []
                    try:
                        tweets = list(results.stream())
                    except Exception as ex:
                        print(str(ex))
                    collected.extend(json.dumps(tweet) for tweet in tweets)
                    if results.next_token is None:
                        break
                    next_token = results.next_token
            except Exception as ex:
                print("Error:\n" + str(ex))
            finally:
                # Always leave a well-formed file behind, even after an error.
                self._write_json_array(f, "statuses", collected)

    def yp_get_businesses(self, business_list):
        """
        Save the businesses found around each business in *business_list*.

        One file ``<data_path>/yp_<business>_competitors.json`` is written per
        business; its "businesses" array holds one list per result page.
        Paging stops when the Yelp client raises ``YelpAPIError``.

        File Type: JSON
        """
        for business in business_list:
            pages = []
            with open(f'{self.data_path}/yp_{business}_competitors.json',
                      'w') as f:
                try:
                    branch = self.yelp_api.business_query(business)
                    offset = 0
                    while True:
                        try:
                            # status scheme available at:
                            # https://www.yelp.com/developers/documentation/v3/business_search
                            competitors = self.yelp_api.search_query(
                                longitude=branch['coordinates']['longitude'],
                                latitude=branch['coordinates']['latitude'],
                                radius=40000,
                                # categories='bars,french'
                                sort_by='distance',
                                limit=50,
                                offset=offset)
                        except self.yelp_api.YelpAPIError:
                            break
                        pages.append(json.dumps(competitors['businesses']))
                        offset = offset + 50
                finally:
                    self._write_json_array(f, "businesses", pages)

    def yp_get_competitors(self, business_list):
        """
        Build the competitors file from the businesses around *business_list*.

        Each found business is tagged with ``dist_to_alias`` (the business it
        was found around) and kept once, keyed by its alias. The combined list
        is dumped as JSON to the competitors file, which is overwritten.
        """
        file_path = fp.yp_raw_competitors(self.data_path)
        seen_aliases = set()
        existing_list = []
        # Merging with an already existing file is currently disabled:
        # if os.path.exists(file_path):
        #     with open(file_path, 'r') as f:
        #         current_file = f.readlines()
        #         if len(current_file) > 0:
        #             existing_list = json.loads(current_file[0])
        #             index_list = [_business["alias"] for _business in existing_list]
        #             logger.info(f"existing file found: {len(index_list)} total entries")
        with open(file_path, 'w') as f:
            # find businesses
            for business in business_list:
                new_list = []
                try:
                    logger.info(f"import started for : {business}")
                    branch = self.yelp_api.business_query(business)
                    offset = 0
                    while True:
                        try:
                            # status scheme available at:
                            # https://www.yelp.com/developers/documentation/v3/business_search
                            competitors = self.yelp_api.search_query(
                                longitude=branch['coordinates']['longitude'],
                                latitude=branch['coordinates']['latitude'],
                                radius=40000,
                                # categories='bars,french'
                                sort_by='distance',
                                limit=50,
                                offset=offset)
                        except self.yelp_api.YelpAPIError:
                            break
                        for candidate in competitors["businesses"]:
                            # add alias name for distance measurement
                            candidate["dist_to_alias"] = business
                            if candidate['alias'] not in seen_aliases:
                                new_list.append(candidate)
                                seen_aliases.add(candidate['alias'])
                        offset = offset + 50
                finally:
                    existing_list.extend(new_list)
                    logger.info(
                        f"import completed. existing: {len(existing_list)} new: {len(new_list)}"
                    )
            # saving into file
            json.dump(existing_list, f)

    def yp_get_business_reviews(self, business_list):
        """
        Save the reviews the Yelp API returns for each business.

        Reviews are requested in pages of 5 until an empty page comes back
        and written as JSON to ``<data_path>/yp_<business>_rws.json``.
        """
        for business in business_list:
            collected = []
            with open(f'{self.data_path}/yp_{business}_rws.json', 'w') as f:
                try:
                    offset = 0
                    while True:
                        reviews_set = self.yelp_api.reviews_query(
                            business, limit=5, offset=offset)
                        reviews = reviews_set['reviews']
                        if not reviews:
                            break
                        # Serialize as JSON (not the dict repr) so the file
                        # can be loaded back.
                        collected.extend(
                            json.dumps(review) for review in reviews)
                        offset = offset + 5
                finally:
                    self._write_json_array(f, "reviews", collected)

    def yp_get_competitor_reviews(self,
                                  business_list=None,
                                  start_index=0,
                                  end_index=5):
        """
        Gets reviews by scraping through the site and appends them to the
        competitors reviews CSV, skipping rows already present in the file
        when the run started.

        business_list: None or List; when None, the aliases stored in the
            competitors file are used
        start_index: int, interested region's starting index
        end_index: int, interested region's ending index

        A failure while scraping one business is logged, followed by a random
        pause, and the next business is processed.

        File Type: CSV
        """
        file_path = fp.yp_raw_competitors_reviews(self.data_path)
        headers = self._SCRAPE_HEADERS
        columns = [
            'alias', 'ratingValue', 'dataPublished', 'description', 'author'
        ]

        # getting competitors list
        businesses_file_path = fp.yp_raw_competitors(self.data_path)
        businesses_index_list = []
        if os.path.exists(businesses_file_path):
            with open(businesses_file_path, 'r') as f:
                current_file = f.readlines()
            if len(current_file) > 0:
                businesses_index_list = [
                    _business["alias"]
                    for _business in json.loads(current_file[0])
                ]

        if os.path.exists(file_path):
            df = pd.read_csv(file_path)
            logger.info(
                f"existing file found. total reviews count: {len(df)}")
        else:
            # First run: create the file with its header row and start from
            # an empty frame so the duplicate check below has data to use.
            with open(file_path, 'w', newline='') as f:
                csv.writer(f).writerow(columns)
            logger.info("file created at: {}".format(file_path))
            df = pd.DataFrame(columns=columns)

        # ops
        with open(file_path, 'a', newline='') as f:
            writer = csv.writer(f)
            if business_list is None:
                business_list = businesses_index_list
            current_index = start_index - 1
            for business in business_list[start_index:end_index]:
                cnt_imported = 0
                current_index = current_index + 1
                logger.info(f"index: {current_index} of {end_index - 1}")
                try:
                    logger.info(f"import started for : {business}")
                    start = 0
                    cnt_requests = 0
                    while True:
                        url = '{}/{}?sort_by=date_desc&start={}'.format(
                            'https://www.yelp.com/biz', business, start)
                        # headers must go by keyword: the second positional
                        # argument of requests.get is the query parameters.
                        response = requests.get(url, headers=headers)
                        soup = BeautifulSoup(response.text, 'html.parser')
                        html_script = soup.findAll(
                            'script', {'type': 'application/ld+json'})[-1]
                        obj = json.loads(html_script.string)
                        reviews = obj['review']
                        if len(reviews) > 0:
                            for review in reviews:
                                data = [
                                    business,
                                    review['reviewRating']['ratingValue'],
                                    review['datePublished'],
                                    review['description'], review['author']
                                ]
                                check = np.array(data, dtype='O')
                                # Skip rows identical to one already on file.
                                if not (df.values == check).all(1).any():
                                    writer.writerow(data)
                                    cnt_imported = cnt_imported + 1
                            start = start + 20
                            cnt_requests = cnt_requests + 1
                        else:
                            logger.info(
                                f"import completed. total reviews cnt: {cnt_imported} total request cnt: {cnt_requests}"
                            )
                            break
                except Exception as ex:
                    logger.warning(
                        f"error: alias: {business} index: {current_index} total reviews cnt: {cnt_imported}"
                    )
                    logger.warning(f"error message: {ex}")
                    logger.warning("Let me sleep for some time..")
                    second = int(round(random.expovariate(1) * 100))
                    time.sleep(second)
                    logger.warning(
                        f"{second} seconds slept, now back on scrapping..")
                    continue

    def yp_get_business_reviews2(self, business_list):
        """
        Gets reviews by scraping through the site and saves them as JSON to
        ``<data_path>/yp_<business>_rws.json``.
        """
        for business in business_list:
            headers = self._SCRAPE_HEADERS
            collected = []
            with open(f'{self.data_path}/yp_{business}_rws.json', 'w') as f:
                try:
                    start = 0
                    while True:
                        url = '{}/{}?sort_by=date_desc&start={}'.format(
                            'https://www.yelp.com/biz', business, start)
                        # headers must go by keyword: the second positional
                        # argument of requests.get is the query parameters.
                        response = requests.get(url, headers=headers)
                        soup = BeautifulSoup(response.text, 'html.parser')
                        html_script = soup.find(
                            'script', {'type': 'application/ld+json'})
                        obj = json.loads(html_script.string)
                        reviews = obj['review']
                        if not reviews:
                            break
                        for review in reviews:
                            data = {
                                'ratingValue':
                                review['reviewRating']['ratingValue'],
                                'datePublished': review['datePublished'],
                                'description': review['description'],
                                'author': review['author']
                            }
                            collected.append(json.dumps(data))
                        start = start + 20
                finally:
                    self._write_json_array(f, "reviews", collected)
            # NOTE(review): nothing is written to this file here; the block
            # only clears the review list of the last parsed page object.
            with open(f'datasets/yp_businesses.json', 'a') as f:
                obj['review'] = []
import random

from yelpapi import YelpAPI

from misty.utils import print_and_say

# NOTE(review): the API key is committed in source; consider loading it from
# the environment or a config file instead.
YELP_API_KEY = 'h81ylaT0alwtJCUUyI7RazCCHNHleVGnhD9ZONPT1s4kL9v5qhCXPZrcI20H4LYisDEjJZu_j4ibEsSTpM2ISDpWBeraK3t42rwV_PhxtYvmatDn2xquIUKdueYtYHYx'  # plz no steal my api keyz!
client = YelpAPI(YELP_API_KEY)

biz_ids = [
    'pierce-j-r-plumbing-co-inc-of-sacramento-rocklin',
    'ncm-roseville',
]
# Announce the businesses in a random order on each run.
random.shuffle(biz_ids)

for biz_id in biz_ids:
    result = client.business_query(id=biz_id)
    # display_address is a list of address lines; separate them so they are
    # not run together into a single word.
    address = ', '.join(result['location']['display_address'])
    print_and_say(
        f"{result['name']}. Phone number: {result['display_phone']}. Address: {address}",
        next_voice=True)

    reviews = client.reviews_query(id=result['id'])
    print_and_say(
        f"Retrieved {len(reviews['reviews'])} of {reviews['total']} reviews.",
        next_voice=True)
    for review in reviews['reviews']:
        print_and_say(
            f"On {review['time_created']} {review['user']['name']} gave a rating of {review['rating']} stars, stating: {review['text']}.",
            next_voice=True)
] url = "https://api.yelp.com/v3/businesses/search" headers = {'Authorization': 'Bearer %s' % api_key} business_id = [] for c in cities: #since the search limit is 50, we set the radius about 3 miles params = {'term': "restaurant", "location": c, "limit": 50, "radius": 5000} responses = requests.get(url, headers=headers, params=params).json() for response in responses["businesses"]: business_id.append(response["id"]) # In[4]: yelp_api = YelpAPI(api_key) responses = [] for b in business_id: responses.append(yelp_api.business_query(id=b)) # In[5]: dates_time = [] ratings = [] for r in responses: try: dates_time.append(r["hours"][0]["open"]) ratings.append(r["rating"]) except KeyError: #some stores didn't list out operation hours continue pprint(dates_time) # In[6]:
class YelpQuery:
    """Query Yelp and turn a restaurant's rating and review count into
    bucketed features.

    ``connect()`` must be called before any method that talks to Yelp.
    """

    # Values used when Yelp returns nothing for a restaurant.
    # The only restaurant for which the lookup failed is Alphys. Hardcoding
    # the vals from Alphys Chateau Basque at
    # http://www.yelp.com/biz/alphys-chateau-basque-pismo-beach-2
    _FALLBACK_RESTAURANT = {"rating": 3.0, "review_count": 21}

    def __init__(self):
        self.DEFAULT_TERM = 'dinner'
        self.DEFAULT_LOCATION = 'San Luis Obispo, CA'
        self.SEARCH_LIMIT = 3
        self.SEARCH_PATH = '/v2/search/'
        self.BUSINESS_PATH = '/v2/business/'
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration.
        self.CONSUMER_KEY = "0Ji9mpDmiH0rub9AvjtRhQ"
        self.CONSUMER_SECRET = "b7SZdC5XVqbo3SKvlK28bs12hrM"
        self.TOKEN = "DTNxH_G-2dF_PmiZzY7geQJnW5rcVWxu"
        self.TOKEN_SECRET = "dj-jziXREJcEf3vtmwWkqQFXu_8"

    def connect(self):
        """Create the Yelp client from the stored credentials."""
        self.yelp_api = YelpAPI(self.CONSUMER_KEY, self.CONSUMER_SECRET,
                                self.TOKEN, self.TOKEN_SECRET)

    def get_business(self, business_id, **kw_args):
        """Return the Yelp business record for *business_id*."""
        business_results = self.yelp_api.business_query(id=business_id,
                                                        **kw_args)
        return business_results

    def search(self, **kwargs):
        """Run a Yelp search; keyword arguments are passed through unchanged."""
        search_results = self.yelp_api.search_query(**kwargs)
        return search_results

    # get a valid restaurant
    def getValidRestaurant(self, **kwargs):
        """Return the best search hit for the given search arguments.

        Among the first three results, one whose name equals ``term``
        (case-insensitively) wins; otherwise the first result is returned.
        Returns ``None`` when the search has no results.
        """
        businesses = self.search(**kwargs)["businesses"]
        if not businesses:
            return None

        term = kwargs.get("term")
        if term is not None:
            wanted = term.lower()
            for business in businesses[:3]:
                if business["name"].lower() == wanted:
                    return business

        # No exact name match: fall back to Yelp's top result.
        return businesses[0]

    @staticmethod
    def _normalizeQuery(restaurantName, location):
        """Rewrite the few name/location pairs known not to match on Yelp."""
        if location == "Arnold":
            location = "Arnold, CA"
        elif restaurantName == "G-Brothers Kettlecorn":
            restaurantName = "G Brothers"
        elif location == "Shell Beach" and "intock" in restaurantName:
            restaurantName = "F. McLintocks Saloon & Dining House"
            location = "Shell Beach, CA"
        return restaurantName, location

    def _buildFeatures(self, restaurantName, location, weight, bucketRating):
        """Look the restaurant up and build its weighted feature dictionary.

        *bucketRating* maps a restaurant record to its rating bucket. The
        rating and review-count buckets are repeated *weight* times under the
        keys ``rating<i>`` / ``reviewCount<i>`` for i in 1..weight.
        """
        restaurantName, location = self._normalizeQuery(restaurantName,
                                                        location)
        yelpRestaurant = self.getValidRestaurant(term=restaurantName,
                                                 location=location,
                                                 limit=3)
        if not yelpRestaurant:
            yelpRestaurant = self._FALLBACK_RESTAURANT
            print("No yelp info for %s in %s" % (restaurantName, location))

        rating = bucketRating(yelpRestaurant)
        reviewCount = self.getReviews(yelpRestaurant)

        featureDict = {}
        for i in range(1, weight + 1):
            featureDict["rating%d" % i] = rating
            featureDict["reviewCount%d" % i] = reviewCount
        return featureDict

    def getExactFeatures(self, restaurantName, location, weight):
        """Return weighted features using the fine-grained rating buckets."""
        return self._buildFeatures(restaurantName, location, weight,
                                   self.getExactRating)

    def getFeatures(self, restaurantName, location, weight):
        """Return weighted features using the positive/negative rating."""
        return self._buildFeatures(restaurantName, location, weight,
                                   self.getRating)

    def getReviews(self, restaurant):
        """Bucket the restaurant's ``review_count`` in steps of 100."""
        reviewCount = restaurant["review_count"]
        if reviewCount >= 500:
            reviewCountBucket = "above500"
        elif reviewCount >= 400:
            reviewCountBucket = "above400"
        elif reviewCount >= 300:
            reviewCountBucket = "above300"
        elif reviewCount >= 200:
            reviewCountBucket = "above200"
        elif reviewCount >= 100:
            reviewCountBucket = "above100"
        else:
            reviewCountBucket = "below100"
        return reviewCountBucket

    def getExactRating(self, restaurant):
        """Bucket the restaurant's ``rating`` into fine-grained bands."""
        rating = restaurant["rating"]
        if rating >= 4.5:
            ratingBucket = "above45"
        elif rating >= 4.0:
            ratingBucket = "above4"
        elif rating >= 3.0:
            ratingBucket = "above3"
        elif rating >= 2.0:
            ratingBucket = "above2"
        elif rating >= 1.0:
            ratingBucket = "above1"
        else:
            ratingBucket = "above0"
        return ratingBucket

    def getRating(self, restaurant):
        """Return "positive" for a ``rating`` of 4.0 or more, else "negative"."""
        rating = restaurant["rating"]
        if rating >= 4.0:
            ratingBucket = "positive"
        else:
            ratingBucket = "negative"
        return ratingBucket
class ConnectYelp(ConnectorEvent):
    """Connector that looks up stored events on Yelp and saves the business
    details it finds in the connector's ``data`` mapping, keyed by place ID.
    """

    TYPE = "Yelp"

    def __init__(self):
        api_key = get_secret('YELP', 'api_key')
        self.api = YelpAPI(api_key)
        # The events to process are the data of the ExtractEvents connector row.
        # NOTE(review): .first() returns None when no such row exists, which
        # would fail on the .data access below - confirm the row always exists.
        events_connector = ConnectorEvent.query.filter(
            ConnectorEvent.connector_type == ExtractEvents.TYPE,
            ConnectorEvent.connector_event_id == ExtractEvents.ID).first()
        self.events = events_connector.data

    def extract(self, name=None, event_id=None, backfill=None):
        """Look up events on Yelp and store the business details found.

        :param name: when given, only events with this name are processed.
        :param event_id: when given, only the event with this place ID is
            processed.
        :param backfill: when truthy, places already present in the
            connector data are skipped.
        """
        connector = self.get_connector()
        for key, event in self.events.items():
            place_id = get_from(event, ['place_id'])
            place_name = get_from(event, ['name'])
            # Filters: events without a place ID, or not matching the
            # requested event_id / name, are skipped.
            if not place_id:
                continue
            if event_id is not None and place_id != event_id:
                continue
            if name is not None and name != place_name:
                continue
            if backfill and place_id in connector.data:
                print("Found Place ID {} => {}".format(
                    place_id, connector.data[place_id]['name']))
                continue
            search_results = None
            b_details = None
            event_name = event['name']
            event_addr = get_from(event, ['address'])
            event_city = event['city']
            event_state = get_from(event, ['state'], 'CA')
            # First attempt: business match on name and address.
            # search_results was just reset, so this branch always runs.
            if not search_results:
                kwargs = {
                    'name': event_name,
                    'address1': event_addr,
                    'city': event_city,
                    'state': event_state
                }
                print(" | ".join(
                    ["{}: \"{}\"".format(k, v) for k, v in kwargs.items()]))
                try:
                    search_results = self.api.business_match_query(
                        country="US", **kwargs)
                except Exception as e:
                    # Best effort: a failed match falls through to the search.
                    print("business_match_query: {}".format(e))
            # Second attempt: free-text search, limited to a single result.
            if not search_results or len(search_results['businesses']) == 0:
                try:
                    term = " ".join([event_name, event_city, event_state])
                    location = " ".join([event_city, event_state])
                    kwargs = {'term': term, 'location': location}
                    print(" | ".join([
                        "{}: \"{}\"".format(k, v) for k, v in kwargs.items()
                    ]))
                    search_results = self.api.search_query(limit=1, **kwargs)
                except Exception as e:
                    print("search_query: {}".format(e))
            if search_results:
                for r in search_results['businesses']:
                    b_details = None
                    try:
                        b_details = self.api.business_query(id=r['id'])
                    except Exception as e:
                        print("business_query: {}".format(e))
                        print(r['id'])
                    if b_details:
                        # row_event.update_meta(self.TYPE, {**r, **b_details})
                        # db_session.merge(row_event)
                        # db_session.commit()
                        # Each stored result replaces the previous one for
                        # this place ID and is committed immediately.
                        connector.data[place_id] = b_details
                        db_session.merge(connector)
                        db_session.commit()
                        # yield b_details['name'], b_details
            else:
                print("Unable to find {}".format(place_id))
class YelpData(object):
    """
    Handles the calls for Yelp data and stores every response in MongoDB.

    Business API - business_query()
    Business Match API - business_match_query()
    Reviews API - reviews_query()
    """

    def __init__(self):
        self.business_match = mongodb.db.business_match
        self.business_details = mongodb.db.business_details
        self.business_reviews = mongodb.db.business_reviews
        self.yelp_req = mongodb.db.yelp_request
        self.yelp_api = YelpAPI(app.config['YELP_API_KEY'])
        self.response = None

    @staticmethod
    def _remove_keys(json_data):
        """Strip the storage-only keys from a response before returning it.

        ``user_id`` is added by this class before the insert, ``_id`` is
        present on the document after it has been inserted. Missing keys are
        ignored.
        """
        for key in ('user_id', '_id'):
            json_data.pop(key, None)
        return json_data

    def get_business_match_data(self,
                                user_id=None,
                                name=None,
                                address1='',
                                address2=None,
                                address3=None,
                                city=None,
                                state=None,
                                country=None,
                                latitude=None,
                                longitude=None,
                                phone=None,
                                zip_code=None,
                                yelp_business_id=None,
                                limit=1,
                                match_threshold='default'):
        """
        Run a business match query, store the response and return it.

        Link: https://www.yelp.com/developers/documentation/v3/business_match
        required parameters:
            * name - business name
            * city
            * state
            * country

        The response is stored together with *user_id*; the returned copy has
        the storage-only keys removed.
        """
        self.response = self.yelp_api.business_match_query(
            name=name,
            address1=address1,
            address2=address2,
            address3=address3,
            city=city,
            state=state,
            country=country,
            latitude=latitude,
            longitude=longitude,
            phone=phone,
            zip_code=zip_code,
            yelp_business_id=yelp_business_id,
            limit=limit,
            match_threshold=match_threshold)
        self.response['user_id'] = user_id
        self.business_match.insert_one(self.response)
        self.response = self._remove_keys(self.response)
        return self.response

    def get_business_details(self, business_id, user_id):
        """Fetch, store and return the details of *business_id*."""
        self.response = self.yelp_api.business_query(id=business_id)
        self.response['user_id'] = user_id
        self.business_details.insert_one(self.response)
        self.response = self._remove_keys(self.response)
        return self.response

    def get_business_reviews(self, business_id, user_id):
        """Fetch, store and return the reviews of *business_id*."""
        # Use the argument: the instance has no business_id attribute.
        self.response = self.yelp_api.reviews_query(id=business_id)
        self.response['user_id'] = user_id
        self.business_reviews.insert_one(self.response)
        self.response = self._remove_keys(self.response)
        return self.response

    def yelp_request(self, yelp_request, user_id):
        """Record a request made by *user_id*, stamped with the current time.

        Returns the ``acknowledged`` flag of the insert result.
        """
        yelp_request['req_datetime'] = datetime.datetime.now()
        yelp_request['user_id'] = user_id
        return self.yelp_req.insert_one(yelp_request).acknowledged
from yelpapi import YelpAPI

from yelp_authentication import CONSUMER_KEY, CONSUMER_SECRET, TOKEN, TOKEN_SECRET

yelp_api = YelpAPI(CONSUMER_KEY, CONSUMER_SECRET, TOKEN, TOKEN_SECRET)

# Example business query: print the rating and excerpt of every review
# returned for one business. See
# http://www.yelp.com/developers/documentation/v2/business for more
# information.
print("***** selected reviews for Amy's on 6th St. *****")
business = yelp_api.business_query(id='amys-ice-creams-austin-3')
review_template = 'rating: {}\nexcerpt: {}\n'
for review in business['reviews']:
    print(review_template.format(review['rating'], review['excerpt']))
# Download the photos and review texts of the top-rated pizza places in
# Halifax. Each business gets its own directory named after its Yelp ID,
# holding <n>.jpg photo files and <n>.txt review files.
search_results = yelp_api.search_query(term='Pizza',
                                       location='Halifax',
                                       sort_by='rating',
                                       limit=50)
businessIds = list()
businessRating = list()
#businessPhotos = list()
#businessReviews = list()
for result in search_results["businesses"]:
    businessIds.append(result["id"])
    businessRating.append(result["rating"])

for businessId in businessIds:
    businessPhotos = yelp_api.business_query(id=businessId)["photos"]
    businessReviews = yelp_api.reviews_query(id=businessId)["reviews"]
    # exist_ok lets the script be re-run without failing on existing folders.
    os.makedirs(businessId, exist_ok=True)

    for i, photo in enumerate(businessPhotos):
        savefile = str(businessId + "/" + str(i) + ".jpg")
        urllib.request.urlretrieve(photo, savefile)

    for i, review in enumerate(businessReviews):
        savefile = str(businessId + "/" + str(i) + ".txt")
        # The context manager closes each file; UTF-8 keeps review text with
        # non-ASCII characters writable on every platform.
        with open(savefile, "w", encoding="utf-8") as fsavefile:
            fsavefile.write(review["text"])
def _restaurant_for_alias(alias):
    """Return the Restaurant with this Yelp alias, creating it from the Yelp
    API when it is not stored yet."""
    try:
        return Restaurant.objects.get(alias=alias)
    except Restaurant.DoesNotExist:
        pass

    print(f'Querying API for rest1 = {alias}')
    yelp_api = YelpAPI(yelp_key)
    r = yelp_api.business_query(id=alias)
    pprint.pprint(r)
    # Always end up with exactly three photo slots, padded with "".
    photos = (list(r.get('photos') or []) + ["", "", ""])[:3]
    return Restaurant.objects.create(
        alias=r['alias'],
        name=r['name'],
        image_url=r['image_url'],
        url=r['url'],
        display_phone=r['display_phone'],
        review_count=r['review_count'],
        rating=r['rating'],
        photo1_url=photos[0],
        photo2_url=photos[1],
        photo3_url=photos[2],
        # price=r['price']
    )


def process_update(request, event_id):
    """
    Validate and apply an event edit form, then link its restaurants.

    On validation errors they are stored in the session and as messages and
    the user is sent back to the edit page. Otherwise the event's title,
    time, location and message are updated, and every non-empty form field
    ``rest1``, ``rest2``, ... holding a ``https://www.yelp.com/biz/<alias>``
    URL is resolved to a ``Restaurant`` and added to the event.

    :param request: incoming request; ``request.POST`` holds the form.
    :param event_id: primary key of the ``Event`` to update.
    :return: a redirect to the edit page (errors) or to ``/dashboard``.
    """
    errors = Event.objects.basic_validator(request.POST)
    if len(errors) > 0:
        request.session['errors'] = errors
        for key, value in errors.items():
            messages.error(request, value, extra_tags=key)
        return redirect(f'/events/edit/{event_id}')

    # if no errors
    form = request.POST
    event = Event.objects.get(id=event_id)
    event.title = form['title']
    event.time = form['time']
    event.location = form['location']
    event.message = form['message']
    event.save()

    url_pattern = r'https://www.yelp.com/biz/(.+)'
    n = 1
    rest = 'rest' + str(n)
    while rest in form:
        # Start clean for every field so a previous field's alias can never
        # be reused for an empty or invalid one.
        url = ""
        if form[rest]:
            # Drop the query string before extracting the business alias.
            match = re.search(url_pattern, form[rest].split("?")[0])
            if match is None:
                print("url not found.. should have been caught by validator")
            else:
                url = match.group(1)
            print(url)
        if url != "":
            event.restaurants.add(_restaurant_for_alias(url))
        n += 1
        rest = 'rest' + str(n)

    # event.users_who_join.add(User.objects.get(id=request.session['user_id']))
    event.save()
    return redirect("/dashboard")
def get_restaurant_details(Business_name):
    """Build a one-row DataFrame describing the restaurant ``Business_name``.

    Data is gathered from three places:

    * ``Address_Data`` (MySQL): address, city, state and zip code of rows
      whose ``Company_Name`` contains ``Business_name``. When no row is
      found, Yelp is searched around "New York, NY" and, if the first hit's
      name equals ``Business_name`` exactly, its Yelp location is used.
    * ``Restaurant_Financial_Information`` (MySQL): revenue and number of
      employees for the matching name and address.
    * ``Restaurant_Data.csv``: executive name, years in database, square
      footage and credit score of the row whose ``Company Name`` equals
      ``Business_name``.

    When several rows match, the last address/financial row and the first
    CSV row are used. Address and financial fields that cannot be resolved
    are reported as ``"Unavailable"``; CSV-derived fields as ``"None"``.

    :param Business_name: restaurant name to look up.
    :return: ``pandas.DataFrame`` with a single row (index 0) and the columns
        'Restaurant Name', 'Address', 'City', 'State', 'Zip Code',
        'Owner Name', 'Years in Business', 'Square Footage', 'Credit Score',
        'Revenue', 'Number of Employees'.
    """
    # Wildcards travel inside the bound parameter, so the SQL text itself
    # never contains caller-supplied data (and no literal '%' to escape).
    name_pattern = "%{}%".format(Business_name)

    Address = City = State = Zip_Code = "Unavailable"
    Revenue = Employees = "Unavailable"

    db = pymysql.connect(host="localhost", user="", password="", db="")
    try:
        with db.cursor() as cur1:
            try:
                cur1.execute(
                    "Select A.Address, A.City, A.State, A.Zip_code "
                    "From Address_Data A "
                    "Where A.Company_Name like %s",
                    (name_pattern,))
                # fetchall() may return a tuple; a list lets the Yelp
                # fallback append a row of the same shape.
                Details = list(cur1.fetchall())
            except ValueError:
                Details = []

        if not Details:
            # Fall back to Yelp only when the database has no address.
            yelp_api = YelpAPI(api_key='')
            search_results = yelp_api.search_query(term=Business_name,
                                                   location='New York, NY')
            businesses = search_results.get('businesses') or []
            if businesses and businesses[0]['name'] == Business_name:
                business_search = yelp_api.business_query(
                    id=businesses[0]['id'])
                location = business_search['location']
                Details.append((location['address1'], location['city'],
                                location['state'], location['zip_code']))
        print(Details)

        if Details:
            Address, City, State, Zip_Code = Details[-1]
            with db.cursor() as cur2:
                try:
                    cur2.execute(
                        "Select A.Actual_Revenue, A.Number_of_Employees "
                        "From Restaurant_Financial_Information A "
                        "Where A.Restaurant_Name like %s "
                        "AND A.Address like %s",
                        (name_pattern, "%{}%".format(Address)))
                    Info = cur2.fetchall()
                except ValueError:
                    Info = ()
            if Info:
                Revenue, Employees = Info[-1]
    finally:
        # Always release the connection, even when a lookup fails.
        db.close()

    Restaurant_Data = pd.read_csv("Restaurant_Data.csv")
    matches = Restaurant_Data[
        Restaurant_Data['Company Name'] == Business_name]
    if matches.empty:
        Owner = no_of_years = sq_ft = credit_score = "None"
    else:
        record = matches.iloc[0]
        Owner = str(record['Executive First Name'] + " " +
                    record['Executive Last Name'])
        no_of_years = record['Years In Database']
        sq_ft = record['Square Footage']
        credit_score = record['Credit Score Alpha']

    Detail = {
        'Restaurant Name': Business_name,
        'Address': Address,
        'City': City,
        'State': State,
        'Zip Code': Zip_Code,
        'Owner Name': Owner,
        'Years in Business': no_of_years,
        'Square Footage': sq_ft,
        'Credit Score': credit_score,
        'Revenue': Revenue,
        'Number of Employees': Employees,
    }
    restaurant_details = pd.DataFrame(Detail, index=[0])
    return restaurant_details
'_WuvOb1mMpmTdhuh9LwvyY_RQzsiLt3B', 'yAbsrRApeXzSIO_KK1dbDzbw0oA') clean_file = open('new_and_imporoved.csv', 'w') writer = csv.writer(clean_file) with open('Final_csv.csv', 'r') as original_file: episodes = original_file.read().split('\n') for index, episode in enumerate(episodes): episodes[index] = episodes[index].split(',') season = episodes[index][0] title = episodes[index][1] restaurant = episodes[index][2] city = episodes[index][3] state = episodes[index][4] business_id = episodes[index][5] rating = episodes[index][6] response = yelp_api.business_query(id=business_id) if 'is_claimed' in response.keys(): review_count = response['review_count'] print restaurant, review_count elif 'error' in response.keys(): review_count = 'Error' else: review_count = 'Error' writer.writerow((season, title, restaurant, city, state, business_id, rating, review_count)) time.sleep(.5) clean_file.close()
from yelpapi import YelpAPI

# NOTE(review): the API credentials are hard-coded; consider loading them
# from the environment or from a config file kept out of version control.
yelp_api = YelpAPI('V4oXt-CDFMUXzPerVYXWnA', 'omnmWvy6KQNOSt1o9qD0YwMtKiQ',
                   '_WuvOb1mMpmTdhuh9LwvyY_RQzsiLt3B',
                   'yAbsrRApeXzSIO_KK1dbDzbw0oA')

# The output file gets its own name: binding it to ``rating_list`` and then
# rebinding that name to a list would lose the only reference to the handle.
rating_file = open('try_againrating_list.csv', 'w')
writer = csv.writer(rating_file)

# Parallel lists: business_id_list[i] was looked up and produced
# rating_list[i] (the string 'KeyError' when the response had no rating).
business_id_list = []
rating_list = []

with open('new_clean_master_list.csv', 'r') as original_csv_file:
    episodes = original_csv_file.read().split('\n')

for index, episode in enumerate(episodes):
    episodes[index] = episode.split(',')
    if len(episodes[index]) < 6:
        # Blank or short line (e.g. after the final newline): there is no
        # business id in column 5, so there is nothing to query.
        continue
    business_id = episodes[index][5]
    business_id_list.append(business_id)
    try:
        response = yelp_api.business_query(id=business_id)
        rating_list.append(response['rating'])
        print("{0} {1}".format(business_id, response['rating']))
    except KeyError:
        rating_list.append('KeyError')
        print("{0} Key Error".format(business_id))
    # Pause between requests to stay under the API rate limit.
    time.sleep(.5)

# NOTE(review): the row export below is disabled in the original, so the
# output file is created but left empty - re-enable it if the CSV is wanted.
# for business_id, rating in zip(business_id_list, rating_list):
#     writer.writerow((business_id, rating))
rating_file.close()
parser.add_argument("--limit", nargs='?', const=20, type= int, help="A limit of result per type of business\ in id's description. Maximum value is 50") args = parser.parse_args() def eprint(*args,**kwargs): print(*args, file=sys.stderr,**kwargs) if args.limit > 50: eprint("Max value for limit is 50, will continue with that") args.limit = 50 if args.rad > 40000: eprint("Max value for rad is 40000, will continue with that") args.limit = 40000 yelp_api = YelpAPI(CLIENT_ID, CLIENT_SECRET) business = yelp_api.business_query(id=args.id) business = yelp_api.business_query(id="saporé-san-diego") city = business['location']['city'] lat = business['coordinates']['latitude'] lon = business['coordinates']['longitude'] categories = business['categories'] bus_type = list(map(lambda x: x['alias'],categories)) seen = set() ratings = [] review_count = [] tot = 1 for different in range(len(bus_type)): similar = yelp_api.search_query(location=city,term=bus_type[different],limit=args.limit) competition = similar['businesses'] for i in range(len(competition)): if competition[i]['id'] not in seen:
from yelpapi import YelpAPI
from pprint import pprint

yelp_api = YelpAPI(
    'V4oXt-CDFMUXzPerVYXWnA',
    'omnmWvy6KQNOSt1o9qD0YwMtKiQ',
    '_WuvOb1mMpmTdhuh9LwvyY_RQzsiLt3B',
    'yAbsrRApeXzSIO_KK1dbDzbw0oA',
)

# Business ids used in earlier, now disabled, lookups:
#   oohhs-and-aahhs-washington
#   mac-and-ernies-roadside-eatery-tarpley
#   brints-diner-wichita

# Query a single business and print its rating; a KeyError raised during the
# lookup or by a response without a 'rating' key is reported as "Key Error".
try:
    response = yelp_api.business_query(id="pizza-palace-knoxville")
    rating = response['rating']
except KeyError:
    print("Key Error")
else:
    print(rating)
"yelp_api.transaction_search_query(transaction_type='delivery', location='dallas, tx')" )) response = yelp_api.transaction_search_query(transaction_type='delivery', location='dallas, tx') pprint(response) print( '\n-------------------------------------------------------------------------\n' ) """ Example business query. Business API: https://www.yelp.com/developers/documentation/v3/business """ print("***** business information for Amy's on 6th St. *****\n{}\n".format( "yelp_api.business_query(id='amys-ice-creams-austin-3')")) response = yelp_api.business_query(id='amys-ice-creams-austin-3') pprint(response) print( '\n-------------------------------------------------------------------------\n' ) """ Example reviews query. Reviews API: https://www.yelp.com/developers/documentation/v3/business_reviews """ print("***** selected reviews for Amy's on 6th St. *****\n{}\n".format( "yelp_api.reviews_query(id='amys-ice-creams-austin-3')")) response = yelp_api.reviews_query(id='amys-ice-creams-austin-3') pprint(response) print( '\n-------------------------------------------------------------------------\n'
# Resolve an entry to a Yelp business through the autocomplete endpoint and
# remember its id together with the entry's coordinates.
for entry in data:
    name = entry['name']
    longlat = entry['longlat']
    longitude = float(longlat[0])
    latitude = float(longlat[1])
    response = yelp_api.autocomplete_query(
        text=name, latitude=latitude, longitude=longitude)

    # Only the first autocomplete suggestion (if any) is kept.
    first_match = next(iter(response['businesses']), None)
    if first_match is not None:
        school_info.append({
            "latitude": latitude,
            "longitude": longitude,
            "id": first_match['id'],
            "name": name,
        })

    # NOTE(review): this break stops after the first entry, so only one
    # element of `data` is ever looked up - confirm that this is intended.
    break

# Enrich every collected record with the rating and name Yelp reports.
for school in school_info:
    details = yelp_api.business_query(id=school['id'])
    school['rating'] = details['rating']
    school['name'] = details['name']

df = pd.DataFrame(school_info)
geometry = list(map(Point, zip(df['longitude'], df['latitude'])))
gdf = GeoDataFrame(df, geometry=geometry)

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
usa = gpd.read_file('./bayarea_zipcodes.shp')

# Draw the points in red on top of the world base map.
base_axes = world.plot(figsize=(10, 6))
gdf.plot(ax=base_axes, marker='o', color='red', markersize=15)

# Blocks until Enter is pressed (presumably to keep the plot on screen).
input('a')
city_name = city_name_country[0] city_country = city_name_country[1] city_key = city_name_split[0] cityStr = city_name.split(",")[0] city_code = city_key print(city_code + " " + cityStr + " " + city_country + " ///" + city_name) response = yelp.search_query(term='food', location=city_name, price='1,2,3,4', sort_by='best_match', limit=10) data = json.dumps(response) restaurants_json = json.loads(data) city_dict = {} city_dict['businesses'] = [] for p in restaurants_json['businesses']: if ('price' in p): review = yelp.reviews_query(id=p['id'], sort_by='rating', limit=3) details = yelp.business_query(id=p['id']) data = json.dumps(review) detaildata = json.dumps(details) reviews_json = json.loads(data) details_json = json.loads(detaildata) reviews_dict = {} reviews_dict['reviews'] = [] for q in reviews_json['reviews']: reviews_dict['reviews'].append({ 'user_name' : q['user']['name'], 'text' : q['text'], 'stars' : q['rating'] }) doc_dict = {} doc_dict['name'] = p['name'], doc_dict['cityName'] = cityStr