コード例 #1
0
ファイル: restaurant.py プロジェクト: lykee/YelpWebScraping
def queryApi(term, city, neighbourhood=''):
    """Fetch businesses matching ``term`` around a location and store them.

    The location string is built as ``"<neighbourhood>, <city>, US"`` and
    geocoded with ``getlocation.getCoordinates``. ``search`` is then called
    repeatedly while the module-level ``OFFSET_LIMIT`` is below ``MAX_LIMIT``,
    and every business of every response is inserted into ``dbh.restaurant``.

    Args:
        term: Search term forwarded to ``search``.
        city: City name used to build the location string.
        neighbourhood: Optional neighbourhood placed in front of the city.

    Side effects:
        Inserts documents into ``dbh.restaurant``, prints progress, sleeps
        4 seconds after each non-empty page, overwrites the global
        ``MAX_LIMIT`` with the ``total`` field of the latest response and
        always leaves the global ``OFFSET_LIMIT`` at 0 on exit.
    """
    global OFFSET_LIMIT
    global MAX_LIMIT
    total_inserted = 0
    location = neighbourhood + ', ' + city + ', US'
    # Get longitude and latitude from Google Geocoding API V3
    longitude, latitude = getlocation.getCoordinates(location)

    # Single-argument print: same output as the Python 2 statement form.
    print('%s %s %s' % (location, longitude, latitude))

    try:
        # Page through the results 20 at a time.
        # NOTE(review): search() is not handed the offset here; presumably it
        # reads the global OFFSET_LIMIT itself -- confirm.
        while OFFSET_LIMIT < MAX_LIMIT:
            response = search(term, location, longitude, latitude)
            # NOTE(review): MAX_LIMIT is not restored afterwards, so the total
            # reported for this query also bounds the next call -- confirm
            # that this is intended.
            MAX_LIMIT = response['total']

            allRestaurantData = response['businesses']
            if not allRestaurantData:
                # An empty page cannot advance the offset; without this exit
                # the loop would call the API forever with no delay.
                break

            for restaurant in allRestaurantData:
                dbh.restaurant.insert(restaurant)  # , safe=True)

            time.sleep(4)
            OFFSET_LIMIT += 20
            total_inserted += len(allRestaurantData)

        print('Writing {0} records for term {1}'.format(total_inserted, term.strip()))
        print('Total number documents in the collection {0} \n\n'.format(dbh.restaurant.find().count()))
    finally:
        # Reset even on failure so the next call starts from the first page.
        OFFSET_LIMIT = 0
def queryApi(city, neighbourhood=''):
    """Collect flattened restaurant records for a location and write them out.

    The location string is built as ``"<neighbourhood>, <city>, US"`` and
    geocoded with ``getlocation.getCoordinates``. ``search`` is then called
    repeatedly while the module-level ``OFFSET_LIMIT`` is below ``MAX_LIMIT``;
    each returned business is flattened into a record and the accumulated
    list is handed to ``writeData(city, records)``.

    Args:
        city: City name; used for the location string and passed to
            ``writeData``.
        neighbourhood: Optional neighbourhood placed in front of the city.

    Side effects:
        Prints progress, sleeps 4 seconds after each non-empty page, calls
        ``writeData``, overwrites the global ``MAX_LIMIT`` with the ``total``
        field of the latest response and always leaves the global
        ``OFFSET_LIMIT`` at 0 on exit.
    """
    global OFFSET_LIMIT
    global MAX_LIMIT
    restaurantData = []
    location = neighbourhood + ', ' + city + ', US'

    # Get longitude and latitude from Google Geocoding API V3
    longitude, latitude = getlocation.getCoordinates(location)
    # Single-argument print: same output as the Python 2 statement form.
    print('%s %s %s' % (location, longitude, latitude))

    try:
        # Page through the results 20 at a time.
        # NOTE(review): search() is not handed the offset here; presumably it
        # reads the global OFFSET_LIMIT itself -- confirm.
        while OFFSET_LIMIT < MAX_LIMIT:
            response = search(location, longitude, latitude)
            # NOTE(review): MAX_LIMIT is not restored afterwards, so the total
            # reported for this query also bounds the next call -- confirm
            # that this is intended.
            MAX_LIMIT = response['total']

            allRestaurantData = response['businesses']
            if not allRestaurantData:
                # An empty page cannot advance the offset; without this exit
                # the loop would call the API forever with no delay.
                break

            for restaurant in allRestaurantData:
                restaurantData.append(_flattenRestaurant(restaurant))

            time.sleep(4)
            OFFSET_LIMIT += 20

        # Report the number of records actually collected; the offset only
        # moves in steps of 20 and overstates a short final page.
        print('Writing {0} records'.format(len(restaurantData)))
        writeData(city, restaurantData)
    finally:
        # Reset even on failure so the next call starts from the first page.
        OFFSET_LIMIT = 0


def _flattenRestaurant(restaurant):
    """Return a flat, upper-case-keyed record for one business dict."""
    record = {}
    record['ID'] = restaurant['id'].encode('ascii', 'ignore')
    record['NAME'] = restaurant['name'].encode('ascii', 'ignore')
    record['REVIEW_COUNT'] = restaurant['review_count']
    record['RATING'] = restaurant['rating']

    location = restaurant['location']
    coordinate = location.get('coordinate')
    if coordinate:
        record['LONGITUDE'] = coordinate['longitude']
        record['LATITUDE'] = coordinate['latitude']
    else:
        # Missing coordinates are stored as 0.0 / 0.0.
        record['LONGITUDE'] = 0.0
        record['LATITUDE'] = 0.0

    record['CITY'] = location['city']
    record['STATE'] = location['state_code']
    # A missing or empty postal code is replaced by the integer 99999.
    record['ZIP'] = location.get('postal_code') or 99999
    record['COUNTRY'] = location['country_code']

    # If categories are present, store them as one comma separated string
    # built from the second element of each category entry.
    categories = restaurant.get('categories')
    if categories:
        record['CATEGORY'] = ",".join(
            [category[1].encode('ascii', 'ignore') for category in categories]
        )
    else:
        record['CATEGORY'] = ''

    return record
コード例 #3
0
def search(location, term, category):
    """Run a search request for ``term`` in ``category`` around ``location``.

    The location is geocoded with ``getlocation.getCoordinates`` and the
    resulting coordinates are sent along as the ``cll`` parameter in
    "latitude, longitude" order. Returns whatever
    ``request(API_HOST, SEARCH_PATH, params)`` returns.
    """
    # Get longitude and latitude from Google Geocoding API V3
    coordinates = getlocation.getCoordinates(location)
    lng, lat = coordinates

    query = dict(
        location=location,
        cll=", ".join([str(lat), str(lng)]),
        term=term,
        limit=20,
        sort=2,
        category_filter=category,
    )
    return request(API_HOST, SEARCH_PATH, query)