Ejemplo n.º 1
0
def _scrape_yelp(query):
    """Return the first review excerpt of each Phoenix business matching ``query``.

    Runs a Yelp search for ``query`` in 'Phoenix', then fetches every hit
    again through ``get_business`` (one extra request per result) and reads
    the excerpt of its first review.
    """
    yelp = Client(settings.YELP_AUTH)
    matches = yelp.search('Phoenix', term=query).businesses
    excerpts = []
    for match in matches:
        details = yelp.get_business(match.id).business
        excerpts.append(details.reviews[0].excerpt)
    return excerpts
Ejemplo n.º 2
0
def get_yelp_info(yelp_id):
    """Return a JSON payload with URL fields of the Yelp business ``yelp_id``.

    The request must carry both an ``Origin`` and a ``Referer`` header, and
    that pair must match a stored ``AllowedUrl`` entry.  A missing header, an
    unknown pair, or missing stored Yelp credentials all end in ``abort(404)``.

    The payload contains ``url``, ``image_url`` and ``rating_img_url``.
    """
    headers = request.headers
    origin = headers.get('Origin')
    referer = headers.get('Referer')
    if origin is None or referer is None:
        return abort(404)

    # Only callers whose Origin/Referer pair is on record may proceed.
    whitelist_entry = AllowedUrl.query(
        AllowedUrl.origin == origin,
        AllowedUrl.referer == referer).get()
    if whitelist_entry is None:
        return abort(404)

    # The API credentials are read from the first stored Yelp entity.
    credentials = Yelp.query().get()
    if credentials is None:
        return abort(404)

    authenticator = Oauth1Authenticator(
        consumer_key=credentials.consumer_key,
        consumer_secret=credentials.consumer_secret,
        token=credentials.token,
        token_secret=credentials.token_secret)
    business = Client(authenticator).get_business(yelp_id).business

    return jsonify({
        'url': business.url,
        'image_url': business.image_url,
        'rating_img_url': business.rating_img_url,
    })
class YelpClient(object):
    """Holds a Yelp ``Client`` authenticated with OAuth1 credentials."""

    def __init__(self):
        # NOTE(review): the credentials are embedded in source; consider
        # loading them from configuration instead.
        credentials = dict(
            consumer_key='NqKErS1dFKKwfxlc5KpB0Q',
            consumer_secret='BzO_xc7Jge-B5YeysLuLi-WkiHE',
            token='72CDWmpOaC8LEVgjY1bZVQgyX4v3v8fx',
            token_secret='yLfQC1-Vr_B5mpuqKtidnK_gnbo',
        )
        self.client = Client(Oauth1Authenticator(**credentials))

    def search(self, params):
        """Return the result of ``get_business(params)``.

        Despite the method name this performs a single-business lookup:
        ``params`` is handed to the client's ``get_business`` unchanged.
        """
        yelp = self.client
        return yelp.get_business(params)
def get_total_ratings(x):
    """Return the ``review_count`` of the Yelp business whose id is ``x``.

    A new authenticated client is built on every call.
    """
    # NOTE(review): the credentials are embedded in source; consider loading
    # them from configuration instead.
    credentials = dict(
        consumer_key='d8eoj4KNoPqOqE_RN9871Q',
        consumer_secret='pGVDNEGSaH8Kv-WZ8ba5v02IjCo',
        token='S-SfyVte5G0nCkTmbydWRtxlheNXCEnG',
        token_secret='Y_UViE9LthLQqW7_ht8U8V_F6aE',
    )
    yelp = Client(Oauth1Authenticator(**credentials))

    # The lookup response wraps the business; the count lives on the business.
    return yelp.get_business(x).business.review_count
Ejemplo n.º 5
0
class YelpService(object):
    """Yelp lookups with a per-instance cache of fetched businesses."""

    def __init__(self):
        # NOTE(review): the credentials are embedded in source; consider
        # loading them from configuration instead.
        credentials = dict(
            consumer_key="uz2Sv5gO6dwlnjRv3BqzwA",
            consumer_secret="VhgG3IucBO_eTheOlWzrVuuVjbU",
            token="bN1HD9FSDGqUWjzxbIkho_N1muVe0xcA",
            token_secret="hEdALK5D2gCI9-H3GwGKAw1jEYo",
        )
        self.client = Client(Oauth1Authenticator(**credentials))

        # Maps yelp id -> business object already fetched by this instance.
        self._business_cache = {}

    def get_location(self, yelp_id):
        """Return ``location.coordinate`` of the business ``yelp_id``."""
        return self._get_business(yelp_id).location.coordinate

    def get_name(self, yelp_id):
        """Return the name of the business ``yelp_id``."""
        return self._get_business(yelp_id).name

    def get_url(self, yelp_id):
        """Return the Yelp page URL of the business ``yelp_id``."""
        return self._get_business(yelp_id).url

    def _get_business(self, yelp_id):
        """Return the business for ``yelp_id``, fetching it only on a cache miss."""
        try:
            return self._business_cache[yelp_id]
        except KeyError:
            pass
        business = self.client.get_business(yelp_id).business
        self._business_cache[yelp_id] = business
        return business

    def search(self, query, location):
        """Return the businesses found for ``query`` around ``location``."""
        return self.client.search(location=location, term=query).businesses
Ejemplo n.º 6
0
#Client Search Function:
#Can be used to search by location, which can be specified by neighborhood, address or city.
#Can be used to search by a bounding box, which takes a southwest and a northeast lat/long for values
#Can also be used to search by geographic coordinates, which requires a lat/long
#Optional parameters are accuracy, altitude, and altitude_accuracy

#Documentation can be found at https://www.yelp.com/developers/documentation/v2/search_api

r = client.search(area, **params)

# For every hit: write a summary line to f1 and its id to f2, then fetch the
# business again to print its longitude and latitude.
for i, hit in enumerate(r.businesses):
    summary = ("[Name: " + hit.name + "] [Number of reviews : " +
               str(hit.review_count) + "] [Categories: ")
    print(summary, hit.categories, "]", file=f1)
    print(hit.id, file=f2)
    id1 = hit.id
    b = client.get_business(id1)
    coordinate = b.business.location.coordinate
    print(coordinate.longitude)
    print(coordinate.latitude)


def ids():
    """Return a list with the id of every business in the module-level ``response``."""
    return [business.id for business in response.businesses]
class YelpParser:
    """Yelp client wrapper plus a scraper for a business's "outside" photos.

    Credentials are read from ``yelp_config_secret.json`` (relative to the
    current working directory) and passed as keyword arguments to
    ``Oauth1Authenticator``.
    """

    def __init__(self):
        # Keep the file open only while the JSON is being parsed.
        with open('yelp_config_secret.json') as cred:
            creds = json.load(cred)
        self.client = Client(Oauth1Authenticator(**creds))
        self.baseurl = "https://www.yelp.com/biz_photos/"

    def get_lexicon_names_by_bounding_box(self, distance, **coordinate):
        """Search a bounding box around a point and index the hits by id.

        Args:
            distance: offset applied towards the south-west and north-east
                corners; presumably kilometres, since it is divided by 111
                (approx. km per degree of latitude) -- confirm with callers.
            **coordinate: must provide ``latitude`` and ``longitude``.

        Returns:
            dict mapping business id to the business object.

        Raises:
            KeyError: if ``latitude`` or ``longitude`` is missing.
        """
        params = {'lang': 'en'}
        latitude = coordinate['latitude']
        longitude = coordinate['longitude']
        # Degrees of longitude shrink with the cosine of the latitude.
        lon_scale = 111 * math.cos(latitude * math.pi / 180)

        latitude_sw = latitude + (
            distance * math.cos(-135 * math.pi / 180)) / 111
        longitude_sw = longitude + (
            distance * math.sin(-135 * math.pi / 180)) / lon_scale
        latitude_ne = latitude + (
            distance * math.cos(45 * math.pi / 180)) / 111
        longitude_ne = longitude + (
            distance * math.sin(45 * math.pi / 180)) / lon_scale

        print("original coordinate (%s, %s)" %
              (str(latitude), str(longitude)))
        print("south west coordinate (%s, %s)" %
              (str(latitude_sw), str(longitude_sw)))
        print("north east coordinate (%s, %s)" %
              (str(latitude_ne), str(longitude_ne)))

        response = self.client.search_by_bounding_box(latitude_sw,
                                                      longitude_sw,
                                                      latitude_ne,
                                                      longitude_ne, **params)
        return dict(
            (business.id, business) for business in response.businesses)

    def get_outside_images_for_businesses(self, businesses):
        """Return ``{business_id: [image urls]}`` for every id in ``businesses``.

        ``businesses`` is iterated for its keys, so the dict returned by
        ``get_lexicon_names_by_bounding_box`` can be passed directly.
        """
        return dict(
            (business_id, self.get_outside_images_for_business(business_id))
            for business_id in businesses)

    def get_outside_images_for_business(self, business_id):
        """Scrape the "outside" photo tab of one business.

        Returns:
            list of image URLs whose ``src`` contained ``258s``, with that
            marker replaced by ``o``.
        """
        url = self.baseurl + business_id + "?tab=outside"
        # NOTE(review): urllib.quote / urllib.urlopen are the Python 2
        # locations of these functions.
        url = urllib.quote(url.encode('utf8'), ':/?=')
        page = urllib.urlopen(url)
        try:
            html = page.read()
        finally:
            # Release the connection even if reading fails.
            page.close()

        soup = BeautifulSoup(html, 'html.parser')
        result = []
        for link in soup.findAll("img", {"class": "photo-box-img"}):
            src = link.get("src")
            # Images without a src attribute would otherwise raise TypeError.
            if src and "258s" in src:
                result.append(src.replace("258s", "o"))
        return result

    def get_businessname(self, business_id):
        """Return the name of ``business_id``, or ``''`` for an empty/None id."""
        if not business_id:
            return ''
        params = {'lang': 'en'}
        response = self.client.get_business(business_id, **params)
        return response.business.name
Ejemplo n.º 8
0
class YelpCollector(Collector):
    """Collect Yelp business data for Boston, MA and store it in MongoDB."""

    # Location used for every API search.
    SEARCH_LOCATION = "Boston, MA"
    # Public search page scraped for the total number of results.
    SEARCH_URL = "https://www.yelp.com/search?find_loc=Boston,+MA"
    # Offset step between two consecutive search batches
    # (presumably the API's default page size -- confirm).
    BATCH_SIZE = 20
    # Business fields dropped before a business is stored.
    DEFAULT_EXCLUDE = (
        "url", "mobile_url", "rating_img_url",
        "rating_img_url_small", "rating_img_url_large",
        "image_url", "snippet_image_url", "eat24_url",
        "reviews",
    )

    def __init__(self):
        self.name = "yelp"
        creds = self.loadCredentials()

        # Authenticate Yelp
        auth = Oauth1Authenticator(consumer_key=creds["consumer_key"],
                                   consumer_secret=creds["consumer_secret"],
                                   token=creds["token"],
                                   token_secret=creds["token_secret"])
        self.yelpClient = Client(auth)
        self.mongoClient = MongoClient(creds["mongodb"])

    def sample(self):
        """Return a search response limited to a single result."""
        # Search options are keyword arguments (``**url_params``), matching
        # every other ``client.search`` call; a positional dict is rejected.
        return self.yelpClient.search(self.SEARCH_LOCATION, limit=1)

    def collectBatch(self, offset):
        """Return the search response starting at result ``offset``."""
        return self.yelpClient.search(self.SEARCH_LOCATION, offset=offset)

    def _scrapeTotal(self):
        """Return the total result count shown on the search page, as text.

        The total reported by the API did not seem accurate to the original
        author, so the number is read from the website's pagination label.
        """
        response = requests.get(self.SEARCH_URL)
        soup = BeautifulSoup(response.content, 'html.parser')
        label = soup.select("span.pagination-results-window")[0].contents[0]
        # Drop the leading "Showing 1-10 of " so only the number remains.
        return label.strip()[len("Showing 1-10 of "):]

    def collectAll(self):
        """Print the scraped total and request the first batch."""
        print(self._scrapeTotal())

        # expected result: 78189, give or take a few
        self.collectBatch(0)

    def getIds(self):
        """Return the business ids of the first search batch only."""
        businesses = self.collectBatch(0).businesses
        businessIds = [business.id for business in businesses]
        print("Batch 0 complete.")
        return businessIds

    def store(self):
        pass

    def getBusiness(self, id):
        """Return the API response for the business with the given id."""
        return self.yelpClient.get_business(id)

    @staticmethod
    def _toDocument(business, exclude):
        """Return a plain-dict copy of ``business`` without the ``exclude`` fields.

        ``location`` and its ``coordinate`` are converted to dicts as well.
        Copies are used so the business object itself is left untouched.
        """
        document = dict(vars(business))
        location = document.get("location")
        if location is not None:
            location = dict(vars(location))
            coordinate = location.get("coordinate")
            if coordinate is not None:
                location["coordinate"] = dict(vars(coordinate))
            document["location"] = location
        for field in exclude:
            # A field missing from this business is simply skipped.
            document.pop(field, None)
        return document

    def collectAndStore(self, exclude=DEFAULT_EXCLUDE):
        """Store every business of the search in MongoDB, then print reviews.

        Args:
            exclude: names of business fields to drop before storing.

        Businesses go to the ``businesses`` collection; the running offset
        is kept in the ``info`` collection for future use.
        """
        db = self.mongoClient.get_default_database()
        total = int(self._scrapeTotal())

        # collect the business data
        currOffset = 0
        batchNum = 0
        businessIds = []
        db["info"].insert_one({"offset": 0})

        while currOffset < total:
            businesses = self.collectBatch(currOffset).businesses
            if not businesses:
                # Nothing more is being returned; stop instead of paging on
                # until the scraped total is reached.
                break
            for business in businesses:
                businessIds.append(business.id)
                db["businesses"].insert_one(
                    self._toDocument(business, exclude))

            currOffset += self.BATCH_SIZE
            # store the offset in the database for future use
            db["info"].update_one({}, {"$inc": {"offset": self.BATCH_SIZE}})

            print("Batch " + str(batchNum) + " complete.")
            batchNum += 1

        for bId in businessIds:
            print(bId)

            business = self.yelpClient.get_business(bId).business

            # collect all the reviews data
            reviews = business.reviews
            if reviews is not None:
                for review in reviews:
                    print(review.__dict__)

            # TODO: store business.gift_certificates (not implemented yet)
            # TODO: store business.deals (not implemented yet)
Ejemplo n.º 9
0
# Look for Montreal restaurants with English-language reviews.
params = {'term': 'restaurants', 'lang': 'en'}

response = client.search('Montreal', **params)

# Take the id of the first hit.
print(response.businesses[0].id)

str1 = str(response.businesses[0].id)

# Look that business up again.  The lookup gets its own options (params_b);
# the search options above carry a 'term' that only applies to searching.
params_b = {'lang': 'en'}

response_b = client.get_business(str1, **params_b)

#print(response_b.reviews)

# Example of a direct lookup by a known id:
#
#     response = client.get_business('yelp-san-francisco', **params)
#
#     print(response)
#     print(response.business.name)
#     print(response.business.categories)
Ejemplo n.º 10
0

print os.path.dirname(__file__)

sr = search_body()
rh = bus_holder()

with open(os.path.dirname(__file__) + '/confidential_config.json') as cred:
    creds = json.load(cred)
    auth = Oauth1Authenticator(**creds)
    client = Client(auth)
    sr.setparams('en', 'restaurants')
    res = client.search('New York', **sr.params)

    for bus in res.businesses:
        rh.fill(bus.id, bus.name)

    print rh.holder

    bus_sr = search_body()
    bus_sr.setparams('en')

    for k in rh.holder.keys():
        #print rh.holder[k]
        ans = client.get_business(rh.holder[k], **bus_sr.params)

        rh.reviews[k].append(ans.business.reviews[0].excerpt)

    for k in rh.reviews.keys():
        print k, '   ', rh.reviews[k][0]
    'lang': 'en'
}

response = client.search('Montreal', **params)

# Take the id of the first hit.
print(response.businesses[0].id)

str1 = str(response.businesses[0].id)

# Look that business up again.  The lookup gets its own options (params_b)
# rather than reusing the search options in ``params``.
params_b = {
    'lang': 'en'
}

response_b = client.get_business(str1, **params_b)

#print(response_b.reviews)

# Example of a direct lookup by a known id:
#
#     response = client.get_business('yelp-san-francisco', **params)
#
#     print(response)
#     print(response.business.name)
#     print(response.business.categories)
client = Client(auth)

params = {
	'term': 'Optometry',
	'category_filter': 'health'
	'sort':2
}

a=client.search('341+West+Tudor+Road+Anchorage+AK+99503', **params)
a.businesses
for item in a.businesses:
    print item.name, item.rating, item.review_count
				

b = client.get_business('', **params)

"city": "Anchorage", "zip": "99503", "phone": "9077706652", "state": "AK", "address_2": "Suite 101", "address": "341 West Tudor Road"



b = client.get_business('Makar', **params)
a.businesses    
for item in a.businesses:
    print item.name, item.rating, item.review_count
    
    
params = {
    'category': 'health'
}
Ejemplo n.º 13
0
def imageURL(id):
    """Return the ``image_url`` of the Yelp business with the given id.

    A new client is created from ``yelp_access.access()`` on every call.
    """
    yelp = Client(yelp_access.access())
    return yelp.get_business(id).business.image_url
Ejemplo n.º 14
0
    consumer_secret = "F5ulxGBDWJK3aNFen_CoLe3Ma0w",
    token = "0P8KGvetjnc_sJaQwG3OuzIentzaAcI9",
    token_secret = "Coxq7Z_FCpMO5_0GpDl32uOC9LM"
)

client = Client(auth)

#### Optional
#
#params = {
#    'lang': 'fr'
#}
#
####

response = client.get_business('yelp-san-francisco')
print response.business.name

'''
params = {
    'term': 'food',
    'lang': 'en'
}

responseObj = client.search('97-22 57th ave', **params)
'''
'''
print responseObj.businesses
print "\n"
'''
'''
Ejemplo n.º 15
0
class YelpBusinessScraper(object):
    """Look up Yelp businesses listed in a CSV file and write them to CSV.

    The input file must have a ``business_id`` column.  One output row per
    business is written to ``data/yelp/businesses.csv``, which is opened
    (and truncated) as soon as the scraper is constructed.
    """

    def __init__(self, yelp_config):
        """Create the Yelp client from the keys in ``yelp_config``.

        ``yelp_config`` must provide ``consumer_key``, ``consumer_secret``,
        ``token`` and ``token_secret``.
        """
        self.auth = Oauth1Authenticator(
            consumer_key=yelp_config['consumer_key'],
            consumer_secret=yelp_config['consumer_secret'],
            token=yelp_config['token'],
            token_secret=yelp_config['token_secret'])
        self.client = Client(self.auth)
        self.reader = csv.DictReader
        self.outfile = open('data/yelp/businesses.csv', "w+")
        self.db = []

    def read_csv(self, csv_filename):
        """Fetch every business listed in ``csv_filename`` and write it out.

        A row whose lookup fails is reported and skipped.  The output file
        is closed when the run ends, whether it succeeded or not.
        """
        try:
            with open(csv_filename) as csvfile:
                self.init_write_csv("data/yelp/businesses.csv")
                for row in self.reader(csvfile):
                    try:
                        self.get_yelp_business(row)
                    except Exception:
                        # Best effort: one bad record must not stop the run,
                        # but Ctrl-C / SystemExit still propagate.
                        print("Failed to get records for %s" %
                              row.get('business_id'))
        finally:
            self.outfile.close()
        print("Done!")

    def init_write_csv(self, csv_filename):
        """Attach a ``DictWriter`` to the output file and write the header.

        ``csv_filename`` is accepted for compatibility but not used; rows
        always go to the file opened in ``__init__``.
        """
        fieldnames = [
            'name', 'is_closed', 'business_id', 'address', 'city', 'zip_code',
            'longitude', 'latitude', 'neighborhood_1', 'neighborhood_2',
            'category_1', 'category_2'
        ]
        self.writer = csv.DictWriter(self.outfile, fieldnames=fieldnames)
        self.writer.writeheader()

    def get_yelp_business(self, row):
        """Fetch the business named by ``row['business_id']`` and write it."""
        response = self.client.get_business(row['business_id'])
        self.get_response_info(response)

    def get_response_info(self, response):
        """Normalise the fields of ``response.business`` and write one row."""
        response.business.neighborhoods = self.get_neighborhoods(response)
        response.business.categories = self.get_categories(response)
        response.business.address = self.get_address(response)
        self.writer.writerow(self.get_business_dict(response))

    @staticmethod
    def _first_two(items):
        """Return the first two entries of ``items`` as a list, padded with ""."""
        pair = list(items[:2]) if items else []
        return pair + [""] * (2 - len(pair))

    def get_neighborhoods(self, response):
        """Return the first two neighborhoods, ``""`` where there is none."""
        return self._first_two(response.business.location.neighborhoods)

    def get_categories(self, response):
        """Return element ``[1]`` of the first two categories, ``""``-padded."""
        categories = response.business.categories or []
        return self._first_two([category[1] for category in categories[:2]])

    def get_address(self, response):
        """Return the first address line, or ``""`` when there is none.

        A list/tuple yields its first element; any other non-empty value is
        returned unchanged.
        """
        addresses = response.business.location.address
        if not addresses:
            return ""
        if isinstance(addresses, (list, tuple)):
            return addresses[0]
        return addresses

    def get_business_dict(self, response):
        """Return the CSV row for ``response.business`` as a dict.

        Expects ``get_response_info`` to have set ``address``,
        ``neighborhoods`` and ``categories`` on the business already.
        ``latitude``/``longitude`` are ``None`` when the business has no
        coordinate.
        """
        business = response.business

        # getattr with a default keeps both names bound for every input,
        # including a missing coordinate and a legitimate value of 0.0.
        coordinate = getattr(business.location, 'coordinate', None)
        latitude = getattr(coordinate, 'latitude', None)
        longitude = getattr(coordinate, 'longitude', None)

        return {
            'name': business.name,
            'is_closed': business.is_closed,
            'business_id': business.id.encode("utf-8"),
            'address': business.address,
            'city': business.location.city,
            'zip_code': business.location.postal_code,
            'longitude': longitude,
            'latitude': latitude,
            'neighborhood_1': business.neighborhoods[0],
            'neighborhood_2': business.neighborhoods[1],
            'category_1': business.categories[0],
            'category_2': business.categories[1]
        }