Beispiel #1
0
address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

yelp_query = mysqldao.select('yelp', 'yelp_phone', ['phone', address, 'zip'])
# address = urllib.quote(yelp_query[1][1])
yelp_query

import json
import requests

address = [ [query[0], query[1]] for query in yelp_query ]
urls = []

for adr in address:
    url = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true' % urllib.quote(adr[1])
    urls.append(url)

    
resp = requests.get(urls[1])
data = json.loads(resp.text)
geo = data["results"][0]["geometry"]["location"]

data_entry = {'longitude': geo["lng"], 'latitude': geo["lat"], 'phone': address[1][0]}
data_entry

mysqldao.update('yelp', 'yelp_phone', ['latitude', 'longitude'], ['phone'], [data_entry])

print mysqldao.select('yelp', 'yelp_phone', ['*'], ['phone'],[{'phone': data_entry["phone"]}])
import yelp_etl
import mysqldao
import requests

BIZ_URL = "http://www.yelp.com/biz/"

keyword = 'price_range'
all_biz_id = mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,
                             ['id'])
list_biz_id = mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,
                              ['id'], [keyword], [{
                                  keyword: ''
                              }])

left_biz = len(list_biz_id)
total_biz = len(all_biz_id)
count = 0
for biz in list_biz_id:
    bizid = biz[0]
    url = BIZ_URL + bizid
    response = requests.get(url).text.encode('utf-8')
    linelist = str(response).split("\n")
    index = yelp_web.target_line_range(linelist, yelp_web.beginAttributeReg,
                                       yelp_web.endAttributeReg)
    attr_dict = yelp_web.attribute_match(linelist[index[0]:index[1]])
    attr_dict['id'] = bizid
    mysqldao.update(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,\
     yelp_web.list_attribute, ['id'], [attr_dict])
    count += 1
    print bizid, 'updated', "left ", left_biz - count
Beispiel #3
0
address = """
REPLACE(
CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip),
'  ', ' ')
address"""

yelp_query = mysqldao.select('dwdproject', 'yelp_phone',
                             ['phone', address, 'zip'])
address = [[query[0], query[1]] for query in yelp_query]
urls = []
for adr in address:
    url = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true' % urllib.quote(
        adr[1])
    urls.append(url)

resp = requests.get(urls[1])
data = json.loads(resp.text)
geo = data["results"][0]["geometry"]["location"]

data_entry = {
    'longitude': geo["lng"],
    'latitude': geo["lat"],
    'phone': address[1][0]
}

mysqldao.update('dwdproject', 'yelp_phone', ['longitude', 'latitude'],
                ['phone'], [data_entry])
print mysqldao.select('dwdproject', 'yelp_phone', ['*'], ['phone'],
                      [{
                          'phone': '7188924968'
                      }])
import yelp_web
import yelp_etl
import mysqldao
import requests

BIZ_URL="http://www.yelp.com/biz/"

keyword='price_range'
all_biz_id=mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id'])
list_biz_id=mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id'], [keyword], [{keyword:''}])

left_biz=len(list_biz_id)
total_biz=len(all_biz_id)
count = 0
for biz in list_biz_id:
	bizid=biz[0]
	url=BIZ_URL+bizid
	response=requests.get(url).text.encode('utf-8')
	linelist=str(response).split("\n")
	index=yelp_web.target_line_range(linelist, yelp_web.beginAttributeReg, yelp_web.endAttributeReg)
	attr_dict=yelp_web.attribute_match(linelist[index[0]:index[1]])
	attr_dict['id']=bizid
	mysqldao.update(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,\
		yelp_web.list_attribute, ['id'], [attr_dict])
	count+=1
	print bizid, 'updated', "left ", left_biz-count