address = """ REPLACE( CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip), ' ', ' ') address""" yelp_query = mysqldao.select('yelp', 'yelp_phone', ['phone', address, 'zip']) # address = urllib.quote(yelp_query[1][1]) yelp_query import json import requests address = [ [query[0], query[1]] for query in yelp_query ] urls = [] for adr in address: url = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true' % urllib.quote(adr[1]) urls.append(url) resp = requests.get(urls[1]) data = json.loads(resp.text) geo = data["results"][0]["geometry"]["location"] data_entry = {'longitude': geo["lng"], 'latitude': geo["lat"], 'phone': address[1][0]} data_entry mysqldao.update('yelp', 'yelp_phone', ['latitude', 'longitude'], ['phone'], [data_entry]) print mysqldao.select('yelp', 'yelp_phone', ['*'], ['phone'],[{'phone': data_entry["phone"]}])
import yelp_etl import mysqldao import requests BIZ_URL = "http://www.yelp.com/biz/" keyword = 'price_range' all_biz_id = mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id']) list_biz_id = mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id'], [keyword], [{ keyword: '' }]) left_biz = len(list_biz_id) total_biz = len(all_biz_id) count = 0 for biz in list_biz_id: bizid = biz[0] url = BIZ_URL + bizid response = requests.get(url).text.encode('utf-8') linelist = str(response).split("\n") index = yelp_web.target_line_range(linelist, yelp_web.beginAttributeReg, yelp_web.endAttributeReg) attr_dict = yelp_web.attribute_match(linelist[index[0]:index[1]]) attr_dict['id'] = bizid mysqldao.update(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,\ yelp_web.list_attribute, ['id'], [attr_dict]) count += 1 print bizid, 'updated', "left ", left_biz - count
address = """ REPLACE( CONCAT(address1, ' ',address2, ' ', city, ' ', state,' ', zip), ' ', ' ') address""" yelp_query = mysqldao.select('dwdproject', 'yelp_phone', ['phone', address, 'zip']) address = [[query[0], query[1]] for query in yelp_query] urls = [] for adr in address: url = 'http://maps.googleapis.com/maps/api/geocode/json?address="%s"&sensor=true' % urllib.quote( adr[1]) urls.append(url) resp = requests.get(urls[1]) data = json.loads(resp.text) geo = data["results"][0]["geometry"]["location"] data_entry = { 'longitude': geo["lng"], 'latitude': geo["lat"], 'phone': address[1][0] } mysqldao.update('dwdproject', 'yelp_phone', ['longitude', 'latitude'], ['phone'], [data_entry]) print mysqldao.select('dwdproject', 'yelp_phone', ['*'], ['phone'], [{ 'phone': '7188924968' }])
import yelp_web import yelp_etl import mysqldao import requests BIZ_URL="http://www.yelp.com/biz/" keyword='price_range' all_biz_id=mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id']) list_biz_id=mysqldao.select(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant, ['id'], [keyword], [{keyword:''}]) left_biz=len(list_biz_id) total_biz=len(all_biz_id) count = 0 for biz in list_biz_id: bizid=biz[0] url=BIZ_URL+bizid response=requests.get(url).text.encode('utf-8') linelist=str(response).split("\n") index=yelp_web.target_line_range(linelist, yelp_web.beginAttributeReg, yelp_web.endAttributeReg) attr_dict=yelp_web.attribute_match(linelist[index[0]:index[1]]) attr_dict['id']=bizid mysqldao.update(yelp_etl.db_name, yelp_etl.tb_yelp_restaurant,\ yelp_web.list_attribute, ['id'], [attr_dict]) count+=1 print bizid, 'updated', "left ", left_biz-count