def validate_db(yelp_object, haven_model=None):
    """Sync the result of a Yelp business lookup into the local database.

    Copies the fields found in ``yelp_object`` onto ``haven_model`` and
    commits the session. When ``haven_model`` is None a new ``Business`` is
    created for the Yelp id and added to the session first.

    Args:
        yelp_object: dict-like lookup result. ``name`` and ``location`` are
            always read; ``id`` is read only when a new row is created.
            ``phone`` and the entries of ``location`` (``address``, ``city``,
            ``state_code``, ``postal_code``, ``coordinate``) are optional.
        haven_model: the existing ``Business`` to refresh, or None to create
            a new one.

    Returns:
        None. A failure while saving is reported on stdout, not raised.
    """
    print("yelp object in validate_db: %s" % (yelp_object,))
    print("haven_model in validate_db %s" % (haven_model,))

    is_new = haven_model is None
    if is_new:
        haven_model = Business()
        haven_model.yelp_id = yelp_object['id']

    haven_model.name = yelp_object['name']

    location = yelp_object['location']

    address = location.get('address')
    if address:
        haven_model.address_line_1 = address[0]
        if len(address) > 1:
            haven_model.address_line_2 = address[1]

    # Nothing in the local db should lack a city or state code. Only
    # non-empty values are copied, so a field that Yelp wiped cannot clear
    # the stored one.
    for yelp_key, attribute in (('city', 'city'),
                                ('state_code', 'state'),
                                ('postal_code', 'zipcode')):
        value = location.get(yelp_key)
        if value:
            setattr(haven_model, attribute, value)

    if yelp_object.get('phone'):
        haven_model.phone = yelp_object['phone']

    coordinate = location.get('coordinate')
    if coordinate:
        haven_model.latitude = coordinate['latitude']
        haven_model.longitude = coordinate['longitude']

    try:
        if is_new:
            db.session.add(haven_model)
            print("successfully added")
        db.session.commit()
        print('successfully committed')
        print("committed business: %s" % (haven_model,))
    except Exception as error:
        # Assuming db.session is a SQLAlchemy session: after a failed
        # commit it must be rolled back before it can be used again.
        db.session.rollback()
        print('ut-oh')
        print("validate_db could not save: %r" % (error,))
def save_businesses():
    """Store every record of the "business" data file as a ``Business`` row.

    For each record yielded by ``iterate_file("business", ...)`` a
    ``Business`` is populated and saved, then the record's categories and
    neighborhoods are handed to ``save_categories`` and
    ``save_neighborhoods`` together with the business id.

    Each record must provide ``business_id``, ``name``, ``full_address``,
    ``city``, ``state``, ``latitude``, ``longitude``, ``review_count``,
    ``open``, ``categories`` and ``neighborhoods``; ``stars`` defaults to 0.

    Returns:
        None.
    """
    copied_fields = ('business_id', 'name', 'full_address', 'city', 'state',
                     'latitude', 'longitude')

    for bdata in iterate_file("business", shortcircuit=False):
        business = Business()
        for field in copied_fields:
            setattr(business, field, bdata[field])

        business.stars = decimal.Decimal(bdata.get('stars', 0))
        business.review_count = int(bdata['review_count'])
        # Accept a real boolean as well as the string "True": comparing a
        # parsed boolean against "True" alone is always False, which would
        # mark every business as closed.
        # NOTE(review): presumably iterate_file yields parsed JSON - confirm.
        business.is_open = bdata['open'] in (True, "True")
        business.save()

        business_id = bdata['business_id']
        save_categories(business_id, bdata['categories'])
        save_neighborhoods(business_id, bdata['neighborhoods'])
def save_businesses():
    """Store every record of the "business" data file as a ``Business`` row.

    For each record yielded by ``iterate_file("business", ...)`` a
    ``Business`` is populated and saved, then the record's categories and
    attributes are handed to ``save_categories`` and ``save_attributes``
    together with the business id.

    Each record must provide ``business_id``, ``name``, ``address``,
    ``city``, ``neighborhood``, ``state``, ``latitude``, ``longitude``,
    ``review_count``, ``is_open``, ``attributes`` and ``categories``;
    ``stars`` defaults to 0.

    Returns:
        None.
    """
    copied_fields = ('business_id', 'name', 'address', 'city',
                     'neighborhood', 'state', 'latitude', 'longitude')

    for bdata in iterate_file("business", shortcircuit=False):
        business = Business()
        for field in copied_fields:
            setattr(business, field, bdata[field])

        business.stars = decimal.Decimal(bdata.get('stars', 0))
        business.review_count = int(bdata['review_count'])
        # Accept the number 1 as well as the string "1": comparing an
        # integer flag against "1" alone is always False, which would mark
        # every business as closed.
        # NOTE(review): presumably iterate_file yields parsed JSON - confirm.
        business.is_open = bdata['is_open'] in (1, "1")
        business.save()

        # A missing (None) collection is passed on as an empty list.
        attributes = bdata['attributes']
        if attributes is None:
            attributes = []
        categories = bdata['categories']
        if categories is None:
            categories = []

        business_id = bdata['business_id']
        save_categories(business_id, categories)
        save_attributes(business_id, attributes)
def _add_to_businesses(params):
    """Insert a business and its category links unless it already exists.

    Args:
        params: dict of business attributes. ``yelp_id`` is required. The
            other recognised keys are ``name``, ``address_line_1``,
            ``address_line_2``, ``city``, ``state``, ``zipcode``, ``phone``,
            ``latitude``, ``longitude`` and ``categories`` (an iterable of
            category names). Any other key is ignored.

    Returns:
        None in every case; the outcome is reported on stdout.
    """
    print(params)

    if Business.query.filter_by(yelp_id=params['yelp_id']).first():
        print("Already in Dictionary")
        return None

    # Plain attributes are copied one-to-one when present in params.
    business = Business()
    for field in ("yelp_id", "name", "address_line_1", "address_line_2",
                  "city", "state", "zipcode", "phone", "latitude",
                  "longitude"):
        if field in params:
            setattr(business, field, params[field])

    # Add a Category row for every category name that does not exist yet.
    cat_list = list(params.get("categories", ()))
    for cat in cat_list:
        if not Category.query.filter_by(category_name=cat).first():
            db.session.add(Category(category_name=cat))

    try:
        db.session.add(business)
        db.session.commit()
    except Exception:
        # Best effort: a business that cannot be stored is skipped.
        db.session.rollback()
        print("%s has insufficient information, skipping." % (business.name,))
        return None

    # Creates the rows in the business/category reference table. The
    # committed instance is reused instead of re-querying it per category.
    business_id = business.business_id
    for cat in cat_list:
        print(business_id)
        cat_object = Category.query.filter_by(category_name=cat).first()
        print(cat_object.category_name)
        category_id = cat_object.category_id

        already_linked = BusinessCategory.query.filter_by(
            business_id=business_id, category_id=category_id).first()
        if not already_linked:
            catbus = BusinessCategory()
            catbus.business_id = business_id
            catbus.category_id = category_id
            db.session.add(catbus)

    db.session.commit()
    print("added " + business.name + " to db")
    return None
def build_db(city, state):
    """Fill the local database with Yelp search results for one city.

    Pages through ``yelp_api.search_query`` 20 results at a time. For each
    result it stores a ``Business``, adds any ``Category`` rows that do not
    exist yet, and links the two through ``BusinessCategory`` rows. A result
    that cannot be stored is skipped and reported on stdout.

    Args:
        city: city name; joined with ``state`` as "city, state" to form the
            search location.
        state: state code for the search location.

    Returns:
        None.
    """
    page_size = 20
    # Per the original author's note, the largest offset the API accepts.
    max_offset = 1000

    city_state = city + ", " + state
    result_count = yelp_api.search_query(location=city_state)['total']
    print(result_count)

    # Stop at the reported total or the API's offset cap, whichever comes
    # first, rather than always requesting every page up to the cap.
    stop_offset = min(result_count, max_offset)

    offset = 0
    added = 0
    skipped = 0
    while offset < stop_offset:
        results = yelp_api.search_query(location=city_state, offset=offset)
        for result in results['businesses']:
            # Reset for every result so that one without categories does
            # not inherit the previous business's category list.
            cat_list = []
            try:
                location = result['location']

                business = Business()
                business.yelp_id = result['id']
                business.name = result['name']

                address = location.get('address')
                if address:
                    business.address_line_1 = address[0]
                    if len(address) > 1:
                        business.address_line_2 = address[1]

                business.city = location['city']
                business.state = location['state_code']
                business.zipcode = location['postal_code']

                if result.get('phone'):
                    business.phone = result['phone']

                coordinate = location.get('coordinate')
                if coordinate:
                    business.latitude = coordinate['latitude']
                    business.longitude = coordinate['longitude']

                # Every element of every category group is stored as a
                # category name; missing Category rows are created.
                for group in result.get('categories') or []:
                    for subtype in group:
                        if not Category.query.filter_by(
                                category_name=subtype).first():
                            db.session.add(Category(category_name=subtype))
                        cat_list.append(subtype)

                db.session.add(business)
                db.session.commit()

                # Creates the rows in the business/category reference table.
                bus_id = business.business_id
                for cat in cat_list:
                    cat_object = Category.query.filter_by(
                        category_name=cat).first()
                    catbus = BusinessCategory()
                    catbus.business_id = bus_id
                    catbus.category_id = cat_object.category_id
                    db.session.add(catbus)
                db.session.commit()
            except Exception as error:
                # Any failure lands here, not only duplicates. Assuming
                # db.session is a SQLAlchemy session, it must be rolled back
                # or every later result would fail as well.
                db.session.rollback()
                skipped += 1
                print("already added:%s" % (result.get('name'),))
                print('skipped' + str(skipped))
                print("reason: %r" % (error,))
            else:
                added += 1
                print("added" + str(added))
                print(business.name)

        print("added so far: " + str(added))
        db.session.commit()
        offset += page_size
def build_db(city, state):
    """Fill the local database with Yelp search results for one city.

    Pages through ``yelp_api.search_query`` 20 results at a time. For each
    result it stores a ``Business``, adds any ``Category`` rows that do not
    exist yet, and links the two through ``BusinessCategory`` rows. A result
    that cannot be stored is skipped and reported on stdout.

    Args:
        city: city name; joined with ``state`` as "city, state" to form the
            search location.
        state: state code for the search location.

    Returns:
        None.
    """
    page_size = 20
    # Per the original author's note, the largest offset the API accepts.
    max_offset = 1000

    city_state = city + ", " + state
    result_count = yelp_api.search_query(location=city_state)['total']
    print(result_count)

    # Stop at the reported total or the API's offset cap, whichever comes
    # first, rather than always requesting every page up to the cap.
    stop_offset = min(result_count, max_offset)

    offset = 0
    added = 0
    skipped = 0
    while offset < stop_offset:
        results = yelp_api.search_query(location=city_state, offset=offset)
        for result in results['businesses']:
            # Reset for every result so that one without categories does
            # not inherit the previous business's category list.
            cat_list = []
            try:
                location = result['location']

                business = Business()
                business.yelp_id = result['id']
                business.name = result['name']

                address = location.get('address')
                if address:
                    business.address_line_1 = address[0]
                    if len(address) > 1:
                        business.address_line_2 = address[1]

                business.city = location['city']
                business.state = location['state_code']
                business.zipcode = location['postal_code']

                if result.get('phone'):
                    business.phone = result['phone']

                coordinate = location.get('coordinate')
                if coordinate:
                    business.latitude = coordinate['latitude']
                    business.longitude = coordinate['longitude']

                # Every element of every category group is stored as a
                # category name; missing Category rows are created.
                for group in result.get('categories') or []:
                    for subtype in group:
                        if not Category.query.filter_by(
                                category_name=subtype).first():
                            db.session.add(Category(category_name=subtype))
                        cat_list.append(subtype)

                db.session.add(business)
                db.session.commit()

                # Creates the rows in the business/category reference table.
                bus_id = business.business_id
                for cat in cat_list:
                    cat_object = Category.query.filter_by(
                        category_name=cat).first()
                    catbus = BusinessCategory()
                    catbus.business_id = bus_id
                    catbus.category_id = cat_object.category_id
                    db.session.add(catbus)
                db.session.commit()
            except Exception as error:
                # Any failure lands here, not only duplicates. Assuming
                # db.session is a SQLAlchemy session, it must be rolled back
                # or every later result would fail as well.
                db.session.rollback()
                skipped += 1
                print("already added:%s" % (result.get('name'),))
                print('skipped' + str(skipped))
                print("reason: %r" % (error,))
            else:
                added += 1
                print("added" + str(added))
                print(business.name)

        print("added so far: " + str(added))
        db.session.commit()
        offset += page_size