# Tally how many pickled location records carry exactly one tag.
#
# For every zip code in NYCzips, the file "<zipcode>results2" is unpickled
# into a sized, iterable collection of location documents. Each document is
# parsed as XML and its tags are extracted with getTags. The share of
# locations that have exactly one tag is printed at the end.
import pickle

from bs4 import BeautifulSoup
from progressbar import ProgressBar, Percentage, Bar, Timer

from specialFunctions import NYCzips, getTags

locationsTotal = 0
oneTaggerCount = 0
zipcount = len(NYCzips)

# One progress tick per zip code.
p = ProgressBar(widgets=[Percentage(), Bar(), Timer()], maxval=zipcount)
p.start()
for index, zipcode in enumerate(NYCzips):
    # Open the file directly instead of building an exec() string, and let
    # the context manager close the handle once the pickle has been read.
    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only run this against result files you produced yourself.
    # NOTE(review): mode 'r' is kept from the original; switch to 'rb' if
    # the result files were written in binary mode -- confirm with the writer.
    with open(str(zipcode) + 'results2', 'r') as data:
        locations = pickle.load(data)
    locationsTotal += len(locations)
    for location in locations:
        soup = BeautifulSoup(location, 'xml')
        tags = getTags(soup)
        if len(tags) == 1:
            oneTaggerCount += 1
    p.update(index + 1)
p.finish()

# Guard against an empty data set so the summary does not raise
# ZeroDivisionError when no locations were loaded.
if locationsTotal:
    percentOneTag = (float(oneTaggerCount) / float(locationsTotal)) * 100
else:
    percentOneTag = 0.0

# The percentage is truncated (not rounded) to its first four characters.
print(str(percentOneTag)[:4] + '% of locations have only one tag :(')
print(str(oneTaggerCount) + ' out of ' + str(locationsTotal))
# Merge a matching Yelp listing into the current location document.
#
# A Yelp result is considered for this location when the address score from
# getPercentDiff is at least 0.6, or when the phone numbers are equal and
# the truncated latitude/longitude strings agree. If the restaurant-name
# score is also at least 0.6, the combined review count, any new Yelp
# category tags and a combined rating are appended to the document;
# otherwise the Yelp record is stored in yelpResults and reported as FLAGGED.
#
# NOTE(review): the original indentation of this fragment was lost; the
# nesting below is reconstructed from the data flow. In particular the
# `else` is assumed to pair with the restaurant-name check -- confirm
# against the original file.
if (getPercentDiff(cgAddress.upper(), yelpAddress.upper()) >= 0.6
        or (cgPhone == yelpPhone
            and yelpLat[:8] == cgLat[:8]
            and yelpLon[:9] == cgLon[:9])):
    if getPercentDiff(cgRestName.upper(), yelpRestName.upper()) >= 0.6:
        #soup.location.name = 'YELPED' #when yelp values added location id changed to YELPED
        #soup.user_review_count.string = str(cityRevCount + yelpRevCount)
        soup.location.append(souper.new_tag('yelped_review_count'))
        soup.location.yelped_review_count.append(
            unicode(cityRevCount + yelpRevCount))
        print("New Review Count = " + str(soup.yelped_review_count.text))
        print("Old Rating = " + soup.rating.text
              + " Yelp Rating = " + str(yelp(yelpData, 'avg_rating')))

        for tag in yelp(yelpData, 'categories'):
            tagName = tag['name']
            # Give every Yelp category seen for the first time the next
            # sequential id (presumably yelpTags is a dict of name -> id).
            if tagName not in yelpTags:
                yelpTags[tagName] = len(yelpTags) + 1
            # getTags is re-evaluated on every pass because the loop itself
            # adds tags to the document.
            if tagName not in getTags(soup):
                soup.tags.append(
                    souper.new_tag('tag', id='yelp' + str(yelpTags[tagName])))
                soup.tags.contents[-1].append(tagName)

        # Review-count-weighted mean of the existing rating and the doubled
        # Yelp rating (the doubling presumably maps Yelp's scale onto this
        # site's scale -- TODO confirm).
        try:
            newRating = unicode(
                (float(soup.rating.string) * cityRevCount
                 + float(yelp(yelpData, 'avg_rating')) * 2 * yelpRevCount)
                / float(yelpRevCount + cityRevCount))
        except (AttributeError, TypeError, ValueError, ZeroDivisionError):
            # No usable existing rating, or no reviews on either side:
            # fall back to the doubled Yelp rating alone.
            newRating = str(float(yelp(yelpData, 'avg_rating')) * 2)
        # Append the tag exactly once, whichever rating was computed.
        soup.location.append(souper.new_tag('yelped_rating'))
        soup.location.yelped_rating.append(newRating)
        print("New Rating = " + soup.yelped_rating.string)
        yelpedCount += 1
    else:
        # Location matched but the names differ too much: keep the raw Yelp
        # record for later inspection and report the name score.
        yelpResults.append(str(yelpData))
        print('FLAGGED: ' + cgRestName + ' ' + yelp(yelpData, 'name')
              + ' ::: ' + str(getPercentDiff(cgRestName, yelp(yelpData, 'name'))))