import restaurant
from yelp import Yelp


class City(object):
    def __init__(self, keys, city_name, restaurants_to_find=None):
        self.restaurant_list = []
        self.yelp = Yelp(keys['client_id'], keys['client_secret'])
        self.city_name = city_name
        # guard against the None default; iterating over None raises TypeError
        for name in restaurants_to_find or []:
            self.add_restaurant(name)

    def add_restaurant(self, restaurant_name):
        search_results = self.yelp.search(restaurant_name, self.city_name)['businesses']
        if len(search_results) == 0:
            return None  # no match found; caller can decide what to do
        business = search_results[0]
        params = {
            restaurant.RESTAURANT_NAME: business['name'],
            restaurant.RESTAURANT_RATING: business['rating'],
            restaurant.RESTAURANT_REVIEW_COUNT: business['review_count'],
            restaurant.RESTAURANT_CUISINE: business['categories'][0]['alias'],
            restaurant.RESTAURANT_LINK: business['url'].split('?')[0],
            restaurant.RESTAURANT_LOCATION: business['location']['city'],
            # 'price' is absent from some Yelp payloads, so avoid a KeyError
            restaurant.RESTAURANT_PRICE: business.get('price', ''),
        }
        self.restaurant_list.append(restaurant.Restaurant(params))

    def output_restaurants(self):
        result = restaurant.Restaurant.getCsvHeader()
        for r in self.restaurant_list:
            result += r.toCsvLine()
        return result
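# A minimal usage sketch (hedged): the key names follow the constructor
# above; the city and restaurant names are placeholders, not project data.
if __name__ == '__main__':
    keys = {'client_id': 'YOUR_CLIENT_ID', 'client_secret': 'YOUR_CLIENT_SECRET'}
    city = City(keys, 'San Francisco', restaurants_to_find=['Tartine Bakery'])
    print(city.output_restaurants())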
import time

from yelp import Yelp  # assumed project-local wrapper, as in the other snippets


def yelp_checking(data):
    start = time.time()
    email = None
    try:
        yelp_close = Yelp(data).check()
        if yelp_close:
            email = 'C'
            print('C in yelp')
            return email
    except Exception as exp:
        # report failures instead of silently swallowing them
        print("Yelp check failed: {}".format(exp))
    end = time.time()
    print("----{} seconds---- Yelp".format(end - start))
    return email
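# Hypothetical call (hedged): the shape of the `data` argument is defined by
# the Yelp wrapper's constructor and check() method, neither shown here.
# email_flag = yelp_checking(business_record)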
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from multiprocessing import cpu_count

from yelp import Yelp
from model import SelfVAE
from utils import linear_anneal

# device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Yelp dataset
data_path = '../data'
max_len = 64
splits = ['train', 'valid', 'test']
datasets = {split: Yelp(root=data_path, split=split) for split in splits}

# data loaders: shuffle only the training split
batch_size = 32
dataloaders = {
    split: DataLoader(datasets[split],
                      batch_size=batch_size,
                      shuffle=(split == 'train'),
                      num_workers=cpu_count(),
                      pin_memory=torch.cuda.is_available())
    for split in splits
}
symbols = datasets['train'].symbols

# SelfVAE model
embedding_size = 300
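# The excerpt is truncated after `embedding_size`. A minimal hedged sketch of
# the next wiring step; SelfVAE's constructor arguments (vocab_size,
# embedding_size) are assumptions here, not the project's actual API.
model = SelfVAE(vocab_size=len(symbols), embedding_size=embedding_size).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)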
import argparse

from webdirectory import WebDirectory
from yelp import Yelp

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Python lead generator")
    # --url is dereferenced unconditionally below, so require it
    parser.add_argument("--url", required=True,
                        help="Url of the web directory to crawl")
    parser.add_argument("--searchterm", help="What to search for, e.g. accountants")
    parser.add_argument("--location",
                        help="Where to search for the search term, e.g. London")
    parser.add_argument("--maxhits", help="The maximum number of hits to return")
    parser.add_argument("--stealth-level",
                        help="The amount of time to pause between each scrape, "
                             "in seconds")
    args = parser.parse_args()

    # instantiate the scraper; assigning the bare class would set the
    # attributes below on the class itself rather than on an instance
    wd = WebDirectory()
    if "YELP" in args.url.upper():
        print("Using yelp")
        wd = Yelp()
    elif "GOOGLE" in args.url.upper():
        print("Google is not implemented at this time")
    else:
        print("That url is not supported")

    wd.url = args.url
    wd.searchterm = args.searchterm
    wd.location = args.location
    wd.maxhits = args.maxhits
    wd.stealth_level = args.stealth_level or 0
    print(type(wd))
    wd.run()
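# Example invocation (hedged): the script name `leadgen.py` is a placeholder,
# and the exact Yelp URL format the scraper accepts is an assumption.
#
#   python leadgen.py --url https://www.yelp.com --searchterm accountants \
#       --location London --maxhits 50 --stealth-level 2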
import os
import logging
import traceback
from multiprocessing import Process

# Assumed project-level imports (not shown in this excerpt): the Yelp,
# YellowPages, Groupon and Craiglist scraper classes, the txt_write() and
# sleep_scrapper() helpers, and the PROGRESS_* / ST_* constants.


class Scrapper(object):

    def __init__(self):
        super(Scrapper, self).__init__()
        self.yelp = Yelp()
        self.yellowpages = YellowPages()
        self.groupon = Groupon()
        self.craiglist = Craiglist()

    #####################################
    #            Shoe Repair            #
    #####################################
    def scrap_yelp_shoe_repair(self):
        """Scrap shoe repair data from Yelp."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (dir_path, PROGRESS_YELP_SHOE_REPAIR_FILE)
        # if the progress file is not found, create one with default value 1
        if not os.path.isfile(file_data):
            logging.error("Error: %s file not found" % file_data)
            txt_write(PROGRESS_YELP_SHOE_REPAIR_FILE, "1")
        logging.info("file exists %s ..." % file_data)
        with open(PROGRESS_YELP_SHOE_REPAIR_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nYelp-Shoe-Repair-Scrapper started at i: %d \n\n" % i
        )
        base_url = "https://www.yelp.com/search?find_desc" \
                   "=Shoe+repair&find_loc=New+York,+NY&start="
        for j in range(i, 820, 10):
            try:
                yelp_url = base_url + str(j)
                self.yelp.scrap_yelp(yelp_url, ST_SHOE_REPAIR)
                # progress of yelp_shoe_repair scrapping %
                percentage = float(j * 100 / 820)
                logging.info(
                    "\n\nYelp-Shoe-Repair-Scrapper %f percent "
                    "completed" % percentage
                )
                # update scrapping progress in yelp_shoe_repair progress file
                txt_write(PROGRESS_YELP_SHOE_REPAIR_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("Yelp-Shoe-Repair-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_yelp_shoe_repair() :: Got exception: %s" % exp
                )
                logging.error(traceback.format_exc())

    def scrap_yellowpages_shoe_repair(self):
        """Scrap shoe repair data from yellow pages."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (
            dir_path, PROGRESS_YELLOWPAGES_SHOE_REPAIR_FILE
        )
        if not os.path.isfile(file_data):
            logging.error("Error: %s file not found" % file_data)
            txt_write(PROGRESS_YELLOWPAGES_SHOE_REPAIR_FILE, "1")
        logging.info("file exists %s ..." % file_data)
        with open(PROGRESS_YELLOWPAGES_SHOE_REPAIR_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nYellowpages-Shoe-Repair-Scrapper started at i: %d \n\n" % i
        )
        base_url = "https://www.yellowpages.com/search?" \
                   "search_terms=Shoe%20Repair" \
                   "&geo_location_terms=New%20York%2C%20NY&page="
        for j in range(i, 30, 1):
            try:
                yellowpages_url = base_url + str(j)
                self.yellowpages.scrap_yellowpages(yellowpages_url,
                                                   ST_SHOE_REPAIR)
                # progress of yellowpages_shoe_repair scrapping %
                percentage = float(j * 100 / 30)
                logging.info(
                    "\n\nYellowPages-Shoe-Repair-Scrapper %f percent "
                    "completed" % percentage
                )
                # update scrapping progress in the shoe-repair progress file
                txt_write(PROGRESS_YELLOWPAGES_SHOE_REPAIR_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("YellowPages-Shoe-Repair-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_yellowpages_shoe_repair() :: "
                    "Got Exception : %s" % exp
                )
                logging.error(traceback.format_exc())

    def scrap_groupon_shoe_repair(self):
        """Scrap shoe repair data from groupon."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (dir_path, PROGRESS_GROUPON_SHOE_REPAIR_FILE)
        # if progress file is not found,
        # create a new one with default value 1
        if not os.path.isfile(file_data):
            logging.error("Error: %s file not found" % file_data)
            txt_write(PROGRESS_GROUPON_SHOE_REPAIR_FILE, "1")
        logging.info("file exists %s ..." % file_data)
        with open(PROGRESS_GROUPON_SHOE_REPAIR_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nGroupon-Shoe-Repair-Scrapper started at i: %d \n\n" % i
        )
        # the trailing "&page=" is assumed here, mirroring the dry-cleaner
        # Groupon URL below; without it the page index lands on the locale
        base_url = "https://www.groupon.com/browse/chicago?" \
                   "lat=41.8795&lng=-87.6243&address=Chicago&query=" \
                   "new+york+shoe+repair&locale=en_US&page="
        for j in range(i, 400, 1):
            try:
                groupon_url = base_url + str(j)
                self.groupon.scrap_groupon(groupon_url, ST_SHOE_REPAIR)
                # progress of groupon_shoe_repair scrapping %
                percentage = float(j * 100 / 400)
                logging.info(
                    "\n\nGroupon-Shoe-Repair-Scrapper %f percent "
                    "completed" % percentage
                )
                # update scrapping progress in the shoe-repair progress file
                txt_write(PROGRESS_GROUPON_SHOE_REPAIR_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("Groupon-Shoe-Repair-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_groupon_shoe_repair() :: Got Exception : %s" % exp
                )
                logging.error(traceback.format_exc())

    def scrap_craiglist_shoe_repair(self):
        """Scrap shoe repair data from craiglist."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (dir_path, PROGRESS_CRAIGLIST_SHOE_REPAIR_FILE)
        # if progress file is not found,
        # create a new one with default value 1
        if not os.path.isfile(file_data):
            logging.error("Error: %s file is not found" % file_data)
            txt_write(PROGRESS_CRAIGLIST_SHOE_REPAIR_FILE, "1")
        logging.info("file exists %s ..." % file_data)
        with open(PROGRESS_CRAIGLIST_SHOE_REPAIR_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nCraiglist-Shoe-Repair started at i: %d \n\n" % i
        )
        # Craigslist's "s" query parameter is assumed as the paging offset
        base_url = "https://newyork.craigslist.org/search" \
                   "/sss?query=shoe+repair&sort=rel&s="
        for j in range(i, 90, 1):
            try:
                craiglist_url = base_url + str(j)
                self.craiglist.scrap_craiglist(craiglist_url, ST_SHOE_REPAIR)
                # progress of craiglist_shoe_repair scrapping %
                percentage = float(j * 100 / 90)
                logging.info(
                    "\n\nCraiglist-Shoe-Repair-Scrapper %f "
                    "percent completed" % percentage
                )
                # update scrapping progress in the shoe-repair progress file
                txt_write(PROGRESS_CRAIGLIST_SHOE_REPAIR_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("Craiglist-Shoe-Repair-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_craiglist_shoe_repair() :: "
                    "Got Exception : %s" % exp
                )
                logging.error(traceback.format_exc())

    #####################################
    #            Dry Cleaner            #
    #####################################
    def scrap_yelp_dry_cleaners(self):
        """Scrap dry cleaners data from yelp."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (dir_path, PROGRESS_YELP_FILE)
        # if progress file is not found,
        # create a new one with default value 10
        if not os.path.isfile(file_data):
            logging.error("Error: %s file not found" % file_data)
            print("Error: %s file not found" % file_data)
            txt_write(PROGRESS_YELP_FILE, "10")
        logging.info("file exists %s ..." % file_data)
        print("file exists %s ..." % file_data)
        with open(PROGRESS_YELP_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nYelp-Dry-Cleaner-Scrapper started at i: %d \n\n" % i
        )
        print("\n\nYelp-Dry-Cleaner-Scrapper started at i: %d \n\n" % i)
        base_url = "https://www.yelp.com/search?find_desc" \
                   "=Dry+Cleaners&find_loc=New+York%2C+NY&start="
        for j in range(i, 1000, 10):
            try:
                yelp_url = base_url + str(j)
                self.yelp.scrap_yelp(yelp_url, ST_DRY_CLEANERS)
                # progress % of yelp_dry_cleaner scrapping
                percentage = float(j * 100 / 1000)
                logging.info(
                    "\n\nYelp-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                print(
                    "\n\nYelp-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                # update scrapping progress in yelp_dry_cleaner progress file
                txt_write(PROGRESS_YELP_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("Yelp-Dry-Cleaner-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_yelp_dry_cleaners() :: Got Exception : %s" % exp
                )
                logging.error(traceback.format_exc())

    def scrap_yellowpages_dry_cleaner(self):
        """Scrap dry cleaners data from yellow pages."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (dir_path, PROGRESS_YELLOWPAGES_FILE)
        # if progress file is not found,
        # create a new one with default value 1
        if not os.path.isfile(file_data):
            logging.error("Error: %s file not found" % file_data)
            print("Error: %s file not found" % file_data)
            txt_write(PROGRESS_YELLOWPAGES_FILE, "1")
        logging.info("file exists %s ..." % file_data)
        print("file exists %s ..." % file_data)
        with open(PROGRESS_YELLOWPAGES_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nYellowPages-Dry-Cleaner-Scrapper started at i: %d \n\n" % i
        )
        print("\n\nYellowPages-Dry-Cleaner-Scrapper started at i: %d \n\n" % i)
        base_url = "https://www.yellowpages.com/" \
                   "new-york-ny/dry-cleaners-laundries?page="
        for j in range(i, 86, 1):
            try:
                yellowpages_url = base_url + str(j)
                self.yellowpages.scrap_yellowpages(yellowpages_url,
                                                   ST_DRY_CLEANERS)
                # progress % of yellowpages_dry_cleaner scrapping
                percentage = float(j * 100 / 86)
                logging.info(
                    "\n\nYellowPages-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                print(
                    "\n\nYellowPages-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                # update scrapping progress in
                # yellowpages_dry_cleaner progress file
                txt_write(PROGRESS_YELLOWPAGES_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("YellowPages-Dry-Cleaner-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_yellowpages_dry_cleaner() :: "
                    "Got Exception : %s" % exp
                )
                logging.error(traceback.format_exc())

    def scrap_groupon_dry_cleaner(self):
        """Scrap dry cleaners data from groupon."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (dir_path, PROGRESS_GROUPON_FILE)
        # if progress file is not found,
        # create a new one with default value 1
        if not os.path.isfile(file_data):
            logging.error("Error: %s file not found" % file_data)
            print("Error: %s file not found" % file_data)
            txt_write(PROGRESS_GROUPON_FILE, "1")
        logging.info("file exists %s ..." % file_data)
        print("file exists %s ..." % file_data)
        with open(PROGRESS_GROUPON_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nGroupon-Dry-Cleaner-Scrapper started at i: %d \n\n" % i
        )
        print("\n\nGroupon-Dry-Cleaner-Scrapper started at i: %d \n\n" % i)
        base_url = "https://www.groupon.com/browse/chicago?" \
                   "lat=41.8795&lng=-87.6243&address=Chicago&query=" \
                   "dry+cleaners&locale=en_US&page="
        for j in range(i, 16, 1):
            try:
                groupon_url = base_url + str(j)
                self.groupon.scrap_groupon(groupon_url, ST_DRY_CLEANERS)
                # progress % of groupon_dry_cleaner scrapping
                percentage = float(j * 100 / 15)
                logging.info(
                    "\n\nGroupon-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                print(
                    "\n\nGroupon-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                # update scrapping progress in
                # groupon_dry_cleaner progress file
                txt_write(PROGRESS_GROUPON_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("Groupon-Dry-Cleaner-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_groupon_dry_cleaner() :: Got Exception : %s" % exp
                )
                logging.error(traceback.format_exc())

    def scrap_craiglist_dry_cleaner(self):
        """Scrap dry cleaners data from craiglist."""
        dir_path = os.path.dirname(os.path.realpath(__file__))
        file_data = "%s/%s" % (dir_path, PROGRESS_CRAIGLIST_FILE)
        # if progress file is not found,
        # create a new one with default value 1
        if not os.path.isfile(file_data):
            logging.error("Error: %s file is not found" % file_data)
            print("Error: %s file is not found" % file_data)
            txt_write(PROGRESS_CRAIGLIST_FILE, "1")
        logging.info("file exists %s ..." % file_data)
        print("file exists %s ..." % file_data)
        with open(PROGRESS_CRAIGLIST_FILE, "r") as f:
            i = int(f.readline())
        logging.info(
            "\n\nCraiglist-Dry-Cleaner started at i: %d \n\n" % i
        )
        print("\n\nCraiglist-Dry-Cleaner started at i: %d \n\n" % i)
        # Craigslist's "s" query parameter is assumed as the paging offset,
        # as in the shoe-repair variant above
        base_url = "https://newyork.craigslist.org/" \
                   "search/sss?query=dry%20cleaner&sort=rel&s="
        for j in range(i, 89, 1):
            try:
                craiglist_url = base_url + str(j)
                self.craiglist.scrap_craiglist(craiglist_url, ST_DRY_CLEANERS)
                # progress % of craiglist_dry_cleaner scrapping
                percentage = float(j * 100 / 89)
                logging.info(
                    "\n\nCraiglist-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                print(
                    "\n\nCraiglist-Dry-Cleaner-Scrapper %f "
                    "percent completed" % percentage
                )
                # update scrapping progress in
                # craiglist_dry_cleaner progress file
                txt_write(PROGRESS_CRAIGLIST_FILE, str(j))
                # sleep scrapper for a while
                sleep_scrapper("Craiglist-Dry-Cleaner-Scrapper")
            except Exception as exp:
                logging.error(
                    "scrap_craiglist_dry_cleaner() :: "
                    "Got Exception : %s" % exp
                )
                logging.error(traceback.format_exc())

    def dry_cleaner_processing(self):
        """Start dry cleaner processing: one process per source."""
        yelp_dry_cleaner_processing = Process(
            target=self.scrap_yelp_dry_cleaners)
        yelp_dry_cleaner_processing.start()
        yellowpages_dry_cleaner_processing = Process(
            target=self.scrap_yellowpages_dry_cleaner)
        yellowpages_dry_cleaner_processing.start()
        groupon_dry_cleaner_processing = Process(
            target=self.scrap_groupon_dry_cleaner)
        groupon_dry_cleaner_processing.start()

    def shoe_repair_processing(self):
        """Start shoe repair processing: one process per source."""
        yelp_shoe_repair_processing = Process(
            target=self.scrap_yelp_shoe_repair)
        yelp_shoe_repair_processing.start()
        yellowpages_shoe_repair_processing = Process(
            target=self.scrap_yellowpages_shoe_repair)
        yellowpages_shoe_repair_processing.start()
        groupon_shoe_repair_processing = Process(
            target=self.scrap_groupon_shoe_repair)
        groupon_shoe_repair_processing.start()
# (Truncated above: this is the tail of a Flask route handler in which
#  `results` has already been collected from the mapquest/yelp search.)
    output = 'Name/Rating/Price<br>'
    # show at most the first 10 results
    for index, result in enumerate(results[:10]):
        output += f'{index}, {result[1]}, {result[2]}, {result[3]}<br>'
    return f'''
    Results equal {output}
    <form action="/restaurants" method="POST" >
        <input type="number" name="restaurant" />
        <input type="submit" name="name" value="Go to restaurant's page" />
    </form>
    '''


@app.route("/restaurants", methods=['GET', 'POST'])
def restaurants():
    # request.form is only populated on POST; a plain GET would raise KeyError
    index = request.form['restaurant']
    print(mapquest.yelp_data)
    # yelp = Yelp()
    results = mapquest.call(int(index))
    return results


if __name__ == "__main__":
    # Don't forget to turn debug to False when launching
    yelp = Yelp()
    mapquest = MapQuest()
    app.run(host='0.0.0.0', port=8080, debug=True)
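# Example interaction (hedged sketch): with the server running on port 8080,
# the search page posts a result index back to /restaurants, e.g.:
#
#   curl -X POST -d "restaurant=3" http://localhost:8080/restaurants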
from pprint import PrettyPrinter

from yelp import Yelp  # project-local wrapper, as in the other snippets


def main():
    pp = PrettyPrinter(indent=2)
    yelp = Yelp()
    details = yelp.get_business()
    pp.pprint(details)
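# Standard entry-point guard so the module can be run directly.
if __name__ == '__main__':
    main()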