def get_details(self, url=None):
    """Scrape the Masala Box menu page and persist the restaurant record.

    Loads ``self.url``, waits up to 10 seconds for the configured dish
    elements to be present, lets ``Dish`` fill the record's ``dishes``
    list, then hands the record to ``DynamoDBWrite`` and ``WriteS3Parquet``.
    Failures are printed, not raised. The browser window is closed on both
    the success and the failure path.

    Args:
        url: Accepted for interface compatibility but not used; the page
            loaded is always ``self.url``.

    Returns:
        None. The populated record is kept on ``self.restaurant_obj``.
    """
    # Config strategy name -> Selenium locator strategy.
    locator_strategies = {
        'class': By.CLASS_NAME,
        'id': By.ID,
    }

    try:
        self.driver.get(self.url)

        selector = self.dish_config['SELECTORS']['DISHESH']
        find_by = selector['FIND_BY']
        value = selector['VALUE']
        if find_by not in locator_strategies:
            raise ValueError(
                'Unsupported FIND_BY for dishes: {!r}'.format(find_by))

        # Wait on the same locator that is used for the lookup, so an
        # id-based selector is not awaited as if it were a class name.
        locator = (locator_strategies[find_by], value)
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(locator))
        dishes = self.driver.find_elements(*locator)

        restaurant_obj = {
            'city_code': self.city_code,
            'name': "Masala Box",
            'type': 'North Indian, South Indian',
            'stars': 4.1,
            'ratings': '100+ ratings',
            'image': 'https://www.google.com/url?sa=i&source=images&cd=&ved=2ahUKEwju_pWC2N_mAhVOzzgGHe4dBvIQjRx6BAgBEAQ&url=https%3A%2F%2Fwww.masalabox.com%2F&psig=AOvVaw1h-_mKB5oipFia4-jSyvIU&ust=1577874845479690',
            'opens_at': None,
            'country': self.country,
            'subzone': 'General',
            'city': self.city,
            'platform': 'MASALABOX',
            'dishes': [],
            'added_on': str(datetime.datetime.utcnow()),
        }
        self.restaurant_obj = restaurant_obj

        dish_obj = Dish(self.dish_config, self.restaurant_obj)
        dish_obj.get_dishes(dishes)
        print("OBJJJJJJJJJJJJJ++++", len(self.restaurant_obj['dishes']))

        # Sort key format: <platform>__<subzone>__<name with underscores>.
        restaurant_obj['sort_key_info'] = '__'.join((
            restaurant_obj['platform'],
            restaurant_obj['subzone'],
            restaurant_obj['name'].strip().replace(' ', '_'),
        ))

        # The rating is sent to DynamoDB as a Decimal (presumably because
        # the DynamoDB client rejects floats -- confirm in DynamoDBWrite)
        # and converted back to float before the Parquet write.
        restaurant_obj['stars'] = Decimal(str(restaurant_obj['stars']))
        self.dynamodb_write_obj = DynamoDBWrite()
        self.dynamodb_write_obj.dynamodb_write(restaurant_obj)

        restaurant_obj['stars'] = float(restaurant_obj['stars'])
        self.write_to_s3_parquet_obj = WriteS3Parquet(self.city)
        self.write_to_s3_parquet_obj.write_to_parquet(restaurant_obj)
    except Exception as e:
        print('++++++++NOT DONE', e)
    finally:
        # Close the window whether or not the scrape succeeded.
        self.driver.close()
class Box8:
    """Scraper for the Box8 menu page.

    Selectors and the start URL come from the ``BOX8`` section of
    ``jwt-config.json``; pages are loaded in a headless Firefox session.
    """

    def __init__(self, url=None):
        """Load the BOX8 configuration and start a headless Firefox driver.

        Args:
            url: Accepted for interface compatibility but not used; the
                start URL always comes from the ``STARTER`` config section.
        """
        self.CONFIG_FILE = "jwt-config.json"
        with open(self.CONFIG_FILE, 'r') as config_file:
            self.data_set = json.load(config_file)

        self.config = self.data_set['BOX8']['CONFIG']
        self.starter_config = self.config['STARTER']
        self.dish_config = self.config['DISH']

        self.options = Options()
        self.options.headless = True
        self.driver = webdriver.Firefox(options=self.options)

        self.url = self.starter_config["URL"]
        self.city = 'general'
        self.country = 'india'
        self.city_code = self.city + '__' + self.country

    @staticmethod
    def _locator_strategy(find_by):
        """Map a config ``FIND_BY`` name to a Selenium ``By`` constant.

        Returns ``None`` for names other than 'class', 'id' and 'tag'.
        """
        return {
            'class': By.CLASS_NAME,
            'id': By.ID,
            'tag': By.TAG_NAME,
        }.get(find_by)

    def _find_categories(self):
        """Wait for the page and return the list of category elements.

        Raises:
            ValueError: If the CATEGORY_NAV or CATEGORIES selector uses an
                unsupported ``FIND_BY`` value.
        """
        selectors = self.dish_config['SELECTORS']

        wait_cfg = selectors['WAIT']
        wait_by = self._locator_strategy(wait_cfg['FIND_BY'])
        wait_value = wait_cfg['VALUE']
        # An unrecognised WAIT strategy skips the wait rather than failing.
        if wait_by is not None:
            WebDriverWait(self.driver, 10).until(
                EC.visibility_of_element_located((wait_by, wait_value)))

        nav_cfg = selectors['CATEGORY_NAV']
        categories_cfg = selectors['CATEGORIES']
        nav_by = self._locator_strategy(nav_cfg['FIND_BY'])
        nav_value = nav_cfg['VALUE']
        categories_by = self._locator_strategy(categories_cfg['FIND_BY'])
        categories_value = categories_cfg['VALUE']

        if nav_by is None:
            raise ValueError(
                'Unsupported FIND_BY for CATEGORY_NAV: {!r}'.format(
                    nav_cfg['FIND_BY']))
        if categories_by is None:
            raise ValueError(
                'Unsupported FIND_BY for CATEGORIES: {!r}'.format(
                    categories_cfg['FIND_BY']))

        ctg_nav = self.driver.find_element(nav_by, nav_value)
        # Always the plural lookup: the caller iterates over the result,
        # so every strategy (including 'id') must return a list.
        return ctg_nav.find_elements(categories_by, categories_value)

    def get_details(self, url):
        """Scrape the Box8 menu and persist the restaurant record.

        Loads ``self.url``, collects the category elements, lets ``Dish``
        add each category's dishes to the record, then hands the record to
        ``DynamoDBWrite`` and ``WriteS3Parquet``. Failures are printed, not
        raised. Once the page has loaded, the browser window is closed on
        both the success and the failure path.

        Args:
            url: Accepted for interface compatibility but not used; the
                page loaded is always ``self.url``.

        Returns:
            None.
        """
        try:
            print('+++++getting restro page')
            self.driver.get(self.url)
            try:
                categories = self._find_categories()

                restaurant_obj = {
                    'city_code': self.city_code,
                    'name': "Box8",
                    'type': "North Indian, South Indian",
                    'stars': 4.1,
                    'ratings': "100+ Ratings",
                    'image': "https://assets.box8.co.in/images/Box8.jpg",
                    'country': self.country,
                    # NOTE(review): the record carries 'Bangalore' while
                    # city_code and the Parquet writer use self.city --
                    # confirm which value is intended.
                    'city': 'Bangalore',
                    'subzone': 'General',
                    'platform': 'Box8',
                    'dishes': [],
                    'added_on': str(datetime.datetime.utcnow()),
                }

                self.dish_obj = Dish(self.driver, self.dish_config)
                for ctg in categories:
                    self.dish_obj.get_dishes(ctg, restaurant_obj)
                print('+++++++', len(restaurant_obj['dishes']))

                # Sort key format: <platform>__<subzone>__<name>.
                restaurant_obj['sort_key_info'] = '__'.join((
                    restaurant_obj['platform'],
                    restaurant_obj['subzone'],
                    restaurant_obj['name'].strip().replace(' ', '_'),
                ))

                # The rating is sent to DynamoDB as a Decimal (presumably
                # because the DynamoDB client rejects floats -- confirm in
                # DynamoDBWrite) and converted back to float for Parquet.
                restaurant_obj['stars'] = Decimal(str(restaurant_obj['stars']))
                self.dynamodb_write_obj = DynamoDBWrite()
                self.dynamodb_write_obj.dynamodb_write(restaurant_obj)

                restaurant_obj['stars'] = float(restaurant_obj['stars'])
                self.write_to_s3_parquet_obj = WriteS3Parquet(self.city)
                self.write_to_s3_parquet_obj.write_to_parquet(restaurant_obj)
            except Exception as e:
                print('++++++++NOT DONE', e)
            finally:
                # Close the window exactly once, success or failure.
                self.driver.close()
        except Exception as e:
            print('+++++++Exception while contents of given url', e)
def get_details(self):
    """Scrape the Ready Bowl menu page and persist the restaurant record.

    Loads ``self.url`` with a 30 second page-load timeout, makes a
    best-effort click on a modal button, collects the configured dish
    elements and lets ``Dish`` fill the record's ``dishes`` list. The
    browser window is closed as soon as scraping finishes, before the
    record is handed to ``DynamoDBWrite`` and ``WriteS3Parquet``.
    Failures are printed, not raised.

    Returns:
        None. The populated record is kept on ``self.restaurant_obj``.
    """
    # The button is located by this fixed XPath; the PRE_ORDER selector in
    # the starter config is not consulted.
    modal_button_xpath = '//*[@id="hmodal"]/div/div/div[2]/button'
    # Config strategy name -> Selenium locator strategy.
    locator_strategies = {
        'class': By.CLASS_NAME,
        'css': By.CSS_SELECTOR,
    }

    try:
        try:
            self.driver.set_page_load_timeout(30)
            self.driver.get(self.url)
            print("+++++++++HEREEEEE coming")

            # Best effort: if the button never becomes clickable the
            # scrape still proceeds.
            try:
                WebDriverWait(self.driver, 10).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, modal_button_xpath))).click()
                print("+++++++CLICKED")
            except Exception as e:
                print("))))))))ERROR", e)

            selector = self.dish_config['SELECTORS']['DISHESH']
            find_by = selector['FIND_BY']
            value = selector['VALUE']
            if find_by not in locator_strategies:
                raise ValueError(
                    'Unsupported FIND_BY for dishes: {!r}'.format(find_by))
            dishes = self.driver.find_elements(
                locator_strategies[find_by], value)
            print("+++++++++HEREEEEE")

            restaurant_obj = {
                'city_code': self.city_code,
                'name': "Ready Bowl",
                'type': 'North Indian, South Indian',
                'stars': 4.1,
                'ratings': '100+ ratings',
                'image': 'https://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&ved=2ahUKEwjo7r61s-TmAhX1zDgGHfJjAkoQjRx6BAgBEAQ&url=https%3A%2F%2Fwww.zomato.com%2Fbangalore%2Freadybowl-btm&psig=AOvVaw2OnHt5mwTrtZrmFtJTEItS&ust=1578036819956899',
                'opens_at': None,
                'country': self.country,
                'subzone': 'General',
                'city': self.city,
                'platform': 'READYBOWL',
                'dishes': [],
                'added_on': str(datetime.datetime.utcnow()),
            }
            self.restaurant_obj = restaurant_obj

            dish_obj = Dish(self.dish_config, self.restaurant_obj,
                            self.driver)
            dish_obj.get_dishes(dishes)
        finally:
            # Close exactly once. A later persistence failure must not
            # trigger a second close() on an already-closed session.
            self.driver.close()

        # Sort key format: <platform>__<subzone>__<name with underscores>.
        restaurant_obj['sort_key_info'] = '__'.join((
            restaurant_obj['platform'],
            restaurant_obj['subzone'],
            restaurant_obj['name'].strip().replace(' ', '_'),
        ))

        # The rating is sent to DynamoDB as a Decimal (presumably because
        # the DynamoDB client rejects floats -- confirm in DynamoDBWrite)
        # and converted back to float before the JSON dump and Parquet
        # write.
        restaurant_obj['stars'] = Decimal(str(restaurant_obj['stars']))
        self.dynamodb_write_obj = DynamoDBWrite()
        self.dynamodb_write_obj.dynamodb_write(restaurant_obj)

        restaurant_obj['stars'] = float(restaurant_obj['stars'])
        print("OBJJJJJJJJJJJJJ++++print", json.dumps(restaurant_obj))
        self.write_to_s3_parquet_obj = WriteS3Parquet(self.city)
        self.write_to_s3_parquet_obj.write_to_parquet(restaurant_obj)
    except Exception as e:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        print('++++++++NOT DONE', e)