def check_ajax_page_for_msg(label, msg, url):
    """Fetch ``url`` through a Firefox session and alert by SMS when ``msg`` is missing.

    The page is requested with the driver's ``request`` method, an NVIDIA
    client-side script is fetched and executed in the browser, and
    ``wait_for_ajax`` is called.  The text of the first response is then
    written to ``<label>.html`` and searched for ``msg``.

    Parameters
    ----------
    label : str
        Human-readable page name; used in messages and as the dump file stem.
    msg : str
        Text expected to be present in the response body.
    url : str
        Address of the page to check.

    Notes
    -----
    Any exception is reported by SMS and printed; nothing is re-raised.
    The browser is always shut down before returning.
    """
    driver = None
    try:
        driver = Firefox()
        driver.implicitly_wait(30)
        response = driver.request('GET', url)
        client_script = driver.request(
            'GET',
            'https://www.nvidia.com/etc/designs/nvidiaGDC/clientlibs_foundation.min.3a16fd19562feeb504bb63525a249962.js')
        driver.execute_script(client_script.text)
        wait_for_ajax(driver)

        if response.status_code != 200:
            send_sms_msg('Attention needed. Got a response other than 200 OK from ' + label + ' page.')

        # NOTE(review): the check runs on the raw response body, not on the
        # DOM rendered after the AJAX wait above — confirm this is intended.
        html = response.text
        disclaimer_msg_present = msg in html

        # Keep a copy of what was received for later inspection.
        with open(label + '.html', 'w', encoding='utf-8') as html_file:
            html_file.write(html)

        print('Value of disclaimer_msg_present for ' + label + ' page: ' + str(disclaimer_msg_present))
        if not disclaimer_msg_present:
            send_sms_msg('ATTENTION! ' + label + ' page message has changed.')
        print('\nResponse from ' + label + ' page link: ' + str(response.status_code) + '\n')
    except Exception as e:
        send_sms_msg('Exception occurred while sending request to Ironmaster website.')
        print(e)
    finally:
        # Each call starts its own browser, so it must be closed here or
        # repeated checks pile up Firefox processes.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                print(quit_error)
def atc():
    """Add one item to the cart and walk two Firefox sessions through checkout.

    Two browsers are driven in lockstep.  Each phase (add to cart, shipping
    form, payment step, card entry, final submit) is gated by an ``input()``
    prompt and is carried out on the first browser, then on the second.

    Relies on module-level ``getLink``, ``variants``, ``checkoutPayload``,
    ``creditCard`` and ``print_warn``.  Both browsers are quit before the
    function returns, including when a step raises.
    """
    link = "https://yeezysupply.com/products/womens-tubular-boot-pvc-transparent"
    add_to_cart_url = "https://yeezysupply.com/cart/add.js"
    cart_url = "https://yeezysupply.com/cart"
    click_checkout = (
        "document.getElementsByClassName('K__button CA__button-checkout')[0].click();"
    )
    click_continue = (
        "document.getElementsByClassName('step__footer__continue-btn btn')[0].click();"
    )

    browsers = []
    try:
        # Appended one at a time so a failure starting the second browser
        # still lets the first one be cleaned up.
        browsers.append(Firefox())
        browsers.append(Firefox())

        getLink()
        size = variants[input("Enter size: ")]
        payload = {"quantity": "1", "id": size}
        input("Press Enter load and add to cart...")

        # -------------- Go to link and ATC ---------------
        for browser in browsers:
            browser.get(link)
            browser.request('POST', add_to_cart_url, data=payload)
            browser.get(cart_url)
            browser.execute_script(click_checkout)

        # -------------- Go to shipping --------------
        input("CONTINUE TO SHIPPING...")
        for browser in browsers:
            for field in checkoutPayload:
                # field[0] is the input's element id, field[1] the text to type.
                browser.find_element_by_id(field[0]).send_keys(field[1])
            province = Select(
                browser.find_element_by_id("checkout_shipping_address_province"))
            province.select_by_value('Maryland')
            browser.execute_script(click_continue)

        # -------------- Go to payment --------------
        input("CONTINUE TO PAYMENT METHOD...")
        for browser in browsers:
            browser.execute_script(click_continue)

        # -------------- Fill card --------------
        input("FILL CREDIT CARD...")
        for browser in browsers:
            # The n-th credit-card entry is typed into the n-th matching iframe.
            for frame_index, entry in enumerate(creditCard):
                frame = browser.find_elements_by_xpath(
                    '//iframe[@frameborder="0"]')[frame_index]
                browser.switch_to.frame(frame)
                card_input = browser.find_element_by_id(entry[0])
                # NOTE(review): every item of the entry is typed, including
                # entry[0], which is also used as the element id — confirm.
                for chunk in entry:
                    card_input.send_keys(chunk)
                browser.switch_to.default_content()

        # -------------- FINAL STEP CHECKOUT --------------
        print_warn("CHECKOUT?")
        input("")
        for browser in browsers:
            browser.execute_script(click_continue)
        time.sleep(10)
    finally:
        for browser in browsers:
            try:
                browser.quit()
            except Exception as quit_error:
                print(quit_error)
class RedBubble:
    """Drive a Firefox session through redbubble.com and save product data.

    For every work in the logged-in portfolio, each product variant (color x
    print location) has its gallery images downloaded and watermarked, and a
    ``download.json`` summary is written under ``DOWNLOAD_DIR``.
    """

    def __init__(self):
        """Start Firefox with the configured preferences and load two pages."""
        self.options = Options()
        self.options.set_preference("intl.accept_languages", 'en-us')
        self.options.set_preference('useAutomationExtension', False)
        self.options.set_preference('dom.webdriver.enabled', False)
        self.driver = Firefox(options=self.options)
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )
        # NOTE(review): presumably opened so the browser fingerprint can be
        # inspected by hand before scraping starts — confirm.
        self.driver.get("https://amiunique.org/fp")
        time.sleep(5)
        self.driver.get("https://antcpt.com/score_detector/")
        time.sleep(5)

    def get_to(self, url):
        """Navigate to ``url`` unless the browser is already there."""
        if self.driver.current_url != url:
            self.driver.get(url)

    def download_image(self, file_name, image_url):
        """Download ``image_url``, stamp a watermark on it and save it.

        When the URL contains ``x1000`` a double-resolution variant of the
        URL is tried first; the original URL is used if that fails.

        Returns
        -------
        bool
            True when an image was saved to ``file_name``, False otherwise.
        """
        if "x1000" in image_url:
            big_image_url = image_url.replace("1000x1000", "2000x2000")
            big_image_url = big_image_url.replace("750x1000", "1500x2000")
            big_image_url = big_image_url.replace("1000", "2000")
            big_image_url = big_image_url.replace("750", "1500")
            # No "1000" is left after the replacements, so this recursion
            # is at most one level deep.
            if self.download_image(file_name, big_image_url):
                print(f"Gotten bigger image for {file_name} @ {big_image_url}")
                return True

        try:
            res = self.driver.request('GET', image_url)
        except requests.exceptions.ConnectionError:
            # One retry after a long pause; a second failure propagates.
            time.sleep(60)
            res = self.driver.request('GET', image_url)

        if not 200 <= res.status_code < 300:
            print(f"Download of {file_name} (@{image_url}) FAILED.")
            return False

        im = Image.open(io.BytesIO(res.content))
        width, height = im.size

        # Add watermark
        font_size = 72 if max(width, height) >= 2000 else 36
        font = ImageFont.truetype('Arial.ttf', font_size)
        draw = ImageDraw.Draw(im)
        text = "DuckHunt.me"
        fill_color = (128, 0, 0)
        if hasattr(draw, "textsize"):
            textwidth, textheight = draw.textsize(text, font)
        else:
            # Pillow 10 removed textsize(); the right/bottom edges of the
            # bounding box measured from (0, 0) give the extent needed to
            # keep the text inside the margin.
            _, _, textwidth, textheight = draw.textbbox((0, 0), text, font=font)
        margin = 30
        x = width - textwidth - margin
        y = height - textheight - margin
        try:
            draw.text((x, y), text, font=font, fill=fill_color)
        except TypeError:
            # Fall back to a single-value fill when the RGB tuple is rejected.
            draw.text((x, y), text, font=font, fill=128)

        # Write to file
        im.save(file_name)
        return True

    def clean_html(self, html_string):
        """Return ``html_string`` cleaned so that only ``id`` attributes remain."""
        tree = html.fromstring(html_string)
        cleaner = html.clean.Cleaner()
        cleaner.safe_attrs_only = True
        cleaner.safe_attrs = frozenset(['id'])
        cleaned = cleaner.clean_html(tree)
        return html.tostring(cleaned, encoding='unicode')

    def dismiss_cookie_popup(self):
        """Close the member prompt, then the toast button; True if the toast was clicked."""
        self.dismiss_member_prompt()
        try:
            self.driver.find_element_by_class_name(
                "Toastify__toast-body").find_element_by_tag_name(
                    "button").click()
            return True
        except (NoSuchElementException, ElementClickInterceptedException):
            return False

    def dismiss_member_prompt(self):
        """Click the overlay close button; True if it was present."""
        try:
            self.driver.find_element_by_class_name(
                "sailthru-overlay-close").click()
            return True
        except NoSuchElementException:
            return False

    def login(self):
        """Submit the login form and block until the URL changes."""
        self.driver.get("https://www.redbubble.com/en/auth/login")
        username = self.driver.find_element_by_xpath(
            '//*[@id="ReduxFormInput1"]')
        username.send_keys(USERNAME)
        password = self.driver.find_element_by_xpath(
            '//*[@id="ReduxFormInput2"]')
        password.send_keys(PASSWORD)
        connect = self.driver.find_element_by_xpath(
            '/html/body/div[1]/div[7]/div[2]/div[2]/div/form/span/button')
        prev_url = self.driver.current_url
        connect.click()
        time.sleep(5)
        # Still on the login URL: wait for the operator to finish by hand.
        while prev_url == self.driver.current_url:
            print("Please solve captcha")
            time.sleep(1)

    def change_locale(self):
        """Set the account to English / US / USD on the settings page."""
        self.get_to("https://www.redbubble.com/settings/show")
        locale_dropdown = Select(
            self.driver.find_element_by_xpath('//*[@id="settings_locale"]'))
        locale_dropdown.select_by_visible_text("English")
        country_code_dropdown = Select(
            self.driver.find_element_by_xpath(
                '//*[@id="settings_country_code"]'))
        country_code_dropdown.select_by_value("US")
        currency_dropdown = Select(
            self.driver.find_element_by_xpath(
                '//*[@id="settings_currency_iso"]'))
        currency_dropdown.select_by_value("USD")
        # Two alternative locations are tried for the submit control.
        try:
            button = self.driver.find_element_by_xpath(
                '/html/body/div/form/div[4]/input')
        except NoSuchElementException:
            button = self.driver.find_element_by_xpath(
                '/html/body/div[1]/div[6]/div[2]/form/div[4]/button')
        button.click()

    def _collect_links(self, class_name, prefix):
        """Return, shuffled, the hrefs of ``<a>`` elements with ``class_name`` that start with ``prefix``."""
        urls = []
        for element in self.driver.find_elements_by_class_name(class_name):
            if element.tag_name != 'a':
                continue
            href = element.get_attribute('href')
            # get_attribute returns None for an anchor without an href.
            if href and href.startswith(prefix):
                urls.append(href)
        random.shuffle(urls)
        return urls

    def get_works_urls(self):
        """Return the portfolio's work URLs in random order."""
        self.get_to("https://www.redbubble.com/en/portfolio/manage_works")
        self.dismiss_cookie_popup()
        return self._collect_links(
            'works_work-menu-option',
            "https://www.redbubble.com/people/duckhuntdiscord/works")

    def get_work_products_urls(self, work_url):
        """Return the product URLs listed on ``work_url`` in random order."""
        self.get_to(work_url)
        return self._collect_links('carousel_item-link',
                                   "https://www.redbubble.com/i/")

    def get_work_info(self, work_url):
        """Return the work's title and its URL reduced to scheme, host and path."""
        self.get_to(work_url)
        return {
            "name":
            self.driver.find_element_by_class_name(
                'work-information_title').text,
            "url":
            urljoin(work_url,
                    urlparse(work_url).path)
        }

    def _get_colorswatch(self):
        """Return the color swatch elements, or ``[None]`` when there are none.

        The color picker activator is clicked first when it is present.
        """
        try:
            colors_button = self.driver.find_element_by_css_selector(
                "[class^='ColorPickerActivator__colorPickerActivator--']")
        except NoSuchElementException:
            colors_button = None
        if colors_button:
            colors_button.click()
        try:
            colors_swatch = self.driver.find_elements_by_css_selector(
                "[class^='DesktopColorControls__swatch--']")
        except NoSuchElementException:
            colors_swatch = [None]
        if len(colors_swatch) == 0:
            colors_swatch = [None]
        return colors_swatch

    def _get_print_locations(self):
        """Return the ``printLocation`` inputs, or ``[None]`` when there are none."""
        print_locations = self.driver.find_elements_by_name("printLocation")
        if not print_locations:
            print_locations = [None]
        return print_locations

    def _get_sizes(self):
        """Return the ``size`` inputs, or ``[None]`` when there are none."""
        sizes = self.driver.find_elements_by_name("size")
        if not sizes:
            sizes = [None]
        return sizes

    def _download_gallery_images(self, img_folder):
        """Download the gallery images currently shown into ``img_folder``.

        Returns a list of ``{"link", "main"}`` dicts, one per saved image.
        """
        images_files = []
        for img in self.driver.find_elements_by_tag_name('img'):
            # Either attribute may be missing, in which case Selenium
            # returns None; treat that as "not a gallery image".
            src = img.get_attribute('src') or ''
            klass = img.get_attribute('class') or ''
            if not (src.startswith("https://ih1.redbubble.net/image")
                    and "GalleryImage__img--" in klass):
                continue
            # The class of the great-grandparent tells whether this is the
            # image in the right-hand preview column.
            great_grandparent = img.find_element_by_xpath(
                '..').find_element_by_xpath('..').find_element_by_xpath('..')
            main = "PreviewGallery__rightColumn--" in great_grandparent.get_attribute(
                'class')
            img_folder.mkdir(parents=True, exist_ok=True)
            # Files are numbered by how many were saved so far.
            image_name = img_folder / f"{len(images_files)}.jpg"
            if self.download_image(file_name=image_name, image_url=src):
                images_files.append({"link": str(image_name), "main": main})
        return images_files

    def _read_price(self):
        """Return the text of the first nested ``span > span`` beside the ``h1``, or ``''``."""
        config_div = self.driver.find_element_by_tag_name(
            'h1').find_element_by_xpath('..')
        for price_maybe in config_div.find_elements_by_tag_name('span'):
            try:
                return price_maybe.find_element_by_tag_name('span').text
            except NoSuchElementException:
                continue
        return ''

    def _read_features_html(self):
        """Return the outer HTML of the ``ul`` near the element containing 'Features'."""
        heading = self.driver.find_element_by_xpath(
            "//*[contains(text(), 'Features')]")
        parent = heading.find_element_by_xpath('..')
        try:
            feature_list = parent.find_element_by_tag_name('ul')
        except NoSuchElementException:
            # Not under the parent: look one level further up.
            feature_list = parent.find_element_by_xpath(
                '..').find_element_by_tag_name('ul')
        return feature_list.get_attribute('outerHTML')

    def download_product_information(self, product_url, work_info):
        """Download every color / print-location variant of one product.

        Images go to ``DOWNLOAD_DIR/<work name>/<product name>/<color> -
        <location>/`` and a ``download.json`` summary is written next to
        them.  Nothing is done when that summary already exists.
        """
        self.get_to(product_url)
        self.dismiss_cookie_popup()
        varients = []
        product_name = self.driver.find_element_by_tag_name('h1').text
        print(f"Downloading {product_name}...")
        download_to = DOWNLOAD_DIR / work_info["name"] / product_name
        download_to.mkdir(exist_ok=True, parents=True)
        if (download_to / "download.json").exists():
            print("Skipping : already downloaded")
            return

        colors = len(self._get_colorswatch())
        print(f"Found {colors} colors_swatch.")
        for color_n in range(colors):
            # Elements are looked up again on every pass rather than reused
            # from the count above.
            color_element = self._get_colorswatch()[color_n]
            if color_element is None:
                color_product_name = None
            else:
                color_product_name = color_element.get_attribute('title')
                color_element.click()
            self.dismiss_cookie_popup()

            print_locations_count = len(self._get_print_locations())
            print(f"Found {print_locations_count} print locations.")
            for print_location_n in range(print_locations_count):
                print_location_element = self._get_print_locations(
                )[print_location_n]
                if print_location_element is None:
                    print_location_text = None
                else:
                    label = print_location_element.find_element_by_xpath('..')
                    print_location_text = label.text
                    label.click()
                self.dismiss_cookie_popup()

                sizes_names = [
                    size_element.find_element_by_xpath('..').text
                    for size_element in self._get_sizes()
                    if size_element is not None
                ]

                time.sleep(1)  # Time for images to load
                images_files = self._download_gallery_images(
                    download_to /
                    f"{color_product_name} - {print_location_text}")

                varient = {
                    "color_product_name": color_product_name,
                    "print_location_text": print_location_text,
                    "sizes_names": sizes_names,
                    "images_count": len(images_files),
                    "images": images_files,
                    "price": self._read_price(),
                    "url": self.driver.current_url,
                }
                print(varient)
                varients.append(varient)

        features = self._read_features_html()
        with open(download_to / "download.json", "w") as f:
            json.dump(
                {
                    "varients": varients,
                    "features_html": self.clean_html(features),
                    "work": work_info,
                },
                f,
                sort_keys=True)

    def main(self):
        """Log in, then download every product of every work; always quit the browser."""
        try:
            self.login()
            self.change_locale()
            works_urls = self.get_works_urls()
            print(f"Discovered {len(works_urls)} works")
            for work_url in works_urls:
                work_info = self.get_work_info(work_url)
                product_urls = self.get_work_products_urls(work_url)
                for product_url in product_urls:
                    # Retried without limit on the listed Selenium errors.
                    ok = False
                    while not ok:
                        try:
                            self.download_product_information(
                                product_url, work_info)
                            ok = True
                        except (StaleElementReferenceException,
                                ElementNotInteractableException,
                                NoSuchElementException) as e:
                            print(f"That failed {e}, retrying...")
        finally:
            self.driver.quit()
class EntsoeDownloader:
    """
    A class used to automatically scrape CSV files from the ENTSOE Transparency Platform

    ...

    Attributes
    ----------
    __username : str
        a string representing the username of an existing ENTSOE account
    __password : str
        a string representing the password of an existing ENTSOE account
    __login_url : str
        the login page link of ENTSOE
    __download_dir : str
        the folder path where CSV files are stored
    date : datetime.date
        the day whose data is exported
    __day_ahead_price_url : str or None
        the download url for scraping day ahead prices
    __generation_per_product_url : str or None
        the download url for scraping electricity generation per product
    __actual_total_load_url : str or None
        the download url for scraping actual loads
    __generation_forecasted_url : str or None
        the download url for scraping the forecasted generation
    driver : None
        holds the Firefox driver once ``setup`` has been called

    Methods
    -------
    check_download(i=0, download_dir=None, timeout=150)
        Waits until the download folder holds exactly ``i`` files, else raises
    setup(headless=False, date_input=None)
        Initializes the Firefox webdriver for scraping
    set_download_url()
        Initializes the download urls
    login_and_download()
        Interacts with the Firefox browser and executes a sequence of tasks for scraping
    """

    def __init__(self, last_date_fetched, username="", password=""):
        """Store credentials and derive the export date.

        Parameters
        ----------
        last_date_fetched : str
            Timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
        username, password : str
            ENTSOE account credentials.
        """
        last_date_fetched = datetime.strptime(last_date_fetched,
                                              "%Y-%m-%d %H:%M:%S")
        self.__username = username
        self.__password = password
        self.__login_url = "https://transparency.entsoe.eu/homepageLogin"
        self.__download_dir = os.path.join(os.getcwd(), "download")

        # choose the following day if the last hour of fetched data is 23:00:00
        if last_date_fetched.time().hour != 23:
            self.date = last_date_fetched.date()
        else:
            self.date = last_date_fetched.date() + timedelta(days=1)

        self.__day_ahead_price_url = None
        self.__generation_per_product_url = None
        self.__actual_total_load_url = None
        self.__generation_forecasted_url = None
        self.driver = None

    @staticmethod
    def check_download(i=0, download_dir=None, timeout=150):
        """
        Polls once per second until the download folder contains exactly ``i`` entries

        Parameters
        -------------
        i: File counter, the number of entries the folder must contain
        download_dir: Folder to inspect; defaults to ``<current working dir>/download``
        timeout: Number of seconds to keep polling before giving up

        Returns
        -------
        True once the folder holds exactly ``i`` entries

        Raises
        ------
        FileNotFoundError
            If the expected count is not reached before the timeout
        """
        if download_dir is None:
            download_dir = os.path.join(os.getcwd(), "download")
        time_counter = 0
        while True:
            if time_counter > timeout:
                raise FileNotFoundError(
                    "Necessary files not found in directory"
                    f" ({timeout} seconds timeout reached)!")
            if len(os.listdir(download_dir)) == i:
                return True
            time.sleep(1)
            time_counter += 1

    def setup(self, headless=False, date_input=None):
        """Create the download folder and start Firefox; returns ``self``.

        Parameters
        ----------
        headless : bool
            Value assigned to the Firefox ``headless`` option.
        date_input : str, optional
            Export date formatted as ``%d.%m.%Y``; overrides ``self.date``.
        """
        # Validate date variable
        if date_input is not None:
            self.date = datetime.strptime(date_input, "%d.%m.%Y").date()

        # Create directory if the download folder is not available
        if not os.path.exists(self.__download_dir):
            os.mkdir(self.__download_dir)

        # Set headless option and firefox profile
        options = Options()
        options.headless = headless
        fp = webdriver.FirefoxProfile()
        # folderList=2 makes Firefox use the custom download directory.
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference("browser.download.manager.showWhenStarting", False)
        fp.set_preference("browser.download.dir", self.__download_dir)
        fp.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "text/plain, application/vnd.ms-excel, text/csv, text/comma-separated-values, "
            "application/octet-stream")

        # Initialize Firefox() object to navigate
        self.driver = Firefox(firefox_profile=fp, options=options)
        return self

    def set_download_url(self):
        """
        Sets the four export url attributes for ``self.date``

        Raises
        ------
        ValueError
            If ``self.date`` is None
        """
        if self.date is None:
            raise ValueError("Attribute 'date' needs to be defined")

        day = self.date.strftime("%d.%m.%Y")
        host = "https://transparency.entsoe.eu"
        # Query fragments shared by all four export urls.
        common = "name=&defaultValue=false&viewType=TABLE&areaType=BZN&atch=false"
        area = "CTY%7C10Y1001A1001A83F!BZN%7C10Y1001A1001A82H"
        tail = ("&dateTime.timezone=CET_CEST"
                "&dateTime.timezone_input=CET+(UTC%2B1)+%2F+CEST+(UTC%2B2)"
                "&dataItem=ALL&timeRange=YEAR&exportType=CSV")
        single_day = "&dateTime.dateTime=" + day + "+00%3A00%7CCET%7CDAY"
        day_range = ("&dateTime.dateTime=" + day + "+00%3A00%7CCET%7CDAYTIMERANGE"
                     "&dateTime.endDateTime=" + day + "+00%3A00%7CCET%7CDAYTIMERANGE")

        # The order of the codes is part of the url and is kept as is.
        production_codes = [
            "B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B09",
            "B10", "B11", "B12", "B13", "B14", "B20", "B15", "B16", "B17",
            "B18", "B19"
        ]
        all_production_types = "".join("&productionType.values=" + code
                                       for code in production_codes)

        self.__day_ahead_price_url = (
            host + "/transmission-domain/r2/dayAheadPrices/export?" + common
            + single_day + "&biddingZone.values=" + area + tail)

        self.__generation_per_product_url = (
            host + "/generation/r2/actualGenerationPerProductionType/export?"
            + common + "&datepicker-day-offset-select-dv-date-from_input=D"
            + day_range + "&area.values=" + area + all_production_types + tail)

        self.__actual_total_load_url = (
            host + "/load-domain/r2/totalLoadR2/export?" + common
            + single_day + "&biddingZone.values=" + area + tail)

        self.__generation_forecasted_url = (
            host + "/generation/r2/dayAheadGenerationForecastWindAndSolar/export?"
            + common + day_range + "&area.values=" + area
            + "&productionType.values=B16&productionType.values=B18"
            "&productionType.values=B19&processType.values=A18"
            "&processType.values=A01&processType.values=A40" + tail)

    def login_and_download(self):
        """
        Executes a sequence of tasks to log into ENTSOE and downloads the specified files

        Raises
        ------
        ValueError
            If either credential is empty
        FileNotFoundError
            If the expected number of files does not appear in time
        """
        if self.__username == "" or self.__password == "":
            raise ValueError(
                "Pleaser set the credentials for ENTSOE to download")

        # Start from an empty "download" directory
        if len(os.listdir(self.__download_dir)) > 0:
            shutil.rmtree(self.__download_dir)
            os.mkdir(self.__download_dir)

        # Instantiate the download urls and request the browser for the homepage login url
        self.set_download_url()
        self.driver.get(self.__login_url)

        # Find login form elements and insert ENTSOE account credentials
        self.driver.find_element_by_id("email").send_keys(self.__username)
        self.driver.find_element_by_id("password").send_keys(self.__password)

        # Trigger click event to sign in
        self.driver.find_element_by_xpath(
            "//div[@class='submit-line']/div/input").click()

        # Wait until login is completed and the "close-button" element is present
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, "close-button")))

        # Agree to the terms and conditions; the loading overlay is hidden
        # first, then the close button is clicked
        loader_div = self.driver.find_element_by_xpath("//*[@id='loading']")
        self.driver.execute_script(
            "arguments[0].setAttribute('style','visibility:hidden;');",
            loader_div)
        self.driver.find_element_by_id("close-button").click()

        # Open each download url in a new tab, one after the other: the
        # n-th url is opened only once n files are in the folder
        urls = [
            self.__generation_per_product_url, self.__day_ahead_price_url,
            self.__actual_total_load_url, self.__generation_forecasted_url
        ]
        for index, url in enumerate(urls):
            if self.check_download(index, self.__download_dir):
                print("Opening url: ",
                      re.search("/r2/(.*)/export", url).group(1))
                self.driver.execute_script("window.open('" + url + "');")

        # Wait for the last CSV file, then exit the browser.  The last file,
        # holding forecast information on solar, wind and total, takes at
        # least 40 seconds to download
        self.check_download(len(urls), self.__download_dir, 150)
        self.driver.quit()
import cv2
import boto3
from cv2 import imwrite
from cv2 import addWeighted
from PIL import Image, ImageFilter
from scipy.ndimage.filters import median_filter

# Open the RC/DL status page in a visible (non-headless) Firefox window and
# zoom the page out to 10%.
options = Options()
options.headless = False
browser = Firefox(options=options)
browser.get('https://parivahan.gov.in/rcdlstatus/')
browser.execute_script("document.body.style.zoom='10%'")

# Read the target element's position and dimensions as whole pixels.
elem = browser.find_element_by_id("form_rcdl:j_idt36:j_idt41")
loc, size = elem.location, elem.size
x, y = (int(loc[axis]) for axis in ('x', 'y'))
w, h = (int(size[side]) for side in ('width', 'height'))

# Screenshot the whole page, then cut the element's rectangle out of it.
browser.save_screenshot("snap.png")
img = cv2.imread("snap.png")
crop_img = img[y:y + h, x:x + w]
cv2.imwrite("Image.png", crop_img)
return results #%% Solve an individual game currently on page def solve_game(window): cards = organize_cards(window.find_elements_by_class_name('set-board-card')) sets = evaluate_cards(cards) for s in sets: for c in s: c[0].click() sleep(1) #%% Initialize Browser browser = Firefox() browser.get('https://www.nytimes.com/puzzles/set') #Scroll away from banner ad browser.execute_script("window.scrollTo(0, 400)") sleep(1) #%% Select each tab and solve the game. for i in range(4): solve_game(browser) try: browser.find_element_by_class_name('pzm-modal__button').click() except: pass
class Sunny(object):
    """Scrape daily power data from sunnyportal.com with Firefox on a virtual display.

    Relies on module-level configuration (``houses``, ``TIME_DELAY``,
    ``current_dir``, the ``id_*`` element ids and ``url_data_graph``).
    """

    def __init__(self, login, password):
        """Start the display and the browser, then log in.

        Parameters
        ----------
        login: str
            The login credential to sunnyportal
        password: str
            The password credential of sunnyportal
        """
        # Stored before the first login() call, which falls back to these
        # attributes when it is given no login.
        self._login = login
        self._password = password

        self.start_display()
        profile = webdriver.FirefoxProfile()
        profile.set_preference('browser.download.folderList', 2)  # custom location
        profile.set_preference('browser.download.manager.showWhenStarting', False)
        profile.set_preference('browser.download.dir', current_dir)
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                               "text/csv,application/vnd.ms-excel")
        self.driver = Firefox(profile)
        self.login(login, password)

    def start_display(self):
        """Start an invisible 800x600 display and keep it on ``self.display``."""
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()

    def close(self):
        """Shut down the browser session, then stop the display."""
        try:
            # quit() ends the whole session, not just the current window.
            self.driver.quit()
        finally:
            self.display.stop()

    def login(self, login=None, password=None):
        """Login on the Sunny portal website using the credentials

        Parameters
        ----------
        login: str
            The login credential to sunnyportal; when empty, the stored
            login and password are used instead
        password: str
            The password credential of sunnyportal
        """
        if not login:
            login = self._login
            password = self._password
        self.driver.get("https://www.sunnyportal.com/Templates/Start.aspx?ReturnUrl=%2f")
        self.driver.find_element_by_id("txtUserName").clear()
        self.driver.find_element_by_id("txtUserName").send_keys(login)
        self.driver.find_element_by_id("txtPassword").clear()
        self.driver.find_element_by_id("txtPassword").send_keys(password)
        self.driver.find_element_by_id("ctl00_ContentPlaceHolder1_Logincontrol1_LoginBtn").click()

    def wait_n_get(self, element_type, value):
        """
        Wait for an element to be present and get it

        Parameters
        ----------
        element_type: By.ID | By.LINK_TEXT...
            The type of value to identify the element to get
        value: str
            the value describing the element to get

        Returns
        -------
        el: element
            The driver element requested
        """
        return WebDriverWait(self.driver, TIME_DELAY).until(
            EC.presence_of_element_located((element_type, value)))

    def goto(self, n_house):
        """Go to the page of a house given its number, from the plant list page

        Parameters
        ----------
        n_house: int
            The number of the house to go to
        """
        el = self.wait_n_get(By.LINK_TEXT, houses[n_house])
        el.click()

    def goto_2(self, n_house):
        """Go to a house from the plant panel on the Dashboard page

        Parameters
        ----------
        n_house: int
            The number of the house to go to
        """
        self.wait_n_get(By.CLASS_NAME, 'plantselect').click()
        self.wait_n_get(By.LINK_TEXT, houses[n_house]).click()

    def hover_over(self, id):
        """Hover over an element of the page given its id

        Parameters
        ----------
        id: str
            The id of the element to hover over
        """
        el = self.wait_n_get(By.ID, id)
        hover = ActionChains(self.driver).move_to_element(el)
        hover.perform()

    def click(self, id):
        """Click on an element of the page given its id

        Parameters
        ----------
        id: str
            The id of the element to click on
        """
        el = self.wait_n_get(By.ID, id)
        el.click()

    def select_date(self, day, month, year):
        """Write ``month/day/year`` into the date box, then click previous and next.

        Retries itself when the error text says the element is not
        clickable; any other error is dropped without a message.
        """
        id_date = 'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1__datePicker_textBox'
        id_before = 'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_prev'
        id_after = 'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_next'
        try:
            # Only waits for the date box; the value is set through jQuery.
            self.wait_n_get(By.ID, id_date)
            self.driver.execute_script(
                '$("#%s").val("%d/%d/%d")' % (id_date, month, day, year))
            sleep(0.2)
            self.click(id_before)
            sleep(0.2)
            self.click(id_after)
            sleep(0.2)
        except Exception as e:
            if "Element is not clickable at point" in str(e):
                print(e)
                print('trying again!')
                self.select_date(day, month, year)

    def download(self, day=None, month=None, year=None):
        """Download the CSV data of the selected day

        Returns
        -------
        res: response | None
            The response of the data request, or None when the page shows
            the info icon instead of a download button

        Raises
        ------
        Exception
            When the data request does not answer with status 200, or when
            the download button fails and the info icon cannot be located
        """
        # Make sure we see the "Day" panel
        tabactive = self.wait_n_get(By.CLASS_NAME, 'tabactive')
        if not tabactive.text == 'Day':
            self.click(id_day)

        # Select the right day
        if day:
            self.select_date(day, month, year)

        # Hover over the download button
        try:
            self.hover_over(id_hover)
            self.click(id_click)
        except Exception as e_1:
            # Check if the data is available for that day by looking for the info bubble
            try:
                el = self.wait_n_get(By.ID, id_info)
                if 'info.png' in el.get_attribute('src'):
                    print('no data available for this day')
                    return None
                # Not sure what just happened there.  This raise is caught
                # by the handler right below.
                raise e_1
            except Exception as e_2:
                if 'Unable to locate element' in str(e_2):
                    # The info icon isn't available
                    print(e_2)
                    raise e_1
                # Not sure what just happened there: report both errors and
                # still attempt the data request below.
                print(e_1)
                print(e_2)

        # Download the data for the day
        res = self.driver.request('GET', url_data_graph)
        if res.status_code == 200:
            print('sucess')
        else:
            raise Exception('Error:', res.text)
        return res

    def download_house(self, n, day=None, month=None, year=None):
        """
        Download the house power production of the day

        Parameters
        ----------
        n: int
            The number of the house to go to
        day, month, year: int
            The date to select; the date already shown is used when
            ``day`` is not given

        Returns
        -------
        df: pandas.DataFrame | None
            A dataframe containing the house day power production, or a
            falsy value (None or the response) when there isn't any data
            available
        """
        try:
            # Check what is the starting point
            current_url = self.driver.current_url
            if 'Start.aspx' in current_url:
                # We are on the login screen, we first need to login
                print('-- login in main screen')
                self.login()
                print('-- accessing house', n)
                self.goto(n)
            elif 'sunnyportal.com/Plants' in current_url:
                # We are on the plant list
                self.goto(n)
            elif 'sunnyportal.com/FixedPages/Dashboard.aspx' in current_url:
                # We are on a dashboard, so the left hand panel leads to the new house
                self.goto_2(n)
            else:
                # No idea where we are
                raise Exception('I dont know where we are:', self.driver.current_url)

            print('-- downloading house', n, 'power data')
            res = self.download(day, month, year)
            self.date = self.wait_n_get(By.ID, id_date).get_attribute('value')
            if day:
                if not self.date == "%d/%d/%d" % (month, day, year):
                    print('Error the date wasnt fixed correctly: ' + self.date)

            if res:
                # There seems to be a positive response, so let's put it in a pandas dataframe
                df = pd.read_csv(StringIO(res.text), sep=';',
                                 names=['power', 'avg'], skiprows=1)
                print('-- download sucessful')
                return df
            print('-- download failed')
            # No usable response, it is handed back as is
            return res
        except Exception as e_1:
            # Something went wrong
            try:
                # Check if sunny portal has banned us for some time
                text = self.wait_n_get(
                    By.ID, 'ctl00_ContentPlaceHolder1_Logincontrol1_DivLogin').text
                if 'Login failed! Login will be blocked for' in text:
                    # It does seem like we have been banned for some time
                    print(text)
                    n_sec = int(text.split('for')[1].split(' seconds')[0])
                    print('going to sleep for %d sec' % (n_sec))
                    time.sleep(n_sec)
                    print('retrying this house')
                    return self.download_house(n, day, month, year)
                # The login box is shown without a block message: the
                # error is dropped and None is returned.
            except Exception as e_2:
                # I don't know what went wrong
                print(e_1)
                print(e_2)
                raise e_1

    def img(self):
        """A simple screenshot function to show on the notebook"""
        return Image(self.driver.get_screenshot_as_png())

    def download_all(self, day=None, month=None, year=None):
        """Download every house in ``houses`` and save the power columns to CSV.

        Houses whose download did not produce a dataframe are left out.
        The result is stored on ``self.data``, written to
        ``svalin_<day>_<month>_<year>.csv`` and returned.  When no house
        could be downloaded, an empty dataframe is returned and no file is
        written.
        """
        frames = {}
        for k in houses:
            print(k)
            df = self.download_house(k, day, month, year)
            if isinstance(df, pd.DataFrame):
                frames['House %d' % (k)] = df

        if not frames:
            print('-- no house data downloaded')
            self.data = pd.DataFrame()
            return self.data

        # Save the data into a DataFrame, indexed like the last house that
        # was downloaded successfully
        index = list(frames.values())[-1].index
        self.data = pd.DataFrame(
            {name: frame.power for name, frame in frames.items()}, index=index)

        # Save the data into a file; self.date reads month/day/year
        m, d, y = self.date.split('/')
        self.data.to_csv('svalin_%s_%s_%s.csv' % (d, m, y))
        return self.data