''' for j in range(1, 4): movie['star'].append(star_list[j].findAll('td')[1].find('a').text.strip()) #extract poster url movie['poster'] = movie_details.find('div', {'class' : 'poster'}).find('img')['src'] #extrcat rating movie['rating'] = movie_details.find('span', {'itemprop' : 'ratingValue'}).text #extract release dates of each movie dates_url = movie_details.find('a', {'title' : 'See more release dates'})['href'] dates_response = driver.request('GET', 'https://www.imdb.com/' + dates_url) dates_details = bs(dates_response.text, 'lxml') movie['date'] = {} dates_list = dates_details.findAll('tr', {'class': 'ipl-zebra-list__item release-date-item'}) for date in dates_list: date_info = date.findAll('td') if(date_info[0].text.strip() not in movie['date'].keys()): movie['date'][date_info[0].text.strip()] = date_info[1].text data['movie'].append(movie) #Create scraper object and initialize with url scraper = Scarper(imdb_url) #send request to the giver url scraper.load_url() #get all links of each movie scraper.get_movie_links() #get details of each movie scraper.get_movie_details() with open('movies.json', 'w') as outfile: json.dump(data, outfile) driver.close()
# Drive the BAND "create-live" page to obtain a stream key, then start
# ./stream.sh with the relay URL and that key.  Everything runs inside
# try/finally so the browser window is closed even when a step fails.
try:
    browser.get('https://band.us/band/{}/create-live'.format(JBS_BAND_ID))
    # Fixed pause before the absolute XPaths below are looked up.
    time.sleep(5)
    browser.find_element_by_xpath(
        '/html/body/div[1]/section/div/div[1]/div/div[2]/button[1]').click()
    browser.find_element_by_xpath(
        '/html/body/div[1]/section/div/div[2]/div[3]/div[4]/div/button').click()

    # Move the pointer over the button first, wait, then click it.
    button = browser.find_element_by_xpath(
        '/html/body/div[1]/section/div/div[2]/div[3]/div[3]/div/button')
    action = ActionChains(browser)
    action.move_to_element(button)
    action.perform()
    time.sleep(2.5)
    # The element is looked up again instead of reusing `button`.
    browser.find_element_by_xpath(
        '/html/body/div[1]/section/div/div[2]/div[3]/div[3]/div/button').click()
    time.sleep(2.5)
    browser.switch_to.alert.accept()

    # NOTE(review): presumably the click above puts the key on the system
    # clipboard -- confirm; the key is read back from there.
    streamKey = clipboard.paste()
    print('Stream Key: {}'.format(streamKey))

    # argv for the helper script: ./stream.sh <relay url> <stream key>
    args = [
        './stream.sh',
        'rtmp://global-rtmp.lip2.navercorp.com:8080/relay',
        streamKey,
    ]
    print(args)
    # Started without waiting for completion; runs in the current directory.
    subprocess.Popen(args, cwd=os.getcwd())
finally:
    browser.close()
print(ri) pass if __name__ == "__main__": url_prefix = "http://www.hshfy.sh.cn/shfy/gweb/flws_list_content.jsp" page_id = 1 while True: print("Current page: %d" % page_id) url = url_prefix webdriver = Firefox() response = webdriver.request('POST', url_prefix, data={'fydm':'200', 'ajlb':'%E6%B0%91%E4%BA%8B', 'pagesnum':'2'}) r_read = response.text soup = BeautifulSoup(r_read) info_soup = soup.findAll('tr', attrs={"style":"cursor:hand"}) if info_soup: print("LEN: %d" %(len(info_soup))) for item in info_soup: track_info(item) webdriver.close() page_id += 1 if page_id > 234: break db_conn.close() print("Done!")
class AlgebraixSession(object):
    """Browser session used to read messages and save their attachments.

    The ``set_*`` methods scrape the page currently shown by
    ``self.browser`` and store the result on the instance;
    ``download_files`` writes what was scraped to ``self.targetPath``.
    """

    # Attachment link text: group 1 is the file name (3-4 character
    # extension), followed by a size in parentheses such as "(5K)",
    # "(12K)" or "(1.5M)".  The fractional part is optional so that
    # single-digit sizes are recognised as well.
    _ATTACHMENT_RE = re.compile(r"(.+\.\w{3,4}) \(\d+(?:\.\d+)?[KM]\)")

    def __init__(self):
        """Initialise the session by opening the web browser."""
        self.browser = Firefox()
        self.browser.get("https://c1-summit.algebraix.com/")
        self.regex = self._ATTACHMENT_RE

    def set_names(self):
        """Find and set the names shown on the current message.

        Stores every name in ``self.names`` and the first one in
        ``self.sender_name``.

        Raises:
            IndexError: if no name element is present on the page.
        """
        elements = self.browser.find_elements_by_class_name(
            "material-card__text--primary")
        self.names = [element.text for element in elements]
        self.sender_name = self.names[0]

    def replace_sender_name(self, names):
        """
        Check if the sender's name can be substituted with a student's.

        Inputs: names, a dictionary keyed by student name; the second item
        of each value is searched for the current sender name.
        """
        for student, info in names.items():
            if self.sender_name in info[1]:
                self.sender_name = student

    def set_group(self, names):
        """
        Look up the group for the current sender name, if it is known.

        Inputs: names, a dictionary keyed by student name whose values
        hold the group as their first item.  Unknown names give "".
        """
        self.group = names.get(self.sender_name, [""])[0]

    def set_bodies(self):
        """Find and set the body text of each message on the page.

        Each body is followed by a blank line.  ``self.bodies`` is left
        empty when the page contains no body element.
        """
        elements = self.browser.find_elements_by_class_name(
            "material-card__body--paragraph."
            "material-card__body--respect-lines.text-break")
        self.bodies = [element.text + "\n\n" for element in elements]
        if self.bodies:
            # NOTE(review): this trims three characters although only two
            # newlines were appended, so the last character of the final
            # body is dropped too -- kept as is, confirm it is intended.
            self.bodies[-1] = self.bodies[-1][:-3]

    def set_dates(self):
        """Find and set the date/time text of each message."""
        elements = self.browser.find_elements_by_class_name(
            "material-card__body--title-secondary")
        self.dates = [element.text for element in elements]

    def set_attachments(self):
        """Set the list of links whose text looks like an attachment."""
        self.attachments = [
            link for link in self.browser.find_elements_by_tag_name("a")
            if self.regex.search(link.text)
        ]

    def create_download_directory(self):
        """Create the download directory for the current sender.

        The directory is ~/Downloads/AlgebraixInbox/<group><SenderName>,
        with the sender name title-cased and its spaces removed.
        """
        folder = f"{self.group}{self.sender_name.title().replace(' ', '')}"
        self.targetPath = os.path.expanduser(
            os.path.join("~", "Downloads", "AlgebraixInbox", folder))
        os.makedirs(self.targetPath, exist_ok=True)

    def download_files(self):
        """Save the scraped message text and download its attachments.

        The text goes to the first unused ``NN.txt`` in ``self.targetPath``
        and every attachment to ``NN_<file name>`` next to it.

        Raises:
            Whatever ``raise_for_status()`` raises for a failed download.
        """
        # Pick the first sequence number that has no text file yet.
        n = 1
        while os.path.isfile(os.path.join(self.targetPath, f"{n:02}.txt")):
            n += 1

        # Explicit UTF-8 so non-ASCII message text can always be written.
        text_path = os.path.join(self.targetPath, f"{n:02}.txt")
        with open(text_path, "w", encoding="utf-8") as text_file:
            for name, date, body in zip(self.names, self.dates, self.bodies):
                text_file.write(name.title() + "\n" + date + "\n" + body)

        for link in self.attachments:
            res = self.browser.request("GET", link.get_attribute("href"))
            res.raise_for_status()
            # The name comes from page text: keep only its last path
            # component so it cannot point outside the target directory.
            file_name = os.path.basename(
                self.regex.search(link.text).group(1))
            target = os.path.join(self.targetPath, f"{n:02}_{file_name}")
            with open(target, "wb") as attachment:
                for chunk in res.iter_content(10000):
                    attachment.write(chunk)

    def find_next(self):
        """
        Find and return the link to the next message.

        Returns False if there is no such link.

        Returns: a Selenium object or a bool.
        """
        for link in self.browser.find_elements_by_class_name("action-item"):
            if link.get_attribute("data-original-title") == "Next":
                return link
        return False

    def browser_close(self):
        """Close the web browser."""
        self.browser.close()
class Sunny(object):
    """Scraper for the Sunny Portal website driven through Firefox.

    Creating an instance starts a virtual display, opens Firefox with a
    download-friendly profile and logs in with the given credentials.
    """

    def __init__(self, login, password):
        """Start the display and the browser, then log in.

        Parameters
        ----------
        login: str
            The login credential to sunnyportal
        password: str
            The password credential of sunnyportal
        """
        # Store the credentials first so that login() can fall back on
        # them even during this first call.
        self._login = login
        self._password = password

        self.start_display()
        profile = webdriver.FirefoxProfile()
        profile.set_preference('browser.download.folderList', 2)  # custom location
        profile.set_preference('browser.download.manager.showWhenStarting', False)
        profile.set_preference('browser.download.dir', current_dir)
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                               "text/csv,application/vnd.ms-excel")
        self.driver = Firefox(profile)
        self.login(login, password)

    def start_display(self):
        """Create and start the 800x600 non-visible display."""
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()

    def close(self):
        """Close the browser and stop the display.

        The display is stopped even when closing the browser raises.
        """
        try:
            self.driver.close()
        finally:
            self.display.stop()

    def login(self, login=None, password=None):
        """Login on the Sunny portal website using the credentials

        When ``login`` is empty, the credentials stored on the instance
        are used instead (both login and password).

        Parameters
        ----------
        login: str
            The login credential to sunnyportal
        password: str
            The password credential of sunnyportal
        """
        if not login:
            login = self._login
            password = self._password
        self.driver.get("https://www.sunnyportal.com/Templates/Start.aspx?ReturnUrl=%2f")
        user_box = self.driver.find_element_by_id("txtUserName")
        user_box.clear()
        user_box.send_keys(login)
        password_box = self.driver.find_element_by_id("txtPassword")
        password_box.clear()
        password_box.send_keys(password)
        self.driver.find_element_by_id(
            "ctl00_ContentPlaceHolder1_Logincontrol1_LoginBtn").click()

    def wait_n_get(self, element_type, value):
        """
        Wait for an element to be present and get it

        Parameters
        ----------
        element_type: By.ID | By.LINK_TEXT...
            The type of value to identify the element to get
        value: str
            the value describing the element to get

        Returns
        -------
        el: element
            The driver element requested
        """
        locator = (element_type, value)
        wait = WebDriverWait(self.driver, TIME_DELAY)
        return wait.until(EC.presence_of_element_located(locator))

    def goto(self, n_house):
        """Go to the page of a house given its number, from the plant list page

        Parameters
        ----------
        n_house: int
            The number of the house to go to
        """
        self.wait_n_get(By.LINK_TEXT, houses[n_house]).click()

    def goto_2(self, n_house):
        """Go to a house from the plant panel on the Dashboard page

        Parameters
        ----------
        n_house: int
            The number of the house to go to
        """
        self.wait_n_get(By.CLASS_NAME, 'plantselect').click()
        self.wait_n_get(By.LINK_TEXT, houses[n_house]).click()

    def hover_over(self, id):
        """Hover over an element of the page given its id

        Parameters
        ----------
        id: str
            The id of the element to hover over
        """
        el = self.wait_n_get(By.ID, id)
        ActionChains(self.driver).move_to_element(el).perform()

    def click(self, id):
        """Click on an element of the page given its id

        Parameters
        ----------
        id: str
            The id of the element to click on
        """
        self.wait_n_get(By.ID, id).click()

    def select_date(self, day, month, year):
        """Write a date into the date picker and refresh the view.

        The value is written as month/day/year through a script, then the
        "previous" and "next" buttons are clicked one after the other.
        A "not clickable" failure is retried (without a retry limit);
        any other failure is ignored here.

        Parameters
        ----------
        day, month, year: int
            The date to select
        """
        id_date = 'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1__datePicker_textBox'
        id_before = 'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_prev'
        id_after = 'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_next'
        try:
            # Only waits for the text box to exist; the value itself is
            # set by the script below.
            self.wait_n_get(By.ID, id_date)
            self.driver.execute_script(
                '$("#%s").val("%d/%d/%d")' % (id_date, month, day, year))
            sleep(0.2)
            self.click(id_before)
            sleep(0.2)
            self.click(id_after)
            sleep(0.2)
        except Exception as e:
            if "Element is not clickable at point" in str(e):
                print(e)
                print('trying again!')
                self.select_date(day, month, year)

    def download(self, day=None, month=None, year=None):
        """Download the CSV file

        Parameters
        ----------
        day, month, year: int, optional
            The date to select first; skipped when ``day`` is empty

        Returns
        -------
        res: response | None
            The response of the data request, or None when the page shows
            the "no data" info icon

        Raises
        ------
        Exception
            When the data request does not answer with status 200, or
            when the download button failed and no info icon was found
        """
        # Make sure we see the "Day" panel
        tabactive = self.wait_n_get(By.CLASS_NAME, 'tabactive')
        if tabactive.text != 'Day':
            self.click(id_day)

        # Select the right day
        if day:
            self.select_date(day, month, year)

        # Hover over the download button
        try:
            self.hover_over(id_hover)
            self.click(id_click)
        except Exception as e_1:
            # Check if the data is available for that day by looking for
            # the info bubble
            try:
                el = self.wait_n_get(By.ID, id_info)
                if 'info.png' in el.get_attribute('src'):
                    print('no data available for this day')
                    return None
                # Not sure what just happened there; handled just below
                # like any other failure of this check.
                raise e_1
            except Exception as e_2:
                if 'Unable to locate element' in str(e_2):
                    # The info icon isn't available
                    print(e_2)
                    raise e_1
                # Not sure what just happened there: report both errors
                # and still attempt the data request.
                print(e_1)
                print(e_2)

        # Download the data for the day
        res = self.driver.request('GET', url_data_graph)
        if res.status_code != 200:
            raise Exception('Error:', res.text)
        print('sucess')
        return res

    def download_house(self, n, day=None, month=None, year=None):
        """
        Download the house power production of the day

        Parameters
        ----------
        n: int
            The number of the house to go to
        day, month, year: int, optional
            The date to download; passed on to download()

        Return
        ------
        df: pandas.DataFrame | None
            A dataframe containing the house day power production, or
            None if there isn't any data available
        """
        try:
            # Check what is the starting point
            current_url = self.driver.current_url
            if 'Start.aspx' in current_url:
                # We are on the login screen, we first need to login
                print('-- login in main screen')
                self.login()
                print('-- accessing house', n)
                self.goto(n)
            elif 'sunnyportal.com/Plants' in current_url:
                # We are on the plant list
                self.goto(n)
            elif 'sunnyportal.com/FixedPages/Dashboard.aspx' in current_url:
                # We are on a dashboard, so the left hand panel leads to
                # the new house
                self.goto_2(n)
            else:
                # No idea where we are
                raise Exception('I dont know where we are:', self.driver.current_url)

            print('-- downloading house', n, 'power data')
            res = self.download(day, month, year)
            # Remember the date the page actually shows.
            self.date = self.wait_n_get(By.ID, id_date).get_attribute('value')
            if day and self.date != "%d/%d/%d" % (month, day, year):
                print('Error the date wasnt fixed correctly: ' + self.date)

            if not res:
                print('-- download failed')
                # No response, we return it unchanged
                return res

            # There seems to be a positive response, so let's put it in
            # a pandas dataframe
            df = pd.read_csv(StringIO(res.text), sep=';',
                             names=['power', 'avg'], skiprows=1)
            print('-- download sucessful')
            return df
        except Exception as e_1:
            # Something went wrong
            try:
                # Check if sunny portal has banned us for some time
                text = self.wait_n_get(
                    By.ID, 'ctl00_ContentPlaceHolder1_Logincontrol1_DivLogin').text
                if 'Login failed! Login will be blocked for' in text:
                    # It does seem like we have been banned for some time
                    print(text)
                    n_sec = int(text.split('for')[1].split(' seconds')[0])
                    print('going to sleep for %d sec' % (n_sec))
                    time.sleep(n_sec)
                    print('retrying this house')
                    return self.download_house(n, day, month, year)
            except Exception as e_2:
                # I don't know what went wrong
                print(e_1)
                print(e_2)
                raise e_1
            # The login box is there but shows no ban message: report the
            # failure and give up on this house (callers receive None).
            print(e_1)
            return None

    def img(self):
        """A simple screenshot function to show on the notebook"""
        return Image(self.driver.get_screenshot_as_png())

    def download_all(self, day=None, month=None, year=None):
        """Download every house and gather the power columns in one table.

        Houses whose download does not give a DataFrame are left out.
        The table is stored in ``self.data`` and written to
        ``svalin_<d>_<m>_<y>.csv`` using the parts of ``self.date``.

        Parameters
        ----------
        day, month, year: int, optional
            The date to download; passed on to download_house()

        Returns
        -------
        data: pandas.DataFrame
            One 'House <number>' column per successfully downloaded house
        """
        frames = {}
        last_frame = None
        for k in houses:
            print(k)
            df = self.download_house(k, day, month, year)
            if isinstance(df, pd.DataFrame):
                frames['House %d' % (k)] = df
                last_frame = df

        # Use the index of the last successful download; a failed last
        # house (None) must not break the whole table.
        index = last_frame.index if last_frame is not None else None
        self.data = pd.DataFrame(
            {name: frame.power for name, frame in frames.items()},
            index=index)

        # Save the data into a file
        m, d, y = self.date.split('/')
        self.data.to_csv('svalin_%s_%s_%s.csv' % (d, m, y))
        return self.data