def __enter__(self):
    """Create a headless Chrome requestium session and return this object.

    The session is stored on ``self.session`` so the body of the ``with``
    statement can reach it.
    """
    session_kwargs = {
        'webdriver_path': '/usr/lib/chromium-browser/chromedriver',
        'browser': 'chrome',
        'default_timeout': 15,
        'webdriver_options': {'arguments': ['headless']},
    }
    self.session = Session(**session_kwargs)
    return self
def __init__(self, path):
    """Initialise request state, constants, the browser session and logging.

    Args:
        path: Location of the Chrome webdriver executable handed to
            ``Session``.
    """
    self.last_json = ""
    self.last_response = None

    # Request-signing constants and the user agent string sent to the service.
    self.IG_SIG_KEY = '4f8732eb9ba7d1c8e8897a75d6474d4eb3f5279137431b2aafb71fafe2abe178'
    self.SIG_KEY_VERSION = '4'
    user_agent_template = 'Instagram 10.26.0 Android ({android_version}/{android_release}; 640dpi; 1440x2560; {manufacturer}; {device}; {model}; samsungexynos8890; en_US)'
    self.USER_AGENT = user_agent_template.format(**DEVICE_SETTINTS)

    self.s = Session(webdriver_path=path, browser='chrome', default_timeout=15)

    # One logger per instance: the name embeds id(self).
    self.logger = logging.getLogger('[instatesi_{}]'.format(id(self)))
    self.privateUsers = {}
    self.users = {}

    # The file handler records INFO and above; the console shows everything.
    handler_specs = (
        (logging.FileHandler(filename='instatesi.log'),
         logging.INFO,
         '%(asctime)s %(message)s'),
        (logging.StreamHandler(),
         logging.DEBUG,
         '%(asctime)s - %(levelname)s - %(message)s'),
    )
    for handler, level, layout in handler_specs:
        handler.setLevel(level)
        handler.setFormatter(logging.Formatter(layout))
        self.logger.addHandler(handler)
    self.logger.setLevel(logging.DEBUG)

    self.lastUserHandled = None
def __init__(self, username, password, driver_path, download_path=None,
             browser='chrome', webdriver_options=None, logger=None):
    """Store the credentials and open a requestium session.

    Args:
        username: Account name, kept on ``self._username``.
        password: Account password, kept on ``self._password``.
        driver_path: Path to the webdriver executable.
        download_path: Optional download location, stored as given.
        browser: Browser name passed to ``Session``.
        webdriver_options: Options dict passed to ``Session``. ``None``
            (the default) selects ``{'arguments': ['headless']}``.
        logger: Logger to use. When omitted, logging is configured at DEBUG
            level and the ``odigo_downloader.downloader`` logger is used.
    """
    if logger is None:
        logging.basicConfig(level=logging.DEBUG)
        logger = logging.getLogger('odigo_downloader.downloader')
        logger.setLevel(logging.DEBUG)
    # A logger supplied by the caller is honoured; it used to be overwritten.
    self.logger = logger

    self._username = username
    self._password = password
    self.driver_path = driver_path
    self.download_path = download_path
    self.url = 'https://enregistreur.prosodie.com/odigo4isRecorder/EntryPoint?serviceName=LoginHandler'
    self.browser = browser

    # Build the default per call so instances never share one mutable dict.
    if webdriver_options is None:
        webdriver_options = {'arguments': ['headless']}
    self.webdriver_options = webdriver_options

    self.logger.debug(
        f"Creating Session object with values: {self.webdriver_options}")
    self.session = Session(webdriver_path=self.driver_path,
                           browser=self.browser,
                           default_timeout=15,
                           webdriver_options=self.webdriver_options)
    self.logger.debug(f"Session details: {self.session.driver}")
    self.validated = False
def __init__(self):
    """Create the requestium session used by this object."""
    # requestium's Session combines requests and Selenium. Headless mode
    # (webdriver_options={'arguments': ['headless']}) is currently disabled.
    session_kwargs = {
        'webdriver_path': './chromedriver',
        'browser': 'chrome',
        'default_timeout': 15,
    }
    self.s = Session(**session_kwargs)
def __init__(self):
    """Disable verbose output and create a headless Chrome session."""
    self.verbose = False
    chrome_options = {'arguments': ['headless']}
    # NOTE(review): the webdriver path is an empty string here - confirm
    # this is intended.
    self._session = Session(
        webdriver_path='',
        browser='chrome',
        default_timeout=15,
        webdriver_options=chrome_options,
    )
def __init__(self, userLogin: str, userPass: str) -> None:
    """Remember the credentials and open a Chrome requestium session.

    Args:
        userLogin: Login name, stored on ``self._userLogin``.
        userPass: Password, stored on ``self._userPassword``.
    """
    session = Session("./chromedriver", browser="chrome", default_timeout=15)
    self._rugratSession = session

    self._userLogin, self._userPassword = userLogin, userPass
    self._isLogged = False

    # Default/recommended ranges, in seconds, between actions; the attribute
    # names suggest comments and follows respectively.
    self._rangeTimeBetComments = 290
    self._rangeTimeBetFollow = 400
def __init__(self):
    """Create the requestium session and load the category mapping, if any.

    ``self.category_mapping`` stays ``None`` when the file named by
    ``FILENAME`` does not exist in the current working directory.
    """
    # requestium's Session combines requests and Selenium. Headless mode
    # (webdriver_options={'arguments': ['headless']}) is currently disabled.
    self.s = Session(
        webdriver_path='./chromedriver',
        browser='chrome',
        default_timeout=15,
    )
    self.category_mapping = None
    path = os.path.join(os.getcwd(), FILENAME)
    if os.path.exists(path):
        # Use a context manager so the file handle is closed after parsing.
        with open(path) as mapping_file:
            self.category_mapping = ujson.load(mapping_file)
def setup_requestium_session(self):
    """Start a Chrome requestium session unless one is already active.

    Returns:
        An explanatory message string when a session is already active;
        otherwise ``None`` after ``self.session`` has been created and
        ``self.active`` set to ``True``.
    """
    if self.active:
        return "Session/Browser already active. Cannot have two concurrent sessions/browsers"

    # Headless mode is opt-in through self.headless.
    webdriver_options = {'arguments': ['headless']} if self.headless else {}
    self.logger.debug(
        f"Creating Session object with values: {webdriver_options}")
    self.session = Session(
        webdriver_path=self.driver_path,
        browser='chrome',
        default_timeout=15,
        webdriver_options=webdriver_options,
    )
    self.active = True
def __init__(self, comment_count=30, commentaries=None, start_at=8, end_with=23):
    """Open a headless Chrome session and store the commenting settings.

    Args:
        comment_count: Stored on ``self.comment_count``.
        commentaries: Stored on ``self.commentaries``.
        start_at: Stored on ``self.start_at``.
        end_with: Stored on ``self.end_with``.
    """
    chrome_arguments = ['headless', 'disable-gpu', f'user-agent={user_agent}']
    self.s = Session(
        './chromedriver',
        'chrome',
        default_timeout=60,
        webdriver_options={'arguments': chrome_arguments},
    )
    # The requests side of the session also gets the shared headers.
    self.s.headers.update(s_headers)

    self.comment_count, self.commentaries = comment_count, commentaries
    self.start_at, self.end_with = start_at, end_with
    self.posts = Queue()
    self.exception_recoder = []
def get_session(flag):
    """Return a Chrome requestium session, headless when ``flag`` is 0.

    Args:
        flag: ``0`` selects a headless, sandbox-less, GPU-less browser; any
            other value opens the browser with default options.
    """
    session_kwargs = {
        'webdriver_path': posC,
        'browser': 'chrome',
        'default_timeout': 15,
    }
    if flag == 0:
        session_kwargs['webdriver_options'] = {
            'arguments': ['headless', '--no-sandbox', '--disable-gpu']
        }
    return Session(**session_kwargs)
def initDriver(self):
    """Open the login page in Chrome and click a link located by a scraped id.

    The element id is cut out of the page source: 24 characters, starting
    26 characters after the first occurrence of ``"visibility:"``.
    """
    # '-headless' is deliberately left out so the browser window is visible.
    chrome_arguments = [
        '-mute-audio',
        '-window-size=1920,1080',
        '-start-maximized',
        '-no-sandbox',
    ]
    self.s = Session(
        webdriver_path=r'C:\Software\Cent\CentBrowser\Application\chromedriver.exe',
        browser='chrome',
        default_timeout=15,
        webdriver_options={'arguments': chrome_arguments},
    )

    browser = self.s.driver
    browser.get('http://music.163.com/#/login')
    time.sleep(1)
    browser.switch_to.default_content()

    markup = browser.page_source
    id_start = markup.find("visibility:") + 26
    auto_id = markup[id_start:id_start + 24]
    button_xpath = "//*[@id='" + auto_id + "']/div[1]/div[1]/a"

    time.sleep(0.5)
    browser.find_element_by_xpath(button_xpath).click()
def test_locate_Document(session):
    """Open the document query page and choose the fifth dropdown entry.

    Args:
        session: An existing requestium session, or a falsy value to have
            a new one created.

    Returns:
        The session that was used.
    """
    if not session:
        session = Session()

    driver = session.driver
    driver.get('http://180.97.151.94:9012/inspectionCheck/queryDocuments?guid=da622969-6c38-4d4f-88c3-7e32e5aaec09&action=true')
    driver.implicitly_wait(3)
    # Continue in the most recently opened window.
    driver.switch_to.window(driver.window_handles[-1])
    driver.implicitly_wait(3)
    time.sleep(2)

    dropdown_xpath = '//*[@id="app"]/div/div[2]/section/div/div[1]/div[1]/div[1]/div/div/div'
    driver.find_element_by_xpath(dropdown_xpath).click()
    time.sleep(2)

    # The menu carries an id such as '#dropdown-menu-890'; it is located via
    # its inline "position" style instead.
    menus = driver.find_elements_by_css_selector('ul[style^="position"]')
    print("--", menus)
    for menu in menus:
        print("S1", menu.get_attribute("class"))
        print("S ", menu.get_attribute("style"))

    # The last matching list is used as the open menu.
    main_ul = menus[-1]
    print(main_ul.get_attribute("class"))
    time.sleep(1)
    entries = main_ul.find_elements_by_tag_name('li')
    entries[4].click()

    fill_dsrcbbl(session)
    return session
def setup():
    """Return a headless Chrome requestium session using a fixed driver path."""
    return Session(
        webdriver_path=r'C:\Users\RSTAUNTO\Desktop\chromedriver.exe',
        browser='chrome',
        default_timeout=15,
        webdriver_options={'arguments': ['headless']},
    )
def get_Cookies(username="******", passwo="WE@3dfsa", session=None):
    """Log in through the browser and save its cookies to ``cookies.json``.

    Args:
        username: Login name typed into the form.
        passwo: Password typed into the form.
        session: Existing requestium session to reuse. A new Chrome session
            is created only when this is ``None``.

    Returns:
        The session that performed the login.
    """
    # The old check was ``if not None:``, which is always true, so a session
    # passed in by the caller was silently replaced by a new one.
    if session is None:
        session = Session(
            webdriver_path='E:/pythonWebWorkSpace/WorkSpace-FrameWork/TestProject/test_selenium/chromedriver.exe',
            browser='chrome',
            default_timeout=15,
        )

    session.driver.get('http://180.97.151.94:9012/login')
    us = '//*[@id="app"]/div/div[2]/div/div/div/div/div[2]/form/div[1]/div/div/input'
    pa = '//*[@id="app"]/div/div[2]/div/div/div/div/div[2]/form/div[2]/div/div/input'
    sub = '//*[@id="app"]/div/div[2]/div/div/div/div/div[2]/form/div[4]/div/button'
    session.driver.find_element_by_xpath(us).send_keys(username)
    session.driver.find_element_by_xpath(pa).send_keys(passwo)
    time.sleep(5)
    session.driver.find_element_by_xpath(sub).click()
    time.sleep(1)

    cookies = session.driver.get_cookies()
    with open('cookies.json', 'w') as f:
        # json.dump returns None, so its result is no longer assigned.
        json.dump(cookies, f)
    return session
def acquire(self, acquire_wait_timeout=15):
    '''
    Purpose:
        Get a requestium session.

    Arguments:
        acquire_wait_timeout - int - seconds to wait for a requestium
            session to free up when the pool is exhausted

    Returns:
        S - requestium.Session - requestium Session object, or None when
            no session became available before the timeout
    '''
    import time

    # IF SESSION AVAILABLE, TAKE IT
    if self.available:
        S = self.available.pop()
        self.inuse.append(S)
        return S

    # NO SESSION AVAILABLE, CREATE ONE
    if len(self.inuse) < self.pool_size:
        S = Session(**self.requestium_args)
        self.inuse.append(S)
        return S

    # NO SESSIONS AVAILABLE AND NO MORE ALLOWED, WAIT FOR ONE
    # A monotonic deadline replaces timedelta.seconds, which truncates to
    # whole seconds and let the wait overshoot the requested timeout.
    deadline = time.monotonic() + acquire_wait_timeout
    while self.inuse or self.available:
        if self.available:
            S = self.available.pop()
            self.inuse.append(S)
            return S
        if time.monotonic() >= deadline:
            break
        # Yield briefly instead of spinning at full CPU while polling.
        time.sleep(0.05)
    return None
def __init__(self, proxy=None):
    """Create the requestium session and optionally route it via a proxy.

    Args:
        proxy: Proxy URL applied to both HTTP and HTTPS requests of the
            session; ``None`` leaves the session without a proxy.
    """
    self.cookies = None
    self.proxy = proxy
    self.user = None

    # Previously a first Session was built, given the proxy, and then
    # replaced by this one, losing the proxy settings. Only the surviving
    # configuration is kept.
    # NOTE(review): browser='phantomjs' is combined with a chromedriver path
    # and a 'headless' argument - confirm which browser is intended.
    self.sess = Session(webdriver_path='/usr/local/bin/chromedriver',
                        browser='phantomjs',
                        default_timeout=15,
                        webdriver_options={'arguments': ['headless']})
    if proxy:
        # The proxies mapping lives on the session; ``self.proxies`` was
        # never defined and raised AttributeError.
        self.sess.proxies['http'] = proxy
        self.sess.proxies['https'] = proxy
def setup():
    """Return a Chrome requestium session using the module-level ``driver`` path."""
    # Headless mode (webdriver_options={'arguments': ['headless']}) is
    # currently disabled.
    session_kwargs = {
        'webdriver_path': driver,
        'browser': 'chrome',
        'default_timeout': 15,
    }
    return Session(**session_kwargs)
def test_locate_Document(session):
    """Open the check-method page and hand the session to ``add_zhifajianca``.

    Args:
        session: An existing requestium session, or a falsy value to have
            a new one created.

    Returns:
        Whatever ``add_zhifajianca`` returns for the session.
    """
    if not session:
        session = Session()
    driver = session.driver
    driver.get('http://180.97.151.94:9012/inspectionCheck/checkMethod')
    # Continue in the most recently opened window.
    newest_window = driver.window_handles[-1]
    driver.switch_to.window(newest_window)
    return add_zhifajianca(session)
def main():
    """Open the URL given on the command line and print its image nodes.

    The start URL is read from ``sys.argv[1]``. The page is loaded in
    Chrome, its cookies are copied to the requests side of the session, and
    the last listing link is then fetched with requests and queried for
    image elements.

    Exit status is 0 when the URL argument is missing and 2 after a
    completed run.
    NOTE(review): these exit codes look inverted relative to convention;
    they are kept unchanged because callers may depend on them.
    """
    try:
        start = sys.argv[1]
    except IndexError:
        print('ERROR: Requires URL as the first argument.')
        sys.exit(0)

    # Constants
    ACTUALIMAGES = '//*[@id="divImage"]//img'
    IMGGROUPS = '.listing a'
    TITLE = '.bigChar'

    s = Session(
        webdriver_path='C:\\Webdrivers\\chromedriver',
        browser='chrome'
    )
    try:
        s.driver.get(start)
        # Wait for the title element before reading the listing.
        s.driver.ensure_element_by_css_selector(TITLE)
        groups = s.driver.find_elements_by_css_selector(IMGGROUPS)
        s.transfer_driver_cookies_to_session()
        begin = to_attribute_list(groups, 'href').pop()
        response = s.get(begin).xpath(ACTUALIMAGES)
        print(response)
    finally:
        # Quit the browser as well as the requests session so no Chrome
        # process is left behind, even when a step above fails.
        s.driver.quit()
        s.close()
    sys.exit(2)
def login(email,password):
    """Open the Google sign-in page and type ``email`` into its first input.

    Args:
        email: Text typed into the first ``<input>`` element on the page.
        password: Accepted but not used by this function.

    Returns:
        The requestium session holding the open browser.
    """
    signin_url = 'https://accounts.google.com/signin/v2/identifier?hl=zh-CN&continue=https%3A%2F%2Fmail.google.com%2Fmail&service=mail&flowName=GlifWebSignIn&flowEntry=AddSession'
    s = Session('./chromedriver.exe', browser='chrome', default_timeout=15)
    s.driver.get(signin_url)
    first_input = s.driver.find_elements_by_tag_name('input')[0]
    first_input.send_keys(email)
    return s
def fetch_latest_bhavcopy(self):
    """Download the file behind the 'Bhavcopy file (csv)' link.

    The link is resolved with a headless Chrome session, fetched with
    ``requests``, and written to ``bhav.csv.zip`` in the current working
    directory.
    """
    nse_url = "https://www.nseindia.com/products/content/equities/equities/homepage_eq.htm"
    s = Session(webdriver_path='./chromedriver',
                browser='chrome',
                default_timeout=15,
                webdriver_options={'arguments': ['headless']})
    try:
        s.driver.get(nse_url)
        link = s.driver.ensure_element_by_link_text('Bhavcopy file (csv)').get_attribute('href')
    finally:
        # quit() ends the whole browser session, also when the lookup fails.
        s.driver.quit()

    r = requests.get(link)
    # Use a context manager so the archive is flushed and closed.
    with open('bhav.csv.zip', 'wb') as archive:
        archive.write(r.content)
def __init__(self):
    """Create a Chrome requestium session and log in over HTTP.

    Raises:
        RuntimeError: If the login request does not return an OK status.
    """
    # Create a session and authenticate. Headless mode
    # (webdriver_options={"arguments": ["--headless"]}) is currently disabled.
    self._s = Session(
        webdriver_path='/usr/lib/chromium-browser/chromedriver',
        browser='chrome')
    self._s.headers.update({
        'User-Agent':
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0'
    })

    # Login
    r = self._s.post('https://www.adopteunmec.com/auth/login',
                     data={
                         'username': '******',
                         'password': '******'
                     })
    if not r.ok:
        # Raising a plain string is a TypeError in Python 3; raise a real
        # exception carrying the same message.
        raise RuntimeError('Something wrong in login')
    time.sleep(2)
def main(url):
    """Print the ``#content`` text of ``url`` via Selenium, then via requests.

    The page is first loaded in headless Chrome. The browser cookies are
    then copied to the requests side of the session and the page is fetched
    again and parsed with BeautifulSoup.

    Args:
        url: Address of the page to load.
    """
    chrome_flags = ['disable-logging', 'headless']
    session = Session(
        webdriver_path='../Chrome Canary/chromedriver.exe',
        browser='chrome',
        default_timeout=6,
        webdriver_options={'arguments': chrome_flags},
    )
    session.driver.get(url)

    # Wait up to five seconds for the content div to be present in the DOM.
    content_locator = (By.XPATH, "//div[@id='content']")
    waiter = WebDriverWait(session.driver, 5)
    div_content = waiter.until(EC.presence_of_element_located(content_locator))
    print('######## FROM SELENIUM ########')
    print(div_content.text)

    print('######## COPYING SESSION FROM SELENIUM TO REQUESTS ########')
    session.transfer_driver_cookies_to_session()
    request_headers = {'user-agent': 'custom requestium'}
    final_response = session.get(url, headers=request_headers)
    soup = BeautifulSoup(final_response.text, 'html.parser')

    print('######## FROM REQUESTS ########')
    body_text = soup.find(id="content")
    print(body_text.text)
def setUp(self):
    """Start a headless Chrome session and create the test fixtures.

    Creates six solenoid valves, six bottles (the fourth marked empty),
    three cocktails and five bottle/cocktail links, then a test client.
    """
    chrome_flags = ['--headless', '--no-sandbox', '--disable-dev-shm-usage']
    self.browser = Session(
        webdriver_path='/usr/lib/chromium-browser/chromedriver',
        browser='chrome',
        default_timeout=15,
        webdriver_options={'arguments': chrome_flags},
    )

    # Valve N has id N, number N and step 10 * N; the pins are always 1 and 2.
    for index in range(1, 7):
        SolenoidValve.objects.create(
            id=index, number=index, step=index * 10, first_pin=1, second_pin=2)

    # Bottle N is attached to valve N; only bottle 4 is created empty.
    bottles = {}
    for index in range(1, 7):
        bottle_fields = {
            'id': index,
            'name': 'bottle{}'.format(index),
            'solenoid_valve_id': index,
        }
        if index == 4:
            bottle_fields['empty'] = True
        bottles[index] = Bottle.objects.create(**bottle_fields)

    cocktails = {}
    for index, word in enumerate(('one', 'two', 'three'), start=1):
        cocktails[index] = Cocktail.objects.create(
            id=index,
            name='cocktail{}'.format(word),
            description='cocktail {} description'.format(word),
        )

    # (bottle id, cocktail id, dose) for every link row, in creation order.
    links = ((1, 1, 1), (2, 2, 2), (3, 2, 3), (4, 3, 4), (5, 3, 4))
    for bottle_id, cocktail_id, dose in links:
        BottlesBelongsCocktails(
            bottle=bottles[bottle_id],
            cocktail=cocktails[cocktail_id],
            dose=dose,
        ).save()

    self.client = Client()
def __init__(
        self,
        mode: str = 'requestium',
        use_cache: bool = True,
        max_cache_size: int = 10000,
        timeout: int = 15,
        browser: str = 'chrome',
        loading_time: int = 3,  # delay to wait the webpage loading
        webdriver_path: str = os.path.join(curr_dir, 'chromedriver')):
    """Configure the fetch backend selected by ``mode``.

    Args:
        mode: One of ``'requests'``, ``'selenium'`` or ``'requestium'``.
        use_cache: When true, an ``LRUCache`` is created on
            ``self.html_cache``.
        max_cache_size: ``maxsize`` of that cache.
        timeout: Stored on ``self.timeout`` and used as the requestium
            default timeout.
        browser: Only ``'chrome'`` is accepted.
        loading_time: Stored on ``self.loading_time``.
        webdriver_path: Path to the Chrome webdriver executable.
    """
    assert mode in ['requests', 'selenium', 'requestium']
    assert browser in ['chrome']

    self.mode, self.loading_time = mode, loading_time
    self.timeout, self.use_cache = timeout, use_cache
    if use_cache:
        self.html_cache = LRUCache(maxsize=max_cache_size)

    # The backends are imported lazily so only the chosen one is required.
    if mode == 'selenium':
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options

        chrome_options = Options()
        chrome_options.add_argument("--headless")
        self.driver = webdriver.Chrome(webdriver_path,
                                       chrome_options=chrome_options)
    elif mode == 'requestium':
        from requestium import Session, Keys

        self.session = Session(
            webdriver_path=webdriver_path,
            browser='chrome',
            default_timeout=timeout,
            webdriver_options={'arguments': ['headless']})
    elif mode != 'requests':
        assert False, '"mode" must be either requests, selenium, or requestium.'
def login_Getcookie(myaccount, mypassword):
    """Log in through the browser and copy its cookies to the session.

    Args:
        myaccount: Account name typed into the login form.
        mypassword: Password typed into the login form.

    Returns:
        The requestium session, with the browser cookies transferred to its
        requests side.
    """
    # webdriver_options decides whether the browser is shown or the login
    # runs silently; headless mode is currently disabled.
    rq = Session(
        # Raw string: the backslashes are literal path separators. The
        # non-raw form relied on invalid escape sequences.
        webdriver_path=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver',
        browser='chrome',
        default_timeout=15,
    )
    rq.driver.get("https://passport.weibo.cn/signin/login")

    # The implicit wait must be set before the lookups it should cover.
    rq.driver.implicitly_wait(10)
    inputname = rq.driver.find_element("xpath", '//*[@id="loginName"]')
    password = rq.driver.find_element("xpath", '//*[@id="loginPassword"]')
    login_button = rq.driver.find_element("xpath", '//*[@id="loginAction"]')

    inputname.send_keys(myaccount)
    password.send_keys(mypassword)
    login_button.click()
    # NOTE(review): implicitly_wait only changes the lookup timeout; it does
    # not pause here, so the cookies may be copied before login completes.
    rq.driver.implicitly_wait(15)
    # A captcha may appear unpredictably; retrying the login a few times is
    # the current workaround.
    rq.transfer_driver_cookies_to_session()
    return rq
class ChromeTestCase(unittest.TestCase):
    """Cookie transfer tests between the Selenium and requests sides."""

    _LOGIN_URL = 'http://testing-ground.scraping.pro/login'
    _WELCOME_URL = 'http://testing-ground.scraping.pro/login?mode=welcome'
    _SUCCESS_XPATH = '//div[@id="case_login"]/h3[@class="success"]'

    def setUp(self):
        chrome_flags = ['headless', 'disable-gpu']
        self.s = Session(
            'chromedriver',
            browser='chrome',
            default_timeout=15,
            webdriver_options={'arguments': chrome_flags})

    def test_cookie_transfer_to_requests(self):
        """Tested on http://testing-ground.scraping.pro/login"""
        driver = self.s.driver
        driver.get(self._LOGIN_URL)
        driver.find_element_by_id('usr').send_keys('admin')
        driver.ensure_element_by_id('pwd').send_keys('12345', Keys.ENTER)
        # Wait for the success banner so the login cookies exist.
        driver.ensure_element_by_xpath(self._SUCCESS_XPATH)

        self.s.transfer_driver_cookies_to_session()
        response = self.s.get(self._WELCOME_URL)
        banner = response.xpath(self._SUCCESS_XPATH + '/text()')
        success_message = banner.extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Selenium to Requests')

    def test_cookie_transfer_to_selenium(self):
        self.s.get(self._LOGIN_URL)
        self.s.cookies.set('tdsess', 'TEST_DRIVE_SESSION',
                           domain='testing-ground.scraping.pro')

        self.s.transfer_session_cookies_to_driver()
        self.s.driver.get(self._WELCOME_URL)
        banner = self.s.driver.xpath(self._SUCCESS_XPATH + '/text()')
        success_message = banner.extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Requests to Selenium')

    def tearDown(self):
        self.s.driver.close()
def login():
    """Collect e-mail addresses from the member list after a manual login.

    The browser opens the login page and the user signs in by hand. The
    member list is then scrolled until the text ``'Ziemann, Donella'``
    appears in the page source. After every scroll the ``mailto``-style
    links are harvested.

    Returns:
        The e-mail addresses found, in first-seen order, without duplicates.
    """
    import os

    url = 'https://ident.lds.org/sso/UI/Login'
    url2 = 'https://www.lds.org/mls/mbr/records/member-list?lang=eng'
    print(os.getcwd())

    s = Session(
        '/Users/travis.howe/Projects/github/data_science/scrape/email_lst_scrape/chromedriver',
        browser='chrome',
        default_timeout=15)
    try:
        s.driver.get(url)
        print('Waiting for elements to load...')
        s.driver.ensure_element_by_id('IDToken1').send_keys(Keys.BACKSPACE)
        s.driver.ensure_element_by_id('IDToken2').send_keys(Keys.BACKSPACE)
        print('Please log-in in the chrome browser')
        s.driver.ensure_element_by_id("login-submit-button",
                                      timeout=60,
                                      state='present').click()
        s.driver.get(url2)
        s.driver.ensure_element_by_tag_name("tbody",
                                            timeout=60,
                                            state='visible')

        # todo: this isn't great - the loop only ends once the hard-coded
        # last name shows up in the page source.
        email_lst = []
        seen = set()
        while True:
            s.driver.execute_script(
                'window.scrollTo(0, document.body.scrollHeight);')
            time.sleep(3)
            new_page = s.driver.page_source

            soup = BeautifulSoup(new_page, 'lxml')
            for a_tag in soup.findAll('a'):
                if not a_tag.has_attr('ng-href'):
                    continue
                href = a_tag['href']
                if '@' not in href:
                    continue
                email = href.split(':')[1]
                # Each scroll re-reads the same page, so skip addresses
                # that were already collected.
                if email not in seen:
                    seen.add(email)
                    email_lst.append(email)

            if 'Ziemann, Donella' in new_page:
                break
        return email_lst
    finally:
        # The session is not returned, so release the browser here.
        s.driver.quit()
def get_session():
    """Return a Chrome session, logging in first when no cookies are saved.

    ``cookies.json`` is only checked for being readable and non-empty.
    NOTE(review): the loaded cookies are not applied to the session -
    confirm whether they were meant to be.

    Returns:
        The new session when saved cookies exist, otherwise the session
        returned by ``get_Cookies`` after a fresh login.
    """
    session = Session(
        webdriver_path='E:/pythonWebWorkSpace/WorkSpace-FrameWork/TestProject/test_selenium/chromedriver.exe',
        browser='chrome',
        default_timeout=15,
    )
    try:
        with open('cookies.json', 'r') as f:
            cookies = json.load(f)
        has_cookies = len(cookies) > 0
    except (OSError, ValueError, TypeError):
        # Missing/unreadable file, invalid JSON, or a JSON value without a
        # length all mean there are no usable cookies.
        has_cookies = False

    if has_cookies:
        return session
    return get_Cookies(username="******", session=session)
def get_image_links(
        main_keyword, supplemented_keywords, link_file_path, num_requested=1000):
    """Collect Google Images result URLs and write them to a file.

    One search is run per supplemented keyword, each combined with
    ``main_keyword``. The result page is scrolled and expanded, and the
    original-image URL of every result is gathered into a set.

    Args:
        main_keyword: Keyword used in every search query.
        supplemented_keywords: Sequence of extra keywords, one per search.
        link_file_path: File that receives one URL per line.
        num_requested: Target image count; sets how many scroll rounds
            are attempted (one round per 400 images, plus one).
    """
    from urllib.parse import quote_plus

    s = Session('chromedriver',
                browser='chrome',
                default_timeout=15)
    number_of_scrolls = int(num_requested / 400) + 1
    img_urls = set()
    try:
        for keyword in supplemented_keywords:
            search_query = main_keyword + ' ' + keyword
            # Encode the query so spaces, '&', '#' etc. cannot break the URL.
            url = ("https://www.google.com/search?q="
                   + quote_plus(search_query) + "&source=lnms&tbm=isch")
            s.driver.get(url)

            for _ in range(number_of_scrolls):
                # Several scrolls are needed before the page offers more.
                for __ in range(10):
                    s.driver.execute_script("window.scrollBy(0, 1000000)")
                    time.sleep(2)
                time.sleep(5)
                try:
                    s.driver.find_element_by_xpath(
                        "//input[@value='Show more results']").click()
                except Exception:
                    # No clickable "more" button: treat as end of results.
                    print("Process-{0} reach the end of page or get the maximum number of requested images".format(
                        main_keyword))
                    break

            images = s.driver.find_elements_by_xpath(
                '//div[contains(@class,"rg_meta")]')
            for img in images:
                # The "ou" key of each result's JSON metadata is the URL.
                img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
                img_urls.add(img_url)
            print('Process-{0} add keyword {1} , got {2} image urls so far'.format(
                main_keyword, keyword, len(img_urls)))
        print('Process-{0} totally get {1} images'.format(
            main_keyword, len(img_urls)))
    finally:
        # Always release the browser, also when a search step fails.
        s.driver.quit()

    with open(link_file_path, 'w') as wf:
        for url in img_urls:
            wf.write(url + '\n')
    print('Store all the links in file {0}'.format(link_file_path))
class ChromeTestCase(unittest.TestCase):
    """Checks that cookies move correctly between Selenium and requests."""

    _SITE_DOMAIN = 'testing-ground.scraping.pro'
    _LOGIN_PAGE = 'http://testing-ground.scraping.pro/login'
    _WELCOME_PAGE = 'http://testing-ground.scraping.pro/login?mode=welcome'
    _SUCCESS_HEADING = '//div[@id="case_login"]/h3[@class="success"]'
    _SUCCESS_TEXT = '//div[@id="case_login"]/h3[@class="success"]/text()'

    def setUp(self):
        options = {'arguments': ['headless', 'disable-gpu']}
        self.s = Session('chromedriver',
                         browser='chrome',
                         default_timeout=15,
                         webdriver_options=options)

    def test_cookie_transfer_to_requests(self):
        """Tested on http://testing-ground.scraping.pro/login"""
        browser = self.s.driver
        browser.get(self._LOGIN_PAGE)
        browser.find_element_by_id('usr').send_keys('admin')
        browser.ensure_element_by_id('pwd').send_keys('12345', Keys.ENTER)
        # The success heading only appears once the login has gone through.
        browser.ensure_element_by_xpath(self._SUCCESS_HEADING)

        self.s.transfer_driver_cookies_to_session()
        response = self.s.get(self._WELCOME_PAGE)
        success_message = response.xpath(self._SUCCESS_TEXT).extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Selenium to Requests')

    def test_cookie_transfer_to_selenium(self):
        self.s.get(self._LOGIN_PAGE)
        self.s.cookies.set('tdsess', 'TEST_DRIVE_SESSION',
                           domain=self._SITE_DOMAIN)

        self.s.transfer_session_cookies_to_driver()
        self.s.driver.get(self._WELCOME_PAGE)
        success_message = self.s.driver.xpath(
            self._SUCCESS_TEXT).extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Requests to Selenium')

    def tearDown(self):
        self.s.driver.close()
def setUp(self):
    """Create the headless Chrome requestium session used by each test."""
    chrome_options = {'arguments': ['headless', 'disable-gpu']}
    self.s = Session(
        'chromedriver',
        browser='chrome',
        default_timeout=15,
        webdriver_options=chrome_options,
    )