コード例 #1
0
ファイル: efunds.py プロジェクト: zofuthan/starapis
 def __enter__(self):
     """Create the browser session on entering the ``with`` block.

     A ``Session`` configured for headless Chrome is stored on
     ``self.session``.

     Returns:
         This object, so it can be bound by ``with ... as``.
     """
     session_kwargs = {
         'webdriver_path': '/usr/lib/chromium-browser/chromedriver',
         'browser': 'chrome',
         'default_timeout': 15,
         'webdriver_options': {'arguments': ['headless']},
     }
     self.session = Session(**session_kwargs)
     return self
コード例 #2
0
    def __init__(self, path):
        """Initialise API constants, the browser session and logging.

        Args:
            path: Forwarded to ``Session`` as ``webdriver_path``.
        """
        # Plain bookkeeping attributes all start out empty.
        self.last_json = ""
        self.last_response = None
        self.lastUserHandled = None
        self.privateUsers = {}
        self.users = {}

        # Request-signing constants and the user agent built from the
        # module-level device settings.
        self.IG_SIG_KEY = '4f8732eb9ba7d1c8e8897a75d6474d4eb3f5279137431b2aafb71fafe2abe178'
        self.SIG_KEY_VERSION = '4'
        user_agent_template = 'Instagram 10.26.0 Android ({android_version}/{android_release}; 640dpi; 1440x2560; {manufacturer}; {device}; {model}; samsungexynos8890; en_US)'
        self.USER_AGENT = user_agent_template.format(**DEVICE_SETTINTS)

        self.s = Session(webdriver_path=path,
                         browser='chrome',
                         default_timeout=15)

        # One logger per instance (named after id(self)): INFO and above go
        # to instatesi.log, DEBUG and above to the stream handler.
        self.logger = logging.getLogger('[instatesi_{}]'.format(id(self)))
        handler_specs = (
            (logging.FileHandler(filename='instatesi.log'), logging.INFO,
             '%(asctime)s %(message)s'),
            (logging.StreamHandler(), logging.DEBUG,
             '%(asctime)s - %(levelname)s - %(message)s'),
        )
        for handler, level, fmt in handler_specs:
            handler.setLevel(level)
            handler.setFormatter(logging.Formatter(fmt))
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.DEBUG)
コード例 #3
0
 def __init__(self,
              username,
              password,
              driver_path,
              download_path=None,
              browser='chrome',
              webdriver_options=None,
              logger=None):
     """Store the credentials and create the browser session.

     Args:
         username: Login name, kept on ``self._username``.
         password: Login password, kept on ``self._password``.
         driver_path: Forwarded to ``Session`` as ``webdriver_path``.
         download_path: Optional download location, stored unchanged.
         browser: Browser name forwarded to ``Session``.
         webdriver_options: Options dict forwarded to ``Session``. When
             omitted, a fresh ``{'arguments': ['headless']}`` dict is used
             (built per call, so instances never share one mutable default).
         logger: Logger to use. When omitted, logging is configured at DEBUG
             level and the ``odigo_downloader.downloader`` logger is used.
     """
     if logger is None:
         logging.basicConfig(level=logging.DEBUG)
         logger = logging.getLogger('odigo_downloader.downloader')
         logger.setLevel(logging.DEBUG)
     # Honour an injected logger; previously it was always overwritten.
     self.logger = logger

     if webdriver_options is None:
         webdriver_options = {'arguments': ['headless']}

     self._username = username
     self._password = password
     self.driver_path = driver_path
     self.download_path = download_path
     self.url = 'https://enregistreur.prosodie.com/odigo4isRecorder/EntryPoint?serviceName=LoginHandler'
     self.browser = browser
     self.webdriver_options = webdriver_options
     self.logger.debug(
         f"Creating Session object with values: {self.webdriver_options}")
     self.session = Session(webdriver_path=self.driver_path,
                            browser=self.browser,
                            default_timeout=15,
                            webdriver_options=self.webdriver_options)
     self.logger.debug(f"Session details: {self.session.driver}")
     # Starts out unvalidated.
     self.validated = False
コード例 #4
0
 def __init__(self):
     """Create the Chrome ``Session`` stored on ``self.s``."""
     # requestium's Session combines requests and Selenium.  Headless mode
     # is currently switched off: webdriver_options={'arguments':
     # ['headless']} is deliberately not passed.
     session_kwargs = dict(
         webdriver_path='./chromedriver',
         browser='chrome',
         default_timeout=15,
     )
     self.s = Session(**session_kwargs)
コード例 #5
0
ファイル: monster.py プロジェクト: todun/monster
	def __init__( self ):
		"""Turn verbose output off and create a headless Chrome ``Session``."""
		self.verbose = False
		# The webdriver path is passed as an empty string.
		session_options = {
				'webdriver_path' : ''
				,'browser' : 'chrome'
				,'default_timeout' : 15
				,'webdriver_options' : { 'arguments' : [ 'headless' ] }
			}
		self._session = Session( **session_options )
コード例 #6
0
    def __init__(self, userLogin: str, userPass: str) -> None:
        """Create the Chrome session and remember the login credentials.

        Args:
            userLogin: Account login, stored on ``self._userLogin``.
            userPass: Account password, stored on ``self._userPassword``.
        """
        session = Session("./chromedriver",
                          browser="chrome",
                          default_timeout=15)
        self._rugratSession = session
        self._userLogin, self._userPassword = userLogin, userPass
        # Nobody is logged in yet.
        self._isLogged = False

        # Default/recommended ranges (seconds) between comments and between
        # follows.
        self._rangeTimeBetComments, self._rangeTimeBetFollow = 290, 400
コード例 #7
0
    def __init__(self):
        """Create the Chrome session and load the category mapping if present.

        ``self.category_mapping`` stays ``None`` unless a file named
        ``FILENAME`` exists in the current working directory, in which case
        its JSON content is loaded.
        """
        # requestium's Session combines requests and Selenium.  Headless mode
        # is currently off (webdriver_options={'arguments': ['headless']} is
        # not passed).
        self.s = Session(
            webdriver_path='./chromedriver',
            browser='chrome',
            default_timeout=15,
        )
        self.category_mapping = None

        path = os.path.join(os.getcwd(), FILENAME)
        if os.path.exists(path):
            # Use a context manager so the file handle is closed promptly.
            with open(path) as mapping_file:
                self.category_mapping = ujson.load(mapping_file)
コード例 #8
0
 def setup_requestium_session(self):
     """Create ``self.session`` unless a session is already active.

     Returns:
         An explanatory message string when ``self.active`` is already set
         (nothing is created in that case); otherwise ``None`` after the
         session has been created and ``self.active`` set to ``True``.
     """
     if self.active:
         return f"Session/Browser already active. Cannot have two concurrent sessions/browsers"

     # Only request headless mode when the instance asks for it.
     webdriver_options = {'arguments': ['headless']} if self.headless else {}
     self.logger.debug(
         f"Creating Session object with values: {webdriver_options}")
     session_kwargs = {
         'webdriver_path': self.driver_path,
         'browser': 'chrome',
         'default_timeout': 15,
         'webdriver_options': webdriver_options,
     }
     self.session = Session(**session_kwargs)
     self.active = True
コード例 #9
0
ファイル: hupu.py プロジェクト: broholens/hupu-up-posts
 def __init__(self, comment_count=30, commentaries=None, start_at=8, end_with=23):
     """Create the headless Chrome session and store the posting settings.

     Args:
         comment_count: Stored on ``self.comment_count``.
         commentaries: Stored on ``self.commentaries``.
         start_at: Stored on ``self.start_at``.
         end_with: Stored on ``self.end_with``.
     """
     # The browser uses the module-level user agent; the requests side gets
     # the module-level default headers.
     chrome_arguments = ['headless', 'disable-gpu', f'user-agent={user_agent}']
     self.s = Session(
         './chromedriver',
         'chrome',
         default_timeout=60,
         webdriver_options={'arguments': chrome_arguments},
     )
     self.s.headers.update(s_headers)

     self.comment_count, self.commentaries = comment_count, commentaries
     self.start_at, self.end_with = start_at, end_with
     self.posts = Queue()
     self.exception_recoder = []
コード例 #10
0
def get_session(flag):
    """Build a Chrome ``Session``, headless when ``flag`` equals 0.

    Args:
        flag: ``0`` adds the headless/no-sandbox/disable-gpu arguments; any
            other value creates the session without webdriver options.

    Returns:
        The newly created ``Session``.
    """
    session_kwargs = {
        'webdriver_path': posC,
        'browser': 'chrome',
        'default_timeout': 15,
    }
    if flag == 0:
        session_kwargs['webdriver_options'] = {
            'arguments': ['headless', '--no-sandbox', '--disable-gpu'],
        }
    return Session(**session_kwargs)
コード例 #11
0
ファイル: main.py プロジェクト: eliotxu/neteaseShua
    def initDriver(self):
        """Start Chrome, open the login page and click a link found by id.

        The element id is sliced out of the page source: 24 characters,
        starting 26 characters after the first occurrence of
        ``"visibility:"``.
        """
        chrome_arguments = [
            # '-headless',
            '-mute-audio',
            '-window-size=1920,1080',
            '-start-maximized',
            '-no-sandbox',
        ]
        self.s = Session(
            webdriver_path=
            r'C:\Software\Cent\CentBrowser\Application\chromedriver.exe',
            browser='chrome',
            default_timeout=15,
            webdriver_options={'arguments': chrome_arguments})
        driver = self.s.driver

        driver.get('http://music.163.com/#/login')
        time.sleep(1)

        driver.switch_to.default_content()
        page = driver.page_source
        id_start = page.find("visibility:") + 26
        element_id = page[id_start:id_start + 24]
        link_xpath = "//*[@id='{}']/div[1]/div[1]/a".format(element_id)

        time.sleep(0.5)

        driver.find_element_by_xpath(link_xpath).click()
コード例 #12
0
def test_locate_Document(session):
    """Open the document-query page and click the fifth dropdown entry.

    Args:
        session: Session to drive; when falsy a new ``Session()`` is created.

    Returns:
        The session that was used, after ``fill_dsrcbbl`` has run on it.
    """
    if not session:
        session = Session()
    driver = session.driver

    driver.get('http://180.97.151.94:9012/inspectionCheck/queryDocuments?guid=da622969-6c38-4d4f-88c3-7e32e5aaec09&action=true')
    driver.implicitly_wait(3)
    # Continue in the last window handle.
    driver.switch_to.window(driver.window_handles[-1])
    driver.implicitly_wait(3)
    time.sleep(2)

    dropdown_trigger = '//*[@id="app"]/div/div[2]/section/div/div[1]/div[1]/div[1]/div/div/div'
    driver.find_element_by_xpath(dropdown_trigger).click()
    time.sleep(2)

    # Candidate menus are <ul> elements whose style attribute starts with
    # "position" (the original kept '#dropdown-menu-890' here as a note).
    uls = driver.find_elements_by_css_selector('ul[style^="position"]')
    print("--", uls)
    for candidate in uls:
        print("S1", candidate.get_attribute("class"))
        print("S ", candidate.get_attribute("style"))

    # The last matching list is the one that gets used.
    main_ul = uls[-1]
    print(main_ul.get_attribute("class"))
    time.sleep(1)
    main_ul.find_elements_by_tag_name('li')[4].click()

    fill_dsrcbbl(session)

    return session
コード例 #13
0
def setup():
    """Return a headless Chrome ``Session`` using the desktop chromedriver."""
    return Session(
        webdriver_path=r'C:\Users\RSTAUNTO\Desktop\chromedriver.exe',
        browser='chrome',
        default_timeout=15,
        webdriver_options={'arguments': ['headless']},
    )
コード例 #14
0
def get_Cookies(username="******", passwo="WE@3dfsa", session=None):
    """Log in through the browser and dump the resulting cookies to disk.

    Args:
        username: Value typed into the user-name field.
        passwo: Value typed into the password field.
        session: Existing session to reuse. A new Chrome ``Session`` is
            created only when this is ``None``.

    Returns:
        The session that performed the login. The browser cookies are also
        written to ``cookies.json`` in the current directory.
    """
    # The original test was ``if not None:``, which is always true and threw
    # away any session handed in by the caller.
    if session is None:
        session = Session(
            webdriver_path=
            'E:/pythonWebWorkSpace/WorkSpace-FrameWork/TestProject/test_selenium/chromedriver.exe',
            browser='chrome',
            default_timeout=15,
        )
    session.driver.get('http://180.97.151.94:9012/login')
    us = '//*[@id="app"]/div/div[2]/div/div/div/div/div[2]/form/div[1]/div/div/input'
    pa = '//*[@id="app"]/div/div[2]/div/div/div/div/div[2]/form/div[2]/div/div/input'
    sub = '//*[@id="app"]/div/div[2]/div/div/div/div/div[2]/form/div[4]/div/button'

    session.driver.find_element_by_xpath(us).send_keys(username)
    session.driver.find_element_by_xpath(pa).send_keys(passwo)
    time.sleep(5)

    session.driver.find_element_by_xpath(sub).click()
    time.sleep(1)

    cookies = session.driver.get_cookies()
    with open('cookies.json', 'w') as f:
        # json.dump returns None, so its result is not kept.
        json.dump(cookies, f)
    return session
コード例 #15
0
    def acquire(self, acquire_wait_timeout=15):
        '''
        Purpose:    Get a requestium session.
        Arguments:
            acquire_wait_timeout - int - seconds to wait for a requestium
                                         session to free up
        Returns:
            S - requestium.Session - requestium Session object, or None when
                                     no session became available in time
        '''
        # Local import so the block does not depend on a module-level
        # ``import time``.
        import time

        poll_interval = 0.05
        S = None

        # IF SESSION AVAILABLE, TAKE IT
        if len(self.available) > 0:
            S = self.available.pop()
            self.inuse.append(S)

        # NO SESSION AVAILABLE, CREATE ONE
        elif len(self.inuse) < self.pool_size:
            S = Session(**self.requestium_args)
            self.inuse.append(S)

        # NO SESSIONS AVAILABLE AND NO MORE ALLOWED, WAIT FOR ONE
        else:
            # A monotonic deadline is immune to wall-clock changes and does
            # not truncate to whole seconds the way ``timedelta.seconds``
            # does.
            deadline = time.monotonic() + acquire_wait_timeout
            # Stop waiting if the pool holds no sessions at all.
            while (len(self.inuse) + len(self.available)) > 0:
                if len(self.available) > 0:
                    S = self.available.pop()
                    self.inuse.append(S)
                    break
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                # Sleep briefly instead of spinning at full CPU.
                time.sleep(min(poll_interval, remaining))

        return S
コード例 #16
0
ファイル: github_search.py プロジェクト: Qingluan/MasscanFofa
 def __init__(self, proxy=None):
     """Create the session and optionally route its requests via a proxy.

     Args:
         proxy: Proxy URL applied to both the ``http`` and ``https`` entries
             of the session's ``proxies`` mapping; skipped when falsy.
     """
     self.cookies = None
     self.user = None
     self.proxy = proxy
     # Only one session is created now; the original built a first one that
     # was overwritten straight away.
     # NOTE(review): browser='phantomjs' is combined with a chromedriver path
     # and a 'headless' argument - confirm which browser is intended.
     self.sess = Session(webdriver_path='/usr/local/bin/chromedriver',
                         browser='phantomjs',
                         default_timeout=15,
                         webdriver_options={'arguments': ['headless']})
     if proxy:
         # The proxies mapping lives on the session. The original wrote to
         # ``self.proxies``, which is never defined in this initializer.
         self.sess.proxies['http'] = proxy
         self.sess.proxies['https'] = proxy
コード例 #17
0
def setup():
    """Return a Chrome ``Session`` using the module-level ``driver`` path.

    Headless mode is not enabled: no ``webdriver_options`` are passed.
    """
    session_kwargs = dict(
        webdriver_path=driver,
        browser='chrome',
        default_timeout=15,
    )
    return Session(**session_kwargs)
コード例 #18
0
def test_locate_Document(session):
    """Open the check-method page and hand over to ``add_zhifajianca``.

    Args:
        session: Session to drive; when falsy a new ``Session()`` is created.

    Returns:
        Whatever ``add_zhifajianca`` returns for the session.
    """
    if not session:
        session = Session()
    session.driver.get('http://180.97.151.94:9012/inspectionCheck/checkMethod')
    # Continue in the last window handle.
    last_window = session.driver.window_handles[-1]
    session.driver.switch_to.window(last_window)
    return add_zhifajianca(session)
コード例 #19
0
def main():
    """Open the URL given on the command line and print its image nodes.

    The start URL is read from ``sys.argv[1]``. The page is loaded in Chrome,
    the browser cookies are copied to the requests side of the session, the
    last listing link is fetched and the matching image nodes are printed.

    Raises:
        SystemExit: With status 1 when no URL argument was supplied, and
            with status 2 at the end of a completed run.
    """
    try:
        start = sys.argv[1]
    except IndexError:
        print('ERROR: Requires URL as the first argument.')
        # A usage error must not report success. ``sys.exit`` is also always
        # available, whereas ``quit`` is injected by the ``site`` module.
        sys.exit(1)

    # Selectors
    ACTUALIMAGES = '//*[@id="divImage"]//img'
    IMGGROUPS = '.listing a'
    TITLE = '.bigChar'

    s = Session(
        webdriver_path='C:\\Webdrivers\\chromedriver', browser='chrome'
    )  # ,webdriver_options={'arguments': ['headless', 'disable-gpu']}

    try:
        s.driver.get(start)
        s.driver.ensure_element_by_css_selector(TITLE)
        title = s.driver.find_element_by_css_selector(TITLE).text  # currently unused
        groups = s.driver.find_elements_by_css_selector(IMGGROUPS)
        s.transfer_driver_cookies_to_session()
        begin = to_attribute_list(groups, 'href').pop()
        response = s.get(begin).xpath(ACTUALIMAGES)
        print(response)
    finally:
        # Shut the browser down even when a step above fails; the original
        # only closed the requests side.
        s.driver.quit()
        s.close()
    # NOTE(review): exit status 2 after a completed run is kept from the
    # original - confirm this is what callers expect.
    sys.exit(2)
コード例 #20
0
ファイル: seleniumtest.py プロジェクト: 097633/Spider
def login(email,password):
    """Open the Google sign-in page and type ``email`` into the first input.

    Args:
        email: Text sent to the first ``<input>`` element on the page.
        password: Accepted but not used by this function.

    Returns:
        The ``Session`` whose browser is showing the sign-in page.
    """
    signin_url = 'https://accounts.google.com/signin/v2/identifier?hl=zh-CN&continue=https%3A%2F%2Fmail.google.com%2Fmail&service=mail&flowName=GlifWebSignIn&flowEntry=AddSession'

    s = Session('./chromedriver.exe', browser='chrome', default_timeout=15)
    s.driver.get(signin_url)
    first_input = s.driver.find_elements_by_tag_name('input')[0]
    first_input.send_keys(email)
    return s
コード例 #21
0
 def fetch_latest_bhavcopy(self):
     """Download the file behind the 'Bhavcopy file (csv)' link.

     The link is located with a headless Chrome session on the NSE equities
     home page. Its target is then fetched with ``requests`` and written to
     ``bhav.csv.zip`` in the current directory.
     """
     nse_url = "https://www.nseindia.com/products/content/equities/equities/homepage_eq.htm"
     s = Session(webdriver_path='./chromedriver',
                 browser='chrome',
                 default_timeout=15,
                 webdriver_options={'arguments': ['headless']})
     try:
         s.driver.get(nse_url)
         link = s.driver.ensure_element_by_link_text('Bhavcopy file (csv)').get_attribute('href')
     finally:
         # quit() ends the whole driver, where close() only closes the
         # current window; ``finally`` makes this happen on errors too.
         s.driver.quit()
     r = requests.get(link)
     # Context manager so the archive is flushed and closed.
     with open('bhav.csv.zip', 'wb') as archive:
         archive.write(r.content)
コード例 #22
0
    def __init__(self):
        """Create the session and log in with a POST request.

        Raises:
            RuntimeError: If the login response is not OK.
        """
        # Create a session and authenticate.  Headless mode is not enabled
        # (webdriver_options={"arguments": ["--headless"]} is not passed).
        self._s = Session(
            webdriver_path='/usr/lib/chromium-browser/chromedriver',
            browser='chrome')
        self._s.headers.update({
            'User-Agent':
            'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0'
        })

        # Login
        r = self._s.post('https://www.adopteunmec.com/auth/login',
                         data={
                             'username': '******',
                             'password': '******'
                         })
        if not r.ok:
            # Raising a plain string is itself a TypeError in Python 3, so
            # raise a real exception carrying the same message.
            raise RuntimeError('Something wrong in login')
        time.sleep(2)
コード例 #23
0
def main(url):
    """Print the ``content`` element of ``url`` via Selenium, then requests.

    The page is first loaded in headless Chrome and the text of
    ``div#content`` is printed. The browser cookies are then copied to the
    requests side of the session, the URL is fetched again and the same
    element is printed from the parsed HTML.

    Args:
        url: Address of the page to load.
    """
    chrome_arguments = ['disable-logging', 'headless']
    session = Session(
        webdriver_path='../Chrome Canary/chromedriver.exe',
        browser='chrome',
        default_timeout=6,
        webdriver_options={'arguments': chrome_arguments})

    session.driver.get(url)
    content_locator = (By.XPATH, "//div[@id='content']")
    waiter = WebDriverWait(session.driver, 5)
    div_content = waiter.until(EC.presence_of_element_located(content_locator))
    print('######## FROM SELENIUM ########')
    print(div_content.text)

    print('######## COPYING SESSION FROM SELENIUM TO REQUESTS ########')
    session.transfer_driver_cookies_to_session()
    request_headers = {'user-agent': 'custom requestium'}
    final_response = session.get(url, headers=request_headers)

    print('######## FROM REQUESTS ########')
    parsed = BeautifulSoup(final_response.text, 'html.parser')
    print(parsed.find(id="content").text)
コード例 #24
0
ファイル: tests.py プロジェクト: scwall/cocktail_engine
    def setUp(self):
        """Start headless Chrome and create the valve/bottle/cocktail fixtures.

        Creates six solenoid valves, six bottles (bottle 4 is marked empty),
        three cocktails and five bottle-to-cocktail links, then a test
        ``Client``.
        """
        chrome_arguments = ['--headless', '--no-sandbox', '--disable-dev-shm-usage']
        self.browser = Session(webdriver_path='/usr/lib/chromium-browser/chromedriver',
                               browser='chrome',
                               default_timeout=15,
                               webdriver_options={'arguments': chrome_arguments})

        # Valve n has step n * 10; all valves share the same two pins.
        for number in range(1, 7):
            SolenoidValve.objects.create(id=number, number=number,
                                         step=number * 10,
                                         first_pin=1, second_pin=2)

        # Bottle n sits on valve n; only bottle 4 is created as empty.
        bottles = {}
        for number in range(1, 7):
            extra = {'empty': True} if number == 4 else {}
            bottles[number] = Bottle.objects.create(
                id=number, name='bottle{}'.format(number),
                solenoid_valve_id=number, **extra)

        cocktails = {}
        for number, word in enumerate(('one', 'two', 'three'), start=1):
            cocktails[number] = Cocktail.objects.create(
                id=number, name="cocktail" + word,
                description='cocktail {} description'.format(word))

        # (bottle id, cocktail id, dose) for each link row.
        recipe_rows = (
            (1, 1, 1),
            (2, 2, 2),
            (3, 2, 3),
            (4, 3, 4),
            (5, 3, 4),
        )
        for bottle_id, cocktail_id, dose in recipe_rows:
            BottlesBelongsCocktails(bottle=bottles[bottle_id],
                                    cocktail=cocktails[cocktail_id],
                                    dose=dose).save()

        self.client = Client()
コード例 #25
0
    def __init__(
        self,
        mode: str = 'requestium',
        use_cache: bool = True,
        max_cache_size: int = 10000,
        timeout: int = 15,
        browser: str = 'chrome',
        loading_time: int = 3,  # delay to wait the webpage loading
        webdriver_path: str = os.path.join(curr_dir, 'chromedriver')):
        """Store the settings and prepare the backend selected by ``mode``.

        Args:
            mode: One of ``'requests'``, ``'selenium'`` or ``'requestium'``.
            use_cache: When true, an ``LRUCache`` is created on
                ``self.html_cache``.
            max_cache_size: ``maxsize`` of that cache.
            timeout: Stored on ``self.timeout``; also the session's default
                timeout in ``'requestium'`` mode.
            browser: Must be ``'chrome'``.
            loading_time: Stored on ``self.loading_time``.
            webdriver_path: Driver path for the selenium/requestium modes.

        Raises:
            AssertionError: If ``mode`` or ``browser`` is not supported.
        """
        assert mode in ['requests', 'selenium', 'requestium']
        assert browser in ['chrome']

        self.mode, self.use_cache = mode, use_cache
        self.loading_time, self.timeout = loading_time, timeout
        if use_cache:
            self.html_cache = LRUCache(maxsize=max_cache_size)

        # Plain requests mode needs no further setup.
        if mode == 'requests':
            return

        if mode == 'selenium':
            from selenium import webdriver
            from selenium.webdriver.chrome.options import Options
            headless_options = Options()
            headless_options.add_argument("--headless")
            self.driver = webdriver.Chrome(webdriver_path,
                                           chrome_options=headless_options)
            return

        if mode == 'requestium':
            from requestium import Session, Keys
            self.session = Session(
                webdriver_path=webdriver_path,
                browser='chrome',
                default_timeout=timeout,
                webdriver_options={'arguments': ['headless']})
            return

        assert False, '"mode" must be either requests, selenium, or requestium.'
コード例 #26
0
def login_Getcookie(myaccount,
                    mypassword):  # webdriver_options decides whether the browser is shown or the login runs silently
    """Log in to Weibo in Chrome and copy the browser cookies to requests.

    Args:
        myaccount: Text typed into the login-name field.
        mypassword: Text typed into the password field.

    Returns:
        The ``Session`` after the browser cookies have been transferred.
    """
    rq = Session(
        # Raw string: the original non-raw literal relied on invalid escape
        # sequences (\P, \G, ...). The resulting path is unchanged.
        webdriver_path=
        r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver',
        browser='chrome',
        default_timeout=15,
        #webdriver_options={'arguments': ['headless']}
    )
    rq.driver.get("https://passport.weibo.cn/signin/login")

    # Set the implicit wait before looking elements up so that it actually
    # covers those lookups.
    rq.driver.implicitly_wait(10)
    inputname = rq.driver.find_element("xpath", '//*[@id="loginName"]')
    password = rq.driver.find_element("xpath", '//*[@id="loginPassword"]')
    login_button = rq.driver.find_element("xpath", '//*[@id="loginAction"]')
    inputname.send_keys(myaccount)
    password.send_keys(mypassword)
    login_button.click()
    rq.driver.implicitly_wait(15)
    # A captcha may appear at unpredictable times; logging in a few times
    # usually gets through.
    # NOTE(review): cookies are copied right after the click; nothing here
    # waits for the login to finish - confirm this is sufficient.
    rq.transfer_driver_cookies_to_session()
    return rq
コード例 #27
0
class ChromeTestCase(unittest.TestCase):
    """Cookie-transfer tests between the Selenium and requests sides."""

    def setUp(self):
        """Create a headless Chrome session for each test."""
        self.s = Session(
            'chromedriver',
            browser='chrome',
            default_timeout=15,
            webdriver_options={'arguments': ['headless', 'disable-gpu']})

    def test_cookie_transfer_to_requests(self):
        """Tested on http://testing-ground.scraping.pro/login"""

        # Log in through the browser and wait for the success banner.
        self.s.driver.get('http://testing-ground.scraping.pro/login')
        self.s.driver.find_element_by_id('usr').send_keys('admin')
        self.s.driver.ensure_element_by_id('pwd').send_keys(
            '12345', Keys.ENTER)
        self.s.driver.ensure_element_by_xpath(
            '//div[@id="case_login"]/h3[@class="success"]')

        # The requests side must now be treated as logged in as well.
        self.s.transfer_driver_cookies_to_session()
        response = self.s.get(
            'http://testing-ground.scraping.pro/login?mode=welcome')
        success_message = response.xpath(
            '//div[@id="case_login"]/h3[@class="success"]/text()'
        ).extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Selenium to Requests')

    def test_cookie_transfer_to_selenium(self):
        """Set the session cookie via requests and check the browser sees it."""
        self.s.get('http://testing-ground.scraping.pro/login')
        self.s.cookies.set('tdsess',
                           'TEST_DRIVE_SESSION',
                           domain='testing-ground.scraping.pro')

        self.s.transfer_session_cookies_to_driver()
        self.s.driver.get(
            'http://testing-ground.scraping.pro/login?mode=welcome')
        success_message = self.s.driver.xpath(
            '//div[@id="case_login"]/h3[@class="success"]/text()'
        ).extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)',
            'Failed to transfer cookies from Requests to Selenium')

    def tearDown(self):
        """Shut the browser down after each test."""
        # quit() ends the whole driver; close() only closes the current
        # window and can leave a driver process behind for every test.
        self.s.driver.quit()
コード例 #28
0
def login():
    """Collect e-mail addresses from the member list after a manual login.

    Opens the sign-in page in Chrome, waits for the user to log in, loads the
    member list and keeps scrolling to the bottom until the page source
    contains 'Ziemann, Donella'. The final page is then parsed for links.

    Returns:
        A list with the part after the first ':' of every ``href`` that
        contains '@', taken from ``<a>`` tags carrying an ``ng-href``
        attribute.
    """
    import os

    login_url = 'https://ident.lds.org/sso/UI/Login'
    member_list_url = 'https://www.lds.org/mls/mbr/records/member-list?lang=eng'

    print(os.getcwd())

    s = Session(
        '/Users/travis.howe/Projects/github/data_science/scrape/email_lst_scrape/chromedriver',
        browser='chrome',
        default_timeout=15)
    driver = s.driver
    driver.get(login_url)

    print('Waiting for elements to load...')
    for field_id in ('IDToken1', 'IDToken2'):
        driver.ensure_element_by_id(field_id).send_keys(Keys.BACKSPACE)
    print('Please log-in in the chrome browser')

    submit_button = driver.ensure_element_by_id("login-submit-button",
                                                timeout=60,
                                                state='present')
    submit_button.click()

    driver.get(member_list_url)
    driver.ensure_element_by_tag_name("tbody", timeout=60, state='visible')

    # todo: this isn't great
    # Scroll until the marker name shows up in the page source.
    while True:
        driver.execute_script(
            'window.scrollTo(0, document.body.scrollHeight);')
        time.sleep(3)
        new_page = driver.page_source
        if 'Ziemann, Donella' in new_page:
            break

    soup = BeautifulSoup(new_page, 'lxml')
    hrefs = [
        a_tag['href'] for a_tag in soup.findAll('a')
        if a_tag.has_attr('ng-href')
    ]
    return [href.split(':')[1] for href in hrefs if '@' in href]
コード例 #29
0
def get_session():
    """Return a Chrome session, logging in when no cookies are stored.

    A new ``Session`` is created first. If ``cookies.json`` can be read and
    holds a non-empty value, that session is returned as it is. Otherwise
    ``get_Cookies`` is called to perform a login.

    Returns:
        The session, either untouched or as returned by ``get_Cookies``.
    """
    session = Session(
        webdriver_path=
        'E:/pythonWebWorkSpace/WorkSpace-FrameWork/TestProject/test_selenium/chromedriver.exe',
        browser='chrome',
        default_timeout=15,
        # webdriver_options={'arguments': ['headless']}
    )
    # Catch only a missing/unreadable file (OSError) or invalid JSON
    # (ValueError); a bare ``except`` would also swallow KeyboardInterrupt.
    try:
        with open('cookies.json', 'r') as f:
            cookies = json.load(f)
    except (OSError, ValueError):
        cookies = None

    if cookies:
        # NOTE(review): the stored cookies are only checked for presence and
        # are not applied to the session here - confirm this is intended.
        return session
    return get_Cookies(username="******", session=session)
コード例 #30
0
def get_image_links( main_keyword, supplemented_keywords, link_file_path, num_requested=1000):
    """Collect Google image-search result URLs and write them to a file.

    For each supplemented keyword, the query "<main_keyword> <keyword>" is
    searched and the page is scrolled, clicking 'Show more results' while it
    exists. The ``ou`` field of every ``rg_meta`` element is collected.

    Args:
        main_keyword: Keyword put at the front of every query.
        supplemented_keywords: Keywords appended to it, one query each.
        link_file_path: File that receives the URLs, one per line.
        num_requested: Used to size the scrolling: one scroll round per 400
            requested images, plus one.
    """
    # Local import so no module-level import has to be added.
    from urllib.parse import quote_plus

    s = Session('chromedriver',
                browser='chrome',
                default_timeout=15,
                #webdriver_options={'arguments': ['headless', 'disable-gpu']}
                )
    number_of_scrolls = int(num_requested / 400) + 1
    img_urls = set()
    try:
        for keyword in supplemented_keywords:
            search_query = main_keyword + ' ' + keyword
            # Encode the query so spaces and characters such as '&' or '#'
            # cannot break the URL.
            url = "https://www.google.com/search?q=" + quote_plus(search_query) + "&source=lnms&tbm=isch"
            s.driver.get(url)

            for _ in range(number_of_scrolls):
                for __ in range(10):
                    s.driver.execute_script("window.scrollBy(0, 1000000)")
                    time.sleep(2)
                time.sleep(5)
                try:
                    s.driver.find_element_by_xpath("//input[@value='Show more results']").click()
                except Exception:
                    # Best effort: a missing or unclickable button is taken
                    # to mean there is nothing more to load.
                    print("Process-{0} reach the end of page or get the maximum number of requested images".format(
                        main_keyword ))
                    break

            images = s.driver.find_elements_by_xpath('//div[contains(@class,"rg_meta")]')
            for img in images:
                img_url = json.loads(img.get_attribute('innerHTML'))["ou"]
                img_urls.add(img_url)
            print('Process-{0} add keyword {1} , got {2} image urls so far'.format(main_keyword, keyword,
                                                                                   len(img_urls)))
        print('Process-{0} totally get {1} images'.format(main_keyword, len(img_urls)))
    finally:
        # Always shut the browser down, even when scraping fails midway.
        s.driver.quit()

    with open(link_file_path, 'w') as wf:
        for url in img_urls:
            wf.write(url + '\n')
    print('Store all the links in file {0}'.format(link_file_path))
コード例 #31
0
class ChromeTestCase(unittest.TestCase):
    """Cookie-transfer tests between the Selenium and requests sides."""

    def setUp(self):
        """Create a headless Chrome session for each test."""
        self.s = Session('chromedriver',
                         browser='chrome',
                         default_timeout=15,
                         webdriver_options={'arguments': ['headless', 'disable-gpu']})

    def test_cookie_transfer_to_requests(self):
        """Tested on http://testing-ground.scraping.pro/login"""

        # Log in through the browser and wait for the success banner.
        self.s.driver.get('http://testing-ground.scraping.pro/login')
        self.s.driver.find_element_by_id('usr').send_keys('admin')
        self.s.driver.ensure_element_by_id('pwd').send_keys('12345', Keys.ENTER)
        self.s.driver.ensure_element_by_xpath('//div[@id="case_login"]/h3[@class="success"]')

        # The requests side must now be treated as logged in as well.
        self.s.transfer_driver_cookies_to_session()
        response = self.s.get('http://testing-ground.scraping.pro/login?mode=welcome')
        success_message = response.xpath(
            '//div[@id="case_login"]/h3[@class="success"]/text()').extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)', 'Failed to transfer cookies from Selenium to Requests')

    def test_cookie_transfer_to_selenium(self):
        """Set the session cookie via requests and check the browser sees it."""
        self.s.get('http://testing-ground.scraping.pro/login')
        self.s.cookies.set('tdsess', 'TEST_DRIVE_SESSION', domain='testing-ground.scraping.pro')

        self.s.transfer_session_cookies_to_driver()
        self.s.driver.get('http://testing-ground.scraping.pro/login?mode=welcome')
        success_message = self.s.driver.xpath(
            '//div[@id="case_login"]/h3[@class="success"]/text()').extract_first()

        self.assertEqual(
            success_message, 'WELCOME :)', 'Failed to transfer cookies from Requests to Selenium')

    def tearDown(self):
        """Shut the browser down after each test."""
        # quit() ends the whole driver; close() only closes the current
        # window and can leave a driver process behind for every test.
        self.s.driver.quit()
コード例 #32
0
 def setUp(self):
     """Create the headless Chrome session used by each test."""
     headless_chrome = {'arguments': ['headless', 'disable-gpu']}
     self.s = Session(
         'chromedriver',
         browser='chrome',
         default_timeout=15,
         webdriver_options=headless_chrome,
     )