Ejemplo n.º 1
0
 def start_search(self):
     """Search Bing for ``self.keyword`` through each proxy and open the target site.

     For every entry of ``self.socket_list`` a Firefox instance is started
     with that entry configured as a manual HTTP/SSL proxy.  The method then
     submits ``self.keyword`` on Bing, walks the result pages looking for a
     link whose ``href`` contains the domain derived from ``self.url``,
     clicks it, clicks one randomly chosen link on the page it lands on, and
     finally quits the browser.  Failures print a message, quit the browser
     and (in most places) move on to the next socket.
     """
     for socket in self.socket_list:
         # Each entry is split on whitespace: first token is the proxy host,
         # second token is the proxy port.
         temp_socket = socket.split()
         PROXY_HOST = temp_socket[0]
         PROXY_PORT = temp_socket[1]
         fp = webdriver.FirefoxProfile()
         # network.proxy.type is set to 1 together with explicit host/port
         # preferences for both http and ssl traffic.
         fp.set_preference("network.proxy.type", 1)
         fp.set_preference("network.proxy.http", PROXY_HOST)
         fp.set_preference("network.proxy.http_port", int(PROXY_PORT))
         fp.set_preference("network.proxy.ssl", PROXY_HOST)
         fp.set_preference("network.proxy.ssl_port", int(PROXY_PORT))
         fp.set_preference("general.useragent.override", "whater_useragent")
         fp.update_preferences()
         browser2 = webdriver.Firefox(firefox_profile=fp)
         try:
             browser2.get('https://www.bing.com/')
             assert "Bing" in browser2.title
         # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit.
         except:
             print("retrying Bing with different socket")
             browser2.quit()
             continue
         print("\n--------------------------------------------")
         print("using socket: " + PROXY_HOST + ":" + PROXY_PORT)
         print("searching for keyword(s):   " + self.keyword)
         search_box = browser2.find_element_by_name("q")
         sub_button = browser2.find_element_by_name("go")
         # Random 20-31 second pauses between the individual UI actions.
         sleep(random.randint(20, 30) + random.random())
         search_box.send_keys(self.keyword)
         sleep(random.randint(20, 30) + random.random())
         sub_button.send_keys(Keys.RETURN)
         sleep(random.randint(20, 30) + random.random())
         page_index = 0
         # Strip everything up to and including "//" (the scheme) from the URL.
         extractor = re.sub(r".*//", "", self.url)
         # NOTE(review): the first four characters are dropped whenever "www."
         # occurs anywhere in the string, not only when it is a prefix.
         if "www." in extractor:
             domain = extractor[4:]
         else:
             domain = extractor
         # Keep paging through the results until the browser is on the domain.
         while domain not in browser2.current_url:
             page_index += 1
             print("current index:   " + str(page_index))
             page_links = browser2.find_elements_by_xpath("//a[@href]")
             found_link = ""
             for link in page_links:
                 try:
                     # The loop does not stop at the first hit, so the last
                     # matching link on the page is the one kept.
                     if domain in link.get_attribute("href"):
                         sleep(1)
                         found_link = link
                 except:
                     print("stale element")
                     browser2.quit()
                     # NOTE(review): this break only leaves the inner for
                     # loop; the code below keeps using the browser that was
                     # just quit.
                     break
             if found_link:
                 print("Found " + domain + " at index " + str(page_index))
                 found_link.click()
             sleep(5 + random.random())
             try:
                 if domain not in browser2.current_url:
                     sleep(random.randint(5, 10) + random.random())
                     try:
                         # The next results page is located through the link
                         # whose text is the next page number.
                         idx = str(page_index + 1)
                         browser2.find_element_by_link_text(idx).click()
                     except:
                         print("Exception occurred: trying next socket")
                         browser2.quit()
                         break
                 else:
                     page_index = 0
             except:
                 print("Exception occurred: trying next socket")
                 browser2.quit()
                 break
         # NOTE(review): this point is also reached after the breaks above,
         # i.e. with an already-quit browser; the 30-60 s sleep still runs
         # before the lookup below fails and the next socket is tried.
         sleep(random.randint(30, 60) + random.random())
         try:
             target_links = browser2.find_elements_by_xpath("//a[@href]")
         except:
             browser2.quit()
             continue
         # NOTE(review): randint(0, -1) raises ValueError when the page has
         # no links at all.
         random_page_num = random.randint(0, len(target_links) - 1)
         sleep(random.randint(30, 60) + random.random())
         target_link = target_links[random_page_num]
         try:
             target_link.click()
         except:
             print("Invalid target link... retrying with next socket")
             browser2.quit()
             sleep(1 + random.random())
             continue
         sleep(random.randint(10, 15) + random.random())
         print("visiting random page:   " + browser2.current_url)
         sleep(random.randint(30, 60) + random.random())
         browser2.quit()
Ejemplo n.º 2
0
 def setUp(self):
     """Open a Firefox session and launch the Flask app in a shell subprocess."""
     flask_command = "export FLASK_APP=app.py; env/bin/python -m flask run"
     self.driver = webdriver.Firefox()
     # preexec_fn=os.setsid makes the spawned shell call setsid() before exec.
     self.app = Popen(
         flask_command,
         stdin=PIPE,
         stdout=DEVNULL,
         shell=True,
         preexec_fn=os.setsid,
     )
     # Short pause after spawning the server process.
     time.sleep(.5)
Ejemplo n.º 3
0
def searchLine(myline,
               url="http://cti.voa.gov.uk/cti/inits.asp",
               browser_options=browser_options):
    """Look up the council tax band for one comma-separated input record.

    The record's postcode is submitted to the search form at ``url``; every
    result page is collected by repeatedly calling the page's ``Next()``
    script, and the rows are then matched against the record's address
    fields.

    Args:
        myline: One input record as a comma-separated string.  After
            removing double quotes it must split into exactly 16 fields;
            field 3 is used as the postcode, fields 7 (PAON) and 8 (SAON)
            are compared with the returned addresses.
        url: Address of the search form.
        browser_options: Options object handed to ``webdriver.Firefox``.

    Returns:
        ``'fatalErr'`` when the record does not have 16 fields, ``'err'``
        when the page could not be driven or returned no text,
        ``'notFound'`` when no row matches, otherwise the band column of the
        first matching row.
    """
    myline = myline.replace('"', '').split(',')
    if len(myline) != 16:
        print('[FATAL] Wrong format of input data, cannot perform research.')
        return 'fatalErr'
    postcode = myline[3]

    driver = None
    # The outer try/finally guarantees the browser is closed on every exit
    # path, including the early returns and unexpected exceptions.
    try:
        try:
            driver = webdriver.Firefox(
                options=browser_options,
                firefox_binary="/kaggle/working/firefox/firefox/firefox")
            driver.get(url)
            txtPC = driver.find_element_by_name("txtPostCode")
            # The value is set through JavaScript rather than send_keys.
            driver.execute_script('arguments[0].value = arguments[1]', txtPC,
                                  postcode)
            driver.find_element_by_id('frmInitSForm').submit()
            time.sleep(1.5)
            oldtext = driver.find_element_by_class_name('scl_complex').text
        except Exception:
            print(
                '[ERROR] Something went wrong with this search, a connection error was returned'
            )
            return 'err'

        if oldtext == '':
            answer = 'err'
            print('Line, Answer: %s, %s' % (myline, answer))
            print()
            return answer

        # Page through the results; a JavascriptException from Next() is
        # treated as "no further page".
        while True:
            try:
                driver.execute_script("Next();")
                time.sleep(2)
            except selenium.common.exceptions.JavascriptException:
                break
            try:
                scl_complex = driver.find_element_by_class_name('scl_complex')
                oldtext = oldtext + '\n' + scl_complex.text
            except Exception:
                print(
                    '[ERROR] Something went wrong with this search, a connection error was returned'
                )
                return 'err'
    finally:
        if driver is not None:
            driver.quit()

    # Drop the table header and collapse runs of spaces.
    oldtext = oldtext.replace(
        'Address Council Tax band Improvement indicator Local authority reference number\n',
        '')
    while '  ' in oldtext:
        oldtext = oldtext.replace('  ', ' ')

    if 'Local authority name' in oldtext:
        answer = 'notFound'
        print('Line, Answer: %s, %s' % (myline, answer))
        print()
        return answer

    # Each usable row becomes (address, band, reference).
    res = []
    for line in oldtext.split('\n'):
        ls = line.split(' ')
        if len(ls) < 2:
            continue  # protection against empty lines in server answer
        if ls[-2] == 'Yes':  # if present, ignore 'Improvement indicator' field
            ls.pop(-2)
            if len(ls) < 2:
                continue  # row held nothing but the indicator and one token
        res.append((' '.join(ls[:-2]), ls[-2], ls[-1]))

    # Matching passes, tried in order until one yields a band:
    # exact PAON, exact SAON, then any whitespace-separated part of SAON.
    matchers = (
        lambda head: head == myline[7],
        lambda head: head == myline[8],
        lambda head: head in myline[8].split(),
    )
    answer = 'notFound'
    for matches in matchers:
        for t in res:
            if matches(t[0].split(',')[0]):
                answer = t[1]
                break
        if answer != 'notFound':
            break

    print('Line, Answer: %s, %s' % (myline, answer))
    print()
    return answer
Ejemplo n.º 4
0
 def __init__(self):
     """Open a Firefox session and attach the session/group/contact helpers."""
     self.wd = browser = webdriver.Firefox()
     # Element lookups wait up to 5 seconds before failing.
     browser.implicitly_wait(5)
     # Every helper receives this object itself.
     self.session = SessionHelper(self)
     self.group = GroupHelper(self)
     self.contact = ContactHelper(self)
Ejemplo n.º 5
0
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary

# Start Firefox through geckodriver with marionette enabled and open a page.
caps = webdriver.DesiredCapabilities().FIREFOX
caps['marionette'] = True
binary = FirefoxBinary(r'/Applications/Firefox.app/Contents/MacOS/firefox')
# This is the default location of the Firefox executable installed on macOS.
driver = webdriver.Firefox(firefox_binary=binary, capabilities=caps,
                           executable_path='/Users/kongweichang/Downloads/geckodriver')
url = 'https://www.baidu.com/'
# Can be changed to whichever page you want to display.
driver.get(url)
Ejemplo n.º 6
0
# Local import: the password prompt below must not echo what is typed.
import getpass

options = Options()
options.headless = True

print(
    "[Firefox must be installed and configured to use with Selenium]\n[Temporary restrictions may be placed on your account for unfollowing too quickly]\n[All locally processed, check the source code to confirm. Don't run if you don't understand]\n"
)

user_username = input("What's your Instagram username?\n")
# getpass reads the password without showing it on the terminal, unlike
# input(), which would leave it visible on screen and in scrollback.
password = getpass.getpass("What's your Instagram password?\n")
username = input("What account do you want to check?\n")
directory = input(
    "Where would you like to save the .txt files generated by this script?\n[Full path required]"
)

browser = webdriver.Firefox(options=options)

# LOGGING INTO INSTAGRAM ----------------------------------------------------------

browser.get('https://instagram.com')
# NOTE(review): fixed one-second sleeps are used instead of explicit waits;
# the lookups below fail if the form has not rendered in that time.
time.sleep(1)
# Locate the username and password fields of the login form.
usernameField = browser.find_element_by_name("username")
passwordField = browser.find_element_by_name("password")
time.sleep(1)
# Type the credentials into the form.
usernameField.send_keys(user_username)
passwordField.send_keys(password)
# Find and click the login button.
logInButton = browser.find_element_by_css_selector('.L3NKy > div:nth-child(1)')
logInButton.click()
Ejemplo n.º 7
0
 def setUp(self):
     """Open a new Firefox browser and keep it on ``self.browser``."""
     firefox = webdriver.Firefox()
     self.browser = firefox
Ejemplo n.º 8
0
 def __init__(self, username, password):
     """Remember the given credentials and open a Firefox session as ``self.bot``."""
     self.username, self.password = username, password
     firefox = webdriver.Firefox()
     self.bot = firefox
Ejemplo n.º 9
0
	def setUp(self,value='baiduUrl'):
		"""Open a maximized Firefox window on the URL returned by ``getXmlData(value)``."""
		self.driver = browser = webdriver.Firefox()
		# Element lookups wait up to 30 seconds before failing.
		browser.implicitly_wait(30)
		browser.maximize_window()
		start_url = self.getXmlData(value)
		browser.get(start_url)
# Log in to a site served on localhost by filling the login form field by field.
from selenium import webdriver  # import the web driver from selenium
import time
driver=webdriver.Firefox()  # create the Firefox driver
driver.get('http://127.0.0.1')  # open the URL
time.sleep(2)  # sleep for 2 seconds
driver.find_element_by_id('username').send_keys('zhangxu')  # locate the username input by id and type the name
time.sleep(2)  # sleep for 2 seconds
driver.find_element_by_id('password').send_keys('1234')  # locate the password input by id and type the password
time.sleep(2)  # sleep for 2 seconds
driver.find_element_by_id('btn_login').click()  # locate the login button by id and click it
time.sleep(2)  # sleep for 2 seconds


Ejemplo n.º 11
0
Archivo: ib.py Proyecto: zurajm/pybank
    def login(self, username=None, password=None):
        """Start Firefox, open the login page and sign the user in.

        A temporary download directory is created and the Firefox profile is
        configured to save ``text/plain`` downloads there without a dialog.
        After loading ``self._LOGIN_URL`` the method tries to restore saved
        cookies; if the session is still not logged in, the login form is
        filled in and the user is asked to finish the log-in manually.

        Args:
            username: Account name; prompted for interactively when falsy.
            password: Account password; prompted for when falsy and a
                log-in is actually required.

        Raises:
            fetch.FetchError: If the login form is not found or the session
                is still not logged in after the manual step.
        """
        # Download to a custom location. Don't show dialog.
        self._download_dir = tempfile.mkdtemp()
        firefox_profile = webdriver.FirefoxProfile()
        firefox_profile.set_preference('browser.download.folderList', 2)
        firefox_profile.set_preference(
                'browser.download.manager.showWhenStarting', False)
        firefox_profile.set_preference(
                'browser.download.dir', self._download_dir)
        # CSV files are returned as text/plain.
        firefox_profile.set_preference(
                'browser.helperApps.neverAsk.saveToDisk', 'text/plain')

        # Both branches currently start Firefox with the same profile.
        if self._debug:
            self._browser = webdriver.Firefox(firefox_profile)
        else:
            # TODO: Fix the login and enable PhantomJs.
            #self._browser = webdriver.PhantomJS()
            self._browser = webdriver.Firefox(firefox_profile)

        self._browser.implicitly_wait(self._WEBDRIVER_TIMEOUT)
        # The user menu is only visible if the window is min 992px wide.
        self._browser.set_window_size(1000, 800)

        # Reset the per-session state before attempting to log in.
        self._logged_in = False
        self._accounts_cache = None
        self._transactions_cache = {}

        browser = self._browser
        browser.get(self._LOGIN_URL)

        if not username:
            username = raw_input('User name: ')

        # When cookies were restored, reload the main page with them.
        if self.ask_and_restore_cookies(
                browser, username, self._SESSION_TIMEOUT_S):
            browser.get(self._MAIN_URL)

        if not self._is_logged_in():
            if not password:
                # NOTE(review): the next line is corrupted in this copy -- a
                # "******" redaction appears to have replaced the end of the
                # getpass call, the opening "try:" and the lookup that
                # assigns login_form.  Restore it from the original file.
                password = getpass.getpass('Password: '******'loginform')
            except exceptions.NoSuchElementException:
                raise fetch.FetchError('Login form not found.')
            login_form.find_element_by_name('user_name').send_keys(username)
            login_form.find_element_by_name('password').send_keys(password)
            # Weirdly, the login button has to be clicked twice.
            login_form.find_element_by_id('submitForm').click()
            login_form.find_element_by_id('submitForm').click()

            raw_input("Please follow the log-in instructions and press enter.")

            if not self._is_logged_in():
                raise fetch.FetchError('Login failed.')

        # It is a bit silly to just sleep here, but other approaches failed, so
        # this is a simple fix.
        time.sleep(10)

        self.save_cookies(browser, username)
        self._logged_in = True
        self._username = username
        logger.info('Log-in sucessful.')
Ejemplo n.º 12
0
    def find_goods(self):
        """Scrape product data from every catalogue page listed in ``url``.

        Each address is rendered in a headless Firefox, the page source is
        parsed with BeautifulSoup and every ``ul.tecdocProList`` entry is
        inspected for name, price and stock quantity.  The brand and model
        are read from the page's breadcrumb navigation.

        NOTE(review): ``url`` is not a parameter of this method; it is looked
        up in an enclosing/global scope and is expected to be an iterable of
        links of the form /ekatalog/category/goods/number/number/.

        NOTE(review): names/brands/models are appended only for "active"
        entries while prices and quantities are appended whenever present,
        so the returned lists are not guaranteed to have equal lengths.

        Returns:
            A tuple ``(name, price, quantity, brand, model)`` of five lists
            of strings.
        """
        name = []      # product names
        price = []     # product prices
        quantity = []  # product quantities
        brand = []     # brand names
        model = []     # model names

        # Browser configuration does not depend on the address: build it once.
        options = webdriver.FirefoxOptions()
        options.add_argument('-headless')
        path = f"{os.getcwd()}/geckodriver"

        for adress in url:
            driver = webdriver.Firefox(executable_path=path, firefox_options=options)
            try:
                driver.get(adress)
                content = driver.page_source.encode("utf-8").strip()
            finally:
                # Always close the browser; otherwise one Firefox process is
                # leaked for every address visited.
                driver.quit()

            soup = BeautifulSoup(content, "lxml")

            # For each product entry scrape the information about the product.
            for element in soup.find_all("ul", {"class": "tecdocProList"}):

                n = element.find("li", {"class": "tecdocCol tecdocCol2"})
                # Only entries that carry an "ajax" link are treated as active.
                if n is not None and n.find("a", {"class": "ajax"}) is not None:
                    name.append(n.find("span", {"class": "tecdocIndex"}).text)

                    # Brand and model are the 2nd and 3rd breadcrumb titles.
                    b = soup.find("nav", {"class": "KatTcDcBreadCrumbs"})
                    b = b.find_all("span", {"itemprop": "title"})
                    brand.append(b[1].text)
                    model.append(b[2].text)

                # A missing price/quantity cell surfaces as AttributeError on
                # None and simply means "nothing to record" for this entry.
                try:
                    p = element.find("li", {"class": "tecdocCol tecdocCol4"})
                    price.append(p.find("span", {"class": "tecdocCena"}).text)
                except AttributeError:
                    pass

                try:
                    q = element.find("li", {"class": "tecdocCol tecdocCol5"})
                    quantity.append(q.find("span", {"class": "tecdocStany"}).text)
                except AttributeError:
                    pass

        return name, price, quantity, brand, model
Ejemplo n.º 13
0
 def __init__(self):
     """Open Firefox on the catalogue start page and keep both URL and driver."""
     catalog_url = "https://e-katalog.intercars.com.pl"
     self.url = catalog_url
     self.driver = browser = webdriver.Firefox()
     browser.get(catalog_url)
Ejemplo n.º 14
0
 def setUp(self):
     """Open Firefox with implicit and explicit waits and activate 'en'."""
     self.browser = firefox = webdriver.Firefox()
     # Implicit wait of 3 seconds for element lookups.
     firefox.implicitly_wait(3)
     # A 10-second WebDriverWait is stored on the browser object itself.
     firefox.wait = WebDriverWait(firefox, 10)
     activate('en')
Ejemplo n.º 15
0
# Proxy endpoint that all browser traffic is routed through.
proxy_host, proxy_port = "127.0.0.1", "8118"
_proxy_address = proxy_host + ":" + proxy_port

# Output file, opened in append mode.
mdfl = open('data.txt', 'a')

# Firefox capabilities: marionette enabled plus a manual proxy that uses the
# same host:port for http, ftp and ssl.
caps = webdriver.DesiredCapabilities.FIREFOX
caps.update({
    'marionette': True,
    'proxy': {
        "proxyType": "MANUAL",
        "httpProxy": _proxy_address,
        "ftpProxy": _proxy_address,
        "sslProxy": _proxy_address,
    },
})

driver = webdriver.Firefox(
    options=options,
    firefox_binary=firefox_path_from_env,
    capabilities=caps,
)
print('driver started')

# One entry per line of stocks.txt; each line keeps its trailing newline.
with open("stocks.txt") as f:
    stocks = list(f)
for s in range(0, len(stocks)):
    lstk=stocks[s].rstrip()
    url = "https://www.gurufocus.com/stock/{}/summary".format(lstk)
    
    driver.get(url)
    ps = driver.page_source
    if 'cloud' not in ps and 'flare' not in ps:
        error=False
        try:
            tbl = driver.find_element_by_xpath("//div[@class='stock-summary-table fc-regular']")
            divs = tbl.find_elements_by_xpath(".//div")
        except Exception as e:
Ejemplo n.º 16
0
 def setUp(self):
     """Open the home page in Firefox and maximize the window."""
     self.home_url = 'http://www.propxdoeswhat.me/'
     self.driver = browser = webdriver.Firefox()
     browser.get(self.home_url)
     browser.maximize_window()
Ejemplo n.º 17
0
 def setUpClass(cls):
     """Open one Firefox browser on the blog page and store it on the class."""
     cls.driver = browser = webdriver.Firefox()
     url = "http://www.cnblogs.com/yoyoketang/"
     browser.get(url)
     # The implicit wait is configured after the first page load.
     browser.implicitly_wait(5)
Ejemplo n.º 18
0
    def parse_page(self, response):
        """Log in to the warehouse site, read every stock page and store quantities.

        A headless Firefox is started inside a virtual display, the login
        form at ``response.url`` is filled in and submitted, and each page of
        the ``#ViewManyListTable`` listing is read.  For every row the SKU
        (first cell) and quantity (fourth cell) are upserted into
        ``mmc_stocks`` for the warehouse 'Hanover'.  The outcome is recorded
        in ``mmc_spider_status`` (status 3 on success, status 2 plus the
        error text on failure).

        Args:
            response: Object whose ``url`` attribute is the login page.
        """
        display = None
        driver = None
        try:
            try:
                from pyvirtualdisplay import Display
                display = Display(visible=0, size=(800, 800))
                display.start()
                firefox_options = Options()
                firefox_options.add_argument('-headless')
                firefox_options.add_argument('--disable-gpu')
                driver = webdriver.Firefox(firefox_options=firefox_options, executable_path=settings.FIREFOX_PATH)
                driver.get(response.url)
                driver.implicitly_wait(100)

                # NOTE(review): the account credentials are hard-coded here;
                # consider moving them to settings or the environment.
                form_values = (
                    ('WarehouseCode', '03'),
                    ('AccountCode', '001862'),
                    ('UserName', 'MAXLEAD'),
                    ('Password', '1202HXML'),
                )
                form_fields = [(driver.find_elements_by_id(field_id), value)
                               for field_id, value in form_values]
                btn_login = driver.find_elements_by_css_selector('input[name="Login"]')
                # Fields missing from the page are skipped silently.
                for elements, value in form_fields:
                    if elements:
                        elements[0].send_keys(value)
                btn_login[0].click()
                driver.implicitly_wait(100)
                time.sleep(5)

                # The last space-separated token of the pager text is the
                # total number of pages.
                total_page = driver.find_elements_by_css_selector('#navigationTR nobr')[0].text
                total_page = int(total_page.split(' ')[-1])

                warehouse = 'Hanover'
                for i in range(total_page):
                    # Best effort per page: a failure skips the rest of this
                    # page and moves on to the next iteration.
                    try:
                        res = driver.find_elements_by_css_selector('#ViewManyListTable tr')
                        elem = driver.find_element_by_id('MetaData')
                        elem.click()
                        res.pop(0)  # drop the first table row
                        for val in res:
                            td_re = val.find_elements_by_tag_name('td')
                            # Rows without a fourth cell carry no quantity.
                            if len(td_re) < 4:
                                continue
                            sku = td_re[0].text
                            raw_qty = td_re[3].text
                            if raw_qty and raw_qty != ' ':
                                qty = raw_qty.replace(',', '')
                            else:
                                qty = 0

                            # Parameterized lookup: the SKU comes from the
                            # scraped page and must not be spliced into SQL.
                            qty_sql = "select id from mmc_stocks where commodity_repertory_sku=%s and warehouse=%s"
                            self.db_cur.execute(qty_sql, (sku, warehouse))
                            existing = self.db_cur.fetchall()
                            values = (qty, sku, warehouse)
                            if existing:
                                sql = "update mmc_stocks set qty=%s where commodity_repertory_sku=%s and warehouse=%s"
                            else:
                                sql = "insert into mmc_stocks (qty, commodity_repertory_sku, warehouse) values (%s, %s, %s)"
                            self.db_cur.execute(sql, values)
                        elem_next_page = driver.find_elements_by_id('Next')
                        if elem_next_page:
                            elem_next_page[0].click()
                            driver.implicitly_wait(100)
                    except Exception:
                        continue
                self.conn.commit()
                sql = "update mmc_spider_status set status=3, description='' where warehouse='Hanover'"
                self.db_cur.execute(sql)
                self.conn.commit()
            except Exception as e:
                # Record the failure reason in the spider status table.
                values = (str(e),)
                sql = "update mmc_spider_status set status=2, description=%s where warehouse='Hanover'"
                self.db_cur.execute(sql, values)
                self.conn.commit()
        finally:
            # Clean up whatever was actually started; either object may still
            # be None when the failure happened during start-up.
            try:
                if driver is not None:
                    try:
                        driver.refresh()
                        driver.switch_to.alert.accept()
                        driver.implicitly_wait(100)
                    except Exception:
                        # No alert to accept (or the refresh failed): ignore.
                        pass
                    driver.quit()
            finally:
                if display is not None:
                    display.stop()
Ejemplo n.º 19
0
 def __init__(self):
     """Start Firefox through a local geckodriver and attach the helpers."""
     gecko_path = "d:\\downloads\\geckodriver.exe"
     self.driver = webdriver.Firefox(executable_path=gecko_path)
     # Every helper receives this object itself.
     self.session = SessionHelper(self)
     self.group = GroupHelper(self)
     self.contact = ContactHelper(self)
Ejemplo n.º 20
0
 def setUp(self):
     """Open Firefox with a 30 s implicit wait and reset the test state."""
     self.driver = browser = webdriver.Firefox()
     browser.implicitly_wait(30)
     self.base_url = "http://localhost:3000"
     self.verificationErrors, self.accept_next_alert = [], True
Ejemplo n.º 21
0
 def sub_setUp(self):
     """Start Firefox with the geckodriver found under ``DRIVER_PATH`` and open ``self.URL``."""
     # The backslash is escaped explicitly: '\g' is not a valid escape
     # sequence and triggers a warning on current Python versions.  The
     # resulting path string is unchanged.
     gecko_path = DRIVER_PATH + '\\geckodriver.exe'
     self.driver = webdriver.Firefox(executable_path=gecko_path)
     self.driver.get(self.URL)
Ejemplo n.º 22
0
def main():
    """Collect the Level 1 projects from the projects page and export them.

    The page is rendered in Firefox, every ``ParentRow``/``ParentRowAlt``
    block with a non-empty ``TwoChanel`` field is turned into a record
    ``[number, facility, start, end, level]``, the records are sorted by
    project number, printed, and written into a copy of ``template.xlsx``
    named after today's date.  When no such project exists a message is
    printed and the program exits.
    """
    warnings.filterwarnings('ignore')

    driver = webdriver.Firefox()
    try:
        driver.get("http://private-url/projects/")
        html = driver.page_source
    finally:
        # Close the browser even when loading the page fails.
        driver.quit()
    soup = BeautifulSoup(html, "html5lib")

    projects = soup.find_all('div', {'class': 'ParentRow'})
    projectsAlt = soup.find_all('div', {'class': 'ParentRowAlt'})
    projectsHtml = projects + projectsAlt

    # Function that takes list of projects info and writes it into excel file
    def writeToExcel(projectsList, file):
        wb = load_workbook(file)

        # Selects the active worksheet
        ws = wb.active

        # Adds company logo
        img = Image('logo.png')
        ws.add_image(img, 'D1')

        # The same grey fill is applied to every written row, so build it once.
        fill = PatternFill(start_color='D9D9D9',
                           end_color='D9D9D9',
                           fill_type='solid')

        # Data starts at the 5th row; values go into every second column
        # starting with the 1st.
        for r, p in enumerate(projectsList, start=5):
            c = 1
            for data in p:
                # Converts the first record of each project (project number) to integer
                if c == 1:
                    data = int(data)

                ws.cell(row=r, column=c).value = data
                c += 2

            # Colors columns 1-9 of the row.
            for counter in range(1, 10):
                ws.cell(row=r, column=counter).fill = fill

        # Today's date, formatted as month-day-year, becomes part of the name.
        date = datetime.datetime.now().strftime('%m-%d-%Y')

        wb.save('Level 1 Projects - ' + date + '.xlsx')

    # Function that takes project html and return list with Project number,
    # Facility, Start Date, End Date and Project Level
    def getProjectData(p):
        projectNum = p.find('div', {'class': 'ProjectNoCol'}).a.text
        projectFacility = p.find('div', {'class': 'FacilityName'}).text
        projectStartTime = p.find('div', {'class': 'DateCol'}).text
        projectEndTime = p.find('div', {'class': 'DateColTo'}).text
        projectLevel = p.find('div', {'class': 'TwoChanel'}).text

        # Removing the whitespace and the trailing '+' from the project number
        projectNum = ''.join(projectNum.split())
        if len(projectNum) >= 7:
            projectNum = projectNum[:-1]

        # Removing label (first 10 characters) from facility name
        projectFacility = projectFacility[10:].strip()

        # Remove the empty spaces and the label from the start time
        projectStartTime = projectStartTime.replace(' ', '')[10:]

        # Remove the empty spaces and the label from the end time
        projectEndTime = projectEndTime.replace(' ', '')[3:]

        return [
            projectNum, projectFacility, projectStartTime, projectEndTime,
            projectLevel
        ]

    # One record per project whose level field is present and non-empty.
    projectsData = []
    for project in projectsHtml:
        level = project.find('div', {'class': 'TwoChanel'})
        if level is not None and level.text != '':
            projectsData.append(getProjectData(project))

    # Checks if any Level 1 Projects had been found
    if not projectsData:
        print("No Level 1 Projects for today!")
        quit()

    # Sorts the list by project number (compared as strings)
    projectsData = sorted(projectsData, key=lambda l: l[0])

    # Prints every project as one ' | '-separated line
    print('\n')

    for project in projectsData:
        print(' | '.join(project))

    print('--------------------')
    print('Level 1 Projects: ', len(projectsData), '\n')

    writeToExcel(projectsData, 'template.xlsx')
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import time

# Build the Firefox options, start the driver and check the landing page title.
options = webdriver.FirefoxOptions()
# Command-line arguments are added in this exact order; the repeated
# "--disable-extensions" entry is passed twice.
for _argument in (
        '--headless',
        '--no-sandbox',
        'start-maximized',
        'disable-infobars',
        "--disable-extensions",
        "--disable-gpu",
        "--disable-extensions",
        "--disable-setuid-sandbox",
        "--disable-devshm-using",
        "--window-size=1920,1080",
):
    options.add_argument(_argument)

cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
firefox_binary = "/usr/bin/firefox"

driver = webdriver.Firefox(
    options=options,
    capabilities=cap,
    firefox_binary=firefox_binary,
)
driver.get("https://aintthatspecial.tk")
# Pause for three seconds before reading the title.
time.sleep(3)
assert driver.title == 'SaveMe'
Ejemplo n.º 24
0
 def setUp(self):
     """Open the locally served app in Firefox with a 10 s implicit wait."""
     self.driver = browser = webdriver.Firefox()
     browser.implicitly_wait(10)
     browser.get("http://localhost:3000")
Ejemplo n.º 25
0
import time
from selenium import webdriver
from lxml import etree

# The default encoding must be set here to avoid errors when writing to a
# file later on.
# NOTE(review): reload(sys)/sys.setdefaultencoding only exist on Python 2.
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

friend = '563679994' # the friend's QQ number; the friend's space must allow you to access it
user = '******'  # your QQ number
pw = 'XXX'  # your QQ password

# Get the browser driver
driver = webdriver.Firefox()

# Maximize the browser window
driver.maximize_window()

# Point the browser at the QQ login page
driver.get("http://i.qq.com")

# The login frame has to be selected here, otherwise the page elements
# needed below cannot be found
driver.switch_to.frame("login_frame")

# Automatically click the account-login option
driver.find_element_by_id("switcher_plogin").click()

# Type the known QQ account into the account input box
driver.find_element_by_id("u").send_keys(user)
Ejemplo n.º 26
0
 def setUp(self):
     """Open the sign-in page in Firefox, then set a 10 s implicit wait."""
     signin_url = "https://passport.cnblogs.com/user/signin"
     self.driver = browser = webdriver.Firefox()
     browser.get(signin_url)
     browser.implicitly_wait(10)
Ejemplo n.º 27
0
 def setUp(self):
     """Open Firefox with a 30 s implicit wait and reset the test state."""
     self.driver = browser = webdriver.Firefox()
     browser.implicitly_wait(30)
     # The credentials embedded in this URL are masked in this copy.
     self.base_url = "http://*****:*****@@192.168.1.1/"
     self.verificationErrors, self.accept_next_alert = [], True
Ejemplo n.º 28
0
# -*- coding:utf-8 -*-
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import urllib3
import re
# Reuse the cookies of an existing Firefox profile in a requests session and
# fetch the followers page with it.
# NOTE(review): warnings are disabled because the request below is sent with
# verify=False, i.e. without TLS certificate verification.
urllib3.disable_warnings()

# Start Firefox with an existing on-disk profile.
path = r'C:\Users\lenovo\AppData\Roaming\Mozilla\Firefox\Profiles\wxglh3q7.default'
profile = webdriver.FirefoxProfile(path)
driver = webdriver.Firefox(firefox_profile=profile)

# Load the blog page and read the browser's cookies for it.
driver.get('https://www.cnblogs.com/yoyoketang/')
cookie = driver.get_cookies()
# print(cookie)   # a list object
driver.quit()

# Copy every browser cookie (name/value only) into the requests session.
s = requests.session()
c = requests.cookies.RequestsCookieJar()
for i in cookie:
    c.set(i["name"], i["value"])
s.cookies.update(c)
# print(c)

url = 'https://home.cnblogs.com/u/yoyoketang/relation/followers'
r1 = s.get(url, verify=False)
# print(r1.content.decode('utf-8'))

# Parse the response body, decoded as UTF-8.
soup = BeautifulSoup(r1.content.decode('utf-8'), 'html.parser')
# print(soup)
#
Ejemplo n.º 29
0
def main():
    """Download every poem linked from the zeno.org Goethe index into text files.

    The index page is opened in Firefox (headless unless ``--verbose`` is
    given) to collect the poem links; each poem is then fetched over plain
    HTTP, its title and paragraphs are extracted and written as UTF-8 to
    ``<destination>/<normalized title>.txt``.  When a file name is already
    taken, ``-0``, ``-1``, ... is appended.
    """
    parser = ArgumentParser()

    parser.add_argument('-d', '--destination', type=str, dest='dest', default='data/', 
                        help='Path to the destination folder')
    parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', 
                        help='Show browser window?')
    
    args = parser.parse_args()

    if not os.path.isdir(args.dest):
        try:
            os.mkdir(args.dest)
        except Exception:
            print('[-] Couldn\'t create destination directory ... ')
            os._exit(1)

    options = webdriver.FirefoxOptions()
    options.headless = not args.verbose

    browser = webdriver.Firefox(firefox_options=options)
    sub_hrefs = []
    # The browser is only needed to collect the links: close it as soon as
    # that is done, and also when anything in this phase fails.
    try:
        browser.set_window_position(25, 25)
        browser.set_window_size(1000, 800)

        browser.get('http://www.zeno.org/Literatur/M/Goethe,+Johann+Wolfgang/Gedichte')
        sub_points = grab_sub_points(browser)

        for sp in tqdm(sub_points, desc='Fetching URLs ...    ', unit='url'):
            try:
                a = sp.find_element_by_class_name('zenoTXLinkInt')
            except selenium.common.exceptions.NoSuchElementException:
                # Entries without an internal link are skipped.
                continue
            sub_hrefs.append(a.get_attribute('href'))
    finally:
        browser.quit()

    for href in tqdm(sub_hrefs, desc='Processing poems ... ', unit='poe'):
        res = req.get(href)
        try:
            soup = BeautifulSoup(res.text, 'lxml')
        except Exception:
            continue

        # Pages without the main container or without a title are skipped.
        ma = soup.find('div', class_='zenoCOMain')
        if ma is None:
            continue
        heading = ma.find('h4')
        if heading is None:
            continue
        title = heading.text

        # Paragraph text per <p>; every <br> contributes an empty line.
        content = [e.text if e.name == 'p' else ''
                   for e in ma.find_all(['p', 'br'])]

        # File name: lower-cased title, whitespace -> '_', everything that
        # is not a word character removed.
        fname = re.sub(r'\s', '_', title.lower())
        fname = re.sub(r'[^\w\d]', '', fname)
        fpath = os.path.join(args.dest, fname + '.txt')
        index = -1

        # Pick the first unused name: <fname>-0.txt, <fname>-1.txt, ...
        while os.path.isfile(fpath):
            index += 1
            fpath = os.path.join(args.dest, '{}-{}.txt'.format(fname, index))

        # Written in binary mode so the newlines are stored exactly as built.
        text = title + '\n\n\n' + '\n'.join(content) + '\n\n'
        with open(fpath, 'wb') as f:
            f.write(text.encode('utf-8'))
Ejemplo n.º 30
0
    def __init__(self):
        """Open Firefox, create the store and this object's run, then log completion."""
        self.driver = webdriver.Firefox()
        # NOTE(review): the original comment reads "fetch from env" --
        # presumably the store path should come from the environment.
        self.store = StoreManager("store/")
        run_store = RunStore(self.store, self.name)
        self.run = run_store

        self.log("Setup complete")