Example #1
def login():
    # Log in via WeChat: open the JD passport page and switch to the WeChat tab
    browser.get("https://passport.jd.com/uc/login?ltype=logout&ReturnUrl=https://global.jd.com/")
    time.sleep(2)
    # find_elements returns an empty list instead of raising when the link is missing
    wechat_links = browser.find_elements_by_link_text("微信")
    if wechat_links:
        wechat_links[0].click()
        print("Please log in within 6 seconds")
        time.sleep(6)
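
This snippet refers to a module-level browser and the time module that the listing does not show. A minimal sketch of the setup it appears to assume (the choice of ChromeDriver here is illustrative, not taken from the original):

import time
from selenium import webdriver

# assumed shared driver reused by login() above and picking() in Example #5
browser = webdriver.Chrome()
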
Example #2
def openurl(url, num):
    browser = webdriver.Chrome()  # launch the browser
    browser.get(url)  # open the target site
    html = browser.page_source  # grab the rendered page source
    data = str(pq(html))  # str() turns the PyQuery object into a plain, readable string
    print(data)

    dic = {}
    re_rule = r'<div class="news-item-container">(.*?)<div data-v-00b2e9bc=""/>'
    datalist = re.findall(re_rule, data, re.S)
    for i in range(0, len(datalist)):
        # try the positive ("lihao") icon first, then fall back to the negative ("likong") icon
        rule1 = r'<img src="/img/icon-lihao.png" data-v-6c26747a=""/>(.*?)<!----></span>'
        bullish = re.findall(rule1, datalist[i], re.S)
        if len(bullish) == 0:
            rule1 = r'<img src="/img/icon-likong.png" data-v-6c26747a=""/>(.*?)</span>'
            bullish = re.findall(rule1, datalist[i], re.S)
        rule2 = r'<span class="stock-group-item-name" data-v-f97d9694="">(.*?)</span>'
        stock_name = re.findall(rule2, datalist[i], re.S)

        if len(stock_name) > 0 and len(bullish) > 0:
            for k in range(0, len(stock_name)):
                dic[stock_name[k]] = bullish[0]
                print("Scraping item", len(dic), "- please wait...")

    c = len(datalist)
    if len(dic) < num:
        while True:
            # click the "load more" footer to pull in another batch of news items
            browser.find_element_by_class_name("home-news-footer").click()
            time.sleep(1)
            html = browser.page_source
            data = str(pq(html))
            datalist = re.findall(re_rule, data, re.S)
            for i in range(c, len(datalist)):
                rule3 = r'<img data-v-6c26747a="" src="/img/icon-lihao.png"/>(.*?)<!----></span>'
                bullish = re.findall(rule3, datalist[i], re.S)
                if len(bullish) == 0:
                    rule5 = r'<img data-v-6c26747a="" src="/img/icon-likong.png"/>(.*?)</span>'
                    bullish = re.findall(rule5, datalist[i], re.S)
                rule4 = r'<span data-v-f97d9694="" class="stock-group-item-name">(.*?)</span>'
                stock_name = re.findall(rule4, datalist[i], re.S)
                if len(stock_name) > 0 and len(bullish) > 0:
                    for k in range(0, len(stock_name)):
                        dic[stock_name[k]] = bullish[0]
            c = len(datalist)
            if len(dic) > num:
                browser.quit()
                print("Scraping finished!")
                break

            print("Scraping item", len(dic) + 1, "- please wait...")
    else:
        browser.quit()
        print("Scraping finished!")

    return dic
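
A hedged usage sketch for openurl; the imports and the target URL below are assumptions, since the listing shows neither (pq is presumed to be the usual PyQuery alias):

import re
import time
from selenium import webdriver
from pyquery import PyQuery as pq

# hypothetical call: collect at least 50 stock/sentiment pairs from an assumed news-feed URL
results = openurl("https://example.com/news-feed", 50)
print(results)
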
Example #3
def getDate():
    # 3. Send the request
    browser.get(url)
    # 4. Switch the city filter to 全国 (nationwide)
    browser.find_element_by_css_selector("#changeCityBox > p.checkTips > a").click()
    # 5. Find the search box and type the query 数据分析 (data analysis)
    browser.find_element_by_css_selector("#search_input").send_keys("数据分析")
    # 6. Find the search button and click it
    browser.find_element_by_css_selector("#search_button").click()
    time.sleep(3)
    # 7. Auto-paginate until the "next page" button is disabled (pager_next_disabled vs. pager_next)
    i = 1  # page counter
    j = 1  # item counter
    while browser.page_source.find('pager_next_disabled') == -1:
        # collect the job-listing <li> elements on the current page
        li_List = browser.find_elements_by_css_selector("#s_position_list > ul > li")
        # iterate over each listing
        for li in li_List:
            # job title
            title = li.find_element_by_css_selector("div.list_item_top > div.position > div.p_top > a > h3").text
            # location (keep only the city in front of the "·" separator)
            address = li.find_element_by_css_selector(
                "div.list_item_top > div.position > div.p_top > a > span > em").text.split("·")[0]
            # company
            company = li.find_element_by_css_selector("div.list_item_top > div.company > div.company_name > a").text
            # salary / experience / education string
            result = li.find_element_by_css_selector("div.list_item_top > div.position > div.p_bot > div").text
            # salary
            salary = result.split(" ")[0]
            # experience
            exp = result.split(" ")[1].replace("经验", "")
            # education
            edu = result.split(" ")[-1]
            # industry type
            job_type = li.find_element_by_class_name("industry").text.split("/")[0].strip()
            # append the row to the CSV file
            with open("lagou_date.csv", "a", encoding="utf-8", newline="") as file:
                csv.writer(file).writerow([title, address, company, salary, exp, edu, job_type])
                print("Page {}, item {} downloaded successfully".format(i, j))
                j += 1
        # click the "next page" button
        browser.find_element_by_class_name("pager_next").click()
        i += 1
        time.sleep(3)
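
getDate() likewise depends on a module-level url and browser, plus the csv and time modules, none of which the listing shows. A minimal sketch of the assumed setup (the Lagou homepage URL is an inference from the selectors and the lagou_date.csv filename, not part of the original):

import csv
import time
from selenium import webdriver

url = "https://www.lagou.com/"  # assumed entry page; only the name url comes from the snippet
browser = webdriver.Chrome()

getDate()
browser.quit()
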
Example #4
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time

link = "http://suninjuly.github.io/selects1.html"

try:
    browser = webdriver.Chrome()
    browser.get(link)
    # read the two numbers shown on the page
    num1 = browser.find_element_by_id("num1")
    x = num1.text
    num2 = browser.find_element_by_id("num2")
    y = num2.text
    z = str(int(x) + int(y))
    # pick the <select> option whose value equals the sum
    select = Select(browser.find_element_by_tag_name("select"))
    select.select_by_value(z)
    button = browser.find_element_by_css_selector("button.btn")
    button.click()

finally:
    # keep the page visible for a moment, then close the browser
    time.sleep(15)
    browser.quit()
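
The find_element_by_* helpers used throughout these examples belong to the Selenium 3 API and were removed in Selenium 4. A brief sketch of the equivalent By-based lookups for the same page, in case a newer binding is installed (the element IDs are taken from the example above):

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get("http://suninjuly.github.io/selects1.html")
x = browser.find_element(By.ID, "num1").text  # replaces find_element_by_id("num1")
y = browser.find_element(By.ID, "num2").text
browser.quit()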


Example #5
def picking():
    # Open your cart
    browser.get("https://cart.jd.com/cart.action#none")
    time.sleep(2)
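
picking() reuses the same module-level browser as login() in Example #1; a hypothetical call order, assuming both functions live in the same script:

login()    # complete the WeChat login within the 6-second window
picking()  # then open the JD shopping cart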