Code example #1
File: crawl_fund.py Project: ryjfgjl/Fund
import re
import time

from sql import Sql


class Fund:
    def __init__(self):
        self.Sql = Sql()
        self.db_conn = self.Sql.conn_db(db='fund')

    def main(self, driver):
        # The pager text contains the total page count as its only number.
        total_page = driver.find_element_by_class_name('nv').text
        total_page = int(re.search(r'\d+', total_page).group())
        print('total page:', total_page)
        # Resume from the last page recorded for this spider.
        sql = 'select crawledPage from crawl_info where spiderName = "fund"'
        crawledPage = int(
            self.Sql.exec_sql(self.db_conn, sql).fetchall()[0][0])
        current_page = crawledPage
        if current_page == total_page:
            return
        for i in range(total_page):
            # Click through pages that a previous run already crawled.
            if i < current_page:
                next_page = driver.find_elements_by_xpath(
                    "//div[@id='pager']/span[@class='nu page']")[-1]
                next_page.click()
                time.sleep(10)
                continue
            try:
                fund_ids = driver.find_elements_by_class_name('bzdm')
                for fund_id in fund_ids:
                    fund_id = fund_id.text
                    # Insert the fund code only if it is not already stored.
                    sql = 'insert into fund(fund) select "{0}" from dual where "{0}" not in(select fund from fund)'.format(
                        fund_id)
                    self.Sql.exec_sql(self.db_conn, sql)
                current_page += 1
                print('Crawled Page {}'.format(current_page))
                # Persist progress so a rerun resumes from this page.
                sql = 'update crawl_info set crawledPage = {} where spiderName = "fund"'.format(
                    current_page)
                self.Sql.exec_sql(self.db_conn, sql)
                next_page = driver.find_elements_by_xpath(
                    "//div[@id='pager']/span[@class='nu page']")[-1]
                next_page.click()
                time.sleep(10)
            except Exception as reason:
                print('Spider Crawl Failed in Page {0}, {1}'.format(
                    current_page, str(reason)))
Code example #2
File: allfund.py Project: ryjfgjl/Fund
# Crawl all fund codes from the Eastmoney all-fund list page.

import requests
from lxml import etree
from sql import Sql

Sql = Sql()
db_conn = Sql.conn_db('fund')

url = 'http://fund.eastmoney.com/allfund.html'
r = requests.get(url)
r.encoding = 'gb2312'  # force the page's declared GB2312 encoding
html = r.text
html = etree.HTML(html)
num_boxes = html.xpath('//div[@id="code_content"]//div[@class="num_box"]')
allfund = []

for num_box in num_boxes:
    # Use XPaths relative to each container instead of re-querying the
    # whole document on every pass.
    lies = num_box.xpath('./ul/li')
    for li in lies:
        funds = li.xpath('./div/a[1]/text()')
        for fund in funds:
            print(fund)
            # Each entry looks like "(code)name"; strip the parentheses.
            code = fund.split(')')[0][1:]
            name = fund.split(')')[1]
            sql = 'insert into fund(code, name) values ("{}", "{}")'.format(
                code, name)
            Sql.exec_sql(db_conn, sql)
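Because the INSERT is built with str.format, a name containing a double quote would break the statement, and each row costs a round trip. Since exec_sql also accepts a data list for parameterised batch inserts (as used in examples #3 and #5), the loops can collect rows and insert once; a sketch under that assumption:

# Variant: accumulate rows across the loops, then batch-insert once.
data = []
for num_box in num_boxes:
    for li in num_box.xpath('./ul/li'):
        for fund in li.xpath('./div/a[1]/text()'):
            code = fund.split(')')[0][1:]
            name = fund.split(')')[1]
            data.append([code, name])

sql = 'insert into fund(code, name) values (%s, %s)'
Sql.exec_sql(db_conn, sql, data)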
Code example #3
# https://www.fnscore.com/detail/league/kog-2/league-kog-647.html

import requests
from bs4 import BeautifulSoup
from sql import Sql


Sql = Sql()
db_conn = Sql.conn_db('wzmatch')

url = 'https://www.fnscore.com/detail/league/kog-2/league-kog-647.html'
response = requests.get(url)
html = response.text


data = []
soup = BeautifulSoup(html, 'html.parser')
# Match rows live in the second match-panel-container block.
matches = soup.find_all('div', 'match-panel-container')[1].find_all('div', 'match-panel-item match-table-item')
for match in matches:
    match_info = match.find_all('p')
    matchTime = match_info[0].get_text()
    teams = match.find_all('div', 'team')
    teamA = teams[0].find('p').get_text()
    teamB = teams[1].find('p').get_text()
    score = match_info[2].get_text()
    BO = match_info[4].get_text()
    data.append([matchTime, teamA, teamB, score, BO])


sql = 'insert into kpl2020_autumn(matchTime,teamA,teamB,score,BO) values(%s,%s,%s,%s,%s)'
Sql.exec_sql(db_conn, sql, data)
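The kpl2020_autumn schema is not shown; for reference, a one-time setup consistent with the insert above, with the column types as guesses:

# Hypothetical table definition; the real schema is not shown, so the
# column types below are assumptions.
ddl = '''
create table if not exists kpl2020_autumn (
    matchTime varchar(32),
    teamA varchar(64),
    teamB varchar(64),
    score varchar(16),
    BO varchar(8)
)
'''
Sql.exec_sql(db_conn, ddl)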
Code example #4
from bs4 import BeautifulSoup
from sql import Sql

Sql = Sql()
db_conn = Sql.conn_db('lolgamequiz')
url = 'https://lpl.qq.com/es/worlds/2020/'
html = """
<div class="swiper-wrapper" id="team_list" style="transform: translate3d(0px, 0px, 0px);"><a href="//lpl.qq.com/es/team_detail.shtml?tid=29" target="_blank" class="swiper-slide swiper-slide-active" onclick="PTTSendClick('btn','btn-team1','队伍');">
                                    <div>
                                        <img src="//img.crawler.qq.com/lolwebvideo/20190523093050/f63d37cbc3810f2f9e8fb5688dd40254/0" alt="">
                                    </div>
                                    <p>LPL赛区:<span>JDG</span></p>
                                    </a><a href="//lpl.qq.com/es/team_detail.shtml?tid=41" target="_blank" class="swiper-slide swiper-slide-next" onclick="PTTSendClick('btn','btn-team1','队伍');">
                                    <div>
                                        <img src="//img.crawler.qq.com/lolwebvideo/20190523093521/b753e24c05cc53123ce5fa3f3a19162f/0" alt="">
                                    </div>
                                    <p>LPL赛区:<span>SN</span></p>
                                    </a><a href="//lpl.qq.com/es/team_detail.shtml?tid=4" target="_blank" class="swiper-slide" onclick="PTTSendClick('btn','btn-team1','队伍');">
                                    <div>
                                        <img src="//img.crawler.qq.com/lolwebvideo/20190523093621/b1721b1e247c18bab54a548775a887a5/0" alt="">
                                    </div>
                                    <p>LPL赛区:<span>LGD</span></p>
                                    </a><a href="//lpl.qq.com/es/team_detail.shtml?tid=117" target="_blank" class="swiper-slide" onclick="PTTSendClick('btn','btn-team1','队伍');">
                                    <div>
                                        <img src="//img.crawler.qq.com/lolwebvideo/20190919151523/2a9931322ed5750213ab6204adadaec1/0" alt="">
                                    </div>
                                    <p>LEC赛区:<span>G2</span></p>
                                    </a><a href="//lpl.qq.com/es/team_detail.shtml?tid=42" target="_blank" class="swiper-slide" onclick="PTTSendClick('btn','btn-team1','队伍');">
                                    <div>
                                        <img src="//game.gtimg.cn/images/lpl/act/a20200901worlds/c6-team2.png" alt="">
                                    </div>
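Example #4 is cut off inside the html string, before the imported BeautifulSoup is used. A standalone sketch of the likely next step, pulling the team names out of the swiper markup shown above; it assumes the string is properly closed and is not the project's actual code:

# Hypothetical continuation: extract the team names from the markup.
soup = BeautifulSoup(html, 'html.parser')
teams = [span.get_text() for span in soup.select('#team_list p span')]
print(teams)  # ['JDG', 'SN', 'LGD', 'G2', ...] per the markup above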
Code example #5
import time

from sql import Sql
from driver import Driver  # assumed import path for the local Driver helper


class JJJZ:
    def __init__(self):
        self.Sql = Sql()
        self.Driver = Driver()
        self.db_conn = self.Sql.conn_db(db='fund')

    def main(self):
        driver = self.Driver.main()
        root = 'http://fundf10.eastmoney.com/jjjz_'
        sql = 'select id,fund from fund where id > (select ifnull(crawledFundId,0) from crawl_info where spiderName = "jjjz")'
        fund_id_li = self.Sql.exec_sql(self.db_conn, sql)
        driver.implicitly_wait(2)
        for fund in fund_id_li:
            fund_db_id = fund[0]  # primary key in the fund table
            fund_code = fund[1]   # fund code used to build the page URL
            jjjz_page = 0
            url = root + fund_code + '.html'
            driver.get(url)
            jjjz_total_page = driver.find_elements_by_xpath(
                '//div[@class="pagebtns"]/label')[-2].text
            #sql = 'select crawledPage from crawl_info where spiderName = “jjjz”'
            #jjjz_page = int(self.Sql.exec(self.db_conn, sql))
            data = []
            for _ in range(int(jjjz_total_page)):

                trs = driver.find_elements_by_xpath(
                    '//div[@id="jztable"]/table/tbody/tr')
                for tr in trs:
                    # Columns: NAV date, unit NAV, cumulative NAV, daily
                    # growth rate, purchase status, redemption status,
                    # dividend record.
                    tds = tr.find_elements_by_tag_name('td')
                    (date, unit_jz, total_jz, date_rate, buy_status,
                     sale_status, red) = [td.text.strip() for td in tds[:7]]
                    data.append([
                        fund_db_id, date, unit_jz, total_jz, date_rate,
                        buy_status, sale_status, red
                    ])
                print('Crawling Fund {0}, Crawled Page {1}'.format(
                    fund_code, jjjz_page))
                jjjz_page += 1
                jjjz_next_page = driver.find_elements_by_xpath(
                    '//div[@class="pagebtns"]/label')[-1]
                jjjz_next_page.click()
                time.sleep(1)

            sql = "insert into jjjz(fundId,JZDate,unitJZ,totalJZ,dateRate,buyStatus,saleStatus,red) values (%s,%s,%s,%s,%s,%s,%s,%s)"
            self.Sql.exec_sql(self.db_conn, sql, data)
            sql = 'update crawl_info set crawledFundId = {} where spiderName = "jjjz"'.format(
                id)
            self.Sql.exec_sql(self.db_conn, sql)
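Examples #1 and #5 also assume a local Driver helper whose main() returns a ready WebDriver; the old find_element_by_* calls imply Selenium 3.x. A minimal sketch of that helper, with the headless-Chrome setup as an assumption:

from selenium import webdriver

# Hypothetical Driver helper; the real module is not shown.
class Driver:
    def main(self):
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')  # crawl without opening a window
        return webdriver.Chrome(options=options)

With this in place, JJJZ().main() runs the NAV crawl end to end.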