Code Example #1
File: crawl_fund.py  Project: ryjfgjl/Fund
import re
import time

from sql import Sql


class Fund:
    def __init__(self):
        self.Sql = Sql()
        self.db_conn = self.Sql.conn_db(db='fund')

    def main(self, driver):
        # Read the total number of result pages from the pager text.
        total_page = driver.find_element_by_class_name('nv').text
        total_page = int(re.search(r'\d+', total_page).group())
        print('total page:', total_page)
        # Resume from the page recorded in crawl_info.
        sql = 'select crawledPage from crawl_info where spiderName = "fund"'
        crawledPage = int(
            self.Sql.exec_sql(self.db_conn, sql).fetchall()[0][0])
        current_page = crawledPage
        if current_page == total_page:
            return
        for i in range(total_page):
            # Click through pages that were already crawled without re-scraping.
            if i < current_page:
                next_page = driver.find_elements_by_xpath(
                    "//div[@id='pager']/span[@class='nu page']")[-1]
                next_page.click()
                time.sleep(10)
                continue
            try:
                # Collect every fund code shown on the current page.
                fund_ids = driver.find_elements_by_class_name('bzdm')
                for fund_id in fund_ids:
                    fund_id = fund_id.text
                    # Insert the code only if it is not already in the fund table.
                    sql = 'insert into fund(fund) select "{0}" from dual where "{0}" not in(select fund from fund)'.format(
                        fund_id)
                    self.Sql.exec_sql(self.db_conn, sql)
                current_page += 1
                print('Crawled Page {}'.format(current_page))
                # Persist progress so the crawl can resume after a failure.
                sql = 'update crawl_info set crawledPage = {} where spiderName = "fund"'.format(
                    current_page)
                self.Sql.exec_sql(self.db_conn, sql)
                # Advance to the next page of results.
                next_page = driver.find_elements_by_xpath(
                    "//div[@id='pager']/span[@class='nu page']")[-1]
                next_page.click()
                time.sleep(10)
            except Exception as reason:
                print('Spider Crawl Failed in Page {0}, {1}'.format(
                    current_page, str(reason)))
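
All of these examples share a small sql helper module (from sql import Sql) whose source is not included on this page. Judging from how it is called, conn_db returns a database connection, and exec_sql either executes a single statement and returns a cursor-like object (the .fetchall() call above) or batch-executes a parameterized statement when a data list is passed. A minimal sketch of what such a module could look like, assuming MySQL accessed through pymysql, is:

# sql.py -- a hypothetical reconstruction; the project's real module is not shown here.
import pymysql


class Sql:
    def conn_db(self, db):
        # Connection details are placeholders; the real host/user/password are unknown.
        return pymysql.connect(host='localhost', user='root', password='',
                               db=db, charset='utf8', autocommit=True)

    def exec_sql(self, conn, sql, data=None):
        cursor = conn.cursor()
        if data is None:
            cursor.execute(sql)              # single statement
        else:
            cursor.executemany(sql, data)    # parameterized batch, e.g. values(%s,%s)
        return cursor                        # select callers use .fetchall()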
Code Example #2
File: allfund.py  Project: ryjfgjl/Fund
import requests
from lxml import etree
from sql import Sql

Sql = Sql()
db_conn = Sql.conn_db('fund')

url = 'http://fund.eastmoney.com/allfund.html'
r = requests.get(url)
r.encoding = 'gb2312'  # eastmoney serves this page as GB2312
html = etree.HTML(r.text)
num_boxes = html.xpath('//div[@id="code_content"]//div[@class="num_box"]')
allfund = []

for num_box in num_boxes:
    # Relative XPaths ('./...') scope each query to the current node.
    for li in num_box.xpath('./ul/li'):
        for fund in li.xpath('./div/a[1]/text()'):
            print(fund)
            # Each entry is "(code)name" -- split on the closing parenthesis.
            code = fund.split(')')[0][1:]
            name = fund.split(')')[1]
            sql = 'insert into fund(code, name) values ("{}", "{}")'.format(
                code, name)
            Sql.exec_sql(db_conn, sql)
Code Example #3
File: allmanager.py  Project: ryjfgjl/Fund
        pages, page)  # snippet begins mid-call: url was just formatted with pages/page
    response = requests.get(url)
    html = response.text
    # The endpoint returns JavaScript, not JSON:
    #   var returnjson= {data:[[...],[...]]...}
    # Drop the fixed prefix and everything from the closing "]]" onward,
    # leaving only the comma-separated rows.
    data = html[len('var returnjson= {data:[['):(len(html.split(']')[-1]) +
                                                 2) * -1]
    managers = data.split('],[')
    for manager in managers:
        # Each row holds quoted fields: strip the outer quotes, then split
        # on the '","' separators.
        manager = manager[1:-1].split('","')

        code = manager[0]
        name = manager[1]
        companycode = manager[2]
        companyname = manager[3]
        currentfundcodes = manager[4]
        currentfundnames = manager[5]
        managertotalday = manager[6]
        bestincomerate = manager[7]
        bestfundcode = manager[8]
        bestfundname = manager[9]
        bestfundmoney = manager[10]
        bestfundincomerate = manager[11]
        crawldate = str(date.today())

        sql = "insert into manager(code,name,companycode,companyname,currentfundcodes,currentfundnames,managertotalday,bestincomerate,bestfundcode,bestfundname,bestfundmoney,bestfundincomerate," \
              "crawldate) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        Sql.exec_sql(db_conn, sql, [[
            code, name, companycode, companyname, currentfundcodes,
            currentfundnames, managertotalday, bestincomerate, bestfundcode,
            bestfundname, bestfundmoney, bestfundincomerate, crawldate
        ]])
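
The slicing in the snippet above is dense, so here is a tiny self-contained check of the same logic against a made-up payload (every field value is invented, not real API output):

# Hypothetical payload shaped like the real response; all values are invented.
html = 'var returnjson= {data:[["30057930","Zhang San"],["30041234","Li Si"]],record:2}'
tail = html.split(']')[-1]            # ',record:2}' -- the text after the last ]
data = html[len('var returnjson= {data:[['):(len(tail) + 2) * -1]
print(data)                           # "30057930","Zhang San"],["30041234","Li Si"
for row in data.split('],['):
    print(row[1:-1].split('","'))     # ['30057930', 'Zhang San'], then ['30041234', 'Li Si']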
Code Example #4
import requests
from datetime import date
from bs4 import BeautifulSoup
from sql import Sql

Sql = Sql()
db_conn = Sql.conn_db('fund')

url = 'http://fund.eastmoney.com/Company/default.html#scomname;dasc'
response = requests.get(url)
response.encoding = 'utf8'
html = response.text
soup = BeautifulSoup(html, 'html.parser')
trs = soup.find('table', id='gspmTbl').find('tbody').find_all('tr')

for tr in trs:
    tds = tr.find_all('td')
    code = tds[1].find('a')['href']  # company link; the code is embedded in the href
    name = tds[1].get_text()
    establishdate = tds[3].get_text()
    # The rating is rendered as star sprites; count the lit stars.
    labels = tds[4].find_all('label', 'sprite sprite-star1')
    txscore = len(labels)
    scalenumber = tds[5]['data-sortvalue']  # sortable numeric value of the fund scale
    allfundnumber = tds[6].get_text()
    allmanagernumber = tds[7].get_text()
    crawldate = str(date.today())

    sql = "insert into company (code,name,establishdate,txscore,scalenumber,allfundnumber,allmanagernumber,crawldate) values(%s,%s,%s,%s,%s,%s,%s,%s)"
    Sql.exec_sql(db_conn, sql, [[
        code, name, establishdate, txscore, scalenumber, allfundnumber,
        allmanagernumber, crawldate
    ]])
Code Example #5
import requests
from bs4 import BeautifulSoup
from sql import Sql


Sql = Sql()
db_conn = Sql.conn_db('wzmatch')

url = 'https://www.fnscore.com/detail/league/kog-2/league-kog-647.html'
response = requests.get(url)
html = response.text


data = []
soup = BeautifulSoup(html, 'html.parser')
# The second match-panel-container on the page holds the results table.
matches = soup.find_all('div', 'match-panel-container')[1].find_all(
    'div', 'match-panel-item match-table-item')
for match in matches:
    match_info = match.find_all('p')
    matchTime = match_info[0].get_text()
    teams = match.find_all('div', 'team')
    teamA = teams[0].find('p').get_text()
    teamB = teams[1].find('p').get_text()
    score = match_info[2].get_text()
    BO = match_info[4].get_text()  # best-of format, e.g. BO5
    data.append([matchTime, teamA, teamB, score, BO])


# Batch-insert every scraped match with one parameterized call.
sql = 'insert into kpl2020_autumn(matchTime,teamA,teamB,score,BO) values(%s,%s,%s,%s,%s)'
Sql.exec_sql(db_conn, sql, data)

Code Example #6
import time

from driver import Driver  # project webdriver factory; module path assumed
from sql import Sql


class JJJZ:
    def __init__(self):
        self.Sql = Sql()
        self.Driver = Driver()
        self.db_conn = self.Sql.conn_db(db='fund')

    def main(self):
        driver = self.Driver.main()
        root = 'http://fundf10.eastmoney.com/jjjz_'
        # Resume after the last fund whose NAV history was already crawled.
        sql = 'select id,fund from fund where id > (select ifnull(crawledFundId,0) from crawl_info where spiderName = "jjjz")'
        fund_id_li = self.Sql.exec_sql(self.db_conn, sql)
        driver.implicitly_wait(2)
        for fund in fund_id_li:
            id = fund[0]
            fund = fund[1]
            jjjz_page = 0
            url = root + fund + '.html'
            driver.get(url)
            # The second-to-last pager label holds the total page count.
            jjjz_total_page = driver.find_elements_by_xpath(
                '//div[@class="pagebtns"]/label')[-2].text
            data = []
            for _ in range(int(jjjz_total_page)):

                # Each table row: date, unit NAV, cumulative NAV, daily growth
                # rate, purchase status, redemption status, dividend.
                trs = driver.find_elements_by_xpath(
                    '//div[@id="jztable"]/table/tbody/tr')
                for tr in trs:
                    tds = [td.text.strip()
                           for td in tr.find_elements_by_tag_name('td')]
                    (date, unit_jz, total_jz, date_rate, buy_status,
                     sale_status, red) = tds[:7]
                    data.append([
                        id, date, unit_jz, total_jz, date_rate, buy_status,
                        sale_status, red
                    ])
                jjjz_page += 1
                print('Crawling Fund {0}, Crawled Page {1}'.format(
                    fund, jjjz_page))
                # Advance to the next page of the NAV table.
                jjjz_next_page = driver.find_elements_by_xpath(
                    '//div[@class="pagebtns"]/label')[-1]
                jjjz_next_page.click()
                time.sleep(1)

            sql = "insert into jjjz(fundId,JZDate,unitJZ,totalJZ,dateRate,buyStatus,saleStatus,red) values (%s,%s,%s,%s,%s,%s,%s,%s)"
            self.Sql.exec_sql(self.db_conn, sql, data)
            sql = 'update crawl_info set crawledFundId = {} where spiderName = "jjjz"'.format(
                id)
            self.Sql.exec_sql(self.db_conn, sql)
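
Code Examples #1 and #6 also depend on a Driver helper whose source is not on this page; Driver().main() evidently returns a ready-to-use selenium webdriver. A plausible minimal version, assuming Chrome and the selenium 3 API used throughout these examples, might be:

# driver.py -- a hypothetical reconstruction; the project's real helper is not shown here.
from selenium import webdriver


class Driver:
    def main(self):
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')     # crawl without opening a window
        options.add_argument('--disable-gpu')
        # Assumes chromedriver is on PATH; pass executable_path to Chrome() otherwise.
        return webdriver.Chrome(options=options)

Note that the find_element_by_* / find_elements_by_* calls used in every example are the selenium 3 API; selenium 4 removed them in favor of find_element(By.CLASS_NAME, ...) and friends.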