Example #1
0
class JJGKPipeline:
    """Scrapy pipeline that inserts each scraped fund-overview item into
    the ``jjgk`` MySQL table.
    """

    # Item keys in the exact column order of the generated INSERT statement.
    FIELDS = (
        'code', 'fullname', 'shortname', 'type', 'releasetime',
        'establishtime', 'establishcount', 'money', 'count', 'company',
        'companycode', 'bank', 'bankcode', 'manager', 'managercode',
        'red', 'managerfee', 'bankfee', 'servicefee', 'applybuyfee',
        'buyfee', 'salefee', 'comparestandard', 'tacking', 'target',
        'idea', 'range', 'strangy', 'redpolicy', 'risk', 'leastbuy',
        'crawldate',
    )

    def __init__(self):
        # Project-local DB helper; opens a connection to the 'fund' database.
        self.Sql = Sql()
        self.db_conn = self.Sql.conn_db('fund')

    def process_item(self, item, spider):
        """Insert *item* into ``jjgk`` and return it unchanged.

        Values are passed as %s parameters, never interpolated into the
        SQL text.
        """
        # `range` is a MySQL reserved word, hence the backticks.
        columns = ','.join(
            '`range`' if field == 'range' else field for field in self.FIELDS)
        placeholders = ','.join(['%s'] * len(self.FIELDS))
        sql = 'insert into jjgk({}) values({})'.format(columns, placeholders)
        self.Sql.exec_sql(self.db_conn, sql,
                          [[item[field] for field in self.FIELDS]])
        return item
Example #2
0
 def __init__(self):
     # Create the project's SQL helper once and keep both the helper and
     # an open connection to the 'fund' database on the instance.
     sql_helper = Sql()
     self.Sql = sql_helper
     self.db_conn = sql_helper.conn_db('fund')
Example #3
0
class JJJLPipeline:
    """Scrapy pipeline that persists fund-manager items into three tables:
    ``jjjl`` (manager profile), ``jjjl_fund_history`` (funds the manager
    ran in the past) and ``jjjl_current_fund`` (funds currently managed).
    """

    # Item keys in the exact column order of each table's INSERT statement.
    _JJJL_FIELDS = (
        'code', 'name', 'info', 'totalday', 'startdate', 'currentcompany',
        'currentfundmoney', 'bestincomerate', 'crawldate',
    )
    _HISTORY_FIELDS = (
        'code', 'fundcode', 'fundname', 'fundtype', 'fundmoney',
        'fundmanagerdate', 'fundmanagerday', 'fundmanagerincomerate',
        'crawldate',
    )
    _CURRENT_FIELDS = (
        'code', 'fundcode', 'fundname', 'fundtype', 'last3mrate',
        'last3mrank', 'last6mrate', 'last6mrank', 'last1yrate', 'last1yrank',
        'last2yrate', 'last2yrank', 'currentyearrate', 'currentyearrank',
        'crawldate',
    )

    def __init__(self):
        # Project-local DB helper; opens a connection to the 'fund' database.
        self.Sql = Sql()
        self.db_conn = self.Sql.conn_db('fund')

    def _insert(self, table, fields, item):
        """Insert *item*'s *fields* (in order) into *table* via %s params."""
        sql = 'insert into {}({}) values({})'.format(
            table, ','.join(fields), ','.join(['%s'] * len(fields)))
        self.Sql.exec_sql(self.db_conn, sql,
                          [[item[field] for field in fields]])

    def process_item(self, item, spider):
        """Dispatch on the concrete item type and insert into the matching
        table; items of any other type pass through untouched.
        """
        if isinstance(item, JJJLItem):
            self._insert('jjjl', self._JJJL_FIELDS, item)
        elif isinstance(item, JJJLFundHistoryItem):
            self._insert('jjjl_fund_history', self._HISTORY_FIELDS, item)
        elif isinstance(item, JJJLCurrentFundItem):
            self._insert('jjjl_current_fund', self._CURRENT_FIELDS, item)

        return item
Example #4
0
class JJJL(scrapy.Spider):
    """Spider that scrapes fund-manager detail pages from eastmoney.com.

    For each manager code it yields:
      * one JJJLItem with the manager's profile,
      * one JJJLFundHistoryItem per row of the fund-history table,
      * one JJJLCurrentFundItem per row of the currently-managed table.

    Bug fixed vs. the previous revision: the per-row items were created
    ONCE outside their loops and the same mutable instance was re-yielded
    after mutation; because Scrapy may process yielded items
    asynchronously, every reference could end up holding the last row's
    data. A fresh item is now created for each row.
    """

    def __init__(self):
        # Project-local DB helper used by start_requests() to find manager
        # codes that have not been crawled yet.
        # NOTE(review): super().__init__() is not called (unchanged from the
        # original); scrapy copes because `name` is a class attribute.
        self.Sql = Sql()
        self.db_conn = self.Sql.conn_db('fund')

    name = "jjjl"
    custom_settings = {
        'ITEM_PIPELINES': {
            'fund.jjjl_pipelines.JJJLPipeline': 300
        },
    }

    def start_requests(self):
        # Only request managers not yet present in the jjjl table.
        sql = 'select code from manager where code not in(select code from jjjl)'
        codes = self.Sql.exec_sql(self.db_conn, sql).fetchall()
        for code in codes:
            code = code[0]
            url = 'http://fund.eastmoney.com/manager/{}.html'.format(code)
            yield scrapy.Request(url=url,
                                 callback=self.parse,
                                 meta={'code': code})

    def parse(self, response):
        """Parse one manager page and yield the profile plus per-fund rows."""
        code = response.meta['code']
        soup = BeautifulSoup(response.text, 'lxml')

        # Hoisted common containers — the original re-ran these find chains
        # for every field.
        content = soup.find('div', 'content_out')
        left = content.find('div', 'left clearfix w438')
        right_jd = left.find('div', 'right jd')
        gm = right_jd.find('div', 'gmContainer')

        # --- manager profile -------------------------------------------
        item = JJJLItem()
        item['code'] = code
        item['name'] = content.find('div', 'content_in').find('span').get_text()
        item['info'] = content.find('div', 'jlinfo clearfix').find('p').get_text()
        item['totalday'] = right_jd.get_text()
        # Not available on the page; stored empty on purpose (unchanged).
        item['startdate'] = ''
        item['currentcompany'] = right_jd.find('a').get_text()
        item['currentfundmoney'] = gm.find('div', 'gmleft gmlefts').find(
            'span', 'numtext').get_text()
        item['bestincomerate'] = gm.find('div', 'gmleft').find(
            'span', 'numtext').get_text()
        item['crawldate'] = str(date.today())

        yield item

        # --- fund-history table ----------------------------------------
        trs = content.find('table', 'ftrs').find('tbody').find_all('tr')
        for tr in trs:
            tds = tr.find_all('td')
            # Fresh item per row (see class docstring).
            item = JJJLFundHistoryItem()
            item['code'] = code
            item['fundcode'] = tds[0].get_text()
            item['fundname'] = tds[1].get_text()
            # tds[2] is intentionally skipped here, matching the original —
            # presumably a link/label column; confirm against the live page.
            item['fundtype'] = tds[3].get_text()
            item['fundmoney'] = tds[4].get_text()
            item['fundmanagerdate'] = tds[5].get_text()
            item['fundmanagerday'] = tds[6].get_text()
            item['fundmanagerincomerate'] = tds[7].get_text()
            item['crawldate'] = str(date.today())

            yield item

        # --- currently-managed funds -----------------------------------
        trs = soup.find_all('div', 'content_in')[1].find(
            'table', 'ftrs').find('tbody').find_all('tr')
        for tr in trs:
            tds = tr.find_all('td')
            # Fresh item per row (see class docstring).
            item = JJJLCurrentFundItem()
            item['code'] = code
            item['fundcode'] = tds[0].get_text()
            item['fundname'] = tds[1].get_text()
            item['fundtype'] = tds[2].get_text()
            item['last3mrate'] = tds[3].get_text()
            item['last3mrank'] = tds[4].get_text()
            item['last6mrate'] = tds[5].get_text()
            item['last6mrank'] = tds[6].get_text()
            item['last1yrate'] = tds[7].get_text()
            item['last1yrank'] = tds[8].get_text()
            item['last2yrate'] = tds[9].get_text()
            item['last2yrank'] = tds[10].get_text()
            item['currentyearrate'] = tds[11].get_text()
            item['currentyearrank'] = tds[12].get_text()
            item['crawldate'] = str(date.today())

            yield item
Example #5
0
class JJGK(scrapy.Spider):
    """Spider that scrapes the fund-overview (jbgk) page for every fund not
    yet stored in the ``jjgk`` table and yields one JJGKItem per fund.
    """

    def __init__(self):
        # Project-local DB helper used by start_requests().
        # NOTE(review): super().__init__() is not called (unchanged from the
        # original); scrapy copes because `name` is a class attribute.
        self.Sql = Sql()
        self.db_conn = self.Sql.conn_db('fund')

    name = "jjgk"
    custom_settings = {
        'ITEM_PIPELINES': {'fund.jjgk_pipelines.JJGKPipeline': 300},
    }

    def start_requests(self):
        # Only crawl funds whose overview is not yet in the jjgk table.
        sql = 'select code from fund where code not in(select code from jjgk)'
        codes = self.Sql.exec_sql(self.db_conn, sql).fetchall()
        for code in codes:
            code = code[0]
            url = 'http://fundf10.eastmoney.com/jbgk_{}.html'.format(code)
            yield scrapy.Request(url=url, callback=self.parse,
                                 meta={'code': code})

    def parse(self, response):
        """Parse one fund-overview page into a JJGKItem.

        The original used ~30 intermediate locals, two of which shadowed
        the builtins ``type`` and ``range``; fields are now assigned into
        the item directly.
        """
        code = response.meta['code']
        soup = BeautifulSoup(response.text, 'lxml')
        item = JJGKItem()

        boxes = soup.find('div', 'detail').find('div', 'txt_cont').find(
            'div', 'txt_in').find_all('div', 'box')
        tds = boxes[0].find('table', 'info w790').find_all('td')

        item['code'] = code
        item['fullname'] = tds[0].get_text()
        item['shortname'] = tds[1].get_text()
        # tds[2] is skipped, matching the original — presumably a label or
        # code cell; confirm against the live page.
        item['type'] = tds[3].get_text()
        item['releasetime'] = tds[4].get_text()
        # The establishment cell holds "date / share-count".
        item['establishtime'] = tds[5].get_text().split('/')[0]
        item['establishcount'] = tds[5].get_text().split('/')[1]
        item['money'] = tds[6].get_text()
        item['count'] = tds[7].get_text()
        item['company'] = tds[8].get_text()
        item['companycode'] = tds[8].find('a')['href']
        item['bank'] = tds[9].get_text()
        item['bankcode'] = tds[9].find('a')['href']
        item['manager'] = tds[10].get_text()
        # Some funds carry no manager link; store None then (as before).
        manager_link = tds[10].find('a')
        item['managercode'] = manager_link['href'] if manager_link else None
        item['red'] = tds[11].get_text()
        item['managerfee'] = tds[12].get_text()
        item['bankfee'] = tds[13].get_text()
        item['servicefee'] = tds[14].get_text()
        item['applybuyfee'] = tds[15].get_text()
        item['buyfee'] = tds[16].get_text()
        item['salefee'] = tds[17].get_text()
        item['comparestandard'] = tds[18].get_text()
        item['tacking'] = tds[19].get_text()

        # Pages rendered with 8 boxes carry an extra box at index 1; drop it
        # so the later indices line up (unchanged from the original).
        if len(boxes) == 8:
            del boxes[1]
        item['target'] = boxes[1].find('p').get_text().strip('\n\r ')
        # NOTE(review): the five fields below all read boxes[2]; this looks
        # like a copy-paste slip (boxes[3]..boxes[6] would be expected), but
        # it is preserved as-is so stored data stays comparable — confirm
        # against the live page before changing.
        item['idea'] = boxes[2].find('p').get_text().strip('\n\r ')
        item['range'] = boxes[2].find('p').get_text().strip('\n\r ')
        item['strangy'] = boxes[2].find('p').get_text().strip('\n\r ')
        item['redpolicy'] = boxes[2].find('p').get_text().strip('\n\r ')
        item['risk'] = boxes[2].find('p').get_text().strip('\n\r ')

        item['leastbuy'] = soup.find('div', 'bs_jz').find(
            'div', 'col-left').find('div').find('a').find_all(
                'span')[-1].get_text()
        item['crawldate'] = str(date.today())

        yield item