Beispiel #1
0
 def parse(self, response):
     """Yield one EastmoneyItem per row of the Asia-market quote table."""
     # Field name -> relative XPath of the table cell that holds it.
     field_xpaths = (
         ('number', ".//td[1]/text()"),
         ('name', ".//td[2]/a/text()"),
         ('latest_price', ".//td[3]//text()"),
         ('amount_of_rise_and_fall', ".//td[4]//text()"),
         ('rate_of_rise_and_fall', ".//td[5]//text()"),
         ('today_opening_price', ".//td[6]//text()"),
         ('maximum_price', ".//td[7]//text()"),
         ('minimum_price', ".//td[8]//text()"),
         ('yesterday_closing_price', ".//td[9]/text()"),
         ('amplitude', ".//td[10]/text()"),
         ('update_time', ".//td[11]/text()"),
     )
     rows = response.xpath(
         "//div[@class='row']//table[@id='table_wrapper-table']//tbody/tr")
     for row in rows:
         item = EastmoneyItem()
         for field, cell_xpath in field_xpaths:
             item[field] = row.xpath(cell_xpath).extract_first()
         yield item
 def parse_num(self, response):
     """Parse the stock-list JS payload and yield one item per non-ChiNext stock.

     The body looks like ``var x={...:[...],pages...}``; the list literal is
     cut out by string surgery, so any upstream format change will break the
     two ``split`` calls below.
     """
     import ast

     # Grab the response body text.
     text = response.text
     # Carve the list literal out of the JS assignment and parse it.
     # ast.literal_eval only accepts Python literals, unlike eval(), which
     # would execute arbitrary code coming from this untrusted response.
     stock_list = ast.literal_eval(
         text.split('=')[1].split(':')[1].replace(',pages', ''))
     # Walk the list and pull the per-stock fields out of each CSV string.
     for stock in stock_list:
         stock_msg_list = stock.split(',')
         stock_num = stock_msg_list[1]    # stock code
         stock_name = stock_msg_list[2]   # stock name
         stock_price = stock_msg_list[3]  # latest price
         # change percentage with the '%' sign stripped
         stock_change_range = stock_msg_list[5].replace('%', '')
         # change amount
         stock_change_price = stock_msg_list[4]
         # Skip ChiNext (growth-board) stocks, whose codes start with '30'.
         if not stock_num.startswith('30'):
             print("代码:%s 名称:%s 现价:%s 涨跌幅:%s 涨跌额:%s" %
                   (stock_num, stock_name, stock_price, stock_change_range,
                    stock_change_price))
             # Build the item object.
             item = EastmoneyItem()
             item['stock_num'] = stock_num
             item['stock_name'] = stock_name
             item['stock_price'] = stock_price
             item['stock_change_range'] = stock_change_range
             item['stock_change_price'] = stock_change_price
             yield item
             self.logger.info('ok')
     print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
Beispiel #3
0
    def parse_info(self, response):
        """Parse the fund-ranking ``rankData`` payload and yield one item per fund.

        The body is ``var rankData = {datas:[...],allRecords:...};`` — the
        string surgery below rewrites it into valid JSON before loading.
        """
        url = response.url
        # The ranking URL is full of regex metacharacters ('?', '.', '&'), so
        # it must be escaped before splicing in a capture group for the
        # ``pi`` page parameter.  (The original un-escaped pattern could
        # never match — .group(1) raised AttributeError — and the follow-up
        # re.sub call was missing its string argument, which raises
        # TypeError; both are fixed here.)
        url_pattern = (
            re.escape('http://fund.eastmoney.com/data/rankhandler.aspx'
                      '?op=ph&dt=kf&ft=all&rs=&gs=0&sc=zzf&st=desc'
                      '&sd=2017-09-17&ed=2018-09-17&qdii=&tabSubtype=,,,,,&pi=')
            + '(.*?)'
            + re.escape('&pn=50&dx=1'))
        match = re.search(url_pattern, url)
        page = match.group(1) if match else None  # page index (not used below)
        html = response.text
        # Strip the JS wrapper, drop the trailing stats blob, and quote the
        # ``datas`` key so json.loads can parse the remainder.
        chunks = html.replace('var rankData = ', '').replace(
            ",allRecords:4246,pageIndex:1,pageNum:50,allPages:85,allNum:4246,gpNum:802,hhNum:2233,zqNum:1015,zsNum:535,bbNum:54,qdiiNum:140,etfNum:0,lofNum:213,fofNum:14};",
            '}').replace('{datas', '{"datas"').split()
        for datas in chunks:
            data = json.loads(datas)
            print(data['datas'])
            # Each entry of 'datas' is one fund as a comma-separated record.
            for content in data['datas']:
                content = content.split(',')
                item = EastmoneyItem()
                item['基金代码'] = content[0]
                item['基金简称'] = content[1]
                item['日期'] = content[3]
                item['单位净值'] = content[4]
                item['累计净值'] = content[5]
                item['日增长率'] = content[6]
                item['近1周'] = content[7]
                item['近1月'] = content[8]
                item['近3月'] = content[9]
                item['近6月'] = content[10]
                item['近1年'] = content[11]
                item['近2年'] = content[12]
                item['近3年'] = content[13]
                item['今年来'] = content[14]
                item['成立来'] = content[15]
                item['手续费'] = content[-3]
                yield item
Beispiel #4
0
 def _gen_start_request(self):
     """Yield one initial Request per configured target page."""
     for target in config.TARGETS:
         # Stock code and display name travel with the request via meta.
         item = EastmoneyItem()
         item['_id'] = target['_id']    # stock code
         item['name'] = target['name']  # stock name
         request = scrapy.Request(target['url'],
                                  meta={'item': item},
                                  callback=self.parse_stock)
         yield request
 def parse(self, response):
     """Populate one item from the financial-summary ('cwzb') table row."""
     # Shared prefix addressing the first data row of the cwzb table.
     row_prefix = (".cwzb > table:nth-child(1) > tbody:nth-child(2) > "
                   "tr:nth-child(1) > ")
     # Field name -> CSS selector for its value.
     selectors = {
         "stock_id": "#code::text",
         "name": row_prefix + "td:nth-child(1) > b:nth-child(1)::text",
         "gross_profit_rate": row_prefix + "td:nth-child(7)::text",
         "ROE": row_prefix + "td:nth-child(9)::text",
         "net_margin": row_prefix + "td:nth-child(4)::text",
         "net_assets": row_prefix + "td:nth-child(3)::text",
         "market_cap": row_prefix + "td:nth-child(2)::text",
     }
     item = EastmoneyItem()
     for field, css in selectors.items():
         item[field] = response.css(css).extract()
     yield item
Beispiel #6
0
 def parse(self, response):
     """Yield one item per forum post row on a Guba list page.

     Also fetches each post's detail page to grab its body text.
     """
     selector = Selector(response)
     # BUG FIX: the original used ``"A" or "B"`` — for two non-empty strings
     # that always evaluates to the FIRST string, so the 'odd' rows were
     # silently never selected.  An XPath union matches both row classes.
     rows = selector.xpath(
         "//div[@class='articleh normal_post']"
         " | //div[@class='articleh normal_post odd']")
     for box in rows:
         item = EastmoneyItem()
         try:
             # post view count
             item['views'] = int(box.xpath(".//span[@class='l1']/text()").extract()[0])
             # post comment count
             item['comments'] = int(box.xpath(".//span[@class='l2']/text()").extract()[0])
             # post timestamp: "day time"
             stamp = box.xpath(".//span[@class='l5']/text()").extract()[0]
             item['day'] = stamp.split(" ")[0]
             item['time'] = stamp.split(" ")[1]
             # post title
             item['title_content'] = str(box.xpath(".//span[@class='l3']/a/text()").extract())
             # HACK: a blocking requests.get inside a Scrapy callback defeats
             # the async engine; this should be a follow-up scrapy.Request.
             item['inner_content'] = Selector(
                 requests.get('http://guba.eastmoney.com'
                              + box.xpath(".//span[@class='l3']/a/@href").extract()[0])
             ).xpath("//div[@class='xeditor_content']/p/text()").extract()
             yield item
         except Exception as e:
             print('exception', e)
    def parse(self, response):
        """Scrape a fund detail page: name, link, and return rates.

        Every extracted value passes through ``self.panduaninfo`` —
        presumably a normalizer for empty ``extract()`` lists; confirm
        against its definition.
        """
        item = EastmoneyItem()
        # Template for the return-rate cells: dl picks the column,
        # dd[2] holds the short horizon, dd[3] the long one.
        base = ('//*[@id="body"]/div[12]/div/div/div[2]/div[1]/div[1]'
                '/dl[{dl}]/dd[{dd}]/span[2]/text()')
        rate_fields = (
            ('eastOne', 1, 2),      # past month
            ('eastThree', 2, 2),    # past 3 months
            ('eastSix', 3, 2),      # past 6 months
            ('eastOneyear', 1, 3),  # past year
            ('eastThreeyear', 2, 3),  # past 3 years
            ('eastAll', 3, 3),      # since inception
        )
        try:
            # fund name
            item['eastName'] = self.panduaninfo(response.xpath(
                '//div[@class="fundDetail-tit"]/div/text()').extract())
            # fund page link
            item['eastUrl'] = self.panduaninfo(response.xpath(
                '//h1[@class="fl"]/a/@href').extract())
            # return rates from one month up to since-inception
            for field, dl, dd in rate_fields:
                item[field] = self.panduaninfo(response.xpath(
                    base.format(dl=dl, dd=dd)).extract())
            yield item
        except Exception:
            # Was a bare ``except: pass`` — keep the best-effort skip, but
            # surface the failure instead of hiding it.
            self.logger.exception('failed to parse fund page %s', response.url)
    def parse(self, response):
        """Parse a JSONP stock-rating response and yield one scored item.

        On a 404 or any parse failure the spider requests the next URL from
        ``mynexturl()`` instead of stopping the crawl.
        """
        if response.status == 404:
            print("404 error occur")
            yield Request(mynexturl(), callback=self.parse, dont_filter=True)
            return
        try:
            res = response.body.decode(response.encoding)
            # Strip the JSONP wrapper: the JSON payload sits between the
            # outermost parentheses.  BUG FIX: the original ran findall on
            # the re-encoded utf-8 *bytes* (``res1``) with a str pattern,
            # which raises TypeError; match against the decoded str instead.
            p1 = re.compile(r'[(](.*?)[)]', re.S)
            finalres = re.findall(p1, res)
            json_finalres = json.loads("".join(finalres))
            # First 'Overall' record of the 'zj' rating block.
            overall = json_finalres['ApiResults']['zj']['Overall'][0]
            item = EastmoneyItem()
            item['stock_names'] = g_stock_name
            item['stock_id'] = g_stock_id
            # Copy the rating fields straight off the Overall record.
            for key in ('TotalScore', 'TotalScoreCHG', 'LeadPre', 'RisePro',
                        'MsgCount', 'CapitalScore', 'D1', 'ValueScore',
                        'MarketScoreCHG', 'Status', 'Comment', 'UpdateTime'):
                item[key] = overall[key]
            print("json success")
            yield item
        except Exception as e:
            # Was a bare ``except:`` — keep the skip-and-continue behavior
            # but report what actually went wrong.
            print("error occur", e)
        # Success or failure, move on to the next URL.
        yield Request(mynexturl(), callback=self.parse, dont_filter=True)