Ejemplo n.º 1
0
 def parse(self, response):
     """Scrape one quote page into a ``StockItem``.

     Each field is stored as the list returned by ``.extract()`` for its
     XPath, so a field whose XPath matches nothing ends up as an empty list.

     Args:
         response: The downloaded page.  ``response.url`` is logged, and the
             text between its first and second ``=`` is printed as the
             ticker.

     Returns:
         The populated ``StockItem``, or ``None`` if extraction raised (the
         failure is logged at ERROR level).
     """
     import logging

     # (item field, XPath) pairs, applied in this order.  The keys are
     # runtime item keys, so 'OneYearTragetEstimate' keeps its existing
     # spelling (presumably it matches the StockItem declaration).
     field_xpaths = (
         ('CompanyName',
          '//*[@id="quote-header-info"]/div[2]/div[1]/div[1]/h1/text()'),
         ('Price',
          '//*[@id="quote-header-info"]/div[3]/div[1]/div/span[1]/text()'),
         ('PrevClose',
          '//*[@id="quote-summary"]/div[1]/table/tbody/tr[1]/td[2]/span/text()'),
         ('Open',
          '//*[@id="quote-summary"]/div[1]/table/tbody/tr[2]/td[2]/span/text()'),
         ('DayRange',
          '//*[@id="quote-summary"]/div[1]/table/tbody/tr[5]/td[2]/text()'),
         ('YearRange',
          '//*[@id="quote-summary"]/div[1]/table/tbody/tr[6]/td[2]/text()'),
         ('Volume',
          '//*[@id="quote-summary"]/div[1]/table/tbody/tr[7]/td[2]/span/text()'),
         ('MarketCap',
          '//*[@id="quote-summary"]/div[2]/table/tbody/tr[1]/td[2]/span/text()'),
         ('Beta',
          '//*[@id="quote-summary"]/div[2]/table/tbody/tr[2]/td[2]/span/text()'),
         ('PERatioTTM',
          '//*[@id="quote-summary"]/div[2]/table/tbody/tr[3]/td[2]/span/text()'),
         ('EPSTTM',
          '//*[@id="quote-summary"]/div[2]/table/tbody/tr[4]/td[2]/span/text()'),
         ('ForwardDivYield',
          '//*[@id="quote-summary"]/div[2]/table/tbody/tr[6]/td[2]/text()'),
         ('OneYearTragetEstimate',
          '//*[@id="quote-summary"]/div[2]/table/tbody/tr[8]/td[2]/span/text()'),
         ('PercentGain',
          '//*[@id="quote-header-info"]/div[3]/div/div[1]/span[2]/text()'),
     )

     self.log('URL: %s' % response.url)
     # A URL without '=' used to raise IndexError here, outside the try
     # block, which aborted the callback before any error handling ran.
     url_parts = response.url.split("=")
     ticker = url_parts[1] if len(url_parts) > 1 else None
     # Single-argument print() produces the same output on Python 2 and 3.
     print(ticker)
     try:
         hxs = Selector(response)
         item = StockItem()
         print(hxs)
         for field, xpath in field_xpaths:
             item[field] = hxs.xpath(xpath).extract()
         # Reuse the extracted value instead of evaluating the XPath twice.
         print(item['CompanyName'])
         return item
     except Exception as exc:
         # Still best-effort (the callback returns None), but the cause is
         # now recorded and KeyboardInterrupt/SystemExit are not swallowed.
         self.log('Error parsing %s: %r' % (response.url, exc),
                  level=logging.ERROR)
         return None
Ejemplo n.º 2
0
 def parse_hd_item(self, response):
     """Yield one 'hd'-channel ``StockItem`` per pair from ``extract_item``.

     The date is obtained once from ``self.getDate(response.url)`` and is
     attached to every item yielded for this response.
     """
     trade_date = self.getDate(response.url)
     for pair in self.extract_item(response):
         stock_code, stock_records = pair
         entry = StockItem()
         # Assign in a fixed order: code, records, channel, date.
         assignments = (
             ('code', stock_code),
             ('records', stock_records),
             ('channel', 'hd'),
             ('date', trade_date),
         )
         for key, value in assignments:
             entry[key] = value
         yield entry
Ejemplo n.º 3
0
    def parse(self, response):
        """Extract the quote title and value from a quote-summary page.

        Both fields are stored as the lists returned by ``.extract()``.
        Returns the populated ``StockItem``.
        """
        # (item field, XPath) pairs, applied in this order.
        field_xpaths = (
            ('title',
             '//*[@id="yfi_rt_quote_summary"]/div[1]/div/h2/text()'),
            ('value',
             '//*[@id="yfi_rt_quote_summary"]/div[2]/p/span[1]/span/text()'),
        )

        self.log('URL: %s' % response.url)

        selector = HtmlXPathSelector(response)
        quote = StockItem()
        for field, xpath in field_xpaths:
            quote[field] = selector.select(xpath).extract()
        return quote
Ejemplo n.º 4
0
 def extract_item(self, response):
     """Yield ``[code, records]`` for every complete stock row on the page.

     Rows are the ``<tr>`` elements under ``div.grid-view``; the number of
     rows found is logged.  Rows with fewer than 17 cells are skipped.

     Args:
         response: The downloaded listing page.  If ``"SH"`` occurs in
             ``response.url`` the code is prefixed with ``'sh'``, otherwise
             with ``'sz'``.

     Yields:
         A two-element list: the prefixed stock code (text of cell 0's
         ``a/span``) and a list of 16 cell texts (cell 2, then cells 2-16).
     """
     hxs = Selector(response)
     stocks = hxs.xpath('//div[@class="grid-view"]/table/tbody/tr')
     self.log(str(len(stocks)))
     # The URL does not change between rows, so decide the prefix once.
     district = 'sh' if "SH" in response.url else 'sz'
     for stock in stocks:
         stock_items = stock.xpath('./td')
         # Cells up to index 16 are read below, so a row needs 17 cells.
         # The previous '< 16' guard let 16-cell rows through to an
         # IndexError on stock_items[16].
         if len(stock_items) < 17:
             continue
         code = district + stock_items[0].xpath('./a/span/text()')[0].extract()
         # NOTE(review): cell 2 is recorded here and again as the first
         # element of the range below; kept as-is because consumers may
         # rely on the 16-element layout -- confirm whether this is intended.
         records = [stock_items[2].xpath('./text()')[0].extract()]
         records.extend(
             stock_items[idx].xpath('./text()')[0].extract()
             for idx in range(2, 17)
         )
         yield [code, records]
Ejemplo n.º 5
0
    def parse_api(self, response):
        """Build a ``StockItem`` from a JSON history response.

        The stock code is taken from the ``code`` query parameter of
        ``response.url`` (the last occurrence wins); the records come from
        the ``"record"`` key of the JSON body.  Each record is logged as a
        comma-joined line.

        Args:
            response: The downloaded API response.

        Returns:
            A ``StockItem`` with ``code``, ``records`` and ``cate='ifeng'``,
            or ``None`` when the URL carries no ``code`` parameter (a
            CRITICAL message is logged in that case).
        """
        stock_code = None
        query = response.url.split('?')[-1]
        for param in query.split('&'):
            # partition() tolerates parameters without '=' or with '='
            # inside the value; unpacking split('=') into two names raised
            # ValueError on both.
            k, sep, v = param.partition('=')
            if sep and k == 'code':
                stock_code = v

        if stock_code is None:
            self.log("no stock_id found in [" + response.url + "]",
                     scrapy.log.CRITICAL)
            # Stop here: continuing would raise TypeError when the code is
            # concatenated into the log line below, or emit an item with
            # no code.
            return None

        data = json.loads(response.body)
        records = data["record"]
        for record in records:
            self.log("stock history:" + stock_code + ":" + ",".join(record))
        item = StockItem()
        item["code"] = stock_code
        item["records"] = records
        item['cate'] = 'ifeng'
        return item
Ejemplo n.º 6
0
 def parse_cate(self, response):
     """Collect the stock codes listed on a category page.

     The category is taken from the ``c`` query parameter of
     ``response.url`` (the last occurrence wins; ``None`` if absent).  Each
     ``<a>`` in the first column of the page's tables is serialised to
     HTML, split on ``'/'``, and its seventh segment is logged and kept as
     the stock code.

     Args:
         response: The downloaded category page.

     Returns:
         A ``StockItem`` with ``code`` set to the category, ``records`` set
         to the list of stock codes, and ``cate='cate'``.
     """
     stock_cate = None
     query = response.url.split('?')[-1]
     for param in query.split('&'):
         # partition() tolerates parameters without '=' or with '=' inside
         # the value; unpacking split('=') into two names raised
         # ValueError on both.
         k, sep, v = param.partition('=')
         if sep and k == 'c':
             stock_cate = v

     hxs = Selector(response)
     stock_codes = hxs.xpath('//table/tr/td[1]/a').extract()
     self.log("[cate_code]" + str(len(stock_codes)))

     stock_code_list = []
     for stock_code in stock_codes:
         items = stock_code.split('/')
         # items[6] is read below, so at least 7 segments are required.
         # The previous '< 6' guard let 6-segment links through to an
         # IndexError.
         if len(items) < 7:
             continue
         code = items[6]
         self.log(code)
         stock_code_list.append(code)

     item = StockItem()
     item['code'] = stock_cate
     item['records'] = stock_code_list
     item['cate'] = "cate"
     return item
Ejemplo n.º 7
0
    def parse(self, response):
        """Scrape one quote page and publish its fields to Kafka as JSON.

        Each field is the list returned by ``.extract()`` for its XPath.
        The resulting dict is JSON-encoded and sent to the ``'json'`` topic.

        Args:
            response: The downloaded page.  ``response.url`` is logged, and
                the text between its first and second ``=`` is printed as
                the ticker.

        Returns:
            A ``StockItem`` on success, or ``None`` if anything raised (the
            failure is logged at ERROR level).
            NOTE(review): the returned item is never populated -- the
            scraped data only goes to Kafka.  Kept as-is; confirm this is
            intended.
        """
        import logging

        # (payload key, XPath) pairs.  The keys are part of the published
        # JSON, so 'OneYearTragetEstimate' keeps its existing spelling.
        field_xpaths = (
            ('CompanyName',
             '//*[@id="quote-header-info"]/div[2]/div[1]/div[1]/h1/text()'),
            ('Price',
             '//*[@id="quote-header-info"]/div[3]/div[1]/div/span[1]/text()'),
            ('PrevClose',
             '//*[@id="quote-summary"]/div[1]/table/tbody/tr[1]/td[2]/span/text()'),
            ('Open',
             '//*[@id="quote-summary"]/div[1]/table/tbody/tr[2]/td[2]/span/text()'),
            ('DayRange',
             '//*[@id="quote-summary"]/div[1]/table/tbody/tr[5]/td[2]/text()'),
            ('YearRange',
             '//*[@id="quote-summary"]/div[1]/table/tbody/tr[6]/td[2]/text()'),
            ('Volume',
             '//*[@id="quote-summary"]/div[1]/table/tbody/tr[7]/td[2]/span/text()'),
            ('MarketCap',
             '//*[@id="quote-summary"]/div[2]/table/tbody/tr[1]/td[2]/text()'),
            ('Beta',
             '//*[@id="quote-summary"]/div[2]/table/tbody/tr[2]/td[2]/span/text()'),
            ('PERatioTTM',
             '//*[@id="quote-summary"]/div[2]/table/tbody/tr[3]/td[2]/span/text()'),
            ('EPSTTM',
             '//*[@id="quote-summary"]/div[2]/table/tbody/tr[4]/td[2]/span/text()'),
            ('ForwardDivYield',
             '//*[@id="quote-summary"]/div[2]/table/tbody/tr[6]/td[2]/text()'),
            ('OneYearTragetEstimate',
             '//*[@id="quote-summary"]/div[2]/table/tbody/tr[8]/td[2]/span/text()'),
            ('PercentGain',
             '//*[@id="quote-header-info"]/div[3]/div/div/span[2]/text()'),
        )

        self.log('URL: %s' % response.url)
        # A URL without '=' used to raise IndexError here, outside the try
        # block, which aborted the callback before any error handling ran.
        url_parts = response.url.split("=")
        ticker = url_parts[1] if len(url_parts) > 1 else None
        # Single-argument print() produces the same output on Python 2 and 3.
        print(ticker)
        try:
            producer = KafkaProducer(bootstrap_servers='172.31.26.247:9092')
            try:
                hxs = Selector(response)
                item = StockItem()
                data = {
                    field: hxs.xpath(xpath).extract()
                    for field, xpath in field_xpaths
                }
                # Send bytes explicitly: no value_serializer is configured,
                # and the producer requires bytes payloads in that case.
                producer.send('json', json.dumps(data).encode('utf-8'))
            finally:
                # send() is asynchronous; close() flushes pending messages
                # and releases the producer created for this response.
                producer.close()
            print(
                "##########################################################################################################################"
            )
            return item
        except Exception as exc:
            # Still best-effort (the callback returns None), but the cause
            # is now recorded and KeyboardInterrupt/SystemExit are not
            # swallowed.
            self.log('Error parsing %s: %r' % (response.url, exc),
                     level=logging.ERROR)
            return None