Esempio n. 1
0
    def parse_content(self, response):
        """Parse a PP market-price news page into MarketPricePP items.

        Reads the first table under div#PanelContent, uses the page
        headline as the item source and response.meta['data_date']
        (first 10 chars, YYYY-MM-DD) as the data date.  Yields one item
        per table row after the header.
        """
        doc = lxml.html.document_fromstring(response.body_as_unicode())
        data_table2 = doc.xpath('//div[@id="PanelContent"]//table')[0]

        title = ''.join(
            response.xpath(
                '//div[@class="news_content "]/h1//text()').extract()).strip()
        logging.debug(title)
        data_date = datetime.datetime.strptime(
            response.meta['data_date'][0:10], '%Y-%m-%d')
        data_list = table_to_list(data_table2)

        # Fix: `raise <str>` is a TypeError (string exceptions were
        # removed from Python); raise a real Exception instead.
        if len(data_list) <= 1 or len(data_list[1]) < 5:
            raise Exception('PLAS.CHEM99----get table failed %s' %
                            response.url)

        for row in data_list[1:]:
            item = MarketPricePP()
            item['materials'] = row[0].strip()
            item['product'] = row[1].strip()
            item['price'] = row[2].strip()
            item['rise_offset'] = row[3].strip()
            item['remarks'] = row[4].strip()
            item['datadate'] = data_date
            item['update_dt'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            item['source'] = title
            yield item
Esempio n. 2
0
    def parse_content(self, response):
        """Parse a factory-price news page into FactoryPrice items.

        The trading date is the 8-digit YYYYMMDD run embedded in the
        page headline; one item is yielded per table row after the
        header.  Fixes: removed a duplicated `remarks` assignment, an
        unused `data_table` selector, and commented-out debug code.
        """
        doc = lxml.html.document_fromstring(response.body_as_unicode())
        data_table2 = doc.xpath('//div[@id="PanelContent"]//table')[0]
        title = ''.join(
            response.xpath(
                '//div[@class="news_content "]/h1//text()').extract()).strip()
        logging.debug(title)
        data_date = re.search(r'(\d{8})', title).group(1)
        data_list = table_to_list(data_table2)

        for row in data_list[1:]:
            item = FactoryPrice()
            item['region'] = row[0].strip()
            item['produce_code'] = row[1].strip()
            item['produce_name'] = row[2].strip()
            item['pre_price'] = row[3].strip()
            item['price'] = row[4].strip()
            item['rise_offset'] = row[5].strip()
            if len(row) >= 7:  # remarks column is optional
                item['remarks'] = row[6].strip()
            item['title'] = title
            item['trading_dt'] = data_date
            item['datetime_stamp'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            yield item
Esempio n. 3
0
 def parse_content(self, response):
     """Yield one PriceSXSY item per data row of the page's first table.

     The publication timestamp is read from the byline div and the tab
     title from the second cell of the table's header row.
     """
     page = lxml.html.document_fromstring(response.body_as_unicode())
     table = page.xpath('//div[@id="PanelContent"]//table')[0]
     title = ''.join(
         response.xpath(
             '//div[@class="news_content "]/h1//text()').extract()).strip()
     auth_info = ''.join(
         response.xpath('//div[@class="news_title_b"]//text()').extract())
     pub_date = re.search('(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})',
                          auth_info).group(1)
     logging.debug(title)
     rows = table_to_list(table)
     tab_title = rows[0][1].strip()
     # Column layout of each data row, in assignment order.
     columns = (('produce_code', 0), ('price', 1),
                ('rise_offset', 2), ('remarks', 3))
     for row in rows[1:]:
         item = PriceSXSY()
         for field, col in columns:
             item[field] = row[col].strip()
         item['title'] = title
         item['trading_dt'] = pub_date
         item['tab_title'] = tab_title
         item['datetime_stamp'] = datetime.datetime.now().strftime(
             '%Y-%m-%d %H:%M:%S')
         yield item
 def parse_content(self, response):
     """Parse bitumen production stats into t_chem99_bithumen_prod_Item.

     response.meta supplies 'datadate' and 'datatype' (1 = by region
     '地区', 2 = by group '集团').  Rows are emitted only when the header
     row has more than seven columns.  Fix: stray Python-2 `print`
     debug leftover replaced with logging.debug, matching how the rest
     of the file logs diagnostics.
     """
     datadate = response.meta['datadate']
     datatype = response.meta['datatype']
     logging.debug(datatype)
     data_table = response.xpath('//*[@id="Panel_News"]/div[1]/table')
     data_list = table_to_list(data_table)
     if len(data_list[0]) > 7:
         datemonth = data_list[0][1]
         for data in data_list[1:]:
             item = t_chem99_bithumen_prod_Item()
             item['datadate'] = datadate
             item['datemonth'] = datemonth
             if datatype == 1:
                 item['cls_type'] = u'地区'
             elif datatype == 2:
                 item['cls_type'] = u'集团'
             item['item_name'] = data[0]
             item['curr_month_value'] = data[1]
             item['pre_month_value'] = data[2]
             item['mom'] = data[3]
             item['pre_year_value'] = data[4]
             item['yoy'] = data[5]
             # NOTE(review): indices up to 8 need nine columns, but the
             # guard above only requires more than seven -- confirm the
             # source table always has nine columns.
             item['cumu_value_y'] = data[6]
             item['pre_cumu_value_y'] = data[7]
             item['cumu_yoy'] = data[8]
             item['update_dt'] = datetime.datetime.now().strftime(
                 '%Y-%m-%d %H:%M:%S')
             item['source'] = response.url
             yield item
Esempio n. 5
0
    def parse_content(self, response):
        """Parse a Thailand rubber price page into RubbThailand items.

        Reads the last row of the table under div#Panel1 and yields one
        item per product column.  Raises if the login form is present
        or the table is malformed.
        """
        date = response.meta['date']
        title = response.meta['title']
        remark = ''.join(
            response.xpath(
                '//div[@id="Panel1"]/p[1]//text()').extract()).strip()
        logging.debug(title)

        doc = lxml.html.document_fromstring(response.body_as_unicode())
        if doc.xpath('//form[@id="frm_login"]'):
            raise Exception('Login error')

        if doc.xpath('//div[@id="Panel1"]//table'):
            data_table = doc.xpath('//div[@id="Panel1"]//table')[0]
            data_list = table_to_list(data_table)
            if len(data_list) <= 1 or len(data_list[0]) < 5:
                raise Exception('RUBB.CHEM99----get table failed %s' %
                                response.url)

            row = data_list[-1]  # latest (last) data row
            for index in range(1, len(data_list[0])):
                item = RubbThailand()
                item['product'] = data_list[0][index].strip()
                item['price'] = row[index].strip()
                item['remark'] = remark
                # Fix: bare `except:` also swallowed KeyboardInterrupt/
                # SystemExit; only an unparseable date string should
                # fall back to the date from response.meta.
                try:
                    item['datadate'] = datetime.datetime.strptime(
                        row[0].strip(), '%Y/%m/%d')
                except ValueError:
                    item['datadate'] = date
                item['update_dt'] = datetime.datetime.now()
                item['source'] = title
                yield item
Esempio n. 6
0
    def parse_content(self, response):
        """Parse a farm-film price matrix into PlasticFarmFilm items.

        The table's header row holds area names and the first column of
        each data row the product; one item is yielded per
        (product, area) cell.  The data date is the YYYYMMDD run in the
        headline.  Fixes: removed an unused `data_table` selector;
        `raise <str>` replaced with a real Exception (string exceptions
        were removed from Python and raise TypeError).
        """
        doc = lxml.html.document_fromstring(response.body_as_unicode())
        data_table2 = doc.xpath('//div[@id="PanelContent"]//table')[0]

        title = ''.join(
            response.xpath(
                '//div[@class="news_content "]/h1//text()').extract()).strip()
        logging.debug(title)
        data_date = datetime.datetime.strptime(
            re.search(r'(\d{8})', title).group(1), '%Y%m%d')
        data_list = table_to_list(data_table2)

        if len(data_list) <= 1 or len(data_list[1]) < 4:
            raise Exception('CHEM99----get table failed %s' % response.url)

        for row in data_list[1:]:
            for index in range(1, len(row)):
                item = PlasticFarmFilm()
                item['product'] = row[0].strip()
                item['area'] = data_list[0][index].strip()
                item['price'] = row[index].strip()
                item['datadate'] = data_date
                item['update_dt'] = datetime.datetime.now()
                item['source'] = title
                yield item
    def parse_content(self, response):
        """Parse bitumen tables from div#Panel_News.

        datatype 1 yields weekly operating-rate items
        (t_ec_rateofoperation_bitumenItem); datatype 2 yields factory
        inspection items (t_ec_check_bitumenItem).  Fixes: stray
        Python-2 `print` debug leftovers replaced with logging.debug
        (consistent with the rest of the file) and the redundant
        `else: continue` turned into a guard clause.
        """
        datadate = response.meta['datadate']
        datatype = response.meta['datatype']
        if datatype == 3:
            logging.debug(response.url)
        if datatype == 1:
            data_table = response.xpath('//*[@id="Panel_News"]/div[1]/table')
            data_list = table_to_list(data_table)
            # Previous week is exactly seven days before datadate.
            last_week_date = datetime.datetime.strptime(
                datadate, '%Y-%m-%d') - datetime.timedelta(days=7)
            for data in data_list[1:]:
                item = t_ec_rateofoperation_bitumenItem()
                item['datadate'] = datadate
                item['area'] = data[0]
                item['current_week_date'] = datadate
                item['last_week_date'] = last_week_date.strftime('%Y-%m-%d')
                item['current_week_value'] = data[1]
                item['last_week_value'] = data[2]
                item['change_situation'] = data[3]
                item['update_dt'] = datetime.datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                item['source'] = response.url
                yield item

        if datatype == 2:
            data_table = response.xpath('//*[@id="Panel_News"]/div[1]/table')
            data_list = table_to_list(data_table)
            logging.debug(data_list)
            for data in data_list[1:]:
                # Heuristic: only rows whose first cell is shorter than
                # six characters are area rows -- presumably longer
                # cells are section headers; confirm against the page.
                if len(data[0]) >= 6:
                    continue
                item = t_ec_check_bitumenItem()
                item['area'] = data[0]
                item['datadate'] = datadate
                item['factory_name'] = data[1]
                item['affiliation'] = data[2]
                item['product'] = data[3]
                item['status'] = data[4]
                item['product_time'] = data[5]
                item['update_dt'] = datetime.datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                item['source'] = response.url
                yield item
Esempio n. 8
0
    def parse_content(self, response):
        """Parse the domestic propylene price table into two
        PropeneMonomer items: table row 1 holds prices ('价格'),
        row 2 the rise/fall figures ('涨跌').

        Fixes: `tab_title` could be referenced while unbound
        (NameError) when no caption paragraph matched; the two
        near-identical item builds are now a single loop; removed the
        unused `auth_info` local.
        """
        doc = lxml.html.document_fromstring(response.body_as_unicode())
        data_table2 = doc.xpath('//div[@id="PanelContent"]//table')[1]
        title = ''.join(
            response.xpath(
                '//div[@class="news_content "]/h1//text()').extract()).strip()
        pub_date = re.search(r'(\d{8})', title).group(1)
        logging.debug(title)

        # Fix: initialise so a page without a matching caption cannot
        # raise NameError below.
        tab_title = None
        for p_text in response.xpath(
                '//div[@id="PanelContent"]//p//text() | //div[@id="PanelContent"]//div//text()'
        ).extract():
            if (u'表' in p_text) and \
                (u'国内' in p_text) and \
                (u'丙烯' in p_text) and \
                (u'价格一览' in p_text):
                tab_title = p_text.strip()

        data_list = table_to_list(data_table2)

        area_fields = ('sd_area', 'hb_area', 'hd_area',
                       'xb_area', 'db_area', 'hn_area')
        items = []
        for row_index, column_type in ((1, u'价格'), (2, u'涨跌')):
            item = PropeneMonomer()
            item['column_type'] = column_type
            for col, field in enumerate(area_fields, 1):
                item[field] = data_list[row_index][col].strip()
            item['title'] = title
            item['tab_title'] = tab_title
            item['trading_dt'] = pub_date
            item['datetime_stamp'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            items.append(item)
        return items
Esempio n. 9
0
    def parse_content(self, response):
        """Parse a Thailand USS rubber table into RubbUSSThailand items.

        Two layouts occur: 11 columns (prices plus volumes, data rows
        start at index 2) and 7 columns (prices only, data rows start
        at index 1).  Fix: the two branches duplicated most of their
        body; they are merged here with a single layout switch.
        """
        date = response.meta['date']
        title = response.meta['title']
        remark = ''.join(
            response.xpath(
                '//div[@id="Panel1"]/p[1]//text()').extract()).strip()
        logging.debug(title)

        doc = lxml.html.document_fromstring(response.body_as_unicode())
        data_table = doc.xpath('//div[@id="Panel1"]//table')[0]
        data_list = table_to_list(data_table)
        width = len(data_list[0])
        if width not in (11, 7):
            raise Exception('RUBB.CHEM99----get table failed %s' %
                            response.url)

        # 11-column tables carry an extra header row and volume columns.
        first_data_row = 2 if width == 11 else 1
        for row in data_list[first_data_row:]:
            item = RubbUSSThailand()
            item['product'] = row[0].strip()
            item['price'] = row[1].strip()
            item['price_3_5'] = row[2].strip()
            item['price_5_7'] = row[3].strip()
            item['price_7_10'] = row[4].strip()
            item['price_10_15'] = row[5].strip()
            if width == 11:
                item['volume'] = row[6].strip()
                item['volume_3_5'] = row[7].strip()
                item['volume_5_7'] = row[8].strip()
                item['volume_7_10'] = row[9].strip()
                item['volume_10_15'] = row[10].strip()
            item['remark'] = remark
            item['datadate'] = date
            item['update_dt'] = datetime.datetime.now()
            item['source'] = title
            yield item
Esempio n. 10
0
    def parse_content(self, response):
        """Parse the Merey oil settlement table into t_ec_merey_oil_Item.

        One item is yielded per data row; the WTI averaging date range
        is read once from the second cell of the header row.
        """
        datadate = response.meta['datadate']

        data_table = response.xpath('//*[@id="Panel_News"]/div[1]/table')
        data_list = table_to_list(data_table)
        # NOTE(review): the pattern contains two nested capture groups
        # '((.+))', so re.findall returns 2-tuples and wti_date_range is
        # a tuple, not a string.  The parentheses were presumably meant
        # to be literal full-width brackets around the date range --
        # confirm against the source page before changing.
        wti_date_range = re.findall(u'WTI均价((.+))',data_list[0][1])[0]
        for data in data_list[1:]:
            item = t_ec_merey_oil_Item()
            item['datadate'] = datadate
            item['datemonth'] = data[0]
            item['wti_price_avg'] = data[1]
            item['wti_date_range'] = wti_date_range
            item['discount_value'] = data[2]
            item['tongs_barrels_ratio'] = data[3]
            item['fx_rate'] = data[4]
            item['settle_prc'] = data[5]
            item['update_dt'] = datetime.datetime.now().strftime( '%Y-%m-%d %H:%M:%S')
            item['source'] = response.url
            yield item
Esempio n. 11
0
    def parse_content(self, response):
        """Parse a plastic-film weekly price table into PlasticFilm items.

        The data date is the 8-digit YYYYMMDD run in the headline; when
        absent, the year comes from the first four digits of a 7-digit
        run in the headline and the month/day from the header row's
        third cell (e.g. '5月20日').  Fixes: removed an unused
        `data_table` selector; `raise <str>` replaced with a real
        Exception (string exceptions raise TypeError).
        """
        doc = lxml.html.document_fromstring(response.body_as_unicode())
        data_table2 = doc.xpath('//div[@id="PanelContent"]//table')[0]

        title = ''.join(
            response.xpath(
                '//div[@class="news_content "]/h1//text()').extract()).strip()
        logging.debug(title)

        data_list = table_to_list(data_table2)

        if len(data_list) <= 1 or len(data_list[1]) < 7:
            raise Exception('CHEM99----get table failed %s' % response.url)

        datematch = re.search(r'(\d{8})', title)
        if datematch:
            data_date = datetime.datetime.strptime(datematch.group(1),
                                                   '%Y%m%d')
        else:
            # Fallback date reconstruction from headline + header cell.
            year = re.search(r'(\d{7})', title).group(1)[0:4]
            monday = re.search(u'(\d+)月(\d+)日', data_list[0][2])
            month = monday.group(1)
            day = monday.group(2)
            data_date = datetime.datetime(int(year), int(month), int(day))

        for row in data_list[1:]:
            item = PlasticFilm()
            item['product'] = row[0].strip()
            item['spec'] = row[1].strip()
            item['price'] = row[2].strip()
            item['rise_offset'] = row[3].strip()
            item['than_lastweek'] = row[4].strip()
            item['than_lastmonth'] = row[5].strip()
            item['than_lastyear'] = row[6].strip()
            item['datadate'] = data_date
            item['update_dt'] = datetime.datetime.now()
            item['source'] = title
            yield item
Esempio n. 12
0
    def parse_content(self, response):
        """Yield MarketReviewBitumen items from the Panel_News table.

        The raw table is transposed (five columns total) so that each
        resulting row is one area's price review; the unit, when
        present, is read from the second paragraph under Panel_News.
        A malformed table is logged as an error but parsing proceeds.
        """
        doc = lxml.html.document_fromstring(response.body_as_unicode())
        table = doc.xpath('//div[@id="Panel_News"]//table')[0]
        headline = ''.join(
            response.xpath(
                '//div[@class="div_news"]/h1//text()').extract()).strip()
        logging.debug(headline)
        data_date = datetime.datetime.strptime(response.meta['data_date'],
                                               '%Y-%m-%d')
        # Transpose rows/columns; only the first five rows are used.
        rows = trans_table(table_to_list(table)[0:5])

        if len(rows) <= 1 or len(rows[1]) < 5:
            logging.error('OIL.CHEM99----get table failed %s' % response.url)

        paragraph = doc.xpath('//div[@id="Panel_News"]//p/text()')[1].strip()
        unit_match = re.search(u'单位:(.*)', paragraph)
        unit = unit_match.group(1) if unit_match else None

        for entry in rows[1:]:
            item = MarketReviewBitumen()
            item['area'] = entry[0].strip()
            item['pre_price'] = entry[1].strip()
            item['price'] = entry[2].strip()
            item['change'] = entry[3].strip()
            item['changeratio'] = entry[4].strip()
            if unit:
                item['unit'] = unit
            item['datadate'] = data_date
            item['update_dt'] = datetime.datetime.now()
            item['source'] = headline
            yield item
Esempio n. 13
0
    def parse_content(self, response):
        """Extract the Panel_News table; appears to be an unfinished
        stub -- the parsed table is never used and nothing is emitted.
        """
        data_table = response.xpath('//*[@id="Panel_News"]/div[1]/table')
        data_list = table_to_list(data_table)

        # NOTE(review): dead placeholder assignment; data_list is never
        # used.  Presumably item emission is still TODO -- confirm
        # before relying on this spider.
        a = 'aaa'