コード例 #1
0
ファイル: items.py プロジェクト: mainliufeng/spider
    def save(self):
        """Validate this item and insert it as a new row of ``self.model``.

        Every field listed in ``self.fields`` is copied onto a fresh model
        instance. String values bound for a DECIMAL or Integer column have
        their commas removed and are converted to ``Decimal`` / ``int``;
        an empty string for such a column is stored as ``None``.

        Raises:
            DropItem: when the commit fails with an integrity error.
        """
        self.validate()

        model_cls = self.model
        session = Session()
        try:
            record = model_cls()
            for name in self.fields.keys():
                raw = self.get(name)
                # Class of the SQLAlchemy type of the first column mapped
                # to this attribute.
                column_cls = getattr(
                    model_cls, name).property.columns[0].type.__class__
                if isinstance(raw, (str, unicode)):
                    if column_cls == DECIMAL:
                        raw = Decimal(raw.replace(',', '')) if raw else None
                    elif column_cls == Integer:
                        raw = int(raw.replace(',', '')) if raw else None
                setattr(record, name, raw)
            session.add(record)
            session.commit()
        except (SqlalchemyIntegrityError, PymysqlIntegrityError) as e:
            session.rollback()
            raise DropItem(e.message)
        finally:
            # Any other exception propagates to the caller; the session is
            # closed either way.
            session.close()
コード例 #2
0
    def start_requests(self):
        """Yield one request per row of ``CurrListedCorp``.

        Each URL is built by formatting ``self.information_url_pattern``
        with the row's ``market_part`` and ``stock_cd`` (in that order).

        The database session is closed before the first request is yielded:
        ``.all()`` has already loaded every row, and a generator can stay
        suspended for a long time, which would otherwise keep the session
        (and its connection) open for that whole time.
        """
        session = Session()
        try:
            corp_rows = session.query(
                CurrListedCorp.stock_cd, CurrListedCorp.market_part).all()
        finally:
            session.close()

        for stock_cd, market_part in corp_rows:
            yield Request(
                self.information_url_pattern.format(market_part, stock_cd))
コード例 #3
0
ファイル: items.py プロジェクト: mainliufeng/spider
    def save(self):
        """Validate this item and store its ``market_part`` in the database.

        Updates ``CurrListedCorp.market_part`` on every row whose
        ``stock_cd`` equals this item's ``stock_cd``, then commits.

        Raises:
            DropItem: when the commit fails with an integrity error.
        """
        self.validate()

        session = Session()
        try:
            matching_rows = session.query(CurrListedCorp).filter(
                CurrListedCorp.stock_cd == self['stock_cd'])
            matching_rows.update(
                {CurrListedCorp.market_part: self['market_part']})
            session.commit()
        except (SqlalchemyIntegrityError, PymysqlIntegrityError) as e:
            session.rollback()
            raise DropItem(e.message)
        finally:
            # Any other exception propagates to the caller; the session is
            # closed either way.
            session.close()
コード例 #4
0
    def start_requests(self):
        """Yield one cash-flow request per row of ``CurrListedCorp``.

        Each URL is built by formatting ``self.cashflow_url_pattern`` with
        the row's ``market_part`` and ``stock_cd`` (in that order). The
        ``stock_cd`` is carried in ``meta`` and the response is handled by
        ``self.parse_cashflow``.

        The database session is closed before the first request is yielded:
        ``.all()`` has already loaded every row, so there is no reason to
        keep the session open while the generator is suspended.
        """
        session = Session()
        try:
            corp_rows = session.query(
                CurrListedCorp.stock_cd, CurrListedCorp.market_part).all()
        finally:
            session.close()

        for stock_cd, market_part in corp_rows:
            yield Request(
                url=self.cashflow_url_pattern.format(market_part, stock_cd),
                meta={'stock_cd': stock_cd},
                callback=self.parse_cashflow)
コード例 #5
0
ファイル: cninfo_com_cn_profit.py プロジェクト: uapdw/spider
    def start_requests(self):
        """Yield one profit request per (stock, year, period) combination.

        Stocks come from ``CurrListedCorp.stock_cd`` and the year/period
        pairs from ``PeriodList``. The period is converted to ``int`` and
        looked up in ``self.monthList`` to obtain the value placed in the
        URL, which is built from ``self.profit_url_pattern``. ``stock_cd``,
        ``year`` (as ``int``) and the raw ``period`` are carried in ``meta``;
        responses are handled by ``self.parse_profit``.

        The database session is closed before the first request is yielded:
        both queries are fully loaded by ``.all()``, so the session does not
        need to stay open while the generator is suspended.
        """
        session = Session()
        try:
            year_period_list = session.query(
                PeriodList.year, PeriodList.period
            ).all()

            stock_cd_list = session.query(
                CurrListedCorp.stock_cd
            ).all()
        finally:
            session.close()

        for (stock_cd,) in stock_cd_list:
            for year_period in year_period_list:
                year = int(year_period[0])
                period = year_period[1]
                period_season = self.monthList[int(period)]
                yield Request(
                    url=self.profit_url_pattern.format(
                        stock_cd, year, period_season
                    ),
                    meta={
                        'stock_cd': stock_cd,
                        'year': year,
                        'period': period,
                    },
                    callback=self.parse_profit
                )
コード例 #6
0
    def start_requests(self):
        """Yield a search ``FormRequest`` per (stock, year, period) combination.

        Stocks are the ``CurrListedCorp`` rows whose ``data_sour`` is
        ``'0'``; year/period pairs come from ``PeriodList``. Both result
        sets are loaded up front and the session is closed before any
        request is produced. Each form is posted to ``self.search_url`` and
        the response is handled by ``self.parse_search``.
        """
        session = Session()
        try:
            periods = session.query(PeriodList.year,
                                    PeriodList.period).all()
            stock_rows = session.query(CurrListedCorp.stock_cd).filter(
                CurrListedCorp.data_sour == '0').all()
        finally:
            session.close()

        for stock_row in stock_rows:
            stock_cd = stock_row[0]
            for year, period in periods:
                form_fields = {
                    'report_year': year,
                    'stock_id': stock_cd,
                    # The period is translated through a lookup dict on
                    # the spider; unknown periods map to None.
                    'report_period_id':
                    self.period_notice_type_dict.get(period),
                }
                request_meta = {
                    'stock_cd': stock_cd,
                    'year': year,
                    'period': period,
                }
                yield FormRequest(url=self.search_url,
                                  formdata=form_fields,
                                  meta=request_meta,
                                  callback=self.parse_search)
コード例 #7
0
    def start_requests(self):
        """Record the newest period and yield one request per listed company.

        The ``PeriodList`` row with the highest year, then highest period,
        is stored on ``self.periodlist``. ``Query.one()`` is used, so an
        empty ``PeriodList`` table makes this raise on first iteration.
        One request is then yielded per ``CurrListedCorp`` row, with the URL
        built by formatting ``self.information_url_pattern`` with
        ``market_part`` and ``stock_cd`` (in that order).

        The database session is closed before the first request is yielded:
        everything needed has been loaded by then, so the session does not
        need to stay open while the generator is suspended.
        """
        session = Session()
        try:
            self.periodlist = session.query(PeriodList).order_by(
                desc(PeriodList.year), desc(PeriodList.period)).limit(1).one()

            corp_rows = session.query(
                CurrListedCorp.stock_cd, CurrListedCorp.market_part).all()
        finally:
            session.close()

        for stock_cd, market_part in corp_rows:
            yield Request(
                self.information_url_pattern.format(market_part, stock_cd))
コード例 #8
0
 def start_requests(self):
     """Yield one balance-sheet request per (stock, period) combination.

     All ``PeriodList`` rows, newest first, are stored on
     ``self.periodlist``; stocks come from ``CurrListedCorp.stock_cd``.
     The ``mm`` query parameter is taken from ``self.monthList`` (indexed
     by the integer period) and is left empty when the year is ``'2016'``.
     The UTF-8 encoded year and period are carried in ``meta`` as ``year``
     and ``month``; responses are handled by ``self.parsebalance``.

     The database session is closed before the first request is yielded:
     both queries are fully loaded by ``.all()``, so the session does not
     need to stay open while the generator is suspended.
     """
     session = Session()
     try:
         self.periodlist = session.query(PeriodList).order_by(
             desc(PeriodList.year), desc(PeriodList.period)).all()
         stock_cd_list = session.query(CurrListedCorp.stock_cd).all()
     finally:
         session.close()

     for (stock_code,) in stock_cd_list:
         for period in self.periodlist:
             year = period.year.encode('utf8')
             month = period.period.encode('utf8')
             mm = self.monthList[int(month)]
             # No month is sent for 2016 (the original code special-cased
             # that year).
             month_param = mm if year != '2016' else ''
             # Plain concatenation is kept on purpose so the resulting
             # string type is the same as before.
             balance_sheet_url = (
                 'http://www.cninfo.com.cn/information/stock/'
                 'balancesheet_.jsp?stockCode=' + stock_code +
                 '&yyyy=' + year + '&&mm=' + month_param +
                 '&cwzb=balancesheet&button2=%CC%E1%BD%BB')
             yield Request(balance_sheet_url,
                           callback=self.parsebalance,
                           meta={'year': year, 'month': month})
コード例 #9
0
    def start_requests(self):
        """Yield a search ``FormRequest`` per (stock, year, period) combination.

        Stocks are the ``CurrListedCorp`` rows whose ``data_sour`` is
        ``'0'``; year/period pairs come from ``PeriodList``. Both result
        sets are loaded up front and the session is closed before any
        request is produced. Each form is posted to ``self.search_url``
        without custom headers, and the response is handled by
        ``self.parse_search``.
        """
        session = Session()
        try:
            periods = session.query(PeriodList.year,
                                    PeriodList.period).all()
            stock_rows = session.query(CurrListedCorp.stock_cd).filter(
                CurrListedCorp.data_sour == '0').all()
        finally:
            session.close()

        for stock_row in stock_rows:
            stock_cd = stock_row[0]
            for year, period in periods:
                form_fields = {
                    'report_year': year,
                    'stock_id': stock_cd,
                    # Unknown periods map to None via dict.get.
                    'report_period_id':
                    self.period_notice_type_dict.get(period),
                }
                request_meta = {
                    'stock_cd': stock_cd,
                    'year': year,
                    'period': period,
                }
                # Explicit headers are currently disabled; the set that
                # was tried is kept here for reference:
                # headers={
                #     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                #     'Accept-Encoding': 'gzip, deflate',
                #     'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
                #     'Content-Type': 'application/x-www-form-urlencoded',
                #     'Host': 'www.cninfo.com.cn',
                #     'Origin': 'http://www.cninfo.com.cn',
                #     'Referer': 'http://www.cninfo.com.cn/search/search.jsp'
                # },
                yield FormRequest(
                    url=self.search_url,
                    formdata=form_fields,
                    meta=request_meta,
                    callback=self.parse_search)
コード例 #10
0
    def start_requests(self):
        """Yield a PDF-search ``FormRequest`` per (stock, year, period).

        Stocks (with their ``market_part``) come from ``CurrListedCorp``
        and year/period pairs from ``PeriodList``; both are loaded up front
        and the session is closed before any request is produced. Stocks
        with an empty ``market_part`` are skipped.

        The search window is one calendar year, from 1 January to the next
        1 January. It starts in the period's own year, except for period
        ``'3'``, where it starts in the following year. Forms are posted to
        ``self.pdf_search_url`` and handled by ``self.parse_search``.
        """
        session = Session()
        try:
            periods = session.query(
                PeriodList.year, PeriodList.period
            ).all()
            corp_rows = session.query(
                CurrListedCorp.stock_cd, CurrListedCorp.market_part
            ).all()
        finally:
            session.close()

        for stock_cd, market_part in corp_rows:
            if not market_part:
                continue

            for raw_year, period in periods:
                year = int(raw_year)

                # Period '3' is searched in the year after the period's own.
                window_year = year + 1 if period == '3' else year
                start_time = '{}-01-01'.format(window_year)
                end_time = '{}-01-01'.format(window_year + 1)

                form_fields = {
                    'orderby': 'date11',
                    'marketType':
                    self.market_part_market_type_dict.get(market_part),
                    'noticeType': self.period_notice_type_dict.get(period),
                    'stockCode': stock_cd,
                    'keyword': '',
                    'startTime': start_time,
                    'endTime': end_time,
                    'pageNo': '1',
                }
                browser_headers = {
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                    'Accept-Encoding': 'gzip, deflate',
                    'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'Host': 'www.cninfo.com.cn',
                    'Origin': 'http://www.cninfo.com.cn',
                    'Referer': 'http://www.cninfo.com.cn/search/search.jsp',
                }
                request_meta = {
                    'stock_cd': stock_cd,
                    'year': year,
                    'period': period,
                }

                yield FormRequest(
                    url=self.pdf_search_url,
                    formdata=form_fields,
                    headers=browser_headers,
                    meta=request_meta,
                    callback=self.parse_search
                )