예제 #1
0
 def start_requests(self):
     """Yield one request per day following the last stored 'shfe' update.

     The cursor starts at ``select_update_time('shfe')`` plus 57600 seconds
     (16 hours). While the cursor is below ``self.today_time`` it is advanced
     by 86400 seconds (one day) and a request is issued for the advanced
     value, formatted as ``YYYYMMDD`` in local time and substituted into
     ``self.url``. The advanced timestamp travels with the request as
     ``meta['time']``.

     The original note gives the initial time as 2002-01-07 16:00:00.
     """
     cursor = select_update_time('shfe') + 57600
     while cursor < self.today_time:
         # Advance first: the request is for the day after the cursor.
         cursor += 86400
         date_str = time.strftime('%Y%m%d', time.localtime(cursor))
         target = self.url.format(date=date_str)
         yield scrapy.Request(target,
                              callback=self.parse,
                              meta={'time': cursor})
예제 #2
0
 def start_requests(self):
     """Yield one form request per day following the last stored 'dec' update.

     The cursor starts at ``select_update_time('dec')`` plus 57600 seconds
     (16 hours). While the cursor is below ``self.today_time`` it is advanced
     by 86400 seconds (one day) and a ``scrapy.FormRequest`` is issued to
     ``self.url`` for the advanced local date. The advanced timestamp is
     passed along as ``meta['time']``.

     The ``month`` form field is sent zero-based (calendar month minus one),
     matching what the previous code produced for January through September.

     The original note gives the initial crawl time as 2003-01-02 16:00:00.
     """
     start_time = select_update_time('dec') + 57600
     while start_time < self.today_time:
         start_time += 86400
         local = time.localtime(start_time)
         # Use the integer tm_mon (1..12) directly. The previous
         # strftime('%m').strip('0') stripped zeros from BOTH ends, turning
         # '10' into '1' and therefore sending month 0 for October.
         month = str(local.tm_mon - 1)
         formdata = {
             "dayQuotes.variety": "all",
             "dayQuotes.trade_type": "0",
             "year": time.strftime('%Y', local),
             "month": month,
             "day": time.strftime('%d', local),
         }
         yield scrapy.FormRequest(self.url,
                                  formdata=formdata,
                                  callback=self.parse,
                                  meta={'time': start_time})
예제 #3
0
 def start_requests(self):
     """Request the first page of the daily statistics listing.

     Stores ``select_update_time('sge')`` on ``self.start_time``. When that
     value is falsy the page is handled by ``self.first_parse``; otherwise
     it is handled by ``self.next_parse`` with ``meta['page']`` set to 1.
     Both requests set ``dont_filter=True``.
     """
     self.start_time = select_update_time('sge')
     listing_url = 'http://www.sge.com.cn/sjzx/mrhqsj'

     if not self.start_time:
         yield scrapy.Request(listing_url,
                              callback=self.first_parse,
                              dont_filter=True)
         return

     yield scrapy.Request(listing_url,
                          callback=self.next_parse,
                          dont_filter=True,
                          meta={'page': 1})
예제 #4
0
 def start_requests(self):
     """Yield one form request per day following the last stored 'czce' update.

     The cursor starts at ``select_update_time('czce')`` plus 57600 seconds
     (16 hours). While the cursor is below ``self.today_time`` it is advanced
     by 86400 seconds (one day) and a ``scrapy.FormRequest`` is issued to
     ``self.url`` with the advanced local date as ``pubDate``.

     Note that ``meta['time']`` carries the formatted ``YYYY-MM-DD`` string,
     not the numeric timestamp.
     """
     cursor = select_update_time('czce') + 57600
     while cursor < self.today_time:
         cursor += 86400
         pub_date = time.strftime('%Y-%m-%d', time.localtime(cursor))
         form = {"dataType": "DAILY", "pubDate": pub_date, 'commodity': ''}
         yield scrapy.FormRequest(self.url,
                                  formdata=form,
                                  meta={'time': pub_date},
                                  callback=self.html_parse)
예제 #5
0
    def start_requests(self):
        """Yield one request per day following the last stored 'cffex' update.

        The cursor is a Unix timestamp starting at
        ``select_update_time('cffex')`` plus 57600 seconds (16 hours). While
        the cursor is below ``self.today_time`` it is advanced by 86400
        seconds (one day) and a request is issued with the advanced local
        date split into ``yearmonth`` (``YYYYMM``) and ``day`` (``DD``) for
        ``self.url``. The advanced timestamp is passed as ``meta['time']``.

        The original note gives the initial crawl time as
        2010-01-01 16:00:00.
        """
        cursor = select_update_time('cffex') + 57600

        while cursor < self.today_time:
            cursor += 86400
            local = time.localtime(cursor)
            year_month = time.strftime('%Y%m', local)
            day_of_month = time.strftime('%d', local)
            target = self.url.format(yearmonth=year_month, day=day_of_month)
            yield scrapy.Request(target,
                                 callback=self.parse,
                                 meta={'time': cursor})
예제 #6
0
 def start_requests(self):
     """Issue a single JSONP-style query starting after the last stored update.

     ``beginDate`` is the local ``YYYY-MM-DD`` date one day (86400 seconds)
     after ``select_update_time('new_exbxg')``, or an empty string when that
     value is falsy. The query string is built from the current time in
     milliseconds: the callback name uses ``t - 1002``, the ``_`` parameter
     uses ``t - 1000``, and the JSON payload is sent as a parameter *name*
     with an empty value. The result is appended to ``self.init_url``.

     The original note gives the initial time as 2011-01-01 16:00:00.
     """
     last_update = select_update_time('new_exbxg')
     now_ms = int(time.time() * 1000)

     begin_date = (
         time.strftime('%Y-%m-%d', time.localtime(last_update + 86400))
         if last_update else ''
     )

     callback_name = 'jQuery110207587501276632844_%s' % (now_ms - 1002)
     payload = '{"timestamp":%s,"service":"U_D_FindSteel","body":{"beginDate":"%s","endDate":"","limit":10000}}' % (now_ms, begin_date)

     # Insertion order matters: it determines the parameter order in the URL.
     params = {}
     params['callback'] = callback_name
     params[payload] = ''
     params['_'] = '%s' % (now_ms - 1000)

     yield scrapy.Request(self.init_url + urlencode(params),
                          callback=self.parse)