def start_requests(self):
    """Yield the initial requests for the czce spider, chosen by ``dataType``.

    ``dataType`` is read from ``self.settings`` and stored on the spider:

    * ``None`` or ``'day_kdata'`` -- one request for ``FutureDataDaily.xls``
      per weekday since 2020-01-01 whose cache file does not exist yet.
    * ``'historyk'`` -- a single request for the history index page.
    * ``'inventory'`` -- one request for ``FutureDataHolding.xls`` per
      weekday since 2020-01-01 whose cache file does not exist yet.

    Any other value yields nothing.
    """
    self.dataType = self.settings.get("dataType")
    mode = self.dataType

    if mode == 'historyk':
        yield Request(
            url="http://www.czce.com.cn/cn/jysj/lshqxz/H770319index_1.htm",
            callback=self.download_czce_history_data)
        return

    # The two per-day modes differ only in the cache data type and in the
    # remote file name.
    if mode is None or mode == 'day_kdata':
        cache_kind = 'day_kdata'
        remote_name = "/FutureDataDaily.xls"
    elif mode == 'inventory':
        cache_kind = 'inventory'
        remote_name = "/FutureDataHolding.xls"
    else:
        return

    for day in pd.date_range(start='20200101', end=pd.Timestamp.today()):
        target = get_exchange_cache_path(
            security_type='future',
            exchange='czce',
            the_date=to_timestamp(day),
            data_type=cache_kind) + '.xls'
        # Skip Saturdays/Sundays and anything already on disk.
        if day.dayofweek >= 5 or os.path.exists(target):
            continue
        yield Request(
            url="http://www.czce.com.cn/cn/DFSStaticFiles/Future/" +
            day.strftime("%Y/%Y%m%d") + remote_name,
            callback=self.download_czce_kline_data,
            meta={'filename': target})
def start_requests(self):
    """Yield the initial requests for the cffex spider, chosen by ``self.dataType``.

    * ``None`` or ``'dayk'`` -- one request per weekday since 2006-06-30 for
      the daily ``*_1.csv`` file, skipping days whose cache file exists.
    * ``'inventory'`` -- for each weekday since 2006-06-30 and each of the
      product codes IF, IC, IH, T and TF, one request for the product's
      ``_1.csv`` file, skipping combinations whose cache file exists.

    Any other value yields nothing.
    """
    mode = self.dataType
    wants_daily = mode is None or mode == 'dayk'
    if not wants_daily and mode != 'inventory':
        return

    every_day = pd.date_range(start='2006-06-30', end=pd.Timestamp.today())
    weekdays = every_day[every_day.dayofweek < 5]

    if wants_daily:
        for day in weekdays:
            target = get_exchange_cache_path(
                security_type='future',
                exchange='cffex',
                data_type='day_kdata',
                the_date=to_timestamp(day)) + ".csv"
            if os.path.exists(target):
                continue
            yield Request(
                url="http://www.cffex.com.cn/sj/hqsj/rtj/" +
                day.strftime("%Y%m/%d/%Y%m%d") + "_1.csv",
                callback=self.download_cffex_history_data_file,
                meta={'filename': target})
        return

    products = ['IF', 'IC', 'IH', 'T', 'TF']
    for day in weekdays:
        for product in products:
            # The product code is appended directly to the cache path.
            target = get_exchange_cache_path(
                security_type='future',
                exchange='cffex',
                data_type='inventory',
                the_date=to_timestamp(day)) + product + ".csv"
            if os.path.exists(target):
                continue
            yield Request(
                url="http://www.cffex.com.cn/sj/ccpm/" +
                day.strftime("%Y%m/%d/") + product + "_1.csv",
                callback=self.download_cffex_history_data_file,
                meta={'filename': target})
def start_requests(self):
    """Yield the initial requests for the shfe spider, chosen by ``dataType``.

    ``dataType`` is read from ``self.settings`` and stored on the spider.
    Exactly one of the following branches runs:

    * ``'inventory'`` -- one request per weekday in the last 520 weeks whose
      ``.json`` cache file does not exist yet.
    * ``'day_kdata'`` -- one request per weekday since 2020-01-01 whose cache
      file does not exist yet.
    * anything else (including ``None``) -- one request per year from 2009
      up to, but not including, the current year for the yearly history zip,
      skipping archives that are already on disk.

    The branches form a single ``if``/``elif``/``else`` chain so that
    ``'inventory'`` mode does not additionally fall through to the yearly
    archive download.
    """
    self.dataType = self.settings.get("dataType")

    if self.dataType == 'inventory':
        today = pd.Timestamp.today()
        for date in pd.date_range(start=today.date() - pd.Timedelta(weeks=520),
                                  end=today):
            the_dir = get_exchange_cache_path(
                security_type='future',
                exchange='shfe',
                the_date=to_timestamp(date),
                data_type='inventory') + '.json'
            # Weekdays only, and only when nothing is cached for that day.
            if date.dayofweek < 5 and not os.path.exists(the_dir):
                yield Request(
                    url=self.get_day_inventory_url(
                        the_date=date.strftime('%Y%m%d')),
                    meta={
                        'the_date': date,
                        'the_path': the_dir
                    },
                    callback=self.download_shfe_data_by_date)
    elif self.dataType == 'day_kdata':
        daterange = pd.date_range(start='2020-01-01',
                                  end=pd.Timestamp.today())
        daterange = daterange[daterange.dayofweek < 5]
        # Per-day data.
        for the_date in daterange:
            the_path = get_exchange_cache_path(
                security_type='future',
                exchange='shfe',
                the_date=to_timestamp(the_date),
                data_type='day_kdata')
            if not os.path.exists(the_path):
                yield Request(
                    url=self.get_day_kdata_url(
                        the_date=the_date.strftime('%Y%m%d')),
                    meta={
                        'the_date': the_date,
                        'the_path': the_path
                    },
                    callback=self.download_shfe_data_by_date)
    else:
        # Fetch the yearly statistics archives directly.
        for the_year in range(2009, datetime.today().year):
            the_dir = get_exchange_cache_dir(security_type='future',
                                             exchange='shfe')
            the_path = os.path.join(
                the_dir, "{}_shfe_history_data.zip".format(the_year))
            if not os.path.exists(the_path):
                yield Request(
                    url=self.get_year_k_data_url(the_year=the_year),
                    meta={
                        'the_year': the_year,
                        'the_path': the_path
                    },
                    callback=self.download_shfe_history_data)
def start_requests(self):
    """Yield the initial requests for the shfe spider, driven by ``trading_dates``.

    ``trading_dates`` is read from ``self.settings`` and stored on the
    spider.  When it is truthy, one daily k-data request is yielded per
    entry (no check for an existing cache file is made).  Otherwise one
    request is yielded per year from 2009 up to, but not including, the
    current year for the yearly history zip, skipping archives that already
    exist on disk.
    """
    self.trading_dates = self.settings.get("trading_dates")

    if not self.trading_dates:
        # Fetch the yearly statistics archives directly.
        for year in range(2009, datetime.today().year):
            cache_root = get_exchange_cache_dir(security_type='future',
                                                exchange='shfe')
            archive = os.path.join(
                cache_root, "{}_shfe_history_data.zip".format(year))
            if os.path.exists(archive):
                continue
            yield Request(
                url=self.get_year_k_data_url(the_year=year),
                meta={
                    'the_year': year,
                    'the_path': archive
                },
                callback=self.download_shfe_history_data)
        return

    # Per-day data.
    for trading_day in self.trading_dates:
        target = get_exchange_cache_path(
            security_type='future',
            exchange='shfe',
            the_date=to_timestamp(trading_day),
            data_type='day_kdata')
        yield Request(
            url=self.get_day_kdata_url(the_date=trading_day),
            meta={
                'the_date': trading_day,
                'the_path': target
            },
            callback=self.download_shfe_data_by_date)
def start_requests(self):
    """Yield the initial requests for the czce spider, chosen by ``self.dataType``.

    * ``None`` -- one request for ``FutureDataDaily.xls`` per weekday from
      January 1st of the current year through today whose cache file does
      not exist yet.
    * ``'historyk'`` -- a single request for the history index page.
    * ``'inventory'`` -- one request for ``FutureDataHolding.xls`` per
      weekday in the last 450 weeks whose cache file does not exist yet.

    Any other value yields nothing.
    """
    mode = self.dataType

    if mode == 'historyk':
        yield Request(
            url="http://www.czce.com.cn/portal/jysj/qhjysj/lshqxz/A09112017index_1.htm",
            callback=self.download_czce_history_data)
        return

    # The per-day modes differ in look-back window, cache data type and
    # remote file name only.
    if mode is None:
        now = pd.Timestamp.today()
        lookback = pd.Timedelta(days=now.dayofyear - 1)
        cache_kind = 'day_kdata'
        remote_name = "/FutureDataDaily.xls"
    elif mode == 'inventory':
        now = pd.Timestamp.today()
        lookback = pd.Timedelta(weeks=450)
        cache_kind = 'inventory'
        remote_name = "/FutureDataHolding.xls"
    else:
        return

    for day in pd.date_range(start=now.date() - lookback, end=now):
        target = get_exchange_cache_path(
            security_type='future',
            exchange='czce',
            the_date=to_timestamp(day),
            data_type=cache_kind) + '.xls'
        # Skip Saturdays/Sundays and anything already on disk.
        if day.dayofweek >= 5 or os.path.exists(target):
            continue
        yield Request(
            url="http://www.czce.com.cn/portal/DFSStaticFiles/Future/" +
            day.strftime("%Y/%Y%m%d") + remote_name,
            callback=self.download_czce_kline_data,
            meta={'filename': target})
def request_currentyear_kdata(self):
    """Build the form requests for dce daily quote exports that are not cached.

    Walks every day from 2020-01-01 (hard-coded) through today and, for each
    weekday whose ``.xls`` cache file does not exist, creates a
    ``FormRequest`` against the day-quotes export endpoint.

    Returns:
        A list of ``FormRequest`` objects, in date order.
    """
    export_url = "http://www.dce.com.cn/publicweb/quotesdata/exportDayQuotesChData.html"
    pending = []
    now = pd.Timestamp.today()

    for day in pd.date_range(start='20200101', end=now):
        target = get_exchange_cache_path(
            security_type='future',
            exchange='dce',
            the_date=to_timestamp(day),
            data_type="day_kdata") + '.xls'
        # Skip Saturdays/Sundays and anything already on disk.
        if day.dayofweek >= 5 or os.path.exists(target):
            continue

        form = {
            'year': str(day.year),
            # Sent as month - 1; presumably the endpoint expects a
            # zero-based month -- TODO confirm.
            'month': str(day.month - 1),
            'day': str(day.day),
            'dayQuotes.trade_type': '0',
            'dayQuotes.variety': 'all',
            'exportType': 'excel'
        }
        pending.append(
            FormRequest(url=export_url,
                        formdata=form,
                        callback=self.download_dce_kline_data,
                        meta={'filename': target}))

    return pending
def request_inventory_data(self):
    """Build the form requests for dce member position exports that are not cached.

    Walks every day from 2020-01-01 through today and, for each weekday whose
    ``.zip`` cache file does not exist, creates a ``FormRequest`` against the
    batch member deal/position export endpoint.

    Returns:
        A list of ``FormRequest`` objects, in date order.
    """
    export_url = "http://www.dce.com.cn/publicweb/quotesdata/exportMemberDealPosiQuotesBatchData.html"
    pending = []
    now = pd.Timestamp.today()

    for day in pd.date_range(start='20200101', end=now):
        target = get_exchange_cache_path(
            security_type='future',
            exchange='dce',
            the_date=to_timestamp(day),
            data_type="day_inventory") + '.zip'
        # Skip Saturdays/Sundays and anything already on disk.
        if day.dayofweek >= 5 or os.path.exists(target):
            continue

        form = {
            'batchExportFlag': 'batch',
            'contract.contract_id': 'all',
            'contract.variety_id': 'a',
            'year': str(day.year),
            # Sent as month - 1; presumably the endpoint expects a
            # zero-based month -- TODO confirm.
            'month': str(day.month - 1),
            'day': str(day.day),
            'memberDealPosiQuotes.trade_type': '0',
            'memberDealPosiQuotes.variety': 'all'
        }
        pending.append(
            FormRequest(url=export_url,
                        formdata=form,
                        callback=self.download_dce_kline_data,
                        meta={'filename': target}))

    return pending
def crawl_rollYield_And_Spread():
    """Download fushare spot-price tables for every calendar day up to today.

    The fushare calendar (``'%Y%m%d'`` strings) is filtered to dates not
    later than now.  For each such date whose ``misc`` cache path does not
    exist, the spot-price frame is fetched and written as
    ``<cache path>spotPrice<date>.csv``.

    The download is best-effort: an ordinary failure for one date is reported
    on stdout and the loop moves on.  ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not caught, so the crawl can still be
    interrupted.
    """
    # The result is not used here; the call is kept in case the helper has
    # side effects such as preparing the cache directory -- TODO confirm.
    get_exchange_cache_dir(security_type='future',
                           exchange='shfe',
                           data_type="day_kdata")

    today = pd.Timestamp.today()
    past_days = [
        day for day in fushare.cons.get_calendar()
        if datetime.strptime(day, '%Y%m%d') <= today
    ]

    for day in past_days:
        the_dir = get_exchange_cache_path(security_type='future',
                                          exchange='shfe',
                                          the_date=to_timestamp(day),
                                          data_type='misc')
        if os.path.exists(the_dir):
            continue

        # NOTE: the roll-yield download (fushare.get_rollYield_bar) is
        # currently disabled; only spot prices are fetched.
        try:
            spdf = fushare.get_spotPrice(day)
            spdf.to_csv(the_dir + 'spotPrice' + day + '.csv')
        except Exception as err:
            # Keep going on a per-day failure, but say why it failed.
            print("not downloaded for " + day + ": " + repr(err))