def getEDBTimeSeriesDataFrame(codeList, beginDate, endDate, fillChoice="Previous"):
    """Fetch EDB (macro) time series through WindPy as a DataFrame.

    Each code in ``codeList`` becomes one column of the result.

    :param codeList: iterable of EDB code strings; joined with commas.
    :param beginDate: start of the range; must provide ``strftime``.
    :param endDate: end of the range; must provide ``strftime``.
    :param fillChoice: (string) ``"Previous"`` forwards ``Fill=Previous`` to
        ``w.edb`` so missing values are replaced by the previous value; any
        other value (e.g. ``None``) sends no fill option.
    :return: DataFrame indexed by ``trade_date`` (datetime) with one column
        per returned code, or ``None`` when Wind returns no data.
    :raises Exception: any error is printed and then re-raised unchanged.
    """
    codeListStr = ",".join(codeList)
    try:
        w.start()
        requestArgs = [
            codeListStr,
            beginDate.strftime("%Y-%m-%d"),
            endDate.strftime("%Y-%m-%d"),
        ]
        if fillChoice == "Previous":
            requestArgs.append("Fill=" + fillChoice)
        windData = w.edb(*requestArgs)

        # No series at all, or an empty first series: nothing to return.
        if len(windData.Data) == 0 or len(windData.Data[0]) == 0:
            return None

        # Pair every returned code with its value list.
        df = pd.DataFrame(dict(zip(windData.Codes, windData.Data)),
                          index=windData.Times)
        df.index = pd.to_datetime(df.index)
        df.index.name = "trade_date"
        return df
    except Exception as e:
        print(format(e))
        raise
def fig_industrial_production():
    """Fetch industrial-production EDB rows newer than the stored latest date.

    The latest stored date is read via ``do.get_latest_date`` and the series
    are queried from that date up to now.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the six renamed series
        plus a ``date`` column, restricted to ``last_date < date < today``,
        and ``dtypelist`` maps each column to a ``Float()``/``DateTime()``
        type object. When the query yields a single-column frame,
        ``([], name, [])`` is returned instead.
    """
    name = 'fig_industrial_production'
    last_date = do.get_latest_date(name)
    today_date = dt.datetime.now()
    print('表{}的最近更新日期为{}'.format(name, last_date))
    _err, df = w.edb("S5704502,S5715680,S5708175,S5715660,S5417017,S5914175",
                     last_date, today_date, usedf=True)
    # Six codes were requested; a one-column frame is presumably Wind's
    # error/no-data response -- TODO confirm.
    if df.shape[1] == 1:
        return [], name, []
    df.columns = [
        "日均产量:粗钢:国内", "日均产量:焦炭:重点企业(旬)", "高炉开工率(163家):全国",
        "产能利用率:电炉:全国", "PTA产业链负荷率:PTA工厂", "浮法玻璃:产能利用率"
    ]
    df['date'] = df.index
    # Reuse the timestamp captured above rather than calling now() again, so
    # the query window and the filter agree even across midnight.
    df = df.loc[(df.date > last_date) & (df.date < today_date.date())]
    columns_type = [Float() for _ in range(6)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def fig_midstream():
    """Fetch midstream-industry EDB rows newer than the stored latest date.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the ten renamed series
        plus a ``date`` column, restricted to ``last_date < date < today``,
        and ``dtypelist`` maps each column to a ``Float()``/``DateTime()``
        type object. When the query yields a single-column frame,
        ``([], name, [])`` is returned instead.
    """
    name = 'fig_midstream'
    last_date = do.get_latest_date(name)
    today_date = dt.datetime.now()
    print('表{}的最近更新日期为{}'.format(name, last_date))
    # Build the request from separate codes: the earlier literal used a
    # backslash continuation inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        "S5705039", "S0247603", "S0181750", "S5914515", "S5907373",
        "S5416650", "M0067419", "M0066359", "M0066348", "M0066350",
    ))
    _err, df = w.edb(codes, last_date, today_date, usedf=True)
    # A one-column frame is presumably Wind's error/no-data response
    # -- TODO confirm.
    if df.shape[1] == 1:
        return [], name, []
    df.columns = [
        'Mylpic综合钢价指数', '库存:主要钢材品种:合计', '库存:螺纹钢(含上海全部仓库)',
        '水泥价格指数:全国', '中国玻璃价格指数', '中国盛泽化纤价格指数',
        '期货收盘价(活跃合约):PVC', '期货收盘价(活跃合约):天然橡胶',
        '期货收盘价(活跃合约):黄大豆1号', '期货收盘价(活跃合约):黄玉米'
    ]
    df['date'] = df.index
    df = df.loc[(df.date > last_date) & (df.date < today_date.date())]
    columns_type = [Float() for _ in range(10)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def download_data_wind(start_date, end_date, var_list):
    """Download EDB series from Wind, one column per code.

    Starts the Wind session first if it is not already connected.

    :param start_date: range start, forwarded to ``w.edb``.
    :param end_date: range end, forwarded to ``w.edb``.
    :param var_list: EDB code(s), forwarded to ``w.edb`` unchanged.
    :return: DataFrame indexed by the returned times, with the returned
        codes as columns.
    """
    if not w.isconnected():
        w.start()
    raw = w.edb(var_list, start_date, end_date)
    # Wind returns one value list per code; transpose so times are the rows.
    return pd.DataFrame(raw.Data, index=raw.Codes, columns=raw.Times).T
def rates_us():
    """Fetch US-rates EDB rows newer than the stored latest date.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the six renamed series
        plus a ``date`` column, restricted to ``last_date < date < today``,
        and ``dtypelist`` maps each column to a ``Float()``/``DateTime()``
        type object. When the query yields a single-column frame,
        ``([], name, [])`` is returned instead.
    """
    name = 'rates_us'
    last_date = do.get_latest_date(name)
    today_date = dt.datetime.now()
    print('表{}的最近更新日期为{}'.format(name, last_date))
    # Query the incremental window. The previous hard-coded range
    # ("2010-06-21".."2021-06-18") could never yield rows once last_date
    # moved past its end, because of the date filter below.
    _err, df = w.edb("G0000886,G0000887,G0000891,G8455661,M0000185,G0000898",
                     last_date, today_date, usedf=True)
    # A one-column frame is presumably Wind's error/no-data response
    # -- TODO confirm.
    if df.shape[1] == 1:
        return [], name, []
    df.columns = ['美债1年', '美债2年', '美债10年', '美债10-2', '美元兑人民币', 'libor_3m']
    df['date'] = df.index
    df = df.loc[(df.date > last_date) & (df.date < today_date.date())]
    columns_type = [Float() for _ in range(6)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def download_wind_data(codes, start_date, end_date):
    """Query EDB series from Wind and return them as a DataFrame.

    The raw ``w.edb`` result is converted by ``wind2df``.
    """
    return wind2df(w.edb(codes, start_date, end_date))
def get_edb(idx_universe, start_date, end_date, names=None, **options):
    '''
    Fetch economic (EDB) data from Wind.

    Parameters
    ------------
    idx_universe
        Iterable of indicator codes; joined with commas for the request.
    start_date
        e.g. '20160101'; converted with ``date_format_convert``.
    end_date
        e.g. '20170101'; converted with ``date_format_convert``.
    names
        list of str, column aliases; defaults to None, in which case the
        codes returned by Wind are used as column labels.
    options
        Extra keyword options, serialised with ``dict_2_str``.

    Returns
    --------
    DataFrame
        Rows are the returned times, columns are ``names`` (or the codes).

    Raises
    ------
    AssertionError
        If ``names`` is given but its length differs from ``idx_universe``.

    Notes
    ------
    When ``names`` is not None it must match ``idx_universe`` in length.
    '''
    # Validate only when aliases are supplied: the documented default
    # (None) used to crash on len(None) before any data was requested.
    if names is not None and len(names) != len(idx_universe):
        raise AssertionError('names must have the same length as idx_universe')
    options = dict_2_str(options)
    start_date = date_format_convert(start_date)
    end_date = date_format_convert(end_date)
    idx_universe = ','.join(idx_universe)
    edb = w.edb(idx_universe, start_date, end_date, options)
    labels = names if names is not None else edb.Codes
    df = pd.DataFrame(edb.Data, columns=edb.Times, index=labels).T
    return df
def get_periodic_interest_rate(self, time_list):
    """Query EDB series "M0043808" from Wind and relabel the resulting frame.

    NOTE(review): as written the function returns None, so the frame built
    here is discarded; ``time_list`` is never used; and ``index_start_date``
    and ``end_date`` are not defined in this function, so they must come
    from an enclosing/module scope -- confirm the intended behaviour.
    """
    # With usedf=True the call yields a pair; element [1] is the DataFrame.
    r = w.edb("M0043808", index_start_date, end_date, usedf=True)[1]
    r = r.reset_index(drop=True)
    # Re-index by the '时间' column compacted to 'YYYYMMDD'
    # (characters 0-3, 5-6 and 8-9 of its string form).
    r.index = pd.Series(
        r['时间']).apply(lambda x: str(x)[:4] + str(x)[5:7] + str(x)[8:10])
    r.columns = ['一年定存利率', '时间']
    return
def cash_amt_prc():
    """Fetch funding-rate and trading-volume EDB rows newer than the stored latest date.

    Returns:
        ``(df, name, dtypelist)``: the ten renamed series plus a ``date``
        column, limited to ``last_date < date < today``, and a mapping of
        column name to ``Float()``/``DateTime()`` type object. A
        single-column query result yields ``([], name, [])``.
    """
    # Funding rates, cash bonds and trading volume.
    name = 'cash_amt_prc'
    last_date = do.get_latest_date(name)
    today_date = dt.datetime.now()
    print('表{}的最近更新日期为{}'.format(name, last_date))

    wind_codes = "M0041652,M0041653,M0041655,M1004511,M1004515,M0220162,M0220163,M0330244,M0041739,M0041740"
    _status, df = w.edb(wind_codes, last_date, today_date, usedf=True)
    if df.shape[1] == 1:
        return [], name, []

    df.columns = [
        'R001', 'R007', 'R021', 'GC001', 'GC007', 'DR001', 'DR007',
        '成交量:R001', '成交量:银行间质押式回购', '成交量:银行间债券现券',
    ]
    df['date'] = df.index

    after_last = df['date'] > last_date
    before_today = df['date'] < today_date.date()
    df = df.loc[after_last & before_today]

    columns_type = [Float() for _ in range(10)]
    columns_type.append(DateTime())
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def policy_rate():
    """Fetch policy-rate EDB rows newer than the stored latest date.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the seven renamed
        series plus a ``date`` column, restricted to ``date > last_date``,
        and ``dtypelist`` maps each column to a ``Float()``/``DateTime()``
        type object. When the query yields a single-column frame,
        ``([], name, [])`` is returned instead.
    """
    name = 'policy_rate'
    last_date = do.get_latest_date(name)
    today_date = dt.datetime.now()
    print('表{}的最近更新日期为{}'.format(name, last_date))
    # Build the request from separate codes: the earlier literal used a
    # backslash continuation inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        'M0041371', 'M0041373', 'M0041377', 'M0329656',
        'M0329543', 'M0329544', 'M0329545',
    ))
    _err, df = w.edb(codes, last_date, today_date, usedf=True)
    # A one-column frame is presumably Wind's error/no-data response
    # -- TODO confirm.
    if df.shape[1] == 1:
        return [], name, []
    df.columns = ['逆回购利率:7天', '逆回购利率:14天', '逆回购利率:28天',
                  '逆回购利率:63天', 'MLF:3m', 'MLF:6m', 'MLF:1y']
    df['date'] = df.index
    # Only a lower bound is applied here (no "before today" cut-off).
    df = df.loc[df.date > last_date]
    columns_type = [Float() for _ in range(7)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def daily_fig_rates():
    """Fetch government/CDB bond-yield EDB rows newer than the stored latest date.

    Rows containing any missing value are dropped before filtering.

    Returns:
        ``(df, name, dtypelist)``: the eight renamed series plus a ``date``
        column, limited to ``date > last_date``, and a mapping of column
        name to ``Float()``/``DateTime()`` type object. A single-column
        query result yields ``([], name, [])``.
    """
    name = 'fig_rates'
    last_date = do.get_latest_date(name)
    today_date = dt.datetime.now()
    print('表{}的最近更新日期为{}'.format(name, last_date))

    wind_codes = 'S0059744,S0059746,S0059747,S0059749,M1004263,M1004265,M1004267,M1004271'
    _status, df = w.edb(wind_codes, last_date, today_date, usedf=True)
    if df.shape[1] == 1:
        return [], name, []

    df.columns = [
        "1年国债", "3年国债", "5年国债", "10年国债",
        "1年国开", "3年国开", "5年国开", "10年国开",
    ]
    df = df.dropna(axis=0)
    df['date'] = df.index
    is_new = df['date'] > last_date
    df = df.loc[is_new]

    columns_type = [Float() for _ in range(8)]
    columns_type.append(DateTime())
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def ccl(start_date="2010-01-01", end_date="2021-06-16"):
    """Fetch the excess-reserve-ratio related EDB series.

    Args:
        start_date: range start forwarded to ``w.edb``; defaults to the
            previously hard-coded "2010-01-01".
        end_date: range end forwarded to ``w.edb``; defaults to the
            previously hard-coded "2021-06-16".

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the nine renamed series
        plus a ``date`` column, ``name`` is ``'ccl_related'`` and
        ``dtypelist`` maps each column to a ``Float()``/``DateTime()`` type
        object.
    """
    # Excess reserve ratio.
    # No government-deposit records exist before 2015.
    # Build the request from separate codes: the earlier literal used a
    # backslash continuation inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        "M0001528", "M0062047", "M0251905", "M0043821", "M0061518",
        "M0043823", "M0010096", "M0001690", "M0001380",
    ))
    _err, df = w.edb(codes, start_date, end_date, usedf=True)
    df.columns = ['住户存款', '非金融企业存款', '政府存款',
                  '中小型准备金率', '大型准备金率', '超额准备金率', '超储率_季度',
                  '基础货币', 'M0']
    df['date'] = df.index
    name = 'ccl_related'
    columns_type = [Float() for _ in range(9)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def broad_liquid(start_date="2000-06-17", end_date="2021-06-17"):
    """Fetch the broad-liquidity EDB series.

    Args:
        start_date: range start forwarded to ``w.edb``; defaults to the
            previously hard-coded "2000-06-17".
        end_date: range end forwarded to ``w.edb``; defaults to the
            previously hard-coded "2021-06-17".

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the eight renamed
        series plus a ``date`` column, ``name`` is ``'broad_liquid'`` and
        ``dtypelist`` maps each column to a ``Float()``/``DateTime()`` type
        object.
    """
    # TODO: broad liquidity
    # Build the request from separate codes: the earlier literal used a
    # backslash continuation inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        "M0011456", "M5525763", "M0001385", "M0061578", "M1002334",
        "M0001227", "M0001383", "M0010075",
    ))
    _err, df = w.edb(codes, start_date, end_date, usedf=True)
    df.columns = [
        '贷款需求指数', '社融_tb', 'M2_tb', '票据直贴利率_6m_长三角', '票据_AA+_3y',
        'ppi_tb', 'M1_tb', 'DR007_monthly'
    ]
    df['date'] = df.index
    name = 'broad_liquid'
    columns_type = [Float() for _ in range(8)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def fig_upstream():
    """Fetch the upstream-industry EDB series for the module-level range.

    The query window comes from the names ``start`` and ``end``, which are
    not defined in this function and must exist in an enclosing scope.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the thirteen renamed
        series plus a ``date`` column, ``name`` is ``'fig_upstream'`` and
        ``dtypelist`` maps each column to a ``Float()``/``DateTime()`` type
        object.
    """
    # Upstream.
    # Build the request from separate codes: the earlier literal used
    # backslash continuations inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        "S5104570", "S5125686", "S5111905", "S5111903",
        "S5705040", "S5705131", "S0031648", "S0031645",
        "M0066355", "M0066356", "S0049493", "S0049494", "S0200868",
    ))
    _err, df = w.edb(codes, start, end, usedf=True)
    df.columns = [
        '综合平均价格指数:环渤海动力煤', '炼焦煤库存:六港口合计', '现货价:原油:英国布伦特Dtd',
        '现货价:原油:美国西德克萨斯中级轻质原油(WTI)', 'Mylpic矿价指数:综合', '国内铁矿石港口库存量',
        '伦敦现货白银:以美元计价', '伦敦现货黄金:以美元计价', '期货收盘价(活跃合约):阴极铜',
        '期货收盘价(活跃合约):铝', '库存期货:阴极铜', '库存期货:铝', '南华焦炭指数'
    ]
    df['date'] = df.index
    name = 'fig_upstream'
    columns_type = [Float() for _ in range(13)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def fig_midstream():
    """Fetch the midstream-industry EDB series for the module-level range.

    The query window comes from the names ``start`` and ``end``, which are
    not defined in this function and must exist in an enclosing scope.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the ten renamed series
        plus a ``date`` column, ``name`` is ``'fig_midstream'`` and
        ``dtypelist`` maps each column to a ``Float()``/``DateTime()`` type
        object.
    """
    # Midstream.
    # Build the request from separate codes: the earlier literal used a
    # backslash continuation inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        "S5705039", "S0247603", "S0181750", "S5914515", "S5907373",
        "S5416650", "M0067419", "M0066359", "M0066348", "M0066350",
    ))
    _err, df = w.edb(codes, start, end, usedf=True)
    df.columns = [
        'Mylpic综合钢价指数', '库存:主要钢材品种:合计', '库存:螺纹钢(含上海全部仓库)',
        '水泥价格指数:全国', '中国玻璃价格指数', '中国盛泽化纤价格指数',
        '期货收盘价(活跃合约):PVC', '期货收盘价(活跃合约):天然橡胶',
        '期货收盘价(活跃合约):黄大豆1号', '期货收盘价(活跃合约):黄玉米'
    ]
    df['date'] = df.index
    name = 'fig_midstream'
    columns_type = [Float() for _ in range(10)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def load_macro_data_wind(self,
                         macro_code="M0000545",
                         beg_date="19900101",
                         end_date=None):
    """
    Download one macro (EDB) series from Wind and merge it into its CSV file.

    The series is requested with ``Fill=Previous``, rows with missing values
    are dropped, and the index is rewritten as 'YYYYMMDD' strings. If
    ``<self.data_path>/<macro_code>.csv`` already exists, the new rows are
    merged into it with ``FactorOperate().pandas_add_row``; otherwise the
    download becomes the file content. All-NaN rows are dropped before
    writing.

    :param macro_code: EDB code to download.
    :param beg_date: range start; normalised by ``Date().change_to_str``.
    :param end_date: range end; ``None`` (default) means today, evaluated at
        call time.
    :return: None; the result is written to the CSV file.
    """
    from WindPy import w

    # Resolve "today" per call. A strftime() default in the signature is
    # evaluated once at definition time and goes stale in a long-running
    # process.
    if end_date is None:
        end_date = datetime.today().strftime("%Y%m%d")

    w.start()
    beg_date = Date().change_to_str(beg_date)
    end_date = Date().change_to_str(end_date)

    # Download the data
    ##############################################################################
    data = w.edb(macro_code, beg_date, end_date, "Fill=Previous")
    new_data = pd.DataFrame(data.Data, columns=data.Times, index=data.Codes).T
    new_data = new_data.dropna()
    new_data.index = new_data.index.map(lambda x: x.strftime('%Y%m%d'))

    print(" Loading Macro Data %s From %s To %s " % (macro_code, beg_date, end_date))
    out_file = os.path.join(self.data_path, macro_code + '.csv')

    if os.path.exists(out_file):
        data = pd.read_csv(out_file, encoding='gbk', index_col=[0])
        # Stored index values are compared as strings with the new index.
        data.index = data.index.map(str)
        data = FactorOperate().pandas_add_row(data, new_data)
    else:
        print(" File No Exist ", macro_code)
        data = new_data
    data = data.dropna(how='all')
    data.to_csv(out_file)
def fig_downstream():
    """Fetch the downstream-industry EDB series for the module-level range.

    The query window comes from the names ``start`` and ``end``, which are
    not defined in this function and must exist in an enclosing scope.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the thirteen renamed
        series plus a ``date`` column, ``name`` is ``'fig_downstream'`` and
        ``dtypelist`` maps each column to a ``Float()``/``DateTime()`` type
        object.
    """
    # Build the request from separate codes: the earlier literal used
    # backslash continuations inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        "S2707379", "S2707380", "S2726996", "S6126413",
        "S0049599", "S0000293", "S6500614", "S6424740",
        "S6604459", "S6604460", "S0000066", "S0237842", "S0031550",
    ))
    _err, df = w.edb(codes, start, end, usedf=True)
    df.columns = [
        '30大中城市:商品房成交套数', '30大中城市:商品房成交面积', '100大中城市:成交土地溢价率:当周值',
        '当周日均销量:乘用车:厂家零售', '柯桥纺织:价格指数:总类', '义乌中国小商品指数:总价格指数',
        '中关村电子价格产品指数', '中国公路物流运价指数', '电影票房收入', '电影观影人次',
        'CCFI:综合指数', 'CICFI:综合指数', '波罗的海干散货指数(BDI)'
    ]
    df['date'] = df.index
    name = 'fig_downstream'
    columns_type = [Float() for _ in range(13)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def spreads():
    """Fetch spread/leverage EDB rows newer than the stored latest date.

    Returns:
        ``(df, name, dtypelist)``: the twelve renamed series plus a ``date``
        column, limited to ``last_date < date < today``, and a mapping of
        column name to ``Float()``/``DateTime()`` type object. A
        single-column query result yields ``([], name, [])``.
    """
    # Spreads and leverage.
    name = 'spreads'
    last_date = do.get_latest_date(name)
    today_date = dt.datetime.now()
    print('表{}的最近更新日期为{}'.format(name, last_date))

    wind_codes = "M0220162,M0220163,M1004515,M0048486,M0048490,M1004007,M1004900,S0059722,S0059724,S0059725,M1004271,M1004300"
    _status, df = w.edb(wind_codes, last_date, today_date, usedf=True)
    if df.shape[1] == 1:
        return [], name, []

    df.columns = [
        'DR001', 'DR007', 'GC007', 'IRS_1y_FR007', 'IRS_5y_FR007',
        'IRS_5y_shibor3m', 'cd_AAA_6m',
        '中短票_AA+_1y', '中短票_AA+_3y', '中短票_AA+_5y',
        '国开10年', '地方债_AAA_3y',
    ]
    df['date'] = df.index

    after_last = df['date'] > last_date
    before_today = df['date'] < today_date.date()
    df = df.loc[after_last & before_today]

    columns_type = [Float() for _ in range(12)]
    columns_type.append(DateTime())
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def fig_industrial_production():
    """Fetch the industrial-production EDB series for the module-level range.

    The query window comes from the names ``start`` and ``end`` defined
    outside this function. No fill option is sent and missing values are
    left as returned.

    Returns:
        ``(df, name, dtypelist)``: the six renamed series plus a ``date``
        column, the table name ``'fig_industrial_production'``, and a
        mapping of column name to ``Float()``/``DateTime()`` type object.
    """
    # Industrial production.
    name = 'fig_industrial_production'
    wind_codes = "S5704502,S5715680,S5708175,S5715660,S5417017,S5914175"
    _status, df = w.edb(wind_codes, start, end, usedf=True)

    df.columns = [
        "日均产量:粗钢:国内",
        "日均产量:焦炭:重点企业(旬)",
        "高炉开工率(163家):全国",
        "产能利用率:电炉:全国",
        "PTA产业链负荷率:PTA工厂",
        "浮法玻璃:产能利用率",
    ]
    df['date'] = df.index

    columns_type = [Float() for _ in range(6)]
    columns_type.append(DateTime())
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def daily_fig_liquidity_premium():
    """Fetch the liquidity-premium EDB series for the module-level range.

    The query window comes from the names ``start`` and ``end``, which are
    not defined in this function and must exist in an enclosing scope. The
    request uses ``Fill=Previous``.

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the nine renamed series
        plus a ``date`` column, ``name`` is ``'fig_liquidity_premium'`` and
        ``dtypelist`` maps each column to a ``Float()``/``DateTime()`` type
        object.
    """
    # Build the request from separate codes: the earlier literal used
    # backslash continuations inside the string, which embedded stray
    # whitespace in the middle of the code list.
    codes = ",".join((
        'M0017139', 'M0041653', 'M0220163',
        'M0017142', 'M0048486', 'M1010889', 'M1010892', 'M0329545',
        'M1011048',
    ))
    _err, df = w.edb(codes, start, end, "Fill=Previous", usedf=True)
    df.columns = [
        "shibor_7d", "质押回购利率_7天", "存款类质押回购利率_7天", "shibor_3m",
        "IRS:FR007:1y", "存单_AAA_3m", "存单_AAA_1y", "MLF:1年", "国股银票转贴现收益率_3m"
    ]
    df['date'] = df.index
    name = 'fig_liquidity_premium'
    columns_type = [Float() for _ in range(9)] + [DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def daily_fig_rates():
    """Fetch government/CDB bond-yield EDB series for the module-level range.

    The query window comes from the names ``start`` and ``end`` defined
    outside this function. The request uses ``Fill=Previous`` and rows with
    any missing value are dropped.

    Returns:
        ``(df, name, dtypelist)``: the eight renamed series plus a ``date``
        column, the table name ``'fig_rates'``, and a mapping of column name
        to ``Float()``/``DateTime()`` type object.
    """
    name = 'fig_rates'
    wind_codes = 'S0059744,S0059746,S0059747,S0059749,M1004263,M1004265,M1004267,M1004271'
    _status, df = w.edb(wind_codes, start, end, "Fill=Previous", usedf=True)

    df.columns = [
        "1年国债", "3年国债", "5年国债", "10年国债",
        "1年国开", "3年国开", "5年国开", "10年国开",
    ]
    df = df.dropna(axis=0)
    df['date'] = df.index

    columns_type = [Float() for _ in range(8)]
    columns_type.append(DateTime())
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def get_periodic_interest_rate_from_wind(self, start_date, end_date):
    '''Fetch the one-year fixed-deposit rate (EDB "M0043808") from the Wind client.

    The frame is re-indexed by its '时间' column compacted to 'YYYYMMDD'
    strings and its two columns are relabelled before it is returned.
    '''
    def _compact(value):
        # Keep characters 0-3, 5-6 and 8-9 of the string form,
        # i.e. turn 'YYYY-MM-DD...' into 'YYYYMMDD'.
        text = str(value)
        return text[:4] + text[5:7] + text[8:10]

    # With usedf=True the call yields a pair; element [1] is the DataFrame.
    response = w.edb("M0043808", start_date, end_date, usedf=True)
    rst = response[1].reset_index(drop=True)
    time_column = pd.Series(rst['时间'])
    rst.index = time_column.apply(_compact)
    rst.columns = ['一年定存利率', '时间']
    return rst
def get_data(self):
    """Query ``self.code`` from Wind between ``self.startdate`` and ``self.enddate``.

    The request uses ``Fill=Previous``. A non-zero error code is only
    printed; the DataFrame is still built from whatever was returned.

    :return: DataFrame indexed by the returned times, one column per code.
    """
    indata = w.edb(self.code, self.startdate, self.enddate, "Fill=Previous")
    if indata.ErrorCode != 0:
        print('错误:' + str(indata.ErrorCode) + '\n')
    # Transpose per-code value lists into per-time rows.
    rows = [list(values) for values in zip(*indata.Data)]
    return pd.DataFrame(rows, index=list(indata.Times), columns=indata.Codes)
def tbond_y1(self):
    """Return the EDB result for "M1001940", querying Wind only on first use.

    The result is stored in ``self._cached`` under ``"tbond_y1"`` and
    returned from there on later calls.
    """
    cache_key = "tbond_y1"
    store = self._cached
    if cache_key in store:
        return store[cache_key]
    # NOTE(review): beginTime is fed from "date_e" and endTime from "date_s";
    # if those keys mean end/start this looks swapped -- confirm.
    store[cache_key] = wind.edb(
        "M1001940",
        beginTime=self.params["date_e"],
        endTime=self.params["date_s"],
        options="Fill=Previous",
    )
    return store[cache_key]
def getEDBFromWind(self, collection, edb_code, **kwargs):
    """Download one EDB series from Wind and insert new rows into MongoDB.

    If ``collection`` already holds documents for ``edb_code``, the download
    starts the day after the newest stored ``date``; otherwise it starts at
    1990-01-01. The range always ends today. Each returned date becomes one
    document holding the returned fields, ``wind_code``, ``date``,
    ``update_time`` and any extra ``kwargs``. Dates already present in the
    collection are skipped.

    :param collection: name of the collection in ``self.db``.
    :param edb_code: Wind EDB code to download.
    :param kwargs: extra key/value pairs merged into every inserted document.
    :return: None.
    :raises Exception: when Wind reports a non-zero error code.
    """
    self.windConn()
    coll = self.db[collection]
    end_date = datetime.today()
    if coll.find_one({'wind_code': edb_code}):
        queryArgs = {'wind_code': edb_code}
        projectionField = ['wind_code', 'date']
        searchRes = coll.find(queryArgs, projectionField).sort(
            'date', pymongo.DESCENDING).limit(1)
        # Resume on the day after the newest stored record.
        start_date = list(searchRes)[0]['date'] + timedelta(1)
    else:
        start_date = datetime.strptime('19900101', '%Y%m%d')

    if start_date > end_date:
        return

    res = w.edb(edb_code, start_date, end_date, 'Fill=previous')
    if res.ErrorCode != 0:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(res)
        raise Exception(u'WIND提取数据出现了错误')

    unit_total = len(res.Data[0]) * len(res.Fields)
    self.logger.info(u'抓取EDB%s数据%s到%s的数据,共计%d个' %
                     (edb_code, start_date, end_date, unit_total))

    dict_res = dict(zip(res.Fields, res.Data))
    df = pd.DataFrame.from_dict(dict_res)
    df.index = res.Times
    df['wind_code'] = edb_code
    df2dict = df.to_dict(orient='index')

    total = len(df2dict)
    count = 1
    print('抓取%s数据' % edb_code)
    for di in df2dict:
        # Redraw a 100-character progress bar on the same console line.
        percent = count * 100. / total
        filled = int(percent)
        process_str = '>' * filled + ' ' * (100 - filled)
        sys.stdout.write('\r' + process_str + u'【已完成%5.2f%%】' % percent)
        sys.stdout.flush()

        record_date = datetime.strptime(str(di), '%Y-%m-%d')
        # This check is required: when the requested dates lie beyond the
        # available data, edb() returns the last existing observation.
        if coll.find_one({'wind_code': edb_code, 'date': record_date}):
            self.logger.info(u'该数据已经存在于数据库中,没有抓取')
            continue

        dtemp = df2dict[di].copy()
        dtemp['date'] = record_date
        dtemp['update_time'] = datetime.now()
        dtemp.update(kwargs)
        coll.insert_one(dtemp)
        count += 1

    sys.stdout.write('\n')
    sys.stdout.flush()
def fetch_wind_data(self, symbol, date_start, date_end):
    """Fetch one EDB series from Wind for the given date range.

    :param symbol: EDB code; sent to Wind as a one-element list.
    :param date_start: range start forwarded to ``w.edb``.
    :param date_end: range end forwarded to ``w.edb``.
    :return: DataFrame with the returned codes as index and the returned
        times as columns (not transposed).
    :raises Exception: when Wind reports a non-zero error code.
    """
    w.start()
    resp = w.edb([symbol], date_start, date_end)
    # Report the range that was actually requested (the arguments), not
    # self.date_start / self.date_end, which may hold different values.
    if resp.ErrorCode != 0:
        raise Exception('fetching %s [%s ~ %s] failed, status code: %s' % (
            symbol, date_start, date_end, resp.ErrorCode))
    logger.info('fetching %s [%s ~ %s] success' % (symbol, date_start, date_end))
    df = pd.DataFrame(resp.Data, columns=resp.Times, index=resp.Codes)
    return df
def get(collection, name, code):
    """Download one EDB series since 2000-01-01 and store it point by point.

    The end of the range is the module-level ``date``. Each observation is
    inserted into ``client['MACRO'][collection]`` as a document with keys
    ``DATE`` ('YYYYMMDD'), ``NAME`` and ``VALUE`` (stringified).
    """
    series = w.edb(code, "2000-01-01", date)
    # Only the first returned value list is stored.
    for stamp, value in zip(series.Times, series.Data[0]):
        document = {
            'DATE': stamp.strftime('%Y%m%d'),
            'NAME': name,
            'VALUE': str(value),
        }
        client['MACRO'][collection].insert_one(document)
def data_from_wind(code, freq, start="", end=""):
    """Fetch data for ``code`` from Wind and convert it with ``to_dataframe``.

    :param code: Wind code to query.
    :param freq: ``'D'`` requests OPEN/HIGH/LOW/CLOSE through ``w.wsd``; any
        other value requests the series through ``w.edb`` with
        ``Fill=Previous``.
    :param start: range start forwarded to Wind.
    :param end: range end forwarded to Wind.
    :return: whatever ``to_dataframe(raw, freq)`` returns.
    """
    w.start()
    try:
        if freq == 'D':
            raw = w.wsd(code, "OPEN,HIGH,LOW,CLOSE", start, end, "")
        else:
            raw = w.edb(code, start, end, "Fill=Previous")
    finally:
        # Always release the Wind session, even when the query raises.
        w.close()
    kl = to_dataframe(raw, freq)
    return kl
def daily_fig_bond_leverage():
    """Fetch the bond-leverage EDB series for the module-level range.

    The query window comes from the names ``start`` and ``end`` defined
    outside this function. Missing values are left as returned.

    Returns:
        ``(df, name, dtypelist)``: the two renamed series plus a ``date``
        column, the table name ``'fig_bond_leverage'``, and a mapping of
        column name to ``Float(4)``/``Float(1)``/``DateTime()`` type object.
    """
    name = 'fig_bond_leverage'
    _status, df = w.edb('M0041739,M5639029', start, end, usedf=True)
    df.columns = ['成交量:银行间质押式回购', '债券市场托管余额']
    df['date'] = df.index

    columns_type = [Float(4), Float(1), DateTime()]
    dtypelist = {
        column: column_type
        for column, column_type in zip(df.columns, columns_type)
    }
    return df, name, dtypelist
def mkt_rates(start_date="2000-06-17", end_date="2021-06-16"):
    """Fetch the money-market-rate EDB series.

    Args:
        start_date: range start forwarded to ``w.edb``; defaults to the
            previously hard-coded "2000-06-17".
        end_date: range end forwarded to ``w.edb``; defaults to the
            previously hard-coded "2021-06-16".

    Returns:
        ``(df, name, dtypelist)`` where ``df`` holds the four renamed series
        plus a ``date`` column, ``name`` is ``'mkt_rates'`` and
        ``dtypelist`` maps each column to a ``Float()``/``DateTime()`` type
        object.
    """
    # TODO: money-market rates
    _err, df = w.edb("M1006336,M1006337,M0017142,M1006645",
                     start_date, end_date, usedf=True)
    df.columns = ['DR001', 'DR007', 'shibor_3m', '存单_1y']
    df['date'] = df.index
    name = 'mkt_rates'
    columns_type = [Float(), Float(), Float(), Float(), DateTime()]
    dtypelist = dict(zip(df.columns, columns_type))
    return df, name, dtypelist
def edb(code, startdate, enddate, name):
    """Fetch one EDB series from Wind as a single-column DataFrame.

    The first returned value list becomes a column labelled ``name``; the
    returned times form the index.
    """
    result = w.edb(code, startdate, enddate)
    column_data = dict(zip([name], result.Data))
    return pd.DataFrame(column_data, index=result.Times)