# Tushare + Wind API: use tushare wherever possible, fall back to the Wind API only when necessary; do not use CSMAR
import os
from pathlib import Path

import pandas as pd
import tushare as ts

token = '654d36bf9bb086cb8c973e0f259e38c3efe24975386b7922e88a4cf2'
ts.set_token(token)
pro = ts.pro_api()

import utils

utils.setdir_fctr()

s_path = Path("_saved_factors")
if not os.path.exists(s_path):
    os.mkdir(s_path)

# %%
# use split-adjusted share prices
Monthly_Quotation_sa = utils.cleandf(
    pd.read_csv(Path('data', 'buffer', 'MonFactorPrcd_sa.csv')))
Monthly_Quotation_sa = utils.todate(Monthly_Quotation_sa,
                                    'end_date',
                                    format='%Y-%m-%d')
Monthly_Quotation_sa = Monthly_Quotation_sa.set_index(
    ['ts_code', 'end_date']).sort_index()
Monthly_Quotation_sa['monthly_return'] = Monthly_Quotation_sa.groupby(
    ['ts_code'])['close'].pct_change()

# load the risk-free rate and convert the annualized rate (in %) to a monthly rate
rf = pd.read_csv(Path('_saved_factors', 'MacroFactor.csv'),
                 index_col=0,
                 parse_dates=['end_date'])[['RiskFreeRate']]
rf['Mon_rfr'] = (1 + rf['RiskFreeRate'] / 100)**(1 / 12) - 1
# rf = rf.sort_index().shift(1)  # shift by 1
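# %%
# Sketch (illustration only, not part of the original script): one way the monthly
# risk-free rate could be joined to the split-adjusted returns to form excess returns.
# This assumes rf is indexed by month-end 'end_date' so it aligns with the quotation
# panel; 'excess_return' is a column name chosen here for illustration.
_excess = Monthly_Quotation_sa.reset_index().merge(
    rf[['Mon_rfr']].reset_index(), on='end_date', how='left')
_excess['excess_return'] = _excess['monthly_return'] - _excess['Mon_rfr']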
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
import tushare as ts

token = '654d36bf9bb086cb8c973e0f259e38c3efe24975386b7922e88a4cf2'
ts.set_token(token)
pro = ts.pro_api()

_paths = os.getcwd().split('/')
if _paths[-1] == "code":
    os.chdir("..")

# %%
'''
Deduplicate and inspect Quarter_data
'''
from utils import cleandf

Quarter_data = cleandf(
    pd.read_csv(Path('data', 'buffer', 'QuarterFactorRaw.csv')))
Quarter_data = Quarter_data.sort_values(by=['ts_code', 'end_date'],
                                        ascending=[True, True])
Quarter_data = Quarter_data[~(Quarter_data.end_date.isna())]
Quarter_data = Quarter_data.drop_duplicates()
Quarter_data['end_date'] = Quarter_data['end_date'].astype(int)
date = [
    datetime.strptime(str(i), "%Y%m%d") for i in Quarter_data.end_date.values
]
Quarter_data.loc[:, 'end_date'] = date
Quarter_data.index = range(len(Quarter_data))
# qgrid.show_grid(Quarter_data.loc[:, ['ts_code', 'end_date', 'ann_date']])

# Every diff/rolling computation should be wrapped in a .groupby('ts_code');
# rows with Quarter_data['is_beginning'] == 1 then come out as NaN automatically.
Quarter_data['is_beginning'] = 0
Quarter_data = Quarter_data.sort_values(['ts_code', 'end_date'])
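# %%
# Sketch (illustration only, not from the original script) of the grouped-diff
# convention described in the comment above. It works on a copy so the original frame
# is untouched; reading 'is_beginning' as "first observation per ts_code" is an
# assumption made here for the example.
_qd = Quarter_data.copy()
_qd.loc[_qd.groupby('ts_code').head(1).index, 'is_beginning'] = 1
# Any differenced series is then computed within groupby('ts_code'), e.g. for a
# hypothetical column 'total_assets':
# _qd['d_total_assets'] = _qd.groupby('ts_code')['total_assets'].diff()
# The first row of each stock (is_beginning == 1) is NaN by construction, never a
# spurious cross-stock difference.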
import os
from pathlib import Path

import pandas as pd
import tushare as ts

token = '654d36bf9bb086cb8c973e0f259e38c3efe24975386b7922e88a4cf2'
ts.set_token(token)
pro = ts.pro_api()

_paths = os.getcwd().split('/')
if _paths[-1] == "code":
    os.chdir("..")

s_path = Path("_saved_factors")
if not os.path.exists(s_path):
    os.mkdir(s_path)

import utils

#%%
# load data
_load = False
if _load:
    Daily_Quotation = utils.cleandf(
        pd.read_csv(Path('data', 'buffer', 'DayFactorPrcd.csv')))
    Daily_Quotation = Daily_Quotation.rename(columns={
        '代码': 'ts_code',
        '日期': 'trade_date',
        '成交量(股)': 'volume'
    })
    Daily_Quotation['trade_date'] = pd.to_datetime(
        Daily_Quotation['trade_date'])
    Daily_Quotation['end_date'] = (Daily_Quotation['trade_date'] +
                                   pd.offsets.MonthEnd(0))
    Daily_Quotation.index = range(len(Daily_Quotation))
    Daily_Quotation = Daily_Quotation.sort_values(by=['ts_code', 'trade_date'])
    Daily_Quotation.to_pickle(Path('data', 'buffer', 'DayFactorPrcd.pkl'))
else:
    Daily_Quotation = pd.read_pickle(
        Path('data', 'buffer', 'DayFactorPrcd.pkl'))

# the raw data is too large, so split it up and work on the volume columns separately
Daily_vol = Daily_Quotation[['ts_code', 'trade_date', 'volume']]

# %%
'''
3. aeavol
- Quarterly: average daily trading volume (vol) for the 3 days around (? before)
  the earnings announcement -
'''
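# %%
# Sketch (not from the original script) of how aeavol could be assembled from Daily_vol:
# average the daily volume over the 3-day window around each earnings announcement, as
# described in the docstring above. 'ann_dates' (columns: ts_code, ann_date as datetime)
# is an assumed input, e.g. taken from the quarterly statements; the +/-1 calendar-day
# window is a simplification of "3 trading days", and the loop is written for clarity,
# not speed.
def _aeavol_sketch(daily_vol, ann_dates):
    out = []
    for code, ann in ann_dates[['ts_code', 'ann_date']].itertuples(index=False):
        v = daily_vol[daily_vol['ts_code'] == code]
        win = v[(v['trade_date'] >= ann - pd.Timedelta(days=1)) &
                (v['trade_date'] <= ann + pd.Timedelta(days=1))]
        out.append({'ts_code': code,
                    'ann_date': ann,
                    'aeavol': win['volume'].mean()})
    return pd.DataFrame(out)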
import os
from pathlib import Path

import pandas as pd

_paths = os.getcwd().split('/')
if _paths[-1] == "code":
    os.chdir("..")

s_path = Path("_saved_factors")
if not os.path.exists(s_path):
    os.mkdir(s_path)

import utils

#%%
# load data
_load = False
if _load:
    Weekly_Quotation = utils.cleandf(
        pd.read_csv(Path('data', 'buffer', 'WeekFactorPrcd.csv'),
                    encoding='gbk'))
    Weekly_Quotation = Weekly_Quotation.rename(
        {
            '代码': 'ts_code',
            '日期': 'trade_date'
        }, axis=1)
    Weekly_Quotation = Weekly_Quotation.sort_values(
        by=['ts_code', 'trade_date'])
    Weekly_Quotation.index = range(len(Weekly_Quotation))
    Weekly_Quotation.to_pickle(Path('data', 'buffer', 'WeekFactorPrcd.pkl'))
else:
    Weekly_Quotation = pd.read_pickle(
        Path('data', 'buffer', 'WeekFactorPrcd.pkl'))

# %%
import os
from pathlib import Path

import pandas as pd
from tqdm import tqdm
from utils import cleandf

f27_dir = Path('data', 'divi')
f27_pt = Path('data', 'factor27_divi.csv')

if not os.path.exists(f27_pt):
    '''Download the dividend (divi) data'''
    from utils import load_divi
    load_divi(f27_dir)

    dfs = pd.DataFrame()
    for f in tqdm(os.listdir(f27_dir)):
        divi = pd.read_csv(f27_dir / f)
        dfs = pd.concat([dfs, divi], axis=0)
    dfs = cleandf(dfs)
    dfs.to_csv(f27_pt, index=False)

# %%
divi = pd.read_csv(f27_pt)
divi['end_date'] = divi['end_date'].astype(int).astype(str)
divi['end_date'] = pd.to_datetime(divi['end_date']) + pd.offsets.QuarterEnd(0)
divi = divi.drop_duplicates(['ts_code', 'end_date'],
                            keep='last')  # must drop duplicates 2
# pay_01 indicator: 1 where a pay_date is present, NaN where it is missing, then 0
divi['pay_01'] = divi['pay_date'] * 0 + 1
divi['pay_01'] = divi['pay_01'].fillna(value=0)
divi = divi.sort_values(by=['ts_code', 'end_date'], ascending=[True, True])
# within each stock: +1 when payments start, -1 when they stop, 0 otherwise
divi['raw_divi'] = divi.groupby(['ts_code'])['pay_01'].diff().fillna(0)
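# %%
# Sketch (illustration only): one way the raw_divi signal could be read downstream.
# Within each ts_code, pay_01 jumps from 0 to 1 when a dividend is initiated and drops
# from 1 to 0 when it is omitted, so raw_divi = +1 / -1 flags those two events. The
# flag column names below are hypothetical, not taken from the original script.
_div_flags = divi[['ts_code', 'end_date', 'raw_divi']].copy()
_div_flags['divi_init'] = (_div_flags['raw_divi'] == 1).astype(int)   # dividend initiation
_div_flags['divi_omit'] = (_div_flags['raw_divi'] == -1).astype(int)  # dividend omission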
import os
from pathlib import Path

import pandas as pd
import tushare as ts

token = '654d36bf9bb086cb8c973e0f259e38c3efe24975386b7922e88a4cf2'
ts.set_token(token)
pro = ts.pro_api()

import utils

utils.setdir_fctr()

s_path = Path("_saved_factors")
if not os.path.exists(s_path):
    os.mkdir(s_path)

# %%
# load data
_load = True
if _load:
    Monthly_Quotation = utils.cleandf(
        pd.read_csv(Path('data', 'buffer', 'MonFactorPrcd.csv')))
    Monthly_Quotation = utils.todate(Monthly_Quotation,
                                     'end_date',
                                     format='%Y-%m-%d')
    Monthly_Quotation.index = range(len(Monthly_Quotation))
    Monthly_Quotation.to_pickle(Path('data', 'buffer', 'MonFactorPrcd.pkl'))
else:
    Monthly_Quotation = pd.read_pickle(
        Path('data', 'buffer', 'MonFactorPrcd.pkl'))

# one-hot industry dummies, keyed by (ts_code, end_date)
Ind_fctr = pd.get_dummies(
    Monthly_Quotation.set_index(['ts_code', 'end_date'])['industry'],
    prefix='Ind').reset_index()
Ind_fctr.to_csv(Path('_saved_factors', 'IndFactor.csv'), index=False)

# %%
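# %%
# Sketch (usage illustration, not part of the original script): the saved industry
# dummies can be merged back onto any (ts_code, end_date) factor panel, e.g. when a
# signal needs industry controls. 'some_factor_panel' is a placeholder DataFrame name.
_ind = pd.read_csv(Path('_saved_factors', 'IndFactor.csv'),
                   parse_dates=['end_date'])
# some_factor_panel = some_factor_panel.merge(
#     _ind, on=['ts_code', 'end_date'], how='left')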