# Per-stock training-frame preparation: load the labels, expand the raw
# feature columns into 180 lagged copies, and join both on trade_date.
#from sklearn.model_selection import train_test_split
#from sklearn.metrics import precision_recall_curve,precision_recall_fscore_support,accuracy_score
from podaci.guosen.data import get_stock_features

# Raw feature columns that are expanded into lagged copies below.
col_names = ['b', 'c', 'u', 'l', 'ol', 'cl', 'ac']

for stk in target_universe_list:
    try:
        start_time = time.time()

        # Labels are read from label.h5 under the key of the stock code.
        # NOTE(review): stk.encode('utf8') yields bytes on Python 3 -- confirm
        # that read_hdf accepts this key on the interpreter in use.
        label = pd.read_hdf(os.path.join(train_data_path, 'label.h5'), stk.encode('utf8'))
        # Rewrite label value 2 to 1. A single .loc call assigns into `label`
        # itself; the earlier chained form (`.loc[:, 'label'].loc[...] = 1`)
        # assigned through an intermediate Series and could leave `label`
        # unchanged.
        label.loc[label['label'] == 2, 'label'] = 1

        # features
        features = get_stock_features('20050101', '20180831', [stk])
        features = features.sort_values('trade_date', ascending=True)
        # Build every lagged column first and attach them with one concat;
        # inserting 1260 columns one at a time fragments the frame. The
        # column order (lag-major, then col_names order) is unchanged.
        lag_columns = [
            features[col].shift(i).rename('%s_%s' % (col, i))
            for i in range(1, 181)
            for col in col_names
        ]
        features = pd.concat([features] + lag_columns, axis=1)
        # Rows without a full lag history contain NaN and are discarded.
        features.dropna(inplace=True)
        # Only the lagged copies are kept; the unshifted columns are dropped.
        features.drop(col_names, axis=1, inplace=True)

        # combine and get train&test data
        comb = label.join(
            features.set_index('trade_date').drop('stock_code', axis=1),
            on='trade_date',
        )
        comb = comb.drop(['stock_code'], axis=1)
    except Exception:
        # NOTE(review): the original `try:` continues past the end of the
        # visible fragment and its handler is not shown. Re-raise so nothing
        # is swallowed until the real handler is restored here.
        raise
#%% Model training
# Per-stock training-frame preparation: load the labels, expand the raw
# feature columns into 30 lagged copies, and join both on trade_date.
from sklearn.neural_network import MLPClassifier
# NOTE(review): sklearn.externals.joblib is not available in recent
# scikit-learn releases -- confirm the pinned version before upgrading.
from sklearn.externals import joblib
from podaci.guosen.data import get_stock_features

# Raw feature columns that are expanded into lagged copies below.
col_names = ['b', 'c', 'u', 'l', 'ol', 'cl', 'ac']

for stk in target_universe_list:
    try:
        start_time = time.time()

        # Each stock has its own HDF5 file; the labels sit under key 'label'.
        label = pd.read_hdf(os.path.join(train_data_path, stk + '.h5'), 'label')
        # Rewrite label value 2 to 1. A single .loc call assigns into `label`
        # itself; the earlier chained form (`label['label'].loc[...] = 1`)
        # assigned through an intermediate Series and could leave `label`
        # unchanged.
        label.loc[label['label'] == 2, 'label'] = 1

        # features
        features = get_stock_features('20150801', '20180831', [stk])
        features = features.sort_values('trade_date', ascending=True)
        # Build every lagged column first and attach them with one concat
        # instead of 210 single-column inserts. The column order (lag-major,
        # then col_names order) is unchanged.
        lag_columns = [
            features[col].shift(i).rename('%s_%s' % (col, i))
            for i in range(1, 31)
            for col in col_names
        ]
        features = pd.concat([features] + lag_columns, axis=1)
        # Rows without a full lag history contain NaN and are discarded.
        features.dropna(inplace=True)
        # Only the lagged copies are kept; the unshifted columns are dropped.
        features.drop(col_names, axis=1, inplace=True)

        # combine and get train&test data
        comb = label.join(
            features.set_index('trade_date').drop('stock_code', axis=1),
            on='trade_date',
        )
        comb = comb.drop(['stock_code'], axis=1)
    except Exception:
        # NOTE(review): the original `try:` continues past the end of the
        # visible fragment and its handler is not shown. Re-raise so nothing
        # is swallowed until the real handler is restored here.
        raise
for i in range(1,30): for col in col_names: stock_features['%s_%s'%(col,i)] = stock_features[col].shift(i) stock_features.dropna(inplace = True) stock_features = trade_calendar.join(stock_features.set_index('trade_date'),on = 'trade_date', how = 'left') stock_features['target_date'] = stock_features['trade_date'].shift(-1) stock_features.dropna(inplace = True) target_date_list = stock_features['target_date'].tolist() X = stock_features.drop(['trade_date','stock_code','target_date'],axis = 1).values if len(X) < 1: # 样本数据宽度数量不足扩大取数据的宽度 stock_features = get_stock_features(start_date = '20100101',end_date = today_str, stock_universe = [stk]) stock_features = stock_features.sort_values('trade_date',ascending = True) for i in range(1,30): for col in col_names: stock_features['%s_%s'%(col,i)] = stock_features[col].shift(i) stock_features.dropna(inplace = True) stock_features = trade_calendar.join(stock_features.set_index('trade_date'),on = 'trade_date', how = 'left') stock_features['target_date'] = stock_features['trade_date'].shift(-1) stock_features.dropna(inplace = True) target_date_list = stock_features['target_date'].tolist() X = stock_features.drop(['trade_date','stock_code','target_date'],axis = 1).values if len(X) < 1: continue # 放弃更新此股票信号