コード例 #1
0
ファイル: prepare_daily.py プロジェクト: orxg/patpred
#from sklearn.model_selection import train_test_split
#from sklearn.metrics import precision_recall_curve,precision_recall_fscore_support,accuracy_score

from podaci.guosen.data import get_stock_features
col_names = ['b', 'c', 'u', 'l', 'ol', 'cl', 'ac']

for stk in target_universe_list:
    try:
        start_time = time.time()

        label = pd.read_hdf(os.path.join(train_data_path, 'label.h5'),
                            stk.encode('utf8'))
        label.loc[:, 'label'].loc[label['label'] == 2] = 1

        # feeatures
        features = get_stock_features('20050101', '20180831', [stk])

        features = features.sort_values('trade_date', ascending=True)

        for i in range(1, 181):
            for col in col_names:
                features['%s_%s' % (col, i)] = features[col].shift(i)
        features.dropna(inplace=True)
        features.drop(col_names, axis=1, inplace=True)

        # combine and get train&test data
        comb = label.join(features.set_index('trade_date').drop('stock_code',
                                                                axis=1),
                          on='trade_date')

        comb = comb.drop(['stock_code'], axis=1)
コード例 #2
0
ファイル: temp_fix.py プロジェクト: orxg/patpred
#%% Model training
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib

from podaci.guosen.data import get_stock_features
col_names = ['b', 'c', 'u', 'l', 'ol', 'cl', 'ac']

for stk in target_universe_list:

    try:
        start_time = time.time()
        label = pd.read_hdf(os.path.join(train_data_path, stk + '.h5'),
                            'label')
        label['label'].loc[label['label'] == 2] = 1
        # feeatures
        features = get_stock_features('20150801', '20180831', [stk])

        features = features.sort_values('trade_date', ascending=True)

        for i in range(1, 31):
            for col in col_names:
                features['%s_%s' % (col, i)] = features[col].shift(i)
        features.dropna(inplace=True)
        features.drop(col_names, axis=1, inplace=True)

        # combine and get train&test data
        comb = label.join(features.set_index('trade_date').drop('stock_code',
                                                                axis=1),
                          on='trade_date')

        comb = comb.drop(['stock_code'], axis=1)
コード例 #3
0
    
    # Build the feature matrix X for one stock (`stk`) from `stock_features`,
    # retrying once with a wider date window when no usable rows remain.
    #
    # Add lags 1..29 of every base column as '<col>_<lag>'.
    # NOTE(review): range(1,30) yields 29 lags — confirm this matches the lag
    # count the model was trained with.
    for i in range(1,30):
        for col in col_names:
            stock_features['%s_%s'%(col,i)] = stock_features[col].shift(i)
    # Rows without a complete lag history contain NaN and are removed.
    stock_features.dropna(inplace = True)
    # Left join: keep every row of trade_calendar and attach the features
    # that share its trade_date.
    stock_features = trade_calendar.join(stock_features.set_index('trade_date'),on = 'trade_date',
                                         how = 'left')
    # target_date is the trade_date of the following row; the last row has no
    # successor, gets NaN and is removed by the dropna below (as are calendar
    # rows that found no matching features).
    stock_features['target_date'] = stock_features['trade_date'].shift(-1)
    stock_features.dropna(inplace = True)
    target_date_list = stock_features['target_date'].tolist()

    # Everything except the date/identifier columns becomes the model input.
    X = stock_features.drop(['trade_date','stock_code','target_date'],axis = 1).values
    
    if len(X) < 1:
        # Not enough sample data: widen the date window and fetch again
        stock_features = get_stock_features(start_date = '20100101',end_date = today_str,
                                            stock_universe = [stk])
        stock_features = stock_features.sort_values('trade_date',ascending = True)
    
        # Same feature construction as above, applied to the wider window.
        for i in range(1,30):
            for col in col_names:
                stock_features['%s_%s'%(col,i)] = stock_features[col].shift(i)
        stock_features.dropna(inplace = True)
        stock_features = trade_calendar.join(stock_features.set_index('trade_date'),on = 'trade_date',
                                             how = 'left')
        stock_features['target_date'] = stock_features['trade_date'].shift(-1)
        stock_features.dropna(inplace = True)
        target_date_list = stock_features['target_date'].tolist()

        X = stock_features.drop(['trade_date','stock_code','target_date'],axis = 1).values
        if len(X) < 1:
            continue # give up updating the signal for this stock