Exemple #1
0
    def __init__(self,
                 file_name=None,
                 l_fac=1.0,
                 s_fac=1.0,
                 year_range=range(2006, 2021),
                 l_range=range(12, 31, 2),
                 s_range=range(2, 12)):
        """Store the settings, load the data file and run the main loop.

        Args:
            file_name: Passed to ``load_file``; required even though the
                default is ``None``.
            l_fac: Stored as ``self.l_fac``.
                NOTE(review): presumably a factor tied to the long moving
                average -- confirm against the code that reads it.
            s_fac: Stored as ``self.s_fac``.
                NOTE(review): presumably the short-side counterpart of
                ``l_fac`` -- confirm.
            year_range: Stored as ``self.year_range``.
            l_range: Long-term moving-average lengths to choose from.
            s_range: Short-term moving-average lengths to choose from.

        Raises:
            AssertionError: If ``file_name`` is ``None``.
        """
        # Change the long/short moving-average lengths here.
        # ========================
        self.l_range = l_range  # long-term moving-average choices
        self.s_range = s_range  # short-term moving-average choices
        self.year_range = year_range
        # ============================

        # A file name is mandatory.  The check is raised explicitly instead
        # of using ``assert`` so it is not stripped under ``python -O``; the
        # exception type is kept so existing callers see no difference.
        if file_name is None:
            raise AssertionError('file_name must be provided')
        # Load the file.
        self.close_data = load_file(file_name).read_file()
        # TODO: should the year range be passed in from load_file instead?
        self.time_list = self.get_ftd_list()
        self.ret_list = {}
        # Reference note: the MACD indicator uses 12 and 26 as its
        # short/long periods.
        self.l_fac = l_fac
        self.s_fac = s_fac

        self.main_loop()
    def __init__(self,
                 file_name=None,
                 l_fac=1.0,
                 s_fac=1.06,
                 year_range=range(2006, 2021),
                 lma=14,
                 sma=3):
        """Store the settings and load the data file.

        Args:
            file_name: Passed to ``load_file``; required even though the
                default is ``None``.
            l_fac: Stored as ``self.l_fac``.
                NOTE(review): presumably a factor tied to the long moving
                average -- confirm against the code that reads it.
            s_fac: Stored as ``self.s_fac``.
                NOTE(review): presumably the short-side counterpart of
                ``l_fac`` -- confirm.
            year_range: Stored as ``self.year_range``.
            lma: Long-term moving-average length.
            sma: Short-term moving-average length.

        Raises:
            AssertionError: If ``file_name`` is ``None``.
        """
        # Change the long/short moving-average lengths here.
        # ========================
        self.lma = lma  # long-term moving-average length
        self.sma = sma  # short-term moving-average length
        self.year_range = year_range
        # ============================

        # A file name is mandatory.  The check is raised explicitly instead
        # of using ``assert`` so it is not stripped under ``python -O``; the
        # exception type is kept so existing callers see no difference.
        if file_name is None:
            raise AssertionError('file_name must be provided')
        # Load the file.
        self.close_data = load_file(file_name).read_file()
        # TODO: should the year range be passed in from load_file instead?
        self.ret_list = {}
        # Reference note: the MACD indicator uses 12 and 26 as its
        # short/long periods.
        self.l_fac = l_fac
        self.s_fac = s_fac
# The TensorFlow log level has to be in the environment before the backend is
# first imported, otherwise the setting may be ignored; keep these two lines
# above the keras imports.
# NOTE(review): assumes nothing earlier in the file already imports
# TensorFlow/Keras -- confirm.
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from LoadFile import load_file

# Input data file handed to load_file() and the name used for the model.
DATA_FILE = './data/CSI.csv'
MODEL_NAME = 'my_model'




# Read the data and keep the 'earning' column as an (n, 1) column array.
close_earning = load_file(DATA_FILE).read_file()
dataset = close_earning['earning'].values
dataset = dataset.reshape(-1, 1)
# Earlier CSV-based loading, kept for reference:
# data = read_csv('r.csv')  # n x 1 CSV: n samples of a 1-D time series
#
# values1 = data.values;
# dataset = values1[:, 0].reshape(-1, 1)  # turn the 1-D array into a 2-D one
# dataset = dataset.astype('float32')  # cast to 32-bit float (original note: guards against zero data)

def cal_the_return(testPredict,testY):
    """Pair predicted and actual test-set returns and write them to ``compare.csv``.

    NOTE(review): only the beginning of this function appears to be present
    here -- ``accy`` and ``accp`` are initialised but neither used nor
    returned in the visible lines. Confirm against the full source.

    Args:
        testPredict: Predicted values, used to build the frame.
            Assumes a single column, since exactly two column names are
            assigned below -- TODO confirm.
        testY: Actual values; only ``testY[0]`` is read.
    """
    compare=pd.DataFrame(testPredict,)
    compare['testy']=testY[0]
    # Predicted test-set returns and actual test-set returns.
    compare.columns=['predy','testy']
    compare.to_csv('compare.csv')
    # One list starting at 1, and six independent lists each starting at 1.
    accy = [1]
    accp = [[1]for i in range(6)]
    def initial_model(self):
        """Train or load the LSTM model, report its error and plot returns.

        Steps, in order:

        1. Read the ``'earning'`` column via ``load_file(self.data_file)``
           and scale it to the range [0, 1].
        2. Use the first 67% of the rows for training and the rest for
           testing; ``self.create_dataset`` turns each part into inputs and
           targets.
        3. If ``self.train_flag`` is truthy, build, fit and save a new model
           as ``./model_file/<model_name>.h5``; otherwise load that file.
        4. Print the train and test RMSE, computed on un-scaled values.
        5. Pass the test predictions to ``self.cal_the_return`` and plot the
           ``acc_y``, ``acc_p0`` and ``acc_p5`` columns of its result; the
           figure is saved under ``./img/`` and then shown.

        The ``./model_file`` and ``./img`` directories are created when
        missing, so a finished training run is not lost to a missing folder.
        """
        import os  # local import so the method does not rely on a file-level one

        # Read the data as an (n, 1) column array.
        close_earning = load_file(self.data_file).read_file()
        dataset = close_earning['earning'].values.reshape(-1, 1)

        numpy.random.seed(7)  # fixed seed for NumPy's random number generator

        # Normalise the whole series to [0, 1].
        scaler = MinMaxScaler(feature_range=(0, 1))
        dataset = scaler.fit_transform(dataset)

        # First 67% of the rows are the training set, the remainder the test set.
        train_size = int(len(dataset) * 0.67)
        train, test = dataset[:train_size, :], dataset[train_size:, :]

        # Build model inputs and targets for both parts.
        trainX, trainY = self.create_dataset(train)
        testX, testY = self.create_dataset(test)

        # Reshape the inputs to [samples, time steps, features] with a single
        # time step, matching the LSTM input shape used below.
        trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

        model_path = './model_file/{}.h5'.format(self.model_name)
        if self.train_flag:
            # Build the LSTM model: 11 hidden units (tune this to trade off
            # prediction accuracy) followed by a single-output dense layer.
            model = Sequential()
            model.add(LSTM(11, input_shape=(1, self.look_back)))
            model.add(Dense(1))
            model.compile(loss=self.loss, optimizer=self.optimizer)
            model.fit(trainX,
                      trainY,
                      epochs=self.epoch,
                      batch_size=100,
                      verbose=2)

            # Save the full model; make sure the target folder exists first.
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            model.save(model_path)
        else:
            model = load_model(model_path)

        trainPredict = model.predict(trainX)
        testPredict = model.predict(testX)

        # Undo the [0, 1] scaling on predictions and targets.
        trainPredict = scaler.inverse_transform(trainPredict)
        trainY = scaler.inverse_transform([trainY])
        testPredict = scaler.inverse_transform(testPredict)
        testY = scaler.inverse_transform([testY])

        trainScore = math.sqrt(
            mean_squared_error(trainY[0], trainPredict[:, 0]))
        print('Train Score: %.5f RMSE' % (trainScore))
        testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
        print('Test Score: %.5f RMSE' % (testScore))

        compare = self.cal_the_return(testPredict, testY)
        print('index    return={};\nStrategy return={}'.format(
            compare['acc_y'].iloc[-1], compare['acc_p0'].iloc[-1]))

        plt.figure(figsize=(20, 6))
        l1, = plt.plot(compare['acc_y'], color='red', linewidth=5)
        l2, = plt.plot(compare['acc_p0'], color='b', linewidth=2)
        l3, = plt.plot(compare['acc_p5'], color='g', linewidth=2)
        # The curves are the returns printed above, not a height in metres.
        plt.ylabel('Accumulated return')
        plt.legend([l1, l2, l3], ('CSI500', 'S0', 'S1'), loc='best')
        plt.title('LSTM Prediction--{}'.format(self.model_name))
        # savefig does not create missing folders itself.
        os.makedirs('./img', exist_ok=True)
        plt.savefig('./img/收益率曲线_{}.svg'.format(self.model_name), format='svg')

        plt.show()