Beispiel #1
0
def evaluate_model(model_path, code, output_dir, input_shape=None):
    """Evaluate a saved WindPuller model on the test split of one instrument.

    Extracts features from ``dataset/<code>.csv``, loads the model from
    ``model_path``, prints loss/accuracy, and writes a per-day CSV with
    close price, realized return, predicted position and cumulative capital
    to ``output/<code>.csv``.

    Parameters
    ----------
    model_path : str
        Path of the saved WindPuller model.
    code : str
        Instrument code; selects ``dataset/<code>.csv``.
    output_dir : str
        Directory where the extracted feature files are written and read.
    input_shape : list[int], optional
        [window_size, num_features]; defaults to [30, 61].
    """
    # None sentinel instead of a shared mutable default list.
    if input_shape is None:
        input_shape = [30, 61]
    extract_from_file("dataset/%s.csv" % code, output_dir, code)
    train_set, test_set = read_feature(output_dir, input_shape, code)
    saved_wp = WindPuller(input_shape).load_model(model_path)
    scores = saved_wp.evaluate(test_set.images, test_set.labels, verbose=0)
    print('Test loss:', scores[0])
    print('test accuracy:', scores[1])
    pred = saved_wp.predict(test_set.images, 1024)
    [cr, cap] = calculate_cumulative_return(test_set.labels, pred)

    # Read date/close back from the raw data file so the report aligns
    # predictions with calendar days.
    # NOTE(review): assumes the raw file is tab-delimited with 'date' and
    # 'close' columns, and that the test window is the last 700 rows -- confirm.
    days_for_test = 700
    tmp = pd.read_csv('dataset/%s.csv' % code, delimiter='\t')
    date = tmp['date'][-days_for_test:]
    close = tmp['close'][-days_for_test:]
    output = pd.DataFrame(
        {
            'Return': test_set.labels,
            'Position': pred.reshape(-1),
            'Capital': cap.reshape(-1),
            'Close': close.values
        },
        index=date,
        columns=['Close', 'Return', 'Position', 'Capital'])
    output.to_csv('output/%s.csv' % code)
def make_model_type3(input_shape,
                     nb_epochs=100,
                     batch_size=128,
                     lr=0.01,
                     n_layers=1,
                     n_hidden=16,
                     rate_dropout=0.3):
    """Train a type-3 WindPuller model on the TXF array data set and save it.

    The model is saved to ``model.<window_size>`` after training; test-set
    loss and accuracy are printed.

    Parameters
    ----------
    input_shape : list[int]
        [window_size, ...]; the window size selects the data window, the
        feature count is taken from the generated data set.
    nb_epochs, batch_size, lr, n_layers, n_hidden, rate_dropout
        Training hyper-parameters forwarded to WindPuller / fit.
    """
    model_path = 'model.%s' % input_shape[0]
    X_train, y_train, X_val, Y_val, X_test, y_test, numFeatures = ft.generateDataSetTXF(
        os.getcwd(), input_wind_size=input_shape[0], toDataSet=False)
    # Bug fix: rebind to a fresh list instead of assigning input_shape[0]
    # in place, so the caller's list is not silently mutated.
    input_shape = [numFeatures, *list(input_shape)[1:]]

    wp = WindPuller(input_shape=input_shape,
                    modelType=3,
                    lr=lr,
                    n_layers=n_layers,
                    n_hidden=n_hidden,
                    rate_dropout=rate_dropout)

    wp.fit(X_train,
           y_train,
           batch_size=batch_size,
           nb_epoch=nb_epochs,
           shuffle=True,
           verbose=1,
           validation_data=(X_val, Y_val))

    scores = wp.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    wp.model.save(model_path)
def load_model_type(model_path,
                    input_shape,
                    _modelTyp=0,
                    nb_epochs=100,
                    batch_size=128,
                    lr=0.01,
                    n_layers=1,
                    n_hidden=16,
                    rate_dropout=0.3):
    """Load a saved WindPuller model, evaluate it on the TXF test set and
    dump (prediction, label) pairs to ``output.<window_size>``.

    Parameters
    ----------
    model_path : str
        Saved model to load.
    input_shape : list[int]
        [window_size, num_features]; the feature count is overwritten with
        the value reported by the data-set generator.
    _modelTyp : int
        WindPuller model type used to rebuild the architecture.
    nb_epochs, batch_size : int
        Accepted for interface parity with the make_* functions; unused here.
    lr, n_layers, n_hidden, rate_dropout
        Must match the hyper-parameters the model was trained with.
    """
    train_set, validation_set, test_set, numFeatures = ft.generateDataSetTXF(
        os.getcwd(), input_wind_size=input_shape[0], toDataSet=True)
    # Work on a copy so the caller's list is not mutated in place.
    input_shape = list(input_shape)
    input_shape[1] = numFeatures
    # Bug fix: the original hard-coded modelType=1 and ignored _modelTyp.
    wp = WindPuller(input_shape=input_shape,
                    modelType=_modelTyp,
                    lr=lr,
                    n_layers=n_layers,
                    n_hidden=n_hidden,
                    rate_dropout=rate_dropout)
    saved_wp = wp.load_model(model_path)
    scores = saved_wp.evaluate(test_set.images, test_set.labels, verbose=0)
    print('Test loss:', scores[0])
    print('test accuracy:', scores[1])
    pred = saved_wp.predict(test_set.images, 1024)
    pred = np.reshape(pred, [-1])
    result = np.array([pred, test_set.labels]).transpose()
    # One tab-separated "prediction<TAB>label" pair per line.
    with open('output.' + str(input_shape[0]), 'w') as fp:
        for i in range(result.shape[0]):
            for val in result[i]:
                fp.write(str(val) + "\t")
            fp.write('\n')
def make_model_type(input_shape,
                    _modelTyp=0,
                    nb_epochs=100,
                    batch_size=128,
                    lr=0.01,
                    n_layers=1,
                    n_hidden=16,
                    rate_dropout=0.3):
    """Train a WindPuller model of the given type on the TXF data set.

    Saves the trained model to ``model.<window_size>`` and prints test-set
    loss and accuracy.

    Parameters
    ----------
    input_shape : list[int]
        [window_size, num_features]; the feature count is overwritten with
        the value reported by the data-set generator.
    _modelTyp : int
        WindPuller model type to build.
    nb_epochs, batch_size, lr, n_layers, n_hidden, rate_dropout
        Training hyper-parameters forwarded to WindPuller / fit.
    """
    model_path = 'model.%s' % input_shape[0]
    train_set, validation_set, test_set, numFeatures = ft.generateDataSetTXF(
        os.getcwd(), input_wind_size=input_shape[0], toDataSet=True)

    # Bug fix: mutate a copy, not the caller's list.
    input_shape = list(input_shape)
    input_shape[1] = numFeatures
    wp = WindPuller(input_shape=input_shape,
                    modelType=_modelTyp,
                    lr=lr,
                    n_layers=n_layers,
                    n_hidden=n_hidden,
                    rate_dropout=rate_dropout)

    wp.fit(train_set.images,
           train_set.labels,
           batch_size=batch_size,
           nb_epoch=nb_epochs,
           shuffle=True,
           verbose=1,
           validation_data=(validation_set.images, validation_set.labels))

    scores = wp.evaluate(test_set.images, test_set.labels, verbose=0)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    wp.model.save(model_path)
Beispiel #5
0
def model_predict(model_path, code, input_shape=None):
    """Load a saved model and print one prediction per feature window.

    Extracts feature-only rows from ``dataset/<code>.csv`` into
    ``./<code>_feature_only.<window_size>``, reshapes them into windows and
    prints the model's signal for each one.

    Parameters
    ----------
    model_path : str
        Saved model to load.
    code : str
        Instrument code; selects ``dataset/<code>.csv``.
    input_shape : list[int], optional
        [window_size, num_features]; defaults to [30, 83].
    """
    # None sentinel instead of a shared mutable default list.
    if input_shape is None:
        input_shape = [30, 83]
    extractfeatureonly_from_file("dataset/%s.csv" % code, code)
    ultimate_features = numpy.loadtxt("%s/%s_feature_only.%s" % (".", code, str(input_shape[0])))
    ultimate_features = numpy.reshape(ultimate_features, [-1, input_shape[0], input_shape[1]])
    saved_wp = WindPuller(input_shape).load_model(model_path)
    pred = saved_wp.predict(ultimate_features, 1024)
    # One signal per input window.
    for p in pred:
        print(str(p))
Beispiel #6
0
def evaluate_model(model_path, code, input_shape=None):
    """Evaluate a saved WindPuller model on one instrument and print a
    per-day report of label, prediction and cumulative return.

    Parameters
    ----------
    model_path : str
        Saved model to load.
    code : str
        Instrument code; selects ``dataset/<code>.csv``.
    input_shape : list[int], optional
        [window_size, num_features]; defaults to [30, 83].
    """
    # None sentinel instead of a shared mutable default list.
    if input_shape is None:
        input_shape = [30, 83]
    extract_from_file("dataset/%s.csv" % code, code)
    train_set, test_set = read_feature(".", input_shape, code)
    saved_wp = WindPuller(input_shape).load_model(model_path)
    scores = saved_wp.evaluate(test_set.images, test_set.labels, verbose=0)
    print('Test loss:', scores[0])
    print('test accuracy:', scores[1])
    pred = saved_wp.predict(test_set.images, 1024)
    cr = calculate_cumulative_return(test_set.labels, pred)
    # Tab-separated report, one row per test day.
    print("changeRate\tpositionAdvice\tprincipal\tcumulativeReturn")
    for i in range(len(test_set.labels)):
        print(str(test_set.labels[i]) + "\t" + str(pred[i]) + "\t" + str(cr[i] + 1.) + "\t" + str(cr[i]))
Beispiel #7
0
def make_separate_model(nb_epochs=100,
                        batch_size=128,
                        lr=0.01,
                        n_layers=1,
                        n_hidden=14,
                        rate_dropout=0.3,
                        input_shape=None):
    """Train one model per instrument code on its own feature set.

    For each code found under ``./ultimate_feature`` the (shared) WindPuller
    is fitted on that code's training set, checkpointed, saved to
    ``model.<code>`` and evaluated; (prediction, label) pairs are dumped to
    ``output.<window_size>``.

    Parameters
    ----------
    nb_epochs, batch_size, lr, n_layers, n_hidden, rate_dropout
        Training hyper-parameters forwarded to WindPuller / fit.
    input_shape : list[int], optional
        Initial [window_size, num_features]; defaults to [30, 73].
    """
    # None sentinel instead of a shared mutable default list.
    if input_shape is None:
        input_shape = [30, 73]
    train_sets, test_sets = read_separate_feature("./ultimate_feature")

    # NOTE(review): the network is built once here with the initial shape,
    # but input_shape is recomputed per code below -- confirm all codes
    # share the same feature shape.
    wp = WindPuller(input_shape=input_shape,
                    lr=lr,
                    n_layers=n_layers,
                    n_hidden=n_hidden,
                    rate_dropout=rate_dropout)
    wp.build_model()
    for code, train_set in train_sets.items():
        test_set = test_sets[code]
        input_shape = [train_set.images.shape[1], train_set.images.shape[2]]
        print(input_shape)
        model_path = 'model.%s' % code

        print(train_set.images.shape)
        wp.fit(train_set.images,
               train_set.labels,
               batch_size=batch_size,
               nb_epoch=nb_epochs,
               shuffle=False,
               verbose=1,
               validation_data=(test_set.images, test_set.labels),
               callbacks=[
                   TensorBoard(histogram_freq=1000),
                   ModelCheckpoint(filepath=model_path + '.best.checkpoints',
                                   save_best_only=True,
                                   mode='min')
               ])
        scores = wp.evaluate(test_set.images, test_set.labels, verbose=0)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])

        wp.model.save(model_path)
        saved_wp = wp.load_model(model_path)
        scores = saved_wp.evaluate(test_set.images, test_set.labels, verbose=0)
        print('Test loss:', scores[0])
        print('test accuracy:', scores[1])
        pred = saved_wp.predict(test_set.images, 1024)
        pred = numpy.reshape(pred, [-1])
        result = numpy.array([pred, test_set.labels]).transpose()
        # One tab-separated "prediction<TAB>label" pair per line.
        with open('output.' + str(input_shape[0]), 'w') as fp:
            for i in range(result.shape[0]):
                for val in result[i]:
                    fp.write(str(val) + "\t")
                fp.write('\n')
def load_model_type3(input_shape,
                     lr=0.01,
                     n_layers=1,
                     n_hidden=16,
                     rate_dropout=0.3):
    """Load a saved model for the TXF array data set, evaluate it on the
    test split and dump (prediction, label) pairs to ``output.<window>``.

    Bug fix: the original body referenced ``lr``, ``n_layers``, ``n_hidden``,
    ``rate_dropout``, ``test_set``, ``X_test`` and ``y_test`` without
    defining any of them (guaranteed NameError).  The hyper-parameters are
    now keyword arguments and the data set is loaded the same way
    ``make_model_type3`` loads it (toDataSet=False arrays).

    Parameters
    ----------
    input_shape : list[int]
        [window_size, ...]; selects the data window and the saved model
        file ``model.<window_size>``.
    lr, n_layers, n_hidden, rate_dropout
        Must match the hyper-parameters the model was trained with.
    """
    window_size = input_shape[0]
    model_path = 'model.%s' % window_size
    X_train, y_train, X_val, Y_val, X_test, y_test, numFeatures = ft.generateDataSetTXF(
        os.getcwd(), input_wind_size=window_size, toDataSet=False)
    # Fresh list so the caller's input_shape is not mutated.
    input_shape = [numFeatures, *list(input_shape)[1:]]
    # NOTE(review): the original passed modelType=2 although the function
    # name says "type3"; kept as-is -- confirm against the saved model.
    wp = WindPuller(input_shape=input_shape,
                    modelType=2,
                    lr=lr,
                    n_layers=n_layers,
                    n_hidden=n_hidden,
                    rate_dropout=rate_dropout)
    saved_wp = wp.load_model(model_path)
    scores = saved_wp.evaluate(X_test, y_test, verbose=0)
    print('Test loss:', scores[0])
    print('test accuracy:', scores[1])
    pred = saved_wp.predict(X_test, 1024)
    pred = numpy.reshape(pred, [-1])
    result = numpy.array([pred, y_test]).transpose()
    # One tab-separated "prediction<TAB>label" pair per line.
    with open('output.' + str(window_size), 'w') as fp:
        for i in range(result.shape[0]):
            for val in result[i]:
                fp.write(str(val) + "\t")
            fp.write('\n')
Beispiel #9
0
def make_model(input_shape, nb_epochs=100, batch_size=128, lr=0.01, n_layers=1, n_hidden=16, rate_dropout=0.3):
    """Train a WindPuller model on the ultimate feature set.

    Fits with TensorBoard logging and best-checkpoint saving, prints
    test-set loss/accuracy, saves the model, reloads it as a sanity check
    and dumps (prediction, label) pairs to ``output.<window_size>``.
    """
    model_path = '/output/model.{}.{}c.{}l.{}'.format(
        input_shape[0], n_hidden, n_layers, nb_epochs)

    puller = WindPuller(input_shape=input_shape, lr=lr, n_layers=n_layers,
                        n_hidden=n_hidden, rate_dropout=rate_dropout)
    train_set, test_set = read_ultimate("/dataset/", input_shape)

    fit_callbacks = [
        TensorBoard(log_dir='/output/logs', histogram_freq=100),
        ModelCheckpoint(filepath=model_path + '.best', save_best_only=True, mode='min'),
    ]
    puller.fit(train_set.images, train_set.labels, batch_size=batch_size,
               nb_epoch=nb_epochs, shuffle=False, verbose=1,
               validation_data=(test_set.images, test_set.labels),
               callbacks=fit_callbacks)

    scores = puller.evaluate(test_set.images, test_set.labels, verbose=0)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    puller.model.save(model_path)

    # Round-trip through disk and re-evaluate the reloaded model.
    reloaded = puller.load_model(model_path)
    scores = reloaded.evaluate(test_set.images, test_set.labels, verbose=0)
    print('Test loss:', scores[0])
    print('test accuracy:', scores[1])

    predictions = numpy.reshape(reloaded.predict(test_set.images, 1024), [-1])
    paired = numpy.array([predictions, test_set.labels]).transpose()
    # One tab-separated "prediction<TAB>label" pair per line.
    with open('output.' + str(input_shape[0]), 'w') as fp:
        for row in paired:
            for val in row:
                fp.write(str(val) + "\t")
            fp.write('\n')
Beispiel #10
0
def simple_predict_tomorrow():
    """Generate daily close-based signals with the long and short models.

    Extracts features for every file under ``data_dir`` and writes one
    buy/sell signal CSV per instrument into ``./signal_close/``.
    """
    signal_dir = './signal_close/'
    date = get_date_list()
    files = os.listdir(data_dir)

    # 0. Load both models up front.
    wp_buy = WindPuller(input_shape).load_model(model_path_buy)
    wp_sell = WindPuller(input_shape).load_model(model_path_sell)

    # 1. Extract features for every instrument.
    days_for_test = len(date)
    extract_all_features(data_dir, feature_dir, days_for_test)

    for (idf, f) in enumerate(files):

        # 2. Read this instrument's test-set features.
        output_prefix = f.split('.')[0]
        test_set = read_features(feature_dir, input_shape, output_prefix)

        # 3. Predict buy/sell signals over the test window.
        signal_buy = wp_buy.predict(test_set.images, 1024)[-days_for_test:]
        signal_sell = wp_sell.predict(test_set.images, 1024)[-days_for_test:]

        # 4. Save the signals, indexed by date.
        data_signal = pd.DataFrame(
            {
                'signal_close_buy': signal_buy.reshape(-1),
                'signal_close_sell': signal_sell.reshape(-1)
            },
            index=date)
        f_path_signal = os.path.join(signal_dir, f)
        data_signal.to_csv(f_path_signal)
        print('%d 指数%s处理完毕' % (idf, output_prefix))
        print('-' * 50)

    print('全部处理完毕!')
    print('=' * 80)
Beispiel #11
0
def predict_tomorrow(model_path="model.30.best", extract_all=False):
    """Predict tomorrow's trading signal for each index.

    1. Extract features for every instrument in the data sets;
    2. read the test-set features;
    3. load the trained model and predict signals on the test set;
    4. save one capital-curve CSV per index.

    Parameters
    ----------
    model_path : str
        Saved model to load.
    extract_all : bool
        Process every file in the data dir, or only the three main indices.
    """

    # 1. Feature extraction
    data_dir = './newdata/'
    output_dir = './output09/'
    feature_dir = './stock_features/'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # The test set starts on 2017-09-01.
    df = pd.read_csv('dataset/000300.csv', index_col='date', parse_dates=True)
    days_for_test = df.shape[0] - df.index.get_loc('2017-09-01')
    extract_all_features(data_dir, feature_dir, days_for_test, extract_all)

    # 2. Read features
    input_shape = [30, 61]
    file_list = os.listdir(data_dir)
    if extract_all:
        column_names = [s.split(sep='.')[0] for s in file_list]
    else:
        # Otherwise only test the three main indices.
        column_names = ['000016', '000300', '000905']

    # Load the model once for all instruments.
    wp = WindPuller(input_shape).load_model(model_path)

    # The code -> display-name table is loop-invariant, so read it once.
    # Bug fix: np.str was removed from NumPy (1.24); use the builtin str.
    names = pd.read_csv('指数名称.csv',
                        dtype={
                            'code': str,
                            'name': str
                        },
                        engine='python')
    names.set_index('code', inplace=True)
    names = names.to_dict()['name']

    for f in column_names:

        _, test_set = read_feature(feature_dir, input_shape, f)
        tmp = pd.read_csv('dataset/%s.csv' % f)

        val = test_set
        pred = wp.predict(val.images, 1024)
        print(pred[-1])
        [cr, cap] = calculate_cumulative_return_cost(val.labels, pred)

        # Row range of the raw file covered by the test window.
        index = range(tmp.shape[0] - days_for_test - 1, tmp.shape[0])

        # 1. Build the capital-curve data.
        date = tmp['date'].iloc[index]
        close = tmp['close'].iloc[index]
        buy_hold = close / close.iloc[0] - 1
        output = pd.DataFrame(
            {
                'Close': close.values,
                'Pct_change': np.concatenate(([np.nan], val.labels)),
                'Position': np.concatenate(([np.nan], pred.reshape(-1))),
                'Cum_return': cr.reshape(-1),
                'Buy_hold': buy_hold.values
            },
            index=date,
            columns=[
                'Close', 'Pct_change', 'Position', 'Cum_return', 'Buy_hold'
            ])
        n = names[f]

        # Write to file.
        cap_line_dir = os.path.join(output_dir, 'stocks')
        if not os.path.exists(cap_line_dir):
            os.mkdir(cap_line_dir)
        cap_line_f = os.path.join(cap_line_dir, '%s_test.csv' % n)
        output.to_csv(cap_line_f)

        # 2. (Statistics / capital-curve plotting / report generation is
        # handled by test_model's calc_perf; intentionally skipped here.)
        print('计算完毕')
        print('=' * 50)
Beispiel #12
0
def test_model(model_path="model.30.best",
               extract_all=True,
               days_for_test=False):
    """Back-test a trained model on the train and test splits of every index.

    1. Extract features for each instrument in the data set;
    2. read the train and test sets;
    3. load the trained model and predict on both splits;
    4. build the capital-curve CSV and performance report for each split.

    Parameters
    ----------
    model_path : str
        Saved model to load.
    extract_all : bool
        Process every file in the data dir, or only the three main indices.
    days_for_test : int or False
        Size of the test window; False means "infer from 2017-09-01".
    """

    # 1. Feature extraction
    data_dir = './dataset/'
    output_dir = './output09/'
    feature_dir = './stock_features/'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Only extract the test-window features.
    if days_for_test == False:
        # The test set starts on 2017-09-01.
        df = pd.read_csv('dataset/000001.csv',
                         index_col='date',
                         parse_dates=True)
        days_for_test = df.shape[0] - df.index.get_loc('2017-09-01')

    extract_all_features(data_dir, feature_dir, days_for_test)

    # 2. Read features
    input_shape = [30, 61]
    file_list = os.listdir(data_dir)
    if extract_all:
        column_names = [s.split(sep='.')[0] for s in file_list]
    else:
        # Otherwise only test the three main indices.
        column_names = ['000016', '000300', '000905']

    wp = WindPuller(input_shape).load_model(model_path)

    # The code -> display-name table is loop-invariant, so read it once
    # instead of once per split per instrument.
    # Bug fix: np.str was removed from NumPy (1.24); use the builtin str.
    names = pd.read_csv('指数名称.csv',
                        dtype={
                            'code': str,
                            'name': str
                        },
                        engine='python')
    names.set_index('code', inplace=True)
    names = names.to_dict()['name']

    for f in column_names:

        train_set, test_set = read_feature(feature_dir, input_shape, f)
        data_set = {'train': train_set, 'test': test_set}
        tmp = pd.read_csv('dataset/%s.csv' % f)

        for key in data_set:
            # 3. Predict on each split and save the plotted results.
            print('当前处理 %s_%s\n' % (f, key))
            val = data_set[key]
            pred = wp.predict(val.images, 1024)
            [cr, cap] = calculate_cumulative_return_cost(val.labels, pred)

            # Row range of the raw file covered by this split.
            if key == 'train':
                index = range(input_shape[0] - 1,
                              input_shape[0] + pred.shape[0])
            elif key == 'test':
                index = range(tmp.shape[0] - days_for_test - 1, tmp.shape[0])

            # 1). Build the capital-curve data.
            date = tmp['date'].iloc[index]
            close = tmp['close'].iloc[index]
            buy_hold = close / close.iloc[0] - 1
            output = pd.DataFrame(
                {
                    'Close': close.values,
                    'Pct_change': np.concatenate(([np.nan], val.labels)),
                    'Position': np.concatenate(([np.nan], pred.reshape(-1))),
                    'Cum_return': cr.reshape(-1),
                    'Buy_hold': buy_hold.values
                },
                index=date,
                columns=[
                    'Close', 'Pct_change', 'Position', 'Cum_return', 'Buy_hold'
                ])
            n = names[f]

            # Write to file.
            cap_line_dir = os.path.join(output_dir, 'stocks')
            if not os.path.exists(cap_line_dir):
                os.mkdir(cap_line_dir)
            cap_line_f = os.path.join(cap_line_dir, '%s_%s.csv' % (n, key))
            output.to_csv(cap_line_f)

            # 2). Compute statistics, plot the capital curve, build report.
            print('开始计算策略表现 %s_%s_%s\n' % (f, n, key))
            calc_perf(output, f, n, key, output_dir)
            print('计算完毕')
            print('=' * 50)
Beispiel #13
0
def paper_test():
    """Paper-trading test loop.

    For each trading day, read that day's 14:57 snapshot, merge it with the
    historical data on disk, extract fresh features, run the trained
    buy/sell models, and append the resulting signals to per-instrument
    CSVs under ``./paper_signals``.
    """
    merged_data_dir = './paper_merge'
    signal_dir = './paper_signals'
    date = get_date_list()
    files = os.listdir(tsl_data_dir)

    # 0. Load the models.
    wp_buy = WindPuller(input_shape).load_model(model_path_buy)
    wp_sell = WindPuller(input_shape).load_model(model_path_sell)

    for (idx, d) in enumerate(date):

        print('当前处理日期\t%s' % d)
        for (idf, f) in enumerate(files):

            # 1. Read the new data.
            f_path1 = os.path.join(tsl_data_dir, f)
            df1 = pd.read_csv(f_path1)
            # Keep only this day's rows.
            df1 = df1[df1['date'] == d]
            # Bug fix: the original used '==' (a no-op comparison) where an
            # assignment was intended: scale the 14:57 volume up to a full
            # session estimate.
            df1['volume'] = df1['volume'] * 80 / 79

            # 2. Read the existing historical data.
            f_path2 = os.path.join(data_dir, f)
            df2 = pd.read_csv(f_path2)

            # 3. Merge: truncate the history before day d, then append the
            # fresh row, keeping the original column order.
            df2 = df2.iloc[:int(np.flatnonzero(df2.date == d))]
            df3 = df2.append(df1, ignore_index=True)
            df3 = df3[df2.columns]

            # 4. Save the merged data.
            f_path_merged = os.path.join(merged_data_dir, f)
            df3.to_csv(f_path_merged, index=False)

            # 5. Extract one feature row into the feature directory.
            output_prefix = f.split('.')[0]
            extract_from_file(idx, f_path_merged, feature_dir, output_prefix,
                              1)

            # 6. Read the extracted features back.
            test_set = read_features(feature_dir, input_shape, output_prefix)

            # 7. Predict the signals; keep only the latest one.
            signal_buy = wp_buy.predict(test_set.images, 1024)
            signal_buy = float(signal_buy[-1])

            signal_sell = wp_sell.predict(test_set.images, 1024)
            signal_sell = float(signal_sell[-1])

            # 8. Append the signals.
            f_path_signal = os.path.join(signal_dir, f)

            if idx == 0:
                # Bug fix: terminate the header with a newline so the first
                # data row does not land on the same line.
                title = 'date,signal_buy,signal_sell\n'
                with open(f_path_signal, 'a') as file:
                    file.write(title)

            write = '%s,%.2f,%.2f\n' % (d, signal_buy, signal_sell)
            with open(f_path_signal, 'a') as file:
                file.write(write)

            n_read = idx * len(files) + idf + 1
            print('当前处理第%d个文件,剩余%d个文件,请耐心等待...' %
                  (n_read, len(files) * len(date) - n_read))
            print('-' * 50)

    print('\n全部处理完毕!')
    print('=' * 80)
def main():
    '''
    Fetch the 14:57 quotes for the three indices each day, correct the data
    and append it to the local data files, then extract features, compute
    the signals and save them to the corresponding files. After the 15:01
    close, repeat the same steps once more.
    '''
    # 1. Load the trained keras models (one for long, one for short).
    print('=' * 80)
    print('%s\t加载keras训练完的模型' % (datetime.now().strftime('%H:%M:%S')))
    set_gpu_fraction()
    model_path_buy = 'model.30.buy'
    wp_buy = WindPuller(input_shape).load_model(model_path_buy)

    model_path_sell = 'model.30.sell'
    wp_sell = WindPuller(input_shape).load_model(model_path_sell)
    print('\n%s\t模型加载完毕\n' % (datetime.now().strftime('%H:%M:%S')))

    # 2. Poll for the 14:57 quote data.
    print('=' * 80)
    print('%s\t开始查询实时行情数据,将返回14:57的第一笔数据' %
          datetime.now().strftime('%H:%M:%S'))
    running = 1
    # Poll no later than 15:01.
    stop_time = datetime.now().replace(hour=15,
                                       minute=1,
                                       second=0,
                                       microsecond=0)

    while running:

        print('时间未到,请耐心等待数据...')
        # NOTE(review): get_realtime_data presumably returns (running, data)
        # where running stays 1 until the 14:57 snapshot arrives -- confirm.
        running, data = get_realtime_data()
        if running == 1:
            time.sleep(3)
        # Stop as soon as the 14:57 data arrives or the deadline passes.
        running = running and datetime.now() < stop_time

    print('%s\t查询数据完毕,开始合并数据\n' % datetime.now().strftime('%H:%M:%S'))
    print('=' * 80)

    # 3. Update the local data (two files):
    # file 1: append one 14:56 row to the raw data;
    # file 2: append 1 row x 4 columns to the signal-comparison file.
    update_csv(data)

    # 4. Extract the latest features.
    print('%s\t开始提取特征\n' % datetime.now().strftime('%H:%M:%S'))
    extract_all_features(data_dir,
                         feature_dir,
                         days_for_test=1,
                         extract_all=False)
    print('%s\t特征提取完毕\n' % datetime.now().strftime('%H:%M:%S'))
    print('=' * 80)

    # 5. Read the raw data, build features, predict tomorrow's signal, save.
    predict_tomorrow(wp_buy, wp_sell, is_last_column=False)
    print('请等待15:01程序会继续获取当日行情数据进行计算和预测...')
    print('=' * 80)

    # 6. Run once more at 15:01:20 to fetch the after-close quotes.
    stop_time = stop_time.replace(second=20)
    while datetime.now() < stop_time:
        time.sleep(3)
        print('等待中,请勿中断...')

    print('=' * 25, '开始获取当日收盘后行情', '=' * 25)
    _, data = get_realtime_data()

    # 7. Update the local data files with today's close (two files):
    # file 1: amend the 14:57 row at the end of the raw data;
    # file 2: append 2 columns to the last row of the signal-comparison file.
    update_close_csv(data)

    # 8. Extract the latest features again.
    print('%s\t开始提取特征\n' % datetime.now().strftime('%H:%M:%S'))
    extract_all_features(data_dir,
                         feature_dir,
                         days_for_test=1,
                         extract_all=False)
    print('%s\t特征提取完毕\n' % datetime.now().strftime('%H:%M:%S'))
    print('=' * 80)

    # 9. Read the after-close data, build features, predict tomorrow's signal.
    predict_tomorrow(wp_buy, wp_sell, is_last_column=True)

    print('%s\t完成!\n' % datetime.now().strftime('%H:%M:%S'))
    print('=' * 80)