Esempio n. 1
0
def test(ind):
    """Evaluate the compiled individual *ind* on the test split and write
    one prediction per line to ``test.txt``."""
    print("=> test")
    test_data = load_data(file='test', norm=True)
    func = toolbox.compile(expr=ind)
    # Build all output lines first, then join once (avoids quadratic +=).
    out_lines = [f"{func(*row[1:].tolist())}\n"
                 for _, row in test_data.iterrows()]

    with open("test.txt", 'w') as f:
        f.write("".join(out_lines))
Esempio n. 2
0
def validate(ind):
    """Evaluate the compiled individual *ind* on the validation split and
    write ``calc: <prediction>, label: <target>`` lines to ``validation.txt``."""
    print("=> validate")
    val_data = load_data(file='validate', norm=True)
    func = toolbox.compile(expr=ind)
    out_lines = []
    for _, row in val_data.iterrows():
        features = row[1:].tolist()
        label = row[0]
        out_lines.append(f"calc: {func(*features)}, label: {label}\n")

    with open("validation.txt", 'w') as f:
        f.write("".join(out_lines))
Esempio n. 3
0
def Multi_stocktrain(num, file_name, days, time_stamp, model, epochs,
                     batch_sizes):
    '''
    Continue training the same model on several randomly chosen extra stocks.

    :param num: how many extra stock files to sample as additional training data
    :param file_name: the primary stock file, excluded from the random sample
    :param days: predict the mean closing price over the next `days` days
    :param time_stamp: how many past days of data feed each training sample
    :param model: the model to keep training (must expose a Keras-style .fit)
    :param epochs: number of training epochs per sampled stock
    :param batch_sizes: batch size passed to model.fit
    :return: the model after training on all sampled stocks
    '''
    files = os.listdir('stock_data')
    files.remove(file_name)
    # NOTE(review): range(1, len(files)) can never select files[0] — confirm
    # whether skipping the first remaining file is intentional.
    stock_list = random.sample(range(1, len(files)), num)
    for idx in stock_list:
        print(files[idx])
        file_path = "stock_data\\" + files[idx]
        data = handle_data.load_data(file_path)
        data = handle_data.K_mean(data, days)

        scaler = MinMaxScaler(feature_range=(0, 1))
        train = scaler.fit_transform(data)

        # Sliding windows of `time_stamp` rows predict the scaled target at t.
        # (Inner index renamed so it no longer shadows the outer loop variable.)
        x_train, y_train = [], []
        for t in range(time_stamp, len(train) - days + 1):
            x_train.append(train[t - time_stamp:t])
            y_train.append(train[t, 0])
        x_train, y_train = np.array(x_train), np.array(y_train)

        model.fit(x_train,
                  y_train,
                  epochs=epochs,
                  batch_size=batch_sizes,
                  verbose=1)
    # Bug fix: `return model` was inside the loop, so only the FIRST sampled
    # stock was ever used for training; returning here trains on all `num`.
    return model
Esempio n. 4
0
6) constants are always floats? if they can be integers - how the gaussian noise works?
7) in mutation: arity preservation OR arity disruption?

"""



# take only features
# df = df.iloc[:, 1:]
# dataset = [{'x': x,
#             'y': y,
#             LABEL: z}
#            for x, y, z in [(3, 6, 16), (4, 12, 45), (5, 10, 48), (2, 9, 13.5)]
#            ]

# Load and normalize the dataset once at import time.
data = normalize(load_data())

# One GP terminal per input column title, plus the arithmetic operator set.
variables = [Variable(i) for i in titles]
operators = [PLUS, MULTIPLY, SUBTRACT, DIVIDE]  # SQUARED currently disabled

# Building blocks for GP trees; constants are currently disabled:
# e.g. components + (Constant(range=(2, 6), integer=True),)
components = tuple(variables + operators)

# Tree-size limits used as defaults by DomainChromosome (PEP 8 spacing fix).
max_depth = 10
max_nodes = 30


class DomainChromosome(GPChromosome):
    """GPChromosome specialized with this module's component set and tree-size limits."""

    def __init__(self, components=components, max_depth=max_depth, max_nodes=max_nodes, **kwargs):
        # Defaults are captured from the module-level globals at class-definition
        # time; **kwargs forwards any extra GPChromosome options unchanged.
        super().__init__(components, max_depth=max_depth, max_nodes=max_nodes, **kwargs)

Esempio n. 5
0
def Modle_preday(file_name, days, time_stamp, division):
    '''
    Train a model on one stock's history, predict on the held-out tail,
    report errors, plot results, and persist model + predictions.

    :param file_name: name of the stock-data CSV file under stock_data\\
    :param days: predict the mean closing price over the next `days` days
    :param time_stamp: how many past days of data feed each prediction
    :param division: train/test split ratio applied to the row index
    :return: None (results are plotted and written to files)
    '''
    # Load the previously stored stock history.
    file_path = "stock_data\\" + file_name
    data = handle_data.load_data(file_path)
    data = handle_data.K_mean(data, days)

    # Split into training and test sets by row index.
    divide = division * data.shape[0]
    train = data[data.index <= divide]
    test = data[data.index > divide]

    # Scale each split to [0, 1].
    # NOTE(review): fit_transform on the test split REFITS the scaler on test
    # statistics (leakage vs. using transform()); the inverse_transform refit
    # further below keeps this internally consistent — confirm it is intended.
    scaler = MinMaxScaler(feature_range=(0, 1))
    s_train = scaler.fit_transform(train)
    s_test = scaler.fit_transform(test)

    # Sliding-window training samples: `time_stamp` scaled rows -> scaled target.
    x_train, y_train = [], []
    for i in range(time_stamp, len(train) - days + 1):
        x_train.append(s_train[i - time_stamp:i])
        y_train.append(s_train[i, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)

    # Test samples: inputs are scaled, but targets come from the RAW test
    # frame — predictions are inverse-transformed below before comparison.
    x_test, y_test = [], []
    for i in range(time_stamp, len(s_test) - days + 1):
        x_test.append(s_test[i - time_stamp:i])
        y_test.append(test.iloc[i, 0])
    x_test = np.array(x_test)

    history = LossHistory()

    # Build and train the model.
    epochs = 10
    batch_size = 16
    #model = Model_Lstm(x_train.shape[-1], x_train.shape[1])
    model = Model_Create(x_train.shape[-1], x_train.shape[1])
    model.fit(x_train,
              y_train,
              epochs=epochs,
              batch_size=batch_size,
              verbose=1,
              callbacks=[history])  # train the network

    #Multi_stocktrain(2, file_name, days, time_stamp, model, 3, batch_size)

    # Save the trained model.
    # NOTE(review): strip('.csv') strips any of the characters . c s v from
    # BOTH ends of the name, not just the suffix — a name like 'cs0001.csv'
    # would be over-trimmed; confirm file naming is safe.
    model_file = "model_save\\" + file_name.strip('.csv') + ".h5"
    model.save(model_file)

    # Predict on the test set; refit the scaler on the raw close column so
    # inverse_transform maps scaled predictions back to price units.
    predict_price = model.predict(x_test)
    scaler.fit_transform(pd.DataFrame(test['close'].values))
    predict_price = scaler.inverse_transform(predict_price)

    # Model performance metrics.
    pre = predict_price.reshape(1, -1)[0]
    pre = pre.tolist()
    handle_data.calPerformance(y_test, pre)

    # Mean absolute prediction error.
    # NOTE(review): `sum` shadows the builtin within this function.
    sum = 0
    for i in range(0, len(y_test)):
        sum += abs(y_test[i] - pre[i])
    err = sum / len(y_test)
    print(err)

    # Plot predictions against actual closes.
    dict_data = {'pre': pre, 'close': y_test}
    data_pd = pd.DataFrame(dict_data)
    data_pd.plot()
    plt.plot(data_pd[['pre', 'close']])
    plt.show()

    # Plot the training-loss curve.
    history.loss_plot()

    # Trading suggestion from the last close and last prediction.
    Strategy(test.iloc[-1, 4], pre[-1], err)
    #print(pre[-1])

    # Persist predictions and targets to CSV.
    predata_file = 'pre_data\\' + file_name
    data_pd.to_csv(predata_file)


#handle_data.stock_info(20)
#Modle_preday('000001.SZ.csv', 3, 50, 2/3)
# for root, dirs, files in os.walk('stock_data'):
# print(files)
Esempio n. 6
0
import numpy as np
import csv

from sklearn import linear_model

if __name__ == "__main__":
    from handle_data import load_data, dump_results

    # Load the training set (features, labels).
    x, y = load_data("../train.csv")

    # Fix: Python 2 `print` statements converted to calls — with a single
    # string argument the parenthesized form behaves identically on
    # Python 2 and runs correctly on Python 3.
    print("Loaded training set")

    x_test, y_test = load_data("../test.csv", test=True)

    print("Loaded testing set")

    # Train a logistic-regression classifier on the full training set.
    logreg = linear_model.LogisticRegression(solver='lbfgs', n_jobs=3, max_iter=200)
    logreg.fit(x, y)

    print("Classifier trained")

    y_predict = logreg.predict(x_test)

    print("Predictions ready")

    # NOTE(review): y_predict is computed but never passed anywhere —
    # confirm whether dump_results() should receive the predictions.
    dump_results()