Ejemplo n.º 1
0
# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import ensemble
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from CrawlerUtil import read_csv
from sklearn.model_selection import train_test_split


# #############################################################################
# Import the data
# Load the data set

file_dir = 'C:/Users/yuzhe/Desktop/OptionAnalysis/files/'
# read_csv is the project helper imported from CrawlerUtil; it presumably
# returns a list of rows with the header row first -- TODO confirm.
csv_file = read_csv(file_dir + 'TestUSDIndex.csv')
dataNum = len(csv_file)  # number of rows returned, including row 0
featureNum = len(csv_file[0])-2  # width of the first row minus two
print("特征的维度", featureNum)
dataMat = np.array(csv_file)
# Row 0 is dropped (assumed to be a header) and the rest is parsed as float.
# X takes columns 1 .. featureNum-1 and y takes column featureNum.
# NOTE(review): X therefore has featureNum-1 columns although featureNum is
# the value printed above, and the final column is never read -- confirm that
# this is intended.
X = dataMat[1:, 1: featureNum].astype(float)
y = dataMat[1:, featureNum].astype(float)

'''
# 将y标签的增长率转化为增、跌、不变三种标签
for i in range(dataNum-1):
    if y[i] == 0: y[i] = 0
    elif y[i]<0: y[i] = -1
    else:y[i]= 1
'''
# Dataset splitting
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.model_selection import train_test_split
from CrawlerUtil import read_csv
# 加载样本数据集
# Load the sample data set, turn the target into up/flat/down class labels,
# and prepare a scaled-SVC pipeline together with its hyper-parameter grid.
if __name__ == '__main__':
    # n selects which files_<n>min/REDUCED_FEATURE_VECTOR_<n>.csv is loaded.
    n = 180
    csv_file = read_csv('C:/Users/yuzhe/Desktop/OptionAnalysis/files/files_'
                        + str(n) + 'min/REDUCED_FEATURE_VECTOR_'
                        + str(n) + '.csv')
    dataNum = len(csv_file)
    # Every column but the last is treated as a feature; the last is the target.
    featureNum = len(csv_file[0]) - 1
    print("Dimension of feature", featureNum)
    dataMat = np.array(csv_file)
    # Row 0 is skipped (assumed to be a header -- TODO confirm against read_csv).
    X = dataMat[1:, 0:featureNum].astype(float)
    y = dataMat[1:, featureNum].astype(float)
    print(dataNum)

    # Collapse the target into three classes: 0 stays 0, negatives become -1,
    # everything else becomes 1. Vectorised form of the former per-element
    # loop with the same mapping (values that are neither 0 nor negative,
    # NaN included, still land in the 1 class).
    y = np.where(y == 0, 0.0, np.where(y < 0, -1.0, 1.0))

    # Split the data set: 80% train / 20% test, fixed seed for repeatability.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)
    pipe_scv = Pipeline([('scl', StandardScaler()),
                         ('clf', SVC(random_state=1))])

    param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]
    # Parameters of a Pipeline step are addressed as '<step>__<param>' with a
    # DOUBLE underscore; single-underscore keys such as 'clf_C' are rejected
    # by GridSearchCV as invalid parameters of the Pipeline.
    # NOTE(review): this file imports GridSearchCV from sklearn.grid_search,
    # which was removed in scikit-learn 0.20; sklearn.model_selection provides
    # the same class.
    param_grid = [
        {'clf__C': param_range, 'clf__kernel': ['linear']},
        {'clf__C': param_range, 'clf__gamma': param_range,
         'clf__kernel': ['rbf']},
    ]

    gs = GridSearchCV(estimator = pipe_scv ,
Ejemplo n.º 3
0
    print(dataNum)
    for i in range(dataNum - 1):
        if y[i] == 0:
            y[i] = 0
        elif y[i] < 0:
            y[i] = -1
        else:
            y[i] = 1
    return y


if __name__ == "__main__":
    # Load the sample data set; n selects which
    # files_<n>min/REDUCED_FEATURE_VECTOR_<n>.csv file is read.
    n = 120
    csv_file = read_csv(''.join([
        'C:/Users/yuzhe/Desktop/OptionAnalysis/files/files_',
        str(n),
        'min/REDUCED_FEATURE_VECTOR_',
        str(n),
        '.csv',
    ]))
    dataNum = len(csv_file)
    featureNum = len(csv_file[0]) - 1
    print("Dimension of feature", featureNum)
    dataMat = np.array(csv_file)
    # Row 0 is skipped; the leading featureNum columns are the features and
    # the column right after them is the target.
    X = dataMat[1:, :featureNum].astype(float)
    y = dataMat[1:, featureNum].astype(float)
    y = convert2class(y, dataNum)  # convert the target to class labels

    # Neural networks are sensitive to the scale of the data, so it is best to
    # standardise, normalise, or rescale to [-1, 1] before training.
    scaler = StandardScaler()  # standardising transformer
    X = scaler.fit_transform(X)  # fit the scaler on X, then transform X
    # solver='lbfgs': the MLP optimiser. L-BFGS does well on small data sets,
    # Adam is fairly robust, and SGD performs best when its parameters are well
    # tuned (classification quality vs. number of iterations); SGD stands for
    # stochastic gradient descent.
    # alpha: the L2 penalty parameter. MLP supports regularisation (L2 by
    # default); the concrete value needs tuning.