Ejemplo n.º 1
0
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression, RidgeClassifier
from sklearn.tree import DecisionTreeClassifier

# Directory holding the input data files (machine-specific absolute path).
baseDir = '/Users/eugene/Downloads/Data/'
# Candidate instrument codes; the one at index `i` is used below.
instruments = ['000300.SH', '000016.SH', '000905.SH']
i = 2  # selects '000905.SH'
startYear = 2015
yearNum = 1

# readWSDFile is defined outside this snippet; it is given the data directory,
# the selected instrument code, the start year and the year count.
df = readWSDFile(baseDir, instruments[i], startYear, yearNum)
print 'Day count:', len(df)
# print df.head(5)
# readWSDIndexFile (also defined elsewhere) is called with the same arguments.
dfi = readWSDIndexFile(baseDir, instruments[i], startYear, yearNum)

# prepareData (defined elsewhere) combines both frames and yields three values.
X, y, actionDates = prepareData(df, dfi)
print np.shape(X)
# NOTE(review): `preprocessing` is presumably sklearn.preprocessing -- its
# import is not visible in this snippet.
normalizer = preprocessing.Normalizer().fit(X)  # fit does nothing
X_norm = normalizer.transform(X)


def optimizeAdaBoostSGD(X_norm, y, kFolds=10):
    """Build a parameter grid and an AdaBoost classifier over an SGD base estimator.

    NOTE(review): in the lines visible here the function ends right after the
    classifier is constructed: X_norm, y and kFolds are never used and nothing
    is returned. The search/fit step is presumably cut off -- confirm against
    the complete file before relying on this function.
    """
    # grid search: multi-parameter optimization
    parameters = {
        # 'base_estimator__alpha': 10.0 ** (-np.arange(1, 7)),
        # Eight log-spaced candidates from 1e-8 to 1e-1 for the base
        # estimator's `alpha`.
        'base_estimator__alpha': np.logspace(-8, -1, 8),
        # 'n_estimators': np.linspace(1, 100, 10, dtype=np.dtype(np.int16)),
    }
    # sgd = SGDClassifier(loss='log', n_iter=np.ceil(10**6/len(X_norm)))
    # Base estimator with a fixed seed so repeated runs are reproducible.
    sgd = SGDClassifier(loss='log', n_iter=5, random_state=47)
    # Boosted ensemble of 200 estimators built on `sgd`, same fixed seed.
    clf = AdaBoostClassifier(base_estimator=sgd, n_estimators=200, random_state=47)
Ejemplo n.º 2
0
# Directory holding the input data files (machine-specific absolute path).
baseDir = '/Users/eugene/Downloads/Data/'
# baseDir = '/Users/eugene/Downloads/marketQuotationData/'
# CSI 300, SSE 50, CSI 500
instruments = ['000300.SH', '000016.SH', '000905.SH']
instrument = instruments[2]
initCapital = 100000000.0 # 100 million
startYear = 2015; yearNum = 1
# startYear = 2014; yearNum = 2

# readWSDFile is defined outside this snippet; it is given the data directory,
# the selected instrument code, the start year and the year count.
df = readWSDFile(baseDir, instrument, startYear, yearNum)
print 'Day count:', len(df)
# print df.head(5)
# readWSDIndexFile (also defined elsewhere) is called with the same arguments.
dfi = readWSDIndexFile(baseDir, instrument, startYear, yearNum)

# prepareData (defined elsewhere) combines both frames and yields three values.
X, y, actionDates = prepareData(df, dfi)
print np.shape(X)
normalizer = preprocessing.Normalizer().fit(X)  # fit does nothing
X_norm = normalizer.transform(X)
# Earlier hyper-parameter experiments are kept below for reference; only the
# last, uncommented classifier definition is active.
# gamma, C, score = optimizeSVM(X_norm, y, kFolds=10); print 'gamma=',gamma, 'C=',C, 'score=',score
# clf = svm.SVC(kernel='rbf', gamma=32, C=32768)
# clf = svm.SVC(kernel='rbf', gamma=32, C=128)
# clf = svm.SVC(kernel='rbf', gamma=128, C=2)
# clf = svm.SVC(kernel='rbf', gamma=512, C=0.5)
# clf = svm.SVC(kernel='rbf', gamma=2, C=128)
clf = svm.SVC(kernel='rbf', gamma=0.125, C=0.125)

from EnsembleTest import optimizeEnsemble
from AdaboostSGDTest import optimizeAdaBoostSGD
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.linear_model import SGDClassifier