# NOTE(review): sklearn.cross_validation and sklearn.grid_search were
# deprecated in scikit-learn 0.18 and removed in 0.20 (their contents moved to
# sklearn.model_selection), so these imports require an older scikit-learn.
from sklearn import preprocessing, cross_validation, metrics, pipeline, grid_search
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression, RidgeClassifier
from sklearn.tree import DecisionTreeClassifier

# Module-level script: these statements are not inside any function or
# `__main__` guard, so they run whenever this module is executed or imported.
baseDir = '/Users/eugene/Downloads/Data/'
instruments = ['000300.SH', '000016.SH', '000905.SH']
i = 2  # index into `instruments`; 2 selects '000905.SH'
startYear = 2015
yearNum = 1

df = readWSDFile(baseDir, instruments[i], startYear, yearNum)
# Single-argument, parenthesised print: emits the same text under the
# Python 2 print statement and the Python 3 print function.
print('Day count: %s' % len(df))
# print df.head(5)
dfi = readWSDIndexFile(baseDir, instruments[i], startYear, yearNum)

X, y, actionDates = prepareData(df, dfi)
print(np.shape(X))
normalizer = preprocessing.Normalizer().fit(X)  # fit does nothing
X_norm = normalizer.transform(X)


def optimizeAdaBoostSGD(X_norm, y, kFolds=10):
    # grid search: multi-parameter optimization
    parameters = {
        # 'base_estimator__alpha': 10.0 ** (-np.arange(1, 7)),
        'base_estimator__alpha': np.logspace(-8, -1, 8),
        # 'n_estimators': np.linspace(1, 100, 10, dtype=np.dtype(np.int16)),
    }
    # sgd = SGDClassifier(loss='log', n_iter=np.ceil(10**6/len(X_norm)))
else: return -1 baseDir = '/Users/eugene/Downloads/Data/' # baseDir = '/Users/eugene/Downloads/marketQuotationData/' # 沪深300 上证50 中证500 instruments = ['000300.SH', '000016.SH', '000905.SH'] instrument = instruments[2] initCapital = 100000000.0 # 一亿 startYear = 2015; yearNum = 1 # startYear = 2014; yearNum = 2 df = readWSDFile(baseDir, instrument, startYear, yearNum) print 'Day count:', len(df) # print df.head(5) dfi = readWSDIndexFile(baseDir, instrument, startYear, yearNum) X, y, actionDates = prepareData(df, dfi) print np.shape(X) normalizer = preprocessing.Normalizer().fit(X) # fit does nothing X_norm = normalizer.transform(X) # gamma, C, score = optimizeSVM(X_norm, y, kFolds=10); print 'gamma=',gamma, 'C=',C, 'score=',score # clf = svm.SVC(kernel='rbf', gamma=32, C=32768) # clf = svm.SVC(kernel='rbf', gamma=32, C=128) # clf = svm.SVC(kernel='rbf', gamma=128, C=2) # clf = svm.SVC(kernel='rbf', gamma=512, C=0.5) # clf = svm.SVC(kernel='rbf', gamma=2, C=128) clf = svm.SVC(kernel='rbf', gamma=0.125, C=0.125) from EnsembleTest import optimizeEnsemble from AdaboostSGDTest import optimizeAdaBoostSGD