def scores(conf):
    """Prepare a hyperparameter grid search for the neural-net pipeline.

    Returns the unfitted GridSearchCV object together with the training
    data and targets so the caller can run the search itself.
    """
    # Candidate architectures: a single sigmoid hidden layer of varying
    # width, always followed by a softmax output layer.
    layer_candidates = []
    for width in (10, 20, 35, 50, 75):
        layer_candidates.append([
            Layer(type="Sigmoid", units=width, name="h0"),
            Layer(type="Softmax", name="output"),
        ])
    param_grid = {
        'nn__n_iter': range(10, 25),
        'nn__layers': layer_candidates,
        'nn__learning_rate': [.001, .01, .05],
        'nn__batch_size': [1, 10, 25, 50],
        'nn__dropout_rate': [0, .1, .25, .5],
    }
    training_data, training_targets = learning_utils.getData(
        conf['datasets'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True)
    # valid_size=0: no held-out validation split inside the net itself.
    net = getNet(valid_size=0)
    pipeline = learning_utils.getPipeline(training_data, net, 'nn')
    grid_search = GridSearchCV(pipeline, param_grid, verbose=10)
    return grid_search, training_data, training_targets
def train(conf):
    """Cross-validate two RBF-kernel SVMs (C=1 vs C=100) and fit the C=1 one.

    Both pipelines are scored with 5-fold cross-validation and the score
    arrays are printed for side-by-side comparison; only the C=1 pipeline
    is actually fitted and returned.

    Returns:
        (fitted pipeline, training feature column names).
    """
    training_data, training_targets, ids = learning_utils.getData(
        conf["datasets"],
        ratio=conf["ratio"],
        type=conf["type"],
        split=True,
        balanced=conf["balanced"],
        shuffle=True,
        return_ids=True,
    )
    clf = svm.SVC(C=1, kernel="rbf")
    clf1 = svm.SVC(C=100, kernel="rbf")
    # Attach the sample ids to the estimator so downstream consumers can
    # trace predictions back to rows. NOTE(review): non-standard attribute;
    # confirm something actually reads clf.ids.
    clf.ids = ids
    pipeline = learning_utils.getPipeline(training_data, clf, "svm")
    pipeline1 = learning_utils.getPipeline(training_data, clf1, "svm")
    scores = cross_validation.cross_val_score(
        pipeline, training_data, training_targets, cv=5, verbose=True, n_jobs=-1
    )
    scores1 = cross_validation.cross_val_score(
        pipeline1, training_data, training_targets, cv=5, verbose=True, n_jobs=-1
    )
    # Bug fix: the bare `print x` statement is Python-2-only syntax; the
    # call form below behaves identically on Python 2 and also runs on 3.
    print(scores)
    print(scores1)
    return pipeline.fit(training_data, training_targets), training_data.columns.values
def train(conf):
    """Cross-validate and fit a depth-limited decision-tree pipeline.

    Prints the mean 5-fold accuracy with a two-sigma interval, then returns
    the fitted pipeline and the training feature names.
    """
    data, targets = learning_utils.getData(
        conf['datasets'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True)
    # Depth is capped at 12; criterion comes from the configuration.
    classifier = tree.DecisionTreeClassifier(
        criterion=conf['criterion'], splitter='best', max_depth=12)
    pipe = learning_utils.getPipeline(data, classifier, 'decision_tree')
    cv_scores = cross_validation.cross_val_score(pipe, data, targets, cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (cv_scores.mean(), cv_scores.std() * 2))
    return pipe.fit(data, targets), data.columns.values
def scores(conf):
    """Run an exhaustive SVM hyperparameter search via learning_utils.gs."""
    c_grid = np.logspace(-2, 5, 8)       # 1e-2 .. 1e5
    gamma_grid = np.logspace(-9, 2, 12)  # 1e-9 .. 1e2
    search_space = {
        'svm__C': c_grid,
        'svm__gamma': gamma_grid,
        'svm__kernel': ['linear', 'rbf'],
    }
    data, targets = learning_utils.getData(
        conf['datasets'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True)
    estimator = svm.SVC()
    pipe = learning_utils.getPipeline(data, estimator, 'svm')
    # n_jobs=-1: parallelize the search across all cores.
    searcher = GridSearchCV(pipe, search_space, n_jobs=-1, verbose=1)
    learning_utils.gs(searcher, data, targets)
def scores(conf):
    """Grid-search SVM hyperparameters (C, gamma, kernel) on the configured data.

    NOTE(review): nothing is returned — results are reported through
    learning_utils.gs; presumably it prints/stores the best parameters. Confirm.
    """
    # Log-spaced grids: C spans 1e-2..1e5 (8 steps), gamma 1e-9..1e2 (12 steps).
    parameters = {
        "svm__C": np.logspace(-2, 5, 8),
        "svm__gamma": np.logspace(-9, 2, 12),
        "svm__kernel": ["linear", "rbf"],
    }
    training_data, training_targets = learning_utils.getData(
        conf["datasets"], type=conf["type"], split=True, balanced=conf["balanced"], shuffle=True
    )
    clf = svm.SVC()
    pipeline = learning_utils.getPipeline(training_data, clf, "svm")
    # n_jobs=-1: use every available core for the search.
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
    learning_utils.gs(grid_search, training_data, training_targets)
def train(conf):
    """Cross-validate two RBF-kernel SVMs (C=1 vs C=100) and fit the C=1 one.

    Both pipelines are scored with 5-fold cross-validation and the score
    arrays are printed for side-by-side comparison; only the C=1 pipeline
    is fitted and returned.

    Returns:
        (fitted pipeline, training feature column names).
    """
    training_data, training_targets, ids = learning_utils.getData(
        conf['datasets'], ratio=conf['ratio'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True, return_ids=True)
    clf = svm.SVC(C=1, kernel='rbf')
    clf1 = svm.SVC(C=100, kernel='rbf')
    # Attach the sample ids to the estimator so downstream consumers can
    # trace predictions back to rows. NOTE(review): non-standard attribute;
    # confirm something actually reads clf.ids.
    clf.ids = ids
    pipeline = learning_utils.getPipeline(training_data, clf, 'svm')
    pipeline1 = learning_utils.getPipeline(training_data, clf1, 'svm')
    scores = cross_validation.cross_val_score(
        pipeline, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    scores1 = cross_validation.cross_val_score(
        pipeline1, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    # Bug fix: the bare `print x` statement is Python-2-only syntax; the
    # call form below behaves identically on Python 2 and also runs on 3.
    print(scores)
    print(scores1)
    return pipeline.fit(training_data, training_targets), training_data.columns.values
def getData(size, ratio, features, balanced, type, return_ids):
    """Fetch the complete dataset and split it into train/test portions.

    Args:
        size: dataset selector, forwarded to learning_utils.getData.
        ratio: fraction of rows reserved for the test split; the split is a
            plain tail slice — no shuffling happens here.
        features: optional list of feature column names to keep; the target
            column 'peak_cat' is always retained alongside them. Ignored for
            the "md" and "mir" dataset types.
        balanced: forwarded to learning_utils.getData.
        type: dataset type, forwarded on. NOTE(review): shadows the builtin
            `type` but kept for caller compatibility.
        return_ids: when truthy, the 'id' column is separated out and
            returned as the final tuple element.

    Returns:
        (training_data, training_targets, test_data, test_targets, ids);
        ids is None when return_ids is falsy.
    """
    complete_data = learning_utils.getData(size, split=False, balanced=balanced, type=type, return_ids=return_ids)
    if features is not None and len(features) > 0 and type != "md" and type != "mir":
        # Bug fix: build a fresh list instead of features.append('peak_cat'),
        # which mutated the caller's list on every invocation.
        complete_data = complete_data[list(features) + ['peak_cat']]
    threshold = int(complete_data.shape[0] * (1 - ratio))
    if return_ids:
        ids = complete_data['id']
        complete_data = complete_data.drop('id', axis=1)
    else:
        ids = None
    # Bug fix: .copy() the slices so the inplace drops below operate on
    # independent frames rather than views of complete_data (avoids the
    # pandas SettingWithCopy hazard of silently ineffective drops).
    training_data = complete_data[:threshold].copy()
    test_data = complete_data[threshold:].copy()
    training_targets = training_data['peak_cat']
    training_data.drop('peak_cat', axis=1, inplace=True)
    test_targets = test_data['peak_cat']
    test_data.drop('peak_cat', axis=1, inplace=True)
    return training_data, training_targets, test_data, test_targets, ids
def scores(conf):
    """Grid-search hyperparameters for one of three tree-based classifiers.

    conf['tree'] selects the estimator: 'tree' -> DecisionTreeClassifier,
    'random' -> RandomForestClassifier, 'extra' -> ExtraTreesClassifier.
    Results are reported through learning_utils.gs; nothing is returned.

    Raises:
        ValueError: if conf['tree'] is not a supported choice. (Previously an
            unknown value fell through and crashed with a confusing NameError
            on `parameters`/`clf`.)
    """
    # Grid entries shared by all three estimators.
    common = {
        'tree__criterion': ['gini', 'entropy'],
        'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
        'tree__max_depth': range(1, 20),
    }
    if conf['tree'] == 'tree':
        parameters = dict(common,
                          tree__splitter=['best', 'random'],
                          tree__presort=[True, False])
        clf = tree.DecisionTreeClassifier()
    elif conf['tree'] == 'random':
        parameters = dict(common,
                          tree__n_estimators=range(5, 50),
                          tree__bootstrap=[True, False])
        clf = ensemble.RandomForestClassifier()
    elif conf['tree'] == 'extra':
        parameters = dict(common,
                          tree__n_estimators=range(5, 50),
                          tree__bootstrap=[True, False])
        clf = ensemble.ExtraTreesClassifier()
    else:
        raise ValueError("unknown tree type: %r" % (conf['tree'],))
    training_data, training_targets = learning_utils.getData(
        conf['datasets'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True)
    pipeline = learning_utils.getPipeline(training_data, clf, 'tree')
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
    learning_utils.gs(grid_search, training_data, training_targets)