Ejemplo n.º 1
0
def scores(conf):
    """Assemble a hyperparameter grid search for the neural-network pipeline.

    Loads the configured datasets, wraps the network in the standard
    pipeline, and returns the (unfitted) search object together with the
    training data and targets so the caller can run it.
    """
    # One hidden Sigmoid layer of varying width, followed by a Softmax output.
    hidden_units = (10, 20, 35, 50, 75)
    layer_grid = [
        [Layer(type="Sigmoid", units=units, name="h0"),
         Layer(type="Softmax", name="output")]
        for units in hidden_units
    ]
    parameters = {
        'nn__n_iter': range(10, 25),
        'nn__layers': layer_grid,
        'nn__learning_rate': [.001, .01, .05],
        'nn__batch_size': [1, 10, 25, 50],
        'nn__dropout_rate': [0, .1, .25, .5],
    }
    training_data, training_targets = learning_utils.getData(
        conf['datasets'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True)
    classifier = getNet(valid_size=0)
    pipeline = learning_utils.getPipeline(training_data, classifier, 'nn')
    grid_search = GridSearchCV(pipeline, parameters, verbose=10)
    return grid_search, training_data, training_targets
Ejemplo n.º 2
0
def train(conf):
    """Train an RBF-kernel SVM pipeline on the configured datasets.

    Cross-validates two candidate regularization strengths (C=1 and C=100),
    prints both 5-fold score vectors for comparison, then fits and returns
    the C=1 pipeline together with the training feature names.

    Returns:
        (fitted_pipeline, feature_names) tuple.
    """
    training_data, training_targets, ids = learning_utils.getData(
        conf["datasets"],
        ratio=conf["ratio"],
        type=conf["type"],
        split=True,
        balanced=conf["balanced"],
        shuffle=True,
        return_ids=True,
    )
    clf = svm.SVC(C=1, kernel="rbf")
    clf1 = svm.SVC(C=100, kernel="rbf")
    # Stash the sample ids on the estimator so downstream code can map
    # predictions back to their source records — presumably read by a
    # consumer outside this file; TODO confirm.
    clf.ids = ids
    pipeline = learning_utils.getPipeline(training_data, clf, "svm")
    pipeline1 = learning_utils.getPipeline(training_data, clf1, "svm")

    scores = cross_validation.cross_val_score(pipeline, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    scores1 = cross_validation.cross_val_score(
        pipeline1, training_data, training_targets, cv=5, verbose=True, n_jobs=-1
    )
    # Fix: `print x` is Python-2-only syntax; the call form behaves
    # identically for a single argument and also runs on Python 3.
    print(scores)
    print(scores1)

    return pipeline.fit(training_data, training_targets), training_data.columns.values
Ejemplo n.º 3
0
def train(conf):
    """Fit a depth-limited decision tree on the configured datasets.

    Reports 5-fold cross-validation accuracy on stdout, then returns the
    fitted pipeline and the training feature names.
    """
    training_data, training_targets = learning_utils.getData(
        conf['datasets'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True)

    classifier = tree.DecisionTreeClassifier(
        criterion=conf['criterion'], splitter='best', max_depth=12)
    pipeline = learning_utils.getPipeline(training_data, classifier, 'decision_tree')

    cv_scores = cross_validation.cross_val_score(pipeline, training_data, training_targets, cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (cv_scores.mean(), cv_scores.std() * 2))

    return pipeline.fit(training_data, training_targets), training_data.columns.values
Ejemplo n.º 4
0
def scores(conf):
    """Grid-search SVM hyperparameters (C, gamma, kernel) on the configured data."""
    param_grid = {
        'svm__C': np.logspace(-2, 5, 8),
        'svm__gamma': np.logspace(-9, 2, 12),
        'svm__kernel': ['linear', 'rbf'],
    }
    training_data, training_targets = learning_utils.getData(
        conf['datasets'], type=conf['type'], split=True,
        balanced=conf['balanced'], shuffle=True)
    pipeline = learning_utils.getPipeline(training_data, svm.SVC(), 'svm')
    search = GridSearchCV(pipeline, param_grid, n_jobs=-1, verbose=1)
    learning_utils.gs(search, training_data, training_targets)
Ejemplo n.º 5
0
def scores(conf):
    """Run an exhaustive SVM hyperparameter search over C, gamma and kernel."""
    c_range = np.logspace(-2, 5, 8)
    gamma_range = np.logspace(-9, 2, 12)
    parameters = {
        "svm__C": c_range,
        "svm__gamma": gamma_range,
        "svm__kernel": ["linear", "rbf"],
    }
    training_data, training_targets = learning_utils.getData(
        conf["datasets"], type=conf["type"], split=True, balanced=conf["balanced"], shuffle=True
    )
    classifier = svm.SVC()
    pipeline = learning_utils.getPipeline(training_data, classifier, "svm")
    search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
    learning_utils.gs(search, training_data, training_targets)
Ejemplo n.º 6
0
def train(conf):
    """Train an RBF-kernel SVM pipeline on the configured datasets.

    Cross-validates two candidate regularization strengths (C=1 and C=100),
    prints both 5-fold score vectors for comparison, then fits and returns
    the C=1 pipeline together with the training feature names.

    Returns:
        (fitted_pipeline, feature_names) tuple.
    """
    training_data, training_targets, ids = learning_utils.getData(conf['datasets'], ratio=conf['ratio'],
                                                                  type=conf['type'], split=True,
                                                                  balanced=conf['balanced'], shuffle=True,
                                                                  return_ids=True)
    clf = svm.SVC(C=1, kernel='rbf')
    clf1 = svm.SVC(C=100, kernel='rbf')
    # Stash the sample ids on the estimator so downstream code can map
    # predictions back to their source records — presumably read by a
    # consumer outside this file; TODO confirm.
    clf.ids = ids
    pipeline = learning_utils.getPipeline(training_data, clf, 'svm')
    pipeline1 = learning_utils.getPipeline(training_data, clf1, 'svm')

    scores = cross_validation.cross_val_score(pipeline, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    scores1 = cross_validation.cross_val_score(pipeline1, training_data, training_targets, cv=5, verbose=True,
                                               n_jobs=-1)
    # Fix: `print x` is Python-2-only syntax; the call form behaves
    # identically for a single argument and also runs on Python 3.
    print(scores)
    print(scores1)

    return pipeline.fit(training_data, training_targets), training_data.columns.values
Ejemplo n.º 7
0
def getData(size, ratio, features, balanced, type, return_ids):
    """Load the dataset and split it into train/test partitions.

    Args:
        size: dataset selector forwarded to learning_utils.getData.
        ratio: fraction of rows reserved for the test partition (the split
            point is at 1 - ratio of the rows).
        features: optional list of feature column names to keep; ignored for
            the "md" and "mir" dataset types.
        balanced, type, return_ids: forwarded to learning_utils.getData.

    Returns:
        (training_data, training_targets, test_data, test_targets, ids) —
        targets come from the 'peak_cat' column; ids is None unless
        return_ids is set.
    """
    complete_data = learning_utils.getData(size, split=False, balanced=balanced, type=type, return_ids=return_ids)

    if features is not None and len(features) > 0 and type != "md" and type != "mir":
        # Fix: build a new column list instead of features.append('peak_cat'),
        # which mutated the caller's list — repeated calls kept appending.
        complete_data = complete_data[list(features) + ['peak_cat']]
    threshold = int(complete_data.shape[0] * (1 - ratio))

    if return_ids:
        ids = complete_data['id']
        complete_data = complete_data.drop('id', axis=1)
    else:
        ids = None
    training_data = complete_data[:threshold]
    test_data = complete_data[threshold:]

    # Fix: use non-inplace drops — the partitions above are slices of
    # complete_data, and inplace mutation of a slice triggers pandas'
    # SettingWithCopy behavior (warning, and potentially a no-op).
    training_targets = training_data['peak_cat']
    training_data = training_data.drop('peak_cat', axis=1)

    test_targets = test_data['peak_cat']
    test_data = test_data.drop('peak_cat', axis=1)

    return training_data, training_targets, test_data, test_targets, ids
Ejemplo n.º 8
0
def scores(conf):
    """Grid-search hyperparameters for the configured tree-based classifier.

    conf['tree'] selects the estimator: 'tree' (DecisionTree), 'random'
    (RandomForest) or 'extra' (ExtraTrees). Runs the search via
    learning_utils.gs on the configured datasets.

    Raises:
        ValueError: if conf['tree'] is not one of the supported kinds.
    """
    if conf['tree'] == 'tree':
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__splitter': ['best', 'random'],
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__presort': [True, False]
        }
        clf = tree.DecisionTreeClassifier()
    elif conf['tree'] in ('random', 'extra'):
        # RandomForest and ExtraTrees share the same search space, so the
        # previously duplicated parameter dicts are merged into one branch.
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__n_estimators': range(5, 50),
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__bootstrap': [True, False],
        }
        if conf['tree'] == 'random':
            clf = ensemble.RandomForestClassifier()
        else:
            clf = ensemble.ExtraTreesClassifier()
    else:
        # Fix: an unrecognized value used to fall through every branch and
        # crash later with a NameError on `parameters`; fail fast instead.
        raise ValueError("unknown tree type: %r" % (conf['tree'],))

    training_data, training_targets = learning_utils.getData(conf['datasets'], type=conf['type'], split=True,
                                                             balanced=conf['balanced'], shuffle=True)

    pipeline = learning_utils.getPipeline(training_data, clf, 'tree')

    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)

    learning_utils.gs(grid_search, training_data, training_targets)