Example 1
def hist(iterations, conf):
    global train_errors, valid_errors, best_train_i, config
    config = conf
    best_epoch_slp = np.zeros(conf['epochs'])
    best_epoch_mlp = np.zeros(conf['epochs'])
    # run `iterations` independent training runs, recording each run's best epoch
    for i in range(iterations):
        training_data, training_targets, valid_data, valid_targets = getData(size=conf['datasets'],
                                                                             ratio=conf['ratio'],
                                                                             features=conf['features'],
                                                                             balanced=conf['balanced'],
                                                                             type=conf['type'], return_ids=False)

        # single-layer perceptron: no hidden layers
        train_errors = np.zeros(conf['epochs'])
        valid_errors = np.zeros(conf['epochs'])
        slp = getNet(units=[], n_iter=conf['epochs'], callbacks=default_callbacks)
        pipeline_slp = learning_utils.getPipeline(training_data, slp, 'neural_network')
        model_slp = pipeline_slp.fit(training_data, training_targets)
        # best_train_i (the epoch with the lowest error) is set by the callbacks during fitting
        best_epoch_slp[best_train_i - 1] += 1

        # multi-layer perceptron: one hidden layer of 61 units
        train_errors = np.zeros(conf['epochs'])
        valid_errors = np.zeros(conf['epochs'])
        mlp = getNet(units=[61], n_iter=conf['epochs'], callbacks=default_callbacks)
        pipeline_mlp = learning_utils.getPipeline(training_data, mlp, 'neural_network')
        model_mlp = pipeline_mlp.fit(training_data, training_targets)
        best_epoch_mlp[best_train_i - 1] += 1

    learning_utils.plot_hist(best_epoch_slp, best_epoch_mlp)
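
A minimal usage sketch for hist, with a hypothetical conf whose keys are the ones the function actually reads ('epochs', 'datasets', 'ratio', 'features', 'balanced', 'type'); the values are illustrative placeholders, not from the original project:

# hypothetical configuration; only the key names are taken from the function body
conf = {
    'epochs': 50,       # epochs per run, and the length of the best-epoch histograms
    'datasets': 1000,   # requested number of samples
    'ratio': 0.2,       # validation split ratio
    'features': None,   # default feature set
    'balanced': True,
    'type': 'all',
}
hist(iterations=30, conf=conf)  # 30 runs; plots best-epoch histograms for SLP vs. MLP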
Example 2
def train(conf):
    training_data, training_targets, ids = learning_utils.getData(
        conf["datasets"],
        ratio=conf["ratio"],
        type=conf["type"],
        split=True,
        balanced=conf["balanced"],
        shuffle=True,
        return_ids=True,
    )
    clf = svm.SVC(C=1, kernel="rbf")
    clf1 = svm.SVC(C=100, kernel="rbf")  # loss=conf['loss_type'])
    # clf1 = svm.LinearSVC(C=.9)
    clf.ids = ids
    pipeline = learning_utils.getPipeline(training_data, clf, "svm")
    pipeline1 = learning_utils.getPipeline(training_data, clf1, "svm")

    # 5-fold cross-validation for both classifiers, parallelised over all cores
    scores = cross_validation.cross_val_score(pipeline, training_data, training_targets, cv=5, verbose=True, n_jobs=-1)
    scores1 = cross_validation.cross_val_score(
        pipeline1, training_data, training_targets, cv=5, verbose=True, n_jobs=-1
    )
    print(scores)
    print(scores1)

    return pipeline.fit(training_data, training_targets), training_data.columns.values
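
train returns the fitted pipeline together with the training columns; a short usage sketch, assuming a conf carrying the keys the function reads ('datasets', 'ratio', 'type', 'balanced') and that the fitted pipeline exposes scikit-learn's standard predict:

conf = {'datasets': 1000, 'ratio': 0.2, 'type': 'all', 'balanced': True}  # hypothetical values
model, feature_names = train(conf)
print(feature_names)  # the columns the pipeline was trained on
# predictions = model.predict(new_data)  # new_data must carry the same columns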
Example 3
def scores(conf):
    parameters = {
        'nn__n_iter': range(10, 25),
        'nn__layers': [
            # [Layer(type="Softmax", name="output")],
            [Layer(type="Sigmoid", units=10, name="h0"), Layer(type="Softmax", name="output")],
            [Layer(type="Sigmoid", units=20, name="h0"), Layer(type="Softmax", name="output")],
            [Layer(type="Sigmoid", units=35, name="h0"), Layer(type="Softmax", name="output")],
            [Layer(type="Sigmoid", units=50, name="h0"), Layer(type="Softmax", name="output")],
            [Layer(type="Sigmoid", units=75, name="h0"), Layer(type="Softmax", name="output")],
            # [Layer(type="Sigmoid", units=50, name="h0"), Layer(type="Sigmoid", units=50, name="h1"),
            # Layer(type="Softmax", name="output")]
        ],
        'nn__learning_rate': [.001, .01, .05],
        # 'nn__learning_momentum': [.8, .9],
        'nn__batch_size': [1, 10, 25, 50],
        'nn__dropout_rate': [0, .1, .25, .5]
    }
    training_data, training_targets = learning_utils.getData(conf['datasets'], type=conf['type'], split=True,
                                                             balanced=conf['balanced'], shuffle=True)
    clf = getNet(valid_size=0)
    pipeline = learning_utils.getPipeline(training_data, clf, 'nn')
    grid_search = GridSearchCV(pipeline, parameters, verbose=10)
    return grid_search, training_data, training_targets
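
Unlike the other scores variants, this one returns the unfitted GridSearchCV rather than running it; driving it uses only scikit-learn's documented API (conf is assumed to carry 'datasets', 'type' and 'balanced'):

grid_search, training_data, training_targets = scores(conf)
grid_search.fit(training_data, training_targets)  # exhaustive search over the nn__* grid
print(grid_search.best_score_)
print(grid_search.best_params_)  # e.g. the best layer list, learning rate, batch size, dropout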
Example 4
def train(conf):
    training_data, training_targets = learning_utils.getData(conf['datasets'], type=conf['type'], split=True,
                                                             balanced=conf['balanced'], shuffle=True)

    clf = tree.DecisionTreeClassifier(criterion=conf['criterion'], splitter='best', max_depth=12)
    # clf = RandomForestClassifier(criterion=conf['criterion'], max_depth=12)
    pipeline = learning_utils.getPipeline(training_data, clf, 'decision_tree')

    scores = cross_validation.cross_val_score(pipeline, training_data, training_targets, cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    return pipeline.fit(training_data, training_targets), training_data.columns.values
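
Because the tree pipeline is returned fitted together with the feature names, the two can be combined to rank features. This sketch assumes getPipeline returns a standard scikit-learn Pipeline, so the classifier is reachable as the last step:

model, feature_names = train(conf)
tree_clf = model.steps[-1][1]  # final pipeline step: the fitted DecisionTreeClassifier
ranked = sorted(zip(feature_names, tree_clf.feature_importances_),
                key=lambda pair: pair[1], reverse=True)
for name, importance in ranked[:10]:
    print(name, importance)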
Example 5
def scores(conf):
    parameters = {
        'svm__C': np.logspace(-2, 5, 8),
        'svm__gamma': np.logspace(-9, 2, 12),
        'svm__kernel': ['linear', 'rbf']
    }
    training_data, training_targets = learning_utils.getData(conf['datasets'], type=conf['type'], split=True,
                                                             balanced=conf['balanced'], shuffle=True)
    clf = svm.SVC()
    pipeline = learning_utils.getPipeline(training_data, clf, 'svm')
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)
    learning_utils.gs(grid_search, training_data, training_targets)
Example 6
def train_custom(conf, plot_path, debug, verbose, gs_params=None, callbacks=default_callbacks):
    global config, plot, train_errors, valid_errors
    plot = plot_path

    train_errors = np.zeros(conf['epochs'])
    valid_errors = np.zeros(conf['epochs'])
    all_ = list()
    # compare five feature-selection strategies
    for x in ['all', 'md', 'mir', 'feat_sel', 'random']:
        run_errors = list()
        if x in ('all', 'md', 'mir'):
            training_data, training_targets, valid_data, valid_targets = getData(conf['datasets'], 0, None,
                                                                                 None, type=x)
        elif x == 'random':
            from utils import features
            import random
            conf['features'] = random.sample(list(np.hstack(list(features.values()))), random.randint(1, 115))
            training_data, training_targets, valid_data, valid_targets = getData(conf['datasets'], 0, conf['features'],
                                                                                 balanced=conf['balanced'], type=x)
        else:
            training_data, training_targets, valid_data, valid_targets = getData(conf['datasets'], 0, conf['features'],
                                                                                 balanced=conf['balanced'], type=x)

        # getData may return fewer samples than requested; record the actual count
        conf['datasets'] = training_data.shape[0]
        if conf['units'] is None:
            conf['units'] = [int(math.ceil((training_data.shape[1] + 7) / 2))]
        conf['n_input'] = training_data.shape[1]
        config = conf
        # heuristic hidden-layer size: (n_features + 7) / 2
        units = [int(math.ceil((training_data.shape[1] + 7) / 2))]
        # average the validation-error curve over 100 independent runs
        for i in range(1, 101):
            net = getNet(units, conf['learning_rate'], conf['epochs'], conf['learning_rule'],
                         conf['batch_size'], conf['weight_decay'], conf['dropout_rate'],
                         conf['loss_type'], n_stable=conf['n_stable'], debug=debug, verbose=verbose,
                         callbacks=callbacks,
                         # valid_set=(valid_data, valid_targets)
                         valid_size=conf['ratio']
                         )
            pipeline = learning_utils.getPipeline(training_data, net)
            pipeline.fit(training_data, training_targets)
            run_errors.append(np.copy(valid_errors))  # snapshot, in case the callbacks update the global array in place
        all_.append(np.array(run_errors).mean(axis=0))
    learning_utils.plot_lines(data=all_, labels=["all", "md", "mir", "feat_sel", "random"],
                              xlabel="number of epochs",
                              ylabel=config['loss_type'],
                              title="mean training and validation error", suptitle=None,
                              path="learning/nn/plots/comb/test.png")
Example 7
def scores(conf):
    if conf['tree'] == 'tree':
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__splitter': ['best', 'random'],
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__presort': [True, False]
        }
        clf = tree.DecisionTreeClassifier()
    elif conf['tree'] == 'random':
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__n_estimators': range(5, 50),
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__bootstrap': [True, False],
        }
        clf = ensemble.RandomForestClassifier()
    elif conf['tree'] == 'extra':
        parameters = {
            'tree__criterion': ['gini', 'entropy'],
            'tree__n_estimators': range(5, 50),
            'tree__max_features': [.1, .2, .5, 'sqrt', 'log2', None],
            'tree__max_depth': range(1, 20),
            'tree__bootstrap': [True, False],
        }
        clf = ensemble.ExtraTreesClassifier()
    else:
        raise ValueError("unknown tree type: {}".format(conf['tree']))

    training_data, training_targets = learning_utils.getData(conf['datasets'], type=conf['type'], split=True,
                                                             balanced=conf['balanced'], shuffle=True)

    pipeline = learning_utils.getPipeline(training_data, clf, 'tree')

    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)

    learning_utils.gs(grid_search, training_data, training_targets)
Example 8
def train(conf, plot_path, debug, verbose, gs_params=None, callbacks=default_callbacks):
    global config, plot, train_errors, valid_errors
    plot = plot_path
    if gs_params and 'neural_network__n_iter' in gs_params:
        train_errors = np.zeros(20)
        valid_errors = np.zeros(20)
    else:
        train_errors = np.zeros(conf['epochs'])
        valid_errors = np.zeros(conf['epochs'])
    training_data, training_targets, valid_data, valid_targets, ids = getData(conf['datasets'], conf['ratio'],
                                                                              conf['features'],
                                                                              balanced=conf['balanced'],
                                                                              type=conf['type'], return_ids=True)

    # getData may return fewer samples than requested; record the actual count
    conf['datasets'] = training_data.shape[0]
    if conf['units'] is None:
        conf['units'] = [int(math.ceil((training_data.shape[1] + 7) / 2))]
    conf['n_input'] = training_data.shape[1]
    config = conf

    if conf['unit_range'] is not None:
        del conf['units']
        for units in range(conf['unit_range'][0], conf['unit_range'][1] + 1):
            net = getNet([units], conf['learning_rate'], conf['epochs'], conf['learning_rule'],
                         conf['batch_size'], conf['weight_decay'], conf['dropout_rate'],
                         conf['loss_type'], n_stable=conf['n_stable'], debug=debug, verbose=verbose,
                         callbacks=callbacks,
                         valid_size=conf['ratio']
                         )
            pipeline = learning_utils.getPipeline(training_data, net, 'neural_network')
            pipeline.fit(training_data, training_targets)
        # unit_iter_* are module-level lists filled by the callbacks during fitting
        learning_utils.plot_lines(data=[unit_iter_train_error, unit_iter_valid_error],
                                  labels=["Training error", "Validation error"],
                                  xlabel="number of hidden units",
                                  ylabel=config['loss_type'],
                                  title="training and validation error", suptitle=None, conf=config, additionals=[
                [np.array(unit_iter_train_error).argmin() + conf['unit_range'][0],
                 np.array(unit_iter_train_error).min()],
                [np.array(unit_iter_valid_error).argmin() + conf['unit_range'][0],
                 np.array(unit_iter_valid_error).min()]],
                                  begin=conf['unit_range'][0],
                                  path="learning/nn/plots/unit_iter/{}_{}.png".format(conf['unit_range'],
                                                                                      conf['epochs']))
        learning_utils.plot_lines(data=[unit_iter_best_train_error, unit_iter_best_valid_error],
                                  labels=["Training error", "Validation error"],
                                  xlabel="number of hidden units",
                                  ylabel=config['loss_type'],
                                  title="training and validation error", suptitle=None, conf=config, additionals=[
                [np.array(unit_iter_best_train_error).argmin() + conf['unit_range'][0],
                 np.array(unit_iter_best_train_error).min()],
                [np.array(unit_iter_best_valid_error).argmin() + conf['unit_range'][0],
                 np.array(unit_iter_best_valid_error).min()]],
                                  begin=conf['unit_range'][0],
                                  path="learning/nn/plots/unit_iter/{}_{}_{}.png".format(conf['unit_range'],
                                                                                         conf['epochs'],
                                                                                         "best"))
    else:
        net = getNet(conf['units'], conf['learning_rate'], conf['epochs'], conf['learning_rule'],
                     conf['batch_size'], conf['weight_decay'], conf['dropout_rate'],
                     conf['loss_type'], n_stable=conf['n_stable'], debug=debug, verbose=verbose,
                     callbacks=callbacks,
                     # valid_set=(np.array(valid_data), valid_targets),
                     valid_size=conf['ratio']
                     )
        pipeline = learning_utils.getPipeline(training_data, net, 'neural_network')
        model = pipeline.fit(training_data, training_targets)
        model.ids = ids
        return model
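
A usage sketch for this train variant, reusing the hypothetical conf sketched under Example 6 plus the keys this function also reads ('type', 'unit_range'), with 'unit_range' set to None so the function takes the branch that returns the fitted model; predict is scikit-learn's standard API on the fitted pipeline:

conf['type'] = 'all'
conf['unit_range'] = None  # skip the hidden-unit sweep; return the fitted model instead
model = train(conf, plot_path='learning/nn/plots', debug=False, verbose=True)
print(model.ids[:5])  # ids of the training samples, attached by train()
# predictions = model.predict(new_data)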