Ejemplo n.º 1
0
    def test_predict_from_model_array(self):
        """Check that a model reloaded from its repository predicts the same.

        An MLP is trained on the array connectors and its predictions on the
        train and test arrays are recorded.  A second MLP is then created
        with the same ``sname`` and repository (``finetuning`` enabled and
        ``template`` set to ``None``) and must return identical predictions.
        """
        params = nn_params.copy()
        params.update({'nclasses': n_classes})
        optimizer = GenericSolver(**solver_param)
        datasets.dump_svmlight_file(xtr, ytr, tr_f)
        datasets.dump_svmlight_file(xte, yte, te_f)

        # Remove the dumped files even when training or an assertion fails,
        # so a failing run does not leave them behind.
        try:
            # Train model
            clf = MLP(**params)
            clf.fit(xtr_arr, validation_data=[xte_arr], solver=optimizer)
            y_pred_tr = clf.predict(xtr_arr)
            y_pred_te = clf.predict(xte_arr)

            # Load from trained model
            params = nn_params.copy()
            params.update({
                'finetuning': True,
                'template': None,
                'nclasses': n_classes
            })
            clf = MLP(sname=clf.sname,
                      repository=clf.model['repository'],
                      **params)

            assert np.array_equal(y_pred_tr, clf.predict(xtr_arr))
            assert np.array_equal(y_pred_te, clf.predict(xte_arr))
        finally:
            os_utils._remove_files([tr_f, te_f])
Ejemplo n.º 2
0
    def test_classification(self):
        """Fit each SVM-file and array classifier and check test accuracy.

        The train/test splits are dumped to svmlight files.  MLP and LR
        classifiers are then fitted from a single path, from a
        ``[train, test]`` pair of paths, from dense arrays and from
        ``csr_matrix`` inputs.  Every classifier must score above 0.95
        accuracy against ``y_test``.
        """
        params = {'gpu': True, 'nclasses': n_classes}
        params.update(connec_param)

        train_path = os.path.abspath('x_train.svm')
        test_path = os.path.abspath('x_test.svm')
        datasets.dump_svmlight_file(x_train, y_train, train_path)
        datasets.dump_svmlight_file(x_test, y_test, test_path)

        # Remove the dumped files even when fitting or the accuracy check
        # fails, so a failing run does not leave them behind.
        try:
            # Each entry: [fit keyword arguments, predict input, classifier]
            clfs = [
                [{
                    'X': train_path
                }, test_path, MLPfromSVM(**params)],
                [{
                    'X': [train_path, test_path]
                }, test_path,
                 MLPfromSVM(**params)],
                [{
                    'X': train_path
                }, test_path, LRfromSVM(**params)],
                [{
                    'X': [train_path, test_path]
                }, test_path,
                 LRfromSVM(**params)],
                [{
                    'X': x_train,
                    'Y': y_train,
                    'validation_data': [(x_test, y_test)]
                }, x_test,
                 MLPfromArray(**params)],
                [{
                    'X': csr_matrix(x_train),
                    'Y': y_train,
                    'validation_data': [(csr_matrix(x_test), y_test)]
                }, x_test,
                 MLPfromArray(**params)],
                [{
                    'X': x_train,
                    'Y': y_train,
                    'validation_data': [(x_test, y_test)]
                }, x_test,
                 LRfromArray(**params)],
                [{
                    'X': csr_matrix(x_train),
                    'Y': y_train,
                    'validation_data': [(csr_matrix(x_test), y_test)]
                }, x_test,
                 LRfromArray(**params)],
            ]

            for fit_param, predict_params, clf in clfs:
                clf.fit(**fit_param)
                acc = metrics.accuracy_score(y_test,
                                             clf.predict(predict_params))
                assert acc > 0.95
        finally:
            os_utils._remove_files([train_path, test_path])
Ejemplo n.º 3
0
    def test_predict_from_model(self):
        """Check that models reloaded from their repositories predict the same.

        One ``MLPfromSVM`` and one ``MLPfromArray`` are created with explicit
        service names and repositories, fitted for 300 iterations, and their
        test predictions recorded.  Both are then re-created with the same
        ``sname``/``repository`` (``finetuning`` enabled and ``template`` set
        to ``None``) and must return identical predictions.
        """
        snames = ['svm_predict_from_model', 'array_predict_from_model']
        model_repo = [
            os.path.abspath('model_svm'),
            os.path.abspath('model_array')
        ]
        params = {'nclasses': n_classes, 'gpu': True}
        params.update(connec_param)

        train_path = os.path.abspath('x_train.svm')
        test_path = os.path.abspath('x_test.svm')
        datasets.dump_svmlight_file(x_train, y_train, train_path)
        datasets.dump_svmlight_file(x_test, y_test, test_path)

        # We make sure model repo does not exist
        for folder in model_repo:
            if os.path.exists(folder):
                os_utils._remove_dirs([folder])
        os_utils._create_dirs(model_repo)

        # Remove the dumped files and the model repositories even when
        # training or an assertion fails.
        try:
            # Create model, make sure the sname is not used by the server
            clf_svm = MLPfromSVM(sname=snames[0],
                                 repository=model_repo[0],
                                 **params)
            clf_array = MLPfromArray(sname=snames[1],
                                     repository=model_repo[1],
                                     **params)

            clf_svm.fit([train_path, test_path], iterations=300)
            clf_array.fit(x_train,
                          y_train,
                          validation_data=[(x_test, y_test)],
                          iterations=300)

            y_pred_svm = clf_svm.predict(test_path)
            y_pred_array = clf_array.predict(x_test)

            # Load from existing model
            params = {
                'nclasses': n_classes,
                'finetuning': True,
                'template': None
            }
            params.update(connec_param)
            clf_svm = MLPfromSVM(sname=snames[0],
                                 repository=model_repo[0],
                                 **params)
            clf_array = MLPfromArray(sname=snames[1],
                                     repository=model_repo[1],
                                     **params)

            assert np.array_equal(y_pred_svm, clf_svm.predict(test_path))
            assert np.array_equal(y_pred_array, clf_array.predict(x_test))
        finally:
            os_utils._remove_files([train_path, test_path])
            os_utils._remove_dirs(model_repo)
Ejemplo n.º 4
0
    def test_lmdb_creation(self):
        """Build LMDB databases from svmlight files and train an MLP on them.

        The digits dataset is standardised, split and dumped to svmlight
        files, which are converted with ``lmdb_utils.create_lmdb_from_svm``.
        An MLP is fitted through ``SVMConnector`` objects pointing at those
        databases and must exceed 0.7 accuracy on the training labels.
        """
        params = nn_params.copy()
        params.update({'nclasses': n_classes})

        # Create dataset
        X, Y = datasets.load_digits(return_X_y=True)
        X = preprocessing.StandardScaler().fit_transform(X)
        x_train, x_test, y_train, y_test = model_selection.train_test_split(
            X, Y, test_size=test_size, random_state=seed)

        # Save data in .svm format
        tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath(
            'x_train.lmdb')
        te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath(
            'x_test.lmdb')
        vocab_path = os.path.abspath('vocab.dat')

        datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
        datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

        lmdb_utils.create_lmdb_from_svm(svm_path=tr_svm_f,
                                        lmdb_path=tr_lmdb_f,
                                        vocab_path=vocab_path,
                                        **params)
        lmdb_utils.create_lmdb_from_svm(svm_path=te_svm_f,
                                        lmdb_path=te_lmdb_f,
                                        **params)

        # Remove the generated files and LMDB directories even when training
        # or the accuracy check fails.
        try:
            tr_lmdb = SVMConnector(path=tr_svm_f,
                                   lmdb_path=tr_lmdb_f,
                                   vocab_path=vocab_path)
            te_lmdb = SVMConnector(path=te_svm_f, lmdb_path=te_lmdb_f)

            optimizer = GenericSolver(solver_type='SGD',
                                      base_lr=0.01,
                                      iterations=100)
            clf = MLP(**params)
            clf.fit(tr_lmdb, validation_data=[te_lmdb], solver=optimizer)

            # The predicted class is the arg-max over the last axis of the
            # probability output.
            ytr_prob = clf.predict_proba(tr_lmdb)
            acc = metrics.accuracy_score(y_train, ytr_prob.argmax(-1))
            assert acc > 0.7
        finally:
            os_utils._remove_files([tr_svm_f, te_svm_f, vocab_path])
            os_utils._remove_dirs([tr_lmdb_f, te_lmdb_f])
Ejemplo n.º 5
0
    def test_classification(self):
        """Fit MLP and LR models through every connector and check accuracy.

        Each model is fitted on one training connector (array, sparse array
        or svm), with or without a validation connector, using a shared
        ``GenericSolver``.  Its accuracy on the training data must exceed
        0.7.
        """
        params = nn_params.copy()
        params.update({'nclasses': n_classes})
        optimizer = GenericSolver(**solver_param)
        datasets.dump_svmlight_file(xtr, ytr, tr_f)
        datasets.dump_svmlight_file(xte, yte, te_f)

        # Remove the dumped files even when fitting or the accuracy check
        # fails, so a failing run does not leave them behind.
        try:
            # Each entry: [training data, validation data list, model]
            clfs = [
                # array connector without validation set
                [xtr_arr, [], MLP(**params)],
                [xtr_arr, [], LR(**params)],

                # sparse array connector without validation set
                [xtr_sparse, [], MLP(**params)],
                [xtr_sparse, [], LR(**params)],

                # svm connector without validation set
                [xtr_svm, [], MLP(**params)],
                [xtr_svm, [], LR(**params)],

                # array connector with validation set
                [xtr_arr, [xte_arr], MLP(**params)],
                [xtr_arr, [xte_arr], LR(**params)],

                # svm connector with validation set
                [xtr_svm, [xte_svm], MLP(**params)],
                [xtr_svm, [xte_svm], LR(**params)],
            ]

            for tr_data, te_data, clf in clfs:
                clf.fit(tr_data, te_data, optimizer)
                y_pred = clf.predict(tr_data)
                acc = metrics.accuracy_score(ytr, y_pred)
                print(acc)
                assert acc > 0.7
        finally:
            os_utils._remove_files([tr_f, te_f])
Ejemplo n.º 6
0
Archivo: core.py Proyecto: ten2net/pyDD
                         service_parameters_input=service_parameters_input,
                         service_parameters_mllib=service_parameters_mllib,
                         service_parameters_output=service_parameters_output,
                         model=model,
                         tmp_dir=None)

    # Training-time parameters handed to `clf._train` below. They are plain
    # dicts: a trailing comma after the literal would silently wrap each one
    # in a 1-tuple.
    train_parameters_input = {"db": True}
    train_parameters_output = {"measure": ["accp", "mcll"]}
    train_parameters_mllib = {
        # Reuse the GPU setting chosen for the service.
        "gpu": service_parameters_mllib["gpu"],
        "solver": {
            "iterations": 100,
            "base_lr": 0.01,
            "solver_type": "SGD"
        },
        "net": {
            "batch_size": 128
        },
    }

    # `async` is a reserved word from Python 3.7 on, so the keyword is passed
    # through dict unpacking; the callee still receives `async=True`.
    clf._train([tr_f],
               train_parameters_input,
               train_parameters_mllib,
               train_parameters_output,
               display_metric_interval=1,
               **{"async": True})

    json_dump = clf._predict_proba([tr_f], parameters_output={"best": -1})

    os_utils._remove_files([tr_f])
Ejemplo n.º 7
0
              y_train,
              validation_data=[(x_test, y_test)],
              iterations=1000,
              test_interval=10)

# Discard the trained estimator objects before re-creating them below.
del clf_svm, clf_array

# Load from existing model
params = dict(
    host='localhost',
    port=8081,
    nclasses=10,
    finetuning=True,
    template=None,
)
clf_svm = MLPfromSVM(
    sname=snames[0],
    repository=model_repo[0],
    **params
)
clf_array = MLPfromArray(
    sname=snames[1],
    repository=model_repo[1],
    **params
)

# Report accuracy and log-loss of each reloaded model on its own input type.
for X, clf in ((test_path, clf_svm), (x_test, clf_array)):
    y_prob = clf.predict_proba(X)
    y_pred = clf.predict(X)
    print('-' * 50)
    print("Model: {}".format(clf))
    print(
        "Model: Accuracy: {}, Loss: {}".format(
            metrics.accuracy_score(y_test, y_pred),
            metrics.log_loss(y_test, y_prob),
        )
    )

# Clean up the dumped data files and the model repositories.
os_utils._remove_files([train_path, test_path])
os_utils._remove_dirs(model_repo)