Example #1
    def test_predict_from_model_array(self):

        params = nn_params.copy()
        params.update({'nclasses': n_classes})
        optimizer = GenericSolver(**solver_param)
        datasets.dump_svmlight_file(xtr, ytr, tr_f)
        datasets.dump_svmlight_file(xte, yte, te_f)

        # Train model
        clf = MLP(**params)
        clf.fit(xtr_arr, validation_data=[xte_arr], solver=optimizer)
        y_pred_tr = clf.predict(xtr_arr)
        y_pred_te = clf.predict(xte_arr)

        # Load from trained model
        params = nn_params.copy()
        params.update({
            'finetuning': True,
            'template': None,
            'nclasses': n_classes
        })
        clf = MLP(sname=clf.sname,
                  repository=clf.model['repository'],
                  **params)

        assert np.array_equal(y_pred_tr, clf.predict(xtr_arr))
        assert np.array_equal(y_pred_te, clf.predict(xte_arr))
        os_utils._remove_files([tr_f, te_f])
Example #2
    def test_lmdb_creation(self):

        params = nn_params.copy()
        params.update({'nclasses': n_classes})

        # Create dataset
        X, Y = datasets.load_digits(return_X_y=True)
        X = preprocessing.StandardScaler().fit_transform(X)
        x_train, x_test, y_train, y_test = model_selection.train_test_split(
            X, Y, test_size=test_size, random_state=seed)

        # Save data in .svm format
        tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath(
            'x_train.lmdb')
        te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath(
            'x_test.lmdb')
        vocab_path = os.path.abspath('vocab.dat')

        datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
        datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

        lmdb_utils.create_lmdb_from_svm(svm_path=tr_svm_f,
                                        lmdb_path=tr_lmdb_f,
                                        vocab_path=vocab_path,
                                        **params)
        lmdb_utils.create_lmdb_from_svm(svm_path=te_svm_f,
                                        lmdb_path=te_lmdb_f,
                                        **params)

        tr_lmdb = SVMConnector(path=tr_svm_f,
                               lmdb_path=tr_lmdb_f,
                               vocab_path=vocab_path)
        te_lmdb = SVMConnector(path=te_svm_f, lmdb_path=te_lmdb_f)

        optimizer = GenericSolver(solver_type='SGD',
                                  base_lr=0.01,
                                  iterations=100)
        clf = MLP(**params)
        clf.fit(tr_lmdb, validation_data=[te_lmdb], solver=optimizer)

        ytr_prob = clf.predict_proba(tr_lmdb)
        acc = metrics.accuracy_score(y_train, ytr_prob.argmax(-1))
        assert acc > 0.7

        os_utils._remove_files([tr_svm_f, te_svm_f, vocab_path])
        os_utils._remove_dirs([tr_lmdb_f, te_lmdb_f])
Example #3
    def test_classification(self):

        params = nn_params.copy()
        params.update({'nclasses': n_classes})
        optimizer = GenericSolver(**solver_param)
        datasets.dump_svmlight_file(xtr, ytr, tr_f)
        datasets.dump_svmlight_file(xte, yte, te_f)

        clfs = [
            # array connector without validation set
            [xtr_arr, [], MLP(**params)],
            [xtr_arr, [], LR(**params)],

            # sparse array connector without validation set
            [xtr_sparse, [], MLP(**params)],
            [xtr_sparse, [], LR(**params)],

            # svm connector without validation set
            [xtr_svm, [], MLP(**params)],
            [xtr_svm, [], LR(**params)],

            # array connector with validation set
            [xtr_arr, [xte_arr], MLP(**params)],
            [xtr_arr, [xte_arr], LR(**params)],

            # svm connector with validation set
            [xtr_svm, [xte_svm], MLP(**params)],
            [xtr_svm, [xte_svm], LR(**params)],
        ]

        for tr_data, te_data, clf in clfs:
            clf.fit(tr_data, te_data, optimizer)
            y_pred = clf.predict(tr_data)
            acc = metrics.accuracy_score(ytr, y_pred)
            print(acc)
            assert acc > 0.7

        os_utils._remove_files([tr_f, te_f])
Example #4
def create_lmdb_from_svm(svm_path,
                         lmdb_path,
                         vocab_path=None,
                         host='localhost',
                         port=8085,
                         nclasses=2,
                         gpu=True,
                         tmp_folder=None):

    if os.path.exists(lmdb_path):
        print("warning: {} exist, overwriting it".format(lmdb_path))

    # dir=None falls back to the system default temp directory
    tmp_folder = tempfile.mkdtemp(prefix="pydd_", dir=tmp_folder)

    train_data = SVMConnector(path=svm_path)
    optimizer = GenericSolver(solver_type='SGD', base_lr=0.01, iterations=1)

    clf = MLP(host=host,
              port=port,
              nclasses=nclasses,
              gpu=gpu,
              repository=tmp_folder)
    clf.fit(train_data, solver=optimizer)

    shutil.move(os.path.join(tmp_folder, "train.lmdb"), lmdb_path)
    if vocab_path:
        shutil.move(os.path.join(tmp_folder, "vocab.dat"), vocab_path)

    # delete service
    clf.delete_service(clf.sname, clear='lib')

    # delete tmp_folder
    shutil.rmtree(tmp_folder)

    return lmdb_path, vocab_path
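
A minimal usage sketch for the helper above; the paths are hypothetical, and it assumes a DeepDetect server is reachable at the function's defaults (localhost:8085):

tr_lmdb, vocab = create_lmdb_from_svm(
    svm_path='/tmp/x_train.svm',    # hypothetical input .svm file
    lmdb_path='/tmp/x_train.lmdb',  # destination LMDB directory
    vocab_path='/tmp/vocab.dat',    # destination vocabulary file
    nclasses=10)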
Example #5
# Parameters
seed = 1337
n_classes = 10
repository = "/tmp/pydd_test"
params = {
    "repository": repository,
    "port": 8080,
    "nclasses": n_classes,
    "gpu": True
}
split_params = {"test_size": 0.2, "random_state": seed}
np.random.seed(seed)  # for reproducibility
solver = GenericSolver(iterations=1000,
                       solver_type="SGD",
                       base_lr=0.01,
                       gamma=0.1,
                       stepsize=30,
                       momentum=0.9,
                       snapshot=200)
class_weights = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]

# remove repository if it exists, then recreate it
if os.path.exists(repository):
    os_utils._remove_dirs([repository])
os.makedirs(repository)

# create dataset
X, y = datasets.load_digits(n_class=n_classes, return_X_y=True)
X = preprocessing.StandardScaler().fit_transform(X)
xtr, xte, ytr, yte = model_selection.train_test_split(X, y, **split_params)
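
The setup above stops at the train/test split. A hedged continuation, assuming the ArrayConnector API hinted at in the commented-out lines of Example #6, would wrap the arrays and train with the solver and class weights defined above:

# ArrayConnector(X, y) is an assumption based on the commented-out
# lines in Example #6
train_data = ArrayConnector(xtr, ytr)
test_data = ArrayConnector(xte, yte)

clf = MLP(**params)
clf.fit(train_data, validation_data=[test_data], solver=solver,
        class_weights=class_weights)
yte_pred = clf.predict(test_data)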
Example #6
    X, y = datasets.load_digits(n_class=n_classes, return_X_y=True)
    X = preprocessing.StandardScaler().fit_transform(X)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, y, test_size=test_size, random_state=seed)
    tr_f = os.path.abspath('x_train.svm')
    te_f = os.path.abspath('x_test.svm')
    datasets.dump_svmlight_file(x_train, y_train, tr_f)
    datasets.dump_svmlight_file(x_test, y_test, te_f)

    # train_data = ArrayConnector(x_train, y_train)
    # val_data = ArrayConnector(x_train, y_train)
    train_data = SVMConnector(tr_f)
    val_data = SVMConnector(te_f)

    clf = MLP(host=host, port=port, nclasses=n_classes, layers=[100], gpu=gpu)
    solver = GenericSolver(iterations=iteration, test_interval=30, solver_type="SGD", base_lr=lr)
    clf.fit(train_data, validation_data=[val_data], solver=solver)
    clf.predict_proba(train_data)

    # fit a second time on the same service and predict again
    clf.fit(train_data, validation_data=[val_data], solver=solver)
    y_pred = clf.predict_proba(train_data)

    clf = LR(host=host, port=port, nclasses=n_classes, gpu=gpu)
    solver = GenericSolver(iterations=iteration, solver_type="SGD", base_lr=lr)
    clf.fit(train_data, solver=solver)
    y_pred = clf.predict_proba(train_data)

    clf = XGB(host=host, port=port, nclasses=n_classes)
    # logs = clf.fit(train_data, validation_data=[val_data])

    os_utils._remove_files([tr_f, te_f])
Example #7
xtr, xte, ytr, yte = model_selection.train_test_split(X, y,
                                                      test_size=test_size,
                                                      random_state=seed)

# create and save train.svm and test.svm
tr_f = os.path.abspath('x_train.svm')
te_f = os.path.abspath('x_test.svm')
datasets.dump_svmlight_file(xtr, ytr, tr_f)
datasets.dump_svmlight_file(xte, yte, te_f)

# create connectors
xtr_svm, xte_svm = SVMConnector(tr_f), SVMConnector(te_f)

# train model
params = {'host': host, 'port': port, 'nclasses': nclasses, 'layers': [100]}
optimizer = GenericSolver(solver_type='SGD',
                          iterations=500,
                          base_lr=0.1,
                          snapshot=100)
clf = MLP(sname=sname, repository=model_repo, **params)
clf.fit(xtr_svm, validation_data=[xte_svm, xtr_svm], solver=optimizer)
del clf

# load pre-trained model
params = {
    'host': host,
    'port': port,
    'nclasses': nclasses,
    'finetuning': True,
    'template': None
}
clf = MLP(sname=sname, repository=model_repo, **params)
ytr_pred, yte_pred = clf.predict(xtr_svm), clf.predict(xte_svm)
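
A hedged sanity check on the reloaded service, reusing the sklearn metrics module imported by the other examples (ytr and yte come from the split at the top of this snippet):

print(metrics.accuracy_score(ytr, ytr_pred))
print(metrics.accuracy_score(yte, yte_pred))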
Example #8
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # Save data in .svm format
    tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath(
        'x_train.lmdb')
    te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath(
        'x_test.lmdb')
    vocab_path = os.path.abspath('vocab.dat')

    datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
    datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

    # create lmdb and vocab file
    create_lmdb_from_svm(svm_path=tr_svm_f,
                         lmdb_path=tr_lmdb_f,
                         vocab_path=vocab_path,
                         **params)
    create_lmdb_from_svm(svm_path=te_svm_f, lmdb_path=te_lmdb_f, **params)

    tr_data = SVMConnector(path=tr_svm_f,
                           lmdb_path=tr_lmdb_f,
                           vocab_path=vocab_path)
    te_data = SVMConnector(path=te_svm_f, lmdb_path=te_lmdb_f)

    optimizer = GenericSolver(solver_type='SGD', base_lr=0.01, iterations=100)
    clf = MLP(**params)
    clf.fit(tr_data, validation_data=[te_data], solver=optimizer)

    y_pred_lmdb = clf.predict_proba(te_data)
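
A hedged accuracy check mirroring Example #2: predict_proba returns per-class probabilities, so argmax over the last axis recovers labels, which can be compared against y_test from the split above:

acc = metrics.accuracy_score(y_test, y_pred_lmdb.argmax(-1))
assert acc > 0.7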
Example #9
X, y = datasets.load_digits(n_class=n_classes, return_X_y=True)
X = preprocessing.StandardScaler().fit_transform(X)
xtr, xte, ytr, yte = model_selection.train_test_split(X, y, **split_params)

# create and save train.svm and test.svm
tr_f = os.path.abspath('x_train.svm')
te_f = os.path.abspath('x_test.svm')
datasets.dump_svmlight_file(xtr, ytr, tr_f)
datasets.dump_svmlight_file(xte, yte, te_f)

# Define models and class weights
clf = MLP(**params)

solver = GenericSolver(iterations=500,
                       solver_type="SGD",
                       base_lr=0.01,
                       gamma=0.1,
                       stepsize=30,
                       momentum=0.9)
# one class weight value for each class
class_weights = [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]

train_data, test_data = SVMConnector(path=tr_f), SVMConnector(path=te_f)

logs = clf.fit(train_data,
               validation_data=[test_data],
               solver=solver,
               class_weights=class_weights,
               batch_size=128)
yte_pred = clf.predict(test_data)
report = metrics.classification_report(yte, yte_pred)
print(report)