Example #1
0
    def test_lmdb_creation(self):
        """Train an MLP from LMDB-backed SVM connectors and check its accuracy.

        The digits dataset is standardized, split into train/test parts and
        dumped as svmlight files in the current working directory. LMDB
        databases are then built from those files, an ``MLP`` is fitted
        through ``SVMConnector`` inputs (the test split is used as validation
        data), and the accuracy measured on the training split must be
        greater than 0.7.

        Every file and directory generated by the test is removed when the
        test ends, whether it passes or fails.
        """
        # Work on a copy so that updating 'nclasses' does not mutate nn_params.
        params = nn_params.copy()
        params.update({'nclasses': n_classes})

        # Create dataset
        X, Y = datasets.load_digits(return_X_y=True)
        X = preprocessing.StandardScaler().fit_transform(X)
        x_train, x_test, y_train, y_test = model_selection.train_test_split(
            X, Y, test_size=test_size, random_state=seed)

        # Paths of the artifacts; they are resolved against the current
        # working directory.
        tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath(
            'x_train.lmdb')
        te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath(
            'x_test.lmdb')
        vocab_path = os.path.abspath('vocab.dat')

        try:
            # Save data in .svm format
            datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
            datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

            # Only the training conversion is given a vocab_path.
            lmdb_utils.create_lmdb_from_svm(svm_path=tr_svm_f,
                                            lmdb_path=tr_lmdb_f,
                                            vocab_path=vocab_path,
                                            **params)
            lmdb_utils.create_lmdb_from_svm(svm_path=te_svm_f,
                                            lmdb_path=te_lmdb_f,
                                            **params)

            tr_lmdb = SVMConnector(path=tr_svm_f,
                                   lmdb_path=tr_lmdb_f,
                                   vocab_path=vocab_path)
            te_lmdb = SVMConnector(path=te_svm_f, lmdb_path=te_lmdb_f)

            optimizer = GenericSolver(solver_type='SGD',
                                      base_lr=0.01,
                                      iterations=100)
            clf = MLP(**params)
            clf.fit(tr_lmdb, validation_data=[te_lmdb], solver=optimizer)

            # Accuracy is evaluated on the training split.
            ytr_prob = clf.predict_proba(tr_lmdb)
            acc = metrics.accuracy_score(y_train, ytr_prob.argmax(-1))
            assert acc > 0.7
        finally:
            # Clean up even when a step above raised or the assertion failed,
            # so stale artifacts never leak into the working directory. Only
            # existing paths are passed on, so that cleanup after an early
            # failure is not asked to remove something that was never created.
            stale_files = [path for path in (tr_svm_f, te_svm_f, vocab_path)
                           if os.path.exists(path)]
            stale_dirs = [path for path in (tr_lmdb_f, te_lmdb_f)
                          if os.path.exists(path)]
            os_utils._remove_files(stale_files)
            os_utils._remove_dirs(stale_dirs)
Example #2
0
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # Save data in .svm format
    tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath(
        'x_train.lmdb')
    te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath(
        'x_test.lmdb')
    vocab_path = os.path.abspath('vocab.dat')

    datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
    datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

    # create lmdb and vocab file
    create_lmdb_from_svm(svm_path=tr_svm_f,
                         lmdb_path=tr_lmdb_f,
                         vocab_path=vocab_path,
                         **params)
    create_lmdb_from_svm(svm_path=te_svm_f, lmdb_path=te_lmdb_f, **params)

    tr_data = SVMConnector(path=tr_svm_f,
                           lmdb_path=tr_lmdb_f,
                           vocab_path=vocab_path)
    te_data = SVMConnector(path=tr_svm_f, lmdb_path=tr_lmdb_f)

    optimizer = GenericSolver(solver_type='SGD', base_lr=0.01, iterations=100)
    clf = MLP(**params)
    clf.fit(tr_data, validation_data=[te_data], solver=optimizer)

    y_pred_lmdb = clf.predict_proba(te_data)