def test_lmdb_creation(self):
    """Fit an MLP on LMDB-backed connectors built from SVM-light dumps.

    The digits dataset is scaled with ``StandardScaler``, split into train
    and test parts, and dumped to ``x_train.svm`` / ``x_test.svm`` in the
    current working directory. Both dumps are passed to
    ``lmdb_utils.create_lmdb_from_svm`` (the vocabulary path is supplied for
    the training set only) and wrapped in ``SVMConnector`` objects. An
    ``MLP`` is then fitted with an SGD ``GenericSolver`` and the accuracy of
    its predictions on the training split must exceed 0.7.

    Every artefact written by the test is removed in a ``finally`` clause,
    so a failing step or a failing assertion does not leave files behind.
    """
    params = nn_params.copy()
    params.update({'nclasses': n_classes})

    # Create dataset
    X, Y = datasets.load_digits(return_X_y=True)
    X = preprocessing.StandardScaler().fit_transform(X)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # Absolute paths of everything this test writes to disk.
    tr_svm_f = os.path.abspath('x_train.svm')
    tr_lmdb_f = os.path.abspath('x_train.lmdb')
    te_svm_f = os.path.abspath('x_test.svm')
    te_lmdb_f = os.path.abspath('x_test.lmdb')
    vocab_path = os.path.abspath('vocab.dat')

    try:
        # Save data in .svm format
        datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
        datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

        # Build the LMDB databases; only the training call receives
        # vocab_path.
        lmdb_utils.create_lmdb_from_svm(svm_path=tr_svm_f,
                                        lmdb_path=tr_lmdb_f,
                                        vocab_path=vocab_path,
                                        **params)
        lmdb_utils.create_lmdb_from_svm(svm_path=te_svm_f,
                                        lmdb_path=te_lmdb_f,
                                        **params)

        tr_lmdb = SVMConnector(path=tr_svm_f, lmdb_path=tr_lmdb_f,
                               vocab_path=vocab_path)
        te_lmdb = SVMConnector(path=te_svm_f, lmdb_path=te_lmdb_f)

        optimizer = GenericSolver(solver_type='SGD', base_lr=0.01,
                                  iterations=100)
        clf = MLP(**params)
        clf.fit(tr_lmdb, validation_data=[te_lmdb], solver=optimizer)

        # Accuracy is measured on the training split.
        ytr_prob = clf.predict_proba(tr_lmdb)
        acc = metrics.accuracy_score(y_train, ytr_prob.argmax(-1))
        assert acc > 0.7
    finally:
        # Clean up even when a step above raised. Only existing paths are
        # handed over because an early failure may leave some artefacts
        # uncreated.
        # NOTE(review): how the os_utils helpers react to missing paths is
        # not visible here - the existence filter is a precaution.
        os_utils._remove_files(
            [p for p in (tr_svm_f, te_svm_f, vocab_path)
             if os.path.exists(p)])
        os_utils._remove_dirs(
            [p for p in (tr_lmdb_f, te_lmdb_f) if os.path.exists(p)])
x_train, x_test, y_train, y_test = model_selection.train_test_split( X, Y, test_size=test_size, random_state=seed) # Save data in .svm format tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath( 'x_train.lmdb') te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath( 'x_test.lmdb') vocab_path = os.path.abspath('vocab.dat') datasets.dump_svmlight_file(x_train, y_train, tr_svm_f) datasets.dump_svmlight_file(x_test, y_test, te_svm_f) # create lmdb and vocab file create_lmdb_from_svm(svm_path=tr_svm_f, lmdb_path=tr_lmdb_f, vocab_path=vocab_path, **params) create_lmdb_from_svm(svm_path=te_svm_f, lmdb_path=te_lmdb_f, **params) tr_data = SVMConnector(path=tr_svm_f, lmdb_path=tr_lmdb_f, vocab_path=vocab_path) te_data = SVMConnector(path=tr_svm_f, lmdb_path=tr_lmdb_f) optimizer = GenericSolver(solver_type='SGD', base_lr=0.01, iterations=100) clf = MLP(**params) clf.fit(tr_data, validation_data=[te_data], solver=optimizer) y_pred_lmdb = clf.predict_proba(te_data)