def test_predict_from_model_array(self):
    params = nn_params.copy()
    params.update({'nclasses': n_classes})
    optimizer = GenericSolver(**solver_param)
    datasets.dump_svmlight_file(xtr, ytr, tr_f)
    datasets.dump_svmlight_file(xte, yte, te_f)

    # Train model
    clf = MLP(**params)
    clf.fit(xtr_arr, validation_data=[xte_arr], solver=optimizer)
    y_pred_tr = clf.predict(xtr_arr)
    y_pred_te = clf.predict(xte_arr)

    # Load from trained model
    params = nn_params.copy()
    params.update({
        'finetuning': True,
        'template': None,
        'nclasses': n_classes
    })
    clf = MLP(sname=clf.sname, repository=clf.model['repository'], **params)

    # The reloaded service must reproduce the original predictions exactly
    assert np.array_equal(y_pred_tr, clf.predict(xtr_arr))
    assert np.array_equal(y_pred_te, clf.predict(xte_arr))

    os_utils._remove_files([tr_f, te_f])
def test_lmdb_creation(self):
    params = nn_params.copy()
    params.update({'nclasses': n_classes})

    # Create dataset
    X, Y = datasets.load_digits(return_X_y=True)
    X = preprocessing.StandardScaler().fit_transform(X)
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        X, Y, test_size=test_size, random_state=seed)

    # Save data in .svm format
    tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath('x_train.lmdb')
    te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath('x_test.lmdb')
    vocab_path = os.path.abspath('vocab.dat')

    datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
    datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

    # Create lmdb and vocab files from the svm files
    lmdb_utils.create_lmdb_from_svm(svm_path=tr_svm_f, lmdb_path=tr_lmdb_f,
                                    vocab_path=vocab_path, **params)
    lmdb_utils.create_lmdb_from_svm(svm_path=te_svm_f, lmdb_path=te_lmdb_f, **params)

    tr_lmdb = SVMConnector(path=tr_svm_f, lmdb_path=tr_lmdb_f, vocab_path=vocab_path)
    te_lmdb = SVMConnector(path=te_svm_f, lmdb_path=te_lmdb_f)

    optimizer = GenericSolver(solver_type='SGD', base_lr=0.01, iterations=100)
    clf = MLP(**params)
    clf.fit(tr_lmdb, validation_data=[te_lmdb], solver=optimizer)

    ytr_prob = clf.predict_proba(tr_lmdb)
    acc = metrics.accuracy_score(y_train, ytr_prob.argmax(-1))
    assert acc > 0.7

    os_utils._remove_files([tr_svm_f, te_svm_f, vocab_path])
    os_utils._remove_dirs([tr_lmdb_f, te_lmdb_f])
def test_classification(self):
    params = nn_params.copy()
    params.update({'nclasses': n_classes})
    optimizer = GenericSolver(**solver_param)

    datasets.dump_svmlight_file(xtr, ytr, tr_f)
    datasets.dump_svmlight_file(xte, yte, te_f)

    clfs = [
        # array connector without validation set
        [xtr_arr, [], MLP(**params)],
        [xtr_arr, [], LR(**params)],
        # sparse array connector without validation set
        [xtr_sparse, [], MLP(**params)],
        [xtr_sparse, [], LR(**params)],
        # svm connector without validation set
        [xtr_svm, [], MLP(**params)],
        [xtr_svm, [], LR(**params)],
        # array connector with validation set
        [xtr_arr, [xte_arr], MLP(**params)],
        [xtr_arr, [xte_arr], LR(**params)],
        # svm connector with validation set
        [xtr_svm, [xte_svm], MLP(**params)],
        [xtr_svm, [xte_svm], LR(**params)],
    ]

    for tr_data, te_data, clf in clfs:
        clf.fit(tr_data, te_data, optimizer)
        y_pred = clf.predict(tr_data)
        acc = metrics.accuracy_score(ytr, y_pred)
        print(acc)
        assert acc > 0.7

    os_utils._remove_files([tr_f, te_f])
def create_lmdb_from_svm(svm_path, lmdb_path, vocab_path=None,
                         host='localhost', port=8085, nclasses=2,
                         gpu=True, tmp_folder=None):
    if os.path.exists(lmdb_path):
        print("warning: {} exists, overwriting it".format(lmdb_path))

    tmp_folder = tempfile.mkdtemp(prefix="pydd_", dir=tmp_folder) \
        if tmp_folder else tempfile.mkdtemp(prefix="pydd_")

    # Run a single training iteration: DeepDetect converts the svm data to
    # lmdb (and builds the vocabulary) as a side effect of fitting.
    train_data = SVMConnector(path=svm_path)
    optimizer = GenericSolver(solver_type='SGD', base_lr=0.01, iterations=1)
    clf = MLP(host=host, port=port, nclasses=nclasses, gpu=gpu, repository=tmp_folder)
    clf.fit(train_data, solver=optimizer)

    # Move the generated files out of the temporary repository
    shutil.move(os.path.join(tmp_folder, "train.lmdb"), lmdb_path)
    if vocab_path:
        shutil.move(os.path.join(tmp_folder, "vocab.dat"), vocab_path)

    # delete service
    clf.delete_service(clf.sname, clear='lib')

    # delete tmp_folder
    shutil.rmtree(tmp_folder)

    return lmdb_path, vocab_path
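# Usage sketch for create_lmdb_from_svm (illustrative, not part of the
# library): assumes a DeepDetect server listening on localhost:8085 and an
# existing 'digits.svm' file; all file names here are placeholders.
lmdb_f, vocab_f = create_lmdb_from_svm(
    svm_path=os.path.abspath('digits.svm'),
    lmdb_path=os.path.abspath('digits.lmdb'),
    vocab_path=os.path.abspath('digits_vocab.dat'),
    nclasses=10)

# The returned paths plug straight into an SVMConnector, so later fits can
# reuse the lmdb instead of re-converting the svm file:
data = SVMConnector(path=os.path.abspath('digits.svm'),
                    lmdb_path=lmdb_f, vocab_path=vocab_f)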
# Parameters
seed = 1337
n_classes = 10
repository = "/tmp/pydd_test"
params = {
    "repository": repository,
    "port": 8080,
    "nclasses": n_classes,
    "gpu": True
}
split_params = {"test_size": 0.2, "random_state": seed}
np.random.seed(seed)  # for reproducibility

solver = GenericSolver(iterations=1000, solver_type="SGD", base_lr=0.01,
                       gamma=0.1, stepsize=30, momentum=0.9, snapshot=200)

# one class weight value per class
class_weights = [1.] * n_classes

# remove the repository if it exists, then recreate it
if os.path.exists(repository):
    os_utils._remove_dirs([repository])
os.makedirs(repository)

# create dataset
X, y = datasets.load_digits(n_class=n_classes, return_X_y=True)
X = preprocessing.StandardScaler().fit_transform(X)
xtr, xte, ytr, yte = model_selection.train_test_split(X, y, **split_params)
X, y = datasets.load_digits(n_class=n_classes, return_X_y=True)
X = preprocessing.StandardScaler().fit_transform(X)
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=test_size, random_state=seed)

tr_f = os.path.abspath('x_train.svm')
te_f = os.path.abspath('x_test.svm')
datasets.dump_svmlight_file(x_train, y_train, tr_f)
datasets.dump_svmlight_file(x_test, y_test, te_f)

# train_data = ArrayConnector(x_train, y_train)
# val_data = ArrayConnector(x_train, y_train)
train_data = SVMConnector(tr_f)
val_data = SVMConnector(te_f)

clf = MLP(host=host, port=port, nclasses=n_classes, layers=[100], gpu=gpu)
solver = GenericSolver(iterations=iteration, test_interval=30,
                       solver_type="SGD", base_lr=lr)
clf.fit(train_data, validation_data=[val_data], solver=solver)
clf.predict_proba(train_data)

clf.fit(train_data, validation_data=[val_data], solver=solver)
y_pred = clf.predict_proba(train_data)

clf = LR(host=host, port=port, nclasses=n_classes, gpu=gpu)
solver = GenericSolver(iterations=iteration, solver_type="SGD", base_lr=lr)
clf.fit(train_data, solver=solver)
y_pred = clf.predict_proba(train_data)

clf = XGB(host=host, port=port, nclasses=n_classes)
# logs = clf.fit(train_data, validation_data=[val_data])

os_utils._remove_files([tr_f, te_f])
xtr, xte, ytr, yte = model_selection.train_test_split(
    X, y, test_size=test_size, random_state=seed)

# create and save train.svm and test.svm
tr_f = os.path.abspath('x_train.svm')
te_f = os.path.abspath('x_test.svm')
datasets.dump_svmlight_file(xtr, ytr, tr_f)
datasets.dump_svmlight_file(xte, yte, te_f)

# create connectors
xtr_svm, xte_svm = SVMConnector(tr_f), SVMConnector(te_f)

# train model
params = {'host': host, 'port': port, 'nclasses': nclasses, 'layers': [100]}
optimizer = GenericSolver(solver_type='SGD', iterations=500, base_lr=0.1, snapshot=100)
clf = MLP(sname=sname, repository=model_repo, **params)
clf.fit(xtr_svm, validation_data=[xte_svm, xtr_svm], solver=optimizer)
del clf

# load the pre-trained model
params = {
    'host': host,
    'port': port,
    'nclasses': nclasses,
    'finetuning': True,
    'template': None
}
clf = MLP(sname=sname, repository=model_repo, **params)
ytr_pred, yte_pred = clf.predict(xtr_svm), clf.predict(xte_svm)
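# Sanity check (a sketch, assuming `metrics` is imported from sklearn as in
# the other snippets): the reloaded service should score the held-out data
# about as well as the model trained above did.
print('train accuracy:', metrics.accuracy_score(ytr, ytr_pred))
print('test accuracy:', metrics.accuracy_score(yte, yte_pred))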
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)

# Save data in .svm format
tr_svm_f, tr_lmdb_f = os.path.abspath('x_train.svm'), os.path.abspath('x_train.lmdb')
te_svm_f, te_lmdb_f = os.path.abspath('x_test.svm'), os.path.abspath('x_test.lmdb')
vocab_path = os.path.abspath('vocab.dat')

datasets.dump_svmlight_file(x_train, y_train, tr_svm_f)
datasets.dump_svmlight_file(x_test, y_test, te_svm_f)

# create lmdb and vocab file
create_lmdb_from_svm(svm_path=tr_svm_f, lmdb_path=tr_lmdb_f,
                     vocab_path=vocab_path, **params)
create_lmdb_from_svm(svm_path=te_svm_f, lmdb_path=te_lmdb_f, **params)

tr_data = SVMConnector(path=tr_svm_f, lmdb_path=tr_lmdb_f, vocab_path=vocab_path)
te_data = SVMConnector(path=te_svm_f, lmdb_path=te_lmdb_f)

optimizer = GenericSolver(solver_type='SGD', base_lr=0.01, iterations=100)
clf = MLP(**params)
clf.fit(tr_data, validation_data=[te_data], solver=optimizer)
y_pred_lmdb = clf.predict_proba(te_data)
X, y = datasets.load_digits(n_class=n_classes, return_X_y=True)
X = preprocessing.StandardScaler().fit_transform(X)
xtr, xte, ytr, yte = model_selection.train_test_split(X, y, **split_params)

# create and save train.svm and test.svm
tr_f = os.path.abspath('x_train.svm')
te_f = os.path.abspath('x_test.svm')
datasets.dump_svmlight_file(xtr, ytr, tr_f)
datasets.dump_svmlight_file(xte, yte, te_f)

# Define model, solver and class weights
clf = MLP(**params)
solver = GenericSolver(iterations=500, solver_type="SGD", base_lr=0.01,
                       gamma=0.1, stepsize=30, momentum=0.9)

# one class weight value for each class
class_weights = [1.] * n_classes

train_data, test_data = SVMConnector(path=tr_f), SVMConnector(path=te_f)
logs = clf.fit(train_data, validation_data=[test_data], solver=solver,
               class_weights=class_weights, batch_size=128)

yte_pred = clf.predict(test_data)
report = metrics.classification_report(yte, yte_pred)
print(report)
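# Sketch of non-uniform weighting (values illustrative): up-weight class 0
# relative to the others so that misclassifying it costs more in the loss.
# The list must hold one weight per class, in class-index order; in practice
# this would target a fresh service/repository rather than reusing `params`
# verbatim.
imbalanced_weights = [5.] + [1.] * (n_classes - 1)
clf_weighted = MLP(**params)
logs = clf_weighted.fit(train_data, validation_data=[test_data], solver=solver,
                        class_weights=imbalanced_weights, batch_size=128)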