def test_trainer(self):
    """Exercise forward and backward selection on synthetic linear data."""
    n_rows, n_cols = 50, 10

    # fixed seed keeps the synthetic data (and the selection path) reproducible
    np.random.seed(1238927)
    x = np.random.rand(n_rows, n_cols)
    # target depends on columns 0 and 2 plus uniform noise
    y = (30 * x[:, 0]) - (10 * x[:, 2]) + np.random.rand(1, n_rows)

    t = Trainer(x, y)

    # forward selection seeded with column 1
    t.run_forward_selection([1], None)
    # the seed column must survive forward selection
    self.assertIn(1, t.column_indices)
    # at least one additional column should have been added
    self.assertGreater(len(t.column_indices), 1)

    # forward selection from scratch
    t.run_forward_selection()
    self.assertIn(1, t.column_indices)

    # backward selection starting from the full column set
    t.run_backward_selection(range(0, n_cols), None)
    # column 0 drives the target, so it should remain with high likelihood
    self.assertIn(0, t.column_indices)
    # at least one column should have been pruned from the full set
    self.assertLess(len(t.column_indices), n_cols)
import numpy as np
from lintrain import Trainer
# must be placed outside the package and reference updated to use the parallel trainer
# due to limitation related to relative references
# from parallel.trainer import Trainer

if "__main__" == __name__:
    # generate random data: target depends on columns 0 and 2 plus uniform noise
    num_entries = 50
    num_features = 10
    x = np.random.rand(num_entries, num_features)
    y = (30 * x[:, 0]) - (10 * x[:, 2]) + np.random.rand(1, num_entries)

    # create trainer with verbose debug output
    t = Trainer(x, y)
    t.debug = 2

    # run bidirectional selection seeded with columns 1 and 3
    #t.run_forward_selection()
    #t.run_backward_selection()
    t.run_bidirectional_selection([1, 3])

    # print output — print() with a single argument behaves identically on
    # Python 2 and Python 3, unlike the Py2-only `print x` statement form
    print("COLUMN COEFFICIENTS")
    print(t.fit)
    print("")
    print("COLUMNS USED")
    print(t.column_indices)
    print("")
import numpy as np  # was missing: np.random.rand below would raise NameError
from lintrain import Trainer
from lintrain.solvers import LogisticRegression
# use parallel trainer instead
# from lintrain import ParallelTrainer as Trainer

if "__main__" == __name__:
    # generate random data: continuous signal thresholded into binary labels
    num_entries = 500
    num_features = 10
    x = np.random.rand(num_entries, num_features)
    y = (5 * x[:, 0]) + (2 * x[:, 2]) + np.random.rand(1, num_entries)
    y = 1 * (y > 3.5)  # binarize for logistic regression

    # create trainer with a logistic-regression solver and verbose debug output
    t = Trainer(x, y, solver=LogisticRegression)
    t.debug = 2

    # run forward selection
    t.run_forward_selection()
    #t.run_backward_selection()
    #t.run_bidirectional_selection([1, 3])

    # print output — print() with a single argument behaves identically on
    # Python 2 and Python 3, unlike the Py2-only `print x` statement form
    print("COLUMN COEFFICIENTS")
    print(t.fit)
    print("")
    print("COLUMNS USED")
    print(t.column_indices)
    print("")
import numpy as np
from lintrain import Trainer
from lintrain.solvers import RidgeRegression
# use parallel trainer instead
# from lintrain import ParallelTrainer as Trainer

if "__main__" == __name__:
    # generate random data: target depends on columns 0 and 2 plus uniform noise
    num_entries = 50
    num_features = 10
    x = np.random.rand(num_entries, num_features)
    y = (30 * x[:, 0]) - (10 * x[:, 2]) + np.random.rand(1, num_entries)

    # create trainer with a ridge-regression solver and verbose debug output
    t = Trainer(x, y, solver=RidgeRegression)
    t.debug = 2

    # run bidirectional selection seeded with columns 1 and 3
    #t.run_forward_selection()
    #t.run_backward_selection()
    t.run_bidirectional_selection([1, 3])

    # print output — print() with a single argument behaves identically on
    # Python 2 and Python 3, unlike the Py2-only `print x` statement form
    print("COLUMN COEFFICIENTS")
    print(t.fit)
    print("")
    print("COLUMNS USED")
    print(t.column_indices)
    print("")