def test_sa_to_nd(self):
    """Check that a structured array is cast to the matching 2-D float array.

    Builds a two-row structured array with three float fields named
    'f0', 'f1', 'f2' and verifies that ``utils.cast_np_sa_to_nd`` returns
    an ndarray equal to the same values laid out as a plain float matrix.
    """
    # A concrete list (rather than ``map`` over ``xrange``) keeps the dtype
    # spec valid on Python 3, where ``map`` is lazy and ``xrange`` is gone,
    # while producing the exact same names on Python 2.
    field_names = ['f{}'.format(i) for i in range(3)]
    dtype = np.dtype({'names': field_names, 'formats': [float] * 3})
    sa = np.array([(-1.0, 2.0, -1.0), (0.0, -1.0, 2.0)], dtype=dtype)
    control = np.array([[-1.0, 2.0, -1.0], [0.0, -1.0, 2.0]], dtype=float)
    result = utils.cast_np_sa_to_nd(sa)
    self.assertTrue(np.array_equal(result, control))
def test_sa_to_nd(self):
    """Verify ``utils.cast_np_sa_to_nd`` on a small all-float structured array.

    The input has three float fields ('f0', 'f1', 'f2') and two rows; the
    expected output is the same data as a 2x3 float ndarray.
    """
    # Use a list comprehension over ``range`` instead of ``map``/``xrange``:
    # identical result on Python 2, and still works on Python 3 where
    # ``xrange`` does not exist and ``map`` returns an iterator.
    dtype = np.dtype({
        'names': ['f{}'.format(idx) for idx in range(3)],
        'formats': [float] * 3,
    })
    sa = np.array([(-1.0, 2.0, -1.0), (0.0, -1.0, 2.0)], dtype=dtype)
    control = np.array([[-1.0, 2.0, -1.0], [0.0, -1.0, 2.0]], dtype=float)
    result = utils.cast_np_sa_to_nd(sa)
    self.assertTrue(np.array_equal(result, control))
def __init__(
        self,
        M,
        y,
        clfs=None,
        subsets=None,
        cvs=None,
        trials=None):
    """Store the data matrix, labels and experiment configuration.

    Args:
        M: Feature data. If ``utils.is_sa(M)`` is true, the column names
            are taken from ``M.dtype.names`` and the data is converted
            with ``utils.cast_np_sa_to_nd``. Otherwise ``M`` is assumed to
            be a 2-D ndarray and columns are named 'f0', 'f1', ...
        y: Labels; stored unchanged on ``self.y``.
        clfs: List of dicts stored on ``self.clfs``. Defaults to
            ``[{'clf': RandomForestClassifier}]``.
        subsets: List of dicts stored on ``self.subsets``. Defaults to
            ``[{'subset': SubsetNoSubset}]``.
        cvs: List of dicts stored on ``self.cvs``. Defaults to
            ``[{'cv': NoCV}]``.
        trials: Stored unchanged on ``self.trials``.

    Passing ``None`` for ``clfs``, ``subsets`` or ``cvs`` selects the
    default for that argument.
    """
    # Build the defaults per call: list literals in the signature would be
    # created once and shared (and mutable) across every instance.
    if clfs is None:
        clfs = [{'clf': RandomForestClassifier}]
    if subsets is None:
        subsets = [{'subset': SubsetNoSubset}]
    if cvs is None:
        cvs = [{'cv': NoCV}]

    if utils.is_sa(M):
        self.col_names = M.dtype.names
        self.M = utils.cast_np_sa_to_nd(M)
    else:
        # assuming an nd_array
        self.M = M
        # ``range`` (not ``xrange``) so this also runs on Python 3.
        self.col_names = ['f{}'.format(i) for i in range(M.shape[1])]
    self.y = y
    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials
def test_get_top_features(self):
    """Compare ``comm.get_top_features`` output against recorded scores.

    Fits a seeded ``RandomForestClassifier`` on a split of a generated
    test matrix and checks the returned (feat_name, score) table against
    ten hard-coded rows.
    """
    M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
    M = utils.cast_np_sa_to_nd(M)
    # Only the training portion of the split is needed here.
    # NOTE(review): the split is not given a random_state -- confirm the
    # recorded scores below do not depend on it.
    M_train, _, labels_train, _ = train_test_split(M, labels)
    clf = RandomForestClassifier(random_state=0)
    clf.fit(M_train, labels_train)
    res = comm.get_top_features(clf, M, verbose=False)
    expected_rows = [
        ('f5', 0.0773838526068),
        ('f13', 0.0769596713039),
        ('f8', 0.0751584839431),
        ('f6', 0.0730815879102),
        ('f11', 0.0684456133071),
        ('f9', 0.0666747414603),
        ('f10', 0.0659621889608),
        ('f7', 0.0657988099065),
        ('f2', 0.0634000069218),
        ('f0', 0.0632912268319),
    ]
    ctrl = utils.convert_to_sa(
        expected_rows, col_names=('feat_name', 'score'))
    self.assertTrue(uft.array_equal(ctrl, res))
def test_get_top_features(self):
    """Check the feature ranking from ``comm.get_top_features``.

    A ``RandomForestClassifier`` seeded with ``random_state=0`` is fitted
    on the training part of a generated matrix; the resulting table is
    expected to equal the ten (feat_name, score) rows listed below.
    """
    M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
    M = utils.cast_np_sa_to_nd(M)
    # NOTE(review): train_test_split is called without a random_state;
    # verify the hard-coded scores are stable across runs.
    split = train_test_split(M, labels)
    M_train, labels_train = split[0], split[2]
    clf = RandomForestClassifier(random_state=0)
    clf.fit(M_train, labels_train)
    res = comm.get_top_features(clf, M, verbose=False)
    column_names = ('feat_name', 'score')
    ctrl = utils.convert_to_sa(
        [
            ('f5', 0.0773838526068),
            ('f13', 0.0769596713039),
            ('f8', 0.0751584839431),
            ('f6', 0.0730815879102),
            ('f11', 0.0684456133071),
            ('f9', 0.0666747414603),
            ('f10', 0.0659621889608),
            ('f7', 0.0657988099065),
            ('f2', 0.0634000069218),
            ('f0', 0.0632912268319),
        ],
        col_names=column_names)
    self.assertTrue(uft.array_equal(ctrl, res))