def combine_cols(M, lambd, col_names):
    """Build a single new column by applying *lambd* to columns of ``M``.

    Parameters
    ----------
    M : numpy structured array
        Table whose named columns are passed to ``lambd``.
        (Assumed structured — columns are accessed as ``M[name]``.)
    lambd : list of np.array > np.array
        Function that takes a list of columns and produces a single column.
    col_names : list of str
        Names of columns to combine

    Returns
    -------
    np.array
        Whatever single column ``lambd`` produces.
    """
    # Validate that every requested column actually exists in M.
    utils.check_consistent(M, col_names=col_names)
    selected = [M[name] for name in col_names]
    return lambd(*selected)
def test_check_consistent(self):
    """check_consistent passes valid inputs through and rejects bad ones."""
    sa = np.array(
        [(1, 'a', 100), (2, 'b', 200)],
        dtype=[('f0', int), ('f1', 'O'), ('f2', int)])
    column = np.array([1.0, 2.0])
    names = ['f0', 'f1']

    # Valid argument combinations are returned unchanged.
    self.assertEqual(
        utils.check_consistent(sa, column, names),
        [sa, column, names])
    self.assertEqual(
        utils.check_consistent(sa, column),
        [sa, column])
    self.assertEqual(
        utils.check_consistent(sa, col_names=names),
        [sa, names])
    self.assertEqual(
        utils.check_consistent(sa, column, names, n_rows=2, n_cols=3),
        [sa, column, names])

    # Malformed or inconsistent arguments raise ValueError.
    self.assertRaises(ValueError, utils.check_consistent, {})
    self.assertRaises(ValueError, utils.check_consistent, sa, n_rows=7)
    self.assertRaises(ValueError, utils.check_consistent, sa, n_cols=7)
    self.assertRaises(ValueError, utils.check_consistent, sa, column, {})
    self.assertRaises(
        ValueError, utils.check_consistent, sa, np.array([1.0, 2.0, 3.0]))
    self.assertRaises(
        ValueError, utils.check_consistent, sa,
        col_names=['f0', 'not_a_col'])
def __init__(self, M, labels, clfs=None, subsets=None, cvs=None,
             trials=None):
    """Initialize the experiment specification.

    Parameters
    ----------
    M : np.ndarray or numpy structured array or None
        Feature matrix. A plain 2-d ndarray is used as-is with generated
        column names ('f0', 'f1', ...); anything else is validated and
        converted via utils.check_consistent / utils.cast_np_sa_to_nd.
    labels : np.array
        Target labels; length must match the number of rows of M.
        NOTE(review): when M is None, labels is never stored — confirm
        that callers passing M=None do not rely on self.labels.
    clfs : list of dict or None
        Classifier specs; each dict's 'clf' key is a BaseEstimator
        subclass. Defaults to [{'clf': RandomForestClassifier}].
    subsets : list of dict or None
        Subsetting specs; each dict's 'subset' key is a BaseSubsetIter
        subclass. Defaults to [{'subset': s_i.SubsetNoSubset}].
    cvs : list of dict or None
        Cross-validation specs; each dict's 'cv' key is a
        _PartitionIterator subclass. Defaults to [{'cv': KFold}].
    trials : list or None
        Pre-computed trials. When provided, clfs/subsets/cvs are stored
        without validation.

    Raises
    ------
    ValueError
        If M is an ndarray that is not 2-dimensional, or if any of the
        utils validation helpers reject their input.
    """
    # Resolve defaults inside the body: the previous signature used
    # mutable list-of-dict defaults, which are created once at def time
    # and shared by every call (classic mutable-default pitfall).
    if clfs is None:
        clfs = [{'clf': RandomForestClassifier}]
    if subsets is None:
        subsets = [{'subset': s_i.SubsetNoSubset}]
    if cvs is None:
        cvs = [{'cv': KFold}]
    if M is not None:
        if utils.is_nd(M) and not utils.is_sa(M):
            # Plain ndarray: short-circuit the usual type checking and
            # coercion, just validate the shape.
            if M.ndim != 2:
                raise ValueError('Expected 2-dimensional array for M')
            self.M = M
            self.col_names = ['f{}'.format(i) for i in xrange(M.shape[1])]
            self.labels = utils.check_col(
                labels,
                n_rows=M.shape[0],
                argument_name='labels')
        else:
            # M is either a structured array or something that should
            # be converted to one.
            (M, self.labels) = utils.check_consistent(
                M, labels, col_argument_name='labels')
            self.col_names = M.dtype.names
            self.M = utils.cast_np_sa_to_nd(M)
    else:
        self.col_names = None
    if trials is None:
        # Only validate specs when trials were not supplied directly.
        clfs = utils.check_arguments(
            clfs,
            {'clf': lambda clf: issubclass(clf, BaseEstimator)},
            optional_keys_take_lists=True,
            argument_name='clfs')
        subsets = utils.check_arguments(
            subsets,
            {'subset': lambda subset: issubclass(
                subset, s_i.BaseSubsetIter)},
            optional_keys_take_lists=True,
            argument_name='subsets')
        cvs = utils.check_arguments(
            cvs,
            {'cv': lambda cv: issubclass(cv, _PartitionIterator)},
            optional_keys_take_lists=True,
            argument_name='cvs')
    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials
def __init__(
        self, M, labels, clfs=None, subsets=None, cvs=None, trials=None):
    """Initialize the experiment specification.

    Parameters
    ----------
    M : np.ndarray or numpy structured array or None
        Feature matrix. A plain 2-d ndarray is used as-is with generated
        column names ('f0', 'f1', ...); anything else is validated and
        converted via utils.check_consistent / utils.cast_np_sa_to_nd.
    labels : np.array
        Target labels; length must match the number of rows of M.
        NOTE(review): when M is None, labels is never stored — confirm
        that callers passing M=None do not rely on self.labels.
    clfs : list of dict or None
        Classifier specs; each dict's 'clf' key is a BaseEstimator
        subclass. Defaults to [{'clf': RandomForestClassifier}].
    subsets : list of dict or None
        Subsetting specs; each dict's 'subset' key is a BaseSubsetIter
        subclass. Defaults to [{'subset': s_i.SubsetNoSubset}].
    cvs : list of dict or None
        Cross-validation specs; each dict's 'cv' key is a
        _PartitionIterator subclass. Defaults to [{'cv': KFold}].
    trials : list or None
        Pre-computed trials. When provided, clfs/subsets/cvs are stored
        without validation.

    Raises
    ------
    ValueError
        If M is an ndarray that is not 2-dimensional, or if any of the
        utils validation helpers reject their input.
    """
    # Resolve defaults inside the body: the previous signature used
    # mutable list-of-dict defaults, which are created once at def time
    # and shared by every call (classic mutable-default pitfall).
    if clfs is None:
        clfs = [{'clf': RandomForestClassifier}]
    if subsets is None:
        subsets = [{'subset': s_i.SubsetNoSubset}]
    if cvs is None:
        cvs = [{'cv': KFold}]
    if M is not None:
        if utils.is_nd(M) and not utils.is_sa(M):
            # Plain ndarray: short-circuit the usual type checking and
            # coercion, just validate the shape.
            if M.ndim != 2:
                raise ValueError('Expected 2-dimensional array for M')
            self.M = M
            self.col_names = ['f{}'.format(i) for i in xrange(M.shape[1])]
            self.labels = utils.check_col(
                labels, n_rows=M.shape[0], argument_name='labels')
        else:
            # M is either a structured array or something that should
            # be converted to one.
            (M, self.labels) = utils.check_consistent(
                M, labels, col_argument_name='labels')
            self.col_names = M.dtype.names
            self.M = utils.cast_np_sa_to_nd(M)
    else:
        self.col_names = None
    if trials is None:
        # Only validate specs when trials were not supplied directly.
        clfs = utils.check_arguments(
            clfs,
            {'clf': lambda clf: issubclass(clf, BaseEstimator)},
            optional_keys_take_lists=True,
            argument_name='clfs')
        subsets = utils.check_arguments(
            subsets,
            {'subset': lambda subset: issubclass(
                subset, s_i.BaseSubsetIter)},
            optional_keys_take_lists=True,
            argument_name='subsets')
        cvs = utils.check_arguments(
            cvs,
            {'cv': lambda cv: issubclass(cv, _PartitionIterator)},
            optional_keys_take_lists=True,
            argument_name='cvs')
    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials