Ejemplo n.º 1
0
 def __init__(self,
              M,
              labels,
              clfs=[{'clf': RandomForestClassifier}],
              subsets=[{'subset': s_i.SubsetNoSubset}],
              cvs=[{'cv': KFold}],
              trials=None):
     """Validate the inputs and store the data and search configuration.

     Args:
         M: Feature matrix, or None. An array accepted by utils.is_nd but
             not by utils.is_sa must be 2-dimensional; it is stored as-is
             with generated column names 'f0', 'f1', ... Any other value
             goes through utils.check_consistent and is then converted
             with utils.cast_np_sa_to_nd.
         labels: Label column belonging to M. Validated together with M;
             ignored when M is None.
         clfs: List of dicts, each with a 'clf' key holding a
             BaseEstimator subclass (the class itself, not an instance).
         subsets: List of dicts, each with a 'subset' key holding an
             s_i.BaseSubsetIter subclass.
         cvs: List of dicts, each with a 'cv' key holding a
             _PartitionIterator subclass.
         trials: Stored unchanged. When it is not None, clfs, subsets and
             cvs are stored without being validated.

     Raises:
         ValueError: If M takes the plain-ndarray path and is not
             2-dimensional.

     Note:
         The list defaults are created once at definition time and are
         shared by every call that relies on them; treat them as
         read-only.
     """
     if M is not None:
         if utils.is_nd(M) and not utils.is_sa(M):
             # nd_array, short circuit the usual type checking and coercion
             if M.ndim != 2:
                 raise ValueError('Expected 2-dimensional array for M')
             self.M = M
             # range() yields the same names as xrange() and also exists
             # on Python 3.
             self.col_names = ['f{}'.format(i) for i in range(M.shape[1])]
             self.labels = utils.check_col(labels,
                                           n_rows=M.shape[0],
                                           argument_name='labels')
         else:
             # M is either a structured array or something that should
             # be converted
             (M, self.labels) = utils.check_consistent(
                 M, labels, col_argument_name='labels')
             self.col_names = M.dtype.names
             self.M = utils.cast_np_sa_to_nd(M)
     else:
         # No feature matrix: still define every data attribute so that
         # later reads see None instead of raising AttributeError.
         self.M = None
         self.labels = None
         self.col_names = None
     if trials is None:
         # Function-scope import: only the validators below need it.
         import inspect

         # issubclass() raises TypeError when its first argument is not a
         # class (e.g. an estimator *instance*); checking isclass() first
         # lets the validator simply answer False for such values.
         clfs = utils.check_arguments(
             clfs,
             {'clf': lambda clf: (inspect.isclass(clf) and
                                  issubclass(clf, BaseEstimator))},
             optional_keys_take_lists=True,
             argument_name='clfs')
         subsets = utils.check_arguments(
             subsets,
             {'subset': lambda subset: (
                 inspect.isclass(subset) and
                 issubclass(subset, s_i.BaseSubsetIter))},
             optional_keys_take_lists=True,
             argument_name='subsets')
         cvs = utils.check_arguments(
             cvs,
             {'cv': lambda cv: (inspect.isclass(cv) and
                                issubclass(cv, _PartitionIterator))},
             optional_keys_take_lists=True,
             argument_name='cvs')
     self.clfs = clfs
     self.subsets = subsets
     self.cvs = cvs
     self.trials = trials
Ejemplo n.º 2
0
 def __init__(
         self,
         M,
         labels,
         clfs=[{'clf': RandomForestClassifier}],
         subsets=[{'subset': s_i.SubsetNoSubset}],
         cvs=[{'cv': KFold}],
         trials=None):
     """Check the arguments and record the data and search settings.

     Parameters
     ----------
     M : array-like or None
         Feature matrix. If utils.is_nd(M) holds and utils.is_sa(M) does
         not, M must have exactly two dimensions and is kept unchanged,
         with column names generated as 'f0', 'f1', ... Otherwise M and
         labels are passed to utils.check_consistent and the result is
         converted by utils.cast_np_sa_to_nd. None means "no data".
     labels : array-like
         Labels for the rows of M. Not used when M is None.
     clfs : list of dict
         Each dict carries a 'clf' key whose value must be a subclass of
         BaseEstimator (a class, not an instance).
     subsets : list of dict
         Each dict carries a 'subset' key whose value must be a subclass
         of s_i.BaseSubsetIter.
     cvs : list of dict
         Each dict carries a 'cv' key whose value must be a subclass of
         _PartitionIterator.
     trials : object or None
         Stored as given. If it is not None, clfs, subsets and cvs are
         stored without validation.

     Raises
     ------
     ValueError
         If M follows the plain-ndarray path but is not 2-dimensional.

     Notes
     -----
     The default lists are built once, when the method is defined, and
     are shared between calls; do not mutate them in place.
     """
     if M is None:
         # Define all data attributes even without data, so attribute
         # access later on yields None rather than AttributeError.
         self.M = None
         self.labels = None
         self.col_names = None
     elif utils.is_nd(M) and not utils.is_sa(M):
         # nd_array, short circuit the usual type checking and coercion
         if M.ndim != 2:
             raise ValueError('Expected 2-dimensional array for M')
         self.M = M
         # range() produces the same names as xrange() and is available
         # on both Python 2 and Python 3.
         self.col_names = ['f{}'.format(i) for i in range(M.shape[1])]
         self.labels = utils.check_col(
                 labels,
                 n_rows=M.shape[0],
                 argument_name='labels')
     else:
         # M is either a structured array or something that should
         # be converted
         (M, self.labels) = utils.check_consistent(
                 M,
                 labels,
                 col_argument_name='labels')
         self.col_names = M.dtype.names
         self.M = utils.cast_np_sa_to_nd(M)

     if trials is None:
         # Function-scope import: only used by the validators below.
         import inspect

         # Guard issubclass() with isclass(): issubclass() raises
         # TypeError for non-class values such as estimator instances,
         # whereas the validator should just report them as invalid.
         clfs = utils.check_arguments(
                 clfs,
                 {'clf': lambda clf: (
                     inspect.isclass(clf) and
                     issubclass(clf, BaseEstimator))},
                 optional_keys_take_lists=True,
                 argument_name='clfs')
         subsets = utils.check_arguments(
                 subsets,
                 {'subset': lambda subset: (
                     inspect.isclass(subset) and
                     issubclass(subset, s_i.BaseSubsetIter))},
                 optional_keys_take_lists=True,
                 argument_name='subsets')
         cvs = utils.check_arguments(
                 cvs,
                 {'cv': lambda cv: (
                     inspect.isclass(cv) and
                     issubclass(cv, _PartitionIterator))},
                 optional_keys_take_lists=True,
                 argument_name='cvs')
     self.clfs = clfs
     self.subsets = subsets
     self.cvs = cvs
     self.trials = trials
Ejemplo n.º 3
0
def __check_args_row_select(M, args, argument_name='arguments'):
    """Run utils.check_arguments on row-selection directives for M.

    The requirement map passed along demands three keys: 'func' (checked
    by __valid_col_select_func), 'vals' (no validator supplied) and
    'col_name' (must be one of M.dtype.names).

    Returns whatever utils.check_arguments returns.
    """
    required_keys = {
        'func': __valid_col_select_func,
        'vals': None,
        # Looked up lazily, i.e. only when the validator is invoked.
        'col_name': lambda col: col in M.dtype.names,
    }
    return utils.check_arguments(
        args, required_keys, argument_name=argument_name)
Ejemplo n.º 4
0
    def test_check_arguments(self):
        """utils.check_arguments accepts well-formed directive lists and
        raises ValueError for malformed ones."""
        # Requirement maps: key -> validator. None is used for 'vals',
        # which presumably means "must be present, any value" -- confirm
        # against utils.check_arguments.
        is_callable = lambda f: hasattr(f, '__call__')
        row_reqs = {
            'func': is_callable,
            'vals': None,
            'col_name': lambda c: isinstance(c, str),
        }
        col_reqs = {'func': is_callable, 'vals': None}
        clfs_reqs = {
            'clf': lambda c: (isinstance(c, type) and
                              issubclass(c, BaseEstimator)),
        }

        # Factories hand out a fresh dict on every use, so no directive
        # object is shared between the individual checks.
        def eq_directive():
            return {'func': modify.row_val_eq, 'vals': 8, 'col_name': 'f0'}

        def between_directive():
            return {'func': modify.row_val_between,
                    'vals': (1, 2),
                    'col_name': 'f1'}

        # --- well-formed input: must not raise ---
        row_valid = [eq_directive(), between_directive()]
        col_valid = [
            {'func': modify.col_random, 'vals': 7},
            {'func': modify.col_val_eq, 'vals': 2},
        ]
        clfs_valid = [
            {'clf': RandomForestClassifier,
             'n_estimators': [1, 10],
             'max_features': [10, 100, 1000]},
            {'clf': SVC, 'kernel': ['linear', 'poly']},
        ]
        utils.check_arguments(row_valid, row_reqs)
        utils.check_arguments(col_valid, col_reqs)
        utils.check_arguments(
            clfs_valid, clfs_reqs, optional_keys_take_lists=True)

        # --- malformed input: each case must raise ValueError ---
        # A list entry that is not a dict.
        with self.assertRaises(ValueError):
            utils.check_arguments([2, row_valid[0]], row_reqs)

        # A bare dict instead of a list of dicts.
        with self.assertRaises(ValueError):
            utils.check_arguments(row_valid[0], row_reqs)

        # 'func' fails its validator (7 is not callable).
        row_invalid1 = [
            eq_directive(),
            {'func': 7, 'vals': (1, 2), 'col_name': 'f1'},
        ]
        with self.assertRaises(ValueError):
            utils.check_arguments(row_invalid1, row_reqs)

        # Required key 'col_name' is missing from the second directive.
        row_invalid2 = [
            eq_directive(),
            {'func': modify.row_val_between, 'vals': (1, 2)},
        ]
        with self.assertRaises(ValueError):
            utils.check_arguments(row_invalid2, row_reqs)

        # 'clf' is an estimator instance rather than a class.
        clfs_invalid1 = [
            {'clf': RandomForestClassifier(),
             'n_estimators': [1, 10],
             'max_features': [10, 100, 1000]},
        ]
        with self.assertRaises(ValueError):
            utils.check_arguments(
                clfs_invalid1, clfs_reqs, optional_keys_take_lists=True)

        # Optional key 'n_estimators' is a scalar although
        # optional_keys_take_lists=True is requested.
        clfs_invalid2 = [
            {'clf': RandomForestClassifier,
             'n_estimators': 1,
             'max_features': [10, 100, 1000]},
        ]
        with self.assertRaises(ValueError):
            utils.check_arguments(
                clfs_invalid2, clfs_reqs, optional_keys_take_lists=True)
Ejemplo n.º 5
0
def __check_args_row_select(M, args, argument_name='arguments'):
    """Validate row-selection directives against the columns of M.

    Args:
        M: Object exposing ``dtype.names``; 'col_name' values are checked
            for membership in it.
        args: Directives to validate; forwarded unchanged to
            utils.check_arguments.
        argument_name: Label forwarded to utils.check_arguments as its
            ``argument_name`` keyword.

    Returns:
        Whatever utils.check_arguments returns.
    """
    required_keys = {
        'func': __valid_col_select_func,
        'vals': None,
        'col_name': lambda name: name in M.dtype.names,
    }
    # Pass argument_name by keyword. Other callers hand
    # optional_keys_take_lists to check_arguments ahead of argument_name,
    # so a third positional argument is presumably bound to that flag
    # instead of the label; the keyword form is correct either way.
    return utils.check_arguments(
        args, required_keys, argument_name=argument_name)
Ejemplo n.º 6
0
def __check_args_col_select(args, argument_name='arguments'):
    """Validate column-selection directives.

    Args:
        args: Directives to validate; forwarded unchanged to
            utils.check_arguments.
        argument_name: Label forwarded to utils.check_arguments as its
            ``argument_name`` keyword.

    Returns:
        Whatever utils.check_arguments returns.
    """
    required_keys = {'func': __valid_col_select_func, 'vals': None}
    # Pass argument_name by keyword. Other callers hand
    # optional_keys_take_lists to check_arguments ahead of argument_name,
    # so a third positional argument is presumably bound to that flag
    # instead of the label; the keyword form is correct either way.
    return utils.check_arguments(
        args, required_keys, argument_name=argument_name)
Ejemplo n.º 7
0
def __check_args_col_select(args, argument_name='arguments'):
    """Run utils.check_arguments on column-selection directives.

    The requirement map passed along demands two keys: 'func' (checked by
    __valid_col_select_func) and 'vals' (no validator supplied).

    Returns whatever utils.check_arguments returns.
    """
    required_keys = {'func': __valid_col_select_func, 'vals': None}
    return utils.check_arguments(
        args, required_keys, argument_name=argument_name)