def test_is_nd(self):
    # utils.is_nd is expected to accept both a homogeneous 2-d array and a
    # structured array (three float fields named f0, f1, f2).
    plain = np.array([[1, 2, 3], [4, 5, 6]], dtype=int)
    field_names = ['f{}'.format(idx) for idx in xrange(3)]
    structured_dtype = np.dtype({
        'names': field_names,
        'formats': [float] * 3,
    })
    structured = np.array(
        [(-1.0, 2.0, -1.0), (0.0, -1.0, 2.0)],
        dtype=structured_dtype)
    for candidate in (plain, structured):
        self.assertTrue(utils.is_nd(candidate))
def plot_on_timeline(col, verbose=True):
    """Plots points on a timeline

    Every entry of `col` is converted to a matplotlib date number and drawn
    at height 0, so all points fall on one horizontal line.

    Parameters
    ----------
    col : np.array
        Column of values to plot; it is first passed through
        utils.check_col
    verbose : boolean
        iff True, display the graph

    Returns
    -------
    matplotlib.figure.Figure
        Figure containing plot

    """
    col = utils.check_col(col)
    # http://stackoverflow.com/questions/1574088/plotting-time-in-python-with-matplotlib
    # Use the utils namespace (already required for check_col above) rather
    # than relying on a separately imported bare `is_nd` name.
    if utils.is_nd(col):
        # Hand date2num datetime objects instead of the array's own dtype
        col = col.astype(datetime)
    dates = matplotlib.dates.date2num(col)
    fig = plt.figure()
    # All y-values are 0: only the position along the time axis matters
    plt.plot_date(dates, [0] * len(dates))
    if verbose:
        plt.show()
    return fig
def __init__(
        self,
        M,
        labels,
        clfs=None,
        subsets=None,
        cvs=None,
        trials=None):
    """Store the data and the classifier/subset/cv configurations.

    Parameters
    ----------
    M : 2-dimensional np.ndarray, structured array, convertible object or None
        Feature matrix. A plain (non-structured) ndarray is stored as-is
        and its columns are named 'f0', 'f1', ... Anything else is passed
        through utils.check_consistent and then converted with
        utils.cast_np_sa_to_nd. If None, only col_names is set (to None);
        self.M and self.labels are not assigned by this method.
    labels : column-like
        Labels for M; checked to have one entry per row of M
    clfs : list of dict or None
        Classifier configurations. Each dict needs a 'clf' entry that is a
        BaseEstimator subclass. None (the default) means
        [{'clf': RandomForestClassifier}]
    subsets : list of dict or None
        Subset configurations. Each dict needs a 'subset' entry that is a
        s_i.BaseSubsetIter subclass. None (the default) means
        [{'subset': s_i.SubsetNoSubset}]
    cvs : list of dict or None
        Cross-validation configurations. Each dict needs a 'cv' entry that
        is a _PartitionIterator subclass. None (the default) means
        [{'cv': KFold}]
    trials : object or None
        Stored unchanged on self.trials. When it is not None, clfs,
        subsets and cvs are stored without being validated

    Raises
    ------
    ValueError
        If M is a plain ndarray that is not 2-dimensional. The utils
        validation helpers may raise as well.

    """
    # Create the default configurations on every call. List literals in the
    # signature would be built once and shared (via self.clfs etc.) by
    # every instance that relies on the defaults.
    if clfs is None:
        clfs = [{'clf': RandomForestClassifier}]
    if subsets is None:
        subsets = [{'subset': s_i.SubsetNoSubset}]
    if cvs is None:
        cvs = [{'cv': KFold}]

    if M is not None:
        if utils.is_nd(M) and not utils.is_sa(M):
            # nd_array, short circuit the usual type checking and coercion
            if M.ndim != 2:
                raise ValueError('Expected 2-dimensional array for M')
            self.M = M
            self.col_names = ['f{}'.format(i) for i in xrange(M.shape[1])]
            self.labels = utils.check_col(
                labels, n_rows=M.shape[0], argument_name='labels')
        else:
            # M is either a structured array or something that should
            # be converted
            (M, self.labels) = utils.check_consistent(
                M, labels, col_argument_name='labels')
            self.col_names = M.dtype.names
            self.M = utils.cast_np_sa_to_nd(M)
    else:
        self.col_names = None

    if trials is None:
        # Only validate the configurations when no trials were supplied
        clfs = utils.check_arguments(
            clfs,
            {'clf': lambda clf: issubclass(clf, BaseEstimator)},
            optional_keys_take_lists=True,
            argument_name='clfs')
        subsets = utils.check_arguments(
            subsets,
            {'subset': lambda subset: issubclass(subset,
                                                 s_i.BaseSubsetIter)},
            optional_keys_take_lists=True,
            argument_name='subsets')
        cvs = utils.check_arguments(
            cvs,
            {'cv': lambda cv: issubclass(cv, _PartitionIterator)},
            optional_keys_take_lists=True,
            argument_name='cvs')
    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials
def __init__(
        self,
        M,
        labels,
        clfs=None,
        subsets=None,
        cvs=None,
        trials=None):
    """Store the data and the classifier/subset/cv configurations.

    Parameters
    ----------
    M : 2-dimensional np.ndarray, structured array, convertible object or None
        Feature matrix. A plain (non-structured) ndarray is kept unchanged
        with generated column names 'f0', 'f1', ... Any other value goes
        through utils.check_consistent and utils.cast_np_sa_to_nd. When M
        is None only col_names is set (to None); self.M and self.labels
        are not assigned by this method.
    labels : column-like
        Labels for M; checked to have one entry per row of M
    clfs : list of dict or None
        Classifier configurations; each dict's 'clf' must be a
        BaseEstimator subclass. Defaults to
        [{'clf': RandomForestClassifier}] when None
    subsets : list of dict or None
        Subset configurations; each dict's 'subset' must be a
        s_i.BaseSubsetIter subclass. Defaults to
        [{'subset': s_i.SubsetNoSubset}] when None
    cvs : list of dict or None
        Cross-validation configurations; each dict's 'cv' must be a
        _PartitionIterator subclass. Defaults to [{'cv': KFold}] when None
    trials : object or None
        Stored unchanged on self.trials. If it is not None the clfs,
        subsets and cvs arguments are stored without validation

    Raises
    ------
    ValueError
        If M is a plain ndarray whose ndim is not 2. The utils validation
        helpers may raise as well.

    """
    # A fresh default list per call: defaults written as list literals in
    # the signature are created once and would be aliased by self.clfs,
    # self.subsets and self.cvs across all instances.
    clfs = [{'clf': RandomForestClassifier}] if clfs is None else clfs
    subsets = (
        [{'subset': s_i.SubsetNoSubset}] if subsets is None else subsets)
    cvs = [{'cv': KFold}] if cvs is None else cvs

    if M is None:
        self.col_names = None
    elif utils.is_nd(M) and not utils.is_sa(M):
        # nd_array, short circuit the usual type checking and coercion
        if M.ndim != 2:
            raise ValueError('Expected 2-dimensional array for M')
        self.M = M
        self.col_names = ['f{}'.format(i) for i in xrange(M.shape[1])]
        self.labels = utils.check_col(
            labels, n_rows=M.shape[0], argument_name='labels')
    else:
        # M is either a structured array or something that should
        # be converted
        (M, self.labels) = utils.check_consistent(
            M, labels, col_argument_name='labels')
        self.col_names = M.dtype.names
        self.M = utils.cast_np_sa_to_nd(M)

    if trials is None:
        # Configurations are validated only when no trials were supplied
        clfs = utils.check_arguments(
            clfs,
            {'clf': lambda clf: issubclass(clf, BaseEstimator)},
            optional_keys_take_lists=True,
            argument_name='clfs')
        subsets = utils.check_arguments(
            subsets,
            {'subset': lambda subset: issubclass(subset,
                                                 s_i.BaseSubsetIter)},
            optional_keys_take_lists=True,
            argument_name='subsets')
        cvs = utils.check_arguments(
            cvs,
            {'cv': lambda cv: issubclass(cv, _PartitionIterator)},
            optional_keys_take_lists=True,
            argument_name='cvs')
    self.clfs = clfs
    self.subsets = subsets
    self.cvs = cvs
    self.trials = trials
def test_check_col(self):
    # Inputs check_col should accept: a 1-d array, a single-column 2-d
    # array, a plain list and a pandas Series.
    flat = np.array([1, 2, 3, 4])
    single_column = np.array([[1.0], [2], [3], [4]])
    as_list = [3.0, 2.0, 1.8]
    as_series = pd.Series(flat)
    for accepted in (flat, single_column, as_list, as_series):
        converted = utils.check_col(accepted)
        self.assertTrue(utils.is_nd(converted))

    # Inputs that must be rejected with ValueError: None, a string and an
    # array with more than one column.
    with self.assertRaises(ValueError):
        utils.check_col(None)
    with self.assertRaises(ValueError):
        utils.check_col("lalala")
    two_columns = np.array([[1, 2], [3, 4]])
    with self.assertRaises(ValueError):
        utils.check_col(two_columns)

    # n_rows must agree with the number of entries.
    utils.check_col(flat, n_rows=4)
    with self.assertRaises(ValueError):
        utils.check_col(flat, n_rows=5)