def fit(self, X, y, X_va=None, y_va=None, skip_output=-1):
    """Train on ``(X, y)`` for up to ``self.max_iter`` epochs.

    Training resumes from ``self.current_epoch``. Every epoch calls
    ``self.partial_fit`` once on the (optionally row-shuffled) training
    data, then invokes each callable in ``self._epoch_callbacks`` as
    ``cb(self, t0, err, Xn, yn, X_va, y_va)``.

    Parameters
    ----------
    X : array-like, shape ``(n_samples, self.n_inputs)``
        Training inputs.
    y : array-like
        Training targets with one row per sample; normalized by
        ``self._check_y_shape`` before use.
    X_va, y_va : array-like, optional
        Validation data handed to the epoch callbacks. When ``X_va`` is
        ``None`` and ``self.fraction_validation_set > 0``, the leading
        fraction of the training rows is held out instead (after a
        shuffle if ``self.shuffle_data`` is set).
    skip_output : int, optional
        When positive and ``self.verbose`` is truthy, ``self.verbose`` is
        only switched on for epochs that are a multiple of ``skip_output``
        (and for epoch 1). ``self.verbose`` is restored on exit.

    Raises
    ------
    AssertionError
        If the sample or feature counts of the inputs do not match.

    Notes
    -----
    A snapshot is taken via ``_create_snapshot`` whenever
    ``self.current_epoch`` is a positive multiple of
    ``self.snapshot_interval``; ``None`` or a non-positive interval
    disables snapshots. With ``self.early_stopping`` enabled, training
    stops once ``self._no_improvement_since`` reaches
    ``self.convergence_iter_tol``; the layers are reset to
    ``self._best_params`` and ``self.current_epoch`` is rolled back.
    """
    y, y_va = self._check_y_shape(y, y_va)
    assert X.shape[0] == y.shape[0], "X and y have the a different number of samples"
    assert X.shape[1] == self.n_inputs, "X doesn't have the right number of features"
    if X_va is not None:
        assert X_va.shape[0] == y_va.shape[0], "X_va and y_va have the a different number of samples"
        assert X_va.shape[1] == X.shape[1], "X_va doesn't have the right number of features"

    # PyCUDA doesn't allow shuffling via indexing (`X[idx]`), so to get a
    # random split we shuffle the rows first (if we're allowed to).
    if X_va is None and self.fraction_validation_set > 0.0:
        if self.shuffle_data:
            idx = np.arange(X.shape[0])
            # Split the *shuffled* rows. Rebinding only affects the local
            # names; previously the shuffled result was thrown away and the
            # unshuffled data was split.
            X, y = op.shuffle_rows(X, y, idx=idx)
        else:
            warnings.warn("using first part of X as validation set without shuffling first")
        vi = int(y.shape[0] * self.fraction_validation_set)
        X, X_va, y, y_va = X[vi:], X[:vi], y[vi:], y[:vi]

    oldverbose = self.verbose
    try:
        # Generate storage for shuffling now so every epoch can reuse it.
        if self.shuffle_data:
            idx = np.arange(X.shape[0])
            Xn, yn = op.shuffle_rows(X, y, idx=idx)

        t0 = time.time()
        # NOTE(review): the loop never increments self.current_epoch itself;
        # presumably partial_fit does -- confirm.
        for _ in range(self.current_epoch, self.max_iter):
            if self.shuffle_data:
                Xn, yn = op.shuffle_rows(X, y, output=(Xn, yn), idx=idx)
            else:
                Xn, yn = X, y
            err = float(self.partial_fit(Xn, yn, encode_labels=False))

            if oldverbose and skip_output > 0:
                self.verbose = (self.current_epoch % skip_output) == 0
                # Always show the first epoch to measure times.
                if self.current_epoch == 1:
                    self.verbose = True

            for cb in self._epoch_callbacks:
                cb(self, t0, err, Xn, yn, X_va, y_va)

            # Snapshot every `snapshot_interval` epochs. The positivity
            # checks come first so the modulo can never divide by zero.
            interval = self.snapshot_interval
            if (interval is not None and interval > 0
                    and self.current_epoch > 0
                    and self.current_epoch % interval == 0):
                _create_snapshot(self)

            # Early stopping: restore the best parameters seen so far and
            # rewind the epoch counter to the epoch they belong to.
            if self._no_improvement_since >= self.convergence_iter_tol and self.early_stopping:
                for k, layer in enumerate(self.layers):
                    layer.W = self._best_params[0][k]
                    layer.b = self._best_params[1][k]
                self.current_epoch -= self._no_improvement_since
                break
    finally:
        self.verbose = oldverbose
def test_swaprows():
    """op.shuffle_rows must reorder rows the same way as NumPy's ``a[idx]``."""
    n_rows, n_cols = 1270, 1000
    X = 5.0 * np.random.randn(n_rows, n_cols).astype(np.float32)

    # Three identical target columns, each holding the row's original index.
    row_ids = np.arange(n_rows)[:, None]
    y = np.tile(row_ids, (1, 3)).astype(np.float32)

    # Random permutation of the row indices.
    idx = np.arange(n_rows, dtype=np.int32)
    np.random.shuffle(idx)

    X_dev = op.to_gpu(X)
    y_dev = op.to_gpu(y)
    X_out_dev = gpuarray.empty_like(X_dev)
    y_out_dev = gpuarray.empty_like(y_dev)
    op.shuffle_rows(X_dev, y_dev, (X_out_dev, y_out_dev), idx)

    X_result = op.to_cpu(X_out_dev)
    y_result = op.to_cpu(y_out_dev)
    assert_allclose(X[idx], X_result)
    assert_allclose(y[idx], y_result)
def test_swaprows():
    """Shuffle rows through op.shuffle_rows and compare against ``a[idx]``."""
    n_samples = 1270
    X = 5.0 * np.random.randn(n_samples, 1000).astype(np.float32)

    # Each row of y is its own row number repeated three times, so a wrong
    # permutation is visible in the targets as well as in the inputs.
    positions = np.arange(n_samples)[:, None]
    y = np.tile(positions, (1, 3)).astype(np.float32)

    idx = np.arange(n_samples, dtype=np.int32)
    np.random.shuffle(idx)

    # Upload the inputs and allocate same-shaped output buffers.
    gpu_X = op.to_gpu(X)
    gpu_y = op.to_gpu(y)
    gpu_X_out = gpuarray.empty_like(gpu_X)
    gpu_y_out = gpuarray.empty_like(gpu_y)

    op.shuffle_rows(gpu_X, gpu_y, (gpu_X_out, gpu_y_out), idx)

    host_X = op.to_cpu(gpu_X_out)
    host_y = op.to_cpu(gpu_y_out)
    assert_allclose(X[idx], host_X)
    assert_allclose(y[idx], host_y)
def fit(self, X, y, X_va=None, y_va=None, skip_output=-1):
    """Train on ``(X, y)`` for up to ``self.max_iter`` epochs.

    Training resumes from ``self.current_epoch``. Every epoch calls
    ``self.partial_fit`` once on the (optionally row-shuffled) training
    data, then invokes each callable in ``self._epoch_callbacks`` as
    ``cb(self, t0, err, Xn, yn, X_va, y_va)``.

    Parameters
    ----------
    X : array-like, shape ``(n_samples, self.n_inputs)``
        Training inputs.
    y : array-like
        Training targets with one row per sample; normalized by
        ``self._check_y_shape`` before use.
    X_va, y_va : array-like, optional
        Validation data handed to the epoch callbacks. When ``X_va`` is
        ``None`` and ``self.fraction_validation_set > 0``, the leading
        fraction of the training rows is held out instead (after a
        shuffle if ``self.shuffle_data`` is set).
    skip_output : int, optional
        When positive and ``self.verbose`` is truthy, ``self.verbose`` is
        only switched on for epochs that are a multiple of ``skip_output``
        (and for epoch 1). ``self.verbose`` is restored on exit.

    Raises
    ------
    AssertionError
        If the sample or feature counts of the inputs do not match.

    Notes
    -----
    A snapshot is taken via ``_create_snapshot`` whenever
    ``self.current_epoch`` is a positive multiple of
    ``self.snapshot_interval``; ``None`` or a non-positive interval
    disables snapshots. With ``self.early_stopping`` enabled, training
    stops once ``self._no_improvement_since`` reaches
    ``self.convergence_iter_tol``; the layers are reset to
    ``self._best_params`` and ``self.current_epoch`` is rolled back.
    """
    y, y_va = self._check_y_shape(y, y_va)
    assert X.shape[0] == y.shape[0], "X and y have the a different number of samples"
    assert X.shape[1] == self.n_inputs, "X doesn't have the right number of features"
    if X_va is not None:
        assert X_va.shape[0] == y_va.shape[0], "X_va and y_va have the a different number of samples"
        assert X_va.shape[1] == X.shape[1], "X_va doesn't have the right number of features"

    # PyCUDA doesn't allow shuffling via indexing (`X[idx]`), so to get a
    # random split we shuffle the rows first (if we're allowed to).
    if X_va is None and self.fraction_validation_set > 0.0:
        if self.shuffle_data:
            idx = np.arange(X.shape[0])
            # Split the *shuffled* rows. Rebinding only affects the local
            # names; previously the shuffled result was thrown away and the
            # unshuffled data was split.
            X, y = op.shuffle_rows(X, y, idx=idx)
        else:
            warnings.warn("using first part of X as validation set without shuffling first")
        vi = int(y.shape[0] * self.fraction_validation_set)
        X, X_va, y, y_va = X[vi:], X[:vi], y[vi:], y[:vi]

    oldverbose = self.verbose
    try:
        # Generate storage for shuffling now so every epoch can reuse it.
        if self.shuffle_data:
            idx = np.arange(X.shape[0])
            Xn, yn = op.shuffle_rows(X, y, idx=idx)

        t0 = time.time()
        # NOTE(review): the loop never increments self.current_epoch itself;
        # presumably partial_fit does -- confirm.
        for _ in range(self.current_epoch, self.max_iter):
            if self.shuffle_data:
                Xn, yn = op.shuffle_rows(X, y, output=(Xn, yn), idx=idx)
            else:
                Xn, yn = X, y
            err = float(self.partial_fit(Xn, yn, encode_labels=False))

            if oldverbose and skip_output > 0:
                self.verbose = (self.current_epoch % skip_output) == 0
                # Always show the first epoch to measure times.
                if self.current_epoch == 1:
                    self.verbose = True

            for cb in self._epoch_callbacks:
                cb(self, t0, err, Xn, yn, X_va, y_va)

            # Snapshot every `snapshot_interval` epochs. The positivity
            # checks come first so the modulo can never divide by zero.
            interval = self.snapshot_interval
            if (interval is not None and interval > 0
                    and self.current_epoch > 0
                    and self.current_epoch % interval == 0):
                _create_snapshot(self)

            # Early stopping: restore the best parameters seen so far and
            # rewind the epoch counter to the epoch they belong to.
            if self._no_improvement_since >= self.convergence_iter_tol and self.early_stopping:
                for k, layer in enumerate(self.layers):
                    layer.W = self._best_params[0][k]
                    layer.b = self._best_params[1][k]
                self.current_epoch -= self._no_improvement_since
                break
    finally:
        self.verbose = oldverbose