Ejemplo n.º 1
0
    def fit(self, X, y, X_va=None, y_va=None, skip_output=-1):
        """Train the model for up to ``self.max_iter`` epochs.

        Every epoch calls ``partial_fit`` on the (optionally row-shuffled)
        training data, then invokes each callable in ``_epoch_callbacks``,
        optionally creates a snapshot and finally checks for early stopping.

        Parameters
        ----------
        X, y :
            Training inputs and targets. They must have the same number of
            rows and ``X`` must have ``self.n_inputs`` columns.
        X_va, y_va :
            Optional validation data. When ``X_va`` is None and
            ``self.fraction_validation_set > 0``, the leading fraction of
            the rows of ``X``/``y`` is split off as validation set (after
            shuffling the rows if ``self.shuffle_data`` is set).
        skip_output :
            When positive and ``self.verbose`` is set, ``self.verbose`` is
            only enabled on every ``skip_output``-th epoch (and on epoch 1).
            The original value of ``self.verbose`` is restored on exit.

        Raises
        ------
        AssertionError
            If the sample or feature counts of the inputs do not match.
        """
        y, y_va = self._check_y_shape(y, y_va)
        assert X.shape[0] == y.shape[0], "X and y have the a different number of samples"
        assert X.shape[1] == self.n_inputs, "X doesn't have the right number of features"
        if X_va is not None:
            assert X_va.shape[0] == y_va.shape[0], "X_va and y_va have the a different number of samples"
            assert X_va.shape[1] == X.shape[1], "X_va doesn't have the right number of features"

        # PyCUDA, doesn't allow shuffling via indexing (`X[idx]`) so to have
        # a random split, we just shuffle the data (if we're allowed to)
        if X_va is None and self.fraction_validation_set > 0.0:
            if self.shuffle_data:
                idx = np.arange(X.shape[0])
                # Rebind X/y to the shuffled copies; storing them under other
                # names would leave the split below operating on the
                # unshuffled rows.
                X, y = op.shuffle_rows(X, y, idx=idx)
            else:
                warnings.warn("using first part of X as validation set without shuffling first")
            vi = int(y.shape[0]*self.fraction_validation_set)
            X, X_va, y, y_va = X[vi:], X[:vi], y[vi:], y[:vi]

        oldverbose = self.verbose
        try:
            # generate storage for shuffling now
            if self.shuffle_data:
                idx = np.arange(X.shape[0])
                Xn, yn = op.shuffle_rows(X, y, idx=idx)
            t0 = time.time()
            for _ in range(self.current_epoch, self.max_iter):
                if self.shuffle_data:
                    # reuse the buffers allocated above as output storage
                    Xn, yn = op.shuffle_rows(X, y, output=(Xn, yn), idx=idx)
                else:
                    Xn, yn = X, y

                err = float(self.partial_fit(Xn, yn, encode_labels=False))

                if oldverbose and skip_output > 0:
                    self.verbose = (self.current_epoch % skip_output) == 0
                    # always show first epoch to measure times
                    if self.current_epoch == 1:
                        self.verbose = True
                for cb in self._epoch_callbacks:
                    cb(self, t0, err, Xn, yn, X_va, y_va)

                # Snapshot on every `snapshot_interval`-th epoch. The epoch
                # guard comes first so that epoch 0 never reaches the modulo;
                # a non-positive interval disables snapshots.
                interval = self.snapshot_interval
                if interval is not None and interval > 0 \
                   and self.current_epoch > 0 \
                   and self.current_epoch % interval == 0:
                    _create_snapshot(self)

                # early stopping checks
                if self._no_improvement_since >= self.convergence_iter_tol and self.early_stopping:
                    # restore the stored best weights/biases of every layer
                    for layer_idx, layer in enumerate(self.layers):
                        layer.W = self._best_params[0][layer_idx]
                        layer.b = self._best_params[1][layer_idx]
                    # rewind the epoch counter by the epochs without improvement
                    self.current_epoch -= self._no_improvement_since
                    break
        finally:
            self.verbose = oldverbose
Ejemplo n.º 2
0
def test_swaprows():
    """Check ``op.shuffle_rows`` on GPU arrays against NumPy fancy indexing.

    Random features and row-numbered targets are copied to the GPU and
    shuffled into preallocated output buffers. The results, copied back
    to the host, must equal ``X[idx]`` / ``y[idx]`` for ``idx`` as it
    stands after the call.
    """
    n_rows, n_cols = 1270, 1000

    noise = np.random.randn(n_rows, n_cols).astype(np.float32)
    features = 5.0 * noise
    # three identical target columns, each holding the row number
    row_ids = np.arange(features.shape[0])[:, None]
    targets = np.tile(row_ids, (1, 3)).astype(np.float32)

    perm = np.arange(features.shape[0], dtype=np.int32)
    np.random.shuffle(perm)

    features_gpu = op.to_gpu(features)
    targets_gpu = op.to_gpu(targets)
    out_buffers = (gpuarray.empty_like(features_gpu),
                   gpuarray.empty_like(targets_gpu))
    op.shuffle_rows(features_gpu, targets_gpu, out_buffers, perm)

    features_back = op.to_cpu(out_buffers[0])
    targets_back = op.to_cpu(out_buffers[1])

    assert_allclose(features[perm], features_back)
    assert_allclose(targets[perm], targets_back)
Ejemplo n.º 3
0
def test_swaprows():
    """Verify that ``op.shuffle_rows`` reorders GPU rows like ``X[idx]``.

    The shuffled outputs are written into preallocated GPU buffers and
    compared, after copying back to the host, with NumPy indexing of the
    original host arrays by ``idx``.
    """
    num_samples = 1270

    data = 5.0 * np.random.randn(num_samples, 1000).astype(np.float32)
    # label matrix: the row number repeated in three columns
    column = np.arange(data.shape[0])[:, None]
    labels = np.hstack([column] * 3).astype(np.float32)

    order = np.arange(data.shape[0], dtype=np.int32)
    np.random.shuffle(order)

    data_dev, labels_dev = op.to_gpu(data), op.to_gpu(labels)
    data_out = gpuarray.empty_like(data_dev)
    labels_out = gpuarray.empty_like(labels_dev)
    op.shuffle_rows(data_dev, labels_dev, (data_out, labels_out), order)

    # compare host copies of the outputs with the reference ordering
    assert_allclose(data[order], op.to_cpu(data_out))
    assert_allclose(labels[order], op.to_cpu(labels_out))
Ejemplo n.º 4
0
    def fit(self, X, y, X_va=None, y_va=None, skip_output=-1):
        """Run the training loop until ``self.max_iter`` or early stopping.

        Per epoch this calls ``partial_fit`` on the training data (row
        shuffled when ``self.shuffle_data`` is set), runs all callables in
        ``_epoch_callbacks``, creates a snapshot on every
        ``snapshot_interval``-th epoch and checks the early-stopping
        criterion.

        Parameters
        ----------
        X, y :
            Training inputs and targets with matching row counts; ``X``
            must have ``self.n_inputs`` columns.
        X_va, y_va :
            Optional validation data. If ``X_va`` is None and
            ``self.fraction_validation_set > 0``, the first fraction of
            rows is held out as validation set, after shuffling the rows
            when ``self.shuffle_data`` is set.
        skip_output :
            If positive and ``self.verbose`` is set, ``self.verbose`` is
            switched on only every ``skip_output``-th epoch and on epoch 1.
            ``self.verbose`` is reset to its initial value when the method
            exits.

        Raises
        ------
        AssertionError
            If the sample or feature counts of the inputs do not match.
        """
        y, y_va = self._check_y_shape(y, y_va)
        assert X.shape[0] == y.shape[
            0], "X and y have the a different number of samples"
        assert X.shape[
            1] == self.n_inputs, "X doesn't have the right number of features"
        if X_va is not None:
            assert X_va.shape[0] == y_va.shape[
                0], "X_va and y_va have the a different number of samples"
            assert X_va.shape[1] == X.shape[
                1], "X_va doesn't have the right number of features"

        # PyCUDA, doesn't allow shuffling via indexing (`X[idx]`) so to have
        # a random split, we just shuffle the data (if we're allowed to)
        if X_va is None and self.fraction_validation_set > 0.0:
            if self.shuffle_data:
                idx = np.arange(X.shape[0])
                # The shuffled arrays must replace X/y, otherwise the split
                # below is taken from the rows in their original order.
                X, y = op.shuffle_rows(X, y, idx=idx)
            else:
                warnings.warn(
                    "using first part of X as validation set without shuffling first"
                )
            vi = int(y.shape[0] * self.fraction_validation_set)
            X, X_va, y, y_va = X[vi:], X[:vi], y[vi:], y[:vi]

        oldverbose = self.verbose
        try:
            # generate storage for shuffling now
            if self.shuffle_data:
                idx = np.arange(X.shape[0])
                Xn, yn = op.shuffle_rows(X, y, idx=idx)
            t0 = time.time()
            for _ in range(self.current_epoch, self.max_iter):
                if self.shuffle_data:
                    # shuffle into the buffers allocated before the loop
                    Xn, yn = op.shuffle_rows(X, y, output=(Xn, yn), idx=idx)
                else:
                    Xn, yn = X, y

                err = float(self.partial_fit(Xn, yn, encode_labels=False))

                if oldverbose and skip_output > 0:
                    self.verbose = (self.current_epoch % skip_output) == 0
                    # always show first epoch to measure times
                    if self.current_epoch == 1:
                        self.verbose = True
                for cb in self._epoch_callbacks:
                    cb(self, t0, err, Xn, yn, X_va, y_va)

                # Take a snapshot every `snapshot_interval` epochs. Checking
                # the epoch first keeps epoch 0 away from the modulo, and a
                # non-positive interval turns snapshots off.
                interval = self.snapshot_interval
                if interval is not None and interval > 0 \
                   and self.current_epoch > 0 \
                   and self.current_epoch % interval == 0:
                    _create_snapshot(self)

                # early stopping checks
                if self._no_improvement_since >= self.convergence_iter_tol and self.early_stopping:
                    # put the stored best parameters back into each layer
                    for layer_idx, layer in enumerate(self.layers):
                        layer.W = self._best_params[0][layer_idx]
                        layer.b = self._best_params[1][layer_idx]
                    # move the epoch counter back by the unproductive epochs
                    self.current_epoch -= self._no_improvement_since
                    break
        finally:
            self.verbose = oldverbose