Example #1
def test_continuous_y():
    for inference_method in get_installed(["lp", "ad3"]):
        X, Y = generate_blocks(n_samples=1)
        x, y = X[0], Y[0]
        w = np.array([1, 0, 0, 1,  # unary
                      0, -4, 0])   # pairwise

        crf = GridCRF(inference_method=inference_method)
        crf.initialize(X, Y)
        joint_feature = crf.joint_feature(x, y)
        y_cont = np.zeros_like(x)
        gx, gy = np.indices(x.shape[:-1])
        y_cont[gx, gy, y] = 1
        # need to generate edge marginals
        vert = np.dot(y_cont[1:, :, :].reshape(-1, 2).T, y_cont[:-1, :, :].reshape(-1, 2))
        # horizontal edges
        horz = np.dot(y_cont[:, 1:, :].reshape(-1, 2).T, y_cont[:, :-1, :].reshape(-1, 2))
        pw = vert + horz

        joint_feature_cont = crf.joint_feature(x, (y_cont, pw))
        assert_array_almost_equal(joint_feature, joint_feature_cont)

        const = find_constraint(crf, x, y, w, relaxed=False)
        const_cont = find_constraint(crf, x, y, w, relaxed=True)

        # djoint_feature and loss are equal:
        assert_array_almost_equal(const[1], const_cont[1], 4)
        assert_almost_equal(const[2], const_cont[2], 4)

        # returned y_hat is one-hot version of other
        if isinstance(const_cont[0], tuple):
            assert_array_equal(const[0], np.argmax(const_cont[0][0], axis=-1))

            # test loss:
            assert_almost_equal(crf.loss(y, const[0]), crf.continuous_loss(y, const_cont[0][0]), 4)
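The y_cont / pw construction above can be checked on its own; here is a minimal numpy sketch (the 2x2 grid and its labeling are made up for illustration) of the same one-hot encoding and summed edge marginals:

import numpy as np

# Hypothetical 2x2 grid with two labels; y is a discrete labeling.
y = np.array([[0, 1],
              [1, 1]])
x = np.zeros((2, 2, 2))  # same (height, width, n_states) layout as the test input

# One-hot ("continuous") encoding of y.
y_cont = np.zeros_like(x)
gx, gy = np.indices(x.shape[:-1])
y_cont[gx, gy, y] = 1

# Accumulate label-pair counts over vertical and horizontal edges.
vert = np.dot(y_cont[1:, :, :].reshape(-1, 2).T, y_cont[:-1, :, :].reshape(-1, 2))
horz = np.dot(y_cont[:, 1:, :].reshape(-1, 2).T, y_cont[:, :-1, :].reshape(-1, 2))
pw = vert + horz

print(y_cont[..., 1])  # recovers the binary labeling y
print(pw)              # 2x2 matrix of edge label-pair counts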
Example #2
def test_continuous_y():
    for inference_method in ["lp", "ad3"]:
        X, Y = toy.generate_blocks(n_samples=1)
        x, y = X[0], Y[0]
        w = np.array([1, 0, 0, 1,  # unary
                      0, -4, 0])   # pairwise

        crf = LatentGridCRF(n_labels=2, n_states_per_label=1, inference_method=inference_method)
        psi = crf.psi(x, y)
        y_cont = np.zeros_like(x)
        gx, gy = np.indices(x.shape[:-1])
        y_cont[gx, gy, y] = 1
        # need to generate edge marginals
        vert = np.dot(y_cont[1:, :, :].reshape(-1, 2).T, y_cont[:-1, :, :].reshape(-1, 2))
        # horizontal edges
        horz = np.dot(y_cont[:, 1:, :].reshape(-1, 2).T, y_cont[:, :-1, :].reshape(-1, 2))
        pw = vert + horz

        psi_cont = crf.psi(x, (y_cont, pw))
        assert_array_almost_equal(psi, psi_cont)

        const = find_constraint(crf, x, y, w, relaxed=False)
        const_cont = find_constraint(crf, x, y, w, relaxed=True)

        # dpsi and loss are equal:
        assert_array_almost_equal(const[1], const_cont[1])
        assert_almost_equal(const[2], const_cont[2])

        # returned y_hat is one-hot version of other
        assert_array_equal(const[0], np.argmax(const_cont[0][0], axis=-1))

        # test loss:
        assert_equal(crf.loss(y, const[0]), crf.continuous_loss(y, const_cont[0][0]))
Example #3
def test_learning():
    crf = IgnoreVoidCRF(n_states=3,
                        n_features=2,
                        void_label=2,
                        inference_method='lp')
    ssvm = SubgradientStructuredSVM(crf,
                                    verbose=10,
                                    C=100,
                                    n_jobs=1,
                                    max_iter=50,
                                    learning_rate=0.01)
    ssvm.fit(X, Y)

    for x in X:
        y_hat_exhaustive = exhaustive_inference(crf, x, ssvm.w)
        y_hat = crf.inference(x, ssvm.w)
        assert_array_equal(y_hat, y_hat_exhaustive)

    constr = [
        find_constraint(crf, x, y, ssvm.w, y_hat=y_hat)
        for x, y, y_hat in zip(X, Y, ssvm.predict(X))
    ]
    losses = [c[3] for c in constr]
    slacks = [c[2] for c in constr]
    assert_true(np.all(np.array(slacks) >= np.array(losses)))
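find_constraint returns the tuple (y_hat, delta_joint_feature, slack, loss) that the list comprehension above indexes into. Assuming the usual hinge definition of the slack (a sketch of the relationship, not the library code), the slacks >= losses check follows whenever the predicted labeling scores at least as well as the ground truth:

import numpy as np

# Toy stand-ins for the quantities returned by find_constraint.
w = np.array([1.0, -2.0, 0.5])
delta_joint_feature = np.array([0.2, 0.1, -0.4])  # psi(x, y) - psi(x, y_hat)
loss = 1.0

slack = max(loss - np.dot(w, delta_joint_feature), 0)

# When y_hat maximizes w . psi(x, .), the score difference is non-positive,
# so the slack cannot fall below the loss -- which is what the final
# assert_true(...) checks for every training sample.
if np.dot(w, delta_joint_feature) <= 0:
    assert slack >= loss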
Example #4
    def _sequential_learning(self, X, Y, w):
        n_samples = len(X)
        objective, positive_slacks = 0, 0
        if self.batch_size in [None, 1]:
            # online learning
            for x, y in zip(X, Y):
                y_hat, delta_joint_feature, slack, loss = \
                    find_constraint(self.model, x, y, w)
                objective += slack
                if slack > 0:
                    positive_slacks += 1
                self._solve_subgradient(delta_joint_feature, n_samples, w)
        else:
            # mini batch learning
            if self.batch_size == -1:
                slices = [slice(0, len(X))]
            else:
                n_batches = int(np.ceil(float(len(X)) / self.batch_size))
                slices = gen_even_slices(n_samples, n_batches)
            for batch in slices:
                X_b = X[batch]
                Y_b = Y[batch]
                Y_hat = self.model.batch_loss_augmented_inference(
                    X_b, Y_b, w, relaxed=True)
                delta_joint_feature = (self.model.batch_joint_feature(X_b, Y_b)
                                       - self.model.batch_joint_feature(X_b, Y_hat))
                loss = np.sum(self.model.batch_loss(Y_b, Y_hat))

                violation = np.maximum(0, loss - np.dot(w, delta_joint_feature))
                objective += violation
                positive_slacks += self.batch_size
                self._solve_subgradient(delta_joint_feature / len(X_b), n_samples, w)
        return objective, positive_slacks, w
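_solve_subgradient is not part of this snippet. A minimal sketch of the kind of update it could perform, assuming a plain subgradient step on the regularized objective with a fixed learning rate (the function name, signature and step rule here are assumptions, not the library implementation):

import numpy as np

def solve_subgradient_sketch(delta_joint_feature, n_samples, w,
                             C=1.0, learning_rate=0.01):
    # Descent direction of (1/2) * ||w||^2 / n_samples + C * hinge term,
    # evaluated for one (mini-batch averaged) constraint violation.
    grad = C * delta_joint_feature - w / n_samples
    w += learning_rate * grad
    return w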
Example #5
def test_continuous_y():
    for inference_method in get_installed(["lp", "ad3"]):
        X, Y = generate_blocks(n_samples=1)
        x, y = X[0], Y[0]
        w = np.array([1, 0, 0, 1,  # unary
                      0, -4, 0])   # pairwise

        crf = LatentGridCRF(n_labels=2,
                            n_features=2,
                            n_states_per_label=1,
                            inference_method=inference_method)
        joint_feature = crf.joint_feature(x, y)
        y_cont = np.zeros_like(x)
        gx, gy = np.indices(x.shape[:-1])
        y_cont[gx, gy, y] = 1
        # need to generate edge marginals
        vert = np.dot(y_cont[1:, :, :].reshape(-1, 2).T,
                      y_cont[:-1, :, :].reshape(-1, 2))
        # horizontal edges
        horz = np.dot(y_cont[:, 1:, :].reshape(-1, 2).T,
                      y_cont[:, :-1, :].reshape(-1, 2))
        pw = vert + horz

        joint_feature_cont = crf.joint_feature(x, (y_cont, pw))
        assert_array_almost_equal(joint_feature, joint_feature_cont, 4)

        const = find_constraint(crf, x, y, w, relaxed=False)
        const_cont = find_constraint(crf, x, y, w, relaxed=True)

        # djoint_feature and loss are equal:
        assert_array_almost_equal(const[1], const_cont[1], 4)
        assert_almost_equal(const[2], const_cont[2], 4)

        if isinstance(const_cont[0], tuple):
            # returned y_hat is one-hot version of other
            assert_array_equal(const[0], np.argmax(const_cont[0][0], axis=-1))

            # test loss:
            assert_almost_equal(crf.loss(y, const[0]),
                                crf.continuous_loss(y, const_cont[0][0]), 4)
Example #6
    def _frank_wolfe_bc(self, X, Y):
        """Block-Coordinate Frank-Wolfe learning.

        Compare Algorithm 3 in the reference paper.
        """
        n_samples = len(X)
        w = self.w.copy()
        w_mat = np.zeros((n_samples, self.model.size_psi))
        l_mat = np.zeros(n_samples)
        l_avg = 0.0
        l = 0.0
        k = 0
        for p in range(self.max_iter):
            if self.verbose > 0:
                print("Iteration %d" % p)
            for i in range(n_samples):
                x, y = X[i], Y[i]
                y_hat, delta_psi, slack, loss = find_constraint(self.model, x, y, w)
                # ws and ls
                ws = delta_psi * self.C
                ls = loss / n_samples

                # line search
                if self.line_search:
                    eps = 1e-15
                    w_diff = w_mat[i] - ws
                    gamma = (w_diff.T.dot(w) - (self.C * n_samples)*(l_mat[i] - ls)) / (np.sum(w_diff ** 2) + eps)
                    gamma = max(0.0, min(1.0, gamma))
                else:
                    gamma = 2.0 * n_samples / (k + 2.0 * n_samples)

                w -= w_mat[i]
                w_mat[i] = (1.0 - gamma) * w_mat[i] + gamma * ws
                w += w_mat[i]

                l -= l_mat[i]
                l_mat[i] = (1.0 - gamma) * l_mat[i] + gamma * ls
                l += l_mat[i]

                if self.do_averaging:
                    rho = 2.0 / (k + 2.0)
                    self.w = (1.0 - rho) * self.w + rho * w
                    l_avg = (1.0 - rho) * l_avg + rho * l
                else:
                    self.w = w
                k += 1

            if (self.check_dual_every != 0) and (p % self.check_dual_every == 0):
                dual_val, dual_gap, primal_val = self._calc_dual_gap(X, Y, l)
                if self.verbose > 0:
                    print("dual: %f, dual_gap: %f, primal: %f"
                          % (dual_val, dual_gap, primal_val))
                if dual_gap < self.tol:
                    return
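The w -= w_mat[i] / w += w_mat[i] bookkeeping keeps the global vector equal to its starting value plus the sum of the per-block vectors. A small self-contained check of that invariant with random toy updates (purely illustrative):

import numpy as np

rng = np.random.RandomState(0)
n_samples, dim = 3, 4
w = rng.randn(dim)
w0 = w.copy()
w_mat = np.zeros((n_samples, dim))

for _ in range(20):
    i = rng.randint(n_samples)
    ws = rng.randn(dim)       # stands in for delta_psi * C
    gamma = rng.uniform()     # any step size in [0, 1]
    w -= w_mat[i]
    w_mat[i] = (1.0 - gamma) * w_mat[i] + gamma * ws
    w += w_mat[i]

# The invariant maintained by the block-coordinate updates above:
assert np.allclose(w, w0 + w_mat.sum(axis=0))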
Example #7
def test_continuous_y():
    for inference_method in get_installed(["lp", "ad3"]):
        X, Y = generate_blocks(n_samples=1)
        x, y = X[0], Y[0]
        w = np.array([1, 0,  # unary
                      0, 1,
                      0,     # pairwise
                      -4, 0])

        crf = GridCRF(inference_method=inference_method)
        crf.initialize(X, Y)
        psi = crf.psi(x, y)
        y_cont = np.zeros_like(x)
        gx, gy = np.indices(x.shape[:-1])
        y_cont[gx, gy, y] = 1
        # need to generate edge marginals
        vert = np.dot(y_cont[1:, :, :].reshape(-1, 2).T,
                      y_cont[:-1, :, :].reshape(-1, 2))
        # horizontal edges
        horz = np.dot(y_cont[:, 1:, :].reshape(-1, 2).T,
                      y_cont[:, :-1, :].reshape(-1, 2))
        pw = vert + horz

        psi_cont = crf.psi(x, (y_cont, pw))
        assert_array_almost_equal(psi, psi_cont)

        const = find_constraint(crf, x, y, w, relaxed=False)
        const_cont = find_constraint(crf, x, y, w, relaxed=True)

        # dpsi and loss are equal:
        assert_array_almost_equal(const[1], const_cont[1])
        assert_almost_equal(const[2], const_cont[2])

        # returned y_hat is one-hot version of other
        if isinstance(const_cont[0], tuple):
            assert_array_equal(const[0], np.argmax(const_cont[0][0], axis=-1))

            # test loss:
            assert_almost_equal(crf.loss(y, const[0]),
                                crf.continuous_loss(y, const_cont[0][0]))
Example #8
def test_learning():
    crf = IgnoreVoidCRF(n_states=3, n_features=2, void_label=2,
                        inference_method='lp')
    ssvm = SubgradientStructuredSVM(crf, verbose=10, C=100, n_jobs=1,
                                    max_iter=50, learning_rate=0.01)
    ssvm.fit(X, Y)

    for x in X:
        y_hat_exhaustive = exhaustive_inference(crf, x, ssvm.w)
        y_hat = crf.inference(x, ssvm.w)
        assert_array_equal(y_hat, y_hat_exhaustive)

    constr = [find_constraint(crf, x, y, ssvm.w, y_hat=y_hat)
              for x, y, y_hat in zip(X, Y, ssvm.predict(X))]
    losses = [c[3] for c in constr]
    slacks = [c[2] for c in constr]
    assert_true(np.all(np.array(slacks) >= np.array(losses)))
Example #9
    def _sequential_learning(self, X, Y, w):
        n_samples = len(X)
        objective, positive_slacks = 0, 0
        if self.batch_size in [None, 1]:
            # online learning
            for x, y in zip(X, Y):
                y_hat, delta_joint_feature, slack, loss = \
                    find_constraint(self.model, x, y, w)
                objective += slack
                if slack > 0:
                    positive_slacks += 1
                self._solve_subgradient(delta_joint_feature, n_samples, w)
        else:
            # mini batch learning
            if self.batch_size == -1:
                slices = [slice(0, len(X))]
            else:
                n_batches = int(np.ceil(float(len(X)) / self.batch_size))
                slices = gen_even_slices(n_samples, n_batches)
            for batch in slices:
                X_b = X[batch]
                Y_b = Y[batch]
                Y_hat = self.model.batch_loss_augmented_inference(X_b,
                                                                  Y_b,
                                                                  w,
                                                                  relaxed=True)
                delta_joint_feature = (
                    self.model.batch_joint_feature(X_b, Y_b) -
                    self.model.batch_joint_feature(X_b, Y_hat))
                loss = np.sum(self.model.batch_loss(Y_b, Y_hat))

                violation = np.maximum(0,
                                       loss - np.dot(w, delta_joint_feature))
                objective += violation
                positive_slacks += self.batch_size
                self._solve_subgradient(delta_joint_feature / len(X_b),
                                        n_samples, w)
        return objective, positive_slacks, w
Example #10
    def _frank_wolfe_bc(self, X, Y):
        """Block-Coordinate Frank-Wolfe learning.

        Compare Algorithm 3 in the reference paper.
        """
        n_samples = len(X)
        w = self.w.copy()
        w_mat = np.zeros((n_samples, self.model.size_joint_feature))
        l_mat = np.zeros(n_samples)
        l = 0.0
        k = 0

        rng = check_random_state(self.random_state)
        for iteration in range(self.max_iter):
            if self.verbose > 0:
                print("Iteration %d" % iteration)

            perm = np.arange(n_samples)
            if self.sample_method == 'perm':
                rng.shuffle(perm)
            elif self.sample_method == 'rnd':
                perm = rng.randint(low=0, high=n_samples, size=n_samples)

            for j in range(n_samples):
                i = perm[j]
                x, y = X[i], Y[i]
                y_hat, delta_joint_feature, slack, loss = find_constraint(
                    self.model, x, y, w)
                # ws and ls
                ws = delta_joint_feature * self.C
                ls = loss / n_samples

                # line search
                if self.line_search:
                    eps = 1e-15
                    w_diff = w_mat[i] - ws
                    gamma = (w_diff.T.dot(w) - (self.C * n_samples) *
                             (l_mat[i] - ls)) / (np.sum(w_diff**2) + eps)
                    gamma = max(0.0, min(1.0, gamma))
                else:
                    gamma = 2.0 * n_samples / (k + 2.0 * n_samples)

                w -= w_mat[i]
                w_mat[i] = (1.0 - gamma) * w_mat[i] + gamma * ws
                w += w_mat[i]

                l -= l_mat[i]
                l_mat[i] = (1.0 - gamma) * l_mat[i] + gamma * ls
                l += l_mat[i]

                if self.do_averaging:
                    rho = 2. / (k + 2.)
                    self.w = (1. - rho) * self.w + rho * w
                    self.l = (1. - rho) * self.l + rho * l
                else:
                    self.w = w
                    self.l = l
                k += 1

            if (self.check_dual_every != 0) and (iteration %
                                                 self.check_dual_every == 0):
                dual_val, dual_gap, primal_val = self._calc_dual_gap(X, Y)
                self.primal_objective_curve_.append(primal_val)
                self.objective_curve_.append(dual_val)
                self.timestamps_.append(time() - self.timestamps_[0])
                if self.verbose > 0:
                    print("dual: %f, dual_gap: %f, primal: %f" %
                          (dual_val, dual_gap, primal_val))

            if self.logger is not None:
                self.logger(self, iteration)

            if dual_gap < self.tol:
                return
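Without line search, the step size follows the schedule gamma = 2 * n_samples / (k + 2 * n_samples): it starts at 1 and decays as the block counter k grows. A quick illustration with made-up numbers:

import numpy as np

n_samples = 5
k = np.arange(0, 60, 10)
gamma = 2.0 * n_samples / (k + 2.0 * n_samples)
print(np.round(gamma, 3))  # 1.0, 0.5, 0.333, 0.25, 0.2, 0.167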
Example #11
    def _frank_wolfe_bc(self, param_x, param_y, initialize=True):

        n_samples = len(param_x)
        w = self.w.copy()
        if initialize:
            self.w_mat = np.zeros((n_samples, self.model.size_joint_feature))
            self.l_mat = np.zeros(n_samples)
            self.l_loss = 0.0
            self.k = 0
            self.rng = check_random_state(self.random_state)

        for iteration in range(self.max_iter):
            if self.verbose > 0:
                print(("Iteration %d" % iteration))

            perm = np.arange(n_samples)
            if self.sample_method == 'perm':
                self.rng.shuffle(perm)
            elif self.sample_method == 'rnd':
                perm = self.rng.randint(low=0, high=n_samples, size=n_samples)

            for j in range(n_samples):
                i = perm[j]
                x, y = param_x[i], param_y[i]
                y_hat, delta_joint_feature, slack, loss = find_constraint(
                    self.model, x, y, w)
                ws = delta_joint_feature * self.C
                ls = loss / n_samples
                if self.line_search:
                    eps = 1e-15
                    w_diff = self.w_mat[i] - ws
                    self.gamma = (w_diff.T.dot(w) - (self.C * n_samples) *
                                  (self.l_mat[i] - ls)) / (np.sum(w_diff**2) +
                                                           eps)
                    self.gamma = max(0.0, min(1.0, self.gamma))
                else:
                    self.gamma = 2.0 * n_samples / (self.k + 2.0 * n_samples)

                w -= self.w_mat[i]
                self.w_mat[i] = (1.0 - self.gamma) * \
                    self.w_mat[i] + self.gamma * ws
                w += self.w_mat[i]

                self.l_loss -= self.l_mat[i]
                self.l_mat[i] = (1.0 - self.gamma) * \
                    self.l_mat[i] + self.gamma * ls
                self.l_loss += self.l_mat[i]

                if self.do_averaging:
                    self.rho = 2. / (self.k + 2.)
                    self.w = (1. - self.rho) * self.w + self.rho * w
                    self.param_l = (1. - self.rho) * \
                        self.param_l + self.rho * self.l_loss
                else:
                    self.w = w
                    self.param_l = self.l_loss
                self.k += 1

            if (self.check_dual_every != 0) and (iteration %
                                                 self.check_dual_every == 0):
                dual_val, dual_gap, primal_val = self._calc_dual_gap(
                    param_x, param_y)
                self.primal_objective_curve_.append(primal_val)
                self.objective_curve_.append(dual_val)
                self.timestamps_.append(time() - self.timestamps_[0])
                if self.verbose > 0:
                    print(("dual: %f, dual_gap: %f, primal: %f" %
                           (dual_val, dual_gap, primal_val)))

            if self.logger is not None:
                self.logger(self, iteration)

            if dual_gap < self.tol:
                return
Example #12
    def _frank_wolfe_bc(self, X, Y):
        """Block-Coordinate Frank-Wolfe learning.

        Compare Algorithm 3 in the reference paper.
        """
        n_samples = len(X)
        w = self.w.copy()
        w_mat = np.zeros((n_samples, self.model.size_joint_feature))
        l_mat = np.zeros(n_samples)
        l = 0.0
        k = 0

        rng = check_random_state(self.random_state)
        for iteration in range(self.max_iter):
            if self.verbose > 0:
                print("Iteration %d" % iteration)

            perm = np.arange(n_samples)
            if self.sample_method == 'perm':
                rng.shuffle(perm)
            elif self.sample_method == 'rnd':
                perm = rng.randint(low=0, high=n_samples, size=n_samples)

            for j in range(n_samples):
                i = perm[j]
                x, y = X[i], Y[i]
                y_hat, delta_joint_feature, slack, loss = find_constraint(self.model, x, y, w)
                # ws and ls
                ws = delta_joint_feature * self.C
                ls = loss / n_samples

                # line search
                if self.line_search:
                    eps = 1e-15
                    w_diff = w_mat[i] - ws
                    gamma = (w_diff.T.dot(w) - (self.C * n_samples)*(l_mat[i] - ls)) / (np.sum(w_diff ** 2) + eps)
                    gamma = max(0.0, min(1.0, gamma))
                else:
                    gamma = 2.0 * n_samples / (k + 2.0 * n_samples)

                w -= w_mat[i]
                w_mat[i] = (1.0 - gamma) * w_mat[i] + gamma * ws
                w += w_mat[i]

                l -= l_mat[i]
                l_mat[i] = (1.0 - gamma) * l_mat[i] + gamma * ls
                l += l_mat[i]

                if self.do_averaging:
                    rho = 2. / (k + 2.)
                    self.w = (1. - rho) * self.w + rho * w
                    self.l = (1. - rho) * self.l + rho * l
                else:
                    self.w = w
                    self.l = l
                k += 1

            if (self.check_dual_every != 0) and (iteration % self.check_dual_every == 0):
                dual_val, dual_gap, primal_val = self._calc_dual_gap(X, Y)
                self.primal_objective_curve_.append(primal_val)
                self.objective_curve_.append(dual_val)
                self.timestamps_.append(time() - self.timestamps_[0])
                if self.verbose > 0:
                    print("dual: %f, dual_gap: %f, primal: %f"
                          % (dual_val, dual_gap, primal_val))

            if self.logger is not None:
                self.logger(self, iteration)

            if dual_gap < self.tol:
                return
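The do_averaging branch with rho = 2 / (k + 2) is the usual weighted-averaging trick: mixing recursively with that rho is equivalent to averaging the iterates with weights 1, 2, 3, ... A short numerical check of that equivalence (toy vectors, purely illustrative):

import numpy as np

rng = np.random.RandomState(1)
iterates = [rng.randn(4) for _ in range(6)]

w_avg = np.zeros(4)
for k, w in enumerate(iterates):
    rho = 2.0 / (k + 2.0)
    w_avg = (1.0 - rho) * w_avg + rho * w

# Explicit weighted average with weights 1, 2, ..., len(iterates).
weights = np.arange(1, len(iterates) + 1, dtype=float)
explicit = sum(c * w for c, w in zip(weights, iterates)) / weights.sum()
assert np.allclose(w_avg, explicit)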