Example no. 1
0
def test_latent_node_boxes_standard_latent():
    # learn the "easy" 2x2 boxes dataset.
    # a 2x2 box is placed randomly in a 4x4 grid
    # we add a latent variable for each 2x2 patch
    # that should make the model fairly simple

    X, Y = make_simple_2x2(seed=1, n_samples=40)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1)
    one_slack = OneSlackSSVM(latent_crf)
    n_slack = NSlackSSVM(latent_crf)
    subgradient = SubgradientSSVM(latent_crf, max_iter=100)
    for base_svm in [one_slack, n_slack, subgradient]:
        base_svm.C = 10
        latent_svm = LatentSSVM(base_svm, latent_iter=10)

        # grid edges for each sample plus the edges connecting the visible
        # nodes to their patch's hidden variable
        edges = make_edges_2x2()
        G = [np.vstack([make_grid_edges(x), edges]) for x in X]

        # reshape / flatten x and y
        X_flat = [x.reshape(-1, 1) for x in X]
        Y_flat = [y.ravel() for y in Y]

        # materialize the zip: on Python 3, zip() returns a one-shot
        # iterator that cannot be sliced or reused below
        X_ = list(zip(X_flat, G, [2 * 2 for x in X_flat]))
        latent_svm.fit(X_[:20], Y_flat[:20])

        # training samples must be fit perfectly
        assert_array_equal(latent_svm.predict(X_[:20]), Y_flat[:20])
        assert_equal(latent_svm.score(X_[:20], Y_flat[:20]), 1)

        # test that score is not always 1
        assert_true(.98 < latent_svm.score(X_[20:], Y_flat[20:]) < 1)
Example no. 2
0
def test_inference_chain():
    # 6-node chain CRF with two hidden nodes: checks that MAP inference,
    # psi and the LP inference energy agree.
    # same with pairwise edges:
    features = np.array([-1, 1, -1, 1, -1, 1])
    unary_parameters = np.array([-1, 1])
    # 10 entries: triangular part of a symmetric 4x4 pairwise matrix
    # (4 states total: 2 visible + 2 hidden)
    pairwise_parameters = np.array([+1, +0, 1, +3, 0, 0, +0, 3, 0, 0])
    w = np.hstack([unary_parameters, pairwise_parameters])
    crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2)
    # chain edges 0-1, 1-2, ..., 4-5
    edges = np.vstack([np.arange(5), np.arange(1, 6)]).T

    # edges for latent states. Latent states named 6, 7
    node_indices = np.arange(features.size)
    other_edges = []
    # first half of the chain hangs off hidden node 6, second half off 7
    for n in node_indices[:3]:
        other_edges.append([n, 6])
    for n in node_indices[3:]:
        other_edges.append([n, 7])
    all_edges = np.vstack([edges, other_edges])

    # x is a tuple (node_features, edges, n_hidden_nodes)
    x = (features.reshape(-1, 1), all_edges, 2)
    h, energy_lp = crf.inference(x, w, return_energy=True)
    # labels of the 6 visible nodes as predicted by the unaries alone
    y = np.argmax(crf._get_unary_potentials(x, w), axis=1)[:6]
    energy_psi = np.dot(w, crf.psi(x, h))

    # LP energy is the negative of <w, psi(x, h)>
    assert_almost_equal(energy_psi, -energy_lp)
    assert_array_equal(y, features > 0)
    # hidden nodes (last two entries) take the hidden states 2 and 3
    assert_array_equal(h, [0, 0, 0, 1, 1, 1, 2, 3])

    # continuous inference and psi:
    h, energy_lp = crf.inference(x, w, return_energy=True, relaxed=True)
    energy_psi = np.dot(w, crf.psi(x, h))
    assert_almost_equal(energy_psi, -energy_lp)
Example no. 3
0
def test_initialize():
    # 17 nodes, three features, 5 visible states, 2 hidden states
    rnd = np.random.RandomState(0)
    feats = rnd.normal(size=(17, 3))
    # use builtin int: the np.int alias was deprecated in NumPy 1.20
    # and removed in NumPy 1.24
    edges = np.zeros((0, 2), dtype=int)  # no edges
    x = (feats, edges, 4)  # 4 latent variables
    y = rnd.randint(5, size=17)
    crf = LatentNodeCRF(n_labels=5, n_features=3)
    # no-op: all sizes are given explicitly, initialize must not change them
    crf.initialize([x], [y])
    assert_equal(crf.n_states, 5 + 2)

    # test initialization works: sizes are inferred from the data
    crf = LatentNodeCRF()
    crf.initialize([x], [y])
    assert_equal(crf.n_labels, 5)
    assert_equal(crf.n_states, 5 + 2)
    assert_equal(crf.n_features, 3)

    # a label count inconsistent with the data must raise
    crf = LatentNodeCRF(n_labels=3)
    assert_raises(ValueError, crf.initialize, X=[x], Y=[y])
Example no. 4
0
def test_inference_chain():
    # 6-node chain CRF with two hidden nodes: checks that MAP inference,
    # joint_feature and the LP inference energy agree.
    # same with pairwise edges:
    features = np.array([-1,  1, -1, 1, -1,  1])
    unary_parameters = np.array([-1, 1])
    # 10 entries: triangular part of a symmetric 4x4 pairwise matrix
    # (4 states total: 2 visible + 2 hidden)
    pairwise_parameters = np.array([+1,
                                    +0,  1,
                                    +3,  0, 0,
                                    +0,  3, 0, 0])
    w = np.hstack([unary_parameters, pairwise_parameters])
    crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2)
    # chain edges 0-1, 1-2, ..., 4-5
    edges = np.vstack([np.arange(5), np.arange(1, 6)]).T

    # edges for latent states. Latent states named 6, 7
    node_indices = np.arange(features.size)
    other_edges = []
    # first half of the chain hangs off hidden node 6, second half off 7
    for n in node_indices[:3]:
        other_edges.append([n, 6])
    for n in node_indices[3:]:
        other_edges.append([n, 7])
    all_edges = np.vstack([edges, other_edges])

    # x is a tuple (node_features, edges, n_hidden_nodes)
    x = (features.reshape(-1, 1), all_edges, 2)
    h, energy_lp = crf.inference(x, w, return_energy=True)
    # labels of the 6 visible nodes as predicted by the unaries alone
    y = np.argmax(crf._get_unary_potentials(x, w), axis=1)[:6]
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))

    # LP energy is the negative of <w, joint_feature(x, h)>
    assert_almost_equal(energy_joint_feature, -energy_lp)
    assert_array_equal(y, features > 0)
    # hidden nodes (last two entries) take the hidden states 2 and 3
    assert_array_equal(h, [0, 0, 0, 1, 1, 1, 2, 3])

    # continuous inference and joint_feature:
    h, energy_lp = crf.inference(x, w, return_energy=True, relaxed=True)
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))
    assert_almost_equal(energy_joint_feature, -energy_lp)
def test_binary_blocks_cutting_plane_latent_node():
    # testing cutting plane ssvm on easy binary dataset
    # we use the LatentNodeCRF without latent nodes and check that it does the
    # same as GraphCRF
    X, Y = generate_blocks(n_samples=3)
    crf = GraphCRF()
    clf = NSlackSSVM(model=crf,
                     max_iter=20,
                     C=100,
                     check_constraints=True,
                     break_on_bad=False,
                     n_jobs=1)
    x1, x2, x3 = X
    y1, y2, y3 = Y
    n_states = len(np.unique(Y))
    # delete some rows to make it more fun
    x1, y1 = x1[:, :-1], y1[:, :-1]
    x2, y2 = x2[:-1], y2[:-1]
    # generate graphs
    X_ = [x1, x2, x3]
    G = [make_grid_edges(x) for x in X_]

    # reshape / flatten x and y
    X_ = [x.reshape(-1, n_states) for x in X_]
    Y = [y.ravel() for y in [y1, y2, y3]]

    # materialize the zip: on Python 3 it is a one-shot iterator, but X is
    # iterated by both fit and predict below
    X = list(zip(X_, G))

    clf.fit(X, Y)
    Y_pred = clf.predict(X)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    # the same problem as a LatentNodeCRF with zero hidden states must
    # recover the same weight vector
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=0)
    latent_svm = LatentSSVM(NSlackSSVM(model=latent_crf,
                                       max_iter=20,
                                       C=100,
                                       check_constraints=True,
                                       break_on_bad=False,
                                       n_jobs=1),
                            latent_iter=3)
    # each sample gets 0 latent nodes; list() again for reuse in predict
    X_latent = list(zip(X_, G, np.zeros(len(X_))))
    latent_svm.fit(X_latent, Y, H_init=Y)
    Y_pred = latent_svm.predict(X_latent)
    for y, y_pred in zip(Y, Y_pred):
        assert_array_equal(y, y_pred)

    assert_array_almost_equal(latent_svm.w, clf.w)
def test_latent_node_boxes_standard_latent_features():
    # learn the "easy" 2x2 boxes dataset.
    # we make it even easier now by adding features that encode the correct
    # latent state. This basically tests that the features are actually used

    X, Y = make_simple_2x2(seed=1, n_samples=20, n_flips=6)
    latent_crf = LatentNodeCRF(n_labels=2,
                               n_hidden_states=2,
                               n_features=1,
                               latent_node_features=True)
    one_slack = OneSlackSSVM(latent_crf)
    n_slack = NSlackSSVM(latent_crf)
    subgradient = SubgradientSSVM(latent_crf,
                                  max_iter=100,
                                  learning_rate=0.01,
                                  momentum=0)
    for base_svm in [one_slack, n_slack, subgradient]:
        base_svm.C = 10
        latent_svm = LatentSSVM(base_svm, latent_iter=10)

        # grid edges for each sample plus the edges connecting the visible
        # nodes to their patch's hidden variable
        edges = make_edges_2x2()
        G = [np.vstack([make_grid_edges(x), edges]) for x in X]

        # reshape / flatten x and y
        X_flat = [x.reshape(-1, 1) for x in X]
        # augment X with the features for hidden units
        X_flat = [
            np.vstack([x, y[::2, ::2].reshape(-1, 1)])
            for x, y in zip(X_flat, Y)
        ]
        Y_flat = [y.ravel() for y in Y]

        # materialize the zip: on Python 3, zip() returns a one-shot
        # iterator that cannot be sliced or reused below
        X_ = list(zip(X_flat, G, [2 * 2 for x in X_flat]))
        latent_svm.fit(X_[:10], Y_flat[:10])

        assert_array_equal(latent_svm.predict(X_[:10]), Y_flat[:10])
        assert_equal(latent_svm.score(X_[:10], Y_flat[:10]), 1)

        # we actually become prefect ^^
        assert_true(.98 < latent_svm.score(X_[10:], Y_flat[10:]) <= 1)
Example no. 7
0
def test_latent_node_boxes_latent_subgradient():
    # same as above, now with elementary subgradients

    X, Y = make_simple_2x2(seed=1)
    latent_crf = LatentNodeCRF(n_labels=2, n_hidden_states=2, n_features=1)
    latent_svm = SubgradientLatentSSVM(model=latent_crf, max_iter=50, C=10)

    # grid edges plus the edges connecting visible nodes to hidden nodes
    edges = make_edges_2x2()
    G = [np.vstack([make_grid_edges(x), edges]) for x in X]

    # reshape / flatten x and y
    X_flat = [x.reshape(-1, 1) for x in X]
    Y_flat = [y.ravel() for y in Y]

    # materialize the zip: on Python 3 it is a one-shot iterator, but X_ is
    # iterated by both fit and score below
    X_ = list(zip(X_flat, G, [4 * 4 for x in X_flat]))
    latent_svm.fit(X_, Y_flat)

    assert_equal(latent_svm.score(X_, Y_flat), 1)
Example no. 8
0
def test_initialize():
    # 17 nodes, three features, 5 visible states, 2 hidden states
    rnd = np.random.RandomState(0)
    feats = rnd.normal(size=(17, 3))
    # use builtin int: the np.int alias was deprecated in NumPy 1.20
    # and removed in NumPy 1.24
    edges = np.zeros((0, 2), dtype=int)   # no edges
    x = (feats, edges, 4)   # 4 latent variables
    y = rnd.randint(5, size=17)
    crf = LatentNodeCRF(n_labels=5, n_features=3)
    # no-op: all sizes are given explicitly, initialize must not change them
    crf.initialize([x], [y])
    assert_equal(crf.n_states, 5 + 2)

    # test initialization works: sizes are inferred from the data
    crf = LatentNodeCRF()
    crf.initialize([x], [y])
    assert_equal(crf.n_labels, 5)
    assert_equal(crf.n_states, 5 + 2)
    assert_equal(crf.n_features, 3)

    # a label count inconsistent with the data must raise
    crf = LatentNodeCRF(n_labels=3)
    assert_raises(ValueError, crf.initialize, X=[x], Y=[y])
Example no. 9
0
def test_inference_trivial():
    # size 6 chain graph
    # first three and last three have a latent variable
    features = np.array([-1,  1, -1, 1, -1,  1])
    unary_parameters = np.array([-1, 1])
    # 10 entries: triangular part of a symmetric 4x4 pairwise matrix
    # (4 states total: 2 visible + 2 hidden); only visible-hidden
    # interactions are non-zero here
    pairwise_parameters = np.array([+0,
                                    +0,  0,
                                    +3,  0, 0,
                                    +0,  3, 0, 0])
    w = np.hstack([unary_parameters, pairwise_parameters])
    crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2)
    # edges for latent states. Latent states named 6, 7
    node_indices = np.arange(features.size)
    other_edges = []
    for n in node_indices[:3]:
        other_edges.append([n, 6])
    for n in node_indices[3:]:
        other_edges.append([n, 7])

    # note: no chain edges here, only visible-to-hidden edges
    all_edges = np.vstack(other_edges)
    # x is a tuple (node_features, edges, n_hidden_nodes)
    x = (features.reshape(-1, 1), all_edges, 2)

    # test inference
    h, energy_lp = crf.inference(x, w, return_energy=True)
    assert_array_equal(h, [0, 0, 0, 1, 1, 1, 2, 3])

    # dropping the hidden nodes gives the visible labeling
    y = crf.label_from_latent(h)
    assert_array_equal(y, [0, 0, 0, 1, 1, 1])

    # labels of the 6 visible nodes from the unaries alone
    y_unaries = np.argmax(crf._get_unary_potentials(x, w), axis=1)[:6]
    assert_array_equal(y_unaries, features > 0)

    # test joint_feature
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))
    assert_almost_equal(energy_joint_feature, -energy_lp)

    # test loss
    h_unaries = crf.latent(x, y_unaries, w)
    assert_equal(crf.loss(h, h_unaries), 2)

    # continuous inference and joint_feature:
    h_continuous, energy_lp = crf.inference(x, w, return_energy=True,
                                            relaxed=True)
    # NOTE(review): this uses the discrete h, not h_continuous — presumably
    # valid because the relaxation is tight here (loss below is 0), but
    # h_continuous looks like the intended argument; confirm
    energy_joint_feature = np.dot(w, crf.joint_feature(x, h))
    assert_almost_equal(energy_joint_feature, -energy_lp)

    # test continuous loss
    assert_equal(crf.loss(h, h_continuous), 0)

    #test loss-augmented inference energy
    h_hat, energy_lp = crf.loss_augmented_inference(x, h, w,
                                                    return_energy=True)
    # NOTE(review): loss is taken against y (visible labels), not h —
    # presumably the loss ignores hidden nodes; confirm against the model
    assert_almost_equal(-energy_lp, np.dot(w, crf.joint_feature(x, h_hat)) +
                        crf.loss(h_hat, y))
Example no. 10
0
def test_inference_trivial():
    # size 6 chain graph
    # first three and last three have a latent variable
    features = np.array([-1, 1, -1, 1, -1, 1])
    unary_parameters = np.array([-1, 1])
    # 10 entries: triangular part of a symmetric 4x4 pairwise matrix
    # (4 states total: 2 visible + 2 hidden); only visible-hidden
    # interactions are non-zero here
    pairwise_parameters = np.array([+0, +0, 0, +3, 0, 0, +0, 3, 0, 0])
    w = np.hstack([unary_parameters, pairwise_parameters])
    crf = LatentNodeCRF(n_labels=2, n_features=1, n_hidden_states=2)
    # edges for latent states. Latent states named 6, 7
    node_indices = np.arange(features.size)
    other_edges = []
    for n in node_indices[:3]:
        other_edges.append([n, 6])
    for n in node_indices[3:]:
        other_edges.append([n, 7])

    # note: no chain edges here, only visible-to-hidden edges
    all_edges = np.vstack(other_edges)
    # x is a tuple (node_features, edges, n_hidden_nodes)
    x = (features.reshape(-1, 1), all_edges, 2)

    # test inference
    h, energy_lp = crf.inference(x, w, return_energy=True)
    assert_array_equal(h, [0, 0, 0, 1, 1, 1, 2, 3])

    # dropping the hidden nodes gives the visible labeling
    y = crf.label_from_latent(h)
    assert_array_equal(y, [0, 0, 0, 1, 1, 1])

    # labels of the 6 visible nodes from the unaries alone
    y_unaries = np.argmax(crf._get_unary_potentials(x, w), axis=1)[:6]
    assert_array_equal(y_unaries, features > 0)

    # test psi
    energy_psi = np.dot(w, crf.psi(x, h))
    assert_almost_equal(energy_psi, -energy_lp)

    # test loss
    h_unaries = crf.latent(x, y_unaries, w)
    assert_equal(crf.loss(h, h_unaries), 2)

    # continuous inference and psi:
    h_continuous, energy_lp = crf.inference(x,
                                            w,
                                            return_energy=True,
                                            relaxed=True)
    # NOTE(review): this uses the discrete h, not h_continuous — presumably
    # valid because the relaxation is tight here (loss below is 0), but
    # h_continuous looks like the intended argument; confirm
    energy_psi = np.dot(w, crf.psi(x, h))
    assert_almost_equal(energy_psi, -energy_lp)

    # test continuous loss
    assert_equal(crf.loss(h, h_continuous), 0)

    #test loss-augmented inference energy
    h_hat, energy_lp = crf.loss_augmented_inference(x,
                                                    h,
                                                    w,
                                                    return_energy=True)
    # NOTE(review): loss is taken against y (visible labels), not h —
    # presumably the loss ignores hidden nodes; confirm against the model
    assert_almost_equal(-energy_lp,
                        np.dot(w, crf.psi(x, h_hat)) + crf.loss(h_hat, y))
Example no. 11
0
# Script fragment: compare a plain GraphCRF against a latent-node model on
# the 2x2 boxes data. X, X_flat, Y and plot_boxes are defined earlier in
# the original script (not visible here).
Y_flat = [y.ravel() for y in Y]

# first, use standard graph CRF. Can't do much, high loss.
crf = GraphCRF()
svm = NSlackSSVM(model=crf, max_iter=200, C=1, n_jobs=1)

G = [make_grid_edges(x) for x in X]

# NOTE(review): asdf is a zip object; on Python 3 it is exhausted after
# fit(), leaving predict/score with no data — wrap in list() if this is
# meant to run on Python 3. TODO confirm target Python version.
asdf = zip(X_flat, G)
svm.fit(asdf, Y_flat)
plot_boxes(svm.predict(asdf), title="Non-latent SSVM predictions")
print("Training score binary grid CRF: %f" % svm.score(asdf, Y_flat))

# using one latent variable for each 2x2 rectangle
latent_crf = LatentNodeCRF(n_labels=2,
                           n_features=1,
                           n_hidden_states=2,
                           inference_method='lp')

ssvm = OneSlackSSVM(model=latent_crf,
                    max_iter=200,
                    C=100,
                    verbose=1,
                    n_jobs=-1,
                    show_loss_every=10,
                    inference_cache=50)
latent_svm = LatentSSVM(ssvm)

# make edges for hidden states:
edges = []
# indices of the 4x4 grid nodes, used to wire each 2x2 patch to its
# hidden node (loop body truncated in this view)
node_indices = np.arange(4 * 4).reshape(4, 4)
for i, (x, y) in enumerate(itertools.product([0, 2], repeat=2)):
Example no. 12
0
def svm_on_segments(C=.1, learning_rate=.001, subgradient=True):
    """Train a (latent) SSVM on hierarchical segmentation data.

    Parameters
    ----------
    C : float
        Regularization parameter handed to the learner.
    learning_rate : float
        Step size for the subgradient learner (unused otherwise).
    subgradient : bool
        If True, use LatentSubgradientSSVM; otherwise a OneSlackSSVM
        wrapped in LatentSSVM (latent branch) / a plain OneSlackSSVM
        (non-latent branch).
    """
    # load and prepare data
    lateral = True
    latent = True
    test = False
    #data_train = load_data(which="piecewise")
    #data_train = add_edges(data_train, independent=False)
    #data_train = add_kraehenbuehl_features(data_train, which="train_30px")
    #data_train = add_kraehenbuehl_features(data_train, which="train")
    #if lateral:
    #data_train = add_edge_features(data_train)
    data_train = load_data_global_probs(latent=latent)
    X_org_ = data_train.X
    #data_train = make_hierarchical_data(data_train, lateral=lateral,
    #latent=latent, latent_lateral=True)
    # drop void-labeled nodes (21 classes)
    data_train = discard_void(data_train, 21, latent_features=True)
    X_, Y_ = data_train.X, data_train.Y
    # remove edges
    if not lateral:
        X_org_ = [(x[0], np.zeros((0, 2), dtype=np.int)) for x in X_org_]

    # optionally fold the validation set into training
    if test:
        data_val = load_data('val', which="piecewise")
        data_val = add_edges(data_val, independent=False)
        data_val = add_kraehenbuehl_features(data_val)
        data_val = make_hierarchical_data(data_val,
                                          lateral=lateral,
                                          latent=latent)
        data_val = discard_void(data_val, 21)

        X_.extend(data_val.X)
        Y_.extend(data_val.Y)

    n_states = 21
    # inverse-frequency class weights, normalized to sum to n_states
    class_weights = 1. / np.bincount(np.hstack(Y_))
    class_weights *= 21. / np.sum(class_weights)
    experiment_name = ("latent5_features_C%f_top_node" % C)
    # periodically pickle the learner state for later inspection / restart
    logger = SaveLogger(experiment_name + ".pickle", save_every=10)
    if latent:
        model = LatentNodeCRF(n_labels=n_states,
                              n_features=data_train.X[0][0].shape[1],
                              n_hidden_states=5,
                              inference_method='qpbo' if lateral else 'dai',
                              class_weight=class_weights,
                              latent_node_features=True)
        if subgradient:
            ssvm = learners.LatentSubgradientSSVM(model,
                                                  C=C,
                                                  verbose=1,
                                                  show_loss_every=10,
                                                  logger=logger,
                                                  n_jobs=-1,
                                                  learning_rate=learning_rate,
                                                  decay_exponent=1,
                                                  momentum=0.,
                                                  max_iter=100000)
        else:
            # separate logger for each latent iteration's base SSVM
            latent_logger = SaveLogger("lssvm_" + experiment_name +
                                       "_%d.pickle",
                                       save_every=1)
            base_ssvm = learners.OneSlackSSVM(model,
                                              verbose=2,
                                              C=C,
                                              max_iter=100000,
                                              n_jobs=-1,
                                              tol=0.001,
                                              show_loss_every=200,
                                              inference_cache=50,
                                              logger=logger,
                                              cache_tol='auto',
                                              inactive_threshold=1e-5,
                                              break_on_bad=False,
                                              switch_to_ad3=True)
            ssvm = learners.LatentSSVM(base_ssvm, logger=latent_logger)
        # optionally resume a previously saved learner with a smaller
        # learning rate (disabled by default)
        warm_start = False
        if warm_start:
            ssvm = logger.load()
            ssvm.logger = SaveLogger(experiment_name + "_retrain.pickle",
                                     save_every=10)
            ssvm.max_iter = 100000
            ssvm.learning_rate = 0.00001
            ssvm.momentum = 0
    else:
        #model = GraphCRF(n_states=n_states,
        #n_features=data_train.X[0][0].shape[1],
        #inference_method='qpbo' if lateral else 'dai',
        #class_weight=class_weights)
        model = EdgeFeatureGraphCRF(
            n_states=n_states,
            n_features=data_train.X[0][0].shape[1],
            inference_method='qpbo' if lateral else 'dai',
            class_weight=class_weights,
            n_edge_features=4,
            symmetric_edge_features=[0, 1],
            antisymmetric_edge_features=[2])
        ssvm = learners.OneSlackSSVM(model,
                                     verbose=2,
                                     C=C,
                                     max_iter=100000,
                                     n_jobs=-1,
                                     tol=0.0001,
                                     show_loss_every=200,
                                     inference_cache=50,
                                     logger=logger,
                                     cache_tol='auto',
                                     inactive_threshold=1e-5,
                                     break_on_bad=False)

    #ssvm = logger.load()

    # shuffle once before fitting
    X_, Y_ = shuffle(X_, Y_)
    #ssvm.fit(data_train.X, data_train.Y)
    #ssvm.fit(X_, Y_, warm_start=warm_start)
    ssvm.fit(X_, Y_)
    print("fit finished!")
Example no. 13
0
def svm_on_segments(C=.1, learning_rate=.001, subgradient=False):
    """Train a latent-node SSVM on Pascal CPMC segment hierarchies.

    Parameters
    ----------
    C : float
        Regularization parameter handed to the learner.
    learning_rate : float
        Step size for the subgradient learner (unused otherwise).
    subgradient : bool
        If True, use LatentSubgradientSSVM; otherwise an NSlackSSVM
        wrapped in LatentSSVM.
    """
    # cache the prepared (X, Y) on disk to skip the expensive preparation
    data_file = "data_train_XY.pickle"
    ds = PascalSegmentation()
    if os.path.exists(data_file):
        # NOTE(review): cPickle is Python 2 only, and the file is opened in
        # text mode — use `pickle` and open(..., 'rb') for Python 3.
        X_, Y_ = cPickle.load(open(data_file))
    else:
        # load and prepare data
        data_train = load_pascal("train", sp_type="cpmc")
        data_train = make_cpmc_hierarchy(ds, data_train)
        data_train = discard_void(ds, data_train)
        X_, Y_ = data_train.X, data_train.Y
        cPickle.dump((X_, Y_), open(data_file, 'wb'), -1)

    # inverse-frequency class weights, normalized to sum to 21
    class_weights = 1. / np.bincount(np.hstack(Y_))
    class_weights *= 21. / np.sum(class_weights)
    experiment_name = ("latent_25_cpmc_%f_qpbo_n_slack_blub3" % C)
    # periodically pickle the learner state for later inspection / restart
    logger = SaveLogger(experiment_name + ".pickle", save_every=10)
    model = LatentNodeCRF(n_hidden_states=25,
                          inference_method='qpbo',
                          class_weight=class_weights,
                          latent_node_features=False)
    if subgradient:
        ssvm = learners.LatentSubgradientSSVM(model,
                                              C=C,
                                              verbose=1,
                                              show_loss_every=10,
                                              logger=logger,
                                              n_jobs=-1,
                                              learning_rate=learning_rate,
                                              decay_exponent=1,
                                              momentum=0.,
                                              max_iter=100000,
                                              decay_t0=100)
    else:
        # separate logger for each latent iteration's base SSVM
        latent_logger = SaveLogger("lssvm_" + experiment_name + "_%d.pickle",
                                   save_every=1)
        #base_ssvm = learners.OneSlackSSVM(
        #model, verbose=2, C=C, max_iter=100, n_jobs=-1, tol=0.001,
        #show_loss_every=200, inference_cache=50, logger=logger,
        #cache_tol='auto', inactive_threshold=1e-5, break_on_bad=False,
        #switch_to=('ogm', {'alg': 'dd'}))
        base_ssvm = learners.NSlackSSVM(model,
                                        verbose=4,
                                        C=C,
                                        n_jobs=-1,
                                        tol=0.1,
                                        show_loss_every=20,
                                        logger=logger,
                                        inactive_threshold=1e-8,
                                        break_on_bad=False,
                                        batch_size=36,
                                        inactive_window=10,
                                        switch_to=('ad3', {
                                            'branch_and_bound': True
                                        }))
        ssvm = learners.LatentSSVM(base_ssvm,
                                   logger=latent_logger,
                                   latent_iter=3)
    # optionally resume a previously saved learner (disabled by default)
    #warm_start = True
    warm_start = False
    if warm_start:
        ssvm = logger.load()
        ssvm.logger = SaveLogger(experiment_name + "_retrain.pickle",
                                 save_every=10)
        ssvm.max_iter = 10000
        ssvm.decay_exponent = 1
        #ssvm.decay_t0 = 1000
        #ssvm.learning_rate = 0.00001
        #ssvm.momentum = 0

    # shuffle once before fitting
    X_, Y_ = shuffle(X_, Y_)
    #ssvm.fit(data_train.X, data_train.Y)
    ssvm.fit(X_, Y_)
    #H_init = [np.hstack([y, np.random.randint(21, 26)]) for y in Y_]
    #ssvm.fit(X_, Y_, H_init=H_init)
    print("fit finished!")