Example #1
0
def test_syntetic_weak(mode):
    """Evaluate weakly supervised training on the 18 synthetic datasets.

    For each dataset ``1..18`` and each number of fully labelled samples in
    ``(0, 2, 4, 10, 25, 100)`` a fresh ``LatentSSVM`` is fitted on the first
    400 samples, where only the first ``nfull`` samples keep their hidden
    labelling (the rest is replaced by ``None``).  The error of the latent
    predictions on all remaining samples is stored, and the whole table is
    written to a CSV file under ``results/``.

    Parameters
    ----------
    mode : str
        ``'latent'`` builds a ``LatentCRF`` with ``'qpbo'`` inference and
        saves to ``results/weak_labeled.csv``; ``'heterogenous'`` builds an
        ``HCRF`` with ``'gco'`` inference and saves to
        ``results/heterogenous.csv``.

    Returns
    -------
    results : numpy.ndarray, shape (18, 6)
        ``results[dataset - 1, j]`` is the value of ``compute_error`` for
        ``full_labeled[j]`` fully labelled samples.  Runs that raised
        ``ValueError`` are reported as failed and keep the initial 0.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the two supported values.
    """
    # NOTE: the original author flagged this experiment as needing
    # refactoring.
    output_files = {
        'latent': 'results/weak_labeled.csv',
        'heterogenous': 'results/heterogenous.csv',
    }
    if mode not in output_files:
        # Fail early instead of hitting an unbound ``clf`` further down.
        raise ValueError("mode must be 'latent' or 'heterogenous', got %r"
                         % (mode,))

    n_datasets = 18
    train_size = 400
    full_labeled = np.array([0, 2, 4, 10, 25, 100])
    results = np.zeros((n_datasets, len(full_labeled)))

    for dataset in range(1, n_datasets + 1):
        X, H = load_syntetic(dataset)
        H = list(H)
        Y = weak_from_hidden(H)

        for j, nfull in enumerate(full_labeled):
            # A new model is built for every run so nothing carries over.
            if mode == 'latent':
                crf = LatentCRF(n_states=10, n_features=10, n_edge_features=2,
                                inference_method='qpbo')
                base_clf = OneSlackSSVM(crf, max_iter=100, C=0.01, verbose=0,
                                        tol=0.1, n_jobs=4, inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5)
                fit_options = {}
            else:
                crf = HCRF(n_states=10, n_features=10, n_edge_features=2,
                           inference_method='gco')
                base_clf = OneSlackSSVM(crf, max_iter=500, C=0.1, verbose=0,
                                        tol=0.001, n_jobs=4, inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5, verbose=0)
                fit_options = {'pass_labels': True, 'initialize': True}

            x_train = X[:train_size]
            y_train = Y[:train_size]
            # Only the first ``nfull`` samples keep their hidden labelling.
            h_train = [h if i < nfull else None
                       for i, h in enumerate(H[:train_size])]
            # The test split starts right after the training split; the
            # previous ``train_size + 1`` offset silently dropped one sample.
            x_test = X[train_size:]
            h_test = H[train_size:]

            try:
                clf.fit(x_train, y_train, h_train, **fit_options)
                h_pred = clf.predict_latent(x_test)

                results[dataset - 1, j] = compute_error(h_test, h_pred)

                print('dataset=%d, nfull=%d, error=%f'
                      % (dataset, nfull, results[dataset - 1, j]))
            except ValueError:
                # Original note: "bad QP".  The entry stays at 0.
                print('dataset=%d, nfull=%d: Failed' % (dataset, nfull))

    np.savetxt(output_files[mode], results, delimiter=',')

    return results
def syntetic_test():
    """Evaluate fully supervised training on the 18 synthetic datasets.

    For each dataset ``1..18`` and each training-set size in
    ``(2, 4, 10, 25, 100)`` a fresh ``EdgeCRF`` / ``OneSlackSSVM`` pair is
    fitted on the first ``nfull`` samples and evaluated with
    ``compute_error`` on every sample from index 400 onwards.  The table of
    errors is written to ``results/syntetic/full_labeled.txt``.

    Returns
    -------
    results : numpy.ndarray, shape (18, 5)
        ``results[dataset - 1, j]`` is the error for ``full_labeled[j]``
        training samples.  Runs that raised ``ValueError`` are reported as
        failed and keep the initial 0.
    """
    n_datasets = 18
    train_size = 400
    full_labeled = np.array([2, 4, 10, 25, 100])
    results = np.zeros((n_datasets, len(full_labeled)))

    for dataset in range(1, n_datasets + 1):
        X, Y = load_syntetic(dataset)

        for j, nfull in enumerate(full_labeled):
            crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2,
                          inference_method='qpbo')
            clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=0,
                               tol=0.1, n_jobs=4, inference_cache=100)

            x_train = X[:nfull]
            y_train = Y[:nfull]
            # Held-out samples start at ``train_size``; the previous
            # ``train_size + 1`` offset silently dropped one sample.
            x_test = X[train_size:]
            y_test = Y[train_size:]

            try:
                clf.fit(x_train, y_train)
                y_pred = clf.predict(x_test)

                results[dataset - 1, j] = compute_error(y_test, y_pred)

                print('dataset=%d, nfull=%d, error=%f'
                      % (dataset, nfull, results[dataset - 1, j]))
            except ValueError:
                # The entry stays at 0 for a failed run.
                print('dataset=%d, nfull=%d: Failed' % (dataset, nfull))

    np.savetxt('results/syntetic/full_labeled.txt', results)

    # Returned as well as saved, matching test_syntetic_weak.
    return results
def syntetic():
    """Train a fully supervised model on synthetic dataset 1 and report it.

    An ``EdgeCRF`` with ``'gco'`` inference is wrapped in a ``OneSlackSSVM``
    and fitted on a ``train_test_split`` of dataset 1 (``train_size=100``,
    ``random_state=179``).  The learned weights are written to
    ``models/syntetic/syntetic_full.csv`` and the fitted learner is pickled
    to ``models/syntetic/syntetic_full.pickle``.  Error, scores, weight norm
    and the duration of ``fit`` are printed.

    Returns
    -------
    clf : OneSlackSSVM
        The fitted learner.
    """
    models_basedir = 'models/syntetic/'
    crf = EdgeCRF(n_states=10, n_features=10, n_edge_features=2,
                  inference_method='gco')
    clf = OneSlackSSVM(crf, max_iter=10000, C=0.01, verbose=2,
                       tol=0.1, n_jobs=4, inference_cache=100)

    X, Y = load_syntetic(1)

    x_train, x_test, y_train, y_test = train_test_split(X, Y,
                                                        train_size=100,
                                                        random_state=179)

    # Only the call to fit is timed.
    start = time()
    clf.fit(x_train, y_train)
    stop = time()

    np.savetxt(models_basedir + 'syntetic_full.csv', clf.w)
    # Pickle data must go through a binary-mode file: text mode can rewrite
    # line endings on some platforms and breaks binary pickle protocols.
    with open(models_basedir + 'syntetic_full' + '.pickle', 'wb') as f:
        cPickle.dump(clf, f)

    y_pred = clf.predict(x_test)

    print('Error on test set: %f' % compute_error(y_test, y_pred))
    print('Score on test set: %f' % clf.score(x_test, y_test))
    print('Score on train set: %f' % clf.score(x_train, y_train))
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % (stop - start))

    return clf
Example #4
0
def syntetic_train_score_per_iter(result, only_weak=False, plot=True):
    """Recompute the per-iteration training score of a stored experiment.

    A ``LatentSSVM`` is rebuilt from the hyper-parameters in ``result.meta``,
    the stored weight history is attached to it, and ``staged_score`` is
    evaluated on the training split of the synthetic dataset named in the
    metadata.

    Parameters
    ----------
    result : object
        Must provide ``result.data["w_history"]`` (its ``shape[0]`` is used
        as the number of completed iterations) and a ``result.meta`` mapping
        with the keys ``n_full``, ``n_train``, ``n_inference_iter``,
        ``dataset``, ``C``, ``latent_iter``, ``max_iter``, ``inner_tol``,
        ``outer_tol``, ``alpha`` and ``min_changes``.
    only_weak : bool, optional
        If true, score only the training samples whose entry in the weak
        label list has a false ``full_labeled`` attribute.
    plot : bool, optional
        If true, draw the score curve into a new ``pl`` figure.  The figure
        is neither shown nor saved here.

    Returns
    -------
    train_scores : numpy.ndarray
        One score per item yielded by ``staged_score``.
    """
    meta = result.meta
    w_history = result.data["w_history"]

    crf = HCRF(
        n_states=10,
        n_features=10,
        n_edge_features=2,
        alpha=meta["alpha"],
        inference_method="gco",
        n_iter=meta["n_inference_iter"],
    )
    base_clf = OneSlackSSVM(
        crf,
        max_iter=meta["max_iter"],
        C=meta["C"],
        verbose=0,
        tol=meta["inner_tol"],
        n_jobs=4,
        inference_cache=100,
    )
    clf = LatentSSVM(
        base_clf,
        latent_iter=meta["latent_iter"],
        verbose=2,
        tol=meta["outer_tol"],
        min_changes=meta["min_changes"],
        n_jobs=4,
    )

    X, Y = load_syntetic(meta["dataset"])

    # The test part of the split is not needed for a training-score curve.
    Xtrain, Ytrain, Ytrain_full, _, _ = split_test_train(
        X, Y, meta["n_full"], meta["n_train"]
    )

    if only_weak:
        # Assumes Xtrain, Ytrain and Ytrain_full are index-aligned, as the
        # original index-based filtering did.
        is_weak = [not y.full_labeled for y in Ytrain]
        Xtrain = [x for x, keep in zip(Xtrain, is_weak) if keep]
        Ytrain_full = [y for y, keep in zip(Ytrain_full, is_weak) if keep]

    # No training happens here: the stored weight history is installed on
    # the freshly built learner before scoring.
    base_clf.w = None
    clf.w_history_ = w_history
    clf.iter_done = w_history.shape[0]

    train_scores = np.array(list(clf.staged_score(Xtrain, Ytrain_full)))

    if plot:
        x = np.arange(0, train_scores.size)
        # These rc calls change settings on ``pl`` itself, not just on the
        # figure created below.
        pl.rc("text", usetex=True)
        pl.rc("font", family="serif")
        pl.figure(figsize=(10, 10), dpi=96)
        pl.title("score on train set")
        pl.plot(x, train_scores)
        pl.scatter(x, train_scores)
        pl.xlabel("iteration")
        pl.xlim([-0.5, train_scores.size + 1])

    return train_scores
Example #5
0
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1, latent_iter=15,
                  max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
                  initialize=True, alpha=0.1):
    """Run one weakly supervised experiment on a synthetic dataset.

    Builds an ``HCRF`` ('gco' inference) inside a ``OneSlackSSVM`` inside a
    ``LatentSSVM``, fits it on the training part returned by
    ``split_test_train``, prints train/test scores, the weight norm and the
    duration of ``fit``, and packs the learner's recorded curves together
    with all settings into an ``ExperimentResult``.

    Parameters
    ----------
    n_full, n_train
        Forwarded to ``split_test_train``.
    C, max_iter, inner_tol
        ``C``, ``max_iter`` and ``tol`` of the inner ``OneSlackSSVM``.
    dataset
        Argument passed to ``load_syntetic``.
    latent_iter, outer_tol, min_changes
        ``latent_iter``, ``tol`` and ``min_changes`` of the ``LatentSSVM``.
    initialize
        Forwarded to ``LatentSSVM.fit``.
    alpha
        Forwarded to ``HCRF``.

    Returns
    -------
    ExperimentResult
    """
    model = HCRF(n_states=10, n_features=10, n_edge_features=2,
                 alpha=alpha, inference_method='gco')
    inner_learner = OneSlackSSVM(model, max_iter=max_iter, C=C, verbose=0,
                                 tol=inner_tol, n_jobs=4,
                                 inference_cache=100)
    learner = LatentSSVM(inner_learner, latent_iter=latent_iter, verbose=2,
                         tol=outer_tol, min_changes=min_changes, n_jobs=4)

    samples, labels = load_syntetic(dataset)
    (x_train, y_train, y_train_full,
     x_test, y_test) = split_test_train(samples, labels, n_full, n_train)

    # Only the call to fit is timed.
    fit_started = time()
    learner.fit(x_train, y_train, initialize=initialize)
    fit_finished = time()

    train_score = learner.score(x_train, y_train_full)
    test_score = learner.score(x_test, y_test)
    time_elapsed = fit_finished - fit_started

    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(learner.w))
    print('Elapsed time: %f s' % time_elapsed)

    staged_test_scores = list(learner.staged_score(x_test, y_test))

    # Positional payload of ExperimentResult, in the order it expects.
    curves = (
        np.array(staged_test_scores),
        learner.changes_,
        learner.w_history_,
        learner.delta_history_,
        learner.primal_objective_curve_,
        learner.objective_curve_,
        learner.timestamps_,
        learner.base_iter_history_,
    )
    # Keyword payload: measured scores plus every setting of this run.
    settings = dict(
        train_score=train_score,
        test_score=test_score,
        time_elapsed=time_elapsed,
        n_full=n_full,
        n_train=n_train,
        C=C,
        dataset=dataset,
        latent_iter=latent_iter,
        max_iter=max_iter,
        inner_tol=inner_tol,
        outer_tol=outer_tol,
        alpha=alpha,
        min_changes=min_changes,
        initialize=initialize,
        dataset_name='syntetic',
        annotation_type='image-level labelling',
        label_type='full+weak',
    )
    return ExperimentResult(*curves, **settings)
Example #6
0
def load_dataset(result):
    """Rebuild the train/test split a stored experiment was run on.

    Reads ``n_train``, ``n_full`` and ``dataset_name`` from ``result.meta``.
    For ``'syntetic'`` the dataset given by ``result.meta['dataset']`` is
    loaded and split with ``split_test_train``; for ``'msrc'`` the split
    comes from ``msrc_load``.

    Returns
    -------
    tuple
        ``(Xtrain, Ytrain, Ytrain_full, Xtest, Ytest)``.  All five entries
        are ``None`` when the dataset name is neither of the two above.
    """
    meta = result.meta
    n_train = meta['n_train']
    n_full = meta['n_full']
    dataset_name = meta['dataset_name']

    if dataset_name == 'syntetic':
        samples, labels = load_syntetic(meta['dataset'])
        x_tr, y_tr, y_tr_full, x_te, y_te = split_test_train(
            samples, labels, n_full, n_train)
        return x_tr, y_tr, y_tr_full, x_te, y_te

    if dataset_name == 'msrc':
        x_tr, y_tr, y_tr_full, x_te, y_te = msrc_load(n_full, n_train)
        return x_tr, y_tr, y_tr_full, x_te, y_te

    # Unknown dataset name: the caller gets five placeholders.
    return None, None, None, None, None
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1, latent_iter=15,
                  max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
                  initialize=True, alpha=0.1, n_inference_iter=5,
                  inactive_window=50, inactive_threshold=1e-5):
    """Run one weakly supervised experiment on a synthetic dataset.

    Builds an ``HCRF`` ('gco' inference) inside a ``OneSlackSSVM`` inside a
    ``LatentSSVM``, fits it on the training part returned by
    ``split_test_train``, prints train/test scores, the weight norm and the
    duration of ``fit``, and returns the recorded curves and settings.

    Parameters
    ----------
    n_full, n_train
        Forwarded to ``split_test_train``.
    C, max_iter, inner_tol, inactive_window, inactive_threshold
        ``C``, ``max_iter``, ``tol``, ``inactive_window`` and
        ``inactive_threshold`` of the inner ``OneSlackSSVM``.
    dataset
        Argument passed to ``load_syntetic``.
    latent_iter, outer_tol, min_changes
        ``latent_iter``, ``tol`` and ``min_changes`` of the ``LatentSSVM``.
    initialize
        Forwarded to ``LatentSSVM.fit``.
    alpha, n_inference_iter
        ``alpha`` and ``n_iter`` of the ``HCRF``.

    Returns
    -------
    ExperimentResult
        Built from ``(exp_data, meta_data)``: ``exp_data`` holds the staged
        train/test scores and the histories recorded on the learner;
        ``meta_data`` holds every argument of this call plus dataset
        descriptors, final scores and the elapsed time.
    """
    # Save the call arguments as metadata.  This must stay the first
    # statement so that only the parameters exist as locals.  The copy
    # matters: without it the dict can be the frame's own locals mapping,
    # which a later ``locals()`` call or a debugger would refill with every
    # local variable of this function.
    meta_data = dict(locals())

    crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
               inference_method='gco', n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=100,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)
    x_train, y_train, y_train_full, x_test, y_test = \
        split_test_train(X, Y, n_full, n_train)

    # Only the call to fit is timed.
    start = time()
    clf.fit(x_train, y_train, initialize=initialize)
    stop = time()

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)
    time_elapsed = stop - start

    print('============================================================')
    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    # Staged scores: test first, then train, as in the original run order.
    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))

    exp_data = {
        'test_scores': np.array(test_scores),
        'train_scores': np.array(train_scores),
        'changes': clf.changes_,
        'w_history': clf.w_history_,
        'delta_history': clf.delta_history_,
        'primal_objective_curve': clf.primal_objective_curve_,
        'objective_curve': clf.objective_curve_,
        'timestamps': clf.timestamps_,
        'qp_timestamps': clf.qp_timestamps_,
        'inference_timestamps': clf.inference_timestamps_,
        'number_of_iterations': clf.number_of_iterations_,
        'number_of_constraints': clf.number_of_constraints_,
        'calls_to_inference': clf.calls_to_inference_,
    }

    meta_data.update({
        'dataset_name': 'syntetic',
        'annotation_type': 'image-level labelling',
        'label_type': 'full+weak',
        'train_score': train_score,
        'test_score': test_score,
        'time_elapsed': time_elapsed,
    })

    return ExperimentResult(exp_data, meta_data)