# Example #1
def syntetic_train_score_per_iter(result, only_weak=False, plot=True):
    """Replay a stored syntetic-data experiment's weight history and
    recompute the train-set score at every latent iteration.

    result: ExperimentResult with data['w_history'] and the experiment meta.
    only_weak: if True, score only the weakly-labeled training examples.
    plot: if True, draw the score curve with matplotlib (pl).

    Returns a 1-D np.array with one score per latent iteration.
    """
    w_history = result.data["w_history"]
    meta_data = result.meta
    # Hyperparameters recorded when the experiment originally ran.
    n_full = meta_data["n_full"]
    n_train = meta_data["n_train"]
    n_inference_iter = meta_data["n_inference_iter"]
    dataset = meta_data["dataset"]
    C = meta_data["C"]
    latent_iter = meta_data["latent_iter"]
    max_iter = meta_data["max_iter"]
    inner_tol = meta_data["inner_tol"]
    outer_tol = meta_data["outer_tol"]
    alpha = meta_data["alpha"]
    min_changes = meta_data["min_changes"]

    # Rebuild the model exactly as the experiment configured it.
    crf = HCRF(
        n_states=10, n_features=10, n_edge_features=2, alpha=alpha, inference_method="gco", n_iter=n_inference_iter
    )
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0, tol=inner_tol, n_jobs=4, inference_cache=100)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2, tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)

    Xtrain, Ytrain, Ytrain_full, Xtest, Ytest = split_test_train(X, Y, n_full, n_train)

    if only_weak:
        # Keep only examples whose training label was weak (not fully labeled).
        Xtrain = [x for (i, x) in enumerate(Xtrain) if not Ytrain[i].full_labeled]
        Ytrain_full = [y for (i, y) in enumerate(Ytrain_full) if not Ytrain[i].full_labeled]

    # Inject the recorded weights so staged_score replays each iteration.
    base_clf.w = None
    clf.w_history_ = w_history
    clf.iter_done = w_history.shape[0]

    train_scores = np.array(list(clf.staged_score(Xtrain, Ytrain_full)))

    if plot:
        x = np.arange(0, train_scores.size)
        pl.rc("text", usetex=True)
        pl.rc("font", family="serif")
        pl.figure(figsize=(10, 10), dpi=96)
        pl.title("score on train set")
        pl.plot(x, train_scores)
        pl.scatter(x, train_scores)
        pl.xlabel("iteration")
        pl.xlim([-0.5, train_scores.size + 1])

    return train_scores
# Example #2
def create_model(result):
    """Reconstruct the (untrained) LatentSSVM described by an
    ExperimentResult's meta data.

    result: ExperimentResult whose .meta holds the hyperparameters.

    Returns a LatentSSVM wrapping a OneSlackSSVM over the matching HCRF
    ('syntetic' or 'msrc'); `crf` stays None for other dataset names.
    """
    meta = result.meta

    alpha = meta['alpha']
    n_inference_iter = meta['n_inference_iter']
    max_iter = meta['max_iter']
    C = meta['C']
    inner_tol = meta['inner_tol']
    inactive_window = meta['inactive_window']
    inactive_threshold = meta['inactive_threshold']
    latent_iter = meta['latent_iter']
    outer_tol = meta['outer_tol']
    min_changes = meta['min_changes']

    # Older results may predate the inference_cache setting; default to 0.
    # Catch only KeyError -- a bare except would also hide real bugs.
    try:
        inference_cache = meta['inference_cache']
    except KeyError:
        inference_cache = 0

    crf = None
    if meta['dataset_name'] == 'syntetic':
        crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
                   inference_method='gco', n_iter=n_inference_iter)
    elif meta['dataset_name'] == 'msrc':
        crf = HCRF(n_states=24, n_features=2028, n_edge_features=4, alpha=alpha,
                   inference_method='gco', n_iter=n_inference_iter)

    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=inference_cache,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    return clf
def test_syntetic_weak(mode):
    """Syntetic data: evaluate LatentSSVM over 18 datasets and several
    counts of fully-labeled training examples.

    mode: 'latent' (LatentCRF, qpbo inference) or 'heterogenous'
        (HCRF, gco inference).

    Saves the (18, 6) error matrix to results/weak_labeled.csv or
    results/heterogenous.csv and returns it.
    """
    # Fail fast on an unknown mode -- previously an invalid mode left
    # `clf` unbound and raised NameError deep inside the loops.
    if mode not in ('latent', 'heterogenous'):
        raise ValueError("mode must be 'latent' or 'heterogenous'")

    results = np.zeros((18, 6))
    full_labeled = np.array([0, 2, 4, 10, 25, 100])
    train_size = 400

    for dataset in xrange(1, 19):
        X, H = load_syntetic(dataset)
        H = list(H)
        Y = weak_from_hidden(H)

        for j, nfull in enumerate(full_labeled):
            # Build a fresh model for every (dataset, nfull) combination.
            if mode == 'latent':
                crf = LatentCRF(n_states=10, n_features=10, n_edge_features=2,
                                inference_method='qpbo')
                base_clf = OneSlackSSVM(crf, max_iter=100, C=0.01, verbose=0,
                                        tol=0.1, n_jobs=4, inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5)
            else:
                crf = HCRF(n_states=10, n_features=10, n_edge_features=2,
                           inference_method='gco')
                base_clf = OneSlackSSVM(crf, max_iter=500, C=0.1, verbose=0,
                                        tol=0.001, n_jobs=4, inference_cache=100)
                clf = LatentSSVM(base_clf, latent_iter=5, verbose=0)

            x_train = X[:train_size]
            y_train = Y[:train_size]
            h_train = H[:train_size]
            x_test = X[(train_size + 1):]
            h_test = H[(train_size + 1):]

            # Hide the hidden labels of all but the first nfull examples.
            for i in xrange(nfull, len(h_train)):
                h_train[i] = None

            try:
                if mode == 'latent':
                    clf.fit(x_train, y_train, h_train)
                else:
                    clf.fit(x_train, y_train, h_train,
                            pass_labels=True, initialize=True)
                h_pred = clf.predict_latent(x_test)

                results[dataset - 1, j] = compute_error(h_test, h_pred)

                print 'dataset=%d, nfull=%d, error=%f' % (dataset,
                                                          nfull,
                                                          results[dataset - 1, j])
            except ValueError:
                # bad QP
                print 'dataset=%d, nfull=%d: Failed' % (dataset, nfull)

    if mode == 'latent':
        np.savetxt('results/weak_labeled.csv', results, delimiter=',')
    else:
        np.savetxt('results/heterogenous.csv', results, delimiter=',')

    return results
def test_simple_dataset(max_iter=1000, C=0.1, latent_iter=10, min_changes=0,
                        inner_tol=1e-5, outer_tol=1e-5):
    meta_data = locals()

    crf = SimpleMRF()
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=2,
                            n_jobs=4, tol=inner_tol)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     min_changes=min_changes, n_jobs=4, tol=outer_tol)

    x_train, y_train_full, y_train, x_test, y_test = load_simple_dataset()

    start = time()
    clf.fit(x_train, y_train)
    stop = time()

    time_elapsed = stop - start

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)

    print '============================================================'
    print 'Score on train set: %f' % train_score
    print 'Score on test set: %f' % test_score
    print 'Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w)
    print 'Elapsed time: %f s' % time_elapsed

    test_scores = []
    for score in clf.staged_score(x_test, y_test):
        test_scores.append(score)

    train_scores = []
    for score in clf.staged_score(x_train, y_train_full):
        train_scores.append(score)

    raw_scores = []
    for score in clf.staged_score2(x_train, y_train):
        raw_scores.append(score)

    exp_data = clf._get_data()
    exp_data['test_scores'] = np.array(test_scores)
    exp_data['train_scores'] = np.array(train_scores)
    exp_data['raw_scores'] = np.array(raw_scores)

    meta_data['dataset_name'] = 'toy'
    meta_data['annotation_type'] = 'area'
    meta_data['label_type'] = 'full+weak'
    meta_data['time_elapsed'] = time_elapsed

    return ExperimentResult(exp_data, meta_data)
def msrc_weak(n_full=20, n_train=276, C=100, latent_iter=25,
              max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
              initialize=True, alpha=0.1, n_inference_iter=5):
    crf = HCRF(n_states=24, n_features=2028, n_edge_features=4, alpha=alpha,
               inference_method='gco', n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=10)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    Xtest, Ytest = load_msrc('test')
    Ytest = [Label(y[:, 0].astype(np.int32), None, y[:, 1], True)
             for y in Ytest]

    train_mask = np.genfromtxt('../data/msrc/trainmasks/trainMaskX%d.txt' % n_full)
    train_mask = train_mask[0:n_train].astype(np.bool)
    Xtrain, Ytrain_raw = load_msrc('train')
    Ytrain_full = [Label(y[:, 0].astype(np.int32), None, y[:, 1], True)
                   for y in Ytrain_raw]
    Ytrain = []
    for y, f in zip(Ytrain_raw, train_mask):
        if f:
            Ytrain.append(Label(y[:, 0].astype(np.int32),
                                None, y[:, 1], True))
        else:
            Ytrain.append(Label(None, np.unique(y[:, 0].astype(np.int32)),
                                y[:, 1], False))

    start = time()
    clf.fit(Xtrain, Ytrain, initialize=initialize)
    stop = time()

    train_score = clf.score(Xtrain, Ytrain_full)
    test_score = clf.score(Xtest, Ytest)
    time_elapsed = stop - start 

    print 'Score on train set: %f' % train_score
    print 'Score on test set: %f' % test_score
    print 'Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w)
    print 'Elapsed time: %f s' % time_elapsed

    test_scores = []
    for score in clf.staged_score(Xtest, Ytest):
        test_scores.append(score)

    result = ExperimentResult(np.array(test_scores), clf.changes_,
                              clf.w_history_, clf.delta_history_, clf.primal_objective_curve_, 
                              clf.objective_curve_, clf.timestamps_, clf.base_iter_history_,
                              train_score=train_score, test_score=test_score,
                              time_elapsed=time_elapsed, n_inference_iter=n_inference_iter,
                              n_full=n_full, n_train=n_train, C=C,
                              latent_iter=latent_iter, max_iter=max_iter,
                              inner_tol=inner_tol, outer_tol=outer_tol, alpha=alpha,
                              min_changes=min_changes, initialize=initialize,
                              dataset_name='msrc', annotation_type='image-level labelling',
                              label_type='full+weak')
    return result
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1, latent_iter=15,
                  max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
                  initialize=True, alpha=0.1):
    crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
               inference_method='gco')
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=100)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)

    x_train, y_train, y_train_full, x_test, y_test = \
        split_test_train(X, Y, n_full, n_train)

    start = time()
    clf.fit(x_train, y_train, initialize=initialize)
    stop = time()

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)
    time_elapsed = stop - start

    print 'Score on train set: %f' % train_score
    print 'Score on test set: %f' % test_score
    print 'Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w)
    print 'Elapsed time: %f s' % time_elapsed

    test_scores = []
    for score in clf.staged_score(x_test, y_test):
        test_scores.append(score)

    result = ExperimentResult(np.array(test_scores), clf.changes_,
                              clf.w_history_, clf.delta_history_, clf.primal_objective_curve_, 
                              clf.objective_curve_, clf.timestamps_, clf.base_iter_history_,
                              train_score=train_score,
                              test_score=test_score, time_elapsed=time_elapsed,
                              n_full=n_full, n_train=n_train, C=C, dataset=dataset,
                              latent_iter=latent_iter, max_iter=max_iter,
                              inner_tol=inner_tol, outer_tol=outer_tol, alpha=alpha,
                              min_changes=min_changes, initialize=initialize,
                              dataset_name='syntetic', annotation_type='image-level labelling',
                              label_type='full+weak')
    return result
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1, latent_iter=15,
                  max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
                  initialize=True, alpha=0.1, n_inference_iter=5,
                  inactive_window=50, inactive_threshold=1e-5,
                  warm_start=False, inference_cache=0, save_inner_w=False,
                  inference_method='gco'):
    """Weakly-supervised LatentSSVM experiment on one syntetic dataset.

    The keyword arguments are captured verbatim as the experiment meta
    data; the return value is an ExperimentResult bundling score curves,
    solver history and that meta data.
    """
    # locals() right here holds exactly the call parameters.
    meta_data = locals()

    logger = logging.getLogger(__name__)

    crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
               inference_method=inference_method, n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, verbose=2, n_jobs=4, max_iter=max_iter,
                            tol=inner_tol, C=C,
                            inference_cache=inference_cache,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    x_train, y_train, y_train_full, x_test, y_test = \
        load_syntetic(dataset, n_full, n_train)

    # Time the full latent training loop.
    t_begin = time()
    clf.fit(x_train, y_train, initialize=initialize,
            warm_start=warm_start, save_inner_w=save_inner_w)
    time_elapsed = time() - t_begin

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)

    logger.info('============================================================')
    logger.info('Score on train set: %f', train_score)
    logger.info('Score on test set: %f', test_score)
    logger.info('Norm of weight vector: |w|=%f', np.linalg.norm(clf.w))
    logger.info('Elapsed time: %f s', time_elapsed)

    # Per-iteration score curves replayed from the weight history.
    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))
    raw_scores = list(clf.staged_score2(x_train, y_train))

    exp_data = clf._get_data()
    exp_data['test_scores'] = np.array(test_scores)
    exp_data['train_scores'] = np.array(train_scores)
    exp_data['raw_scores'] = np.array(raw_scores)

    meta_data['dataset_name'] = 'syntetic'
    meta_data['annotation_type'] = 'image-level labelling'
    meta_data['label_type'] = 'full+weak'
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed
    meta_data['iter_done'] = clf.iter_done

    return ExperimentResult(exp_data, meta_data)
def syntetic_weak(n_full=10, n_train=200, C=0.1, dataset=1, latent_iter=15,
                  max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
                  initialize=True, alpha=0.1, n_inference_iter=5,
                  inactive_window=50, inactive_threshold=1e-5):
    """Weakly-supervised LatentSSVM experiment on one syntetic dataset;
    returns an ExperimentResult with score curves and solver history."""
    # locals() right here captures exactly the call parameters as meta.
    meta_data = locals()

    crf = HCRF(n_states=10, n_features=10, n_edge_features=2, alpha=alpha,
               inference_method='gco', n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, max_iter=max_iter, C=C, verbose=0,
                            tol=inner_tol, n_jobs=4, inference_cache=100,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    X, Y = load_syntetic(dataset)
    x_train, y_train, y_train_full, x_test, y_test = \
        split_test_train(X, Y, n_full, n_train)

    # Time the full latent training loop.
    t_begin = time()
    clf.fit(x_train, y_train, initialize=initialize)
    time_elapsed = time() - t_begin

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)

    print('============================================================')
    print('Score on train set: %f' % train_score)
    print('Score on test set: %f' % test_score)
    print('Norm of weight vector: |w|=%f' % np.linalg.norm(clf.w))
    print('Elapsed time: %f s' % time_elapsed)

    # Per-iteration score curves.
    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))

    exp_data = {
        'test_scores': np.array(test_scores),
        'train_scores': np.array(train_scores),
        'changes': clf.changes_,
        'w_history': clf.w_history_,
        'delta_history': clf.delta_history_,
        'primal_objective_curve': clf.primal_objective_curve_,
        'objective_curve': clf.objective_curve_,
        'timestamps': clf.timestamps_,
        'qp_timestamps': clf.qp_timestamps_,
        'inference_timestamps': clf.inference_timestamps_,
        'number_of_iterations': clf.number_of_iterations_,
        'number_of_constraints': clf.number_of_constraints_,
        'calls_to_inference': clf.calls_to_inference_,
    }

    meta_data['dataset_name'] = 'syntetic'
    meta_data['annotation_type'] = 'image-level labelling'
    meta_data['label_type'] = 'full+weak'
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed

    return ExperimentResult(exp_data, meta_data)
# Example #9
def msrc_weak(n_full=20, n_train=276, C=100, latent_iter=25,
              max_iter=500, inner_tol=0.001, outer_tol=0.01, min_changes=0,
              initialize=True, alpha=0.1, n_inference_iter=5,
              inactive_window=50, inactive_threshold=1e-5,
              warm_start=False, inference_cache=0,
              save_inner_w=False, inference_method='gco'):
    """Weakly-supervised LatentSSVM experiment on MSRC; returns an
    ExperimentResult with score curves, solver history and meta data."""
    # Snapshot of the call parameters, stored as experiment meta data.
    meta_data = locals()

    logger = logging.getLogger(__name__)

    crf = HCRF(n_states=24, n_features=2028, n_edge_features=4, alpha=alpha,
               inference_method=inference_method, n_iter=n_inference_iter)
    base_clf = OneSlackSSVM(crf, verbose=2, n_jobs=4,
                            tol=inner_tol, max_iter=max_iter, C=C,
                            inference_cache=inference_cache,
                            inactive_window=inactive_window,
                            inactive_threshold=inactive_threshold)
    clf = LatentSSVM(base_clf, latent_iter=latent_iter, verbose=2,
                     tol=outer_tol, min_changes=min_changes, n_jobs=4)

    x_train, y_train, y_train_full, x_test, y_test = \
        load_msrc(n_full, n_train)

    # Time the full latent training loop.
    t_begin = time()
    clf.fit(x_train, y_train, initialize=initialize,
            warm_start=warm_start, save_inner_w=save_inner_w)
    time_elapsed = time() - t_begin

    train_score = clf.score(x_train, y_train_full)
    test_score = clf.score(x_test, y_test)

    logger.info('============================================================')
    logger.info('Score on train set: %f', train_score)
    logger.info('Score on test set: %f', test_score)
    logger.info('Norm of weight vector: |w|=%f', np.linalg.norm(clf.w))
    logger.info('Elapsed time: %f s', time_elapsed)

    # Per-iteration score curves.
    test_scores = list(clf.staged_score(x_test, y_test))
    train_scores = list(clf.staged_score(x_train, y_train_full))
    raw_scores = list(clf.staged_score2(x_train, y_train))

    exp_data = clf._get_data()
    exp_data['test_scores'] = np.array(test_scores)
    exp_data['train_scores'] = np.array(train_scores)
    exp_data['raw_scores'] = np.array(raw_scores)

    meta_data['dataset_name'] = 'msrc'
    meta_data['annotation_type'] = 'image-level labelling'
    meta_data['label_type'] = 'full+weak'
    meta_data['train_score'] = train_score
    meta_data['test_score'] = test_score
    meta_data['time_elapsed'] = time_elapsed
    meta_data['iter_done'] = clf.iter_done

    return ExperimentResult(exp_data, meta_data)