Example #1
0
def screening_program(slm_desc, decisions, comparison, preproc, namebase,
                      image_features=None, ctrl=None):
    """
    Assemble the train/test screening pipeline for one model description.

    Every step is expressed as a call on `scope`; presumably these build
    deferred graph nodes rather than computing eagerly -- TODO confirm
    against the definition of `scope`.

    Parameters
    ----------
    slm_desc
        Passed as `desc` to `scope.slm_memmap` when image features have to
        be built here.
    decisions
        Passed to `scope.get_decisions_lfw` for both the 'DevTrain' and
        'DevTest' splits.
    comparison
        Passed as `comparison_name` to `scope.pairs_memmap` (and to the
        currently disabled feature-kernel step).
    preproc
        Passed to `scope.get_images` when image features are built here.
    namebase
        String prefix for the memmap names ('_img_feat', '_pairs_<split>').
    image_features
        Optional pre-built image features; when None they are created via
        `scope.slm_memmap`.
    ctrl
        Only referenced by the disabled feature-kernel branch.

    Returns
    -------
    (result_w_cleanup, locals())
        `result_w_cleanup` is element 0 of a `scope.run_all` call that
        combines the classifier statistics with the three memmap deletions.
        The second element is this function's `locals()` dict, so the names
        of the local variables below are visible to callers -- renaming any
        of them changes the return value.
    """
    if image_features is None:
        image_features = scope.slm_memmap(
                desc=slm_desc,
                X=scope.get_images('float32', preproc=preproc),
                name=namebase + '_img_feat')
    # XXX: check that float32 images lead to correct features

    # XXX: make sure namebase takes a different value for each process
    #      because it's important that the memmaps don't interfere on disk

    # -- helper: pair features for one verification split, stored under a
    #    split-specific memmap name; unpacked by the caller as (X, y)
    def pairs_dataset(split):
        return scope.pairs_memmap(
            scope.verification_pairs(split=split),
            image_features,
            comparison_name=comparison,
            name=namebase + '_pairs_' + split,
            )

    # -- starts empty; handed to result_binary_classifier_stats_lfw below
    result = {}

    train_X, train_y = pairs_dataset('DevTrain')
    test_X, test_y = pairs_dataset('DevTest')

    train_d = scope.get_decisions_lfw('DevTrain', decisions)
    test_d = scope.get_decisions_lfw('DevTest', decisions)

    # -- train and test (X, y, d) triples are normalized in a single call
    train_Xyd_n, test_Xyd_n = scope.normalize_Xcols(
        (train_X, train_y, train_d,),
        (test_X, test_y, test_d,))

    # -- the leading `0` disables this branch unconditionally, so feature
    #    kernels are never attached and `ctrl` is effectively unused.
    # NOTE(review): the body uses the Python 2 `print >>` statement form.
    if 0 and ctrl is not None:
        print >> sys.stderr, "SKIPPING FEATURE KERNEL"
        train_Xyd_n = scope.attach_feature_kernels(train_Xyd_n, test_Xyd_n,
                ctrl, comparison)

    ### TODO: put consts in config, possibly loop over them in MultiBandit
    svm = scope.train_svm(train_Xyd_n,
            l2_regularization=1e-3,
            max_observations=20000)

    # -- decisions of the trained svm on both splits
    new_d_train = scope.svm_decisions_lfw(svm, train_Xyd_n)
    new_d_test = scope.svm_decisions_lfw(svm, test_Xyd_n)

    result = scope.result_binary_classifier_stats_lfw(
            train_Xyd_n,
            test_Xyd_n,
            new_d_train,
            new_d_test,
            result=result)

    # -- bundle the stats with deletion of every memmap used above and keep
    #    only element 0 of run_all (presumably the value of `result` --
    #    TODO confirm run_all's return convention)
    result_w_cleanup = scope.run_all(
        result,
        scope.delete_memmap(train_X),
        scope.delete_memmap(test_X),
        scope.delete_memmap(image_features),
        )[0]

    # -- locals() exposes every intermediate above to the caller by name
    return result_w_cleanup, locals()
Example #2
0
def screening_prog(
        n_examples_train,
        n_examples_test,
        n_folds,
        feat_spec,
        decisions,
        svm_l2_regularization,
        svm_max_observations,
        save_svms,
        ctrl):
    """
    Build a pyll graph representing the experiment.

    One path is built per fold: slice the data, normalize, compute
    features, normalize again, train an SVM and collect its decisions and
    classifier statistics.  The per-fold pieces are then merged with
    `scope.combine_results`.

    Arguments
    ---------
    n_examples_train: number of examples requested from `scope.digits_xy`
        and the size used for the fold split
    n_examples_test: accepted for interface compatibility; not read here
    n_folds: number of folds (graph paths) to build
    feat_spec: forwarded to `scope.features`
    decisions: per-fold prior decisions, indexable by fold; None means
        zeros of shape (n_folds, n_examples_train)
    svm_l2_regularization, svm_max_observations: forwarded to
        `scope.train_svm`
    save_svms: when true, each fold's result is routed through
        `scope.run_all` together with `scope.attach_svm`
    ctrl: forwarded to `scope.attach_svm` (only used when save_svms)

    Returns whatever `scope.combine_results` produces.
    """
    # -- experiment may store decisions as a list, hence the asarray
    per_fold_decisions = (
        np.zeros((n_folds, n_examples_train))
        if decisions is None
        else np.asarray(decisions))

    data_xy = scope.digits_xy(0, n_examples_train)

    # -- accumulators, one entry per fold
    fold_results = []
    fold_new_decisions = []
    fold_index_pairs = []

    for fold in range(n_folds):
        prior_d = np.asarray(per_fold_decisions[fold])

        train_idxs, test_idxs = scope.random_train_test_idxs(
            n_examples_train, n_folds, fold)

        # -- raw (X, y, d) slices for this fold
        raw_train = scope.slice_Xyd(data_xy, prior_d, train_idxs)
        raw_test = scope.slice_Xyd(data_xy, prior_d, test_idxs)

        # -- normalize -> features -> normalize
        norm_train, norm_test = scope.normalize_Xcols(raw_train, raw_test)
        feat_train = scope.features(norm_train, feat_spec)
        feat_test = scope.features(norm_test, feat_spec)
        fit_train, fit_test = scope.normalize_Xcols(feat_train, feat_test)

        svm = scope.train_svm(
            fit_train,
            l2_regularization=svm_l2_regularization,
            max_observations=svm_max_observations)

        d_train = scope.svm_decisions(svm, fit_train)
        d_test = scope.svm_decisions(svm, fit_test)

        # -- stats are computed against the raw (un-normalized) slices
        fold_result = scope.result_binary_classifier_stats(
            raw_train,
            raw_test,
            d_train,
            d_test,
            result={})

        fold_new_decisions.append((d_train, d_test))
        fold_index_pairs.append((train_idxs, test_idxs))

        # -- if we save weights, tie the svm attachment to this fold's result
        if save_svms:
            fold_result = scope.run_all(
                fold_result,
                scope.attach_svm(ctrl, svm, 'svm_%i' % fold))[0]
        fold_results.append(fold_result)

    return scope.combine_results(
        fold_results,
        fold_index_pairs,
        fold_new_decisions,
        per_fold_decisions,
        data_xy[1])