def screening_program(slm_desc, decisions, comparison, preproc, namebase,
                      image_features=None, ctrl=None):
    """
    Assemble the LFW screening pipeline out of `scope` operations.

    Steps, in order: image features (created with `scope.slm_memmap` unless
    `image_features` is supplied), pair features for the 'DevTrain' and
    'DevTest' splits, the decisions for both splits, column normalization,
    SVM training, new SVM decisions for both splits, binary-classifier
    statistics, and finally deletion of the three memmaps.

    NOTE(review): `scope` looks like a pyll symbol table, in which case each
    call below adds a node to a graph rather than computing a value --
    confirm against the imports of this module.

    Parameters:
      slm_desc       -- forwarded as `desc` to `scope.slm_memmap`.
      decisions      -- forwarded to `scope.get_decisions_lfw` for each split.
      comparison     -- forwarded as `comparison_name` to `scope.pairs_memmap`.
      preproc        -- forwarded as `preproc` to `scope.get_images`.
      namebase       -- prefix of the memmap names; '_img_feat' and
                        '_pairs_<split>' are appended to it.
      image_features -- optional ready-made image features; when None they
                        are built from `slm_desc` and `preproc`.
      ctrl           -- only referenced by the disabled feature-kernel branch.

    Returns:
      A 2-tuple `(result_w_cleanup, locals())`.  The first element is item 0
      of `scope.run_all(result, <three delete_memmap calls>)`.  The second is
      this function's local namespace at return time, so every local name in
      this body (arguments and the nested `pairs_dataset` included) is part
      of the return value and must not be renamed casually.
    """
    if image_features is None:
        image_features = scope.slm_memmap(
            desc=slm_desc,
            X=scope.get_images('float32', preproc=preproc),
            name=namebase + '_img_feat')
    # XXX: check that float32 images lead to correct features

    # XXX: make sure namebase takes a different value for each process
    #      because it's important that the memmaps don't interfere on disk

    def pairs_dataset(split):
        # -- pair features of one verification split, named after the split
        return scope.pairs_memmap(
            scope.verification_pairs(split=split),
            image_features,
            comparison_name=comparison,
            name=namebase + '_pairs_' + split,
            )

    result = {}

    train_X, train_y = pairs_dataset('DevTrain')
    test_X, test_y = pairs_dataset('DevTest')

    train_d = scope.get_decisions_lfw('DevTrain', decisions)
    test_d = scope.get_decisions_lfw('DevTest', decisions)

    train_Xyd_n, test_Xyd_n = scope.normalize_Xcols(
        (train_X, train_y, train_d,),
        (test_X, test_y, test_d,))

    # -- deliberately switched off by the leading `0`; kept as a toggle.
    if 0 and ctrl is not None:
        # -- write() instead of the `print >>` chevron so that this line
        #    behaves the same on Python 2 and Python 3 once re-enabled.
        sys.stderr.write("SKIPPING FEATURE KERNEL\n")
        train_Xyd_n = scope.attach_feature_kernels(train_Xyd_n, test_Xyd_n,
                ctrl, comparison)

    ### TODO: put consts in config, possibly loop over them in MultiBandit
    svm = scope.train_svm(train_Xyd_n,
            l2_regularization=1e-3,
            max_observations=20000)

    new_d_train = scope.svm_decisions_lfw(svm, train_Xyd_n)
    new_d_test = scope.svm_decisions_lfw(svm, test_Xyd_n)

    result = scope.result_binary_classifier_stats_lfw(
            train_Xyd_n,
            test_Xyd_n,
            new_d_train,
            new_d_test,
            result=result)

    # -- NOTE(review): `image_features` is deleted here even when the caller
    #    supplied it; confirm that callers expect that.
    result_w_cleanup = scope.run_all(
        result,
        scope.delete_memmap(train_X),
        scope.delete_memmap(test_X),
        scope.delete_memmap(image_features),
        )[0]

    return result_w_cleanup, locals()
def screening_prog(n_examples_train, n_examples_test, n_folds,
                   feat_spec, decisions,
                   svm_l2_regularization, svm_max_observations,
                   save_svms, ctrl):
    """
    Build a pyll graph representing the experiment.

    One path is built per fold: split indices, sliced train/test data,
    column normalization, features, a second normalization, SVM training,
    SVM decisions and binary-classifier statistics.  The per-fold pieces are
    then merged with `scope.combine_results`, whose output is returned.

    Parameters:
      n_examples_train      -- passed to `scope.digits_xy` and
                               `scope.random_train_test_idxs`; also the
                               second dimension of the default decisions.
      n_examples_test       -- accepted but not read by this function.
      n_folds               -- number of folds, i.e. of graph paths.
      feat_spec             -- forwarded to `scope.features`.
      decisions             -- None, or something indexable by fold (a list
                               is accepted); None means an all-zero array of
                               shape (n_folds, n_examples_train).
      svm_l2_regularization -- forwarded to `scope.train_svm`.
      svm_max_observations  -- forwarded to `scope.train_svm`.
      save_svms             -- when true, each fold's result is routed
                               through `scope.run_all` together with
                               `scope.attach_svm`.
      ctrl                  -- forwarded to `scope.attach_svm`; only read
                               when `save_svms` is true.
    """
    if decisions is None:
        decisions_by_fold = np.zeros((n_folds, n_examples_train))
    else:
        # -- experiment may store this as list
        decisions_by_fold = np.asarray(decisions)

    digits = scope.digits_xy(0, n_examples_train)

    # -- one graph path per fold; these three lists stay aligned by fold
    fold_results = []
    fold_new_decisions = []
    fold_idxs = []
    for k in range(n_folds):
        fold_decisions = np.asarray(decisions_by_fold[k])
        idxs_train, idxs_test = scope.random_train_test_idxs(
            n_examples_train, n_folds, k)

        raw_train = scope.slice_Xyd(digits, fold_decisions, idxs_train)
        raw_test = scope.slice_Xyd(digits, fold_decisions, idxs_test)

        # -- normalize, extract features, then normalize the features
        norm_train, norm_test = scope.normalize_Xcols(raw_train, raw_test)
        feat_train = scope.features(norm_train, feat_spec)
        feat_test = scope.features(norm_test, feat_spec)
        fnorm_train, fnorm_test = scope.normalize_Xcols(feat_train, feat_test)

        model = scope.train_svm(
            fnorm_train,
            l2_regularization=svm_l2_regularization,
            max_observations=svm_max_observations)

        decided_train = scope.svm_decisions(model, fnorm_train)
        decided_test = scope.svm_decisions(model, fnorm_test)

        # -- statistics are computed against the un-normalized slices
        stats = scope.result_binary_classifier_stats(
            raw_train,
            raw_test,
            decided_train,
            decided_test,
            result={})

        fold_new_decisions.append((decided_train, decided_test))
        fold_idxs.append((idxs_train, idxs_test))

        # -- if we save weights, the svm is attached alongside the result
        if save_svms:
            stats = scope.run_all(
                stats,
                scope.attach_svm(ctrl, model, 'svm_%i' % k))[0]
        fold_results.append(stats)

    return scope.combine_results(
        fold_results,
        fold_idxs,
        fold_new_decisions,
        decisions_by_fold,
        digits[1])