def mkl_binclass_modular (train_data, testdata, train_labels, test_labels, d1, d2): # create some Gaussian train/test matrix tfeats = RealFeatures(train_data) tkernel = GaussianKernel(128, d1) tkernel.init(tfeats, tfeats) K_train = tkernel.get_kernel_matrix() pfeats = RealFeatures(test_data) tkernel.init(tfeats, pfeats) K_test = tkernel.get_kernel_matrix() # create combined train features feats_train = CombinedFeatures() feats_train.append_feature_obj(RealFeatures(train_data)) # and corresponding combined kernel kernel = CombinedKernel() kernel.append_kernel(CustomKernel(K_train)) kernel.append_kernel(GaussianKernel(128, d2)) kernel.init(feats_train, feats_train) # train mkl labels = Labels(train_labels) mkl = MKLClassification() # not to use svmlight mkl.set_interleaved_optimization_enabled(0) # which norm to use for MKL mkl.set_mkl_norm(2) # set cost (neg, pos) mkl.set_C(1, 1) # set kernel and labels mkl.set_kernel(kernel) mkl.set_labels(labels) # train mkl.train() # test # create combined test features feats_pred = CombinedFeatures() feats_pred.append_feature_obj(RealFeatures(test_data)) # and corresponding combined kernel kernel = CombinedKernel() kernel.append_kernel(CustomKernel(K_test)) kernel.append_kernel(GaussianKernel(128, d2)) kernel.init(feats_train, feats_pred) # and classify mkl.set_kernel(kernel) output = mkl.apply().get_labels() output = [1.0 if i>0 else -1.0 for i in output] accu = len(where(output == test_labels)[0]) / float(len(output)) return accu
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat): from shogun.Evaluation import CrossValidation, CrossValidationResult from shogun.Evaluation import CrossValidationPrintOutput from shogun.Evaluation import CrossValidationMKLStorage from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY from shogun.Evaluation import StratifiedCrossValidationSplitting from shogun.Features import BinaryLabels from shogun.Features import RealFeatures, CombinedFeatures from shogun.Kernel import GaussianKernel, CombinedKernel from shogun.Classifier import LibSVM, MKLClassification from shogun.Mathematics import Statistics # training data, combined features all on same data features=RealFeatures(traindat) comb_features=CombinedFeatures() comb_features.append_feature_obj(features) comb_features.append_feature_obj(features) comb_features.append_feature_obj(features) labels=BinaryLabels(label_traindat) # kernel, different Gaussians combined kernel=CombinedKernel() kernel.append_kernel(GaussianKernel(10, 0.1)) kernel.append_kernel(GaussianKernel(10, 1)) kernel.append_kernel(GaussianKernel(10, 2)) # create mkl using libsvm, due to a mem-bug, interleaved is not possible svm=MKLClassification(LibSVM()); svm.set_interleaved_optimization_enabled(False); svm.set_kernel(kernel); # splitting strategy for 5 fold cross-validation (for classification its better # to use "StratifiedCrossValidation", but the standard # "StratifiedCrossValidationSplitting" is also available splitting_strategy=StratifiedCrossValidationSplitting(labels, 5) # evaluation method evaluation_criterium=ContingencyTableEvaluation(ACCURACY) # cross-validation instance cross_validation=CrossValidation(svm, comb_features, labels, splitting_strategy, evaluation_criterium) cross_validation.set_autolock(False) # append cross vlaidation output classes #cross_validation.add_cross_validation_output(CrossValidationPrintOutput()) mkl_storage=CrossValidationMKLStorage() cross_validation.add_cross_validation_output(mkl_storage) cross_validation.set_num_runs(3) # perform cross-validation result=cross_validation.evaluate() # print mkl weights weights=mkl_storage.get_mkl_weights()
#feats_test.append_feature_obj(RealFeatures(testdata_real)) labels = Labels(trainlab) # and corresponding combined kernel kernel = CombinedKernel() kernel.append_kernel(GaussianKernel(10, 2.0)) kernel.append_kernel(GaussianKernel(10, 0.25)) kernel.append_kernel(GaussianKernel(10, 0.062)) kernel.append_kernel(GaussianKernel(10, 8.0)) kernel.append_kernel(GaussianKernel(10, 10.0)) kernel.init(feats_train, feats_train) # Create a classifier classifier=MKLClassification(LibSVM()) classifier.set_interleaved_optimization_enabled(False) classifier.set_kernel(kernel) classifier.set_labels(labels) classifier.set_C(1, 1) param_tree_root=ModelSelectionParameters() # () C1 parameter to the tree c1=ModelSelectionParameters("C1"); c1.build_values(-4.0, 4.0, R_EXP); param_tree_root.append_child(c1) # Attached C1 parameter to the tree c2=ModelSelectionParameters("C2"); c2.build_values(-4.0, 4.0, R_EXP); param_tree_root.append_child(c2)
def _train(self, train_data, param): """ training procedure using training examples and labels @param train_data: Data relevant to SVM training @type train_data: dict<str, list<instances> > @param param: Parameters for the training procedure @type param: ParameterSvm """ import numpy numpy.random.seed(666) # merge data sets data = PreparedMultitaskData(train_data, shuffle=True) # create shogun label lab = shogun_factory.create_labels(data.labels) # assemble combined kernel combined_kernel = CombinedKernel() combined_kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) # set kernel cache if param.flags.has_key("cache_size"): combined_kernel.set_cache_size(param.flags["cache_size"]) # create features base_features = shogun_factory.create_features(data.examples, param) combined_features = CombinedFeatures() ######################################################## print "creating a masked kernel for possible subset:" ######################################################## power_set_tasks = power_set(data.get_task_ids()) for active_task_ids in power_set_tasks: print "masking all entries other than:", active_task_ids # create mask-based normalizer normalizer = MultitaskKernelMaskNormalizer(data.task_vector_nums, data.task_vector_nums, active_task_ids) # normalize trace if param.flags.has_key( "normalize_trace") and param.flags["normalize_trace"]: norm_factor = len(data.get_task_ids()) / len(active_task_ids) normalizer.set_normalization_constant(norm_factor) kernel = shogun_factory.create_empty_kernel(param) kernel.set_normalizer(normalizer) # append current kernel to CombinedKernel combined_kernel.append_kernel(kernel) # append features combined_features.append_feature_obj(base_features) print "------" combined_kernel.init(combined_features, combined_features) #combined_kernel.precompute_subkernels() self.additional_information[ "weights before trainng"] = combined_kernel.get_subkernel_weights( ) print "subkernel weights:", combined_kernel.get_subkernel_weights() svm = None print "using MKL:", (param.flags["mkl_q"] >= 1.0) if param.flags["mkl_q"] >= 1.0: svm = MKLClassification() svm.set_mkl_norm(param.flags["mkl_q"]) # set interleaved optimization if param.flags.has_key("interleaved"): svm.set_interleaved_optimization_enabled( param.flags["interleaved"]) # set solver type if param.flags.has_key( "solver_type") and param.flags["solver_type"]: if param.flags["solver_type"] == "ST_CPLEX": svm.set_solver_type(ST_CPLEX) if param.flags["solver_type"] == "ST_DIRECT": svm.set_solver_type(ST_DIRECT) if param.flags["solver_type"] == "ST_NEWTON": svm.set_solver_type(ST_NEWTON) if param.flags["solver_type"] == "ST_GLPK": svm.set_solver_type(ST_GLPK) svm.set_kernel(combined_kernel) svm.set_labels(lab) else: svm = SVMLight(param.cost, combined_kernel, lab) # optimization settings num_threads = 4 svm.parallel.set_num_threads(num_threads) if param.flags.has_key("epsilon"): svm.set_epsilon(param.flags["epsilon"]) # enable output svm.io.enable_progress() svm.io.set_loglevel(shogun.Classifier.MSG_DEBUG) # disable unsupported optimizations (due to special normalizer) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) # set cost if param.flags["normalize_cost"]: norm_c_pos = param.cost / float( len([l for l in data.labels if l == 1])) norm_c_neg = param.cost / float( len([l for l in data.labels if l == -1])) svm.set_C(norm_c_neg, norm_c_pos) else: svm.set_C(param.cost, param.cost) svm.train() # prepare mapping weight_map = {} weights = combined_kernel.get_subkernel_weights() for (i, pset) in enumerate(power_set_tasks): print pset subset_str = str([data.id_to_name(task_idx) for task_idx in pset]) weight_map[subset_str] = weights[i] # store additional info self.additional_information["svm objective"] = svm.get_objective() self.additional_information["weight_map"] = weight_map # wrap up predictors svms = {} # use a reference to the same svm several times for task_name in train_data.keys(): svms[task_name] = (data.name_to_id(task_name), len(power_set_tasks), combined_kernel, svm, param) return svms
def _train(self, train_data, param): """ training procedure using training examples and labels @param train_data: Data relevant to SVM training @type train_data: dict<str, list<instances> > @param param: Parameters for the training procedure @type param: ParameterSvm """ #numpy.random.seed(1337) numpy.random.seed(666) # merge data sets data = PreparedMultitaskData(train_data, shuffle=True) # create shogun label lab = shogun_factory.create_labels(data.labels) # assemble combined kernel combined_kernel = CombinedKernel() combined_kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) # set kernel cache if param.flags.has_key("cache_size"): combined_kernel.set_cache_size(param.flags["cache_size"]) # create features base_features = shogun_factory.create_features(data.examples) combined_features = CombinedFeatures() ######################################################## print "creating a masked kernel for each node:" ######################################################## # fetch taxonomy from parameter object taxonomy = param.taxonomy.data # create name to leaf map nodes = taxonomy.get_all_nodes() for node in nodes: print "creating kernel for ", node.name # fetch sub-tree active_task_ids = [data.name_to_id(leaf.name) for leaf in node.get_leaves()] print "masking all entries other than:", active_task_ids # create mask-based normalizer normalizer = MultitaskKernelMaskNormalizer(data.task_vector_nums, data.task_vector_nums, active_task_ids) # normalize trace if param.flags.has_key("normalize_trace") and param.flags["normalize_trace"]: norm_factor = len(node.get_leaves()) / len(active_task_ids) normalizer.set_normalization_constant(norm_factor) # create kernel kernel = shogun_factory.create_empty_kernel(param) kernel.set_normalizer(normalizer) # append current kernel to CombinedKernel combined_kernel.append_kernel(kernel) # append features combined_features.append_feature_obj(base_features) print "------" combined_kernel.init(combined_features, combined_features) #combined_kernel.precompute_subkernels() print "subkernel weights:", combined_kernel.get_subkernel_weights() svm = None print "using MKL:", (param.flags["mkl_q"] >= 1.0) if param.flags["mkl_q"] >= 1.0: # set up MKL svm = MKLClassification() # set the "q" in q-norm MKL svm.set_mkl_norm(param.flags["mkl_q"]) # set interleaved optimization if param.flags.has_key("interleaved"): svm.set_interleaved_optimization_enabled(param.flags["interleaved"]) # set solver type if param.flags.has_key("solver_type") and param.flags["solver_type"]: if param.flags["solver_type"] == "ST_CPLEX": svm.set_solver_type(ST_CPLEX) if param.flags["solver_type"] == "ST_DIRECT": svm.set_solver_type(ST_DIRECT) if param.flags["solver_type"] == "ST_NEWTON": svm.set_solver_type(ST_NEWTON) if param.flags["solver_type"] == "ST_GLPK": svm.set_solver_type(ST_GLPK) svm.set_kernel(combined_kernel) svm.set_labels(lab) else: # create vanilla SVM svm = SVMLight(param.cost, combined_kernel, lab) # optimization settings num_threads = 4 svm.parallel.set_num_threads(num_threads) if param.flags.has_key("epsilon"): svm.set_epsilon(param.flags["epsilon"]) # enable output svm.io.enable_progress() svm.io.set_loglevel(shogun.Classifier.MSG_DEBUG) # disable unsupported optimizations (due to special normalizer) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) # set cost if param.flags["normalize_cost"]: norm_c_pos = param.cost / float(len([l for l in data.labels if l==1])) norm_c_neg = param.cost / float(len([l for l in data.labels if l==-1])) svm.set_C(norm_c_neg, norm_c_pos) else: svm.set_C(param.cost, param.cost) # start training svm.train() ######################################################## print "svm objective:" print svm.get_objective() ######################################################## # store additional info self.additional_information["svm objective"] = svm.get_objective() self.additional_information["weights"] = combined_kernel.get_subkernel_weights() # wrap up predictors svms = {} # use a reference to the same svm several times for task_name in train_data.keys(): svms[task_name] = (data.name_to_id(task_name), len(nodes), combined_kernel, svm) return svms