def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2, C=1, degree=3):

    feats_train = StringCharFeatures(fm_train_dna, DNA)
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.train()
    #svm.io.set_loglevel(MSG_DEBUG)

    #####################################

    #print "obtaining DA SVM from previously trained SVM"

    # second-domain data for the adapted SVM
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = Labels(label_train_dna2)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply(feats_test2).get_labels()

    return out  # ,dasvm TODO
def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2, C=1, degree=3):

    feats_train = StringCharFeatures(fm_train_dna, DNA)
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.train()
    #svm.io.set_loglevel(MSG_DEBUG)

    #####################################

    #print("obtaining DA SVM from previously trained SVM")

    # second-domain data for the adapted SVM
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)

    return out  # ,dasvm TODO
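# A minimal usage sketch for the example above. Everything here is an
# assumption for illustration: the toy DNA strings and labels are made up,
# and the import line follows the modular python interface of the same
# shogun era (a build with SVMLight support is required). In the original
# file, the train/test variables are defined before the def, since they
# appear as its default arguments.
import numpy
from modshogun import StringCharFeatures, BinaryLabels, DNA
from modshogun import WeightedDegreeStringKernel, SVMLight, DomainAdaptationSVM

# source-domain data; all strings must have the same length for the
# WeightedDegreeStringKernel
traindna = ['CGCACGTACGTAGCTCGAT', 'CGACGTAGTCGTAGTCGTA',
            'ACGACGTAGTCGTAGTCGT', 'TACGATGCTACGTAGCTAG']
label_traindna = numpy.array([-1.0, -1.0, 1.0, 1.0])
testdna = ['CGACGTAGTCGTAGTCGTA', 'ACGTAGTCGTAGTCGTACG']
label_testdna = numpy.array([-1.0, 1.0])

# target-domain data of the same shape
traindna2 = ['CGCTCGTACGTAGCTCGAT', 'CGTCGTAGTCGTAGTCGTA',
             'ACGTCGTAGTCGTAGTCGT', 'TACTATGCTACGTAGCTAG']
label_traindna2 = numpy.array([-1.0, -1.0, 1.0, 1.0])
testdna2 = ['CGTCGTAGTCGTAGTCGTA', 'ACGAAGTCGTAGTCGTACG']
label_testdna2 = numpy.array([-1.0, 1.0])

out = classifier_domainadaptationsvm_modular(traindna, testdna,
                                             label_traindna, label_testdna,
                                             traindna2, testdna2,
                                             label_traindna2, label_testdna2)
print(out.get_labels())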
def create_domain_adaptation_svm(param, k, lab, presvm, weight):
    '''
    create SVM object with standard settings

    @param param: parameter object
    @param k: kernel
    @param lab: label object
    @param presvm: previously trained SVM to regularize against
    @param weight: transfer weight B
    @return: svm object
    '''

    # create SVM
    if param.flags.has_key("svm_type") and param.flags["svm_type"] == "liblineardual":
        svm = DomainAdaptationSVMLinear(param.cost, k, lab, presvm, weight)
    else:
        svm = DomainAdaptationSVM(param.cost, k, lab, presvm, weight)

    return set_svm_parameters(svm, param)
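# Hypothetical usage sketch for the factory above. Param and
# set_svm_parameters are made-up stand-ins (neither is shown in this file);
# k, lab and presvm would be shogun kernel, label and pre-trained SVM
# objects built elsewhere.
class Param(object):
    def __init__(self, cost, flags):
        self.cost = cost
        self.flags = flags

def set_svm_parameters(svm, param):
    # placeholder: the real helper presumably applies settings such as
    # epsilon or C to the freshly created svm
    return svm

# "liblineardual" selects the linear solver (DomainAdaptationSVMLinear);
# any other flags value falls back to the kernelized DomainAdaptationSVM
param = Param(cost=1.0, flags={"svm_type": "liblineardual"})
#svm = create_domain_adaptation_svm(param, feat, lab, presvm, 1.0)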
Math_init_random(1)
dasvm_manual_libsvm.train()

###############
# compare to LibLinear

dasvm_manual_liblinear = LibLinear(1.0, feat, lab)
dasvm_manual_liblinear.set_linear_term(linterm_manual)
dasvm_manual_liblinear.set_bias_enabled(False)
dasvm_manual_liblinear.train()

#############################################
# compute DA-SVMs in shogun (kernelized AND linear)
#############################################

dasvm_libsvm = DomainAdaptationSVM(1.0, wdk, lab, presvm_libsvm, B)
dasvm_libsvm.set_bias_enabled(False)
dasvm_libsvm.train()

dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
dasvm_liblinear.io.set_loglevel(MSG_DEBUG)
dasvm_liblinear.set_bias_enabled(False)
dasvm_liblinear.train()

print "##############"

alphas = []
sv_ids = dasvm_libsvm.get_support_vectors()
for (j, sv_id) in enumerate(sv_ids):
    alpha = dasvm_libsvm.get_alphas()[j]
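# How a linear term like linterm_manual above can be constructed (a sketch,
# not taken from this file): in the DA-SVM dual, the constant -1 linear term
# of the standard SVM is replaced by B * y_i * f_pre(x_i) - 1, where f_pre
# is the pre-trained source-domain SVM. Sign conventions differ between
# solvers, so treat the exact formula as an assumption; the inputs below
# are made up.
import numpy

presvm_out = numpy.array([0.7, -0.4, 1.1, -0.9])  # f_pre(x_i) on target data
y = numpy.array([1.0, -1.0, 1.0, -1.0])           # target-domain labels
B = 0.5                                           # transfer weight

linterm_sketch = B * y * presvm_out - 1.0
print "sketched linear term:", linterm_sketch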
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """

    for task_id in train_data.keys():
        print "task_id:", task_id

    root = param.taxonomy.data

    grey_nodes = [root]

    # top-down processing of taxonomy
    while len(grey_nodes) > 0:

        node = grey_nodes.pop(0)  # pop first item

        # enqueue children
        if node.children != None:
            grey_nodes.extend(node.children)

        #####################################################
        # init data structures
        #####################################################

        # get data below current node
        data = [train_data[key] for key in node.get_data_keys()]

        print "data at current level"
        for instance_set in data:
            print instance_set[0].dataset

        # initialize containers
        examples = []
        labels = []

        # concatenate data
        for instance_set in data:

            print "train split_set:", instance_set[0].dataset.organism

            for inst in instance_set:
                examples.append(inst.example)
                labels.append(inst.label)

        # create shogun data objects
        k = shogun_factory_new.create_kernel(examples, param)
        lab = shogun_factory_new.create_labels(labels)

        cost = param.cost
        #cost = node.cost

        print "using cost:", cost

        #####################################################
        # train predictors
        #####################################################

        # init predictor variable
        svm = None

        # set up SVM
        if node.is_root():

            print "training svm at top level"
            svm = SVMLight(cost, k, lab)

        else:

            # regularize vs parent predictor
            #weight = node.edge_weight
            weight = param.transform
            print "current edge_weight:", weight, ", name:", node.name

            parent_svm = node.parent.predictor

            svm = DomainAdaptationSVM(cost, k, lab, parent_svm, weight)
            #svm.set_train_factor(param.base_similarity)

        if param.flags["normalize_cost"]:
            norm_c_pos = param.cost / float(len([l for l in lab.get_labels() if l == 1]))
            norm_c_neg = param.cost / float(len([l for l in lab.get_labels() if l == -1]))
            svm.set_C(norm_c_neg, norm_c_pos)

        # set epsilon
        if param.flags.has_key("epsilon"):
            svm.set_epsilon(param.flags["epsilon"])

        # enable output
        svm.io.enable_progress()
        svm.io.set_loglevel(shogun.Classifier.MSG_INFO)

        svm.set_train_factor(param.flags["train_factor"])
        svm.train()

        # attach svm to node
        node.predictor = svm

        # save some information
        self.additional_information[node.name + " svm obj"] = svm.get_objective()
        self.additional_information[node.name + " svm num sv"] = svm.get_num_support_vectors()
        self.additional_information[node.name + " runtime"] = svm.get_runtime()

    #####################################################
    # Wrap things up
    #####################################################

    # wrap up predictors for later use
    predictors = {}

    for leaf in root.get_leaves():
        predictors[leaf.name] = leaf.predictor
        assert (leaf.predictor != None)

    # make sure we have the same keys (potentially in a different order)
    sym_diff_keys = set(train_data.keys()).symmetric_difference(set(predictors.keys()))
    assert len(sym_diff_keys) == 0, "symmetric difference between keys non-empty: " + str(sym_diff_keys)

    # save graph plot
    mypath = "/fml/ag-raetsch/share/projects/multitask/graphs/"
    filename = mypath + "graph_" + str(param.id)
    root.plot(filename)  #, plot_cost=True, plot_B=True)

    return predictors
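# The grey_nodes queue above implements a breadth-first, top-down walk of
# the taxonomy: the root gets a plain SVMLight, every other node gets a
# DomainAdaptationSVM regularized against node.parent.predictor. A minimal
# self-contained sketch of just the traversal (Node is a made-up stand-in
# for the taxonomy nodes):
class Node(object):
    def __init__(self, name, children=None):
        self.name = name
        self.children = children
        if children:
            for child in children:
                child.parent = self

root = Node("root", [Node("mammals", [Node("human"), Node("mouse")]),
                     Node("insects", [Node("fly")])])

grey_nodes = [root]
while len(grey_nodes) > 0:
    node = grey_nodes.pop(0)        # FIFO pop -> breadth-first order
    if node.children != None:
        grey_nodes.extend(node.children)
    print node.name                 # root, mammals, insects, human, mouse, fly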
'AGCAGGAAGGGGGGGAGTC']

label_test_dna2 = numpy.array(5*[-1.0] + 5*[1.0])

C = 1.0

feats_train = StringCharFeatures(fm_train_dna, DNA)
feats_test = StringCharFeatures(fm_test_dna, DNA)
kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
labels = Labels(label_train_dna)

svm = SVMLight(C, kernel, labels)
svm.train()

#####################################

print "obtaining DA SVM from previously trained SVM"

feats_train2 = StringCharFeatures(fm_train_dna, DNA)
feats_test2 = StringCharFeatures(fm_test_dna, DNA)
kernel2 = WeightedDegreeStringKernel(feats_train, feats_train, degree)
labels2 = Labels(label_train_dna)

# we regularize against the previously obtained solution
dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
dasvm.train()

out = dasvm.classify(feats_test2).get_labels()

print out
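# A small follow-up sketch (an addition, not part of the original script):
# if feats_test2 above really holds the second domain's test sequences,
# the predictions in out can be scored against label_test_dna2.
if len(out) == len(label_test_dna2):
    print "fraction of correct predictions:", numpy.mean(out == label_test_dna2)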
def _train(self, train_data, param):
    """
    training procedure using training examples and labels

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm
    """

    for task_id in train_data.keys():
        print "task_id:", task_id

    root = param.taxonomy.data

    # process each leaf: train a source model on its nearest neighbor's
    # data, then adapt to the leaf's own data
    for node in root.get_leaves():

        #####################################################
        # train parent predictor
        #####################################################

        parent_node = node.get_nearest_neighbor()
        cost = param.cost

        (examples, labels) = self.get_data(parent_node, train_data)

        # create shogun data objects
        k_parent = shogun_factory_new.create_kernel(examples, param)
        lab_parent = shogun_factory_new.create_labels(labels)

        parent_svm = SVMLight(cost, k_parent, lab_parent)
        parent_svm.train()

        #####################################################
        # train leaf predictor
        #####################################################

        (examples, labels) = self.get_data(node, train_data)

        # create shogun data objects
        k = shogun_factory_new.create_kernel(examples, param)
        lab = shogun_factory_new.create_labels(labels)

        # regularize vs parent predictor
        weight = param.transform
        print "current edge_weight:", weight, ", name:", node.name

        svm = DomainAdaptationSVM(cost, k, lab, parent_svm, weight)
        svm.train()

        # attach svm to node
        node.predictor = svm

    #####################################################
    # Wrap things up
    #####################################################

    # wrap up predictors for later use
    predictors = {}

    for leaf in root.get_leaves():
        predictors[leaf.name] = leaf.predictor
        assert (leaf.predictor != None)

    # make sure we have the same keys (potentially in a different order)
    sym_diff_keys = set(train_data.keys()).symmetric_difference(set(predictors.keys()))
    assert len(sym_diff_keys) == 0, "symmetric difference between keys non-empty: " + str(sym_diff_keys)

    return predictors
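# get_data is not shown in this snippet. Judging from the top-down variant
# above, which concatenates inst.example / inst.label over
# node.get_data_keys(), a plausible reconstruction looks like this
# (hypothetical, for illustration only):
def get_data(self, node, train_data):

    examples = []
    labels = []

    for key in node.get_data_keys():
        for inst in train_data[key]:
            examples.append(inst.example)
            labels.append(inst.label)

    return (examples, labels)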
alphas = svm.get_alphas()
alphas_full = numpy.zeros(N)
alphas_full[sv_idx] = alphas

lin = svm.get_linear_term()
print "manual linear term:", lin, type(lin)

objective = svm.get_objective()
print "svmlight alphas:", numpy.array(alphas[0:5])

#############################################
# compute DA-SVMs in shogun
#############################################

dasvm = DomainAdaptationSVM(1.0, wdk, lab, presvm, B)
#dasvm = SVMLight(1.0, wdk, lab)

Math_init_random(1)
dasvm.train()

#dasvm = SVMLight(1.0, wdk, lab)
#dasvm.set_linear_term(numpy.double(p))
#dasvm.train()

lin_da = dasvm.get_linear_term()
daobj = dasvm.get_objective()

sv_idx_da = dasvm.get_support_vectors()
alphas_da = dasvm.get_alphas()
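# A sanity-check sketch (an addition, not part of the original script):
# expand the DA-SVM's sparse alphas to full length, as done for the manual
# solver above, and compare the two solutions. Close agreement is only
# expected if both runs solved the same QP (same linear term, same C,
# identical bias handling).
alphas_da_full = numpy.zeros(N)
alphas_da_full[sv_idx_da] = alphas_da

print "max abs alpha difference:      ", numpy.max(numpy.abs(alphas_full - alphas_da_full))
print "objective difference:          ", abs(objective - daobj)
print "max abs linear term difference:", numpy.max(numpy.abs(lin - lin_da))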