# imports for shogun's legacy "modular" Python interface (an assumption;
# the snippet as shown omits them)
from shogun.Features import StringCharFeatures, BinaryLabels, DNA
from shogun.Kernel import WeightedDegreeStringKernel
from shogun.Classifier import SVMLight, DomainAdaptationSVM, MSG_DEBUG


def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2,
                                           C=1, degree=3):

    # train a standard SVM on the source-domain data
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()
    #svm.io.set_loglevel(MSG_DEBUG)

    #####################################

    #print("obtaining DA SVM from previously trained SVM")

    feats_train2 = StringCharFeatures(fm_train_dna, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels2 = BinaryLabels(label_train_dna)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)

    return out  #,dasvm TODO
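A minimal usage sketch with hypothetical toy data (in the original script these module-level names are loaded from data files before the function is defined, since the defaults reference them; the weighted degree kernel needs equal-length strings, and shogun must be built with SVMLight support):

from numpy import array

# hypothetical toy data: equal-length DNA strings, labels in {-1, +1}
traindna = ['CGCACGTACGTAGCTCGAT', 'CGACGTAGTCGTAGTCGTA',
            'CGACGGGGGGGGGGTCGTA', 'CGACCTAGTCGTAGTCGTA']
testdna = ['CGACCTAGTCGTAGTCGTA', 'CGACCACAGTTATATAGTA']
label_traindna = array([-1.0, -1.0, 1.0, 1.0])

traindna2 = ['CGACCTAGTCGTAGTCGTA', 'CGACCACAGTTATATAGTA',
             'CGACGTAGTCGTAGTCGTA', 'CGACGGGGGGGGGGTCGTA']
testdna2 = ['CGCACGTACGTAGCTCGAT', 'CGACGTAGTCGTAGTCGTA']
label_traindna2 = array([-1.0, 1.0, 1.0, -1.0])

# the test labels are never used by the example, hence None
out = classifier_domainadaptationsvm_modular(traindna, testdna, label_traindna, None,
                                             traindna2, testdna2, label_traindna2, None)
print out.get_labels()
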
def create_domain_adaptation_svm(param, k, lab, presvm, weight):
    '''
    create SVM object with standard settings

    @param param: parameter object
    @param k: kernel
    @param lab: label object
    @param presvm: previously trained SVM to regularize against
    @param weight: domain adaptation trade-off weight B

    @return: svm object
    '''

    # create SVM
    if param.flags.get("svm_type") == "liblineardual":
        svm = DomainAdaptationSVMLinear(param.cost, k, lab, presvm, weight)
    else:
        svm = DomainAdaptationSVM(param.cost, k, lab, presvm, weight)

    return set_svm_parameters(svm, param)
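A hypothetical call site for the factory above; the Param stub, feat, lab and presvm_liblinear are placeholders for objects created elsewhere, and the set_svm_parameters helper it returns through is not shown in this snippet:

class Param(object):
    # hypothetical stand-in for the real parameter object
    def __init__(self):
        self.cost = 1.0
        self.flags = {"svm_type": "liblineardual"}

# dispatches to DomainAdaptationSVMLinear because of the "liblineardual" flag
svm = create_domain_adaptation_svm(Param(), feat, lab, presvm_liblinear, 1.0)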
Example #3
Math_init_random(1)
dasvm_manual_libsvm.train()

###############
# compare to LibLinear

dasvm_manual_liblinear = LibLinear(1.0, feat, lab)
dasvm_manual_liblinear.set_linear_term(linterm_manual)
dasvm_manual_liblinear.set_bias_enabled(False)
dasvm_manual_liblinear.train()

#############################################
#    compute DA-SVMs in shogun (kernelized AND linear)
#############################################

dasvm_libsvm = DomainAdaptationSVM(1.0, wdk, lab, presvm_libsvm, B)
dasvm_libsvm.set_bias_enabled(False)
dasvm_libsvm.train()

dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear,
                                            B)
dasvm_liblinear.io.set_loglevel(MSG_DEBUG)
dasvm_liblinear.set_bias_enabled(False)
dasvm_liblinear.train()

print "##############"
alphas = []

sv_ids = dasvm_libsvm.get_support_vectors()
for (j, sv_id) in enumerate(sv_ids):
    alpha = dasvm_libsvm.get_alphas()[j]
    alphas.append(alpha)
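
At prediction time the DA-SVM adds the B-weighted presvm output to the newly trained SVM's output, i.e. f_dasvm(x) = f_new(x) + B * f_presvm(x). A hedged check of this combination rule (feats_eval is a hypothetical evaluation feature object, not defined in this snippet):

# the manual solution stores only the dual part f_new, so the B-weighted
# presvm contribution has to be added back before comparing
out_dasvm = dasvm_libsvm.apply_binary(feats_eval).get_values()
out_manual = dasvm_manual_libsvm.apply_binary(feats_eval).get_values() \
    + B * presvm_libsvm.apply_binary(feats_eval).get_values()
print "max deviation:", abs(out_dasvm - out_manual).max()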
Example #4
    def _train(self, train_data, param):
        """
        training procedure using training examples and labels
        
        @param train_data: Data relevant to SVM training
        @type train_data: dict<str, list<instances> >
        @param param: Parameters for the training procedure
        @type param: ParameterSvm
        """

        for task_id in train_data.keys():
            print "task_id:", task_id

        root = param.taxonomy.data

        grey_nodes = [root]

        # top-down processing of taxonomy

        while grey_nodes:

            node = grey_nodes.pop(0)  # pop first item

            # enqueue children
            if node.children is not None:
                grey_nodes.extend(node.children)

            #####################################################
            #     init data structures
            #####################################################

            # get data below current node
            data = [train_data[key] for key in node.get_data_keys()]

            print "data at current level"
            for instance_set in data:
                print instance_set[0].dataset

            # initialize containers
            examples = []
            labels = []

            # concatenate data
            for instance_set in data:

                print "train split_set:", instance_set[0].dataset.organism

                for inst in instance_set:
                    examples.append(inst.example)
                    labels.append(inst.label)

            # create shogun data objects
            k = shogun_factory_new.create_kernel(examples, param)
            lab = shogun_factory_new.create_labels(labels)

            cost = param.cost
            #cost = node.cost

            print "using cost:", cost

            #####################################################
            #    train predictors
            #####################################################

            # init predictor variable
            svm = None

            # set up SVM
            if node.is_root():

                print "training svm at top level"
                svm = SVMLight(cost, k, lab)

            else:

                # regularize vs parent predictor

                #weight = node.edge_weight
                weight = param.transform

                print "current edge_weight:", weight, " ,name:", node.name

                parent_svm = node.parent.predictor

                svm = DomainAdaptationSVM(cost, k, lab, parent_svm, weight)
                #svm.set_train_factor(param.base_similarity)

            if param.flags["normalize_cost"]:

                norm_c_pos = param.cost / float(len([l for l in lab.get_labels() if l == 1]))
                norm_c_neg = param.cost / float(len([l for l in lab.get_labels() if l == -1]))
                svm.set_C(norm_c_neg, norm_c_pos)

            # set epsilon
            if param.flags.has_key("epsilon"):
                svm.set_epsilon(param.flags["epsilon"])

            # enable output
            svm.io.enable_progress()
            svm.io.set_loglevel(shogun.Classifier.MSG_INFO)

            svm.set_train_factor(param.flags["train_factor"])
            svm.train()

            # attach svm to node
            node.predictor = svm

            # save some information
            self.additional_information[node.name + " svm obj"] = svm.get_objective()
            self.additional_information[node.name + " svm num sv"] = svm.get_num_support_vectors()
            self.additional_information[node.name + " runtime"] = svm.get_runtime()

        #####################################################
        #    Wrap things up
        #####################################################

        # wrap up predictors for later use
        predictors = {}

        for leaf in root.get_leaves():

            predictors[leaf.name] = leaf.predictor

            assert leaf.predictor is not None

        # make sure we have the same keys (potentially in a different order)
        sym_diff_keys = set(train_data.keys()).symmetric_difference(set(predictors.keys()))
        assert len(sym_diff_keys) == 0, "symmetric difference between keys non-empty: " + str(sym_diff_keys)

        # save graph plot
        mypath = "/fml/ag-raetsch/share/projects/multitask/graphs/"
        filename = mypath + "graph_" + str(param.id)
        root.plot(filename)  #, plot_cost=True, plot_B=True)

        return predictors
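
The _train method above assumes a taxonomy node interface along these lines (a hypothetical sketch inferred from the calls made; the project's real class is not shown here):

class TaxonomyNode(object):
    """hypothetical minimal taxonomy node, inferred from the usage above"""

    def __init__(self, name, parent=None):
        self.name = name
        self.parent = parent
        self.children = None   # list of TaxonomyNode, or None for a leaf
        self.predictor = None  # trained SVM is attached here

    def is_root(self):
        return self.parent is None

    def get_data_keys(self):
        # keys into train_data covered by the subtree below this node
        raise NotImplementedError

    def get_leaves(self):
        # collect all leaves below (and including) this node
        if self.children is None:
            return [self]
        leaves = []
        for child in self.children:
            leaves.extend(child.get_leaves())
        return leaves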
    def _train(self, train_data, param):
        """
        training procedure using training examples and labels
        
        @param train_data: Data relevant to SVM training
        @type train_data: dict<str, list<instances> >
        @param param: Parameters for the training procedure
        @type param: ParameterSvm
        """

        for task_id in train_data.keys():
            print "task_id:", task_id

        root = param.taxonomy.data

        # train one predictor per leaf, regularized against its nearest neighbor

        for node in root.get_leaves():

            #####################################################
            #    train parent predictor (nearest neighbor)
            #####################################################

            parent_node = node.get_nearest_neighbor()

            cost = param.cost

            (examples, labels) = self.get_data(parent_node, train_data)

            # create shogun data objects
            k_parent = shogun_factory_new.create_kernel(examples, param)
            lab_parent = shogun_factory_new.create_labels(labels)

            parent_svm = SVMLight(cost, k_parent, lab_parent)

            parent_svm.train()

            #####################################################
            #    train leaf predictor
            #####################################################

            (examples, labels) = self.get_data(node, train_data)

            # create shogun data objects
            k = shogun_factory_new.create_kernel(examples, param)
            lab = shogun_factory_new.create_labels(labels)

            # regularize against the nearest-neighbor predictor

            weight = param.transform
            print "current edge_weight:", weight, ", name:", node.name

            svm = DomainAdaptationSVM(cost, k, lab, parent_svm, weight)
            svm.train()

            # attach svm to node
            node.predictor = svm

        #####################################################
        #    Wrap things up
        #####################################################

        # wrap up predictors for later use
        predictors = {}

        for leaf in root.get_leaves():

            predictors[leaf.name] = leaf.predictor

            assert leaf.predictor is not None

        sym_diff_keys = set(train_data.keys()).symmetric_difference(set(predictors.keys()))
        assert len(sym_diff_keys) == 0, "symmetric difference between keys non-empty: " + str(sym_diff_keys)

        return predictors
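
The get_data helper called above is not part of this snippet; a hypothetical reconstruction that mirrors the explicit concatenation loop of the first _train variant:

    def get_data(self, node, train_data):
        # hypothetical: gather examples and labels for all data keys
        # below the given node, as the first variant does inline
        examples = []
        labels = []
        for key in node.get_data_keys():
            for inst in train_data[key]:
                examples.append(inst.example)
                labels.append(inst.label)
        return (examples, labels)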