def create_empty_promoter_kernel(param):
    """
    Build an uninitialized promoter kernel from a parameter dict.

    @param param: settings dict with keys "shifts", "degree",
                  "center_offset" and "kernel_cache"
    """

    # each of the three sub-kernels gets a third of the cache
    cache_third = param["kernel_cache"] / 3

    # center kernel: WDK with positional shifts, or plain WDK when
    # no shifts are requested
    if param["shifts"] == 0:
        kernel_center = WeightedDegreeStringKernel(param["degree"])
    else:
        kernel_center = WeightedDegreePositionStringKernel(10, param["degree"])
        shifts = param["shifts"] * numpy.ones(2 * param["center_offset"],
                                              dtype=numpy.int32)
        kernel_center.set_shifts(shifts)

    kernel_center.set_cache_size(cache_third)

    # spectrum kernels for the left/right borders
    use_sign = False
    kernel_left = WeightedCommWordStringKernel(cache_third, use_sign)
    kernel_right = WeightedCommWordStringKernel(cache_third, use_sign)

    # combine all three into one kernel
    combined = CombinedKernel()
    for sub_kernel in (kernel_center, kernel_left, kernel_right):
        combined.append_kernel(sub_kernel)

    return combined
Esempio n. 2
0
def create_empty_promoter_kernel(param):
    """
    Return a promoter CombinedKernel that has not yet been initialized
    with features.

    @param param: dict carrying "shifts", "degree", "center_offset"
                  and "kernel_cache"
    """

    degree = param["degree"]

    # centered weighted-degree kernel, optionally with shifts
    if param["shifts"] == 0:
        center = WeightedDegreeStringKernel(degree)
    else:
        center = WeightedDegreePositionStringKernel(10, degree)
        center.set_shifts(numpy.ones(param["center_offset"] * 2,
                                     dtype=numpy.int32) * param["shifts"])

    # split the kernel cache evenly over the three parts
    cache_size = param["kernel_cache"] / 3
    center.set_cache_size(cache_size)

    # spectrum kernels covering the left and right borders
    left = WeightedCommWordStringKernel(cache_size, False)
    right = WeightedCommWordStringKernel(cache_size, False)

    # stitch everything together
    combined = CombinedKernel()
    combined.append_kernel(center)
    combined.append_kernel(left)
    combined.append_kernel(right)

    return combined
Esempio n. 3
0
def create_kernel(examples, param):
    """
    kernel factory

    @param examples: list/array of examples
    @type examples: list
    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    # first create feature object of correct type
    feat = create_features(examples, param)

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree)
        kernel.set_cache_size(200)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel(feat, feat)

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(feat, feat, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(feat, feat, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 200
        nof_properties = 20
        sigma = param.base_similarity
        kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree,
                                         nof_properties, size_cache)

    elif param.kernel == "Promoter":
        kernel = create_promoter_kernel(examples, param.flags)

    else:
        raise Exception("Unknown kernel type.")

    # optional flags — bug fix: the "debug" lookup previously accessed
    # param.flags without the hasattr guard applied to "cache_size",
    # raising AttributeError for parameter objects that carry no flags
    if hasattr(param, "flags"):
        if "cache_size" in param.flags:
            kernel.set_cache_size(param.flags["cache_size"])
        if "debug" in param.flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
Esempio n. 4
0
    def train(self, data, labels):
        """
        model training

        Assembles a three-part CombinedKernel (centered WDK or
        WDK-with-shifts plus left/right border spectrum kernels), builds
        features from ``data`` and trains an SVMLight classifier.

        @param data: raw training sequences, forwarded to create_features
        @param labels: labels convertible to numpy.double for BinaryLabels
        @return: self, with the trained SVM stored in ``self.svm``
        """

        # centered WDK/WDK-shift: plain weighted-degree kernel when no
        # shifts are requested, positional variant otherwise
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(
                10, self.param["degree"])
            # constant shift for every position in the 2*center_offset window
            shifts_vector = numpy.ones(
                self.param["center_offset"] * 2,
                dtype=numpy.int32) * self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        # kernel cache is split evenly across the three sub-kernels
        kernel_center.set_cache_size(self.param["kernel_cache"] / 3)

        # border spectrum kernels
        size = self.param["kernel_cache"] / 3
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        print "len(labels) = %i" % (len(labels))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        # NOTE(review): 10e-8 equals 1e-7 — confirm this is the intended
        # tolerance and not a typo for 1e-8
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self
def create_kernel(examples, param):
    """
    kernel factory

    @param examples: list/array of examples
    @type examples: list
    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    # first create feature object of correct type
    feat = create_features(examples, param)

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree)
        kernel.set_cache_size(200)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel(feat, feat)

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(feat, feat, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(feat, feat, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 200
        nof_properties = 20
        sigma = param.base_similarity
        kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree,
                                         nof_properties, size_cache)

    elif param.kernel == "Promoter":
        kernel = create_promoter_kernel(examples, param.flags)

    else:
        raise Exception("Unknown kernel type.")

    # bug fix: guard the "debug" lookup with hasattr as well — the
    # original dereferenced param.flags unconditionally and raised
    # AttributeError when param had no flags attribute
    if hasattr(param, "flags"):
        if "cache_size" in param.flags:
            kernel.set_cache_size(param.flags["cache_size"])
        if "debug" in param.flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
Esempio n. 6
0
    def train(self, data, labels):
        """
        model training

        Builds the combined promoter kernel (centered WDK/WDK-shift plus
        left/right border spectrum kernels), creates features from
        ``data`` and fits an SVMLight classifier.

        @param data: raw training sequences, forwarded to create_features
        @param labels: labels convertible to numpy.double for BinaryLabels
        @return: self, with the trained SVM stored in ``self.svm``
        """

        # centered WDK/WDK-shift: positional variant only when shifts requested
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(10, self.param["degree"])
            # constant shift for each position in the 2*center_offset window
            shifts_vector = numpy.ones(self.param["center_offset"]*2, dtype=numpy.int32)*self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        # kernel cache is split evenly across the three sub-kernels
        kernel_center.set_cache_size(self.param["kernel_cache"]/3)

        # border spectrum kernels
        size = self.param["kernel_cache"]/3
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features
        feat = create_features(data, self.param["center_offset"], self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        print "len(labels) = %i" % (len(labels))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        # NOTE(review): 10e-8 equals 1e-7 — confirm this is the intended
        # tolerance and not a typo for 1e-8
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self
Esempio n. 7
0
def create_empty_kernel(param):
    """
    kernel factory

    Builds an empty (feature-less) kernel of the type named by
    ``param.kernel``.

    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(param.wdk_degree)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel()

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(10, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(10, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 50
        nof_properties = 5  #20
        sigma = param.transform
        kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree,
                                         nof_properties)

    else:
        raise Exception("Unknown kernel type:" + param.kernel)

    # bug fix: guard the "debug" lookup with hasattr too — the original
    # accessed param.flags unconditionally there and raised
    # AttributeError when param had no flags attribute
    if hasattr(param, "flags"):
        if "cache_size" in param.flags:
            kernel.set_cache_size(param.flags["cache_size"])
        if "debug" in param.flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
def create_empty_kernel(param):
    """
    kernel factory

    Builds an empty (feature-less) kernel of the type named by
    ``param.kernel``.

    @param param: parameter object
    @type param: Parameter

    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(param.wdk_degree)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel()

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(10, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(10, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 50
        nof_properties = 5  #20
        sigma = param.transform
        kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree,
                                         nof_properties)

    else:
        raise Exception("Unknown kernel type:" + param.kernel)

    # bug fix: the "debug" lookup previously dereferenced param.flags
    # without the hasattr guard applied to "cache_size", raising
    # AttributeError for parameter objects without a flags attribute
    if hasattr(param, "flags"):
        if "cache_size" in param.flags:
            kernel.set_cache_size(param.flags["cache_size"])
        if "debug" in param.flags:
            kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
Esempio n. 9
0
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """
    implementation using multitask kernel

    Wraps a base kernel (WDK of degree 8 for DNA strings, linear kernel
    otherwise) in a MultitaskKernelNormalizer parameterized by the
    task-similarity matrix M, trains SVMLight and returns the negated
    dual objectives recorded by the solver together with the training
    times.

    @param C: SVM cost, applied symmetrically to both classes
    @param all_xt: examples (DNA strings or real-valued vectors)
    @param all_lt: labels, one per example
    @param task_indicator: task id per example
    @param M: task-similarity matrix, same shape as L
    @param L: matrix whose first dimension gives num_tasks; otherwise
              only used in the disabled primal-objective branch
    @param eps: optimizer tolerance passed to set_epsilon
    @param target_obj: target objective handed to the solver
    @return: (objectives, train_times)
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks: one label and task id per example, consistent
    # matrices, and every task actually present in the data
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects: string features + WDK for DNA input,
    # dense real features + linear kernel otherwise
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer: weights each kernel entry by the similarity
    # of the two examples' tasks
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(
        task_indicator)

    # set up kernel; normalizer must be initialized before training
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()

    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    # NOTE(review): linadd/batch computation are disabled here — the
    # reason is not visible in this file; confirm it is required for
    # objective recording before re-enabling
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    #SET THREADS TO 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    # negate the recorded dual objectives
    objectives = [-obj for obj in svm.get_dual_objectives()]

    # dead branch kept for reference: dense-alpha reconstruction and
    # primal-objective evaluation (enable by flipping the condition)
    if False:

        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times