def create_empty_promoter_kernel(param):
    """
    Create an uninitialized promoter kernel.

    @param param: dict of settings; reads the keys "shifts", "degree",
                  "center_offset" and "kernel_cache"
    @return: CombinedKernel made of a centered WDK/WDK-shift kernel plus
             two spectrum kernels for the flanking sequence borders
    """

    # Centered kernel: plain WDK when no shifts are requested,
    # otherwise a positional WDK with a constant shift at every position.
    if param["shifts"] == 0:
        center_kernel = WeightedDegreeStringKernel(param["degree"])
    else:
        center_kernel = WeightedDegreePositionStringKernel(10, param["degree"])
        # window spans 2*center_offset positions, each with the same shift
        shift_vec = param["shifts"] * numpy.ones(2 * param["center_offset"],
                                                 dtype=numpy.int32)
        center_kernel.set_shifts(shift_vec)

    # NOTE(review): collapsed source formatting leaves it ambiguous whether
    # this cache call applied only to the shift branch; applied to both here.
    center_kernel.set_cache_size(param["kernel_cache"] / 3)

    # Border spectrum kernels each get a third of the cache budget.
    cache_share = param["kernel_cache"] / 3
    use_sign = False
    left_kernel = WeightedCommWordStringKernel(cache_share, use_sign)
    right_kernel = WeightedCommWordStringKernel(cache_share, use_sign)

    # Assemble the combined kernel from the three parts.
    combined = CombinedKernel()
    combined.append_kernel(center_kernel)
    combined.append_kernel(left_kernel)
    combined.append_kernel(right_kernel)

    return combined
def create_empty_promoter_kernel(param): """ creates an uninitialized promoter kernel @param param: """ # centered WDK/WDK-shift if param["shifts"] == 0: kernel_center = WeightedDegreeStringKernel(param["degree"]) else: kernel_center = WeightedDegreePositionStringKernel(10, param["degree"]) shifts_vector = numpy.ones(param["center_offset"]*2, dtype=numpy.int32)*param["shifts"] kernel_center.set_shifts(shifts_vector) kernel_center.set_cache_size(param["kernel_cache"]/3) # border spetrum kernels size = param["kernel_cache"]/3 use_sign = False kernel_left = WeightedCommWordStringKernel(size, use_sign) kernel_right = WeightedCommWordStringKernel(size, use_sign) # assemble combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_center) kernel.append_kernel(kernel_left) kernel.append_kernel(kernel_right) return kernel
def create_kernel(examples, param): """ kernel factory @param examples: list/array of examples @type examples: list @param param: parameter object @type param: Parameter @return subclass of shogun Kernel object @rtype: Kernel """ # first create feature object of correct type feat = create_features(examples, param) kernel = None if param.kernel == "WeightedDegreeStringKernel": kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree) kernel.set_cache_size(200) elif param.kernel == "LinearKernel": kernel = LinearKernel(feat, feat) elif param.kernel == "PolyKernel": kernel = PolyKernel(feat, feat, 1, False) elif param.kernel == "GaussianKernel": kernel = GaussianKernel(feat, feat, param.sigma) elif param.kernel == "WeightedDegreeRBFKernel": size_cache = 200 nof_properties = 20 sigma = param.base_similarity kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree, nof_properties, size_cache) elif param.kernel == "Promoter": kernel = create_promoter_kernel(examples, param.flags) else: raise Exception, "Unknown kernel type." if hasattr(param, "flags") and param.flags.has_key("cache_size"): kernel.set_cache_size(param.flags["cache_size"]) if param.flags.has_key("debug"): kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) return kernel
def train(self, data, labels): """ model training """ # centered WDK/WDK-shift if self.param["shifts"] == 0: kernel_center = WeightedDegreeStringKernel(self.param["degree"]) else: kernel_center = WeightedDegreePositionStringKernel( 10, self.param["degree"]) shifts_vector = numpy.ones( self.param["center_offset"] * 2, dtype=numpy.int32) * self.param["shifts"] kernel_center.set_shifts(shifts_vector) kernel_center.set_cache_size(self.param["kernel_cache"] / 3) # border spetrum kernels size = self.param["kernel_cache"] / 3 use_sign = False kernel_left = WeightedCommWordStringKernel(size, use_sign) kernel_right = WeightedCommWordStringKernel(size, use_sign) # assemble combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_center) kernel.append_kernel(kernel_left) kernel.append_kernel(kernel_right) ## building features feat = create_features(data, self.param["center_offset"], self.param["center_pos"]) # init combined kernel kernel.init(feat, feat) print "len(labels) = %i" % (len(labels)) lab = BinaryLabels(numpy.double(labels)) self.svm = SVMLight(self.param["cost"], kernel, lab) # show debugging output self.svm.io.enable_progress() self.svm.io.set_loglevel(MSG_DEBUG) # optimization settings num_threads = 2 self.svm.parallel.set_num_threads(num_threads) self.svm.set_epsilon(10e-8) self.svm.train() return self
def create_kernel(examples, param): """ kernel factory @param examples: list/array of examples @type examples: list @param param: parameter object @type param: Parameter @return subclass of shogun Kernel object @rtype: Kernel """ # first create feature object of correct type feat = create_features(examples, param) kernel = None if param.kernel == "WeightedDegreeStringKernel": kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree) kernel.set_cache_size(200) elif param.kernel == "LinearKernel": kernel = LinearKernel(feat, feat) elif param.kernel == "PolyKernel": kernel = PolyKernel(feat, feat, 1, False) elif param.kernel == "GaussianKernel": kernel = GaussianKernel(feat, feat, param.sigma) elif param.kernel == "WeightedDegreeRBFKernel": size_cache = 200 nof_properties = 20 sigma = param.base_similarity kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree, nof_properties, size_cache) elif param.kernel == "Promoter": kernel = create_promoter_kernel(examples, param.flags) else: raise Exception, "Unknown kernel type." if hasattr(param, "flags") and param.flags.has_key("cache_size"): kernel.set_cache_size(param.flags["cache_size"]) if param.flags.has_key("debug"): kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) return kernel
def train(self, data, labels): """ model training """ # centered WDK/WDK-shift if self.param["shifts"] == 0: kernel_center = WeightedDegreeStringKernel(self.param["degree"]) else: kernel_center = WeightedDegreePositionStringKernel(10, self.param["degree"]) shifts_vector = numpy.ones(self.param["center_offset"]*2, dtype=numpy.int32)*self.param["shifts"] kernel_center.set_shifts(shifts_vector) kernel_center.set_cache_size(self.param["kernel_cache"]/3) # border spetrum kernels size = self.param["kernel_cache"]/3 use_sign = False kernel_left = WeightedCommWordStringKernel(size, use_sign) kernel_right = WeightedCommWordStringKernel(size, use_sign) # assemble combined kernel kernel = CombinedKernel() kernel.append_kernel(kernel_center) kernel.append_kernel(kernel_left) kernel.append_kernel(kernel_right) ## building features feat = create_features(data, self.param["center_offset"], self.param["center_pos"]) # init combined kernel kernel.init(feat, feat) print "len(labels) = %i" % (len(labels)) lab = BinaryLabels(numpy.double(labels)) self.svm = SVMLight(self.param["cost"], kernel, lab) # show debugging output self.svm.io.enable_progress() self.svm.io.set_loglevel(MSG_DEBUG) # optimization settings num_threads = 2 self.svm.parallel.set_num_threads(num_threads) self.svm.set_epsilon(10e-8) self.svm.train() return self
def create_empty_kernel(param): """ kernel factory @param param: parameter object @type param: Parameter @return subclass of shogun Kernel object @rtype: Kernel """ kernel = None if param.kernel == "WeightedDegreeStringKernel": kernel = WeightedDegreeStringKernel(param.wdk_degree) elif param.kernel == "LinearKernel": kernel = LinearKernel() elif param.kernel == "PolyKernel": kernel = PolyKernel(10, 1, False) elif param.kernel == "GaussianKernel": kernel = GaussianKernel(10, param.sigma) elif param.kernel == "WeightedDegreeRBFKernel": size_cache = 50 nof_properties = 5 #20 sigma = param.transform kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree, nof_properties) else: raise Exception, "Unknown kernel type:" + param.kernel if hasattr(param, "flags") and param.flags.has_key("cache_size"): kernel.set_cache_size(param.flags["cache_size"]) if param.flags.has_key("debug"): kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) return kernel
def create_empty_kernel(param): """ kernel factory @param param: parameter object @type param: Parameter @return subclass of shogun Kernel object @rtype: Kernel """ kernel = None if param.kernel == "WeightedDegreeStringKernel": kernel = WeightedDegreeStringKernel(param.wdk_degree) elif param.kernel == "LinearKernel": kernel = LinearKernel() elif param.kernel == "PolyKernel": kernel = PolyKernel(10, 1, False) elif param.kernel == "GaussianKernel": kernel = GaussianKernel(10, param.sigma) elif param.kernel == "WeightedDegreeRBFKernel": size_cache = 50 nof_properties = 5 #20 sigma = param.transform kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree, nof_properties) else: raise Exception, "Unknown kernel type:" + param.kernel if hasattr(param, "flags") and param.flags.has_key("cache_size"): kernel.set_cache_size(param.flags["cache_size"]) if param.flags.has_key("debug"): kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) return kernel
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps, target_obj):
    """
    implementation using multitask kernel

    Trains an SVMLight model whose base kernel (WDK for string data,
    linear otherwise) is normalized by a MultitaskKernelNormalizer built
    from the task-similarity matrix M.

    @param C: regularization constant (set for both classes via set_C)
    @param all_xt: examples; numpy strings select a DNA string kernel,
                   anything else a linear kernel on real features
    @param all_lt: labels
    @param task_indicator: per-example task id
    @param M: task-by-task similarity matrix
    @param L: matrix with the same shape as M; used here for shape checks
              (and passed to the primal-objective call in the dead branch)
    @param eps: SVM training epsilon
    @param target_obj: target objective handed to the solver
    @return: (objectives, train_times) — negated dual objectives and the
             solver's recorded training times
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks: aligned inputs, matching matrices, one row per task
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects: string data gets a WDK of degree 8,
    # numeric data a linear kernel on the transposed feature matrix
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer: pairwise task similarities taken from M
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(task_indicator)

    # set up kernel with the multitask normalizer attached
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()

    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    #SET THREADS TO 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    # dual objectives are negated before being returned
    objectives = [-obj for obj in svm.get_dual_objectives()]

    # NOTE(review): dead code — this branch never executes; kept as found.
    # The collapsed source also makes the exact extent of this block
    # ambiguous (the W/primal_obj/append lines may have sat outside it) —
    # confirm against the original layout.
    if False:
        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()
        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt, task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times