def check_C_testset(mss_id):

    import pylab
    import expenv
    import numpy

    from helper import Options
    from method_hierarchy_svm_new import Method
    #from method_augmented_svm_new import Method

    #costs = 10000 #[float(c) for c in numpy.exp(numpy.linspace(numpy.log(10), numpy.log(20000), 6))]
    costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(0.4), numpy.log(10), 6))]

    print costs

    mss = expenv.MultiSplitSet.get(mss_id)
    train = mss.get_train_data(-1)
    test = mss.get_eval_data(-1)

    au_roc = []
    au_prc = []

    for cost in costs:

        #create mock param object by freezable struct
        param = Options()
        param.kernel = "WeightedDegreeStringKernel"
        param.wdk_degree = 10
        param.transform = cost
        param.base_similarity = 1.0
        param.taxonomy = mss.taxonomy
        param.id = 666
        #param.cost = cost
        param.cost = 10000
        param.freeze()

        # train
        mymethod = Method(param)
        mymethod.train(train)

        assessment = mymethod.evaluate(test)
        au_roc.append(assessment.auROC)
        au_prc.append(assessment.auPRC)

        print assessment
        assessment.destroySelf()

    pylab.title("auROC")
    pylab.semilogx(costs, au_roc, "-o")
    pylab.show()

    pylab.figure()
    pylab.title("auPRC")
    pylab.semilogx(costs, au_prc, "-o")
    pylab.show()

    return (costs, au_roc, au_prc)
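# Minimal driver sketch (not part of the original script): calls check_C_testset on a
# MultiSplitSet id. The id 379 is borrowed from other snippets in this file and is only
# an assumption; any valid MSS id in the local expenv database would do.
if __name__ == "__main__":
    costs, au_roc, au_prc = check_C_testset(379)
    print "costs:", costs
    print "auROC:", au_roc
    print "auPRC:", au_prc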
def main():

    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    #multi_split_set = MultiSplitSet.get(407)
    multi_split_set = MultiSplitSet.get(399)

    #dataset_name = multi_split_set.description

    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeRBFKernel" #"WeightedDegreeStringKernel" #"PolyKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 0.2
    param.base_similarity = 1.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666

    flags = {}
    #flags["boosting"] = "ones"
    #flags["boosting"] = "L1"
    flags["boosting"] = "L2"
    #flags["boosting"] = "L2_reg"
    flags["signum"] = False
    flags["normalize_cost"] = True
    flags["all_positions"] = False
    flags["wdk_rbf_on"] = False

    param.flags = flags
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(434)

    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 1.0 #0.005
    flags["kernel_cache"] = 200
    flags["use_bias"] = False

    # arts params
    flags["svm_type"] = "liblineardual"
    flags["degree"] = 24
    flags["degree_spectrum"] = 4
    flags["shifts"] = 0 #32
    flags["center_offset"] = 70
    flags["train_factor"] = 1

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    #multi_split_set = MultiSplitSet.get(407)
    multi_split_set = MultiSplitSet.get(399)

    #dataset_name = multi_split_set.description

    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel" #"PolyKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 0.2
    param.base_similarity = 1
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666

    flags = {}
    #flags["boosting"] = "ones"
    flags["boosting"] = "L1"
    #flags["boosting"] = "L2"
    #flags["boosting"] = "L2_reg"
    flags["signum"] = False
    flags["normalize_cost"] = True
    flags["all_positions"] = False
    flags["wdk_rbf_on"] = False

    param.flags = flags
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options  # needed below for the mock param object

    # select dataset
    multi_split_set = MultiSplitSet.get(384)

    # flags
    flags = {}
    flags["normalize_cost"] = False
    #flags["epsilon"] = 0.005
    flags["kernel_cache"] = 200
    flags["use_bias"] = False

    # arts params
    #flags["svm_type"] = "liblineardual"
    flags["degree"] = 24

    flags["local"] = False
    flags["mem"] = "6G"
    flags["maxNumThreads"] = 1

    #create mock param object by freezable struct
    param = Options()
    #param.kernel = "GaussianKernel"
    param.kernel = "PolyKernel"
    param.sigma = 3.0
    param.cost = 10.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy.data
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(432)

    # flags
    flags = {}
    flags["normalize_cost"] = False
    #flags["epsilon"] = 0.005
    flags["kernel_cache"] = 200
    flags["use_bias"] = False

    # arts params
    flags["svm_type"] = "liblineardual"
    flags["degree"] = 24
    flags["degree_spectrum"] = 4
    flags["shifts"] = 0 #32
    flags["center_offset"] = 70
    flags["train_factor"] = 1

    flags["local"] = False
    flags["mem"] = "6G"
    flags["maxNumThreads"] = 1

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy.data
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(399)

    #dataset_name = multi_split_set.description

    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 0.05
    flags["cache_size"] = 7
    #flags["solver_type"] = "ST_DIRECT" #ST_CPLEX #ST_GLPK #ST_NEWTON
    flags["normalize_trace"] = True
    flags["interleaved"] = True

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1
    param.transform = 1 #2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    param.flags = flags
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options
    from task_similarities import fetch_gammas

    # select dataset
    multi_split_set = MultiSplitSet.get(317)
    #multi_split_set = MultiSplitSet.get(374)
    #multi_split_set = MultiSplitSet.get(2)

    dataset_name = multi_split_set.description

    transform = 1.0
    base = 1.0
    similarity_matrix = fetch_gammas(transform, base, dataset_name)

    #create mock taxonomy object by freezable struct
    taxonomy = Options()
    taxonomy.data = similarity_matrix
    taxonomy.description = dataset_name
    taxonomy.freeze()

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 1.0
    param.taxonomy = taxonomy
    param.id = 666
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    create_plot_inner(param, data_train, data_eval)
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(379)

    dataset_name = multi_split_set.description
    print "dataset_name", dataset_name

    #create mock taxonomy object by freezable struct
    #taxonomy = Options()
    #taxonomy.data = taxonomy_graph.data
    #taxonomy.description = dataset_name
    #taxonomy.freeze()

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train hierarchical xval
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def training_for_sigma(sigma):

    print "starting debugging:"

    from expenv import MultiSplitSet
    from helper import Options  # needed below for the mock param object

    # select dataset
    multi_split_set = MultiSplitSet.get(393)

    SPLIT_POINTER = 1

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel" #"WeightedDegreeRBFKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.base_similarity = sigma
    param.degree = 2
    param.flags = {}
    param.flags["wdk_rbf_on"] = False
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    print assessment

    # read the performance measure before destroying the assessment record
    au_roc = assessment.auROC
    assessment.destroySelf()

    return au_roc
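# Driver sketch (assumption, not part of the original): sweep the base_similarity
# parameter and plot auROC over it, mirroring the cost sweep in check_C_testset above.
def sweep_sigma():

    import numpy
    import pylab

    sigmas = [float(s) for s in numpy.exp(numpy.linspace(numpy.log(0.1), numpy.log(10), 6))]
    rocs = [training_for_sigma(sigma) for sigma in sigmas]

    pylab.title("auROC")
    pylab.semilogx(sigmas, rocs, "-o")
    pylab.show()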
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(399)

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeRBFKernel" #"WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1.0
    param.transform = 1.0
    param.sigma = 1.0
    param.id = 666
    param.base_similarity = 1
    param.degree = 2
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    multi_split_set = MultiSplitSet.get(386)

    #dataset_name = multi_split_set.description

    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel" #"PolyKernel"
    param.wdk_degree = 1
    param.cost = 100
    param.transform = 2 #2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    #multi_split_set = MultiSplitSet.get(387)
    multi_split_set = MultiSplitSet.get(386)

    #dataset_name = multi_split_set.description

    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel" #"PolyKernel"
    param.wdk_degree = 1
    param.cost = 1
    param.transform = 2 #2.0
    param.taxonomy = multi_split_set.taxonomy
    param.id = 666
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def main():

    print "starting debugging:"

    SPLIT_POINTER = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(384)

    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["kernel_cache"] = 1000
    flags["use_bias"] = False
    #flags["debug"] = False

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "PolyKernel"
    param.cost = 100.0
    param.id = 1
    param.flags = flags
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print "training done"

    assessment = mymethod.evaluate(data_eval)
    print assessment
    assessment.destroySelf()
def setUp(self):

    import expenv

    run = expenv.Run.get(13490)
    self.instances = run.get_train_data()
    self.test_data = run.get_eval_data()
    self.param = run.method.param

    flags = {}
    flags["kernel_cache"] = 200

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "GaussianKernel"
    param.sigma = 3.0
    param.cost = 10.0
    param.flags = flags

    self.param = param
def define_param():

    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["kernel_cache"] = 200
    flags["use_bias"] = False
    flags["epsilon"] = 0.01

    # arts params
    flags["svm_type"] = "liblineardual"
    flags["degree"] = 24

    #create mock param object by freezable struct
    param = Options()
    param.cost = 1.0
    param.id = 666
    param.flags = flags
    param.freeze()

    return param
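# Usage sketch (assumption, not part of the original): the frozen param object is meant
# to be consumed by a Method instance, following the pattern of the debug main()
# functions above. Method, the MultiSplitSet id and any further required fields (e.g.
# a kernel name) are assumptions of this sketch, not part of define_param() itself.
def run_with_default_param(mss_id=384, split_pointer=1):

    from expenv import MultiSplitSet

    multi_split_set = MultiSplitSet.get(mss_id)

    param = define_param()

    mymethod = Method(param)
    mymethod.train(multi_split_set.get_train_data(split_pointer))

    assessment = mymethod.evaluate(multi_split_set.get_eval_data(split_pointer))
    print assessment
    assessment.destroySelf()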
def test_data():

    ##################################################################
    # select MSS
    ##################################################################

    mss = expenv.MultiSplitSet.get(379)

    ##################################################################
    # data
    ##################################################################

    # fetch data
    instance_set = mss.get_train_data(-1)

    # prepare data
    data = PreparedMultitaskData(instance_set, shuffle=True)

    # set parameters
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 4
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.freeze()

    ##################################################################
    # taxonomy
    ##################################################################

    taxonomy = shogun_factory.create_taxonomy(mss.taxonomy.data)

    support = numpy.linspace(0, 100, 4)

    distances = [[0, 1, 2, 2], [1, 0, 2, 2], [2, 2, 0, 1], [2, 2, 1, 0]]

    # create tree normalizer
    tree_normalizer = MultitaskKernelPlifNormalizer(support, data.task_vector_names)

    task_names = data.get_task_names()

    FACTOR = 1.0

    # init gamma matrix
    gammas = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

    for t1_name in task_names:
        for t2_name in task_names:
            similarity = taxonomy.compute_node_similarity(taxonomy.get_id(t1_name), taxonomy.get_id(t2_name))
            gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)] = similarity

    helper.save("/tmp/gammas", gammas)

    gammas = gammas * FACTOR
    cost = param.cost * numpy.sqrt(FACTOR)

    print gammas

    ##########
    # regular normalizer

    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

    for t1_name in task_names:
        for t2_name in task_names:
            similarity = gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)]
            normalizer.set_task_similarity(data.name_to_id(t1_name), data.name_to_id(t2_name), similarity)

    ##################################################################
    # Train SVMs
    ##################################################################

    # create shogun objects
    wdk_tree = shogun_factory.create_kernel(data.examples, param)
    lab = shogun_factory.create_labels(data.labels)

    wdk_tree.set_normalizer(tree_normalizer)
    wdk_tree.init_normalizer()

    print "--->", wdk_tree.get_normalizer().get_name()

    svm_tree = SVMLight(cost, wdk_tree, lab)
    svm_tree.set_linadd_enabled(False)
    svm_tree.set_batch_computation_enabled(False)

    svm_tree.train()

    del wdk_tree
    del tree_normalizer

    print "finished training tree-norm SVM:", svm_tree.get_objective()

    wdk = shogun_factory.create_kernel(data.examples, param)
    wdk.set_normalizer(normalizer)
    wdk.init_normalizer()

    print "--->", wdk.get_normalizer().get_name()

    svm = SVMLight(cost, wdk, lab)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    svm.train()

    print "finished training manually set SVM:", svm.get_objective()

    alphas_tree = svm_tree.get_alphas()
    alphas = svm.get_alphas()

    assert len(alphas_tree) == len(alphas)

    for i in xrange(len(alphas)):
        assert abs(alphas_tree[i] - alphas[i]) < 0.0001

    print "success: all alphas are the same"
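# Entry point sketch (assumption, not part of the original test module).
if __name__ == "__main__":
    test_data()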
##################################################################
# data
##################################################################

# fetch data
instance_set = mss.get_train_data(-1)

# prepare data
data = PreparedMultitaskData(instance_set, shuffle=True)

# set parameters
param = Options()
param.kernel = "WeightedDegreeStringKernel"
param.wdk_degree = 4
param.cost = 1.0
param.transform = 1.0
param.id = 666
param.freeze()

##################################################################
# taxonomy
##################################################################

taxonomy = shogun_factory.create_taxonomy(mss.taxonomy.data)

# create tree normalizer
tree_normalizer = MultitaskKernelTreeNormalizer(data.task_vector_names, data.task_vector_names, taxonomy)
def main():

    print "starting debugging:"

    from expenv import MultiSplitSet
    from helper import Options
    from task_similarities import dataset_to_hierarchy

    # select dataset
    #multi_split_set = MultiSplitSet.get(317)
    multi_split_set = MultiSplitSet.get(432)
    #multi_split_set = MultiSplitSet.get(2)   #small splicing
    #multi_split_set = MultiSplitSet.get(377) #medium splicing

    dataset_name = multi_split_set.description

    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 1.0 #0.005
    flags["kernel_cache"] = 1000
    flags["use_bias"] = False

    # arts params
    flags["svm_type"] = "liblineardual"
    flags["degree"] = 24
    flags["degree_spectrum"] = 4
    flags["shifts"] = 0 #32
    flags["train_factor"] = 1
    flags["center_offset"] = 70
    flags["center_pos"] = 500

    #create mock param object by freezable struct
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    (perf_xval, final_pred, best_idx_cost) = create_plot_inner(param, data_train, data_eval)
    perf_regular = create_plot_regular(param, data_train, data_eval)

    # plot performances
    import pylab

    if TARGET_PARAM == "both":

        #X, Y = pylab.meshgrid(range(len(RANGE)), range(len(RANGE)))

        cmap = pylab.cm.get_cmap('jet', 20)  # 20 discrete colors

        pylab.contourf(RANGE, RANGE, perf_xval, cmap=cmap)
        #im = pylab.imshow(perf_xval, cmap=cmap, interpolation='bilinear')

        pylab.axis('on')
        pylab.colorbar()

        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , param:" + TARGET_PARAM + ", split:" + str(SPLIT_POINTER))
        pylab.show()

    else:

        pylab.semilogx(RANGE, perf_regular, "g-o")
        pylab.semilogx(RANGE, perf_xval, "b-o")
        #pylab.semilogx([a*0.66 for a in RANGE], perf_xval, "b-o")
        #pylab.plot(numpy.array(perf_regular) - numpy.array(perf_xval), "y-o")
        #pylab.plot([best_idx_cost], [final_pred], "r+")

        pylab.axhline(y=final_pred, color="r")
        pylab.axvline(x=RANGE[best_idx_cost], color="r")
        pylab.axvline(x=1.0, color="g")

        pylab.ylabel(TARGET_MEASURE)
        pylab.xlabel(TARGET_PARAM)

        pylab.legend(("outer", "inner xval"), loc="best")

        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , degree:" + str(param.wdk_degree) + ", split:" + str(SPLIT_POINTER))
        pylab.show()
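# The main() above relies on module-level configuration globals that are not shown in
# this excerpt: SPLIT_POINTER, RANGE, TARGET_PARAM, TARGET_TASK and TARGET_MEASURE.
# A plausible set of definitions follows; the names are taken from main() itself, but
# the values are illustrative assumptions only.
import numpy

SPLIT_POINTER = 1
TARGET_PARAM = "cost"
TARGET_TASK = "toy"
TARGET_MEASURE = "auROC"
RANGE = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(0.1), numpy.log(10), 10))]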
##################################################################
# data
##################################################################

# fetch data
instance_set = mss.get_train_data(-1)

# prepare data
data = PreparedMultitaskData(instance_set, shuffle=True)

# set parameters
param = Options()
param.kernel = "WeightedDegreeStringKernel"
param.wdk_degree = 4
param.cost = 1.0
param.transform = 1.0
param.id = 666
param.freeze()

##################################################################
# taxonomy
##################################################################

taxonomy = shogun_factory.create_taxonomy(mss.taxonomy.data)