def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 399.

    A frozen ``Options`` object is configured for the
    "WeightedDegreeRBFKernel" with "L2" boosting, the train/eval data of
    split pointer -1 are fetched, the method is trained and evaluated, and
    the resulting assessment is printed before ``destroySelf()`` is called
    on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = -1

    # Dataset under test (ids 387 and 407 were left disabled by the author).
    split_set = MultiSplitSet.get(399)

    # Disabled boosting alternatives in the original: "ones", "L1", "L2_reg".
    run_flags = {
        "boosting": "L2",
        "signum": False,
        "normalize_cost": True,
        "all_positions": False,
        "wdk_rbf_on": False,
    }

    # Mock parameter object built from a freezable struct.
    # Disabled kernel alternatives: "WeightedDegreeStringKernel", "PolyKernel".
    options = Options()
    options.kernel = "WeightedDegreeRBFKernel"
    options.wdk_degree = 2
    options.cost = 1.0
    options.transform = 0.2
    options.base_similarity = 1.0
    options.taxonomy = split_set.taxonomy
    options.id = 666
    options.flags = run_flags
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # Train on the training split, then score the evaluation split.
    learner = Method(options)
    learner.train(train_data)

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def check_C_testset(mss_id):
    """Sweep six log-spaced values on the test split and plot auROC/auPRC.

    The grid consists of six values, logarithmically spaced between 0.4 and
    10. For every grid value a ``Method`` (from ``method_hierarchy_svm_new``)
    is trained on the train data of split -1 and evaluated on the eval data
    of split -1. Two semilog-x plots (auROC and auPRC) are shown.

    NOTE(review): although the function name refers to C, the grid value is
    assigned to ``param.transform`` while ``param.cost`` is fixed at 10000 --
    confirm this is intended.

    Args:
        mss_id: id passed to ``expenv.MultiSplitSet.get``.

    Returns:
        Tuple ``(grid_values, au_roc, au_prc)`` of three equally long lists.
    """
    import pylab
    import expenv
    import numpy
    from helper import Options
    from method_hierarchy_svm_new import Method

    # Six points, evenly spaced in log-space between 0.4 and 10.
    log_points = numpy.linspace(numpy.log(0.4), numpy.log(10), 6)
    grid_values = [float(value) for value in numpy.exp(log_points)]
    print(grid_values)

    split_set = expenv.MultiSplitSet.get(mss_id)
    train_data = split_set.get_train_data(-1)
    test_data = split_set.get_eval_data(-1)

    roc_scores = []
    prc_scores = []

    for grid_value in grid_values:
        # Mock parameter object built from a freezable struct.
        options = Options()
        options.kernel = "WeightedDegreeStringKernel"
        options.wdk_degree = 10
        options.transform = grid_value
        options.base_similarity = 1.0
        options.taxonomy = split_set.taxonomy
        options.id = 666
        options.cost = 10000
        options.freeze()

        learner = Method(options)
        learner.train(train_data)
        result = learner.evaluate(test_data)

        roc_scores.append(result.auROC)
        prc_scores.append(result.auPRC)

        print(result)
        result.destroySelf()

    pylab.title("auROC")
    pylab.semilogx(grid_values, roc_scores, "-o")
    pylab.show()

    pylab.figure()
    pylab.title("auPRC")
    pylab.semilogx(grid_values, prc_scores, "-o")
    pylab.show()

    return (grid_values, roc_scores, prc_scores)
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 399.

    Uses the "WeightedDegreeStringKernel" with "L1" boosting. The train/eval
    data of split pointer -1 are fetched, the method is trained and
    evaluated, and the assessment is printed before ``destroySelf()`` is
    called on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = -1

    # Dataset under test (ids 387 and 407 were left disabled by the author).
    split_set = MultiSplitSet.get(399)

    # Disabled boosting alternatives in the original: "ones", "L2", "L2_reg".
    run_flags = {
        "boosting": "L1",
        "signum": False,
        "normalize_cost": True,
        "all_positions": False,
        "wdk_rbf_on": False,
    }

    # Mock parameter object built from a freezable struct.
    # "PolyKernel" was noted as an alternative kernel.
    options = Options()
    options.kernel = "WeightedDegreeStringKernel"
    options.wdk_degree = 2
    options.cost = 1.0
    options.transform = 0.2
    options.base_similarity = 1
    options.taxonomy = split_set.taxonomy
    options.id = 666
    options.flags = run_flags
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # Train on the training split, then score the evaluation split.
    learner = Method(options)
    learner.train(train_data)

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 434.

    Configures a frozen ``Options`` object for the "Promoter" kernel with a
    "liblineardual" SVM type, trains on split pointer 1, prints
    "training done", evaluates, prints the assessment and calls
    ``destroySelf()`` on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = 1
    split_set = MultiSplitSet.get(434)

    run_flags = {
        "normalize_cost": False,
        "epsilon": 1.0,  # 0.005 was noted as an alternative
        "kernel_cache": 200,
        "use_bias": False,
        # arts params
        "svm_type": "liblineardual",
        "degree": 24,
        "degree_spectrum": 4,
        "shifts": 0,  # 32 was noted as an alternative
        "center_offset": 70,
        "train_factor": 1,
    }

    # Mock parameter object built from a freezable struct.
    options = Options()
    options.kernel = "Promoter"
    options.cost = 1.0
    options.transform = 1.0
    options.id = 666
    options.flags = run_flags
    options.taxonomy = split_set.taxonomy
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    learner = Method(options)
    learner.train(train_data)
    print("training done")

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 432.

    Configures a frozen ``Options`` object for the "Promoter" kernel. The
    taxonomy handed to the method is ``multi_split_set.taxonomy.data``. The
    method is trained on split pointer 1, "training done" is printed, then
    the evaluation assessment is printed and ``destroySelf()`` is called on
    it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = 1
    split_set = MultiSplitSet.get(432)

    # An "epsilon" flag (0.005) was left disabled by the author.
    run_flags = {
        "normalize_cost": False,
        "kernel_cache": 200,
        "use_bias": False,
        # arts params
        "svm_type": "liblineardual",
        "degree": 24,
        "degree_spectrum": 4,
        "shifts": 0,  # 32 was noted as an alternative
        "center_offset": 70,
        "train_factor": 1,
        "local": False,
        "mem": "6G",
        "maxNumThreads": 1,
    }

    # Mock parameter object built from a freezable struct.
    options = Options()
    options.kernel = "Promoter"
    options.cost = 1.0
    options.transform = 1.0
    options.id = 666
    options.flags = run_flags
    options.taxonomy = split_set.taxonomy.data
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    learner = Method(options)
    learner.train(train_data)
    print("training done")

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 384.

    Configures a frozen ``Options`` object for the "PolyKernel" (sigma 3.0,
    cost 10.0), trains on split pointer 1, prints "training done",
    evaluates, prints the assessment and calls ``destroySelf()`` on it.

    NOTE(review): neither ``Options`` nor ``Method`` is imported inside this
    function; both are expected to be available at module level -- confirm.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet

    split_idx = 1
    split_set = MultiSplitSet.get(384)

    # "epsilon" (0.005) and "svm_type" ("liblineardual") were left disabled
    # by the author.
    run_flags = {
        "normalize_cost": False,
        "kernel_cache": 200,
        "use_bias": False,
        "degree": 24,
        "local": False,
        "mem": "6G",
        "maxNumThreads": 1,
    }

    # Mock parameter object built from a freezable struct.
    # "GaussianKernel" was noted as an alternative kernel.
    options = Options()
    options.kernel = "PolyKernel"
    options.sigma = 3.0
    options.cost = 10.0
    options.transform = 1.0
    options.id = 666
    options.flags = run_flags
    options.taxonomy = split_set.taxonomy.data
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    learner = Method(options)
    learner.train(train_data)
    print("training done")

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 399.

    Uses the "WeightedDegreeStringKernel" (degree 1) together with a flag
    set containing "epsilon", "cache_size", "normalize_trace" and
    "interleaved". Trains on split pointer -1, evaluates, prints the
    assessment and calls ``destroySelf()`` on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = -1
    split_set = MultiSplitSet.get(399)

    # A "solver_type" flag (ST_DIRECT / ST_CPLEX / ST_GLPK / ST_NEWTON) was
    # left disabled by the author.
    run_flags = {
        "normalize_cost": False,
        "epsilon": 0.05,
        "cache_size": 7,
        "normalize_trace": True,
        "interleaved": True,
    }

    # Mock parameter object built from a freezable struct.
    options = Options()
    options.kernel = "WeightedDegreeStringKernel"
    options.wdk_degree = 1
    options.cost = 1
    options.transform = 1  # 2.0 was noted as an alternative
    options.taxonomy = split_set.taxonomy
    options.id = 666
    options.flags = run_flags
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    learner = Method(options)
    learner.train(train_data)

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 379.

    Prints the dataset description, configures a frozen ``Options`` object
    for the "WeightedDegreeStringKernel" (degree 1, cost 1.0,
    transform 2.0), trains on split pointer 1, evaluates, prints the
    assessment and calls ``destroySelf()`` on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = 1
    split_set = MultiSplitSet.get(379)

    description = split_set.description
    print("dataset_name %s" % (description,))

    # The taxonomy is taken straight from the split set; an earlier variant
    # that wrapped it in a separate mock Options object was disabled.

    # Mock parameter object built from a freezable struct.
    options = Options()
    options.kernel = "WeightedDegreeStringKernel"
    options.wdk_degree = 1
    options.cost = 1.0
    options.transform = 2.0
    options.taxonomy = split_set.taxonomy
    options.id = 666
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train hierarchical xval
    learner = Method(options)
    learner.train(train_data)

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: build a similarity-based taxonomy and plot for set 317.

    The dataset description is passed, together with transform 1.0 and
    base 1.0, to ``fetch_gammas``; its result becomes the ``data`` of a
    frozen mock taxonomy object. A frozen parameter object for the
    "WeightedDegreeStringKernel" is then handed, with the train/eval data
    of split pointer 1, to ``create_plot_inner``.

    ``create_plot_inner`` is not imported inside this function; it is
    expected to be available at module level. Its return value is ignored.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options
    from task_similarities import fetch_gammas

    split_idx = 1

    # Dataset under test (ids 374 and 2 were left disabled by the author).
    split_set = MultiSplitSet.get(317)
    description = split_set.description

    # Positional arguments: transform, base, dataset description.
    gammas = fetch_gammas(1.0, 1.0, description)

    # Mock taxonomy object built from a freezable struct.
    mock_taxonomy = Options()
    mock_taxonomy.data = gammas
    mock_taxonomy.description = description
    mock_taxonomy.freeze()

    # Mock parameter object built from a freezable struct.
    options = Options()
    options.kernel = "WeightedDegreeStringKernel"
    options.wdk_degree = 1
    options.cost = 1.0
    options.transform = 1.0
    options.taxonomy = mock_taxonomy
    options.id = 666
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    create_plot_inner(options, train_data, eval_data)
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 379.

    Prints the dataset description, configures a frozen ``Options`` object
    for the "WeightedDegreeStringKernel" (degree 1, cost 1.0,
    transform 2.0), trains on split pointer 1, evaluates, prints the
    assessment and calls ``destroySelf()`` on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = 1
    split_set = MultiSplitSet.get(379)

    description = split_set.description
    print("dataset_name %s" % (description,))

    # The taxonomy is taken straight from the split set; an earlier variant
    # that wrapped it in a separate mock Options object was disabled.

    # Mock parameter object built from a freezable struct.
    options = Options()
    options.kernel = "WeightedDegreeStringKernel"
    options.wdk_degree = 1
    options.cost = 1.0
    options.transform = 2.0
    options.taxonomy = split_set.taxonomy
    options.id = 666
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train hierarchical xval
    learner = Method(options)
    learner.train(train_data)

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 386.

    Configures a frozen ``Options`` object for the
    "WeightedDegreeStringKernel" (degree 1, cost 1, transform 2), trains on
    split pointer -1, evaluates, prints the assessment and calls
    ``destroySelf()`` on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = -1

    # Dataset under test (id 387 was left disabled by the author).
    split_set = MultiSplitSet.get(386)

    # Mock parameter object built from a freezable struct.
    # "PolyKernel" was noted as an alternative kernel.
    options = Options()
    options.kernel = "WeightedDegreeStringKernel"
    options.wdk_degree = 1
    options.cost = 1
    options.transform = 2
    options.taxonomy = split_set.taxonomy
    options.id = 666
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    learner = Method(options)
    learner.train(train_data)

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: train and evaluate ``Method`` on multi-split set 386.

    Configures a frozen ``Options`` object for the
    "WeightedDegreeStringKernel" (degree 1, cost 100, transform 2), trains
    on split pointer -1, evaluates, prints the assessment and calls
    ``destroySelf()`` on it.

    ``Method`` is not imported inside this function; it is expected to be
    available at module level.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    split_idx = -1

    # Dataset under test (id 387 was left disabled by the author).
    split_set = MultiSplitSet.get(386)

    # Mock parameter object built from a freezable struct.
    # "PolyKernel" was noted as an alternative kernel.
    options = Options()
    options.kernel = "WeightedDegreeStringKernel"
    options.wdk_degree = 1
    options.cost = 100
    options.transform = 2
    options.taxonomy = split_set.taxonomy
    options.id = 666
    options.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    learner = Method(options)
    learner.train(train_data)

    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run: compare inner cross-validation with regular performance.

    Loads multi-split set 432, configures a frozen ``Options`` object for
    the "Promoter" kernel, and passes it with the train/eval data of
    ``SPLIT_POINTER`` to ``create_plot_inner`` and ``create_plot_regular``.
    The results are then plotted with pylab:

    * ``TARGET_PARAM == "both"``: a filled contour plot of ``perf_xval``
      over ``RANGE`` x ``RANGE``.
    * otherwise: semilog-x curves of the regular ("outer") and
      cross-validated ("inner xval") performance, with marker lines for the
      final prediction, the best index and x = 1.0.

    ``SPLIT_POINTER``, ``TARGET_PARAM``, ``TARGET_TASK``, ``TARGET_MEASURE``,
    ``RANGE``, ``create_plot_inner`` and ``create_plot_regular`` are read
    from module scope; none of them is defined in this function.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options

    # Dataset under test (ids 317, 2 "small splicing" and 377 "medium
    # splicing" were left disabled by the author).
    multi_split_set = MultiSplitSet.get(432)

    flags = {
        "normalize_cost": False,
        "epsilon": 1.0,  # 0.005 was noted as an alternative
        "kernel_cache": 1000,
        "use_bias": False,
        # arts params
        "svm_type": "liblineardual",
        "degree": 24,
        "degree_spectrum": 4,
        "shifts": 0,  # 32 was noted as an alternative
        "train_factor": 1,
        "center_offset": 70,
        "center_pos": 500,
    }

    # Mock parameter object built from a freezable struct.
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    (perf_xval, final_pred, best_idx_cost) = create_plot_inner(param, data_train, data_eval)
    perf_regular = create_plot_regular(param, data_train, data_eval)

    # plot performances
    import pylab

    if TARGET_PARAM == "both":
        cmap = pylab.cm.get_cmap('jet', 20)  # 20 discrete colors
        pylab.contourf(RANGE, RANGE, perf_xval, cmap=cmap)
        pylab.axis('on')
        pylab.colorbar()
        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , param:" + TARGET_PARAM + ", split:" + str(SPLIT_POINTER))
        pylab.show()
    else:
        pylab.semilogx(RANGE, perf_regular, "g-o")
        pylab.semilogx(RANGE, perf_xval, "b-o")

        pylab.axhline(y=final_pred, color="r")
        pylab.axvline(x=RANGE[best_idx_cost], color="r")
        pylab.axvline(x=1.0, color="g")

        pylab.ylabel(TARGET_MEASURE)
        pylab.xlabel(TARGET_PARAM)
        pylab.legend(("outer", "inner xval"), loc="best")

        # param.wdk_degree is never assigned in this function (the degree
        # for this run lives in flags["degree"]). Fall back to that value so
        # building the title cannot fail on a missing attribute; if the
        # attribute does exist, it is used exactly as before.
        degree = getattr(param, "wdk_degree", flags["degree"])
        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , degree:" + str(degree) + ", split:" + str(SPLIT_POINTER))
        pylab.show()
def check_C_testset(mss_id):
    """Sweep six log-spaced values on the test split and plot auROC/auPRC.

    The grid consists of six values, logarithmically spaced between 0.4 and
    10. For every grid value a ``Method`` (from ``method_hierarchy_svm_new``)
    is trained on the train data of split -1 and evaluated on the eval data
    of split -1. Two semilog-x plots (auROC and auPRC) are shown.

    NOTE(review): although the function name refers to C, the grid value is
    assigned to ``param.transform`` while ``param.cost`` is fixed at 10000 --
    confirm this is intended.

    Args:
        mss_id: id passed to ``expenv.MultiSplitSet.get``.

    Returns:
        Tuple ``(grid_values, au_roc, au_prc)`` of three equally long lists.
    """
    import pylab
    import expenv
    import numpy
    from helper import Options
    from method_hierarchy_svm_new import Method

    # Six points, evenly spaced in log-space between 0.4 and 10.
    log_points = numpy.linspace(numpy.log(0.4), numpy.log(10), 6)
    grid_values = [float(value) for value in numpy.exp(log_points)]
    print(grid_values)

    split_set = expenv.MultiSplitSet.get(mss_id)
    train_data = split_set.get_train_data(-1)
    test_data = split_set.get_eval_data(-1)

    roc_scores = []
    prc_scores = []

    for grid_value in grid_values:
        # Mock parameter object built from a freezable struct.
        options = Options()
        options.kernel = "WeightedDegreeStringKernel"
        options.wdk_degree = 10
        options.transform = grid_value
        options.base_similarity = 1.0
        options.taxonomy = split_set.taxonomy
        options.id = 666
        options.cost = 10000
        options.freeze()

        learner = Method(options)
        learner.train(train_data)
        result = learner.evaluate(test_data)

        roc_scores.append(result.auROC)
        prc_scores.append(result.auPRC)

        print(result)
        result.destroySelf()

    pylab.title("auROC")
    pylab.semilogx(grid_values, roc_scores, "-o")
    pylab.show()

    pylab.figure()
    pylab.title("auPRC")
    pylab.semilogx(grid_values, prc_scores, "-o")
    pylab.show()

    return (grid_values, roc_scores, prc_scores)