def main(): print "starting debugging:" SPLIT_POINTER = -1 from expenv import MultiSplitSet from helper import Options # select dataset #multi_split_set = MultiSplitSet.get(387) #multi_split_set = MultiSplitSet.get(407) multi_split_set = MultiSplitSet.get(399) #dataset_name = multi_split_set.description # create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeRBFKernel" #"WeightedDegreeStringKernel"#"PolyKernel" param.wdk_degree = 2 param.cost = 1.0 param.transform = 0.2 param.base_similarity = 1.0 param.taxonomy = multi_split_set.taxonomy param.id = 666 flags= {} #flags["boosting"] = "ones" #flags["boosting"] = "L1" flags["boosting"] = "L2" #flags["boosting"] = "L2_reg" flags["signum"] = False flags["normalize_cost"] = True flags["all_positions"] = False flags["wdk_rbf_on"] = False param.flags = flags param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet from helper import Options # select dataset multi_split_set = MultiSplitSet.get(434) # flags flags = {} flags["normalize_cost"] = False flags["epsilon"] = 1.0 #0.005 flags["kernel_cache"] = 200 flags["use_bias"] = False # arts params flags["svm_type"] = "liblineardual" flags["degree"] = 24 flags["degree_spectrum"] = 4 flags["shifts"] = 0 #32 flags["center_offset"] = 70 flags["train_factor"] = 1 #create mock param object by freezable struct param = Options() param.kernel = "Promoter" param.cost = 1.0 param.transform = 1.0 param.id = 666 param.flags = flags param.taxonomy = multi_split_set.taxonomy param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) print "training done" assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = -1 from expenv import MultiSplitSet from helper import Options # select dataset #multi_split_set = MultiSplitSet.get(387) #multi_split_set = MultiSplitSet.get(407) multi_split_set = MultiSplitSet.get(399) #dataset_name = multi_split_set.description # create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeStringKernel"#"PolyKernel" param.wdk_degree = 2 param.cost = 1.0 param.transform = 0.2 param.base_similarity = 1 param.taxonomy = multi_split_set.taxonomy param.id = 666 flags= {} #flags["boosting"] = "ones" flags["boosting"] = "L1" #flags["boosting"] = "L2" #flags["boosting"] = "L2_reg" flags["signum"] = False flags["normalize_cost"] = True flags["all_positions"] = False flags["wdk_rbf_on"] = False param.flags = flags param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet # select dataset multi_split_set = MultiSplitSet.get(384) # flags flags = {} flags["normalize_cost"] = False #flags["epsilon"] = 0.005 flags["kernel_cache"] = 200 flags["use_bias"] = False # arts params #flags["svm_type"] = "liblineardual" flags["degree"] = 24 flags["local"] = False flags["mem"] = "6G" flags["maxNumThreads"] = 1 #create mock param object by freezable struct param = Options() #param.kernel = "GaussianKernel" param.kernel = "PolyKernel" param.sigma = 3.0 param.cost = 10.0 param.transform = 1.0 param.id = 666 param.flags = flags param.taxonomy = multi_split_set.taxonomy.data param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) print "training done" assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet from helper import Options # select dataset multi_split_set = MultiSplitSet.get(432) # flags flags = {} flags["normalize_cost"] = False #flags["epsilon"] = 0.005 flags["kernel_cache"] = 200 flags["use_bias"] = False # arts params flags["svm_type"] = "liblineardual" flags["degree"] = 24 flags["degree_spectrum"] = 4 flags["shifts"] = 0 #32 flags["center_offset"] = 70 flags["train_factor"] = 1 flags["local"] = False flags["mem"] = "6G" flags["maxNumThreads"] = 1 #create mock param object by freezable struct param = Options() param.kernel = "Promoter" param.cost = 1.0 param.transform = 1.0 param.id = 666 param.flags = flags param.taxonomy = multi_split_set.taxonomy.data param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) print "training done" assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = -1 from expenv import MultiSplitSet from helper import Options # select dataset multi_split_set = MultiSplitSet.get(399) #dataset_name = multi_split_set.description flags = {} flags["normalize_cost"] = False flags["epsilon"] = 0.05 flags["cache_size"] = 7 #flags["solver_type"] = "ST_DIRECT" #ST_CPLEX #ST_GLPK) #ST_DIRECT) #ST_NEWTON) flags["normalize_trace"] = True flags["interleaved"] = True #create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeStringKernel" param.wdk_degree = 1 param.cost = 1 param.transform = 1 #2.0 param.taxonomy = multi_split_set.taxonomy param.id = 666 param.flags = flags param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet from helper import Options from task_similarities import fetch_gammas # select dataset multi_split_set = MultiSplitSet.get(317) #multi_split_set = MultiSplitSet.get(374) #multi_split_set = MultiSplitSet.get(2) dataset_name = multi_split_set.description transform = 1.0 base = 1.0 similarity_matrix = fetch_gammas(transform, base, dataset_name) #create mock taxonomy object by freezable struct taxonomy = Options() taxonomy.data = similarity_matrix taxonomy.description = dataset_name taxonomy.freeze() #create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeStringKernel" param.wdk_degree = 1 param.cost = 1.0 param.transform = 1.0 param.taxonomy = taxonomy param.id = 666 param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) create_plot_inner(param, data_train, data_eval)
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet from helper import Options # select dataset multi_split_set = MultiSplitSet.get(379) dataset_name = multi_split_set.description print "dataset_name", dataset_name #create mock taxonomy object by freezable struct #taxonomy = Options() #taxonomy.data = taxonomy_graph.data #taxonomy.description = dataset_name #taxonomy.freeze() #create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeStringKernel" param.wdk_degree = 1 param.cost = 1.0 param.transform = 2.0 param.taxonomy = multi_split_set.taxonomy param.id = 666 param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train hierarchical xval mymethod = Method(param) mymethod.train(data_train) assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf();
def training_for_sigma(sigma):

    print "starting debugging:"

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    multi_split_set = MultiSplitSet.get(393)
    SPLIT_POINTER = 1

    # create mock param object by freezable struct
    param = Options()
    param.kernel = "WeightedDegreeStringKernel" #"WeightedDegreeRBFKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.base_similarity = sigma
    param.degree = 2
    param.flags = {}
    param.flags["wdk_rbf_on"] = False
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)
    print "training done"

    assessment = mymethod.evaluate(data_eval)
    print assessment

    # read the performance measure before the record is destroyed
    auROC = assessment.auROC
    assessment.destroySelf()

    return auROC
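# A minimal driver for training_for_sigma, sweeping the base similarity and
# collecting the returned auROC values. The sigma grid below is illustrative
# only; the original code does not specify one.
def sweep_sigmas():
    import numpy
    sigmas = [float(s) for s in numpy.exp(numpy.linspace(numpy.log(0.1), numpy.log(10.0), 5))]
    results = [(sigma, training_for_sigma(sigma)) for sigma in sigmas]
    for (sigma, auROC) in results:
        print "sigma: %.3f, auROC: %.4f" % (sigma, auROC)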
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet from helper import Options # select dataset multi_split_set = MultiSplitSet.get(379) dataset_name = multi_split_set.description print "dataset_name", dataset_name #create mock taxonomy object by freezable struct #taxonomy = Options() #taxonomy.data = taxonomy_graph.data #taxonomy.description = dataset_name #taxonomy.freeze() #create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeStringKernel" param.wdk_degree = 1 param.cost = 1.0 param.transform = 2.0 param.taxonomy = multi_split_set.taxonomy param.id = 666 param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train hierarchical xval mymethod = Method(param) mymethod.train(data_train) assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = -1 from expenv import MultiSplitSet from helper import Options # select dataset #multi_split_set = MultiSplitSet.get(387) multi_split_set = MultiSplitSet.get(386) #dataset_name = multi_split_set.description # create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeStringKernel"#"PolyKernel" param.wdk_degree = 1 param.cost = 100 param.transform = 2 #2.0 param.taxonomy = multi_split_set.taxonomy param.id = 666 param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet from helper import Options # select dataset multi_split_set = MultiSplitSet.get(399) #create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeRBFKernel" #"WeightedDegreeStringKernel"# # param.wdk_degree = 1 param.cost = 1.0 param.transform = 1.0 param.sigma = 1.0 param.id = 666 param.base_similarity = 1 param.degree = 2 param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) print "training done" assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = -1 from expenv import MultiSplitSet from helper import Options # select dataset #multi_split_set = MultiSplitSet.get(387) multi_split_set = MultiSplitSet.get(386) #dataset_name = multi_split_set.description # create mock param object by freezable struct param = Options() param.kernel = "WeightedDegreeStringKernel"#"PolyKernel" param.wdk_degree = 1 param.cost = 1 param.transform = 2 #2.0 param.taxonomy = multi_split_set.taxonomy param.id = 666 param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def main(): print "starting debugging:" SPLIT_POINTER = 1 from expenv import MultiSplitSet from helper import Options # select dataset multi_split_set = MultiSplitSet.get(384) # flags flags = {} flags["normalize_cost"] = False flags["kernel_cache"] = 1000 flags["use_bias"] = False #flags["debug"] = False #create mock param object by freezable struct param = Options() param.kernel = "PolyKernel" param.cost = 100.0 param.id = 1 param.flags = flags param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) # train mymethod = Method(param) mymethod.train(data_train) print "training done" assessment = mymethod.evaluate(data_eval) print assessment assessment.destroySelf()
def run_multi_example(dataset_idx, mymethod, comment):
    """
    sets up and runs experiment
    """

    #######################################
    # fix parameters
    #######################################

    flags = {}

    # general
    flags["normalize_cost"] = True #False
    flags["epsilon"] = 0.03
    flags["cache_size"] = 500

    # Boosting
    #flags["boosting"] = "ones"
    #flags["boosting"] = "L1"
    #flags["boosting"] = "L2"
    flags["boosting"] = "L2_reg"
    #flags["use_all_nodes"] = False
    flags["signum"] = False
    #flags["all_positions"] = True

    # MKL
    #flags["solver_type"] = "ST_DIRECT" #ST_CPLEX #ST_GLPK #ST_NEWTON
    #flags["normalize_trace"] = True
    #flags["interleaved"] = True
    #flags["mkl_q"] = 0

    # WDK_RBF
    flags["wdk_rbf_on"] = False

    # define parameter search space
    #costs = [float(numpy.power(10, 3.58))]
    costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(1000), numpy.log(100000), 8))]
    #costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(float(numpy.power(10, 3))), numpy.log(10000), 4))]
    #costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(0.01), numpy.log(1000), 8))]
    #costs = [float(c) for c in numpy.exp(numpy.linspace(numpy.log(10), numpy.log(2000), 10))]
    costs.reverse()

    degrees = [1, 2, 3, 4, 5] #[1, 5, 10, 15, 20, 22]
    #print "WARNING: Degree is ONE"

    base_similarities = [200] #[float(c) for c in numpy.exp(numpy.linspace(numpy.log(1), numpy.log(1000), 8))]
    #base_similarities = [float(c) for c in numpy.linspace(1, 5000, 6)] #[1]

    #transform_params = [float(c) for c in numpy.linspace(1, 10000, 6)]
    #transform_params = [1, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]
    #transform_params = [float(c) for c in numpy.linspace(0.01, 0.99, 6)]
    transform_params = [0.99]

    generation_parameters = locals()

    #######################################
    # create experiment
    #######################################

    # select dataset
    multi_split_set = MultiSplitSet.get(dataset_idx)
    dataset_name = multi_split_set.description

    print "method:", mymethod
    print "dataset:", dataset_name
    print "multi split set id:", dataset_idx

    experiment_description = dataset_name + " (" + mymethod + ") " + comment

    # allow different features/kernel types
    feature_type = multi_split_set.feature_type

    if feature_type == "string":
        kernel_type = "WeightedDegreeStringKernel"
    else:
        kernel_type = "PolyKernel"

    # note: overrides the choice above for all feature types
    kernel_type = "WeightedDegreeRBFKernel"

    # create experiment
    experiment = MultiSourceExperiment(split_set=multi_split_set,
                                       description=experiment_description,
                                       method_name=mymethod,
                                       meta_data=generation_parameters)

    print "experiment id:", experiment.id

    #######################################
    # create runs
    #######################################

    if multi_split_set.taxonomy is None:
        print "WARNING: NO taxonomy set, generating one for dataset " + dataset_name
        taxonomy = dataset_to_hierarchy(dataset_name)
    else:
        taxonomy = multi_split_set.taxonomy

    for cost in costs:
        for degree in degrees:
            for base in base_similarities:
                for transform in transform_params:

                    param = ParameterMultiSvm(cost=cost,
                                              wdk_degree=degree,
                                              base_similarity=base,
                                              transform=transform,
                                              taxonomy=taxonomy,
                                              kernel=kernel_type,
                                              flags=flags)
                    print param

                    Method(module_name=mymethod, param=param, experiment=experiment)

    # skip model selection if we only have one model
    if len(experiment.methods) > 1:

        # create evaluation runs based on splits and methods
        run_ids = [run.id for run in experiment.create_eval_runs()]

        # execute runs
        execute_runs(run_ids)

        # finally perform model selection and retrain
        select_best_and_test(experiment, target)
        #experiment.select_best_method(target)

    return experiment.id
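# A hypothetical invocation of run_multi_example; the dataset index, method
# module name, and comment below are placeholders, not values taken from the
# original experiments.
if __name__ == "__main__":
    experiment_id = run_multi_example(399, "method_hierarchy", "debug sweep over costs and degrees")
    print "finished experiment:", experiment_id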
def main(): print "starting debugging:" from expenv import MultiSplitSet from helper import Options from task_similarities import dataset_to_hierarchy # select dataset #multi_split_set = MultiSplitSet.get(317) multi_split_set = MultiSplitSet.get(432) #multi_split_set = MultiSplitSet.get(2) #small splicing #multi_split_set = MultiSplitSet.get(377) #medium splicing dataset_name = multi_split_set.description # flags flags = {} flags["normalize_cost"] = False flags["epsilon"] = 1.0 #0.005 flags["kernel_cache"] = 1000 flags["use_bias"] = False # arts params flags["svm_type"] = "liblineardual" flags["degree"] = 24 flags["degree_spectrum"] = 4 flags["shifts"] = 0 #32 flags["train_factor"] = 1 flags["center_offset"] = 70 flags["center_pos"] = 500 #create mock param object by freezable struct param = Options() param.kernel = "Promoter" param.cost = 1.0 param.transform = 1.0 param.id = 666 param.flags = flags param.taxonomy = multi_split_set.taxonomy param.freeze() data_train = multi_split_set.get_train_data(SPLIT_POINTER) data_eval = multi_split_set.get_eval_data(SPLIT_POINTER) (perf_xval, final_pred, best_idx_cost) = create_plot_inner(param, data_train, data_eval) perf_regular = create_plot_regular(param, data_train, data_eval) # plot performances import pylab if TARGET_PARAM=="both": #X,Y = pylab.meshgrid(range(len(RANGE)), range(len(RANGE))) cmap = pylab.cm.get_cmap('jet', 20) # 10 discrete colors pylab.contourf(RANGE, RANGE, perf_xval, cmap=cmap) #im = pylab.imshow(perf_xval, cmap=cmap, interpolation='bilinear') pylab.axis('on') pylab.colorbar() pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , param:" + TARGET_PARAM + ", split:" + str(SPLIT_POINTER)) pylab.show() else: pylab.semilogx(RANGE, perf_regular, "g-o") pylab.semilogx(RANGE, perf_xval, "b-o") #pylab.semilogx([a*0.66 for a in RANGE], perf_xval, "b-o") #pylab.plot(numpy.array(perf_regular) - numpy.array(perf_xval), "y-o") #pylab.plot([best_idx_cost], [final_pred], "r+") pylab.axhline(y=final_pred, color="r") pylab.axvline(x=RANGE[best_idx_cost], color="r") pylab.axvline(x=1.0, color="g") pylab.ylabel(TARGET_MEASURE) pylab.xlabel(TARGET_PARAM) pylab.legend( ("outer", "inner xval"), loc="best") pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , degree:" + str(param.wdk_degree) + ", split:" + str(SPLIT_POINTER)) pylab.show()