def main():
    """Run one train/evaluate cycle on MultiSplitSet 399 for debugging.

    Builds an ``Options`` parameter object by hand, freezes it, trains a
    ``Method`` on the train data of split ``-1``, evaluates it on the eval
    data of the same split, prints the resulting assessment and finally
    calls ``destroySelf()`` on that assessment.
    """
    print("starting debugging:")

    split_idx = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset (ids 387 and 407 were earlier alternatives)
    split_set = MultiSplitSet.get(399)

    # other "boosting" values tried in this script: "ones", "L1", "L2_reg"
    run_flags = {
        "boosting": "L2",
        "signum": False,
        "normalize_cost": True,
        "all_positions": False,
        "wdk_rbf_on": False,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    # alternatives tried: "WeightedDegreeStringKernel", "PolyKernel"
    param.kernel = "WeightedDegreeRBFKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 0.2
    param.base_similarity = 1.0
    param.taxonomy = split_set.taxonomy
    param.id = 666
    param.flags = run_flags
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
예제 #2
0
def main():
    """Run one train/evaluate cycle on MultiSplitSet 399 for debugging.

    Uses the "WeightedDegreeStringKernel" kernel with "L1" boosting. The
    parameter object is built by hand and frozen, a ``Method`` is trained
    on split ``-1`` and evaluated on the matching eval data; the assessment
    is printed and then ``destroySelf()`` is called on it.
    """
    print("starting debugging:")

    split_idx = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset (ids 387 and 407 were earlier alternatives)
    split_set = MultiSplitSet.get(399)

    # other "boosting" values tried in this script: "ones", "L2", "L2_reg"
    run_flags = {
        "boosting": "L1",
        "signum": False,
        "normalize_cost": True,
        "all_positions": False,
        "wdk_rbf_on": False,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"  # alternative: "PolyKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 0.2
    param.base_similarity = 1
    param.taxonomy = split_set.taxonomy
    param.id = 666
    param.flags = run_flags
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run of the "Promoter" kernel on MultiSplitSet 434, split 1.

    Assembles the flag dictionary and a frozen ``Options`` object, trains
    a ``Method`` on the train data, evaluates it on the eval data, prints
    the assessment and then calls ``destroySelf()`` on it.
    """
    print("starting debugging:")

    split_idx = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    split_set = MultiSplitSet.get(434)

    run_flags = {
        # general flags
        "normalize_cost": False,
        "epsilon": 1.0,  # 0.005 was used previously
        "kernel_cache": 200,
        "use_bias": False,
        # arts params
        "svm_type": "liblineardual",
        "degree": 24,
        "degree_spectrum": 4,
        "shifts": 0,  # 32 was used previously
        "center_offset": 70,
        "train_factor": 1,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = run_flags
    param.taxonomy = split_set.taxonomy
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)
    print("training done")

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
예제 #4
0
def main():
    """Debug run of the "Promoter" kernel on MultiSplitSet 432, split 1.

    Compared with a plain run, the flags additionally carry "local", "mem"
    and "maxNumThreads" entries, and the taxonomy handed to the parameter
    object is ``multi_split_set.taxonomy.data``. After training and
    evaluation the assessment is printed and ``destroySelf()`` is called
    on it.
    """
    print("starting debugging:")

    split_idx = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    split_set = MultiSplitSet.get(432)

    run_flags = {
        # general flags ("epsilon" = 0.005 is currently disabled)
        "normalize_cost": False,
        "kernel_cache": 200,
        "use_bias": False,
        # arts params
        "svm_type": "liblineardual",
        "degree": 24,
        "degree_spectrum": 4,
        "shifts": 0,  # 32 was used previously
        "center_offset": 70,
        "train_factor": 1,
        # execution settings
        "local": False,
        "mem": "6G",
        "maxNumThreads": 1,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = run_flags
    param.taxonomy = split_set.taxonomy.data
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)
    print("training done")

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
예제 #5
0
def main():
    """Debug run of the "PolyKernel" kernel on MultiSplitSet 384, split 1.

    Builds the flags and a frozen ``Options`` object (``Options`` is not
    imported inside this function, so it must already be in scope), trains
    a ``Method``, evaluates it, prints the assessment and then calls
    ``destroySelf()`` on it.
    """
    print("starting debugging:")

    split_idx = 1

    from expenv import MultiSplitSet

    # select dataset
    split_set = MultiSplitSet.get(384)

    run_flags = {
        # general flags ("epsilon" = 0.005 and the "liblineardual"
        # "svm_type" are currently disabled)
        "normalize_cost": False,
        "kernel_cache": 200,
        "use_bias": False,
        "degree": 24,
        # execution settings
        "local": False,
        "mem": "6G",
        "maxNumThreads": 1,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    param.kernel = "PolyKernel"  # alternative: "GaussianKernel"
    param.sigma = 3.0
    param.cost = 10.0
    param.transform = 1.0
    param.id = 666
    param.flags = run_flags
    param.taxonomy = split_set.taxonomy.data
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)
    print("training done")

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
def main():
    """Debug run of "WeightedDegreeStringKernel" on MultiSplitSet 399.

    Uses split ``-1`` with degree 1, trains a ``Method`` on the train
    data, evaluates it on the eval data, prints the assessment and then
    calls ``destroySelf()`` on it.
    """
    print("starting debugging:")

    split_idx = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    split_set = MultiSplitSet.get(399)

    # "solver_type" (e.g. "ST_DIRECT", ST_CPLEX, ST_GLPK, ST_NEWTON) is
    # currently left unset
    run_flags = {
        "normalize_cost": False,
        "epsilon": 0.05,
        "cache_size": 7,
        "normalize_trace": True,
        "interleaved": True,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1
    param.transform = 1  # 2.0 was used previously
    param.taxonomy = split_set.taxonomy
    param.id = 666
    param.flags = run_flags
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
예제 #7
0
def main():
    """Debug run of "WeightedDegreeStringKernel" on MultiSplitSet 399.

    Uses split ``-1`` with degree 1: a frozen ``Options`` object is built
    by hand, a ``Method`` is trained and evaluated, the assessment is
    printed and ``destroySelf()`` is then called on it.
    """
    print("starting debugging:")

    split_idx = -1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    split_set = MultiSplitSet.get(399)

    # "solver_type" (e.g. "ST_DIRECT", ST_CPLEX, ST_GLPK, ST_NEWTON) is
    # currently left unset
    run_flags = {
        "normalize_cost": False,
        "epsilon": 0.05,
        "cache_size": 7,
        "normalize_trace": True,
        "interleaved": True,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 1
    param.cost = 1
    param.transform = 1  # 2.0 was used previously
    param.taxonomy = split_set.taxonomy
    param.id = 666
    param.flags = run_flags
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
예제 #8
0
def training_for_sigma(sigma):
    """Train and evaluate once with ``base_similarity`` set to ``sigma``.

    Loads MultiSplitSet 393, builds a frozen ``Options`` object for the
    "WeightedDegreeStringKernel" kernel, trains a ``Method`` on the train
    data of split 1 and evaluates it on the eval data of the same split.
    The assessment is printed and ``destroySelf()`` is called on it.

    Args:
        sigma: value stored as ``param.base_similarity``.

    Returns:
        The ``auROC`` attribute of the assessment returned by
        ``Method.evaluate``.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet

    # select dataset
    multi_split_set = MultiSplitSet.get(393)

    SPLIT_POINTER = 1

    # create mock param object by freezable struct
    param = Options()
    # alternative kernel tried: "WeightedDegreeRBFKernel"
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.base_similarity = sigma
    param.degree = 2
    param.flags = {"wdk_rbf_on": False}
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print("training done")

    assessment = mymethod.evaluate(data_eval)

    print(assessment)

    # Capture the score BEFORE destroySelf(): the original read auROC from
    # the object after it had been destroyed.
    # NOTE(review): presumably `assessment` is a persistent (SQLObject-style)
    # row whose attributes are not reliably readable afterwards - confirm.
    au_roc = assessment.auROC

    assessment.destroySelf()

    return au_roc
예제 #9
0
def main():
    """Debug run of the "PolyKernel" kernel on MultiSplitSet 384, split 1.

    Builds a small flag dictionary and a frozen ``Options`` object with
    cost 100.0, trains a ``Method``, evaluates it, prints the assessment
    and then calls ``destroySelf()`` on it.
    """
    print("starting debugging:")

    split_idx = 1

    from expenv import MultiSplitSet
    from helper import Options

    # select dataset
    split_set = MultiSplitSet.get(384)

    # a "debug" flag (False) is currently left unset
    run_flags = {
        "normalize_cost": False,
        "kernel_cache": 1000,
        "use_bias": False,
    }

    # mock parameter object: a struct that is frozen once fully populated
    param = Options()
    param.kernel = "PolyKernel"
    param.cost = 100.0
    param.id = 1
    param.flags = run_flags
    param.freeze()

    train_data = split_set.get_train_data(split_idx)
    eval_data = split_set.get_eval_data(split_idx)

    # train
    learner = Method(param)
    learner.train(train_data)
    print("training done")

    # evaluate, report, then discard the assessment object
    result = learner.evaluate(eval_data)
    print(result)
    result.destroySelf()
예제 #10
0
    def setUp(self):
        """Load the data of run 13490 and install a mock parameter object.

        Sets ``self.instances`` and ``self.test_data`` from the run's train
        and eval data, and leaves ``self.param`` pointing at a hand-built
        ``Options`` object for the "GaussianKernel" kernel.
        """
        import expenv

        stored_run = expenv.Run.get(13490)
        self.instances = stored_run.get_train_data()
        self.test_data = stored_run.get_eval_data()

        # NOTE: this value is replaced by the mock object assigned below.
        self.param = stored_run.method.param

        # mock parameter object (never frozen in this fixture)
        mock_param = Options()
        mock_param.kernel = "GaussianKernel"
        mock_param.sigma = 3.0
        mock_param.cost = 10.0
        mock_param.flags = {"kernel_cache": 200}

        self.param = mock_param
예제 #11
0
    def setUp(self):
        """Load the data of run 13490 and install a mock parameter object.

        Sets ``self.instances`` and ``self.test_data`` from the run's train
        and eval data, and leaves ``self.param`` pointing at a hand-built
        ``Options`` object for the "GaussianKernel" kernel.
        """
        import expenv

        stored_run = expenv.Run.get(13490)
        self.instances = stored_run.get_train_data()
        self.test_data = stored_run.get_eval_data()

        # NOTE: this value is replaced by the mock object assigned below.
        self.param = stored_run.method.param

        # mock parameter object (never frozen in this fixture)
        mock_param = Options()
        mock_param.kernel = "GaussianKernel"
        mock_param.sigma = 3.0
        mock_param.cost = 10.0
        mock_param.flags = {"kernel_cache": 200}

        self.param = mock_param
예제 #12
0
def training_for_sigma(sigma):
    """Train and evaluate once with ``base_similarity`` set to ``sigma``.

    Loads MultiSplitSet 393, builds a frozen ``Options`` object for the
    "WeightedDegreeStringKernel" kernel, trains a ``Method`` on the train
    data of split 1 and evaluates it on the eval data of the same split.
    The assessment is printed and ``destroySelf()`` is called on it.

    Args:
        sigma: value stored as ``param.base_similarity``.

    Returns:
        The ``auROC`` attribute of the assessment returned by
        ``Method.evaluate``.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet

    # select dataset
    multi_split_set = MultiSplitSet.get(393)

    SPLIT_POINTER = 1

    # create mock param object by freezable struct
    param = Options()
    # alternative kernel tried: "WeightedDegreeRBFKernel"
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 2
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.base_similarity = sigma
    param.degree = 2
    param.flags = {"wdk_rbf_on": False}
    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    # train
    mymethod = Method(param)
    mymethod.train(data_train)

    print("training done")

    assessment = mymethod.evaluate(data_eval)

    print(assessment)

    # Capture the score BEFORE destroySelf(): the original read auROC from
    # the object after it had been destroyed.
    # NOTE(review): presumably `assessment` is a persistent (SQLObject-style)
    # row whose attributes are not reliably readable afterwards - confirm.
    au_roc = assessment.auROC

    assessment.destroySelf()

    return au_roc
예제 #13
0
def define_param():
    """Build and return the frozen mock parameter object.

    Returns:
        An ``Options`` instance with ``cost`` 1.0, ``id`` 666 and a
        ``flags`` dictionary, on which ``freeze()`` has been called.
    """
    settings = {
        # general flags
        "normalize_cost": False,
        "kernel_cache": 200,
        "use_bias": False,
        "epsilon": 0.01,
        # arts params
        "svm_type": "liblineardual",
        "degree": 24,
    }

    # mock parameter object: a struct that is frozen once fully populated
    mock_param = Options()
    mock_param.cost = 1.0
    mock_param.id = 666
    mock_param.flags = settings
    mock_param.freeze()

    return mock_param
def main():
    """Compare inner cross-validation against regular evaluation and plot.

    Loads MultiSplitSet 432, builds a frozen ``Options`` object for the
    "Promoter" kernel, fetches the train/eval data of split
    ``SPLIT_POINTER`` and passes them to ``create_plot_inner`` and
    ``create_plot_regular``. The results are plotted with pylab: a filled
    contour plot when ``TARGET_PARAM == "both"``, otherwise two semilog-x
    curves with marker lines.

    Relies on names that are not defined in this function and must exist
    at module level: ``SPLIT_POINTER``, ``TARGET_PARAM``, ``TARGET_TASK``,
    ``TARGET_MEASURE``, ``RANGE``, ``Method``-related helpers
    ``create_plot_inner`` and ``create_plot_regular``.
    """
    print("starting debugging:")

    from expenv import MultiSplitSet
    from helper import Options
    # NOTE(review): unused in this function; kept in case importing
    # task_similarities has side effects - confirm and drop if not.
    from task_similarities import dataset_to_hierarchy

    # select dataset
    # (earlier alternatives: 317, 2 = small splicing, 377 = medium splicing)
    multi_split_set = MultiSplitSet.get(432)

    # flags
    flags = {}
    flags["normalize_cost"] = False
    flags["epsilon"] = 1.0  # 0.005 was used previously
    flags["kernel_cache"] = 1000
    flags["use_bias"] = False

    # arts params
    flags["svm_type"] = "liblineardual"

    flags["degree"] = 24
    flags["degree_spectrum"] = 4
    flags["shifts"] = 0  # 32 was used previously
    flags["train_factor"] = 1
    flags["center_offset"] = 70
    flags["center_pos"] = 500

    # create mock param object by freezable struct
    param = Options()
    param.kernel = "Promoter"
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.flags = flags
    param.taxonomy = multi_split_set.taxonomy

    param.freeze()

    data_train = multi_split_set.get_train_data(SPLIT_POINTER)
    data_eval = multi_split_set.get_eval_data(SPLIT_POINTER)

    (perf_xval, final_pred, best_idx_cost) = create_plot_inner(param, data_train, data_eval)
    perf_regular = create_plot_regular(param, data_train, data_eval)

    # plot performances
    import pylab

    if TARGET_PARAM == "both":

        cmap = pylab.cm.get_cmap('jet', 20)    # 20 discrete colors

        pylab.contourf(RANGE, RANGE, perf_xval, cmap=cmap)
        pylab.axis('on')
        pylab.colorbar()

        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , param:" + TARGET_PARAM + ", split:" + str(SPLIT_POINTER))

        pylab.show()

    else:

        pylab.semilogx(RANGE, perf_regular, "g-o")
        pylab.semilogx(RANGE, perf_xval, "b-o")

        # mark the prediction and the selected position from the inner run
        pylab.axhline(y=final_pred, color="r")
        pylab.axvline(x=RANGE[best_idx_cost], color="r")
        pylab.axvline(x=1.0, color="g")

        pylab.ylabel(TARGET_MEASURE)
        pylab.xlabel(TARGET_PARAM)

        pylab.legend(("outer", "inner xval"), loc="best")

        # `wdk_degree` is never set on the param built above, so reading it
        # directly would fail on a plain object; fall back to the "degree"
        # flag, which is the degree this run is configured with.
        degree = getattr(param, "wdk_degree", flags["degree"])
        pylab.title("mss:" + str(multi_split_set.id) + ", task:" + TARGET_TASK + " , degree:" + str(degree) + ", split:" + str(SPLIT_POINTER))

        pylab.show()