Example #1
	def writeFilesToDisk(self):
		# print 10*"=", "writeFilesToDisk"
		save(self.files, self.file_files)
		save(self.folders, self.file_folders)
		save(self.copyFiles, self.file_copyFiles)
		save(self.newFiles, self.file_newFiles)
		save(self.newFolders, self.file_newFolders)
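The examples in this listing all revolve around a small persistence helper whose module is not shown. A minimal sketch, assuming save(obj, path) simply pickles the object to the given path (the object-first argument order used in Examples #1 and #2), could look like this:

import pickle

def save(obj, path):
    # assumed behaviour: serialize obj to path with pickle (sketch, not the original helper)
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def load(path):
    # counterpart loader for the sketch above
    with open(path, "rb") as f:
        return pickle.load(f)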
Example #2
 def headers(self):
     """docstring for headers"""
     headers = {
         "Authorization": "Bearer " + str(self.auth.Bearer(self.user_Id))
     }
     save(self.auth.prefs, self.file_prefs)
     return headers
Example #3
    def save_predictor(self, file_name):
        """
        saves predictor to file system for later use
        
        @param file_name: file name to save predictor
        @type file_name: str
        """

        print "saving predictor to", file_name
        print self.predictor

        try:
            helper.save(file_name, self.predictor, "gzip")
        except Exception, detail:
            print "error writing predictor"
            print detail
Example #4
 def save_predictor(self, file_name):
     """
     saves predictor to file system for later use
     
     @param file_name: file name to save predictor
     @type file_name: str
     """
     
     print "saving predictor to", file_name
     print self.predictor
     
     try:
         helper.save(file_name, self.predictor, "gzip")
     except Exception, detail:
         print "error writing predictor"
         print detail
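Examples #3 and #4 call the helper with the path first, the object second, and a "gzip" flavor argument. A hedged sketch of such a signature (an assumption; the real helper.save is not shown) might be:

import gzip
import pickle

def save(filename, obj, flavor="plain"):
    # sketch: pickle obj to filename, gzip-compressing the stream when flavor == "gzip"
    opener = gzip.open if flavor == "gzip" else open
    with opener(filename, "wb") as f:
        pickle.dump(obj, f)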
Example #5
def main():
	
	(total, General, Inventory) = traverse()
	print(total)

	
	# General = hp.load('genre.txt')
	hp.save(General, 'genre.txt')
	hp.save(Inventory, 'inventory.txt')
	raw_genre_list = hp.sort_genre(General)

	print(raw_genre_list)
	

	# test-part
	# traverse()
	# structure('TRAAADZ128F9348C2E.h5', True)
	# print(extract('TRAAAAW128F429D538.h5'))
	print
Example #6
def main():

    base_dir = "/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/translation_start/"
    organisms = os.listdir(base_dir)

    for org_name in organisms:

        work_dir = base_dir + org_name + "/"

        (neg, pos) = create_seq_data(org_name, work_dir)

        result = {}
        result["pos"] = pos
        result["neg"] = neg

        print "======================="
        print "%s pos=%i, neg=%i" % (org_name, len(pos), len(neg))

        save_fn = work_dir + "seqs.pickle"

        helper.save(save_fn, result)
Example #7
def main():

    base_dir = "/fml/ag-raetsch/home/cwidmer/Documents/phd/projects/multitask/data/translation_start/"
    organisms = os.listdir(base_dir)    
        
        
    for org_name in organisms:
    
        work_dir = base_dir + org_name + "/"
            
        (neg, pos) = create_seq_data(org_name, work_dir)
        
        result = {}
        result["pos"] = pos
        result["neg"] = neg

        print "======================="
        print "%s pos=%i, neg=%i" % (org_name, len(pos), len(neg))

        save_fn = work_dir + "seqs.pickle"
        
        helper.save(save_fn, result)
Example #8
def main():

    base_dir = "data/splice"
    organisms = os.listdir(base_dir)

    for org_name in organisms:

        print "processing", org_name

        work_dir = base_dir + org_name + "/"

        (neg, pos) = create_seq_data(org_name, work_dir)

        result = {}
        result["pos"] = pos
        result["neg"] = neg

        print "======================="
        print "%s pos=%i, neg=%i" % (org_name, len(pos), len(neg))

        save_fn = work_dir + "seqs_acc.pickle"

        helper.save(save_fn, result)
Example #9
        labels_test,
        parameter_dict,
        init_dir,
        all_init_files,
        cross_validate='test')
    """ create hebbian convolution neural network """
    net = hebbian_cnn.Network(**parameter_dict)
    """ train network """
    perf_train = net.train(images_train, labels_train)
    """ test network """
    perf_test = net.test(images_test, labels_test)
    """ plot weights of the network """
    plots = helper.generate_plots(net)
    """ save network to disk """
    save_name = helper.save(net,
                            overwrite=False,
                            plots=plots,
                            save_path=save_path)
    """ collect results from multiple runs """
    perf_train_all, perf_test_all = helper.mutliruns_collect(
        n_runs, r, perf_train, perf_test, perf_train_all, perf_test_all,
        save_path_multiruns)
""" print run time """
run_stop = time.time()
print '\nrun name:\t' + save_name
print 'start time:\t' + time.strftime("%a, %d %b %Y %H:%M:%S",
                                      time.localtime(run_start))
print 'end time:\t' + time.strftime("%a, %d %b %Y %H:%M:%S",
                                    time.localtime(run_stop))
print 'train time:\t' + str(datetime.timedelta(seconds=run_stop - run_start))
Example #10
import math
import helper

VAR = 3
EPS = 1e-5

xs = range(-5, 6)
sums = []
ns = []
for x in xs:
    fx = VAR * x
    sum = fx
    i = 3
    n = 0
    while math.fabs(fx) > EPS:
        fx *= -((VAR * x)**2) / (i * (i - 1))
        sum += fx
        i += 2
        n += 1
    sums.append(round(sum, 4))
    ns.append(n)

helper.print_table(table_headers=['x', 'f(x)', 'n'],
                   table_values=[xs, sums, ns],
                   table_title='Taylor sum')

helper.save(value_list=[xs, sums], to='values.txt')
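The loop above accumulates the Taylor series of sin(VAR * x) until the next term drops below EPS. As a quick sanity check (an addition, assuming Python 3 division semantics and reusing the math import above), the rounded partial sums can be compared against math.sin:

# each rounded partial sum should approximate sin(3 * x) to well within 1e-3
for x, s in zip(xs, sums):
    assert abs(s - math.sin(VAR * x)) < 1e-3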
Example #11
                                          shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=64)
validloader = torch.utils.data.DataLoader(validate_data, batch_size=64)

if args.arch == 'vgg':
    input_size = 25088
    model = models.vgg16(pretrained=True)
elif args.arch == 'resnet':
    input_size = 2048
    model = models.resnet50(pretrained=True)  # was models.alexnet, which matches neither the arch name nor input_size

# freeze the pretrained feature extractor
for param in model.parameters():
    param.requires_grad = False

classifier = nn.Sequential(nn.Linear(input_size, args.hidden_layers),
                           nn.ReLU(), nn.Dropout(p=0.5),
                           nn.Linear(args.hidden_layers, 102),
                           nn.LogSoftmax(dim=1))

# ResNet exposes its head as `fc`, VGG as `classifier`
if args.arch == 'resnet':
    model.fc = classifier
else:
    model.classifier = classifier
print(model)

criterion = nn.NLLLoss()
device = args.gpu
optimizer = optim.Adam(classifier.parameters(), args.lr)
loss, accuracy = helper.validate(model, criterion, testloader, device)
print(f"loss: {loss} \n Accuracy: {accuracy}")
epochs = args.epochs
model = helper.train(model, optimizer, criterion, epochs, trainloader,
                     validloader, device)
helper.accuracy(model, testloader, device)
helper.save(model, train_data, args.arch, input_size, args.hidden_layers,
            epochs, args.lr)
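helper.train, helper.validate, helper.accuracy and helper.save are not shown in this example. A plausible sketch of what a checkpoint-style save could do (an assumption about the interface, not the project's actual helper) is:

import torch

def save(model, train_data, arch, input_size, hidden_layers, epochs, lr,
         path='checkpoint.pth'):
    # sketch: store the weights plus enough metadata to rebuild the classifier later
    checkpoint = {
        'arch': arch,
        'input_size': input_size,
        'hidden_layers': hidden_layers,
        'epochs': epochs,
        'lr': lr,
        'class_to_idx': train_data.class_to_idx,  # assumes an ImageFolder-style dataset
        'state_dict': model.state_dict(),
    }
    torch.save(checkpoint, path)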
Example #12
			if Tag.has_key(ID): # consider
				if I2S.has_key(ID): # already done
					pass
				else: # do something
					count += 1
					(artist, album, title) = hp.abstract_title(filename)
					I2S[ID] = [artist, title]
			pass
	print 'new added: ', count

	return I2S


i2s = file('filter_ID_SONG.dic', 'r')
id_song = eval(i2s.read())
i2s.close()
print 'already has: ', len(id_song)

my_tag = file('my_tag.dic', 'r')
tag = eval(my_tag.read())
my_tag.close()
print 'total has: ', len(tag)


new_i2s = traverse(id_song, tag)

print 'new total has: ', len(new_i2s)
hp.save(new_i2s, 'new_i2s.dic')


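The snippet above reads dictionaries back by calling eval on the file contents, which executes arbitrary code. A safer sketch for the same plain-dict files (an alternative, not what the original uses) relies on ast.literal_eval:

import ast

def load_dict(path):
    # parse a Python dict literal without executing arbitrary code (hypothetical helper)
    with open(path) as f:
        return ast.literal_eval(f.read())

id_song = load_dict('filter_ID_SONG.dic')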
Example #13
def compare_solvers(d):
    """
    call different solvers, compare objectives

    available solvers:
    - finite_diff_primal
    - cvxopt_dual_solver
    - finite_diff_dual
    - dcd
    - dcd_shrinking
    - dcd_shogun
    - mtk_shogun
    """

    data_name = d["data_name"]
    min_interval = d["min_interval"]

    #solvers = ["dcd_shogun", "mtk_shogun"]
    solvers = ["mtk_shogun"]
    #solvers = ["dcd_shogun"]

    plot = False

    data, task_sim = get_data(data_name)

    # set up plot
    if plot:
        import pylab
        fig = pylab.figure()

    print "computing true objective"
    # determine true objective
    record_interval = 0
    solver = dcd.train_mtl_svm(data, task_sim, "dcd_shogun", 1e-9,
                               record_interval, min_interval)
    #solver = dcd.train_mtl_svm(data, task_sim, "mtk_shogun", 1e-9)
    true_obj = -solver.final_dual_obj
    #true_obj = solver.final_primal_obj
    #true_obj = -solver.dual_objectives[-1] #solver.final_dual_obj

    print "true objective computed:", true_obj

    for s_idx, solver_name in enumerate(solvers):

        print "processing solver", solver_name

        # new implementation
        if "dcd" in solver_name:
            eps = 1e-8
        else:
            eps = 1e-8

        #
        solver = dcd.train_mtl_svm(data, task_sim, solver_name, eps, 100,
                                   min_interval)

        #TODO is this working correctly????
        rd = [
            np.abs(np.abs(true_obj) - np.abs(obj))
            for obj in solver.dual_objectives
        ]
        tt = np.array(solver.train_times, dtype=np.float64) / 1000.0 + 1.0

        # save results
        dat = {}
        dat["dual_obj"] = solver.dual_objectives
        dat["primal_obj"] = solver.primal_objectives
        dat["fun_diff"] = rd
        dat["time"] = solver.train_times
        dat["true_obj"] = true_obj
        dat["solver_obj"] = solver
        dat["name"] = solver_name

        prefix = "/fml/ag-raetsch/home/cwidmer/svn/projects/2012/mtl_dcd/"
        fn = prefix + "results/result_newkids_nitro_" + data_name + "_" + solver_name + ".pickle"
        helper.save(fn, dat)

        # plot stuff
        #pylab.semilogy(num_xt, train_time[0], "o", label=solvers[0])
        if plot:
            pylab.plot(tt, rd, "-o", label=solver_name.replace("_shogun", ""))
            pylab.yscale("log")
            pylab.xscale("log")
            pylab.xlabel("time (s)")
            pylab.ylabel("relative function difference")  #TODO relative!
            pylab.grid(True)

    # plot training time
    #pylab.semilogy(num_xt, train_time[1], "o", label=solvers[1])
    if plot:
        pylab.legend(loc="best")
        fig_name = "newkids_" + data_name + ".pdf"
        fig.savefig(fig_name)
Example #14
task_names = data.get_task_names()

FACTOR = 1.0

# init gamma matrix
gammas = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

for t1_name in task_names:
    for t2_name in task_names:

        similarity = taxonomy.compute_node_similarity(taxonomy.get_id(t1_name),
                                                      taxonomy.get_id(t2_name))
        gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)] = similarity

helper.save("/tmp/gammas", gammas)

gammas = gammas * FACTOR

cost = param.cost * numpy.sqrt(FACTOR)

print gammas

##########
# regular normalizer

normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

for t1_name in task_names:
    for t2_name in task_names:
Example #15
def test_data():
    
    ##################################################################
    # select MSS
    ##################################################################
    
    mss = expenv.MultiSplitSet.get(379)
    
    
    
    ##################################################################
    # data
    ##################################################################
    
    # fetch data
    instance_set = mss.get_train_data(-1)
    
    # prepare data
    data = PreparedMultitaskData(instance_set, shuffle=True)
    
    # set parameters
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 4
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.freeze()
    
    
    
    
    ##################################################################
    # taxonomy
    ##################################################################
    
    
    taxonomy = shogun_factory.create_taxonomy(mss.taxonomy.data)
    
    
    support = numpy.linspace(0, 100, 4)
    
    
    distances = [[0, 1, 2, 2], [1, 0, 2, 2], [2, 2, 0, 1], [2, 2, 1, 0]]
    
    # create tree normalizer 
    tree_normalizer = MultitaskKernelPlifNormalizer(support, data.task_vector_names)
    
    
    
    
    task_names = data.get_task_names()
    
    
    FACTOR = 1.0
    
    
    # init gamma matrix
    gammas = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))
    
    for t1_name in task_names:
        for t2_name in task_names:
            
            similarity = taxonomy.compute_node_similarity(taxonomy.get_id(t1_name), taxonomy.get_id(t2_name))        
            gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)] = similarity
    
    helper.save("/tmp/gammas", gammas)
    
    
    gammas = gammas * FACTOR
    
    cost = param.cost * numpy.sqrt(FACTOR) 
    
    print gammas
    
    
    ##########
    # regular normalizer
    
    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)
    
    for t1_name in task_names:
        for t2_name in task_names:
                    
            similarity = gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)]
            normalizer.set_task_similarity(data.name_to_id(t1_name), data.name_to_id(t2_name), similarity)
    
                
    ##################################################################
    # Train SVMs
    ##################################################################
    
    # create shogun objects
    wdk_tree = shogun_factory.create_kernel(data.examples, param)
    lab = shogun_factory.create_labels(data.labels)
    
    wdk_tree.set_normalizer(tree_normalizer)
    wdk_tree.init_normalizer()
    
    print "--->",wdk_tree.get_normalizer().get_name()
    
    svm_tree = SVMLight(cost, wdk_tree, lab)
    svm_tree.set_linadd_enabled(False)
    svm_tree.set_batch_computation_enabled(False)
    
    svm_tree.train()
    
    del wdk_tree
    del tree_normalizer
    
    print "finished training tree-norm SVM:", svm_tree.get_objective()
    
    
    wdk = shogun_factory.create_kernel(data.examples, param)
    wdk.set_normalizer(normalizer)
    wdk.init_normalizer()
    
    print "--->",wdk.get_normalizer().get_name()
    
    svm = SVMLight(cost, wdk, lab)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    
    svm.train()
    
    print "finished training manually set SVM:", svm.get_objective()
    
    
    alphas_tree = svm_tree.get_alphas()
    alphas = svm.get_alphas()
    
    assert(len(alphas_tree)==len(alphas))
    
    for i in xrange(len(alphas)):
        assert(abs(alphas_tree[i] - alphas[i]) < 0.0001)
        
    print "success: all alphas are the same"
Example #16
                companbot_x.cred = int(companbot_x.cred) + 1
                text_area.x = 5
                text_area.text = "I found treasure!"
                animate(idle)
                time.sleep(0.4)
            if randomEvent == 12:
                companbot_x.xp = int(companbot_x.xp) + random.randint(1, 2)
                time.sleep(0.4)
            if randomEvent == 22:
                text_area.x = 5
                text_area.text = helper.chat()
                animate(idle)
            if randomEvent == 42:
                text_area.x = 5
                text_area.text = "Dance Time!"
                animate(dance)
            if int(companbot_x.xp) > 100:
                print("Level Up")
                companbot_x = companion.companbot.levelUp(companbot_x)
                helper.save(companbot_x)
            led[0] = (helper.get_rndRGB())
        else:
            helper.save(companbot_x)
            user_AFK, AFKTimer = sandman.sleep(minitft, user_AFK, AFKTimer)

        monoClk_last, AFKTimer, user_AFK = helper.timelasp(
            monoClk_last, AFKTimer, user_AFK)

    except Exception as e:
        print("Error Main: " + str(e))
        led[0] = (255, 0, 0)
Example #17
# create dense matrices A,B,C
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=float64)
B = array([1, 1, 1, -1, -1, -1], dtype=float64)

# ... of type Real, LongInt and Byte
feats_train = RealFeatures(A.transpose())
kernel = GaussianKernel(feats_train, feats_train, 1.0)
kernel.io.set_loglevel(MSG_DEBUG)

lab = Labels(B)

svm = SVMLight(1, kernel, lab)
svm.train()

helper.save("/tmp/awesome_svm", svm)
svm = helper.load("/tmp/awesome_svm")

svm.train()

#sys.exit(0)

run = expenv.Run.get(1010)
#run = expenv.Run.get(974)
dat = run.get_train_data()

print dat.keys()
d = dat["thaliana"]
subset_size = 20

examples = [i.example for i in d[0:subset_size]]
Example #18
    def _train(self, train_data, param):
        """
        training procedure using training examples and labels
        
        @param train_data: Data relevant to SVM training
        @type train_data: dict<str, list<instances> >
        @param param: Parameters for the training procedure
        @type param: ParameterSvm
        """

        root = param.taxonomy.data
        
        print ">>>" + str(param.taxonomy.data) + "<<<"
        print "initial root weight:", root.edge_weight
        print "tasks", train_data.keys()
        print "tax keys", root.get_data_keys()


        numpy.random.seed(1)
        # prepare data splits for inner validation
        
        # set up validation strategy
        # this has to be done here, because the training set CANNOT contain
        # any examples that will be used to evaluate further down the tree
        # 
        # also by doing it this way, we have equally many examples from each
        # task in each split
        
        inner_train_data = {}
        inner_eval_data = {}
        
        for task_id in root.get_data_keys():
            
            idx = range(len(train_data[task_id]))
            
            idx_pos = [idx for idx in range(len(train_data[task_id])) if train_data[task_id][idx].label == 1]
            idx_neg = [idx for idx in range(len(train_data[task_id])) if train_data[task_id][idx].label == -1]
            
            numpy.random.shuffle(idx_pos)
            numpy.random.shuffle(idx_neg)

            # distribute pos/negs evenly across splits            
            splits_pos = helper.split_list(idx_pos, FOLD)
            splits_neg = helper.split_list(idx_neg, FOLD)
        
            eval_split_id = 0
            train_idx_pos = list(helper.flatten([splits_pos[j] for j in xrange(FOLD) if j!=eval_split_id]))
            train_idx_neg = list(helper.flatten([splits_neg[j] for j in xrange(FOLD) if j!=eval_split_id]))
            
            train_idx = train_idx_pos
            train_idx.extend(train_idx_neg)
            numpy.random.shuffle(train_idx)
            
            
            eval_idx_pos = splits_pos[eval_split_id]
            eval_idx_neg = splits_neg[eval_split_id]
            
            eval_idx = eval_idx_pos
            eval_idx.extend(eval_idx_neg)
            
            numpy.random.shuffle(eval_idx)
            
            
            
            #            numpy.random.shuffle(idx)
            #    
            #            splits = helper.split_list(idx, FOLD)
            #        
            #            eval_split_id = 0
            #            train_idx = list(helper.flatten([splits[j] for j in xrange(FOLD) if j!=eval_split_id]))
            #            eval_idx = splits[eval_split_id]
            
            # make sure idx lists are disjoint
            assert( len(set(train_idx).intersection(set(eval_idx))) == 0 )
           
            print "len train data", len(train_data[task_id]), task_id
 
            # select data sets
            inner_train_data[task_id] = [train_data[task_id][idx] for idx in train_idx]
            inner_eval_data[task_id] = [train_data[task_id][idx] for idx in eval_idx]

        

        ###########################################################
        #    Learn Taxonomy Parameters
        ###########################################################
        
        grey_nodes = [root]
        
        #initialize inner cost
        inner_cost = param.cost
        
        
        while len(grey_nodes)>0:
           
            # fetch next node to process
            node = grey_nodes.pop(0) #pop first item
            
            # enqueue children
            if not node.is_leaf():
                grey_nodes.extend(node.children)
    
    
    
            ###################################
            #train current node
            ###################################
            
            
            # concatenate instances from all task for nodes below
            instance_set_train = list(helper.flatten([inner_train_data[key] for key in node.get_data_keys()]))
            instance_set_eval = list(helper.flatten([inner_eval_data[key] for key in node.get_data_keys()]))
            
            # shuffle to avoid having instances from one task in consecutive order
            numpy.random.shuffle(instance_set_train)
            numpy.random.shuffle(instance_set_eval)

            # extract examples and labels
            train_examples = [inst.example for inst in instance_set_train]
            train_labels = [inst.label for inst in instance_set_train]
            
            eval_examples = [inst.example for inst in instance_set_eval]
            eval_labels = [inst.label for inst in instance_set_eval]
            
            
            #import copy
            #debug_examples = copy.copy(train_examples)
            #debug_examples.extend(eval_examples)
            
            #debug_labels = copy.copy(train_labels)
            #debug_labels.extend(eval_labels)
                            
            # only local xval for leaves
            #if node.is_root():
            #    inner_param = 0.0
            #    predictor = self._train_inner_classifier(node, train_examples, train_labels, param, inner_param, param.cost)
            
            #else:
            #TODO: also perform inner validation on non-leaves 
            if node.is_leaf():# not node.is_root():

                print "performing inner xval at node", node.name               
 
                # perform local model selection
                result_dict = self._perform_inner_xval(node, train_examples, train_labels, eval_examples, eval_labels, param)
            
                # use dict for returning args to avoid order glitches
                inner_edge_weight = result_dict["best_edge_weight"]
                inner_cost = result_dict["best_inner_cost"]
                predictor = result_dict["best_predictor"]
                
                
            else:
                # for non-leaves train without model selection
                inner_edge_weight = param.transform
                inner_cost = param.cost    
                
                predictor = self._train_inner_classifier(node, train_examples, train_labels, param, inner_edge_weight, inner_cost)
                #predictor = self._train_inner_classifier(node, debug_examples, debug_labels, param, inner_edge_weight, inner_cost)
                
            
            
            node.predictor = predictor
            node.edge_weight = inner_edge_weight
            node.cost = inner_cost



        ###########################################################
        # Retrain on whole training set with optimal parameters
        ###########################################################

        grey_nodes = [root]
        
        
        while len(grey_nodes)>0:
           
            node = grey_nodes.pop(0) #pop first item
            
            # enqueue children
            if not node.is_leaf():
                grey_nodes.extend(node.children)
    
    
            # fetch all data that belongs to leaves underneath current node
            instance_set_retrain = list(helper.flatten([train_data[key] for key in node.get_data_keys()]))
            
            # shuffle instances
            numpy.random.shuffle(instance_set_retrain)

            # extract examples and labels
            examples = [inst.example for inst in instance_set_retrain]
            labels = [inst.label for inst in instance_set_retrain]


            print "FINAL TRAIN on " + node.name + " C=" + str(node.cost) + " B=" + str(node.edge_weight)
            predictor = self._train_inner_classifier(node, examples, labels, param, node.edge_weight, node.cost)
            
            # attach predictor to node
            node.predictor = predictor



        #####################################################
        #    Wrap things up    
        #####################################################
 
        # wrap up predictors for later use
        predictors = {}

        for leaf in root.get_leaves():

            assert(leaf.predictor!=None)
            
            predictors[leaf.name] = leaf.predictor
            

        # make sure we have the same keys (potentially in a different order)
        sym_diff_keys = set(train_data.keys()).symmetric_difference(set(predictors.keys()))
        assert len(sym_diff_keys)==0, "symmetric difference between keys non-empty: " + str(sym_diff_keys)  


        # save graph plot
        mypath = "/fml/ag-raetsch/share/projects/multitask/graphs/"
        filename = mypath + "graph_" + str(param.id)
        filename_perf = mypath + "performances_" + str(param.id)
        
        
        helper.save(filename_perf, result_dict["performances"])
        print "saving results to:", filename_perf 
        
        root.plot(filename, plot_cost=True, plot_B=True)


        return predictors
Example #19
def test_data():

    ##################################################################
    # select MSS
    ##################################################################

    mss = expenv.MultiSplitSet.get(379)

    ##################################################################
    # data
    ##################################################################

    # fetch data
    instance_set = mss.get_train_data(-1)

    # prepare data
    data = PreparedMultitaskData(instance_set, shuffle=True)

    # set parameters
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 4
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.freeze()

    ##################################################################
    # taxonomy
    ##################################################################

    taxonomy = shogun_factory.create_taxonomy(mss.taxonomy.data)

    support = numpy.linspace(0, 100, 4)

    distances = [[0, 1, 2, 2], [1, 0, 2, 2], [2, 2, 0, 1], [2, 2, 1, 0]]

    # create tree normalizer
    tree_normalizer = MultitaskKernelPlifNormalizer(support,
                                                    data.task_vector_names)

    task_names = data.get_task_names()

    FACTOR = 1.0

    # init gamma matrix
    gammas = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

    for t1_name in task_names:
        for t2_name in task_names:

            similarity = taxonomy.compute_node_similarity(
                taxonomy.get_id(t1_name), taxonomy.get_id(t2_name))
            gammas[data.name_to_id(t1_name),
                   data.name_to_id(t2_name)] = similarity

    helper.save("/tmp/gammas", gammas)

    gammas = gammas * FACTOR

    cost = param.cost * numpy.sqrt(FACTOR)

    print gammas

    ##########
    # regular normalizer

    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

    for t1_name in task_names:
        for t2_name in task_names:

            similarity = gammas[data.name_to_id(t1_name),
                                data.name_to_id(t2_name)]
            normalizer.set_task_similarity(data.name_to_id(t1_name),
                                           data.name_to_id(t2_name),
                                           similarity)

    ##################################################################
    # Train SVMs
    ##################################################################

    # create shogun objects
    wdk_tree = shogun_factory.create_kernel(data.examples, param)
    lab = shogun_factory.create_labels(data.labels)

    wdk_tree.set_normalizer(tree_normalizer)
    wdk_tree.init_normalizer()

    print "--->", wdk_tree.get_normalizer().get_name()

    svm_tree = SVMLight(cost, wdk_tree, lab)
    svm_tree.set_linadd_enabled(False)
    svm_tree.set_batch_computation_enabled(False)

    svm_tree.train()

    del wdk_tree
    del tree_normalizer

    print "finished training tree-norm SVM:", svm_tree.get_objective()

    wdk = shogun_factory.create_kernel(data.examples, param)
    wdk.set_normalizer(normalizer)
    wdk.init_normalizer()

    print "--->", wdk.get_normalizer().get_name()

    svm = SVMLight(cost, wdk, lab)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    svm.train()

    print "finished training manually set SVM:", svm.get_objective()

    alphas_tree = svm_tree.get_alphas()
    alphas = svm.get_alphas()

    assert (len(alphas_tree) == len(alphas))

    for i in xrange(len(alphas)):
        assert (abs(alphas_tree[i] - alphas[i]) < 0.0001)

    print "success: all alphas are the same"
Example #20
def learning_curve(data_name, solvers):
    """
    call different solvers, compare objectives

    available solvers:
    - finite_diff_primal
    - cvxopt_dual_solver
    - finite_diff_dual
    - dcd
    - dcd_shrinking
    - dcd_shogun
    - mtk_shogun
    """


    #solvers = ["mtk_shogun"]
    #solvers = ["dcd_shogun"]

    num_runs = 10
    #fractions = np.linspace(0.1, 1.0, num_runs)
    fractions = [float(c) for c in np.exp(np.linspace(np.log(0.1), np.log(1.0), num_runs))]


    # keep track of training time
    num_xt = np.zeros(num_runs)
    train_times = np.zeros((2,num_runs))


    for run_id, fraction_data in enumerate(fractions):

        data, task_sim = get_data(data_name)
        #fig = pylab.figure()

        data_subset = defaultdict(dict)


        num_xt[run_id] = 0

        for task_name in data:
            num_total = len(data[task_name]["xt"])
            num_subset = int(float(num_total) * fraction_data)
            xt, lt = coshuffle(data[task_name]["xt"], data[task_name]["lt"])

            data_subset[task_name]["xt"] = xt[0:num_subset]
            data_subset[task_name]["lt"] = lt[0:num_subset]

            num_xt[run_id] += num_subset


        for s_idx, solver in enumerate(solvers):

            eps = 1e-3
            start_time = time.time()
            dcd.train_mtl_svm(data_subset, task_sim, solver, eps, 0, 0)
            ttime = time.time() - start_time
            print "training time:", ttime, "seconds"

            train_times[s_idx,run_id] = ttime

            # write progress to file
            fn = "results/learning_curve_" + data_name + "_" + solver + ".txt"
            txt_file = file(fn, "a")
            txt_file.write("num_xt:\t%i\ttime:\t%i\n" % (num_xt[run_id], ttime))
            txt_file.close()
            

    # save results
    fn = "results/learning_curve_" + data_name + ".pickle" 
    helper.save(fn, {"num_xt": num_xt, "time": train_times})
Example #21
    def _train(self, train_data, param):
        """
        training procedure using training examples and labels
        
        @param train_data: Data relevant to SVM training
        @type train_data: dict<str, list<instances> >
        @param param: Parameters for the training procedure
        @type param: ParameterSvm
        """

        # split for training weak_learners and boosting
        (train_weak, train_boosting) = split_data(train_data, 4)
          
        # merge data sets
        data = PreparedMultitaskData(train_weak, shuffle=True)
        
        # create shogun label
        lab = shogun_factory.create_labels(data.labels)
        


        ##################################################
        # define pockets
        ##################################################
        
        pockets = [0]*9
        
        pockets[0] = [1, 5, 6, 7, 8, 31, 32, 33, 34]
        pockets[1] = [1, 2, 3, 4, 6, 7, 8, 9, 11, 21, 31]
        pockets[2] = [11, 20, 21, 22, 29, 31]
        pockets[3] = [8, 30, 31, 32]
        pockets[4] = [10, 11, 30]
        pockets[5] = [10, 11, 12, 13, 20, 29]
        pockets[6] = [10, 12, 20, 22, 26, 27, 28, 29]
        pockets[7] = [12, 14, 15, 26]
        pockets[8] = [13, 15, 16, 17, 18, 19, 20, 23, 24, 25, 26]
        
        pockets = []
        for i in xrange(35):
            pockets.append([i])


        #new_pockets = []
        
        # merge neighboring pockets
        #for i in range(8):
        #    new_pockets.append(list(set(pockets[i]).union(set(pockets[i+1]))))
            
        #pockets = new_pockets
        
        
        ########################################################
        print "creating a kernel:"
        ########################################################
        
        
        # init seq handler 
        pseudoseqs = SequencesHandler()

        
        classifiers = []


        for pocket in pockets:

            print "creating normalizer"
            #import pdb
            #pdb.set_trace()
            
            normalizer = MultitaskKernelNormalizer(data.task_vector_nums)
            
            print "processing pocket", pocket

            # set similarity
            for task_name_lhs in data.get_task_names():
                for task_name_rhs in data.get_task_names():
                    
                    similarity = 0.0
                    
                    for pseudo_seq_pos in pocket:
                        similarity += float(pseudoseqs.get_similarity(task_name_lhs, task_name_rhs, pseudo_seq_pos-1))
                    
                    # normalize
                    similarity = similarity / float(len(pocket))
                    
                    print "pocket %s (%s, %s) = %f" % (str(pocket), task_name_lhs, task_name_rhs, similarity)
                    
                    normalizer.set_task_similarity(data.name_to_id(task_name_lhs), data.name_to_id(task_name_rhs), similarity)
               

            print "creating empty kernel"
            kernel = shogun_factory.create_kernel(data.examples, param)
            
            print "setting normalizer"
            kernel.set_normalizer(normalizer)

            print "training SVM for pocket", pocket
            svm = self._train_single_svm(param, kernel, lab)

            classifiers.append(svm)
        
        
        print "done obtaining weak learners"
            
        
        # save additional info
        #self.additional_information["svm_objective"] = svm.get_objective()
        #self.additional_information["svm num sv"] = svm.get_num_support_vectors()
        #self.additional_information["post_weights"] = combined_kernel.get_subkernel_weights()
        
        #print self.additional_information 
        


        ##################################################
        # combine weak learners for each task
        ##################################################
        
        
        # set constants
        
        some = 0.9
        import cvxmod
        
        
        # wrap up predictors
        svms = {}
            
        # use a reference to the same svm several times
        for task_name in train_boosting.keys():
            
            instances = train_boosting[task_name]
            
            N = len(instances)
            F = len(pockets)
            
            examples = [inst.example for inst in instances]
            labels = [inst.label for inst in instances]
            
            # dim = (F x N)
            out = cvxmod.zeros((N,F))
            
            for i in xrange(F):
                svm = classifiers[i]
                tmp_out = self._predict_weak(svm, examples, data.name_to_id(task_name))

                out[:,i] = numpy.sign(tmp_out)
                #out[:,i] = tmp_out
            

            #TODO: fix
            helper.save("/tmp/out_sparse", (out,labels))
            pdb.set_trace()
            
            weights = solve_boosting(out, labels, some, solver="mosek")
            
            
            
            svms[task_name] = (data.name_to_id(task_name), svm)

        
        return svms
Example #22
task_names = data.get_task_names()


FACTOR = 1.0


# init gamma matrix
gammas = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

for t1_name in task_names:
    for t2_name in task_names:
        
        similarity = taxonomy.compute_node_similarity(taxonomy.get_id(t1_name), taxonomy.get_id(t2_name))        
        gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)] = similarity

helper.save("/tmp/gammas", gammas)


gammas = gammas * FACTOR

cost = param.cost * numpy.sqrt(FACTOR) 

print gammas


##########
# regular normalizer

normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

for t1_name in task_names:
Example #23
def get_presvm(B=2.0):

    examples_presvm = [numpy.array([ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751, -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849]),
     numpy.array([ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751,  -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849]),
     numpy.array([ 0.93099452,  0.38871617,  1.57968949,  1.25672527, -0.8123137 ,   0.20786586,  1.378121  ,  1.15598866,  0.80265343]),
     numpy.array([ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945,  -0.2712956 ,  0.483094  , -0.16302007, -0.39094812]),
     numpy.array([-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 ,  -0.30425755,  1.63157052,  1.15990427,  0.63801465]),
     numpy.array([ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945, -0.2712956 ,  0.483094  , -0.16302007, -0.39094812]),
     numpy.array([-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 , -0.30425755,  1.63157052,  1.15990427,  0.63801465]),
     numpy.array([-0.98028302, -0.23974489,  2.1687206 ,  1.99338824, -0.67070205, -0.33167281,  1.3500379 ,  1.34915685,  1.13747975]),
     numpy.array([ 0.67109612,  0.12662017, -0.48254886, -0.49091898,  1.31522237, -0.34108933,  0.57832179, -0.01992828, -0.26581628]),
     numpy.array([ 0.3193611 ,  0.44903416,  3.62187778,  4.1490827 ,  1.58832961,  1.95583397,  1.36836023,  1.92521945,  2.41114998])]
    labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0]

    examples = [numpy.array([-0.49144487, -0.19932263, -0.00408188, -0.21262012,  0.14621013, -0.50415481,  0.32317317, -0.00317602, -0.21422637]), 
     numpy.array([ 0.0511817 , -0.04226666, -0.30454651, -0.38759116,  0.31639514,  0.32558471,  0.49364473,  0.04515591, -0.06963456]),
     numpy.array([-0.30324369, -0.11909251, -0.03210278, -0.2779561 ,  1.31488853, -0.33165365,  0.60176018, -0.00384946, -0.15603975]),
     numpy.array([ 0.59282756, -0.0039991 , -0.26028983, -0.26722552,  1.63314995, -0.51199338,  0.33340685, -0.0170519 , -0.19211039]),
     numpy.array([-0.18338766, -0.07783465,  0.42019824,  0.201753  ,  2.01160098,  0.33326111,  0.75591909,  0.36631525,  0.1761829 ]),
     numpy.array([ 0.10273793, -0.02189574,  0.91092358,  0.74827973,  0.51882902, -0.1286531 ,  0.64463658,  0.67468349,  0.55587266]),
     numpy.array([-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364, -0.43169608,  0.55064534,  0.24331473,  0.10878847]),
     numpy.array([-0.22494375, -0.15492964,  0.28017737,  0.29794467,  0.96403895,  0.43880289,  0.08053425,  0.07456818,  0.12102371]),
     numpy.array([-0.18161417, -0.17692039,  0.19554942, -0.00785625,  1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483]),
     numpy.array([-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256,  0.94205875,  0.40162798,  0.00327328, -0.24107393])]

    labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0]

    examples_test = [numpy.array([-0.45159799, -0.11401394,  1.28574573,  1.09144306,  0.92253119,  -0.47230164,  0.77032486,  0.83047366,  0.74768906]),
     numpy.array([ 0.42613105,  0.0092778 , -0.78640296, -0.71632445,  0.41154244,   0.88380309,  0.19475759, -0.14195876, -0.30479425]),
     numpy.array([-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364,  -0.43169608,  0.55064534,  0.24331473,  0.10878847]),
     numpy.array([ 0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017,  -0.1339607 , -0.1956124 , -0.2428358 , -0.25761213]),
     numpy.array([ 1.23679696,  0.18753081, -0.25593329, -0.12051991,  0.64976989,  -0.17184101,  0.14951337,  0.01988587, -0.0356698 ]),
     numpy.array([ 1.03355002,  0.05316195, -0.97905368, -0.75482121,  0.28673776,   2.27142733,  0.02654739, -0.31109851, -0.44555277]),
     numpy.array([-0.53662325, -0.21434756, -0.12105795, -0.27531257,  0.66947047,   0.05474302, -0.00717455, -0.17700575, -0.22253444]),
     numpy.array([ 0.11272632, -0.12674826, -0.49736457, -0.51445609,  0.88518932,  -0.51558669, -0.12000557, -0.32973613, -0.38488736]),
     numpy.array([ 0.8372111 ,  0.06972199, -1.00454229, -0.79869642,  1.19376333,  -0.40160273, -0.25122157, -0.46417918, -0.50234858]),
     numpy.array([-0.36325018, -0.12206184,  0.10525247, -0.15663416,  1.03616948,  -0.51699463,  0.59566286,  0.35363369,  0.10545559])]


    #############################################
    #    compute pre-svm
    #############################################


    # create real-valued features as first step
    examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
    examples_presvm = numpy.transpose(examples_presvm)

    feat_presvm = RealFeatures(examples_presvm)
    lab_presvm = Labels(numpy.array(labels_presvm))
    wdk_presvm = LinearKernel(feat_presvm, feat_presvm)



    presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
    presvm_liblinear.set_max_iterations(10000)
    presvm_liblinear.set_bias_enabled(False)
    presvm_liblinear.train()


    #return presvm_liblinear


    #def get_da_svm(presvm_liblinear):


    #############################################
    #    compute linear term manually
    #############################################

    examples = numpy.array(examples, dtype=numpy.float64)
    examples = numpy.transpose(examples)

    feat = RealFeatures(examples)
    lab = Labels(numpy.array(labels))

    dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
    dasvm_liblinear.set_bias_enabled(False)
    dasvm_liblinear.train()

    helper.save("/tmp/svm", presvm_liblinear)
    presvm_pickle = helper.load("/tmp/svm")

    dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B)
    dasvm_pickle.set_bias_enabled(False)
    dasvm_pickle.train()

    helper.save("/tmp/dasvm", dasvm_liblinear)
    dasvm_pickle2 = helper.load("/tmp/dasvm")

    #############################################
    #    load test data
    #############################################

    examples_test = numpy.array(examples_test, dtype=numpy.float64)
    examples_test = numpy.transpose(examples_test)
    feat_test = RealFeatures(examples_test)

    # check if pickled and unpickled classifiers behave the same
    out1 = dasvm_liblinear.classify(feat_test).get_labels()
    out2 = dasvm_pickle.classify(feat_test).get_labels()

    # compare outputs
    for i in xrange(len(out1)):    
        
        try:
            assert(abs(out1[i]-out2[i])<= 0.001)
        except:
            print "(%.5f, %.5f)" % (out1[i], out2[i])

            
    print "classification agrees."
Example #24
import math
import helper

VAR = 3
EPS = 1e-5

xs = range(-5, 6)
sums = []
ns = []
for x in xs:
	fx = VAR * x
	sum = fx
	i = 3
	n = 0
	while math.fabs(fx) > EPS:
		fx *= -((VAR * x) ** 2) / (i * (i - 1))
		sum += fx
		i += 2
		n += 1
	sums.append(round(sum, 4))
	ns.append(n)

helper.print_table(table_headers=['x', 'f(x)', 'n'],
				   table_values=[xs, sums, ns],
				   table_title='Taylor sum')

helper.save(value_list=[xs, sums], to='values.txt')
Example #25
A=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=float64)
B=array([1,1,1,-1,-1,-1], dtype=float64)


# ... of type Real, LongInt and Byte
feats_train = RealFeatures(A.transpose())
kernel = GaussianKernel(feats_train, feats_train, 1.0)
kernel.io.set_loglevel(MSG_DEBUG)

lab = Labels(B)

svm = SVMLight(1, kernel, lab)
svm.train()


helper.save("/tmp/awesome_svm", svm)
svm = helper.load("/tmp/awesome_svm")

svm.train()


#sys.exit(0)


run = expenv.Run.get(1010)
#run = expenv.Run.get(974)
dat = run.get_train_data()

print dat.keys()
d = dat["thaliana"]
subset_size = 20
Example #26
    def _train(self, train_data, param):
        """
        training procedure using training examples and labels
        
        @param train_data: Data relevant to SVM training
        @type train_data: dict<str, list<instances> >
        @param param: Parameters for the training procedure
        @type param: ParameterSvm
        """

        # split for training weak_learners and boosting
        (train_weak, train_boosting) = split_data(train_data, 4)
          
        # merge data sets
        data = PreparedMultitaskData(train_weak, shuffle=True)
        
        # create shogun label
        lab = shogun_factory.create_labels(data.labels)
        


        
        
        ########################################################
        print "creating a kernel:"
        ########################################################
        
        
        # init seq handler 
        pseudoseqs = SequencesHandler()

        
        classifiers = []


        for pocket in pockets:

            print "creating normalizer"
            #import pdb
            #pdb.set_trace()
            
            normalizer = MultitaskKernelNormalizer(data.task_vector_nums)
            
            print "processing pocket", pocket

            # set similarity
            for task_name_lhs in data.get_task_names():
                for task_name_rhs in data.get_task_names():
                    
                    similarity = 0.0
                    
                    for pseudo_seq_pos in pocket:
                        similarity += float(pseudoseqs.get_similarity(task_name_lhs, task_name_rhs, pseudo_seq_pos-1))
                    
                    # normalize
                    similarity = similarity / float(len(pocket))
                    
                    print "pocket %s (%s, %s) = %f" % (str(pocket), task_name_lhs, task_name_rhs, similarity)
                    
                    normalizer.set_task_similarity(data.name_to_id(task_name_lhs), data.name_to_id(task_name_rhs), similarity)
               

            print "creating empty kernel"
            kernel = shogun_factory.create_kernel(data.examples, param)
            
            print "setting normalizer"
            kernel.set_normalizer(normalizer)

            print "training SVM for pocket", pocket
            svm = self._train_single_svm(param, kernel, lab)

            classifiers.append(svm)
        
        
        print "done obtaining weak learners"
            
        
        # save additional info
        #self.additional_information["svm_objective"] = svm.get_objective()
        #self.additional_information["svm num sv"] = svm.get_num_support_vectors()
        #self.additional_information["post_weights"] = combined_kernel.get_subkernel_weights()
        
        #print self.additional_information 
        


        ##################################################
        # combine weak learners for each task
        ##################################################
        
        
        # set constants
        
        some = 0.9
        import cvxmod
        
        
        # wrap up predictors
        svms = {}
            
        # use a reference to the same svm several times
        for task_name in train_boosting.keys():
            
            instances = train_boosting[task_name]
            
            N = len(instances)
            F = len(pockets)
            
            examples = [inst.example for inst in instances]
            labels = [inst.label for inst in instances]
            
            # dim = (F x N)
            out = cvxmod.zeros((N,F))
            
            for i in xrange(F):
                svm = classifiers[i]
                tmp_out = self._predict_weak(svm, examples, data.name_to_id(task_name))

                out[:,i] = numpy.sign(tmp_out)
                #out[:,i] = tmp_out
            

            #TODO: fix
            helper.save("/tmp/out_sparse", (out,labels))
            pdb.set_trace()
            
            weights = solve_boosting(out, labels, some, solver="mosek")
            
            
            
            svms[task_name] = (data.name_to_id(task_name), svm)

        
        return svms