# Example 1
def parallel_grid_search(settings, folds, data, features, estimate_g):
    """Grid-search the SVM hyper-parameters (C and, optionally, gamma) in parallel.

    Repeatedly evaluates the current parameter grid on random feature
    subsets, narrowing the grid coarse-to-fine (as recommended in the
    libsvm practical guide) until exactly one (C, gamma) pair survives.

    Args:
        settings:   configuration object; reads .grid_tests, .number_proc
                    and .svm_kernel.
        folds:      cross-validation folds, passed through to grid_search.
        data:       dataset, passed through to grid_search.
        features:   full feature list that random subsets are drawn from.
        estimate_g: when True, search a range of gamma exponents;
                    otherwise gamma is fixed at 2**1.

    Returns:
        (C, gamma) tuple — the single surviving grid point.

    Raises:
        SystemExit: if workers do not report a result for every subset.
    """
    n_tests = settings.grid_tests
    n_proc = settings.number_proc

    # Coarse exponent ranges per the libsvm grid-search recommendation.
    exps_c = [-7, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13, 15]
    if estimate_g:  # gamma must be estimated -> search a range
        exps_g = [-13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7]
    else:  # gamma is externally fixed -> single placeholder value
        exps_g = [1]
    params_c = [2 ** e for e in exps_c]
    params_g = [2 ** e for e in exps_g]
    param_grid = [(c, g) for c in params_c for g in params_g]

    com = mp.Pipe()
    shared_lock = mp.Lock()

    # First pass uses the coarse grid; once a promising region is found the
    # search switches to finer intervals inside it (see get_top_selected).
    larger_search = True

    while len(param_grid) > 1:  # keep narrowing until one point remains
        # Generate n_tests random feature subsets and split them per process.
        subsets = utils.generate_random_sets(features, n_tests, 3,
                                             len(features))
        division_subsets = utils.divide_list(n_proc, subsets)

        # Fan the work out over worker processes; chunk 0 runs inline below.
        workers = []
        for i in range(1, n_proc):
            p = mp.Process(target=grid_search,
                           args=(i, division_subsets[i], folds, data,
                                 param_grid, shared_lock, com,
                                 settings.svm_kernel))
            workers.append(p)
            p.start()

        grid_search(0, division_subsets[0], folds, data, param_grid,
                    shared_lock, com, settings.svm_kernel)

        for w in workers:  # wait for every worker process to finish
            w.join()

        # Renamed from 'output, input': 'input' shadowed the builtin.
        recv_end, _send_end = com
        best_params = recv_end.recv()

        # We must get exactly one best-param entry per subset; anything else
        # means a worker failed or the pipe was corrupted.
        if len(best_params) != n_tests:
            print("Didn't receive best param for every subset. ERROR")
            # quit() is an interactive helper and exits with status 0;
            # signal the failure with a non-zero exit instead.
            raise SystemExit(1)

        param_grid, larger_search = get_top_selected(best_params,
                                                     larger_search, estimate_g)
    # Exactly one (C, gamma) pair left.
    return param_grid[0][0], param_grid[0][1]
# Example 2
def parallel_grid_search(settings, folds, data, features, estimate_g):
    """Search for the best SVM (C, gamma) pair using several processes.

    The parameter grid is evaluated against random feature subsets and
    repeatedly shrunk by get_top_selected — a coarse pass first, then
    finer intervals inside the best region — until one pair remains.

    Returns the surviving (C, gamma) tuple.
    """
    n_tests = settings.grid_tests
    n_proc = settings.number_proc

    # Exponent ranges for C and gamma; gamma gets a single placeholder
    # exponent when it does not need to be estimated.
    c_exponents = [-7, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13, 15]
    if estimate_g:
        g_exponents = [-13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7]
    else:
        g_exponents = [1]
    param_grid = [(2 ** ec, 2 ** eg)
                  for ec in c_exponents
                  for eg in g_exponents]

    pipe_ends = mp.Pipe()
    lock = mp.Lock()

    # Coarse-to-fine flag: start wide, refine once a region is picked.
    coarse_phase = True

    while len(param_grid) != 1:
        # Draw random subsets and split them into one chunk per process.
        subsets = utils.generate_random_sets(features, n_tests, 3,
                                             len(features))
        per_proc = utils.divide_list(n_proc, subsets)

        # Spawn workers for chunks 1..n_proc-1; chunk 0 runs right here.
        procs = []
        for idx in range(1, n_proc):
            proc = mp.Process(target=grid_search,
                              args=(idx, per_proc[idx], folds, data,
                                    param_grid, lock, pipe_ends,
                                    settings.svm_kernel))
            procs.append(proc)
            proc.start()

        grid_search(0, per_proc[0], folds, data, param_grid, lock,
                    pipe_ends, settings.svm_kernel)

        for proc in procs:
            proc.join()

        receiver = pipe_ends[0]
        best_params = receiver.recv()

        # One result per subset is required; anything else is fatal.
        if len(best_params) != n_tests:
            print "Didn't receive best param for every subset. ERROR"
            quit()

        param_grid, coarse_phase = get_top_selected(best_params,
                                                    coarse_phase,
                                                    estimate_g)
    # Single grid point left: unpack C and gamma.
    return param_grid[0][0], param_grid[0][1]
# Example 3
def get_improve_prob(features, n_tests, dataset, folds, n_proc, p_features, shared_lock, com, cost, gamma, svm_kernel):
    """Estimate, in parallel, how much each feature improves classification.

    Builds n_tests random feature subsets, scores each with the SVM
    classifier to get baseline accuracies, then fans
    calculate_improve_prob out over n_proc processes (chunk 0 runs
    inline in this process). Workers report their results through
    `com`; this function returns nothing.

    Args:
        features:    feature pool that random subsets are drawn from.
        n_tests:     number of random subsets to evaluate.
        dataset:     data passed through to the classifier.
        folds:       cross-validation folds.
        n_proc:      total number of processes, including this one.
        p_features:  per-process feature chunks; p_features[i] goes to worker i.
        shared_lock: lock shared by all workers.
        com:         communication channel (e.g. mp.Pipe ends) for results.
        cost:        SVM cost (C) hyper-parameter.
        gamma:       SVM gamma hyper-parameter.
        svm_kernel:  kernel identifier passed through to the classifier.
    """
    test_sets = utils.generate_random_sets(features, n_tests, 3, 10)
    # Baseline accuracy of each random subset against the classifier.
    accs = [classification_part.classify(folds, dataset, test, cost, gamma,
                                         svm_kernel)
            for test in test_sets]

    # Split the probability work among processes; chunk 0 runs inline.
    workers = []
    for i in range(1, n_proc):
        p = mp.Process(target=calculate_improve_prob,
                       args=(i, p_features[i], dataset, test_sets, accs,
                             folds, shared_lock, com, cost, gamma,
                             svm_kernel))
        workers.append(p)
        p.start()

    calculate_improve_prob(0, p_features[0], dataset, test_sets, accs, folds,
                           shared_lock, com, cost, gamma, svm_kernel)

    for w in workers:
        w.join()
# Example 4
def get_improve_prob(features, n_tests, dataset, folds, n_proc, p_features, shared_lock, com, cost, gamma, svm_kernel):
    """Fan out per-feature improvement-probability estimation over processes.

    First scores n_tests random feature subsets with the classifier to
    obtain baseline accuracies, then runs calculate_improve_prob on
    each per-process feature chunk (chunk 0 executes in the current
    process). Workers communicate results via `com`.
    """
    test_sets = utils.generate_random_sets(features, n_tests, 3, 10)

    # Score every random subset against the classifier first.
    accs = []
    for subset in test_sets:
        subset_acc = classification_part.classify(folds, dataset, subset,
                                                  cost, gamma, svm_kernel)
        accs.append(subset_acc)

    # Launch one worker per remaining chunk; chunk 0 is handled inline.
    procs = []
    for worker_id in range(1, n_proc):
        proc = mp.Process(target=calculate_improve_prob,
                          args=(worker_id, p_features[worker_id], dataset,
                                test_sets, accs, folds, shared_lock, com,
                                cost, gamma, svm_kernel))
        procs.append(proc)
        proc.start()

    calculate_improve_prob(0, p_features[0], dataset, test_sets, accs,
                           folds, shared_lock, com, cost, gamma, svm_kernel)

    # Block until every worker has finished.
    for proc in procs:
        proc.join()
    return