def parallel_grid_search(settings, folds, data, features, estimate_g):
    """Run a coarse-to-fine parallel grid search for SVM (C, gamma) parameters.

    Args:
        settings: configuration object; reads .grid_tests, .number_proc and
            .svm_kernel.
        folds: cross-validation folds, passed through to grid_search.
        data: dataset, passed through to grid_search.
        features: full feature list from which random subsets are drawn.
        estimate_g: when True, gamma is searched over a range of exponents;
            otherwise it is fixed at 2**1.

    Returns:
        Tuple (C, gamma) — the single surviving parameter pair.

    Raises:
        SystemExit: if a search round does not yield one score per subset.
    """
    n_tests = settings.grid_tests
    n_proc = settings.number_proc
    # Exponent ranges for the coarse search (as recommended in the paper).
    exps_c = [-7, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13, 15]
    if estimate_g:
        # gamma must be estimated: sweep a wide exponent range
        exps_g = [-13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7]
    else:
        # gamma is fixed: single placeholder value
        exps_g = [1]
    params_c = [2 ** e for e in exps_c]
    params_g = [2 ** e for e in exps_g]
    param_grid = [(c, g) for c in params_c for g in params_g]
    com = mp.Pipe()
    shared_lock = mp.Lock()
    # First pass searches huge intervals; once the best interval is found,
    # later passes refine with smaller intervals inside it.
    larger_search = True
    while len(param_grid) != 1:  # until a single (C, gamma) pair survives
        # Draw n_tests random feature subsets and split them across processes.
        subsets = utils.generate_random_sets(features, n_tests, 3, len(features))
        division_subsets = utils.divide_list(n_proc, subsets)
        workers = []
        for i in range(1, n_proc):
            p = mp.Process(target=grid_search,
                           args=(i, division_subsets[i], folds, data, param_grid,
                                 shared_lock, com, settings.svm_kernel))
            workers.append(p)
            p.start()
        # The main process works through the first share itself.
        grid_search(0, division_subsets[0], folds, data, param_grid,
                    shared_lock, com, settings.svm_kernel)
        for w in workers:
            w.join()
        out_conn, _in_conn = com  # renamed to avoid shadowing builtin `input`
        best_params = out_conn.recv()
        if len(best_params) != n_tests:
            # Expect exactly one best-param entry per subset; anything else
            # means a worker failed somewhere.
            print("Didn't receive best param for every subset. ERROR")
            # quit() is meant for interactive sessions and exits with code 0;
            # exit non-zero on this error path instead.
            raise SystemExit(1)
        param_grid, larger_search = get_top_selected(best_params, larger_search,
                                                     estimate_g)
    # param_grid now holds exactly one (C, gamma) pair.
    return param_grid[0][0], param_grid[0][1]
def parallel_grid_search(settings, folds, data, features, estimate_g): n_tests = settings.grid_tests n_proc = settings.number_proc ##starts the parameters exps_c = [-7, -5, -3, -1, 1, 3, 5, 7, 9, 11, 13, 15] if estimate_g: ##in case we have to estimate g we define a range exps_g = [-13, -11, -9, -7, -5, -3, -1, 1, 3, 5, 7] else: ##in case we dont just define it already exps_g = [1] params_c = [] params_g = [] for e in exps_c: params_c.append(2**e) for e in exps_g: params_g.append(2**e) param_grid = [(x, y) for x in params_c for y in params_g] com = mp.Pipe() shared_lock = mp.Lock() larger_search = True ##first we will search with huge intervals as recomended on the paper then when a larger interval is found we will search on lower intervals inside that interval while (len(param_grid) != 1): ##while we dont have one set ##generates n random subsets subsets = utils.generate_random_sets(features, n_tests, 3, len(features)) division_subsets = utils.divide_list(n_proc, subsets) ##parallelize work to it becomes faster workers = [] for i in range(1, n_proc): p = mp.Process(target=grid_search, args=(i, division_subsets[i], folds, data, param_grid, shared_lock, com, settings.svm_kernel)) workers.append(p) p.start() grid_search(0, division_subsets[0], folds, data, param_grid, shared_lock, com, settings.svm_kernel) for w in workers: ##hopefully this waits for every process to finish... w.join() output, input = com best_params = output.recv() if len(best_params) != n_tests: ##either we received the score of every feature or some error happened print "Didn't receive best param for every subset. ERROR" quit() param_grid, larger_search = get_top_selected(best_params, larger_search, estimate_g) ##returns c and gamma return param_grid[0][0], param_grid[0][1]
def get_improve_prob(features, n_tests, dataset, folds, n_proc, p_features,
                     shared_lock, com, cost, gamma, svm_kernel):
    """Score random feature subsets, then fan out improvement-probability work.

    First classifies n_tests random subsets (sizes 3..10) to obtain baseline
    accuracies, then splits the per-feature probability computation across
    n_proc processes; the main process handles share 0 itself. Workers report
    results through `com`, so nothing is returned.

    Args:
        features: feature pool from which random test subsets are drawn.
        n_tests: number of random subsets to evaluate.
        dataset, folds: data and cross-validation folds for classification.
        n_proc: total number of processes (including the main one).
        p_features: per-process feature shares, indexed 0..n_proc-1.
        shared_lock, com: multiprocessing lock and pipe shared with workers.
        cost, gamma, svm_kernel: SVM parameters forwarded to the classifier.
    """
    test_sets = utils.generate_random_sets(features, n_tests, 3, 10)
    # Baseline accuracy of each random subset against the classifier.
    accs = [classification_part.classify(folds, dataset, t, cost, gamma,
                                         svm_kernel)
            for t in test_sets]
    workers = []
    for i in range(1, n_proc):
        p = mp.Process(target=calculate_improve_prob,
                       args=(i, p_features[i], dataset, test_sets, accs, folds,
                             shared_lock, com, cost, gamma, svm_kernel))
        workers.append(p)
        p.start()
    # The main process computes its own share while the workers run.
    calculate_improve_prob(0, p_features[0], dataset, test_sets, accs, folds,
                           shared_lock, com, cost, gamma, svm_kernel)
    for w in workers:
        w.join()
def get_improve_prob(features, n_tests, dataset, folds, n_proc, p_features,
                     shared_lock, com, cost, gamma, svm_kernel):
    """Compute baseline subset accuracies and parallelize improvement scoring.

    Generates n_tests random subsets (sizes 3..10), classifies each one to
    record its accuracy, then distributes calculate_improve_prob over n_proc
    processes; share 0 runs in the current process. Results travel through
    `com`; the function itself returns nothing.
    """
    test_sets = utils.generate_random_sets(features, n_tests, 3, 10)
    accs = []
    for subset in test_sets:
        # Accuracy of this random subset against the classifier.
        subset_acc = classification_part.classify(folds, dataset, subset,
                                                  cost, gamma, svm_kernel)
        accs.append(subset_acc)
    # Spawn one worker per remaining process share.
    spawned = []
    for idx in range(1, n_proc):
        worker = mp.Process(target=calculate_improve_prob,
                            args=(idx, p_features[idx], dataset, test_sets,
                                  accs, folds, shared_lock, com, cost, gamma,
                                  svm_kernel))
        spawned.append(worker)
        worker.start()
    # The current process handles the first share of features itself.
    calculate_improve_prob(0, p_features[0], dataset, test_sets, accs, folds,
                           shared_lock, com, cost, gamma, svm_kernel)
    for worker in spawned:
        worker.join()
    return