def compute_core(arg_list):
    """
    Single job that is distributed onto the cluster.

    `arg_list` is unpacked into
    (signal, method, fold_idx, train_idx, dev_idx, test_idx, grid_idx,
    grid_point). The data for `signal` is loaded, cut into train/dev/test
    subsets, a model is built via method(cost=grid_point["cost"]), trained
    on the train subset and used to predict on the test and dev subsets.

    Returns a list with one single-entry dict per key of the test subset:
    {(key, fold_idx, grid_idx): {"prc_dev": ..., "prc_test": ...}}
    """

    (signal, method, fold_idx, train_idx, dev_idx, test_idx,
     grid_idx, grid_point) = arg_list

    full_data = data_loader.load_all(signal)

    train_part = subset_mtl_data(full_data, train_idx)
    dev_part = subset_mtl_data(full_data, dev_idx)
    test_part = subset_mtl_data(full_data, test_idx)

    learner = method(cost=grid_point["cost"])
    learner.train(train_part)

    # keep the prediction order of the job: test subset first, then dev
    pred_test = learner.predict(test_part)
    pred_dev = learner.predict(dev_part)

    def first_score(predictions, subset, task):
        # only the first element of what helper.calcroc returns is kept
        # NOTE(review): the result keys say "prc" while the helper is named
        # calcroc -- confirm which metric index 0 actually is
        return helper.calcroc(predictions[task], subset[task]["labels"])[0]

    # collect key-value pairs, the key identifies (task, fold, grid point)
    pairs = []

    for task in test_part.keys():
        score_dev = first_score(pred_dev, dev_part, task)
        score_test = first_score(pred_test, test_part, task)

        job_key = (task, fold_idx, grid_idx)
        job_value = {"prc_dev": score_dev, "prc_test": score_test}

        pairs.append({job_key: job_value})

    return pairs
def compute_core(arg_list):
    """
    Single job that is distributed onto the cluster.

    `arg_list` is unpacked into
    (signal, method, fold_idx, train_idx, dev_idx, test_idx, grid_idx,
    grid_point). The data for `signal` is loaded, cut into train/dev/test
    subsets, a model is built via method(cost=grid_point["cost"]), trained
    on the train subset and used to predict on the test and dev subsets.

    Returns a list with one single-entry dict per key of the test subset:
    {(key, fold_idx, grid_idx): {"prc_dev": ..., "prc_test": ...}}
    """

    (signal, method, fold_idx, train_idx, dev_idx, test_idx,
     grid_idx, grid_point) = arg_list

    full_data = data_loader.load_all(signal)

    train_part = subset_mtl_data(full_data, train_idx)
    dev_part = subset_mtl_data(full_data, dev_idx)
    test_part = subset_mtl_data(full_data, test_idx)

    learner = method(cost=grid_point["cost"])
    learner.train(train_part)

    # keep the prediction order of the job: test subset first, then dev
    pred_test = learner.predict(test_part)
    pred_dev = learner.predict(dev_part)

    def first_score(predictions, subset, task):
        # only the first element of what helper.calcroc returns is kept
        # NOTE(review): the result keys say "prc" while the helper is named
        # calcroc -- confirm which metric index 0 actually is
        return helper.calcroc(predictions[task], subset[task]["labels"])[0]

    # collect key-value pairs, the key identifies (task, fold, grid point)
    pairs = []

    for task in test_part.keys():
        score_dev = first_score(pred_dev, dev_part, task)
        score_test = first_score(pred_test, test_part, task)

        job_key = (task, fold_idx, grid_idx)
        job_value = {"prc_dev": score_dev, "prc_test": score_test}

        pairs.append({job_key: job_value})

    return pairs
def setup_splits(signal, method_name, method, param, num_folds, test_size, random_state):
    """
    Set up train/dev/test splits, run one job per (fold, grid point)
    through pg.pg_map and reduce the results.

    signal       -- passed to data_loader.load_all and on to every job
    method_name  -- only used to decide whether the fixed 4gb resource
                    request below is used
    method       -- passed on to compute_core, which calls it to build
                    the model
    param        -- expanded with ParameterGrid into the grid points
    num_folds    -- number of splits requested from the splitter (n_iter)
    test_size    -- doubled before it is handed to the splitter
    random_state -- passed through to the splitter

    Returns the tuple (perf_dev, perf_test) produced by reduce_result.
    """

    data = data_loader.load_all(signal)

    # the splitter only needs the number of labels per task
    sizes = dict((org, len(data[org]["labels"])) for org in data.keys())

    # set up splitting strategy
    # presumably dev and test are both carved out of the doubled
    # test_size fraction -- TODO confirm against the splitter
    kf = MultitaskShuffleSplitThreeWay(sizes, n_iter=num_folds, indices=True,
                                       test_size=test_size * 2,
                                       random_state=random_state)

    param_grid = list(ParameterGrid(param))

    # one argument list per (fold, grid point) combination, in the
    # order expected by compute_core
    argument_list = []

    for fold_idx, (train_idx, dev_idx, test_idx) in enumerate(kf):
        for grid_idx, grid_point in enumerate(param_grid):
            argument_list.append([signal, method, fold_idx, train_idx,
                                  dev_idx, test_idx, grid_idx, grid_point])

    local = False

    # NOTE(review): for every method_name other than 'union' and
    # 'individual' the hyperparameter grid itself is forwarded to
    # pg.pg_map as `param`. This is kept as-is; confirm it is intended.
    cluster_param = param

    if method_name in ['union', 'individual']:
        cluster_param = {
            'vmem': '4gb',
            'pvmem': '4gb',
            'pmem': '4gb',
            'mem': '4gb',
            'ppn': '1',
            'nodes': '1',
            'walltime': '2:00:00',
        }

    intermediate_ret = pg.pg_map(compute_core, argument_list,
                                 param=cluster_param, local=local,
                                 maxNumThreads=1, mem="4gb")

    # parenthesised single-argument print: same output under the
    # Python 2 print statement, and valid as a function call
    print("DONE with computation")

    # every job returns a list, flatten them into one list of pairs
    flat_intermediate = list(chain.from_iterable(intermediate_ret))

    perf_dev, perf_test = reduce_result(flat_intermediate)

    print("DONE reducing")

    return perf_dev, perf_test
def setup_splits(signal, method_name, method, param, num_folds, test_size, random_state):
    """
    Set up train/dev/test splits, run one job per (fold, grid point)
    through pg.pg_map and reduce the results.

    signal       -- passed to data_loader.load_all and on to every job
    method_name  -- only used to decide whether the fixed 4gb resource
                    request below is used
    method       -- passed on to compute_core, which calls it to build
                    the model
    param        -- expanded with ParameterGrid into the grid points
    num_folds    -- number of splits requested from the splitter (n_iter)
    test_size    -- doubled before it is handed to the splitter
    random_state -- passed through to the splitter

    Returns the tuple (perf_dev, perf_test) produced by reduce_result.
    """

    data = data_loader.load_all(signal)

    # the splitter only needs the number of labels per task
    sizes = dict((org, len(data[org]["labels"])) for org in data.keys())

    # set up splitting strategy
    # presumably dev and test are both carved out of the doubled
    # test_size fraction -- TODO confirm against the splitter
    kf = MultitaskShuffleSplitThreeWay(sizes, n_iter=num_folds, indices=True,
                                       test_size=test_size * 2,
                                       random_state=random_state)

    param_grid = list(ParameterGrid(param))

    # one argument list per (fold, grid point) combination, in the
    # order expected by compute_core
    argument_list = []

    for fold_idx, (train_idx, dev_idx, test_idx) in enumerate(kf):
        for grid_idx, grid_point in enumerate(param_grid):
            argument_list.append([signal, method, fold_idx, train_idx,
                                  dev_idx, test_idx, grid_idx, grid_point])

    local = False

    # NOTE(review): for every method_name other than 'union' and
    # 'individual' the hyperparameter grid itself is forwarded to
    # pg.pg_map as `param`. This is kept as-is; confirm it is intended.
    cluster_param = param

    if method_name in ['union', 'individual']:
        cluster_param = {
            'vmem': '4gb',
            'pvmem': '4gb',
            'pmem': '4gb',
            'mem': '4gb',
            'ppn': '1',
            'nodes': '1',
            'walltime': '2:00:00',
        }

    intermediate_ret = pg.pg_map(compute_core, argument_list,
                                 param=cluster_param, local=local,
                                 maxNumThreads=1, mem="4gb")

    # parenthesised single-argument print: same output under the
    # Python 2 print statement, and valid as a function call
    print("DONE with computation")

    # every job returns a list, flatten them into one list of pairs
    flat_intermediate = list(chain.from_iterable(intermediate_ret))

    perf_dev, perf_test = reduce_result(flat_intermediate)

    print("DONE reducing")

    return perf_dev, perf_test