def main():
    """
    Run the `fit` executable with its configuration file.

    A command is assembled by `generate_command` from hard-coded relative
    paths, a monitor is created by `generate_monitor` for the log file, and
    the command is handed to `run_process` with `this_dir` as working
    directory.

    Returns
    -------
    None
    """
    # Relative paths; the process itself is started with cwd=this_dir.
    executable = "./resc/fit"
    config_path = "resc/config.json"
    log_path = "prod/log.txt"

    command = generate_command(
        executable,
        config_path,
        script_prefix="",
        config_prefix="-c",
    )
    # NOTE(review): the meaning/unit of 360 is defined by generate_monitor.
    monitors = generate_monitor(log_path, 360)

    debug_print(""" cmd: {} """.format(command), V=VERBOSITY)

    run_process(command, monitors=monitors, cwd=this_dir)
def main(config_fname):
    """
    Expand an exploration config into commands and output filenames.

    The configuration is loaded, the start index is resolved, the
    exploration is generated, and the resulting commands and output
    filenames are saved together with the exploration itself.

    Parameters
    ----------
    config_fname
        Passed to `load_config`. The loaded config must provide the keys
        "child", "io", "explore" and an entry named after the child.

    Returns
    -------
    None
    """
    config = load_config(config_fname)
    child = config["child"]
    io_config = config["io"]

    debug_print(
        """ Directories in this experiment are: {} """.format(io_config["dirs"]),
        level=2,
        V=VERBOSITY,
    )

    # Exploration: an absent "start-idx" is passed on as None and resolved
    # by determine_start_idx.
    raw_explore_config = config["explore"]
    requested_start = config.get("start-idx", None)
    resolved_start = determine_start_idx(requested_start, io_config)
    explore_config = explore(raw_explore_config, resolved_start)

    # Static parameters of the child are stored under the child's own name.
    static_child_config = config[child]
    outputs = generate_outputs(child, explore_config, static_child_config)
    cmd_head, cmd_records, ofn_head, ofn_records = outputs

    save_explore(explore_config, io_config)
    save_outputs(cmd_head, cmd_records, io_config, kind="commands")
    save_outputs(ofn_head, ofn_records, io_config, kind="outp_fns")
def inference_and_evaluation(model, test_data, qry_codes, vsd_config, eval_config):
    """
    Predict every query with `model` and score the predictions.

    Parameters
    ----------
    model
        Object exposing `predict(test_data, q_code=...)` and a `model_data`
        mapping with an "inf_time" entry.
    test_data
        Test set, indexed as `test_data[:, columns]` to obtain true values.
    qry_codes
        Sequence of query codes; decoded with `codes_to_query`.
    vsd_config
        Not used by this function; kept for interface compatibility.
    eval_config
        Mapping; "kinds" lists the metric names looked up in `eval_dict`
        (defaults to ["macro_f1"]).

    Returns
    -------
    results_df : pd.DataFrame
        One row per query with columns ("q_idx", *eval_kinds).
    inf_timing : np.ndarray
        Rounded inference time per query.
    """
    # Extract config
    _, q_targ, _ = codes_to_query(qry_codes)
    eval_kinds = eval_config.get("kinds", ["macro_f1"])

    # The template previously had a single placeholder for two arguments,
    # which printed qry_codes under the "eval_kinds" label.
    msg = """ run_VersaDummy.py qry_codes: {} eval_kinds: {} """.format(
        qry_codes, eval_kinds
    )
    debug_print(msg, V=VERBOSITY)

    # Initialize: one placeholder record per query, overwritten in the loop.
    head_tuple = ("q_idx", *eval_kinds)
    data_tuple = tuple([0] + [0.0] * len(eval_kinds))
    tuple_list = [data_tuple] * len(qry_codes)
    inf_timing = np.zeros(len(qry_codes))

    # Actions
    for q_idx, q_code in enumerate(qry_codes):
        true_data = test_data[:, q_targ[q_idx]]
        pred_data = model.predict(test_data, q_code=q_code)

        evals = [
            round(eval_dict[kind](true_data, pred_data), PRECISION)
            for kind in eval_kinds
        ]

        tuple_list[q_idx] = (q_idx, *evals)
        inf_timing[q_idx] = round(model.model_data["inf_time"], PRECISION)

        # Drop the reference before the next prediction is made.
        del pred_data

    results_df = pd.DataFrame.from_records(tuple_list, columns=head_tuple)

    return results_df, inf_timing
def main(config_fname, fold):
    """
    Induce a model for one fold and save it together with its configuration.

    Parameters
    ----------
    config_fname
        Passed to `load_config`. The loaded config must contain "io"; the
        entries "mod", "fit" and "machine" are optional.
    fold
        Fold identifier; stored in the io configuration under "fold".

    Returns
    -------
    int
        Always 0.
    """
    config = load_config(config_fname)

    io_config = config['io']
    mod_config = config.get("mod", {})
    fit_config = config.get("fit", {})
    machine = config.get("machine", None)

    # The fold arrives as a separate argument but is carried in io_config.
    io_config['fold'] = fold

    # NOTE(review): sibling scripts pass `level=2` to debug_print; confirm
    # that `l=2` is an accepted keyword.
    debug_print(""" io_config: {} """.format(io_config), V=VERBOSITY, l=2)

    train_fname = load_input(io_config)

    model = induction(
        train_fname,
        fit_config,
        mod_config,
        io_config,
        machine=machine,
    )

    save_model_and_model_config(
        model,
        io_config,
        {'mod': mod_config, 'fit': fit_config},
    )

    return 0
def save_outputs(head_tuple, tuple_list, io_config, kind="commands"):
    """
    Write a list of records to the csv file registered for `kind`.

    Parameters
    ----------
    head_tuple
        Column names of the records.
    tuple_list
        Sequence of records; may be empty, in which case a csv with only
        the header is written.
    io_config
        Mapping; the target filename is read from `io_config["file"][kind]`.
    kind: str
        Key selecting the target filename.

    Returns
    -------
    None
    """
    fname = io_config["file"][kind]

    # Guard the diagnostics: indexing an empty list here used to raise an
    # IndexError before anything was written.
    first_record = tuple_list[0] if len(tuple_list) > 0 else None
    first_length = len(first_record) if first_record is not None else 0

    msg = """ Head Tuple: {} len(tuple_list[0]): {} tuple_list[0]: {} """.format(
        head_tuple, first_length, first_record
    )
    debug_print(msg, V=VERBOSITY)

    df = pd.DataFrame.from_records(tuple_list, columns=head_tuple)
    df.to_csv(fname)

    return
def _build_commands(fnames, shuffle=True):
    """
    Build commands from parameter .json file(s)

    Commands are generated by an instance exp of the RunExp() class. This
    happens for each .json file. Afterwards, the commands are extracted from
    exp, and collected in one DataFrame. Each exp is also pickled to the
    filename found in its own config under ["io"]["file"]["RunExp"].

    Parameters
    ----------
    fnames
        Iterable of filenames of .json parameter files.
    shuffle: bool
        If True, the rows of the collected commands are shuffled and the
        index is reset.

    Returns
    -------
    pd.DataFrame
        All commands of all experiments; empty if `fnames` is empty.
    """
    frames = []

    for fname in fnames:
        msg = """ Looking at parameters file: {} """.format(fname)
        debug_print(msg, V=VERBOSITY)

        with open(fname, "r") as f:
            parameters = json.load(f)

        # Init
        exp = RunExp()

        # Config
        exp.make_config(**parameters)
        exp.save_config()

        # Generate commands of RunExp's children
        exp.run()

        # Load commands. Frames are collected and concatenated once at the
        # end: DataFrame.append was removed in pandas 2.0 and copied the
        # accumulated frame on every iteration.
        frames.append(exp.load_output(kind="commands"))

        exp_fname = exp.config["io"]["file"]["RunExp"]
        with open(exp_fname, "wb") as f:
            pkl.dump(exp, f)

    # pd.concat raises on an empty list, so keep the empty-frame result.
    commands_df = pd.concat(frames) if frames else pd.DataFrame()

    if shuffle:
        # Shuffle all commands, cf. https://stackoverflow.com/questions/29576430/shuffle-dataframe-rows
        commands_df = commands_df.sample(frac=1).reset_index(drop=True)

    return commands_df
def load_mod(io_config):
    """
    Load a pickled model for the current fold.

    Parameters
    ----------
    io_config
        Mapping with "fold" and, under ["file"]["load-mod"], a sequence of
        (fold, filename) pairs.

    Returns
    -------
    mod
        The unpickled model.
    ind_time
        The model's recorded induction time, rounded to PRECISION.

    Raises
    ------
    IndexError
        If no filename is registered for the fold.
    """
    wanted_fold = io_config['fold']

    # Keep every filename registered for this fold; the first one is used.
    matches = []
    for entry in io_config['file']['load-mod']:
        if entry[0] == wanted_fold:
            matches.append(entry[1])
    mod_fname = matches[0]

    debug_print(
        """ Loading an external model: {} """.format(mod_fname),
        V=VERBOSITY,
    )

    # NOTE(review): unpickling executes arbitrary code; only load model
    # files from trusted locations.
    with open(mod_fname, 'rb') as handle:
        mod = pkl.load(handle)

    ind_time = round(mod.s['model_data']['ind_time'], PRECISION)

    return mod, ind_time
def induction(train_fname, smile_config):
    """
    Fit a fresh PxS model on the training file.

    Parameters
    ----------
    train_fname
        Passed as first argument to the model's `fit` method.
    smile_config
        Mapping, expanded into keyword arguments of `fit`.

    Returns
    -------
    model
        The fitted PxS instance.
    ind_time
        Induction time read from the model, rounded to PRECISION.
    """
    pxs_model = pxs.PxS()
    debug_print(""" Succesfully initialized PxS model. """, V=VERBOSITY)

    # The return code is only reported, not checked.
    fit_code = pxs_model.fit(train_fname, **smile_config)
    debug_print(
        """ Code returned from fit method: {} """.format(fit_code),
        V=VERBOSITY,
    )

    rounded_ind_time = round(pxs_model.s['model_data']['ind_time'], PRECISION)

    return pxs_model, rounded_ind_time
def induction(train_fname, cwd, machine, cfg_fit):
    """
    Fit a fresh PxL model on the training file.

    Parameters
    ----------
    train_fname
        Passed to the model's `fit` method as keyword `i`.
    cwd
        Forwarded to the PxL constructor.
    machine
        Forwarded to the PxL constructor.
    cfg_fit
        Mapping, expanded into further keyword arguments of `fit`.

    Returns
    -------
    model
        The fitted PxL instance.
    ind_time
        Induction time read from the model, rounded to PRECISION.
    """
    pxl_model = pxl.PxL(cwd=cwd, machine=machine)
    debug_print(
        """ Succesfully initialized PxL model. Cfg_fit: {} """.format(cfg_fit),
        V=VERBOSITY,
    )

    # The return code is only reported, not checked.
    fit_code = pxl_model.fit(i=train_fname, **cfg_fit)
    debug_print(
        """ Code returned from fit method: {} """.format(fit_code),
        V=VERBOSITY,
    )

    rounded_ind_time = round(pxl_model.s['model_data']['ind_time'], PRECISION)

    return pxl_model, rounded_ind_time
def save_model_and_model_config(model, io_config, mod_config):
    """
    Persist a model (pickle) and its configuration (json).

    Parameters
    ----------
    model
        Object to pickle.
    io_config
        Mapping with "fold", the config filename under
        ["file"]["mod-config"], and a sequence of (fold, filename) pairs
        under ["file"]["mod"].
    mod_config
        JSON-serializable configuration.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If no model filename is registered for the fold.
    """
    current_fold = io_config['fold']
    config_path = io_config['file']['mod-config']

    # Resolve the model filename before anything is written.
    registered = []
    for entry in io_config['file']['mod']:
        if entry[0] == current_fold:
            registered.append(entry[1])
    model_path = registered[0]

    # Configuration first, then the model itself.
    ensure_dir(dirname(config_path), empty=False)
    with open(config_path, 'w') as config_file:
        json.dump(mod_config, config_file, indent=4)

    ensure_dir(dirname(model_path), empty=False)
    with open(model_path, 'wb') as model_file:
        pkl.dump(model, model_file)

    debug_print(
        """ Successful save of model to: {} """.format(model_path),
        V=VERBOSITY,
    )
def main(csv_fname, cmd_idx):
    """
    Run one command out of a csv file of commands.

    Every row of the csv describes one command; its columns are the
    parameter names of `run_script`. The row at position `cmd_idx` is
    selected and its values are bound to the signature of `run_script`.

    Parameters
    ----------
    csv_fname: str
        Filename of the csv containing all commands (first column is the
        index).
    cmd_idx: int
        Position of the row that holds the command to run.

    Returns
    -------
    None
    """
    assert isinstance(cmd_idx, int)
    assert isinstance(csv_fname, str)

    # Extract command
    commands = pd.read_csv(csv_fname, index_col=0)
    selected_row = commands.iloc[cmd_idx]
    param_dict = dict(zip(tuple(commands.columns), tuple(selected_row)))

    debug_print(""" param_dict: {} """.format(param_dict), V=VERBOSITY)

    # Binding through the signature fails early on unknown or missing
    # parameters.
    bound = signature(run_script).bind(**param_dict)
    run_script(*bound.args, **bound.kwargs)

    return
def determine_start_idx(start_idx, io_config):
    """
    Return the start index, detecting it from disk when none is given.

    Parameters
    ----------
    start_idx: int or None
        An explicit start index is returned unchanged. With None, the index
        is derived from the contents of the production directory.
    io_config
        Mapping; only read when `start_idx` is None, in which case
        `io_config["dirs"]["prod"]` locates the production directory.

    Returns
    -------
    int
        The given start index, or one more than the largest index found by
        `detect_largest_idx_in_directory` over all entries of the directory
        two levels above `io_config["dirs"]["prod"]`.
    """
    assert isinstance(start_idx, (int, type(None)))

    if start_idx is not None:
        return start_idx

    prod_entry = io_config["dirs"]["prod"]
    prod_dir = dirname(dirname(prod_entry))
    debug_print(
        """ io_config['dirs']['prod']: {} prod_dir assumed to be: {} """.format(prod_entry, prod_dir),
        V=VERBOSITY,
    )

    # Every entry of prod_dir is inspected (os.listdir does not filter).
    candidates = [os.path.join(prod_dir, entry) for entry in os.listdir(prod_dir)]
    detected = max(map(detect_largest_idx_in_directory, candidates)) + 1

    debug_print(
        """ Automatically detected start-idx: {} """.format(detected),
        V=VERBOSITY,
    )

    return detected
def main(config_fname, fold):
    """
    Run PxS on one fold: obtain a model, then predict and evaluate.

    Parameters
    ----------
    config_fname
        Passed to `load_config`. The loaded config must contain "io",
        "smile" and "eval".
    fold
        Fold identifier; stored in the io configuration under "fold".

    Returns
    -------
    None
    """
    config = load_config(config_fname)
    io_config = config['io']
    pxs_config = config['smile']

    io_config['fold'] = fold
    # True when a "load-mod" entry exists and is not None.
    external_model = io_config['file'].get('load-mod', None)
    io_config['load'] = external_model is not None

    debug_print(""" io_config: {} """.format(io_config), V=VERBOSITY)

    train_fname, test_fname = load_input(io_config)
    qry_codes = load_qry_codes(io_config)
    pxs_config['cwd'] = io_config['dirs']['prod-tmp']

    # Either induce a new model or reuse a stored one.
    if not io_config['load']:
        model, ind_time = induction(train_fname, pxs_config)
    else:
        model, ind_time = load_mod(io_config)

    inference_and_evaluation(
        model,
        test_fname,
        qry_codes,
        pxs_config,
        config['eval'],
        io_config,
        ind_time,
    )
# Execute subprocess.call(bash, env=env) return # For executable script if __name__ == '__main__': # Extracting options parser = argparse.ArgumentParser() parser.add_argument('--commands', '-c', help='commands_fname_outer_scope') parser.add_argument('--local', '-l', help='local_outer_scope, local yes/no', action="store_true") args = parser.parse_args() commands_fname_outer_scope = args.commands local_outer_scope = args.local msg = """ We are running local: {} """.format(local_outer_scope) debug_print(msg, V=VERBOSITY) assert isinstance(commands_fname_outer_scope, str) main(commands_fname_outer_scope, local=local_outer_scope)
def inference_and_evaluation(model, test_fname, qry_codes, smile_config, eval_config, io_config, ind_time):
    """
    Predict every query with a PxS model, score it and save the results.

    Results and timings are written every ten queries and once more after
    the last query.

    Parameters
    ----------
    model
        Object exposing `predict(test_fname, **smile_config)` and the
        nested mapping `s['model_data']['inf_time']`.
    test_fname
        Filename of the test csv; read with pandas and also passed to
        `predict`.
    qry_codes
        Sequence of query codes; decoded with `codes_to_query`.
    smile_config
        Mapping of prediction keyword arguments. It is modified in place:
        "miss_idx", "targ_idx" and "q_idx" are overwritten for each query.
    eval_config
        Mapping; "kinds" lists the metric names looked up in `eval_dict`
        (defaults to ['macro_f1']).
    io_config
        Passed on to `save_results` and `save_timings`.
    ind_time
        Induction time, passed on to `tidy_timings`.

    Returns
    -------
    None
    """
    # Extract config
    _, q_targ, q_miss = codes_to_query(qry_codes)
    eval_kinds = eval_config.get('kinds', ['macro_f1'])

    # The template previously had a single placeholder for two arguments,
    # which printed qry_codes under the "eval_kinds" label.
    msg = """ run_PxS.py qry_codes: {} eval_kinds: {} """.format(qry_codes, eval_kinds)
    debug_print(msg, V=VERBOSITY)

    # Initialize: one placeholder record per query, overwritten in the loop.
    head_tuple = ('q_idx', *eval_kinds)
    data_tuple = tuple([0] + [0.0] * len(eval_kinds))
    tuple_list = [data_tuple] * len(qry_codes)

    # NOTE(review): the first csv row is consumed as header here; confirm
    # that the test file really has a header row.
    test_data = pd.read_csv(test_fname)
    inf_timing = np.zeros(len(qry_codes))

    def _save_progress():
        # Write the records and timings collected so far.
        results_df = pd.DataFrame.from_records(tuple_list, columns=head_tuple)
        timings_df = tidy_timings(ind_time, inf_timing)
        save_results(results_df, io_config)
        save_timings(timings_df, io_config)

    # Actions
    for q_idx, _ in enumerate(qry_codes):
        # Round trip through numpy converts the indices to plain lists.
        smile_config['miss_idx'] = np.array(q_miss[q_idx]).tolist()
        smile_config['targ_idx'] = np.array(q_targ[q_idx]).tolist()
        smile_config['q_idx'] = q_idx

        msg = """ Diagnostics about query given to PxS: {} """.format(smile_config)
        debug_print(msg, V=VERBOSITY)

        true_data = test_data.values[:, q_targ[q_idx]]
        pred_data = model.predict(test_fname, **smile_config)

        # Anything but an array counts as a failed prediction; NaNs are
        # evaluated instead.
        if not isinstance(pred_data, np.ndarray):
            pred_data = np.full_like(true_data, fill_value=np.nan)

        uvals_pred_data = np.unique(pred_data)
        msg = """ Diagnostics about pred_data. Unique values: {} Shape: {} Type: {} """.format(
            uvals_pred_data, pred_data.shape, pred_data.dtype
        )
        debug_print(msg, V=VERBOSITY)

        evals = [
            round(eval_dict[kind](true_data, pred_data), PRECISION)
            for kind in eval_kinds
        ]

        tuple_list[q_idx] = (q_idx, *evals)
        inf_timing[q_idx] = round(model.s['model_data']['inf_time'], PRECISION)

        del pred_data

        # Save every ten queries
        if q_idx % 10 == 0:
            _save_progress()

    _save_progress()

    return
def inference_and_evaluation(model, test_fname, qry_codes, pred_config, eval_config, io_config, ind_time):
    """
    Predict every query with the model, score it and write the outputs.

    Outputs are written every ten queries and once more after the last
    query.

    Parameters
    ----------
    model
        Object exposing `predict(test_fname, **pred_config)` and the nested
        mapping `s['model_data']['inf_time']`.
    test_fname
        Filename of the header-less test csv; read with pandas and also
        passed to `predict`.
    qry_codes
        Sequence of query codes; decoded with `codes_to_query`.
    pred_config
        Mapping of prediction keyword arguments. It is modified in place:
        "miss_idx", "targ_idx" and "q_idx" are overwritten for each query.
    eval_config
        Mapping; "kinds" lists the metric names looked up in `eval_dict`
        (defaults to ['macro_f1']).
    io_config
        Passed on to `write_outputs`.
    ind_time
        Induction time, passed on to `write_outputs`.

    Returns
    -------
    None
    """
    # Extract config
    _, q_targ, q_miss = codes_to_query(qry_codes)
    eval_kinds = eval_config.get('kinds', ['macro_f1'])

    # The template previously had two placeholders for three arguments,
    # which printed qry_codes under the "eval_kinds" label.
    msg = """ {} qry_codes: {} eval_kinds: {} """.format(__file__, qry_codes, eval_kinds)
    debug_print(msg, V=VERBOSITY)

    # Initialize: one placeholder record per query, overwritten in the loop.
    head_tuple = ('q_idx', *eval_kinds)
    data_tuple = tuple([0] + [0.0] * len(eval_kinds))
    tuple_list = [data_tuple] * len(qry_codes)

    test_data = pd.read_csv(test_fname, header=None)
    inf_timing = np.zeros(len(qry_codes))

    # Actions
    for q_idx, _ in enumerate(qry_codes):
        # Round trip through numpy converts the indices to plain lists.
        pred_config['miss_idx'] = np.array(q_miss[q_idx]).tolist()
        pred_config['targ_idx'] = np.array(q_targ[q_idx]).tolist()
        pred_config['q_idx'] = q_idx

        msg = """ cfg_pred: {} """.format(pred_config)
        debug_print(msg, V=VERBOSITY)

        true_data = test_data.values[:, q_targ[q_idx]]
        pred_data = model.predict(test_fname, **pred_config)

        # If all goes wrong, evaluate NaNs instead.
        if not isinstance(pred_data, np.ndarray):
            pred_data = np.full_like(true_data, fill_value=np.nan)

        # region Advanced diagnostics for debugging.
        if VERBOSITY > 0:
            uvals_pred_data = np.unique(pred_data)
            msg = """ Diagnostics about pred_data. Unique values: {} Shape: {} Type: {} Diagnostics about true_data. Shape: {} Type: {} """.format(
                uvals_pred_data,
                pred_data.shape,
                pred_data.dtype,
                true_data.shape,
                true_data.dtype,
            )
            debug_print(msg, V=VERBOSITY)
        # endregion

        evals = [
            round(eval_dict[kind](true_data, pred_data), PRECISION)
            for kind in eval_kinds
        ]

        tuple_list[q_idx] = (q_idx, *evals)
        inf_timing[q_idx] = round(model.s['model_data']['inf_time'], PRECISION)

        del pred_data

        # Save every ten queries
        if q_idx % 10 == 0:
            write_outputs(tuple_list, head_tuple, ind_time, inf_timing, io_config)

    write_outputs(tuple_list, head_tuple, ind_time, inf_timing, io_config)

    return
def main(config_fname, fold, q_idx):
    """
    Run a certain system.

    Stages
    ^^^^^^

    1. Load configurations
        In general, we hold on to a `program(config)` kind of philosophy:
        this script is expected to be called with a single config file that
        contains all details of how the script is supposed to function.
        Hence, the first step is to extract these configurations. All of
        them are dicts, saved as json files.

        Entries of the "PxL" section are split by key prefix: keys starting
        with "fit." go to the fit configuration, keys starting with
        "predict." go to the prediction configuration, with the prefix
        removed.

    1.b Alter configurations
        A few important parameters are passed on as cmd-parameters, such as
        fold and query-idx, because it would be overkill to generate
        separate config files for each of those. For consistency later on,
        we explicitly put them in the configurations as well.

    2. Induction
        We induce or load our predictive model. Before doing so, we already
        extract some relevant parameters from the config files.

    3. Inference
        We use our model to perform inference tasks (predictions).

    Parameters
    ----------
    config_fname
        Passed to `load_config`. The loaded config must contain "io" and
        "eval"; "PxL" and "machine" are optional.
    fold
        Fold identifier; stored in the io configuration under "fold".
    q_idx
        Query index; stored in the io configuration under "q_idx".

    Returns
    -------
    None
    """

    def _section(options, prefix):
        # Select the keys that START with prefix and strip only that
        # leading prefix. The former substring test plus str.replace also
        # matched and mangled keys that merely contained it further on.
        return {
            key[len(prefix):]: value
            for key, value in options.items()
            if key.startswith(prefix)
        }

    # Load config (all different categories)
    config = load_config(config_fname)
    io_config = config['io']
    pxl_config = config.get('PxL', {})
    fit_config = _section(pxl_config, 'fit.')
    pred_config = _section(pxl_config, 'predict.')

    # Alter configuration with cmd-parameters
    io_config['fold'] = fold
    # True when a "load-mod" entry exists and is not None.
    io_config['load'] = io_config['file'].get('load-mod', None) is not None
    io_config['q_idx'] = q_idx

    msg = """ io_config: {} """.format(io_config)
    debug_print(msg, V=VERBOSITY, level=2)

    # Extract relevant parameters
    train_fname, test_fname = load_input(io_config)
    cwd = io_config['dirs']['prod-tmp']
    machine = config.get("machine", None)
    qry_codes = load_qry_codes(io_config)

    # Induce
    if io_config['load']:
        msg = """ Loading model from disk; {} """.format(io_config['file']['load-mod'])
        debug_print(msg, V=VERBOSITY)
        model, ind_time = load_mod(io_config, cwd, machine)
    else:
        model, ind_time = induction(train_fname, cwd, machine, fit_config)

    # Inference + Evaluation
    eval_config = config['eval']
    inference_and_evaluation(
        model,
        test_fname,
        qry_codes,
        pred_config,
        eval_config,
        io_config,
        ind_time,
    )

    return
def inference_and_evaluation(model, test_fname, qry_codes, pred_config, eval_config, io_config, ind_time):
    """
    Predict a single query with the model, score it and save the outputs.

    The query to run is selected by `io_config['q_idx']`.

    Parameters
    ----------
    model
        Object exposing `predict(test_fname, **pred_config)` and the nested
        mapping `s['model_data']['inf_time']`.
    test_fname
        Filename of the header-less test csv; read with pandas and also
        passed to `predict`.
    qry_codes
        Sequence of query codes; decoded with `codes_to_query`.
    pred_config
        Mapping of prediction keyword arguments. It is modified in place:
        "miss_idx", "targ_idx" and "q_idx" are overwritten.
    eval_config
        Mapping; "kinds" lists the metric names looked up in `eval_dict`
        (defaults to ['macro_f1']).
    io_config
        Mapping providing "q_idx"; also passed on to `save_results` and
        `save_timings`.
    ind_time
        Induction time, stored next to the inference time.

    Returns
    -------
    None
    """
    # Extract config
    q_idx = io_config['q_idx']
    _, q_targ, q_miss = codes_to_query(qry_codes)
    eval_kinds = eval_config.get('kinds', ['macro_f1'])

    # The template previously had two placeholders for three arguments,
    # which printed qry_codes under the "eval_kinds" label.
    msg = """ {} qry_codes: {} eval_kinds: {} """.format(__file__, qry_codes, eval_kinds)
    debug_print(msg, V=VERBOSITY)

    head_tuple = ('q_idx', *eval_kinds)

    # Round trip through numpy converts the indices to plain lists.
    pred_config['miss_idx'] = np.array(q_miss[q_idx]).tolist()
    pred_config['targ_idx'] = np.array(q_targ[q_idx]).tolist()
    pred_config['q_idx'] = q_idx

    msg = """ cfg_pred: {} """.format(pred_config)
    debug_print(msg, V=VERBOSITY)

    test_data = pd.read_csv(test_fname, header=None)
    true_data = test_data.values[:, q_targ[q_idx]]
    pred_data = model.predict(test_fname, **pred_config)

    # If all goes wrong, fill in np.NaN
    if not isinstance(pred_data, np.ndarray):
        pred_data = np.full_like(true_data, fill_value=np.nan)

    # region Advanced diagnostics for debugging.
    if VERBOSITY > 0:
        uvals_pred_data = np.unique(pred_data)
        msg = """ Diagnostics about pred_data. Unique values: {} Shape: {} Type: {} Diagnostics about true_data. Shape: {} Type: {} """.format(
            uvals_pred_data,
            pred_data.shape,
            pred_data.dtype,
            true_data.shape,
            true_data.dtype,
        )
        debug_print(msg, V=VERBOSITY)
    # endregion

    # Evaluation
    evals = [
        round(eval_dict[kind](true_data, pred_data), PRECISION)
        for kind in eval_kinds
    ]

    # Collect results
    results_df = pd.DataFrame.from_records([(q_idx, *evals)], columns=head_tuple)

    # Collect timings
    inf_timing = round(model.s['model_data']['inf_time'], PRECISION)
    timings_df = pd.DataFrame.from_records(
        [(q_idx, ind_time, inf_timing)],
        columns=('q_idx', 'ind_time', 'inf_time'),
    )

    # Write results
    save_results(results_df, io_config)
    save_timings(timings_df, io_config)

    return
def induction(train_fname, fit_config, mod_config, io_config, machine=None):
    """
    Fit a model of the configured type on the training file.

    Parameters
    ----------
    train_fname
        Training file; passed to the model's `fit` (PxS, PxL) or read with
        pandas first (Mercs).
    fit_config
        Mapping, expanded into keyword arguments of `fit`.
    mod_config
        Mapping; "type" selects the model: 'PxS', 'PxL' or 'Mercs'.
    io_config
        Mapping with "fold", the working directory under
        ["dirs"]["prod-tmp"] and, for PxS/PxL, (fold, filename) pairs under
        ["file"]["net"].
    machine
        Forwarded to the PxL constructor; unused for other model types.

    Returns
    -------
    model
        The fitted model.

    Raises
    ------
    ValueError
        If the model type is not recognized, or if the fit method of a
        PxS/PxL model returns a non-zero code.
    """
    mod_type = mod_config['type']
    fold = io_config['fold']
    cwd = io_config['dirs']['prod-tmp']

    def _prepare_net_fname():
        # Network filename registered for this fold; its directory is
        # created on the way.
        registered = [entry[1] for entry in io_config['file']['net'] if entry[0] == fold]
        chosen = registered[0]
        ensure_dir(dirname(chosen), empty=False)
        return chosen

    def _require_success(code):
        # A non-zero return code of fit is treated as failure.
        if code != 0:
            raise ValueError(""" Code returned from fit method: {} """.format(code))

    if mod_type in {'PxS'}:
        net_fname = _prepare_net_fname()
        model = pxs.PxS()
        debug_print(
            """ Succesfully initialized PxS model. Net fname: {} """.format(net_fname),
            V=VERBOSITY,
        )
        _require_success(
            model.fit(train_fname, model_fname=net_fname, cwd=cwd, **fit_config)
        )
        return model

    if mod_type in {'PxL'}:
        net_fname = _prepare_net_fname()
        model = pxl.PxL(cwd=cwd, machine=machine)
        debug_print(
            """ Succesfully initialized PxL model. Net fname: {} """.format(net_fname),
            V=VERBOSITY,
        )
        _require_success(model.fit(i=train_fname, o=net_fname, **fit_config))
        return model

    if mod_type in {'Mercs'}:
        train = pd.read_csv(train_fname)
        model = mercs.MERCS()
        model.fit(train, **fit_config, delimiter='.')
        return model

    raise ValueError(""" Did not recognize model type: {} """.format(mod_type))