Beispiel #1
0
def main():
    """
    Assemble the fit command and run it as a monitored process.

    The command is built by `generate_command` from hard-coded relative
    paths and handed to `run_process` together with the monitor(s) returned
    by `generate_monitor`. The process is started with `cwd=this_dir`
    (`this_dir` is defined outside this function).

    Returns
    -------
    None
    """
    # Relative locations of the executable, its configuration and its log.
    # An earlier variant derived these from `this_dir` via os.path.join.
    executable = "./resc/fit"
    config_path = "resc/config.json"
    log_path = "prod/log.txt"

    # Command line: <executable> -c <config>
    command = generate_command(executable,
                               config_path,
                               script_prefix="",
                               config_prefix="-c")

    # NOTE(review): 360 is presumably a timeout in seconds -- confirm
    # against generate_monitor.
    monitors = generate_monitor(log_path, 360)

    debug_print("""
        cmd:    {}
        """.format(command), V=VERBOSITY)

    run_process(command, monitors=monitors, cwd=this_dir)
Beispiel #2
0
def main(config_fname):
    """
    Generate and save the commands and output filenames of an exploration.

    Parameters
    ----------
    config_fname:
        Configuration file, handed to `load_config`. The resulting dict must
        provide the keys "child", "io" and "explore", plus an entry named
        after the value of "child"; "start-idx" is optional.

    Returns
    -------
    None
    """
    cfg = load_config(config_fname)
    child_name = cfg["child"]
    io_cfg = cfg["io"]

    debug_print("""
    Directories in this experiment are: {}
    """.format(io_cfg["dirs"]), level=2, V=VERBOSITY)

    # Exploration: a missing "start-idx" is passed on as None and resolved
    # by determine_start_idx.
    explore_spec = cfg["explore"]
    first_idx = determine_start_idx(cfg.get("start-idx", None), io_cfg)
    explored = explore(explore_spec, first_idx)

    # The static parameters of the child are stored under the child's name.
    generated = generate_outputs(child_name, explored, cfg[child_name])
    cmd_head, cmd_rows, ofn_head, ofn_rows = generated

    # Persist the exploration, the commands and the output filenames.
    save_explore(explored, io_cfg)
    save_outputs(cmd_head, cmd_rows, io_cfg, kind="commands")
    save_outputs(ofn_head, ofn_rows, io_cfg, kind="outp_fns")
Beispiel #3
0
def inference_and_evaluation(model, test_data, qry_codes, vsd_config,
                             eval_config):
    """
    Predict every query on the test data and score the predictions.

    Parameters
    ----------
    model:
        Model offering `predict(test_data, q_code=...)`; after each
        prediction `model.model_data["inf_time"]` is read.
    test_data:
        Test data; indexed as `test_data[:, columns]` to obtain the true
        values of the target attributes of a query.
    qry_codes:
        Query codes, one per query; decoded by `codes_to_query`.
    vsd_config:
        Not used by this function; kept so the signature stays unchanged.
    eval_config: dict
        `eval_config["kinds"]` lists the keys of `eval_dict` to evaluate
        (default: `["macro_f1"]`).

    Returns
    -------
    results_df: pd.DataFrame
        One row per query: `q_idx` followed by one column per metric.
    inf_timing: np.ndarray
        Inference time per query, rounded to `PRECISION` decimals.
    """
    # Extract config
    _, q_targ, _ = codes_to_query(qry_codes)
    eval_kinds = eval_config.get("kinds", ["macro_f1"])

    # One placeholder per argument: previously qry_codes was printed under
    # the "eval_kinds" label and eval_kinds was silently dropped.
    msg = """
    run_VersaDummy.py
    qry_codes:  {}
    eval_kinds: {}
    """.format(qry_codes, eval_kinds)
    debug_print(msg, V=VERBOSITY)

    # Initialize: every row starts as a placeholder and is overwritten below.
    head_tuple = ("q_idx", *eval_kinds)
    data_tuple = tuple([0] + [0.0] * len(eval_kinds))
    tuple_list = [data_tuple] * len(qry_codes)

    inf_timing = np.zeros(len(qry_codes))

    # Actions
    for q_idx, q_code in enumerate(qry_codes):
        true_data = test_data[:, q_targ[q_idx]]
        pred_data = model.predict(test_data, q_code=q_code)

        evals = [
            round(eval_dict[kind](true_data, pred_data), PRECISION)
            for kind in eval_kinds
        ]
        tuple_list[q_idx] = q_idx, *evals
        inf_timing[q_idx] = round(model.model_data["inf_time"], PRECISION)

        # Release the predictions before the next query is handled.
        del pred_data

    results_df = pd.DataFrame.from_records(tuple_list, columns=head_tuple)

    return results_df, inf_timing
def main(config_fname, fold):
    """
    Induce a model on one fold and save it together with its configuration.

    Parameters
    ----------
    config_fname:
        Configuration file, handed to `load_config`. The "io" section is
        mandatory; "mod", "fit" and "machine" are optional.
    fold:
        Fold identifier; stored in the io configuration under 'fold'.

    Returns
    -------
    int
        Always 0.
    """
    settings = load_config(config_fname)

    # Split the configuration into its sections.
    io_config = settings['io']
    mod_config = settings.get("mod", {})
    fit_config = settings.get("fit", {})
    machine = settings.get("machine", None)

    # The fold arrives as a parameter; record it alongside the io settings.
    io_config['fold'] = fold

    # NOTE(review): other debug_print call sites pass `level=`; confirm that
    # `l` is an accepted keyword of debug_print.
    debug_print("""
    io_config: {}
    """.format(io_config), V=VERBOSITY, l=2)

    training_file = load_input(io_config)

    fitted_model = induction(training_file,
                             fit_config,
                             mod_config,
                             io_config,
                             machine=machine)

    # Store the model next to the settings it was built with.
    save_model_and_model_config(fitted_model,
                                io_config,
                                {'mod': mod_config, 'fit': fit_config})

    return 0
Beispiel #5
0
def save_outputs(head_tuple, tuple_list, io_config, kind="commands"):
    """
    Write a table of records to the csv file registered for `kind`.

    Parameters
    ----------
    head_tuple:
        Column names of the table.
    tuple_list:
        Records, one per row. May be empty, in which case a table without
        rows is written.
    io_config: dict
        `io_config["file"][kind]` is the filename that is written.
    kind: str
        Key that selects the filename (default "commands").

    Returns
    -------
    None
    """
    fname = io_config["file"][kind]

    # Only inspect the first record when there is one: the diagnostic
    # message must not abort the save of an empty table with an IndexError.
    has_rows = len(tuple_list) > 0
    first_row = tuple_list[0] if has_rows else None
    first_len = len(first_row) if has_rows else 0

    msg = """
    Head Tuple:             {}
    len(tuple_list[0]):     {}
    tuple_list[0]:          {}
    """.format(head_tuple, first_len, first_row)
    debug_print(msg, V=VERBOSITY)

    df = pd.DataFrame.from_records(tuple_list, columns=head_tuple)
    df.to_csv(fname)
    return
def _build_commands(fnames, shuffle=True):
    """
    Build commands from parameter .json file(s)

    For each .json file a fresh `RunExp` instance is configured with the
    file's parameters, its configuration is saved and it is run. The
    commands it produced are then loaded and collected, and the instance
    itself is pickled to the filename found in its own configuration
    (`exp.config["io"]["file"]["RunExp"]`).

    Parameters
    ----------
    fnames:
        Iterable of filenames of .json parameter files.
    shuffle: bool
        If True (default), the rows of the combined table are put in random
        order and the index is reset.

    Returns
    -------
    pd.DataFrame
        The commands of all parameter files. Empty if `fnames` is empty.
    """

    # Collect the per-file tables and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0, and appending inside the
    # loop copied the accumulated table on every iteration.
    frames = []
    for fname in fnames:
        msg = """
        Looking at parameters file: {}
        """.format(fname)
        debug_print(msg, V=VERBOSITY)

        with open(fname, "r") as f:
            parameters = json.load(f)

        # Init
        exp = RunExp()

        # Config
        exp.make_config(**parameters)
        exp.save_config()

        # Generate commands of RunExp's children
        exp.run()

        # Load commands
        frames.append(exp.load_output(kind="commands"))

        # Keep the experiment object itself on disk as well.
        exp_fname = exp.config["io"]["file"]["RunExp"]
        with open(exp_fname, "wb") as f:
            pkl.dump(exp, f)

    # pd.concat refuses an empty list, so fall back to an empty table.
    commands_df = pd.concat(frames) if frames else pd.DataFrame()

    if shuffle:
        # Shuffle all commands, cf. https://stackoverflow.com/questions/29576430/shuffle-dataframe-rows
        commands_df = commands_df.sample(frac=1).reset_index(drop=True)

    return commands_df
Beispiel #7
0
def load_mod(io_config):
    """
    Load the pickled model that is registered for the current fold.

    Parameters
    ----------
    io_config: dict
        `io_config['fold']` is the fold to look for and
        `io_config['file']['load-mod']` holds indexable entries whose
        position 0 is a fold and position 1 a model filename.

    Returns
    -------
    mod
        The unpickled model.
    ind_time
        The model's `s['model_data']['ind_time']`, rounded to `PRECISION`.

    Raises
    ------
    IndexError
        If no entry matches the fold.
    """
    current_fold = io_config['fold']

    # First filename whose fold matches.
    registered = io_config['file']['load-mod']
    matching = [entry[1] for entry in registered if entry[0] == current_fold]
    mod_fname = matching[0]

    debug_print("""
    Loading an external model:      {}
    """.format(mod_fname), V=VERBOSITY)

    # NOTE: unpickling executes code stored in the file; only load model
    # files from a trusted source.
    with open(mod_fname, 'rb') as handle:
        mod = pkl.load(handle)

    ind_time = round(mod.s['model_data']['ind_time'], PRECISION)

    return mod, ind_time
Beispiel #8
0
def induction(train_fname, smile_config):
    """
    Fit a fresh PxS model and report its induction time.

    Parameters
    ----------
    train_fname:
        Training data, passed as first argument to `model.fit`.
    smile_config: dict
        Keyword arguments for `model.fit`.

    Returns
    -------
    model
        The PxS instance after `fit` was called.
    ind_time
        `model.s['model_data']['ind_time']`, rounded to `PRECISION`.
    """
    model = pxs.PxS()
    debug_print("""
    Succesfully initialized PxS model.
    """, V=VERBOSITY)

    # The value returned by fit is only reported, not checked.
    fit_code = model.fit(train_fname, **smile_config)
    debug_print("""
    Code returned from fit method: {}
    """.format(fit_code), V=VERBOSITY)

    return model, round(model.s['model_data']['ind_time'], PRECISION)
def induction(train_fname, cwd, machine, cfg_fit):
    """
    Fit a fresh PxL model and report its induction time.

    Parameters
    ----------
    train_fname:
        Training data, passed to `model.fit` as keyword `i`.
    cwd:
        Passed to the PxL constructor as `cwd`.
    machine:
        Passed to the PxL constructor as `machine`.
    cfg_fit: dict
        Further keyword arguments for `model.fit`.

    Returns
    -------
    model
        The PxL instance after `fit` was called.
    ind_time
        `model.s['model_data']['ind_time']`, rounded to `PRECISION`.
    """
    model = pxl.PxL(cwd=cwd, machine=machine)
    debug_print("""
    Succesfully initialized PxL model.

    Cfg_fit:            {}
    """.format(cfg_fit), V=VERBOSITY)

    # The value returned by fit is only reported, not checked.
    fit_code = model.fit(i=train_fname, **cfg_fit)
    debug_print("""
    Code returned from fit method: {}
    """.format(fit_code), V=VERBOSITY)

    return model, round(model.s['model_data']['ind_time'], PRECISION)
def save_model_and_model_config(model, io_config, mod_config):
    """
    Persist a model (pickle) together with its configuration (json).

    Parameters
    ----------
    model:
        Object to pickle.
    io_config: dict
        `io_config['file']['mod-config']` is the json filename;
        `io_config['file']['mod']` holds indexable entries whose position 0
        is a fold and position 1 a model filename. The entry matching
        `io_config['fold']` is used.
    mod_config:
        Configuration to dump as json.

    Returns
    -------
    None
    """
    current_fold = io_config['fold']
    config_path = io_config['file']['mod-config']
    fold_paths = [entry[1] for entry in io_config['file']['mod']
                  if entry[0] == current_fold]
    model_path = fold_paths[0]

    # Configuration first, as indented json.
    ensure_dir(dirname(config_path), empty=False)
    with open(config_path, 'w') as config_file:
        json.dump(mod_config, config_file, indent=4)

    # Then the model itself.
    ensure_dir(dirname(model_path), empty=False)
    with open(model_path, 'wb') as model_file:
        pkl.dump(model, model_file)

    debug_print("""
    Successful save of model to: {}
    """.format(model_path), V=VERBOSITY)
Beispiel #11
0
def main(csv_fname, cmd_idx):
    """
    Run one command out of a csv file that lists many commands.

    Every row of the csv file describes one command; the column names are
    the parameter names of `run_script` and the row values the arguments.
    The row at position `cmd_idx` is bound to the signature of `run_script`
    and executed.

    Parameters
    ----------
    csv_fname: str
        Filename of the csv containing all commands. Its first column is
        read as the index.
    cmd_idx: int
        Position of the row that corresponds to the command to be run.

    Returns
    -------
    None
    """
    assert isinstance(cmd_idx, int)
    assert isinstance(csv_fname, str)

    # Turn the selected row into a {column name: value} mapping.
    commands = pd.read_csv(csv_fname, index_col=0)
    names = tuple(commands.columns)
    values = tuple(commands.iloc[cmd_idx])
    param_dict = dict(zip(names, values))

    debug_print("""
    param_dict: {}
    """.format(param_dict), V=VERBOSITY)

    # Binding first makes a mismatch between the columns and the parameters
    # of run_script fail before the script is started.
    bound = signature(run_script).bind(**param_dict)
    run_script(*bound.args, **bound.kwargs)
Beispiel #12
0
def determine_start_idx(start_idx, io_config):
    """
    Return the start index, detecting it from disk when none is given.

    Parameters
    ----------
    start_idx: int or None
        Explicit start index. When it is an int it is returned unchanged.
    io_config: dict
        Only consulted when `start_idx` is None; `io_config["dirs"]["prod"]`
        is the directory whose grandparent is scanned.

    Returns
    -------
    int
        `start_idx` itself, or one more than the largest index reported by
        `detect_largest_idx_in_directory` over all entries of the scanned
        directory.
    """
    assert isinstance(start_idx, (int, type(None)))

    # An explicit index wins; nothing needs to be detected.
    if start_idx is not None:
        return start_idx

    configured = io_config["dirs"]["prod"]
    prod_dir = dirname(dirname(configured))

    debug_print("""
        io_config['dirs']['prod']: {}
        prod_dir assumed to be: {}
        """.format(configured, prod_dir), V=VERBOSITY)

    # NOTE(review): max() raises ValueError when prod_dir has no entries.
    candidates = [os.path.join(prod_dir, name) for name in os.listdir(prod_dir)]
    largest = max(detect_largest_idx_in_directory(path) for path in candidates)
    detected = largest + 1

    debug_print("""
        Automatically detected start-idx: {}
        """.format(detected), V=VERBOSITY)

    return detected
Beispiel #13
0
def main(config_fname, fold):
    """
    Induce (or load) a model for one fold and evaluate it on all queries.

    Parameters
    ----------
    config_fname:
        Configuration file, handed to `load_config`. The sections 'io',
        'smile' and 'eval' are required.
    fold:
        Fold identifier; stored in the io configuration under 'fold'.

    Returns
    -------
    None
    """
    settings = load_config(config_fname)

    io_config = settings['io']
    pxs_config = settings['smile']

    io_config['fold'] = fold
    # A model is loaded from disk whenever a 'load-mod' entry is present
    # (i.e. is not None); otherwise a new one is induced.
    io_config['load'] = io_config['file'].get('load-mod', None) is not None

    debug_print("""
    io_config: {}
    """.format(io_config), V=VERBOSITY)

    train_fname, test_fname = load_input(io_config)
    qry_codes = load_qry_codes(io_config)
    pxs_config['cwd'] = io_config['dirs']['prod-tmp']

    # Induce, unless an existing model was requested.
    if not io_config['load']:
        model, ind_time = induction(train_fname, pxs_config)
    else:
        model, ind_time = load_mod(io_config)

    # Inference + Evaluation
    inference_and_evaluation(model,
                             test_fname,
                             qry_codes,
                             pxs_config,
                             settings['eval'],
                             io_config,
                             ind_time)
Beispiel #14
0
    # Execute
    subprocess.call(bash, env=env)

    return


# For executable script
if __name__ == '__main__':

    # Command-line interface:
    #   --commands / -c   filename of the commands file (mandatory)
    #   --local / -l      flag, True when given
    parser = argparse.ArgumentParser()
    # `required=True` makes argparse reject a missing filename with a usage
    # message. Previously this was guarded by an `assert` that ran only
    # after the first use and disappears under `python -O`.
    parser.add_argument('--commands', '-c',
                        required=True,
                        help='commands_fname_outer_scope')
    parser.add_argument('--local', '-l',
                        help='local_outer_scope, local yes/no',
                        action="store_true")

    args = parser.parse_args()

    commands_fname_outer_scope = args.commands
    local_outer_scope = args.local

    msg = """
    We are running local: {}
    """.format(local_outer_scope)
    debug_print(msg, V=VERBOSITY)

    main(commands_fname_outer_scope, local=local_outer_scope)
Beispiel #15
0
def inference_and_evaluation(model,
                             test_fname,
                             qry_codes,
                             smile_config,
                             eval_config,
                             io_config,
                             ind_time):
    """
    Predict every query with a PxS model, score it and save the results.

    Results and timings are saved after every tenth query (starting with
    the first) and once more at the end. Rows of queries that were not
    handled yet hold placeholder zeros in those intermediate saves.

    Parameters
    ----------
    model:
        Model offering `predict(test_fname, **smile_config)`; after each
        prediction `model.s['model_data']['inf_time']` is read.
    test_fname:
        csv file with the test data; read with `pd.read_csv` and also
        passed to `model.predict`.
    qry_codes:
        Query codes, one per query; decoded by `codes_to_query`.
    smile_config: dict
        Keyword arguments for `model.predict`. Modified in place: the keys
        'miss_idx', 'targ_idx' and 'q_idx' are overwritten for every query.
    eval_config: dict
        `eval_config['kinds']` lists the keys of `eval_dict` to evaluate
        (default: `['macro_f1']`).
    io_config:
        Passed on to `save_results` and `save_timings`.
    ind_time:
        Induction time, passed on to `tidy_timings`.

    Returns
    -------
    None
    """
    # Extract config
    _, q_targ, q_miss = codes_to_query(qry_codes)
    eval_kinds = eval_config.get('kinds', ['macro_f1'])

    # One placeholder per argument: previously qry_codes was printed under
    # the "eval_kinds" label and eval_kinds was silently dropped.
    msg = """
    run_PxS.py
    qry_codes:  {}
    eval_kinds: {}
    """.format(qry_codes, eval_kinds)
    debug_print(msg, V=VERBOSITY)

    # Initialize
    head_tuple = ('q_idx', *eval_kinds)
    data_tuple = tuple([0] + [0.0] * len(eval_kinds))
    tuple_list = [data_tuple] * len(qry_codes)

    test_data = pd.read_csv(test_fname)
    test_values = test_data.values  # Loop-invariant, so extracted once.
    inf_timing = np.zeros(len(qry_codes))

    def _save_progress():
        # Write the results and timings gathered so far.
        results_df = pd.DataFrame.from_records(tuple_list, columns=head_tuple)
        timings_df = tidy_timings(ind_time, inf_timing)

        save_results(results_df, io_config)
        save_timings(timings_df, io_config)

    # Actions
    for q_idx, _ in enumerate(qry_codes):
        smile_config['miss_idx'] = np.array(q_miss[q_idx]).tolist() # For converting types.
        smile_config['targ_idx'] = np.array(q_targ[q_idx]).tolist()
        smile_config['q_idx'] = q_idx

        msg = """
        Diagnostics about query given to PxS:
        {}
        """.format(smile_config)
        debug_print(msg, V=VERBOSITY)

        true_data = test_values[:, q_targ[q_idx]]
        pred_data = model.predict(test_fname,
                                  **smile_config)

        # Anything but an array counts as a failed prediction and is
        # replaced by an array of NaN shaped like the true data.
        if not isinstance(pred_data, np.ndarray):
            pred_data = np.full_like(true_data, fill_value=np.nan)

        uvals_pred_data = np.unique(pred_data)
        msg = """
        Diagnostics about pred_data.
        Unique values:  {}
        Shape:          {}
        Type:           {}
        """.format(uvals_pred_data, pred_data.shape, pred_data.dtype)
        debug_print(msg, V=VERBOSITY)

        evals = [round(eval_dict[kind](true_data, pred_data), PRECISION)
                 for kind in eval_kinds]

        tuple_list[q_idx] = q_idx, *evals
        inf_timing[q_idx] = round(model.s['model_data']['inf_time'], PRECISION)

        del pred_data

        # Save every ten queries
        if q_idx % 10 == 0:
            _save_progress()

    _save_progress()

    return
def inference_and_evaluation(model, test_fname, qry_codes, pred_config,
                             eval_config, io_config, ind_time):
    """
    Predict every query, score the predictions and write the outputs.

    Outputs are written through `write_outputs` after every tenth query
    (starting with the first) and once more at the end. Rows of queries
    that were not handled yet hold placeholder zeros in those intermediate
    writes.

    Parameters
    ----------
    model:
        Model offering `predict(test_fname, **pred_config)`; after each
        prediction `model.s['model_data']['inf_time']` is read.
    test_fname:
        csv file with the test data; read with `pd.read_csv(...,
        header=None)` and also passed to `model.predict`.
    qry_codes:
        Query codes, one per query; decoded by `codes_to_query`.
    pred_config: dict
        Keyword arguments for `model.predict`. Modified in place: the keys
        'miss_idx', 'targ_idx' and 'q_idx' are overwritten for every query.
    eval_config: dict
        `eval_config['kinds']` lists the keys of `eval_dict` to evaluate
        (default: `['macro_f1']`).
    io_config:
        Passed on to `write_outputs`.
    ind_time:
        Induction time, passed on to `write_outputs`.

    Returns
    -------
    None
    """
    # Extract config
    _, q_targ, q_miss = codes_to_query(qry_codes)
    eval_kinds = eval_config.get('kinds', ['macro_f1'])

    # One placeholder per argument: previously qry_codes was printed under
    # the "eval_kinds" label and eval_kinds was silently dropped.
    msg = """
    {}
    qry_codes:  {}
    eval_kinds: {}
    """.format(__file__, qry_codes, eval_kinds)
    debug_print(msg, V=VERBOSITY)

    # Initialize
    head_tuple = ('q_idx', *eval_kinds)
    data_tuple = tuple([0] + [0.0] * len(eval_kinds))
    tuple_list = [data_tuple] * len(qry_codes)

    test_data = pd.read_csv(test_fname, header=None)
    test_values = test_data.values  # Loop-invariant, so extracted once.
    inf_timing = np.zeros(len(qry_codes))

    # Actions
    for q_idx, _ in enumerate(qry_codes):
        pred_config['miss_idx'] = np.array(
            q_miss[q_idx]).tolist()  # For converting types.
        pred_config['targ_idx'] = np.array(q_targ[q_idx]).tolist()
        pred_config['q_idx'] = q_idx

        msg = """
        cfg_pred:   {}
        """.format(pred_config)
        debug_print(msg, V=VERBOSITY)

        true_data = test_values[:, q_targ[q_idx]]
        pred_data = model.predict(test_fname, **pred_config)

        if not isinstance(pred_data, np.ndarray):
            # If all goes wrong, fall back to an array of NaN shaped like
            # the true data.
            pred_data = np.full_like(true_data, fill_value=np.nan)

        # region Advanced diagnostics for debugging.
        if VERBOSITY > 0:
            uvals_pred_data = np.unique(pred_data)
            msg = """
            Diagnostics about pred_data.
            Unique values:  {}
            Shape:          {}
            Type:           {}

            Diagnostics about true_data.
            Shape:          {}
            Type:           {}
            """.format(uvals_pred_data, pred_data.shape, pred_data.dtype,
                       true_data.shape, true_data.dtype)
            debug_print(msg, V=VERBOSITY)
        # endregion

        evals = [
            round(eval_dict[kind](true_data, pred_data), PRECISION)
            for kind in eval_kinds
        ]

        tuple_list[q_idx] = q_idx, *evals
        inf_timing[q_idx] = round(model.s['model_data']['inf_time'], PRECISION)

        del pred_data

        # Save every ten queries
        if q_idx % 10 == 0:
            write_outputs(tuple_list, head_tuple, ind_time, inf_timing,
                          io_config)

    write_outputs(tuple_list, head_tuple, ind_time, inf_timing, io_config)

    return
def main(config_fname, fold, q_idx):
    """
    Run a certain system.

    Stages
    ^^^^^^

        1. Load configurations
            In general, we hold on to a `program(config)` kind of philosophy,
            meaning that in principle, we expect this script to be called with
            a single argument which is a configfile. In this configuration file,
            all the details of how the script is supposed to function are to be
            included.

            Hence, the first step is to extract these configurations. Different
            configurations come from different places, but all of them are dicts,
            saved as json files.

            Entries of the 'PxL' section are split by their key prefix:
            keys starting with 'fit.' become fit parameters and keys
            starting with 'predict.' become prediction parameters, with the
            prefix removed.
        1.b Alter configurations
            A few important parameters are passed on as cmd-parameters, such as
            fold and query-idx. This because it would be overkill to generate
            separate configfiles for each of those. For consistency later on,
            we explicitly put them in the configurations as well.
        2. Induction
            We induce or load our predictive model. Before doing so, we already
            extract some relevant parameters from the config files.
        3. Inference
            We use our model to perform inference tasks (predictions.)


    Parameters
    ----------
    config_fname
        Configuration file, handed to `load_config`.
    fold
        Fold identifier; stored in the io configuration under 'fold'.
    q_idx
        Query index; stored in the io configuration under 'q_idx'.

    Returns
    -------
    None
    """

    # Load config (all different categories)
    config = load_config(config_fname)

    io_config = config['io']
    pxl_config = config.get('PxL', {})

    def _with_prefix(prefix):
        # Select the entries whose key STARTS with `prefix` and strip exactly
        # that leading prefix. Substring matching with str.replace mangled
        # keys such as 'fit.outfit.x' and cross-assigned 'predict.fit.y'.
        return {k[len(prefix):]: v for k, v in pxl_config.items()
                if k.startswith(prefix)}

    fit_config = _with_prefix('fit.')
    pred_config = _with_prefix('predict.')

    # Alter configuration with cmd-parameters
    io_config['fold'] = fold
    # A model is loaded from disk whenever a 'load-mod' entry is present
    # (i.e. is not None).
    io_config['load'] = io_config['file'].get('load-mod', None) is not None
    io_config['q_idx'] = q_idx

    msg = """
    io_config: {}
    """.format(io_config)
    debug_print(msg, V=VERBOSITY, level=2)

    # Extract relevant parameters
    train_fname, test_fname = load_input(io_config)
    cwd = io_config['dirs']['prod-tmp']
    machine = config.get("machine", None)
    qry_codes = load_qry_codes(io_config)

    # Induce
    if io_config['load']:
        msg = """
        Loading model from disk; {}
        """.format(io_config['file']['load-mod'])
        debug_print(msg, V=VERBOSITY)
        model, ind_time = load_mod(io_config, cwd, machine)
    else:
        model, ind_time = induction(train_fname, cwd, machine, fit_config)

    # Inference + Evaluation
    eval_config = config['eval']
    inference_and_evaluation(model,
                             test_fname,
                             qry_codes,
                             pred_config,
                             eval_config,
                             io_config,
                             ind_time)

    return
def inference_and_evaluation(model,
                             test_fname,
                             qry_codes,
                             pred_config,
                             eval_config,
                             io_config,
                             ind_time):
    """
    Predict a single query, score the prediction and save the outputs.

    The query that is handled is selected by `io_config['q_idx']`.

    Parameters
    ----------
    model:
        Model offering `predict(test_fname, **pred_config)`; after the
        prediction `model.s['model_data']['inf_time']` is read.
    test_fname:
        csv file with the test data; read with `pd.read_csv(...,
        header=None)` and also passed to `model.predict`.
    qry_codes:
        Query codes; decoded by `codes_to_query`.
    pred_config: dict
        Keyword arguments for `model.predict`. Modified in place: the keys
        'miss_idx', 'targ_idx' and 'q_idx' are overwritten.
    eval_config: dict
        `eval_config['kinds']` lists the keys of `eval_dict` to evaluate
        (default: `['macro_f1']`).
    io_config: dict
        Provides 'q_idx'; also passed on to `save_results` and
        `save_timings`.
    ind_time:
        Induction time, stored in the timings table.

    Returns
    -------
    None
    """
    # Extract config
    q_idx = io_config['q_idx']
    _, q_targ, q_miss = codes_to_query(qry_codes)
    eval_kinds = eval_config.get('kinds', ['macro_f1'])

    # One placeholder per argument: previously qry_codes was printed under
    # the "eval_kinds" label and eval_kinds was silently dropped.
    msg = """
    {}
    qry_codes:  {}
    eval_kinds: {}
    """.format(__file__, qry_codes, eval_kinds)
    debug_print(msg, V=VERBOSITY)

    # Initialize
    head_tuple = ('q_idx', *eval_kinds)

    # Actions
    pred_config['miss_idx'] = np.array(q_miss[q_idx]).tolist()  # For converting types.
    pred_config['targ_idx'] = np.array(q_targ[q_idx]).tolist()
    pred_config['q_idx'] = q_idx

    msg = """
    cfg_pred:   {}
    """.format(pred_config)
    debug_print(msg, V=VERBOSITY)

    test_data = pd.read_csv(test_fname, header=None)
    true_data = test_data.values[:, q_targ[q_idx]]

    pred_data = model.predict(test_fname,
                              **pred_config)

    if not isinstance(pred_data, np.ndarray):
        # If all goes wrong, fill in np.NaN
        pred_data = np.full_like(true_data, fill_value=np.nan)

    # region Advanced diagnostics for debugging.
    if VERBOSITY > 0:
        uvals_pred_data = np.unique(pred_data)
        msg = """
        Diagnostics about pred_data.
        Unique values:  {}
        Shape:          {}
        Type:           {}

        Diagnostics about true_data.
        Shape:          {}
        Type:           {}
        """.format(uvals_pred_data, pred_data.shape, pred_data.dtype, true_data.shape, true_data.dtype)
        debug_print(msg, V=VERBOSITY)
    # endregion

    # Evaluation
    evals = [round(eval_dict[kind](true_data, pred_data), PRECISION)
             for kind in eval_kinds]

    # Collect results: a single row for the single query.
    results_df = pd.DataFrame.from_records([(q_idx, *evals)],
                                           columns=head_tuple)

    # Collect timings
    inf_timing = round(model.s['model_data']['inf_time'], PRECISION)
    timings_tuple = (q_idx, ind_time, inf_timing)
    timings_df = pd.DataFrame.from_records([timings_tuple],
                                           columns=('q_idx', 'ind_time', 'inf_time'))

    # Write results
    save_results(results_df, io_config)
    save_timings(timings_df, io_config)

    return
def induction(train_fname, fit_config, mod_config, io_config, machine=None):
    """
    Fit a model of the type named in the model configuration.

    Parameters
    ----------
    train_fname:
        Training data file.
    fit_config: dict
        Keyword arguments forwarded to the model's `fit`.
    mod_config: dict
        `mod_config['type']` selects the model: 'PxS', 'PxL' or 'Mercs'.
    io_config: dict
        Provides 'fold' and `['dirs']['prod-tmp']`. For 'PxS' and 'PxL',
        `['file']['net']` holds indexable entries whose position 0 is a
        fold and position 1 a network filename.
    machine:
        Only used for 'PxL', where it is passed to the constructor.

    Returns
    -------
    model
        The fitted model.

    Raises
    ------
    ValueError
        If the model type is not recognized, or if the `fit` of a PxS or
        PxL model returns something other than 0.
    """
    # Config
    kind = mod_config['type']
    current_fold = io_config['fold']
    work_dir = io_config['dirs']['prod-tmp']

    def _net_fname_for_fold():
        # Network filename registered for this fold; its directory is
        # ensured via ensure_dir before the filename is handed out.
        matches = [entry[1] for entry in io_config['file']['net']
                   if entry[0] == current_fold]
        fname = matches[0]
        ensure_dir(dirname(fname), empty=False)
        return fname

    def _raise_unless_zero(code):
        # A return code other than 0 aborts the induction.
        if code == 0:
            return
        raise ValueError("""
            Code returned from fit method: {}
            """.format(code))

    # Actions
    if kind in {'PxS'}:
        net_fname = _net_fname_for_fold()
        model = pxs.PxS()

        debug_print("""
        Succesfully initialized PxS model.
        
        Net fname: {}
        """.format(net_fname), V=VERBOSITY)

        _raise_unless_zero(model.fit(train_fname,
                                     model_fname=net_fname,
                                     cwd=work_dir,
                                     **fit_config))

    elif kind in {'PxL'}:
        net_fname = _net_fname_for_fold()
        model = pxl.PxL(cwd=work_dir, machine=machine)

        debug_print("""
        Succesfully initialized PxL model.

        Net fname: {}
        """.format(net_fname), V=VERBOSITY)

        _raise_unless_zero(model.fit(i=train_fname, o=net_fname, **fit_config))

    elif kind in {'Mercs'}:
        # MERCS is fitted on the loaded table rather than on the filename.
        training_table = pd.read_csv(train_fname)
        model = mercs.MERCS()
        model.fit(training_table, **fit_config, delimiter='.')

    else:
        raise ValueError("""
            Did not recognize model type: {}
            """.format(kind))

    return model