def load_configuration(neonate, verbose=False):
    """Load the neonate ABC configuration and its formatted data file.

    Reads ``config_files/abc/neo_config.json`` (relative to the grandparent
    of the current working directory) and the neonate's formatted CSV, then
    assembles the run configuration dictionary.

    Parameters
    ----------
    neonate : str
        Identifier used to locate ``{neonate}_formatted.csv`` under
        ``data/formatted_data``.
    verbose : bool, optional
        If True, pretty-print the assembled configuration. Default False.

    Returns
    -------
    tuple
        ``(config, d0)`` — the configuration dict and the imported data.
    """
    here = Path(os.path.abspath(''))
    base_dir = here.parents[1]

    config_file = os.path.join(base_dir, 'config_files', 'abc',
                               'neo_config.json')
    with open(config_file, 'r') as conf_f:
        conf = json.load(conf_f)

    input_path = os.path.join(base_dir, 'data', 'formatted_data',
                              '{}_formatted.csv'.format(neonate))
    d0 = import_actual_data(input_path)

    config = {
        "model_name": conf['model_name'],
        "targets": conf['targets'],
        "times": d0['t'],
        "inputs": conf['inputs'],
        "parameters": conf['priors'],
        "input_path": input_path,
        "zero_flag": conf['zero_flag'],
    }

    if verbose:
        pprint(config)

    return config, d0
def load_configuration(model_version, dataset, verbose=False):
    """Load the bp_hypothermia ABC configuration and its input data.

    Reads ``config_files/abc/bp_hypothermia_{version}/
    bp_hypothermia_{version}_config.json`` (relative to the great-grandparent
    of the current working directory) and the dataset's filtered/formatted
    CSV, then assembles the run configuration dictionary.

    Parameters
    ----------
    model_version : str
        Version suffix selecting the ``bp_hypothermia_{version}`` config
        directory and JSON file.
    dataset : str
        Dataset name; upper-cased to locate
        ``{DATASET}_filtered_formatted.csv`` under ``data/clean_hypothermia``.
    verbose : bool, optional
        If True, pretty-print the assembled configuration. Default False.

    Returns
    -------
    tuple
        ``(config, d0)`` — the configuration dict and the imported data.
    """
    current_file = Path(os.path.abspath(''))
    config_file = os.path.join(
        current_file.parents[2], 'config_files', 'abc',
        'bp_hypothermia_{}'.format(model_version),
        'bp_hypothermia_{}_config.json'.format(model_version))
    with open(config_file, 'r') as conf_f:
        conf = json.load(conf_f)

    params = conf['priors']
    input_path = os.path.join(
        current_file.parents[2], 'data', 'clean_hypothermia',
        '{}_filtered_formatted.csv'.format(dataset.upper()))
    d0 = import_actual_data(input_path)

    config = {
        "model_name": conf['model_name'],
        "targets": conf['targets'],
        # Fix: include the measurement times, matching the neonate variant
        # of load_configuration. get_runs() reads conf['times'] and would
        # raise KeyError on configs produced here without it.
        "times": d0['t'],
        "inputs": conf['inputs'],
        "parameters": params,
        "input_path": input_path,
        "zero_flag": conf['zero_flag'],
    }

    if verbose:
        pprint(config)

    return config, d0
def get_runs(posterior, conf, n_repeats=50):
    """Run the model for a random subset of posterior parameter samples.

    Parameters
    ----------
    posterior : pandas.DataFrame
        Posterior samples; must contain a column per parameter named in
        ``conf['parameters']``.
    conf : dict
        Configuration as produced by ``load_configuration`` (uses
        'parameters', 'input_path', 'inputs', 'model_name', 'times',
        'targets' and 'zero_flag').
    n_repeats : int, optional
        Number of posterior rows to sample and run. Default 50.

    Returns
    -------
    list
        Model outputs, one per sampled posterior row.
    """
    param_names = list(conf['parameters'].keys())
    sampled_rows = posterior[param_names].values
    # Sample row indices without replacement.
    chosen = random.sample(range(posterior.shape[0]), n_repeats)

    true_data = import_actual_data(conf['input_path'])
    parsed_inputs = inputParse(true_data, conf['inputs'])

    runs = []
    # Original implementation popped from the end of the sample list,
    # so iterate in reverse to keep the identical processing order.
    for count, idx in enumerate(reversed(chosen)):
        print("\tSample {}, idx:{}".format(count, idx))
        params = dict(zip(param_names, sampled_rows[idx]))
        _, model_output = get_output(
            conf['model_name'], params, conf['times'], parsed_inputs,
            true_data, conf['targets'], distance="NRMSE",
            zero_flag=conf['zero_flag'])
        runs.append(model_output)

    return runs
def get_repeated_outputs(df,
                         model_name,
                         parameters,
                         input_path,
                         inputs,
                         targets,
                         n_repeats,
                         zero_flag,
                         neonate,
                         tolerance=None,
                         limit=None,
                         frac=None,
                         openopt_path=None,
                         offset=None,
                         distance='euclidean'):
    """Generate model output and distances multiple times.

    Selects the accepted posterior (by tolerance, explicit limit, or
    fraction), runs the model for a random subset of accepted parameter
    sets, and collects the outputs into one long-format DataFrame.

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Posterior samples with one column per parameter plus a distance
        column named by `distance`.
    model_name : :obj:`str`
        Names of model. Should match the modeldef file for model being
        generated i.e. model_name of 'model`' should have a modeldef file
        'model1.modeldef'.
    parameters : :obj:`dict` of :obj:`str`: :obj:`tuple`
        Dict of model parameters to compare, with value tuple of the prior
        max and min.
    input_path : :obj:`str`
        Path to the true data file
    inputs : :obj:`list` of :obj:`str`
        List of model inputs.
    targets : :obj:`list` of :obj:`str`
        List of model outputs against which the model is being optimised.
    n_repeats : :obj:`int`
        Number of times to generate output data
    zero_flag : dict
        Dictionary of form target(:obj:`str`): bool, where bool indicates
        whether to zero that target.

        Note: zero_flag keys should match targets list.
    neonate : :obj:`str`
        Neonate identifier; written into the "Neonate" column of the
        returned DataFrame.
    tolerance : :obj:`float` or :obj:`None`, optional
        Accept all rows with distance below this value. Checked first.
    limit : :obj:`int` or :obj:`None`, optional
        Accept exactly this many of the best rows. Checked second.
    frac : :obj:`float`, optional
        Fraction of results to consider. Should be given as a percentage
        i.e. 1=1%, 0.1=0.1%. Checked third; if none of tolerance/limit/frac
        is given, ValueError is raised.
    openopt_path : :obj:`str` or :obj:`None`
        Path to the openopt data file if it exists. Default is None.
    offset : :obj:`dict`
        Dictionary of offset parameters if they are needed
    distance : :obj:`str`, optional
        Distance measure. One of 'euclidean', 'manhattan', 'MAE', 'MSE'.

    Returns
    -------
    :obj:`tuple` of :obj:`pandas.DataFrame`
        ``(combined, true_data)`` — a long-format DataFrame with columns
        "Time", "Posterior", "Neonate" and "Output" concatenated over all
        targets, and the true data read from `input_path`.

    Raises
    ------
    ValueError
        If none of `tolerance`, `limit` or `frac` is given.
    """
    p_names = list(parameters.keys())
    sorted_df = df.sort_values(by=distance)

    # Determine the accepted posterior size; the three criteria are
    # mutually exclusive and checked in priority order.
    if tolerance:
        accepted_limit = sum(df[distance].values < tolerance)
    elif limit:
        accepted_limit = limit
    elif frac:
        accepted_limit = frac_calculator(sorted_df, frac)
    else:
        raise ValueError('No limit or fraction given.')

    df_list = []
    if n_repeats > accepted_limit:
        # Clamp the requested repeats so that sampling without replacement
        # from the accepted posterior is possible.
        print("Setting number of repeats to quarter of the posterior size\n",
              file=sys.stderr)
        n_repeats = int(accepted_limit / 4)
    d0 = import_actual_data(input_path)
    input_data = inputParse(d0, inputs)

    true_data = pd.read_csv(input_path)
    times = true_data['t'].values

    if openopt_path:
        # NOTE(review): openopt_data is read but never used in this
        # function — possibly leftover from an earlier version.
        openopt_data = pd.read_csv(openopt_path)

    # NOTE(review): this can never trigger — n_repeats was already clamped
    # to accepted_limit / 4 above. Kept for safety / historical reasons.
    if n_repeats > accepted_limit:
        raise ValueError(
            "Number of requested model runs greater than posterior size:"
            "\n\tPosterior Size: {}\n\tNumber of runs: {}".format(
                accepted_limit, n_repeats))

    # Shuffled queue of accepted-posterior row indices; runs that time out
    # or crash push their index back to the front to be retried later.
    rand_selection = list(range(accepted_limit))
    random.shuffle(rand_selection)

    outputs_list = []
    posteriors = sorted_df.iloc[:accepted_limit][p_names].values
    select_idx = 0
    with Timer("Running repeat outputs"):
        for i in range(n_repeats):
            try:
                idx = rand_selection.pop()
                p = dict(zip(p_names, posteriors[idx]))
                if offset:
                    # Merge offset parameters into the sampled set;
                    # offset values win on key collision.
                    p = {**p, **offset}
                output = get_output(
                    model_name,
                    p,
                    times,
                    input_data,
                    d0,
                    targets,
                    distance=distance,
                    zero_flag=zero_flag)
                outputs_list.append(output)
                print("Sample {}, idx:{}".format(len(outputs_list), idx))
            except (TimeoutError, TimeoutExpired) as e:
                print("Timed out for Sample {}, idx:{}".format(
                    len(outputs_list), idx))
                pprint.pprint(p)
                # Requeue the failed index for a potential later attempt.
                rand_selection.insert(0, idx)
            except (CalledProcessError) as e:
                print("CalledProcessError for Sample {}, idx:{}".format(
                    len(outputs_list), idx))
                pprint.pprint(p)
                rand_selection.insert(0, idx)
        print("Final number of runs is: {}".format(len(outputs_list)))

    # Each output o is assumed to be (errors_dict, outputs_dict) — the
    # return shape of get_output; TODO confirm against its definition.
    d = {"Errors": {}, "Outputs": {}}
    d['Errors']['Average'] = np.nanmean([o[0]['TOTAL'] for o in outputs_list])
    for target in targets:
        d['Errors'][target] = np.nanmean([o[0][target] for o in outputs_list])
        d['Outputs'][target] = [o[1][target] for o in outputs_list]

    for ii, target in enumerate(targets):
        # Repeat each time point once per run so that x lines up with the
        # time-major raveled output matrix below.
        x = [j for j in times for n in range(len(d['Outputs'][target]))]
        with Timer('Transposing {}'.format(target)):
            # (n_runs, n_times) -> (n_times, n_runs), then flatten.
            y = np.array(d['Outputs'][target]).transpose()
            y = y.ravel()
        with Timer("Crafting DataFrame for {}".format(target)):
            model_name_col = [neonate] * len(x)
            target_col = [target] * len(x)
            df1 = pd.DataFrame({
                "Time": x,
                "Posterior": y,
                "Neonate": model_name_col,
                "Output": target_col
            })
        with Timer("Appending dataframe for {}".format(target)):
            df_list.append(df1.copy())
            del df1
    return pd.concat(df_list), true_data
current_file.parents[3], 'data', 'ABC', 'nrmse_SA', MODEL_VERSION, DATASET) pfile = os.path.abspath(os.path.join( output_dir, 'reduced_sorted_parameters.csv') ) input_path = os.path.join(current_file.parents[3], 'data', 'clean_hypothermia', '{}_filtered_formatted.csv'.format(DATASET.upper())) d0 = import_actual_data(input_path) targets = conf['targets'] model_name = conf['model_name'] inputs = conf['inputs'] config = { "model_name": model_name, "targets": targets, "inputs": inputs, "parameters": params, "input_path": input_path, "zero_flag": conf['zero_flag'] } pprint(config)