def load_configuration(neonate, verbose=False):
    """Load the neonate ABC configuration and its formatted data file.

    Reads ``config_files/abc/neo_config.json`` (relative to the grandparent
    of the current working directory) and the neonate's formatted CSV, then
    assembles the run configuration dictionary.

    Parameters
    ----------
    neonate : str
        Identifier used to locate ``{neonate}_formatted.csv`` under
        ``data/formatted_data``.
    verbose : bool, optional
        If True, pretty-print the assembled configuration. Default False.

    Returns
    -------
    tuple
        ``(config, d0)`` — the configuration dict and the imported data.
    """
    here = Path(os.path.abspath(''))
    base_dir = here.parents[1]

    config_file = os.path.join(base_dir, 'config_files', 'abc',
                               'neo_config.json')
    with open(config_file, 'r') as conf_f:
        conf = json.load(conf_f)

    input_path = os.path.join(base_dir, 'data', 'formatted_data',
                              '{}_formatted.csv'.format(neonate))
    d0 = import_actual_data(input_path)

    config = {
        "model_name": conf['model_name'],
        "targets": conf['targets'],
        "times": d0['t'],
        "inputs": conf['inputs'],
        "parameters": conf['priors'],
        "input_path": input_path,
        "zero_flag": conf['zero_flag'],
    }

    if verbose:
        pprint(config)

    return config, d0
def load_configuration(model_version, dataset, verbose=False):
    """Load the bp_hypothermia ABC configuration and its input data.

    Reads ``config_files/abc/bp_hypothermia_{version}/
    bp_hypothermia_{version}_config.json`` (relative to the great-grandparent
    of the current working directory) and the dataset's filtered/formatted
    CSV, then assembles the run configuration dictionary.

    Parameters
    ----------
    model_version : str
        Version suffix selecting the ``bp_hypothermia_{version}`` config
        directory and JSON file.
    dataset : str
        Dataset name; upper-cased to locate
        ``{DATASET}_filtered_formatted.csv`` under ``data/clean_hypothermia``.
    verbose : bool, optional
        If True, pretty-print the assembled configuration. Default False.

    Returns
    -------
    tuple
        ``(config, d0)`` — the configuration dict and the imported data.
    """
    current_file = Path(os.path.abspath(''))
    config_file = os.path.join(
        current_file.parents[2], 'config_files', 'abc',
        'bp_hypothermia_{}'.format(model_version),
        'bp_hypothermia_{}_config.json'.format(model_version))
    with open(config_file, 'r') as conf_f:
        conf = json.load(conf_f)

    params = conf['priors']
    input_path = os.path.join(
        current_file.parents[2], 'data', 'clean_hypothermia',
        '{}_filtered_formatted.csv'.format(dataset.upper()))
    d0 = import_actual_data(input_path)

    config = {
        "model_name": conf['model_name'],
        "targets": conf['targets'],
        # Fix: include the measurement times, matching the neonate variant
        # of load_configuration. get_runs() reads conf['times'] and would
        # raise KeyError on configs produced here without it.
        "times": d0['t'],
        "inputs": conf['inputs'],
        "parameters": params,
        "input_path": input_path,
        "zero_flag": conf['zero_flag'],
    }

    if verbose:
        pprint(config)

    return config, d0
def get_runs(posterior, conf, n_repeats=50):
    """Run the model for a random subset of posterior parameter samples.

    Parameters
    ----------
    posterior : pandas.DataFrame
        Posterior samples; must contain a column per parameter named in
        ``conf['parameters']``.
    conf : dict
        Configuration as produced by ``load_configuration`` (uses
        'parameters', 'input_path', 'inputs', 'model_name', 'times',
        'targets' and 'zero_flag').
    n_repeats : int, optional
        Number of posterior rows to sample and run. Default 50.

    Returns
    -------
    list
        Model outputs, one per sampled posterior row.
    """
    param_names = list(conf['parameters'].keys())
    sampled_rows = posterior[param_names].values
    # Sample row indices without replacement.
    chosen = random.sample(range(posterior.shape[0]), n_repeats)

    true_data = import_actual_data(conf['input_path'])
    parsed_inputs = inputParse(true_data, conf['inputs'])

    runs = []
    # Original implementation popped from the end of the sample list,
    # so iterate in reverse to keep the identical processing order.
    for count, idx in enumerate(reversed(chosen)):
        print("\tSample {}, idx:{}".format(count, idx))
        params = dict(zip(param_names, sampled_rows[idx]))
        _, model_output = get_output(
            conf['model_name'], params, conf['times'], parsed_inputs,
            true_data, conf['targets'], distance="NRMSE",
            zero_flag=conf['zero_flag'])
        runs.append(model_output)

    return runs
def get_repeated_outputs(df,
                         model_name,
                         parameters,
                         input_path,
                         inputs,
                         targets,
                         n_repeats,
                         zero_flag,
                         neonate,
                         tolerance=None,
                         limit=None,
                         frac=None,
                         openopt_path=None,
                         offset=None,
                         distance='euclidean'):
    """Generate model output and distances multiple times.

    Selects the accepted posterior (by tolerance, explicit limit, or
    fraction), runs the model for a random subset of accepted parameter
    sets, and collects the outputs into one long-format DataFrame.

    Parameters
    ----------
    df : :obj:`pandas.DataFrame`
        Posterior samples with one column per parameter plus a distance
        column named by `distance`.
    model_name : :obj:`str`
        Names of model. Should match the modeldef file for model being
        generated i.e. model_name of 'model`' should have a modeldef file
        'model1.modeldef'.
    parameters : :obj:`dict` of :obj:`str`: :obj:`tuple`
        Dict of model parameters to compare, with value tuple of the prior
        max and min.
    input_path : :obj:`str`
        Path to the true data file
    inputs : :obj:`list` of :obj:`str`
        List of model inputs.
    targets : :obj:`list` of :obj:`str`
        List of model outputs against which the model is being optimised.
    n_repeats : :obj:`int`
        Number of times to generate output data
    zero_flag : dict
        Dictionary of form target(:obj:`str`): bool, where bool indicates
        whether to zero that target.

        Note: zero_flag keys should match targets list.
    neonate : :obj:`str`
        Neonate identifier; written into the "Neonate" column of the
        returned DataFrame.
    tolerance : :obj:`float` or :obj:`None`, optional
        Accept all rows with distance below this value. Checked first.
    limit : :obj:`int` or :obj:`None`, optional
        Accept exactly this many of the best rows. Checked second.
    frac : :obj:`float`, optional
        Fraction of results to consider. Should be given as a percentage
        i.e. 1=1%, 0.1=0.1%. Checked third; if none of tolerance/limit/frac
        is given, ValueError is raised.
    openopt_path : :obj:`str` or :obj:`None`
        Path to the openopt data file if it exists. Default is None.
    offset : :obj:`dict`
        Dictionary of offset parameters if they are needed
    distance : :obj:`str`, optional
        Distance measure. One of 'euclidean', 'manhattan', 'MAE', 'MSE'.

    Returns
    -------
    :obj:`tuple` of :obj:`pandas.DataFrame`
        ``(combined, true_data)`` — a long-format DataFrame with columns
        "Time", "Posterior", "Neonate" and "Output" concatenated over all
        targets, and the true data read from `input_path`.

    Raises
    ------
    ValueError
        If none of `tolerance`, `limit` or `frac` is given.
    """
    p_names = list(parameters.keys())
    sorted_df = df.sort_values(by=distance)

    # Determine the accepted posterior size; the three criteria are
    # mutually exclusive and checked in priority order.
    if tolerance:
        accepted_limit = sum(df[distance].values < tolerance)
    elif limit:
        accepted_limit = limit
    elif frac:
        accepted_limit = frac_calculator(sorted_df, frac)
    else:
        raise ValueError('No limit or fraction given.')

    df_list = []
    if n_repeats > accepted_limit:
        # Clamp the requested repeats so that sampling without replacement
        # from the accepted posterior is possible.
        print("Setting number of repeats to quarter of the posterior size\n",
              file=sys.stderr)
        n_repeats = int(accepted_limit / 4)
    d0 = import_actual_data(input_path)
    input_data = inputParse(d0, inputs)

    true_data = pd.read_csv(input_path)
    times = true_data['t'].values

    if openopt_path:
        # NOTE(review): openopt_data is read but never used in this
        # function — possibly leftover from an earlier version.
        openopt_data = pd.read_csv(openopt_path)

    # NOTE(review): this can never trigger — n_repeats was already clamped
    # to accepted_limit / 4 above. Kept for safety / historical reasons.
    if n_repeats > accepted_limit:
        raise ValueError(
            "Number of requested model runs greater than posterior size:"
            "\n\tPosterior Size: {}\n\tNumber of runs: {}".format(
                accepted_limit, n_repeats))

    # Shuffled queue of accepted-posterior row indices; runs that time out
    # or crash push their index back to the front to be retried later.
    rand_selection = list(range(accepted_limit))
    random.shuffle(rand_selection)

    outputs_list = []
    posteriors = sorted_df.iloc[:accepted_limit][p_names].values
    select_idx = 0
    with Timer("Running repeat outputs"):
        for i in range(n_repeats):
            try:
                idx = rand_selection.pop()
                p = dict(zip(p_names, posteriors[idx]))
                if offset:
                    # Merge offset parameters into the sampled set;
                    # offset values win on key collision.
                    p = {**p, **offset}
                output = get_output(
                    model_name,
                    p,
                    times,
                    input_data,
                    d0,
                    targets,
                    distance=distance,
                    zero_flag=zero_flag)
                outputs_list.append(output)
                print("Sample {}, idx:{}".format(len(outputs_list), idx))
            except (TimeoutError, TimeoutExpired) as e:
                print("Timed out for Sample {}, idx:{}".format(
                    len(outputs_list), idx))
                pprint.pprint(p)
                # Requeue the failed index for a potential later attempt.
                rand_selection.insert(0, idx)
            except (CalledProcessError) as e:
                print("CalledProcessError for Sample {}, idx:{}".format(
                    len(outputs_list), idx))
                pprint.pprint(p)
                rand_selection.insert(0, idx)
        print("Final number of runs is: {}".format(len(outputs_list)))

    # Each output o is assumed to be (errors_dict, outputs_dict) — the
    # return shape of get_output; TODO confirm against its definition.
    d = {"Errors": {}, "Outputs": {}}
    d['Errors']['Average'] = np.nanmean([o[0]['TOTAL'] for o in outputs_list])
    for target in targets:
        d['Errors'][target] = np.nanmean([o[0][target] for o in outputs_list])
        d['Outputs'][target] = [o[1][target] for o in outputs_list]

    for ii, target in enumerate(targets):
        # Repeat each time point once per run so that x lines up with the
        # time-major raveled output matrix below.
        x = [j for j in times for n in range(len(d['Outputs'][target]))]
        with Timer('Transposing {}'.format(target)):
            # (n_runs, n_times) -> (n_times, n_runs), then flatten.
            y = np.array(d['Outputs'][target]).transpose()
            y = y.ravel()
        with Timer("Crafting DataFrame for {}".format(target)):
            model_name_col = [neonate] * len(x)
            target_col = [target] * len(x)
            df1 = pd.DataFrame({
                "Time": x,
                "Posterior": y,
                "Neonate": model_name_col,
                "Output": target_col
            })
        with Timer("Appending dataframe for {}".format(target)):
            df_list.append(df1.copy())
            del df1
    return pd.concat(df_list), true_data
current_file.parents[3], 'data', 'ABC', 'nrmse_SA', MODEL_VERSION, DATASET) pfile = os.path.abspath(os.path.join( output_dir, 'reduced_sorted_parameters.csv') ) input_path = os.path.join(current_file.parents[3], 'data', 'clean_hypothermia', '{}_filtered_formatted.csv'.format(DATASET.upper())) d0 = import_actual_data(input_path) targets = conf['targets'] model_name = conf['model_name'] inputs = conf['inputs'] config = { "model_name": model_name, "targets": targets, "inputs": inputs, "parameters": params, "input_path": input_path, "zero_flag": conf['zero_flag'] } pprint(config)