def __init__(self, data_config: dict, nn_config: dict, data: pd.DataFrame, intervals=None, path: str = None):
    """Store the configurations and the configured columns of `data`.

    :param data_config: mapping that must provide the keys ``'inputs'`` and
        ``'outputs'``; the two values are concatenated with ``+`` and used
        to index `data`.
    :param nn_config: mapping that must provide the key ``'loss'``, which is
        looked up in ``LOSSES``.
    :param data: source table; only ``inputs + outputs`` is kept.
    :param intervals: stored unchanged on the instance.
    :param path: forwarded to ``maybe_create_path``; whatever that helper
        returns becomes ``self.path``.
    :raises KeyError: if a required key is missing from either config or the
        configured loss name is not present in ``LOSSES``.
    """
    self.data_config = data_config
    self.nn_config = nn_config
    self.intervals = intervals

    # Keep only the columns that the configuration refers to.
    selected_columns = data_config['inputs'] + data_config['outputs']
    self.data = data[selected_columns]

    # Number of input / output features.
    self.ins = len(self.data_config['inputs'])
    self.outs = len(self.data_config['outputs'])

    loss_name = self.nn_config['loss']
    self.loss = LOSSES[loss_name]
    self.KModel = KModel

    self.path = maybe_create_path(path=path)
def __init__(self, data_config: dict, nn_config: dict, data, intervals=None, path: str = None):
    """Initialise the parent class, then attach data, columns, loss and paths.

    :param data_config: passed positionally to the parent initialiser; the
        keys ``'inputs'`` and ``'outputs'`` are afterwards read from
        ``self.data_config``.
    :param nn_config: passed positionally to the parent initialiser; the key
        ``'loss'`` is afterwards read from ``self.nn_config`` and looked up
        in ``LOSSES``.
    :param data: stored unchanged as ``self.data``.
    :param intervals: stored unchanged as ``self.intervals``.
    :param path: forwarded to ``maybe_create_path``, whose result is
        unpacked into ``self.path``, ``self.act_path`` and ``self.w_path``.
    """
    # NOTE(review): presumably the parent initialiser is what sets
    # self.data_config / self.nn_config, since both are read right below.
    super(Model, self).__init__(data_config, nn_config)

    self.intervals = intervals
    self.data = data

    columns_config = self.data_config
    self.in_cols = columns_config['inputs']
    self.out_cols = columns_config['outputs']

    loss_name = self.nn_config['loss']
    self.loss = LOSSES[loss_name]

    # `keras` may be None at module level; in that case no model class is set.
    if keras is None:
        self.KModel = None
    else:
        self.KModel = keras.models.Model

    created_paths = maybe_create_path(path=path)
    self.path, self.act_path, self.w_path = created_paths
def __init__(
        self,
        data_config,
        nn_config,
        args,
        intervals,
        path=None,
        verbosity=1):
    """Store the settings, validate them, resolve the path and init the parent.

    :param data_config: stored on the instance and forwarded to the parent.
    :param nn_config: stored on the instance and forwarded to the parent.
    :param args: stored unchanged as ``self.args``.
    :param intervals: stored unchanged; a dictionary according to the
        original author's note.
    :param path: forwarded to ``maybe_create_path``. Original author's note:
        if specified, and if it exists, it will not be created.
    :param verbosity: stored on the instance and forwarded to the parent.
    """
    self.data_config = data_config
    self.nn_config = nn_config
    self.args = args
    self.intervals = intervals  # dictionary
    self.verbosity = verbosity

    # Validation runs once the attributes above are in place.
    self._validate_input()

    self.batches = {}
    self.path = maybe_create_path(path=path)

    # True exactly when the caller supplied a path.
    self._from_config = path is not None

    parent_kwargs = dict(
        nn_config=nn_config,
        data_config=data_config,
        path=self.path,
        verbosity=verbosity,
    )
    super(Model, self).__init__(**parent_kwargs)
def _train_index_for(model):
    """Return the index of the training samples held in ``model.batches``."""
    if model.data_config['batch_making_mode'] == 'event_based':
        train_idx = get_index(model.batches['train_index'])
        return train_idx[~train_idx.duplicated()]

    # Only the strictly positive entries of the stored index are converted.
    train_tk = model.batches['train_tk_index']
    return get_index(train_tk[np.where(train_tk > 0.0)])


def make_predictions(x_batches, y_batches, model, epochs_to_evaluate, runtype, save_results=False):
    """Run the given checkpoints and write per-target errors, plots and tables.

    For every epoch in `epochs_to_evaluate` the checkpoint
    ``"checkpoints.ckpt-<epoch>"`` is run through ``model.run_check_point``.
    For every name in ``model.data_config['out_features']`` a folder
    ``<model.path>/<epoch>/<target>_<runtype>`` is requested from
    ``maybe_create_path``, errors are computed with ``get_errors`` and the
    plotting helpers are called with paths inside that folder.

    :param x_batches: forwarded to ``model.run_check_point``.
    :param y_batches: forwarded to ``model.run_check_point``.
    :param model: object providing ``path``, ``scalers``, ``data_config``,
        ``batches``, ``verbosity`` and ``run_check_point``.
    :param epochs_to_evaluate: iterable of epoch identifiers.
    :param runtype: key into ``model.scalers``; ``runtype + '_index'`` is the
        key into ``model.batches``. The value ``'all'`` additionally adds a
        ``'train'`` column to the plotted frame.
    :param save_results: when true, the frame of each target is written with
        ``DataFrame.to_excel``.
    :return: ``(all_errors, neg_predictions)``; both are dicts keyed by
        ``"<epoch>_<target>"``. The first holds the value returned by
        ``get_errors``, the second the number of negative predictions as int.
    """
    all_errors = {}
    neg_predictions = {}

    for ep in epochs_to_evaluate:
        sub_path = model.path + '/' + str(ep)
        maybe_create_path(path=sub_path)

        x_data, _y_pred, _y_true = model.run_check_point(
            check_point="checkpoints.ckpt-" + str(ep),
            x_batches=x_batches,
            y_batches=y_batches,
            scalers=model.scalers[runtype])

        # Create a separate folder for each target and save its relevant data
        # in that folder.
        for idx, out in enumerate(model.data_config['out_features']):
            result_key = str(ep) + '_' + out
            out_path = sub_path + '/' + out + '_' + runtype
            maybe_create_path(path=out_path)

            y_pred = _y_pred[:, idx]
            y_true = _y_true[:, idx]

            negative_predictions = np.sum(np.array(y_pred) < 0, axis=0)
            neg_predictions[result_key] = int(negative_predictions)

            # NOTE(review): observations are masked (non-positive -> NaN) only
            # when there are no negative predictions; otherwise they are just
            # copied. Behaviour kept as found - confirm it is intended.
            if negative_predictions > 0:
                print("Warning, {} Negative bacteria predictions found".format(
                    negative_predictions))
                y_true = y_true.copy()
            else:
                y_true = np.where(y_true > 0.0, y_true, np.nan)

            y_true_avail, y_pred_avail = get_pred_where_obs_available(
                y_true, y_pred)
            all_errors[result_key] = get_errors(
                y_true_avail, y_pred_avail, model.data_config['monitor'])

            print('shapes of predicted arrays: ',
                  y_pred.shape, y_true.shape, x_data.shape)
            if model.verbosity > 2:
                for i, j in zip(y_pred, y_true):
                    print(i, j)

            plot_scatter(y_true_avail, y_pred_avail, out_path + "/scatter")

            # One column per input feature, then observation and prediction.
            ndf = pd.DataFrame()
            for i, inp in enumerate(model.data_config['in_features']):
                ndf[inp] = x_data[:, i]
            ndf['true'] = y_true
            ndf[out] = y_pred
            ndf.index = get_index(model.batches[runtype + '_index'])

            # Removing duplicated values.
            # TODO why duplicated values exist
            ndf = ndf[~ndf.index.duplicated(keep='first')]

            plots_on_last_axis = ['true', out]
            if runtype == 'all':
                # Predictions that fall on training samples get their own
                # column so they can be drawn on the last axis as well.
                ndf['train'] = ndf[out][_train_index_for(model)]
                plots_on_last_axis.append('train')

            do_plot(ndf, list(ndf.columns),
                    save_name=out_path + '/' + str(out),
                    obs_logy=True,
                    single_ax_plots=plots_on_last_axis)

            ndf['Prediction'] = ndf[out]
            plot_single_output(ndf, out_path + '/' + str(out) + '_single', runtype)
            plot_bact_points(ndf, out_path + "/bact_points", runtype)

            if save_results:
                # The file sits next to the target folder (not inside it) and
                # repeats `runtype`; the name is kept for compatibility.
                ndf.to_excel(out_path + '_' + runtype + '_results.xlsx')

    return all_errors, neg_predictions
# Collect the per-experiment figures from `src_path` into `dst_path`, giving
# every copy a name that encodes the model and the experiment settings.
import os
from shutil import copyfile

from utils import maybe_create_path

# Axes of the experiment grid.
models = ['adaboost', 'bayes', 'rdf', 'svm']
marker_mode = ['joint', 'separate']
feature_mode = ['all_feature', 'prime_feature']
class_mode = ['biclass', 'multiclass']

# File i of `src_items` is copied into sub-folder i of `dst_items`.
src_items = ['conf_mat.png']
dst_items = ['figures']

src_path = 'exp'
dst_path = 'results'

if __name__ == '__main__':
    # Make sure every destination folder is there before copying into it.
    for dst_i in dst_items:
        maybe_create_path(os.path.join(dst_path, dst_i))

    # All (model, experiment folder) pairs, with `models` varying slowest and
    # `class_mode` fastest.
    experiments = [
        (model, '_'.join((marker, feature, class_)))
        for model in models
        for marker in marker_mode
        for feature in feature_mode
        for class_ in class_mode
    ]

    for model, exp_folder in experiments:
        for src_i, dst_i in zip(src_items, dst_items):
            # Text after the last dot of the source name becomes the suffix.
            extension = src_i.rsplit('.', 1)[-1]
            target_name = '{}_{}.{}'.format(model, exp_folder, extension)
            copyfile(os.path.join(src_path, model, exp_folder, src_i),
                     os.path.join(dst_path, dst_i, target_name))
metrics_file) for metrics_name in metrics_avg_names: all_marker_values = [ item[metrics_name] for item in all_marker_test_metrics ] double_print( 'overall test %s: %1.1f' % (metrics_name, sum(all_marker_values) / len(all_marker_values)), metrics_file) metrics_file.close() save_yaml(os.path.join(exp_path, 'best_params.yaml'), best_params) fig.savefig(metrics_fig_filename, bbox_inches='tight', pad_inches=1) if __name__ == '__main__': exp_path = os.path.join('exp', args.config, args.sub_setting) if not args.overwrite_config and os.path.exists( os.path.join(exp_path, 'config.yaml')): config = load_ymal(os.path.join(exp_path, 'config.yaml')) else: config = load_ymal(os.path.join('config', args.config + '.yaml')) save_yaml(os.path.join(exp_path, 'config.yaml'), config) if not args.retrain: if os.path.exists(os.path.join(exp_path, 'model')) \ or len(glob.glob(os.path.join(exp_path, 'model', '*.pkl'))) != 0: raise FileExistsError('there are already models saved in %s.' % exp_path) maybe_create_path(exp_path) train_eval(config, exp_path)