def get_dict_by_id(id):
    from incense import ExperimentLoader
    loader = ExperimentLoader(
        # None if MongoDB is running on localhost or "mongodb://mongo:27017"
        # when running in devcontainer.
        mongo_uri=None,
        db_name='GPBayes')
    exp = loader.find_by_id(id)
    max_pow = exp.config['max_pow']
    scores = exp.info['scores']
    scores = {
        key: value
        for key, value in scores.items() if key != 'brownian_qda'
    }
    theoretical_mean = exp.info['theoretical_mean']
    theoretical_std = exp.info['theoretical_std']
    return {
        'max_pow': max_pow,
        'scores': scores,
        'theoretical_mean': theoretical_mean,
        'theoretical_std': theoretical_std,
        'optimal_accuracy': 1
    }

def extract_model_outputs(mongo_db, db_name, run_id=None):
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)
    if run_id is None:
        experiments = loader.find(query={"status": "COMPLETED"})
    else:
        experiments = [loader.find_by_id(run_id)]
    model_output = {}
    for e in experiments:
        for k in e.artifacts.keys():
            if 'evaluation_results' in k:
                art = e.artifacts[k]
                # Save the pickled artifact to a temporary file, load it and
                # clean up afterwards.
                art.save(tempfile.gettempdir())
                tmp_filename = os.path.join(tempfile.gettempdir(),
                                            art._make_filename())
                with open(tmp_filename, 'rb') as f:
                    exp_res: EvaluationResults = pickle.load(f)
                os.remove(tmp_filename)
                model_output[exp_res.subset_path + "_" + exp_res.machine_id +
                             "_" + str(exp_res.start_time) + "_" +
                             str(e.id)] = exp_res
    return model_output

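# A minimal usage sketch for extract_model_outputs (not part of the original
# source): the MongoDB URI and database name below are placeholders. The keys
# of the returned dict follow the "<subset_path>_<machine_id>_<start_time>_<run_id>"
# pattern built above.
outputs = extract_model_outputs("mongodb://localhost:27017", "my_experiments_db")
for key, eval_res in outputs.items():
    print(key, type(eval_res).__name__)
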
def get_dict_by_id(id):
    from incense import ExperimentLoader
    loader = ExperimentLoader(
        # None if MongoDB is running on localhost or "mongodb://mongo:27017"
        # when running in devcontainer.
        mongo_uri=None,
        db_name='GPBayes')
    exp = loader.find_by_id(id)
    max_pow = exp.config['max_pow']
    scores = exp.info['scores']
    scores = {
        'optimal': scores['optimal'],
        'brownian_qda': scores['brownian_qda'],
        'qda': scores['qda']
    }
    return {'max_pow': max_pow, 'scores': scores, 'optimal_accuracy': 1}

def extract_plot_data_conv_ae(mongo_db, aggregate=True):
    db_name = 'dcase_task2_fully_conv_ae'
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)
    experiments = loader.find(query={
        "status": "COMPLETED",
        "config.num_epochs": 100
    })
    # e1.config['preprocessing_params']['n_mels'] is not changed in this experiment
    # arch_params might be different for different experiments
    # results per channel multiplier should be compared to baseline
    results = pd.DataFrame([
        (e1.config['machine_type'], e1.config['machine_id'],
         e1.config['arch_params']['channel_multiplier'],
         e1.config.get('apply_normalization', False),
         e1.metrics['eval_rocauc'][0] * 100,
         e1.metrics['eval_p_rocauc'][0] * 100) for e1 in experiments
    ])
    results = results.rename(
        columns={
            0: 'machine_type',
            1: 'machine_id',
            2: 'channel_multiplier',
            3: 'apply_normalization',
            4: 'rocauc',
            5: 'p_rocauc'
        })
    group_by_args = [
        'machine_type', 'machine_id', 'channel_multiplier',
        'apply_normalization'
    ]
    selected_columns = copy.deepcopy(group_by_args)
    selected_columns.extend(['rocauc', 'p_rocauc'])
    results = results[results.columns.intersection(selected_columns)]
    plot_data = aggregate_data(results, group_by_args, aggregate)
    plot_data['ID'] = plot_data[['machine_type', 'machine_id']].apply(
        lambda x: ' '.join(x), axis=1)
    plot_data['channel_multiplier'] = plot_data['channel_multiplier'].astype(str)
    plot_data['apply_normalization'] = plot_data['apply_normalization'].astype(str)
    plot_data['Class'] = plot_data[[
        'channel_multiplier', 'apply_normalization'
    ]].apply(lambda x: ' '.join(x), axis=1)
    plot_data['Class'] = plot_data['Class'] + "_" + db_name
    return plot_data

def __init__(self, mongo_db_uri, db_name, model_path, run_id):
    # load experiment
    loader = ExperimentLoader(mongo_uri=mongo_db_uri, db_name=db_name)
    self.preds = None
    self.run_id = run_id
    self.e = loader.find_by_id(run_id)
    self.model_path = path.join(model_path, self.e.config['uid'],
                                'best_model.ckpt')

def extract_exp_data_flows(mongo_db, db_name, run_id=None):
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)
    if run_id is not None:
        experiments = loader.find({"status": "COMPLETED", "_id": run_id})
    else:
        experiments = loader.find({"status": "COMPLETED"})
    col_names = [
        'run_id', 'apply_normalization', 'norm_per_set', 'transpose_flatten',
        'frames_per_snippet', 'batch_size', 'arch_params.n_hidden',
        'fixed_flow_evaluation', 'arch_params.hidden_size',
        'arch_params.n_blocks', 'arch_params.flow_model_type',
        'arch_params.cond_label_size', 'optimizer', 'optimizer_params.lr',
        'status', 'valid_loss'
    ]
    results = pd.DataFrame(columns=col_names)
    for e in experiments:
        print("Processing experiment {}".format(e.id))
        losses = e.metrics['valid_loss']
        new_row = pd.DataFrame([[
            e.id, e.config['apply_normalization'],
            e.config.get('norm_per_set', False),
            e.config.get('transpose_flatten', False),
            e.config['frames_per_snippet'], e.config['batch_size'],
            e.config['arch_params']['n_hidden'],
            e.config.get('fixed_flow_evaluation', False),
            e.config['arch_params']['hidden_size'],
            e.config['arch_params']['n_blocks'],
            e.config['arch_params']['flow_model_type'],
            e.config['arch_params'].get('cond_label_size', 6),
            e.config['optimizer'], e.config['optimizer_params']['lr'],
            e.status,
            losses[len(losses) - 1 - e.config.get('early_stopping_patience', 0)]
        ]], columns=col_names)
        results = pd.concat([results, new_row], ignore_index=True)
    return results

def plot_experiments(ids, titles, data_path, **kwargs):
    from incense import ExperimentLoader
    loader = ExperimentLoader(
        # None if MongoDB is running on localhost or "mongodb://mongo:27017"
        # when running in devcontainer.
        mongo_uri=None,
        db_name='GPBayes')
    configure_matplotlib()
    n_experiments = len(ids)
    default_figsize = matplotlib.rcParams['figure.figsize']
    fig, axes = plt.subplots(
        2,
        n_experiments,
        figsize=(default_figsize[0] * n_experiments, default_figsize[1] * 3))
    for i, id in enumerate(ids):
        exp = loader.find_by_id(id)
        max_pow = exp.config['max_pow']
        compare_tesla = exp.config['compare_tesla']
        compare_gm = exp.config['compare_gm']
        compare_bmw = exp.config['compare_bmw']
        asset_labels_used = get_asset_labels_used(compare_tesla=compare_tesla,
                                                  compare_gm=compare_gm,
                                                  compare_bmw=compare_bmw)
        real_data = get_real_data(data_path, max_pow)
        real_data = filter_data(real_data, asset_labels_used)
        plot_trajectories(real_data,
                          max_pow,
                          print_left=(i == 0),
                          axes=axes[0, i])
        axes[0, i].set_title(titles[i])
    return plot_experiments_common(ids,
                                   get_dict_by_id,
                                   axes=axes[1],
                                   top=0.95,
                                   bottom=0.15,
                                   **kwargs)

def get_dict_by_id(id):
    from incense import ExperimentLoader
    loader = ExperimentLoader(
        # None if MongoDB is running on localhost or "mongodb://mongo:27017"
        # when running in devcontainer.
        mongo_uri=None,
        db_name='GPBayes')
    exp = loader.find_by_id(id)
    max_pow = exp.config['max_pow']
    end_position = exp.config['end_position']
    scores = exp.info['scores']
    return {
        'max_pow': max_pow,
        'scores': scores,
        'optimal_accuracy': 1 - bayes_error(end_position)
    }

def plot_confusion_matrix_stat(id,
                               stat,
                               title=None,
                               plot_y_label=True,
                               ylim_top=None):
    from incense import ExperimentLoader
    loader = ExperimentLoader(
        # None if MongoDB is running on localhost or "mongodb://mongo:27017"
        # when running in devcontainer.
        mongo_uri=None,
        db_name='GPBayes')
    exp = loader.find_by_id(id)
    max_pow = exp.config['max_pow']
    confusion_matrices = exp.info['confusion_matrices']
    # Only fall back to the experiment name when no title was passed in.
    if title is None:
        title = exp.experiment.name
    titles_dict = {
        'brownian_step': 'Brownian step example',
        'brownian_bridge': 'Brownian bridge example',
        'brownian_variances': 'Brownian variances example',
        'cars': 'Cars experiment'
    }
    stat_dict = get_confusion_matrix_stat(confusion_matrices, stat)
    fig = plot_scores(max_pow=max_pow,
                      scores=stat_dict,
                      _run=None,
                      optimal_accuracy=0,
                      plot_y_label=plot_y_label,
                      ylim_top=ylim_top)
    fig.axes[0].set_title(titles_dict[title])
    return fig

def extract_plot_data_baseline(mongo_db, aggregate=True):
    db_name = 'dcase2020_task2_baseline'
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)
    experiments = loader.find(query={
        "status": "COMPLETED",
        "_id": {
            "$lt": 329
        }
    })  # at 329 metrics were renamed
    # arch_params might be different for different experiments
    # results per channel multiplier should be compared to baseline
    results = pd.DataFrame([
        (e1.config['machine_type'], e1.config['machine_id'],
         e1.metrics['eval_rocauc'][0] * 100,
         e1.metrics['eval_p_rocauc'][0] * 100) for e1 in experiments
    ])
    results = results.rename(columns={
        0: 'machine_type',
        1: 'machine_id',
        2: 'rocauc',
        3: 'p_rocauc'
    })
    group_by_args = ['machine_type', 'machine_id']
    selected_columns = copy.deepcopy(group_by_args)
    selected_columns.extend(['rocauc', 'p_rocauc'])
    results = results[results.columns.intersection(selected_columns)]
    plot_data = aggregate_data(results, group_by_args, aggregate)
    plot_data['ID'] = plot_data[['machine_type', 'machine_id']].apply(
        lambda x: ' '.join(x), axis=1)
    plot_data['Class'] = 'Baseline'
    return plot_data

def loader():
    loader = ExperimentLoader(mongo_uri=None, db_name="incense_test")
    return loader

from incense import ExperimentLoader
import pandas as pd

loader = ExperimentLoader(mongo_uri='rechenknecht2.cp.jku.at:37373',
                          db_name='dcase2020_task2_baseline_v')
experiments = loader.find(query={"status": "COMPLETED"})
results = pd.DataFrame([
    (e1.config['machine_type'], e1.config['machine_id'],
     e1.config['preprocessing_params']['n_mels'],
     e1.metrics['eval_rocauc'][0] * 100,
     e1.metrics['eval_p_rocauc'][0] * 100) for e1 in experiments
])
results = results.rename(columns={
    0: 'machine_type',
    1: 'machine_id',
    2: 'n_mels',
    3: 'rocauc',
    4: 'p_rocauc'
})
summary = results.groupby(['machine_type', 'machine_id',
                           'n_mels']).aggregate(['mean', 'std', 'count'])

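# Optional follow-up, assuming the script above has run: the groupby/aggregate
# call yields MultiIndex columns of the form (metric, statistic), so individual
# summary columns can be pulled out directly.
print(summary[('rocauc', 'mean')].round(2))  # mean ROC-AUC per setting
print(summary[('p_rocauc', 'count')])        # number of completed runs per setting
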
def get_loader(uri=mongo_uri, db=db_name):
    loader = ExperimentLoader(mongo_uri=uri, db_name=db)
    return loader

def extract_plot_data_flows(mongo_db, aggregate=True, grouping_vars=None):
    db_name = 'dcase2020_task2_flows_maf'
    loader = ExperimentLoader(mongo_uri=mongo_db, db_name=db_name)
    # experiments = loader.find_all()
    experiments = loader.find({"status": "COMPLETED"})
    col_names = [
        'run_id', 'machine_type', 'machine_id', 'rocauc', 'p_rocauc',
        'apply_normalization', 'frames_per_snippet', 'n_hidden',
        'fixed_flow_evaluation', 'hidden_size', 'n_blocks', 'optimizer',
        'optimizer_params.lr', 'status'
    ]
    if grouping_vars is None:
        grouping_vars = col_names[5:]
    results = pd.DataFrame(columns=col_names)
    for e in experiments:
        for type in all_devtest_machines.keys():
            for id in all_devtest_machines[type]:
                print(e.id, type + '_' + id + "_rocauc")
                rocauc = e.metrics.get(type + '_' + id + "_rocauc", [0])[0]
                if rocauc is not None:
                    rocauc = rocauc * 100
                p_rocauc = e.metrics.get(type + '_' + id + "_p_rocauc", [0])[0]
                if p_rocauc is not None:
                    p_rocauc = p_rocauc * 100
                if rocauc is None or p_rocauc is None:
                    print("Skipping experiment", e.id)
                    continue
                new_row = pd.DataFrame([[
                    e.id, type.split('/')[1], id, rocauc, p_rocauc,
                    e.config['apply_normalization'],
                    e.config['frames_per_snippet'],
                    e.config['arch_params']['n_hidden'],
                    e.config.get('fixed_flow_evaluation', False),
                    e.config['arch_params']['hidden_size'],
                    e.config['arch_params']['n_blocks'],
                    e.config['optimizer'],
                    e.config['optimizer_params']['lr'], e.status
                ]], columns=col_names)
                results = pd.concat([results, new_row], ignore_index=True)
    group_by_args = ['run_id', 'machine_type', 'machine_id']
    group_by_args.extend(grouping_vars)
    selected_columns = copy.deepcopy(group_by_args)
    selected_columns.extend(['rocauc', 'p_rocauc'])
    results = results[results.columns.intersection(selected_columns)]
    plot_data = aggregate_data(results, group_by_args, aggregate)
    plot_data['ID'] = plot_data[['machine_type', 'machine_id']].apply(
        lambda x: ' '.join(x), axis=1)
    for var in grouping_vars:
        plot_data[var] = plot_data[var].astype(str)
    plot_data['Class'] = plot_data[grouping_vars].apply(
        lambda x: ' '.join(x), axis=1)
    plot_data['Class'] = plot_data['Class'] + "_" + db_name
    return plot_data

def loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=TEST_DB_NAME)
    return loader

def get_loader(db_name='sacred'):
    loader = ExperimentLoader(mongo_uri=mongo_uri, db_name=db_name)
    return loader

def info_db_loader_pickled():
    loader = ExperimentLoader(mongo_uri=MONGO_URI,
                              db_name=INFO_DB_NAME,
                              unpickle=False)
    return loader

def get_experiment(exp_id):
    uri, database = get_uri_db_pair()
    loader = ExperimentLoader(mongo_uri=uri, db_name=database)
    ex = loader.find_by_id(exp_id)
    return ex

def get_experiment_loader():
    return ExperimentLoader(mongo_uri=constants.MONGO_URI,
                            db_name=constants.DB_NAME)

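# Hypothetical usage of get_experiment_loader (run id 1 is just a placeholder):
# fetch a single run and inspect its stored configuration.
loader = get_experiment_loader()
exp = loader.find_by_id(1)
print(exp.config)
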
def recent_db_loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=RECENT_DB_NAME)
    return loader

def delete_db_loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=DELETE_DB_NAME)
    return loader

def heterogenous_db_loader():
    loader = ExperimentLoader(mongo_uri=MONGO_URI,
                              db_name=HETEROGENOUS_DB_NAME)
    return loader

def plot_confusion_matrix(id,
                          n_samples,
                          ylim_top=None,
                          optimal_accuracy=[1, 0, 0, 1],
                          theoretical_accuracy=None,
                          title=None):
    from incense import ExperimentLoader
    configure_matplotlib()
    loader = ExperimentLoader(
        # None if MongoDB is running on localhost or "mongodb://mongo:27017"
        # when running in devcontainer.
        mongo_uri=None,
        db_name='GPBayes')
    exp = loader.find_by_id(id)
    max_pow = exp.config['max_pow']
    confusion_matrices = exp.info['confusion_matrices']
    confusion_matrices = {
        key: value
        for key, value in confusion_matrices.items() if key != 'brownian_qda'
    }
    if title is None:
        title = exp.experiment.name
    titles_dict = {
        'brownian_step': 'Brownian step example',
        'brownian_bridge': 'Brownian bridge example',
        'brownian_variances': 'Brownian variances example',
        'cars': 'Cars experiment'
    }
    title = titles_dict[title]
    default_figsize = matplotlib.rcParams['figure.figsize']
    fig, axes = plt.subplots(2,
                             2,
                             figsize=(default_figsize[0] * 2.2,
                                      default_figsize[1] * 3))
    true_pos = get_confusion_matrix_stat(confusion_matrices, lambda x: x[0, 0])
    false_pos = get_confusion_matrix_stat(confusion_matrices, lambda x: x[0, 1])
    false_neg = get_confusion_matrix_stat(confusion_matrices, lambda x: x[1, 0])
    true_neg = get_confusion_matrix_stat(confusion_matrices, lambda x: x[1, 1])
    for i, (scores, index, optimal) in enumerate(
            zip([true_pos, false_pos, false_neg, true_neg],
                [(0, 0), (0, 1), (1, 0), (1, 1)], optimal_accuracy)):
        plot_scores(max_pow=max_pow,
                    scores=scores,
                    _run=None,
                    optimal_accuracy=optimal * n_samples // 2,
                    plot_y_label=False,
                    ylim_top=ylim_top,
                    ylim_bottom=0,
                    plot_legend=False,
                    theoretical_mean=theoretical_accuracy[i] * n_samples // 2
                    if theoretical_accuracy is not None else None,
                    axes=axes[index],
                    start_pow=0)
    axes[0, 0].set_xlabel(None)
    axes[0, 1].set_xlabel(None)
    axes[0, 0].set_ylabel('Class 0')
    axes[1, 0].set_ylabel('Class 1')
    axes[0, 0].set_title('Predicted class 0')
    axes[0, 1].set_title('Predicted class 1')
    fig.suptitle(title)
    fig.tight_layout()
    fig.subplots_adjust(top=0.89, bottom=0.15, hspace=0.1)
    handles, labels = axes[0, 0].get_legend_handles_labels()
    leg = fig.legend(handles,
                     labels,
                     loc="lower center",
                     bbox_to_anchor=(0.5, 0),
                     bbox_transform=fig.transFigure,
                     ncol=7)
    leg.get_frame().set_alpha(1)
    return fig

        json.dump(json.loads(bson_dumps(config)), f, indent=4)
    with open(path + 'metrics.json', mode='w') as f:
        json.dump(json.loads(bson_dumps(metrics)), f, indent=4)
    for k in exp.artifacts:
        artifact_content = torch.load(BytesIO(exp.artifacts[k].content))
        with open(path + k, mode='wb') as f:
            torch.save(artifact_content, f)


if __name__ == "__main__":
    args = parser.parse_args()
    path = args.path
    loader = ExperimentLoader(mongo_uri=args.mongo_uri, db_name=args.db)
    if args.r:
        exp_ids_starts = args.expids[::2]
        exp_ids_ends = args.expids[1::2]
        expids = chain(
            *[range(s, e + 1) for s, e in zip(exp_ids_starts, exp_ids_ends)])
    else:
        expids = args.expids
    for exp_id in expids:
        export(exp_id, loader, path)

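# The -r branch above treats the given experiment ids as flat (start, end)
# pairs and expands each pair into an inclusive range. A standalone sketch of
# that expansion with illustrative values:
from itertools import chain

pairs = [3, 5, 10, 12]  # interpreted as the ranges 3..5 and 10..12
starts, ends = pairs[::2], pairs[1::2]
expanded = list(chain(*[range(s, e + 1) for s, e in zip(starts, ends)]))
print(expanded)  # [3, 4, 5, 10, 11, 12]
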
def info_db_loader():
    # Unregister handlers to simulate that sacred is not currently imported.
    jsonpickle.handlers.unregister(np.ndarray)
    jsonpickle.handlers.unregister(pd.DataFrame)
    loader = ExperimentLoader(mongo_uri=MONGO_URI, db_name=INFO_DB_NAME)
    return loader