# Shared imports. Package-internal names (ResultPool, DatasetAveragedResults,
# unclean_sets, FuncAndArgs, rand_diff, median_absolute_deviation,
# prettify_word, to_data_frame, patterns_to_single_array and the various
# create_* helpers) are assumed to be importable from the surrounding
# braindecode package.
import csv
import datetime
import logging
import os
import re
import sys
from copy import deepcopy

import numpy as np
import yaml

log = logging.getLogger(__name__)


def create_meaned_amp_pred_corrs(prefix='',
                                 folder='data/models/paper/ours/cnt/deep4/car/',
                                 params='default',
                                 perturb_names=('no_dev', 'rand_mad',
                                                'rand_std')):
    """Load the computed cov_vars, transform them into correlations and
    save the correlations."""
    if params == 'default':
        params = dict(cnt_preprocessors="$cz_zero_resample_car_demean",
                      trial_start=1500, trial_stop=4000)
    res_pool = ResultPool()
    res_pool.load_results(folder, params=params)
    result_file_names = res_pool.result_file_names()
    results = res_pool.result_objects()
    # Sort by dataset filename
    sort_order = np.argsort(
        [r.parameters['dataset_filename'] for r in results])
    result_file_names = np.array(result_file_names)[sort_order]
    results = np.array(results)[sort_order]
    all_base_names = [name.replace('.result.pkl', '')
                      for name in result_file_names]
    if prefix != '':
        prefix = '.' + prefix
    for i_file, base_name in enumerate(all_base_names):
        log.info("Loading {:s}".format(
            results[i_file].parameters['dataset_filename']))
        create_meaned_amp_pred_corrs_for_file(base_name, prefix,
                                              perturb_names)
def load_meaned_amp_pred_corrs(prefix='',
                               folder='data/models/paper/ours/cnt/deep4/car/',
                               params='default',
                               perturb_names=('no_dev', 'rand_mad',
                                              'rand_std')):
    if params == 'default':
        params = dict(cnt_preprocessors="$cz_zero_resample_car_demean",
                      trial_start=1500, trial_stop=4000)
    res_pool = ResultPool()
    res_pool.load_results(folder, params=params)
    result_file_names = res_pool.result_file_names()
    results = res_pool.result_objects()
    if prefix != '':
        prefix = '.' + prefix
    # Sort by dataset filename
    sort_order = np.argsort(
        [r.parameters['dataset_filename'] for r in results])
    result_file_names = np.array(result_file_names)[sort_order]
    results = np.array(results)[sort_order]
    all_base_names = [name.replace('.result.pkl', '')
                      for name in result_file_names]
    clean_mask = []
    all_corrs = dict()
    for i_file, base_name in enumerate(all_base_names):
        # Check that correlation files exist for all perturbations
        all_perturbations_exist = True
        for perturb_name in perturb_names:
            filename_end = '{:s}.{:s}.amp_cov_var_corrs.npy'.format(
                prefix, perturb_name)
            filename = base_name + filename_end
            if not os.path.isfile(filename):
                all_perturbations_exist = False
                log.warn("{:s} does not exist".format(filename))
        if not all_perturbations_exist:
            log.warn("Skipping {:s} since not all perturbations exist".format(
                base_name))
            continue
        # Mark whether this subject belongs to the clean sets
        if any(s in results[i_file].parameters['dataset_filename']
               for s in unclean_sets):
            clean_mask.append(False)
        else:
            clean_mask.append(True)
        for perturb_name in perturb_names:
            filename_end = '{:s}.{:s}.amp_cov_var_corrs.npy'.format(
                prefix, perturb_name)
            filename = base_name + filename_end
            assert os.path.isfile(filename), (
                "Expect {:s} to exist".format(filename))
            this_arr = all_corrs.pop(perturb_name, [])
            this_corrs = np.load(filename)
            this_arr.append(this_corrs)
            all_corrs[perturb_name] = this_arr
    clean_mask = np.array(clean_mask)
    return all_corrs, clean_mask
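# Illustrative usage sketch (not part of the original source): load the
# meaned correlations with the defaults above and keep only clean subjects,
# assuming all subjects yield same-shaped correlation arrays.
def _example_load_meaned_amp_pred_corrs():
    all_corrs, clean_mask = load_meaned_amp_pred_corrs()
    for perturb_name, corrs in all_corrs.items():
        # stack per-subject arrays and apply the clean-subject mask
        clean_corrs = np.array(corrs)[clean_mask]
        print("{:s}: {!r}".format(perturb_name, clean_corrs.shape))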
def create_unit_output_class_corrs_for_files(folder_name, params, start, stop,
                                             i_all_layers):
    res_pool = ResultPool()
    res_pool.load_results(folder_name, params=params)
    res_file_names = res_pool.result_file_names()
    all_base_names = [name.replace('.result.pkl', '')
                      for name in res_file_names]
    start = start or 0
    stop = stop or len(all_base_names)
    for i_file, base_name in enumerate(all_base_names[start:stop]):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, i_file + start + 1, stop))
        create_unit_output_class_corrs(base_name, i_all_layers)
def create_all_amplitude_perturbation_corrs(folder_name, params, start, stop,
                                            with_square, with_square_cov,
                                            after_softmax, n_samples,
                                            perturbations='default'):
    if perturbations == 'default':
        perturbations = (
            ('no_dev',
             FuncAndArgs(rand_diff, with_blocks=False,
                         # just return 1 as the deviation
                         deviation_func=lambda arr, axis, keepdims: 1)),
            ('rand_mad',
             FuncAndArgs(rand_diff, with_blocks=False,
                         deviation_func=median_absolute_deviation)),
            ('rand_std',
             FuncAndArgs(rand_diff, with_blocks=False,
                         deviation_func=np.std)),
        )
    elif perturbations == 'simple_no_scale':
        perturbations = (
            ('no_dev',
             FuncAndArgs(rand_diff, with_blocks=False,
                         # just return 1 as the deviation
                         deviation_func=lambda arr, axis, keepdims: 1)),
        )
    assert not (with_square and with_square_cov)
    res_pool = ResultPool()
    res_pool.load_results(folder_name, params=params)
    res_file_names = res_pool.result_file_names()
    all_base_names = [name.replace('.result.pkl', '')
                      for name in res_file_names]
    start = start or 0
    stop = stop or len(all_base_names)
    for i_file, base_name in enumerate(all_base_names[start:stop]):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, i_file + start + 1, stop))
        create_amplitude_perturbation_corrs(base_name,
                                            with_square=with_square,
                                            with_square_cov=with_square_cov,
                                            after_softmax=after_softmax,
                                            n_samples=n_samples,
                                            perturbations=perturbations)
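# Hedged sketch (not part of the original source): a custom perturbation list
# follows the same (name, FuncAndArgs(...)) pattern as the defaults above.
# The 'rand_mean' name, the use of np.mean as deviation_func and the
# n_samples value are made up for illustration.
def _example_custom_perturbations(folder_name, params):
    perturbations = (
        ('rand_mean',
         FuncAndArgs(rand_diff, with_blocks=False, deviation_func=np.mean)),
    )
    create_all_amplitude_perturbation_corrs(
        folder_name, params, start=None, stop=None,
        with_square=False, with_square_cov=False,
        after_softmax=False, n_samples=30, perturbations=perturbations)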
def dataset_to_env_file(wanted_dataset_filename):
    """For a given dataset filename, return the corresponding envelope
    filename, i.e. the file of the experiment the envelopes were
    originally computed from."""
    res_pool = ResultPool()
    res_pool.load_results(
        'data/models-backup/paper/ours/cnt/deep4/car/',
        params=dict(cnt_preprocessors="$cz_zero_resample_car_demean",
                    trial_start=1500, trial_stop=4000))
    dataset_to_env_file_name = dict()
    for result, res_file_name in zip(res_pool.result_objects(),
                                     res_pool.result_file_names()):
        dataset_file_name = result.parameters['dataset_filename']
        envelope_file_name = res_file_name.replace('.result.pkl', '.env.npy')
        assert os.path.isfile(envelope_file_name)
        dataset_to_env_file_name[dataset_file_name] = envelope_file_name
    return dataset_to_env_file_name[wanted_dataset_filename]
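# Illustrative usage (the dataset path below is hypothetical):
# env_file = dataset_to_env_file('data/some-dataset/subject1.BBCI.mat')
# envelopes = np.load(env_file)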
def create_env_class_corrs(folder, params, start, stop):
    res_pool = ResultPool()
    res_pool.load_results(folder, params=params)
    res_file_names = res_pool.result_file_names()
    all_base_names = [name.replace('.result.pkl', '')
                      for name in res_file_names]
    start = start or 0
    stop = stop or len(all_base_names)
    with_square = True
    for i_exp, base_name in enumerate(all_base_names[start:stop]):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, i_exp + start + 1, stop))
        create_env_class_corr_file(base_name, with_square)
def create_env_corrs(folder_name, params, start, stop):
    from braindecode.analysis.create_env_class_corrs import (
        create_env_class_corr_file)
    res_pool = ResultPool()
    res_pool.load_results(folder_name, params=params)
    res_file_names = res_pool.result_file_names()
    all_base_names = [name.replace('.result.pkl', '')
                      for name in res_file_names]
    start = start or 0
    stop = stop or len(all_base_names)
    # Hack: hardcoded layer indices, known to be the correct layers at the
    # moment; for the shallow net they would be [3, 4, 5, 7]
    i_all_layers = [8, 14, 20, 26, 28]
    with_square = True
    for i_file, base_name in enumerate(all_base_names[start:stop]):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, i_file + start + 1, stop))
        create_topo_env_corrs_files(base_name, i_all_layers, with_square)
        create_env_class_corr_file(base_name, with_square)
def load_amp_corrs(with_square, with_square_corr, cov_or_corr):
    assert not (with_square and with_square_corr)
    assert cov_or_corr == 'cov' or cov_or_corr == 'corr'
    res_pool = ResultPool()
    res_pool.load_results(
        'data/models/paper/ours/cnt/deep4/car/',
        params=dict(sensor_names="$all_EEG_sensors", batch_modifier="null",
                    low_cut_off_hz="null", first_nonlin="$elu"))
    result_file_names = res_pool.result_file_names()
    results = res_pool.result_objects()
    # Sort by dataset filename
    sort_order = np.argsort(
        [r.parameters['dataset_filename'] for r in results])
    result_file_names = np.array(result_file_names)[sort_order]
    results = np.array(results)[sort_order]
    all_base_names = [name.replace('.result.pkl', '')
                      for name in result_file_names]
    clean_mask = []
    all_corrs = dict()
    for i_file, base_name in enumerate(all_base_names):
        if any(s in results[i_file].parameters['dataset_filename']
               for s in unclean_sets):
            clean_mask.append(False)
        else:
            clean_mask.append(True)
        for perturb_name in ('rand_mad', 'rand_std', 'no_dev'):
            file_name_end = '.{:s}.amp_{:s}s.npy'.format(perturb_name,
                                                         cov_or_corr)
            if with_square:
                file_name_end = '.square' + file_name_end
            if with_square_corr:
                file_name_end = ".corrtosquare" + file_name_end
            file_name = base_name + file_name_end
            assert os.path.isfile(file_name), "Expect {:s} to exist".format(
                file_name)
            this_arr = all_corrs.pop(perturb_name, [])
            this_arr.append(np.load(file_name))
            all_corrs[perturb_name] = this_arr
    clean_mask = np.array(clean_mask)
    return all_corrs, clean_mask
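# Illustrative usage sketch (not part of the original source): load the
# squared amplitude correlations and report how many subjects are clean.
def _example_load_amp_corrs():
    all_corrs, clean_mask = load_amp_corrs(
        with_square=True, with_square_corr=False, cov_or_corr='corr')
    rand_std_corrs = np.array(all_corrs['rand_std'])
    print("loaded {:d} subjects, {:d} clean".format(
        len(rand_std_corrs), int(np.sum(clean_mask))))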
def load_data_frame(folder, params=None, shorten_headers=True):
    res_pool = ResultPool()
    res_pool.load_results(folder, params=params)
    result_objs = res_pool.result_objects()
    varying_params = res_pool.varying_params()
    constant_params = res_pool.constant_params()
    file_names = res_pool.result_file_names()
    data_frame = to_data_frame(file_names, result_objs, varying_params,
                               constant_params,
                               shorten_headers=shorten_headers)
    data_frame.attrs = {'folder': folder, 'params': params}
    return data_frame
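# Illustrative usage (the folder is the default path used by the loaders
# above; assumes to_data_frame returns a pandas DataFrame):
# df = load_data_frame('data/models/paper/ours/cnt/deep4/car/')
# print(df.head())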
def load_analysis_results(folder, params, file_name_end):
    result_pool = ResultPool()
    result_pool.load_results(folder, params=params)
    result_file_names = result_pool.result_file_names()
    results = result_pool.result_objects()
    # Sort by dataset filename
    sort_order = np.argsort(
        [r.parameters['dataset_filename'] for r in results])
    result_file_names = np.array(result_file_names)[sort_order]
    results = np.array(results)[sort_order]
    analysis_result_per_person = []
    clean_mask = []
    for file_name, result in zip(result_file_names, results):
        analysis_result_file = file_name.replace('.result.pkl',
                                                 file_name_end)
        assert os.path.isfile(analysis_result_file)
        analysis_result = np.load(analysis_result_file)
        if any(s in result.parameters['dataset_filename']
               for s in unclean_sets):
            clean_mask.append(False)
        else:
            clean_mask.append(True)
        analysis_result_per_person.append(analysis_result)
    analysis_result_per_person = np.array(analysis_result_per_person)
    clean_mask = np.array(clean_mask)
    return analysis_result_per_person, clean_mask
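# Illustrative usage (the '.env_class_corrs.npy' suffix is a guess based on
# the naming scheme of the analysis files created above):
# results_per_person, clean_mask = load_analysis_results(
#     'data/models/paper/ours/cnt/deep4/car/', params=None,
#     file_name_end='.env_class_corrs.npy')
# clean_results = results_per_person[clean_mask]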
def create_envelopes(folder_name, params, start, stop):
    res_pool = ResultPool()
    res_pool.load_results(folder_name, params=params)
    res_file_names = res_pool.result_file_names()
    log.info("{:d} files found.".format(len(res_file_names)))
    yaml_file_names = [name.replace('.result.pkl', '.yaml')
                       for name in res_file_names]
    stop = stop or len(yaml_file_names)
    for i_file in range(start, stop):
        file_name = yaml_file_names[i_file]
        log.info("Running {:s} ({:d} of {:d})".format(
            file_name, i_file + 1, stop))
        log.info("Parameters {:s}".format(
            str(res_pool.result_objects()[i_file].parameters)))
        create_envelopes_for_experiment(file_name)
def load_patterns(folder='data/models/paper/ours/csp/car/'):
    res_pool = ResultPool()
    res_pool.load_results(folder)
    result_file_names = res_pool.result_file_names()
    results = res_pool.result_objects()
    # Sort by dataset filename
    sort_order = np.argsort(
        [r.parameters['dataset_filename'] for r in results])
    result_file_names = np.array(result_file_names)[sort_order]
    results = np.array(results)[sort_order]
    # now sorted
    dataset_names = [r.parameters['dataset_filename'] for r in results]
    all_patterns = []
    clean_mask = []
    all_exps = []
    for file_name, dataset in zip(result_file_names, dataset_names):
        log.info("Loading for {:s}".format(dataset))
        model_file_name = file_name.replace('.result.pkl', '.pkl')
        csp_exp = np.load(model_file_name)
        patterns = csp_exp.binary_csp.patterns
        pattern_arr = patterns_to_single_array(patterns)
        pattern_arr = pattern_arr.squeeze()
        assert not np.any(np.isnan(pattern_arr))
        all_patterns.append(pattern_arr)
        all_exps.append(csp_exp)
        if any([s in dataset for s in unclean_sets]):
            clean_mask.append(False)
        else:
            clean_mask.append(True)
    all_patterns = np.array(all_patterns)
    clean_mask = np.array(clean_mask)
    return all_patterns, clean_mask, all_exps
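# Illustrative usage sketch (not part of the original source):
# all_patterns, clean_mask, all_exps = load_patterns()
# clean_patterns = all_patterns[clean_mask]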
class ResultPrinter:
    def __init__(self, folder_name):
        self._folder_name = folder_name

    def print_results(self, templates=False, constants=False, sets=False,
                      start=None, stop=None, params=None, shorten=True,
                      ignore=(), markdown=False):
        print("Printing results in {:s}:".format(self._folder_name))
        self._collect_parameters_and_results(start, stop, params)
        self._format_results()
        self._print(templates, constants, sets, shorten, ignore, markdown)

    def _collect_parameters_and_results(self, start, stop, params):
        self._result_pool = ResultPool()
        self._result_pool.load_results(self._folder_name, start, stop, params)
        if (self._result_pool.have_varying_datasets() or
                self._result_pool.have_varying_leave_out()):
            self._dataset_averaged_results = DatasetAveragedResults()
            self._dataset_averaged_results.extract_results(self._result_pool)

    def _format_results(self):
        self._formatted_results = []
        misclasses_per_experiment = self._result_pool.get_misclasses()
        for misclasses in misclasses_per_experiment:
            formatted_result = self._format_result(misclasses)
            self._formatted_results.append(formatted_result)

    def _format_result(self, misclasses):
        """Format the result for one experiment."""
        formatted_result = {}
        self._add_misclasses(formatted_result, misclasses)
        self._add_best_and_best_epoch(formatted_result, misclasses)
        return formatted_result

    def _add_misclasses(self, formatted_result, misclasses):
        """Add misclasses for one experiment."""
        for key in misclasses:
            # take the last epoch from all folds; the transform is needed
            # in case there is just one experiment
            this_misclasses = self._atleast_2d_or_1d_of_arr(misclasses[key])
            final_misclasses = []
            for misclass_fold in this_misclasses:
                final_misclasses.append(misclass_fold[-1])
            this_mean = np.mean(final_misclasses)
            formatted_result[key] = "{:5.2f}%".format((1 - this_mean) * 100)
            if len(final_misclasses) > 1:  # only for crossval
                this_std = np.std(final_misclasses)
                formatted_result[key + '_std'] = "{:4.2f}%".format(
                    this_std * 100)

    def _add_best_and_best_epoch(self, formatted_result, misclasses):
        if 'test' in misclasses:
            # Determine the minimal number of epochs and only take
            # misclass rates up to that epoch
            test_misclasses = deepcopy(misclasses['test'])
            # transform to a list of one list in case of only one experiment
            test_misclasses = self._atleast_2d_or_1d_of_arr(test_misclasses)
            min_epoch_num = np.min([len(a) for a in test_misclasses])
            same_epochs_misclasses = [a[0:min_epoch_num]
                                      for a in test_misclasses]
            same_epochs_misclasses = np.array(same_epochs_misclasses)
            average_misclass = np.mean(same_epochs_misclasses, axis=0)
            best_epoch = np.argmin(average_misclass)
            best_misclass = average_misclass[best_epoch]
            formatted_result['best'] = "{:5.2f}%".format(
                (1 - best_misclass) * 100)
            # +1 for 1-based indexing
            formatted_result['best_epoch'] = "{:3d}".format(best_epoch + 1)

    def _atleast_2d_or_1d_of_arr(self, arr):
        if not isinstance(arr[0], list) and arr.ndim == 1:
            return np.atleast_2d(arr)
        else:
            return arr

    def _print(self, templates, constants, individual_datasets, shorten,
               ignore, markdown):
        if templates:
            self._print_templates()
        if constants:
            self._print_constant_parameters()
        if individual_datasets:
            self._print_experiments_result_table(shorten, ignore, markdown)
        if (self._result_pool.have_varying_datasets() or
                self._result_pool.have_varying_leave_out()):
            self._print_experiments_averaged_datasets_result_table(
                shorten, ignore, markdown)

    def _print_templates(self):
        # templates should all be the same, so just print the first one
        print("Templates ...")
        for name, template in self._result_pool.template().items():
            print(name + ":")
            # remove anchors...
            value = re.sub(r"\&([A-Za-z_0-9]*)", "", template)
            # ...and turn object tags that cannot be loaded properly
            # into anchors
            value = re.sub(r"!obj:[a-zA-Z0-9_.]*\.([A-Za-z_]*)", r"&\1",
                           value)
            # remove stars => references to other objects, which may be
            # somewhere in the actual yaml training file...
            value = value.replace("*", "")
            print(value)

    def _print_constant_parameters(self):
        print("Parameters...")
        yaml.dump(self._result_pool.constant_params(), sys.stdout,
                  default_flow_style=False)
        print('')

    def _print_experiments_result_table(self, shorten, ignore_headers=(),
                                        markdown=False):
        table_headers, table_rows = \
            self._create_experiments_headers_and_rows()
        if shorten:
            table_headers, table_rows = self._prettify_headers_rows(
                table_headers, table_rows)
        if len(ignore_headers) > 0:
            keep_mask = np.array(
                [h not in ignore_headers for h in table_headers])
            table_headers = np.array(table_headers)[keep_mask]
            # mask columns, not rows
            table_rows = np.array(table_rows)[:, keep_mask]
        if markdown:
            self._print_markdown_table(table_headers, table_rows)
        # self._print_csv_table(table_headers, table_rows)
        self._print_table(table_headers, table_rows)

    def _create_experiments_headers_and_rows(self):
        """Create table rows and headers for the result table."""
        param_headers, param_rows = \
            self._create_experiment_param_headers_and_rows()
        result_headers, result_rows = self._create_result_headers_and_rows()
        # Check that results and params have the same number of rows
        # and that all rows have the same size
        assert len(param_rows) == len(result_rows)
        for i in range(len(param_rows)):
            assert len(param_rows[i]) == len(param_rows[0])
            assert len(result_rows[i]) == len(result_rows[0])
        table_headers = param_headers + result_headers
        # merge rows together...
        table_rows = [param_rows[i] + result_rows[i]
                      for i in range(len(param_rows))]
        # some hacky header substitutions for nicer printing
        header_substitutions = {
            'frequency_start': 'freq_start',
            'frequency_stop': 'freq_stop',
            'updates_per_batch': 'batch_updates',
            'dataset_filename': 'filename',
            'max_increasing_epochs': '>epochs'}
        table_headers = [header_substitutions.get(header, header)
                         for header in table_headers]
        return table_headers, table_rows

    def _create_experiment_param_headers_and_rows(self):
        varying_param_keys = list(
            self._result_pool.varying_params()[0].keys())
        # Try to put the dataset filename in the second column after id...
        # TODELAY: remove test_filename from this
        if self._result_pool.have_varying_datasets():
            if 'dataset_filename' in varying_param_keys:
                filenamekey = 'dataset_filename'
            else:
                filenamekey = 'filename'
            varying_param_keys.remove(filenamekey)
            param_headers = ['id'] + [filenamekey] + varying_param_keys
        else:
            param_headers = ['id'] + varying_param_keys
        param_rows = []
        for result_obj_id in range(self._result_pool.num_experiments()):
            param_row = []
            # Put the result file number first
            file_name = self._result_pool.result_file_names()[result_obj_id]
            result_obj_file_nr = int(
                file_name.split('.result.pkl')[0].split('/')[-1])
            param_row.append(result_obj_file_nr)
            varying_params_this_result_obj = \
                self._result_pool.varying_params()[result_obj_id]
            # Put the dataset filename second if it exists
            if self._result_pool.have_varying_datasets():
                filename = varying_params_this_result_obj[filenamekey]
                # remove unnecessary details of the filename
                filename = re.sub(r"(./)?data/[^/]*/", '', str(filename))
                filename = re.sub(r"MoS[Cc][0-9]*S[0-9]*R[0-9]*_ds10_", '',
                                  filename)
                filename = re.sub(r"_autoclean_.*", '', filename)
                filename = re.sub(".BBCI.mat", '', filename)
                param_row.append(filename)
            # Put the rest of the parameters
            for param_key in varying_param_keys:
                param_value = varying_params_this_result_obj[param_key]
                # TODELAY: remove this if again
                if param_key == 'test_filename':
                    param_value = re.sub(r"(./)?data/[^/]*/", '',
                                         str(param_value))
                    param_value = re.sub(r"MoS[Cc][0-9]*S[0-9]*R[0-9]*_ds10_",
                                         '', param_value)
                    param_value = re.sub(r"_autoclean_.*", '', param_value)
                    param_value = re.sub("BBCI.mat", '', param_value)
                param_row.append(param_value)
            param_rows.append(param_row)
        return param_headers, param_rows

    def _create_result_headers_and_rows(self):
        result_headers = ['time']
        with_standard_deviation = 'train_std' in self._formatted_results[0]
        # just put 'epoch', not 'best_epoch', so the header is not so wide
        result_type_headers = ['test', 'test_sample', 'best', 'epoch',
                               'train', 'valid']
        for result_type in result_type_headers:
            # check whether the result exists; if yes, add it
            if (result_type in self._formatted_results[0] or
                    'best_' + result_type in self._formatted_results[0]):
                result_headers.append(result_type)
                if (with_standard_deviation and
                        result_type in ['test', 'train', 'valid']):
                    result_headers.append('std')
        result_rows = []
        for result_obj_id in range(self._result_pool.num_experiments()):
            result_row = []
            results = self._formatted_results[result_obj_id]
            # append the training time
            training_time = self._result_pool.training_times()[result_obj_id]
            result_row.append(
                str(datetime.timedelta(seconds=round(training_time))))
            for result_type in ['test', 'test_sample', 'best', 'best_epoch',
                                'train', 'valid']:
                if result_type in results:
                    result_row.append(results[result_type])
                    if (with_standard_deviation and
                            result_type in ['test', 'train', 'valid']):
                        result_row.append(results[result_type + "_std"])
            result_rows.append(result_row)
        return result_headers, result_rows

    def _print_experiments_averaged_datasets_result_table(
            self, shorten, ignore_headers, markdown):
        print("\n\nDataset-Averaged Results:")
        table_headers, table_rows = \
            self._create_averaged_dataset_headers_and_rows()
        if shorten:
            table_headers, table_rows = self._prettify_headers_rows(
                table_headers, table_rows)
        if len(ignore_headers) > 0:
            keep_mask = np.array(
                [h not in ignore_headers for h in table_headers])
            table_headers = np.array(table_headers)[keep_mask]
            table_rows = np.array(table_rows)[:, keep_mask]
        self._print_table(table_headers, table_rows)
        if markdown:
            self._print_markdown_table(table_headers, table_rows)

    def _create_averaged_dataset_headers_and_rows(self):
        """Create table rows and headers for the result table averaged over
        different datasets, i.e. one row per unique parameter combination."""
        all_result_lists = self._dataset_averaged_results.results()
        headers = self._create_dataset_averaged_headers(all_result_lists)
        rows = []
        for i, result_list in enumerate(all_result_lists):
            # id in the table, number of files
            row = [str(i), str(len(result_list))]
            parameter_row = self._create_parameter_row(result_list, headers)
            row += parameter_row
            misclasses = self._compute_final_misclasses(result_list)
            training_times = [r['training_time'] for r in result_list]
            result_row = self._create_result_row(headers, misclasses,
                                                 training_times)
            row += result_row
            rows.append(row)
        return headers, rows

    @staticmethod
    def _create_dataset_averaged_headers(all_result_list):
        params = deepcopy(all_result_list[0][0]['parameters'])
        misclass_keys = list(all_result_list[0][0]['misclasses'].keys())
        # sort to show test before train before valid
        misclass_keys = sorted(misclass_keys,
                               key=lambda x: ('test' in x) * 1 +
                                             ('train' in x) * 2 +
                                             ('valid' in x) * 3)
        result_keys = ["time", "std"]
        for key in misclass_keys:
            result_keys.append(key)
            result_keys.append('std')
        params.pop('filename', None)
        params.pop('dataset_filename', None)
        params.pop('transfer_leave_out', None)
        params.pop('test_filename', None)
        params.pop('trainer_filename', None)
        return ["id", "files"] + list(params.keys()) + result_keys

    @staticmethod
    def _create_result_row(headers, misclasses, training_times):
        row = []
        # Add the training times (mean and std)
        row += [str(datetime.timedelta(
            seconds=round(np.mean(training_times))))]
        row += [str(datetime.timedelta(
            seconds=round(np.std(training_times))))]
        # Add misclasses for valid, test, train, etc.
        assert len(set(misclasses.keys()) - set(headers)) == 0, (
            "All misclass keys should be in headers")
        # Add results in the order they are defined in headers(!)
        for key in headers:
            if key in misclasses.keys():
                row += ["{:4.2f}%".format(
                    100 - np.mean(misclasses[key] * 100))]
                row += ["{:4.2f}%".format(np.std(misclasses[key]) * 100)]
        return row

    @staticmethod
    def _create_parameter_row(result_list, headers):
        parameters = deepcopy(result_list[0]['parameters'])
        parameters.pop('dataset_filename', None)
        parameters.pop('filename', None)
        sorted_params = []
        for header in headers:
            if header in parameters:
                sorted_params.append(parameters[header])
        return sorted_params

    def _compute_final_misclasses(self, result_list):
        """Compute final fold-averaged misclasses for all experiments.
        Also works if there are no folds (train-test case)."""
        final_misclasses = {}
        misclasses = [r['misclasses'] for r in result_list]
        misclass_keys = misclasses[0].keys()  # train, test, etc.
        get_last_elem = np.vectorize(lambda a: a[-1])
        for key in misclass_keys:
            this_misclasses = [m[key] for m in misclasses]
            if np.array(this_misclasses[0][0]).shape != ():
                this_final_misclasses = get_last_elem(this_misclasses)
            else:
                # can only happen in case all experiments have the same
                # number of epochs
                this_final_misclasses = np.array(this_misclasses)[:, -1:]
            # average over folds if necessary
            this_final_avg_misclasses = np.mean(this_final_misclasses,
                                                axis=1)
            final_misclasses[key] = this_final_avg_misclasses
        return final_misclasses

    def _prettify_headers_rows(self, table_headers, table_rows):
        """Shorten names for nicer printing."""
        pretty_headers = []
        for header in table_headers:
            pretty_header = prettify_word(header)
            pretty_headers.append(pretty_header)
        pretty_rows = []
        for row in table_rows:
            pretty_row = []
            # go through all parameter values and prettify them
            for value in row:
                pretty_value = prettify_word(str(value))
                pretty_row.append(pretty_value)
            pretty_rows.append(pretty_row)
        return pretty_headers, pretty_rows

    def _print_table(self, headers, rows):
        print("\nTerminal Table\n")
        # first determine the maximum length of any column,
        # with two spaces of padding
        max_col_lengths = [0] * len(headers)
        for column in range(len(headers)):
            if len(str(headers[column])) > max_col_lengths[column]:
                max_col_lengths[column] = len(str(headers[column]))
            for row in range(len(rows)):
                if len(str(rows[row][column])) > max_col_lengths[column]:
                    max_col_lengths[column] = len(str(rows[row][column]))
        for index, header in enumerate(headers):
            length = max_col_lengths[index] + 2
            sys.stdout.write(("{:<" + str(length) + "}").format(header))
        print('')
        for row in rows:
            for column, value in enumerate(row):
                length = max_col_lengths[column] + 2
                sys.stdout.write(("{:<" + str(length) + "}").format(value))
            print('')

    def _print_csv_table(self, headers, rows):
        print("\nCSV Table\n")
        wr = csv.writer(sys.stdout)
        wr.writerow(headers)
        for row in rows:
            rowstrings = [str(value) for value in row]
            rowstrings = [rowstr.replace("$", "") for rowstr in rowstrings]
            wr.writerow(rowstrings)

    def _print_markdown_table(self, headers, rows):
        print("\nMarkdown Table\n")
        headerline = "|" + "|".join(headers) + "|"
        print(headerline)
        # make the separator line for the table
        separatorline = "|" + "|".join(["-" for _ in headers]) + "|"
        print(separatorline)
        for row in rows:
            rowstrings = [str(value) for value in row]
            rowstrings = [rowstr.replace("$", "") for rowstr in rowstrings]
            rowline = "|" + "|".join(rowstrings) + "|"
            print(rowline)
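# Illustrative usage sketch (not part of the original source); the folder is
# the default path used by the loaders above:
# printer = ResultPrinter('data/models/paper/ours/cnt/deep4/car/')
# printer.print_results(sets=True, markdown=False)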