コード例 #1
0
 def _collect_parameters_and_results(self, start, stop, params):
     """Load results from ``self._folder_name`` into a fresh ResultPool.

     ``start``, ``stop`` and ``params`` are handed unchanged to
     ``ResultPool.load_results``. When the pool reports varying datasets
     or varying leave-out, dataset-averaged results are extracted from
     the pool and kept in ``self._dataset_averaged_results``.
     """
     pool = ResultPool()
     self._result_pool = pool
     pool.load_results(self._folder_name, start, stop, params)
     needs_dataset_average = (pool.have_varying_datasets()
                              or pool.have_varying_leave_out())
     if not needs_dataset_average:
         return
     averaged = DatasetAveragedResults()
     self._dataset_averaged_results = averaged
     averaged.extract_results(pool)
コード例 #2
0
ファイル: amp_corrs.py プロジェクト: robintibor/braindecode
def create_meaned_amp_pred_corrs(prefix='',
        folder='data/models/paper/ours/cnt/deep4/car/',
        params='default',
        perturb_names=('no_dev', 'rand_mad', 'rand_std')):
    """Take computed cov_vars, transform them to corrs and save the corrs.

    Results matching ``params`` are loaded from ``folder`` and ordered by
    their ``dataset_filename`` parameter. For each result file, the work is
    delegated to ``create_meaned_amp_pred_corrs_for_file`` with the file's
    base name (result file name without ``.result.pkl``).

    ``params='default'`` selects a fixed parameter set (see below). A
    non-empty ``prefix`` gets a leading dot before it is passed on.
    """
    if params == 'default':
        params = {
            'cnt_preprocessors': "$cz_zero_resample_car_demean",
            'trial_start': 1500,
            'trial_stop': 4000,
        }
    pool = ResultPool()
    pool.load_results(folder, params=params)
    file_names = pool.result_file_names()
    result_objs = pool.result_objects()

    # Bring file names and result objects into dataset-filename order.
    dataset_names = [res.parameters['dataset_filename']
        for res in result_objs]
    order = np.argsort(dataset_names)
    file_names = np.array(file_names)[order]
    result_objs = np.array(result_objs)[order]

    base_names = [file_name.replace('.result.pkl', '')
        for file_name in file_names]
    if prefix != '':
        prefix = '.' + prefix
    for base_name, result in zip(base_names, result_objs):
        log.info("Loading {:s}".format(
            result.parameters['dataset_filename']))
        create_meaned_amp_pred_corrs_for_file(base_name, prefix,
            perturb_names)
コード例 #3
0
ファイル: amp_corrs.py プロジェクト: robintibor/braindecode
def load_meaned_amp_pred_corrs(prefix='',
        folder='data/models/paper/ours/cnt/deep4/car/',
        params='default',
        perturb_names=('no_dev','rand_mad', 'rand_std')):
    """Load saved amplitude-prediction correlations for all results.

    Results matching ``params`` are loaded from ``folder`` and ordered by
    their ``dataset_filename`` parameter. For every result file, one
    ``<base_name><prefix>.<perturb_name>.amp_cov_var_corrs.npy`` file per
    perturbation name is expected; a result for which any of these files is
    missing is skipped with a warning.

    Parameters
    ----------
    prefix : str
        Optional file name prefix; a leading dot is added if non-empty.
    folder : str
        Folder handed to ``ResultPool.load_results``.
    params : dict or 'default'
        Parameter filter for the result pool; ``'default'`` selects a
        fixed parameter set (see below).
    perturb_names : sequence of str
        Perturbation names to load.

    Returns
    -------
    all_corrs : dict
        Maps each perturbation name to a list with one loaded array per
        non-skipped result (in dataset-filename order).
    clean_mask : numpy array of bool
        One entry per non-skipped result; False if the dataset filename
        contains any entry of ``unclean_sets``.
    """
    if params == 'default':
        params = dict(cnt_preprocessors="$cz_zero_resample_car_demean",
            trial_start=1500, trial_stop=4000)
    res_pool = ResultPool()
    res_pool.load_results(folder,
        params=params)
    result_file_names = res_pool.result_file_names()
    results = res_pool.result_objects()
    if prefix != '':
        prefix = '.' + prefix

    # sort by dataset filename
    sort_order = np.argsort([r.parameters['dataset_filename'] for r in results])

    result_file_names = np.array(result_file_names)[sort_order]
    results = np.array(results)[sort_order]

    all_base_names = [name.replace('.result.pkl', '')
        for name in result_file_names]
    clean_mask = []
    all_corrs = dict()
    for i_file, base_name in enumerate(all_base_names):
        # Build each file name once; it is used both for the existence
        # check and for loading.
        filenames = [
            base_name + '{:s}.{:s}.amp_cov_var_corrs.npy'.format(
                prefix, perturb_name)
            for perturb_name in perturb_names]

        # Check that all perturbations exist
        missing_filenames = [filename for filename in filenames
            if not os.path.isfile(filename)]
        for filename in missing_filenames:
            # Logger.warn is deprecated, warning is the supported spelling
            log.warning("{:s} does not exist".format(filename))
        if missing_filenames:
            log.warning("Skipping {:s} since not all perturbations exist".format(
                base_name))
            continue

        # Mark the recording as unclean if it belongs to an unclean set
        dataset_filename = results[i_file].parameters['dataset_filename']
        clean_mask.append(
            not any(s in dataset_filename for s in unclean_sets))
        for perturb_name, filename in zip(perturb_names, filenames):
            all_corrs.setdefault(perturb_name, []).append(np.load(filename))

    clean_mask = np.array(clean_mask)
    return all_corrs, clean_mask
コード例 #4
0
def create_unit_output_class_corrs_for_files(folder_name, params,
        start, stop, i_all_layers):
    """Run ``create_unit_output_class_corrs`` for result files of a folder.

    Results matching ``params`` are loaded from ``folder_name``. The base
    names (result file names without ``.result.pkl``) in the slice
    ``[start:stop]`` are processed in order; a falsy ``start`` means 0 and
    a falsy ``stop`` means "up to the last file". ``i_all_layers`` is
    passed through unchanged.
    """
    pool = ResultPool()
    pool.load_results(folder_name, params=params)
    base_names = []
    for result_file_name in pool.result_file_names():
        base_names.append(result_file_name.replace('.result.pkl', ''))
    if not start:
        start = 0
    if not stop:
        stop = len(base_names)
    # Count from start + 1 so the log shows the 1-based overall position.
    for file_nr, basename in enumerate(base_names[start:stop], start + 1):
        log.info("Running {:s} ({:d} of {:d})".format(
            basename, file_nr, stop))
        create_unit_output_class_corrs(basename, i_all_layers)
コード例 #5
0
def create_all_amplitude_perturbation_corrs(folder_name,
                                            params,
                                            start,
                                            stop,
                                            with_square,
                                            with_square_cov,
                                            after_softmax,
                                            n_samples,
                                            perturbations='default'):
    """Run ``create_amplitude_perturbation_corrs`` for result files of a folder.

    ``perturbations`` may be the string ``'default'`` (no_dev, rand_mad and
    rand_std perturbations), the string ``'simple_no_scale'`` (only no_dev)
    or anything else, which is passed on unchanged. ``with_square`` and
    ``with_square_cov`` must not both be true.

    Results matching ``params`` are loaded from ``folder_name`` and the
    base names in the slice ``[start:stop]`` are processed in order; a
    falsy ``start`` means 0 and a falsy ``stop`` means "up to the last
    file".
    """
    if perturbations == 'default':
        no_dev = FuncAndArgs(
            rand_diff,
            with_blocks=False,
            # just return 1
            deviation_func=lambda arr, axis, keepdims: 1)
        rand_mad = FuncAndArgs(
            rand_diff,
            with_blocks=False,
            deviation_func=median_absolute_deviation)
        rand_std = FuncAndArgs(
            rand_diff,
            with_blocks=False,
            deviation_func=np.std)
        perturbations = (
            ('no_dev', no_dev),
            ('rand_mad', rand_mad),
            ('rand_std', rand_std),
        )
    elif perturbations == 'simple_no_scale':
        no_dev = FuncAndArgs(
            rand_diff,
            with_blocks=False,
            # just return 1
            deviation_func=lambda arr, axis, keepdims: 1)
        perturbations = (('no_dev', no_dev), )
    assert not (with_square and with_square_cov)

    pool = ResultPool()
    pool.load_results(folder_name, params=params)
    base_names = []
    for result_file_name in pool.result_file_names():
        base_names.append(result_file_name.replace('.result.pkl', ''))
    if not start:
        start = 0
    if not stop:
        stop = len(base_names)
    # Count from start + 1 so the log shows the 1-based overall position.
    for file_nr, base_name in enumerate(base_names[start:stop], start + 1):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, file_nr, stop))
        create_amplitude_perturbation_corrs(
            base_name,
            with_square=with_square,
            with_square_cov=with_square_cov,
            after_softmax=after_softmax,
            n_samples=n_samples,
            perturbations=perturbations)
コード例 #6
0
def create_unit_output_class_corrs_for_files(folder_name, params, start, stop,
                                             i_all_layers):
    """Run ``create_unit_output_class_corrs`` for result files of a folder.

    Results matching ``params`` are loaded from ``folder_name``. The base
    names (result file names without ``.result.pkl``) in the slice
    ``[start:stop]`` are processed in order; a falsy ``start`` means 0 and
    a falsy ``stop`` means "up to the last file". ``i_all_layers`` is
    passed through unchanged.
    """
    pool = ResultPool()
    pool.load_results(folder_name, params=params)
    base_names = []
    for result_file_name in pool.result_file_names():
        base_names.append(result_file_name.replace('.result.pkl', ''))
    if not start:
        start = 0
    if not stop:
        stop = len(base_names)
    # Count from start + 1 so the log shows the 1-based overall position.
    for file_nr, basename in enumerate(base_names[start:stop], start + 1):
        log.info("Running {:s} ({:d} of {:d})".format(
            basename, file_nr, stop))
        create_unit_output_class_corrs(basename, i_all_layers)
コード例 #7
0
def dataset_to_env_file(wanted_dataset_filename):
    """Return the envelope file name for a dataset file name.

    The lookup is built from the experiments the envelopes were originally
    calculated from: for every loaded result, the envelope file is the
    result file with ``.result.pkl`` replaced by ``.env.npy``. Every such
    envelope file is asserted to exist, also those of other datasets.

    Raises KeyError if ``wanted_dataset_filename`` is not among the loaded
    results.
    """
    pool = ResultPool()
    pool.load_results(
        'data/models-backup/paper/ours/cnt/deep4/car/',
        params={
            'cnt_preprocessors': "$cz_zero_resample_car_demean",
            'trial_start': 1500,
            'trial_stop': 4000,
        })

    env_file_by_dataset = {}
    result_pairs = zip(pool.result_objects(), pool.result_file_names())
    for result_obj, result_file_name in result_pairs:
        env_file = result_file_name.replace('.result.pkl', '.env.npy')
        assert os.path.isfile(env_file)
        env_file_by_dataset[result_obj.parameters['dataset_filename']] = (
            env_file)
    return env_file_by_dataset[wanted_dataset_filename]
コード例 #8
0
def create_env_class_corrs(folder, params, start, stop):
    """Run ``create_env_class_corr_file`` for result files of a folder.

    Results matching ``params`` are loaded from ``folder``. The base names
    (result file names without ``.result.pkl``) in the slice
    ``[start:stop]`` are processed in order, always with
    ``with_square=True``; a falsy ``start`` means 0 and a falsy ``stop``
    means "up to the last file".
    """
    pool = ResultPool()
    pool.load_results(folder, params=params)
    base_names = []
    for result_file_name in pool.result_file_names():
        base_names.append(result_file_name.replace('.result.pkl', ''))
    if not start:
        start = 0
    if not stop:
        stop = len(base_names)

    with_square = True
    # Count from start + 1 so the log shows the 1-based overall position.
    for exp_nr, base_name in enumerate(base_names[start:stop], start + 1):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, exp_nr, stop))
        create_env_class_corr_file(base_name, with_square)
コード例 #9
0
 def _collect_parameters_and_results(self, start, stop, params):
     """Load results from ``self._folder_name`` into a fresh ResultPool.

     ``start``, ``stop`` and ``params`` are handed unchanged to
     ``ResultPool.load_results``. When the pool reports varying datasets
     or varying leave-out, dataset-averaged results are extracted from
     the pool and kept in ``self._dataset_averaged_results``.
     """
     pool = ResultPool()
     self._result_pool = pool
     pool.load_results(self._folder_name, start, stop, params)
     needs_dataset_average = (pool.have_varying_datasets()
                              or pool.have_varying_leave_out())
     if not needs_dataset_average:
         return
     averaged = DatasetAveragedResults()
     self._dataset_averaged_results = averaged
     averaged.extract_results(pool)
コード例 #10
0
def create_env_class_corrs(folder, params,start,stop):
    """Run ``create_env_class_corr_file`` for result files of a folder.

    Results matching ``params`` are loaded from ``folder``. The base names
    (result file names without ``.result.pkl``) in the slice
    ``[start:stop]`` are processed in order, always with
    ``with_square=True``; a falsy ``start`` means 0 and a falsy ``stop``
    means "up to the last file".
    """
    pool = ResultPool()
    pool.load_results(folder, params=params)
    base_names = []
    for result_file_name in pool.result_file_names():
        base_names.append(result_file_name.replace('.result.pkl', ''))
    if not start:
        start = 0
    if not stop:
        stop = len(base_names)

    with_square = True
    # Count from start + 1 so the log shows the 1-based overall position.
    for exp_nr, base_name in enumerate(base_names[start:stop], start + 1):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, exp_nr, stop))
        create_env_class_corr_file(base_name, with_square)
コード例 #11
0
def create_env_corrs(folder_name, params, start, stop):
    """Create topo envelope correlations and envelope-class correlations.

    Results matching ``params`` are loaded from ``folder_name``. For each
    base name (result file name without ``.result.pkl``) in the slice
    ``[start:stop]``, ``create_topo_env_corrs_files`` and then
    ``create_env_class_corr_file`` are called with ``with_square=True``.
    A falsy ``start`` means 0 and a falsy ``stop`` means "up to the last
    file".
    """
    from braindecode.analysis.create_env_class_corrs import create_env_class_corr_file
    pool = ResultPool()
    pool.load_results(folder_name, params=params)
    base_names = []
    for result_file_name in pool.result_file_names():
        base_names.append(result_file_name.replace('.result.pkl', ''))
    if not start:
        start = 0
    if not stop:
        stop = len(base_names)
    # Hackhack hardcoded layers, since I know this is correct layers atm
    i_all_layers = [8, 14, 20, 26, 28]  # for shallow [3, 4, 5, 7]
    with_square = True
    # Count from start + 1 so the log shows the 1-based overall position.
    for file_nr, base_name in enumerate(base_names[start:stop], start + 1):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, file_nr, stop))
        create_topo_env_corrs_files(base_name, i_all_layers, with_square)
        create_env_class_corr_file(base_name, with_square)
コード例 #12
0
def create_env_corrs(folder_name, params, start, stop):
    """Create topo envelope correlations and envelope-class correlations.

    Results matching ``params`` are loaded from ``folder_name``. For each
    base name (result file name without ``.result.pkl``) in the slice
    ``[start:stop]``, ``create_topo_env_corrs_files`` and then
    ``create_env_class_corr_file`` are called with ``with_square=True``.
    A falsy ``start`` means 0 and a falsy ``stop`` means "up to the last
    file".
    """
    from braindecode.analysis.create_env_class_corrs import create_env_class_corr_file
    pool = ResultPool()
    pool.load_results(folder_name, params=params)
    base_names = []
    for result_file_name in pool.result_file_names():
        base_names.append(result_file_name.replace('.result.pkl', ''))
    if not start:
        start = 0
    if not stop:
        stop = len(base_names)
    # Hackhack hardcoded layers, since I know this is correct layers atm
    i_all_layers = [8, 14, 20, 26, 28]  # for shallow [3, 4, 5, 7]
    with_square = True
    # Count from start + 1 so the log shows the 1-based overall position.
    for file_nr, base_name in enumerate(base_names[start:stop], start + 1):
        log.info("Running {:s} ({:d} of {:d})".format(
            base_name, file_nr, stop))
        create_topo_env_corrs_files(base_name, i_all_layers, with_square)
        create_env_class_corr_file(base_name, with_square)
コード例 #13
0
ファイル: amp_corrs.py プロジェクト: robintibor/braindecode
def load_amp_corrs(with_square, with_square_corr, cov_or_corr):
    """Load saved amplitude covariances or correlations for all results.

    Results are loaded from a fixed folder with a fixed parameter filter
    and ordered by their ``dataset_filename`` parameter. For every result
    and each of the perturbations rand_mad, rand_std and no_dev, the file
    ``<base_name>[.square|.corrtosquare].<perturb>.amp_<cov_or_corr>s.npy``
    is asserted to exist and loaded.

    ``with_square`` and ``with_square_corr`` must not both be true and
    ``cov_or_corr`` must be ``'cov'`` or ``'corr'``.

    Returns a dict mapping each perturbation name to a list with one loaded
    array per result, and a boolean numpy array that is False for results
    whose dataset filename contains any entry of ``unclean_sets``.
    """
    assert not (with_square and with_square_corr)
    assert cov_or_corr in ('cov', 'corr')
    pool = ResultPool()
    pool.load_results(
        'data/models/paper/ours/cnt/deep4/car/',
        params={
            'sensor_names': "$all_EEG_sensors",
            'batch_modifier': "null",
            'low_cut_off_hz': "null",
            'first_nonlin': "$elu",
        })
    file_names = pool.result_file_names()
    result_objs = pool.result_objects()

    # Bring file names and result objects into dataset-filename order.
    order = np.argsort(
        [res.parameters['dataset_filename'] for res in result_objs])
    file_names = np.array(file_names)[order]
    result_objs = np.array(result_objs)[order]

    # The flags select a prefix that is put in front of the perturbation
    # part of the file name.
    name_prefix = ''
    if with_square:
        name_prefix = '.square' + name_prefix
    if with_square_corr:
        name_prefix = ".corrtosquare" + name_prefix

    clean_mask = []
    all_corrs = dict()
    for result_file_name, result in zip(file_names, result_objs):
        base_name = result_file_name.replace('.result.pkl', '')
        dataset_name = result.parameters['dataset_filename']
        clean_mask.append(
            not any(s in dataset_name for s in unclean_sets))
        for perturb_name in ('rand_mad', 'rand_std', 'no_dev'):
            file_name = base_name + name_prefix + (
                '.{:s}.amp_{:s}s.npy'.format(perturb_name, cov_or_corr))
            assert os.path.isfile(file_name), "Expect {:s} to exist".format(
                file_name)
            all_corrs.setdefault(perturb_name, []).append(
                np.load(file_name))

    return all_corrs, np.array(clean_mask)
コード例 #14
0
ファイル: amp_corrs.py プロジェクト: vatthaphon/braindevel
def create_meaned_amp_pred_corrs(
    prefix='',
    folder='data/models/paper/ours/cnt/deep4/car/',
    params='default',
    perturb_names=('no_dev', 'rand_mad', 'rand_std')):
    """Take computed cov_vars, transform them to corrs and save the corrs.

    Results matching ``params`` are loaded from ``folder`` and ordered by
    their ``dataset_filename`` parameter. For each result file, the work is
    delegated to ``create_meaned_amp_pred_corrs_for_file`` with the file's
    base name (result file name without ``.result.pkl``).

    ``params='default'`` selects a fixed parameter set (see below). A
    non-empty ``prefix`` gets a leading dot before it is passed on.
    """
    if params == 'default':
        params = {
            'cnt_preprocessors': "$cz_zero_resample_car_demean",
            'trial_start': 1500,
            'trial_stop': 4000,
        }
    pool = ResultPool()
    pool.load_results(folder, params=params)
    file_names = pool.result_file_names()
    result_objs = pool.result_objects()

    # Bring file names and result objects into dataset-filename order.
    dataset_names = [
        res.parameters['dataset_filename'] for res in result_objs
    ]
    order = np.argsort(dataset_names)
    file_names = np.array(file_names)[order]
    result_objs = np.array(result_objs)[order]

    base_names = [
        file_name.replace('.result.pkl', '') for file_name in file_names
    ]
    if prefix != '':
        prefix = '.' + prefix
    for base_name, result in zip(base_names, result_objs):
        log.info("Loading {:s}".format(
            result.parameters['dataset_filename']))
        create_meaned_amp_pred_corrs_for_file(base_name, prefix,
                                              perturb_names)
コード例 #15
0
def load_data_frame(folder, params=None, shorten_headers=True):
    """Load the results of a folder into a data frame.

    Results matching ``params`` are loaded from ``folder`` with a
    ResultPool; the data frame is built by ``to_data_frame`` from the file
    names, result objects, varying parameters and constant parameters of
    the pool. The folder and params used are recorded in the ``attrs``
    attribute of the returned data frame.
    """
    pool = ResultPool()
    pool.load_results(folder, params=params)
    result_objs = pool.result_objects()
    varying = pool.varying_params()
    constant = pool.constant_params()
    names = pool.result_file_names()
    frame = to_data_frame(
        names, result_objs, varying, constant,
        shorten_headers=shorten_headers)
    frame.attrs = dict(folder=folder, params=params)
    return frame
コード例 #16
0
ファイル: pandas_util.py プロジェクト: robintibor/braindecode
def load_data_frame(folder, params=None, shorten_headers=True):
    """Load the results of a folder into a data frame.

    Results matching ``params`` are loaded from ``folder`` with a
    ResultPool; the data frame is built by ``to_data_frame`` from the file
    names, result objects, varying parameters and constant parameters of
    the pool. The folder and params used are recorded in the ``attrs``
    attribute of the returned data frame.
    """
    pool = ResultPool()
    pool.load_results(folder, params=params)
    result_objs = pool.result_objects()
    varying = pool.varying_params()
    constant = pool.constant_params()
    names = pool.result_file_names()
    frame = to_data_frame(
        names, result_objs, varying, constant,
        shorten_headers=shorten_headers)
    frame.attrs = dict(folder=folder, params=params)
    return frame
コード例 #17
0
ファイル: env_corrs.py プロジェクト: robintibor/braindecode
def load_analysis_results(folder, params, file_name_end):
    """Load one analysis result file per experiment result.

    Results matching ``params`` are loaded from ``folder`` and ordered by
    their ``dataset_filename`` parameter. For each result, the analysis
    file is the result file name with ``.result.pkl`` replaced by
    ``file_name_end``; it is asserted to exist and loaded with ``np.load``.

    Returns a numpy array built from the loaded analysis results and a
    boolean numpy array that is False for results whose dataset filename
    contains any entry of ``unclean_sets``.
    """
    pool = ResultPool()
    pool.load_results(folder, params=params)
    file_names = pool.result_file_names()
    result_objs = pool.result_objects()

    # Bring file names and result objects into dataset-filename order.
    order = np.argsort(
        [res.parameters['dataset_filename'] for res in result_objs])
    file_names = np.array(file_names)[order]
    result_objs = np.array(result_objs)[order]

    loaded_per_person = []
    clean_mask = []
    for result_file_name, result_obj in zip(file_names, result_objs):
        analysis_file = result_file_name.replace('.result.pkl',
            file_name_end)
        assert os.path.isfile(analysis_file)
        loaded = np.load(analysis_file)
        dataset_name = result_obj.parameters['dataset_filename']
        clean_mask.append(
            not any(s in dataset_name for s in unclean_sets))
        loaded_per_person.append(loaded)
    return np.array(loaded_per_person), np.array(clean_mask)
コード例 #18
0
ファイル: envelopes.py プロジェクト: vatthaphon/braindevel
def create_envelopes(folder_name, params, start, stop):
    """Run ``create_envelopes_for_experiment`` for result files of a folder.

    Results matching ``params`` are loaded from ``folder_name``. For each
    result index in ``range(start, stop)``, the yaml file belonging to the
    result (``.result.pkl`` replaced by ``.yaml``) is processed and the
    parameters of the result are logged.

    Parameters
    ----------
    folder_name : str
        Folder handed to ``ResultPool.load_results``.
    params : dict
        Parameter filter for the result pool.
    start : int or None
        Index of the first file to process; a falsy value means 0.
    stop : int or None
        Index after the last file to process; a falsy value means "up to
        the last file".
    """
    res_pool = ResultPool()
    res_pool.load_results(folder_name, params=params)
    res_file_names = res_pool.result_file_names()
    log.info("{:d} files found.".format(len(res_file_names)))
    yaml_file_names = [name.replace('.result.pkl', '.yaml')
        for name in res_file_names]
    # Accept start=None like the sibling batch functions do; the original
    # passed None straight into the range and raised a TypeError.
    start = start or 0
    stop = stop or len(yaml_file_names)
    # range works on both Python 2 and Python 3 (xrange is Python 2 only)
    for i_file in range(start, stop):
        file_name = yaml_file_names[i_file]
        log.info("Running {:s} ({:d} of {:d})".format(
            file_name, i_file+1, stop))
        log.info("Parameters {:s}".format(
            str(res_pool.result_objects()[i_file].parameters)))
        create_envelopes_for_experiment(file_name)
コード例 #19
0
def dataset_to_env_file(wanted_dataset_filename):
    """Return the envelope file name for a dataset file name.

    The lookup is built from the experiments the envelopes were originally
    calculated from: for every loaded result, the envelope file is the
    result file with ``.result.pkl`` replaced by ``.env.npy``. Every such
    envelope file is asserted to exist, also those of other datasets.

    Raises KeyError if ``wanted_dataset_filename`` is not among the loaded
    results.
    """
    pool = ResultPool()
    pool.load_results(
        'data/models-backup/paper/ours/cnt/deep4/car/',
        params={
            'cnt_preprocessors': "$cz_zero_resample_car_demean",
            'trial_start': 1500,
            'trial_stop': 4000,
        })

    env_file_by_dataset = {}
    result_pairs = zip(pool.result_objects(), pool.result_file_names())
    for result_obj, result_file_name in result_pairs:
        env_file = result_file_name.replace('.result.pkl', '.env.npy')
        assert os.path.isfile(env_file)
        env_file_by_dataset[result_obj.parameters['dataset_filename']] = (
            env_file)
    return env_file_by_dataset[wanted_dataset_filename]
コード例 #20
0
ファイル: amp_corrs.py プロジェクト: vatthaphon/braindevel
def load_amp_corrs(with_square, with_square_corr, cov_or_corr):
    """Load saved amplitude covariances or correlations for all results.

    Results are loaded from a fixed folder with a fixed parameter filter
    and ordered by their ``dataset_filename`` parameter. For every result
    and each of the perturbations rand_mad, rand_std and no_dev, the file
    ``<base_name>[.square|.corrtosquare].<perturb>.amp_<cov_or_corr>s.npy``
    is asserted to exist and loaded.

    ``with_square`` and ``with_square_corr`` must not both be true and
    ``cov_or_corr`` must be ``'cov'`` or ``'corr'``.

    Returns a dict mapping each perturbation name to a list with one loaded
    array per result, and a boolean numpy array that is False for results
    whose dataset filename contains any entry of ``unclean_sets``.
    """
    assert not (with_square and with_square_corr)
    assert cov_or_corr in ('cov', 'corr')
    pool = ResultPool()
    pool.load_results(
        'data/models/paper/ours/cnt/deep4/car/',
        params={
            'sensor_names': "$all_EEG_sensors",
            'batch_modifier': "null",
            'low_cut_off_hz': "null",
            'first_nonlin': "$elu",
        })
    file_names = pool.result_file_names()
    result_objs = pool.result_objects()

    # Bring file names and result objects into dataset-filename order.
    order = np.argsort(
        [res.parameters['dataset_filename'] for res in result_objs])
    file_names = np.array(file_names)[order]
    result_objs = np.array(result_objs)[order]

    # The flags select a prefix that is put in front of the perturbation
    # part of the file name.
    name_prefix = ''
    if with_square:
        name_prefix = '.square' + name_prefix
    if with_square_corr:
        name_prefix = ".corrtosquare" + name_prefix

    clean_mask = []
    all_corrs = dict()
    for result_file_name, result in zip(file_names, result_objs):
        base_name = result_file_name.replace('.result.pkl', '')
        dataset_name = result.parameters['dataset_filename']
        clean_mask.append(
            not any(s in dataset_name for s in unclean_sets))
        for perturb_name in ('rand_mad', 'rand_std', 'no_dev'):
            file_name = base_name + name_prefix + (
                '.{:s}.amp_{:s}s.npy'.format(perturb_name, cov_or_corr))
            assert os.path.isfile(file_name), "Expect {:s} to exist".format(
                file_name)
            all_corrs.setdefault(perturb_name, []).append(
                np.load(file_name))

    return all_corrs, np.array(clean_mask)
コード例 #21
0
def load_patterns(folder='data/models/paper/ours/csp/car/'):
    """Load the CSP patterns of all experiments in a folder.

    Results are loaded from ``folder`` and ordered by their
    ``dataset_filename`` parameter. For each result, the model file
    (``.result.pkl`` replaced by ``.pkl``) is loaded and the patterns of
    its ``binary_csp`` are converted with ``patterns_to_single_array`` and
    squeezed; the resulting array must not contain NaNs.

    Returns a numpy array of the pattern arrays, a boolean numpy array that
    is False for datasets containing any entry of ``unclean_sets``, and the
    list of loaded experiment objects.
    """
    pool = ResultPool()
    pool.load_results(folder)

    file_names = pool.result_file_names()
    result_objs = pool.result_objects()

    # Bring file names and result objects into dataset-filename order.
    order = np.argsort(
        [res.parameters['dataset_filename'] for res in result_objs])
    file_names = np.array(file_names)[order]
    result_objs = np.array(result_objs)[order]

    # result objects are sorted now, so the dataset names are as well
    sorted_datasets = [res.parameters['dataset_filename']
                       for res in result_objs]
    all_patterns = []
    clean_mask = []
    all_exps = []
    for result_file_name, dataset in zip(file_names, sorted_datasets):
        log.info("Loading for {:s}".format(dataset))
        # NOTE(review): this loads a .pkl file through np.load, i.e. it
        # presumably unpickles the model; only use on trusted files and
        # confirm it still works with the numpy version in use.
        csp_exp = np.load(result_file_name.replace('.result.pkl', '.pkl'))
        pattern_arr = patterns_to_single_array(csp_exp.binary_csp.patterns)
        pattern_arr = pattern_arr.squeeze()
        assert not np.any(np.isnan(pattern_arr))
        all_patterns.append(pattern_arr)
        all_exps.append(csp_exp)
        clean_mask.append(not any(s in dataset for s in unclean_sets))

    return np.array(all_patterns), np.array(clean_mask), all_exps
コード例 #22
0
def load_analysis_results(folder, params, file_name_end):
    """Load one analysis result file per experiment result.

    Results matching ``params`` are loaded from ``folder`` and ordered by
    their ``dataset_filename`` parameter. For each result, the analysis
    file is the result file name with ``.result.pkl`` replaced by
    ``file_name_end``; it is asserted to exist and loaded with ``np.load``.

    Returns a numpy array built from the loaded analysis results and a
    boolean numpy array that is False for results whose dataset filename
    contains any entry of ``unclean_sets``.
    """
    pool = ResultPool()
    pool.load_results(folder, params=params)
    file_names = pool.result_file_names()
    result_objs = pool.result_objects()

    # Bring file names and result objects into dataset-filename order.
    order = np.argsort(
        [res.parameters['dataset_filename'] for res in result_objs])
    file_names = np.array(file_names)[order]
    result_objs = np.array(result_objs)[order]

    loaded_per_person = []
    clean_mask = []
    for result_file_name, result_obj in zip(file_names, result_objs):
        analysis_file = result_file_name.replace('.result.pkl',
                                                 file_name_end)
        assert os.path.isfile(analysis_file)
        loaded = np.load(analysis_file)
        dataset_name = result_obj.parameters['dataset_filename']
        clean_mask.append(
            not any(s in dataset_name for s in unclean_sets))
        loaded_per_person.append(loaded)
    return np.array(loaded_per_person), np.array(clean_mask)
コード例 #23
0
class ResultPrinter:
    def __init__(self, folder_name):
        """Remember the folder whose results should be printed.

        The folder name is only stored here; results are loaded later by
        ``_collect_parameters_and_results``.
        """
        self._folder_name = folder_name

    def print_results(self,
                      templates=False,
                      constants=False,
                      sets=False,
                      start=None,
                      stop=None,
                      params=None,
                      shorten=True,
                      ignore=(),
                      markdown=False):
        """Load, format and print the results of ``self._folder_name``.

        ``start``, ``stop`` and ``params`` are passed on to the loading
        step; ``templates``, ``constants``, ``sets``, ``shorten``,
        ``ignore`` and ``markdown`` are passed on to the printing step
        (see ``_print``).
        """
        announcement = "Printing results in {:s}:".format(self._folder_name)
        print(announcement)
        self._collect_parameters_and_results(start, stop, params)
        self._format_results()
        self._print(templates, constants, sets, shorten, ignore, markdown)

    def _collect_parameters_and_results(self, start, stop, params):
        """Load results from ``self._folder_name`` into a fresh ResultPool.

        ``start``, ``stop`` and ``params`` are handed unchanged to
        ``ResultPool.load_results``. When the pool reports varying
        datasets or varying leave-out, dataset-averaged results are
        extracted from the pool and kept in
        ``self._dataset_averaged_results``.
        """
        pool = ResultPool()
        self._result_pool = pool
        pool.load_results(self._folder_name, start, stop, params)
        needs_dataset_average = (pool.have_varying_datasets()
                                 or pool.have_varying_leave_out())
        if not needs_dataset_average:
            return
        averaged = DatasetAveragedResults()
        self._dataset_averaged_results = averaged
        averaged.extract_results(pool)

    def _format_results(self):
        self._formatted_results = []
        misclasses_per_experiment = self._result_pool.get_misclasses()
        for misclasses in misclasses_per_experiment:
            formatted_result = self._format_result(misclasses)
            self._formatted_results.append(formatted_result)

    def _format_result(self, misclasses):
        """ Format result for one experiment. """
        formatted_result = {}
        self._add_misclasses(formatted_result, misclasses)
        self._add_best_and_best_epoch(formatted_result, misclasses)
        return formatted_result

    def _add_misclasses(self, formatted_result, misclasses):
        """ Add misclasses for one experiment"""
        for key in misclasses:
            # get last epoch from all folds
            # need transform in case we just have one experiment(?)
            this_misclasses = self._atleast_2d_or_1d_of_arr(misclasses[key])
            final_misclasses = []
            for misclass_fold in this_misclasses:
                final_misclasses.append(misclass_fold[-1])
            this_mean = np.mean(final_misclasses)
            formatted_result[key] = "{:5.2f}%".format((1 - this_mean) * 100)
            if (len(final_misclasses) > 1):  # only for crossval
                this_std = np.std(final_misclasses)
                formatted_result[key + '_std'] = "{:4.2f}%".format(this_std *
                                                                   100)

    def _add_best_and_best_epoch(self, formatted_result, misclasses):
        if 'test' in misclasses:
            # Determine minimal number of epochs and
            # only take misclass rates until that epoch
            test_misclasses = deepcopy(misclasses['test'])
            # transform to list of one list in case of only one experiment
            test_misclasses = self._atleast_2d_or_1d_of_arr(test_misclasses)
            min_epoch_num = np.min([len(a) for a in test_misclasses])
            same_epochs_misclasses = [
                a[0:min_epoch_num] for a in test_misclasses
            ]
            same_epochs_misclasses = np.array(same_epochs_misclasses)
            average_misclass = np.mean(same_epochs_misclasses, axis=0)
            best_epoch = np.argmin(average_misclass)
            best_misclass = average_misclass[best_epoch]
            formatted_result['best'] = "{:5.2f}%".format(
                (1 - best_misclass) * 100)
            # +1 for 1-based indexing
            formatted_result['best_epoch'] = "{:3d}".format(best_epoch + 1)

    def _atleast_2d_or_1d_of_arr(self, arr):
        if not isinstance(arr[0], list) and arr.ndim == 1:
            return np.atleast_2d(arr)
        else:
            return arr

    def _print(self, templates, constants, individual_datasets, shorten,
               ignore, markdown):
        """Print the requested parts of the loaded results.

        ``templates``, ``constants`` and ``individual_datasets`` switch on
        the templates, the constant parameters and the per-experiment
        result table. The dataset-averaged table is always printed when
        the result pool reports varying datasets or varying leave-out.
        """
        if templates:
            self._print_templates()
        if constants:
            self._print_constant_parameters()
        if individual_datasets:
            self._print_experiments_result_table(shorten, ignore, markdown)
        pool = self._result_pool
        if not (pool.have_varying_datasets()
                or pool.have_varying_leave_out()):
            return
        self._print_experiments_averaged_datasets_result_table(
            shorten, ignore, markdown)

    def _print_templates(self):
        # templates should all be same so just print first one
        print("Templates ...")
        for name, template in self._result_pool.template().iteritems():
            print(name + ":")
            # substitute object tags that cannot be loaded properly
            #value = re.sub(r"!obj:[a-zA-Z0-9_.]*\.([A-Za-z_]*)", r"\1:",
            #   template)
            # remove anchors...
            value = re.sub(r"\&([A-Za-z_0-9]*)", "", template)
            value = re.sub(r"!obj:[a-zA-Z0-9_.]*\.([A-Za-z_]*)", r"&\1", value)
            # substitute stars => references to other objects
            # which may be somehwere in the actual yaml-training file...
            value = value.replace("*", "")
            #yaml.dump(yaml.load(value), sys.stdout, default_flow_style=False)
            print value

    def _print_constant_parameters(self):
        """Print the constant parameters of the result pool as yaml."""
        print("Parameters...")
        constant_params = self._result_pool.constant_params()
        yaml.dump(constant_params, sys.stdout, default_flow_style=False)
        print('')

    def _print_experiments_result_table(self,
                                        shorten,
                                        ignore_headers=(),
                                        markdown=False):
        """Print the table with one row per experiment.

        Parameters
        ----------
        shorten : bool
            If true, headers and rows are passed through
            ``_prettify_headers_rows`` first.
        ignore_headers : sequence of str
            Headers whose columns should be left out of the table.
        markdown : bool
            If true, the table is additionally printed as markdown.
        """
        table_headers, table_rows = self._create_experiments_headers_and_rows()
        if shorten:
            table_headers, table_rows = self._prettify_headers_rows(
                table_headers, table_rows)
        if len(ignore_headers) > 0:
            keep_mask = np.array(
                [h not in ignore_headers for h in table_headers])
            table_headers = np.array(table_headers)[keep_mask]
            if len(table_rows) > 0:
                # keep_mask has one entry per header, i.e. per column, so it
                # has to be applied along axis 1. Applying it along axis 0
                # (as before) dropped rows instead of columns, or failed
                # when the number of rows differed from the number of
                # headers.
                # NOTE(review): assumes rows still have one cell per header
                # after _prettify_headers_rows - confirm.
                table_rows = np.array(table_rows)[:, keep_mask]
        if markdown:
            self._print_markdown_table(table_headers, table_rows)
        # NOTE(review): the plain table is printed even when markdown was
        # requested; kept as is, confirm whether this is intended.
        self._print_table(table_headers, table_rows)

    def _create_experiments_headers_and_rows(self):
        """Create table headers and rows for the result table.

        Parameter columns (from
        ``_create_experiment_param_headers_and_rows``) come first, followed
        by the result columns (from ``_create_result_headers_and_rows``).
        Some long header names are replaced by shorter ones.

        Returns
        -------
        table_headers : list
            One header per column.
        table_rows : list of list
            One row per experiment, parameter cells followed by result
            cells.
        """
        param_headers, param_rows = (
            self._create_experiment_param_headers_and_rows())
        result_headers, result_rows = self._create_result_headers_and_rows()
        # Check that results and params have same amount of rows
        # and that each row has same size
        assert (len(param_rows) == len(result_rows))
        for param_row, result_row in zip(param_rows, result_rows):
            assert (len(param_row) == len(param_rows[0]))
            assert (len(result_row) == len(result_rows[0]))

        table_headers = param_headers + result_headers
        # merge rows together...
        table_rows = [
            param_row + result_row
            for param_row, result_row in zip(param_rows, result_rows)
        ]
        # some hacky header_substitutions for nicer printing
        header_substitutions = {
            'frequency_start': 'freq_start',
            'frequency_stop': 'freq_stop',
            'updates_per_batch': 'batch_updates',
            'dataset_filename': 'filename',
            'max_increasing_epochs': '>epochs'
        }
        # dict.get works on Python 2 and 3 (dict.has_key is Python 2 only)
        table_headers = [
            header_substitutions.get(header, header)
            for header in table_headers
        ]
        return table_headers, table_rows

    def _create_experiment_param_headers_and_rows(self):
        varying_param_keys = self._result_pool.varying_params()[0].keys()
        # Try to put dataset filename in second column after id...
        # TODELAY: remove testfilename from this
        if (self._result_pool.have_varying_datasets()):
            if 'dataset_filename' in varying_param_keys:
                filenamekey = 'dataset_filename'
            else:
                filenamekey = 'filename'
            varying_param_keys.remove(filenamekey)
            param_headers = ['id'] + [filenamekey] + varying_param_keys
        else:
            param_headers = ['id'] + varying_param_keys
        param_rows = []
        for result_obj_id in range(self._result_pool.num_experiments()):
            param_row = []
            # Put result_obj id nr first
            file_name = self._result_pool.result_file_names()[result_obj_id]
            result_obj_file_nr = int(
                file_name.split('.result.pkl')[0].split('/')[-1])
            param_row.append(result_obj_file_nr)
            varying_params_this_result_obj = self._result_pool.varying_params(
            )[result_obj_id]
            # Put dataset filename second if exist
            if (self._result_pool.have_varying_datasets()):
                filename = varying_params_this_result_obj[filenamekey]
                # remove unnecessary details of filename
                filename = re.sub(r"(./)?data/[^/]*/", '', str(filename))
                filename = re.sub(r"MoS[Cc][0-9]*S[0-9]*R[0-9]*_ds10_", '',
                                  filename)
                filename = re.sub(r"_autoclean_.*", '', filename)
                filename = re.sub(".BBCI.mat", '', filename)
                param_row.append(filename)
            # Put rest of parameters
            for param_key in varying_param_keys:
                param_value = varying_params_this_result_obj[param_key]
                # TODELAY: remove again this if
                if param_key == 'test_filename':
                    param_value = re.sub(r"(./)?data/[^/]*/", '',
                                         str(param_value))
                    param_value = re.sub(r"MoS[Cc][0-9]*S[0-9]*R[0-9]*_ds10_",
                                         '', param_value)
                    param_value = re.sub(r"_autoclean_.*", '', param_value)
                    param_value = re.sub("BBCI.mat", '', param_value)
                param_row.append(param_value)
            param_rows.append(param_row)
        return param_headers, param_rows

    def _create_result_headers_and_rows(self):
        result_headers = []
        result_headers.append('time')
        with_standard_deviation = self._formatted_results[0].has_key(
            'train_std')

        # just put 'epoch' not 'best_epoch' so that header is not so wide
        result_type_headers = [
            'test', 'test_sample', 'best', 'epoch', 'train', 'valid'
        ]
        for result_type in result_type_headers:
            # check if result exists, if yes add it
            if (result_type in self._formatted_results[0]
                    or 'best_' + result_type in self._formatted_results[0]):
                result_headers.append(result_type)
                if (with_standard_deviation
                        and result_type in ['test', 'train', 'valid']):
                    result_headers.append('std')
        result_rows = []
        for result_obj_id in range(self._result_pool.num_experiments()):
            result_row = []
            results = self._formatted_results[result_obj_id]
            #append training time
            training_time = self._result_pool.training_times()[result_obj_id]
            result_row.append(
                str(datetime.timedelta(seconds=round(training_time))))
            for result_type in [
                    'test', 'test_sample', 'best', 'best_epoch', 'train',
                    'valid'
            ]:
                if result_type in results:
                    result_row.append(results[result_type])
                    if (with_standard_deviation
                            and result_type in ['test', 'train', 'valid']):
                        result_row.append(results[result_type + "_std"])
            result_rows.append(result_row)
        return result_headers, result_rows

    def _print_experiments_averaged_datasets_result_table(
            self, shorten, ignore_headers, markdown):
        print("\n\nDataset-Averaged Results:")
        table_headers, table_rows = \
            self._create_averaged_dataset_headers_and_rows()
        if shorten:
            table_headers, table_rows = self._prettify_headers_rows(
                table_headers, table_rows)
        if len(ignore_headers) > 0:
            keep_mask = np.array(
                [h not in ignore_headers for h in table_headers])
            table_headers = np.array(table_headers)[keep_mask]
            table_rows = np.array(table_rows)[:, keep_mask]

        self._print_table(table_headers, table_rows)
        if markdown:
            self._print_markdown_table(table_headers, table_rows)

    def _create_averaged_dataset_headers_and_rows(self):
        ''' Create table rows and headers for result table averaged over
        different datasets.
        So in the end have a table for each unique parameter combination.'''
        # TODELAY: don't use this weird hack to find out if there is
        # a cross validation ;)
        all_result_lists = self._dataset_averaged_results.results()
        headers = self._create_dataset_averaged_headers(all_result_lists)
        rows = []
        for i, result_list in enumerate(all_result_lists):
            row = [str(i),
                   str(len(result_list))]  # id in table, number of files
            parameter_row = self._create_parameter_row(result_list, headers)
            row += parameter_row
            misclasses = self._compute_final_misclasses(result_list)
            training_times = [r['training_time'] for r in result_list]
            result_row = self._create_result_row(headers, misclasses,
                                                 training_times)
            row += result_row
            rows.append(row)
        return headers, rows

    @staticmethod
    def _create_dataset_averaged_headers(all_result_list):
        params = deepcopy(all_result_list[0][0]['parameters'])
        misclass_keys = all_result_list[0][0]['misclasses'].keys()
        # sort to show test before train before valid
        misclass_keys = sorted(misclass_keys,
                               key=lambda x: ('test' in x) * 1 +
                               ('train' in x) * 2 + ('valid' in x) * 3)
        result_keys = ["time", "std"]
        for key in misclass_keys:
            result_keys.append(key)
            result_keys.append('std')
        params.pop('filename', None)
        params.pop('dataset_filename', None)
        params.pop('transfer_leave_out', None)
        params.pop('test_filename', None)
        params.pop('trainer_filename', None)
        return ["id", "files"] + params.keys() + result_keys

    @staticmethod
    def _create_result_row(headers, misclasses, training_times):
        row = []
        # Add training times
        row += [
            str(datetime.timedelta(seconds=round(np.mean(training_times))))
        ]
        row += [str(datetime.timedelta(seconds=round(np.std(training_times))))]
        # Add misclass for valid test train etc.
        assert len(set(misclasses.keys()) -
                   set(headers)) == 0, ("All misclass"
                                        "keys should be in headers")
        # Add results in order they are defined in headers(!)
        for key in headers:
            if key in misclasses.keys():
                row += [
                    "{:4.2f}%".format(100 - np.mean(misclasses[key] * 100))
                ]
                row += ["{:4.2f}%".format(np.std(misclasses[key]) * 100)]
        return row

    @staticmethod
    def _create_parameter_row(result_list, headers):
        parameters = deepcopy(result_list[0]['parameters'])
        parameters.pop('dataset_filename', None)
        parameters.pop('filename', None)
        sorted_params = []
        for header in headers:
            if header in parameters:
                sorted_params.append(parameters[header])
        return sorted_params

    def _compute_final_misclasses(self, result_list):
        """Compute final fold-averaged misclasses for all experiments.

        Also works if there are no folds (train-test case).

        Args:
            result_list: Sequence of results, each providing a
                ``'misclasses'`` mapping. The keys of the first result's
                mapping are used for all results.

        Returns:
            dict: Maps every misclass key to the result of
            ``np.mean(..., axis=1)`` over the final misclass values, i.e.
            one value per entry of ``result_list``.
        """
        final_misclasses = {}
        misclasses = [r['misclasses'] for r in result_list]
        misclass_keys = misclasses[0].keys()  # train,test etc
        # applies ``a[-1]`` to every element of its (array-converted) input
        get_last_elem = np.vectorize(lambda a: a[-1])
        for key in misclass_keys:
            this_misclasses = [m[key] for m in misclasses]
            # Non-scalar first element: presumably one sequence of
            # per-epoch values per fold -- TODO confirm against ResultPool
            if np.array(this_misclasses[0][0]).shape != ():
                # NOTE(review): how np.vectorize splits the nested input
                # into elements depends on whether the per-fold sequences
                # have equal lengths -- verify with real data
                this_final_misclasses = get_last_elem(this_misclasses)
            else:
                # can only happen in case all experiments have same number of
                # epochs
                # ``-1:`` (not ``-1``) keeps a second axis of length 1, so
                # the mean over axis 1 below works in this branch as well
                this_final_misclasses = np.array(this_misclasses)[:, -1:]

            # avg over folds if necessary
            this_final_avg_misclasses = np.mean(this_final_misclasses, axis=1)
            final_misclasses[key] = this_final_avg_misclasses

        return final_misclasses

    def _prettify_headers_rows(self, table_headers, table_rows):
        """Shorten header names and cell values for nicer printing.

        Every header is passed to ``prettify_word`` as it is; every cell
        value is converted with ``str`` first.

        Returns:
            tuple: ``(pretty_headers, pretty_rows)`` as new lists.
        """
        short_headers = [prettify_word(name) for name in table_headers]
        short_rows = [
            [prettify_word(str(cell)) for cell in cells]
            for cells in table_rows
        ]
        return short_headers, short_rows

    def _print_table(self, headers, rows):
        print("\nTerminal Table\n")
        # first determine maximum length for any column
        # two spaces padding
        max_col_lengths = [0] * len(headers)
        for column in range(len(headers)):
            if (len(str(headers[column])) > max_col_lengths[column]):
                max_col_lengths[column] = len(str(headers[column]))
            for row in range(len(rows)):
                if (len(str(rows[row][column])) > max_col_lengths[column]):
                    max_col_lengths[column] = len(str(rows[row][column]))

        for index, header in enumerate(headers):
            length = max_col_lengths[index] + 2
            sys.stdout.write(("{:<" + str(length) + "}").format(header))
        print('')
        for row in rows:
            for column, value in enumerate(row):
                length = max_col_lengths[column] + 2
                sys.stdout.write(("{:<" + str(length) + "}").format(value))
            print('')

    def _print_csv_table(self, headers, rows):
        print("\nCSV Table\n")
        wr = csv.writer(sys.stdout)
        wr.writerow(headers)
        for row in rows:
            rowstrings = [str(value) for value in row]
            rowstrings = [rowstr.replace("$", "") for rowstr in rowstrings]
            wr.writerow(rowstrings)

    def _print_markdown_table(self, headers, rows):
        print("\nMarkdown Table\n")
        headerline = "|".join(headers)
        headerline = "|" + headerline + "|"
        print headerline

        # make seperatorline for table
        seperatorline = "|".join(["-" for _ in headers])
        seperatorline = "|" + seperatorline + "|"
        print seperatorline
        for row in rows:
            rowstrings = [str(value) for value in row]
            rowstrings = [rowstr.replace("$", "") for rowstr in rowstrings]
            rowline = "|".join(rowstrings)
            rowline = "|" + rowline + "|"
            print rowline
コード例 #24
0
ファイル: amp_corrs.py プロジェクト: vatthaphon/braindevel
def load_meaned_amp_pred_corrs(prefix='',
                               folder='data/models/paper/ours/cnt/deep4/car/',
                               params='default',
                               perturb_names=('no_dev', 'rand_mad',
                                              'rand_std')):
    """Load the saved ``amp_cov_var_corrs`` arrays of all results in a folder.

    Results are processed in the order of their ``dataset_filename``
    parameter. A result is skipped (with a warning) unless the file
    ``<base>[.<prefix>].<perturb_name>.amp_cov_var_corrs.npy`` exists for
    every perturbation name.

    Args:
        prefix: Optional prefix inside the file names of the saved arrays.
        folder: Folder the results are loaded from.
        params: Parameters the loaded results are restricted to;
            ``'default'`` selects the defaults defined below.
        perturb_names: Names of the perturbations to load.

    Returns:
        tuple: ``(all_corrs, clean_mask)``. ``all_corrs`` maps each
        perturbation name to a list with one loaded array per result that
        was not skipped. ``clean_mask`` is a boolean array with one entry
        per such result, False where the dataset filename contains one of
        ``unclean_sets``.
    """
    if params == 'default':
        params = dict(cnt_preprocessors="$cz_zero_resample_car_demean",
                      trial_start=1500,
                      trial_stop=4000)
    res_pool = ResultPool()
    res_pool.load_results(folder, params=params)
    result_file_names = res_pool.result_file_names()
    results = res_pool.result_objects()
    if prefix != '':
        prefix = '.' + prefix

    # sort by dataset filename
    sort_order = np.argsort(
        [r.parameters['dataset_filename'] for r in results])

    result_file_names = np.array(result_file_names)[sort_order]
    results = np.array(results)[sort_order]

    clean_mask = []
    all_corrs = dict()
    for result_file_name, result in zip(result_file_names, results):
        base_name = result_file_name.replace('.result.pkl', '')
        # Build every file name once; it is needed both for the existence
        # check and for loading.
        corr_file_names = [
            (perturb_name,
             base_name + '{:s}.{:s}.amp_cov_var_corrs.npy'.format(
                 prefix, perturb_name))
            for perturb_name in perturb_names
        ]

        # Check that all perturbations exist
        missing_file_names = [
            filename for _, filename in corr_file_names
            if not os.path.isfile(filename)
        ]
        if missing_file_names:
            # log.warning: log.warn is a deprecated alias
            for filename in missing_file_names:
                log.warning("{:s} does not exist".format(filename))
            log.warning(
                "Skipping {:s} since not all perturbations exist".format(
                    base_name))
            continue

        # Mark results whose dataset is one of the unclean sets
        dataset_filename = result.parameters['dataset_filename']
        clean_mask.append(not any(s in dataset_filename
                                  for s in unclean_sets))
        for perturb_name, filename in corr_file_names:
            all_corrs.setdefault(perturb_name, []).append(np.load(filename))

    return all_corrs, np.array(clean_mask)
コード例 #25
0
class ResultPrinter:
    """Print the experiment results stored in a folder as text tables.

    Results are loaded through ``ResultPool``. They are printed per
    experiment and, when datasets or leave-out settings vary, averaged over
    datasets (via ``DatasetAveragedResults``).
    """

    def __init__(self, folder_name):
        """Remember the folder the results are loaded from."""
        self._folder_name = folder_name

    def print_results(self, templates=False,
            constants=False,
            sets=False,
            start=None, stop=None,
            params=None, shorten=True,
            ignore=(),
            markdown=False):
        """Load, format and print the results of the folder.

        Args:
            templates: Print the templates of the result pool.
            constants: Print the constant parameters of the result pool.
            sets: Print the table with one row per experiment.
            start: Passed on to ``ResultPool.load_results``.
            stop: Passed on to ``ResultPool.load_results``.
            params: Passed on to ``ResultPool.load_results``.
            shorten: Pass headers and cells through ``prettify_word``.
            ignore: Header names of columns to leave out of the tables;
                they are compared against the headers after shortening.
            markdown: Additionally print the tables as markdown.
        """
        print("Printing results in {:s}:".format(self._folder_name))
        self._collect_parameters_and_results(start, stop, params)
        self._format_results()
        self._print(templates, constants, sets, shorten, ignore, markdown)

    def _collect_parameters_and_results(self, start, stop, params):
        """Load the result pool and, if needed, dataset-averaged results."""
        self._result_pool = ResultPool()
        self._result_pool.load_results(self._folder_name, start, stop, params)
        if (self._result_pool.have_varying_datasets() or
                self._result_pool.have_varying_leave_out()):
            self._dataset_averaged_results = DatasetAveragedResults()
            self._dataset_averaged_results.extract_results(self._result_pool)

    def _format_results(self):
        """Create one formatted result dict per experiment."""
        self._formatted_results = [
            self._format_result(misclasses)
            for misclasses in self._result_pool.get_misclasses()
        ]

    def _format_result(self, misclasses):
        """ Format result for one experiment. """
        formatted_result = {}
        self._add_misclasses(formatted_result, misclasses)
        self._add_best_and_best_epoch(formatted_result, misclasses)
        return formatted_result

    def _add_misclasses(self, formatted_result, misclasses):
        """Add final accuracies (and their std) of one experiment.

        For every key of ``misclasses`` the last value of every fold is
        averaged and stored under the same key as accuracy in percent. With
        more than one fold, the standard deviation is stored under
        ``<key>_std``.
        """
        for key in misclasses:
            # get last epoch from all folds
            # need transform in case we just have one experiment(?)
            this_misclasses = self._atleast_2d_or_1d_of_arr(misclasses[key])
            final_misclasses = [
                misclass_fold[-1] for misclass_fold in this_misclasses
            ]
            this_mean = np.mean(final_misclasses)
            formatted_result[key] = "{:5.2f}%".format((1 - this_mean) * 100)
            if len(final_misclasses) > 1:  # only for crossval
                this_std = np.std(final_misclasses)
                formatted_result[key + '_std'] = "{:4.2f}%".format(
                    this_std * 100)

    def _add_best_and_best_epoch(self, formatted_result, misclasses):
        """Add best fold-averaged test accuracy and its (1-based) epoch.

        Does nothing if ``misclasses`` has no ``'test'`` entry.
        """
        if 'test' not in misclasses:
            return
        test_misclasses = deepcopy(misclasses['test'])
        # transform to list of one list in case of only one experiment
        test_misclasses = self._atleast_2d_or_1d_of_arr(test_misclasses)
        # Determine minimal number of epochs and
        # only take misclass rates until that epoch
        min_epoch_num = np.min([len(a) for a in test_misclasses])
        same_epochs_misclasses = np.array(
            [a[0:min_epoch_num] for a in test_misclasses])
        average_misclass = np.mean(same_epochs_misclasses, axis=0)
        best_epoch = np.argmin(average_misclass)
        best_misclass = average_misclass[best_epoch]
        formatted_result['best'] = "{:5.2f}%".format(
            (1 - best_misclass) * 100)
        # +1 for 1-based indexing
        formatted_result['best_epoch'] = "{:3d}".format(best_epoch + 1)

    def _atleast_2d_or_1d_of_arr(self, arr):
        """Wrap a 1d array of values into a 2d array, else return ``arr``.

        ``arr`` is returned unchanged if its first element is a list or if
        it is not one-dimensional.
        """
        if not isinstance(arr[0], list) and arr.ndim == 1:
            return np.atleast_2d(arr)
        else:
            return arr

    def _print(self, templates, constants, individual_datasets, shorten,
            ignore, markdown):
        """Print the parts selected by the flags.

        The dataset-averaged table is always printed when datasets or
        leave-out settings vary.
        """
        if templates:
            self._print_templates()
        if constants:
            self._print_constant_parameters()
        if individual_datasets:
            self._print_experiments_result_table(shorten, ignore, markdown)
        if (self._result_pool.have_varying_datasets() or
                self._result_pool.have_varying_leave_out()):
            self._print_experiments_averaged_datasets_result_table(shorten,
                ignore, markdown)

    def _print_templates(self):
        """Print the templates of the result pool in simplified form."""
        # templates should all be same so just print first one
        print("Templates ...")
        # items() instead of iteritems(): available on Python 2 and 3
        for name, template in self._result_pool.template().items():
            print(name + ":")
            # remove anchors...
            value = re.sub(r"\&([A-Za-z_0-9]*)", "", template)
            # substitute object tags that cannot be loaded properly
            value = re.sub(r"!obj:[a-zA-Z0-9_.]*\.([A-Za-z_]*)", r"&\1",
                value)
            # substitute stars => references to other objects
            # which may be somewhere in the actual yaml-training file...
            value = value.replace("*", "")
            print(value)

    def _print_constant_parameters(self):
        """Dump the constant parameters of the result pool as yaml."""
        print("Parameters...")
        yaml.dump(self._result_pool.constant_params(), sys.stdout,
            default_flow_style=False)
        print('')

    def _print_experiments_result_table(self, shorten, ignore_headers=(),
            markdown=False):
        """Print the table with one row per experiment.

        Args:
            shorten: Pass headers and rows through
                ``_prettify_headers_rows`` before printing.
            ignore_headers: Header names of columns to drop; compared
                against the headers after the optional shortening.
            markdown: Print the table as markdown as well, before the
                terminal table.
        """
        table_headers, table_rows = self._create_experiments_headers_and_rows()
        if shorten:
            table_headers, table_rows = self._prettify_headers_rows(
                table_headers, table_rows)
        if len(ignore_headers) > 0:
            keep_mask = np.array([h not in ignore_headers
                for h in table_headers])
            table_headers = np.array(table_headers)[keep_mask]
            # The mask selects columns, so it has to be applied along the
            # second axis of the rows (as the dataset-averaged table does).
            table_rows = np.array(table_rows)[:, keep_mask]
        if markdown:
            self._print_markdown_table(table_headers, table_rows)
        self._print_table(table_headers, table_rows)

    def _create_experiments_headers_and_rows(self):
        """Create table rows and headers for the per-experiment table.

        Returns:
            tuple: ``(table_headers, table_rows)`` with the parameter
            columns followed by the result columns.

        Raises:
            AssertionError: If parameter and result tables differ in their
                number of rows or have rows of differing lengths.
        """
        param_headers, param_rows = (
            self._create_experiment_param_headers_and_rows())
        result_headers, result_rows = self._create_result_headers_and_rows()
        # Check that results and params have same amount of rows
        # and that each row has same size
        assert len(param_rows) == len(result_rows)
        for param_row, result_row in zip(param_rows, result_rows):
            assert len(param_row) == len(param_rows[0])
            assert len(result_row) == len(result_rows[0])

        # some hacky header_substitutions for nicer printing
        header_substitutions = {
            'frequency_start': 'freq_start',
            'frequency_stop': 'freq_stop',
            'updates_per_batch': 'batch_updates',
            'dataset_filename': 'filename',
            'max_increasing_epochs': '>epochs'}
        # dict.get instead of dict.has_key, which Python 3 does not have
        table_headers = [header_substitutions.get(header, header)
            for header in param_headers + result_headers]
        # merge rows together...
        table_rows = [param_row + result_row
            for param_row, result_row in zip(param_rows, result_rows)]
        return table_headers, table_rows

    def _create_experiment_param_headers_and_rows(self):
        """Create the parameter columns of the per-experiment table.

        The first column is the experiment id parsed from the result file
        name (``<id>.result.pkl``). If datasets vary, the shortened dataset
        file name comes second. The remaining varying parameters follow.

        Returns:
            tuple: ``(param_headers, param_rows)``.
        """
        result_pool = self._result_pool
        # Query the pool once instead of once per experiment.
        all_varying_params = result_pool.varying_params()
        result_file_names = result_pool.result_file_names()
        has_varying_datasets = result_pool.have_varying_datasets()

        def strip_path_details(name):
            # remove unnecessary details of filename
            name = re.sub(r"(./)?data/[^/]*/", '', str(name))
            name = re.sub(r"MoS[Cc][0-9]*S[0-9]*R[0-9]*_ds10_", '', name)
            return re.sub(r"_autoclean_.*", '', name)

        # A list copy: Python 3 dict views support neither remove() nor
        # concatenation with a list.
        varying_param_keys = list(all_varying_params[0].keys())
        # Try to put dataset filename in second column after id...
        # TODELAY: remove testfilename from this
        if has_varying_datasets:
            if 'dataset_filename' in varying_param_keys:
                filenamekey = 'dataset_filename'
            else:
                filenamekey = 'filename'
            varying_param_keys.remove(filenamekey)
            param_headers = ['id', filenamekey] + varying_param_keys
        else:
            param_headers = ['id'] + varying_param_keys

        param_rows = []
        for result_obj_id in range(result_pool.num_experiments()):
            file_name = result_file_names[result_obj_id]
            varying_params = all_varying_params[result_obj_id]
            # Put result_obj id nr first
            param_row = [
                int(file_name.split('.result.pkl')[0].split('/')[-1])]
            # Put dataset filename second if exist
            if has_varying_datasets:
                filename = strip_path_details(varying_params[filenamekey])
                param_row.append(re.sub(".BBCI.mat", '', filename))
            # Put rest of parameters
            for param_key in varying_param_keys:
                param_value = varying_params[param_key]
                # TODELAY: remove again this if
                if param_key == 'test_filename':
                    param_value = re.sub("BBCI.mat", '',
                        strip_path_details(param_value))
                param_row.append(param_value)
            param_rows.append(param_row)
        return param_headers, param_rows

    def _create_result_headers_and_rows(self):
        """Create the result columns of the per-experiment table.

        The headers are derived from the first formatted result only; each
        row is built from its own formatted result. ``'std'`` columns are
        added after test/train/valid when the first formatted result has a
        ``'train_std'`` entry.

        Returns:
            tuple: ``(result_headers, result_rows)``.
        """
        first_result = self._formatted_results[0]
        # ``in`` instead of dict.has_key, which Python 3 does not have
        with_standard_deviation = 'train_std' in first_result
        types_with_std = ('test', 'train', 'valid')

        result_headers = ['time']
        # just put 'epoch' not 'best_epoch' so that header is not so wide
        result_type_headers = ['test', 'test_sample', 'best', 'epoch',
            'train', 'valid']
        for result_type in result_type_headers:
            # check if result exists, if yes add it
            if (result_type in first_result or
                    'best_' + result_type in first_result):
                result_headers.append(result_type)
                if with_standard_deviation and result_type in types_with_std:
                    result_headers.append('std')

        # Query the training times once instead of once per experiment.
        training_times = self._result_pool.training_times()
        result_rows = []
        for result_obj_id in range(self._result_pool.num_experiments()):
            results = self._formatted_results[result_obj_id]
            # training time goes first, rounded to whole seconds
            result_row = [str(datetime.timedelta(
                seconds=round(training_times[result_obj_id])))]
            for result_type in ['test', 'test_sample', 'best', 'best_epoch',
                    'train', 'valid']:
                if result_type in results:
                    result_row.append(results[result_type])
                    if (with_standard_deviation and
                            result_type in types_with_std):
                        result_row.append(results[result_type + "_std"])
            result_rows.append(result_row)
        return result_headers, result_rows

    def _print_experiments_averaged_datasets_result_table(self, shorten,
            ignore_headers, markdown):
        """Print the dataset-averaged result table.

        Args:
            shorten: Pass headers and rows through
                ``_prettify_headers_rows`` before printing.
            ignore_headers: Header names of columns to drop; compared
                against the headers after the optional shortening.
            markdown: Print the table as markdown as well, after the
                terminal table.
        """
        print("\n\nDataset-Averaged Results:")
        table_headers, table_rows = \
            self._create_averaged_dataset_headers_and_rows()
        if shorten:
            table_headers, table_rows = self._prettify_headers_rows(
                table_headers, table_rows)
        if len(ignore_headers) > 0:
            keep_mask = np.array([h not in ignore_headers
                for h in table_headers])
            table_headers = np.array(table_headers)[keep_mask]
            table_rows = np.array(table_rows)[:, keep_mask]

        self._print_table(table_headers, table_rows)
        if markdown:
            self._print_markdown_table(table_headers, table_rows)

    def _create_averaged_dataset_headers_and_rows(self):
        """Create table rows and headers for the dataset-averaged table.

        Every result list of ``self._dataset_averaged_results.results()``
        becomes one row: a running id, the number of results in the list,
        the parameter cells and finally the result cells.

        Returns:
            tuple: ``(headers, rows)``.
        """
        all_result_lists = self._dataset_averaged_results.results()
        headers = self._create_dataset_averaged_headers(all_result_lists)
        rows = []
        for i, result_list in enumerate(all_result_lists):
            # id in table, number of files
            row = [str(i), str(len(result_list))]
            row += self._create_parameter_row(result_list, headers)
            misclasses = self._compute_final_misclasses(result_list)
            training_times = [r['training_time'] for r in result_list]
            row += self._create_result_row(headers, misclasses,
                training_times)
            rows.append(row)
        return headers, rows

    @staticmethod
    def _create_dataset_averaged_headers(all_result_list):
        """Create the headers of the dataset-averaged result table.

        Headers are derived from the first result of the first result list:
        ``'id'`` and ``'files'``, then its parameter names (without the
        file-related ones), then ``'time'``/``'std'`` and one
        ``<misclass key>``/``'std'`` pair per misclass key.
        """
        first_result = all_result_list[0][0]
        # These parameters get no column in the averaged table.
        excluded_params = ('filename', 'dataset_filename',
            'transfer_leave_out', 'test_filename', 'trainer_filename')
        # A real list is needed: dict.keys() is a view on Python 3 and
        # cannot be concatenated with a list.
        param_keys = [key for key in first_result['parameters']
            if key not in excluded_params]
        # sort to show test before train before valid
        misclass_keys = sorted(first_result['misclasses'],
            key=lambda x: ('test' in x) * 1 + ('train' in x) * 2 +
            ('valid' in x) * 3)
        result_keys = ["time", "std"]
        for key in misclass_keys:
            result_keys.extend([key, 'std'])
        return ["id", "files"] + param_keys + result_keys

    @staticmethod
    def _create_result_row(headers, misclasses, training_times):
        """Create the result cells of one dataset-averaged table row.

        Args:
            headers: Table headers; they fix the order of the misclass
                columns.
            misclasses: Mapping from misclass key to a sequence or array of
                misclass rates (fractions, not percent).
            training_times: Training times in seconds.

        Returns:
            list: Mean and standard deviation of the training time as
            ``H:MM:SS`` strings, followed by mean accuracy and standard
            deviation in percent for every header that is a misclass key.

        Raises:
            AssertionError: If a misclass key is missing from ``headers``.
        """
        # Add training times
        row = [
            str(datetime.timedelta(seconds=round(np.mean(training_times)))),
            str(datetime.timedelta(seconds=round(np.std(training_times)))),
        ]
        # Add misclass for valid test train etc.
        assert len(set(misclasses.keys()) - set(headers)) == 0, (
            "All misclass keys should be in headers")
        # Add results in order they are defined in headers(!)
        for key in headers:
            if key in misclasses:
                # Scale after averaging: ``values * 100`` would repeat a
                # plain list 100 times instead of scaling it.
                row.append("{:4.2f}%".format(
                    100 - np.mean(misclasses[key]) * 100))
                row.append("{:4.2f}%".format(
                    np.std(misclasses[key]) * 100))
        return row

    @staticmethod
    def _create_parameter_row(result_list, headers):
        """Return the parameter cells of one dataset-averaged table row.

        Values come from (a deep copy of) the parameters of the first
        result and are ordered like ``headers``; the two filename
        parameters are never included.
        """
        parameters = deepcopy(result_list[0]['parameters'])
        parameters.pop('dataset_filename', None)
        parameters.pop('filename', None)
        return [parameters[header] for header in headers
            if header in parameters]

    def _compute_final_misclasses(self, result_list):
        """ Compute final fold-averaged misclasses for all experiments.
        Also works if there are no folds(train-test case)"""
        final_misclasses = {}
        misclasses = [r['misclasses'] for r in result_list]
        misclass_keys = misclasses[0].keys()  # train,test etc
        # applies ``a[-1]`` to every element of its (array-converted) input
        get_last_elem = np.vectorize(lambda a: a[-1])
        for key in misclass_keys:
            this_misclasses = [m[key] for m in misclasses]
            if np.array(this_misclasses[0][0]).shape != ():
                this_final_misclasses = get_last_elem(this_misclasses)
            else:
                # can only happen in case all experiments have same number of
                # epochs; ``-1:`` keeps a second axis for the mean below
                this_final_misclasses = np.array(this_misclasses)[:, -1:]

            # avg over folds if necessary
            this_final_avg_misclasses = np.mean(this_final_misclasses, axis=1)
            final_misclasses[key] = this_final_avg_misclasses

        return final_misclasses

    def _prettify_headers_rows(self, table_headers, table_rows):
        """ Shorten names for nicer printing """
        pretty_headers = [prettify_word(header) for header in table_headers]
        # go through all parameter values and prettify
        pretty_rows = [
            [prettify_word(str(value)) for value in row]
            for row in table_rows
        ]
        return pretty_headers, pretty_rows

    def _print_table(self, headers, rows):
        """Write the table to stdout with left-aligned, padded columns.

        Each column is as wide as its longest cell (header included) plus
        two spaces of padding.
        """
        print("\nTerminal Table\n")
        # first determine maximum length for any column
        max_col_lengths = []
        for column in range(len(headers)):
            cell_lengths = [len(str(headers[column]))]
            cell_lengths.extend(len(str(row[column])) for row in rows)
            max_col_lengths.append(max(cell_lengths))

        def write_line(cells):
            for column, value in enumerate(cells):
                # two spaces padding
                sys.stdout.write("{:<{width}}".format(
                    value, width=max_col_lengths[column] + 2))
            print('')

        write_line(headers)
        for row in rows:
            write_line(row)

    def _print_csv_table(self, headers, rows):
        """Write the table to stdout as CSV, removing ``$`` from cells."""
        print("\nCSV Table\n")
        wr = csv.writer(sys.stdout)
        wr.writerow(headers)
        for row in rows:
            wr.writerow([str(value).replace("$", "") for value in row])

    def _print_markdown_table(self, headers, rows):
        """Print the table to stdout in markdown table syntax.

        Cell values are converted with ``str`` and have all ``$``
        characters removed.
        """
        def as_table_line(cells):
            # cells separated and enclosed by pipes: |a|b|c|
            return "|" + "|".join(cells) + "|"

        # print(x) with a single argument behaves the same on Python 2 and
        # Python 3, unlike the ``print x`` statement.
        print("\nMarkdown Table\n")
        print(as_table_line(headers))
        # separator line between header and body of the table
        print(as_table_line("-" for _ in headers))
        for row in rows:
            print(as_table_line(
                str(value).replace("$", "") for value in row))