def collate_experiments(root: Path, output: str) -> pd.DataFrame:
    """Gather the results of every replicate below ``root`` into one table.

    Each entry of ``root`` (other than ``.DS_Store``) must have a name made of
    exactly four ``_``-separated parts; the last two are used as the model and
    the dataset. For every such entry, ``configuration_copied.json`` is read to
    compute the model size, and each sub-directory of ``replicates`` contributes
    one row built from its flattened ``results.json``.

    :param root: Directory whose entries are the individual experiment runs.
    :param output: Destination the tab-separated table is written to.
    :return: The collated data frame, with the ``dataset``, ``model`` and
        ``replicate`` columns placed first.
    :raises ValueError: If an entry name does not split into four parts.
    :raises KeyError: If a flattened result has no ``losses`` entry.
    """
    rows = []
    # Materialise the listing so that the progress bar knows its total.
    for experiment_dir in tqdm(list(root.iterdir()), desc='Collating reproductions'):
        if experiment_dir.name == '.DS_Store':
            continue
        _date, _reference, model, dataset = experiment_dir.name.split('_')

        # The model size is shared by all replicates of this experiment.
        config_path = experiment_dir / 'configuration_copied.json'
        with config_path.open() as config_file:
            configuration = json.load(config_file)
        model_size = get_model_size(**configuration['pipeline'])

        for replicate_dir in (experiment_dir / 'replicates').iterdir():
            with (replicate_dir / 'results.json').open() as results_file:
                raw_results = json.load(results_file)
            row = flatten_dictionary(raw_results)
            row.update({
                'model': get_model_cls(model).__name__,
                'dataset': dataset,
                # The replicate number is the second '-'-separated token of the directory name.
                'replicate': int(replicate_dir.name.split('-')[1]),
                'model_bytes': model_size,
            })
            # The losses entry is dropped from the collated row.
            del row['losses']
            rows.append(row)

    # Round-tripping through the index moves the key columns to the front.
    key_columns = ['dataset', 'model', 'replicate']
    df = pd.DataFrame(data=rows).set_index(key_columns).reset_index()
    df.to_csv(output, sep='\t', index=False)
    return df
def test_flatten_dictionary_prefix(self):
    """Check that flatten_dictionary puts the given prefix in front of every flattened key."""
    # Build the nested fixture from the innermost level outwards.
    innermost = {"c": 1, "d": 2}
    nested = {"a": {"b": innermost, "e": 3}}

    flattened = flatten_dictionary(nested, prefix="Test")

    self._compare(
        flattened,
        {
            "Test.a.b.c": 1,
            "Test.a.b.d": 2,
            "Test.a.e": 3,
        },
    )
def test_flatten_dictionary_mixed_key_type(self):
    """Check that flatten_dictionary also handles keys that are not strings."""
    innermost = {"c": 1, "d": 2}
    # The integer key 5 is expected to show up as "5" in the joined key.
    nested = {"a": {5: innermost, "e": 3}}

    flattened = flatten_dictionary(nested)

    self._compare(
        flattened,
        {
            "a.5.c": 1,
            "a.5.d": 2,
            "a.e": 3,
        },
    )
def test_flatten_dictionary_prefix(self):
    """Check that flatten_dictionary puts the given prefix in front of every flattened key."""
    # Build the nested fixture from the innermost level outwards.
    innermost = {'c': 1, 'd': 2}
    nested = {'a': {'b': innermost, 'e': 3}}

    flattened = flatten_dictionary(nested, prefix='Test')

    self._compare(
        flattened,
        {
            'Test.a.b.c': 1,
            'Test.a.b.d': 2,
            'Test.a.e': 3,
        },
    )
def test_flatten_dictionary_mixed_key_type(self):
    """Check that flatten_dictionary also handles keys that are not strings."""
    innermost = {'c': 1, 'd': 2}
    # The integer key 5 is expected to show up as '5' in the joined key.
    nested = {'a': {5: innermost, 'e': 3}}

    flattened = flatten_dictionary(nested)

    self._compare(
        flattened,
        {
            'a.5.c': 1,
            'a.5.d': 2,
            'a.e': 3,
        },
    )
def test_flatten_dictionary(self):
    """Check that flatten_dictionary joins the keys of nested levels with dots."""
    innermost = {'c': 1, 'd': 2}
    nested = {'a': {'b': innermost, 'e': 3}}

    flattened = flatten_dictionary(nested)

    self._compare(
        flattened,
        {
            'a.b.c': 1,
            'a.b.d': 2,
            'a.e': 3,
        },
    )