def test_latest_year_month_day_comparison(self):
    """Expect `latest()` to return the 2020_02_01 file over the 2020_01_01 file."""
    january = DataFile(path='2020_01_01.txt', contents={})
    february = DataFile(path='2020_02_01.txt', contents={})
    directory = DataDirectory(
        'top',
        contents={
            '2020_01_01.txt': january,
            '2020_02_01.txt': february,
        },
    )

    latest_file = directory.latest()

    self.assertEqual(latest_file, february)
def test_select_hint_one_exact_match_one_fuzzy_match(self):
    """Hint 'sql' appears in both file names; `select` must return query.sql."""
    query = DataFile(path='query.sql', contents={})
    database = DataFile(path='db.sqlite', contents={})
    entries = {
        'query.sql': query,
        'db.sqlite': database,
    }
    directory = DataDirectory('top', contents=entries)

    selected = directory.select('sql')

    self.assertEqual(selected, query)
def test_ls_two_same_type_files_full(self):
    """With ``full=True`` the ls tree names both txt files individually."""
    first = DataFile(path='file1.txt', contents={})
    second = DataFile(path='file2.txt', contents={})
    directory = DataDirectory(
        'top',
        contents={
            'file1.txt': first,
            'file2.txt': second,
        },
    )

    tree = directory._build_ls_tree(full=True)

    self.assertEqual(tree, {'top': ['file1.txt', 'file2.txt']})
def test_integration(self):
    """Exercise DataDirectory end to end against the on-disk test directory.

    Covers discovery of ``test.csv``, then ``ls``, ``load``, ``select``,
    ``latest`` and ``save``. Relies on ``self.test_dir`` and ``self.raw_df``,
    which are presumably prepared by the fixture setup (not visible here).
    """

    def resolved(path):
        # Compare fully resolved paths on both sides. The directory may report
        # a symlink-resolved path (e.g. a `/private` prefix -- presumably from
        # macOS temp dirs; confirm) while `self.test_dir` is unresolved.
        return Path(str(path)).resolve()

    dd = DataDirectory(self.test_dir)
    self.assertIn('test.csv', dd.contents)
    # Exact type check (not isinstance), matching the original strictness.
    self.assertIs(type(dd.contents['test.csv']), DataFile)
    dd.ls()

    # TEST LOAD
    test_csv = dd['test.csv'].load()
    pd.testing.assert_frame_equal(test_csv, self.raw_df)

    expected_path = resolved(Path(self.test_dir, 'test.csv'))

    # TEST SELECT
    self.assertEqual(resolved(dd.select('test').path), expected_path)
    self.assertEqual(resolved(dd.select('csv').path), expected_path)

    # TEST LATEST
    self.assertEqual(resolved(dd.latest().path), expected_path)

    # TEST SAVE
    new_df = self.raw_df.copy()
    dd.save(new_df, 'new_df.csv')
    self.assertIn('new_df.csv', dd.contents)
    self.assertIs(type(dd.contents['new_df.csv']), DataFile)
def test_ls_two_same_type_files_in_subdir(self):
    """Default ls tree summarizes two txt files in a subdir as '2 txt items'."""
    first = DataFile(path='file1.txt', contents={})
    second = DataFile(path='file2.txt', contents={})
    nested = DataDirectory(
        path='subdir',
        contents={
            'file1.txt': first,
            'file2.txt': second,
        },
    )
    directory = DataDirectory('top', contents={'subdir': nested})

    tree = directory._build_ls_tree()

    self.assertEqual(tree, {'top': [{'subdir': ['2 txt items']}]})
def test_select_hint_fuzzy_match_two_files_raises_value_error(self):
    """Hint 'sq' appears in both file names; `select` must raise ValueError."""
    query = DataFile(path='query.sql', contents={})
    database = DataFile(path='db.sqlite', contents={})
    entries = {
        'query.sql': query,
        'db.sqlite': database,
    }
    directory = DataDirectory('top', contents=entries)

    with self.assertRaises(ValueError):
        directory.select('sq')
def test_save_adds_to_dir_contents(self):
    """After `save`, the directory's `contents` holds a DataFile under the saved name."""
    saved_name = 'file.test'
    saved_path = '$HOME/{}'.format(saved_name)
    expected_contents = {saved_name: DataFile(saved_path)}

    directory = DataDirectory(
        path='$HOME',
        contents={},
        magic_data_interface=TestMagicDataInterface,
    )
    directory.save(42, saved_name)

    # The file names (keys) must match exactly ...
    self.assertEqual(directory.contents.keys(), expected_contents.keys())
    # ... and each stored value must be exactly a DataFile.
    for name in expected_contents:
        stored = directory.contents[name]
        self.assertEqual(type(stored), DataFile)
def main():
    """Parse the data directory argument, build its graph layout and run the app.

    Reads one positional CLI argument (``data_dir``), loads it through
    ``DataDirectory.load``, converts it to graph elements, installs the
    resulting layout on the module-level ``app`` and starts the server in
    debug mode.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'data_dir',
        help='Data directory to view. Must be previously registered with datatc.',
    )
    cli_args = arg_parser.parse_args()

    directory = DataDirectory.load(cli_args.data_dir)
    directory_path = str(directory.path)

    elements, metadata, roots = data_dir_to_graph(directory)
    app.layout = get_app_layout(
        elements,
        metadata,
        roots,
        data_dir=directory_path,
    )
    app.run_server(debug=True)
def test_ls_empty_sub_dir(self):
    """An empty subdirectory appears in the ls tree as its name with an empty list."""
    empty = DataDirectory('empty_dir', contents={})
    parent = DataDirectory('top_dir', contents={'empty_dir': empty})

    tree = parent._build_ls_tree()

    self.assertEqual(tree, {'top_dir': [{'empty_dir': []}]})
def test_ls_one_file_full_in_subdir(self):
    """With ``full=True`` a single file in a subdir is listed by name."""
    only_file = DataFile(path='file1.txt', contents={})
    nested = DataDirectory(path='subdir', contents={'file1.txt': only_file})
    directory = DataDirectory('top', contents={'subdir': nested})

    tree = directory._build_ls_tree(full=True)

    self.assertEqual(tree, {'top': [{'subdir': ['file1.txt']}]})
def test_latest_empty_dir_returns_none(self):
    """`latest()` on a directory with no contents must return ``None``."""
    data_dir = DataDirectory('top', contents={})
    # assertIsNone checks identity, so an object that merely compares equal
    # to None cannot satisfy the test, and the failure message is clearer.
    self.assertIsNone(data_dir.latest())
def test_ls_one_file(self):
    """Default ls tree for a directory with one file lists that file by name."""
    only_file = DataFile(path='file1.txt', contents={})
    directory = DataDirectory('top', contents={'file1.txt': only_file})

    tree = directory._build_ls_tree()

    self.assertEqual(tree, {'top': ['file1.txt']})