def test_valid_path_df_metadata_split_betn_file_kw(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    path_c = os.sep.join([io_datasets_path, 'C_partialmeta.csv'])
    C = read_csv_metadata(path_c, ltable=A, rtable=B, fk_ltable='ltable_ID')
def test_valid_path_candset_with_diff_metadataextn_2(self):
    cm.del_catalog()
    path_a = os.sep.join([io_datasets_path, 'A.csv'])
    A = read_csv_metadata(path_a, metadata_extn='.mdx')
    pd_A = pd.read_csv(path_a)
    self.assertEqual(A.equals(pd_A), True)
    self.assertEqual(cm.get_key(A), 'ID')
def test_invalid_path(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A.pkl'])
    save_table(A, p)
    # loading with a None path is an invalid input
    A1 = load_table(None)
def test_invalid_path_cannotwrite(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    # only sndbx_path is created below; the 'temp' subdirectory does not
    # exist, so this path cannot be written to
    p = os.sep.join([sndbx_path, 'temp', 'A_saved.pkl'])
    creat_dir_ifnot_exists(sndbx_path)
    save_object(A, p)
def test_valid_path_df_metadata_set_to_none_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    path_c = os.sep.join([io_datasets_path, 'C_partialmeta.csv'])
    C = read_csv_metadata(path_c, ltable=A, rtable=B, fk_ltable=None)
def test_valid_path_df_metadata_invalid_rtable(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    # path_c = os.sep.join([io_datasets_path, 'C_partialmeta.csv'])
    # rtable is given as a string instead of a DataFrame, which is invalid
    C = read_csv_metadata(path_c, rtable="temp", ltable=A)
def test_valid_path_type_is_not_string(self):
    cm.del_catalog()
    with self.assertRaises(AssertionError) as ctx:
        read_csv_metadata(1001)
    actual = str(ctx.exception)
    expected = 'Input file path: 1001 \nis not of type string'
    self.assertEqual(actual, expected)
def test_valid_path_df_metadata_invalid_ltable(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    # path_c = os.sep.join([io_datasets_path, 'C_partialmeta.csv'])
    # ltable is given as a string instead of a DataFrame, which is invalid
    C = read_csv_metadata(path_c, ltable="temp", rtable=B)
def test_valid_path_table_3(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A.pkl'])
    # note: the save and load calls use different metadata extensions
    # ('pkll' vs. 'pklll')
    save_table(A, p, metadata_ext='pkll')
    A1 = load_table(p, metadata_ext='pklll')
    self.assertEqual(A.equals(A1), True)
def test_invalid_path_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A_saved.pkl'])
    creat_dir_ifnot_exists(sndbx_path)
    save_object(A, p)
    # attempt to load from a path inside a directory that was never created
    p1 = os.sep.join([sndbx_path, 'temp', 'A_saved.pkl'])
    A1 = load_object(p1)
def test_invalid_path_cannotwrite(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    # only sndbx_path is created below; the 'temp' subdirectory does not
    # exist, so this path cannot be written to
    p = os.sep.join([sndbx_path, 'temp', 'A_saved.csv'])
    creat_dir_ifnot_exists(sndbx_path)
    to_csv_metadata(A, p)
def test_valid_object_1(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A_saved.pkl'])
    creat_dir_ifnot_exists(sndbx_path)
    save_object(A, p)
    A1 = load_object(p)
    self.assertEqual(A.equals(A1), True)
def test_valid_path_table_1(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A.pkl'])
    save_table(A, p)
    A1 = load_table(p)
    self.assertEqual(A.equals(A1), True)
    self.assertEqual(cm.get_key(A), cm.get_key(A1))
def test_valid_path_type_is_string(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    with self.assertRaises(AssertionError) as ctx:
        to_csv_metadata(A, 1001)
    actual = str(ctx.exception)
    expected = 'Input object 1001 is not of type string'
    self.assertEqual(actual, expected)
def test_valid_path_type_is_string(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    with self.assertRaises(AssertionError) as ctx:
        to_csv_metadata(A, 1001)
    actual = str(ctx.exception)
    expected = 'Input file path: 1001 \nis not of type string'
    self.assertEqual(actual, expected)
def test_valid_path_candset_wi_valid_metadata(self):
    cm.del_catalog()
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')  # not initializing with ID will raise key_error
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    pd_C = pd.read_csv(path_c)
    self.assertEqual(C.equals(pd_C), True)
    self.assertEqual(len(cm.get_all_properties(C).keys()), 5)
    self.assertEqual(cm.get_key(C), '_id')
    self.assertEqual(cm.get_fk_ltable(C), 'ltable_ID')
    self.assertEqual(cm.get_fk_rtable(C), 'rtable_ID')
def test_invalid_data_frame_type(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    p = os.sep.join([sndbx_path, 'temp', 'A_saved.csv'])
    with self.assertRaises(AssertionError) as ctx:
        to_csv_metadata(1001, p)
    actual = str(ctx.exception)
    expected = 'Input object: 1001 \nis not of type pandas dataframe'
    self.assertEqual(actual, expected)
def test_invalid_data_frame_type(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    p = os.sep.join([sndbx_path, 'temp', 'A_saved.csv'])
    with self.assertRaises(AssertionError) as ctx:
        to_csv_metadata(1001, p)
    actual = str(ctx.exception)
    expected = 'Input object 1001 is not of type pandas dataframe'
    self.assertEqual(actual, expected)
def test_valid_path_df_chk_catalog_1(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A_saved.csv'])
    creat_dir_ifnot_exists(sndbx_path)
    to_csv_metadata(A, p)
    A1 = read_csv_metadata(p)
    self.assertEqual(cm.get_key(A1), cm.get_key(A),
                     'The keys in the catalog are not the same')
def test_valid_path_table_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    p = os.sep.join([sndbx_path, 'C.pkl'])
    save_table(C, p)
    C1 = load_table(p)
    self.assertEqual(C.equals(C1), True)
    self.assertEqual(cm.get_key(C), cm.get_key(C1))
    # self.assertEqual(cm.get_ltable(C).equals(cm.get_ltable(C1)), True)
    # self.assertEqual(cm.get_rtable(C).equals(cm.get_rtable(C1)), True)
    self.assertEqual(cm.get_fk_ltable(C), cm.get_fk_ltable(C1))
    self.assertEqual(cm.get_fk_rtable(C), cm.get_fk_rtable(C1))
def test_valid_path_df_chk_metadatafile_3(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A_saved.csv'])
    creat_dir_ifnot_exists(sndbx_path)
    to_csv_metadata(A, p, metadata_extn='mdx')
    p_meta_1 = os.sep.join([sndbx_path, 'A_saved.mdx'])
    m1 = _get_metadata_from_file(p_meta_1)
    p_meta_2 = os.sep.join([io_datasets_path, 'expected_A.metadata'])
    m2 = _get_metadata_from_file(p_meta_2)
    self.assertEqual(m1, m2, 'The metadata information is not the same.')
def test_valid_path_df_chk_catalog_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    p = os.sep.join([sndbx_path, 'C_saved.csv'])
    creat_dir_ifnot_exists(sndbx_path)
    to_csv_metadata(C, p)
    C1 = read_csv_metadata(p, ltable=A, rtable=B)
    self.assertEqual(cm.get_all_properties(C1), cm.get_all_properties(C),
                     'The properties in the catalog are not the same')
def test_valid_object_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = get_features_for_blocking(
        A, B, validate_inferred_attr_types=False)
    rb = RuleBasedBlocker()
    rb.add_rule('zipcode_zipcode_exm(ltuple, rtuple) != 1', feature_table)
    C = rb.block_tables(A, B, show_progress=False)
    self.assertEqual(len(C), 15)
    p = os.sep.join([sndbx_path, 'C.pkl'])
    creat_dir_ifnot_exists(sndbx_path)
    save_object(rb, p)
    rb1 = load_object(p)
    C1 = rb1.block_tables(A, B, show_progress=False)
    self.assertEqual(C.equals(C1), True)
def test_valid_object_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = get_features_for_blocking(A, B)
    rb = RuleBasedBlocker()
    rb.add_rule('zipcode_zipcode_exm(ltuple, rtuple) != 1', feature_table)
    C = rb.block_tables(A, B, show_progress=False)
    self.assertEqual(len(C), 15)
    p = os.sep.join([sndbx_path, 'C.pkl'])
    creat_dir_ifnot_exists(sndbx_path)
    save_object(rb, p)
    rb1 = load_object(p)
    C1 = rb1.block_tables(A, B, show_progress=False)
    self.assertEqual(C.equals(C1), True)
def test_valid_path_df_chk_metadatafile_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    p = os.sep.join([sndbx_path, 'C_saved.csv'])
    creat_dir_ifnot_exists(sndbx_path)
    to_csv_metadata(C, p)
    p_meta_1 = os.sep.join([sndbx_path, 'C_saved.metadata'])
    m1 = _get_metadata_from_file(p_meta_1)
    p_meta_2 = os.sep.join([io_datasets_path, 'expected_C.metadata'])
    m2 = _get_metadata_from_file(p_meta_2)
    self.assertEqual(m1, m2, 'The metadata information is not the same.')
def test_invalid_metadataextn(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A.pkl'])
    # a non-string metadata extension is an invalid input
    save_table(A, p, metadata_ext=10)
def test_invalid_path_table(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    # both the table and the path are None, which is invalid
    save_table(None, None)
def test_valid_path_wo_metadata(self):
    cm.del_catalog()
    B = read_csv_metadata(path_b)
    pd_B = pd.read_csv(path_b)
    self.assertEqual(B.equals(pd_B), True)
    self.assertEqual(cm.is_dfinfo_present(B), True)
def test_invalid_table_1(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    # A = read_csv_metadata(path_a)
    p = os.sep.join([sndbx_path, 'A.pkl'])
    # passing an int instead of a DataFrame is invalid
    save_table(10, p)
def test_valid_path_wi_metadata_unknownprop(self):
    cm.del_catalog()
    p = os.sep.join([io_datasets_path, 'InvalidMetadata1.csv'])
    IM = read_csv_metadata(p)
    self.assertEqual(cm.is_dfinfo_present(IM), True)
    self.assertEqual(cm.get_property(IM, 'key1'), 'ID')
def test_del_catalog_valid(self):
    A = read_csv_metadata(path_a)
    cm.del_catalog()
    cg = cm.get_catalog()
    self.assertEqual(len(cg), 0)
def test_valid_path_wi_invalidmetadata_wrongkey(self):
    cm.del_catalog()
    p = os.sep.join([io_datasets_path, 'InvalidMetadata2.csv'])
    IM = read_csv_metadata(p)
def test_valid_path_wi_invalidmetadata_replace_key(self):
    cm.del_catalog()
    p = os.sep.join([io_datasets_path, 'A_key_zipcode.csv'])
    IM = read_csv_metadata(p, key='ID')
    self.assertEqual(cm.is_dfinfo_present(IM), True)
    self.assertEqual(cm.is_property_present_for_df(IM, 'key'), True)
def test_valid_path_wi_invalidmetadata_wrongformat(self):
    cm.del_catalog()
    p = os.sep.join([io_datasets_path, 'A_md_wrongformat.csv'])
    IM = read_csv_metadata(p, key='ID')
def test_invalid_path_2(self):
    cm.del_catalog()
    del_files_in_dir(sndbx_path)
    A = read_csv_metadata(path_a)
    # p = os.sep.join([sndbx_path, 'A.pkl'])
    # saving to a None path is an invalid input
    save_table(A, None)
def test_invalid_nonstr_path(self):
    cm.del_catalog()
    # a non-string file path is an invalid input
    A = read_csv_metadata(10)
def setUp(self):
    cm.del_catalog()
def test_invalid_str_path(self):
    cm.del_catalog()
    # the file at this path does not exist
    p = os.sep.join([io_datasets_path, 'xyz.csv'])
    A = read_csv_metadata(p)
def test_is_catalog_empty(self):
    A = read_csv_metadata(path_a)
    cm.del_catalog()
    self.assertEqual(cm.is_catalog_empty(), True)
def tearDown(self):
    cm.del_catalog()