コード例 #1
0
 def test_valid_path_df_metadata_split_betn_file_kw(self):
     """Read C_partialmeta.csv with ltable, rtable and fk_ltable as keywords.

     The body makes no assertions of its own; it only drives
     read_csv_metadata with those keyword arguments.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     candset_path = os.sep.join([io_datasets_path, 'C_partialmeta.csv'])
     left_table = read_csv_metadata(path_a)
     right_table = read_csv_metadata(path_b, key='ID')
     read_csv_metadata(candset_path, ltable=left_table, rtable=right_table,
                       fk_ltable='ltable_ID')
コード例 #2
0
 def test_valid_path_candset_with_diff_metadataextn_2(self):
     """Read A.csv with metadata_extn='.mdx' and compare with plain pandas.

     Checks that the returned table equals pd.read_csv of the same file
     and that cm.get_key reports 'ID' for it.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'A.csv'])
     table = read_csv_metadata(csv_path, metadata_extn='.mdx')
     plain_frame = pd.read_csv(csv_path)
     same_contents = table.equals(plain_frame)
     self.assertEqual(same_contents, True)
     self.assertEqual(cm.get_key(table), 'ID')
コード例 #3
0
 def test_invalid_path(self):
     """Save table A to the sandbox, then call load_table with None.

     No assertion is made in the body.
     NOTE(review): the None path is presumably expected to be rejected by
     load_table - confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     pickle_path = os.sep.join([sndbx_path, 'A.pkl'])
     table = read_csv_metadata(path_a)
     save_table(table, pickle_path)
     load_table(None)
コード例 #4
0
 def test_invalid_path_cannotwrite(self):
     """Call save_object with a target under a 'temp' subfolder of the sandbox.

     creat_dir_ifnot_exists is called on the sandbox path only, not on the
     'temp' subfolder. No assertion is made in the body.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     target = os.sep.join([sndbx_path, 'temp', 'A_saved.pkl'])
     table = read_csv_metadata(path_a)
     creat_dir_ifnot_exists(sndbx_path)
     save_object(table, target)
コード例 #5
0
    def test_valid_path_df_metadata_set_to_none_2(self):
        """Read C_partialmeta.csv with ltable and rtable, and fk_ltable=None.

        The body makes no assertions of its own.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        candset_path = os.sep.join([io_datasets_path, 'C_partialmeta.csv'])
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        read_csv_metadata(candset_path, ltable=left_table,
                          rtable=right_table, fk_ltable=None)
コード例 #6
0
    def test_valid_path_df_metadata_invalid_rtable(self):
        """Call read_csv_metadata with the string "temp" as rtable.

        path_c is not assigned in this method, so it is resolved from an
        enclosing scope. Table B is still read first although it is not
        passed on. The body makes no assertions of its own.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        read_csv_metadata(path_b, key='ID')
        read_csv_metadata(path_c, rtable="temp", ltable=left_table)
コード例 #7
0
    def test_valid_path_type_is_not_string(self):
        """read_csv_metadata(1001) must raise AssertionError with a fixed message."""
        cm.del_catalog()
        with self.assertRaises(AssertionError) as raised:
            read_csv_metadata(1001)

        message = str(raised.exception)
        self.assertEqual(message,
                         'Input file path: 1001 \nis not of type string')
コード例 #8
0
    def test_valid_path_df_metadata_invalid_ltable(self):
        """Call read_csv_metadata with the string "temp" as ltable.

        path_c is not assigned in this method, so it is resolved from an
        enclosing scope. Table A is still read first although it is not
        passed on. The body makes no assertions of its own.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        read_csv_metadata(path_c, ltable="temp", rtable=right_table)
コード例 #9
0
    def test_valid_path_table_3(self):
        """Save table A and load it back using differing metadata extensions.

        A is written with metadata_ext='pkll' and read back with
        metadata_ext='pklll'; the test checks that the loaded table A1
        holds the same data as A.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        A = read_csv_metadata(path_a)
        p = os.sep.join([sndbx_path, 'A.pkl'])
        save_table(A, p, metadata_ext='pkll')

        A1 = load_table(p, metadata_ext='pklll')
        # Compare with the reloaded table: A.equals(A) is always True and
        # could never detect a broken round trip.
        self.assertEqual(A.equals(A1), True)
コード例 #10
0
 def test_invalid_path_2(self):
     """Save A in the sandbox, then load from a different path.

     The object is written to <sandbox>/A_saved.pkl, but load_object is
     given <sandbox>/temp/A_saved.pkl. No assertion is made in the body.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     table = read_csv_metadata(path_a)
     saved_path = os.sep.join([sndbx_path, 'A_saved.pkl'])
     other_path = os.sep.join([sndbx_path, 'temp', 'A_saved.pkl'])
     creat_dir_ifnot_exists(sndbx_path)
     save_object(table, saved_path)
     load_object(other_path)
コード例 #11
0
    def test_valid_path_table_3(self):
        """Save table A and load it back using differing metadata extensions.

        A is written with metadata_ext='pkll' and read back with
        metadata_ext='pklll'; the test checks that the loaded table A1
        holds the same data as A.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        A = read_csv_metadata(path_a)
        p = os.sep.join([sndbx_path, 'A.pkl'])
        save_table(A, p, metadata_ext='pkll')

        A1 = load_table(p, metadata_ext='pklll')
        # Compare with the reloaded table: A.equals(A) is always True and
        # could never detect a broken round trip.
        self.assertEqual(A.equals(A1), True)
コード例 #12
0
    def test_invalid_path_cannotwrite(self):
        """Call to_csv_metadata with a target under a 'temp' subfolder.

        creat_dir_ifnot_exists is called on the sandbox path only, not on
        the 'temp' subfolder. No assertion is made in the body.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        target = os.sep.join([sndbx_path, 'temp', 'A_saved.csv'])
        table = read_csv_metadata(path_a)

        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(table, target)
コード例 #13
0
    def test_valid_object_1(self):
        """Round-trip table A through save_object/load_object and compare."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        original = read_csv_metadata(path_a)
        pickle_path = os.sep.join([sndbx_path, 'A_saved.pkl'])
        creat_dir_ifnot_exists(sndbx_path)
        save_object(original, pickle_path)

        restored = load_object(pickle_path)
        round_trip_ok = original.equals(restored)
        self.assertEqual(round_trip_ok, True)
コード例 #14
0
    def test_valid_path_table_1(self):
        """Round-trip table A through save_table/load_table.

        Checks that the reloaded table A1 holds the same data as A and that
        cm.get_key returns the same key for both.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        A = read_csv_metadata(path_a)
        p = os.sep.join([sndbx_path, 'A.pkl'])
        save_table(A, p)

        A1 = load_table(p)
        # Compare with the reloaded table: A.equals(A) is always True and
        # could never detect a broken round trip.
        self.assertEqual(A.equals(A1), True)
        self.assertEqual(cm.get_key(A), cm.get_key(A1))
コード例 #15
0
    def test_valid_path_type_is_string(self):
        """to_csv_metadata(A, 1001) must raise AssertionError with a fixed message."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        table = read_csv_metadata(path_a)
        with self.assertRaises(AssertionError) as raised:
            to_csv_metadata(table, 1001)

        message = str(raised.exception)
        self.assertEqual(message, 'Input object 1001 is not of type string')
コード例 #16
0
    def test_valid_path_table_1(self):
        """Round-trip table A through save_table/load_table.

        Checks that the reloaded table A1 holds the same data as A and that
        cm.get_key returns the same key for both.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        A = read_csv_metadata(path_a)
        p = os.sep.join([sndbx_path, 'A.pkl'])
        save_table(A, p)

        A1 = load_table(p)
        # Compare with the reloaded table: A.equals(A) is always True and
        # could never detect a broken round trip.
        self.assertEqual(A.equals(A1), True)
        self.assertEqual(cm.get_key(A), cm.get_key(A1))
コード例 #17
0
    def test_valid_object_1(self):
        """Round-trip table A through save_object/load_object and compare."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        original = read_csv_metadata(path_a)
        pickle_path = os.sep.join([sndbx_path, 'A_saved.pkl'])
        creat_dir_ifnot_exists(sndbx_path)
        save_object(original, pickle_path)

        restored = load_object(pickle_path)
        round_trip_ok = original.equals(restored)
        self.assertEqual(round_trip_ok, True)
コード例 #18
0
    def test_valid_path_type_is_string(self):
        """to_csv_metadata(A, 1001) must raise AssertionError with a fixed message."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        table = read_csv_metadata(path_a)
        with self.assertRaises(AssertionError) as raised:
            to_csv_metadata(table, 1001)

        message = str(raised.exception)
        self.assertEqual(message,
                         'Input file path: 1001 \nis not of type string')
コード例 #19
0
 def test_valid_path_candset_wi_valid_metadata(self):
     """Read candidate set C with ltable/rtable and check its catalog entries.

     Verifies C equals a plain pandas read of path_c, that five properties
     are recorded for it, and that key / fk_ltable / fk_rtable are '_id',
     'ltable_ID' and 'rtable_ID'.
     """
     cm.del_catalog()
     left_table = read_csv_metadata(path_a)
     # not initializing with ID will raise key_error
     right_table = read_csv_metadata(path_b, key='ID')
     candset = read_csv_metadata(path_c, ltable=left_table,
                                 rtable=right_table)
     plain_frame = pd.read_csv(path_c)
     self.assertEqual(candset.equals(plain_frame), True)
     property_names = cm.get_all_properties(candset).keys()
     self.assertEqual(len(property_names), 5)
     self.assertEqual(cm.get_key(candset), '_id')
     self.assertEqual(cm.get_fk_ltable(candset), 'ltable_ID')
     self.assertEqual(cm.get_fk_rtable(candset), 'rtable_ID')
コード例 #20
0
    def test_invalid_data_frame_type(self):
        """to_csv_metadata(1001, path) must raise AssertionError with a fixed message."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)

        target = os.sep.join([sndbx_path, 'temp', 'A_saved.csv'])
        with self.assertRaises(AssertionError) as raised:
            to_csv_metadata(1001, target)

        message = str(raised.exception)
        self.assertEqual(
            message, 'Input object: 1001 \nis not of type pandas dataframe')
コード例 #21
0
    def test_invalid_data_frame_type(self):
        """to_csv_metadata(1001, path) must raise AssertionError with a fixed message."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)

        target = os.sep.join([sndbx_path, 'temp', 'A_saved.csv'])
        with self.assertRaises(AssertionError) as raised:
            to_csv_metadata(1001, target)

        message = str(raised.exception)
        self.assertEqual(
            message, 'Input object 1001 is not of type pandas dataframe')
コード例 #22
0
 def test_valid_path_candset_wi_valid_metadata(self):
     """Read candidate set C with ltable/rtable and check its catalog entries.

     Verifies C equals a plain pandas read of path_c, that five properties
     are recorded for it, and that key / fk_ltable / fk_rtable are '_id',
     'ltable_ID' and 'rtable_ID'.
     """
     cm.del_catalog()
     left_table = read_csv_metadata(path_a)
     # not initializing with ID will raise key_error
     right_table = read_csv_metadata(path_b, key='ID')
     candset = read_csv_metadata(path_c, ltable=left_table,
                                 rtable=right_table)
     plain_frame = pd.read_csv(path_c)
     self.assertEqual(candset.equals(plain_frame), True)
     property_names = cm.get_all_properties(candset).keys()
     self.assertEqual(len(property_names), 5)
     self.assertEqual(cm.get_key(candset), '_id')
     self.assertEqual(cm.get_fk_ltable(candset), 'ltable_ID')
     self.assertEqual(cm.get_fk_rtable(candset), 'rtable_ID')
コード例 #23
0
    def test_valid_path_df_chk_catalog_1(self):
        """Write A with to_csv_metadata, read it back, and compare catalog keys."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        original = read_csv_metadata(path_a)
        csv_path = os.sep.join([sndbx_path, 'A_saved.csv'])

        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(original, csv_path)
        reloaded = read_csv_metadata(csv_path)

        reloaded_key = cm.get_key(reloaded)
        original_key = cm.get_key(original)
        self.assertEqual(reloaded_key, original_key,
                         'The keys in the catalog are not same')
コード例 #24
0
    def test_valid_path_df_chk_catalog_1(self):
        """Write A with to_csv_metadata, read it back, and compare catalog keys."""
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        original = read_csv_metadata(path_a)
        csv_path = os.sep.join([sndbx_path, 'A_saved.csv'])

        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(original, csv_path)
        reloaded = read_csv_metadata(csv_path)

        reloaded_key = cm.get_key(reloaded)
        original_key = cm.get_key(original)
        self.assertEqual(reloaded_key, original_key,
                         'The keys in the catalog are not same')
コード例 #25
0
    def test_valid_path_table_2(self):
        """Round-trip candidate set C through save_table/load_table.

        Checks that the reloaded table equals C and that cm.get_key,
        cm.get_fk_ltable and cm.get_fk_rtable agree for both. The ltable
        and rtable objects themselves are not compared.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        candset = read_csv_metadata(path_c, ltable=left_table,
                                    rtable=right_table)
        pickle_path = os.sep.join([sndbx_path, 'C.pkl'])
        save_table(candset, pickle_path)

        reloaded = load_table(pickle_path)
        self.assertEqual(candset.equals(reloaded), True)
        for getter in (cm.get_key, cm.get_fk_ltable, cm.get_fk_rtable):
            self.assertEqual(getter(candset), getter(reloaded))
コード例 #26
0
    def test_valid_path_df_chk_metadatafile_3(self):
        """Write A with metadata_extn='mdx' and compare the metadata file.

        The metadata read from <sandbox>/A_saved.mdx must equal the metadata
        read from expected_A.metadata under io_datasets_path.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        table = read_csv_metadata(path_a)

        csv_path = os.sep.join([sndbx_path, 'A_saved.csv'])
        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(table, csv_path, metadata_extn='mdx')

        written_meta = _get_metadata_from_file(
            os.sep.join([sndbx_path, 'A_saved.mdx']))
        expected_meta = _get_metadata_from_file(
            os.sep.join([io_datasets_path, 'expected_A.metadata']))

        self.assertEqual(written_meta, expected_meta,
                         'The metadata information is not same.')
コード例 #27
0
    def test_valid_path_table_2(self):
        """Round-trip candidate set C through save_table/load_table.

        Checks that the reloaded table equals C and that cm.get_key,
        cm.get_fk_ltable and cm.get_fk_rtable agree for both. The ltable
        and rtable objects themselves are not compared.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        candset = read_csv_metadata(path_c, ltable=left_table,
                                    rtable=right_table)
        pickle_path = os.sep.join([sndbx_path, 'C.pkl'])
        save_table(candset, pickle_path)

        reloaded = load_table(pickle_path)
        self.assertEqual(candset.equals(reloaded), True)
        for getter in (cm.get_key, cm.get_fk_ltable, cm.get_fk_rtable):
            self.assertEqual(getter(candset), getter(reloaded))
コード例 #28
0
    def test_valid_path_df_chk_metadatafile_3(self):
        """Write A with metadata_extn='mdx' and compare the metadata file.

        The metadata read from <sandbox>/A_saved.mdx must equal the metadata
        read from expected_A.metadata under io_datasets_path.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        table = read_csv_metadata(path_a)

        csv_path = os.sep.join([sndbx_path, 'A_saved.csv'])
        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(table, csv_path, metadata_extn='mdx')

        written_meta = _get_metadata_from_file(
            os.sep.join([sndbx_path, 'A_saved.mdx']))
        expected_meta = _get_metadata_from_file(
            os.sep.join([io_datasets_path, 'expected_A.metadata']))

        self.assertEqual(written_meta, expected_meta,
                         'The metadata information is not same.')
コード例 #29
0
    def test_valid_path_df_chk_catalog_2(self):
        """Write candidate set C to CSV, read it back, and compare all properties.

        cm.get_all_properties must return equal values for the reloaded
        table and for the original C.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        candset = read_csv_metadata(path_c, ltable=left_table,
                                    rtable=right_table)

        csv_path = os.sep.join([sndbx_path, 'C_saved.csv'])
        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(candset, csv_path)

        reloaded = read_csv_metadata(csv_path, ltable=left_table,
                                     rtable=right_table)

        reloaded_props = cm.get_all_properties(reloaded)
        original_props = cm.get_all_properties(candset)
        self.assertEqual(reloaded_props, original_props,
                         'The properties in the catalog are not same')
コード例 #30
0
    def test_valid_path_df_chk_catalog_2(self):
        """Write candidate set C to CSV, read it back, and compare all properties.

        cm.get_all_properties must return equal values for the reloaded
        table and for the original C.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        candset = read_csv_metadata(path_c, ltable=left_table,
                                    rtable=right_table)

        csv_path = os.sep.join([sndbx_path, 'C_saved.csv'])
        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(candset, csv_path)

        reloaded = read_csv_metadata(csv_path, ltable=left_table,
                                     rtable=right_table)

        reloaded_props = cm.get_all_properties(reloaded)
        original_props = cm.get_all_properties(candset)
        self.assertEqual(reloaded_props, original_props,
                         'The properties in the catalog are not same')
コード例 #31
0
    def test_valid_object_2(self):
        """Pickle a RuleBasedBlocker and check the reloaded blocker's output.

        A blocker with one rule is run on A and B (15 rows expected), saved
        with save_object, loaded with load_object, and run again; both
        candidate sets must be equal.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        blocking_features = get_features_for_blocking(
            left_table, right_table, validate_inferred_attr_types=False)
        blocker = RuleBasedBlocker()
        blocker.add_rule('zipcode_zipcode_exm(ltuple, rtuple) != 1',
                         blocking_features)
        candset = blocker.block_tables(left_table, right_table,
                                       show_progress=False)
        self.assertEqual(len(candset), 15)
        pickle_path = os.sep.join([sndbx_path, 'C.pkl'])
        creat_dir_ifnot_exists(sndbx_path)
        save_object(blocker, pickle_path)

        restored_blocker = load_object(pickle_path)
        candset_again = restored_blocker.block_tables(
            left_table, right_table, show_progress=False)
        self.assertEqual(candset.equals(candset_again), True)
コード例 #32
0
    def test_valid_object_2(self):
        """Pickle a RuleBasedBlocker and check the reloaded blocker's output.

        A blocker with one rule is run on A and B (15 rows expected), saved
        with save_object, loaded with load_object, and run again; both
        candidate sets must be equal.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        blocking_features = get_features_for_blocking(left_table, right_table)
        blocker = RuleBasedBlocker()
        blocker.add_rule('zipcode_zipcode_exm(ltuple, rtuple) != 1',
                         blocking_features)
        candset = blocker.block_tables(left_table, right_table,
                                       show_progress=False)
        self.assertEqual(len(candset), 15)
        pickle_path = os.sep.join([sndbx_path, 'C.pkl'])
        creat_dir_ifnot_exists(sndbx_path)
        save_object(blocker, pickle_path)

        restored_blocker = load_object(pickle_path)
        candset_again = restored_blocker.block_tables(
            left_table, right_table, show_progress=False)
        self.assertEqual(candset.equals(candset_again), True)
コード例 #33
0
    def test_valid_path_df_chk_metadatafile_2(self):
        """Write candidate set C to CSV and compare its metadata file.

        The metadata read from <sandbox>/C_saved.metadata must equal the
        metadata read from expected_C.metadata under io_datasets_path.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        candset = read_csv_metadata(path_c, ltable=left_table,
                                    rtable=right_table)

        csv_path = os.sep.join([sndbx_path, 'C_saved.csv'])
        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(candset, csv_path)

        written_meta = _get_metadata_from_file(
            os.sep.join([sndbx_path, 'C_saved.metadata']))
        expected_meta = _get_metadata_from_file(
            os.sep.join([io_datasets_path, 'expected_C.metadata']))

        self.assertEqual(written_meta, expected_meta,
                         'The metadata information is not same.')
コード例 #34
0
    def test_valid_path_df_chk_metadatafile_2(self):
        """Write candidate set C to CSV and compare its metadata file.

        The metadata read from <sandbox>/C_saved.metadata must equal the
        metadata read from expected_C.metadata under io_datasets_path.
        """
        cm.del_catalog()
        del_files_in_dir(sndbx_path)
        left_table = read_csv_metadata(path_a)
        right_table = read_csv_metadata(path_b, key='ID')
        candset = read_csv_metadata(path_c, ltable=left_table,
                                    rtable=right_table)

        csv_path = os.sep.join([sndbx_path, 'C_saved.csv'])
        creat_dir_ifnot_exists(sndbx_path)
        to_csv_metadata(candset, csv_path)

        written_meta = _get_metadata_from_file(
            os.sep.join([sndbx_path, 'C_saved.metadata']))
        expected_meta = _get_metadata_from_file(
            os.sep.join([io_datasets_path, 'expected_C.metadata']))

        self.assertEqual(written_meta, expected_meta,
                         'The metadata information is not same.')
コード例 #35
0
 def test_invalid_metadataextn(self):
     """Call save_table with the integer 10 as metadata_ext.

     No assertion is made in the body.
     NOTE(review): the non-string extension is presumably expected to be
     rejected - confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     pickle_path = os.sep.join([sndbx_path, 'A.pkl'])
     table = read_csv_metadata(path_a)
     save_table(table, pickle_path, metadata_ext=10)
コード例 #36
0
 def test_invalid_metadataextn(self):
     """Call save_table with the integer 10 as metadata_ext.

     No assertion is made in the body.
     NOTE(review): the non-string extension is presumably expected to be
     rejected - confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     pickle_path = os.sep.join([sndbx_path, 'A.pkl'])
     table = read_csv_metadata(path_a)
     save_table(table, pickle_path, metadata_ext=10)
コード例 #37
0
 def test_invalid_path_table(self):
     """Call save_table with None for both the table and the path.

     No assertion is made in the body.
     NOTE(review): the call is presumably expected to be rejected -
     confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     save_table(None, None)
コード例 #38
0
 def test_valid_path_wo_metadata(self):
     """Read path_b with no extra arguments and check the result.

     The table must equal pd.read_csv of the same path, and
     cm.is_dfinfo_present must report True for it.
     """
     cm.del_catalog()
     table = read_csv_metadata(path_b)
     plain_frame = pd.read_csv(path_b)
     self.assertEqual(table.equals(plain_frame), True)
     info_present = cm.is_dfinfo_present(table)
     self.assertEqual(info_present, True)
コード例 #39
0
 def test_invalid_table_1(self):
     """Call save_table with the integer 10 in place of a table.

     No assertion is made in the body.
     NOTE(review): the non-table argument is presumably expected to be
     rejected - confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     target = os.sep.join([sndbx_path, 'A.pkl'])
     save_table(10, target)
コード例 #40
0
 def test_valid_path_wi_metadata_unknownprop(self):
     """Read InvalidMetadata1.csv and check its 'key1' property is 'ID'.

     Also checks that cm.is_dfinfo_present reports True for the table.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'InvalidMetadata1.csv'])
     table = read_csv_metadata(csv_path)
     self.assertEqual(cm.is_dfinfo_present(table), True)
     key1_value = cm.get_property(table, 'key1')
     self.assertEqual(key1_value, 'ID')
コード例 #41
0
 def test_del_catalog_valid(self):
     """After a table is read, cm.del_catalog() must leave a zero-length catalog."""
     read_csv_metadata(path_a)
     cm.del_catalog()
     catalog = cm.get_catalog()
     self.assertEqual(len(catalog), 0)
コード例 #42
0
 def test_valid_path_wi_invalidmetadata_wrongkey(self):
     """Read InvalidMetadata2.csv through read_csv_metadata.

     No assertion is made in the body.
     NOTE(review): the read is presumably expected to fail on the file's
     metadata - confirm how that expectation is checked.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'InvalidMetadata2.csv'])
     read_csv_metadata(csv_path)
コード例 #43
0
 def test_valid_path_wi_metadata_unknownprop(self):
     """Read InvalidMetadata1.csv and check its 'key1' property is 'ID'.

     Also checks that cm.is_dfinfo_present reports True for the table.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'InvalidMetadata1.csv'])
     table = read_csv_metadata(csv_path)
     self.assertEqual(cm.is_dfinfo_present(table), True)
     key1_value = cm.get_property(table, 'key1')
     self.assertEqual(key1_value, 'ID')
コード例 #44
0
 def test_valid_path_wi_invalidmetadata_wrongkey(self):
     """Read InvalidMetadata2.csv through read_csv_metadata.

     No assertion is made in the body.
     NOTE(review): the read is presumably expected to fail on the file's
     metadata - confirm how that expectation is checked.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'InvalidMetadata2.csv'])
     read_csv_metadata(csv_path)
コード例 #45
0
 def test_valid_path_wi_invalidmetadata_replace_key(self):
     """Read A_key_zipcode.csv with key='ID' and check catalog presence.

     cm.is_dfinfo_present and cm.is_property_present_for_df(..., 'key')
     must both report True for the table.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'A_key_zipcode.csv'])
     table = read_csv_metadata(csv_path, key='ID')
     self.assertEqual(cm.is_dfinfo_present(table), True)
     has_key = cm.is_property_present_for_df(table, 'key')
     self.assertEqual(has_key, True)
コード例 #46
0
 def test_valid_path_wi_invalidmetadata_wrongformat(self):
     """Read A_md_wrongformat.csv with key='ID' through read_csv_metadata.

     No assertion is made in the body.
     NOTE(review): the read is presumably expected to fail on the file's
     metadata format - confirm how that expectation is checked.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'A_md_wrongformat.csv'])
     read_csv_metadata(csv_path, key='ID')
コード例 #47
0
 def test_invalid_path_2(self):
     """Call save_table with table A and None as the path.

     No assertion is made in the body.
     NOTE(review): the None path is presumably expected to be rejected -
     confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     table = read_csv_metadata(path_a)
     save_table(table, None)
コード例 #48
0
 def test_invalid_table_1(self):
     """Call save_table with the integer 10 in place of a table.

     No assertion is made in the body.
     NOTE(review): the non-table argument is presumably expected to be
     rejected - confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     target = os.sep.join([sndbx_path, 'A.pkl'])
     save_table(10, target)
コード例 #49
0
 def test_invalid_nonstr_path(self):
     """A non-string file path must be rejected by read_csv_metadata.

     The call is wrapped in assertRaises so the test passes only when an
     AssertionError is raised; without it the exception would propagate
     and the test could never pass.
     NOTE(review): if an expected-exception decorator is applied to this
     method outside the visible code, keep only one of the two mechanisms.
     """
     cm.del_catalog()
     with self.assertRaises(AssertionError):
         read_csv_metadata(10)
コード例 #50
0
 def test_invalid_path_table(self):
     """Call save_table with None for both the table and the path.

     No assertion is made in the body.
     NOTE(review): the call is presumably expected to be rejected -
     confirm how that expectation is checked.
     """
     cm.del_catalog()
     del_files_in_dir(sndbx_path)
     save_table(None, None)
コード例 #51
0
 def setUp(self):
     """Reset the catalog by calling cm.del_catalog().

     Presumably invoked by the test framework before each test; the
     enclosing class is not visible here.
     """
     cm.del_catalog()
コード例 #52
0
 def test_valid_path_wi_invalidmetadata_replace_key(self):
     """Read A_key_zipcode.csv with key='ID' and check catalog presence.

     cm.is_dfinfo_present and cm.is_property_present_for_df(..., 'key')
     must both report True for the table.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'A_key_zipcode.csv'])
     table = read_csv_metadata(csv_path, key='ID')
     self.assertEqual(cm.is_dfinfo_present(table), True)
     has_key = cm.is_property_present_for_df(table, 'key')
     self.assertEqual(has_key, True)
コード例 #53
0
 def test_invalid_str_path(self):
     """Call read_csv_metadata on 'xyz.csv' under io_datasets_path.

     No assertion is made in the body.
     NOTE(review): the file presumably does not exist and the read is
     expected to fail - confirm how that expectation is checked.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'xyz.csv'])
     read_csv_metadata(csv_path)
コード例 #54
0
 def setUp(self):
     """Reset the catalog by calling cm.del_catalog().

     Presumably invoked by the test framework before each test; the
     enclosing class is not visible here.
     """
     cm.del_catalog()
コード例 #55
0
 def test_is_catalog_empty(self):
     """cm.is_catalog_empty() must return True after cm.del_catalog() runs.

     A table is read first so that del_catalog is called after a read.
     """
     read_csv_metadata(path_a)
     cm.del_catalog()
     catalog_is_empty = cm.is_catalog_empty()
     self.assertEqual(catalog_is_empty, True)
コード例 #56
0
 def test_invalid_nonstr_path(self):
     """A non-string file path must be rejected by read_csv_metadata.

     The call is wrapped in assertRaises so the test passes only when an
     AssertionError is raised; without it the exception would propagate
     and the test could never pass.
     NOTE(review): if an expected-exception decorator is applied to this
     method outside the visible code, keep only one of the two mechanisms.
     """
     cm.del_catalog()
     with self.assertRaises(AssertionError):
         read_csv_metadata(10)
コード例 #57
0
 def tearDown(self):
     """Reset the catalog by calling cm.del_catalog().

     Presumably invoked by the test framework after each test; the
     enclosing class is not visible here.
     """
     cm.del_catalog()
コード例 #58
0
 def test_valid_path_wi_invalidmetadata_wrongformat(self):
     """Read A_md_wrongformat.csv with key='ID' through read_csv_metadata.

     No assertion is made in the body.
     NOTE(review): the read is presumably expected to fail on the file's
     metadata format - confirm how that expectation is checked.
     """
     cm.del_catalog()
     csv_path = os.sep.join([io_datasets_path, 'A_md_wrongformat.csv'])
     read_csv_metadata(csv_path, key='ID')
コード例 #59
0
 def tearDown(self):
     """Reset the catalog by calling cm.del_catalog().

     Presumably invoked by the test framework after each test; the
     enclosing class is not visible here.
     """
     cm.del_catalog()
コード例 #60
0
 def test_valid_path_wo_metadata(self):
     """Read path_b with no extra arguments and check the result.

     The table must equal pd.read_csv of the same path, and
     cm.is_dfinfo_present must report True for it.
     """
     cm.del_catalog()
     table = read_csv_metadata(path_b)
     plain_frame = pd.read_csv(path_b)
     self.assertEqual(table.equals(plain_frame), True)
     info_present = cm.is_dfinfo_present(table)
     self.assertEqual(info_present, True)