def test_get_filtered_table(self):
    """Verify ``db._get_filtered_table`` on two single-row tables.

    ``attr_corres`` starts out pairing all four columns and is passed to
    ``db._filter_corres_list`` first. That call's return value is ignored,
    so this test relies on the list being updated in place. The filtered
    tables are then expected to hold only the ``ID`` and ``name`` columns.
    """
    all_columns = ['ID', 'name', 'price', 'zip code']
    A = pd.DataFrame([['a1', 'A', 0.11, 53704]], columns=all_columns)
    em.set_key(A, 'ID')
    B = pd.DataFrame([['b1', 'A', 0.11, 54321]], columns=all_columns)
    em.set_key(B, 'ID')

    # Pair every column of A with the same-named column of B.
    attr_corres = [(name, name) for name in all_columns]
    db._filter_corres_list(
        A, B, 'ID', 'ID',
        db._build_col_name_index_dict(A),
        db._build_col_name_index_dict(B),
        attr_corres)

    filtered_A, filtered_B = db._get_filtered_table(A, B, attr_corres)

    kept_columns = ['ID', 'name']
    expected_A = pd.DataFrame([['a1', 'A']], columns=kept_columns)
    em.set_key(expected_A, 'ID')
    expected_B = pd.DataFrame([['b1', 'A']], columns=kept_columns)
    em.set_key(expected_B, 'ID')

    for expected, actual in ((expected_A, filtered_A),
                             (expected_B, filtered_B)):
        self.assertEqual(expected.equals(actual), True)
    def test_get_filtered_table(self):
        """Verify ``db._get_filtered_table`` on two single-row tables.

        ``attr_corres`` starts out pairing all four columns and is passed to
        ``db._filter_corres_list`` first. That call's return value is
        ignored, so this test relies on the list being updated in place. The
        filtered tables are then expected to hold only the ``ID`` and
        ``name`` columns.
        """
        all_columns = ['ID', 'name', 'price', 'zip code']
        A = pd.DataFrame([['a1', 'A', 0.11, 53704]], columns=all_columns)
        em.set_key(A, 'ID')
        B = pd.DataFrame([['b1', 'A', 0.11, 54321]], columns=all_columns)
        em.set_key(B, 'ID')

        # Pair every column of A with the same-named column of B.
        attr_corres = [(name, name) for name in all_columns]
        db._filter_corres_list(
            A, B, 'ID', 'ID',
            db._build_col_name_index_dict(A),
            db._build_col_name_index_dict(B),
            attr_corres)

        filtered_A, filtered_B = db._get_filtered_table(A, B, attr_corres)

        kept_columns = ['ID', 'name']
        expected_A = pd.DataFrame([['a1', 'A']], columns=kept_columns)
        em.set_key(expected_A, 'ID')
        expected_B = pd.DataFrame([['b1', 'A']], columns=kept_columns)
        em.set_key(expected_B, 'ID')

        for expected, actual in ((expected_A, filtered_A),
                                 (expected_B, filtered_B)):
            self.assertEqual(expected.equals(actual), True)
 def test_select_features_3(self):
     """Expect no selected features for a single ``(0, 0)`` correspondence.

     Both tables are loaded with ``ID`` as key, filtered through
     ``db._get_filtered_table`` using the index pair ``(0, 0)``, and the
     result of ``db._select_features`` must be an empty list.
     """
     ltable = read_csv_metadata(path_a, key='ID')
     rtable = read_csv_metadata(path_b, key='ID')
     lkey = em.get_key(ltable)
     rkey = em.get_key(rtable)

     lfiltered, rfiltered = db._get_filtered_table(
         ltable, rtable, lkey, rkey, [(0, 0)])
     selected = db._select_features(lfiltered, rfiltered, lkey)

     self.assertEqual(selected, [])
    def test_select_features_3(self):
        """Expect no selected features when only the first columns are paired.

        The correspondence list holds one pair: the first column name of
        each table. After ``db._get_filtered_table``, the result of
        ``db._select_features`` must be an empty list.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        lkey = em.get_key(ltable)
        rkey = em.get_key(rtable)

        first_pair = (list(ltable.columns)[0], list(rtable.columns)[0])
        lfiltered, rfiltered = db._get_filtered_table(
            ltable, rtable, [first_pair])
        selected = db._select_features(lfiltered, rfiltered, lkey, rkey)

        self.assertEqual(selected, [])
 def test_select_features_2(self):
     """Expect features ``[1, 2]`` when columns 0, 1 and 4 are paired.

     The correspondence list pairs the column names at positions 0, 1 and
     4 of both tables. After ``db._get_filtered_table``, the result of
     ``db._select_features`` must equal ``[1, 2]``.
     """
     ltable = read_csv_metadata(path_a, key='ID')
     rtable = read_csv_metadata(path_b, key='ID')
     lkey = em.get_key(ltable)
     rkey = em.get_key(rtable)

     lnames = list(ltable.columns)
     rnames = list(rtable.columns)
     pairs = [(lnames[pos], rnames[pos]) for pos in (0, 1, 4)]

     lfiltered, rfiltered = db._get_filtered_table(ltable, rtable, pairs)
     selected = db._select_features(lfiltered, rfiltered, lkey, rkey)

     self.assertEqual(selected, [1, 2])
    def test_get_feature_weight_2(self):
        """Check ``db._get_feature_weight`` on both filtered tables.

        The tables are filtered with the index pairs (0, 0), (1, 1), (4, 4)
        and (5, 5). The resulting weight lists are compared against fixed
        expected values.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        lkey = em.get_key(ltable)
        rkey = em.get_key(rtable)

        pairs = [(pos, pos) for pos in (0, 1, 4, 5)]
        lfiltered, rfiltered = db._get_filtered_table(
            ltable, rtable, lkey, rkey, pairs)

        # Left table first, then right table, each against its own weights.
        cases = (
            (lfiltered, [2.0, 2.0, 2.0, 1.4]),
            (rfiltered, [2.0, 2.0, 2.0, 1.3333333333333333]),
        )
        for table, expected_weights in cases:
            self.assertEqual(db._get_feature_weight(table), expected_weights)
    # Example no. 7
    # 0
    def test_get_feature_weight_2(self):
        """Check ``db._get_feature_weight`` on both filtered tables.

        The correspondence list pairs the column names at positions 0, 1, 4
        and 5 of both tables. It is passed to ``db._get_filtered_table``
        together with both keys, and the resulting weight lists are
        compared against fixed expected values.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        lkey = em.get_key(ltable)
        rkey = em.get_key(rtable)

        lnames = list(ltable.columns)
        rnames = list(rtable.columns)
        pairs = [(lnames[pos], rnames[pos]) for pos in (0, 1, 4, 5)]
        lfiltered, rfiltered = db._get_filtered_table(
            ltable, rtable, lkey, rkey, pairs)

        # Left table first, then right table, each against its own weights.
        cases = (
            (lfiltered, [2.0, 2.0, 2.0, 1.4]),
            (rfiltered, [2.0, 2.0, 2.0, 1.3333333333333333]),
        )
        for table, expected_weights in cases:
            self.assertEqual(db._get_feature_weight(table), expected_weights)
    def test_get_feature_weight_2(self):
        """Check ``db._get_feature_weight`` on both filtered tables.

        The correspondence list pairs the column names at positions 0, 1, 4
        and 5 of both tables. After ``db._get_filtered_table``, the weight
        lists of the two filtered tables are compared against fixed
        expected values.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        # The keys are looked up but not passed on in this variant.
        lkey = em.get_key(ltable)
        rkey = em.get_key(rtable)

        lnames = list(ltable.columns)
        rnames = list(rtable.columns)
        pairs = [(lnames[pos], rnames[pos]) for pos in (0, 1, 4, 5)]
        lfiltered, rfiltered = db._get_filtered_table(ltable, rtable, pairs)

        # Left table first, then right table, each against its own weights.
        cases = (
            (lfiltered, [2.0, 2.0, 2.0, 1.4]),
            (rfiltered, [2.0, 2.0, 2.0, 1.3333333333333333]),
        )
        for table, expected_weights in cases:
            self.assertEqual(db._get_feature_weight(table), expected_weights)