def test_assemble_topk_table_2(self):
        """Check the table assembled from a three-entry top-k heap.

        Loads tables A and B keyed on ``ID``, hands a hand-written heap to
        ``db._assemble_topk_table`` together with both key names, and then
        verifies the number of rows, the exact column order and the
        contents of every returned row.
        """
        A = read_csv_metadata(path_a, key='ID')
        B = read_csv_metadata(path_b, key='ID')
        A_key = em.get_key(A)
        B_key = em.get_key(B)
        # Each heap entry is a 3-tuple. Judging by the expected records
        # below, the 2nd and 3rd items select rows of A and B by position
        # (e.g. (.., 1, 0) -> 'a2', 'b1'); the 1st item is presumably a
        # similarity score -- TODO confirm against _assemble_topk_table.
        topk_heap = [(0.2727272727272727, 1, 0), (0.23076923076923078, 0, 4),
                     (0.16666666666666666, 0, 3)]
        ret_dataframe = db._assemble_topk_table(topk_heap, A, B, A_key, B_key)
        expected_columns = ['_id', 'ltable_ID', 'rtable_ID',
                            'ltable_name', 'ltable_birth_year',
                            'ltable_hourly_wage',
                            'ltable_address', 'ltable_zipcode', 'rtable_name',
                            'rtable_birth_year', 'rtable_hourly_wage',
                            'rtable_address', 'rtable_zipcode']
        self.assertEqual(len(ret_dataframe), 3)
        self.assertEqual(list(ret_dataframe.columns), expected_columns)

        expected_recs = [[0, 'a2', 'b1', 'Michael Franklin',
                          1988, 27.5, '1652 Stockton St, San Francisco',
                          94122, 'Mark Levene', 1987, 29.5,
                          '108 Clement St, San Francisco', 94107],
                         [1, 'a1', 'b5', 'Kevin Smith',
                          1989, 30.0, '607 From St, San Francisco', 94107,
                          'Alfons Kemper', 1984, 35.0,
                          '170 Post St, Apt 4,  San Francisco', 94122],
                         [2, 'a1', 'b4', 'Kevin Smith',
                          1989, 30.0, '607 From St, San Francisco', 94107,
                          'Joseph Kuan', 1982, 26.0,
                          '108 South Park, San Francisco', 94122]]
        # Use label-based ``.loc``: ``DataFrame.ix`` was deprecated in
        # pandas 0.20 and removed in pandas 1.0, so ``.ix`` raises
        # AttributeError on current pandas.
        self.assertEqual(list(ret_dataframe.loc[0]), expected_recs[0])
        self.assertEqual(list(ret_dataframe.loc[1]), expected_recs[1])
        self.assertEqual(list(ret_dataframe.loc[2]), expected_recs[2])
    def test_assemble_topk_table_2(self):
        """Assemble a top-k table from three heap entries and verify it.

        The result of ``db._assemble_topk_table`` must have three rows, the
        expected column order, and rows 0..2 equal to the expected records.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        l_key = em.get_key(ltable)
        r_key = em.get_key(rtable)

        heap_entries = [
            (0.2727272727272727, 1, 0),
            (0.23076923076923078, 0, 4),
            (0.16666666666666666, 0, 3),
        ]
        result = db._assemble_topk_table(heap_entries, ltable, rtable,
                                         l_key, r_key)

        wanted_columns = [
            '_id', 'ltable_ID', 'rtable_ID',
            'ltable_name', 'ltable_birth_year', 'ltable_hourly_wage',
            'ltable_address', 'ltable_zipcode',
            'rtable_name', 'rtable_birth_year', 'rtable_hourly_wage',
            'rtable_address', 'rtable_zipcode',
        ]
        self.assertEqual(len(result), 3)
        self.assertEqual(list(result.columns), wanted_columns)

        wanted_rows = [
            [0, 'a2', 'b1',
             'Michael Franklin', 1988, 27.5,
             '1652 Stockton St, San Francisco', 94122,
             'Mark Levene', 1987, 29.5,
             '108 Clement St, San Francisco', 94107],
            [1, 'a1', 'b5',
             'Kevin Smith', 1989, 30.0,
             '607 From St, San Francisco', 94107,
             'Alfons Kemper', 1984, 35.0,
             '170 Post St, Apt 4,  San Francisco', 94122],
            [2, 'a1', 'b4',
             'Kevin Smith', 1989, 30.0,
             '607 From St, San Francisco', 94107,
             'Joseph Kuan', 1982, 26.0,
             '108 South Park, San Francisco', 94122],
        ]
        # Compare row labels 0, 1, 2 in order, stopping at the first mismatch.
        for row_label, wanted in enumerate(wanted_rows):
            self.assertEqual(list(result.loc[row_label]), wanted)
Ejemplo n.º 3
0
 def test_assemble_topk_table_1(self):
     """An empty heap must yield a table with no rows and no columns."""
     ltable = read_csv_metadata(path_a, key='ID')
     rtable = read_csv_metadata(path_b, key='ID')
     # NOTE(review): no key arguments are passed here; confirm this matches
     # the signature of the _assemble_topk_table version under test.
     empty_heap = []
     result = db._assemble_topk_table(empty_heap, ltable, rtable)
     self.assertEqual(len(result), 0)
     self.assertEqual(list(result.columns), [])
 def test_assemble_topk_table_1(self):
     """An empty heap must yield a table with no rows and no columns.

     Both tables are loaded keyed on ``ID`` and their key names are passed
     along to ``db._assemble_topk_table``.
     """
     ltable = read_csv_metadata(path_a, key='ID')
     rtable = read_csv_metadata(path_b, key='ID')
     l_key = em.get_key(ltable)
     r_key = em.get_key(rtable)

     empty_heap = []
     result = db._assemble_topk_table(empty_heap, ltable, rtable,
                                      l_key, r_key)

     self.assertEqual(len(result), 0)
     self.assertEqual(list(result.columns), [])