def test_debugblocker_7(self):
     """Call debug_blocker with attr_corres mixing a tuple and a list.

     NOTE(review): nothing is asserted here; presumably an expected-exception
     decorator sits above this method, outside this excerpt -- confirm.
     """
     left = read_csv_metadata(path_a, key='ID')
     right = read_csv_metadata(path_b, key='ID')
     candset_options = dict(ltable=left, rtable=right,
                            fk_ltable='ltable_ID', fk_rtable='rtable_ID',
                            key='_id')
     candset = read_csv_metadata(path_c, **candset_options)
     # The second correspondence is deliberately a list, not a tuple.
     db.debug_blocker(candset, left, right, 200,
                      [('ID', 'ID'), ['ID', 'ID']])
 def test_debugblocker_7(self):
     """Run debug_blocker with a correspondence list of mixed pair types.

     The list holds one tuple pair and one list pair, both ('ID', 'ID').
     NOTE(review): no assertion is made; an expected-exception decorator
     may exist outside this view -- confirm.
     """
     table_a = read_csv_metadata(path_a, key='ID')
     table_b = read_csv_metadata(path_b, key='ID')
     cand = read_csv_metadata(
         path_c,
         ltable=table_a,
         rtable=table_b,
         fk_ltable='ltable_ID',
         fk_rtable='rtable_ID',
         key='_id',
     )
     mixed_corres = [('ID', 'ID'), ['ID', 'ID']]
     db.debug_blocker(cand, table_a, table_b, 200, mixed_corres)
 def test_debugblocker_14(self):
     """Check the columns and the first record returned by debug_blocker.

     Loads the ltable, rtable and candidate-set CSV fixtures from
     ``debugblocker_datasets_path``, requests a single output row, and
     verifies the full column list. Only positions 2 and 3 of the first
     record are compared against ``expected_record``; the remaining
     entries are printed but not asserted.
     """
     path_ltable = os.sep.join([debugblocker_datasets_path,
                                'test_debugblocker_ltable.csv'])
     path_rtable = os.sep.join([debugblocker_datasets_path,
                                'test_debugblocker_rtable.csv'])
     path_cand = os.sep.join([debugblocker_datasets_path,
                              'test_debugblocker_cand.csv'])
     ltable = read_csv_metadata(path_ltable, key='ID')
     rtable = read_csv_metadata(path_rtable, key='book_id')
     cand_set = read_csv_metadata(path_cand, ltable=ltable, rtable=rtable,
                                  fk_ltable='ltable_ID',
                                  fk_rtable='rtable_book_id',
                                  key='_id')
     attr_corres = [('title', 'book_title'), ('price', 'price'),
                    ('desc', 'description'), ('genre', 'book_genre'),
                    ('year', 'pub_year'), ('lang', 'language'),
                    ('author', 'author'), ('publisher', 'publisher')]
     output_size = 1
     ret_dataframe = db.debug_blocker(cand_set, ltable, rtable,
                                      output_size, attr_corres)
     expected_columns = ['_id', 'ltable_ID', 'rtable_book_id',
                         'ltable_title', 'ltable_desc', 'ltable_year',
                         'ltable_lang', 'ltable_author', 'ltable_publisher',
                         'rtable_book_title', 'rtable_description',
                         'rtable_pub_year', 'rtable_language',
                         'rtable_author', 'rtable_publisher']
     self.assertEqual(list(ret_dataframe.columns), expected_columns)
     # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
     # replacement for looking up the row labelled 0.
     ret_record = list(ret_dataframe.loc[0])
     expected_record = [0, 1, 'B001', 'data analysis', 'introduction to data analysis',
         2015, 'ENG', 'Jane Doe', 'BCD publisher', 'introduction to data analysis',
         float('nan'), 'English', 'introduction to data analysis', 'John Doe', 'ABC publisher10.00']
     print(ret_record)
     print(expected_record)
     self.assertEqual(expected_record[2], ret_record[2])
     self.assertEqual(expected_record[3], ret_record[3])
 def test_debugblocker_14(self):
     """Verify debug_blocker's column list and two fields of its first row.

     The fixtures are read from ``debugblocker_datasets_path`` and one
     output row is requested. Both the returned and the expected record
     are printed; only their elements at index 2 and 3 are compared.
     """
     left_path, right_path, cand_path = (
         os.sep.join([debugblocker_datasets_path, file_name])
         for file_name in ('test_debugblocker_ltable.csv',
                           'test_debugblocker_rtable.csv',
                           'test_debugblocker_cand.csv')
     )
     left = read_csv_metadata(left_path, key='ID')
     right = read_csv_metadata(right_path, key='book_id')
     candidates = read_csv_metadata(
         cand_path,
         ltable=left,
         rtable=right,
         fk_ltable='ltable_ID',
         fk_rtable='rtable_book_id',
         key='_id',
     )
     correspondences = [
         ('title', 'book_title'), ('price', 'price'),
         ('desc', 'description'), ('genre', 'book_genre'),
         ('year', 'pub_year'), ('lang', 'language'),
         ('author', 'author'), ('publisher', 'publisher'),
     ]
     result = db.debug_blocker(candidates, left, right, 1, correspondences)

     wanted_columns = [
         '_id', 'ltable_ID', 'rtable_book_id',
         'ltable_title', 'ltable_desc', 'ltable_year',
         'ltable_lang', 'ltable_author', 'ltable_publisher',
         'rtable_book_title', 'rtable_description',
         'rtable_pub_year', 'rtable_language',
         'rtable_author', 'rtable_publisher',
     ]
     self.assertEqual(list(result.columns), wanted_columns)

     actual_row = list(result.loc[0])
     wanted_row = [
         0, 1, 'B001', 'data analysis', 'introduction to data analysis',
         2015, 'ENG', 'Jane Doe', 'BCD publisher',
         'introduction to data analysis', float('nan'), 'English',
         'introduction to data analysis', 'John Doe', 'ABC publisher10.00',
     ]
     print(actual_row)
     print(wanted_row)
     # Only the rtable key and the ltable title positions are checked.
     for position in (2, 3):
         self.assertEqual(wanted_row[position], actual_row[position])
    def test_debugblocker_12(self):
        """Run debug_blocker on single-row tables built in memory.

        Each table has one 'ID' column holding 0, and the candidate set
        has one row pairing those records. No assertion is made on the
        result.
        """
        left = pd.DataFrame([[0]])
        left.columns = ['ID']
        right = pd.DataFrame([[0]])
        right.columns = ['ID']
        em.set_key(left, 'ID')
        em.set_key(right, 'ID')

        candidates = pd.DataFrame([[0, 0, 0]])
        candidates.columns = ['_id', 'ltable_ID', 'rtable_ID']
        cm.set_candset_properties(candidates, '_id', 'ltable_ID',
                                  'rtable_ID', left, right)

        db.debug_blocker(candidates, left, right)
    def test_debugblocker_18(self):
        """Run debug_blocker with ``n_jobs=2``; the result is not inspected."""
        left = read_csv_metadata(path_a, key='ID')
        right = read_csv_metadata(path_b, key='ID')
        candidates = read_csv_metadata(
            path_c,
            ltable=left,
            rtable=right,
            fk_ltable='ltable_ID',
            fk_rtable='rtable_ID',
            key='_id',
        )

        db.debug_blocker(candidates, left, right, n_jobs=2)
    def test_debugblocker_12(self):
        """Call debug_blocker with one-record ltable, rtable and candset.

        All three frames are constructed inline; the returned value is
        not checked.
        """
        key_name = 'ID'
        table_l = pd.DataFrame([[0]])
        table_r = pd.DataFrame([[0]])
        table_l.columns = [key_name]
        table_r.columns = [key_name]
        em.set_key(table_l, key_name)
        em.set_key(table_r, key_name)

        pairs = pd.DataFrame([[0, 0, 0]])
        pairs.columns = ['_id', 'ltable_ID', 'rtable_ID']
        cm.set_candset_properties(pairs, '_id', 'ltable_ID', 'rtable_ID',
                                  table_l, table_r)

        db.debug_blocker(pairs, table_l, table_r)
    def test_debugblocker_18(self):
        """Invoke debug_blocker with two jobs and discard its return value."""
        table_a = read_csv_metadata(path_a, key='ID')
        table_b = read_csv_metadata(path_b, key='ID')
        candset_options = dict(ltable=table_a, rtable=table_b,
                               fk_ltable='ltable_ID', fk_rtable='rtable_ID',
                               key='_id')
        table_c = read_csv_metadata(path_c, **candset_options)

        db.debug_blocker(table_c, table_a, table_b, n_jobs=2)
    def test_debugblocker_13(self):
        """Compare debug_blocker's default output length with a stored CSV.

        Only the number of rows is compared against
        'test_debugblocker_13_out.csv'; row contents are not checked.
        """
        left = read_csv_metadata(path_a, key='ID')
        right = read_csv_metadata(path_b, key='ID')
        candset_options = dict(ltable=left, rtable=right,
                               fk_ltable='ltable_ID', fk_rtable='rtable_ID',
                               key='_id')
        candidates = read_csv_metadata(path_c, **candset_options)

        actual = db.debug_blocker(candidates, left, right)

        expected_path = os.sep.join(
            [debugblocker_datasets_path, 'test_debugblocker_13_out.csv'])
        expected = read_csv_metadata(expected_path, **candset_options)
        self.assertEqual(len(expected), len(actual))
    def test_debugblocker_13(self):
        """Assert debug_blocker returns as many rows as the reference file.

        The reference is 'test_debugblocker_13_out.csv' under
        ``debugblocker_datasets_path``; only lengths are compared.
        """
        table_a = read_csv_metadata(path_a, key='ID')
        table_b = read_csv_metadata(path_b, key='ID')
        table_c = read_csv_metadata(
            path_c,
            ltable=table_a,
            rtable=table_b,
            fk_ltable='ltable_ID',
            fk_rtable='rtable_ID',
            key='_id',
        )

        produced = db.debug_blocker(table_c, table_a, table_b)

        reference_file = os.sep.join(
            [debugblocker_datasets_path, 'test_debugblocker_13_out.csv'])
        reference = read_csv_metadata(
            reference_file,
            ltable=table_a,
            rtable=table_b,
            fk_ltable='ltable_ID',
            fk_rtable='rtable_ID',
            key='_id',
        )
        self.assertEqual(len(reference), len(produced))
 def test_debugblocker_14(self):
     """Check debug_blocker's columns (incl. 'similarity') and first record.

     Loads the ltable, rtable and candidate-set CSV fixtures from
     ``debugblocker_datasets_path``, requests a single output row, and
     verifies the full column list. Only positions 2 and 3 of the first
     record are compared against ``expected_record``.
     """
     path_ltable = os.sep.join(
         [debugblocker_datasets_path, 'test_debugblocker_ltable.csv'])
     path_rtable = os.sep.join(
         [debugblocker_datasets_path, 'test_debugblocker_rtable.csv'])
     path_cand = os.sep.join(
         [debugblocker_datasets_path, 'test_debugblocker_cand.csv'])
     ltable = read_csv_metadata(path_ltable, key='ID')
     rtable = read_csv_metadata(path_rtable, key='book_id')
     cand_set = read_csv_metadata(path_cand,
                                  ltable=ltable,
                                  rtable=rtable,
                                  fk_ltable='ltable_ID',
                                  fk_rtable='rtable_book_id',
                                  key='_id')
     attr_corres = [('title', 'book_title'), ('price', 'price'),
                    ('desc', 'description'), ('genre', 'book_genre'),
                    ('year', 'pub_year'), ('lang', 'language'),
                    ('author', 'author'), ('publisher', 'publisher')]
     output_size = 1
     ret_dataframe = db.debug_blocker(cand_set, ltable, rtable, output_size,
                                      attr_corres)
     expected_columns = [
         '_id', 'similarity', 'ltable_ID', 'rtable_book_id', 'ltable_title',
         'ltable_desc', 'ltable_year', 'ltable_lang', 'ltable_author',
         'ltable_publisher', 'rtable_book_title', 'rtable_description',
         'rtable_pub_year', 'rtable_language', 'rtable_author',
         'rtable_publisher'
     ]
     self.assertEqual(list(ret_dataframe.columns), expected_columns)
     # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
     # replacement for looking up the row labelled 0.
     ret_record = list(ret_dataframe.loc[0])
     # pd.np was removed in pandas 2.0, so NaN is spelled without it.
     nan = float('nan')
     expected_record = [
         0, 0.33333333333333331, 2, 'B002', 'Thinking in Java',
         'learn how to program in Java', 2000, 'ENG', 'Johnnie Doe',
         nan, 'Thinking in C', 'learn programming in C++', '1990',
         nan, 'Jane Doe', 'BCD publisher'
     ]
     self.assertEqual(expected_record[2], ret_record[2])
     self.assertEqual(expected_record[3], ret_record[3])
 def test_debugblocker_4(self):
     """Call debug_blocker with output_size given as the string '200'.

     NOTE(review): nothing is asserted; presumably an expected-exception
     decorator exists outside this excerpt -- confirm.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     candidates = read_csv_metadata(path_c, ltable=left, rtable=right)
     db.debug_blocker(candidates, left, right, '200')
 def test_debugblocker_2(self):
     """Call debug_blocker with plain lists as rtable and candidate set.

     NOTE(review): nothing is asserted; presumably an expected-exception
     decorator exists outside this excerpt -- confirm.
     """
     left = read_csv_metadata(path_a)
     right, candidates = [], []
     db.debug_blocker(candidates, left, right)
 def test_debugblocker_1(self):
     """Call debug_blocker with three empty lists instead of tables.

     NOTE(review): nothing is asserted; presumably an expected-exception
     decorator exists outside this excerpt -- confirm.
     """
     left, right, candidates = [], [], []
     db.debug_blocker(candidates, left, right)
 def test_debugblocker_3(self):
     """Call debug_blocker with None as the candidate set.

     NOTE(review): nothing is asserted; presumably an expected-exception
     decorator exists outside this excerpt -- confirm.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     db.debug_blocker(None, left, right)
 def test_debugblocker_11(self):
     """Call debug_blocker with an empty DataFrame candset and size 0.

     NOTE(review): nothing is asserted; presumably an expected-exception
     decorator exists outside this excerpt -- confirm.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     empty_candset = pd.DataFrame([])
     db.debug_blocker(empty_candset, left, right, 0)
 def test_debugblocker_1(self):
     """Pass empty lists for the candidate set and both tables.

     NOTE(review): no assertion is made here; an expected-exception
     decorator may sit above this method outside this view -- confirm.
     """
     table_a = []
     table_b = []
     table_c = []
     db.debug_blocker(table_c, table_a, table_b)
 def test_debugblocker_5(self):
     """Call debug_blocker with an empty set as attr_corres.

     NOTE(review): nothing is asserted; presumably an expected-exception
     decorator exists outside this excerpt -- confirm.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     candidates = read_csv_metadata(path_c, ltable=left, rtable=right)
     db.debug_blocker(candidates, left, right, 200, set())
 def test_debugblocker_3(self):
     """Pass None where debug_blocker takes the candidate set.

     NOTE(review): no assertion is made here; an expected-exception
     decorator may sit above this method outside this view -- confirm.
     """
     table_a = read_csv_metadata(path_a)
     table_b = read_csv_metadata(path_b)
     missing_candset = None
     db.debug_blocker(missing_candset, table_a, table_b)
 def test_debugblocker_2(self):
     """Pass empty lists for rtable and candset alongside a real ltable.

     NOTE(review): no assertion is made here; an expected-exception
     decorator may sit above this method outside this view -- confirm.
     """
     table_a = read_csv_metadata(path_a)
     table_b = []
     table_c = []
     db.debug_blocker(table_c, table_a, table_b)
 def test_debugblocker_5(self):
     """Pass a set (not a list) as the attribute correspondence.

     NOTE(review): no assertion is made here; an expected-exception
     decorator may sit above this method outside this view -- confirm.
     """
     table_a = read_csv_metadata(path_a)
     table_b = read_csv_metadata(path_b)
     table_c = read_csv_metadata(path_c, ltable=table_a, rtable=table_b)
     empty_corres = set()
     db.debug_blocker(table_c, table_a, table_b, 200, empty_corres)
 def test_debugblocker_4(self):
     """Pass the output size as a string rather than an integer.

     NOTE(review): no assertion is made here; an expected-exception
     decorator may sit above this method outside this view -- confirm.
     """
     table_a = read_csv_metadata(path_a)
     table_b = read_csv_metadata(path_b)
     table_c = read_csv_metadata(path_c, ltable=table_a, rtable=table_b)
     size_as_text = '200'
     db.debug_blocker(table_c, table_a, table_b, size_as_text)
 def test_debugblocker_10(self):
     """Call debug_blocker with empty DataFrames as rtable and candset.

     NOTE(review): nothing is asserted; presumably an expected-exception
     decorator exists outside this excerpt -- confirm.
     """
     left = read_csv_metadata(path_a)
     empty_right = pd.DataFrame([])
     empty_candset = pd.DataFrame([])
     db.debug_blocker(empty_candset, left, empty_right)
 def test_debugblocker_10(self):
     """Pass two separate empty DataFrames for rtable and candidate set.

     NOTE(review): no assertion is made here; an expected-exception
     decorator may sit above this method outside this view -- confirm.
     """
     table_a = read_csv_metadata(path_a)
     table_b, table_c = pd.DataFrame([]), pd.DataFrame([])
     db.debug_blocker(table_c, table_a, table_b)
Example #25
0
 def time_debug_blocking(self):
     """Benchmark body: one debug_blocker call on self.A, self.B, self.C.

     NOTE(review): the test methods in this file pass the candidate set
     first; confirm the argument order here matches the debug_blocker
     signature in use.
     """
     db.debug_blocker(
         self.A,
         self.B,
         self.C,
     )
 def test_debugblocker_11(self):
     """Pass an empty DataFrame as candset together with output size 0.

     NOTE(review): no assertion is made here; an expected-exception
     decorator may sit above this method outside this view -- confirm.
     """
     table_a = read_csv_metadata(path_a)
     table_b = read_csv_metadata(path_b)
     table_c = pd.DataFrame([])
     zero_rows = 0
     db.debug_blocker(table_c, table_a, table_b, zero_rows)
Example #27
0
 def time_debug_blocking(self):
     """Benchmark body: one debug_blocker call using five instance attributes.

     Passes self.A, self.B, self.C, self.output_size and self.attr_corres
     positionally, in that order.
     NOTE(review): the test methods in this file pass the candidate set
     first; confirm the argument order matches the signature in use.
     """
     db.debug_blocker(
         self.A,
         self.B,
         self.C,
         self.output_size,
         self.attr_corres,
     )