Beispiel #1
0
 def test_join_with_index(self):
     """
     Test that an equi-join on pricerange is correct and faster with an index.

     The same CSV is loaded twice (as R and S) and joined on pricerange.
     The join is timed once before any index exists, then again after
     Table.Hash has been called on both tables, using both operand orders
     of the join condition. Every output row must have matching
     R_pricerange / S_pricerange values, and each indexed run must take
     less than 90% of the unindexed time.

     NOTE(review): the speed-up assertion is timing based and may be
     sensitive to machine load.
     """
     R = Table.inputfromfile('test_csv/real_sales1.csv')
     S = Table.inputfromfile('test_csv/real_sales1.csv')

     def timed_join(condition):
         """Join R and S on `condition`, verify the rows, return elapsed seconds."""
         # perf_counter is monotonic and high resolution, so the measured
         # interval cannot be skewed by wall-clock adjustments.
         start = time.perf_counter()
         joined = Table.join(R, S, 'R', 'S', condition)
         elapsed = time.perf_counter() - start
         # Verification is done outside the timed region.
         self.assertIsNotNone(joined.table)
         for row in joined.table:
             self.assertEqual(row['R_pricerange'], row['S_pricerange'])
         return elapsed

     # Baseline: no index has been built on either table yet.
     without_index = timed_join('R.pricerange = S.pricerange')

     Table.Hash(R, 'pricerange')
     Table.Hash(S, 'pricerange')

     # The second condition swaps the operands to check that the join
     # behaves the same regardless of which side is written first.
     for condition in ('R.pricerange = S.pricerange',
                       'S.pricerange = R.pricerange'):
         with_index = timed_join(condition)
         self.assertLess(with_index, without_index * 0.9)
Beispiel #2
0
 def test_hash(self):
     """Build a hash index on the saleid column and spot-check its entries."""
     self.R = Table.inputfromfile('test_csv/sales1.csv')
     Table.Hash(self.R, 'saleid')
     self.assertIsNotNone(self.R.index)
     saleid_index = self.R.index['saleid']
     self.assertIsNotNone(saleid_index)
     # Pairs of (saleid key, list the index must hold for that key).
     # NOTE(review): the lists look like row positions in the CSV - confirm
     # against Table.Hash.
     expected_entries = (
         (36, [0]),
         (784, [1]),
         (801, [2]),
         (905, [3]),
     )
     for saleid, expected in expected_entries:
         self.assertEqual(expected, saleid_index[saleid])
Beispiel #3
0
 def test_hash_with_pricerange(self):
     """Build a hash index on the pricerange column and check every bucket."""
     R = Table.inputfromfile('test_csv/sales1.csv')
     Table.Hash(R, 'pricerange')
     self.assertIsNotNone(R.index)
     pricerange_index = R.index['pricerange']
     self.assertIsNotNone(pricerange_index)
     # Pairs of (pricerange value, list the index must hold for that value).
     # NOTE(review): the lists look like row positions in the CSV - confirm
     # against Table.Hash.
     expected_buckets = (
         ('moderate', [0, 1]),
         ('outrageous', [2, 3, 5, 6, 7, 10, 11]),
         ('supercheap', [4]),
         ('cheap', [9]),
         ('expensive', [8]),
     )
     for label, expected in expected_buckets:
         self.assertEqual(expected, pricerange_index.get(label))
Beispiel #4
0
 def test_select_with_index_flip(self):
     """
     Test select with a hash index on pricerange: 'cheap' = pricerange.

     The condition is written with the literal on the left-hand side. The
     select is run several times without an index and several times after
     Table.Hash has been called on pricerange. Every row of the final
     result must have pricerange 'cheap', and the indexed runs must take
     less than one fifth of the unindexed time.

     NOTE(review): the speed-up assertion is timing based and may be
     sensitive to machine load.
     """
     R = Table.inputfromfile('test_csv/real_sales1.csv')
     condition = "'cheap' = pricerange"
     repetitions = 10  # repeat the select so the interval is measurable

     # perf_counter is monotonic and high resolution, so the measured
     # intervals cannot be skewed by wall-clock adjustments.
     start = time.perf_counter()
     for _ in range(repetitions):
         R1 = Table.select(R, condition)
     without_index = time.perf_counter() - start

     Table.Hash(R, 'pricerange')

     start = time.perf_counter()
     for _ in range(repetitions):
         R1 = Table.select(R, condition)
     with_index = time.perf_counter() - start

     # Only the result of the last indexed select is checked for content.
     for row in R1.table:
         self.assertEqual(row['pricerange'], 'cheap')
     self.assertLess(with_index, without_index / 5)