def test_join_with_index(self):
    """Test Join operation with index.

    Joins the table loaded from ``test_csv/real_sales1.csv`` with a second
    load of the same file on ``pricerange`` three times: once before
    ``Table.Hash`` is called, then, after ``Table.Hash`` has been called on
    ``pricerange`` for both sides, once with the condition written as
    ``R.pricerange = S.pricerange`` and once with the operands flipped.

    Every result row must have equal ``R_pricerange`` and ``S_pricerange``
    values, and each join run after hashing must take less than 90% of the
    time of the first join.
    """
    R = Table.inputfromfile('test_csv/real_sales1.csv')
    S = Table.inputfromfile('test_csv/real_sales1.csv')

    def timed_join(condition):
        """Run the join once and return ``(result, elapsed_seconds)``."""
        # perf_counter() is monotonic and high resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a difference of time.time() readings can.
        began = time.perf_counter()
        joined = Table.join(R, S, 'R', 'S', condition)
        return joined, time.perf_counter() - began

    def check_rows(joined):
        """Assert the join produced a table whose rows satisfy the condition."""
        self.assertIsNotNone(joined.table)
        for row in joined.table:
            self.assertEqual(row['R_pricerange'], row['S_pricerange'])

    # Baseline: no Table.Hash call has been made on either side yet.
    baseline, without_index = timed_join('R.pricerange = S.pricerange')
    check_rows(baseline)

    Table.Hash(R, 'pricerange')
    Table.Hash(S, 'pricerange')

    # Both operand orders are exercised after hashing.
    for condition in ('R.pricerange = S.pricerange',
                      'S.pricerange = R.pricerange'):
        indexed, with_index = timed_join(condition)
        check_rows(indexed)
        self.assertLess(with_index, without_index * 0.9)
def test_hash(self):
    """Test building hash index on column: saleid.

    After ``Table.Hash`` runs on ``test_csv/sales1.csv``, the ``saleid``
    index is expected to map each key below to a one-element list
    (presumably the position of the matching row).
    """
    self.R = Table.inputfromfile('test_csv/sales1.csv')
    Table.Hash(self.R, 'saleid')

    self.assertIsNotNone(self.R.index)
    saleid_index = self.R.index['saleid']
    self.assertIsNotNone(saleid_index)

    # (saleid, expected index entry), checked in this order.
    expectations = (
        (36, [0]),
        (784, [1]),
        (801, [2]),
        (905, [3]),
    )
    for saleid, expected in expectations:
        self.assertEqual(expected, saleid_index[saleid])
def test_hash_with_pricerange(self):
    """Test building hash index on column: pricerange.

    ``pricerange`` holds repeated values, so one key may be associated
    with several entries; the expected lists below pin the exact entries
    for each key in ``test_csv/sales1.csv``.
    """
    sales = Table.inputfromfile('test_csv/sales1.csv')
    Table.Hash(sales, 'pricerange')

    self.assertIsNotNone(sales.index)
    self.assertIsNotNone(sales.index['pricerange'])
    by_pricerange = sales.index['pricerange']

    # (pricerange value, expected index entry), checked in this order.
    expectations = (
        ('moderate', [0, 1]),
        ('outrageous', [2, 3, 5, 6, 7, 10, 11]),
        ('supercheap', [4]),
        ('cheap', [9]),
        ('expensive', [8]),
    )
    for label, expected in expectations:
        # .get() yields None for a missing key, so an absent entry shows up
        # as an assertion failure rather than a KeyError.
        self.assertEqual(expected, by_pricerange.get(label))
def test_select_with_index_flip(self):
    """Test select with hash index on column pricerange: 'cheap' = pricerange.

    The condition is written with the literal on the left-hand side. Ten
    selects are timed before ``Table.Hash`` is called on ``pricerange``
    and ten after; the second batch must take less than one fifth of the
    time of the first, and every row of the last result must have
    ``pricerange == 'cheap'``.
    """
    R = Table.inputfromfile('test_csv/real_sales1.csv')
    condition = "'cheap' = pricerange"

    def timed_selects():
        """Run the select ten times; return ``(last_result, elapsed_seconds)``."""
        # perf_counter() is monotonic and high resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a difference of time.time() readings can.
        began = time.perf_counter()
        for _ in range(10):
            selected = Table.select(R, condition)
        return selected, time.perf_counter() - began

    # Baseline batch: Table.Hash has not been called yet; its result is
    # not inspected.
    _, without_index = timed_selects()

    Table.Hash(R, 'pricerange')
    R1, with_index = timed_selects()

    for row in R1.table:
        self.assertEqual(row['pricerange'], 'cheap')
    self.assertLess(with_index, without_index / 5)