def test_join_with_index(self): """ Test Join operation with index """ R = Table.inputfromfile('test_csv/real_sales1.csv') S = Table.inputfromfile('test_csv/real_sales1.csv') start = time.time() R1 = Table.join(R, S, 'R', 'S', 'R.pricerange = S.pricerange') end = time.time() self.assertIsNotNone(R1.table) for row in R1.table: self.assertEqual(row['R_pricerange'], row['S_pricerange']) without_index = end - start Table.Hash(R, 'pricerange') Table.Hash(S, 'pricerange') start = time.time() R1 = Table.join(R, S, 'R', 'S', 'R.pricerange = S.pricerange') end = time.time() with_index = end - start self.assertIsNotNone(R1.table) for row in R1.table: self.assertEqual(row['R_pricerange'], row['S_pricerange']) self.assertLess(with_index, without_index*0.9) start = time.time() R1 = Table.join(R, S, 'R', 'S', 'S.pricerange = R.pricerange') end = time.time() with_index = end - start self.assertIsNotNone(R1.table) for row in R1.table: self.assertEqual(row['R_pricerange'], row['S_pricerange']) self.assertLess(with_index, without_index*0.9)
def test_movavg(self): """ Test movavg """ R = Table.inputfromfile('test_csv/real_sales1.csv') expect = Table.inputfromfile('test_csv/moving_avg_of_step_1.csv') R1 = Table.movavg(R, ['qty', 1]) expect = Table.sort(expect, ['saleid']) R1 = Table.sort(R1, ['saleid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertAlmostEqual(R1.table[i]['avg(qty)'], expect.table[i]['avg(qty)'], places=4) expect = Table.inputfromfile('test_csv/moving_avg_of_step_3.csv') R1 = Table.movavg(R, ['qty', 3]) expect = Table.sort(expect, ['saleid']) R1 = Table.sort(R1, ['saleid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertAlmostEqual(R1.table[i]['avg(qty)'], expect.table[i]['avg(qty)'], places=4) expect = Table.inputfromfile('test_csv/moving_avg_of_step_n.csv') R1 = Table.movavg(R, ['qty', len(R.table)]) expect = Table.sort(expect, ['saleid']) R1 = Table.sort(R1, ['saleid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertAlmostEqual(R1.table[i]['avg(qty)'], expect.table[i]['avg(qty)'], places=3)
def test_movsum(self): """ Test movsum """ R = Table.inputfromfile('test_csv/real_sales1.csv') expect = Table.inputfromfile('test_csv/moving_sum_of_step_1.csv') R1 = Table.movsum(R, ['qty', 1]) expect = Table.sort(expect, ['saleid']) R1 = Table.sort(R1, ['saleid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertEqual(R1.table[i]['sum(qty)'], expect.table[i]['sum(qty)']) expect = Table.inputfromfile('test_csv/moving_sum_of_step_3.csv') R1 = Table.movsum(R, ['qty', 3]) expect = Table.sort(expect, ['saleid']) R1 = Table.sort(R1, ['saleid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertEqual(R1.table[i]['sum(qty)'], expect.table[i]['sum(qty)']) expect = Table.inputfromfile('test_csv/moving_sum_of_step_4.csv') R1 = Table.movsum(R, ['qty', 4]) expect = Table.sort(expect, ['saleid']) R1 = Table.sort(R1, ['saleid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertEqual(R1.table[i]['sum(qty)'], expect.table[i]['sum(qty)']) expect = Table.inputfromfile('test_csv/moving_sum_of_step_n.csv') R1 = Table.movsum(R, ['qty', len(R1.table)]) expect = Table.sort(expect, ['saleid']) R1 = Table.sort(R1, ['saleid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertEqual(R1.table[i]['sum(qty)'], expect.table[i]['sum(qty)'])
def test_join_with_greater(self): '''Test join wtih condition: R.qty > S.Q''' R = Table.inputfromfile('test_csv/sales1.csv') S = Table.inputfromfile('test_csv/sales2.csv') R1 = Table.join(R, S, 'R', 'S', 'R.qty >S.Q') self.assertIsNotNone(R1.table) for row in R1.table: self.assertIsNotNone(row['R_qty'], row['S_Q']) self.assertGreater(row['R_qty'], row['S_Q'])
def test_join_with_same_customerid(self): """Test join wtih condition: R.customerid = S.C""" R = Table.inputfromfile('test_csv/sales1.csv') S = Table.inputfromfile('test_csv/sales2.csv') R1 = Table.join(R, S, 'R', 'S', 'R.customerid=S.C') self.assertIsNotNone(R1.table) for row in R1.table: self.assertIsNotNone(row['R_customerid'], row['S_C']) self.assertEqual(row['R_customerid'], row['S_C'])
def test_join_with_same_princerage(self): '''Test join wtih condition: R.pricerange = S.P''' R = Table.inputfromfile('test_csv/sales1.csv') S = Table.inputfromfile('test_csv/sales2.csv') R1 = Table.join(R, S, 'R', 'S', 'R.pricerange = S.P') self.assertIsNotNone(R1.table) for row in R1.table: self.assertIsNotNone(row['R_pricerange'], row['S_P']) self.assertEqual(row['R_pricerange'], row['S_P'])
def test_join_with_multiple_condtion(self): '''Test join wtih condition: (R.qty >= S.Q) and (R.pricerange = S.P)''' R = Table.inputfromfile('test_csv/sales1.csv') S = Table.inputfromfile('test_csv/sales2.csv') R1 = Table.join(R, S, 'R', 'S', '(R.qty >= S.Q) and (R.pricerange = S.P)') self.assertIsNotNone(R1.table) for row in R1.table: self.assertIsNotNone(row['R_qty'], row['S_Q']) self.assertGreaterEqual(row['R_qty'], row['S_Q']) self.assertIsNotNone(row['R_pricerange'], row['S_P']) self.assertEqual(row['R_pricerange'], row['S_P'])
def test_join_column_name(self): '''Test join wtih the output column name''' R = Table.inputfromfile('test_csv/sales1.csv') S = Table.inputfromfile('test_csv/sales2.csv') R1 = Table.join(R, S, 'R1', 'S1', 'True') self.assertIsNotNone(R1.table) keyset = R1.table[0].keys() for key in R.table[0].keys(): new_key = 'R1_{}'.format(key) self.assertTrue(new_key in keyset) for key in S.table[0].keys(): new_key = 'S1_{}'.format(key) self.assertTrue(new_key in keyset)
def test_select_with_greater_equal_item_id(self): '''Test select with condtion: itemid >= 20''' R = Table.inputfromfile('test_csv/sales1.csv') condition = "itemid >= 20" R1 = Table.select(R, condition) for row in R1.table: self.assertGreaterEqual(row['itemid'], 20)
def test_select_with_not_equal_item_id(self): '''Test select with condtion: itemid != 14''' R = Table.inputfromfile('test_csv/sales1.csv') condition = "itemid != 14" R1 = Table.select(R, condition) for row in R1.table: self.assertNotEqual(row['itemid'], 14)
def test_select_with_equal_pricerange(self): '''Test select with condtion: pricerange = 'moderate' ''' R = Table.inputfromfile('test_csv/sales1.csv') condition = "pricerange = 'moderate'" R1 = Table.select(R, condition) for row in R1.table: self.assertEquals(row['pricerange'], 'moderate')
def test_sort_with_single_column_Nan(self): """Test table with: sort(R, pricerange)""" R = Table.inputfromfile('test_csv/sales1.csv') R = Table.sort(R, ['pricerange']) self.assertIsNotNone(R.table) cur = R.table[0]['pricerange'] for row in R.table: self.assertGreaterEqual(row['pricerange'], cur) cur = row['pricerange']
def test_select_with_multiple_condition(self): '''Test select with condtion: itemid >= 20 and pricerange == 'moderate' ''' R = Table.inputfromfile('test_csv/sales1.csv') condition = "(itemid >= 20) and (pricerange == 'moderate')" R1 = Table.select(R, condition) for row in R1.table: self.assertGreaterEqual(row['itemid'], 20) self.assertEquals(row['pricerange'], 'moderate')
def test_sum_with_sales_1_and_column_saleid(self): '''Test sum with: sum(saleid)''' R = Table.inputfromfile('test_csv/sales1.csv') R1 = Table.sum(R, 'saleid') self.assertIsNotNone(R1.table) sum = 0 for row in R.table: sum = sum + row['saleid'] for row in R1.table: self.assertIsNotNone(row['sum(saleid)']) self.assertEqual(row['sum(saleid)'], sum)
def test_hash(self): """Test building hash index on column: saleid""" self.R = Table.inputfromfile('test_csv/sales1.csv') Table.Hash(self.R, 'saleid') self.assertIsNotNone(self.R.index) self.assertIsNotNone(self.R.index['saleid']) test_index = self.R.index['saleid'] self.assertEqual([0], test_index[36]) self.assertEqual([1], test_index[784]) self.assertEqual([2], test_index[801]) self.assertEqual([3], test_index[905])
def test_avggroup(self): """ Test avggroup """ R = Table.inputfromfile('test_csv/real_sales1.csv') expect = Table.inputfromfile('test_csv/avg_qty_groupby_princerange.csv') R1 = Table.avggroup(R, ['qty', 'pricerange']) expect = Table.sort(expect, ['pricerange']) R1 = Table.sort(R1, ['pricerange']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertAlmostEqual(R1.table[i]['avg(qty)'], expect.table[i]['avg(qty)'], places=4) R = Table.inputfromfile('test_csv/real_sales1.csv') expect = Table.inputfromfile('test_csv/avg_qty_groupby_customerid_princerange.csv') R1 = Table.avggroup(R, ['qty', 'pricerange', 'customerid']) expect = Table.sort(expect, ['pricerange', 'customerid']) R1 = Table.sort(R1, ['pricerange', 'customerid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertAlmostEqual(R1.table[i]['avg(qty)'], expect.table[i]['avg(qty)'], places=4)
def test_countgroup(self): """ Test countgroup """ R = Table.inputfromfile('test_csv/real_sales1.csv') expect = Table.inputfromfile('test_csv/count_saleid_groupby_pricerange.csv') R1 = Table.countgroup(R, ['pricerange']) expect = Table.sort(expect, ['pricerange']) R1 = Table.sort(R1, ['pricerange']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertEqual(R1.table[i], expect.table[i]) R = Table.inputfromfile('test_csv/real_sales1.csv') expect = Table.inputfromfile('test_csv/count_saleid_groupby_pricerange_customerid.csv') R1 = Table.countgroup(R, ['pricerange', 'customerid']) expect = Table.sort(expect, ['pricerange', 'customerid']) R1 = Table.sort(R1, ['pricerange', 'customerid']) self.assertEqual(len(R1.table), len(expect.table)) for i in range(len(R1.table)): self.assertEqual(R1.table[i], expect.table[i])
def test_avg_with_sales_1_and_column_saleid(self): '''Test avg with: avg(saleid)''' R = Table.inputfromfile('test_csv/sales1.csv') R1 = Table.avg(R, 'saleid') self.assertIsNotNone(R1.table) sum = 0 for row in R.table: sum = sum + row['saleid'] avg = sum / len(R.table) for row in R1.table: self.assertIsNotNone(row['avg(saleid)']) self.assertEqual(row['avg(saleid)'], avg)
def test_Btree_with_pricerange(self): """Test building Btree index on column: pricerange""" R = Table.inputfromfile('test_csv/sales1.csv') Table.Btree(R, 'pricerange') self.assertIsNotNone(R.index) self.assertIsNotNone(R.index['pricerange']) test_index = R.index['pricerange'] self.assertEqual([0, 1], test_index.get('moderate')) self.assertEqual([2, 3, 5, 6, 7, 10, 11], test_index.get('outrageous')) self.assertEqual([4], test_index.get('supercheap')) self.assertEqual([9], test_index.get('cheap')) self.assertEqual([8], test_index.get('expensive'))
def test_project_with_sales_1_and_column_saleid_itemid(self): '''Test projection with two columns''' R = Table.inputfromfile('test_csv/sales1.csv') R1 = Table.projection(R, ['saleid', 'itemid']) self.assertIsNotNone(R1.table) for row in R1.table: self.assertIsNotNone(row['saleid']) self.assertIsNotNone(row['itemid']) self.assertRaises(KeyError, lambda: row['customerid']) self.assertRaises(KeyError, lambda: row['storeid']) self.assertRaises(KeyError, lambda: row['time']) self.assertRaises(KeyError, lambda: row['qty']) self.assertRaises(KeyError, lambda: row['pricerange'])
def test_sort_with_mulitple_columns(self): """Test table with: sort(R, salesid, storeid)""" R = Table.inputfromfile('test_csv/sales1.csv') R = Table.sort(R, ['saleid', 'storeid']) self.assertIsNotNone(R.table) saleid = R.table[0]['saleid'] storeid = R.table[0]['storeid'] for row in R.table: self.assertGreaterEqual(row['saleid'], saleid) if (row['saleid'] == saleid): self.assertGreaterEqual(row['storeid'], storeid) saleid = row['saleid'] storeid = row['storeid']
def test_select_with_equal_id(self): '''Test select with condtion: saleid = 36''' R = Table.inputfromfile('sales1') condition = 'saleid = 36' R1 = Table.select(R,condition) self.assertIsNotNone(R1.table[0]) self.assertEqual(R1.table[0]['saleid'],36) self.assertEqual(R1.table[0]['itemid'],14) self.assertEqual(R1.table[0]['customerid'],2) self.assertEqual(R1.table[0]['storeid'],38) self.assertEqual(R1.table[0]['time'],49) self.assertEqual(R1.table[0]['qty'],15) self.assertEqual(R1.table[0]['pricerange'],'moderate') self.assertEqual(len(R1.table),1)
def test_concate(self): """Test table with: concat(R,R)""" R = Table.inputfromfile('test_csv/sales1.csv') R1 = Table.concat(R, R) self.assertIsNotNone(R1.table) index_R1 = 0 index_R = 0 while index_R1 < len(R1.table): if index_R < len(R.table): self.assertEqual(R1.table[index_R1], R.table[index_R]) index_R = index_R + 1 index_R1 = index_R1 + 1 else: index_R = 0
def test_select_with_equal_id(self): '''Test select with condtion: saleid = 36''' self.R = Table.inputfromfile('test_csv/sales1.csv') condition = 'saleid = 36' self.R1 = Table.select(self.R, condition) self.assertIsNotNone(self.R1.table[0]) expected = { 'saleid': 36, 'itemid': 14, 'customerid': 2, 'storeid': 38, 'time': 49, 'qty': 15, 'pricerange': 'moderate' } self.assertEqual(self.R1.table[0], expected) self.assertEqual(len(self.R1.table), 1)
def test_select_with_index_btree(self): """Test select with btree index on column pricerage""" R = Table.inputfromfile('test_csv/real_sales1.csv') condition = "pricerange = 'cheap'" start = time.time() for i in range(10): R1 = Table.select(R, condition) end = time.time() without_index = end - start Table.Btree(R, 'pricerange') start = time.time() for i in range(10): R1 = Table.select(R, condition) end = time.time() with_index = end - start for row in R1.table: self.assertEqual(row['pricerange'], 'cheap') self.assertLess(with_index, without_index / 5)
def test_inputfile_sales1(self): """Test inputfile: import sales1.csv""" self.R = Table.inputfromfile('test_csv/sales1.csv') self.assertIsNotNone(self.R.table) self.assertIsNotNone(self.R.table[0]['saleid']) self.assertIsNotNone(self.R.table[0]['itemid']) self.assertIsNotNone(self.R.table[0]['customerid']) self.assertIsNotNone(self.R.table[0]['storeid']) self.assertIsNotNone(self.R.table[0]['time']) self.assertIsNotNone(self.R.table[0]['qty']) self.assertIsNotNone(self.R.table[0]['pricerange']) self.assertEqual(self.R.table[0]['saleid'], 36) self.assertEqual(self.R.table[0]['itemid'], 14) self.assertEqual(self.R.table[0]['customerid'], 2) self.assertEqual(self.R.table[0]['storeid'], 38) self.assertEqual(self.R.table[0]['time'], 49) self.assertEqual(self.R.table[0]['qty'], 15) self.assertEqual(self.R.table[0]['pricerange'], 'moderate') self.assertEqual(len(self.R.table), 12)
def test_inputfile_sales2(self): """Test inputfile: import sales2.csv""" self.S = Table.inputfromfile('test_csv/sales2.csv') self.assertIsNotNone(self.S.table) self.assertIsNotNone(self.S.table[0]['saleid']) self.assertIsNotNone(self.S.table[0]['I']) self.assertIsNotNone(self.S.table[0]['C']) self.assertIsNotNone(self.S.table[0]['S']) self.assertIsNotNone(self.S.table[0]['T']) self.assertIsNotNone(self.S.table[0]['Q']) self.assertIsNotNone(self.S.table[0]['P']) self.assertEqual(self.S.table[0]['saleid'], 3506) self.assertEqual(self.S.table[0]['I'], 13517) self.assertEqual(self.S.table[0]['C'], 16566) self.assertEqual(self.S.table[0]['S'], 45) self.assertEqual(self.S.table[0]['T'], 73) self.assertEqual(self.S.table[0]['Q'], 19) self.assertEqual(self.S.table[0]['P'], 'expensive') self.assertEqual(len(self.S.table), 10)
def test_count(self): """ Test count """ R = Table.inputfromfile('test_csv/sales1.csv') self.assertEqual(12, Table.count(R))