def test_random_table(self):
    """Check a RandomTable built from explicit column names.

    Before the scheduler is started, the table's frame must expose the
    requested names 'a' and 'b' as its first two columns, for a total of
    three columns.  After the scheduler is started, the frame must hold
    10000 rows and neither named column may contain null values.
    """
    expected_labels = ('a', 'b')
    scheduler = Scheduler()
    table = RandomTable(['a', 'b'], rows=10000, scheduler=scheduler)

    for position, label in enumerate(expected_labels):
        self.assertEqual(table.df().columns[position], label)
    # Two requested columns plus the UPDATE_COLUMN.
    self.assertEqual(len(table.df().columns), 3)

    # Attach an Every module (proc=print_len) to the table's df output.
    length_printer = Every(proc=print_len, constant_time=True,
                           scheduler=scheduler)
    length_printer.input.df = table.output.df
    scheduler.start()

    self.assertEqual(len(table.df()), 10000)
    for label in expected_labels:
        self.assertFalse(table.df()[label].isnull().any())
def test_random_table2(self):
    """Check a large RandomTable built from a column count.

    The table is created with 10 columns and ``force_valid_ids=True``.
    Before the scheduler is started, the frame must have 11 columns, the
    first two being named '_1' and '_2'.  After the scheduler is started,
    the frame must hold ten million rows and neither of those two columns
    may contain null values.
    """
    row_count = 10000000
    scheduler = Scheduler()
    # Original author's note: produces more than 4M rows per second on
    # their laptop.
    table = RandomTable(10, rows=row_count, force_valid_ids=True,
                        scheduler=scheduler)

    # Ten requested columns plus the UPDATE_COLUMN.
    self.assertEqual(len(table.df().columns), 11)
    leading_labels = ('_1', '_2')
    for position, label in enumerate(leading_labels):
        self.assertEqual(table.df().columns[position], label)

    # Attach an Every module (proc=print_len) to the table's df output.
    length_printer = Every(proc=print_len, constant_time=True,
                           scheduler=scheduler)
    length_printer.input.df = table.output.df
    scheduler.start()

    self.assertEqual(len(table.df()), row_count)
    for label in leading_labels:
        self.assertFalse(table.df()[label].isnull().any())
def test_max(self):
    """Compare the Max module's output with pandas' own column maxima.

    A 10-column, 10000-row RandomTable feeds a Max module, whose output
    in turn feeds a Print module.  Once the scheduler has been started,
    the maxima computed directly on the table's frame (UPDATE_COLUMN
    excluded) must be numerically close to the value ``last_row``
    extracts from the Max module's frame.
    """
    scheduler = Scheduler()
    source = RandomTable(10, rows=10000, scheduler=scheduler)

    max_module = Max(scheduler=scheduler)
    max_module.input.df = source.output.df

    printer = Print(scheduler=scheduler)
    printer.input.df = max_module.output.df

    scheduler.start()

    # Reference value: per-column maxima over everything except the
    # UPDATE_COLUMN bookkeeping column.
    frame = source.df()
    data_columns = source.columns.difference([source.UPDATE_COLUMN])
    expected = frame[data_columns].max()

    # presumably the most recent result row without the update column
    # -- confirm against last_row's definition.
    actual = last_row(max_module.df(), remove_update=True)

    self.assertTrue(np.allclose(expected, actual))
def test_var(self):
    """Compare the Var module's output with pandas' own variance.

    A single-column, 1000-row RandomTable feeds a Var module, whose
    output in turn feeds a Print module.  Once the scheduler has been
    started, the variance computed directly on column ``1`` of the
    table's frame must be numerically close to the value ``last_row``
    extracts from the Var module's frame.
    """
    scheduler = Scheduler()
    source = RandomTable(1, rows=1000, scheduler=scheduler)

    var_module = Var(scheduler=scheduler)
    var_module.input.df = source.output.df

    printer = Print(scheduler=scheduler)
    printer.input.df = var_module.output.df

    scheduler.start()

    # Reference value: variance of the column labelled 1.
    column = source.df()[1]
    expected = column.var()

    # presumably the most recent result row without the update column
    # -- confirm against last_row's definition.
    actual = last_row(var_module.df(), remove_update=True)

    self.assertTrue(np.allclose(expected, actual))