def test_create_cache_train_once(self):
    """Prep data computed against the initial training set must be cached and
    reused, even after the context's data, train_index, or prep_index change.

    The Normalize feature is first built on the original fixture data; every
    later `create` call must keep normalizing with the ORIGINAL mean/std.
    """
    # pandas.DataFrame.append was removed in pandas 2.0; use pd.concat below.
    import pandas as pd

    self.ctx = context.DataContext(store.MemoryStore('test', verbose=True),
                                   self.data, train_once=True)
    f = base.Normalize(base.F(10) + base.F('a'))
    ctx = self.ctx

    # First build: normalization stats come from the full training data,
    # so the result itself is standard-normal.
    r = f.create(ctx)
    r = r[r.columns[0]]
    self.assertAlmostEqual(r.mean(), 0)
    self.assertAlmostEqual(r.std(), 1)

    # now add some new data
    idx = len(self.data) + 1000
    new_rows = DataFrame([100, 200], columns=['a'],
                         index=Index([idx, idx + 1]))
    ctx.data = pd.concat([ctx.data, new_rows])
    r = f.create(ctx)
    r = r[r.columns[0]]
    # New rows must be normalized with the ORIGINAL stats, not recomputed ones.
    self.assertAlmostEqual(r[idx],
                           (100 - self.data['a'].mean()) / self.data['a'].std())

    # drop all the other data ... should still use old prep data
    # (.ix was removed from pandas; .loc is label-based and equivalent here
    # because the index labels are the explicit ints idx, idx+1)
    ctx.data = ctx.data.loc[[idx, idx + 1]]
    r = f.create(ctx)
    r = r[r.columns[0]]
    self.assertAlmostEqual(r[idx],
                           (100 - self.data['a'].mean()) / self.data['a'].std())

    # new train_index should NOT bust old cache
    ctx.data = ctx.data.loc[[idx, idx + 1]]
    ctx.train_index = ctx.data.index
    ctx.prep_index = ctx.data.index
    r = f.create(ctx)
    r = r[r.columns[0]]
    self.assertAlmostEqual(r[idx],
                           (100 - self.data['a'].mean()) / self.data['a'].std())
def setUp(self):
    """Build a 10-row fixture frame and a context backed by a verbose
    in-memory store named 'test'."""
    frame = make_data(10)
    self.data = frame
    self.ctx = context.DataContext(store.MemoryStore('test', verbose=True),
                                   frame)
def setUp(self):
    """Build a 10-row fixture frame and a context with no backing store
    (data passed by keyword only)."""
    frame = make_data(10)
    self.data = frame
    self.ctx = context.DataContext(data=frame)
def setUp(self):
    """Build a 10-row fixture frame with a boolean 'groups' column flagging
    rows whose 'ints' value exceeds 5, plus a verbose in-memory context."""
    frame = make_data(10)
    frame['groups'] = frame['ints'].map(lambda v: v > 5)
    self.data = frame
    self.ctx = context.DataContext(store.MemoryStore(verbose=True), frame)