def test_fill_missing(self):
    """Check NaN padding when the requested region is wider than the data.

    With ``skip_missing=False`` and one-hour windows, a region five hours
    longer than ``self.hours`` must yield ``self.hours + 5`` rows, and only
    the five padded rows may hold NaN in column 1.  The padding is checked
    once at the end of the region and once at its beginning.
    """
    hour_ms = 3600 * 1000
    pad = 5

    start = dtutil.dt2ts(
        dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                           tzstr="America/Los_Angeles"))
    # Rebuild the fixture anchored at the chosen start time (seconds).
    self.setUp(start)
    # The chunk regions below are expressed in milliseconds.
    start_ms = start * 1000

    # check that we fill the end correctly
    tail_op = grouping.GroupByDatetimeField(self.inputs, arithmetic.first,
                                            field='hour', width=1,
                                            skip_missing=False)
    tail_region = (start_ms, start_ms + ((self.hours + pad) * hour_ms))
    tail_rv = tail_op(
        operators.DataChunk(tail_region, True, True, [self.testdata]))
    self.assertEqual(len(tail_rv[0]), self.hours + pad)
    self.assertEqual(np.sum(np.isnan(tail_rv[0][-pad:, 1])), pad)
    self.assertEqual(np.sum(np.isnan(tail_rv[0][:-pad, 1])), 0)

    # and the beginning
    head_op = grouping.GroupByDatetimeField(self.inputs, arithmetic.first,
                                            field='hour', width=1,
                                            skip_missing=False)
    head_region = (start_ms - (pad * hour_ms),
                   start_ms + (self.hours * hour_ms))
    head_rv = head_op(
        operators.DataChunk(head_region, True, True, [self.testdata]))
    self.assertEqual(len(head_rv[0]), self.hours + pad)
    self.assertEqual(np.sum(np.isnan(head_rv[0][:pad, 1])), pad)
    self.assertEqual(np.sum(np.isnan(head_rv[0][pad:, 1])), 0)
def test_day(self):
    """Group the default fixture by day and check the output row count.

    Applies ``arithmetic.mean`` with ``field='day'`` to ``self.testdata``
    and expects a single output stream whose length is the number of whole
    24-hour spans in ``self.hours`` plus one.
    """
    op = grouping.GroupByDatetimeField(self.inputs, arithmetic.mean,
                                       field='day')
    outdata = op([self.testdata])
    self.assertEqual(len(outdata), 1)
    # Floor division keeps the expected count an integer on Python 3 too;
    # for an int ``self.hours`` this is exactly what ``/`` did on Python 2.
    # NOTE(review): assumes self.hours is an int -- confirm against setUp.
    self.assertEqual(len(outdata[0]), (self.hours // 24) + 1)
def test_inclusive(self):
    """Check day grouping with both window ends inclusive.

    Feeds the first 30 rows through a ``NullOperator`` grouped by day with
    ``inclusive=(True, True)`` and ``snap_times=False``.  The result must
    have 25 rows, with its first and last timestamps equal to those of
    fixture rows 0 and 24.
    """
    now = dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                             tzstr="America/Los_Angeles")
    now = dtutil.dt2ts(now)
    self.setUp(now)

    # NOTE(review): the default (non-inclusive) check below was already
    # commented out in the original; reason unknown, kept for reference.
    # startshape = self.testdata.shape
    # startdata = np.copy(self.testdata)
    # op = grouping.GroupByDatetimeField(self.inputs, oputils.NullOperator, field='day')
    # rv = op([self.testdata[:30, :]])
    # self.assertEquals(rv[0].shape, (24, 2))
    # # check for mutations
    # self.assertEquals(self.testdata.shape, startshape)
    # self.assertEquals(np.sum(startdata - self.testdata), 0)

    op2 = grouping.GroupByDatetimeField(self.inputs, oputils.NullOperator,
                                        field='day',
                                        inclusive=(True, True),
                                        snap_times=False)
    rv = op2([self.testdata[0:30, :]])
    self.assertEqual(rv[0].shape, (25, 2))
    # With snapping disabled the original timestamps must come through.
    self.assertEqual(rv[0][0, 0], self.testdata[0, 0])
    self.assertEqual(rv[0][24, 0], self.testdata[24, 0])
def test_snap_times(self):
    """Check that ``snap_times=True`` moves the output timestamp back.

    Only fixture rows 10..29 are fed to a day-grouped ``arithmetic.first``
    operator, yet the first output timestamp must equal the timestamp of
    fixture row 0.
    """
    now = dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                             tzstr="America/Los_Angeles")
    now = dtutil.dt2ts(now)
    self.setUp(now)

    op = grouping.GroupByDatetimeField(self.inputs, arithmetic.first,
                                       field='day', snap_times=True)
    rv = op([self.testdata[10:30]])
    # presumably row 0 sits on the day boundary set up above -- the input
    # itself starts at row 10, so equality here means the time was snapped.
    self.assertEqual(rv[0][0, 0], self.testdata[0, 0])
def test_offset(self):
    """Check day grouping when the input starts part-way through the day.

    For every offset ``i`` in 0..23, rows ``i`` to ``24 + i`` are fed to a
    day-grouped ``NullOperator``; the output must have ``24 - i`` rows and
    two columns.  The operator is reset between offsets.
    """
    now = dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                             tzstr="America/Los_Angeles")
    now = dtutil.dt2ts(now)
    self.setUp(now)

    op = grouping.GroupByDatetimeField(self.inputs, oputils.NullOperator,
                                       field='day')
    for i in range(0, 24):
        rv = op([self.testdata[i:25 + i, :]])
        self.assertEqual(rv[0].shape, (24 - i, 2))
        # Clear operator state so each offset is evaluated independently.
        op.reset()
def test_hour(self):
    """Group the default fixture by hour and verify values and snapping.

    Uses ``arithmetic.mean`` with ``field='hour'``.  The output must have
    ``self.hours - 1`` rows, row ``i`` must carry the value ``i``, and each
    output timestamp (milliseconds) must fall exactly on an hour boundary.
    """
    op = grouping.GroupByDatetimeField(self.inputs, arithmetic.mean,
                                       field='hour')
    outdata = op([self.testdata])

    # don't get an output for the last hour
    self.assertEqual(len(outdata[0]), self.hours - 1)
    for i in range(0, len(outdata[0])):
        # should have only been one thing in each bucket
        self.assertEqual(outdata[0][i, 1], i)

        # make sure we snapped to the beginning of the window
        dt = datetime.datetime.utcfromtimestamp(outdata[0][i, 0] / 1000)
        self.assertEqual(dt.minute, 0)
        self.assertEqual(dt.second, 0)
def test_slide(self):
    """Check sliding windows: four-hour width advanced two hours at a time.

    With ``arithmetic.first``, ``width=4`` and ``slide=2`` the output must
    match every second fixture row (excluding the last two rows) in both
    the timestamp and the value column.
    """
    now = dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                             tzstr="America/Los_Angeles")
    now = dtutil.dt2ts(now)
    self.setUp(now)

    op = grouping.GroupByDatetimeField(self.inputs, arithmetic.first,
                                       field='hour', width=4, slide=2)
    rv = op([self.testdata])
    # Compare column-wise against the fixture sampled with stride 2.
    self.assertEqual(np.sum(rv[0][:, 0] - self.testdata[:-2:2, 0]), 0)
    self.assertEqual(np.sum(rv[0][:, 1] - self.testdata[:-2:2, 1]), 0)
def test_flush(self):
    """Check that the final hour is emitted when a bounded chunk is given.

    The whole fixture is wrapped in a ``DataChunk`` whose region spans
    ``self.hours`` hours from the start time (in milliseconds).  The last
    output timestamp must lie ``self.hours - 1`` hours after the start.
    """
    now = dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                             tzstr="America/Los_Angeles")
    now = dtutil.dt2ts(now)
    self.setUp(now)

    op = grouping.GroupByDatetimeField(self.inputs, arithmetic.first,
                                       field='hour', width=1)
    rv = op(
        operators.DataChunk(
            (now * 1000, now * 1000 + (self.hours * 3600 * 1000)),
            True, True, [self.testdata]))

    # if we don't properly flush the last hour, we should only get
    # hours - 1 results
    self.assertEqual((rv[0][-1, 0] - (now * 1000)) / (3600 * 1000),
                     self.hours - 1)
def test_increment(self):
    """Check hour grouping for several window widths that divide 24.

    For each width in 2, 4, 6, 8, 12 and 24, the first 25 fixture rows are
    grouped with ``arithmetic.first``.  The output must contain
    ``24 // width`` rows, and row ``i`` must carry the timestamp of fixture
    row ``i * width`` and the value ``i * width``.
    """
    now = dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                             tzstr="America/Los_Angeles")
    now = dtutil.dt2ts(now)
    self.setUp(now)

    for incr in [2, 4, 6, 8, 12, 24]:
        op = grouping.GroupByDatetimeField(self.inputs, arithmetic.first,
                                           field='hour', width=incr)
        rv = op([self.testdata[:25, :]])

        # Floor division: the count is also used as a range bound and must
        # be an int regardless of the interpreter's ``/`` semantics.
        expected_rows = 24 // incr

        # check the shape
        self.assertEqual(len(rv[0]), expected_rows)
        for i in range(0, expected_rows):
            # the timestamps
            self.assertEqual(rv[0][i, 0], self.testdata[i * incr, 0])
            # and the values
            self.assertEqual(rv[0][i, 1], i * incr)
        # Drop the operator before the next width builds a fresh one.
        del op
def test_oneatatime(self):
    """Check day grouping when rows arrive one at a time.

    Feeding fixture rows 0..23 individually must each return an output
    shaped like ``operators.null``.  Feeding row 24 must then return 24
    rows, all stamped with the timestamp of row 0 and carrying the values
    of rows 0..23.
    """
    now = dtutil.strptime_tz("1 1 2000 0", "%m %d %Y %H",
                             tzstr="America/Los_Angeles")
    now = dtutil.dt2ts(now)
    self.setUp(now)

    op = grouping.GroupByDatetimeField(self.inputs, oputils.NullOperator,
                                       field='day')
    for i in range(0, 24):
        rv = op([self.testdata[i:i + 1, :]])
        # Nothing is emitted until the window is complete.
        self.assertEqual(rv[0].shape, operators.null.shape)

    rv = op([self.testdata[24:25, :]])
    self.assertEqual(rv[0].shape, (24, 2))
    # make sure we snapped
    self.assertEqual(np.sum(rv[0][:, 0] - self.testdata[0, 0]), 0)
    # and got back the right data
    self.assertEqual(np.sum(rv[0][:, 1] - self.testdata[:24, 1]), 0)