Esempio n. 1
0
    def test_fill_missing(self):
        """Check NaN filling at both ends of the range with ``skip_missing=False``.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  An hourly ``arithmetic.first`` operator is then
        applied to a ``DataChunk`` whose range is five hours longer than
        ``self.hours``: first with the extra hours at the end, then with them
        at the beginning.  In each case the result must have
        ``self.hours + 5`` rows, with NaN in column 1 for exactly the five
        extra rows.
        """
        hour_ms = 3600 * 1000
        padding = 5

        def make_op():
            # A fresh operator is built for each half of the test.
            return grouping.GroupByDatetimeField(self.inputs,
                                                 arithmetic.first,
                                                 field='hour',
                                                 width=1,
                                                 skip_missing=False)

        start_s = dtutil.dt2ts(
            dtutil.strptime_tz("1 1 2000 0",
                               "%m %d %Y %H",
                               tzstr="America/Los_Angeles"))
        self.setUp(start_s)

        # check that we fill the end correctly
        fill_end = make_op()
        start_ms = start_s * 1000
        end_window = (start_ms, start_ms + ((self.hours + padding) * hour_ms))
        rv = fill_end(
            operators.DataChunk(end_window, True, True, [self.testdata]))
        self.assertEqual(len(rv[0]), self.hours + padding)
        self.assertEqual(np.sum(np.isnan(rv[0][-padding:, 1])), padding)
        self.assertEqual(np.sum(np.isnan(rv[0][:-padding, 1])), 0)

        # and the beginning
        fill_start = make_op()
        start_window = (start_ms - (padding * hour_ms),
                        start_ms + (self.hours * hour_ms))
        rv = fill_start(
            operators.DataChunk(start_window, True, True, [self.testdata]))
        self.assertEqual(len(rv[0]), self.hours + padding)
        self.assertEqual(np.sum(np.isnan(rv[0][:padding, 1])), padding)
        self.assertEqual(np.sum(np.isnan(rv[0][padding:, 1])), 0)
Esempio n. 2
0
 def test_day(self):
     """Group the fixture by day with ``arithmetic.mean`` and check row counts.

     Expects exactly one output stream, holding one row per whole 24-hour
     span in ``self.hours`` plus one.
     """
     op = grouping.GroupByDatetimeField(self.inputs,
                                        arithmetic.mean,
                                        field='day')
     outdata = op([self.testdata])
     self.assertEqual(len(outdata), 1)
     # Floor division keeps the expected count an integer under both
     # Python 2 and Python 3 (assumes self.hours is an int -- TODO confirm).
     self.assertEqual(len(outdata[0]), (self.hours // 24) + 1)
Esempio n. 3
0
    def test_inclusive(self):
        """Check day grouping with ``inclusive=(True, True)`` and no snapping.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  The first 30 rows are passed through a
        ``NullOperator`` grouped by day; the result must have shape
        ``(25, 2)`` and its first and last timestamps must equal those of
        input rows 0 and 24.
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        # NOTE: the check below (same operator without ``inclusive`` and
        # ``snap_times``) is currently disabled.
        #         startshape = self.testdata.shape
        #         startdata = np.copy(self.testdata)

        #         op = grouping.GroupByDatetimeField(self.inputs, oputils.NullOperator, field='day')
        #         rv = op([self.testdata[:30, :]])
        #         self.assertEquals(rv[0].shape, (24, 2))
        #         # check for mutations
        #         self.assertEquals(self.testdata.shape, startshape)
        #         self.assertEquals(np.sum(startdata - self.testdata), 0)

        op2 = grouping.GroupByDatetimeField(self.inputs,
                                            oputils.NullOperator,
                                            field='day',
                                            inclusive=(True, True),
                                            snap_times=False)
        rv = op2([self.testdata[0:30, :]])
        # 25 rows: indices 0 through 24 of the input, both ends included.
        self.assertEqual(rv[0].shape, (25, 2))
        self.assertEqual(rv[0][0, 0], self.testdata[0, 0])
        self.assertEqual(rv[0][24, 0], self.testdata[24, 0])
Esempio n. 4
0
    def test_snap_times(self):
        """Check that ``snap_times=True`` snaps the output timestamp.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  Only rows 10..29 are fed to a day-grouped
        ``arithmetic.first`` operator, yet the first output timestamp must
        equal the timestamp of input row 0.
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        op = grouping.GroupByDatetimeField(self.inputs,
                                           arithmetic.first,
                                           field='day',
                                           snap_times=True)
        rv = op([self.testdata[10:30]])
        self.assertEqual(rv[0][0, 0], self.testdata[0, 0])
Esempio n. 5
0
    def test_offset(self):
        """Check day grouping when the input starts ``i`` rows into the data.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  For each offset ``i`` in 0..23, a 25-row
        slice starting at row ``i`` is passed through a day-grouped
        ``NullOperator``; the output must have shape ``(24 - i, 2)``.  The
        operator is reset between offsets.
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        op = grouping.GroupByDatetimeField(self.inputs,
                                           oputils.NullOperator,
                                           field='day')
        for i in range(24):
            rv = op([self.testdata[i:25 + i, :]])
            self.assertEqual(rv[0].shape, (24 - i, 2))
            # Clear operator state so the next offset starts fresh.
            op.reset()
Esempio n. 6
0
    def test_hour(self):
        """Group the fixture by hour with ``arithmetic.mean`` and check each row.

        The output must have ``self.hours - 1`` rows.  Row ``i`` must carry
        the value ``i`` in column 1 and a timestamp (milliseconds, column 0)
        whose minute and second components are both zero.
        """
        op = grouping.GroupByDatetimeField(self.inputs,
                                           arithmetic.mean,
                                           field='hour')
        outdata = op([self.testdata])

        # don't get an output for the last hour
        self.assertEqual(len(outdata[0]), self.hours - 1)
        for i in range(len(outdata[0])):
            # should have only been one thing in each bucket
            self.assertEqual(outdata[0][i, 1], i)
            # make sure we snapped to the beginning of the window
            dt = datetime.datetime.utcfromtimestamp(outdata[0][i, 0] / 1000)
            self.assertEqual(dt.minute, 0)
            self.assertEqual(dt.second, 0)
Esempio n. 7
0
    def test_slide(self):
        """Check a 4-hour window sliding by 2 hours.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  With ``arithmetic.first``, ``width=4`` and
        ``slide=2``, both output columns are compared against every second
        input row, excluding the last two rows (``testdata[:-2:2]``).
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        op = grouping.GroupByDatetimeField(self.inputs,
                                           arithmetic.first,
                                           field='hour',
                                           width=4,
                                           slide=2)
        rv = op([self.testdata])
        # NOTE(review): a zero sum of differences is a weak equality check,
        # since positive and negative differences could cancel out.
        self.assertEqual(np.sum(rv[0][:, 0] - self.testdata[:-2:2, 0]), 0)
        self.assertEqual(np.sum(rv[0][:, 1] - self.testdata[:-2:2, 1]), 0)
Esempio n. 8
0
    def test_flush(self):
        """Check that the last hourly bucket is flushed.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  An hourly ``arithmetic.first`` operator is
        applied to a ``DataChunk`` spanning ``self.hours`` hours from that
        start; the last output timestamp must lie ``self.hours - 1`` hours
        after the start.
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        op = grouping.GroupByDatetimeField(self.inputs,
                                           arithmetic.first,
                                           field='hour',
                                           width=1)
        # ``now`` is multiplied by 1000 to build the millisecond range.
        rv = op(
            operators.DataChunk(
                (now * 1000, now * 1000 + (self.hours * 3600 * 1000)), True,
                True, [self.testdata]))

        # if we don't properly flush the last hour, we should only get hours - 1 results
        self.assertEqual((rv[0][-1, 0] - (now * 1000)) / (3600 * 1000),
                         self.hours - 1)
Esempio n. 9
0
    def test_increment(self):
        """Check hour grouping for several window widths.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  For each width ``incr`` in 2, 4, 6, 8, 12
        and 24, the first 25 rows are grouped with ``arithmetic.first``.
        The output must have ``24 // incr`` rows, and row ``i`` must carry
        the timestamp of input row ``i * incr`` and the value ``i * incr``.
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        for incr in [2, 4, 6, 8, 12, 24]:
            op = grouping.GroupByDatetimeField(self.inputs,
                                               arithmetic.first,
                                               field='hour',
                                               width=incr)
            rv = op([self.testdata[:25, :]])
            # Floor division: every width divides 24 evenly, and the result
            # stays an int under both Python 2 and Python 3.
            expected_rows = 24 // incr
            # check the shape
            self.assertEqual(len(rv[0]), expected_rows)
            for i in range(expected_rows):
                # the timestamps
                self.assertEqual(rv[0][i, 0], self.testdata[i * incr, 0])
                # and the values
                self.assertEqual(rv[0][i, 1], i * incr)
            del op
Esempio n. 10
0
    def test_oneatatime(self):
        """Check day grouping when rows are fed to the operator one at a time.

        ``setUp`` is re-run with the timestamp of hour 0 on 1 Jan 2000
        (America/Los_Angeles).  Rows 0..23 are passed individually to a
        day-grouped ``NullOperator``, each call being expected to return
        something with the shape of ``operators.null``.  Passing row 24 must
        then return a ``(24, 2)`` result whose timestamps all equal that of
        row 0 and whose values match the first 24 input rows.
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        op = grouping.GroupByDatetimeField(self.inputs,
                                           oputils.NullOperator,
                                           field='day')
        for i in range(24):
            rv = op([self.testdata[i:i + 1, :]])
            self.assertEqual(rv[0].shape, operators.null.shape)

        rv = op([self.testdata[24:25, :]])

        self.assertEqual(rv[0].shape, (24, 2))
        # make sure we snapped
        self.assertEqual(np.sum(rv[0][:, 0] - self.testdata[0, 0]), 0)
        # and got back the right data
        self.assertEqual(np.sum(rv[0][:, 1] - self.testdata[:24, 1]), 0)