Beispiel #1
0
    def test_fill_missing(self):
        """Check NaN padding of hourly groups when ``skip_missing=False``.

        Two query windows are used, each ``self.hours + 5`` hours long:
        one that extends five hours past ``start + self.hours`` and one
        that begins five hours before ``start``.  In both cases the result
        must hold one row per hour of the window, with NaN in column 1 for
        exactly the five padded rows and for none of the others.
        """
        hour_ms = 3600 * 1000
        pad = 5

        def make_op():
            # Each window gets its own operator instance.
            return grouping.GroupByDatetimeField(self.inputs,
                                                 arithmetic.first,
                                                 field='hour',
                                                 width=1,
                                                 skip_missing=False)

        start = dtutil.dt2ts(
            dtutil.strptime_tz("1 1 2000 0",
                               "%m %d %Y %H",
                               tzstr="America/Los_Angeles"))
        # The fixture is (re)built around the start time, given in seconds.
        self.setUp(start)

        # check that we fill the end correctly
        fill_end = make_op()
        start_ms = start * 1000  # DataChunk regions below are given in ms
        end_window = (start_ms, start_ms + ((self.hours + pad) * hour_ms))
        rv = fill_end(
            operators.DataChunk(end_window, True, True, [self.testdata]))
        rows = rv[0]
        self.assertEqual(len(rows), self.hours + pad)
        self.assertEqual(np.sum(np.isnan(rows[-pad:, 1])), pad)
        self.assertEqual(np.sum(np.isnan(rows[:-pad, 1])), 0)

        # and the beginning
        fill_start = make_op()
        start_window = (start_ms - (pad * hour_ms),
                        start_ms + (self.hours * hour_ms))
        rv = fill_start(
            operators.DataChunk(start_window, True, True, [self.testdata]))
        rows = rv[0]
        self.assertEqual(len(rows), self.hours + pad)
        self.assertEqual(np.sum(np.isnan(rows[:pad, 1])), pad)
        self.assertEqual(np.sum(np.isnan(rows[pad:, 1])), 0)
Beispiel #2
0
    def apply_operator(self, opdata, first, last):
        """Run the operator on one batch of data and write the result out.

        Column 0 of every array in ``opdata`` is multiplied by 1000 *in
        place* (presumably seconds -> milliseconds; confirm against the
        loader), the arrays are wrapped in a ``DataChunk`` spanning
        ``self.data_spec['start']`` .. ``self.data_spec['end']``, and the
        chunk is passed to ``self.op.process``.  Each output is paired with
        the matching entry of ``self.op.outputs`` and converted with
        ``self.build_result``.

        Unless ``self._stop`` is set, the results are written to
        ``self.consumer`` as one JSON document followed by ``'\\r\\n'``.

        :param opdata: iterable of arrays indexable as ``d[:, 0]``;
            modified in place.
        :param first: passed through to ``DataChunk``.
        :param last: passed through to ``DataChunk``; when true (and not
            stopped) the producer is unregistered and the consumer is
            finished after writing.
        """
        tic = time.time()

        # process
        for d in opdata:
            d[:, 0] *= 1000

        chunk = operators.DataChunk((self.data_spec['start'],
                                     self.data_spec['end']),
                                    first, last, opdata)
        redata = self.op.process(chunk)

        log.msg("STATS: Operator processing took %0.6fs" % (time.time() - tic))

        # construct a return value with metadata and data merged.  This has
        # to be a real list: on Python 3 ``map`` yields a lazy iterator,
        # which ``json.dumps`` cannot serialize.
        redata = [self.build_result(pair)
                  for pair in zip(redata, self.op.outputs)]

        if not self._stop:
            self.consumer.write(json.dumps(redata))
            self.consumer.write('\r\n')
            if last:
                self.consumer.unregisterProducer()
                self.consumer.finish()
Beispiel #3
0
    def apply_operator(self, opdata, region, first, last):
        """Run the operator on one loaded chunk and return the built results.

        Increments ``self.chunk_loaded_idx``, multiplies column 0 of every
        array in ``opdata`` by 1000 *in place* (presumably seconds ->
        milliseconds; confirm against the loader), wraps the arrays in a
        ``DataChunk`` for ``region`` and passes it to ``self.op.process``.

        :param opdata: iterable of arrays indexable as ``d[:, 0]``;
            modified in place.
        :param region: passed through to ``DataChunk`` as the chunk region.
        :param first: passed through to ``DataChunk``.
        :param last: passed through to ``DataChunk``.
        :returns: list with one ``self.build_result((data, output))`` entry
            per pair of operator result and ``self.op.outputs`` element.
        """
        tic = time.time()
        self.chunk_loaded_idx += 1

        # process
        for d in opdata:
            d[:, 0] *= 1000

        chunk = operators.DataChunk(region, first, last, opdata)
        redata = self.op.process(chunk)

        log.msg("STATS: Operator processing took %0.6fs" % (time.time() - tic))

        # construct a return value with metadata and data merged.  Return a
        # real list rather than ``map(...)``: on Python 3 ``map`` is a lazy,
        # one-shot iterator, whereas callers got a list on Python 2.
        return [self.build_result(pair)
                for pair in zip(redata, self.op.outputs)]
Beispiel #4
0
    def test_flush(self):
        """Check that the final hour is emitted when the chunk is the last.

        The operator is run once over a window of ``self.hours`` hours with
        both the first and last flags set; the timestamp of the final
        output row must fall ``self.hours - 1`` hours after the start.
        """
        now = dtutil.strptime_tz("1 1 2000 0",
                                 "%m %d %Y %H",
                                 tzstr="America/Los_Angeles")
        now = dtutil.dt2ts(now)
        self.setUp(now)

        op = grouping.GroupByDatetimeField(self.inputs,
                                           arithmetic.first,
                                           field='hour',
                                           width=1)
        rv = op(
            operators.DataChunk(
                (now * 1000, now * 1000 + (self.hours * 3600 * 1000)), True,
                True, [self.testdata]))

        # if we don't properly flush the last hour, we should only get hours - 1 results
        # (assertEqual, not the deprecated assertEquals alias, which was
        # removed in Python 3.12)
        self.assertEqual((rv[0][-1, 0] - (now * 1000)) / (3600 * 1000),
                         self.hours - 1)
Beispiel #5
0
 def process(self, data):
     """Apply this node's operator to the combined output of its children.

     Every child in ``self.children`` processes the same incoming
     ``data``.  The child results are flattened with ``util.flatten`` and
     wrapped in a new ``DataChunk`` that reuses the incoming chunk's
     ``region``, ``first`` and ``last``; that chunk is handed to
     ``self.op`` and its result is returned.
     """
     child_results = (child.process(data) for child in self.children)
     flattened = util.flatten(child_results)
     merged = operators.DataChunk(data.region, data.first, data.last,
                                  flattened)
     return self.op(merged)