Example #1
    def test_bad_args(self):
        """Trigger exceptions and warnings, etc."""

        uin = Stream()

        with warnings.catch_warnings(record=True) as wrn:
            Pipeline().from_source(uin).window_by('1h', utc=False)
            self.assertEqual(len(wrn), 1)
            self.assertTrue(issubclass(wrn[0].category, PipelineWarning))

        # bad arg
        with self.assertRaises(PipelineException):
            Pipeline().from_source(dict())

        # no source
        with self.assertRaises(PipelineException):
            Pipeline().to_keyed_collections()

        # can't iterate on unbounded source
        with self.assertRaises(PipelineIOException):
            list(uin.events())

        # bad emit on type
        with self.assertRaises(PipelineIOException):
            (
                Pipeline()
                .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
                .emit_on('BOGUS')
                .aggregate({'max_in': {'direction.in': Functions.max()}})
                .to_event_list()
            )
Example #2
    def test_aggregate_deep_path(self):
        """Make sure that the aggregator will work on a deep path."""

        elist = (
            Pipeline()
            .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
            .emit_on('flush')
            .aggregate(dict(out_max={'direction.out': Functions.max()}))
            .to_event_list()
        )

        self.assertEqual(elist[0].get('out_max'), 4)

        # Make sure it also works with the non-string (dict) form to
        # aggregate multiple columns

        elist = (
            Pipeline()
            .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
            .emit_on('flush')
            .aggregate({
                'in_max': {'direction.in': Functions.max()},
                'out_max': {'direction.out': Functions.max()},
            })
            .to_event_list()
        )

        self.assertEqual(elist[0].get('out_max'), 4)
        self.assertEqual(elist[0].get('in_max'), 8)
Example #3
    def test_bad_conversion_error(self):
        """Test converting a non-Event."""

        # pylint: disable=missing-docstring

        stream1 = Stream()

        def cback1(event):  # pylint: disable=unused-argument
            pass

        (Pipeline().from_source(stream1).as_indexed_events(
            dict(duration='1h')).to(EventOut, cback1))

        with self.assertRaises(ProcessorException):
            stream1.add_event(Pipeline())
Example #4
    def test_streaming_start_stop(self):
        """turn the stream off and on."""
        def cback(collection, window_key, group_by):  # pylint: disable=unused-argument
            """callback to pass in."""
            global RESULTS  # pylint: disable=global-statement
            RESULTS = collection

        source = Stream()

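        # this pipeline adds 3 to each event's 'in' field as events stream through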
        (Pipeline().from_source(source).offset_by(3, 'in').to(
            CollectionOut, cback))

        source.add_event(EVENTLIST1[0])
        source.add_event(EVENTLIST1[1])

        source.stop()

        source.add_event(EVENTLIST1[2])

        # Spurious lint error due to upstream tinkering
        # with the global variable
        # pylint: disable=no-member

        # source stopped, event shouldn't be added
        self.assertEqual(RESULTS.size(), 2)

        source.start()

        source.add_event(EVENTLIST1[2])

        self.assertEqual(RESULTS.size(), 3)
Example #5
    def test_streaming_multiple_chains(self):
        """streaming events with two pipelines."""
        def cback(collection, window_key, group_by):  # pylint: disable=unused-argument
            """callback to pass in."""
            global RESULTS  # pylint: disable=global-statement
            RESULTS = collection

        def cback2(collection, window_key, group_by):  # pylint: disable=unused-argument
            """callback to pass in."""
            global RESULTS2  # pylint: disable=global-statement
            RESULTS2 = collection

        source = Stream()

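        # first chain: two cumulative offsets (+1, then +2) on the 'in' field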
        pip1 = (
            Pipeline()
            .from_source(source)
            .offset_by(1, 'in')
            .offset_by(2, 'in')
            .to(CollectionOut, cback)
        )

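        # second chain: branch off pip1 and add a further +3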
        pip1.offset_by(3, 'in').to(CollectionOut, cback2)

        source.add_event(EVENTLIST1[0])

        # Spurious lint error due to upstream tinkering
        # with the global variable
        # pylint: disable=no-member

        self.assertEqual(RESULTS.size(), 1)
        self.assertEqual(RESULTS2.size(), 1)

        self.assertEqual(RESULTS.at(0).get('in'), 4)
        self.assertEqual(RESULTS2.at(0).get('in'), 7)
Example #6
    def test_map(self):
        """test .map()"""
        def mapper(event):
            """swap in and out."""
            return event.set_data({
                'in': event.get('out'),
                'out': event.get('in')
            })

        timeseries = TimeSeries(IN_OUT_DATA)

        # this otherwise-pointless indirection exercises Pipeline(pipeline) for coverage
        pip = Pipeline()

        kcol = (Pipeline(pip).from_source(timeseries.collection()).map(
            mapper).emit_on('flush').to_keyed_collections())

        self.assertEqual(kcol.get('all').at(0).get('in'), 37)
        self.assertEqual(kcol.get('all').at(0).get('out'), 80)
Example #7
    def test_simple_take(self):
        """take 10 events in batch."""

        timeseries = TimeSeries(SEPT_2014_DATA)

        kcol = (
            Pipeline().from_source(timeseries).take(10).to_keyed_collections())

        new_ts = TimeSeries(dict(name='result', collection=kcol.get('all')))
        self.assertEqual(new_ts.size(), 10)
Example #8
    def test_single_select(self):
        """select a single column."""
        timeseries = TimeSeries(IN_OUT_DATA)

        kcol = (Pipeline().from_source(timeseries).select(
            'in').to_keyed_collections())

        new_ts = TimeSeries(
            dict(name='new_timeseries', collection=kcol.get('all')))

        self.assertEqual(new_ts.columns(), ['in'])
Example #9
    def test_subset_select(self):
        """select multiple columns."""
        timeseries = TimeSeries(IN_OUT_DATA)

        kcol = (Pipeline().from_source(timeseries).select(
            ['out', 'perpendicular']).to_keyed_collections())

        new_ts = TimeSeries(
            dict(name='new_timeseries', collection=kcol.get('all')))

        self.assertEqual(set(new_ts.columns()), set(['out', 'perpendicular']))
Example #10
    def test_simple_collapse(self):
        """collapse a subset of columns."""
        timeseries = TimeSeries(IN_OUT_DATA)

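        # sum the 'in' and 'out' columns into a new 'in_out_sum' column per event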
        kcol = (
            Pipeline()
            .from_source(timeseries)
            .collapse(['in', 'out'], 'in_out_sum', Functions.sum())
            .emit_on('flush')
            .to_keyed_collections()
        )

        self.assertEqual(kcol.get('all').at(0).get('in_out_sum'), 117)
        self.assertEqual(kcol.get('all').at(1).get('in_out_sum'), 110)
        self.assertEqual(kcol.get('all').at(2).get('in_out_sum'), 108)
Example #11
    def test_event_to_event_noop(self):
        """Event to Event as a noop."""

        stream1 = Stream()

        def cback1(event):  # pylint: disable=missing-docstring
            self.assertEqual(event, self._event)

        (Pipeline().from_source(stream1).as_events().to(EventOut, cback1))

        stream1.add_event(self._event)
Example #12
    def test_simple_offset_chain(self):
        """test a simple offset chain."""
        timeseries = TimeSeries(DATA)

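        # the second offset_by names no field, so it applies to the default 'value'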
        kcol = (
            Pipeline()
            .from_source(timeseries.collection())
            .offset_by(1, 'value')
            .offset_by(2)
            .to_keyed_collections()
        )

        self.assertEqual(kcol['all'].at(0).get(), 55)
        self.assertEqual(kcol['all'].at(1).get(), 21)
        self.assertEqual(kcol['all'].at(2).get(), 29)
        self.assertEqual(kcol['all'].at(3).get(), 96)
Example #13
    def test_simple_filter(self):
        """filter events in a batch."""
        def filter_cb(event):
            """filter callback"""
            return event.value() > 65

        timeseries = TimeSeries(SEPT_2014_DATA)

        kcol = (Pipeline().from_source(timeseries).filter(
            filter_cb).to_keyed_collections())

        self.assertEqual(kcol.get('all').size(), 39)
Example #14
    def test_tre_to_tre_noop(self):
        """TimeRangeEvent -> TimeRangeEvent noop."""

        stream1 = Stream()

        def cback1(event):  # pylint: disable=missing-docstring
            self.assertEqual(event, self._tre)

        (Pipeline().from_source(stream1).as_time_range_events().to(
            EventOut, cback1))

        stream1.add_event(self._tre)
Example #15
    def test_idxe_to_idxe_noop(self):
        """IndexedEvent -> IndexedEvent noop."""

        stream1 = Stream()

        def cback1(event):  # pylint: disable=missing-docstring
            self.assertEqual(event, self._idxe)

        (Pipeline().from_source(stream1).as_indexed_events().to(
            EventOut, cback1))

        stream1.add_event(self._idxe)
Example #16
    def test_sum_and_find_max(self):
        """sum elements, find max get result out."""
        def cback(event):
            """catch the return"""
            self.assertEqual(event.get('max_total'), 117)

        timeseries = TimeSeries(IN_OUT_DATA)

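        # collapse 'in' and 'out' into 'total', then aggregate the max of the totals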
        (
            Pipeline()
            .from_source(timeseries)
            .emit_on('flush')
            .collapse(['in', 'out'], 'total', Functions.sum())
            .aggregate(dict(max_total=dict(total=Functions.max())))
            .to(EventOut, cback)
        )

        # Same test but as an event list

        elist = (
            Pipeline()
            .from_source(timeseries)
            .emit_on('flush')
            .collapse(['in', 'out'], 'total', Functions.sum())
            .aggregate(dict(max_total=dict(total=Functions.max())))
            .to_event_list()
        )

        self.assertEqual(len(elist), 1)
        self.assertEqual(elist[0].get('max_total'), 117)
Example #17
    def test_copy_ctor(self):
        """work the copy constructor for coverage."""

        con = Converter(Pipeline(), Options(type=Event))

        con2 = Converter(con)

        self.assertEqual(con._convert_to, con2._convert_to)  # pylint: disable=protected-access

        con3 = con2.clone()

        self.assertEqual(con3._convert_to, con2._convert_to)  # pylint: disable=protected-access
Example #18
    def test_event_to_tre_conversion(self):
        """test converting Event objects to TimeRangeEvent."""

        # pylint: disable=missing-docstring

        stream1 = Stream()

        def cback1(event):
            self.assertEqual(ms_from_dt(event.begin()), 1426316400000)
            self.assertEqual(ms_from_dt(event.end()), 1426320000000)
            self.assertEqual(event.get(), 3)

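        # 'front' puts the start of the 1h range at the event's timestamp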
        (Pipeline().from_source(stream1).as_time_range_events(
            dict(alignment='front', duration='1h')).to(EventOut, cback1))

        stream1.add_event(self._event)

        stream2 = Stream()

        def cback2(event):
            self.assertEqual(ms_from_dt(event.begin()), 1426314600000)
            self.assertEqual(ms_from_dt(event.end()), 1426318200000)
            self.assertEqual(event.get(), 3)

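        # 'center' centers the 1h range on the event's timestamp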
        (Pipeline().from_source(stream2).as_time_range_events(
            dict(alignment='center', duration='1h')).to(EventOut, cback2))

        stream2.add_event(self._event)

        stream3 = Stream()

        def cback3(event):
            self.assertEqual(ms_from_dt(event.begin()), 1426312800000)
            self.assertEqual(ms_from_dt(event.end()), 1426316400000)
            self.assertEqual(event.get(), 3)

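        # 'behind' puts the end of the 1h range at the event's timestamp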
        (Pipeline().from_source(stream3).as_time_range_events(
            dict(alignment='behind', duration='1h')).to(EventOut, cback3))

        stream3.add_event(self._event)
Example #19
    def test_take_and_group_by(self):
        """take events with different group by keys."""
        def gb_callback(event):
            """group into two groups."""
            return 'high' if event.value() > 65 else 'low'

        timeseries = TimeSeries(SEPT_2014_DATA)

        kcol = (Pipeline().from_source(timeseries).emit_on('flush').group_by(
            gb_callback).take(10).to_keyed_collections())

        self.assertEqual(kcol.get('low').size(), 10)

        self.assertEqual(kcol.get('low').at(0).value(), 52)
        self.assertEqual(kcol.get('low').at(1).value(), 26)

        self.assertEqual(kcol.get('high').size(), 10)

        self.assertEqual(kcol.get('high').at(0).value(), 80)
        self.assertEqual(kcol.get('high').at(1).value(), 88)
        self.assertEqual(kcol.get('high').at(8).value(), 88)
        self.assertEqual(kcol.get('high').at(9).value(), 94)

        # test clearing it - recombines the groups under a single 'all' key

        kcol = (Pipeline().from_source(timeseries).emit_on('flush').group_by(
            gb_callback).take(10).clear_group_by().to_keyed_collections())

        self.assertEqual(kcol.get('all').size(), 20)

        # group by as above, but window by day and take the first two in each window
        kcol = (Pipeline().from_source(timeseries).emit_on('flush').window_by(
            '1d').group_by(gb_callback).take(2).to_keyed_collections())

        for k, v in list(kcol.items()):
            self.assertTrue(k.startswith('1d'))
            self.assertTrue((k.endswith('high') or k.endswith('low')))
            self.assertEqual(v.size(), 2)
Example #20
    def test_event_conversion_bad_args(self):
        """test bad args for Event conversion."""

        stream1 = Stream()

        def cback(event):  # pylint: disable=missing-docstring, unused-argument
            pass

        # no duration
        (Pipeline().from_source(stream1).as_time_range_events(
            dict(alignment='front')).to(EventOut, cback))

        with self.assertRaises(ProcessorException):
            stream1.add_event(self._event)

        stream2 = Stream()

        # bad alignment
        (Pipeline().from_source(stream2).as_time_range_events(
            dict(alignment='bogus', duration='1h')).to(EventOut, cback))

        with self.assertRaises(ProcessorException):
            stream2.add_event(self._event)
Example #21
    def test_idxe_to_tre(self):
        """IndexedEvent -> TimeRangeEvent conversion."""

        stream1 = Stream()

        def cback1(event):  # pylint: disable=missing-docstring
            self.assertEqual(ms_from_dt(event.begin()), 1426316400000)
            self.assertEqual(ms_from_dt(event.end()), 1426320000000)
            self.assertEqual(event.get(), 3)

        (Pipeline().from_source(stream1).as_time_range_events().to(
            EventOut, cback1))

        stream1.add_event(self._idxe)
Example #22
    def test_idxe_to_event(self):
        """IndexedEvent -> Event conversion."""

        stream1 = Stream()

        # pylint: disable=missing-docstring

        def cback1(event):
            self.assertEqual(ms_from_dt(event.timestamp()), 1426318200000)
            self.assertEqual(event.get(), 3)

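        # 'center': the resulting Event is stamped at the midpoint of the index range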
        (Pipeline().from_source(stream1).as_events(
            dict(alignment='center')).to(EventOut, cback1))

        stream1.add_event(self._idxe)

        stream2 = Stream()

        def cback2(event):
            self.assertEqual(ms_from_dt(event.timestamp()), 1426316400000)
            self.assertEqual(event.get(), 3)

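        # 'lag': the resulting Event is stamped at the beginning of the index range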
        (Pipeline().from_source(stream2).as_events(dict(alignment='lag')).to(
            EventOut, cback2))

        stream2.add_event(self._idxe)

        stream3 = Stream()

        def cback3(event):
            self.assertEqual(ms_from_dt(event.timestamp()), 1426320000000)
            self.assertEqual(event.get(), 3)

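        # 'lead': the resulting Event is stamped at the end of the index range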
        (Pipeline().from_source(stream3).as_events(dict(alignment='lead')).to(
            EventOut, cback3))

        stream3.add_event(self._idxe)
Example #23
    def test_tre_to_idxe_error(self):
        """Test converting TimeRangeEvent object to IndexedEvent error."""

        # pylint: disable=missing-docstring

        stream1 = Stream()

        def cback1(event):  # pylint: disable=unused-argument
            pass

        (Pipeline().from_source(stream1).as_indexed_events(
            dict(duration='1h')).to(EventOut, cback1))

        with self.assertRaises(ProcessorException):
            stream1.add_event(self._tre)
Example #24
    def test_event_to_idxe_conversion(self):
        """Test converting Event object to IndexedEvent."""

        # pylint: disable=missing-docstring

        stream1 = Stream()

        def cback1(event):
            self.assertEqual(event.index_as_string(), '1h-396199')
            self.assertEqual(event.get(), 3)

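        # index '1h-396199' is hour 396199 since the epoch, i.e. 1426316400000 ms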
        (Pipeline().from_source(stream1).as_indexed_events(
            dict(duration='1h')).to(EventOut, cback1))

        stream1.add_event(self._event)
Example #25
    def test_multiple_collapse_chains(self):
        """multiple collapsers."""
        timeseries = TimeSeries(IN_OUT_DATA)

        kcol = (
            Pipeline()
            .from_source(timeseries)
            .collapse(['in', 'out'], 'in_out_sum', Functions.sum())
            .collapse(['in', 'out'], 'in_out_max', Functions.max())
            .emit_on('flush')
            .to_keyed_collections()
        )

        self.assertEqual(kcol.get('all').at(0).get('in_out_sum'), 117)
        self.assertEqual(kcol.get('all').at(1).get('in_out_sum'), 110)
        self.assertEqual(kcol.get('all').at(2).get('in_out_sum'), 108)

        self.assertEqual(kcol.get('all').at(0).get('in_out_max'), 80)
        self.assertEqual(kcol.get('all').at(1).get('in_out_max'), 88)
        self.assertEqual(kcol.get('all').at(2).get('in_out_max'), 56)
Example #26
    def test_filter_and_take_chain(self):
        """filter events, then apply take"""
        def filter_cb(event):
            """filter callback"""
            return event.value() > 65

        timeseries = TimeSeries(SEPT_2014_DATA)

        kcol = (Pipeline().from_source(timeseries).filter(filter_cb).take(
            10).to_keyed_collections())

        self.assertEqual(kcol.get('all').size(), 10)
        self.assertEqual(kcol.get('all').at(0).value(), 80)
        self.assertEqual(kcol.get('all').at(1).value(), 88)
        self.assertEqual(kcol.get('all').at(8).value(), 88)
        self.assertEqual(kcol.get('all').at(9).value(), 94)
Example #27
    def test_callback_offset_chain(self):
        """pass a callback in rather than retrieving a keyed collection."""
        def cback(collection, window_key, group_by):  # pylint: disable=unused-argument
            """callback to pass in."""
            global RESULTS  # pylint: disable=global-statement
            RESULTS = collection

        timeseries = TimeSeries(DATA)

        (
            Pipeline()
            .from_source(timeseries.collection())
            .offset_by(1, 'value')
            .offset_by(2)
            .to(CollectionOut, cback)
        )

        # Spurious lint error due to upstream tinkering
        # with the global variable
        # pylint: disable=no-member

        self.assertEqual(RESULTS.at(0).get(), 55)
        self.assertEqual(RESULTS.at(1).get(), 21)
        self.assertEqual(RESULTS.at(2).get(), 29)
        self.assertEqual(RESULTS.at(3).get(), 96)
Example #28
    def test_first_point(self):
        """Make sure the first point is handled right when it is perfectly aligned."""

        data = dict(
            name="traffic",
            columns=["time", "value"],
            points=[
                [1473490770000, 10],
                [1473490800000, 20],
                [1473490830000, 30],
                [1473490860000, 40]
            ]
        )

        base_30_sec = (
            Pipeline()
            .from_source(TimeSeries(data))
            .align(window='30s', method='linear', limit=10)
            .to_keyed_collections()
        )

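        # every input point already sits on a 30s boundary, so nothing is added or dropped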
        self.assertEqual(base_30_sec.get('all').size(), 4)
Example #29
    def test_group_by_and_count(self):
        """group by and also count."""

        timeseries = TimeSeries(SEPT_2014_DATA)

        # pylint: disable=missing-docstring

        def gb_callback(event):
            """group into two groups."""
            return 'high' if event.value() > 65 else 'low'

        def cback(count, window_key, group_by):  # pylint: disable=unused-argument
            """callback to pass in."""
            global RESULTS  # pylint: disable=global-statement
            if RESULTS is None:
                RESULTS = dict()
            RESULTS[group_by] = count

        (Pipeline().from_source(timeseries).take(10).group_by(
            gb_callback).emit_on('flush').count(cback))

        self.assertEqual(RESULTS.get('high'), 4)
        self.assertEqual(RESULTS.get('low'), 6)
Example #30
    def test_linear_stream(self):
        """Test streaming on linear fill"""
        def cback(collection, window_key, group_by):
            """the callback"""
            global RESULTS  # pylint: disable=global-statement
            RESULTS = collection

        events = [
            Event(1400425947000, 1),
            Event(1400425948000, 2),
            Event(1400425949000, dict(value=None)),
            Event(1400425950000, dict(value=None)),
            Event(1400425951000, dict(value=None)),
            Event(1400425952000, 5),
            Event(1400425953000, 6),
            Event(1400425954000, 7),
        ]

        stream = Stream()

        (
            Pipeline()
            .from_source(stream)
            .fill(method='linear', field_spec='value')
            .to(CollectionOut, cback)
        )

        for i in events:
            stream.add_event(i)

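        # the three None values were linearly interpolated between 2 and 5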
        self.assertEqual(RESULTS.size(), len(events))

        self.assertEqual(RESULTS.at(0).get(), 1)
        self.assertEqual(RESULTS.at(1).get(), 2)
        self.assertEqual(RESULTS.at(2).get(), 2.75)  # filled
        self.assertEqual(RESULTS.at(3).get(), 3.5)  # filled
        self.assertEqual(RESULTS.at(4).get(), 4.25)  # filled
        self.assertEqual(RESULTS.at(5).get(), 5)
        self.assertEqual(RESULTS.at(6).get(), 6)
        self.assertEqual(RESULTS.at(7).get(), 7)
Example #31
    def test_bad_args(self):
        """Trigger error states for coverage."""

        simple_missing_data = dict(
            name="traffic",
            columns=["time", "direction"],
            points=[
                [1400425947000, {'in': 1, 'out': None, 'drop': None}],
                [1400425948000, {'in': None, 'out': 4, 'drop': None}],
                [1400425949000, {'in': None, 'out': None, 'drop': 13}],
                [1400425950000, {'in': None, 'out': None, 'drop': 14}],
                [1400425960000, {'in': 9, 'out': 8, 'drop': None}],
                [1400425970000, {'in': 11, 'out': 10, 'drop': 16}],
            ]
        )

        ts = TimeSeries(simple_missing_data)

        # bad ctor arg
        with self.assertRaises(ProcessorException):
            f = Filler(dict())

        # invalid method
        with self.assertRaises(TimeSeriesException):
            ts.fill(method='bogus')

        # limit not int
        with self.assertRaises(ProcessorException):
            ts.fill(fill_limit='z')

        # direct access to the filler via the pipeline must take a single path
        with self.assertRaises(ProcessorException):
            pip = Pipeline()
            pip.fill(method='linear', field_spec=['direction.in', 'direction.out'])

        # invalid method
        with self.assertRaises(ProcessorException):
            pip = Pipeline()
            pip.fill(method='bogus')

        # catch bad path at various points
        with warnings.catch_warnings(record=True) as wrn:
            ts.fill(field_spec='bad.path')
            self.assertEqual(len(wrn), 1)
            self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

        with warnings.catch_warnings(record=True) as wrn:
            ts.fill(field_spec='bad.path', method='linear')
            self.assertEqual(len(wrn), 1)
            self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

        with warnings.catch_warnings(record=True) as wrn:
            ts.fill(field_spec='direction.bogus')
            self.assertEqual(len(wrn), 1)
            self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

        # trigger warnings about non-numeric values in linear fill.

        with warnings.catch_warnings(record=True) as wrn:
            simple_missing_data = dict(
                name="traffic",
                columns=["time", "direction"],
                points=[
                    [1400425947000, {'in': 1, 'out': None}],
                    [1400425948000, {'in': 'non_numeric', 'out': 4}],
                    [1400425949000, {'in': 5, 'out': None}],
                ]
            )

            ts = TimeSeries(simple_missing_data)

            ts.fill(field_spec='direction.in', method='linear')

            self.assertEqual(len(wrn), 1)
            self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

        # empty series for coverage - this case caught a bug
        empty = TimeSeries(dict(
            name="Sensor values",
            columns=["time", "temperature"],
            points=[
            ]
        ))

        self.assertEqual(empty.fill(field_spec='temperature').size(), 0)