def test_bad_args(self):
    """Trigger exceptions and warnings, etc."""
    uin = Stream()

    with warnings.catch_warnings(record=True) as wrn:
        Pipeline().from_source(uin).window_by('1h', utc=False)
        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, PipelineWarning))

    # bad arg
    with self.assertRaises(PipelineException):
        Pipeline().from_source(dict())

    # no source
    with self.assertRaises(PipelineException):
        Pipeline().to_keyed_collections()

    # can't iterate on unbounded source
    with self.assertRaises(PipelineIOException):
        list(uin.events())

    # bad emit on type
    with self.assertRaises(PipelineIOException):
        (
            Pipeline()
            .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
            .emit_on('BOGUS')
            .aggregate({'max_in': {'direction.in': Functions.max()}})
            .to_event_list()
        )

def test_aggregate_deep_path(self):
    """Make sure that the aggregator will work on a deep path."""
    elist = (
        Pipeline()
        .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
        .emit_on('flush')
        .aggregate(dict(out_max={'direction.out': Functions.max()}))
        .to_event_list()
    )

    self.assertEqual(elist[0].get('out_max'), 4)

    # Make sure it works with the non-string version to aggregate
    # multiple columns
    elist = (
        Pipeline()
        .from_source(TimeSeries(dict(name='events', events=DEEP_EVENT_LIST)))
        .emit_on('flush')
        .aggregate({
            'in_max': {'direction.in': Functions.max()},
            'out_max': {'direction.out': Functions.max()},
        })
        .to_event_list()
    )

    self.assertEqual(elist[0].get('out_max'), 4)
    self.assertEqual(elist[0].get('in_max'), 8)

def test_bad_conversion_error(self):
    """Test converting a non-Event."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def cback1(event):  # pylint: disable=unused-argument
        pass

    (
        Pipeline()
        .from_source(stream1)
        .as_indexed_events(dict(duration='1h'))
        .to(EventOut, cback1)
    )

    with self.assertRaises(ProcessorException):
        stream1.add_event(Pipeline())

def test_streaming_start_stop(self):
    """turn the stream off and on."""
    def cback(collection, window_key, group_by):  # pylint: disable=unused-argument
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    source = Stream()

    (
        Pipeline()
        .from_source(source)
        .offset_by(3, 'in')
        .to(CollectionOut, cback)
    )

    source.add_event(EVENTLIST1[0])
    source.add_event(EVENTLIST1[1])

    source.stop()
    source.add_event(EVENTLIST1[2])

    # Spurious lint error due to upstream tinkering
    # with the global variable
    # pylint: disable=no-member

    # source stopped, so the third event shouldn't have been added
    self.assertEqual(RESULTS.size(), 2)

    source.start()
    source.add_event(EVENTLIST1[2])

    self.assertEqual(RESULTS.size(), 3)

def test_streaming_multiple_chains(self):
    """streaming events with two pipelines."""
    def cback(collection, window_key, group_by):  # pylint: disable=unused-argument
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    def cback2(collection, window_key, group_by):  # pylint: disable=unused-argument
        """callback to pass in."""
        global RESULTS2  # pylint: disable=global-statement
        RESULTS2 = collection

    source = Stream()

    pip1 = (
        Pipeline()
        .from_source(source)
        .offset_by(1, 'in')
        .offset_by(2, 'in')
        .to(CollectionOut, cback)
    )

    pip1.offset_by(3, 'in').to(CollectionOut, cback2)

    source.add_event(EVENTLIST1[0])

    # Spurious lint error due to upstream tinkering
    # with the global variable
    # pylint: disable=no-member
    self.assertEqual(RESULTS.size(), 1)
    self.assertEqual(RESULTS2.size(), 1)

    self.assertEqual(RESULTS.at(0).get('in'), 4)
    self.assertEqual(RESULTS2.at(0).get('in'), 7)

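# Note on test_streaming_multiple_chains above: chaining offset_by()
# onto pip1 branches a second pipeline off the end of the first, so the
# second callback sees all three offsets (1 + 2 + 3) applied to the
# event. The expected values imply the raw 'in' value is 1.
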
def test_map(self):
    """test .map()"""
    def mapper(event):
        """swap in and out."""
        return event.set_data({
            'in': event.get('out'),
            'out': event.get('in'),
        })

    timeseries = TimeSeries(IN_OUT_DATA)

    # passing an existing Pipeline to the constructor is purely for coverage
    pip = Pipeline()

    kcol = (
        Pipeline(pip)
        .from_source(timeseries.collection())
        .map(mapper)
        .emit_on('flush')
        .to_keyed_collections()
    )

    self.assertEqual(kcol.get('all').at(0).get('in'), 37)
    self.assertEqual(kcol.get('all').at(0).get('out'), 80)

def test_simple_take(self):
    """take 10 events in batch."""
    timeseries = TimeSeries(SEPT_2014_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .take(10)
        .to_keyed_collections()
    )

    new_ts = TimeSeries(dict(name='result', collection=kcol.get('all')))
    self.assertEqual(new_ts.size(), 10)

def test_single_select(self):
    """select a single column."""
    timeseries = TimeSeries(IN_OUT_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .select('in')
        .to_keyed_collections()
    )

    new_ts = TimeSeries(dict(name='new_timeseries', collection=kcol.get('all')))
    self.assertEqual(new_ts.columns(), ['in'])

def test_subset_select(self):
    """select multiple columns."""
    timeseries = TimeSeries(IN_OUT_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .select(['out', 'perpendicular'])
        .to_keyed_collections()
    )

    new_ts = TimeSeries(dict(name='new_timeseries', collection=kcol.get('all')))
    self.assertEqual(set(new_ts.columns()), set(['out', 'perpendicular']))

def test_simple_collapse(self):
    """collapse a subset of columns."""
    timeseries = TimeSeries(IN_OUT_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .collapse(['in', 'out'], 'in_out_sum', Functions.sum())
        .emit_on('flush')
        .to_keyed_collections()
    )

    self.assertEqual(kcol.get('all').at(0).get('in_out_sum'), 117)
    self.assertEqual(kcol.get('all').at(1).get('in_out_sum'), 110)
    self.assertEqual(kcol.get('all').at(2).get('in_out_sum'), 108)

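# Note on test_simple_collapse above: collapse() adds a new column by
# applying the given function across the listed columns of each event,
# so each in_out_sum is that point's in + out. (Judging from the max
# assertions in test_multiple_collapse_chains below, the first point
# appears to be in=80, out=37, hence 117.)
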
def test_event_to_event_noop(self):
    """Event to Event as a noop."""
    stream1 = Stream()

    def cback1(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._event)

    (
        Pipeline()
        .from_source(stream1)
        .as_events()
        .to(EventOut, cback1)
    )

    stream1.add_event(self._event)

def test_simple_offset_chain(self):
    """test a simple offset chain."""
    timeseries = TimeSeries(DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries.collection())
        .offset_by(1, 'value')
        .offset_by(2)
        .to_keyed_collections()
    )

    self.assertEqual(kcol['all'].at(0).get(), 55)
    self.assertEqual(kcol['all'].at(1).get(), 21)
    self.assertEqual(kcol['all'].at(2).get(), 29)
    self.assertEqual(kcol['all'].at(3).get(), 96)

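# Note on test_simple_offset_chain above: the two offset_by()
# processors compose, shifting every value by 1 and then by 2 (+3
# overall), so the expected values imply raw DATA values of 52, 18,
# 26 and 93.
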
def test_simple_filter(self):
    """filter events in a batch."""
    def filter_cb(event):
        """filter callback"""
        return event.value() > 65

    timeseries = TimeSeries(SEPT_2014_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .filter(filter_cb)
        .to_keyed_collections()
    )

    self.assertEqual(kcol.get('all').size(), 39)

def test_tre_to_tre_noop(self):
    """TimeRangeEvent -> TimeRangeEvent noop."""
    stream1 = Stream()

    def cback1(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._tre)

    (
        Pipeline()
        .from_source(stream1)
        .as_time_range_events()
        .to(EventOut, cback1)
    )

    stream1.add_event(self._tre)

def test_idxe_to_idxe_noop(self):
    """IndexedEvent -> IndexedEvent noop."""
    stream1 = Stream()

    def cback1(event):  # pylint: disable=missing-docstring
        self.assertEqual(event, self._idxe)

    (
        Pipeline()
        .from_source(stream1)
        .as_indexed_events()
        .to(EventOut, cback1)
    )

    stream1.add_event(self._idxe)

def test_sum_and_find_max(self):
    """sum elements, find max, get the result out."""
    def cback(event):
        """catch the return"""
        self.assertEqual(event.get('max_total'), 117)

    timeseries = TimeSeries(IN_OUT_DATA)

    (
        Pipeline()
        .from_source(timeseries)
        .emit_on('flush')
        .collapse(['in', 'out'], 'total', Functions.sum())
        .aggregate(dict(max_total=dict(total=Functions.max())))
        .to(EventOut, cback)
    )

    # Same test but as an event list
    elist = (
        Pipeline()
        .from_source(timeseries)
        .emit_on('flush')
        .collapse(['in', 'out'], 'total', Functions.sum())
        .aggregate(dict(max_total=dict(total=Functions.max())))
        .to_event_list()
    )

    self.assertEqual(len(elist), 1)
    self.assertEqual(elist[0].get('max_total'), 117)

def test_copy_ctor(self):
    """work the copy constructor for coverage."""
    con = Converter(Pipeline(), Options(type=Event))

    con2 = Converter(con)
    self.assertEqual(con._convert_to, con2._convert_to)  # pylint: disable=protected-access

    con3 = con2.clone()
    self.assertEqual(con3._convert_to, con2._convert_to)  # pylint: disable=protected-access

def test_event_to_tre_conversion(self):
    """test converting Event objects to TimeRangeEvent."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def cback1(event):
        self.assertEqual(ms_from_dt(event.begin()), 1426316400000)
        self.assertEqual(ms_from_dt(event.end()), 1426320000000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream1)
        .as_time_range_events(dict(alignment='front', duration='1h'))
        .to(EventOut, cback1)
    )

    stream1.add_event(self._event)

    stream2 = Stream()

    def cback2(event):
        self.assertEqual(ms_from_dt(event.begin()), 1426314600000)
        self.assertEqual(ms_from_dt(event.end()), 1426318200000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream2)
        .as_time_range_events(dict(alignment='center', duration='1h'))
        .to(EventOut, cback2)
    )

    stream2.add_event(self._event)

    stream3 = Stream()

    def cback3(event):
        self.assertEqual(ms_from_dt(event.begin()), 1426312800000)
        self.assertEqual(ms_from_dt(event.end()), 1426316400000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream3)
        .as_time_range_events(dict(alignment='behind', duration='1h'))
        .to(EventOut, cback3)
    )

    stream3.add_event(self._event)

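# Note on test_event_to_tre_conversion above: all three expected ranges
# follow from the 1h (3600000 ms) duration and the source event's
# timestamp, which the assertions imply is 1426316400000 ms. 'front'
# starts the range at the timestamp, 'center' straddles it by 30
# minutes on each side, and 'behind' ends the range at it.
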
def test_take_and_group_by(self):
    """take events with different group by keys."""
    def gb_callback(event):
        """group into two groups."""
        return 'high' if event.value() > 65 else 'low'

    timeseries = TimeSeries(SEPT_2014_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .emit_on('flush')
        .group_by(gb_callback)
        .take(10)
        .to_keyed_collections()
    )

    self.assertEqual(kcol.get('low').size(), 10)
    self.assertEqual(kcol.get('low').at(0).value(), 52)
    self.assertEqual(kcol.get('low').at(1).value(), 26)

    self.assertEqual(kcol.get('high').size(), 10)
    self.assertEqual(kcol.get('high').at(0).value(), 80)
    self.assertEqual(kcol.get('high').at(1).value(), 88)
    self.assertEqual(kcol.get('high').at(8).value(), 88)
    self.assertEqual(kcol.get('high').at(9).value(), 94)

    # test clearing it - recombines them into a single key
    kcol = (
        Pipeline()
        .from_source(timeseries)
        .emit_on('flush')
        .group_by(gb_callback)
        .take(10)
        .clear_group_by()
        .to_keyed_collections()
    )

    self.assertEqual(kcol.get('all').size(), 20)

    # group by as above but window and take the first two in each window
    kcol = (
        Pipeline()
        .from_source(timeseries)
        .emit_on('flush')
        .window_by('1d')
        .group_by(gb_callback)
        .take(2)
        .to_keyed_collections()
    )

    for k, v in list(kcol.items()):
        self.assertTrue(k.startswith('1d'))
        self.assertTrue(k.endswith('high') or k.endswith('low'))
        self.assertEqual(v.size(), 2)

def test_event_conversion_bad_args(self):
    """test bad args for Event conversion."""
    stream1 = Stream()

    def cback(event):  # pylint: disable=missing-docstring, unused-argument
        pass

    # no duration
    (
        Pipeline()
        .from_source(stream1)
        .as_time_range_events(dict(alignment='front'))
        .to(EventOut, cback)
    )

    with self.assertRaises(ProcessorException):
        stream1.add_event(self._event)

    stream2 = Stream()

    # bad alignment
    (
        Pipeline()
        .from_source(stream2)
        .as_time_range_events(dict(alignment='bogus', duration='1h'))
        .to(EventOut, cback)
    )

    with self.assertRaises(ProcessorException):
        stream2.add_event(self._event)

def test_idxe_to_tre(self):
    """IndexedEvent -> TimeRangeEvent conversion."""
    stream1 = Stream()

    def cback1(event):  # pylint: disable=missing-docstring
        self.assertEqual(ms_from_dt(event.begin()), 1426316400000)
        self.assertEqual(ms_from_dt(event.end()), 1426320000000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream1)
        .as_time_range_events()
        .to(EventOut, cback1)
    )

    stream1.add_event(self._idxe)

def test_idxe_to_event(self):
    """IndexedEvent -> Event conversion."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def cback1(event):
        self.assertEqual(ms_from_dt(event.timestamp()), 1426318200000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream1)
        .as_events(dict(alignment='center'))
        .to(EventOut, cback1)
    )

    stream1.add_event(self._idxe)

    stream2 = Stream()

    def cback2(event):
        self.assertEqual(ms_from_dt(event.timestamp()), 1426316400000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream2)
        .as_events(dict(alignment='lag'))
        .to(EventOut, cback2)
    )

    stream2.add_event(self._idxe)

    stream3 = Stream()

    def cback3(event):
        self.assertEqual(ms_from_dt(event.timestamp()), 1426320000000)
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream3)
        .as_events(dict(alignment='lead'))
        .to(EventOut, cback3)
    )

    stream3.add_event(self._idxe)

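# Note on test_idxe_to_event above: the three callbacks inspect the
# same 1h index window (1426316400000-1426320000000 ms, per
# test_idxe_to_tre). 'center' yields the window's midpoint, 'lag' its
# beginning and 'lead' its end.
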
def test_tre_to_idxe_error(self):
    """Test converting TimeRangeEvent object to IndexedEvent error."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def cback1(event):  # pylint: disable=unused-argument
        pass

    (
        Pipeline()
        .from_source(stream1)
        .as_indexed_events(dict(duration='1h'))
        .to(EventOut, cback1)
    )

    with self.assertRaises(ProcessorException):
        stream1.add_event(self._tre)

def test_event_to_idxe_conversion(self):
    """Test converting Event object to IndexedEvent."""
    # pylint: disable=missing-docstring
    stream1 = Stream()

    def cback1(event):
        self.assertEqual(event.index_as_string(), '1h-396199')
        self.assertEqual(event.get(), 3)

    (
        Pipeline()
        .from_source(stream1)
        .as_indexed_events(dict(duration='1h'))
        .to(EventOut, cback1)
    )

    stream1.add_event(self._event)

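# Note on test_event_to_idxe_conversion above: the index string
# '1h-396199' names the 396199th one-hour bucket since the epoch, and
# 396199 * 3600000 ms = 1426316400000 ms, matching the event timestamp
# inferred in the conversion tests above.
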
def test_multiple_collapse_chains(self):
    """multiple collapsers."""
    timeseries = TimeSeries(IN_OUT_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .collapse(['in', 'out'], 'in_out_sum', Functions.sum())
        .collapse(['in', 'out'], 'in_out_max', Functions.max())
        .emit_on('flush')
        .to_keyed_collections()
    )

    self.assertEqual(kcol.get('all').at(0).get('in_out_sum'), 117)
    self.assertEqual(kcol.get('all').at(1).get('in_out_sum'), 110)
    self.assertEqual(kcol.get('all').at(2).get('in_out_sum'), 108)

    self.assertEqual(kcol.get('all').at(0).get('in_out_max'), 80)
    self.assertEqual(kcol.get('all').at(1).get('in_out_max'), 88)
    self.assertEqual(kcol.get('all').at(2).get('in_out_max'), 56)

def test_filter_and_take_chain(self):
    """filter events, then apply take"""
    def filter_cb(event):
        """filter callback"""
        return event.value() > 65

    timeseries = TimeSeries(SEPT_2014_DATA)

    kcol = (
        Pipeline()
        .from_source(timeseries)
        .filter(filter_cb)
        .take(10)
        .to_keyed_collections()
    )

    self.assertEqual(kcol.get('all').size(), 10)
    self.assertEqual(kcol.get('all').at(0).value(), 80)
    self.assertEqual(kcol.get('all').at(1).value(), 88)
    self.assertEqual(kcol.get('all').at(8).value(), 88)
    self.assertEqual(kcol.get('all').at(9).value(), 94)

def test_callback_offset_chain(self):
    """pass a callback in rather than retrieving a keyed collection."""
    def cback(collection, window_key, group_by):  # pylint: disable=unused-argument
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    timeseries = TimeSeries(DATA)

    (
        Pipeline()
        .from_source(timeseries.collection())
        .offset_by(1, 'value')
        .offset_by(2)
        .to(CollectionOut, cback)
    )

    # Spurious lint error due to upstream tinkering
    # with the global variable
    # pylint: disable=no-member
    self.assertEqual(RESULTS.at(0).get(), 55)
    self.assertEqual(RESULTS.at(1).get(), 21)
    self.assertEqual(RESULTS.at(2).get(), 29)
    self.assertEqual(RESULTS.at(3).get(), 96)

def test_first_point(self):
    """Make sure the first point is handled right when it is perfectly aligned."""
    data = dict(
        name="traffic",
        columns=["time", "value"],
        points=[
            [1473490770000, 10],
            [1473490800000, 20],
            [1473490830000, 30],
            [1473490860000, 40],
        ]
    )

    base_30_sec = (
        Pipeline()
        .from_source(TimeSeries(data))
        .align(window='30s', method='linear', limit=10)
        .to_keyed_collections()
    )

    self.assertEqual(base_30_sec.get('all').size(), 4)

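# Note on test_first_point above: every input timestamp is an exact
# multiple of 30s, so aligning to 30s boundaries should emit all four
# points unchanged instead of mishandling the already-aligned first
# point.
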
def test_group_by_and_count(self):
    """group by and also count."""
    timeseries = TimeSeries(SEPT_2014_DATA)

    # pylint: disable=missing-docstring

    def gb_callback(event):
        """group into two groups."""
        return 'high' if event.value() > 65 else 'low'

    def cback(count, window_key, group_by):  # pylint: disable=unused-argument
        """callback to pass in."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = dict()
        RESULTS[group_by] = count

    (
        Pipeline()
        .from_source(timeseries)
        .take(10)
        .group_by(gb_callback)
        .emit_on('flush')
        .count(cback)
    )

    self.assertEqual(RESULTS.get('high'), 4)
    self.assertEqual(RESULTS.get('low'), 6)

def test_linear_stream(self):
    """Test streaming on linear fill"""
    def cback(collection, window_key, group_by):
        """the callback"""
        global RESULTS  # pylint: disable=global-statement
        RESULTS = collection

    events = [
        Event(1400425947000, 1),
        Event(1400425948000, 2),
        Event(1400425949000, dict(value=None)),
        Event(1400425950000, dict(value=None)),
        Event(1400425951000, dict(value=None)),
        Event(1400425952000, 5),
        Event(1400425953000, 6),
        Event(1400425954000, 7),
    ]

    stream = Stream()

    (
        Pipeline()
        .from_source(stream)
        .fill(method='linear', field_spec='value')
        .to(CollectionOut, cback)
    )

    for i in events:
        stream.add_event(i)

    self.assertEqual(RESULTS.size(), len(events))

    self.assertEqual(RESULTS.at(0).get(), 1)
    self.assertEqual(RESULTS.at(1).get(), 2)
    self.assertEqual(RESULTS.at(2).get(), 2.75)  # filled
    self.assertEqual(RESULTS.at(3).get(), 3.5)   # filled
    self.assertEqual(RESULTS.at(4).get(), 4.25)  # filled
    self.assertEqual(RESULTS.at(5).get(), 5)
    self.assertEqual(RESULTS.at(6).get(), 6)
    self.assertEqual(RESULTS.at(7).get(), 7)

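# Note on test_linear_stream above: the filled values are straight
# linear interpolation between the last known value (2 at
# t=1400425948000) and the next one (5 at t=1400425952000): a rise of
# 3 over 4 seconds is 0.75/s, giving 2.75, 3.5 and 4.25.
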
def test_bad_args(self):
    """Trigger error states for coverage."""
    simple_missing_data = dict(
        name="traffic",
        columns=["time", "direction"],
        points=[
            [1400425947000, {'in': 1, 'out': None, 'drop': None}],
            [1400425948000, {'in': None, 'out': 4, 'drop': None}],
            [1400425949000, {'in': None, 'out': None, 'drop': 13}],
            [1400425950000, {'in': None, 'out': None, 'drop': 14}],
            [1400425960000, {'in': 9, 'out': 8, 'drop': None}],
            [1400425970000, {'in': 11, 'out': 10, 'drop': 16}],
        ]
    )

    ts = TimeSeries(simple_missing_data)

    # bad ctor arg
    with self.assertRaises(ProcessorException):
        Filler(dict())

    # invalid method
    with self.assertRaises(TimeSeriesException):
        ts.fill(method='bogus')

    # limit not int
    with self.assertRaises(ProcessorException):
        ts.fill(fill_limit='z')

    # direct access to the filler via the pipeline needs a single path
    with self.assertRaises(ProcessorException):
        pip = Pipeline()
        pip.fill(method='linear', field_spec=['direction.in', 'direction.out'])

    # invalid method
    with self.assertRaises(ProcessorException):
        pip = Pipeline()
        pip.fill(method='bogus')

    # catch bad path at various points
    with warnings.catch_warnings(record=True) as wrn:
        ts.fill(field_spec='bad.path')
        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

    with warnings.catch_warnings(record=True) as wrn:
        ts.fill(field_spec='bad.path', method='linear')
        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

    with warnings.catch_warnings(record=True) as wrn:
        ts.fill(field_spec='direction.bogus')
        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

    # trigger warnings about non-numeric values in linear fill.
    with warnings.catch_warnings(record=True) as wrn:
        simple_missing_data = dict(
            name="traffic",
            columns=["time", "direction"],
            points=[
                [1400425947000, {'in': 1, 'out': None}],
                [1400425948000, {'in': 'non_numeric', 'out': 4}],
                [1400425949000, {'in': 5, 'out': None}],
            ]
        )

        ts = TimeSeries(simple_missing_data)
        ts.fill(field_spec='direction.in', method='linear')

        self.assertEqual(len(wrn), 1)
        self.assertTrue(issubclass(wrn[0].category, ProcessorWarning))

    # an empty series (exercised for coverage) once caught a bug
    empty = TimeSeries(dict(
        name="Sensor values",
        columns=["time", "temperature"],
        points=[]
    ))

    self.assertEqual(empty.fill(field_spec='temperature').size(), 0)