def test_fixed_window(self):
    """Check fixed-window rollups built from ``SEPT_2014_DATA``.

    The ``value`` column is averaged over '1d' windows and the resulting
    size plus a few values are spot-checked; the same is then done with
    the hourly rollup.
    """

    def avg_of_value():
        """Return a fresh ``{'value': {'value': avg}}`` aggregation spec."""
        return {'value': {'value': Functions.avg()}}

    by_day = TimeSeries(SEPT_2014_DATA).fixed_window_rollup(
        '1d', avg_of_value())

    self.assertEqual(by_day.size(), 5)
    for position, expected in ((0, 46.875),
                               (2, 54.083333333333336),
                               (4, 51.85)):
        self.assertEqual(by_day.at(position).value(), expected)

    # Not really a rollup: each data point will create one
    # aggregation index, so the size matches the number of raw points.
    by_hour = TimeSeries(SEPT_2014_DATA).hourly_rollup(avg_of_value())

    self.assertEqual(by_hour.size(), len(SEPT_2014_DATA.get('points')))
    for position, expected in ((0, 80.0), (2, 52.0), (4, 26.0)):
        self.assertEqual(by_hour.at(position).value(), expected)
def test_fixed_window(self):
    """Check fixed-window rollups built from ``SEPT_2014_DATA``.

    The ``value`` column is averaged over '1d' windows and the resulting
    size plus a few values are spot-checked; the same is then done with
    the hourly rollup.
    """

    def avg_of_value():
        """Return a fresh ``{'value': {'value': avg}}`` aggregation spec."""
        return {'value': {'value': Functions.avg()}}

    by_day = TimeSeries(SEPT_2014_DATA).fixed_window_rollup(
        '1d', avg_of_value())

    self.assertEqual(by_day.size(), 5)
    for position, expected in ((0, 46.875),
                               (2, 54.083333333333336),
                               (4, 51.85)):
        self.assertEqual(by_day.at(position).value(), expected)

    # Not really a rollup: each data point will create one
    # aggregation index, so the size matches the number of raw points.
    by_hour = TimeSeries(SEPT_2014_DATA).hourly_rollup(avg_of_value())

    self.assertEqual(by_hour.size(), len(SEPT_2014_DATA.get('points')))
    for position, expected in ((0, 80.0), (2, 52.0), (4, 26.0)):
        self.assertEqual(by_hour.at(position).value(), expected)
def test_non_fixed_rollups(self):
    """Work the calendar rollup logic (daily, monthly, yearly).

    For each rollup, the index string of the first aggregated event is
    compared with the ``utc=False`` index string that ``Index`` builds
    from the timestamp of the first raw data point.
    """

    def avg_of_value():
        """Return a fresh ``{'value': {'value': avg}}`` aggregation spec."""
        return {'value': {'value': Functions.avg()}}

    series = TimeSeries(SEPT_2014_DATA)

    # Warnings are recorded only to keep them quiet; nothing inspects them.
    with warnings.catch_warnings(record=True):
        by_day = series.daily_rollup(avg_of_value())

        first_ms = SEPT_2014_DATA.get('points')[0][0]

        expected = Index.get_daily_index_string(
            dt_from_ms(first_ms), utc=False)
        self.assertEqual(expected, by_day.at(0).index().to_string())

        by_month = series.monthly_rollup(avg_of_value())
        expected = Index.get_monthly_index_string(
            dt_from_ms(first_ms), utc=False)
        self.assertEqual(expected, by_month.at(0).index().to_string())

        by_year = series.yearly_rollup(avg_of_value())
        expected = Index.get_yearly_index_string(
            dt_from_ms(first_ms), utc=False)
        self.assertEqual(expected, by_year.at(0).index().to_string())
def test_non_fixed_rollups(self):
    """Work the calendar rollup logic (daily, monthly, yearly).

    For each rollup, the index string of the first aggregated event is
    compared with the ``utc=False`` index string that ``Index`` builds
    from the timestamp of the first raw data point.
    """

    def avg_of_value():
        """Return a fresh ``{'value': {'value': avg}}`` aggregation spec."""
        return {'value': {'value': Functions.avg()}}

    series = TimeSeries(SEPT_2014_DATA)

    # Warnings are recorded only to keep them quiet; nothing inspects them.
    with warnings.catch_warnings(record=True):
        by_day = series.daily_rollup(avg_of_value())

        first_ms = SEPT_2014_DATA.get('points')[0][0]

        expected = Index.get_daily_index_string(
            dt_from_ms(first_ms), utc=False)
        self.assertEqual(expected, by_day.at(0).index().to_string())

        by_month = series.monthly_rollup(avg_of_value())
        expected = Index.get_monthly_index_string(
            dt_from_ms(first_ms), utc=False)
        self.assertEqual(expected, by_month.at(0).index().to_string())

        by_year = series.yearly_rollup(avg_of_value())
        expected = Index.get_yearly_index_string(
            dt_from_ms(first_ms), utc=False)
        self.assertEqual(expected, by_year.at(0).index().to_string())
def test_aggregate_and_conversion(self):
    """Aggregate/average and convert to TimeRangeEvent.

    Five events are streamed through a pipeline using '1h' windows that
    averages 'in' and 'out' and then calls ``as_time_range_events`` with
    'lag' alignment. The emitted events are collected in the module-level
    ``RESULTS`` and checked by timestamp key.
    """
    # (hour, minute, data) for each input event, all on 2015-03-14.
    samples = (
        (1, 57, {'in': 3, 'out': 1}),
        (1, 58, {'in': 9, 'out': 2}),
        (1, 59, {'in': 6, 'out': 6}),
        (2, 0, {'in': 4, 'out': 7}),
        (2, 1, {'in': 5, 'out': 9}),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            data)
        for hour, minute, data in samples
    ]

    def store_by_timestamp(event):
        """Keep the emitted event in RESULTS, keyed by its ms timestamp."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = {}
        RESULTS['{0}'.format(ms_from_dt(event.timestamp()))] = event

    source = Stream()

    pipeline = Pipeline().from_source(source)
    pipeline = pipeline.window_by('1h').emit_on('eachEvent')
    pipeline = pipeline.aggregate({
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    })
    pipeline = pipeline.as_time_range_events(dict(alignment='lag'))
    pipeline.to(EventOut, store_by_timestamp)

    for event in events_in:
        source.add_event(event)

    expectations = (
        ('1426294800000', 'in_avg', 6),
        ('1426294800000', 'out_avg', 3),
        ('1426298400000', 'in_avg', 4.5),
        ('1426298400000', 'out_avg', 8),
    )
    for key, column, expected in expectations:
        self.assertEqual(RESULTS.get(key).get(column), expected)
def test_windowed_average(self):
    """Aggregate streamed events into per-window averages.

    Five events are pushed through a pipeline using '1h' windows that
    averages 'in' and 'out'. The emitted events are collected in the
    module-level ``RESULTS`` keyed by index and checked for the two
    indexes '1h-396199' and '1h-396200'.
    """
    # (hour, minute, data) for each input event, all on 2015-03-14.
    samples = (
        (7, 57, {'in': 3, 'out': 1}),
        (7, 58, {'in': 9, 'out': 2}),
        (7, 59, {'in': 6, 'out': 6}),
        (8, 0, {'in': 4, 'out': 7}),
        (8, 1, {'in': 5, 'out': 9}),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            data)
        for hour, minute, data in samples
    ]

    def store_by_index(event):
        """Keep the emitted event in RESULTS, keyed by its index."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = {}
        RESULTS['{0}'.format(event.index())] = event

    source = Stream()

    pipeline = Pipeline().from_source(source)
    pipeline = pipeline.window_by('1h').emit_on('eachEvent')
    pipeline = pipeline.aggregate({
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    })
    pipeline.to(EventOut, store_by_index)

    for event in events_in:
        source.add_event(event)

    expectations = (
        ('1h-396199', 'in_avg', 6),
        ('1h-396199', 'out_avg', 3),
        ('1h-396200', 'in_avg', 4.5),
        ('1h-396200', 'out_avg', 8),
    )
    for key, column, expected in expectations:
        self.assertEqual(RESULTS.get(key).get(column), expected)
def test_aggregation_filtering(self):
    """Test the filtering modifiers passed to the TimeSeries aggregations.

    The 'out' column holds one ``None`` value; each aggregation is run
    with a specific filter from ``Filters`` and compared with the
    expected result. Also checks ``Filters.none_if_empty`` on an empty
    list and that an unknown filter function raises ``FilterException``.
    """
    rows = (
        (1429673400000, {'in': 1, 'out': 2}),
        (1429673460000, {'in': 3, 'out': None}),
        (1429673520000, {'in': 5, 'out': 6}),
    )
    event_objects = [Event(stamp, data) for stamp, data in rows]

    series = TimeSeries(dict(name='events', events=event_objects))

    # (aggregation method, filter, expected result on the 'out' column)
    expectations = (
        (series.sum, Filters.ignore_missing, 8),
        (series.avg, Filters.ignore_missing, 4),
        (series.min, Filters.zero_missing, 0),
        (series.max, Filters.propagate_missing, None),
        (series.mean, Filters.ignore_missing, 4),
        (series.median, Filters.zero_missing, 2),
        (series.stdev, Filters.zero_missing, 2.494438257849294),
    )
    for aggregate, missing_filter, expected in expectations:
        self.assertEqual(aggregate('out', missing_filter), expected)

    empty_safe_avg = Functions.avg(Filters.none_if_empty)
    self.assertIsNone(empty_safe_avg([]))

    def bad_filtering_function():  # pylint: disable=missing-docstring
        pass

    with self.assertRaises(FilterException):
        series.sum('out', bad_filtering_function)
def test_event_map_function_arg_and_reduce(self):  # pylint: disable=invalid-name
    """Test Event.map() with a custom function and Event.reduce().

    The mapping function sums the 'in' and 'out' values of each event
    under the key 'sum'; the mapped result is then reduced with avg.
    """

    def map_sum(event):
        """Return ``{'sum': in + out}`` for a single event."""
        total = event.get('in') + event.get('out')
        return {'sum': total}

    # NOTE(review): set() over a dict yields its keys only, so the two
    # assertions below compare key sets, not the values - confirm intent.
    expected_mapped = {'sum': [13, 17, 21, 26]}
    mapped = Event.map(self._get_event_series(), map_sum)
    self.assertEqual(set(mapped), set(expected_mapped))

    expected_reduced = {'sum': 19.25}
    reduced = Event.reduce(mapped, Functions.avg())
    self.assertEqual(set(reduced), set(expected_reduced))
def test_aggregation_filtering(self):
    """Test the filtering methods used to clean values when aggregating.

    Builds a Collection whose 'out' column holds one ``None`` value and
    checks ``Collection.aggregate`` with sum/avg functions configured
    with the different ``Filters``.
    """
    rows = (
        (1429673400000, {'in': 1, 'out': 1}),
        (1429673460000, {'in': 2, 'out': 5}),
        (1429673520000, {'in': 3, 'out': None}),
    )
    coll = Collection([Event(stamp, data) for stamp, data in rows])

    # Unfiltered sum first.
    self.assertEqual(coll.aggregate(Functions.sum(), 'in'), 6)

    # (function factory, filter, column, expected result)
    expectations = (
        (Functions.sum, Filters.propagate_missing, 'in', 6),
        (Functions.sum, Filters.propagate_missing, 'out', None),
        (Functions.avg, Filters.ignore_missing, 'in', 2),
        (Functions.avg, Filters.ignore_missing, 'out', 3),
        (Functions.avg, Filters.zero_missing, 'in', 2),
        (Functions.avg, Filters.zero_missing, 'out', 2),
    )
    for factory, missing_filter, column, expected in expectations:
        self.assertEqual(
            coll.aggregate(factory(missing_filter), column), expected)
def test_aggregation_filtering(self):
    """Test the filtering methods used to clean values when aggregating.

    Builds a Collection whose 'out' column holds one ``None`` value and
    checks ``Collection.aggregate`` with sum/avg functions configured
    with the different ``Filters``.
    """
    rows = (
        (1429673400000, {'in': 1, 'out': 1}),
        (1429673460000, {'in': 2, 'out': 5}),
        (1429673520000, {'in': 3, 'out': None}),
    )
    coll = Collection([Event(stamp, data) for stamp, data in rows])

    # Unfiltered sum first.
    self.assertEqual(coll.aggregate(Functions.sum(), 'in'), 6)

    # (function factory, filter, column, expected result)
    expectations = (
        (Functions.sum, Filters.propagate_missing, 'in', 6),
        (Functions.sum, Filters.propagate_missing, 'out', None),
        (Functions.avg, Filters.ignore_missing, 'in', 2),
        (Functions.avg, Filters.ignore_missing, 'out', 3),
        (Functions.avg, Filters.zero_missing, 'in', 2),
        (Functions.avg, Filters.zero_missing, 'out', 2),
    )
    for factory, missing_filter, column, expected in expectations:
        self.assertEqual(
            coll.aggregate(factory(missing_filter), column), expected)
def test_aggregation_filtering(self):
    """Test the filtering modifiers passed to the TimeSeries aggregations.

    The 'out' column holds one ``None`` value; each aggregation is run
    with a specific filter from ``Filters`` and compared with the
    expected result. Also checks ``Filters.none_if_empty`` on an empty
    list and that an unknown filter function raises ``FilterException``.
    """
    rows = (
        (1429673400000, {'in': 1, 'out': 2}),
        (1429673460000, {'in': 3, 'out': None}),
        (1429673520000, {'in': 5, 'out': 6}),
    )
    event_objects = [Event(stamp, data) for stamp, data in rows]

    series = TimeSeries(dict(name='events', events=event_objects))

    # (aggregation method, filter, expected result on the 'out' column)
    expectations = (
        (series.sum, Filters.ignore_missing, 8),
        (series.avg, Filters.ignore_missing, 4),
        (series.min, Filters.zero_missing, 0),
        (series.max, Filters.propagate_missing, None),
        (series.mean, Filters.ignore_missing, 4),
        (series.median, Filters.zero_missing, 2),
        (series.stdev, Filters.zero_missing, 2.494438257849294),
    )
    for aggregate, missing_filter, expected in expectations:
        self.assertEqual(aggregate('out', missing_filter), expected)

    empty_safe_avg = Functions.avg(Filters.none_if_empty)
    self.assertIsNone(empty_safe_avg([]))

    def bad_filtering_function():  # pylint: disable=missing-docstring
        pass

    with self.assertRaises(FilterException):
        series.sum('out', bad_filtering_function)
def test_simple_map_reduce(self):
    """Test a simple Event.map_reduce() over the 'in' and 'out' columns."""
    columns = ['in', 'out']
    expected = {'in': 5.0, 'out': 14.25}

    reduced = Event.map_reduce(
        self._get_event_series(), columns, Functions.avg())

    # NOTE(review): set() over a dict yields its keys only, so this
    # compares the key sets, not the averaged values - confirm intent.
    self.assertEqual(set(reduced), set(expected))
def test_windowed_average(self):
    """Aggregate streamed events into per-window averages.

    Five events are pushed through a pipeline using '1h' windows that
    averages 'in' and 'out'. The emitted events are collected in the
    module-level ``RESULTS`` keyed by index and checked for the two
    indexes '1h-396199' and '1h-396200'.
    """
    # (hour, minute, data) for each input event, all on 2015-03-14.
    samples = (
        (7, 57, {'in': 3, 'out': 1}),
        (7, 58, {'in': 9, 'out': 2}),
        (7, 59, {'in': 6, 'out': 6}),
        (8, 0, {'in': 4, 'out': 7}),
        (8, 1, {'in': 5, 'out': 9}),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            data)
        for hour, minute, data in samples
    ]

    def store_by_index(event):
        """Keep the emitted event in RESULTS, keyed by its index."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = {}
        RESULTS['{0}'.format(event.index())] = event

    source = Stream()

    pipeline = Pipeline().from_source(source)
    pipeline = pipeline.window_by('1h').emit_on('eachEvent')
    pipeline = pipeline.aggregate({
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    })
    pipeline.to(EventOut, store_by_index)

    for event in events_in:
        source.add_event(event)

    expectations = (
        ('1h-396199', 'in_avg', 6),
        ('1h-396199', 'out_avg', 3),
        ('1h-396200', 'in_avg', 4.5),
        ('1h-396200', 'out_avg', 8),
    )
    for key, column, expected in expectations:
        self.assertEqual(RESULTS.get(key).get(column), expected)
def test_bad_processor_args(self):
    """Feed the Processors bad args and expect ProcessorException.

    Covers construction from something that is not a Pipeline, plus
    invalid or missing options for Filter, Aggregator, Converter and
    Mapper.
    """
    # neither Pipeline or copy ctor
    processors = (Aggregator, Collapser, Converter, Filter,
                  Mapper, Offset, Selector, Taker)
    for processor in processors:
        with self.assertRaises(ProcessorException):
            processor(dict())

    pip = Pipeline()

    # not passed a callable function
    with self.assertRaises(ProcessorException):
        Filter(pip)

    # bad agg args: no opts at all
    with self.assertRaises(ProcessorException):
        Aggregator(pip)

    # bad agg args: wrong opt type, bad opt keys, bad opt value
    for bad_fields in (list(), {1: 'foo'}, {'in': 'foo'}):
        with self.assertRaises(ProcessorException):
            Aggregator(pip, Options(fields=bad_fields))

    # stream w/no window strat
    with self.assertRaises(ProcessorException):
        streaming = Pipeline(pip._d.update(dict(mode='stream')))  # pylint: disable=protected-access
        Aggregator(streaming, Options(fields={'in': Functions.avg()}))

    # bad Converter args: no type in opts
    with self.assertRaises(ProcessorException):
        Converter(pip)

    # bad Converter args: bad opt type
    with self.assertRaises(ProcessorException):
        Converter(pip, Options(type=Pipeline))

    # bad Mapper args
    for mapper_arg in (dict(), pip):
        with self.assertRaises(ProcessorException):
            Mapper(mapper_arg)
def test_collect_and_aggregate(self):
    """Collect events together by 'type' and aggregate per window.

    Five events of type 'a' or 'b' are streamed through a pipeline that
    groups by 'type', uses a fixed '1h' window and averages 'in' and
    'out' while keeping 'type'. The emitted events are collected in the
    module-level ``RESULTS`` under '<index>:<type>' keys.
    """
    # (hour, minute, data) for each input event, all on 2015-03-14.
    samples = (
        (7, 57, {'type': 'a', 'in': 3, 'out': 1}),
        (7, 58, {'type': 'a', 'in': 9, 'out': 2}),
        (7, 59, {'type': 'b', 'in': 6, 'out': 6}),
        (8, 0, {'type': 'a', 'in': 4, 'out': 7}),
        (8, 1, {'type': 'b', 'in': 5, 'out': 9}),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            data)
        for hour, minute, data in samples
    ]

    def store_by_index_and_type(event):
        """Keep the emitted event in RESULTS under '<index>:<type>'."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = {}
        RESULTS['{0}:{1}'.format(event.index(), event.get('type'))] = event

    source = Stream()

    pipeline = Pipeline().from_source(source).group_by('type')
    pipeline = pipeline.window_by(Capsule(duration='1h', type='fixed'))
    pipeline = pipeline.emit_on('eachEvent')
    pipeline = pipeline.aggregate({
        'type': {'type': Functions.keep()},
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    })
    pipeline.to(EventOut, store_by_index_and_type)

    for event in events_in:
        source.add_event(event)

    expectations = (
        ('1h-396199:a', 'in_avg', 6),
        ('1h-396199:a', 'out_avg', 1.5),
        ('1h-396199:b', 'in_avg', 6),
        ('1h-396199:b', 'out_avg', 6),
        ('1h-396200:a', 'in_avg', 4),
        ('1h-396200:a', 'out_avg', 7),
        ('1h-396200:b', 'in_avg', 5),
        ('1h-396200:b', 'out_avg', 9),
    )
    for key, column, expected in expectations:
        self.assertEqual(RESULTS.get(key).get(column), expected)
def test_collect_and_aggregate(self):
    """Collect events together by 'type' and aggregate per window.

    Five events of type 'a' or 'b' are streamed through a pipeline that
    groups by 'type', uses a fixed '1h' window and averages 'in' and
    'out' while keeping 'type'. The emitted events are collected in the
    module-level ``RESULTS`` under '<index>:<type>' keys.
    """
    # (hour, minute, data) for each input event, all on 2015-03-14.
    samples = (
        (7, 57, {'type': 'a', 'in': 3, 'out': 1}),
        (7, 58, {'type': 'a', 'in': 9, 'out': 2}),
        (7, 59, {'type': 'b', 'in': 6, 'out': 6}),
        (8, 0, {'type': 'a', 'in': 4, 'out': 7}),
        (8, 1, {'type': 'b', 'in': 5, 'out': 9}),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            data)
        for hour, minute, data in samples
    ]

    def store_by_index_and_type(event):
        """Keep the emitted event in RESULTS under '<index>:<type>'."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = {}
        RESULTS['{0}:{1}'.format(event.index(), event.get('type'))] = event

    source = Stream()

    pipeline = Pipeline().from_source(source).group_by('type')
    pipeline = pipeline.window_by(Capsule(duration='1h', type='fixed'))
    pipeline = pipeline.emit_on('eachEvent')
    pipeline = pipeline.aggregate({
        'type': {'type': Functions.keep()},
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    })
    pipeline.to(EventOut, store_by_index_and_type)

    for event in events_in:
        source.add_event(event)

    expectations = (
        ('1h-396199:a', 'in_avg', 6),
        ('1h-396199:a', 'out_avg', 1.5),
        ('1h-396199:b', 'in_avg', 6),
        ('1h-396199:b', 'out_avg', 6),
        ('1h-396200:a', 'in_avg', 4),
        ('1h-396200:a', 'out_avg', 7),
        ('1h-396200:b', 'in_avg', 5),
        ('1h-396200:b', 'out_avg', 9),
    )
    for key, column, expected in expectations:
        self.assertEqual(RESULTS.get(key).get(column), expected)
def test_aggregate_and_conversion(self):
    """Aggregate/average and convert to TimeRangeEvent.

    Five events are streamed through a pipeline using '1h' windows that
    averages 'in' and 'out' and then calls ``as_time_range_events`` with
    'lag' alignment. The emitted events are collected in the module-level
    ``RESULTS`` and checked by timestamp key.
    """
    # (hour, minute, data) for each input event, all on 2015-03-14.
    samples = (
        (1, 57, {'in': 3, 'out': 1}),
        (1, 58, {'in': 9, 'out': 2}),
        (1, 59, {'in': 6, 'out': 6}),
        (2, 0, {'in': 4, 'out': 7}),
        (2, 1, {'in': 5, 'out': 9}),
    )
    events_in = [
        Event(
            aware_dt_from_args(
                dict(year=2015, month=3, day=14, hour=hour, minute=minute)),
            data)
        for hour, minute, data in samples
    ]

    def store_by_timestamp(event):
        """Keep the emitted event in RESULTS, keyed by its ms timestamp."""
        global RESULTS  # pylint: disable=global-statement
        if RESULTS is None:
            RESULTS = {}
        RESULTS['{0}'.format(ms_from_dt(event.timestamp()))] = event

    source = Stream()

    pipeline = Pipeline().from_source(source)
    pipeline = pipeline.window_by('1h').emit_on('eachEvent')
    pipeline = pipeline.aggregate({
        'in_avg': {'in': Functions.avg()},
        'out_avg': {'out': Functions.avg()},
    })
    pipeline = pipeline.as_time_range_events(dict(alignment='lag'))
    pipeline.to(EventOut, store_by_timestamp)

    for event in events_in:
        source.add_event(event)

    expectations = (
        ('1426294800000', 'in_avg', 6),
        ('1426294800000', 'out_avg', 3),
        ('1426298400000', 'in_avg', 4.5),
        ('1426298400000', 'out_avg', 8),
    )
    for key, column, expected in expectations:
        self.assertEqual(RESULTS.get(key).get(column), expected)
def test_bad_processor_args(self):
    """Feed the Processors bad args and expect ProcessorException.

    Covers construction from something that is not a Pipeline, plus
    invalid or missing options for Filter, Aggregator, Converter and
    Mapper.
    """
    # neither Pipeline or copy ctor
    processors = (Aggregator, Collapser, Converter, Filter,
                  Mapper, Offset, Selector, Taker)
    for processor in processors:
        with self.assertRaises(ProcessorException):
            processor(dict())

    pip = Pipeline()

    # not passed a callable function
    with self.assertRaises(ProcessorException):
        Filter(pip)

    # bad agg args: no opts at all
    with self.assertRaises(ProcessorException):
        Aggregator(pip)

    # bad agg args: wrong opt type, bad opt keys, bad opt value
    for bad_fields in (list(), {1: 'foo'}, {'in': 'foo'}):
        with self.assertRaises(ProcessorException):
            Aggregator(pip, Options(fields=bad_fields))

    # stream w/no window strat
    with self.assertRaises(ProcessorException):
        streaming = Pipeline(pip._d.update(dict(mode='stream')))  # pylint: disable=protected-access
        Aggregator(streaming, Options(fields={'in': Functions.avg()}))

    # bad Converter args: no type in opts
    with self.assertRaises(ProcessorException):
        Converter(pip)

    # bad Converter args: bad opt type
    with self.assertRaises(ProcessorException):
        Converter(pip, Options(type=Pipeline))

    # bad Mapper args
    for mapper_arg in (dict(), pip):
        with self.assertRaises(ProcessorException):
            Mapper(mapper_arg)