Beispiel #1
0
    def build_project_data():
        """Build the ``(aggregateby, expected $project stage)`` fixture pairs.

        One pair is produced per aggregation level 0..3.  Each expected
        stage maps the six statistic fields onto the document path where
        they live at that level: the root for level 0, then ``months``,
        ``months.days`` and ``months.days.hours``.

        Returns:
            A list of 4 tuples ``(aggregateby.Aggregateby(level), stage)``
            where ``stage`` is a ``{'$project': {...}}`` dict.
        """
        stat_fields = ('datetime', 'count', 'sum', 'sum2', 'min', 'max')

        # Field-path prefix for each level; the position in this tuple is
        # the integer handed to Aggregateby (0: years, 1: months,
        # 2: days, 3: hours).
        path_prefixes = (
            '$',
            '$months.',
            '$months.days.',
            '$months.days.hours.',
        )

        cases = []
        for level, prefix in enumerate(path_prefixes):
            projection = {field: prefix + field for field in stat_fields}
            cases.append(
                (aggregateby.Aggregateby(level), {'$project': projection})
            )
        return cases
Beispiel #2
0
    def test_can_parse_valid_str_aggregateby(
        self,
        int_aggregateby,
        aggregation_keys,
    ):
        """Check the aggregation keys exposed by a freshly built Aggregateby.

        An ``Aggregateby`` is constructed from ``int_aggregateby`` and its
        ``aggregation_keys`` attribute must equal the ``aggregation_keys``
        argument.  Both arguments are presumably supplied by a
        parametrization decorator that is outside this view.
        """
        self.assertEqual(
            aggregateby.Aggregateby(int_aggregateby).aggregation_keys,
            aggregation_keys,
        )
Beispiel #3
0
    def test_aggregateby_freq(
        self,
        interval,
        coef,
        expected_freq,
    ):
        """Check the ``freq`` attribute of an Aggregateby built with a coef.

        An ``Aggregateby`` is constructed from ``interval`` with the keyword
        argument ``coef=coef``; its ``freq`` attribute must equal
        ``expected_freq``.  The arguments are presumably supplied by a
        parametrization decorator that is outside this view.
        """
        instance = aggregateby.Aggregateby(interval, coef=coef)
        actual_freq = instance.freq

        self.assertEqual(actual_freq, expected_freq)
Beispiel #4
0
    def build_unwind_and_match_data():
        """Build ``(start, end, aggregateby, expected pipeline)`` fixtures.

        All four cases share the same ``start`` (2001-03-22 12:00) and
        ``end`` (2001-04-02) and differ by aggregation level 0..3.  The
        expected pipeline for a level is the concatenation of one
        ``$unwind`` / ``$match`` stage pair per nesting depth reached
        (``months``, then ``months.days``, then ``months.days.hours``);
        level 0 expects an empty pipeline.

        Returns:
            A list of 4 tuples
            ``(start, end, aggregateby.Aggregateby(level), pipeline)``.
        """

        def unwind_and_match(path, lower, upper):
            # Build new dicts on every call so that, as in a hand-written
            # literal, no stage object is shared between two pipelines.
            return [
                {'$unwind': '$' + path},
                {
                    '$match': {
                        path + '.datetime': {'$gte': lower, '$lte': upper},
                    },
                },
            ]

        def month_stages():
            return unwind_and_match(
                'months',
                datetime(2001, 3, 1),
                datetime(2001, 4, 1),
            )

        def day_stages():
            return unwind_and_match(
                'months.days',
                datetime(2001, 3, 22),
                datetime(2001, 4, 2),
            )

        def hour_stages():
            return unwind_and_match(
                'months.days.hours',
                datetime(2001, 3, 22, 12),
                datetime(2001, 4, 2, 0),
            )

        # Expected pipeline per level; the list position is the integer
        # handed to Aggregateby.
        expected_pipelines = [
            [],
            month_stages(),
            month_stages() + day_stages(),
            month_stages() + day_stages() + hour_stages(),
        ]

        start = datetime(2001, 3, 22, 12)
        end = datetime(2001, 4, 2)

        return [
            (start, end, aggregateby.Aggregateby(level), pipeline)
            for level, pipeline in enumerate(expected_pipelines)
        ]
Beispiel #5
0
 def raw_data_groupbys_and_expected_outputs():
     """Return the fixture cases for turning raw statistics into a DataFrame.

     Each case is a 5-tuple, in this order:

     1. raw data: a list of dicts with ``datetime``, ``count``, ``sum``,
        ``sum2``, ``min``, ``max`` and, optionally, extra group keys;
     2. the ``aggregateby.Aggregateby`` instance to aggregate with;
     3. the list of groupby keys;
     4. the expected DataFrame index;
     5. the expected DataFrame rows.

     NOTE(review): the expected rows look like
     ``[count, min, max, mean, std]``; the numbers are consistent with
     ``mean = sum / count`` and ``std = sqrt(sum2 / count - mean ** 2)``.
     Confirm the column names against the code under test.

     The infinities and NaN are plain Python floats rather than ``pd.np.*``
     (``pd.np`` was removed in pandas 2.0).  The last expected index is
     built with ``MultiIndex.from_tuples`` rather than the
     ``MultiIndex(labels=...)`` constructor keyword, which was removed in
     pandas 1.0.
     """
     # Bound once so every use below refers to the same float objects.
     inf = float('inf')
     nan = float('nan')

     return [
         (
             # raw data
             [],
             # aggregateby
             aggregateby.Aggregateby(3),
             # groupby
             [],
             # expected df index
             pd.Index([], name='datetime'),
             # expected df data
             [],
         ),
         (
             # raw data
             [{
                 'datetime': datetime(1987, 5, 8),
                 'count': 0,
                 'sum': 0,
                 'sum2': 0,
                 'min': inf,
                 'max': -inf,
             }],
             # aggregateby
             aggregateby.Aggregateby(2),
             # groupby
             [],
             # expected df index
             pd.Index([datetime(1987, 5, 8)], name='datetime'),
             # expected df data
             [
                 [0, inf, -inf, nan, nan],
             ],
         ),
         (
             # raw data
             [{
                 'datetime': datetime(1987, 5, 8),
                 'count': 5,
                 'sum': 8.2,
                 'sum2': 14.32,
                 'min': 1.1,
                 'max': 2.3,
             }],
             # aggregateby
             aggregateby.Aggregateby(2),
             # groupby
             [],
             # expected df index
             pd.Index([datetime(1987, 5, 8)], name='datetime'),
             # expected df data
             [
                 [5, 1.1, 2.3, 1.64, 0.417612],
             ],
         ),
         (
             # raw data: two records for the same day are merged
             [{
                 'datetime': datetime(1987, 5, 8),
                 'count': 3,
                 'sum': 4.1,
                 'sum2': 5.79,
                 'min': 1.1,
                 'max': 1.7,
             }, {
                 'datetime': datetime(1987, 5, 8),
                 'count': 2,
                 'sum': 4.1,
                 'sum2': 8.53,
                 'min': 1.8,
                 'max': 2.3,
             }],
             # aggregateby
             aggregateby.Aggregateby(2),
             # groupby
             [],
             # expected df index
             pd.Index([datetime(1987, 5, 8)], name='datetime'),
             # expected df data
             [
                 [5, 1.1, 2.3, 1.64, 0.417612],
             ],
         ),
         (
             # raw data: same day, but split by the 'plop' group key
             [{
                 'datetime': datetime(1987, 5, 8),
                 'count': 3,
                 'sum': 4.1,
                 'sum2': 5.79,
                 'min': 1.1,
                 'max': 1.7,
                 'plop': 'lol',
             }, {
                 'datetime': datetime(1987, 5, 8),
                 'count': 2,
                 'sum': 4.1,
                 'sum2': 8.53,
                 'min': 1.8,
                 'max': 2.3,
                 'plop': 'mdr',
             }],
             # aggregateby
             aggregateby.Aggregateby(2),
             # groupby
             ['plop'],
             # expected df index
             pd.MultiIndex.from_product(
                 [
                     [datetime(1987, 5, 8)],
                     ['lol', 'mdr'],
                 ],
                 names=['datetime', 'plop'],
             ),
             # expected df data
             [
                 [3, 1.1, 1.7, 1.366666, 0.249443],
                 [2, 1.8, 2.3, 2.05, 0.25],
             ],
         ),
         (
             # raw data: three identical values, so the last expected
             # column is exactly 0.0
             [{
                 'datetime': datetime(1987, 5, 8),
                 'count': 3,
                 'sum': 21.6 * 3,
                 'sum2': 21.6**2 * 3,
                 'min': 21.6,
                 'max': 21.6,
             }],
             # aggregateby
             aggregateby.Aggregateby(2),
             # groupby
             [],
             # expected df index
             pd.Index([datetime(1987, 5, 8)], name='datetime'),
             # expected df data
             [
                 [3, 21.6, 21.6, 21.6, 0.0],
             ],
         ),
         (
             # raw data: three days, expected to collapse into two rows
             # when aggregating with coef=2
             [{
                 'datetime': datetime(1987, 5, 8),
                 'count': 3,
                 'sum': 4.1,
                 'sum2': 5.79,
                 'min': 1.1,
                 'max': 1.7,
             }, {
                 'datetime': datetime(1987, 5, 9),
                 'count': 2,
                 'sum': 4.1,
                 'sum2': 8.53,
                 'min': 1.8,
                 'max': 2.3,
             }, {
                 'datetime': datetime(1987, 5, 11),
                 'count': 1,
                 'sum': 2.3,
                 'sum2': 2.3**2,
                 'min': 2.3,
                 'max': 2.3,
             }],
             # aggregateby
             aggregateby.Aggregateby(2, coef=2),
             # groupby
             [],
             # expected df index
             pd.Index(
                 [
                     datetime(1987, 5, 8),
                     datetime(1987, 5, 10),
                 ],
                 name='datetime',
             ),
             # expected df data
             [
                 [5, 1.1, 2.3, 1.64, 0.417612],
                 [1, 2.3, 2.3, 2.3, 0.0],
             ],
         ),
         (
             # raw data: same as above, with a 'plop' group key
             [{
                 'datetime': datetime(1987, 5, 8),
                 'count': 3,
                 'sum': 4.1,
                 'sum2': 5.79,
                 'min': 1.1,
                 'max': 1.7,
                 'plop': 'A',
             }, {
                 'datetime': datetime(1987, 5, 9),
                 'count': 2,
                 'sum': 4.1,
                 'sum2': 8.53,
                 'min': 1.8,
                 'max': 2.3,
                 'plop': 'A',
             }, {
                 'datetime': datetime(1987, 5, 11),
                 'count': 1,
                 'sum': 2.3,
                 'sum2': 2.3**2,
                 'min': 2.3,
                 'max': 2.3,
                 'plop': 'B',
             }],
             # aggregateby
             aggregateby.Aggregateby(2, coef=2),
             # groupby
             ['plop'],
             # expected df index: only the (8th, 'A') and (10th, 'B')
             # pairs, not the full product of the two levels
             pd.MultiIndex.from_tuples(
                 [
                     (datetime(1987, 5, 8), 'A'),
                     (datetime(1987, 5, 10), 'B'),
                 ],
                 names=['datetime', 'plop'],
             ),
             # expected df data
             [
                 [5, 1.1, 2.3, 1.64, 0.417612],
                 [1, 2.3, 2.3, 2.3, 0.0],
             ],
         ),
     ]