import numpy as np
import pandas as pd
import pandas.testing as pdt
import pytest

from solarforecastarbiter import utils  # import path for compute_aggregate assumed


def test_compute_aggregate_deleted_not_removed(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids}
    with pytest.raises(ValueError):
        utils.compute_aggregate(data, '1h', 'ending',
                                'UTC', 'sum', aggobs)
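

# The fixtures nindex, ids, and aggobs used throughout these tests are
# defined elsewhere in the test module.  A minimal sketch of the
# _make_aggobs helper, assuming the usual aggregate-observation fields (the
# field names and default effective_from below are assumptions, not taken
# from this excerpt):
def _make_aggobs(obsid, ef=pd.Timestamp('20191001T0000Z'), eu=None, oda=None):
    return {
        'observation_id': obsid,
        'effective_from': ef,
        'effective_until': eu,
        'observation_deleted_at': oda,
    }
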
def test_compute_aggregate_missing_obs_with_index(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:2]}
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                aggobs[:-2], nindex)


def test_compute_aggregate_missing_from_data(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    aggobs = list(aggobs[:-2]) + [
        _make_aggobs('09ed7cf6-ea0b-11e9-a7da-f4939fed889')]
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending',
                                'UTC', 'sum', aggobs)


def test_compute_aggregate_deleted_not_removed_yet(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    # With the last aggobs, compute_aggregate would try to get data from
    # before effective_until, but the observation was deleted, so a
    # ValueError is raised.
    aggobs = list(aggobs[:-2]) + [
        _make_aggobs(ids[4], oda=pd.Timestamp('20191009T0000Z'),
                     eu=pd.Timestamp('20191004T0700Z'))]
    with pytest.raises(ValueError):
        utils.compute_aggregate(data, '1h', 'ending',
                                'UTC', 'sum', aggobs)


def test_compute_aggregate_missing_data(aggobs, ids):
    data = {
        id_: pd.DataFrame({
            'value': [1] * 10,
            'quality_flag': [0] * 10
        },
                          index=nindex)
        for id_ in ids[:3]
    }
    data[ids[-1]] = pd.DataFrame({
        'value': [1] * 8,
        'quality_flag': [0] * 8
    },
                                 index=nindex[:-2])
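    # ids[-1] is missing the final two timesteps, so the aggregate is NaN
    # for those intervals.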
    aggobs = list(aggobs[:-2]) + [_make_aggobs(ids[-1])]
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum', aggobs)
    pdt.assert_frame_equal(
        agg,
        pd.DataFrame({
            'value':
            pd.Series([3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 3.0, 4.0, None, None],
                      index=nindex),
            'quality_flag':
            pd.Series([0] * 10, index=nindex)
        }))


def test_compute_aggregate_no_overlap(ids):
    data = {
        ids[0]:
        pd.DataFrame({
            'value': [1, 2, 3],
            'quality_flag': [2, 10, 338]
        },
                     index=pd.DatetimeIndex([
                         '20191002T0100Z', '20191002T0130Z', '20191002T0230Z'
                     ])),
        ids[1]:
        pd.DataFrame({
            'value': [3, 2, 1],
            'quality_flag': [9, 880, 10]
        },
                     index=pd.DatetimeIndex([
                         '20191002T0200Z', '20191002T0230Z', '20191002T0300Z'
                     ]))
    }
    aggobs = [
        _make_aggobs(ids[0]),
        _make_aggobs(ids[1], pd.Timestamp('20191002T0200Z'))
    ]
    agg = utils.compute_aggregate(data, '30min', 'ending', 'UTC', 'median',
                                  aggobs)
    expected = pd.DataFrame(
        {
            'value': [1.0, 2.0, None, 2.5, None],
            'quality_flag': [2, 10, 9, 338 | 880, 10]
        },
        index=pd.DatetimeIndex([
            '20191002T0100Z', '20191002T0130Z', '20191002T0200Z',
            '20191002T0230Z', '20191002T0300Z'
        ]))
    pdt.assert_frame_equal(agg, expected)


def test_compute_aggregate_partial_missing_values_with_index(aggobs, ids):
    data = {
        id_: pd.DataFrame({
            'value': [1] * 10,
            'quality_flag': [0] * 10
        },
                          index=nindex)
        for id_ in ids[:2]
    }
    data[ids[2]] = pd.DataFrame({
        'value': [1] * 5,
        'quality_flag': [0] * 5
    },
                                index=nindex[5:])
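    # ids[2] has no data for the first five timesteps, so the summed
    # aggregate is NaN there.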
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                  aggobs[:-2], nindex)
    expected = pd.DataFrame({
        'value':
        pd.Series(
            [np.nan, np.nan, np.nan, np.nan, np.nan, 1.0, 2.0, 3.0, 3.0, 3.0],
            index=nindex),
        'quality_flag':
        pd.Series([0] * 10, index=nindex)
    })
    pdt.assert_frame_equal(agg, expected)


def test_compute_aggregate_out_of_effective(aggobs, ids):
    limited_aggobs = [aggob
                      for aggob in aggobs
                      if aggob['effective_until'] is not None]
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    max_time = pd.Series([o['effective_until'] for o in limited_aggobs]).max()
    ooe_index = pd.date_range(
        max_time + pd.Timedelta('1H'),
        max_time + pd.Timedelta('25H'),
        freq='60min'
    )
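    # ooe_index lies entirely after the latest effective_until, so none of
    # the observations are effective over the requested index.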
    with pytest.raises(ValueError) as e:
        utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                limited_aggobs, ooe_index)
    assert str(e.value) == 'No effective observations in data'


def test_compute_aggregate_missing_values_with_index(
        aggobs, ids, dfindex, missing_idx):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    agg = utils.compute_aggregate(data, '1h', 'ending',
                                  'UTC', 'sum', aggobs[:-2], dfindex)
    assert pd.isnull(agg['value'][missing_idx])


def test_compute_aggregate_mean(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    agg = utils.compute_aggregate(data, '1h', 'ending',
                                  'UTC', 'mean', aggobs[:-2])
    pdt.assert_frame_equal(agg, pd.DataFrame(
        {'value': pd.Series([1.0] * 10, index=nindex),
         'quality_flag':  pd.Series([0]*10, index=nindex)})
        )


def test_compute_aggregate_index_provided(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    the_index = nindex.copy()[::2]
    agg = utils.compute_aggregate(data, '1h', 'ending',
                                  'UTC', 'sum', aggobs[:-2], the_index)
    pdt.assert_frame_equal(agg, pd.DataFrame(
        {'value': pd.Series([2.0, 2.0, 2.0, 2.0, 3.0],
                            index=the_index),
         'quality_flag':  pd.Series([0]*5, index=the_index)})
        )


def test_compute_aggregate_deleted_but_removed_before(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    # aggobs[-1] refers to a deleted observation that was properly removed
    # from the aggregate, so no error is raised
    aggobs = list(aggobs[:-2]) + [aggobs[-1]]
    agg = utils.compute_aggregate(data, '1h', 'ending',
                                  'UTC', 'sum', aggobs)
    pdt.assert_frame_equal(agg, pd.DataFrame(
        {'value': pd.Series([2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 3.0],
                            index=nindex),
         'quality_flag':  pd.Series([0]*10, index=nindex)}))


def test_compute_aggregate_missing_before_effective(ids):
    data = {ids[0]: pd.DataFrame(
        {'value': [1, 2, 3, 0, 0], 'quality_flag': [2, 10, 338, 0, 0]},
        index=pd.DatetimeIndex([
            '20191002T0100Z', '20191002T0130Z', '20191002T0200Z',
            '20191002T0230Z', '20191002T0300Z'])),
            ids[1]: pd.DataFrame(
        {'value': [None, 2.0, 1.0], 'quality_flag': [0, 880, 10]},
        index=pd.DatetimeIndex([
            '20191002T0200Z', '20191002T0230Z', '20191002T0300Z']))}
    aggobs = [_make_aggobs(ids[0]),
              _make_aggobs(ids[1], pd.Timestamp('20191002T0201Z'))]
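    # ids[1] has a NaN at 02:00Z, but that falls before its effective_from
    # (02:01Z), so it is excluded and the 02:00Z aggregate comes from ids[0]
    # alone.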
    agg = utils.compute_aggregate(data, '30min', 'ending',
                                  'UTC', 'max', aggobs)
    expected = pd.DataFrame(
        {'value': [1.0, 2.0, 3.0, 2.0, 1.0],
         'quality_flag': [2, 10, 338, 880, 10]},
        index=pd.DatetimeIndex([
            '20191002T0100Z', '20191002T0130Z', '20191002T0200Z',
            '20191002T0230Z', '20191002T0300Z']))
    pdt.assert_frame_equal(agg, expected)


def test_compute_aggregate_empty_data(aggobs, ids):
    data = {}
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum', aggobs[:2])


def test_compute_aggregate_bad_cols():
    data = {'a': pd.DataFrame([0], index=pd.DatetimeIndex(['20191001T1200Z']))}
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'mean',
                                [_make_aggobs('a')])
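

    # The handler below is a method of the aggregate values view class; the
    # enclosing Flask view class, its route registration, and its imports
    # are not shown in this excerpt.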
    def get(self, aggregate_id, *args):
        """
        ---
        summary: Get Aggregate data.
        description: Get the timeseries values from the Aggregate entry.
        tags:
        - Aggregates
        parameters:
          - aggregate_id
          - start_time
          - end_time
          - accepts
        responses:
          200:
            description: Successfully retrieved aggregate values.
            content:
              application/json:
                schema:
                  $ref: '#/components/schemas/AggregateValues'
              text/csv:
                schema:
                  type: string
                example: |-
                  timestamp,value,quality_flag
                  2018-10-29T12:00:00Z,32.93,0
                  2018-10-29T13:00:00Z,25.17,0

          400:
            $ref: '#/components/responses/400-TimerangeTooLarge'
          401:
            $ref: '#/components/responses/401-Unauthorized'
          404:
            $ref: '#/components/responses/404-NotFound'
          422:
            description: Failed to compute aggregate values
        """
        start, end = validate_start_end()
        storage = get_storage()
        aggregate = storage.read_aggregate(aggregate_id)

        interval_length = f"{aggregate['interval_length']}min"
        interval_label = aggregate['interval_label']
        timezone = aggregate['timezone']

        # Create a timedelta to add/subtract from end/start to get data
        # outside of start/end when aggregating
        interval_offset = pd.Timedelta(interval_length) - pd.Timedelta('1ns')

        if interval_label == 'ending':
            index_start = start.ceil(interval_length)
            index_end = end.ceil(interval_length)

            # adjust start to include all values in the previous interval
            start = index_start - interval_offset
            end = index_end
        else:
            index_start = start.floor(interval_length)
            index_end = end.floor(interval_length)

            # adjust end to include all values in the final interval
            end = index_end + interval_offset
            start = index_start

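        # Example of the padding above (ending label, 60 min intervals): a
        # requested start of 12:30Z is ceiled to an index_start of 13:00Z,
        # and the data start becomes 13:00Z - (60min - 1ns), i.e. just after
        # 12:00Z, so every value in (12:00Z, 13:00Z] contributes to the
        # 13:00Z ending-labeled interval.
        # indv_obs maps each observation id to a DataFrame of 'value' and
        # 'quality_flag' over the padded range.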
        indv_obs = storage.read_aggregate_values(aggregate_id, start, end)

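        # Labels of the aggregate intervals the client actually requested,
        # expressed in the aggregate's timezone.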
        request_index = pd.date_range(
            index_start.tz_convert(timezone),
            index_end.tz_convert(timezone),
            freq=interval_length,
        )

        # Compute the aggregate over the requested index; KeyError/ValueError
        # (e.g. missing observation data or no effective observations) are
        # translated to an HTTP 422 below.
        try:
            values = compute_aggregate(
                indv_obs, interval_length, interval_label, timezone,
                aggregate['aggregate_type'], aggregate['observations'],
                request_index)
        except (KeyError, ValueError) as err:
            raise BaseAPIException(422, values=str(err))
        accepts = request.accept_mimetypes.best_match(['application/json',
                                                       'text/csv'])
        if accepts == 'application/json':
            values.index.name = 'timestamp'
            data = AggregateValuesSchema().dump(
                {"aggregate_id": aggregate_id, "values": values})
            return jsonify(data)
        else:
            meta_url = url_for('aggregates.metadata',
                               aggregate_id=aggregate_id,
                               _external=True)
            csv_header = f'# aggregate_id: {aggregate_id}\n# metadata: {meta_url}\n'  # NOQA
            csv_values = values.to_csv(columns=['value', 'quality_flag'],
                                       index_label='timestamp',
                                       date_format='%Y%m%dT%H:%M:%S%z')
            csv_data = csv_header + csv_values
            response = make_response(csv_data, 200)
            response.mimetype = 'text/csv'
            return response
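

# For context, a minimal client-side sketch of calling this endpoint.  The
# base URL, route, token, and query-parameter names below are assumptions,
# not taken from this module; the Accept header selects between the JSON and
# CSV responses described in the docstring above.
import requests

BASE_URL = 'https://api.example.com'          # assumed host
AGG_ID = 'replace-with-a-real-aggregate-id'   # hypothetical aggregate id

resp = requests.get(
    f'{BASE_URL}/aggregates/{AGG_ID}/values',  # assumed route for this view
    headers={'Authorization': 'Bearer <token>', 'Accept': 'text/csv'},
    params={'start': '2018-10-29T12:00:00Z',   # query parameter names assumed
            'end': '2018-10-29T14:00:00Z'},
)
resp.raise_for_status()
# The CSV body begins with '# aggregate_id: ...' and '# metadata: ...'
# comment lines, followed by timestamp,value,quality_flag rows.
print(resp.text)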