def test_compute_aggregate_deleted_not_removed(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids}
    with pytest.raises(ValueError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC',
                                'sum', aggobs)

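# The tests in this module use module-level fixtures/helpers that are not
# shown in this section. A commented sketch of what they plausibly look
# like, inferred from their usage below (the exact timestamps, defaults,
# and signatures are assumptions, not the canonical definitions):
#
# nindex = pd.date_range(start='20191004T0000Z', freq='1h', periods=10)
#
# def _make_aggobs(obsid, ef=pd.Timestamp('20191001T0000Z'),
#                  eu=None, oda=None):
#     return {'observation_id': obsid,
#             'effective_from': ef,
#             'effective_until': eu,
#             'observation_deleted_at': oda}
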
def test_compute_aggregate_missing_obs_with_index(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:2]}
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                aggobs[:-2], nindex)

def test_compute_aggregate_missing_from_data(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    # aggobs references an observation_id that is absent from data
    aggobs = list(aggobs[:-2]) + [
        _make_aggobs('09ed7cf6-ea0b-11e9-a7da-f4939fed889')]
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC',
                                'sum', aggobs)

def test_compute_aggregate_deleted_not_removed_yet(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    # with the last aggobs, compute_aggregate would try to get data before
    # effective_until, but the observation was deleted, so raise an error
    aggobs = list(aggobs[:-2]) + [
        _make_aggobs(ids[4], oda=pd.Timestamp('20191009T0000Z'),
                     eu=pd.Timestamp('20191004T0700Z'))]
    with pytest.raises(ValueError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC',
                                'sum', aggobs)

def test_compute_aggregate_missing_data(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    data[ids[-1]] = pd.DataFrame({'value': [1] * 8, 'quality_flag': [0] * 8},
                                 index=nindex[:-2])
    aggobs = list(aggobs[:-2]) + [_make_aggobs(ids[-1])]
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum', aggobs)
    pdt.assert_frame_equal(agg, pd.DataFrame(
        {'value': pd.Series(
            [3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 3.0, 4.0, None, None],
            index=nindex),
         'quality_flag': pd.Series([0] * 10, index=nindex)}))

def test_compute_aggregate_no_overlap(ids):
    data = {ids[0]: pd.DataFrame(
        {'value': [1, 2, 3], 'quality_flag': [2, 10, 338]},
        index=pd.DatetimeIndex([
            '20191002T0100Z', '20191002T0130Z', '20191002T0230Z'])),
        ids[1]: pd.DataFrame(
        {'value': [3, 2, 1], 'quality_flag': [9, 880, 10]},
        index=pd.DatetimeIndex([
            '20191002T0200Z', '20191002T0230Z', '20191002T0300Z']))}
    aggobs = [_make_aggobs(ids[0]),
              _make_aggobs(ids[1], pd.Timestamp('20191002T0200Z'))]
    agg = utils.compute_aggregate(data, '30min', 'ending', 'UTC',
                                  'median', aggobs)
    expected = pd.DataFrame(
        {'value': [1.0, 2.0, None, 2.5, None],
         'quality_flag': [2, 10, 9, 338 | 880, 10]},
        index=pd.DatetimeIndex([
            '20191002T0100Z', '20191002T0130Z', '20191002T0200Z',
            '20191002T0230Z', '20191002T0300Z']))
    pdt.assert_frame_equal(agg, expected)

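# Hedged sketch of the flag semantics the expectation above relies on:
# within one aggregate interval, quality flags from the contributing
# observations appear to be combined with bitwise OR (hence 338 | 880 at
# 0230Z) regardless of the value aggregation function ('median' here).
# Illustrative only; this is not the compute_aggregate implementation.
def _combine_flags_sketch(flags):
    # OR together all quality flags that land in a single interval
    return int(np.bitwise_or.reduce(np.asarray(flags, dtype=int)))


# e.g. _combine_flags_sketch([338, 880]) == 1018 == 338 | 880
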
def test_compute_aggregate_partial_missing_values_with_index(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:2]}
    data[ids[2]] = pd.DataFrame({'value': [1] * 5, 'quality_flag': [0] * 5},
                                index=nindex[5:])
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                  aggobs[:-2], nindex)
    expected = pd.DataFrame(
        {'value': pd.Series(
            [np.nan, np.nan, np.nan, np.nan, np.nan,
             1.0, 2.0, 3.0, 3.0, 3.0],
            index=nindex),
         'quality_flag': pd.Series([0] * 10, index=nindex)})
    pdt.assert_frame_equal(agg, expected)

def test_compute_aggregate_out_of_effective(aggobs, ids):
    limited_aggobs = [aggob for aggob in aggobs
                      if aggob['effective_until'] is not None]
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    max_time = pd.Series([o['effective_until'] for o in limited_aggobs]).max()
    ooe_index = pd.date_range(
        max_time + pd.Timedelta('1H'),
        max_time + pd.Timedelta('25H'),
        freq='60min'
    )
    with pytest.raises(ValueError) as e:
        utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                limited_aggobs, ooe_index)
    assert str(e.value) == 'No effective observations in data'

def test_compute_aggregate_missing_values_with_index(
        aggobs, ids, dfindex, missing_idx):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                  aggobs[:-2], dfindex)
    assert pd.isnull(agg['value'][missing_idx])

def test_compute_aggregate_mean(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC',
                                  'mean', aggobs[:-2])
    pdt.assert_frame_equal(agg, pd.DataFrame(
        {'value': pd.Series([1.0] * 10, index=nindex),
         'quality_flag': pd.Series([0] * 10, index=nindex)}))

def test_compute_aggregate_index_provided(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    the_index = nindex.copy()[::2]
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum',
                                  aggobs[:-2], the_index)
    pdt.assert_frame_equal(agg, pd.DataFrame(
        {'value': pd.Series([2.0, 2.0, 2.0, 2.0, 3.0], index=the_index),
         'quality_flag': pd.Series([0] * 5, index=the_index)}))

def test_compute_aggregate_deleted_but_removed_before(aggobs, ids):
    data = {id_: pd.DataFrame({'value': [1] * 10, 'quality_flag': [0] * 10},
                              index=nindex)
            for id_ in ids[:3]}
    # aggobs[-1] properly removed
    aggobs = list(aggobs[:-2]) + [aggobs[-1]]
    agg = utils.compute_aggregate(data, '1h', 'ending', 'UTC', 'sum', aggobs)
    pdt.assert_frame_equal(agg, pd.DataFrame(
        {'value': pd.Series([2.0, 2.0, 2.0, 2.0, 2.0,
                             1.0, 2.0, 3.0, 3.0, 3.0],
                            index=nindex),
         'quality_flag': pd.Series([0] * 10, index=nindex)}))

def test_compute_aggregate_missing_before_effective(ids):
    data = {ids[0]: pd.DataFrame(
        {'value': [1, 2, 3, 0, 0], 'quality_flag': [2, 10, 338, 0, 0]},
        index=pd.DatetimeIndex([
            '20191002T0100Z', '20191002T0130Z', '20191002T0200Z',
            '20191002T0230Z', '20191002T0300Z'])),
        ids[1]: pd.DataFrame(
        {'value': [None, 2.0, 1.0], 'quality_flag': [0, 880, 10]},
        index=pd.DatetimeIndex([
            '20191002T0200Z', '20191002T0230Z', '20191002T0300Z']))}
    # ids[1] is not effective until 0201Z, so its earlier (missing)
    # data is ignored instead of raising
    aggobs = [_make_aggobs(ids[0]),
              _make_aggobs(ids[1], pd.Timestamp('20191002T0201Z'))]
    agg = utils.compute_aggregate(data, '30min', 'ending', 'UTC',
                                  'max', aggobs)
    expected = pd.DataFrame(
        {'value': [1.0, 2.0, 3.0, 2.0, 1.0],
         'quality_flag': [2, 10, 338, 880, 10]},
        index=pd.DatetimeIndex([
            '20191002T0100Z', '20191002T0130Z', '20191002T0200Z',
            '20191002T0230Z', '20191002T0300Z']))
    pdt.assert_frame_equal(agg, expected)

def test_compute_aggregate_empty_data(aggobs, ids):
    data = {}
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC',
                                'sum', aggobs[:2])

def test_compute_aggregate_bad_cols():
    # frame lacks the required 'value' and 'quality_flag' columns
    data = {'a': pd.DataFrame([0], index=pd.DatetimeIndex(
        ['20191001T1200Z']))}
    with pytest.raises(KeyError):
        utils.compute_aggregate(data, '1h', 'ending', 'UTC',
                                'mean', [_make_aggobs('a')])

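# For contrast with test_compute_aggregate_bad_cols above: the tests in
# this module imply that each entry in data must be a DataFrame with
# 'value' and 'quality_flag' columns on a DatetimeIndex, e.g. (an
# illustrative sketch, not a normative spec):
#
# good = pd.DataFrame({'value': [0.0], 'quality_flag': [0]},
#                     index=pd.DatetimeIndex(['20191001T1200Z']))
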
def get(self, aggregate_id, *args):
    """
    ---
    summary: Get Aggregate data.
    description: Get the timeseries values from the Aggregate entry.
    tags:
      - Aggregates
    parameters:
      - aggregate_id
      - start_time
      - end_time
      - accepts
    responses:
      200:
        description: Successfully retrieved aggregate values.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/AggregateValues'
          text/csv:
            schema:
              type: string
            example: |-
              timestamp,value,quality_flag
              2018-10-29T12:00:00Z,32.93,0
              2018-10-29T13:00:00Z,25.17,0
      400:
        $ref: '#/components/responses/400-TimerangeTooLarge'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
      422:
        description: Failed to compute aggregate values
    """
    start, end = validate_start_end()
    storage = get_storage()
    aggregate = storage.read_aggregate(aggregate_id)
    interval_length = f"{aggregate['interval_length']}min"
    interval_label = aggregate['interval_label']
    timezone = aggregate['timezone']
    # Create a timedelta to add/subtract from end/start to get data
    # outside of start/end when aggregating
    interval_offset = pd.Timedelta(interval_length) - pd.Timedelta('1ns')
    if interval_label == 'ending':
        index_start = start.ceil(interval_length)
        index_end = end.ceil(interval_length)
        # adjust start to include all values in the previous interval
        start = index_start - interval_offset
        end = index_end
    else:
        index_start = start.floor(interval_length)
        index_end = end.floor(interval_length)
        # adjust end to include all values in the final interval
        end = index_end + interval_offset
        start = index_start
    indv_obs = storage.read_aggregate_values(aggregate_id, start, end)
    request_index = pd.date_range(
        index_start.tz_convert(timezone),
        index_end.tz_convert(timezone),
        freq=interval_length,
    )
    # compute the aggregate from the individual observations
    try:
        values = compute_aggregate(
            indv_obs, interval_length, interval_label,
            timezone, aggregate['aggregate_type'],
            aggregate['observations'], request_index)
    except (KeyError, ValueError) as err:
        raise BaseAPIException(422, values=str(err))
    accepts = request.accept_mimetypes.best_match(
        ['application/json', 'text/csv'])
    if accepts == 'application/json':
        values.index.name = 'timestamp'
        data = AggregateValuesSchema().dump(
            {"aggregate_id": aggregate_id, "values": values})
        return jsonify(data)
    else:
        meta_url = url_for('aggregates.metadata',
                           aggregate_id=aggregate_id,
                           _external=True)
        csv_header = (f'# aggregate_id: {aggregate_id}\n'
                      f'# metadata: {meta_url}\n')
        csv_values = values.to_csv(columns=['value', 'quality_flag'],
                                   index_label='timestamp',
                                   date_format='%Y%m%dT%H:%M:%S%z')
        csv_data = csv_header + csv_values
        response = make_response(csv_data, 200)
        response.mimetype = 'text/csv'
        return response
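
# Worked sketch of the window arithmetic in get() above, using
# illustrative timestamps (not taken from the API docs): with a 60 min
# 'ending' interval_label, a request starting at 12:15 snaps the index
# start to 13:00, and the data fetch start is pulled back by one interval
# minus 1 ns so every raw value belonging to the 13:00 bin is retrieved:
#
# >>> start = pd.Timestamp('2018-10-29T12:15Z')
# >>> offset = pd.Timedelta('60min') - pd.Timedelta('1ns')
# >>> start.ceil('60min')
# Timestamp('2018-10-29 13:00:00+0000', tz='UTC')
# >>> start.ceil('60min') - offset
# Timestamp('2018-10-29 12:00:00.000000001+0000', tz='UTC')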