def parse_json(json_str):
    """Parse a string of JSON values into a DataFrame.

    Parameters
    ----------
    json_str: str

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    BadAPIRequestError
        If the 'values' key is missing, or if the contents of the
        values key cannot be parsed into a DataFrame.
    """
    # Drop any non-printable characters before attempting to decode.
    printable = ''.join(c for c in json_str if c in string.printable)
    try:
        json_dict = json.loads(printable)
    except json.decoder.JSONDecodeError:
        raise BadAPIRequest(error='Malformed JSON.')
    # Guard against a non-dict top level (e.g. a bare list) as well as
    # a missing key; both were previously caught as TypeError/KeyError.
    if not isinstance(json_dict, dict) or 'values' not in json_dict:
        error = 'Supplied JSON does not contain "values" field.'
        raise BadAPIRequest(error=error)
    try:
        value_df = pd.DataFrame(json_dict['values'])
    except ValueError:
        raise BadAPIRequest({'error': 'Malformed JSON'})
    return value_df
def validate_index_period(index, interval_length, previous_time):
    """
    Validate that the index conforms to interval_length.

    Parameters
    ----------
    index : pd.DatetimeIndex
    interval_length : int
        Regular period of data in minutes
    previous_time : pd.Timestamp or None
        The last time in the database before the start of index.
        May be None.

    Raises
    ------
    BadApiRequest
        If there are any errors
    """
    if len(index) == 0:
        raise BadAPIRequest({'timestamp': ['No times to validate']})
    errors = []
    start = index[0]
    end = index[-1]
    freq = pd.Timedelta(f'{interval_length}min')
    # The index the upload would have if it were perfectly regular.
    expected_index = pd.date_range(start=start, end=end, freq=freq)
    # Shared tail of both the missing- and extra-times messages.
    spacing_msg = ('Uploads must have equally spaced timestamps '
                   f'from {start} to {end} with {interval_length} '
                   'minutes between each timestamp.')
    missing_times = expected_index.difference(index)
    if len(missing_times) > 0:
        errors.append(
            f'Missing {len(missing_times)} timestamps. '
            f'First missing timestamp is {missing_times[0]}. ' + spacing_msg)
    extra_times = index.difference(expected_index)
    if len(extra_times) > 0:
        errors.append(
            f'{len(extra_times)} extra times present in index. '
            f'First extra time is {extra_times[0]}. ' + spacing_msg)
    if previous_time is not None:
        # The gap back to the last stored point must be a whole number
        # of intervals.
        gap_seconds = (start - previous_time).total_seconds()
        if gap_seconds % freq.total_seconds() != 0:
            errors.append(
                f'Start of timeseries is not a multiple of {interval_length} '
                'minutes past the previous time of '
                f'{previous_time.isoformat()}.')
    if errors:
        raise BadAPIRequest({'timestamp': errors})
def post(self, report_id, status):
    """
    ---
    summary: Update the report status
    tags:
      - Reports
    parameters:
      - report_id
      - status
    responses:
      204:
        description: Updated status successfully.
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    if status not in REPORT_STATUS_OPTIONS:
        # f-string so the allowed options are actually interpolated into
        # the message (previously a plain string, so the user saw the
        # literal '{",".join(REPORT_STATUS_OPTIONS)}').
        raise BadAPIRequest({
            'status': 'Must be one of '
                      f'{",".join(REPORT_STATUS_OPTIONS)}.'
        })
    storage = get_storage()
    storage.store_report_status(report_id, status)
    return '', 204
def validate_parsable_values():
    """Can be called from a POST view/endpoint to examine posted
    data for mimetype and attempt to parse to a DataFrame.

    Raises
    ------
    BadAPIRequest
        If the data cannot be parsed.
    werkzeug.exceptions.RequestEntityTooLarge
        If the `Content-Length` header is greater than the application's
        `MAX_CONTENT_LENGTH` config variable.
    """
    # A missing Content-Length header is treated as an empty body.
    content_length = int(request.headers.get('Content-Length', 0))
    if content_length > current_app.config['MAX_CONTENT_LENGTH']:
        raise RequestEntityTooLarge
    if request.mimetype == 'multipart/form-data':
        decoded_data, mimetype = decode_file_in_request_body()
    else:
        decoded_data, mimetype = (request.get_data(as_text=True),
                                  request.mimetype)
    parsed = parse_values(decoded_data, mimetype)
    if parsed.size == 0:
        raise BadAPIRequest({
            'error': ("Posted data contained no values."),
        })
    return parsed
def get(self, object_type):
    """
    ---
    summary: List all permitted actions on all objects of a given type.
    description: |-
      Get a list of object ids and the actions the user is permitted
      to perform on each object.
    parameters:
    - object_type
    tags:
      - Users
    responses:
      200:
        description: List of actions the user can make on the object.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ActionsOnTypeList'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    if object_type not in ALLOWED_OBJECT_TYPES:
        raise BadAPIRequest({
            'object_type': 'Must be one of: '
                           f'{", ".join(ALLOWED_OBJECT_TYPES)}'
        })
    actions = get_storage().list_actions_on_all_objects_of_type(object_type)
    payload = {'object_type': object_type, 'objects': actions}
    return Response(ActionsOnTypeList().dumps(payload),
                    mimetype="application/json")
def parse_csv(csv_string):
    """Parse a csv into a dataframe and raise appropriate errors.

    Parameters
    ----------
    csv_string: str
        String representation of csv to read into a dataframe

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    BadAPIRequestError
        If the string cannot be parsed.
    """
    try:
        # -999/-9999 are common sentinel values for missing data;
        # '#' begins a comment line.
        value_df = pd.read_csv(StringIO(csv_string),
                               na_values=[-999.0, -9999.0],
                               keep_default_na=True,
                               comment='#')
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        raise BadAPIRequest({'error': 'Malformed CSV'})
    return value_df
def validate_forecast_values(forecast_df):
    """Validates that posted values are parseable and of the expected types.

    Parameters
    ----------
    forecast_df: Pandas DataFrame

    Raises
    ------
    BadAPIRequestError
        If an expected field is missing or contains an entry of
        incorrect type.
    """
    errors = {}
    # Coerce 'value' to the smallest float dtype that fits.
    try:
        forecast_df['value'] = pd.to_numeric(forecast_df['value'],
                                             downcast='float')
    except KeyError:
        errors['value'] = ['Missing "value" field.']
    except ValueError:
        errors['value'] = [
            'Invalid item in "value" field. Ensure that all values '
            'are integers, floats, empty, NaN, or NULL.']
    # Coerce 'timestamp' to timezone-aware UTC datetimes.
    try:
        forecast_df['timestamp'] = pd.to_datetime(forecast_df['timestamp'],
                                                  utc=True)
    except KeyError:
        errors['timestamp'] = ['Missing "timestamp" field.']
    except ValueError:
        errors['timestamp'] = [
            'Invalid item in "timestamp" field. Ensure that '
            'timestamps are ISO8601 compliant']
    if errors:
        raise BadAPIRequest(errors)
def _check_post_for_errs(self, aggregate_id, agg_observations, storage):
    """Validate observation changes posted to an aggregate, collecting
    every problem before raising so the client sees all errors at once.

    Parameters
    ----------
    aggregate_id : str
        ID of the aggregate being updated.
    agg_observations : list of dict
        Each dict has an 'observation_id' and optionally
        'effective_from'/'effective_until' keys.
    storage : object
        Storage interface with read_observation/read_aggregate.

    Raises
    ------
    BadAPIRequest
        With errors keyed as errors['observations'][index][field].
    """
    # Nested structure: errors['observations'][str(index)][field] -> message
    errors = defaultdict(partial(defaultdict, dict))
    for i, update_obs in enumerate(agg_observations):
        # Only additions ('effective_from') need validation; setting
        # 'effective_until' (removal) is always permitted.
        if 'effective_from' in update_obs:
            obs_id = str(update_obs['observation_id'])
            obs = storage.read_observation(obs_id)
            agg = storage.read_aggregate(aggregate_id)
            for aggobs in agg['observations']:
                # An entry with effective_until of None means the
                # observation is still active in the aggregate, so it
                # cannot be added again.
                if (
                    aggobs['observation_id'] == obs_id and
                    aggobs['effective_until'] is None
                ):
                    errors['observations'][str(i)]['effective_from'] = (
                        'Observation already present and valid in'
                        ' aggregate')
            # Observation data must be at least as fine-grained as the
            # aggregate's interval.
            if obs['interval_length'] > agg['interval_length']:
                errors['observations'][str(i)]['interval_length'] = (
                    'Observation interval length is not less than or '
                    'equal to the aggregate interval length')
            if obs['variable'] != agg['variable']:
                errors['observations'][str(i)]['variable'] = (
                    'Observation does not have the same variable as the '
                    'aggregate.')
            if obs['interval_value_type'] not in (
                    'interval_mean', 'instantaneous'):
                errors['observations'][str(i)]['interval_value_type'] = (
                    'Only observations with interval_mean and '
                    'instantaneous interval_value_type are valid in '
                    'aggregates')
    if errors:
        raise BadAPIRequest(errors)
def get(self, observation_id, *args):
    """
    ---
    summary: Get dates where flag not present.
    description: |
      Get the dates where an Observation data is NOT flagged with
      the given flag.
    tags:
      - Observations
    parameters:
      - observation_id
      - start_time
      - end_time
      - flag
      - timezone
    responses:
      200:
        description: Unflagged observation values retrieved successfully.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ObservationUnflagged'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    errors = {}
    try:
        start, end = validate_start_end()
    except BadAPIRequest as err:
        # Collect start/end problems so they can be reported together
        # with any timezone/flag problems found below.
        errors = err.errors
    tz = request.args.get('timezone', 'UTC')
    flag = request.args.get('flag', None)
    if tz not in ALLOWED_TIMEZONES:
        errors['timezone'] = f'Unknown timezone {tz}'
    if flag is None:
        errors['flag'] = 'Must provide the flag parameter'
    else:
        try:
            int(flag)
        except ValueError:
            errors['flag'] = 'Flag must be an integer'
        else:
            # Quality flags are stored as a 2-byte unsigned bitmask.
            if int(flag) > (2**16 - 1) or int(flag) < 0:
                errors['flag'] = ('Flag must be a 2 byte unsigned '
                                  'integer between 0 and 65535')
    if errors:
        raise BadAPIRequest(errors)
    storage = get_storage()
    out = {
        'dates': storage.find_unflagged_observation_dates(
            observation_id, start, end, flag, tz),
        'observation_id': observation_id
    }
    data = ObservationUnflaggedSchema().dump(out)
    return jsonify(data)
def validate_start_end():
    """Parses start and end query parameters into pandas Timestamps.

    Returns
    -------
    start: Pandas Timestamp
    end: Pandas TimeStamp

    Raises
    ------
    BadAPIRequest
        If start and end values cannot be parsed.
    """
    errors = {}
    start = request.args.get('start', None)
    end = request.args.get('end', None)
    if start is None:
        errors['start'] = ['Must provide a start time']
    else:
        try:
            start = parse_to_timestamp(start)
        except ValueError:
            errors['start'] = ['Invalid start date format']
    if end is None:
        errors['end'] = ['Must provide a end time']
    else:
        try:
            end = parse_to_timestamp(end)
        except ValueError:
            errors['end'] = ['Invalid end date format']
    if errors:
        raise BadAPIRequest(errors)
    # parse_to_timestamp ensures there is a tz
    if end.tzinfo != start.tzinfo:
        end = end.tz_convert(start.tzinfo)
    max_range = current_app.config['MAX_DATA_RANGE_DAYS']
    if end - start > max_range:
        raise BadAPIRequest({
            'end': [
                f'Only {current_app.config["MAX_DATA_RANGE_DAYS"].days} days of '
                'data may be requested per request'
            ]
        })
    return start, end
def post(self, site_id, *args):
    """
    ---
    summary: Update Site metadata.
    tags:
    - Sites
    parameters:
    - site_id
    requestBody:
      description: >-
        JSON object of site metadata to update.
        If modeling parameters are to be updated,
        all parameters for a given tracking_type
        are required even if most values are the
        same to ensure proper validation. An empty
        object for modeling_parameters will have
        the effect of clearing all modeling
        parameters.
      required: True
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/SiteUpdate'
    responses:
      200:
        description: Site updated successfully
        content:
          application/json:
            schema:
              type: string
              format: uuid
              description: The uuid of the updated site.
        headers:
          Location:
            schema:
              type: string
              format: uri
              description: Url of the updated site.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    try:
        updates = SiteUpdateSchema().load(request.get_json())
    except ValidationError as err:
        raise BadAPIRequest(err.messages)
    # Flatten any modeling parameters into the top-level update dict.
    updates.update(updates.pop('modeling_parameters', {}))
    get_storage().update_site(site_id, **updates)
    response = make_response(site_id, 200)
    response.headers['Location'] = url_for('sites.single', site_id=site_id)
    return response
def decode_file_in_request_body():
    """Decode the data from a utf-8 encoded file into a string and
    return the contents and the file's mimetype.

    Returns
    -------
    decoded_data: str
        The posted utf-8 data as a string.
    posted_file.mimetype: str
        MIME type of the file in the request body.

    Raises
    ------
    BadAPIRequest
        - There is more than one file in the request.
        - If the request does not contain a file.
        - The file does not contain valid utf-8.
    """
    posted_files = list(request.files.keys())
    if len(posted_files) > 1:
        raise BadAPIRequest(
            error="Multiple files found. Please upload one file at a time.")
    if not posted_files:
        raise BadAPIRequest(error="Missing file in request body.")
    posted_file = request.files[posted_files[0]]
    try:
        decoded_data = posted_file.read().decode('utf-8')
    except UnicodeDecodeError:
        raise BadAPIRequest(error='File could not be decoded as UTF-8.')
    return decoded_data, posted_file.mimetype
def post(self, observation_id, *args):
    """
    ---
    summary: Update Observation metadata.
    tags:
    - Observations
    parameters:
    - observation_id
    requestBody:
      description: >-
        JSON object of observation metadata to update.
        If 'uncertainty' is explicitly set to null, the value
        will be cleared from the stored metadata.
      required: True
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ObservationUpdate'
    responses:
      200:
        description: Observation updated successfully
        content:
          application/json:
            schema:
              type: string
              format: uuid
              description: The uuid of the updated observation.
        headers:
          Location:
            schema:
              type: string
              format: uri
              description: Url of the updated observation.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    try:
        updates = ObservationUpdateSchema().load(request.get_json())
    except ValidationError as err:
        raise BadAPIRequest(err.messages)
    get_storage().update_observation(observation_id, **updates)
    response = make_response(observation_id, 200)
    response.headers['Location'] = url_for(
        'observations.single', observation_id=observation_id)
    return response
def restrict_forecast_upload_window(extra_parameters, get_forecast,
                                    first_time):
    """
    Check that the first_time falls within the window before the
    next initialization time of the forecast from the current time.
    Accounts for forecast lead_time_to_start and interval_label.
    Requires 'read' permission on the forecast in question.

    Parameters
    ----------
    extra_parameters : str
        The extra_parameters string for the forecast. If
        '"restrict_upload": true' is not found in the string, no
        restriction occurs and this function returns immediately.
    get_forecast : func
        Function to get the forecast from the database.
    first_time : datetime-like
        First timestamp in the posted forecast timeseries.

    Raises
    ------
    NotFoundException
        When the user does not have 'read' permission for the forecast
        or it doesn't exist.
    BadAPIRequest
        If the first_time of the timeseries is not consistent for the
        next initialization time of the forecast.
    """
    if not _restrict_in_extra(extra_parameters):
        return
    try:
        fx_dict = get_forecast().copy()
    except (StorageAuthError, NotFoundException):
        # Report auth failures as 404 so we don't leak existence info.
        raise NotFoundException(
            errors={'404': 'Cannot read forecast or forecast does not exist'})
    # we don't care about the axis or constant values for probabilistic
    # forecasts, so substitute a dummy site before building the Forecast.
    fx_dict['site'] = Site('name', 0, 0, 0, 'UTC')
    fx = Forecast.from_dict(fx_dict)
    next_issue_time = fx_utils.get_next_issue_time(
        fx, _current_utc_timestamp())
    expected_start = next_issue_time + fx.lead_time_to_start
    # For 'ending'-labeled intervals the first label is one interval
    # after the start of the data window.
    if fx.interval_label == 'ending':
        expected_start += fx.interval_length
    if first_time != expected_start:
        raise BadAPIRequest(
            errors={
                'issue_time': (
                    f'Currently only accepting forecasts issued for {next_issue_time}.'
                    f' Expecting forecast series to start at {expected_start}.')
            })
def parse_values(decoded_data, mimetype):
    """Attempts to parse a string of data into a DataFrame based on
    MIME type.

    Parameters
    ----------
    decoded_data: str
        A string of data to parse.
    mimetype: str
        The MIME type of the data.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    BadAPIRequest
        - If the MIME type is not one of 'text/csv', 'application/json',
          or 'application/vnd.ms-excel'
        - If parsing fails, see parse_json or parse_csv for conditions.
        - If the file contains more than the maximum allowed number of
          datapoints.
    """
    if mimetype in ('text/csv', 'application/vnd.ms-excel'):
        values = parse_csv(decoded_data)
    elif mimetype == 'application/json':
        values = parse_json(decoded_data)
    else:
        raise BadAPIRequest(error="Unsupported Content-Type or MIME type.")
    if values.index.size > current_app.config.get('MAX_POST_DATAPOINTS'):
        raise BadAPIRequest({
            'error': ('File exceeds maximum number of datapoints. '
                      f'{current_app.config.get("MAX_POST_DATAPOINTS")} '
                      f'datapoints allowed, {values.index.size} datapoints '
                      'found in file.')
        })
    return values
def post(self, *args):
    """
    ---
    summary: Create observation.
    tags:
    - Observations
    description: >-
      Create a new Observation by posting metadata. Note that POST
      requests to this endpoint without a trailing slash will result
      in a redirect response.
    requestBody:
      description: JSON representation of an observation.
      required: True
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ObservationDefinition'
    responses:
      201:
        description: Observation created successfully
        content:
          application/json:
            schema:
              type: string
              format: uuid
              description: The uuid of the created observation.
        headers:
          Location:
            schema:
              type: string
              format: uri
              description: Url of the created observation.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    try:
        observation = ObservationPostSchema().load(request.get_json())
    except ValidationError as err:
        raise BadAPIRequest(err.messages)
    observation_id = get_storage().store_observation(observation)
    response = make_response(observation_id, 201)
    response.headers['Location'] = url_for(
        'observations.single', observation_id=observation_id)
    return response
def post(self, *args):
    """
    ---
    summary: Create aggregate.
    tags:
    - Aggregates
    description: >-
      Create a new Aggregate by posting metadata. Note that POST
      requests to this endpoint without a trailing slash will result
      in a redirect response.
    requestBody:
      description: JSON respresentation of an aggregate.
      required: True
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/AggregateDefinition'
    responses:
      201:
        description: Aggregate created successfully
        content:
          application/json:
            schema:
              type: string
              format: uuid
              description: The uuid of the created aggregate.
        headers:
          Location:
            schema:
              type: string
              format: uri
              description: Url of the created aggregate.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
    """
    try:
        aggregate = AggregatePostSchema().load(request.get_json())
    except ValidationError as err:
        raise BadAPIRequest(err.messages)
    aggregate_id = get_storage().store_aggregate(aggregate)
    response = make_response(aggregate_id, 201)
    response.headers['Location'] = url_for(
        'aggregates.single', aggregate_id=aggregate_id)
    return response
def post(self):
    """
    ---
    summary: Create a new report.
    tags:
    - Reports
    requestBody:
      description: Metadata of the report to create.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ReportMetadata'
    responses:
      201:
        description: Report created successfully.
        content:
          application/json:
            schema:
              type: string
              format: uuid
              description: The uuid of the created report.
        headers:
          Location:
            schema:
              type: string
              format: uri
              description: Url of the created report.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    report = request.get_json()
    # The posted JSON is stored as-is, so only validate it here rather
    # than loading it through the schema.
    errs = ReportPostSchema().validate(report)
    if errs:
        raise BadAPIRequest(errs)
    report_id = get_storage().store_report(report)
    response = make_response(report_id, 201)
    response.headers['Location'] = url_for('reports.single',
                                           report_id=report_id)
    # Kick off report computation in the background.
    enqueue_report(report_id, request.url_root.rstrip('/'))
    return response
def validate_event_data(data):
    """
    Validate that the data is either 0 or 1

    Parameters
    ----------
    data : pd.Dataframe with 'value' column

    Raises
    ------
    BadApiRequest
       If there are any errors
    """
    is_event_value = data['value'].eq(0) | data['value'].eq(1)
    if not is_event_value.all():
        # Report the positional locations of the offending rows.
        bad_locs = is_event_value.reset_index()[
            ~is_event_value.values].index.astype('str')
        raise BadAPIRequest({
            'value': ['Invalid event values at locations %s'
                      % ', '.join(bad_locs)]
        })
def validate_latitude_longitude():
    """Validates latitude and longitude parameters

    Returns
    -------
    latitude: float
    longitude: float

    Raises
    ------
    BadAPIRequest
       If latitude and longitude values are not provided
       or not in range.
    """
    errors = {}
    lat = request.args.get('latitude', None)
    lon = request.args.get('longitude', None)
    if lat is not None:
        try:
            lat = float(lat)
        except ValueError:
            errors.update({'latitude': ['Must be a float']})
        else:
            if lat > 90 or lat < -90:
                errors.update({'latitude': ['Must be within [-90, 90].']})
    else:
        errors.update({'latitude': ['Must provide a latitude']})
    if lon is not None:
        try:
            lon = float(lon)
        except ValueError:
            errors.update({'longitude': ['Must be a float']})
        else:
            if lon > 180 or lon < -180:
                # The check accepts both endpoints, so the message uses a
                # closed interval (previously read "(-180, 180]" which did
                # not match the code and was inconsistent with latitude).
                errors.update({'longitude': ['Must be within [-180, 180].']})
    else:
        errors.update({'longitude': ['Must provide a longitude']})
    if errors:
        raise BadAPIRequest(errors)
    return lat, lon
def post(self, report_id):
    """
    ---
    summary: Store processed values used in the report.
    tags:
    - Reports
    parameters:
    - report_id
    requestBody:
      description: >-
        JSON object mapping uuids to processed data used in report
        generation.
      required: True
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ReportValuesPostSchema'
    responses:
      201:
        description: UUID of the stored values.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    # while developing, just read data as string and then
    # storage will convert to bytes for the blob column
    try:
        report_values = ReportValuesPostSchema().load(request.get_json())
    except ValidationError as err:
        raise BadAPIRequest(err.messages)
    value_id = get_storage().store_report_values(
        report_id,
        report_values['object_id'],
        report_values['processed_values'])
    return value_id, 201
def post(self):
    """
    ---
    summary: Create a new Role.
    tags:
      - Roles
    responses:
      201:
        description: Role created successfully.
        content:
          application/json:
            schema:
              type: string
              format: uuid
              description: The uuid of the created role.
        headers:
          Location:
            schema:
              type: string
              format: uri
              description: Url of the created role.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    try:
        role = RolePostSchema().load(request.get_json())
    except ValidationError as err:
        raise BadAPIRequest(err.messages)
    role_id = get_storage().store_role(role)
    response = make_response(role_id, 201)
    response.headers['Location'] = url_for('roles.single', role_id=role_id)
    return response
def validate_observation_values(observation_df, quality_flag_range=(0, 1)):
    """
    Validate the columns of an observation value DataFrame.

    Parameters
    ----------
    observation_df : pandas.DataFrame
        DataFrame to validate columns and values
    quality_flag_range : tuple, default (0, 1)
        Range of allowable quality_flag

    Returns
    -------
    pandas.DataFrame
       With types adjusted as appropriate

    Raises
    ------
    BadAPIRequest
        For any errors in the columns or values
    """
    errors = defaultdict(list)
    # Coerce 'value' to the smallest float dtype that fits.
    try:
        observation_df['value'] = pd.to_numeric(observation_df['value'],
                                                downcast='float')
    except KeyError:
        errors['value'].append('Missing "value" field.')
    except ValueError:
        errors['value'].append(
            'Invalid item in "value" field. Ensure that all '
            'values are integers, floats, empty, NaN, or NULL.')
    # Coerce 'timestamp' to timezone-aware UTC datetimes.
    try:
        observation_df['timestamp'] = pd.to_datetime(
            observation_df['timestamp'], utc=True)
    except KeyError:
        errors['timestamp'].append('Missing "timestamp" field.')
    except ValueError:
        errors['timestamp'].append('Invalid item in "timestamp" field. Ensure '
                                   'that timestamps are ISO8601 compliant')
    # Quality flags are only validated, not coerced; the original dtype
    # is preserved in the returned frame.
    try:
        observation_df['quality_flag'].astype(int)
    except KeyError:
        errors['quality_flag'].append('Missing "quality_flag" field.')
    except (ValueError, TypeError):
        errors['quality_flag'].append(
            'Item in "quality_flag" field is not an integer.')
    else:
        # Catch float flags like 1.5 that astype(int) would truncate.
        if not np.isclose(
                observation_df['quality_flag'].mod(1), 0, 1e-12).all():
            errors['quality_flag'].append(
                'Item in "quality_flag" field is not an integer.')
        if not observation_df['quality_flag'].between(
                *quality_flag_range).all():
            errors['quality_flag'].append(
                'Item in "quality_flag" field out of range '
                f'{quality_flag_range}.')
    if errors:
        raise BadAPIRequest(errors)
    return observation_df
def post(self, aggregate_id, *args):
    """
    ---
    summary: Update an aggregate.
    description: >-
      Update an aggregate name, extra_parameters, timezone, description,
      and add or remove observations (i.e. only one of 'effective_until'
      or 'effective_from' may be specified per observation). If an
      observation is already part of an aggregate, effective_until must
      be set before it can be added again. Any attempt to set
      'effective_until' will apply to all observations with the given ID
      in the aggregate.
    tags:
      - Aggregates
    parameters:
    - aggregate_id
    requestBody:
      required: True
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/AggregateMetadataUpdate'
    responses:
      200:
        description: Successfully updated aggregate metadata.
        content:
          application/json:
            schema:
              type: string
              format: uuid
              description: The uuid of the created aggregate.
        headers:
          Location:
            schema:
              type: string
              format: uri
              description: Url of the updated aggregate.
      400:
        $ref: '#/components/responses/400-BadRequest'
      401:
        $ref: '#/components/responses/401-Unauthorized'
      404:
        $ref: '#/components/responses/404-NotFound'
    """
    # post for consistency with other endpoints
    data = request.get_json()
    try:
        aggregate = AggregateUpdateSchema().load(data)
    except ValidationError as err:
        raise BadAPIRequest(err.messages)
    storage = get_storage()
    # Validate every observation change before applying any of them so
    # a bad entry doesn't leave the aggregate partially updated.
    self._check_post_for_errs(
        aggregate_id, aggregate.get('observations', []), storage)
    # The index from enumerate() was unused; iterate directly.
    for update_obs in aggregate.pop('observations', []):
        obs_id = str(update_obs['observation_id'])
        if 'effective_from' in update_obs:
            storage.add_observation_to_aggregate(
                aggregate_id, obs_id, update_obs['effective_from'])
        elif 'effective_until' in update_obs:
            storage.remove_observation_from_aggregate(
                aggregate_id, obs_id, update_obs['effective_until'])
    # Any remaining keys are plain metadata updates.
    if aggregate:
        storage.update_aggregate(aggregate_id, **aggregate)
    response = make_response(aggregate_id, 200)
    response.headers['Location'] = url_for('aggregates.single',
                                           aggregate_id=aggregate_id)
    return response
def test_badapirequest_kwargs():
    # error_dict is a module-level fixture (defined elsewhere in the
    # test module); presumably maps 'error'/'error2' to 'message' and
    # 'error3' to ('went', 'wrong') — verify against the module.
    exc = BadAPIRequest(**error_dict)
    assert exc.status_code == 400
    # Keyword-constructed errors should be normalized into lists.
    assert exc.errors['error'] == ['message']
    assert exc.errors['error2'] == ['message']
    assert exc.errors['error3'] == [('went', 'wrong')]