def exchange_delegate_token(token, repository_id):
    """
    Exchange a token for a delegated token

    Written as a generator: it yields the pending ``oauth2.get_token`` call
    and hands the result back with ``raise Return(...)``.

    :param token: a JWT granting the onboarding service access to write on
        the client's behalf
    :param repository_id: the target repository's ID
    :returns: a new JWT authorized to write to the repository
    :raises: exceptions.HTTPError -- 403 when the token request fails with
        a 400 or 403, 500 for any other httpclient.HTTPError
    """
    try:
        new_token = yield oauth2.get_token(
            options.url_auth,
            options.service_id,
            options.client_secret,
            scope=oauth2.Write(repository_id),
            jwt=token,
            ssl_options=ssl_server_options())
    except httpclient.HTTPError as exc:
        if exc.code in (403, 400):
            try:
                body = json.loads(exc.response.body)
                errors = [x['message'] for x in body['errors']]
            except (AttributeError, KeyError, TypeError, ValueError):
                # AttributeError: the error carries no response object
                # ValueError: the body is not valid JSON
                # KeyError / TypeError: the JSON is not shaped like
                # {'errors': [{'message': ...}, ...]}
                # In all of these cases fall back to the client error's
                # own message instead of failing inside the handler.
                errors = exc.message
            raise exceptions.HTTPError(403, errors, source='authentication')
        else:
            msg = 'Error authorizing access to the repository'
            logging.exception(msg)
            raise exceptions.HTTPError(500, msg)

    raise Return(new_token)
def delete(self, repository_id):
    """
    Delete assets from a repository and respond with JSON containing an
    audit of the assets deleted

    :param repository_id: str
    :raises: exceptions.HTTPError with the status, errors and data reported
        by ``assets.delete`` when that call reports any errors
    """
    token = yield self.get_token(repository_id)

    # check the request itself before looking up the repository
    self.verify_content_type()
    self.verify_body_size()

    repository = yield get_repository(repository_id)
    location = repository['service']['location']

    body = self.request.body
    content_type = self.request.headers.get('Content-Type', None)
    r2rml_url = self.get_argument("r2rml_url", None)

    outcome = yield assets.delete(
        body,
        content_type,
        location,
        repository_id,
        token=token,
        r2rml_url=r2rml_url)
    data, http_status, errors = outcome

    if errors:
        raise exceptions.HTTPError(http_status, {
            'errors': errors,
            'data': data
        })

    self.finish({'status': 200, 'data': data})
def reduce_errors(errors):
    """
    Reduce a list of errors to the single error that should be reported

    The selected error is returned, not raised.

    :param errors: a list of exceptions; httpclient.HTTPError instances
        take precedence over any other exception type
    :return: one httpclient.HTTPError taken from ``errors``, or an
        exceptions.HTTPError with status 500 when ``errors`` contains no
        httpclient.HTTPError (which includes an empty list)
    """
    logging.info('Error: %s', errors)

    http_errors = [e for e in errors if isinstance(e, httpclient.HTTPError)]

    # if none of the errors are http errors return a
    # 500 internal error
    if not http_errors:
        logging.error('Non HTTP Errors: %s', errors)
        # an empty list has no first error to take the details from
        details = errors[0].args if errors else 'Internal Server Error'
        return exceptions.HTTPError(500, details)

    # if there is only one http error, return it
    if len(http_errors) == 1:
        return http_errors[0]

    # if any error is a bad request, the request is bad
    bad_requests = [err for err in http_errors if err.code == 400]
    if bad_requests:
        return bad_requests[0]

    # TODO we need to build up the scenarios as we go along
    # for now, just return the first http error. This also covers the case
    # where every http error is a 500: the result is always taken from
    # http_errors, never from the unfiltered list, so a non-HTTP exception
    # that happens to come first is not returned by mistake.
    return http_errors[0]
def raise_client_http_error(error):
    """
    Raise the httpclient.HTTPError as exceptions.HTTPError

    The decoded JSON body of the client response is used as the error
    payload. When the error carries no response, or its body cannot be
    decoded as JSON, ``error.message`` is used instead so that the original
    status code is still propagated.

    :param error: an httpclient.HTTPError
    :raise: exceptions.HTTPError
    """
    try:
        payload = json.loads(error.response.body)
    except (AttributeError, TypeError, ValueError):
        # AttributeError: error.response is None
        # TypeError: the response has no body
        # ValueError: the body is not valid JSON
        payload = error.message
    raise exceptions.HTTPError(error.code, payload)
def get_token(self, repository_id):
    """Get a token granting access to the repository

    The last whitespace-separated part of the ``Authorization`` request
    header is exchanged for a delegated token via
    ``exchange_delegate_token``.

    :param repository_id: the repository the new token should give access to
    :returns: the token produced by ``exchange_delegate_token``
    :raises: exceptions.HTTPError (401) when the header is missing, empty
        or contains only whitespace
    """
    header = self.request.headers.get('Authorization')
    # split() returns an empty list for an empty or whitespace-only header,
    # so indexing it directly would raise IndexError instead of a 401
    parts = header.split() if header else []
    if not parts:
        raise exceptions.HTTPError(401, 'OAuth token not provided')

    new_token = yield exchange_delegate_token(parts[-1], repository_id)
    raise Return(new_token)
def verify_body_size(self):
    """Reject the request when its declared body size is over the limit

    The size is read from the ``Content-Length`` request header (0 when the
    header is absent).

    :raises: exceptions.HTTPError (400) when the declared size is greater
        than ``options.max_post_body_size``
    """
    declared_size = long(self.request.headers.get('Content-Length', 0))
    logging.debug("Request size:{}".format(declared_size))

    if declared_size > options.max_post_body_size:
        template = 'Content length:{} is too large. Max allowed is:{}'
        raise exceptions.HTTPError(
            400,
            template.format(declared_size, options.max_post_body_size))
def get(self, repository_id):
    """Respond with the 'last' value recorded for a repository

    :param repository_id: key looked up in ``self.repositories``
    :raises: exceptions.HTTPError (404) when ``self.repositories`` has no
        truthy entry for ``repository_id``
    """
    entry = self.repositories.get(repository_id)
    if not entry:
        raise exceptions.HTTPError(404, 'Not found')

    last = entry.get('last')
    # a falsy 'last' value is passed through unchanged
    self.finish({
        'status': 200,
        'last_indexed': last.isoformat() if last else last
    })
def verify_content_type(self):
    """Raise a 415 Unsupported Media Type error if invalid Content-Type

    Only the part of the header before the first ';' is compared against
    ``self.CONTENT_TYPES``; a missing header is treated as an empty string.
    """
    header = self.request.headers.get('Content-Type', '')
    media_type = header.split(';')[0]
    if media_type in self.CONTENT_TYPES:
        return

    template = ('Unsupported content type "{}". '
                'Content-Type header must be one of {}')
    raise exceptions.HTTPError(
        415, template.format(header, self.CONTENT_TYPES))
def delete(self, entity_type, source_id_type, source_id, repository_id):
    """
    Obtain the list of entities matching source_id/type in a repository
    and delete them

    :param entity_type: the type of the entity to get data for
    :param source_id_type: the type of the id (this can be a
        comma-separated list)
    :param source_id: the id of the entity (this can be a comma-separated
        list)
    :param repository_id: the id of the repository to delete the entity
        from
    :return: status 204 No Content on success or error code
    :raises: exceptions.HTTPError (404) when the two lists differ in length
        or when the database delete raises exceptions.HTTPError
    """
    id_types = source_id_type.split(',')
    ids = source_id.split(',')

    if len(id_types) != len(ids):
        # The message has two placeholders, so it must be given exactly two
        # values; passing a third one raises TypeError ("not all arguments
        # converted") instead of the intended HTTPError.
        raise exceptions.HTTPError(
            404,
            'Inconsistent source_id/source_type_id lengths (%r,%r)'
            % (source_id_type, source_id))

    all_ids = [
        {
            'source_id_type': quote_plus(id_type),
            'source_id': quote_plus(entity_id)
        }
        for id_type, entity_id in zip(id_types, ids)
    ]

    try:
        yield self.database.delete(entity_type, all_ids, repository_id)
    except exceptions.HTTPError:
        # Raise a 404 because the URL contains invalid IDs that do not exist
        raise exceptions.HTTPError(
            404,
            'Not found (%r,%r,%r)'
            % (source_id_type, source_id, repository_id))

    result = {'status': 204}
    self.finish(result)
def get(self, entity_type, source_id_type, source_id):
    """
    Look up the repositories that know about an asset, based on its id

    :param entity_type: the type of the entity to get data for
    :param source_id_type: the type of the id
    :param source_id: the id of the entity
    :return: JSON object with status 200 and the first query result as
        ``data``
    :raises: exceptions.HTTPError (404) when the query raises
        exceptions.HTTPError or the result has neither repositories nor
        relations
    """
    try:
        requested_depth = int(
            self.get_argument("related_depth", default="0"))
    except ValueError:
        requested_depth = 0
    # keep the depth non-negative and capped by options.max_related_depth
    depth = max(0, min(options.max_related_depth, requested_depth))

    lookup = {
        'source_id_type': quote_plus(source_id_type),
        'source_id': quote_plus(source_id)
    }
    try:
        results = yield self.database.query([lookup], depth)
    except exceptions.HTTPError:
        # Raise a 404 because the URL contains an invalid ID that does
        # not exist
        raise exceptions.HTTPError(
            404, 'Not found (%r,%r)' % (source_id_type, source_id))

    data = results[0]
    has_content = data.get('repositories') or data.get('relations')
    if not has_content:
        raise exceptions.HTTPError(404, 'Not found')

    self.finish({'status': 200, 'data': data})
def transform(data, content_type, r2rml_url):
    """
    Transforms source data into RDF triples

    Written as a generator: it yields the pending token and transformation
    requests and hands the result back with ``raise Return(...)``.

    :param data: the source data
    :param content_type: the http request content type
    :param r2rml_url: karma mapping file url; when truthy it is appended to
        the request path as the ``r2rml_url`` query parameter
    :return: a tuple ``(response, http_status, errors)``. On success
        ``http_status`` is 200 and ``errors`` is empty; when the
        transformation request raises httpclient.HTTPError the tuple holds
        that error's response, its code and its errors
    :raises: exceptions.HTTPError (500) when no token can be obtained for
        the transformation service
    """
    logging.debug('>>> transform')

    response = None
    http_status = 200
    errors = []

    try:
        token = yield oauth2.get_token(
            options.url_auth,
            options.service_id,
            options.client_secret,
            scope=oauth2.Write(options.url_transformation),
            ssl_options=ssl_server_options())
    except httpclient.HTTPError:
        logging.exception('Error getting token for the transformation service')
        raise exceptions.HTTPError(500, 'Internal Server Error')

    headers = {'Accept': 'application/json', 'Content-Type': content_type}
    client = API(options.url_transformation,
                 token=token,
                 ssl_options=ssl_server_options())

    if r2rml_url:
        params = urlencode({'r2rml_url': r2rml_url})
        client.transformation.assets.path += '?{}'.format(params)

    try:
        client.transformation.assets.prepare_request(request_timeout=180,
                                                     headers=headers,
                                                     body=data)
        response = yield client.transformation.assets.post()
    except httpclient.HTTPError as exc:
        response = exc.response
        logging.exception('Transformation service error body:{}'.format(
            exc.response))
        http_status = exc.code
        try:
            errors = json.loads(exc.response.body)['errors']
        except (AttributeError, KeyError, TypeError, ValueError):
            # The error has no response (e.g. the request timed out), or
            # its body is not JSON with an 'errors' member. Report the
            # client error's own message so that a failed request is never
            # returned with an empty error list.
            # NOTE(review): assumes error entries are {'message': ...}
            # dicts like those of the auth service -- confirm.
            errors = [{'message': exc.message}]

    logging.debug('<<< transform')
    raise Return((response, http_status, errors))
def get_repository(repository_id):
    """
    Fetch a repository's record from the accounts service.

    :param repository_id: the repository ID
    :return: the ``data`` member of the accounts service response
    :raise: exceptions.HTTPError (404) when a KeyError occurs while
        fetching or reading the response
    """
    accounts_api = API(options.url_accounts, ssl_options=ssl_server_options())

    try:
        response = yield accounts_api.accounts.repositories[repository_id].get()
        repository = response['data']
    except KeyError:
        raise exceptions.HTTPError(404, 'Cannot find a repository')

    logging.debug(repository)
    raise Return(repository)
def post(self, entity_type):
    """Generate hub keys for the given entity type

    The JSON request body (an empty object when there is no body) is passed
    through ``validate``; one hub key is generated per requested ``count``.

    Responds with JSON containing the list of generated IDs, or raises a
    400 with the validation errors if an invalid param was provided.
    """
    raw_body = self.request.body
    body = json_decode(raw_body) if raw_body else {}

    result, errors = validate(body)
    if errors:
        raise exceptions.HTTPError(400, errors)

    hub_keys = []
    for _ in range(result['count']):
        hub_keys.append(
            generate_hub_key(result['resolver_id'],
                             options.hub_id,
                             result['repository_id'],
                             entity_type))

    self.finish({'status': 200, 'data': hub_keys})
def query(self, ids, related_depth=0):
    """
    Get repositories for a set of entities

    :param ids: a list of dictionaries containing "id" & "id_type" (or
        "source_id" & "source_id_type"). The dictionaries are updated in
        place during validation.
    :param related_depth: maximum depth when searching for related ids.
    :returns: the result of ``self._query_ids`` for the validated ids
    :raises: exceptions.HTTPError (400) carrying every entry that is missing
        an id or id type, or whose hub key cannot be parsed
    """
    validated_ids, errors = [], []
    for x in ids:
        # "id" / "id_type" are accepted as aliases of
        # source_id / source_id_type
        if 'id' in x:
            x['source_id'] = x['id']
            # An entry with "id" but no "id_type" must end up in the error
            # list below instead of raising KeyError here.
            if 'id_type' in x:
                x['source_id_type'] = x['id_type']

        if 'source_id_type' not in x or 'source_id' not in x:
            errors.append(x)
        elif x['source_id_type'] == HUB_KEY:
            try:
                parsed = hubkey.parse_hub_key(x['source_id'])
                x['source_id'] = parsed['entity_id']
                validated_ids.append(x)
            except ValueError:
                errors.append(x)
        else:
            # NOTE: internal representation of the index will use
            # id_type and id to construct URI and assumes that id_type
            # and id have been url_quoted
            x['source_id'] = urllib.quote_plus(x['source_id'])
            x['source_id_type'] = urllib.quote_plus(x['source_id_type'])
            validated_ids.append(x)

    if errors:
        raise exceptions.HTTPError(400, errors)

    result = yield self._query_ids(validated_ids, related_depth)
    raise gen.Return(result)
def mock_validate(_):
    """Stand-in validator that always fails with a 400 HTTPError"""
    error = exceptions.HTTPError(400, 'errormsg')
    raise error
def mock_validate(data, format=None):
    """Stand-in validator that ignores its arguments and always fails with
    a 400 HTTPError"""
    error = exceptions.HTTPError(400, 'errormsg')
    raise error
def mock_store(body, namespace, content_type):
    """Stand-in store function that ignores its arguments and always fails
    with a 400 HTTPError"""
    error = exceptions.HTTPError(400, 'errormsg')
    raise error
def delete(self, entity_type, ids, repository_id):
    """
    Delete the triples relating to an entity (if they're not used by
    another entity)

    :param entity_type: the type of the entity
    :param ids: a list of dictionaries containing "id" & "id_type" (or
        "source_id" & "source_id_type"). The dictionaries are updated in
        place during validation.
    :param repository_id: the repository from which to delete
    :raises: exceptions.HTTPError (400) carrying every entry that is missing
        an id or id type, or whose hub key cannot be parsed
    """
    # The mapped value is not used again in this method.
    # NOTE(review): presumably kept so an unknown entity type is rejected
    # by map_to_entity_type -- confirm.
    entity_type = self.map_to_entity_type(entity_type)

    validated_ids, errors = [], []
    for x in ids:
        # "id" / "id_type" are accepted as aliases of
        # source_id / source_id_type
        if 'id' in x:
            x['source_id'] = x['id']
            # An entry with "id" but no "id_type" must end up in the error
            # list below instead of raising KeyError here.
            if 'id_type' in x:
                x['source_id_type'] = x['id_type']

        if 'source_id_type' not in x or 'source_id' not in x:
            errors.append(x)
        elif x['source_id_type'] == HUB_KEY:
            try:
                parsed = hubkey.parse_hub_key(x['source_id'])
                x['source_id'] = parsed['entity_id']
                validated_ids.append(x)
            except ValueError:
                errors.append(x)
        else:
            # NOTE: internal representation of the index will use
            # id_type and id to construct URI and assumes that id_type
            # and id have been url_quoted
            x['source_id'] = urllib.quote_plus(x['source_id'])
            x['source_id_type'] = urllib.quote_plus(x['source_id_type'])
            validated_ids.append(x)

    if errors:
        raise exceptions.HTTPError(400, errors)

    # get all the entities that match the ids in this repo
    entities = yield self._getMatchingEntities(validated_ids, repository_id)
    logging.debug('searching for ids ' + str(validated_ids))
    logging.debug('found entities ' + str(entities))

    # for each entity find all the ids associated with it
    for entity in entities:
        idsAndTypes = yield self._getEntityIdsAndTypes(entity)
        logging.debug('for entity ' + str(entity) + ' got these ids ' +
                      str(idsAndTypes))

        assetMatch = yield self._checkIdsAndTypesIdentical(
            validated_ids, idsAndTypes)
        logging.debug('identical? : ' + str(assetMatch))

        if assetMatch:
            # loop through each set of ids for this entity
            for idAndType in idsAndTypes:
                # if these ids are NOT used for anything else then delete them
                result = yield self._countMatchesNotIncluding(
                    idAndType, entity)
                count = int(result[0].get('count', '0'))

                if count == 1:
                    yield self._deleteIds(idAndType)

            # delete the entity itself
            yield self._deleteEntity(entity)

    raise gen.Return()