def close_transaction(program, project, transaction_id):
    """
    Close a dry-run transaction so that it can no longer be committed.

    The transaction log is looked up by ``transaction_id``.  A
    ``NotFoundError`` is raised when no such log exists; a ``UserError`` is
    raised when the log is already closed, is not a dry run, or has already
    been committed.  Otherwise the log is flagged as closed and a JSON
    confirmation is returned.
    """
    with flask.current_app.db.session_scope():
        tx_model = models.submission.TransactionLog
        lookup = flask.current_app.db.nodes(tx_model).filter(
            tx_model.id == transaction_id)
        try:
            tx_log = lookup.one()
        except sqlalchemy.orm.exc.NoResultFound:
            not_found_msg = (
                'Unable to find transaction_log with id {} for project {}'
                .format(transaction_id, '{}-{}'.format(program, project)))
            raise NotFoundError(not_found_msg)

        # Work out why (if at all) the log must not be closed.  The checks
        # are ordered: already closed, not a dry run, already committed.
        refusal = None
        if tx_log.closed:
            refusal = "This transaction log is already closed."
        elif tx_log.is_dry_run is False:
            refusal = ("This transaction log is not a dry run. "
                       "Closing it would have no effect.")
        elif tx_log.committed_by is not None:
            refusal = ("This transaction log has already been committed. "
                       "Closing it would have no effect.")
        if refusal is not None:
            raise UserError(refusal)

        tx_log.closed = True

        payload = {
            'code': 200,
            'message': 'Closed transaction.',
            'transaction_id': transaction_id,
        }
        return flask.jsonify(payload)
def _add_wrapper_to_bulk_transaction(transaction, wrapper, index):
    """
    Validate one bulk-upload wrapper, parse its document and add it to
    ``transaction``.

    ``wrapper`` must contain the keys ``doc_format``, ``doc`` and ``name``;
    ``index`` is only used to identify the wrapper in the error message.
    Raises ``UserError`` for missing keys or unparsable JSON and
    ``UnsupportedError`` for an unknown document format.
    """
    missing_keys = {"doc_format", "doc", "name"} - set(wrapper.keys())
    if missing_keys:
        raise UserError("Missing required field in document {}: {}".format(
            index, list(missing_keys)))

    # The format returned by the unpack helper is discarded: the format is
    # always re-read from the wrapper and lower-cased.
    name, doc, _ = unpack_bulk_wrapper(wrapper)
    doc_format = wrapper["doc_format"].lower()

    if doc_format == "json":
        try:
            data = utils.parse.parse_json(doc)
        except Exception as err:
            raise UserError("Unable to parse doc {}: {}".format(name, err))
    elif doc_format in ("tsv", "csv"):
        if doc_format == "tsv":
            converter_cls = utils.transforms.TSVToJSONConverter
        else:
            converter_cls = utils.transforms.CSVToJSONConverter
        # NOTE: the conversion errors returned here are not inspected.
        data, _conversion_errors = converter_cls().convert(doc)
    else:
        raise UnsupportedError(doc_format)

    transaction.add_doc(name, doc_format, doc, data)
def _add_wrapper_to_bulk_transaction(transaction, wrapper, index):
    """
    Validate one bulk-upload wrapper, parse its document and add it to
    ``transaction``.

    Args:
        transaction: object exposing ``add_doc(name, doc_format, doc, data)``
        wrapper (dict): must contain ``doc_format``, ``doc`` and ``name``
        index: position of the wrapper, used only in the error message

    Raises:
        UserError: if a required key is missing or a JSON doc cannot be
            parsed
        UnsupportedError: if ``doc_format`` is not json, tsv or csv
    """
    required_keys = {'doc_format', 'doc', 'name'}

    # Check object keys
    missing_keys = required_keys - set(wrapper.keys())
    if missing_keys:
        raise UserError('Missing required field in document {}: {}'.format(
            index, list(missing_keys)))

    name, doc, doc_format = unpack_bulk_wrapper(wrapper)

    # Parse doc.  The format is normalised to lower case because the
    # comparisons below use lower-case literals; upper-casing it made every
    # document fall through to UnsupportedError.
    doc_format = wrapper['doc_format'].lower()
    if doc_format == 'json':
        try:
            data = utils.parse.parse_json(doc)
        except Exception as e:
            raise UserError('Unable to parse doc {}: {}'.format(name, e))
    elif doc_format == 'tsv':
        # NOTE: conversion errors are returned but not inspected here.
        data, errors = utils.transforms.TSVToJSONConverter().convert(doc)
    elif doc_format == 'csv':
        data, errors = utils.transforms.CSVToJSONConverter().convert(doc)
    else:
        raise UnsupportedError(doc_format)

    # Add doc to transaction
    transaction.add_doc(name, doc_format, doc, data)
def handle_xml_transaction(role, program, project, parser, **tx_kwargs):
    """
    Handle XML transactions.

    ``parser`` must provide ``parser.loads(doc)`` to load a document and a
    ``parser.json`` property to retrieve the parsed docs.  Any failure while
    loading the request body is logged and reported as a ``UserError``.
    """
    known_parse_failures = (
        lxml.etree.XMLSchemaError,
        lxml.etree.XMLSyntaxError,
        lxml.etree.DocumentInvalid,
        SchemaError,
    )
    app_logger = flask.current_app.logger

    try:
        parser.loads(flask.request.get_data())
    except known_parse_failures as err:  # pylint: disable=catching-non-exception
        # Expected parsing problems: the detail is passed on to the user.
        app_logger.error(err)
        raise UserError("Unable to parse xml: {}".format(err))
    except Exception as err:
        # Anything else is logged with a traceback; no detail is exposed.
        app_logger.exception(err)
        raise UserError("Unable to parse xml")

    parsed_docs = parser.json
    raw_body = flask.request.get_data()
    doc_name = flask.request.headers.get("X-Document-Name", None)

    return _single_transaction(
        role, program, project, doc_name, "XML", raw_body, parsed_docs,
        **tx_kwargs)
def parse_ids(ids):
    """
    Parse a list of ids from `ids` of unknown type.

    Args:
        ids: valid types are string, unicode, and list of strings

    Return:
        list: ids from `ids` of unknown type

    Raises:
        UserError: if any ids are invalid
    """
    # `unicode` only exists on Python 2; referencing it unconditionally
    # raises NameError on Python 3, so resolve the accepted text types once.
    try:
        string_types = (str, unicode)  # noqa: F821 (Python 2 only)
    except NameError:
        string_types = (str,)

    if not ids:
        raise UserError('Please provide valid ids')

    if isinstance(ids, string_types):
        # A single comma-separated string is split into its parts.
        ids = ids.split(',')
    elif not isinstance(ids, list):
        raise UserError('Invalid list of ids: {}'.format(ids))

    # Assert that all entries in list are string or unicode
    if not all(isinstance(id_, string_types) for id_ in ids):
        raise UserError('Ids must be strings: {}'.format(ids))

    return ids
def close_transaction(program, project, transaction_id):
    """
    Close a transaction. A closed transaction can no longer be committed.

    Summary:
        Close a transaction

    Tags:
        dry run

    Args:
        program (str): |program_id|
        project (str): |project_id|
        transaction_id (int): transaction_id

    Responses:
        200: Success
        404: Resource not found.
        403: Unauthorized request.
    """
    db = flask.current_app.db
    with db.session_scope():
        log_cls = models.submission.TransactionLog
        try:
            tx_log = (
                db.nodes(log_cls).filter(log_cls.id == transaction_id).one()
            )
        except sqlalchemy.orm.exc.NoResultFound:
            raise NotFoundError(
                "Unable to find transaction_log with id {} for project {}".format(
                    transaction_id, "{}-{}".format(program, project)
                )
            )

        no_effect_suffix = "Closing it would have no effect."

        # A log can only be closed once.
        if tx_log.closed:
            raise UserError("This transaction log is already closed.")

        # Only dry runs can be closed.
        if tx_log.is_dry_run is False:
            raise UserError(
                "This transaction log is not a dry run. " + no_effect_suffix
            )

        # A committed dry run can no longer be closed.
        if tx_log.committed_by is not None:
            raise UserError(
                "This transaction log has already been committed. "
                + no_effect_suffix
            )

        tx_log.closed = True

        body = dict(
            code=200,
            message="Closed transaction.",
            transaction_id=transaction_id,
        )
        return flask.jsonify(body)
def commit_dry_run_transaction(program, project, transaction_id):
    """
    Commit a previously executed dry-run transaction.

    The call only succeeds when ``transaction_id`` refers to a transaction
    log that:

    1. is in one of the committable dry-run states,
    2. has not been closed,
    3. has not been committed already,
    4. is a dry run, and
    5. belongs to the given program and project.

    Raises ``NotFoundError`` when the log does not exist and ``UserError``
    when any of the conditions above is violated.  On success the
    transaction is resubmitted, the id of the new transaction is recorded
    in ``committed_by`` and the resubmission response is returned.
    """
    with flask.current_app.db.session_scope():
        log_cls = models.submission.TransactionLog
        lookup = flask.current_app.db.nodes(log_cls).filter(
            log_cls.id == transaction_id)
        try:
            tx_log = lookup.one()
        except sqlalchemy.orm.exc.NoResultFound:
            project_id = '{}-{}'.format(program, project)
            raise NotFoundError(
                'Unable to find transaction_log with id: {} for project {}'
                .format(transaction_id, project_id)
            )

        # State must allow committing.
        if tx_log.state not in STATES_COMITTABLE_DRY_RUN:
            state_msg = 'Unable to commit transaction log in state {}.'
            raise UserError(state_msg.format(tx_log.state))

        # Closed logs are never committed.
        if tx_log.closed:
            raise UserError('Unable to commit closed transaction log.')

        # A log can be committed only once.
        if tx_log.committed_by is not None:
            committed_msg = (
                "This transaction_log was committed already by transaction "
                "'{}'."
            )
            raise UserError(committed_msg.format(tx_log.committed_by))

        # Only dry runs can be committed this way.
        if tx_log.is_dry_run is not True:
            not_dry_msg = "Cannot submit transaction_log '{}', not a dry_run."
            raise UserError(not_dry_msg.format(tx_log.id))

        # The log must belong to the requested program/project.
        if tx_log.project != project or tx_log.program != program:
            wrong_project_msg = (
                "Cannot submit transaction_log '{}', in project {}-{}."
            )
            raise UserError(
                wrong_project_msg.format(tx_log.id, program, project))

        response, code = resubmit_transaction(tx_log)
        resubmitted = json.loads(response.get_data())
        tx_log.committed_by = resubmitted['transaction_id']

        return response, code
def handle_bulk_transaction(role, program, project, **tx_kwargs):
    """
    Entry point for bulk transactions.

    The request body is parsed as JSON and must be a list of dicts
    (wrappers); otherwise a ``UserError`` describing the expected format is
    raised.  A ``BulkUploadTransaction`` is then created and handed, with
    the wrappers, to ``bulk_transaction_worker`` either synchronously or
    through the application's async pool.

    Args:
        role: forwarded to the transaction
        program: forwarded to the transaction
        project: forwarded to the transaction
        **tx_kwargs: extra keyword arguments for the transaction; an
            ``is_async`` entry is popped and, when absent, the value is
            taken from the ``FLAG_IS_ASYNC`` request flag

    Returns:
        A JSON response carrying the transaction id when asynchronous, or
        the ``(response, code)`` pair produced by the worker otherwise.
    """
    wrappers = utils.parse.parse_request_json()

    # Assert wrapper is list of JSON objects
    invalid_format_msg = (
        'Bulk transfers must be an array of JSON objects of format: {\n'
        ' "name": string,\n'
        ' "doc_format": string,\n'
        ' "doc": string,\n'
        '}'
    )
    if not isinstance(wrappers, list):
        raise UserError(invalid_format_msg)
    for wrapper in wrappers:
        if not isinstance(wrapper, dict):
            raise UserError(invalid_format_msg)

    # `is_async` must not reach the transaction constructor, so pop it.
    is_async = tx_kwargs.pop('is_async', utils.is_flag_set(FLAG_IS_ASYNC))

    transaction = BulkUploadTransaction(
        program=program,
        project=project,
        role=role,
        user=flask.g.user,
        logger=flask.current_app.logger,
        signpost=flask.current_app.signpost,
        db_driver=flask.current_app.db,
        external_proxies=utils.get_external_proxies(),
        **tx_kwargs
    )

    if is_async:
        session = transaction.db_driver.session_scope(can_inherit=False)
        # The contexts are entered only to read the transaction id for the
        # immediate response.
        # NOTE(review): scheduling is assumed to happen after this `with`
        # block - confirm against the original layout.
        with session, transaction:
            response = {
                "code": 200,
                "message": "Transaction submitted.",
                "transaction_id": transaction.transaction_id,
            }
        flask.current_app.async_pool.schedule(
            bulk_transaction_worker, transaction, wrappers
        )
        return flask.jsonify(response)
    else:
        response, code = bulk_transaction_worker(transaction, wrappers)
        return flask.jsonify(response), code
def get_entities_by_id(program, project, entity_id_string):
    """
    Retrieve existing GDC entities by ID.

    The return type of a :http:method:`get` on this endpoint is a JSON array
    containing JSON object elements, each corresponding to a provided ID.
    Return results are unordered.

    If any ID is not found in the database, a status code of 404 is returned
    with the missing IDs.

    Args:
        program (str): |program_id|
        project (str): |project_id|
        entity_id_string (str):
            A comma-separated list of ids specifying the entities to
            retrieve.

    :reqheader Content-Type: |reqheader_Content-Type|
    :reqheader Accept: |reqheader_Accept|
    :reqheader X-Auth-Token: |reqheader_X-Auth-Token|
    :resheader Content-Type: |resheader_Content-Type|
    :statuscode 200: Success.
    :statuscode 404: Entity not found.
    :statuscode 403: Unauthorized request.
    """
    entity_ids = entity_id_string.split(',')
    with flask.current_app.db.session_scope():
        nodes = flask.current_app.db.nodes().ids(entity_ids).all()
        entities = {n.node_id: n for n in nodes}
        missing_entities = set(entity_ids) - set(entities.keys())
        if missing_entities:
            # `code` belongs to UserError, not to str.format (which silently
            # ignores unused keyword arguments); otherwise the documented
            # 404 is never produced.  Sorting keeps the message stable.
            raise UserError(
                'Not found: {}'.format(', '.join(sorted(missing_entities))),
                code=404,
            )
        return flask.jsonify(
            {'entities': utils.create_entity_list(entities.values())})
def get_file_from_index_by_hash(self):
    """
    Return the record entity from "signpost" (index client)

    NOTE: Should only ever be called for data and metadata files.

    Returns:
        The document returned by ``get_with_params`` for this entity's
        hashes and size, or ``None`` when the lookup is skipped or there
        are no parameters to look up.

    Raises:
        UserError: carrying the HTTP status code when the lookup fails with
            ``requests.HTTPError``
    """
    document = None

    # ################################################################
    # SignpostClient is used instead of IndexClient for the GDCAPI.
    # This means that the client doesn't have access to IndexClient's
    # methods, causing exceptions to occur.
    #
    # Temporary workaround until gdcapi uses indexd
    # ################################################################
    if not self._config.get("USE_SIGNPOST", False):
        # Check if there is an existing record with this hash and size, i.e.
        # this node already has an index record.
        params = self._get_file_hashes_and_size()

        # document: indexclient.Document
        # if `document` exists, `document.did` is the UUID that is already
        # registered in indexd for this entity.
        if params:
            try:
                document = self.transaction.signpost.get_with_params(params)
            except requests.HTTPError as e:
                # Format the exception itself: `e.message` does not exist on
                # Python 3 and would raise AttributeError, hiding the error.
                raise UserError(
                    code=e.response.status_code,
                    message="Fail to register the data node in indexd. Detail {}".format(
                        e
                    ),
                )

    return document
def get_file_from_index_by_hash(self):
    """
    Return the record entity from index client

    NOTE: Should only ever be called for data and metadata files.

    Returns:
        The document returned by ``get_with_params`` for this entity's
        hashes and size, or ``None`` when there are no parameters to look
        up.

    Raises:
        UserError: carrying the HTTP status code when the lookup fails with
            ``requests.HTTPError``
    """
    document = None

    # Check if there is an existing record with this hash and size, i.e.
    # this node already has an index record.
    params = self._get_file_hashes_and_size()

    # document: indexclient.Document
    # if `document` exists, `document.did` is the UUID that is already
    # registered in indexd for this entity.
    if params:
        try:
            document = self.transaction.index_client.get_with_params(
                params)
        except requests.HTTPError as e:
            # Format the exception itself: `e.message` does not exist on
            # Python 3 and would raise AttributeError, hiding the error.
            raise UserError(
                code=e.response.status_code,
                message="Fail to register the data node in indexd. Detail {}"
                .format(e),
            )

    return document
def delete_project(program, project):
    """
    Delete project under a specific program

    Summary:
        Delete a project

    Tags:
        project

    Args:
        program (str): |program_id|
        project (str): |project_id|

    Responses:
        204: Success.
        400: User error.
        404: Resource not found.
        403: Unauthorized request.
    """
    with flask.current_app.db.session_scope() as session:
        node = utils.lookup_project(flask.current_app.db, program, project)
        if node.edges_in:
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which leaked a stray backslash/indentation
            # into the user-facing message.
            raise UserError(
                "ERROR: Can not delete the project. "
                "Project {} is not empty".format(project)
            )
        transaction_args = dict(
            program=program,
            project=project,
            flask_config=flask.current_app.config,
        )
        with transactions.deletion.transaction.DeletionTransaction(
                **transaction_args) as trans:
            session.delete(node)
            # Record the deletion in the transaction log before committing.
            trans.claim_transaction_log()
            trans.write_transaction_log()
            session.commit()
            return flask.jsonify(trans.json), 204
def get_manifest(program, project):
    """
    Create a json manifest of the files.

    Summary:
        Get a manifest of data files

    Tags:
        file

    Args:
        program (str): |program_id|
        project (str): |project_id|

    Responses:
        200: Success
        400: User error.
        404: Resource not found.
        403: Unauthorized request.
    """
    raw_ids = flask.request.args.get("ids", "")
    id_string = raw_ids.strip()
    if id_string:
        requested_ids = id_string.split(",")
    else:
        raise UserError(
            "No ids specified. Use query parameter 'ids', e.g." " 'ids=id1,id2'."
        )

    docs = utils.manifest.get_manifest(program, project, requested_ids)

    # The manifest is served as a downloadable YAML attachment.
    manifest_yaml = yaml.safe_dump({"files": docs}, default_flow_style=False)
    response = flask.make_response(manifest_yaml)
    disposition = "attachment; filename={}".format("submission_manifest.yaml")
    response.headers["Content-Disposition"] = disposition
    return response
def get_manifest(program, project, ids):
    """
    Build the manifest entries for the requested ids.

    An ``ExportFile`` exporter is created for the ids; every exported node
    must be in the ``data_file`` category, otherwise a ``UserError`` listing
    all offending nodes is raised.

    :return: a list of file dictionary objects, each extended with a
        ``local_file_path`` entry taken from its ``file_name``
    """
    exporter = ExportFile(program=program, project=project, ids=ids)

    # Collect one message per node that is not a data file.
    errors = [
        '{} {} is not a data file.'.format(node.label, node.node_id)
        for node in exporter.nodes
        if node._dictionary['category'] not in ('data_file',)
    ]
    if errors:
        raise UserError('. '.join(errors))

    # The exporter groups files by type; flatten the groups into one list
    # and add the local_file_path to every entry.
    files = []
    for docs_of_type in exporter.result.values():
        for doc in docs_of_type:
            files.append(dict(local_file_path=doc.get('file_name'), **doc))
    return files
def commit(self, _=True):
    """
    If successful, write the result of this transaction to the database,
    otherwise rollback.

    The transaction log is written first in every case.  Then:

    * if the transaction was not successful, the log state is set to
      failed, the transaction is rolled back and a ``UserError`` carrying
      ``self.json`` is raised;
    * if this is a dry run, the log state is set to succeeded and the
      result of ``self.rollback()`` is returned;
    * otherwise the session is flushed and committed and the log state is
      set to succeeded.  Any exception in that step is logged, recorded in
      ``self.transactional_errors``, the log state is set to errored and
      the session is rolled back (the exception is not re-raised).

    The positional argument ``_`` is accepted but never used.
    """
    self.write_transaction_log()

    if not self.success:
        self.set_transaction_log_state(TX_LOG_STATE_FAILED)
        self.rollback()
        raise UserError(message='Bulk Transaction failed', json=self.json)

    if self.dry_run:
        # A dry run counts as succeeded but nothing is persisted.
        self.set_transaction_log_state(TX_LOG_STATE_SUCCEEDED)
        return self.rollback()

    try:
        self.flush()
        self.session.commit()
        self.set_transaction_log_state(TX_LOG_STATE_SUCCEEDED)
    except Exception as e:  # pylint: disable=broad-except
        # Deliberately broad: any failure is reported through
        # transactional_errors rather than propagated.
        self.logger.exception(e)
        msg = 'Unable to write to database, please try again'
        self.transactional_errors.append(msg)
        self.set_transaction_log_state(TX_LOG_STATE_ERRORED)
        self.session.rollback()
def delete_program(program):
    """
    Delete a program given program name. If the program is not empty raise
    an appropriate exception

    Summary:
        Delete a program

    Tags:
        program

    Args:
        program (str): |program_id|

    Responses:
        204: Success.
        400: User error.
        404: Program not found.
        403: Unauthorized request.
    """
    auth.current_user.require_admin()
    with flask.current_app.db.session_scope() as session:
        node = utils.lookup_program(flask.current_app.db, program)
        if node.edges_in:
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which leaked a stray backslash/indentation
            # into the user-facing message.
            raise UserError(
                "ERROR: Can not delete the program. "
                "Program {} is not empty".format(program)
            )
        session.delete(node)
        session.commit()
        return flask.jsonify({}), 204
def check_action_allowed_in_state(action, file_state):
    """
    Raise ``UserError`` when ``action`` is not permitted for a file that is
    currently in ``file_state``.

    Three rules are checked:

    * ``upload`` / ``initiate_multipart`` need a state in ``ALLOWED_STATES``
    * actions in ``UPLOADING_PARTS`` need ``UPLOADING_STATE``
    * ``get_file`` needs ``SUCCESS_STATE``
    """
    violations = (
        action in ["upload", "initiate_multipart"]
        and file_state not in ALLOWED_STATES,
        action in UPLOADING_PARTS and file_state != UPLOADING_STATE,
        action == "get_file" and file_state != SUCCESS_STATE,
    )
    if any(violations):
        message = "File in {} state, {} not allowed".format(file_state, action)
        raise UserError(message)
def delete_entities(program, project, ids, to_delete=None):
    """
    Delete existing GDC entities.

    Using the :http:method:`delete` on a project's endpoint will
    *completely delete* an entity.

    The GDC does not allow deletions or creations that would leave nodes
    without parents, i.e. nodes that do not have an entity from which they
    were derived. To prevent catastrophic mistakes, the current philosophy
    is to disallow automatic cascading of deletes. However, to inform a
    user which entities must be deleted for the target entity to be
    deleted, the API will respond with at least one entity that must be
    deleted prior to deleting the target entity.

    Summary:
        Delete entities

    Tags:
        entity

    Args:
        program (str): |program_id|
        project (str): |project_id|
        ids (str): A comma separated list of ids specifying the entities to
            delete. These ids must be official GDC ids.

    Query Args:
        to_delete (bool): Set the to_delete sysan as true or false. If none,
            then don't try to set the sysan, and instead delete the node.

    Responses:
        200: Entities deleted successfully
        400: User error.
        404: Entity not found.
        403: Unauthorized request.

    :reqheader Content-Type: |reqheader_Content-Type|
    :reqheader Accept: |reqheader_Accept|
    :reqheader X-Auth-Token: |reqheader_X-Auth-Token|
    :resheader Content-Type: |resheader_Content-Type|
    """
    ids_list = ids.split(",")
    fields = flask.request.args.get("fields")

    if to_delete is not None:
        # The flag arrives as text; turn it into a real boolean.
        flag_text = to_delete.lower()
        if flag_text not in ("false", "true"):
            raise UserError("to_delete value not true or false")
        to_delete = flag_text == "true"

    # `dry_run` is not a parameter of this function; it is resolved from the
    # surrounding scope (presumably an enclosing factory - confirm).
    return transactions.deletion.handle_deletion_request(
        program, project, ids_list, to_delete, dry_run=dry_run, fields=fields
    )
def check_action_allowed_in_state(action, file_state):
    """
    Raise ``UserError`` if ``action`` may not be performed on a file whose
    state is ``file_state``.
    """
    denied = False

    # Starting an upload requires one of the allowed states.
    if action in ['upload', 'initiate_multipart']:
        if file_state not in ALLOWED_STATES:
            denied = True

    # Part-upload actions require the uploading state.
    if action in UPLOADING_PARTS:
        if file_state != UPLOADING_STATE:
            denied = True

    # Fetching the file requires the success state.
    if action == 'get_file':
        if file_state != SUCCESS_STATE:
            denied = True

    if denied:
        raise UserError(
            'File in {} state, {} not allowed'.format(file_state, action))
def parse_request_yaml():
    """
    Parse the body of the current request as YAML and return the resulting
    Python object.

    Any exception raised while reading or parsing the body is converted
    into a ``UserError``.
    """
    try:
        body = flask.request.get_data()
        parsed = yaml.safe_load(body)
    except Exception as err:
        raise UserError("Unable to parse yaml: {}".format(err))
    else:
        return parsed
def get_node(project_id, uuid, db=None):
    """
    Return the node with id ``uuid`` that belongs to ``project_id``.

    ``db`` defaults to the current Flask application's database driver.
    Raises ``UserError`` when no matching node is found.
    """
    driver = flask.current_app.db if db is None else db
    with driver.session_scope():
        found = driver.nodes().ids(uuid).props(project_id=project_id).first()
    if not found:
        raise UserError("File {} doesn't exist in {}".format(uuid, project_id))
    return found
def delete_entities(program, project, ids, to_delete=None):
    """
    Delete existing GDC entities.

    Using the :http:method:`delete` on a project's endpoint will
    *completely delete* an entity.

    The GDC does not allow deletions or creations that would leave nodes
    without parents, i.e. nodes that do not have an entity from which they
    were derived. To prevent catastrophic mistakes, the current philosophy
    is to disallow automatic cascading of deletes. However, to inform a
    user which entities must be deleted for the target entity to be
    deleted, the API will respond with a list of entities that must be
    deleted prior to deleting the target entity.

    :param str program: |program_id|
    :param str project: |project_id|
    :param str ids:
        A comma separated list of ids specifying the entities to delete.
        These ids must be official GDC ids.
    :param bool to_delete:
        Set the to_delete sysan as true or false. If none, then don't try
        to set the sysan, and instead delete the node.
    :reqheader Content-Type: |reqheader_Content-Type|
    :reqheader Accept: |reqheader_Accept|
    :reqheader X-Auth-Token: |reqheader_X-Auth-Token|
    :resheader Content-Type: |resheader_Content-Type|
    :statuscode 200: Entities deleted successfully
    :statuscode 404: Entity not found.
    :statuscode 403: Unauthorized request.
    """
    ids_list = ids.split(',')
    fields = flask.request.args.get('fields')

    if to_delete is not None:
        # Setting the flag is restricted to admins.
        auth.admin_auth()

        # The flag arrives as text; map it onto a real boolean.
        text_to_bool = {'false': False, 'true': True}
        normalized = to_delete.lower()
        if normalized not in text_to_bool:
            raise UserError('to_delete value not true or false')
        to_delete = text_to_bool[normalized]

    # `dry_run` is not a parameter of this function; it is resolved from the
    # surrounding scope (presumably an enclosing factory - confirm).
    return transactions.deletion.handle_deletion_request(
        program,
        project,
        ids_list,
        to_delete,
        dry_run=dry_run,
        fields=fields,
    )
def validate_export_node(node_label):
    """
    Raise a ``UserError`` if there is any reason that nodes with the type
    specified by ``node_label`` should not be exported.

    Args:
        node_label (str): string of the node type

    Return:
        None

    Raises:
        UserError: if the node type is not in the dictionary schema, or its
            category is one of the unsupported export categories
    """
    is_known_type = node_label in dictionary.schema
    if not is_known_type:
        unknown_msg = "dictionary does not have node with type {}".format(
            node_label)
        raise UserError(unknown_msg)

    if get_node_category(node_label) in UNSUPPORTED_EXPORT_NODE_CATEGORIES:
        raise UserError("cannot export node with category `internal`")
def assert_project_state(self):
    """
    Check that the project's current state permits this transaction.

    Raises ``UserError`` when the project's state is not one of
    ``self.REQUIRED_PROJECT_STATES``.
    """
    current_state = utils.lookup_project(
        self.db_driver, self.program, self.project).state

    if current_state in self.REQUIRED_PROJECT_STATES:
        return

    acceptable = ' or '.join(self.REQUIRED_PROJECT_STATES)
    template = ("Project is in state '{}', which prevents {}. In order to"
                " perform this action, the project must be in state <{}>.")
    raise UserError(
        template.format(current_state, flask.request.path, acceptable))
def get_entities_by_id(program, project, entity_id_string):
    """
    Retrieve existing GDC entities by ID.

    The return type of a :http:method:`get` on this endpoint is a JSON array
    containing JSON object elements, each corresponding to a provided ID.
    Return results are unordered.

    If any ID is not found in the database, a status code of 404 is returned
    with the missing IDs.

    Summary:
        Get entities by ID

    Tags:
        entity

    Args:
        program (str): |program_id|
        project (str): |project_id|
        entity_id_string (str): A comma-separated list of ids specifying the
            entities to retrieve.

    Responses:
        200 (schema_entity_list): Success.
        400: User error.
        404: Entity not found.
        403: Unauthorized request.

    :reqheader Content-Type: |reqheader_Content-Type|
    :reqheader Accept: |reqheader_Accept|
    :reqheader X-Auth-Token: |reqheader_X-Auth-Token|
    :resheader Content-Type: |resheader_Content-Type|
    """
    entity_ids = entity_id_string.split(",")
    with flask.current_app.db.session_scope():
        # Ordinary nodes are restricted to this project; Project and Program
        # nodes are looked up by id only.
        dictionary_nodes = (
            flask.current_app.db.nodes()
            .ids(entity_ids)
            .props(project_id=program + "-" + project)
            .all()
        )
        project_nodes = (
            flask.current_app.db.nodes(models.Project).ids(entity_ids).all()
        )
        program_nodes = (
            flask.current_app.db.nodes(models.Program).ids(entity_ids).all()
        )

        nodes = []
        nodes.extend(dictionary_nodes)
        nodes.extend(project_nodes)
        nodes.extend(program_nodes)

        auth.check_resource_access(program, project, nodes)

        entities = {n.node_id: n for n in nodes}
        missing_entities = set(entity_ids) - set(entities.keys())
        if missing_entities:
            # `code` belongs to UserError, not to str.format (which silently
            # ignores unused keyword arguments); otherwise the documented
            # 404 is never produced.  Sorting keeps the message stable.
            raise UserError(
                "Not found: {}".format(", ".join(sorted(missing_entities))),
                code=404,
            )
        return flask.jsonify(
            {"entities": utils.create_entity_list(entities.values())}
        )
def convert(self, doc):
    """
    Add an entire document to the converter.

    Sets up the reader for ``doc`` and feeds every row it yields to
    ``add_row``.

    Returns:
        tuple: ``(self.docs, self.errors)`` gathered so far

    Raises:
        UserError: if setting up the reader or adding a row raises; the
            original exception is logged
    """
    try:
        self.set_reader(doc)
        # An explicit loop is required: `map` is lazy on Python 3, so
        # calling it only for its side effects would add no rows at all.
        for row in self.reader:
            self.add_row(row)
    except Exception as e:
        current_app.logger.exception(e)
        raise UserError("Unable to parse document")
    return self.docs, self.errors
def handle_single_transaction(role, program, project, **tx_kwargs):
    """
    Main entry point for single file transactions.

    This function multiplexes on the content-type to call the appropriate
    transaction handler.

    The request body is decoded as UTF-8.  ``text/csv`` bodies go through
    the CSV converter, ``text/tab-separated-values`` / ``text/tsv`` bodies
    through the TSV converter, and anything else is parsed as JSON.
    Converter errors are reported as a ``UserError``.

    Args:
        role: forwarded to the transaction
        program: forwarded to the transaction
        project: forwarded to the transaction
        **tx_kwargs: extra keyword arguments for the transaction;
            ``is_async`` (default: the ``FLAG_IS_ASYNC`` request flag) and
            ``db_driver`` (default: the application's db) are popped first

    Returns:
        A JSON response carrying the transaction id when asynchronous, or
        the ``(response, code)`` pair produced by the worker otherwise.
    """
    doc = flask.request.get_data().decode("utf-8")
    content_type = flask.request.headers.get("Content-Type", "").lower()
    # Only the CSV/TSV converters report errors; JSON leaves this as None.
    errors = None
    if content_type == "text/csv":
        doc_format = "csv"
        data, errors = utils.transforms.CSVToJSONConverter().convert(doc)
    elif content_type in ["text/tab-separated-values", "text/tsv"]:
        doc_format = "tsv"
        data, errors = utils.transforms.TSVToJSONConverter().convert(doc)
    else:
        doc_format = "json"
        data = utils.parse.parse_request_json()

    if errors:
        raise UserError("Unable to parse doc '{}': {}".format(doc, errors))

    name = flask.request.headers.get("X-Document-Name", None)
    # Positional arguments handed to the worker after the transaction.
    doc_args = [name, doc_format, doc, data]
    # Popped so that they are not passed to the constructor twice.
    is_async = tx_kwargs.pop("is_async", utils.is_flag_set(FLAG_IS_ASYNC))
    db_driver = tx_kwargs.pop("db_driver", flask.current_app.db)

    transaction = UploadTransaction(
        program=program,
        project=project,
        role=role,
        logger=flask.current_app.logger,
        flask_config=flask.current_app.config,
        index_client=flask.current_app.index_client,
        external_proxies=utils.get_external_proxies(),
        db_driver=db_driver,
        **tx_kwargs
    )

    if is_async:
        session = transaction.db_driver.session_scope(can_inherit=False)
        # The contexts are entered only to read the transaction id for the
        # immediate response.
        # NOTE(review): scheduling is assumed to happen after this `with`
        # block - confirm against the original layout.
        with session, transaction:
            response = {
                "code": 200,
                "message": "Transaction submitted.",
                "transaction_id": transaction.transaction_id,
            }
        flask.current_app.async_pool.schedule(
            single_transaction_worker, transaction, *doc_args
        )
        return flask.jsonify(response)
    else:
        response, code = single_transaction_worker(transaction, *doc_args)
        return flask.jsonify(response), code
def get_manifest(program, project):
    """
    Return a YAML manifest of the files named by the ``ids`` query
    parameter as a downloadable attachment.

    Raises ``UserError`` when the ``ids`` parameter is missing or blank.
    """
    id_string = flask.request.args.get('ids', '').strip()
    if id_string == '':
        no_ids_msg = ("No ids specified. Use query parameter 'ids', e.g."
                      " 'ids=id1,id2'.")
        raise UserError(no_ids_msg)

    docs = utils.manifest.get_manifest(
        program, project, id_string.split(','))

    manifest_body = yaml.safe_dump({'files': docs}, default_flow_style=False)
    response = flask.make_response(manifest_body)

    # Ask the client to save the manifest under a fixed file name.
    attachment_name = "submission_manifest.yaml"
    response.headers["Content-Disposition"] = (
        "attachment; filename={}".format(attachment_name))
    return response
def is_flag_set(flag, default=False):
    """
    Return the boolean value of the query-string flag ``flag``
    (e.g. "?async=true"), falling back to ``default`` when it is absent.

    Requires flask request context.  Raises ``UserError`` when the value is
    neither "true" nor "false" (case-insensitive).
    """
    raw_value = flask.request.args.get(flag, default)

    # A boolean can only come from `default`; return it untouched.
    if isinstance(raw_value, bool):
        return raw_value

    lowered = raw_value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False
    raise UserError("Boolean value not one of [true, false]")
def get_node_category(node_type):
    """
    Look up the category of the given node type.

    Args:
        node_type (str): the type of node

    Returns:
        the ``category`` entry of the node class's dictionary (``None``
        when the dictionary has no such entry)

    Raises:
        UserError: if no node class is registered for ``node_type``
    """
    node_cls = psqlgraph.Node.get_subclass(node_type)
    if node_cls is not None:
        return node_cls._dictionary.get("category")
    raise UserError('Node type "{}" not found in dictionary'.format(node_type))