def replace_workflow_module(project_id, branch_id, module_id):
    """Replace a module in the current project workflow branch and execute
    the resulting workflow.

    Request
    -------
    {
      "packageId": "string",
      "commandId": "string",
      "arguments": []
    }

    Raises InvalidRequest if the body is malformed or the command cannot be
    parsed; ResourceNotFound if project, branch or module are unknown.
    """
    # Abort with BAD REQUEST if request body is not in Json format or does
    # not contain the expected elements.
    cmd = srv.validate_json_request(
        request,
        required=['packageId', 'commandId', 'arguments'])
    # Extend and execute workflow. This will throw a ValueError if the
    # command cannot be parsed.
    try:
        # Result is None if project, branch or module are not found.
        modules = api.workflows.replace_workflow_module(
            project_id=project_id,
            branch_id=branch_id,
            module_id=module_id,
            package_id=cmd['packageId'],
            command_id=cmd['commandId'],
            arguments=cmd['arguments'])
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if modules is not None:
            return jsonify(modules)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' branch \'' + branch_id +
        '\' or module \'' + module_id + '\'')
def update_branch(project_id, branch_id):
    """Update properties for a given project workflow branch. Expects a set
    of key,value-pairs in the request body. Properties with given key but
    missing value will be deleted.

    Request
    -------
    {
      "properties": [
        {
          "key": "string",
          "value": "string"
        }
      ]
    }
    """
    # Abort with BAD REQUEST if request body is not in Json format or does
    # not contain a properties key.
    obj = srv.validate_json_request(request, required=['properties'])
    # Update properties for the given branch and return branch descriptor.
    # allow_null=True permits entries without a value (delete semantics).
    properties = deserialize.PROPERTIES(obj['properties'], allow_null=True)
    try:
        # Result is None if project or branch are not found.
        branch = api.branches.update_branch(
            project_id=project_id,
            branch_id=branch_id,
            properties=properties)
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if branch is not None:
            return jsonify(branch)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' or branch \'' +
        branch_id + '\'')
def get_dataset(project_id: str, dataset_id: str) -> str:
    """Get the dataset with given identifier that has been generated by a
    curation workflow.

    Reads optional query parameters for pagination (offset/limit) and a
    force-profiler flag. Raises InvalidRequest for negative offset or limit
    values; ResourceNotFound if project or dataset are unknown.
    """
    # Get dataset rows with offset and limit parameters.
    offset = request.args.get(PAGE_OFFSET)
    if offset is not None:
        offset = int(offset)
        if offset < 0:
            raise srv.InvalidRequest("Invalid Offset {}".format(offset))
    limit = request.args.get(PAGE_LIMIT)
    if limit is not None:
        limit = int(limit)
        if limit < 0:
            # Bug fix: this message previously said 'Offset' when the limit
            # was the invalid value.
            raise srv.InvalidRequest("Invalid Limit {}".format(limit))
    # Only the exact string "true" (case-insensitive) enables the profiler;
    # absence of the parameter leaves the value as None.
    force_profiler_str = request.args.get(FORCE_PROFILER)
    force_profiler: Optional[bool] = None
    if force_profiler_str is not None:
        force_profiler = force_profiler_str.lower() == "true"
    try:
        # Result is None if project or dataset are not found.
        dataset = api.datasets.get_dataset(
            project_id=project_id,
            dataset_id=dataset_id,
            offset=offset,
            limit=limit,
            force_profiler=force_profiler)
        if dataset is not None:
            return jsonify(dataset)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' or dataset \'' +
        dataset_id + '\'')
def append_branch_head(project_id, branch_id):
    """Append a module to the workflow that is at the HEAD of the given
    branch.

    Request
    -------
    {
      "packageId": "string",
      "commandId": "string",
      "arguments": []
    }
    """
    # Abort with BAD REQUEST if request body is not in Json format or does
    # not contain the expected elements.
    cmd = srv.validate_json_request(
        request, required=['packageId', 'commandId', 'arguments'])
    # Extend and execute workflow. This will throw a ValueError if the
    # command cannot be parsed.
    try:
        # Result is None if project or branch are not found.
        module = api.workflows.append_workflow_module(
            project_id=project_id,
            branch_id=branch_id,
            package_id=cmd['packageId'],
            command_id=cmd['commandId'],
            arguments=cmd['arguments'],
        )
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if module is not None:
            return jsonify(module)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' or branch \'' +
        branch_id + '\'')
def update_project(project_id):
    """Update the set of user-defined properties. Expects a Json object with
    a list of property update statements. These statements are (key,value)
    pairs, where the value is optional (i.e., missing value for delete
    statements).

    Request
    -------
    {
      "properties": [
        {
          "key": "string",
          "value": "scalar or list of scalars"
        }
      ]
    }
    """
    # Abort with BAD REQUEST if request body is not in Json format or if
    # any of the project property update statements are invalid.
    obj = srv.validate_json_request(request, required=['properties'])
    if not isinstance(obj['properties'], list):
        raise srv.InvalidRequest('expected a list of properties')
    # Update project and return the project descriptor. If no project with
    # the given identifier exists the update result will be None.
    properties = deserialize.PROPERTIES(obj['properties'], allow_null=True)
    try:
        pj = api.projects.update_project(project_id, properties)
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if pj is not None:
            return jsonify(pj)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound('unknown project \'' + project_id + '\'')
def create_branch(project_id):
    """Create a new branch for a project. Expects a description of the
    parent workflow in the request body together with an optional list of
    branch properties (e.g., containing a branch name).

    Request
    -------
    {
      "source": {
        "branchId": "string",
        "workflowId": "string",
        "moduleId": "string"
      },
      "properties": [
        {
          "key": "string",
          "value": "string"
        }
      ]
    }
    """
    # Abort with BAD REQUEST if request body is not in Json format or does
    # not contain the expected elements.
    obj = srv.validate_json_request(
        request, required=['properties'], optional=['source'])
    # Get the branch point. If the source is given the dictionary should at
    # most contain the three identifier.
    branch_id = None
    workflow_id = None
    module_id = None
    if 'source' in obj:
        source = obj['source']
        for key in source:
            if key == 'branchId':
                branch_id = source[key]
            elif key == 'workflowId':
                workflow_id = source[key]
            elif key == 'moduleId':
                module_id = source[key]
            else:
                # Reject unknown keys so typos do not silently create a
                # branch from the wrong branch point.
                raise srv.InvalidRequest(
                    'invalid element \'' + key + '\' for branch point')
    # Get the properties for the new branch.
    properties = deserialize.PROPERTIES(obj['properties'])
    # Create a new workflow. The result is the descriptor for the new
    # workflow or None if the specified project does not exist. Will raise
    # a ValueError if the specified workflow version or module do not exist.
    try:
        branch = api.branches.create_branch(
            project_id=project_id,
            branch_id=branch_id,
            workflow_id=workflow_id,
            module_id=module_id,
            properties=properties)
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if branch is not None:
            return jsonify(branch)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound('unknown project \'' + project_id + '\'')
def get_project(project_id):
    """Retrieve information for project with given identifier.

    Raises ResourceNotFound if no project with the identifier exists.
    """
    # Retrieve project serialization. If project does not exist the result
    # will be None.
    pj = api.projects.get_project(project_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if pj is not None:
        return jsonify(pj)
    raise srv.ResourceNotFound('unknown project \'' + project_id + '\'')
def cancel_task(task_id):
    """Cancel execution for a given task.

    Raises ResourceNotFound if no task with the identifier exists.
    """
    # Cancel the given task. The result is None if no task with the given
    # identifier exists.
    result = api.tasks.cancel_task(task_id=task_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if result is not None:
        return jsonify(result)
    raise srv.ResourceNotFound('unknown task \'' + task_id + '\'')
def get_branch(project_id, branch_id):
    """Get handle for a branch in a given project.

    Raises ResourceNotFound if the project or branch do not exist.
    """
    # Get the branch handle. The result is None if the project or the
    # branch do not exist.
    branch = api.branches.get_branch(project_id, branch_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if branch is not None:
        return jsonify(branch)
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' or branch \'' +
        branch_id + '\'')
def get_dataset_descriptor(dataset_id):
    """Get the descriptor for the dataset with given identifier.

    Raises InvalidRequest if the API rejects the request; ResourceNotFound
    if the dataset is unknown.
    """
    try:
        # The project identifier comes from the service configuration, not
        # the route.
        dataset = api.datasets.get_dataset_descriptor(
            project_id=config.project_id, dataset_id=dataset_id)
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if dataset is not None:
            return jsonify(dataset)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound('unknown dataset \'' + dataset_id + '\'')
def delete_workflow_module(project_id, branch_id, module_id):
    """Delete a module in the head workflow of a given project branch.

    Raises ResourceNotFound if project, branch or module are unknown.
    """
    # Result is None if the project, branch or module are not found.
    result = api.workflows.delete_workflow_module(
        project_id=project_id,
        branch_id=branch_id,
        module_id=module_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if result is not None:
        return jsonify(result)
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' branch \'' + branch_id +
        '\' or module \'' + module_id + '\'')
def get_branch_head(project_id, branch_id):
    """Get handle for a workflow at the HEAD of a given project branch."""
    # The API returns None when the project, branch or workflow is unknown.
    workflow = api.workflows.get_workflow(
        project_id=project_id, branch_id=branch_id)
    if workflow is None:
        raise srv.ResourceNotFound(
            'unknown project \'' + project_id + '\' or branch \'' +
            branch_id + '\'')
    return jsonify(workflow)
def delete_branch(project_id, branch_id):
    """Delete branch from a given project."""
    try:
        deleted = api.branches.delete_branch(
            project_id=project_id, branch_id=branch_id)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    # A falsy result means the project or branch was not found.
    if not deleted:
        raise srv.ResourceNotFound(
            'unknown project \'' + project_id + '\' or branch \'' +
            branch_id + '\'')
    # Successful delete returns an empty NO CONTENT response.
    return '', 204
def cancel_workflow(project_id, branch_id):
    """Cancel execution for all running and pending modules in the head
    workflow of a given project branch.

    Raises ResourceNotFound if the project or branch do not exist.
    """
    # Get the workflow handle. The result is None if the project, branch
    # or workflow do not exist.
    workflow = api.workflows.cancel_workflow(
        project_id=project_id, branch_id=branch_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if workflow is not None:
        return jsonify(workflow)
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' or branch \'' +
        branch_id + '\'')
def get_workflow_module(project_id, branch_id, module_id):
    """Get handle for a module in the head workflow of a given project
    branch.

    Raises ResourceNotFound if project, branch or module are unknown.
    """
    # Get the module handle. The result is None if the project, branch or
    # module do not exist.
    module = api.workflows.get_workflow_module(
        project_id=project_id,
        branch_id=branch_id,
        module_id=module_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if module is not None:
        return jsonify(module)
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' branch \'' + branch_id +
        '\' or module \'' + module_id + '\'')
def get_dataset_caveats(dataset_id: str) -> str:
    """Get annotations that are associated with the given dataset.

    Raises ResourceNotFound if the dataset is unknown.
    """
    # Expects at least a column or row identifier; both are optional query
    # parameters (column as int, row as str).
    column_id = request.args.get(labels.COLUMN, type=int)
    row_id = request.args.get(labels.ROW, type=str)
    # Get annotations for dataset with given identifier. The result is
    # None if no dataset with given identifier exists.
    annotations = api.datasets.get_caveats(
        project_id=config.project_id,
        dataset_id=dataset_id,
        column_id=column_id,
        row_id=row_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if annotations is not None:
        return jsonify(annotations)
    raise srv.ResourceNotFound('unknown dataset \'' + dataset_id + '\'')
def create_dataset(project_id):
    """Create a new dataset in the datastore for the given project. The
    dataset schema and rows are given in the request body. Dataset
    annotations are optional. The expected request body format is:

    {
      "columns": [
        {
          "id": 0,
          "name": "string",
          "type": "string"
        }
      ],
      "rows": [
        {
          "id": 0,
          "values": [
            "string"
          ]
        }
      ],
      "annotations": [
        {
          "columnId": 0,
          "rowId": 0,
          "key": "string",
          "value": "string"
        }
      ]
    }

    NOTE(review): the docstring mentions an "annotations" element, but the
    validation below only accepts an optional PROPERTIES element — confirm
    which contract is current.
    """
    # Validate the request.
    obj = srv.validate_json_request(
        request,
        required=[labels.COLUMNS, labels.ROWS],
        optional=[labels.PROPERTIES])
    columns = deserialize.DATASET_COLUMNS(obj[labels.COLUMNS])
    rows = [deserialize.DATASET_ROW(row) for row in obj[labels.ROWS]]
    properties = obj.get(labels.PROPERTIES, dict())
    try:
        # Result is None if the project is not found.
        dataset = api.datasets.create_dataset(
            project_id=project_id,
            columns=columns,
            rows=rows,
            properties=properties)
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if dataset is not None:
            return jsonify(dataset)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound('unknown project \'' + project_id + '\'')
def get_dataset(dataset_id):
    """Get the dataset with given identifier that has been generated by a
    curation workflow.

    Raises InvalidRequest if the API rejects the request; ResourceNotFound
    if the dataset is unknown.
    """
    # Get dataset rows with offset and limit parameters.
    # NOTE(review): offset/limit are passed through as raw strings from the
    # query args (no int conversion here, unlike the project-scoped variant)
    # — presumably the API coerces them; confirm in api.datasets.get_dataset.
    try:
        dataset = api.datasets.get_dataset(
            project_id=config.project_id,
            dataset_id=dataset_id,
            offset=request.args.get(PAGE_OFFSET),
            limit=request.args.get(PAGE_LIMIT))
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if dataset is not None:
            return jsonify(dataset)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound('unknown dataset \'' + dataset_id + '\'')
def download_file(project_id, file_id):
    """Download file from file server.

    Raises ResourceNotFound if project or file are unknown.
    """
    # Get handle for file from the project's filestore.
    f_handle = api.files.get_file(project_id, file_id)
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if f_handle is not None:
        # Compressed files are always served as gzip regardless of the
        # handle's recorded mimetype.
        if f_handle.compressed:
            mimetype = 'application/gzip'
        else:
            mimetype = f_handle.mimetype
        # NOTE(review): 'attachment_filename' was renamed 'download_name'
        # in Flask 2.0 — confirm the pinned Flask version before upgrading.
        return send_file(
            f_handle.filepath,
            mimetype=mimetype,
            attachment_filename=f_handle.file_name,
            as_attachment=True)
    raise srv.ResourceNotFound(
        'unknown project \'' + project_id + '\' or file \'' +
        file_id + '\'')
def get_dataset_chart_view(branch_id, workflow_id, module_id, chart_id):
    """Get content of a dataset chart view for a given workflow module.

    Raises InvalidRequest if the API rejects the request; ResourceNotFound
    if branch, workflow, module or chart are unknown.
    """
    try:
        # The project identifier comes from the service configuration.
        view = api.views.get_dataset_chart_view(
            project_id=config.project_id,
            branch_id=branch_id,
            workflow_id=workflow_id,
            module_id=module_id,
            chart_id=chart_id)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    # PEP 8: use 'is not None' rather than 'not ... is None'.
    if view is not None:
        return jsonify(view)
    raise srv.ResourceNotFound(''.join([
        'unknown branch \'' + branch_id,
        '\', workflow \'' + workflow_id,
        '\', module \'' + module_id,
        '\' or chart \'' + chart_id + '\''
    ]))
def update_dataset_annotation(dataset_id):
    """Update an annotation that is associated with a component of the
    given dataset.

    Request
    -------
    {
      "columnId": 0,
      "rowId": 0,
      "key": "string",
      "oldValue": "string", or "int", or "float"
      "newValue": "string", or "int", or "float"
    }
    """
    # Validate the request. 'key' is required; it was redundantly listed in
    # the optional set as well and has been removed from there.
    obj = srv.validate_json_request(
        request,
        required=['key'],
        optional=['columnId', 'rowId', 'oldValue', 'newValue']
    )
    # Create update statement and execute. The result is None if no
    # dataset with given identifier exists.
    # NOTE(review): labels.KEY etc. are presumably the same strings as the
    # literals in the required/optional lists above — confirm in the labels
    # module. The 'in obj' guards are kept to preserve behavior either way.
    key = obj[labels.KEY] if labels.KEY in obj else None
    column_id = obj[labels.COLUMN_ID] if labels.COLUMN_ID in obj else None
    row_id = obj[labels.ROW_ID] if labels.ROW_ID in obj else None
    old_value = obj[labels.OLD_VALUE] if labels.OLD_VALUE in obj else None
    new_value = obj[labels.NEW_VALUE] if labels.NEW_VALUE in obj else None
    try:
        annotations = api.datasets.update_annotation(
            project_id=config.project_id,
            dataset_id=dataset_id,
            key=key,
            column_id=column_id,
            row_id=row_id,
            old_value=old_value,
            new_value=new_value
        )
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if annotations is not None:
            return jsonify(annotations)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound('unknown dataset \'' + dataset_id + '\'')
def download_dataset(dataset_id):
    """Get the dataset with given identifier in CSV format."""
    # Fetch the dataset handle; it is None for unknown identifiers.
    _, dataset = api.datasets.get_dataset_handle(config.project_id, dataset_id)
    if dataset is None:
        raise srv.ResourceNotFound('unknown dataset \'' + dataset_id + '\'')
    # Serialize the column header followed by all rows into an in-memory
    # CSV buffer.
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow([column.name for column in dataset.columns])
    with dataset.reader() as rows:
        for row in rows:
            writer.writerow(row.values)
    # Wrap the buffer contents in a downloadable HTTP response.
    response = make_response(buffer.getvalue())
    response.headers["Content-Disposition"] = "attachment; filename=export.csv"
    response.headers["Content-type"] = "text/csv"
    return response
def upload_file(project_id):
    """Upload file (POST) - Upload a data file to the project's filestore.

    Raises InvalidRequest for a missing/empty file part or a rejected
    upload; ResourceNotFound if the project is unknown.
    """
    # The upload request may contain a file object or an Url from where to
    # download the data.
    if request.files and 'file' in request.files:
        file = request.files['file']
        # A browser may submit an empty part without filename.
        if file.filename == '':
            raise srv.InvalidRequest('empty file name')
        # Sanitize the client-supplied name before handing it to the
        # filestore.
        filename = secure_filename(file.filename)
        try:
            # Result is None if the project is not found.
            f_handle = api.files.upload_file(
                project_id=project_id,
                file=file,
                file_name=filename)
            # PEP 8: use 'is not None' rather than 'not ... is None'.
            if f_handle is not None:
                return jsonify(f_handle), 201
        except ValueError as ex:
            raise srv.InvalidRequest(str(ex))
    else:
        raise srv.InvalidRequest('no file or url specified in request')
    raise srv.ResourceNotFound('unknown project \'' + project_id + '\'')
def update_task_state(task_id):
    """Update the state of a running task.

    Raises InvalidRequest if the body is malformed; ResourceNotFound if the
    task is unknown.
    """
    # Abort with BAD REQUEST if request body is not in Json format or does
    # not contain the expected elements.
    obj = srv.validate_json_request(
        request,
        required=[labels.STATE],
        optional=[
            labels.STARTED_AT,
            labels.FINISHED_AT,
            labels.OUTPUTS,
            labels.PROVENANCE
        ])
    # Update task state. The contents of the request body depend on the
    # value of the new task state. The request body is evaluated by the
    # API. The API will raise a ValueError if the request body is invalid.
    try:
        # Result is None if task is not found.
        result = api.tasks.update_task_state(
            task_id=task_id,
            state=obj[labels.STATE],
            body=obj)
        # PEP 8: use 'is not None' rather than 'not ... is None'.
        if result is not None:
            return jsonify(result)
    except ValueError as ex:
        raise srv.InvalidRequest(str(ex))
    raise srv.ResourceNotFound('unknown task \'' + task_id + '\'')
def import_project():
    """Upload file (POST) - Upload a data files for a project.

    Expects a gzip-compressed tar archive containing 'ds/', 'fs/' and 'vt/'
    subtrees. The project identifier is recovered from the first 'ds/' entry,
    the archive is extracted into the engine data directory, the viztrails
    index is updated, and the global API is re-initialized.

    Raises InvalidRequest for a missing/empty file part, a malformed
    archive, or an already-existing project.
    """
    # The upload request may contain a file object or an Url from where to
    # download the data.
    if request.files and 'file' in request.files:
        file = request.files['file']
        # A browser may submit an empty part without filename.
        if file.filename == '':
            raise srv.InvalidRequest('empty file name')
        try:
            base_dir = config.engine.data_dir
            # Buffer the upload in memory so tarfile can seek in it.
            si = io.BytesIO()
            file.save(dst=si)
            si.seek(0)
            project_id = ""
            with tarfile.open(fileobj=si, mode="r:gz") as tar:
                # The project identifier is the directory name of the first
                # dataset entry ('ds/<project_id>/...').
                for tarinfo in tar:
                    if tarinfo.name.startswith("ds/"):
                        project_id = tarinfo.name.split('/')[1]
                        break
                vtfpath = base_dir + os.path.sep + "vt" + os.path.sep + "viztrails"
                with open(vtfpath, "r") as vtf:
                    vt_index_js = json.load(vtf)
                if project_id in vt_index_js:
                    raise srv.InvalidRequest("This project already exists.")

                def _with_prefix(members, prefix):
                    # Yield archive members under the given top-level
                    # directory. Bug fix: the original fs/vt filters
                    # iterated the enclosing 'tar' object instead of the
                    # 'members' argument they were given.
                    for tarinfo in members:
                        if tarinfo.name.startswith(prefix):
                            yield tarinfo

                # NOTE(review): extractall on an untrusted archive can write
                # outside base_dir via '..' or absolute member names —
                # consider a path-validation filter.
                tar.extractall(path=base_dir, members=_with_prefix(tar, "ds/"))
                tar.extractall(path=base_dir, members=_with_prefix(tar, "fs/"))
                tar.extractall(path=base_dir, members=_with_prefix(tar, "vt/"))
                # Register the imported project in the viztrails index.
                with open(vtfpath, "w") as vtf:
                    json.dump(vt_index_js + [project_id], vtf)
                # Re-initialize the service API so it picks up the newly
                # imported project.
                global api
                api = VizierApi(config, init=True)
                pj = api.projects.get_project(project_id)
                if pj is not None:
                    return jsonify(pj)
        except ValueError as ex:
            # Removed unreachable 'print(ex)' that followed this raise.
            raise srv.InvalidRequest(str(ex))
    else:
        raise srv.InvalidRequest('no file or url specified in request')
    raise srv.ResourceNotFound('unknown project format')
def delete_project(project_id):
    """Delete an existing project."""
    deleted = api.projects.delete_project(project_id)
    if not deleted:
        raise srv.ResourceNotFound('unknown project \'' + project_id + '\'')
    # Successful delete returns an empty NO CONTENT response.
    return '', 204