def DATASET_ANNOTATIONS(project, dataset, annotations, urls):
    """Get dictionary serialization for dataset component annotations.

    Parameters
    ----------
    project: vizier.engine.project.base.ProjectHandle
        Handle for project containing the dataset
    dataset: vizier.datastore.dataset.DatasetDescriptor
        Dataset descriptor
    annotations: vizier.datastore.annotation.dataset.DatasetMetadata
        Set of annotations for dataset components
    urls: vizier.api.routes.base.UrlFactory, optional
        Factory for resource urls

    Returns
    -------
    dict
    """
    # Serialize each annotation group: dataset-level annotations, plus the
    # ones attached to columns, rows and individual cells.
    groups = [
        ('annotations', annotations.annotations),
        ('columns', annotations.columns),
        ('rows', annotations.rows),
        ('cells', annotations.cells)
    ]
    doc = {key: [ANNOTATION(anno) for anno in group] for key, group in groups}
    # Add reference to update annotations for this dataset.
    doc[labels.LINKS] = serialize.HATEOAS({
        ref.ANNOTATIONS_UPDATE: urls.update_dataset_annotations(
            project_id=project.identifier,
            dataset_id=dataset.identifier
        )
    })
    return doc
def FILE_HANDLE(f_handle, project, urls):
    """Dictionary serialization for a file handle.

    Parameters
    ----------
    f_handle : vizier.filestore.base.FileHandle
        File handle
    project: vizier.engine.project.base.ProjectHandle
        Handle for the containing project
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = project.identifier
    file_id = f_handle.identifier
    # At the moment the self reference and the download Url are identical
    download_url = urls.download_file(project_id, file_id)
    obj = {
        'id': file_id,
        'name': f_handle.file_name,
        labels.LINKS: serialize.HATEOAS({
            ref.SELF: download_url,
            ref.FILE_DOWNLOAD: download_url
        })
    }
    # Add mimetype and encoding only when known (both are optional metadata
    # on the file handle). Fixed `not x is None` to idiomatic `x is not None`
    # (PEP 8 / E714).
    if f_handle.mimetype is not None:
        obj['mimetype'] = f_handle.mimetype
    if f_handle.encoding is not None:
        obj['encoding'] = f_handle.encoding
    return obj
def DATASET_DESCRIPTOR(
    dataset: DatasetDescriptor,
    name: Optional[str] = None,
    project: Optional[ProjectHandle] = None,
    urls: Optional[UrlFactory] = None
) -> Dict[str, Any]:
    """Dictionary serialization for a dataset descriptor.

    Parameters
    ----------
    dataset: vizier.datastore.dataset.DatasetDescriptor
        Dataset descriptor
    name : string, optional
        User-defined dataset name
    project: vizier.engine.project.base.ProjectHandle, optional
        Handle for project containing the dataset
    urls: vizier.api.routes.base.UrlFactory, optional
        Factory for resource urls

    Returns
    -------
    dict
    """
    # NOTE: the row count is intentionally not included here; DATASET_HANDLE
    # adds labels.ROWCOUNT when the dataset rows are serialized.
    obj = {
        labels.ID: dataset.identifier,
        labels.COLUMNS: [DATASET_COLUMN(col) for col in dataset.columns]
    }
    # Prefer the caller-supplied name over the name in the descriptor.
    if name is not None:
        obj[labels.NAME] = name
    elif dataset.name is not None:
        obj[labels.NAME] = dataset.name
    # Add self reference if the project and url factory are given
    if project is not None and urls is not None:
        project_id = project.identifier
        dataset_id = dataset.identifier
        dataset_url = urls.get_dataset(
            project_id=project_id,
            dataset_id=dataset_id
        )
        obj[labels.LINKS] = serialize.HATEOAS({
            ref.SELF: dataset_url,
            # Fetch-all link requests an unlimited page size.
            ref.DATASET_FETCH_ALL: dataset_url + '?' + routes.PAGE_LIMIT + '=-1',
            ref.DATASET_DOWNLOAD: urls.download_dataset(
                project_id=project_id,
                dataset_id=dataset_id
            ),
            ref.ANNOTATIONS_GET: urls.get_dataset_caveats(
                project_id=project_id,
                dataset_id=dataset_id
            )
        })
    return obj
def init(self):
    """Initialize the API before the first request."""
    # Create the url factory and engine; the component APIs below all share
    # the engine's project cache.
    self.urls = ContainerApiUrlFactory(
        base_url=self.config.app_base_url,
        api_doc_url=self.config.webservice.doc_url
    )
    self.engine = get_engine(self.config)
    self.projects = self.engine.projects
    # Component APIs for datasets, dataset views, files and tasks.
    self.datasets = VizierDatastoreApi(
        projects=self.projects,
        urls=self.urls,
        defaults=self.config.webservice.defaults
    )
    self.views = VizierDatasetViewApi(
        projects=self.projects,
        urls=self.urls
    )
    self.files = VizierFilestoreApi(
        projects=self.projects,
        urls=self.urls
    )
    self.tasks = VizierContainerTaskApi(
        engine=self.engine,
        controller_url=self.config.controller_url
    )
    # Static service descriptor that is returned by the service overview
    # resource.
    environment = {
        'name': self.engine.name,
        'version': VERSION_INFO,
        'backend': self.config.engine.backend.identifier,
        'packages': list(self.engine.packages.keys())
    }
    self.service_descriptor = {
        'name': self.config.webservice.name,
        'startedAt': get_current_time().isoformat(),
        'defaults': {
            'maxFileSize': self.config.webservice.defaults.max_file_size
        },
        'environment': environment,
        labels.LINKS: serialize.HATEOAS({
            'self': self.urls.service_descriptor(),
            'doc': self.urls.api_doc()
        })
    }
def WORKFLOW_HANDLE_LINKS(urls, project_id, branch_id, workflow_id=None, links=None):
    """Get basic set of HATEOAS references for workflow handles.

    For an empty workflow the identifier is None. In that case the result
    will not contain a self reference.

    Parameters
    ----------
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls
    project_id: string
        Unique project identifier
    branch_id: string
        Unique branch identifier
    workflow_id: string, optional
        Unique workflow identifier
    links: dict, optional
        Optional dictionary of existing references that the workflow links
        are added to (the dictionary is modified in place)

    Returns
    -------
    dict
    """
    if links is None:
        links = dict()
    links[ref.WORKFLOW_APPEND] = urls.workflow_module_append(
        project_id=project_id,
        branch_id=branch_id
    )
    # References to the workflow branch
    links[ref.WORKFLOW_BRANCH] = urls.get_branch(
        project_id=project_id,
        branch_id=branch_id
    )
    links[ref.BRANCH_HEAD] = urls.get_branch_head(
        project_id=project_id,
        branch_id=branch_id
    )
    links[ref.WORKFLOW_PROJECT] = urls.get_project(project_id)
    links[ref.FILE_UPLOAD] = urls.upload_file(project_id)
    # Only include self reference if workflow identifier is given. Fixed
    # `not x is None` to idiomatic `x is not None` (PEP 8 / E714).
    if workflow_id is not None:
        links[ref.SELF] = urls.get_workflow(
            project_id=project_id,
            branch_id=branch_id,
            workflow_id=workflow_id
        )
    return serialize.HATEOAS(links)
def CHART_VIEW(project_id, branch_id, workflow_id, module_id, chart_id, name, data, urls):
    """Get dictionary serialization for a dataset chart view result.

    Parameters
    ----------
    project_id: string
        Unique project identifier
    branch_id: string
        Unique branch identifier
    workflow_id: string
        Unique workflow identifier
    module_id: string
        Unique module identifier
    chart_id: string
        Unique chart identifier
    name:
        Chart name
    data: dict
        Dictinary serialization for data series
    urls: vizier.api.routes.base.UrlFactory
        Factory for web service resource urls

    Returns
    -------
    dict
    """
    # The only reference for a chart view is its own resource url.
    self_ref = urls.get_chart_view(
        project_id=project_id,
        branch_id=branch_id,
        workflow_id=workflow_id,
        module_id=module_id,
        chart_id=chart_id
    )
    return {
        labels.NAME: name,
        labels.DATA: data,
        labels.LINKS: serialize.HATEOAS({ref.SELF: self_ref})
    }
def list_projects(self):
    """Returns a list of descriptors for all projects that are currently
    contained in the project repository.

    Returns
    -------
    dict
    """
    # Serialize a descriptor for every known project.
    descriptors = [
        serialpr.PROJECT_DESCRIPTOR(project=p, urls=self.urls)
        for p in self.projects.list_projects()
    ]
    return {
        'projects': descriptors,
        labels.LINKS: serialize.HATEOAS({
            ref.SELF: self.urls.list_projects(),
            ref.PROJECT_CREATE: self.urls.create_project(),
            ref.PROJECT_IMPORT: self.urls.import_project()
        })
    }
def BRANCH_DESCRIPTOR(project, branch, urls):
    """Dictionary serialization for branch descriptor.

    Parameters
    ----------
    project: vizier.engine.project.base.ProjectHandle
        Handle for the containing project
    branch : vizier.viztrail.branch.BranchHandle
        Branch handle
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = project.identifier
    branch_id = branch.identifier
    # References for retrieving, deleting, and updating the branch as well
    # as fetching the workflow at the branch head.
    link_map = serialize.HATEOAS({
        ref.SELF: urls.get_branch(project_id, branch_id),
        ref.BRANCH_DELETE: urls.delete_branch(project_id, branch_id),
        ref.BRANCH_HEAD: urls.get_branch_head(project_id, branch_id),
        ref.BRANCH_UPDATE: urls.update_branch(project_id, branch_id)
    })
    return {
        'id': branch_id,
        'createdAt': branch.provenance.created_at.isoformat(),
        'lastModifiedAt': branch.last_modified_at.isoformat(),
        'isDefault': project.viztrail.is_default_branch(branch_id),
        'properties': serialize.ANNOTATIONS(branch.properties),
        labels.LINKS: link_map
    }
def PROJECT_DESCRIPTOR(project, urls):
    """Dictionary serialization for a project's fundamental metadata.

    Parameters
    ----------
    project : vizier.engine.project.base.ProjectHandle
        Project handle
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls

    Returns
    -------
    dict
    """
    project_id = project.identifier
    # References for project-level operations plus the general service
    # entry points (API home and documentation).
    link_map = serialize.HATEOAS({
        ref.SELF: urls.get_project(project_id),
        ref.API_HOME: urls.service_descriptor(),
        ref.API_DOC: urls.api_doc(),
        ref.PROJECT_DELETE: urls.delete_project(project_id),
        ref.PROJECT_UPDATE: urls.update_project(project_id),
        ref.BRANCH_CREATE: urls.create_branch(project_id),
        ref.FILE_UPLOAD: urls.upload_file(project_id)
    })
    return {
        'id': project_id,
        'createdAt': project.created_at.isoformat(),
        'lastModifiedAt': project.last_modified_at.isoformat(),
        'defaultBranch': project.viztrail.default_branch.identifier,
        'properties': serialize.ANNOTATIONS(project.viztrail.properties),
        labels.LINKS: link_map
    }
def MODULE_HANDLE(project: "ProjectHandle",
                  branch: BranchHandle,
                  module: ModuleHandle,
                  urls: UrlFactory,
                  workflow: Optional[WorkflowHandle] = None,
                  charts: Optional[List[ChartViewHandle]] = None,
                  include_self: bool = True) -> Dict[str, Any]:
    """Dictionary serialization for a handle in the workflow at the branch
    head.

    The list of references will only contain a self referene if the
    include_self flag is True.

    Parameters
    ----------
    project: vizier.engine.project.base.ProjectHandle
        Handle for the containing project
    branch : vizier.viztrail.branch.BranchHandle
        Branch handle
    workflow: vizier.viztrail.workflow.WorkflowHandle
        Workflow handle
    module: vizier.viztrail.module.base.ModuleHandle
        Module handle
    charts: list(vizier.view.chart.ChartViewHandle)
        List of handles for available chart views
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls
    include_self: bool, optional
        Indicate if self link is included

    Returns
    -------
    dict
    """
    project_id = project.identifier
    branch_id = branch.identifier
    module_id = module.identifier
    cmd = module.command
    timestamp = module.timestamp
    # Fall back to the workflow at the branch head when no explicit
    # workflow handle is given.
    actual_workflow = branch.get_head() if workflow is None else workflow
    obj: Dict[str, Any] = {
        labels.ID: module_id,
        'state': module.state,
        labels.COMMAND: {
            labels.COMMAND_PACKAGE: cmd.package_id,
            labels.COMMAND_ID: cmd.command_id,
            labels.COMMAND_ARGS: cmd.arguments.to_list()
        },
        'text': module.external_form,
        labels.TIMESTAMPS: {
            labels.CREATED_AT: timestamp.created_at.isoformat()
        },
        # No module-scoped links can be generated without an identifier.
        labels.LINKS: serialize.HATEOAS({} if module_id is None else {
            ref.MODULE_INSERT: urls.workflow_module_insert(
                project_id=project_id,
                branch_id=branch_id,
                module_id=module_id
            )
        })
    }
    if include_self:
        # Self, delete and replace references all require a module id.
        obj[labels.LINKS].extend(
            serialize.HATEOAS({} if module_id is None else {
                ref.SELF: urls.get_workflow_module(
                    project_id=project_id,
                    branch_id=branch_id,
                    module_id=module_id
                ),
                ref.MODULE_DELETE: urls.workflow_module_delete(
                    project_id=project_id,
                    branch_id=branch_id,
                    module_id=module_id
                ),
                ref.MODULE_REPLACE: urls.workflow_module_replace(
                    project_id=project_id,
                    branch_id=branch_id,
                    module_id=module_id
                )
            })
        )
    if not timestamp.started_at is None:
        obj[labels.TIMESTAMPS][
            labels.STARTED_AT] = timestamp.started_at.isoformat()
    # Add outputs and datasets if module is not active.
    if not module.is_active:
        # Replay provenance of all workflow modules up to and including
        # this one to reconstruct the database state the module produced.
        artifacts: Dict[str, ArtifactDescriptor] = dict()
        for precursor in actual_workflow.modules:
            artifacts = precursor.provenance.get_database_state(artifacts)
            if precursor == module:
                break
        # Split the resulting artifacts into datasets and everything else.
        datasets = list()
        other_artifacts = list()
        for artifact_name in artifacts:
            artifact = artifacts[artifact_name]
            if artifact.is_dataset:
                datasets.append(
                    serialds.DATASET_IDENTIFIER(
                        identifier=artifact.identifier,
                        name=artifact_name
                    )
                )
            else:
                other_artifacts.append(
                    serialds.ARTIFACT_DESCRIPTOR(
                        artifact=artifact,
                        project=project_id
                    )
                )
        # Serialize name + view link for each available chart view.
        available_charts = list()
        if charts is not None:
            for c_handle in charts:
                available_charts.append({
                    labels.NAME: c_handle.chart_name,
                    labels.LINKS: serialize.HATEOAS(
                        {} if module_id is None else {
                            ref.SELF: urls.get_chart_view(
                                project_id=project_id,
                                branch_id=branch_id,
                                workflow_id=actual_workflow.identifier,
                                module_id=module_id,
                                chart_id=c_handle.identifier
                            )
                        }
                    )
                })
        obj[labels.DATASETS] = datasets
        obj[labels.CHARTS] = available_charts
        obj[labels.OUTPUTS] = serialize.OUTPUTS(module.outputs)
        obj[labels.ARTIFACTS] = other_artifacts
        if not timestamp.finished_at is None:
            obj[labels.TIMESTAMPS][
                labels.FINISHED_AT] = timestamp.finished_at.isoformat()
    else:
        # Add empty lists for outputs, datasets and charts if the module is
        # active
        obj[labels.DATASETS] = list()
        obj[labels.CHARTS] = list()
        obj[labels.OUTPUTS] = serialize.OUTPUTS(ModuleOutputs())
        obj[labels.ARTIFACTS] = list()
    return obj
def DATASET_HANDLE(project, dataset, rows, defaults, urls, offset=0, limit=-1):
    """Dictionary serialization for dataset handle. Includes (part of) the
    dataset rows.

    Parameters
    ----------
    project: vizier.engine.project.base.ProjectHandle
        Handle for project containing the dataset
    dataset : vizier.datastore.dataset.DatasetDescriptor
        Dataset descriptor
    rows: list(vizier.datastore.dataset.DatasetRow)
        List of rows from the dataset
    defaults : vizier.config.base.ConfigObject
        Web service default values
    urls: vizier.api.routes.base.UrlFactory
        Factory for resource urls
    offset: int, optional
        Number of rows at the beginning of the list that are skipped.
    limit: int, optional
        Limits the number of rows that are returned.

    Returns
    -------
    dict
    """
    # Use the dataset descriptor as the base
    obj = DATASET_DESCRIPTOR(dataset=dataset, project=project, urls=urls)
    # Serialize rows. The default dictionary representation for a row does
    # not include the row index position nor the annotation information.
    serialized_rows = list()
    for row in rows:
        serialized_rows.append(DATASET_ROW(row))
    # Serialize the dataset schema and cells
    obj[labels.ROWS] = serialized_rows
    obj[labels.ROWCOUNT] = dataset.row_count
    obj[labels.OFFSET] = offset
    # Add pagination references
    links = obj[labels.LINKS]
    # Max. number of records shown. Resolution order: explicit non-negative
    # limit argument, then the service default row limit, then the maximum
    # row limit; -1 means unbounded.
    if not limit is None and int(limit) >= 0:
        max_rows_per_request = int(limit)
    elif defaults.row_limit >= 0:
        max_rows_per_request = defaults.row_limit
    elif defaults.max_row_limit >= 0:
        max_rows_per_request = defaults.max_row_limit
    else:
        max_rows_per_request = -1
    # List of pagination Urls
    # FIRST: Always include Url's to access the first page
    project_id = project.identifier
    dataset_id = dataset.identifier
    links.extend(
        serialize.HATEOAS({
            ref.PAGE_FIRST: urls.dataset_pagination(
                project_id=project_id,
                dataset_id=dataset_id,
                offset=offset,
                limit=limit
            )
        })
    )
    # PREV: If offset is greater than zero allow to fetch previous page
    if not offset is None and offset > 0:
        if max_rows_per_request >= 0:
            if offset > 0:
                # Previous page starts one page-size earlier, clamped to 0.
                prev_offset = max(offset - max_rows_per_request, 0)
                links.extend(
                    serialize.HATEOAS({
                        ref.PAGE_PREV: urls.dataset_pagination(
                            project_id=project_id,
                            dataset_id=dataset_id,
                            offset=prev_offset,
                            limit=limit
                        )
                    })
                )
    # NEXT & LAST: If there are rows beyond the current offset+limit include
    # Url's to fetch next page and last page.
    if offset < dataset.row_count and max_rows_per_request >= 0:
        next_offset = offset + max_rows_per_request
        if next_offset < dataset.row_count:
            links.extend(
                serialize.HATEOAS({
                    ref.PAGE_NEXT: urls.dataset_pagination(
                        project_id=project_id,
                        dataset_id=dataset_id,
                        offset=next_offset,
                        limit=limit
                    )
                })
            )
        # LAST only added when it points past the current page.
        last_offset = (dataset.row_count - max_rows_per_request)
        if last_offset > offset:
            links.extend(
                serialize.HATEOAS({
                    ref.PAGE_LAST: urls.dataset_pagination(
                        project_id=project_id,
                        dataset_id=dataset_id,
                        offset=last_offset,
                        limit=limit
                    )
                })
            )
    # Return pagination Url list
    return obj
def init(self) -> None:
    """Initialize the API before the first request."""
    # Initialize the API compinents
    self.engine = get_engine(self.config)
    self.urls = get_url_factory(
        config=self.config,
        projects=self.engine.projects
    )
    self.branches = VizierBranchApi(
        projects=self.engine.projects,
        urls=self.urls
    )
    self.datasets = VizierDatastoreApi(
        projects=self.engine.projects,
        urls=self.urls,
        defaults=self.config.webservice.defaults
    )
    self.views = VizierDatasetViewApi(
        projects=self.engine.projects,
        urls=self.urls
    )
    self.files = VizierFilestoreApi(
        projects=self.engine.projects,
        urls=self.urls
    )
    # NOTE(review): self.projects is the project *API* wrapper here, while
    # the component APIs above receive the engine's project cache directly.
    self.projects = VizierProjectApi(
        projects=self.engine.projects,
        urls=self.urls
    )
    self.tasks = VizierTaskApi(engine=self.engine)
    self.workflows = VizierWorkflowApi(engine=self.engine, urls=self.urls)
    # Initialize the service descriptor. The service descriptor contains
    # the list of packages and commands that are supported by the engine
    package_listing = list()
    for pckg in self.engine.packages.values():
        pckg_obj = {
            'id': pckg.identifier,
            'name': pckg.name,
            'category': pckg.category
        }
        # Description is optional for packages and commands alike.
        if not pckg.description is None:
            pckg_obj['description'] = pckg.description
        pckg_commands = list()
        for cmd in list(pckg.commands.values()):
            cmd_obj: Dict[str, Any] = {
                'id': cmd.identifier,
                'name': cmd.name,
                'suggest': cmd.suggest
            }
            if not cmd.description is None:
                cmd_obj['description'] = cmd.description
            cmd_obj['parameters'] = list(cmd.parameters.values())
            pckg_commands.append(cmd_obj)
        pckg_obj['commands'] = pckg_commands
        package_listing.append(pckg_obj)
    self.service_descriptor = {
        'name': self.config.webservice.name,
        'startedAt': get_current_time().isoformat(),
        'defaults': {
            'maxFileSize': self.config.webservice.defaults.max_file_size,
            'maxDownloadRowLimit': self.config.webservice.defaults.max_download_row_limit
        },
        'environment': {
            'name': self.engine.name,
            'version': VERSION_INFO,
            'backend': self.config.engine.backend.identifier,
            'packages': package_listing
        },
        labels.LINKS: serialize.HATEOAS({
            ref.SELF: self.urls.service_descriptor(),
            ref.API_DOC: self.urls.api_doc(),
            ref.PROJECT_CREATE: self.urls.create_project(),
            ref.PROJECT_LIST: self.urls.list_projects(),
            ref.PROJECT_IMPORT: self.urls.import_project()
        })
    }