def manual_labels(session: Session, instance: Instance, project: CategorizationProject) -> Dataset:
    """Get manual labels from a Categorization project.

    The labels dataset is looked up by name on the ``datasets`` endpoint; its
    name is the project's unified dataset name suffixed with
    ``_manual_categorizations``.

    Args:
        session: Tamr session
        instance: Tamr instance containing project
        project: Tamr project containing labels

    Returns:
        Dataset containing manual labels

    Raises:
        _dataset.NotFound: If no dataset could be found at the specified URL
        _dataset.Ambiguous: If multiple targets match dataset name
        requests.HTTPError: If any other HTTP error is encountered.
    """
    # NOTE(review): a `from_project` defined elsewhere in this file takes only
    # (session, project); confirm `unified.from_project` accepts `instance`.
    unified_dataset = unified.from_project(session=session, instance=instance, project=project)
    labels_dataset_name = unified_dataset.name + "_manual_categorizations"

    datasets_url = URL(instance=instance, path="datasets")
    r = session.get(url=str(datasets_url), params={"filter": f"name=={labels_dataset_name}"})

    # Validate the response before treating its body as a list of matches;
    # otherwise an error payload could be misreported as NotFound/Ambiguous.
    matches = response.successful(r).json()
    if len(matches) == 0:
        raise _dataset.NotFound(str(r.url))
    if len(matches) > 1:
        raise _dataset.Ambiguous(str(r.url))

    dataset_url = URL(instance=instance, path=matches[0]["relativeId"])
    return _dataset._from_url(session=session, url=dataset_url)
def by_name(session: Session, instance: Instance, name: str) -> Project:
    """Get project by name

    Fetches project from Tamr server.

    Args:
        session: Tamr session
        instance: Tamr instance containing this project
        name: Project name

    Returns:
        The single project whose name matches

    Raises:
        project.NotFound: If no project could be found with that name.
        project.Ambiguous: If multiple targets match project name.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    r = session.get(
        url=str(URL(instance=instance, path="projects")),
        params={"filter": f"name=={name}"},
    )

    # Validate the response first so an error payload is surfaced as an HTTP
    # error (as documented) instead of being counted as a list of matches.
    matches = response.successful(r).json()

    # Check that exactly one project is returned
    if len(matches) == 0:
        raise NotFound(str(r.url))
    if len(matches) > 1:
        raise Ambiguous(str(r.url))

    # Make Project from response
    match = matches[0]
    url = URL(instance=instance, path=match["relativeId"])
    return _from_json(url=url, data=match)
def get_all(
    session: Session,
    instance: Instance,
    *,
    filter: Optional[Union[str, List[str]]] = None,
) -> Tuple[Project, ...]:
    """Get all projects from an instance

    Args:
        session: Tamr session
        instance: Tamr instance from which to get projects
        filter: Filter expression, e.g. "externalId==wobbly"
            Multiple expressions can be passed as a list

    Returns:
        The projects retrieved from the instance

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    projects_url = URL(instance=instance, path="projects")

    # Only send the "filter" query parameter when the caller supplied one
    if filter is None:
        r = session.get(str(projects_url))
    else:
        r = session.get(str(projects_url), params={"filter": filter})

    payload = response.successful(r).json()
    return tuple(
        _from_json(URL(instance=instance, path=entry["relativeId"]), entry)
        for entry in payload
    )
def _create(
    session: Session,
    instance: Instance,
    name: str,
    project_type: str,
    description: Optional[str] = None,
    external_id: Optional[str] = None,
    unified_dataset_name: Optional[str] = None,
) -> Project:
    """Create a project in Tamr.

    Args:
        session: Tamr session
        instance: Tamr instance
        name: Project name
        project_type: Project type
        description: Project description
        external_id: External ID of the project
        unified_dataset_name: Name of the unified dataset; when empty or
            omitted, ``name + "_unified_dataset"`` is used

    Returns:
        Project created in Tamr

    Raises:
        project.AlreadyExists: If a project with these specifications already exists.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    payload = {
        "name": name,
        "type": project_type,
        # Fall back to a name derived from the project name
        "unifiedDatasetName": unified_dataset_name or (name + "_unified_dataset"),
        "description": description,
        "externalId": external_id,
    }

    r = session.post(url=str(URL(instance=instance, path="projects")), json=payload)

    # HTTP 409 is reported as AlreadyExists, carrying the server's message
    if r.status_code == 409:
        raise AlreadyExists(r.json()["message"])

    created = response.successful(r).json()
    created_url = URL(instance=instance, path=str(created["relativeId"]))

    # Re-fetch the project from its own URL rather than trusting the POST body
    return _by_url(session=session, url=created_url)
def by_resource_id(session: Session, instance: Instance, resource_id: str) -> Operation:
    """Get operation by ID

    Requests ``operations/<resource_id>`` and builds the operation from the
    response via ``_from_response``.

    Args:
        session: Tamr session
        instance: Tamr instance
        resource_id: The ID of the operation

    Returns:
        The operation built from the server response
    """
    operation_url = URL(instance=instance, path=f"operations/{resource_id}")
    return _from_response(instance, session.get(str(operation_url)))
def create(
    session: Session,
    instance: Instance,
    *,
    name: str,
    key_attribute_names: Tuple[str, ...],
    description: Optional[str] = None,
    external_id: Optional[str] = None,
) -> Dataset:
    """Create a dataset in Tamr.

    Args:
        session: Tamr session
        instance: Tamr instance
        name: Dataset name
        key_attribute_names: Dataset primary key attribute names
        description: Dataset description
        external_id: External ID of the dataset

    Returns:
        Dataset created in Tamr

    Raises:
        dataset.AlreadyExists: If a dataset with these specifications already exists.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    payload = {
        "name": name,
        "keyAttributeNames": key_attribute_names,
        "description": description,
        "externalId": external_id,
    }

    r = session.post(url=str(URL(instance=instance, path="datasets")), json=payload)

    # A 400 whose message contains "already exists" is reported as
    # AlreadyExists; any other 400 falls through to the generic check below.
    if r.status_code == 400:
        message = r.json()["message"]
        if "already exists" in message:
            raise AlreadyExists(message)

    created = response.successful(r).json()
    created_url = URL(instance=instance, path=str(created["relativeId"]))

    # Re-fetch the dataset from its own URL rather than trusting the POST body
    return _by_url(session=session, url=created_url)
def initiate(session: Session, instance: Instance) -> Backup:
    """Initiate a Tamr backup.

    Posts to the ``backups`` endpoint and builds the backup from the response.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        Initiated backup

    Raises:
        backup.InvalidOperation: If attempting an invalid operation
    """
    backups_url = URL(instance=instance, path="backups")
    r = session.post(str(backups_url))

    # HTTP 400 is reported as InvalidOperation with the server's message
    if r.status_code == 400:
        raise InvalidOperation(str(backups_url), r.json()["message"])

    backup_json = response.successful(r).json()
    relative_id = backup_json["relativeId"]
    backup_url = URL(instance=instance, path=f"backups/{relative_id}")
    return _from_json(backup_url, backup_json)
def get_all(session: Session, instance: Instance) -> List[Backup]:
    """Get all backups that have been initiated for a Tamr instance.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        A list of Tamr backups

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    backups_url = URL(instance=instance, path="backups")
    r = session.get(str(backups_url))

    # HTTP 404 is reported as NotFound for the listing URL
    if r.status_code == 404:
        raise NotFound(str(backups_url))

    backups = []
    for backup_json in response.successful(r).json():
        relative_id = backup_json["relativeId"]
        backup_url = URL(instance=instance, path=f"backups/{relative_id}")
        backups.append(_from_json(backup_url, backup_json))
    return backups
def by_resource_id(session: Session, instance: Instance, id: str) -> Project:
    """Get project by resource ID.

    Fetches project from Tamr server, using the URL ``projects/<id>``.

    Args:
        session: Tamr session
        instance: Tamr instance containing this project
        id: Project ID

    Raises:
        project.NotFound: If no project could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    project_url = URL(instance=instance, path=f"projects/{id}")
    project = _by_url(session, project_url)
    return project
def from_project(session: Session, project: Project) -> UnifiedDataset:
    """Get unified dataset of a project

    Fetches the unified dataset of a given project from Tamr server. The URL
    is the project's own URL path with ``/unifiedDataset`` appended, on the
    project's instance.

    Args:
        session: Tamr session
        project: Tamr project of this Unified Dataset

    Raises:
        unified.NotFound: If no unified dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    unified_path = f"{project.url.path}/unifiedDataset"
    unified_url = URL(instance=project.url.instance, path=unified_path)
    return _by_url(session, unified_url)
def from_resource_id(session: Session, instance: Instance, id: str) -> Dataset:
    """Get dataset by resource ID

    Fetches dataset from Tamr server, using the URL ``datasets/<id>``.

    Args:
        session: Tamr session
        instance: Tamr instance containing this dataset
        id: Dataset ID

    Raises:
        dataset.NotFound: If no dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    dataset_url = URL(instance=instance, path=f"datasets/{id}")
    dataset = _from_url(session, dataset_url)
    return dataset
def _from_response(instance: Instance, response: requests.Response) -> Operation:
    """Build an Operation from a Tamr response, tolerating ``HTTP 204``.

    When a Tamr API call would start an operation, but all results that would
    be produced by that operation are already up-to-date, Tamr returns
    `HTTP 204 No Content`.

    So that client code does not have to inspect the status code itself, this
    function returns either the Operation described by the response body, or a
    dummy `NOOP` operation (id ``-1``, state ``SUCCEEDED``) standing in for the
    204 response.

    Args:
        instance: Tamr instance used to build the operation's URL
        response: HTTP Response from the request that started the operation.

    Returns:
        The operation described by the response, or the dummy no-op operation
    """
    if response.status_code != 204:
        resource_json = response.json()
    else:
        # Successful, but there is no body to parse: synthesize a placeholder
        # operation with sentinel timestamps and ids.
        never = "0000-00-00T00:00:00.000Z"
        description = """Tamr returned HTTP 204 for this operation, indicating that all results that would be produced by the operation are already up-to-date."""
        resource_json = {
            "id": "-1",
            "type": "NOOP",
            "description": description,
            "status": {
                "state": "SUCCEEDED",
                "startTime": never,
                "endTime": never,
                "message": "",
            },
            "created": {"username": "", "time": never, "version": "-1"},
            "lastModified": {"username": "", "time": never, "version": "-1"},
            "relativeId": "operations/-1",
        }

    operation_id = resource_json["id"]
    operation_url = URL(instance=instance, path=f"operations/{operation_id}")
    return _from_json(operation_url, resource_json)
def get(session: Session, instance: Instance) -> Restore:
    """Get information on the latest Tamr restore, if any.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        Latest Tamr restore

    Raises:
        restore.NotFound: If no restore found at the specified URL
    """
    restore_url = URL(instance=instance, path="instance/restore")
    r = session.get(str(restore_url))

    # HTTP 404 is reported as NotFound for the restore URL
    if r.status_code == 404:
        raise NotFound(str(restore_url))

    restore_json = response.successful(r).json()
    return _from_json(restore_url, restore_json)
def initiate(session: Session, instance: Instance, backup_path: str) -> Restore:
    """Initiate a Tamr restore.

    Posts ``backup_path`` as the request body to ``instance/restore``.

    Args:
        session: Tamr session
        instance: Tamr instance
        backup_path: Path to the backup

    Returns:
        Initiated restore

    Raises:
        restore.InvalidOperation: If attempting an invalid operation
    """
    restore_url = URL(instance=instance, path="instance/restore")
    r = session.post(str(restore_url), data=backup_path)

    # HTTP 400 is reported as InvalidOperation with the server's message
    if r.status_code == 400:
        raise InvalidOperation(str(restore_url), r.json()["message"])

    restore_json = response.successful(r).json()
    return _from_json(restore_url, restore_json)
def by_resource_id(session: Session, instance: Instance, resource_id: str) -> Backup:
    """Get information on a specific Tamr backup.

    Args:
        session: Tamr session
        instance: Tamr instance
        resource_id: Resource ID of the backup

    Returns:
        A Tamr backup

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    backup_url = URL(instance=instance, path=f"backups/{resource_id}")
    r = session.get(str(backup_url))

    # HTTP 404 is reported as NotFound for this backup's URL
    if r.status_code == 404:
        raise NotFound(str(backup_url))

    backup_json = response.successful(r).json()
    return _from_json(backup_url, backup_json)