def get_all(
    session: Session,
    instance: Instance,
    *,
    filter: Optional[Union[str, List[str]]] = None,
) -> Tuple[Project, ...]:
    """Retrieve every project on an instance, optionally narrowed by a filter

    Args:
        session: Session used to issue the request
        instance: Tamr instance from which to get projects
        filter: Filter expression, e.g. "externalId==wobbly"
            Multiple expressions can be passed as a list

    Returns:
        The projects retrieved from the instance

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    projects_url = str(URL(instance=instance, path="projects"))

    # Only send the "filter" query parameter when the caller supplied one.
    if filter is None:
        resp = session.get(projects_url)
    else:
        resp = session.get(projects_url, params={"filter": filter})

    # Each entry carries its own "relativeId", which becomes the project's URL path.
    return tuple(
        _from_json(URL(instance=instance, path=entry["relativeId"]), entry)
        for entry in response.successful(resp).json()
    )
def by_name(session: Session, instance: Instance, name: str) -> Project:
    """Get project by name

    Fetches project from Tamr server.

    Args:
        session: Session used to issue the request
        instance: Tamr instance containing this project
        name: Project name

    Returns:
        The single project whose name matches

    Raises:
        project.NotFound: If no project could be found with that name.
        project.Ambiguous: If multiple targets match project name.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    r = session.get(
        url=str(URL(instance=instance, path="projects")),
        params={"filter": f"name=={name}"},
    )

    # Surface HTTP errors before inspecting the body; otherwise an error payload
    # would be length-checked as though it were the list of matching projects.
    matches = response.successful(r).json()

    # Check that exactly one project is returned
    if len(matches) == 0:
        raise NotFound(str(r.url))
    if len(matches) > 1:
        raise Ambiguous(str(r.url))

    # Make Project from response
    match = matches[0]
    url = URL(instance=instance, path=match["relativeId"])
    return _from_json(url=url, data=match)
def manual_labels(session: Session, instance: Instance, project: CategorizationProject) -> Dataset:
    """Get manual labels from a Categorization project.

    The labels dataset is looked up by name: the project's unified dataset name
    with the suffix "_manual_categorizations".

    Args:
        session: Session used to issue the requests
        instance: Tamr instance containing project
        project: Tamr project containing labels

    Returns:
        Dataset containing manual labels

    Raises:
        _dataset.NotFound: If no dataset could be found at the specified URL
        _dataset.Ambiguous: If multiple targets match dataset name
        requests.HTTPError: If the dataset lookup request fails with an HTTP error
    """
    unified_dataset = unified.from_project(session=session, instance=instance, project=project)
    labels_dataset_name = unified_dataset.name + "_manual_categorizations"

    datasets_url = URL(instance=instance, path="datasets")
    r = session.get(url=str(datasets_url), params={"filter": f"name=={labels_dataset_name}"})

    # Surface HTTP errors before inspecting the body; otherwise an error payload
    # would be length-checked as though it were the list of matching datasets.
    matches = response.successful(r).json()

    # Exactly one dataset must carry the expected name
    if len(matches) == 0:
        raise _dataset.NotFound(str(r.url))
    if len(matches) > 1:
        raise _dataset.Ambiguous(str(r.url))

    dataset_path = matches[0]["relativeId"]
    dataset_url = URL(instance=instance, path=dataset_path)
    return _dataset._from_url(session=session, url=dataset_url)
def by_resource_id(session: Session, instance: Instance, resource_id: str) -> Operation:
    """Look up an operation by its resource ID

    Args:
        session: Session used to issue the request
        instance: Tamr instance used to build the operation URL
        resource_id: The ID of the operation

    Returns:
        Whatever `_from_response` builds from the instance and the HTTP response
    """
    operation_url = URL(instance=instance, path=f"operations/{resource_id}")
    resp = session.get(str(operation_url))
    # Interpretation of the response is delegated entirely to the helper.
    return _from_response(instance, resp)
def stream(session: Session, dataset: AnyDataset) -> Iterator[JsonDict]:
    """Stream the records in this dataset as Python dictionaries.

    This is a generator function: the HTTP request is issued when iteration
    starts, and the streamed response stays open until the records are
    exhausted or the generator is closed.

    Args:
        session: Session used to issue the request
        dataset: Dataset from which to stream records

    Returns:
        Python generator yielding records
    """
    with session.get(str(dataset.url) + "/records", stream=True) as r:
        # Delegate from inside the `with` block. Returning the iterator instead
        # would leave the block, and so close the response, before a lazy
        # iterator had read anything from it.
        yield from response.ndjson(r)
def version(session: Session, instance: Instance) -> str:
    """Look up the Tamr version reported by an instance.

    Args:
        session: Tamr Session
        instance: Tamr instance

    Returns:
        The value stored under "version" in the endpoint's JSON body
    """
    # The version endpoint is intentionally unversioned itself, but it is stable,
    # so it is fine to call it directly here.
    endpoint = f"{origin(instance)}/api/versioned/service/version"
    payload = session.get(endpoint).json()
    return payload["version"]
def _by_url(session: Session, url: URL) -> Project:
    """Fetch a project from the Tamr server given its URL.

    Args:
        session: Session used to issue the request
        url: Project URL

    Returns:
        Project built from the URL and the JSON body of the response

    Raises:
        project.NotFound: If no project could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    target = str(url)
    resp = session.get(target)

    # A 404 is reported as the project-specific NotFound rather than a generic HTTP error.
    if resp.status_code == 404:
        raise NotFound(target)

    return _from_json(url, response.successful(resp).json())
def _from_url(session: Session, url: URL) -> Attribute:
    """Fetch an attribute from the Tamr server given its URL

    Args:
        session: Session used to issue the request
        url: Attribute URL

    Returns:
        Attribute built from the URL and the JSON body of the response

    Raises:
        attribute.NotFound: If no attribute could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    attribute_response = session.get(str(url))

    missing = attribute_response.status_code == 404
    if missing:
        raise NotFound(str(url))

    # Any remaining error status is raised by the success check before decoding.
    checked = response.successful(attribute_response)
    return _from_json(url, checked.json())
def _by_url(session: Session, url: URL) -> Operation:
    """Fetch an operation from the Tamr server given its URL

    Args:
        session: Session used to issue the request
        url: Operation URL

    Returns:
        Operation built from the URL and the JSON body of the response

    Raises:
        operation.NotFound: If no operation could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    location = str(url)
    reply = session.get(location)

    # Translate 404 into the operation-specific exception up front.
    if reply.status_code == 404:
        raise NotFound(location)

    payload = response.successful(reply).json()
    return _from_json(url, payload)
def get(session: Session, instance: Instance) -> Restore:
    """Get information on the latest Tamr restore, if any.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        Latest Tamr restore

    Raises:
        restore.NotFound: If the server answers 404 for the restore URL
    """
    restore_url = URL(instance=instance, path="instance/restore")
    resp = session.get(str(restore_url))

    # 404 is mapped to NotFound; other statuses go through the success check below.
    if resp.status_code == 404:
        raise NotFound(str(restore_url))

    payload = response.successful(resp).json()
    return _from_json(restore_url, payload)
def get_all(session: Session, project: Project) -> Transformations:
    """Fetch the transformations belonging to a project

    Args:
        session: Session used to issue the request
        project: Project containing transformations

    Returns:
        Transformations built from the JSON body of the response

    Raises:
        requests.HTTPError: If any HTTP error is encountered.

    Example:
        >>> import tamr_client as tc
        >>> session = tc.session.from_auth('username', 'password')
        >>> instance = tc.instance.Instance(host="localhost", port=9100)
        >>> project1 = tc.project.from_resource_id(session, instance, id='1')
        >>> print(tc.transformations.get_all(session, project1))
    """
    resp = session.get(f"{project.url}/transformations")

    # Fail on an error status before attempting to decode the body.
    response.successful(resp)
    payload = resp.json()

    return _from_json(session, project.url.instance, payload)
def by_resource_id(session: Session, instance: Instance, resource_id: str) -> Backup:
    """Get information on a specific Tamr backup.

    Args:
        session: Tamr session
        instance: Tamr instance
        resource_id: Resource ID of the backup

    Returns:
        A Tamr backup

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    backup_url = URL(instance=instance, path=f"backups/{resource_id}")
    target = str(backup_url)

    resp = session.get(target)
    # A 404 is mapped to NotFound before the general success check runs.
    if resp.status_code == 404:
        raise NotFound(target)

    payload = response.successful(resp).json()
    return _from_json(backup_url, payload)
def poll(session: Session, backup: Backup) -> Backup:
    """Poll this backup for server-side updates.

    The given :class:`~tamr_client.backup.Backup` is not modified; a new
    :class:`~tamr_client.backup.Backup` is built from the server's response
    and returned.

    Args:
        session: Tamr session
        backup: Tamr backup to be polled

    Returns:
        A Tamr backup

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    # Re-request the backup at the URL it already carries.
    resp = session.get(str(backup.url))

    if resp.status_code == 404:
        raise NotFound(str(backup.url))

    refreshed = response.successful(resp).json()
    return _from_json(backup.url, refreshed)
def attributes(session: Session, dataset: Dataset) -> Tuple[Attribute, ...]:
    """Retrieve every attribute of a dataset

    Args:
        session: Session used to issue the request
        dataset: Dataset containing the desired attributes

    Returns:
        The attributes for the specified dataset

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    # The attribute collection lives under the dataset's own path.
    collection_url = replace(dataset.url, path=dataset.url.path + "/attributes")
    resp = session.get(str(collection_url))

    # Each attribute's URL is the collection URL extended with the attribute's name.
    return tuple(
        _attribute_from_json(
            replace(collection_url, path=collection_url.path + f"/{entry['name']}"),
            entry,
        )
        for entry in response.successful(resp).json()
    )
def get_all(session: Session, instance: Instance) -> List[Backup]:
    """Get all backups that have been initiated for a Tamr instance.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        A list of Tamr backups

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    listing_url = URL(instance=instance, path="backups")
    resp = session.get(str(listing_url))

    if resp.status_code == 404:
        raise NotFound(str(listing_url))

    backups = []
    for entry in response.successful(resp).json():
        # Every listed backup gets its own URL derived from its "relativeId".
        backup_url = URL(instance=instance, path=f'backups/{entry["relativeId"]}')
        backups.append(_from_json(backup_url, entry))
    return backups