def delete_all(session: Session, dataset: AnyDataset):
    """Delete all records in this dataset

    Sends a DELETE request to the dataset's ``/records`` endpoint and hands
    the reply to ``response.successful``; nothing is returned to the caller.

    Args:
        session: Session used to send the request
        dataset: Dataset from which to delete records
    """
    records_endpoint = f"{dataset.url!s}/records"
    response.successful(session.delete(records_endpoint))
def replace_all(
    session: Session, project: Project, tx: Transformations
) -> requests.Response:
    """Replaces the transformations of a Project

    The transformations are serialized with ``_to_json`` and PUT to the
    project's ``/transformations`` endpoint.

    Args:
        session: Session used to send the request
        project: Project to place transformations within
        tx: Transformations to put into project

    Returns:
        Whatever ``response.successful`` returns for the PUT reply

    Raises:
        requests.HTTPError: If any HTTP error is encountered.

    Example:
        >>> import tamr_client as tc
        >>> session = tc.session.from_auth('username', 'password')
        >>> instance = tc.instance.Instance(host="localhost", port=9100)
        >>> project1 = tc.project.from_resource_id(session, instance, id='1')
        >>> dataset3 = tc.dataset.from_resource_id(session, instance, id='3')
        >>> new_input_tx = tc.InputTransformation("SELECT *, upper(name) as name;", [dataset3])
        >>> all_tx = tc.Transformations(
        ...   input_scope=[new_input_tx],
        ...   unified_scope=["SELECT *, 1 as one;"]
        ... )
        >>> tc.transformations.replace_all(session, project1, all_tx)
    """
    payload = _to_json(tx)
    endpoint = f"{project.url}/transformations"
    put_reply = session.put(endpoint, json=payload)
    return response.successful(put_reply)
def update(
    session: Session, attribute: Attribute, *, description: Optional[str] = None
) -> Attribute:
    """Update an existing attribute

    PUTs a JSON body holding the ``description`` key to the attribute's URL.
    The key is always sent, even when ``description`` is left as ``None``.

    Args:
        session: Session used to send the request
        attribute: Existing attribute to update
        description: Updated description for the existing attribute

    Returns:
        The newly updated attribute, rebuilt from the server's JSON reply

    Raises:
        attribute.NotFound: If no attribute could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    target = str(attribute.url)
    reply = session.put(target, json={"description": description})
    if reply.status_code == 404:
        raise NotFound(target)
    return _from_json(attribute.url, response.successful(reply).json())
def _create(
    session: Session,
    dataset: Dataset,
    *,
    name: str,
    is_nullable: bool,
    type: AttributeType = attribute_type.DEFAULT,
    description: Optional[str] = None,
) -> Attribute:
    """Same as `tc.attribute.create`, but does not check for reserved attribute names.

    POSTs the attribute definition to the dataset's ``/attributes``
    collection. ``description`` is only included in the request body when
    it is not ``None``.

    Raises:
        AlreadyExists: If the server answers with a 409 status code.
    """
    collection_url = replace(dataset.url, path=dataset.url.path + "/attributes")
    # URL the new attribute lives at once created; also used in error reporting.
    attribute_url = replace(collection_url, path=collection_url.path + f"/{name}")

    optional_fields = {} if description is None else {"description": description}
    payload = {
        "name": name,
        "type": attribute_type.to_json(type),
        "isNullable": is_nullable,
        **optional_fields,
    }

    reply = session.post(str(collection_url), json=payload)
    if reply.status_code == 409:
        raise AlreadyExists(str(attribute_url))
    return _from_json(attribute_url, response.successful(reply).json())
def get_all(
    session: Session,
    instance: Instance,
    *,
    filter: Optional[Union[str, List[str]]] = None,
) -> Tuple[Project, ...]:
    """Get all projects from an instance

    Args:
        session: Session used to send the request
        instance: Tamr instance from which to get projects
        filter: Filter expression, e.g. "externalId==wobbly"
            Multiple expressions can be passed as a list

    Returns:
        The projects retrieved from the instance

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    listing_url = URL(instance=instance, path="projects")
    # Only pass `params` at all when a filter was actually supplied.
    extra_kwargs = {} if filter is None else {"params": {"filter": filter}}
    reply = session.get(str(listing_url), **extra_kwargs)

    entries = response.successful(reply).json()
    return tuple(
        _from_json(URL(instance=instance, path=entry["relativeId"]), entry)
        for entry in entries
    )
def _update(session: Session, dataset: Dataset, updates: Iterable[Dict]) -> JsonDict:
    """Send a batch of record creations/updates/deletions to this dataset.

    You probably want to use :func:`~tamr_client.record.upsert` or
    :func:`~tamr_client.record.delete` instead.

    The updates are streamed lazily: each one is serialized to JSON and
    encoded to UTF-8 bytes only when the request body is consumed.

    Args:
        session: Session used to send the request
        dataset: Dataset containing records to be updated
        updates: Each update should be formatted as specified in the `Public Docs for Dataset updates <https://docs.tamr.com/reference#modify-a-datasets-records>`_.

    Returns:
        JSON response body from server

    Raises:
        requests.HTTPError: If an HTTP error is encountered
    """
    # Yield bytes rather than `str`: a streamed request body is ultimately
    # written to the socket as bytes, so encoding here avoids depending on the
    # HTTP layer to convert text chunks. The bytes on the wire are unchanged.
    encoded_updates = (json.dumps(update).encode("utf-8") for update in updates)
    # `requests` accepts a generator for `data` param, but stubs for `requests` in
    # https://github.com/python/typeshed expects this to be a file-like object
    io_updates = cast(IO, encoded_updates)
    r = session.post(
        str(dataset.url) + ":updateRecords",
        # NOTE(review): header kept exactly as before; confirm the server
        # really expects "utf-8" as a Content-Encoding value.
        headers={"Content-Encoding": "utf-8"},
        data=io_updates,
    )
    return response.successful(r).json()
def delete(session: Session, attribute: Attribute):
    """Deletes an existing attribute

    Sends a DELETE request for the attribute's URL to the Tamr server.

    Args:
        session: Session used to send the request
        attribute: Existing attribute to delete

    Raises:
        attribute.NotFound: If no attribute could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    target = str(attribute.url)
    reply = session.delete(target)
    if reply.status_code == 404:
        raise NotFound(target)
    response.successful(reply)
def delete(session: Session, dataset: Dataset, *, cascade: bool = False):
    """Deletes an existing dataset

    Sends a DELETE request for the dataset's URL to the Tamr server, with
    ``cascade`` passed along as a query parameter.

    Args:
        session: Session used to send the request
        dataset: Existing dataset to delete
        cascade: Whether to delete all derived datasets as well

    Raises:
        dataset.NotFound: If no dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    target = str(dataset.url)
    query = {"cascade": cascade}
    reply = session.delete(target, params=query)
    if reply.status_code == 404:
        raise NotFound(target)
    response.successful(reply)
def get_all(session: Session, project: Project) -> Transformations:
    """Get the transformations of a Project

    Fetches the project's ``/transformations`` endpoint and converts the
    JSON reply with ``_from_json``.

    Args:
        session: Session used to send the request
        project: Project containing transformations

    Raises:
        requests.HTTPError: If any HTTP error is encountered.

    Example:
        >>> import tamr_client as tc
        >>> session = tc.session.from_auth('username', 'password')
        >>> instance = tc.instance.Instance(host="localhost", port=9100)
        >>> project1 = tc.project.from_resource_id(session, instance, id='1')
        >>> print(tc.transformations.get_all(session, project1))
    """
    endpoint = f"{project.url}/transformations"
    reply = session.get(endpoint)
    # Check the reply before attempting to decode its body.
    response.successful(reply)
    owning_instance = project.url.instance
    return _from_json(session, owning_instance, reply.json())
def _create(
    session: Session,
    instance: Instance,
    name: str,
    project_type: str,
    description: Optional[str] = None,
    external_id: Optional[str] = None,
    unified_dataset_name: Optional[str] = None,
) -> Project:
    """Create a project in Tamr.

    When ``unified_dataset_name`` is empty or ``None``, it defaults to
    ``name`` followed by ``"_unified_dataset"``. After the POST succeeds,
    the project is fetched again from the ``relativeId`` in the reply.

    Args:
        session: Session used to send the requests
        instance: Tamr instance
        name: Project name
        project_type: Project type
        description: Project description
        external_id: External ID of the project
        unified_dataset_name: Name of the unified dataset

    Returns:
        Project created in Tamr

    Raises:
        project.AlreadyExists: If a project with these specifications already exists.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    unified_name = unified_dataset_name or name + "_unified_dataset"
    request_body = {
        "name": name,
        "type": project_type,
        "unifiedDatasetName": unified_name,
        "description": description,
        "externalId": external_id,
    }

    collection_url = URL(instance=instance, path="projects")
    reply = session.post(url=str(collection_url), json=request_body)
    if reply.status_code == 409:
        raise AlreadyExists(reply.json()["message"])

    created = response.successful(reply).json()
    created_url = URL(instance=instance, path=str(created["relativeId"]))
    return _by_url(session=session, url=created_url)
def _by_url(session: Session, url: URL) -> Project:
    """Get project by URL.

    Fetches project from Tamr server.

    Args:
        session: Session used to send the request
        url: Project URL

    Raises:
        project.NotFound: If no project could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    address = str(url)
    reply = session.get(address)
    if reply.status_code == 404:
        raise NotFound(address)
    return _from_json(url, response.successful(reply).json())
def _by_url(session: Session, url: URL) -> Operation:
    """Get operation by URL

    Fetches operation from Tamr server

    Args:
        session: Session used to send the request
        url: Operation URL

    Raises:
        operation.NotFound: If no operation could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    operation_address = str(url)
    fetched = session.get(operation_address)
    missing = fetched.status_code == 404
    if missing:
        raise NotFound(operation_address)
    body = response.successful(fetched).json()
    return _from_json(url, body)
def get(session: Session, instance: Instance) -> Restore:
    """Get information on the latest Tamr restore, if any.

    Queries the ``instance/restore`` path of the given instance.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        Latest Tamr restore

    Raises:
        restore.NotFound: If the server answers with a 404 for the restore URL
    """
    restore_url = URL(instance=instance, path="instance/restore")
    address = str(restore_url)
    reply = session.get(address)
    if reply.status_code == 404:
        raise NotFound(address)
    body = response.successful(reply).json()
    return _from_json(restore_url, body)
def _from_url(session: Session, url: URL) -> Attribute:
    """Get attribute by URL

    Fetches attribute from Tamr server

    Args:
        session: Session used to send the request
        url: Attribute URL

    Raises:
        attribute.NotFound: If no attribute could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    location = str(url)
    got = session.get(location)
    if got.status_code == 404:
        raise NotFound(location)
    attribute_json = response.successful(got).json()
    return _from_json(url, attribute_json)
def create(
    session: Session,
    instance: Instance,
    *,
    name: str,
    key_attribute_names: Tuple[str, ...],
    description: Optional[str] = None,
    external_id: Optional[str] = None,
) -> Dataset:
    """Create a dataset in Tamr.

    After the POST succeeds, the dataset is fetched again from the
    ``relativeId`` in the reply.

    Args:
        session: Session used to send the requests
        instance: Tamr instance
        name: Dataset name
        key_attribute_names: Dataset primary key attribute names
        description: Dataset description
        external_id: External ID of the dataset

    Returns:
        Dataset created in Tamr

    Raises:
        dataset.AlreadyExists: If a dataset with these specifications already exists.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    request_body = {
        "name": name,
        "keyAttributeNames": key_attribute_names,
        "description": description,
        "externalId": external_id,
    }
    collection_url = URL(instance=instance, path="datasets")
    reply = session.post(url=str(collection_url), json=request_body)

    # A duplicate is reported as a 400 whose message mentions "already exists".
    if reply.status_code == 400:
        if "already exists" in reply.json()["message"]:
            raise AlreadyExists(reply.json()["message"])

    created = response.successful(reply).json()
    created_url = URL(instance=instance, path=str(created["relativeId"]))
    return _by_url(session=session, url=created_url)
def initiate(session: Session, instance: Instance, backup_path: str) -> Restore:
    """Initiate a Tamr restore.

    POSTs ``backup_path`` as the request body to the ``instance/restore``
    path of the given instance.

    Args:
        session: Tamr session
        instance: Tamr instance
        backup_path: Path to the backup

    Returns:
        Initiated restore

    Raises:
        restore.InvalidOperation: If attempting an invalid operation
    """
    restore_url = URL(instance=instance, path="instance/restore")
    address = str(restore_url)
    reply = session.post(address, data=backup_path)
    if reply.status_code == 400:
        raise InvalidOperation(address, reply.json()["message"])
    body = response.successful(reply).json()
    return _from_json(restore_url, body)
def by_resource_id(session: Session, instance: Instance, resource_id: str) -> Backup:
    """Get information on a specific Tamr backup.

    Args:
        session: Tamr session
        instance: Tamr instance
        resource_id: Resource ID of the backup

    Returns:
        A Tamr backup

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    backup_url = URL(instance=instance, path=f"backups/{resource_id}")
    address = str(backup_url)
    reply = session.get(address)
    if reply.status_code == 404:
        raise NotFound(address)
    body = response.successful(reply).json()
    return _from_json(backup_url, body)
def initiate(session: Session, instance: Instance) -> Backup:
    """Initiate a Tamr backup.

    POSTs to the ``backups`` path of the given instance; the returned
    backup's URL is built from the ``relativeId`` in the reply.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        Initiated backup

    Raises:
        backup.InvalidOperation: If attempting an invalid operation
    """
    address = str(URL(instance=instance, path="backups"))
    reply = session.post(address)
    if reply.status_code == 400:
        raise InvalidOperation(address, reply.json()["message"])

    body = response.successful(reply).json()
    relative_id = body["relativeId"]
    backup_url = URL(instance=instance, path=f"backups/{relative_id}")
    return _from_json(backup_url, body)
def cancel(session: Session, restore: Restore) -> Restore:
    """Cancel a Tamr restore.

    POSTs to the restore's URL suffixed with ``:cancel``.

    Args:
        session: Tamr session
        restore: A Tamr restore

    Returns:
        Canceled restore

    Raises:
        restore.NotFound: If no backup file found at the specified path
        restore.InvalidOperation: If attempting an invalid operation
    """
    endpoint = f"{restore.url}:cancel"
    reply = session.post(endpoint)
    status = reply.status_code
    if status == 404:
        raise NotFound(endpoint)
    if status == 400:
        raise InvalidOperation(endpoint, reply.json()["message"])
    body = response.successful(reply).json()
    return _from_json(restore.url, body)
def cancel(session: Session, backup: Backup) -> Backup:
    """Cancel a Tamr backup.

    POSTs to the backup's URL suffixed with ``:cancel``.

    Args:
        session: Tamr session
        backup: A Tamr backup

    Returns:
        Canceled backup

    Raises:
        backup.NotFound: If no backup found at the specified URL
        backup.InvalidOperation: If attempting an invalid operation
    """
    endpoint = f"{backup.url}:cancel"
    reply = session.post(endpoint)
    status = reply.status_code
    if status == 404:
        raise NotFound(endpoint)
    if status == 400:
        raise InvalidOperation(endpoint, reply.json()["message"])
    body = response.successful(reply).json()
    return _from_json(backup.url, body)
def poll(session: Session, backup: Backup) -> Backup:
    """Poll this backup for server-side updates.

    Does not update the :class:`~tamr_client.backup.Backup` object.
    Instead, returns a new :class:`~tamr_client.backup.Backup`.

    Args:
        session: Tamr session
        backup: Tamr backup to be polled

    Returns:
        A Tamr backup

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    address = str(backup.url)
    reply = session.get(address)
    if reply.status_code == 404:
        raise NotFound(address)
    latest = response.successful(reply).json()
    return _from_json(backup.url, latest)
def get_all(session: Session, instance: Instance) -> List[Backup]:
    """Get all backups that have been initiated for a Tamr instance.

    Each backup's URL is built from the ``relativeId`` of its JSON entry.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        A list of Tamr backups

    Raises:
        backup.NotFound: If no backup found at the specified URL
    """
    address = str(URL(instance=instance, path="backups"))
    reply = session.get(address)
    if reply.status_code == 404:
        raise NotFound(address)

    found = []
    for entry in response.successful(reply).json():
        relative_id = entry["relativeId"]
        entry_url = URL(instance=instance, path=f"backups/{relative_id}")
        found.append(_from_json(entry_url, entry))
    return found
def attributes(session: Session, dataset: Dataset) -> Tuple[Attribute, ...]:
    """Get all attributes from a dataset

    Fetches the dataset's ``/attributes`` collection; each attribute's URL
    is that collection path followed by the attribute's ``name``.

    Args:
        session: Session used to send the request
        dataset: Dataset containing the desired attributes

    Returns:
        The attributes for the specified dataset

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    collection_url = replace(dataset.url, path=dataset.url.path + "/attributes")
    reply = session.get(str(collection_url))
    entries = response.successful(reply).json()

    def _url_for(entry):
        # The attribute's name doubles as its identifier within the collection.
        attr_name = entry["name"]
        return replace(collection_url, path=collection_url.path + f"/{attr_name}")

    return tuple(_attribute_from_json(_url_for(entry), entry) for entry in entries)