예제 #1
0
def manual_labels(session: Session, instance: Instance,
                  project: CategorizationProject) -> Dataset:
    """Get manual labels from a Categorization project.

    Looks up the dataset whose name is the project's unified dataset name
    suffixed with ``_manual_categorizations`` and fetches it.

    Args:
        session: Tamr session used to issue the requests
        instance: Tamr instance containing project
        project: Tamr project containing labels

    Returns:
        Dataset containing manual labels

    Raises:
        _dataset.NotFound: If no dataset matches the labels dataset name
        _dataset.Ambiguous: If multiple datasets match the labels dataset name
        requests.HTTPError: If the dataset lookup returns an HTTP error status
    """
    unified_dataset = unified.from_project(session=session,
                                           instance=instance,
                                           project=project)
    labels_dataset_name = unified_dataset.name + "_manual_categorizations"
    datasets_url = URL(instance=instance, path="datasets")
    r = session.get(url=str(datasets_url),
                    params={"filter": f"name=={labels_dataset_name}"})
    # Surface HTTP failures before looking at the body: an error payload is a
    # JSON object, and counting its keys would be misreported as
    # NotFound/Ambiguous.
    r.raise_for_status()

    matches = r.json()
    if not matches:
        raise _dataset.NotFound(str(r.url))
    if len(matches) > 1:
        raise _dataset.Ambiguous(str(r.url))

    dataset_path = matches[0]["relativeId"]
    dataset_url = URL(instance=instance, path=dataset_path)
    return _dataset._from_url(session=session, url=dataset_url)
예제 #2
0
def by_name(session: Session, instance: Instance, name: str) -> Project:
    """Get project by name
    Fetches project from Tamr server.

    Args:
        session: Tamr session used to issue the request
        instance: Tamr instance containing this project
        name: Project name

    Returns:
        The single project whose name matches ``name``

    Raises:
        project.NotFound: If no project could be found with that name.
        project.Ambiguous: If multiple targets match project name.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    r = session.get(
        url=str(URL(instance=instance, path="projects")),
        params={"filter": f"name=={name}"},
    )
    # Raise on HTTP error statuses first; otherwise an error payload would be
    # parsed below and misreported as NotFound/Ambiguous.
    r.raise_for_status()

    # Check that exactly one project is returned
    matches = r.json()
    if not matches:
        raise NotFound(str(r.url))
    if len(matches) > 1:
        raise Ambiguous(str(r.url))

    # Make Project from response
    match = matches[0]
    url = URL(instance=instance, path=match["relativeId"])
    return _from_json(url=url, data=match)
예제 #3
0
def get_all(
    session: Session,
    instance: Instance,
    *,
    filter: Optional[Union[str, List[str]]] = None,
) -> Tuple[Project, ...]:
    """Fetch the projects of an instance, optionally narrowed by a filter.

    Args:
        session: Tamr session used to issue the request
        instance: Tamr instance from which to get projects
        filter: Filter expression, e.g. "externalId==wobbly"
            Multiple expressions can be passed as a list

    Returns:
        The projects retrieved from the instance

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    endpoint = URL(instance=instance, path="projects")

    # Only send the "filter" query parameter when the caller supplied one.
    if filter is None:
        r = session.get(str(endpoint))
    else:
        r = session.get(str(endpoint), params={"filter": filter})

    # One Project per JSON entry, addressed by the entry's own relative ID.
    return tuple(
        _from_json(URL(instance=instance, path=entry["relativeId"]), entry)
        for entry in response.successful(r).json()
    )
예제 #4
0
def _create(
    session: Session,
    instance: Instance,
    name: str,
    project_type: str,
    description: Optional[str] = None,
    external_id: Optional[str] = None,
    unified_dataset_name: Optional[str] = None,
) -> Project:
    """Create a project in Tamr and fetch the resulting project.

    Args:
        session: Tamr session used to issue the requests
        instance: Tamr instance
        name: Project name
        project_type: Project type
        description: Project description
        external_id: External ID of the project
        unified_dataset_name: Name of the unified dataset. When empty or
            omitted, ``name + "_unified_dataset"`` is used.

    Returns:
        Project created in Tamr

    Raises:
        project.AlreadyExists: If a project with these specifications already exists.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    payload = {
        "name": name,
        "type": project_type,
        # Fall back to the conventional name when none (or an empty one) is given.
        "unifiedDatasetName": unified_dataset_name or name + "_unified_dataset",
        "description": description,
        "externalId": external_id,
    }

    r = session.post(url=str(URL(instance=instance, path="projects")), json=payload)

    # A 409 is reported as AlreadyExists, carrying the server's message.
    if r.status_code == 409:
        raise AlreadyExists(r.json()["message"])

    created = response.successful(r).json()
    created_url = URL(instance=instance, path=str(created["relativeId"]))
    return _by_url(session=session, url=created_url)
예제 #5
0
def by_resource_id(session: Session, instance: Instance,
                   resource_id: str) -> Operation:
    """Get operation by ID

    Requests ``operations/<resource_id>`` and builds the Operation from the
    response via ``_from_response``.

    Args:
        session: Tamr session used to issue the request
        instance: Tamr instance the operation belongs to
        resource_id: The ID of the operation

    Returns:
        The operation built from the response
    """
    operation_url = URL(instance=instance, path=f"operations/{resource_id}")
    return _from_response(instance, session.get(str(operation_url)))
예제 #6
0
def create(
    session: Session,
    instance: Instance,
    *,
    name: str,
    key_attribute_names: Tuple[str, ...],
    description: Optional[str] = None,
    external_id: Optional[str] = None,
) -> Dataset:
    """Create a dataset in Tamr and fetch the resulting dataset.

    Args:
        session: Tamr session used to issue the requests
        instance: Tamr instance
        name: Dataset name
        key_attribute_names: Dataset primary key attribute names
        description: Dataset description
        external_id: External ID of the dataset

    Returns:
        Dataset created in Tamr

    Raises:
        dataset.AlreadyExists: If a dataset with these specifications already exists.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    payload = {
        "name": name,
        "keyAttributeNames": key_attribute_names,
        "description": description,
        "externalId": external_id,
    }

    r = session.post(url=str(URL(instance=instance, path="datasets")), json=payload)

    # A 400 whose message mentions "already exists" is reported as
    # AlreadyExists; any other failure is left to response.successful.
    if r.status_code == 400:
        message = r.json()["message"]
        if "already exists" in message:
            raise AlreadyExists(message)

    created = response.successful(r).json()
    created_url = URL(instance=instance, path=str(created["relativeId"]))
    return _by_url(session=session, url=created_url)
예제 #7
0
def initiate(session: Session, instance: Instance) -> Backup:
    """Start a Tamr backup by POSTing to the ``backups`` endpoint.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        Initiated backup

    Raises:
        backup.InvalidOperation: If attempting an invalid operation
            (the server answered with HTTP 400)
    """
    endpoint = URL(instance=instance, path="backups")
    r = session.post(str(endpoint))

    if r.status_code == 400:
        raise InvalidOperation(str(endpoint), r.json()["message"])

    payload = response.successful(r).json()
    # The backup's own URL is derived from the relative ID in the response.
    backup_url = URL(instance=instance, path=f'backups/{payload["relativeId"]}')
    return _from_json(backup_url, payload)
예제 #8
0
def get_all(session: Session, instance: Instance) -> List[Backup]:
    """List the backups that have been initiated for a Tamr instance.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        A list of Tamr backups

    Raises:
        backup.NotFound: If no backup found at the specified URL
            (the server answered with HTTP 404)
    """
    endpoint = URL(instance=instance, path="backups")
    r = session.get(str(endpoint))

    if r.status_code == 404:
        raise NotFound(str(endpoint))

    found = []
    for item in response.successful(r).json():
        # Each backup is addressed by the relative ID in its JSON entry.
        item_url = URL(instance=instance, path=f'backups/{item["relativeId"]}')
        found.append(_from_json(item_url, item))
    return found
예제 #9
0
def by_resource_id(session: Session, instance: Instance, id: str) -> Project:
    """Get project by resource ID.

    Builds the ``projects/<id>`` URL on the instance and delegates the fetch
    to ``_by_url``.

    Args:
        session: Tamr session used to issue the request
        instance: Tamr instance containing this project
        id: Project ID

    Raises:
        project.NotFound: If no project could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    return _by_url(session, URL(instance=instance, path=f"projects/{id}"))
예제 #10
0
def from_project(session: Session, project: Project) -> UnifiedDataset:
    """Get unified dataset of a project

    Appends ``/unifiedDataset`` to the project's URL path, on the project's
    own instance, and delegates the fetch to ``_by_url``.

    Args:
        session: Tamr session used to issue the request
        project: Tamr project of this Unified Dataset

    Raises:
        unified.NotFound: If no unified dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    unified_path = f"{project.url.path}/unifiedDataset"
    unified_url = URL(instance=project.url.instance, path=unified_path)
    return _by_url(session, unified_url)
예제 #11
0
def from_resource_id(session: Session, instance: Instance, id: str) -> Dataset:
    """Get dataset by resource ID

    Builds the ``datasets/<id>`` URL on the instance and delegates the fetch
    to ``_from_url``.

    Args:
        session: Tamr session used to issue the request
        instance: Tamr instance containing this dataset
        id: Dataset ID

    Raises:
        dataset.NotFound: If no dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    return _from_url(session, URL(instance=instance, path=f"datasets/{id}"))
예제 #12
0
def _from_response(instance: Instance,
                   response: requests.Response) -> Operation:
    """
    Build an Operation from a Tamr response, tolerating `HTTP 204 No Content`.

    When a Tamr API call would start an operation, but all results that would be
    produced by that operation are already up-to-date, Tamr returns `HTTP 204 No Content`

    So that client code does not have to check the response code itself, this
    function substitutes a dummy `NOOP` operation (ID "-1", state "SUCCEEDED")
    for a 204 response; for any other status the response's JSON body is used.

    Args:
        instance: Tamr instance used to build the operation's URL
        response: HTTP Response from the request that started the operation.

    Returns:
        Operation built from the response JSON, or from the dummy NOOP payload
    """
    if response.status_code != 204:
        payload = response.json()
    else:
        # Operation was successful, but the response contains no content.
        # Stand in a placeholder payload with sentinel timestamps and IDs.
        never = "0000-00-00T00:00:00.000Z"
        description = """Tamr returned HTTP 204 for this operation, indicating that all
            results that would be produced by the operation are already up-to-date."""
        payload = {
            "id": "-1",
            "type": "NOOP",
            "description": description,
            "status": {
                "state": "SUCCEEDED",
                "startTime": never,
                "endTime": never,
                "message": "",
            },
            "created": {"username": "", "time": never, "version": "-1"},
            "lastModified": {"username": "", "time": never, "version": "-1"},
            "relativeId": "operations/-1",
        }

    # The operation URL is always derived from the payload's "id" field.
    operation_id = payload["id"]
    operation_url = URL(instance=instance, path=f"operations/{operation_id}")
    return _from_json(operation_url, payload)
예제 #13
0
def get(session: Session, instance: Instance) -> Restore:
    """Fetch information on the latest Tamr restore, if any.

    Args:
        session: Tamr session
        instance: Tamr instance

    Returns:
        Latest Tamr restore

    Raises:
        restore.NotFound: If no restore found at the specified URL
            (the server answered with HTTP 404)
    """
    restore_url = URL(instance=instance, path="instance/restore")
    r = session.get(str(restore_url))

    if r.status_code == 404:
        raise NotFound(str(restore_url))

    payload = response.successful(r).json()
    return _from_json(restore_url, payload)
예제 #14
0
def initiate(session: Session, instance: Instance, backup_path: str) -> Restore:
    """Start a Tamr restore from the backup at ``backup_path``.

    The backup path is sent as the body of a POST to ``instance/restore``.

    Args:
        session: Tamr session
        instance: Tamr instance
        backup_path: Path to the backup

    Returns:
        Initiated restore

    Raises:
        restore.InvalidOperation: If attempting an invalid operation
            (the server answered with HTTP 400)
    """
    restore_url = URL(instance=instance, path="instance/restore")
    r = session.post(str(restore_url), data=backup_path)

    if r.status_code == 400:
        raise InvalidOperation(str(restore_url), r.json()["message"])

    payload = response.successful(r).json()
    return _from_json(restore_url, payload)
예제 #15
0
def by_resource_id(session: Session, instance: Instance,
                   resource_id: str) -> Backup:
    """Fetch information on a specific Tamr backup.

    Args:
        session: Tamr session
        instance: Tamr instance
        resource_id: Resource ID of the backup

    Returns:
        A Tamr backup

    Raises:
        backup.NotFound: If no backup found at the specified URL
            (the server answered with HTTP 404)
    """
    backup_url = URL(instance=instance, path=f"backups/{resource_id}")
    r = session.get(str(backup_url))

    if r.status_code == 404:
        raise NotFound(str(backup_url))

    payload = response.successful(r).json()
    return _from_json(backup_url, payload)