Exemple #1
0
class DataRow(DbObject, Updateable, BulkDeletable):
    """ One individual item of data that belongs to a Dataset.

    As an illustration, importing a CSV with 100 rows yields a single
    Dataset holding 100 DataRows.
    """
    external_id = Field.String("external_id")
    row_data = Field.String("row_data")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")

    # Relationships
    dataset = Relationship.ToOne("Dataset")
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)
    labels = Relationship.ToMany("Label", True)
    metadata = Relationship.ToMany("AssetMetadata", False, "metadata")
    predictions = Relationship.ToMany("Prediction", False)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Switch off both filtering and sorting on the metadata relationship.
        for capability in ("supports_filtering", "supports_sorting"):
            setattr(self.metadata, capability, False)

    @staticmethod
    def bulk_delete(data_rows):
        """ Deletes every DataRow in the given collection.

        Delegates to `BulkDeletable._bulk_delete`, passing `True` as its
        second argument.

        Args:
            data_rows (list of DataRow): The DataRows to delete.
        """
        BulkDeletable._bulk_delete(data_rows, True)

    def create_metadata(self, meta_type, meta_value):
        """ Attaches a new asset metadata entry to this DataRow.
            >>> datarow.create_metadata("TEXT", "This is a text message")

        Args:
            meta_type (str): Asset metadata type, must be one of:
                VIDEO, IMAGE, TEXT.
            meta_value (str): Asset metadata value.
        Returns:
            AssetMetadata DB object built from the mutation result.
        """
        meta_type_param = "metaType"
        meta_value_param = "metaValue"
        data_row_id_param = "dataRowId"

        # The same three names are used twice in the mutation: once in the
        # variable declarations and once in the `data` argument.
        param_names = (meta_type_param, meta_value_param, data_row_id_param)
        result_fields = query.results_query_part(Entity.AssetMetadata)
        query_str = """mutation CreateAssetMetadataPyApi(
            $%s: AttachmentType!, $%s: String!, $%s: ID!) {
            createAssetMetadata(data: {
                metaType: $%s metaValue: $%s dataRowId: $%s}) {%s}} """ % (
            param_names + param_names + (result_fields,))

        variables = dict(zip(param_names, (meta_type, meta_value, self.uid)))
        response = self.client.execute(query_str, variables)
        return Entity.AssetMetadata(self.client,
                                    response["createAssetMetadata"])
class PredictionModel(DbObject):
    """ A PredictionModel creates a Prediction. Legacy editor only.

    Refer to BulkImportRequest if using the new Editor.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        name (str)
        slug (str)
        version (int)

        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        created_predictions (Relationship): `ToMany` relationship to
            Prediction
    """
    # Timestamps and ownership.
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)

    # Identification of the model.
    name = Field.String("name")
    slug = Field.String("slug")
    version = Field.Int("version")

    # Predictions linked to this model.
    # NOTE(review): the meaning of the second positional argument (False)
    # is defined by Relationship, outside this block -- confirm there.
    created_predictions = Relationship.ToMany("Prediction", False,
                                              "created_predictions")
Exemple #3
0
class User(DbObject):
    """ A User is a registered Labelbox user (for example you) associated with
    data they create or import and an Organization they belong to.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        email (str)
        name (str)
        nickname (str)
        intercom_hash (str)
        picture (str)
        is_viewer (bool)
        is_external_user (bool)

        organization (Relationship): `ToOne` relationship to Organization
        created_tasks (Relationship): `ToMany` relationship to Task
        projects (Relationship): `ToMany` relationship to Project
    """
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    email = Field.String("email")
    # NOTE(review): `name` is declared with the string "nickname" and
    # `nickname` with the string "name" -- the two are crossed. Confirm
    # whether this is intentional before relying on the class-level
    # `User.name` / `User.nickname` attributes.
    name = Field.String("nickname")
    nickname = Field.String("name")
    intercom_hash = Field.String("intercom_hash")
    picture = Field.String("picture")
    is_viewer = Field.Boolean("is_viewer")
    is_external_user = Field.Boolean("is_external_user")

    # Relationships
    organization = Relationship.ToOne("Organization")
    created_tasks = Relationship.ToMany("Task", False, "created_tasks")
    projects = Relationship.ToMany("Project", False)
Exemple #4
0
class LabelingFrontend(DbObject):
    """ Is a type representing an HTML / JavaScript UI that is used to generate
    labels. “Image Labeling” is the default Labeling Frontend that comes in every
    organization. You can create new labeling frontends for an organization.

    Attributes:
        name (str)
        description (str)
        iframe_url_path (str)

        projects (Relationship): `ToMany` relationship to Project
    """
    name = Field.String("name")
    description = Field.String("description")
    iframe_url_path = Field.String("iframe_url_path")

    # TODO other fields and relationships
    # Projects associated with this labeling frontend.
    projects = Relationship.ToMany("Project", True)
Exemple #5
0
class Label(DbObject, Updateable, BulkDeletable):
    """ An assessment made on a DataRow.

    A single Label can carry many annotations, for instance 100 bounding
    boxes.
    """
    label = Field.String("label")
    seconds_to_label = Field.Float("seconds_to_label")
    agreement = Field.Float("agreement")
    benchmark_agreement = Field.Float("benchmark_agreement")
    is_benchmark_reference = Field.Boolean("is_benchmark_reference")

    project = Relationship.ToOne("Project")
    data_row = Relationship.ToOne("DataRow")
    reviews = Relationship.ToMany("Review", False)
    created_by = Relationship.ToOne("User", False, "created_by")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Filtering is switched off on the reviews relationship.
        self.reviews.supports_filtering = False

    @staticmethod
    def bulk_delete(labels):
        """ Deletes every Label in the given collection.

        Delegates to `BulkDeletable._bulk_delete`, passing `False` as its
        second argument.

        Args:
            labels (list of Label): The Labels to delete.
        """
        BulkDeletable._bulk_delete(labels, False)

    def create_review(self, **kwargs):
        """ Creates a Review attached to this label.

        The review is connected to this Label and to this Label's Project
        before it is handed to the client for creation.

        Kwargs:
            Review attributes. At a minimum a `Review.score` field
            value must be provided.
        Returns:
            Whatever `client._create` returns for the Review entity.
        """
        Review = Entity.Review
        kwargs[Review.label.name] = self
        kwargs[Review.project.name] = self.project()
        return self.client._create(Review, kwargs)

    def create_benchmark(self):
        """ Creates a Benchmark based on this Label.

        Returns:
            The newly created Benchmark.
        """
        label_id_param = "labelId"
        benchmark_fields = query.results_query_part(Entity.Benchmark)
        query_str = """mutation CreateBenchmarkPyApi($%s: ID!) {
            createBenchmark(data: {labelId: $%s}) {%s}} """ % (
            label_id_param, label_id_param, benchmark_fields)

        variables = {label_id_param: self.uid}
        response = self.client.execute(query_str, variables)
        return Entity.Benchmark(self.client, response["createBenchmark"])
Exemple #6
0
class PredictionModel(DbObject):
    """ A prediction model represents a specific version of a model.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        name (str)
        slug (str)
        version (int)

        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        created_predictions (Relationship): `ToMany` relationship to
            Prediction
    """
    # Timestamps and ownership.
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)

    # Identification of the model.
    name = Field.String("name")
    slug = Field.String("slug")
    version = Field.Int("version")

    # Predictions linked to this model.
    created_predictions = Relationship.ToMany("Prediction", False,
                                              "created_predictions")
Exemple #7
0
class Ontology(DbObject):
    """Describes the tools and classifications a project can use.

    Currently read only. The parsed tool and classification lists are
    built lazily from `normalized` and cached on the instance.

    Attributes:
        name (str)
        description (str)
        updated_at (datetime)
        created_at (datetime)
        normalized (json)
        object_schema_count (int)
        classification_schema_count (int)
        projects (Relationship): `ToMany` relationship to Project
        created_by (Relationship): `ToOne` relationship to User
    """

    name = Field.String("name")
    description = Field.String("description")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    normalized = Field.Json("normalized")
    object_schema_count = Field.Int("object_schema_count")
    classification_schema_count = Field.Int("classification_schema_count")

    projects = Relationship.ToMany("Project", True)
    created_by = Relationship.ToOne("User", False, "created_by")

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Caches, filled in on first access by tools() / classifications().
        self._tools: Optional[List[Tool]] = None
        self._classifications: Optional[List[Classification]] = None

    def tools(self) -> List[Tool]:
        """Return the ontology's tools (AKA objects), parsing them once."""
        if self._tools is not None:
            return self._tools
        raw_tools = self.normalized['tools']
        self._tools = list(map(Tool.from_dict, raw_tools))
        return self._tools

    def classifications(self) -> List[Classification]:
        """Return the ontology's classifications, parsing them once."""
        if self._classifications is not None:
            return self._classifications
        raw_classifications = self.normalized['classifications']
        self._classifications = list(
            map(Classification.from_dict, raw_classifications))
        return self._classifications
Exemple #8
0
class Ontology(DbObject):
    """ Describes the tools and classifications a project can use.

    NOTE: This is read only for now.

    >>> project = client.get_project(name="<project_name>")
    >>> ontology = project.ontology()
    >>> ontology.normalized

    The parsed tool and classification lists are built lazily from
    `normalized` and cached on the instance.
    """

    name = Field.String("name")
    description = Field.String("description")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    normalized = Field.Json("normalized")
    object_schema_count = Field.Int("object_schema_count")
    classification_schema_count = Field.Int("classification_schema_count")

    projects = Relationship.ToMany("Project", True)
    created_by = Relationship.ToOne("User", False, "created_by")

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # Caches, filled in on first access by tools() / classifications().
        self._tools: Optional[List[Tool]] = None
        self._classifications: Optional[List[Classification]] = None

    def tools(self) -> List[Tool]:
        """ Returns the ontology's tools, parsing `normalized` only once. """
        if self._tools is not None:
            return self._tools  # type: ignore
        raw_tools = self.normalized['tools']
        self._tools = list(map(Tool.from_json, raw_tools))
        return self._tools  # type: ignore

    def classifications(self) -> List[Classification]:
        """ Returns the ontology's classifications, parsing `normalized`
        only once.
        """
        if self._classifications is not None:
            return self._classifications  # type: ignore
        raw_classifications = self.normalized['classifications']
        self._classifications = list(
            map(Classification.from_json, raw_classifications))
        return self._classifications  # type: ignore
Exemple #9
0
class LabelingFrontend(DbObject):
    """ Label editor.

    Represents an HTML / JavaScript UI that is used to generate
    labels. “Editor” is the default Labeling Frontend that comes in every
    organization. You can create new labeling frontends for an organization.

    Attributes:
        name (str)
        description (str)
        iframe_url_path (str)

        projects (Relationship): `ToMany` relationship to Project
    """
    # Scalar fields.
    name = Field.String("name")
    description = Field.String("description")
    iframe_url_path = Field.String("iframe_url_path")

    # TODO other fields and relationships
    # Projects associated with this labeling frontend.
    projects = Relationship.ToMany("Project", True)
Exemple #10
0
class PredictionModel(DbObject):
    """ A prediction model represents a specific version of a model.

    NOTE: This is used for the legacy editor [1], if you wish to
    import annotations, refer to [2]


    [1] https://labelbox.com/docs/legacy/import-model-prediction
    [2] https://labelbox.com/docs/automation/model-assisted-labeling

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        name (str)
        slug (str)
        version (int)

        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        created_predictions (Relationship): `ToMany` relationship to
            Prediction
    """
    # Timestamps and ownership.
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)

    # Identification of the model.
    name = Field.String("name")
    slug = Field.String("slug")
    version = Field.Int("version")

    # Predictions linked to this model.
    created_predictions = Relationship.ToMany("Prediction", False,
                                              "created_predictions")
Exemple #11
0
class Project(DbObject, Updateable, Deletable):
    """ A Project is a container that includes a labeling frontend, an ontology,
    datasets and labels.

    Attributes:
        name (str)
        description (str)
        updated_at (datetime)
        created_at (datetime)
        setup_complete (datetime)
        last_activity_time (datetime)
        auto_audit_number_of_labels (int)
        auto_audit_percentage (float)

        datasets (Relationship): `ToMany` relationship to Dataset
        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        reviews (Relationship): `ToMany` relationship to Review
        labeling_frontend (Relationship): `ToOne` relationship to
            LabelingFrontend
        labeling_frontend_options (Relationship): `ToMany` relationship to
            LabelingFrontendOptions
        labeling_parameter_overrides (Relationship): `ToMany` relationship
            to LabelingParameterOverride
        webhooks (Relationship): `ToMany` relationship to Webhook
        benchmarks (Relationship): `ToMany` relationship to Benchmark
        active_prediction_model (Relationship): `ToOne` relationship to
            PredictionModel
        predictions (Relationship): `ToMany` relationship to Prediction
    """
    name = Field.String("name")
    description = Field.String("description")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    setup_complete = Field.DateTime("setup_complete")
    last_activity_time = Field.DateTime("last_activity_time")
    auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels")
    auto_audit_percentage = Field.Float("auto_audit_percentage")

    # Relationships
    datasets = Relationship.ToMany("Dataset", True)
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)
    reviews = Relationship.ToMany("Review", True)
    labeling_frontend = Relationship.ToOne("LabelingFrontend")
    labeling_frontend_options = Relationship.ToMany(
        "LabelingFrontendOptions", False, "labeling_frontend_options")
    labeling_parameter_overrides = Relationship.ToMany(
        "LabelingParameterOverride", False, "labeling_parameter_overrides")
    webhooks = Relationship.ToMany("Webhook", False)
    benchmarks = Relationship.ToMany("Benchmark", False)
    active_prediction_model = Relationship.ToOne("PredictionModel", False,
                                                 "active_prediction_model")
    predictions = Relationship.ToMany("Prediction", False)

    def create_label(self, **kwargs):
        """ Creates a label on this Project.

        Kwargs:
            Label attributes. At the minimum the label `DataRow`.
        Returns:
            The newly created Label.
        """
        # Copy-paste of Client._create code so we can inject
        # a connection to Type. Type objects are on their way to being
        # deprecated and we don't want the Py client lib user to know
        # about them. At the same time they're connected to a Label at
        # label creation in a non-standard way (connect via name).

        Label = Entity.Label

        kwargs[Label.project] = self
        # Default the labeling time to 0.0 when the caller did not pass one
        # under its string name.
        kwargs[Label.seconds_to_label] = kwargs.get(
            Label.seconds_to_label.name, 0.0)
        # Normalize keys to attribute objects (callers pass string names)
        # and replace DbObject values by their uid.
        data = {
            Label.attribute(attr) if isinstance(attr, str) else attr:
            value.uid if isinstance(value, DbObject) else value
            for attr, value in kwargs.items()
        }

        query_str, params = query.create(Label, data)
        # Inject connection to Type
        query_str = query_str.replace(
            "data: {", "data: {type: {connect: {name: \"Any\"}} ")
        res = self.client.execute(query_str, params)
        return Label(self.client, res["createLabel"])

    def labels(self, datasets=None, order_by=None):
        """
        Custom relationship expansion method to support limited filtering.

        Args:
            datasets (iterable of Dataset): Optional collection of Datasets
                whose Labels are sought. If not provided, all Labels in
                this Project are returned.
            order_by (None or (Field, Field.Order)): Ordering clause.
        Returns:
            A PaginatedCollection of Labels.
        """
        Label = Entity.Label

        if datasets is not None:
            where = " where:{dataRow: {dataset: {id_in: [%s]}}}" % ", ".join(
                '"%s"' % dataset.uid for dataset in datasets)
        else:
            where = ""

        if order_by is not None:
            query.check_order_by_clause(Label, order_by)
            # The leading space keeps the clause separated from whatever
            # precedes it. Without it, an absent `where` produced
            # "first: <int>orderBy: ...", which strict GraphQL lexers
            # reject (a number directly followed by a name).
            order_by_str = " orderBy: %s_%s" % (order_by[0].graphql_name,
                                                order_by[1].name.upper())
        else:
            order_by_str = ""

        id_param = "projectId"
        # `%%d` placeholders are left in the query for the paginator to fill.
        query_str = """query GetProjectLabelsPyApi($%s: ID!)
            {project (where: {id: $%s})
                {labels (skip: %%d first: %%d%s%s) {%s}}}""" % (
            id_param, id_param, where, order_by_str,
            query.results_query_part(Label))

        return PaginatedCollection(self.client, query_str,
                                   {id_param: self.uid}, ["project", "labels"],
                                   Label)

    def export_labels(self, timeout_seconds=60):
        """ Calls the server-side Label exporting that generates a JSON
        payload, and returns the URL to that payload.

        Will only generate a new URL at a max frequency of 30 min.

        Args:
            timeout_seconds (float): Max waiting time, in seconds.
        Returns:
            URL of the data file with this Project's labels. If the server
                didn't generate during the `timeout_seconds` period, None
                is returned.
        """
        sleep_time = 2
        id_param = "projectId"
        query_str = """mutation GetLabelExportUrlPyApi($%s: ID!)
            {exportLabels(data:{projectId: $%s }) {downloadUrl createdAt shouldPoll} }
        """ % (id_param, id_param)

        # Poll until the server reports the export is ready or the time
        # budget is used up.
        while True:
            res = self.client.execute(query_str, {id_param: self.uid})
            res = res["exportLabels"]
            if not res["shouldPoll"]:
                return res["downloadUrl"]

            timeout_seconds -= sleep_time
            if timeout_seconds <= 0:
                return None

            logger.debug("Project '%s' label export, waiting for server...",
                         self.uid)
            time.sleep(sleep_time)

    def labeler_performance(self):
        """ Returns the labeler performances for this Project.

        Returns:
            A PaginatedCollection of LabelerPerformance objects.
        """
        id_param = "projectId"
        query_str = """query LabelerPerformancePyApi($%s: ID!) {
            project(where: {id: $%s}) {
                labelerPerformance(skip: %%d first: %%d) {
                    count user {%s} secondsPerLabel totalTimeLabeling consensus
                    averageBenchmarkAgreement lastActivityTime}
            }}""" % (id_param, id_param, query.results_query_part(Entity.User))

        def create_labeler_performance(client, result):
            # Turn the raw result dict into a LabelerPerformance: wrap the
            # user payload in a User entity, convert the activity time to
            # an aware UTC datetime and snake_case the keys.
            result["user"] = Entity.User(client, result["user"])
            # NOTE(review): the division by 1000 suggests the server sends
            # epoch milliseconds -- confirm against the API.
            result["lastActivityTime"] = datetime.fromtimestamp(
                result["lastActivityTime"] / 1000, timezone.utc)
            return LabelerPerformance(**{
                utils.snake_case(key): value
                for key, value in result.items()
            })

        return PaginatedCollection(self.client, query_str,
                                   {id_param: self.uid},
                                   ["project", "labelerPerformance"],
                                   create_labeler_performance)

    def review_metrics(self, net_score):
        """ Returns this Project's review metrics.

        Args:
            net_score (None or Review.NetScore): Indicates desired metric.
        Returns:
            int, aggregation count of reviews for given net_score.
        Raises:
            InvalidQueryError: if `net_score` is neither None nor a
                Review.NetScore value.
        """
        if net_score not in (None, ) + tuple(Entity.Review.NetScore):
            raise InvalidQueryError(
                "Review metrics net score must be either None "
                "or one of Review.NetScore values")
        id_param = "projectId"
        net_score_literal = "None" if net_score is None else net_score.name
        query_str = """query ProjectReviewMetricsPyApi($%s: ID!){
            project(where: {id:$%s})
            {reviewMetrics {labelAggregate(netScore: %s) {count}}}
        }""" % (id_param, id_param, net_score_literal)
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["project"]["reviewMetrics"]["labelAggregate"]["count"]

    def setup(self, labeling_frontend, labeling_frontend_options):
        """ Finalizes the Project setup.

        Connects the labeling frontend, stores its options and stamps
        `setup_complete` with the current UTC time.

        Args:
            labeling_frontend (LabelingFrontend): Which UI to use to label the
                data.
            labeling_frontend_options (dict or str): Labeling frontend options,
                a.k.a. project ontology. If given a `dict` it will be converted
                to `str` using `json.dumps`.
        """
        organization = self.client.get_organization()
        if not isinstance(labeling_frontend_options, str):
            labeling_frontend_options = json.dumps(labeling_frontend_options)

        self.labeling_frontend.connect(labeling_frontend)

        LFO = Entity.LabelingFrontendOptions
        # The created object is not needed afterwards, only its creation.
        self.client._create(
            LFO, {
                LFO.project: self,
                LFO.labeling_frontend: labeling_frontend,
                LFO.customization_options: labeling_frontend_options,
                LFO.organization: organization
            })

        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        self.update(setup_complete=timestamp)

    def set_labeling_parameter_overrides(self, data):
        """ Adds labeling parameter overrides to this project. Example:

            >>> project.set_labeling_parameter_overrides([
            >>>     (data_row_1, 2, 3), (data_row_2, 1, 4)])

        Args:
            data (iterable): An iterable of tuples. Each tuple must contain
                (DataRow, priority, numberOfLabels) for the new override.
        Returns:
            bool, indicates if the operation was a success.
        """
        data_str = ",\n".join(
            "{dataRow: {id: \"%s\"}, priority: %d, numLabels: %d }" %
            (data_row.uid, priority, num_labels)
            for data_row, priority, num_labels in data)
        id_param = "projectId"
        query_str = """mutation SetLabelingParameterOverridesPyApi($%s: ID!){
            project(where: { id: $%s }) {setLabelingParameterOverrides
            (data: [%s]) {success}}} """ % (id_param, id_param, data_str)
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["project"]["setLabelingParameterOverrides"]["success"]

    def unset_labeling_parameter_overrides(self, data_rows):
        """ Removes labeling parameter overrides to this project.

        Args:
            data_rows (iterable): An iterable of DataRows.
        Returns:
            bool, indicates if the operation was a success.
        """
        id_param = "projectId"
        query_str = """mutation UnsetLabelingParameterOverridesPyApi($%s: ID!){
            project(where: { id: $%s}) {
            unsetLabelingParameterOverrides(data: [%s]) { success }}}""" % (
            id_param, id_param, ",\n".join("{dataRowId: \"%s\"}" % row.uid
                                           for row in data_rows))
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["project"]["unsetLabelingParameterOverrides"]["success"]

    def upsert_review_queue(self, quota_factor):
        """ Reinitiates the review queue for this project.

        Args:
            quota_factor (float): Which part (percentage) of the queue
                to reinitiate. Between 0 and 1.
        """
        id_param = "projectId"
        quota_param = "quotaFactor"
        query_str = """mutation UpsertReviewQueuePyApi($%s: ID!, $%s: Float!){
            upsertReviewQueue(where:{project: {id: $%s}}
                            data:{quotaFactor: $%s}) {id}}""" % (
            id_param, quota_param, id_param, quota_param)
        # The mutation result is not used; nothing is returned.
        self.client.execute(query_str, {
            id_param: self.uid,
            quota_param: quota_factor
        })

    def extend_reservations(self, queue_type):
        """ Extends all the current reservations for the current user on the given
        queue type.

        Args:
            queue_type (str): Either "LabelingQueue" or "ReviewQueue"
        Returns:
            int, the number of reservations that were extended.
        Raises:
            InvalidQueryError: if `queue_type` is not one of the two
                supported values.
        """
        if queue_type not in ("LabelingQueue", "ReviewQueue"):
            raise InvalidQueryError("Unsupported queue type: %s" % queue_type)

        id_param = "projectId"
        query_str = """mutation ExtendReservationsPyApi($%s: ID!){
            extendReservations(projectId:$%s queueType:%s)}""" % (
            id_param, id_param, queue_type)
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["extendReservations"]

    def create_prediction_model(self, name, version):
        """ Creates a PredictionModel connected to this Project.

        The new model is also connected as this Project's
        `active_prediction_model`.

        Args:
            name (str): The new PredictionModel's name.
            version (int): The new PredictionModel's version.
        Return:
            A newly created PredictionModel.
        """
        PM = Entity.PredictionModel
        model = self.client._create(PM, {
            PM.name.name: name,
            PM.version.name: version
        })
        self.active_prediction_model.connect(model)
        return model

    def create_prediction(self, label, data_row, prediction_model=None):
        """ Creates a Prediction within this Project.

        Args:
            label (str): The `label` field of the new Prediction.
            data_row (DataRow): The DataRow for which the Prediction is created.
            prediction_model (PredictionModel or None): The PredictionModel
                within which the new Prediction is created. If None then this
                Project's active_prediction_model is used.
        Return:
            A newly created Prediction.
        Raises:
            InvalidQueryError: if given `prediction_model` is None and this
                Project's active_prediction_model is also None.
        """
        if prediction_model is None:
            prediction_model = self.active_prediction_model()
            if prediction_model is None:
                raise InvalidQueryError(
                    "Project '%s' has no active prediction model" % self.name)

        label_param = "label"
        model_param = "prediction_model_id"
        project_param = "project_id"
        data_row_param = "data_row_id"

        Prediction = Entity.Prediction
        query_str = """mutation CreatePredictionPyApi(
            $%s: String!, $%s: ID!, $%s: ID!, $%s: ID!) {createPrediction(
            data: {label: $%s, predictionModelId: $%s, projectId: $%s,
                   dataRowId: $%s})
            {%s}}""" % (label_param, model_param, project_param,
                        data_row_param, label_param, model_param,
                        project_param, data_row_param,
                        query.results_query_part(Prediction))
        params = {
            label_param: label,
            model_param: prediction_model.uid,
            data_row_param: data_row.uid,
            project_param: self.uid
        }
        res = self.client.execute(query_str, params)
        return Prediction(self.client, res["createPrediction"])

    def upload_annotations(
        self,
        name: str,
        annotations: Union[str, Path, Iterable[dict]],
    ) -> 'BulkImportRequest':  # type: ignore
        """ Uploads annotations to a project.

        Args:
            name: name of the BulkImportRequest job
            annotations:
                url that is publicly accessible by Labelbox containing an
                ndjson file
                OR local path to an ndjson file
                OR iterable of annotation rows
        Returns:
            BulkImportRequest
        Raises:
            FileNotFoundError: if `annotations` is a str or Path that is
                neither a valid url nor an existing local file.
            ValueError: if `annotations` is not a str, Path or iterable.

        """
        if isinstance(annotations, (str, Path)):

            def _is_url_valid(url: Union[str, Path]) -> bool:
                """ Verifies that the given string is a valid url.

                Args:
                    url: string to be checked
                Returns:
                    True if the given url is valid otherwise False

                """
                # A Path object is always treated as a local file.
                if isinstance(url, Path):
                    return False
                parsed = urlparse(url)
                return bool(parsed.scheme) and bool(parsed.netloc)

            if _is_url_valid(annotations):
                return BulkImportRequest.create_from_url(
                    client=self.client,
                    project_id=self.uid,
                    name=name,
                    url=str(annotations),
                )
            else:
                path = Path(annotations)
                if not path.exists():
                    raise FileNotFoundError(
                        f'{annotations} is not a valid url nor existing local file'
                    )
                return BulkImportRequest.create_from_local_file(
                    client=self.client,
                    project_id=self.uid,
                    name=name,
                    file=path,
                    validate_file=True,
                )
        elif isinstance(annotations, Iterable):
            return BulkImportRequest.create_from_objects(
                client=self.client,
                project_id=self.uid,
                name=name,
                predictions=annotations,  # type: ignore
            )
        else:
            raise ValueError(
                f'Invalid annotations given of type: {type(annotations)}')
Exemple #12
0
class User(DbObject):
    """ A User is a registered Labelbox user (for example you) associated with
    data they create or import and an Organization they belong to.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        email (str)
        name (str)
        nickname (str)
        intercom_hash (str)
        picture (str)
        is_viewer (bool)
        is_external_user (bool)

        organization (Relationship): `ToOne` relationship to Organization
        created_tasks (Relationship): `ToMany` relationship to Task
        projects (Relationship): `ToMany` relationship to Project
        org_role (Relationship): `ToOne` relationship to OrgRole
    """

    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    email = Field.String("email")
    # NOTE(review): `name` is declared with the string "nickname" and
    # `nickname` with the string "name" -- the two are crossed. Left as is
    # because changing it would alter the public mapping; confirm whether
    # it is intentional.
    name = Field.String("nickname")
    nickname = Field.String("name")
    intercom_hash = Field.String("intercom_hash")
    picture = Field.String("picture")
    is_viewer = Field.Boolean("is_viewer")
    is_external_user = Field.Boolean("is_external_user")

    # Relationships
    organization = Relationship.ToOne("Organization")
    created_tasks = Relationship.ToMany("Task", False, "created_tasks")
    projects = Relationship.ToMany("Project", False)
    org_role = Relationship.ToOne("OrgRole", False)

    def update_org_role(self, role: Role):
        """ Updates the `User`'s organization role.

        See client.get_roles() to get all valid roles.
        If a user is converted from project level permissions to org level
        permissions and then converted back, their permissions will remain
        for each individual project.

        Args:
            role (Role): The role that you want to set for this user.

        """
        user_id_param = "userId"
        role_id_param = "roleId"
        # Variable names are interpolated in both the declaration and the
        # `data` argument so the two cannot drift apart.
        query_str = """mutation SetOrganizationRolePyApi($%s: ID!, $%s: ID!) {
            setOrganizationRole(data: {userId: $%s, roleId: $%s}) { id name }}
        """ % (user_id_param, role_id_param, user_id_param, role_id_param)

        self.client.execute(query_str, {
            user_id_param: self.uid,
            role_id_param: role.uid
        })

    def remove_from_project(self, project: Project):
        """ Removes a User from a project. Only used for project based users.
        Project based user means their org role is "NONE".

        Implemented by assigning the user the role stored under 'NONE' in
        `client.get_roles()` for the given project.

        Args:
            project (Project): Project to remove user from

        Raises:
            ValueError: if the user is not project based.

        """
        self.upsert_project_role(project, self.client.get_roles()['NONE'])

    def upsert_project_role(self, project: Project, role: Role):
        """ Updates or replaces a User's role in a project.

        Args:
            project (Project): The project to update the user's permissions
                for.
            role (Role): The role to assign to this user in this project.

        Raises:
            ValueError: if the user's org role name is not "NONE" (case
                insensitive), i.e. the user is not project based.

        """
        org_role = self.org_role()
        if org_role.name.upper() != 'NONE':
            raise ValueError(
                "User is not project based and has access to all projects")

        project_id_param = "projectId"
        user_id_param = "userId"
        role_id_param = "roleId"
        # Variable names are interpolated in both the declaration and the
        # `data` argument so the two cannot drift apart.
        query_str = """mutation SetProjectMembershipPyApi($%s: ID!, $%s: ID!, $%s: ID!) {
                setProjectMembership(data: {userId: $%s, roleId: $%s, projectId: $%s}) {id}}
        """ % (user_id_param, role_id_param, project_id_param, user_id_param,
               role_id_param, project_id_param)

        self.client.execute(
            query_str, {
                project_id_param: project.uid,
                user_id_param: self.uid,
                role_id_param: role.uid
            })
Exemple #13
0
class Organization(DbObject):
    """ An Organization is a group of Users.

    Data created by a User is associated with that User's Organization.
    Typically every User within an Organization has access to data created
    by any other User in the same Organization.

    Attributes:
        updated_at (datetime)
        created_at (datetime)
        name (str)

        users (Relationship): `ToMany` relationship to User
        projects (Relationship): `ToMany` relationship to Project
        webhooks (Relationship): `ToMany` relationship to Webhook
    """

    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    name = Field.String("name")

    # Relationships
    users = Relationship.ToMany("User", False)
    projects = Relationship.ToMany("Project", True)
    webhooks = Relationship.ToMany("Webhook", False)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The server side does not support filtering on ID in the query for
        # getting a single organization, so the relationship managers of an
        # Organization use the type in Query (and not the source object).
        for rel in self.relationships():
            manager = getattr(self, rel.name)
            manager.filter_on_id = False

    @experimental
    def invite_user(
            self,
            email: str,
            role: Role,
            project_roles: Optional[List[ProjectRole]] = None) -> Invite:
        """
        Invite a new member to the org. This will send the user an email invite

        Args:
            email (str): email address of the user to invite
            role (Role): Role to assign to the user
            project_roles (Optional[List[ProjectRoles]]): List of project roles to assign to the User (if they have a project based org role).

        Returns:
            Invite for the user

        Raises:
            ValueError: If `project_roles` are given together with an
                organization role whose name is not `NONE`.
            LabelboxError: If the server response contains no invite.

        Notes:
            This function is currently experimental and has a few limitations that will be resolved in future releases
            1. If you try to add an unsupported role you will get an error referring to invalid foreign keys
                - In this case `role.get_roles` is likely not getting the right ids
            2. Multiple invites can be sent for the same email. This can only be resolved in the UI for now.
                - Future releases of the SDK will support the ability to query and revoke invites to solve this problem (and/or checking on the backend)
            3. Some server side responses are unclear (e.g. if the user invites themself `None` is returned, which the SDK raises as a `LabelboxError`)
        """

        if project_roles:
            if role.name != "NONE":
                raise ValueError(
                    f"Project roles cannot be set for a user with organization level permissions. Found role name `{role.name}`, expected `NONE`"
                )

        data_param = "data"
        inviter_fields = query.results_query_part(User)
        query_str = """mutation createInvitesPyApi($%s: [CreateInviteInput!]){
                    createInvites(data: $%s){  invite { id createdAt organizationRoleName inviteeEmail inviter { %s } }}}""" % (
            data_param, data_param, inviter_fields)

        project_assignments = []
        for project_role in project_roles or []:
            project_assignments.append({
                "projectId": project_role.project.uid,
                "projectRoleId": project_role.role.uid
            })

        invite_input = {
            "inviterId": self.client.get_user().uid,
            "inviteeEmail": email,
            "organizationId": self.uid,
            "organizationRoleId": role.uid,
            "projects": project_assignments
        }
        res = self.client.execute(query_str, {data_param: [invite_input]},
                                  experimental=True)

        # Only one invite is submitted, so only the first entry is relevant.
        invite_response = res['createInvites'][0]['invite']
        if not invite_response:
            raise LabelboxError(f"Unable to send invite for email {email}")
        return Invite(self.client, invite_response)

    @experimental
    def invite_limit(self) -> InviteLimit:
        """ Retrieve invite limits for the org
        This already accounts for users currently in the org
        Meaning that `used = users + invites, remaining = limit - (users + invites)`

        Returns:
            InviteLimit

        """
        org_id_param = "organizationId"
        query_str = """query InvitesLimitPyApi($%s: ID!) {
            invitesLimit(where: {id: $%s}) { used limit remaining }
        }""" % (org_id_param, org_id_param)
        res = self.client.execute(query_str, {org_id_param: self.uid},
                                  experimental=True)

        # Response keys are converted to snake_case before being passed on
        # as keyword arguments.
        limits = {}
        for key, value in res['invitesLimit'].items():
            limits[utils.snake_case(key)] = value
        return InviteLimit(**limits)

    def remove_user(self, user: User):
        """
        Deletes a user from the organization. This cannot be undone without sending another invite.

        Args:
            user (User): The user to delete from the org
        """

        user_id_param = "userId"
        query_str = """mutation DeleteMemberPyApi($%s: ID!) {
            updateUser(where: {id: $%s}, data: {deleted: true}) { id deleted }
        }""" % (user_id_param, user_id_param)
        self.client.execute(query_str, {user_id_param: user.uid})
class Project(DbObject, Updateable, Deletable):
    """ A Project is a container that includes a labeling frontend, an ontology,
    datasets and labels.

    Attributes:
        name (str)
        description (str)
        updated_at (datetime)
        created_at (datetime)
        setup_complete (datetime)
        last_activity_time (datetime)
        auto_audit_number_of_labels (int)
        auto_audit_percentage (float)

        datasets (Relationship): `ToMany` relationship to Dataset
        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        reviews (Relationship): `ToMany` relationship to Review
        labeling_frontend (Relationship): `ToOne` relationship to LabelingFrontend
        labeling_frontend_options (Relationship): `ToMany` relationship to LabelingFrontendOptions
        labeling_parameter_overrides (Relationship): `ToMany` relationship to LabelingParameterOverride
        webhooks (Relationship): `ToMany` relationship to Webhook
        benchmarks (Relationship): `ToMany` relationship to Benchmark
        active_prediction_model (Relationship): `ToOne` relationship to PredictionModel
        predictions (Relationship): `ToMany` relationship to Prediction
        ontology (Relationship): `ToOne` relationship to Ontology
    """
    name = Field.String("name")
    description = Field.String("description")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")
    setup_complete = Field.DateTime("setup_complete")
    last_activity_time = Field.DateTime("last_activity_time")
    auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels")
    auto_audit_percentage = Field.Float("auto_audit_percentage")

    # Relationships
    datasets = Relationship.ToMany("Dataset", True)
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)
    reviews = Relationship.ToMany("Review", True)
    labeling_frontend = Relationship.ToOne("LabelingFrontend")
    labeling_frontend_options = Relationship.ToMany(
        "LabelingFrontendOptions", False, "labeling_frontend_options")
    labeling_parameter_overrides = Relationship.ToMany(
        "LabelingParameterOverride", False, "labeling_parameter_overrides")
    webhooks = Relationship.ToMany("Webhook", False)
    benchmarks = Relationship.ToMany("Benchmark", False)
    active_prediction_model = Relationship.ToOne("PredictionModel", False,
                                                 "active_prediction_model")
    predictions = Relationship.ToMany("Prediction", False)
    ontology = Relationship.ToOne("Ontology", True)

    def members(self):
        """ Fetch all current members for this project

        Returns:
            A `PaginatedCollection` of `ProjectMember`s

        """
        id_param = "projectId"
        user_fields = query.results_query_part(Entity.User)
        # `%%d` leaves the skip/first placeholders in the rendered query.
        query_str = """query ProjectMemberOverviewPyApi($%s: ID!) {
             project(where: {id : $%s}) { id members(skip: %%d first: %%d){ id user { %s } role { id name } }
           }
        }""" % (id_param, id_param, user_fields)

        query_params = {id_param: str(self.uid)}
        result_path = ["project", "members"]
        return PaginatedCollection(self.client, query_str, query_params,
                                   result_path, ProjectMember)

    def create_label(self, **kwargs):
        """ Creates a label on a Legacy Editor project. Not supported in the new Editor.

        A deprecation warning is logged on every call.

        Args:
            **kwargs: Label attributes. At minimum, the label `DataRow`.
                The label's project is always set to this Project and
                `seconds_to_label` defaults to 0.0 when it is not given.
        Returns:
            A Label built from the `createLabel` entry of the server response.
        """
        # Copy-paste of Client._create code so we can inject
        # a connection to Type. Type objects are on their way to being
        # deprecated and we don't want the Py client lib user to know
        # about them. At the same time they're connected to a Label at
        # label creation in a non-standard way (connect via name).
        logger.warning(
            "`create_label` is deprecated and is not compatible with the new editor."
        )

        Label = Entity.Label

        # These two entries are keyed by the attribute objects themselves,
        # unlike caller supplied kwargs, which are keyed by name.
        kwargs[Label.project] = self
        kwargs[Label.seconds_to_label] = kwargs.get(
            Label.seconds_to_label.name, 0.0)
        # Normalize the payload: string keys are resolved through
        # `Label.attribute`, and DbObject values are replaced by their uid.
        data = {
            Label.attribute(attr) if isinstance(attr, str) else attr:
            value.uid if isinstance(value, DbObject) else value
            for attr, value in kwargs.items()
        }

        query_str, params = query.create(Label, data)
        # Inject connection to Type
        query_str = query_str.replace(
            "data: {", "data: {type: {connect: {name: \"Any\"}} ")
        res = self.client.execute(query_str, params)
        return Label(self.client, res["createLabel"])

    def labels(self, datasets=None, order_by=None):
        """ Custom relationship expansion method to support limited filtering.

        Args:
            datasets (iterable of Dataset): Optional collection of Datasets
                whose Labels are sought. If not provided, all Labels in
                this Project are returned.
            order_by (None or (Field, Field.Order)): Ordering clause.
        Returns:
            A PaginatedCollection of Labels.
        """
        Label = Entity.Label

        # Optional filter restricting the labels to the given datasets.
        where = ""
        if datasets is not None:
            quoted_ids = ['"%s"' % dataset.uid for dataset in datasets]
            where = " where:{dataRow: {dataset: {id_in: [%s]}}}" % ", ".join(
                quoted_ids)

        # Optional ordering clause, validated before it is rendered.
        order_by_str = ""
        if order_by is not None:
            query.check_order_by_clause(Label, order_by)
            order_field, order_direction = order_by[0], order_by[1]
            order_by_str = "orderBy: %s_%s" % (order_field.graphql_name,
                                               order_direction.name.upper())

        id_param = "projectId"
        label_fields = query.results_query_part(Label)
        query_str = """query GetProjectLabelsPyApi($%s: ID!)
            {project (where: {id: $%s})
                {labels (skip: %%d first: %%d %s %s) {%s}}}""" % (
            id_param, id_param, where, order_by_str, label_fields)

        query_params = {id_param: self.uid}
        return PaginatedCollection(self.client, query_str, query_params,
                                   ["project", "labels"], Label)

    def export_labels(self, timeout_seconds=60):
        """ Calls the server-side Label exporting that generates a JSON
        payload, and returns the URL to that payload.

        Will only generate a new URL at a max frequency of 30 min.

        The server is polled every 2 seconds for as long as it reports that
        polling is still required.

        Args:
            timeout_seconds (float): Max waiting time, in seconds.
        Returns:
            URL of the data file with this Project's labels. If the server didn't
            generate during the `timeout_seconds` period, None is returned.
        """
        poll_interval = 2
        id_param = "projectId"
        query_str = """mutation GetLabelExportUrlPyApi($%s: ID!)
            {exportLabels(data:{projectId: $%s }) {downloadUrl createdAt shouldPoll} }
        """ % (id_param, id_param)
        query_params = {id_param: self.uid}

        remaining = timeout_seconds
        while True:
            export = self.client.execute(query_str,
                                         query_params)["exportLabels"]
            if not export["shouldPoll"]:
                return export["downloadUrl"]

            # Give up once the next wait would exhaust the time budget.
            remaining -= poll_interval
            if remaining <= 0:
                return None

            logger.debug("Project '%s' label export, waiting for server...",
                         self.uid)
            time.sleep(poll_interval)

    def export_issues(self, status=None):
        """ Calls the server-side Issues exporting that
        returns the URL to that payload.

        Args:
            status (string): valid values: Open, Resolved
        Returns:
            URL of the data file with this Project's issues.
        Raises:
            ValueError: If `status` is not None, "Open" or "Resolved".
        """
        valid_statuses = {None, "Open", "Resolved"}
        if status not in valid_statuses:
            raise ValueError("status must be in {}. Found {}".format(
                valid_statuses, status))

        id_param = "projectId"
        status_param = "status"
        query_str = """query GetProjectIssuesExportPyApi($%s: ID!, $%s: IssueStatus) {
            project(where: { id: $%s }) {
                issueExportUrl(where: { status: $%s })
            }
        }""" % (id_param, status_param, id_param, status_param)

        query_params = {id_param: self.uid, status_param: status}
        project_result = self.client.execute(query_str,
                                             query_params)['project']

        logger.debug("Project '%s' issues export, link generated", self.uid)

        return project_result.get('issueExportUrl')

    def upsert_instructions(self, instructions_file: str):
        """
        * Uploads instructions to the UI. Running more than once will replace the instructions

        Args:
            instructions_file (str): Path to a local file.
                * Must be either a pdf, text, or html file.

        Raises:
            ValueError:
                * project must be setup
                * instructions file must end with one of ".text", ".txt", ".pdf", ".html"
                * project must have labeling frontend options to attach the instructions to
        """

        if self.setup_complete is None:
            raise ValueError(
                "Cannot attach instructions to a project that has not been set up."
            )

        # Validate the file name before any server round trip so that an
        # invalid argument fails fast.
        supported_instruction_formats = (".text", ".txt", ".pdf", ".html")
        if not instructions_file.endswith(supported_instruction_formats):
            raise ValueError(
                f"instructions_file must end with one of {supported_instruction_formats}. Found {instructions_file}"
            )

        frontend = self.labeling_frontend()
        frontendId = frontend.uid

        if frontend.name != "Editor":
            logger.warning(
                "This function has only been tested to work with the Editor front end. Found %s",
                frontend.name)

        frontend_options = list(self.labeling_frontend_options())
        if not frontend_options:
            raise ValueError(
                "Cannot attach instructions to a project that has no labeling frontend options."
            )
        # The instructions are written into the last options entry returned.
        lfo = frontend_options[-1]
        instructions_url = self.client.upload_file(instructions_file)
        customization_options = json.loads(lfo.customization_options)
        customization_options['projectInstructions'] = instructions_url
        option_id = lfo.uid

        self.client.execute(
            """mutation UpdateFrontendWithExistingOptionsPyApi (
                    $frontendId: ID!, 
                    $optionsId: ID!, 
                    $name: String!, 
                    $description: String!, 
                    $customizationOptions: String!
                ) {
                    updateLabelingFrontend(
                        where: {id: $frontendId}, 
                        data: {name: $name, description: $description}
                    ) {id}
                    updateLabelingFrontendOptions(
                        where: {id: $optionsId}, 
                        data: {customizationOptions: $customizationOptions}
                    ) {id}
                }""", {
                "frontendId": frontendId,
                "optionsId": option_id,
                "name": frontend.name,
                "description": "Video, image, and text annotation",
                "customizationOptions": json.dumps(customization_options)
            })

    def labeler_performance(self):
        """ Returns the labeler performances for this Project.

        Returns:
            A PaginatedCollection of LabelerPerformance objects.
        """
        id_param = "projectId"
        user_fields = query.results_query_part(Entity.User)
        query_str = """query LabelerPerformancePyApi($%s: ID!) {
            project(where: {id: $%s}) {
                labelerPerformance(skip: %%d first: %%d) {
                    count user {%s} secondsPerLabel totalTimeLabeling consensus
                    averageBenchmarkAgreement lastActivityTime}
            }}""" % (id_param, id_param, user_fields)

        def create_labeler_performance(client, result):
            # Converts one raw result (updated in place) into a
            # LabelerPerformance with snake_case field names.
            result["user"] = Entity.User(client, result["user"])
            # python isoformat doesn't accept Z as utc timezone, so the
            # offset is spelled out before parsing
            iso_timestamp = result["lastActivityTime"].replace('Z', '+00:00')
            result["lastActivityTime"] = datetime.fromisoformat(iso_timestamp)

            fields = {}
            for key, value in result.items():
                fields[utils.snake_case(key)] = value
            return LabelerPerformance(**fields)

        query_params = {id_param: self.uid}
        return PaginatedCollection(self.client, query_str, query_params,
                                   ["project", "labelerPerformance"],
                                   create_labeler_performance)

    def review_metrics(self, net_score):
        """ Returns this Project's review metrics.

        Args:
            net_score (None or Review.NetScore): Indicates desired metric.
        Returns:
            int, aggregation count of reviews for given `net_score`.
        Raises:
            InvalidQueryError: If `net_score` is neither None nor a
                Review.NetScore value.
        """
        allowed_scores = (None, ) + tuple(Entity.Review.NetScore)
        if net_score not in allowed_scores:
            raise InvalidQueryError(
                "Review metrics net score must be either None "
                "or one of Review.NetScore values")

        # The score is written into the query text by name.
        if net_score is None:
            net_score_literal = "None"
        else:
            net_score_literal = net_score.name

        id_param = "projectId"
        query_str = """query ProjectReviewMetricsPyApi($%s: ID!){
            project(where: {id:$%s})
            {reviewMetrics {labelAggregate(netScore: %s) {count}}}
        }""" % (id_param, id_param, net_score_literal)
        res = self.client.execute(query_str, {id_param: self.uid})
        label_aggregate = res["project"]["reviewMetrics"]["labelAggregate"]
        return label_aggregate["count"]

    def setup(self, labeling_frontend, labeling_frontend_options):
        """ Finalizes the Project setup.

        Connects the labeling frontend, creates the frontend options and
        then stamps `setup_complete` with the current UTC time.

        Args:
            labeling_frontend (LabelingFrontend): Which UI to use to label the
                data.
            labeling_frontend_options (dict or str): Labeling frontend options,
                a.k.a. project ontology. If given a `dict` it will be converted
                to `str` using `json.dumps`.
        """
        organization = self.client.get_organization()

        if isinstance(labeling_frontend_options, str):
            options_json = labeling_frontend_options
        else:
            options_json = json.dumps(labeling_frontend_options)

        self.labeling_frontend.connect(labeling_frontend)

        LFO = Entity.LabelingFrontendOptions
        lfo_data = {
            LFO.project: self,
            LFO.labeling_frontend: labeling_frontend,
            LFO.customization_options: options_json,
            LFO.organization: organization
        }
        self.client._create(LFO, lfo_data)

        now_utc = datetime.now(timezone.utc)
        self.update(setup_complete=now_utc.strftime("%Y-%m-%dT%H:%M:%SZ"))

    def validate_labeling_parameter_overrides(self, data):
        """ Checks the rows passed to `set_labeling_parameter_overrides`.

        Args:
            data (iterable): Rows of (DataRow, priority, num_labels).
        Raises:
            TypeError: If a row does not have exactly 3 items, its first
                item is not a DataRow, or priority / num_labels is not an int.
            ValueError: If priority or num_labels is smaller than 1.
        """
        for idx, row in enumerate(data):
            if len(row) != 3:
                raise TypeError(
                    f"Data must be a list of tuples containing a DataRow, priority (int), num_labels (int). Found {len(row)} items. Index: {idx}"
                )

            data_row, priority, num_labels = row
            if not isinstance(data_row, DataRow):
                raise TypeError(
                    f"data_row should be be of type DataRow. Found {type(data_row)}. Index: {idx}"
                )

            # Both numeric fields share the same rules: an int of at least 1.
            numeric_fields = (("Priority", priority),
                              ("Number of labels", num_labels))
            for name, value in numeric_fields:
                if not isinstance(value, int):
                    raise TypeError(
                        f"{name} must be an int. Found {type(value)} for data_row {data_row}. Index: {idx}"
                    )
                if value < 1:
                    raise ValueError(
                        f"{name} must be greater than 0 for data_row {data_row}. Index: {idx}"
                    )

    def set_labeling_parameter_overrides(self, data):
        """ Adds labeling parameter overrides to this project.

        See information on priority here:
            https://docs.labelbox.com/en/configure-editor/queue-system#reservation-system

            >>> project.set_labeling_parameter_overrides([
            >>>     (data_row_1, 2, 3), (data_row_2, 1, 4)])

        Args:
            data (iterable): An iterable of tuples. Each tuple must contain
                (DataRow, priority<int>, number_of_labels<int>) for the new override.
                One-shot iterables such as generators are supported.

                Priority:
                    * Data will be labeled in priority order.
                        - A lower number priority is labeled first.
                        - Minimum priority is 1.
                    * Priority is not the queue position.
                        - The position is determined by the relative priority.
                        - E.g. [(data_row_1, 5,1), (data_row_2, 2,1), (data_row_3, 10,1)]
                            will be assigned in the following order: [data_row_2, data_row_1, data_row_3]
                    * Datarows with parameter overrides will appear before datarows without overrides.
                    * The priority only affects items in the queue.
                        - Assigning a priority will not automatically add the item back into the queue.
                Number of labels:
                    * The number of times a data row should be labeled.
                        - Creates duplicate data rows in a project (one for each number of labels).
                    * New duplicated data rows will be added to the queue.
                        - Already labeled duplicates will not be sent back to the queue.
                    * The queue will never assign the same datarow to a single labeler more than once.
                        - If the number of labels is greater than the number of labelers working on a project then
                            the extra items will remain in the queue (this can be fixed by removing the override at any time).
                    * Setting this to 1 will result in the default behavior (no duplicates).
        Returns:
            bool, indicates if the operation was a success.
        Raises:
            TypeError, ValueError: If `data` fails
                `validate_labeling_parameter_overrides`.
        """
        # `data` is walked twice (validation, then query building), so it is
        # materialized first; otherwise a generator would be exhausted by the
        # validation pass and an empty override list would be sent.
        data = list(data)
        self.validate_labeling_parameter_overrides(data)
        data_str = ",\n".join(
            "{dataRow: {id: \"%s\"}, priority: %d, numLabels: %d }" %
            (data_row.uid, priority, num_labels)
            for data_row, priority, num_labels in data)
        id_param = "projectId"
        query_str = """mutation SetLabelingParameterOverridesPyApi($%s: ID!){
            project(where: { id: $%s }) {setLabelingParameterOverrides
            (data: [%s]) {success}}} """ % (id_param, id_param, data_str)
        res = self.client.execute(query_str, {id_param: self.uid})
        return res["project"]["setLabelingParameterOverrides"]["success"]

    def unset_labeling_parameter_overrides(self, data_rows):
        """ Removes labeling parameter overrides from this project.

        * This will remove unlabeled duplicates in the queue.

        Args:
            data_rows (iterable): An iterable of DataRows.
        Returns:
            bool, indicates if the operation was a success.
        """
        # One `{dataRowId: "..."}` entry per data row.
        row_entries = ["{dataRowId: \"%s\"}" % row.uid for row in data_rows]
        data_str = ",\n".join(row_entries)

        id_param = "projectId"
        query_str = """mutation UnsetLabelingParameterOverridesPyApi($%s: ID!){
            project(where: { id: $%s}) {
            unsetLabelingParameterOverrides(data: [%s]) { success }}}""" % (
            id_param, id_param, data_str)
        res = self.client.execute(query_str, {id_param: self.uid})
        outcome = res["project"]["unsetLabelingParameterOverrides"]
        return outcome["success"]

    def upsert_review_queue(self, quota_factor):
        """ Sets the proportion of total assets in a project to review.

        More information can be found here:
            https://docs.labelbox.com/en/quality-assurance/review-labels#configure-review-percentage

        Args:
            quota_factor (float): Which part (percentage) of the queue
                to reinitiate. Between 0 and 1, both inclusive.
        Raises:
            ValueError: If `quota_factor` is outside the range [0, 1].
        """

        # The bounds are inclusive, matching the documented [0,1] range.
        if not 0. <= quota_factor <= 1.:
            raise ValueError("Quota factor must be in the range of [0,1]")

        id_param = "projectId"
        quota_param = "quotaFactor"
        query_str = """mutation UpsertReviewQueuePyApi($%s: ID!, $%s: Float!){
            upsertReviewQueue(where:{project: {id: $%s}}
                            data:{quotaFactor: $%s}) {id}}""" % (
            id_param, quota_param, id_param, quota_param)
        # The response only carries the queue id; nothing is returned.
        self.client.execute(query_str, {
            id_param: self.uid,
            quota_param: quota_factor
        })

    def extend_reservations(self, queue_type):
        """ Extends all the current reservations for the current user on the given
        queue type.

        Args:
            queue_type (str): Either "LabelingQueue" or "ReviewQueue"
        Returns:
            int, the number of reservations that were extended.
        Raises:
            InvalidQueryError: If `queue_type` is not one of the two
                supported values.
        """
        supported_queue_types = ("LabelingQueue", "ReviewQueue")
        if queue_type not in supported_queue_types:
            raise InvalidQueryError("Unsupported queue type: %s" % queue_type)

        id_param = "projectId"
        # The queue type is written into the query text directly, which is
        # why it is restricted to the supported values above.
        query_str = """mutation ExtendReservationsPyApi($%s: ID!){
            extendReservations(projectId:$%s queueType:%s)}""" % (
            id_param, id_param, queue_type)
        query_params = {id_param: self.uid}
        return self.client.execute(query_str,
                                   query_params)["extendReservations"]

    def create_prediction_model(self, name, version):
        """ Creates a PredictionModel connected to a Legacy Editor Project.

        The new model is also connected as this Project's active
        prediction model. A deprecation warning is logged on every call.

        Args:
            name (str): The new PredictionModel's name.
            version (int): The new PredictionModel's version.
        Returns:
            A newly created PredictionModel.
        """

        logger.warning(
            "`create_prediction_model` is deprecated and is not compatible with the new editor."
        )

        PM = Entity.PredictionModel
        model_fields = {PM.name.name: name, PM.version.name: version}
        model = self.client._create(PM, model_fields)
        self.active_prediction_model.connect(model)
        return model

    def create_prediction(self, label, data_row, prediction_model=None):
        """ Creates a Prediction within a Legacy Editor Project. Not supported
        in the new Editor.

        A deprecation warning is logged on every call.

        Args:
            label (str): The `label` field of the new Prediction.
            data_row (DataRow): The DataRow for which the Prediction is created.
            prediction_model (PredictionModel or None): The PredictionModel
                within which the new Prediction is created. If None then this
                Project's active_prediction_model is used.
        Return:
            A newly created Prediction.
        Raises:
            labelbox.exceptions.InvalidQueryError: if given `prediction_model`
                is None and this Project's active_prediction_model is also
                None.
        """
        logger.warning(
            "`create_prediction` is deprecated and is not compatible with the new editor."
        )

        # Fall back to the project's active model when none was given.
        if prediction_model is None:
            prediction_model = self.active_prediction_model()
        if prediction_model is None:
            raise InvalidQueryError(
                "Project '%s' has no active prediction model" % self.name)

        label_param = "label"
        model_param = "prediction_model_id"
        project_param = "project_id"
        data_row_param = "data_row_id"
        # Used once for the variable declarations and once for the payload.
        param_names = (label_param, model_param, project_param,
                       data_row_param)

        Prediction = Entity.Prediction
        prediction_fields = query.results_query_part(Prediction)
        query_str = """mutation CreatePredictionPyApi(
            $%s: String!, $%s: ID!, $%s: ID!, $%s: ID!) {createPrediction(
            data: {label: $%s, predictionModelId: $%s, projectId: $%s,
                   dataRowId: $%s})
            {%s}}""" % (param_names + param_names + (prediction_fields, ))

        params = {
            label_param: label,
            model_param: prediction_model.uid,
            data_row_param: data_row.uid,
            project_param: self.uid
        }
        res = self.client.execute(query_str, params)
        return Prediction(self.client, res["createPrediction"])

    def enable_model_assisted_labeling(self, toggle: bool = True) -> bool:
        """ Turns model assisted labeling either on or off based on input

        Args:
            toggle (bool): True or False boolean
        Returns:
            True if toggled on or False if toggled off, as reported back by
            the server.
        """
        project_param = "project_id"
        show_param = "show"
        # Declared first, then referenced, in the same order.
        placeholders = (project_param, show_param, project_param, show_param)

        query_str = """mutation toggle_model_assisted_labelingPyApi($%s: ID!, $%s: Boolean!) {
            project(where: {id: $%s }) {
                showPredictionsToLabelers(show: $%s) {
                    id, showingPredictionsToLabelers
                }
            }
        }""" % placeholders

        res = self.client.execute(query_str, {
            project_param: self.uid,
            show_param: toggle
        })
        toggled = res["project"]["showPredictionsToLabelers"]
        return toggled["showingPredictionsToLabelers"]

    def upload_annotations(
            self,
            name: str,
            annotations: Union[str, Path, Iterable[Dict]],
            validate: bool = True) -> 'BulkImportRequest':  # type: ignore
        """ Uploads annotations to a new Editor project.

        Args:
            name (str): name of the BulkImportRequest job
            annotations (str or Path or Iterable):
                url that is publicly accessible by Labelbox containing an
                ndjson file
                OR local path to an ndjson file
                OR iterable of annotation rows
            validate (bool):
                Whether or not to validate the payload before uploading.
        Returns:
            BulkImportRequest
        Raises:
            FileNotFoundError: If `annotations` is a str or Path that is
                neither a url nor an existing local file.
            ValueError: If `annotations` is not a str, Path or Iterable.
        """

        if isinstance(annotations, (str, Path)):
            # A str counts as a url only when it has both a scheme and a
            # network location; a Path is always treated as a local file.
            is_url = False
            if not isinstance(annotations, Path):
                parsed = urlparse(annotations)
                is_url = bool(parsed.scheme) and bool(parsed.netloc)

            if is_url:
                return BulkImportRequest.create_from_url(client=self.client,
                                                         project_id=self.uid,
                                                         name=name,
                                                         url=str(annotations),
                                                         validate=validate)

            path = Path(annotations)
            if not path.exists():
                raise FileNotFoundError(
                    f'{annotations} is not a valid url nor existing local file'
                )
            return BulkImportRequest.create_from_local_file(
                client=self.client,
                project_id=self.uid,
                name=name,
                file=path,
                validate_file=validate,
            )

        if isinstance(annotations, Iterable):
            return BulkImportRequest.create_from_objects(
                client=self.client,
                project_id=self.uid,
                name=name,
                predictions=annotations,  # type: ignore
                validate=validate)

        raise ValueError(
            f'Invalid annotations given of type: {type(annotations)}')
Exemple #15
0
class Dataset(DbObject, Updateable, Deletable):
    """ A Dataset is a collection of DataRows.

    Attributes:
        name (str)
        description (str)
        updated_at (datetime)
        created_at (datetime)

        projects (Relationship): `ToMany` relationship to Project
        data_rows (Relationship): `ToMany` relationship to DataRow
        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization

    """
    name = Field.String("name")
    description = Field.String("description")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")

    # Relationships
    projects = Relationship.ToMany("Project", True)
    data_rows = Relationship.ToMany("DataRow", False)
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)

    def create_data_row(self, **kwargs):
        """ Creates a single DataRow belonging to this dataset.

        >>> dataset.create_data_row(row_data="http://my_site.com/photos/img_01.jpg")

        Args:
            **kwargs: Key-value arguments containing new `DataRow` data. At a minimum,
                must contain `row_data`. If the `row_data` value is a path
                that exists locally, the file is uploaded first and the
                value is replaced by what `client.upload_file` returns.

        Returns:
            The result of `client._create(DataRow, kwargs)` (presumably the
            newly created `DataRow`).

        Raises:
            InvalidQueryError: If `DataRow.row_data` field value is not provided
                in `kwargs`.
            InvalidAttributeError: in case the DB object type does not contain
                any of the field names given in `kwargs`.

        """
        DataRow = Entity.DataRow
        if DataRow.row_data.name not in kwargs:
            raise InvalidQueryError(
                "DataRow.row_data missing when creating DataRow.")

        # If row data is a local file path, upload it to server.
        row_data = kwargs[DataRow.row_data.name]
        if os.path.exists(row_data):
            kwargs[DataRow.row_data.name] = self.client.upload_file(row_data)

        # Attach the new row to this dataset.
        kwargs[DataRow.dataset.name] = self

        return self.client._create(DataRow, kwargs)

    def create_data_rows(self, items):
        """ Creates multiple DataRow objects based on the given `items`.

        Each element in `items` can be either a `str` or a `dict`. If
        it is a `str`, then it is interpreted as a local file path. The file
        is uploaded to Labelbox and a DataRow referencing it is created
        (the original path is stored as the row's `external_id`).

        If an item is a `dict`, then it could support one of the two following structures
            1. For static imagery, video, and text it should map `DataRow` fields (or their names) to values.
               At the minimum an `item` passed as a `dict` must contain a `DataRow.row_data` key and value.
            2. For tiled imagery the dict must match the import structure specified in the link below
               https://docs.labelbox.com/data-model/en/index-en#tiled-imagery-import

        >>> dataset.create_data_rows([
        >>>     {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
        >>>     "path/to/file2.jpg",
        >>>     {"tileLayerUrl" : "http://", ...}
        >>>     ])

        For an example showing how to upload tiled data_rows see the following notebook:
            https://github.com/Labelbox/labelbox-python/blob/ms/develop/model_assisted_labeling/tiled_imagery_mal.ipynb

        Args:
            items (iterable of (dict or str)): See above for details.

        Returns:
            Task representing the data import on the server side. The Task
            can be used for inspecting task progress and waiting until it's done.

        Raises:
            InvalidQueryError: If the `items` parameter does not conform to
                the specification above or if the server did not accept the
                DataRow creation request (unknown reason).
            ResourceNotFoundError: If unable to retrieve the Task for the
                import process. This could imply that the import failed.
            InvalidAttributeError: If there are fields in `items` not valid for
                a DataRow.
        """
        file_upload_thread_count = 20
        DataRow = Entity.DataRow

        def upload_if_necessary(item):
            if isinstance(item, str):
                item_url = self.client.upload_file(item)
                # Convert item from str into a dict so it gets processed
                # like all other dicts.
                item = {DataRow.row_data: item_url, DataRow.external_id: item}
            return item

        # Local files are uploaded concurrently; dict items pass through.
        with ThreadPool(file_upload_thread_count) as thread_pool:
            items = thread_pool.map(upload_if_necessary, items)

        def convert_item(item):
            # Don't make any changes to tms data
            if "tileLayerUrl" in item:
                return item
            # Convert string names to fields.
            item = {
                key if isinstance(key, Field) else DataRow.field(key): value
                for key, value in item.items()
            }

            if DataRow.row_data not in item:
                raise InvalidQueryError(
                    "DataRow.row_data missing when creating DataRow.")

            invalid_keys = set(item) - set(DataRow.fields())
            if invalid_keys:
                raise InvalidAttributeError(DataRow, invalid_keys)

            # Item is valid, convert it to a dict {graphql_field_name: value}
            # Need to change the name of DataRow.row_data to "data"
            return {
                "data" if key == DataRow.row_data else key.graphql_name: value
                for key, value in item.items()
            }

        # Prepare and upload the descriptor file
        items = [convert_item(item) for item in items]
        data = json.dumps(items)
        descriptor_url = self.client.upload_data(data)

        # Create data source
        dataset_param = "datasetId"
        url_param = "jsonUrl"
        query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
            appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
            ){ taskId accepted } } """ % (dataset_param, url_param,
                                          dataset_param, url_param)
        res = self.client.execute(query_str, {
            dataset_param: self.uid,
            url_param: descriptor_url
        })
        res = res["appendRowsToDataset"]
        if not res["accepted"]:
            raise InvalidQueryError(
                "Server did not accept DataRow creation request")

        # Fetch and return the task.
        task_id = res["taskId"]
        user = self.client.get_user()
        task = list(user.created_tasks(where=Entity.Task.uid == task_id))
        if len(task) != 1:
            raise ResourceNotFoundError(Entity.Task, task_id)
        task = task[0]
        # Cache user in a private variable as the relationship can't be
        # resolved due to server-side limitations (see Task.created_by)
        # for more info.
        task._user = user
        return task

    def data_rows_for_external_id(self, external_id, limit=10):
        """ Convenience method for getting the `DataRows` belonging to this
        `Dataset` that have the given `external_id`.

        Args:
            external_id (str): External ID of the sought `DataRows`.
            limit (int): The maximum number of data rows to return for the given external_id

        Returns:
            A non-empty list holding at most `limit` `DataRows` with the
            given external ID.

        Raises:
            labelbox.exceptions.ResourceNotFoundError: If there is no `DataRow`
                in this `DataSet` with the given external ID.
        """
        DataRow = Entity.DataRow
        where = DataRow.external_id == external_id

        data_rows = self.data_rows(where=where)
        # Get at most `limit` data_rows.
        data_rows = list(islice(data_rows, limit))

        if not data_rows:
            raise ResourceNotFoundError(DataRow, where)
        return data_rows

    def data_row_for_external_id(self, external_id):
        """ Convenience method for getting a single `DataRow` belonging to this
        `Dataset` that has the given `external_id`.

        If more than one `DataRow` shares the external ID, a warning is
        logged and the first one found is returned.

        Args:
            external_id (str): External ID of the sought `DataRow`.

        Returns:
            A single `DataRow` with the given ID.

        Raises:
            labelbox.exceptions.ResourceNotFoundError: If there is no `DataRow`
                in this `DataSet` with the given external ID.
        """
        # Two rows are enough to tell "unique" apart from "ambiguous".
        data_rows = self.data_rows_for_external_id(external_id=external_id,
                                                   limit=2)
        if len(data_rows) > 1:
            # Plain format string: the logger interpolates `%s` lazily.
            logger.warning(
                "More than one data_row has the provided external_id : `%s`. "
                "Use function data_rows_for_external_id to fetch all",
                external_id)
        return data_rows[0]
Exemple #16
0
class Dataset(DbObject, Updateable, Deletable):
    """ A dataset is a collection of DataRows. For example, if you have a CSV with
    100 rows, you will have 1 Dataset and 100 DataRows.
    """
    name = Field.String("name")
    description = Field.String("description")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")

    # Relationships
    projects = Relationship.ToMany("Project", True)
    data_rows = Relationship.ToMany("DataRow", False)
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)

    def create_data_row(self, **kwargs):
        """ Creates a single DataRow belonging to this dataset.

        >>> dataset.create_data_row(row_data="http://my_site.com/photos/img_01.jpg")

        Kwargs:
            Key-value arguments containing new `DataRow` data.
            At a minimum `kwargs` must contain `row_data`. The value for
            `row_data` is a string. If it is a path to an existing local
            file then it is uploaded to Labelbox's server. Otherwise it is
            treated as an external URL.
        Returns:
            The result of `client._create(DataRow, kwargs)` (presumably the
            newly created `DataRow`).
        Raises:
            InvalidQueryError: If `DataRow.row_data` field value is not provided
                in `kwargs`.
            InvalidAttributeError: in case the DB object type does not contain
                any of the field names given in `kwargs`.

        """
        DataRow = Entity.DataRow
        if DataRow.row_data.name not in kwargs:
            raise InvalidQueryError(
                "DataRow.row_data missing when creating DataRow.")

        # If row data is a local file path, upload it to server.
        row_data = kwargs[DataRow.row_data.name]
        if os.path.exists(row_data):
            kwargs[DataRow.row_data.name] = self.client.upload_file(row_data)

        # Attach the new row to this dataset.
        kwargs[DataRow.dataset.name] = self

        return self.client._create(DataRow, kwargs)

    def create_data_rows(self, items):
        """ Creates multiple DataRow objects based on the given `items`.

        Each element in `items` can be either a `str` or a `dict`. If
        it is a `str`, then it is interpreted as a local file path. The file
        is uploaded to Labelbox and a DataRow referencing it is created
        (the original path is stored as the row's `external_id`).
        If an item is a `dict`, then it should map `DataRow` fields (or their
        names) to values. At the minimum an `item` passed as a `dict` must
        contain a `DataRow.row_data` key and value.

        >>> dataset.create_data_rows([
        >>>     {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
        >>>     "path/to/file2.jpg"
        >>>     ])

        Args:
            items (iterable of (dict or str)): See above for details.

        Returns:
            Task representing the data import on the server side. The Task
            can be used for inspecting task progress and waiting until it's done.

        Raises:
            InvalidQueryError: If the `items` parameter does not conform to
                the specification above or if the server did not accept the
                DataRow creation request (unknown reason).
            ResourceNotFoundError: If unable to retrieve the Task for the
                import process. This could imply that the import failed.
            InvalidAttributeError: If there are fields in `items` not valid for
                a DataRow.
        """
        file_upload_thread_count = 20
        DataRow = Entity.DataRow

        def upload_if_necessary(item):
            if isinstance(item, str):
                item_url = self.client.upload_file(item)
                # Convert item from str into a dict so it gets processed
                # like all other dicts.
                item = {DataRow.row_data: item_url, DataRow.external_id: item}
            return item

        # Local files are uploaded concurrently; dict items pass through.
        with ThreadPool(file_upload_thread_count) as thread_pool:
            items = thread_pool.map(upload_if_necessary, items)

        def convert_item(item):
            # Convert string names to fields.
            item = {
                key if isinstance(key, Field) else DataRow.field(key): value
                for key, value in item.items()
            }

            if DataRow.row_data not in item:
                raise InvalidQueryError(
                    "DataRow.row_data missing when creating DataRow.")

            invalid_keys = set(item) - set(DataRow.fields())
            if invalid_keys:
                # Report the offending keys computed just above.
                raise InvalidAttributeError(DataRow, invalid_keys)

            # Item is valid, convert it to a dict {graphql_field_name: value}
            # Need to change the name of DataRow.row_data to "data"
            return {
                "data" if key == DataRow.row_data else key.graphql_name: value
                for key, value in item.items()
            }

        # Prepare and upload the descriptor file
        data = json.dumps([convert_item(item) for item in items])
        descriptor_url = self.client.upload_data(data)

        # Create data source
        dataset_param = "datasetId"
        url_param = "jsonUrl"
        query_str = """mutation AppendRowsToDatasetPyApi($%s: ID!, $%s: String!){
            appendRowsToDataset(data:{datasetId: $%s, jsonFileUrl: $%s}
            ){ taskId accepted } } """ % (dataset_param, url_param,
                                          dataset_param, url_param)
        res = self.client.execute(query_str, {
            dataset_param: self.uid,
            url_param: descriptor_url
        })
        res = res["appendRowsToDataset"]
        if not res["accepted"]:
            raise InvalidQueryError(
                "Server did not accept DataRow creation request")

        # Fetch and return the task.
        task_id = res["taskId"]
        user = self.client.get_user()
        task = list(user.created_tasks(where=Entity.Task.uid == task_id))
        if len(task) != 1:
            raise ResourceNotFoundError(Entity.Task, task_id)
        task = task[0]
        # Cache user in a private variable as the relationship can't be
        # resolved due to server-side limitations (see Task.created_by)
        # for more info.
        task._user = user
        return task

    def data_row_for_external_id(self, external_id):
        """ Convenience method for getting a single `DataRow` belonging to this
        `Dataset` that has the given `external_id`.

        Args:
            external_id (str): External ID of the sought `DataRow`.

        Returns:
            A single `DataRow` with the given ID.

        Raises:
            labelbox.exceptions.ResourceNotFoundError: If there is no `DataRow`
                in this `DataSet` with the given external ID, or if there are
                multiple `DataRows` for it.
        """
        DataRow = Entity.DataRow
        where = DataRow.external_id == external_id

        data_rows = self.data_rows(where=where)
        # Get at most two data_rows. `range(2)` goes first so that zip stops
        # as soon as the range is exhausted, without pulling a third row
        # from the lazily evaluated collection.
        data_rows = [row for _, row in zip(range(2), data_rows)]

        if len(data_rows) != 1:
            raise ResourceNotFoundError(DataRow, where)

        return data_rows[0]
class DataRow(DbObject, Updateable, BulkDeletable):
    """ Labelbox-side record for one piece of data (e.g. image, video, text).

    Attributes:
        external_id (str): User-generated file name or identifier
        row_data (str): A local file path (uploaded to Labelbox's server)
            or, otherwise, an external URL.
        updated_at (datetime)
        created_at (datetime)

        dataset (Relationship): `ToOne` relationship to Dataset
        created_by (Relationship): `ToOne` relationship to User
        organization (Relationship): `ToOne` relationship to Organization
        labels (Relationship): `ToMany` relationship to Label
        metadata (Relationship): `ToMany` relationship to AssetMetadata
        predictions (Relationship): `ToMany` relationship to Prediction
    """
    external_id = Field.String("external_id")
    row_data = Field.String("row_data")
    updated_at = Field.DateTime("updated_at")
    created_at = Field.DateTime("created_at")

    # Relationships
    dataset = Relationship.ToOne("Dataset")
    created_by = Relationship.ToOne("User", False, "created_by")
    organization = Relationship.ToOne("Organization", False)
    labels = Relationship.ToMany("Label", True)
    metadata = Relationship.ToMany("AssetMetadata", False, "metadata")
    predictions = Relationship.ToMany("Prediction", False)

    # Values of every AssetMetadata.MetaType member; used to validate the
    # `meta_type` argument of `create_metadata`.
    supported_meta_types = {member.value for member in AssetMetadata.MetaType}

    @staticmethod
    def bulk_delete(data_rows):
        """ Deletes every DataRow in `data_rows`.

        Args:
            data_rows (list of DataRow): The DataRows to delete.
        """
        BulkDeletable._bulk_delete(data_rows, True)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The metadata relationship is flagged as neither filterable
        # nor sortable.
        for capability in ("supports_filtering", "supports_sorting"):
            setattr(self.metadata, capability, False)

    def create_metadata(self, meta_type, meta_value):
        """ Creates an asset metadata entry attached to this DataRow.

            >>> datarow.create_metadata("TEXT", "This is a text message")

        Args:
            meta_type (str): Asset metadata type, must be one of:
                VIDEO, IMAGE, TEXT, IMAGE_OVERLAY (AssetMetadata.MetaType)
            meta_value (str): Asset metadata value.
        Returns:
            `AssetMetadata` DB object.
        Raises:
            ValueError: meta_type must be one of the supported types.
        """
        allowed_types = self.supported_meta_types
        if meta_type not in allowed_types:
            raise ValueError(
                f"meta_type must be one of {self.supported_meta_types}. Found {meta_type}"
            )

        # GraphQL variable names; each appears once in the mutation's
        # signature and once in its `data` payload.
        type_var, value_var, row_var = "metaType", "metaValue", "dataRowId"
        variable_names = (type_var, value_var, row_var)

        mutation = """mutation CreateAssetMetadataPyApi(
            $%s: AttachmentType!, $%s: String!, $%s: ID!) {
            createAssetMetadata(data: {
                metaType: $%s metaValue: $%s dataRowId: $%s}) {%s}} """ % (
            *variable_names, *variable_names,
            query.results_query_part(Entity.AssetMetadata))

        variables = {
            type_var: meta_type,
            value_var: meta_value,
            row_var: self.uid,
        }
        response = self.client.execute(mutation, variables)
        created = response["createAssetMetadata"]
        return Entity.AssetMetadata(self.client, created)