class BulkImportRequest(DbObject):
    """A bulk import job that uploads ndjson predictions into a project.

    Attributes:
        name (str)
        state (Enum): a `BulkImportRequestState` value
        input_file_url (str)
        error_file_url (str)
        status_file_url (str)
        created_at (datetime)
        project (Relationship): `ToOne` relationship to Project
        created_by (Relationship): `ToOne` relationship to User
    """
    name = Field.String("name")
    state = Field.Enum(BulkImportRequestState, "state")
    input_file_url = Field.String("input_file_url")
    error_file_url = Field.String("error_file_url")
    status_file_url = Field.String("status_file_url")
    created_at = Field.DateTime("created_at")

    project = Relationship.ToOne("Project")
    created_by = Relationship.ToOne("User", False, "created_by")

    def refresh(self) -> None:
        """Synchronizes values of all fields with the database."""
        query_str, params = query.get_single(BulkImportRequest, self.uid)
        res = self.client.execute(query_str, params)
        res = res[utils.camel_case(BulkImportRequest.type_name())]
        self._set_field_values(res)

    def wait_until_done(self, sleep_time_seconds: int = 30) -> None:
        """Blocks until the import job leaves the RUNNING state.

        Blocks until the BulkImportRequest.state changes either to
        `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`,
        periodically refreshing object's state.

        Args:
            sleep_time_seconds (int): a time to block between subsequent
                API calls
        """
        while self.state == BulkImportRequestState.RUNNING:
            logger.info(f"Sleeping for {sleep_time_seconds} seconds...")
            time.sleep(sleep_time_seconds)
            self.__exponential_backoff_refresh()

    @backoff.on_exception(
        backoff.expo,
        (labelbox.exceptions.ApiLimitError, labelbox.exceptions.TimeoutError,
         labelbox.exceptions.NetworkError),
        max_tries=10,
        jitter=None)
    def __exponential_backoff_refresh(self) -> None:
        # Retries `refresh` with exponential backoff on the API errors
        # listed in the decorator above.
        self.refresh()

    @classmethod
    def from_name(cls, client, project_id: str,
                  name: str) -> 'BulkImportRequest':
        """Fetches existing BulkImportRequest.

        Args:
            client (Client): a Labelbox client
            project_id (str): BulkImportRequest's project id
            name (str): name of BulkImportRequest
        Returns:
            BulkImportRequest object
        """
        query_str = """query getBulkImportRequestPyApi(
                $projectId: ID!, $name: String!) {
            bulkImportRequest(where: {
                projectId: $projectId,
                name: $name
            }) {
                %s
            }
        }
        """ % query.results_query_part(cls)
        params = {"projectId": project_id, "name": name}
        response = client.execute(query_str, params=params)
        return cls(client, response['bulkImportRequest'])

    @classmethod
    def create_from_url(cls, client, project_id: str, name: str,
                        url: str) -> 'BulkImportRequest':
        """Creates a BulkImportRequest from a publicly accessible URL
        to an ndjson file with predictions.

        Args:
            client (Client): a Labelbox client
            project_id (str): id of project for which predictions will be
                imported
            name (str): name of BulkImportRequest
            url (str): publicly accessible URL pointing to ndjson file
                containing predictions
        Returns:
            BulkImportRequest object
        """
        query_str = """mutation createBulkImportRequestPyApi(
                $projectId: ID!, $name: String!, $fileUrl: String!) {
            createBulkImportRequest(data: {
                projectId: $projectId,
                name: $name,
                fileUrl: $fileUrl
            }) {
                %s
            }
        }
        """ % query.results_query_part(cls)
        params = {"projectId": project_id, "name": name, "fileUrl": url}
        bulk_import_request_response = client.execute(query_str,
                                                      params=params)
        return cls(client,
                   bulk_import_request_response["createBulkImportRequest"])

    @classmethod
    def create_from_objects(cls, client, project_id: str, name: str,
                            predictions: Iterable[dict]
                           ) -> 'BulkImportRequest':
        """Creates a BulkImportRequest from an iterable of dictionaries
        conforming to JSON predictions format, e.g.:
        ``{
            "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
            "schemaId": "ckappz7d700gn0zbocmqkwd9i",
            "dataRow": {
                "id": "ck1s02fqxm8fi0757f0e6qtdc"
            },
            "bbox": {
                "top": 48,
                "left": 58,
                "height": 865,
                "width": 1512
            }
        }``

        Args:
            client (Client): a Labelbox client
            project_id (str): id of project for which predictions will be
                imported
            name (str): name of BulkImportRequest
            predictions (Iterable[dict]): iterable of dictionaries
                representing predictions
        Returns:
            BulkImportRequest object
        Raises:
            ValueError: if `predictions` serializes to an empty payload
        """
        # The predictions are traversed twice (validation, then
        # serialization). Materialize them once so a one-shot iterable such
        # as a generator is not already exhausted when it gets serialized.
        predictions = list(predictions)
        _validate_ndjson(predictions)
        data_str = ndjson.dumps(predictions)
        if not data_str:
            raise ValueError('annotations cannot be empty')

        data = data_str.encode('utf-8')
        file_name = _make_file_name(project_id, name)
        # Report the size of the encoded payload in bytes, matching
        # `create_from_local_file`, which passes the file size in bytes.
        request_data = _make_request_data(project_id, name, len(data),
                                          file_name)
        file_data = (file_name, data, NDJSON_MIME_TYPE)
        response_data = _send_create_file_command(client,
                                                  request_data=request_data,
                                                  file_name=file_name,
                                                  file_data=file_data)
        return cls(client, response_data["createBulkImportRequest"])

    @classmethod
    def create_from_local_file(cls,
                               client,
                               project_id: str,
                               name: str,
                               file: Path,
                               validate_file=True) -> 'BulkImportRequest':
        """Creates a BulkImportRequest from a local ndjson file with
        predictions.

        Args:
            client (Client): a Labelbox client
            project_id (str): id of project for which predictions will be
                imported
            name (str): name of BulkImportRequest
            file (Path): local ndjson file with predictions
            validate_file (bool): a flag indicating if there should be a
                validation if `file` is a valid ndjson file
        Returns:
            BulkImportRequest object
        Raises:
            ValueError: if validation is enabled and `file` fails it
        """
        file_name = _make_file_name(project_id, name)
        content_length = file.stat().st_size
        request_data = _make_request_data(project_id, name, content_length,
                                          file_name)

        with file.open('rb') as f:
            if validate_file:
                reader = ndjson.reader(f)
                # ensure that the underlying json load call is valid
                # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
                # by iterating through the file so we only store
                # each line in memory rather than the entire file
                try:
                    _validate_ndjson(reader)
                except ValueError as err:
                    # Chain the original error so the underlying validation
                    # failure stays visible in the traceback.
                    raise ValueError(
                        f"{file} is not a valid ndjson file") from err
                else:
                    # Validation read through the file; rewind so the upload
                    # starts from the beginning.
                    f.seek(0)
            file_data = (file.name, f, NDJSON_MIME_TYPE)
            response_data = _send_create_file_command(client, request_data,
                                                      file_name, file_data)
        return cls(client, response_data["createBulkImportRequest"])
class BulkImportRequest(DbObject):
    """Represents the import job when importing annotations.

    Attributes:
        name (str)
        state (Enum): FAILED, RUNNING, or FINISHED (Refers to the whole
            import job)
        input_file_url (str): URL to your web-hosted NDJSON file
        error_file_url (str): NDJSON that contains error messages for failed
            annotations
        status_file_url (str): NDJSON that contains status for each
            annotation
        created_at (datetime): UTC timestamp for date BulkImportRequest was
            created

        project (Relationship): `ToOne` relationship to Project
        created_by (Relationship): `ToOne` relationship to User
    """
    name = Field.String("name")
    state = Field.Enum(BulkImportRequestState, "state")
    input_file_url = Field.String("input_file_url")
    error_file_url = Field.String("error_file_url")
    status_file_url = Field.String("status_file_url")
    created_at = Field.DateTime("created_at")

    project = Relationship.ToOne("Project")
    created_by = Relationship.ToOne("User", False, "created_by")

    @property
    def inputs(self) -> List[Dict[str, Any]]:
        """Inputs for each individual annotation uploaded.

        This should match the ndjson annotations that you have uploaded.

        Returns:
            Uploaded ndjson.

        * This information will expire after 24 hours.
        """
        return self._fetch_remote_ndjson(self.input_file_url)

    @property
    def errors(self) -> List[Dict[str, Any]]:
        """Errors for each individual annotation uploaded.

        This is a subset of statuses. Accessing this property blocks until
        the import job is no longer running.

        Returns:
            List of dicts containing error messages. Empty list means there
            were no errors. See `BulkImportRequest.statuses` for more
            details.

        * This information will expire after 24 hours.
        """
        self.wait_until_done()
        return self._fetch_remote_ndjson(self.error_file_url)

    @property
    def statuses(self) -> List[Dict[str, Any]]:
        """Status for each individual annotation uploaded.

        Accessing this property blocks until the import job is no longer
        running.

        Returns:
            A status for each annotation if the upload is done running.
            See below table for more details

        .. list-table::
           :widths: 15 150
           :header-rows: 1

           * - Field
             - Description
           * - uuid
             - Specifies the annotation for the status row.
           * - dataRow
             - JSON object containing the Labelbox data row ID for the
               annotation.
           * - status
             - Indicates SUCCESS or FAILURE.
           * - errors
             - An array of error messages included when status is FAILURE.
               Each error has a name, message and optional (key might not
               exist) additional_info.

        * This information will expire after 24 hours.
        """
        self.wait_until_done()
        return self._fetch_remote_ndjson(self.status_file_url)

    # NOTE(review): `lru_cache` on an instance method keys on `self` (so the
    # instance must be hashable) and keeps every cached instance alive for
    # the lifetime of the cache. Left as-is to preserve existing caching
    # semantics; consider a per-instance cache if this becomes a problem.
    @functools.lru_cache()
    def _fetch_remote_ndjson(self, url: str) -> List[Dict[str, Any]]:
        """Fetches the remote ndjson file and caches the results.

        Args:
            url (str): Can be any url pointing to an ndjson file.
        Returns:
            ndjson as a list of dicts.
        Raises:
            requests.HTTPError: if the server answers with an error status
        """
        response = requests.get(url)
        response.raise_for_status()
        return ndjson.loads(response.text)

    def refresh(self) -> None:
        """Synchronizes values of all fields with the database."""
        query_str, params = query.get_single(BulkImportRequest, self.uid)
        res = self.client.execute(query_str, params)
        res = res[utils.camel_case(BulkImportRequest.type_name())]
        self._set_field_values(res)

    def wait_until_done(self, sleep_time_seconds: int = 30) -> None:
        """Blocks import job until certain conditions are met.

        Blocks until the BulkImportRequest.state changes either to
        `BulkImportRequestState.FINISHED` or `BulkImportRequestState.FAILED`,
        periodically refreshing object's state.

        Args:
            sleep_time_seconds (int): a time to block between subsequent
                API calls
        """
        while self.state == BulkImportRequestState.RUNNING:
            logger.info(f"Sleeping for {sleep_time_seconds} seconds...")
            time.sleep(sleep_time_seconds)
            self.__exponential_backoff_refresh()

    @backoff.on_exception(
        backoff.expo,
        (labelbox.exceptions.ApiLimitError, labelbox.exceptions.TimeoutError,
         labelbox.exceptions.NetworkError),
        max_tries=10,
        jitter=None)
    def __exponential_backoff_refresh(self) -> None:
        # Retries `refresh` with exponential backoff on the API errors
        # listed in the decorator above.
        self.refresh()

    @classmethod
    def from_name(cls, client, project_id: str,
                  name: str) -> 'BulkImportRequest':
        """Fetches existing BulkImportRequest.

        Args:
            client (Client): a Labelbox client
            project_id (str): BulkImportRequest's project id
            name (str): name of BulkImportRequest
        Returns:
            BulkImportRequest object
        """
        query_str = """query getBulkImportRequestPyApi(
                $projectId: ID!, $name: String!) {
            bulkImportRequest(where: {
                projectId: $projectId,
                name: $name
            }) {
                %s
            }
        }
        """ % query.results_query_part(cls)
        params = {"projectId": project_id, "name": name}
        response = client.execute(query_str, params=params)
        return cls(client, response['bulkImportRequest'])

    @classmethod
    def create_from_url(cls,
                        client,
                        project_id: str,
                        name: str,
                        url: str,
                        validate=True) -> 'BulkImportRequest':
        """Creates a BulkImportRequest from a publicly accessible URL
        to an ndjson file with predictions.

        Args:
            client (Client): a Labelbox client
            project_id (str): id of project for which predictions will be
                imported
            name (str): name of BulkImportRequest
            url (str): publicly accessible URL pointing to ndjson file
                containing predictions
            validate (bool): a flag indicating if there should be a
                validation if `url` is valid ndjson
        Returns:
            BulkImportRequest object
        Raises:
            requests.HTTPError: if validation is enabled and downloading
                `url` returns an error status
        """
        if validate:
            logger.warning(
                "Validation is turned on. The file will be downloaded locally and processed before uploading."
            )
            res = requests.get(url)
            # Fail on an HTTP error instead of trying to parse an error page
            # as ndjson (same handling as `_fetch_remote_ndjson`).
            res.raise_for_status()
            data = ndjson.loads(res.text)
            _validate_ndjson(data, client.get_project(project_id))

        query_str = """mutation createBulkImportRequestPyApi(
                $projectId: ID!, $name: String!, $fileUrl: String!) {
            createBulkImportRequest(data: {
                projectId: $projectId,
                name: $name,
                fileUrl: $fileUrl
            }) {
                %s
            }
        }
        """ % query.results_query_part(cls)
        params = {"projectId": project_id, "name": name, "fileUrl": url}
        bulk_import_request_response = client.execute(query_str,
                                                      params=params)
        return cls(client,
                   bulk_import_request_response["createBulkImportRequest"])

    @classmethod
    def create_from_objects(cls,
                            client,
                            project_id: str,
                            name: str,
                            predictions: Iterable[Dict],
                            validate=True) -> 'BulkImportRequest':
        """Creates a `BulkImportRequest` from an iterable of dictionaries.

        Conforms to JSON predictions format, e.g.:
        ``{
            "uuid": "9fd9a92e-2560-4e77-81d4-b2e955800092",
            "schemaId": "ckappz7d700gn0zbocmqkwd9i",
            "dataRow": {
                "id": "ck1s02fqxm8fi0757f0e6qtdc"
            },
            "bbox": {
                "top": 48,
                "left": 58,
                "height": 865,
                "width": 1512
            }
        }``

        Args:
            client (Client): a Labelbox client
            project_id (str): id of project for which predictions will be
                imported
            name (str): name of BulkImportRequest
            predictions (Iterable[dict]): iterable of dictionaries
                representing predictions
            validate (bool): a flag indicating if there should be a
                validation if `predictions` is valid ndjson
        Returns:
            BulkImportRequest object
        Raises:
            ValueError: if `predictions` serializes to an empty payload
        """
        # With validation on, the predictions are traversed twice
        # (validation, then serialization). Materialize them once so a
        # one-shot iterable such as a generator is not already exhausted
        # when it gets serialized.
        predictions = list(predictions)
        if validate:
            _validate_ndjson(predictions, client.get_project(project_id))

        data_str = ndjson.dumps(predictions)
        if not data_str:
            raise ValueError('annotations cannot be empty')

        data = data_str.encode('utf-8')
        file_name = _make_file_name(project_id, name)
        # Report the size of the encoded payload in bytes, matching
        # `create_from_local_file`, which passes the file size in bytes.
        request_data = _make_request_data(project_id, name, len(data),
                                          file_name)
        file_data = (file_name, data, NDJSON_MIME_TYPE)
        response_data = _send_create_file_command(client,
                                                  request_data=request_data,
                                                  file_name=file_name,
                                                  file_data=file_data)
        return cls(client, response_data["createBulkImportRequest"])

    @classmethod
    def create_from_local_file(cls,
                               client,
                               project_id: str,
                               name: str,
                               file: Path,
                               validate_file=True) -> 'BulkImportRequest':
        """Creates a BulkImportRequest from a local ndjson file with
        predictions.

        Args:
            client (Client): a Labelbox client
            project_id (str): id of project for which predictions will be
                imported
            name (str): name of BulkImportRequest
            file (Path): local ndjson file with predictions
            validate_file (bool): a flag indicating if there should be a
                validation if `file` is a valid ndjson file
        Returns:
            BulkImportRequest object
        Raises:
            ValueError: if validation is enabled and `file` fails it
        """
        file_name = _make_file_name(project_id, name)
        content_length = file.stat().st_size
        request_data = _make_request_data(project_id, name, content_length,
                                          file_name)

        with file.open('rb') as f:
            if validate_file:
                reader = ndjson.reader(f)
                # ensure that the underlying json load call is valid
                # https://github.com/rhgrant10/ndjson/blob/ff2f03c56b21f28f7271b27da35ca4a8bf9a05d0/ndjson/api.py#L53
                # by iterating through the file so we only store
                # each line in memory rather than the entire file
                try:
                    _validate_ndjson(reader, client.get_project(project_id))
                except ValueError as err:
                    # Chain the original error so the underlying validation
                    # failure stays visible in the traceback.
                    raise ValueError(
                        f"{file} is not a valid ndjson file") from err
                else:
                    # Validation read through the file; rewind so the upload
                    # starts from the beginning.
                    f.seek(0)
            file_data = (file.name, f, NDJSON_MIME_TYPE)
            response_data = _send_create_file_command(client, request_data,
                                                      file_name, file_data)
        return cls(client, response_data["createBulkImportRequest"])