Example #1
    def test_cloud_progress_to_reader_progress_percent_complete(self):
        cloud_progress = dataflow.ApproximateProgress()
        cloud_progress.percentComplete = 0.123

        reader_progress = apiclient.cloud_progress_to_reader_progress(
            cloud_progress)
        self.assertIsNotNone(reader_progress.percent_complete)
        self.assertEqual(0.123, reader_progress.percent_complete)
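A natural companion to the mapping above is the remaining-time field, which reader_progress_to_cloud_progress (Example #3) and report_status (Example #5) also carry. A minimal test sketch, assuming the inverse helper mirrors that remainingTime mapping and that the value is a plain duration string (neither is confirmed by the original suite):

    def test_cloud_progress_to_reader_progress_remaining_time(self):
        # Hypothetical companion test: assumes remainingTime is copied over
        # to reader_progress.remaining_time unchanged.
        cloud_progress = dataflow.ApproximateProgress()
        cloud_progress.remainingTime = '42s'

        reader_progress = apiclient.cloud_progress_to_reader_progress(
            cloud_progress)
        self.assertIsNotNone(reader_progress.remaining_time)
        self.assertEqual('42s', reader_progress.remaining_time)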
Example #2
    def test_cloud_progress_to_reader_progress_index_position(self):
        cloud_progress = dataflow.ApproximateProgress()
        cloud_progress.position = dataflow.Position()
        cloud_progress.position.byteOffset = 9999

        reader_progress = apiclient.cloud_progress_to_reader_progress(
            cloud_progress)
        self.assertIsNotNone(reader_progress.position)
        self.assertIsInstance(reader_progress.position, iobase.ReaderPosition)
        self.assertEqual(9999, reader_progress.position.byte_offset)
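Converting back in the other direction should recover the same byte offset. A small roundtrip sketch, assuming reader_position_to_cloud_position (called inside Examples #3 and #5) is also exposed as a module-level helper on apiclient:

    def test_reader_position_to_cloud_position_round_trip(self):
        # Hypothetical roundtrip check: cloud position -> reader position ->
        # cloud position.
        cloud_progress = dataflow.ApproximateProgress()
        cloud_progress.position = dataflow.Position()
        cloud_progress.position.byteOffset = 9999

        reader_progress = apiclient.cloud_progress_to_reader_progress(
            cloud_progress)
        cloud_position = apiclient.reader_position_to_cloud_position(
            reader_progress.position)
        self.assertEqual(9999, cloud_position.byteOffset)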
Example #3
def reader_progress_to_cloud_progress(reader_progress):
    """Converts a given 'ReaderProgress' to corresponding cloud format."""

    cloud_progress = dataflow.ApproximateProgress()
    if reader_progress.position is not None:
        cloud_progress.position = reader_position_to_cloud_position(
            reader_progress.position)
    if reader_progress.percent_complete is not None:
        cloud_progress.percentComplete = reader_progress.percent_complete
    if reader_progress.remaining_time is not None:
        cloud_progress.remainingTime = reader_progress.remaining_time

    return cloud_progress
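A quick usage sketch for the converter above, assuming iobase.ReaderProgress accepts its fields as keyword arguments (its constructor is not shown in these examples):

# Hypothetical usage; attribute names follow the accesses in the function
# above.
reader_progress = iobase.ReaderProgress(percent_complete=0.123,
                                        remaining_time='42s')
cloud_progress = reader_progress_to_cloud_progress(reader_progress)
assert cloud_progress.percentComplete == 0.123
assert cloud_progress.remainingTime == '42s'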
Example #4
    def test_approximate_progress_to_dynamic_split_request(self):
        approximate_progress = dataflow.ApproximateProgress()
        approximate_progress.percentComplete = 0.123

        dynamic_split_request = (
            apiclient.approximate_progress_to_dynamic_split_request(
                approximate_progress))
        self.assertIsNotNone(dynamic_split_request)
        self.assertIsInstance(dynamic_split_request.progress,
                              iobase.ReaderProgress)
        self.assertIsNotNone(dynamic_split_request.progress.percent_complete)
        self.assertEqual(dynamic_split_request.progress.percent_complete,
                         0.123)
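Because Example #3 provides the inverse mapping for the progress portion, the split request's progress can be converted straight back to the cloud form. A roundtrip sketch (an illustration, not part of the original tests, and it assumes reader_progress_to_cloud_progress is exposed on apiclient):

    def test_dynamic_split_request_progress_round_trip(self):
        # Hypothetical roundtrip: cloud progress -> split request -> cloud
        # progress.
        approximate_progress = dataflow.ApproximateProgress()
        approximate_progress.percentComplete = 0.123

        dynamic_split_request = (
            apiclient.approximate_progress_to_dynamic_split_request(
                approximate_progress))
        round_tripped = apiclient.reader_progress_to_cloud_progress(
            dynamic_split_request.progress)
        self.assertEqual(0.123, round_tripped.percentComplete)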
Example #5
    def report_status(self,
                      worker_info,
                      desired_lease_duration,
                      work_item,
                      completed,
                      progress,
                      dynamic_split_result_to_report=None,
                      exception_details=None):
        """Reports status for a work item (success or failure).

        This is an integration point. The @retry decorator is used on callers
        of this method defined in google/cloud/dataflow/worker/worker.py
        because there are different retry strategies for a completed versus an
        in-progress work item.

        Args:
          worker_info: A batchworker.BatchWorkerInfo that contains information
            about the Worker instance executing the work item.
          desired_lease_duration: The duration for which the worker would like
            to extend the lease of the work item, in seconds, formatted as a
            string.
          work_item: The work item for which to report status.
          completed: True if there is no further work to be done on this work
            item, either because it succeeded or because it failed. False if
            this is a progress report.
          progress: A SourceReaderProgress that gives the progress of the
            worker handling the work item.
          dynamic_split_result_to_report: A successful dynamic split result
            that should be sent to the Dataflow service along with the status
            report.
          exception_details: A string representation of the stack trace for an
            exception raised while executing the work item. The string is the
            output of the standard traceback.format_exc() function.

        Returns:
          A protobuf containing the response from the service for the status
          update (WorkItemServiceState).

        Raises:
          TypeError: If progress is of an unknown type.
          RuntimeError: If the dynamic split result is of an unknown type.
        """
        work_item_status = dataflow.WorkItemStatus()
        work_item_status.completed = completed

        if not completed:
            work_item_status.requestedLeaseDuration = desired_lease_duration

        if progress is not None:
            work_item_progress = dataflow.ApproximateProgress()
            work_item_status.progress = work_item_progress

            if progress.position is not None:
                work_item_progress.position = (
                    reader_position_to_cloud_position(progress.position))
            elif progress.percent_complete is not None:
                work_item_progress.percentComplete = progress.percent_complete
            elif progress.remaining_time is not None:
                work_item_progress.remainingTime = progress.remaining_time
            else:
                raise TypeError('Unknown type of progress')

        if dynamic_split_result_to_report is not None:
            assert isinstance(dynamic_split_result_to_report,
                              iobase.DynamicSplitResult)

            if isinstance(dynamic_split_result_to_report,
                          iobase.DynamicSplitResultWithPosition):
                work_item_status.stopPosition = (
                    dynamic_split_result_with_position_to_cloud_stop_position(
                        dynamic_split_result_to_report))
            else:
                raise RuntimeError('Unknown type of dynamic split result.')

        # The service keeps track of the report indexes in order to handle
        # lost and duplicate messages.
        work_item_status.reportIndex = work_item.next_report_index
        work_item_status.workItemId = str(work_item.proto.id)

        # Add exception information if any.
        if exception_details is not None:
            status = dataflow.Status()
            # TODO(silviuc): Replace Code.UNKNOWN with a generated definition.
            status.code = 2
            # TODO(silviuc): Attach the stack trace as exception details.
            status.message = exception_details
            work_item_status.errors.append(status)

        # Look through the work item for metrics to send.
        for counter in work_item.map_task.itercounters():
            append_counter(work_item_status, counter, tentative=not completed)

        report_request = dataflow.ReportWorkItemStatusRequest()
        report_request.currentWorkerTime = worker_info.formatted_current_time
        report_request.workerId = worker_info.worker_id
        report_request.workItemStatuses.append(work_item_status)

        request = dataflow.DataflowProjectsJobsWorkItemsReportStatusRequest()
        request.jobId = worker_info.job_id
        request.projectId = worker_info.project_id
        try:
            request.reportWorkItemStatusRequest = report_request
        except AttributeError:
            # Some generated clients expose this field under a snake_case
            # name instead of camelCase; fall back accordingly.
            request.report_work_item_status_request = report_request
        logging.debug('report_status: %s', request)
        response = self._client.projects_jobs_workItems.ReportStatus(request)
        logging.debug('report_status: %s', response)
        return response
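For orientation, a hedged sketch of how a caller might issue a non-final progress report through this method. 'client', 'worker_info' and 'work_item' are hypothetical placeholders for objects the worker constructs elsewhere, and the lease-duration string format is an assumption based on the docstring:

# Hypothetical caller sketch; not taken from worker.py.
progress = iobase.ReaderProgress(percent_complete=0.5)
response = client.report_status(
    worker_info=worker_info,
    desired_lease_duration='300',  # seconds, formatted as a string (assumed)
    work_item=work_item,
    completed=False,  # this is a progress report, not a final status
    progress=progress,
    dynamic_split_result_to_report=None)
# 'response' is the WorkItemServiceState protobuf described in the docstring.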