Example #1
import json
from typing import Dict

import dateutil.parser

# `ComparisonPath`, `JsonObject`, `CallName`, `OperationId`, `CallNameSequence`,
# `OperationDigester`, `operation_ids`, and the digest key constants (`Attempt`,
# `CromwellStart`, `Version`, etc.) come from this script's sibling modules.
def digest(workflow_path: ComparisonPath, operations_path: ComparisonPath) -> JsonObject:
    def call_fn(succeeded_operations: Dict[CallName, JsonObject],
                operation_id: OperationId,
                path: CallNameSequence,
                attempt: JsonObject) -> None:
        backend_status = attempt.get('backendStatus', 'Unknown')
        # This script should only ever be pointed at successful workflow metadata. All jobs that have a backend status
        # other than `Success` must have later been re-run successfully, so any un`Success`ful attempts are ignored.
        # It's possible that a future version of the digester might actually want to look at these jobs since they
        # may have completed some lifecycle events which could be useful in accumulating more performance data.
        if backend_status == 'Success':
            string_path = '.'.join(path)
            cromwell_start = attempt.get('start')
            cromwell_end = attempt.get('end')

            cromwell_total_time_seconds = (dateutil.parser.parse(cromwell_end) -
                                           dateutil.parser.parse(cromwell_start)).total_seconds()

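            # Operation IDs look like "projects/<project>/operations/<id>";
            # keep only the trailing bare ID to locate the operations JSON file.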
            bare_operation_id = operation_id.split('/')[-1]
            operations_file_path = operations_path / f'{bare_operation_id}.json'
            operations_data = operations_file_path.read_text()
            operations_metadata = json.loads(operations_data)
            operation = OperationDigester.create(operations_metadata)

            papi_total_time_seconds = operation.total_time_seconds()

            cromwell_additional_total_time_seconds = round(
                cromwell_total_time_seconds - papi_total_time_seconds, 3)

            succeeded_operations[string_path] = {
                Attempt: attempt.get('attempt'),
                CromwellAdditionalTotalTimeSeconds: cromwell_additional_total_time_seconds,
                CromwellEnd: cromwell_end,
                CromwellStart: cromwell_start,
                CromwellTotalTimeSeconds: cromwell_total_time_seconds,
                DelocalizationTimeSeconds: operation.delocalization_time_seconds(),
                Disks: operation.disks(),
                DockerImagePullTimeSeconds: operation.docker_image_pull_time_seconds(),
                LocalizationTimeSeconds: operation.localization_time_seconds(),
                MachineType: operation.machine_type(),
                OperationIdKey: operation_id,
                OtherTimeSeconds: operation.other_time_seconds(),
                PapiCreate: operation.create_time(),
                PapiEnd: operation.end_time(),
                PapiStart: operation.start_time(),
                PapiTotalTimeSeconds: operation.total_time_seconds(),
                ShardIndex: attempt.get('shardIndex'),
                StartupTimeSeconds: operation.startup_time_seconds(),
                UserCommandTimeSeconds: operation.user_command_time_seconds(),
            }

    data = workflow_path.read_text()
    metadata = json.loads(data)

    shards = operation_ids.visit_papi_operations(metadata, call_fn, initial_accumulator={})
    return {'version': Version, 'calls': shards, 'workflowId': metadata['id']}
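
A minimal usage sketch of `digest` (the `ComparisonPath.create` factory and the paths below are assumptions for illustration; in the real project these paths are supplied by the surrounding comparison tooling):

# Hypothetical invocation; `ComparisonPath.create` and both paths are assumed.
workflow = ComparisonPath.create('digests/workflow.json')
operations = ComparisonPath.create('digests/operations')
digested = digest(workflow, operations)
print(json.dumps(digested, sort_keys=True, indent=2))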
Example #2
from typing import AnyStr, List, Sequence

# `JsonObject`, `OperationId`, `CallNameSequence`, and `visit_papi_operations`
# are defined elsewhere in the same project.
def find_operation_ids_in_metadata(
        json_metadata: JsonObject) -> Sequence[AnyStr]:
    """Finds all instances of PAPI operations IDs in a workflow"""

    # E.g. given:
    # {
    #   "calls": {
    #     "workflow_name.task_name": [
    #       {
    #         "jobId": "projects/broad-dsde-cromwell-dev/operations/01234567891011121314",
    # ...
    #
    # We want to extract "projects/broad-dsde-cromwell-dev/operations/01234567891011121314"
    def call_fn(acc: List[AnyStr], operation_id: OperationId,
                call_name_sequence: CallNameSequence,
                attempt: JsonObject) -> None:
        acc.append(operation_id)

    return visit_papi_operations(json_metadata,
                                 call_fn,
                                 initial_accumulator=[])
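
A brief usage sketch, assuming the workflow metadata has already been downloaded as JSON (the file name is illustrative):

import json

# Illustrative file name; any Cromwell workflow metadata JSON works here.
with open('workflow_metadata.json') as f:
    metadata = json.load(f)

for op_id in find_operation_ids_in_metadata(metadata):
    print(op_id)  # e.g. "projects/broad-dsde-cromwell-dev/operations/01234567891011121314"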