コード例 #1
0
class OperationsDigesterTestMethods(unittest.TestCase):
    """Data-driven test: every `OperationDigester` method named in `EXPECTATIONS` must return its expected value."""

    # These two calls sit in the class body, so they run once when the class is defined.
    set_log_verbosity(verbose=True)
    quieten_chatty_imports()

    def test_operations_digestion(self) -> None:
        """
        This uses "real" metadata from the PAPI v2 performance spike to drive operations digester testing.
        The metadata is stored in GCS and copied down to the local machine if not already present from an earlier run.
        Operations digesters can run against either local or GCS paths using `ComparisonPath`s. Since GCS testing is
        slow it is turned off by default; it can be turned on by setting the DIGESTER_TEST_GCS environment variable
        to a non-empty value.

        `EXPECTATIONS` is read as sample name -> PAPI version -> operation name -> {digester method name: expected
        return value}.
        """

        # Only the credentials are needed; the default project id is ignored.
        credentials, _project_id = google.auth.default()
        storage_client = storage.Client(credentials=credentials)

        bucket_name = 'papi-performance-analysis'
        bucket = storage_client.get_bucket(bucket_name)

        # A cache of expensive-to-create GCS comparison paths.
        gcs_comparison_path_by_subdir = {}
        papi_versions = [VERSION_PAPI_V1, VERSION_PAPI_V2]

        for papi_version in papi_versions:
            subdir = subdir_for_papi_version(papi_version)
            local_parent = ComparisonPath.create(subdir)

            for sample_name in EXPECTATIONS:
                download_metadata_from_gcs_if_needed(sample_name, local_parent, bucket)
                parents_to_test = [local_parent]
                # Skip slow GCS testing unless this environment variable is set.
                if os.environ.get('DIGESTER_TEST_GCS'):
                    parents_to_test.append(gcs_parent(subdir, gcs_comparison_path_by_subdir))

                # Direct indexing (rather than chained `.get()`) so that a missing sample or version is reported as
                # a KeyError naming the key instead of an AttributeError on None.
                expected_by_operation = EXPECTATIONS[sample_name][papi_version]

                for parent in parents_to_test:
                    description = parent.description()
                    logging.info(
                        f"Running operation digester on {description} sample '{sample_name}' backend {papi_version}")
                    sample_path = parent / sample_name

                    for operation, expected_by_method in expected_by_operation.items():
                        operations_path = sample_path / 'operations' / f'{operation}.json'
                        json_str = operations_path.read_text()
                        op_digester = OperationDigester.create(json.loads(json_str))

                        for key, value in expected_by_method.items():
                            # subTest attaches the loop coordinates to any failure report so the offending
                            # sample / backend / operation can be identified without re-running.
                            with self.subTest(sample=sample_name, backend=papi_version, source=description,
                                              operation=operation, method=key):
                                method_to_call = getattr(op_digester, key)
                                self.assertEqual(method_to_call(), value, f'{key} was not {value}')
コード例 #2
0
                        nargs='+',
                        help='Workflows to process')
    parser.add_argument(
        'cromwell_checkout_path',
        metavar='CROMWELLCHECKOUTPATH',
        type=Path,
        help='Path to Cromwell git checkout used to run workflows')
    parser.add_argument(
        'cromwell_config_path',
        metavar='CROMWELLCONFIGPATH',
        type=Path,
        help='Path to Cromwell configuration file used to run workflows')

    args = parser.parse_args()
    set_log_verbosity(args.verbose)
    quieten_chatty_imports()

    cromwell_url = args.cromwell_url[0]
    gcs_bucket, gcs_path = args.gcs_path[0]
    workflows = args.workflows

    credentials, project_id = google.auth.default()
    storage_client = storage.Client(credentials=credentials)
    papi_clients = PapiClients(credentials)

    logger.info(f'cromwell: {cromwell_url}')
    logger.info(f'gcs_bucket: {gcs_bucket}; gcs_path: {gcs_path}')
    logger.info(f'workflows: {workflows}')

    for workflow in workflows:
        process_workflow(cromwell_url, gcs_bucket, gcs_path, storage_client,
コード例 #3
0
class DigesterTestMethods(unittest.TestCase):
    """Data-driven test: the output of `digest` must match the per-sample, per-backend `EXPECTATIONS` table."""

    # These two calls sit in the class body, so they run once when the class is defined.
    set_log_verbosity(verbose=True)
    quieten_chatty_imports()

    def test_digestion(self) -> None:
        """
        This uses "real" metadata from the PAPI v2 performance spike to drive digester testing. The metadata is stored
        in GCS and copied down to the local machine if not already present from an earlier run. The digester can run
        against either local or GCS paths using `ComparisonPath`s. Local is nicer to iterate on than GCS since it
        runs so much more quickly. Since GCS testing is slow it is turned off by default; it can be turned on by
        setting the DIGESTER_TEST_GCS environment variable to a non-empty value.
        """

        # Only the credentials are needed; the default project id is ignored.
        credentials, _project_id = google.auth.default()
        storage_client = storage.Client(credentials=credentials)

        bucket_name = 'papi-performance-analysis'
        bucket = storage_client.get_bucket(bucket_name)

        # A cache of expensive-to-create GCS comparison paths.
        gcs_comparison_path_by_subdir = {}
        papi_versions = [VERSION_PAPI_V1, VERSION_PAPI_V2]

        for papi_version in papi_versions:
            subdir = subdir_for_papi_version(papi_version)
            local_parent = ComparisonPath.create(subdir)

            for sample_name in EXPECTATIONS:
                download_metadata_from_gcs_if_needed(sample_name, local_parent, bucket)
                parents_to_test = [local_parent]
                # Skip slow GCS testing unless this environment variable is set.
                if os.environ.get('DIGESTER_TEST_GCS'):
                    parents_to_test.append(gcs_parent(subdir, gcs_comparison_path_by_subdir))

                for parent in parents_to_test:
                    description = parent.description()
                    logging.info(
                        f"Running digester test on {description} for sample '{sample_name}' on backend {papi_version}"
                    )
                    sample_path = parent / sample_name
                    workflow_path = sample_path / 'workflow.json'
                    operations_path = sample_path / 'operations'
                    actual = digest(workflow_path, operations_path)

                    expected = EXPECTATIONS[sample_name][papi_version]
                    calls: JsonObject = actual.get('calls')

                    self.assertEqual(len(calls), expected['total_jobs'])

                    for num_attempts in [1, 2, 3]:
                        # Build the predicate once, then count the call names it accepts without
                        # materialising an intermediate list.
                        has_more_attempts = more_than_x_attempts(calls, num_attempts)
                        actual_len = sum(1 for name in calls if has_more_attempts(name))
                        self.assertEqual(actual_len, expected[f'more_than_{num_attempts}_attempts'])

                    for minutes_longer in range(3, 9):
                        runs_longer = more_than_x_minutes_longer(calls, minutes_longer)
                        actual_len = sum(1 for name in calls if runs_longer(name))
                        expectation = expected[f'cromwell_time_more_than_{minutes_longer}_minutes_longer_total']
                        self.assertEqual(actual_len, expectation)

                    # Currently just a smoke test to assert not-completely-insane results for both v1 and v2 digesters.

                    keys = [
                        StartupTimeSeconds, DockerImagePullTimeSeconds,
                        LocalizationTimeSeconds, UserCommandTimeSeconds,
                        DelocalizationTimeSeconds, PapiTotalTimeSeconds,
                        CromwellTotalTimeSeconds, OtherTimeSeconds
                    ]

                    for key in keys:
                        for name in calls:
                            measured = calls[name].get(key)
                            context = f"failed for {papi_version} / {sample_name} / {key} / {name}"
                            # A missing key would otherwise surface as a TypeError from `None >= 0`; check for it
                            # explicitly so it is reported as a normal failure carrying the context message.
                            self.assertIsNotNone(measured, context)
                            self.assertGreaterEqual(measured, 0, context)
コード例 #4
0
ファイル: digester.py プロジェクト: ylpduxinghua/cromwell
                operation.create_time(),
                PapiEnd:
                operation.end_time(),
                PapiStart:
                operation.start_time(),
                PapiTotalTimeSeconds:
                operation.total_time_seconds(),
                ShardIndex:
                attempt.get('shardIndex'),
                StartupTimeSeconds:
                operation.startup_time_seconds(),
                UserCommandTimeSeconds:
                operation.user_command_time_seconds(),
            }

    data = workflow_path.read_text()
    metadata = json.loads(data)

    shards = operation_ids.visit_papi_operations(metadata,
                                                 call_fn,
                                                 initial_accumulator={})
    return {'version': Version, 'calls': shards, 'workflowId': metadata['id']}


# Script entry point: the statements below only run when this module is executed directly.
if __name__ == "__main__":
    # NOTE(review): `logging` here exposes `quieten_chatty_imports` and `set_log_verbosity`, so it is presumably a
    # project-local module rather than the standard library `logging` - confirm against the file's imports.
    logging.quieten_chatty_imports()
    _args = parse_args()
    # Verbosity can only be applied after parsing, since it comes from the parsed `verbose` argument.
    logging.set_log_verbosity(_args.verbose)

    main(_args)