class OperationsDigesterTestMethods(unittest.TestCase):
    set_log_verbosity(verbose=True)
    quieten_chatty_imports()

    def test_operations_digestion(self) -> None:
        """
        This uses "real" metadata from the PAPI v2 performance spike to drive operations digester
        testing. The metadata is stored in GCS and copied down to the local machine if not already
        present from an earlier run. Operations digesters can run against either local or GCS
        paths using `ComparisonPath`s. Since GCS testing is slow it's turned off by default; it
        can be turned on by setting the DIGESTER_TEST_GCS environment variable.
        """
        credentials, project_id = google.auth.default()
        storage_client = storage.Client(credentials=credentials)
        bucket_name = 'papi-performance-analysis'
        bucket = storage_client.get_bucket(bucket_name)

        # A cache of expensive-to-create GCS comparison paths.
        gcs_comparison_path_by_subdir = {}

        papi_versions = [VERSION_PAPI_V1, VERSION_PAPI_V2]
        for papi_version in papi_versions:
            subdir = subdir_for_papi_version(papi_version)
            local_parent = ComparisonPath.create(subdir)

            for sample_name in EXPECTATIONS.keys():
                download_metadata_from_gcs_if_needed(sample_name, local_parent, bucket)
                parents_to_test = [local_parent]
                # Skip slow GCS testing unless this environment variable is set.
                if os.environ.get('DIGESTER_TEST_GCS'):
                    parents_to_test.append(gcs_parent(subdir, gcs_comparison_path_by_subdir))

                for parent in parents_to_test:
                    description = parent.description()
                    logging.info(
                        f"Running operation digester on {description} sample '{sample_name}' "
                        f"backend {papi_version}")
                    sample_path = parent / sample_name
                    for operation in EXPECTATIONS[sample_name][papi_version].keys():
                        operations_path = sample_path / 'operations' / f'{operation}.json'
                        json_str = operations_path.read_text()
                        op_digester = OperationDigester.create(json.loads(json_str))
                        # Each expectation key names an `OperationDigester` method; look it up
                        # with `getattr`, invoke it, and compare against the expected value.
                        for key, value in EXPECTATIONS[sample_name][papi_version][operation].items():
                            method_to_call = getattr(op_digester, key)
                            self.assertEqual(method_to_call(), value, f'{key} was not {value}')
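
# For reference, a minimal sketch of the nested shape that `EXPECTATIONS` is assumed
# to have, inferred from the lookups above (sample name -> PAPI version -> operation
# -> per-method expectations). The sample name, operation id, and numeric values
# below are hypothetical.
_EXPECTATIONS_SHAPE_SKETCH = {
    'some_sample': {
        VERSION_PAPI_V2: {
            'some-operation-id': {
                # Keys name OperationDigester methods; values are their expected returns.
                'total_time_seconds': 125.0,
                'startup_time_seconds': 30.0,
            },
        },
    },
}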
    # The positional argument name 'workflows' is inferred from the `args.workflows`
    # access below; the head of this add_argument call is truncated in the excerpt.
    parser.add_argument('workflows', nargs='+', help='Workflows to process')
    parser.add_argument('cromwell_checkout_path', metavar='CROMWELLCHECKOUTPATH', type=Path,
                        help='Path to Cromwell git checkout used to run workflows')
    parser.add_argument('cromwell_config_path', metavar='CROMWELLCONFIGPATH', type=Path,
                        help='Path to Cromwell configuration file used to run workflows')

    args = parser.parse_args()

    set_log_verbosity(args.verbose)
    quieten_chatty_imports()

    cromwell_url = args.cromwell_url[0]
    gcs_bucket, gcs_path = args.gcs_path[0]
    workflows = args.workflows

    credentials, project_id = google.auth.default()
    storage_client = storage.Client(credentials=credentials)
    papi_clients = PapiClients(credentials)

    logger.info(f'cromwell: {cromwell_url}')
    logger.info(f'gcs_bucket: {gcs_bucket}; gcs_path: {gcs_path}')
    logger.info(f'workflows: {workflows}')

    for workflow in workflows:
        # The trailing arguments to process_workflow are assumed from the values
        # prepared above; the original call is truncated in the excerpt.
        process_workflow(cromwell_url, gcs_bucket, gcs_path, storage_client,
                         papi_clients, workflow)
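
# A hypothetical sketch of an argparse `type=` callable for the gcs_path argument,
# consistent with the `gcs_bucket, gcs_path = args.gcs_path[0]` unpacking above;
# the real parser setup is not shown in this excerpt and may differ.
from typing import Tuple


def parse_gcs_path(value: str) -> Tuple[str, str]:
    """Split 'gs://bucket/some/prefix' into ('bucket', 'some/prefix')."""
    if not value.startswith('gs://'):
        raise argparse.ArgumentTypeError(f'Invalid GCS path: {value}')
    bucket, _, path = value[len('gs://'):].partition('/')
    return bucket, path

# Hypothetical wiring: parser.add_argument('gcs_path', nargs=1, type=parse_gcs_path, ...)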
class DigesterTestMethods(unittest.TestCase):
    set_log_verbosity(verbose=True)
    quieten_chatty_imports()

    def test_digestion(self) -> None:
        """
        This uses "real" metadata from the PAPI v2 performance spike to drive digester testing.
        The metadata is stored in GCS and copied down to the local machine if not already present
        from an earlier run. The digester can run against either local or GCS paths using
        `ComparisonPath`s. Local is nicer to iterate on than GCS since it runs so much more
        quickly. Since GCS testing is slow it's turned off by default; it can be turned on by
        setting the DIGESTER_TEST_GCS environment variable.
        """
        credentials, project_id = google.auth.default()
        storage_client = storage.Client(credentials=credentials)
        bucket_name = 'papi-performance-analysis'
        bucket = storage_client.get_bucket(bucket_name)

        # A cache of expensive-to-create GCS comparison paths.
        gcs_comparison_path_by_subdir = {}

        papi_versions = [VERSION_PAPI_V1, VERSION_PAPI_V2]
        for papi_version in papi_versions:
            subdir = subdir_for_papi_version(papi_version)
            local_parent = ComparisonPath.create(subdir)

            for sample_name in EXPECTATIONS.keys():
                download_metadata_from_gcs_if_needed(sample_name, local_parent, bucket)
                parents_to_test = [local_parent]
                # Skip slow GCS testing unless this environment variable is set.
                if os.environ.get('DIGESTER_TEST_GCS'):
                    parents_to_test.append(gcs_parent(subdir, gcs_comparison_path_by_subdir))

                for parent in parents_to_test:
                    description = parent.description()
                    logging.info(
                        f"Running digester test on {description} for sample '{sample_name}' "
                        f"on backend {papi_version}")
                    sample_path = parent / sample_name
                    workflow_path = sample_path / 'workflow.json'
                    operations_path = sample_path / 'operations'
                    actual = digest(workflow_path, operations_path)
                    expected = EXPECTATIONS[sample_name][papi_version]

                    calls: JsonObject = actual.get('calls')
                    actual_total = len(calls)
                    self.assertEqual(actual_total, expected['total_jobs'])

                    for num_attempts in [1, 2, 3]:
                        actual_len = len(list(filter(more_than_x_attempts(calls, num_attempts), calls)))
                        self.assertEqual(actual_len, expected[f'more_than_{num_attempts}_attempts'])

                    for minutes_longer in range(3, 9):
                        actual_len = len(list(filter(more_than_x_minutes_longer(calls, minutes_longer), calls)))
                        expectation = expected[f'cromwell_time_more_than_{minutes_longer}_minutes_longer_total']
                        self.assertEqual(actual_len, expectation)

                    # Currently just a smoke test to assert not-completely-insane results for
                    # both v1 and v2 digesters: every timing bucket should be non-negative.
                    keys = [StartupTimeSeconds, DockerImagePullTimeSeconds, LocalizationTimeSeconds,
                            UserCommandTimeSeconds, DelocalizationTimeSeconds, PapiTotalTimeSeconds,
                            CromwellTotalTimeSeconds, OtherTimeSeconds]
                    for key in keys:
                        for name in calls:
                            self.assertTrue(calls[name].get(key) >= 0,
                                            f"failed for {papi_version} / {sample_name} / {key}")
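
# Hypothetical sketches of the predicate factories used with `filter` above. The real
# `more_than_x_attempts` / `more_than_x_minutes_longer` are not shown in this excerpt;
# the 'attempt' key name and the exact timing comparison are assumptions.
def _more_than_x_attempts_sketch(calls, num_attempts):
    """Predicate over call names: did this call take more than `num_attempts` attempts?"""
    def predicate(name):
        return calls[name].get('attempt', 1) > num_attempts
    return predicate


def _more_than_x_minutes_longer_sketch(calls, minutes_longer):
    """Predicate over call names: did Cromwell's total time exceed PAPI's by more than
    `minutes_longer` minutes?"""
    def predicate(name):
        call = calls[name]
        return call[CromwellTotalTimeSeconds] - call[PapiTotalTimeSeconds] > minutes_longer * 60
    return predicate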
            # The key for this first entry is truncated in the excerpt; `PapiCreate` is
            # assumed by analogy with the `PapiEnd`/`PapiStart` entries below.
            PapiCreate: operation.create_time(),
            PapiEnd: operation.end_time(),
            PapiStart: operation.start_time(),
            PapiTotalTimeSeconds: operation.total_time_seconds(),
            ShardIndex: attempt.get('shardIndex'),
            StartupTimeSeconds: operation.startup_time_seconds(),
            UserCommandTimeSeconds: operation.user_command_time_seconds(),
        }

    data = workflow_path.read_text()
    metadata = json.loads(data)
    shards = operation_ids.visit_papi_operations(metadata, call_fn, initial_accumulator={})
    return {'version': Version, 'calls': shards, 'workflowId': metadata['id']}


if __name__ == "__main__":
    logging.quieten_chatty_imports()
    _args = parse_args()
    logging.set_log_verbosity(_args.verbose)
    main(_args)
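
# Hypothetical usage sketch of the digest() entry point above, assuming `ComparisonPath`
# is importable here. The sample directory name is made up, but the layout (workflow.json
# plus an operations/ subdirectory) mirrors the digester tests. Not invoked automatically.
def _example_digest_usage() -> None:
    sample = ComparisonPath.create('some_sample')
    digested = digest(sample / 'workflow.json', sample / 'operations')
    print(digested['workflowId'], digested['version'])
    for name, call in digested['calls'].items():
        print(name, call.get(PapiTotalTimeSeconds))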