def verify_build_platforms(refdata, build_platforms_ref):
    """Assert every expected build-platform key is present in the database."""
    rows = refdata.dhub.execute(
        proc='refdata_test.selects.test_all_build_platforms',
    )
    # Rebuild the canonical platform keys from the stored rows.
    stored_keys = {
        RefDataManager.get_platform_key(
            row.get('os_name'),
            row.get('platform'),
            row.get('architecture'))
        for row in rows
    }
    assert build_platforms_ref.issubset(stored_keys)
def verify_machine_platforms(refdata, machine_platforms_ref):
    """Assert every expected machine-platform key is present in the database."""
    rows = refdata.dhub.execute(proc="refdata_test.selects.test_all_machine_platforms")
    # Rebuild the canonical platform keys from the stored rows.
    stored_keys = {
        RefDataManager.get_platform_key(
            row.get("os_name"),
            row.get("platform"),
            row.get("architecture"))
        for row in rows
    }
    assert machine_platforms_ref.issubset(stored_keys)
def verify_build_platforms(refdata, build_platforms_ref):
    """Check that the reference build platforms are a subset of those stored."""
    fetched = refdata.dhub.execute(proc="refdata_test.selects.test_all_build_platforms")
    seen = set()
    for entry in fetched:
        # Derive the composite key the same way the ingestion side does.
        key = RefDataManager.get_platform_key(
            entry.get("os_name"),
            entry.get("platform"),
            entry.get("architecture"),
        )
        seen.add(key)
    assert build_platforms_ref.issubset(seen)
def verify_machine_platforms(refdata, machine_platforms_ref):
    """Check that the reference machine platforms are a subset of those stored."""
    fetched = refdata.dhub.execute(
        proc='refdata_test.selects.test_all_machine_platforms',
    )
    seen = set()
    for entry in fetched:
        # Derive the composite key the same way the ingestion side does.
        key = RefDataManager.get_platform_key(
            entry.get('os_name'),
            entry.get('platform'),
            entry.get('architecture'),
        )
        seen.add(key)
    assert machine_platforms_ref.issubset(seen)
def do_job_ingestion(jm, refdata, job_data, sample_resultset, verify_data=True):
    """
    Ingest ``job_data`` which will be JSON job blobs.

    ``jm`` - jobs model used to store resultsets and job blobs.
    ``refdata`` - reference data manager used by the verify helpers.
    ``job_data`` - iterable of job blob dicts to ingest.
    ``sample_resultset`` - sample resultsets; jobs are assigned to them
        round-robin via ``revision_hash``.
    ``verify_data`` - whether or not to run the ingested jobs
        through the verifier.
    """
    jm.store_result_set_data(sample_resultset)

    max_index = len(sample_resultset) - 1
    resultset_index = 0

    # Structures to test if we stored everything
    build_platforms_ref = set()
    machine_platforms_ref = set()
    machines_ref = set()
    options_ref = set()
    job_types_ref = set()
    products_ref = set()
    result_sets_ref = set()
    log_urls_ref = set()
    coalesced_job_guids = {}
    coalesced_replacements = []
    artifacts_ref = {}

    blobs = []
    for blob in job_data:
        # Wrap around so every job maps to one of the sample resultsets.
        if resultset_index > max_index:
            resultset_index = 0

        # Modify job structure to sync with the resultset sample data
        if 'sources' in blob:
            del blob['sources']

        blob['revision_hash'] = sample_resultset[resultset_index]['revision_hash']
        blobs.append(blob)

        resultset_index += 1

        # Build data structures to confirm everything is stored as expected.
        if verify_data:
            job_guid = blob['job']['job_guid']
            job = blob['job']

            # NOTE(review): 'unkown' (sic) is preserved for os_name/platform
            # because it must match whatever default the ingestion code
            # stores — confirm before correcting the spelling.
            build_platforms_ref.add(
                RefDataManager.get_platform_key(
                    job.get('build_platform', {}).get('os_name', 'unkown'),
                    job.get('build_platform', {}).get('platform', 'unkown'),
                    job.get('build_platform', {}).get('architecture', 'unknown')
                ))

            machine_platforms_ref.add(
                RefDataManager.get_platform_key(
                    job.get('machine_platform', {}).get('os_name', 'unkown'),
                    job.get('machine_platform', {}).get('platform', 'unkown'),
                    job.get('machine_platform', {}).get('architecture', 'unknown')
                ))

            machines_ref.add(job.get('machine', 'unknown'))

            # Fix: the fallback must be a dict — a list has no .keys(),
            # so the old default of [] raised AttributeError when
            # 'option_collection' was absent.
            options_ref = options_ref.union(
                job.get('option_collection', {}).keys())

            job_types_ref.add(job.get('name', 'unknown'))
            products_ref.add(job.get('product_name', 'unknown'))
            result_sets_ref.add(blob['revision_hash'])

            for log_data in job.get('log_references', []):
                log_urls_ref.add(log_data['url'])

            artifact_name = job.get('artifact', {}).get('name')
            if artifact_name:
                artifacts_ref[artifact_name] = job.get('artifact')

            coalesced = blob.get('coalesced', [])
            if coalesced:
                coalesced_job_guids[job_guid] = coalesced
                # One SQL placeholder per coalesced job guid.
                coalesced_replacements.append('%s')

    # Store the modified json blobs
    jm.store_job_data(blobs)

    if verify_data:
        # Confirms stored data matches whats in the reference data structs
        verify_build_platforms(refdata, build_platforms_ref)
        verify_machine_platforms(refdata, machine_platforms_ref)
        verify_machines(refdata, machines_ref)
        verify_options(refdata, options_ref)
        verify_job_types(refdata, job_types_ref)
        verify_products(refdata, products_ref)
        verify_result_sets(jm, result_sets_ref)
        verify_log_urls(jm, log_urls_ref)
        verify_artifacts(jm, artifacts_ref)
        verify_coalesced(jm, coalesced_job_guids, coalesced_replacements)
def do_job_ingestion(jm, refdata, job_data, sample_resultset, verify_data=True):
    """
    Ingest ``job_data`` which will be JSON job blobs.

    ``jm`` - jobs model used to store resultsets, job blobs, and to
        process the objectstore.
    ``refdata`` - reference data manager used by the verify helpers.
    ``job_data`` - iterable of job blob dicts to ingest.
    ``sample_resultset`` - sample resultsets; jobs are assigned to them
        round-robin via ``revision_hash``.
    ``verify_data`` - whether or not to run the ingested jobs
        through the verifier.
    """
    jm.store_result_set_data(sample_resultset)

    max_index = len(sample_resultset) - 1
    resultset_index = 0

    # Structures to test if we stored everything
    build_platforms_ref = set()
    machine_platforms_ref = set()
    machines_ref = set()
    options_ref = set()
    job_types_ref = set()
    products_ref = set()
    result_sets_ref = set()
    log_urls_ref = set()
    coalesced_job_guids = {}
    coalesced_replacements = []
    artifacts_ref = {}

    blobs = []
    for blob in job_data:
        # Wrap around so every job maps to one of the sample resultsets.
        if resultset_index > max_index:
            resultset_index = 0

        # Modify job structure to sync with the resultset sample data
        if "sources" in blob:
            del blob["sources"]

        blob["revision_hash"] = sample_resultset[resultset_index]["revision_hash"]
        blobs.append(blob)

        resultset_index += 1

        # Build data structures to confirm everything is stored as expected.
        if verify_data:
            job_guid = blob["job"]["job_guid"]
            job = blob["job"]

            # NOTE(review): 'unkown' (sic) is preserved for os_name/platform
            # because it must match whatever default the ingestion code
            # stores — confirm before correcting the spelling.
            build_platforms_ref.add(
                RefDataManager.get_platform_key(
                    job.get("build_platform", {}).get("os_name", "unkown"),
                    job.get("build_platform", {}).get("platform", "unkown"),
                    job.get("build_platform", {}).get("architecture", "unknown"),
                )
            )

            machine_platforms_ref.add(
                RefDataManager.get_platform_key(
                    job.get("machine_platform", {}).get("os_name", "unkown"),
                    job.get("machine_platform", {}).get("platform", "unkown"),
                    job.get("machine_platform", {}).get("architecture", "unknown"),
                )
            )

            machines_ref.add(job.get("machine", "unknown"))

            # Fix: the fallback must be a dict — a list has no .keys(),
            # so the old default of [] raised AttributeError when
            # 'option_collection' was absent.
            options_ref = options_ref.union(
                job.get("option_collection", {}).keys())

            job_types_ref.add(job.get("name", "unknown"))
            products_ref.add(job.get("product_name", "unknown"))
            result_sets_ref.add(blob["revision_hash"])

            for log_data in job.get("log_references", []):
                log_urls_ref.add(log_data["url"])

            artifact_name = job.get("artifact", {}).get("name")
            if artifact_name:
                artifacts_ref[artifact_name] = job.get("artifact")

            coalesced = blob.get("coalesced", [])
            if coalesced:
                coalesced_job_guids[job_guid] = coalesced
                # One SQL placeholder per coalesced job guid.
                coalesced_replacements.append("%s")

    # Store the modified json blobs
    jm.store_job_data(blobs)

    # Process the job objects in chunks of size == process_objects_limit.
    # Each process_objects call drains up to the limit, so one call per
    # chunk covers all of job_data.
    process_objects_limit = 1000
    for _ in grouper(job_data, process_objects_limit):
        jm.process_objects(process_objects_limit, raise_errors=True)

    if verify_data:
        # Confirms stored data matches whats in the reference data structs
        verify_build_platforms(refdata, build_platforms_ref)
        verify_machine_platforms(refdata, machine_platforms_ref)
        verify_machines(refdata, machines_ref)
        verify_options(refdata, options_ref)
        verify_job_types(refdata, job_types_ref)
        verify_products(refdata, products_ref)
        verify_result_sets(jm, result_sets_ref)
        verify_log_urls(jm, log_urls_ref)
        verify_artifacts(jm, artifacts_ref)
        verify_coalesced(jm, coalesced_job_guids, coalesced_replacements)

    # Default verification confirms we loaded all of the objects
    complete_count = jm.get_os_dhub().execute(
        proc="objectstore_test.counts.complete")[0]["complete_count"]
    loading_count = jm.get_os_dhub().execute(
        proc="objectstore_test.counts.loading")[0]["loading_count"]

    assert complete_count == len(job_data)
    assert loading_count == 0