def run_end_to_end():
    """Run the pipeline end to end for two months and check row counts.

    Resets local pipeline state and the storage bucket, creates the BigQuery
    datasets and tables the pipeline expects, then calls ``runner.run_all``
    for 2017-09 and 2017-10 (after copying the ``data-1`` and ``data-2``
    fixtures into place), asserting the expected number of rows after each
    run.
    """
    print('BQ_NONCE: {}'.format(settings.BQ_NONCE))

    call_command('migrate')

    commands_dir = os.path.join(
        settings.APPS_ROOT, 'frontend', 'management', 'commands')

    # Only *.json files are measure definitions.  Counting every directory
    # entry (as os.listdir would) miscounts as soon as any other file lands
    # in the directory, and would disagree with the table-creation loop below.
    definition_paths = glob.glob(
        os.path.join(commands_dir, 'measure_definitions', '*.json'))

    # No MeasureGlobals or MeasureValues are generated for the ghost branded
    # generics measure, because both numerator and denominator are computed
    # from a view (vw__ghost_generic_measure) which has no data.  Rather than
    # populate this view, it is simpler to pretend it doesn't exist.
    num_measures = len(definition_paths) - 1

    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)

    # Reset the import log to an empty JSON object.
    with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as f:
        f.write('{}')

    for blob in StorageClient().bucket().list_blobs():
        blob.delete()

    for dataset_key in DATASETS:
        BQClient(dataset_key).create_dataset()

    client = BQClient('hscic')
    client.create_table('bnf', schemas.BNF_SCHEMA)
    client.create_table('ccgs', schemas.CCG_SCHEMA)
    client.create_table('ppu_savings', schemas.PPU_SAVING_SCHEMA)
    client.create_table(
        'practice_statistics',
        schemas.PRACTICE_STATISTICS_SCHEMA
    )
    client.create_table(
        'practice_statistics_all_years',
        schemas.PRACTICE_STATISTICS_SCHEMA
    )
    client.create_table('practices', schemas.PRACTICE_SCHEMA)
    client.create_table('prescribing', schemas.PRESCRIBING_SCHEMA)
    client.create_table('presentation', schemas.PRESENTATION_SCHEMA)
    client.create_table('tariff', schemas.TARIFF_SCHEMA)
    client.create_table('bdz_adq', schemas.BDZ_ADQ_SCHEMA)

    client = BQClient('measures')
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields.  Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields.  But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ('month', 'DATE'),
        ('practice_id', 'STRING'),
        ('numerator', 'INTEGER'),
        ('denominator', 'INTEGER'),
    )

    for definition_path in definition_paths:
        measure_id = os.path.splitext(os.path.basename(definition_path))[0]
        client.create_table('practice_data_' + measure_id, measures_schema)
        client.create_table('ccg_data_' + measure_id, measures_schema)
        client.create_table('global_data_' + measure_id, measures_schema)

    # Although there are no model instances, we call upload_model to create the
    # tables in BQ that might be required by certain measure views.
    client = BQClient('dmd')
    client.upload_model(TariffPrice)
    client.upload_model(VMPP)

    call_command('generate_presentation_replacements')

    matviews_sql_path = os.path.join(commands_dir, 'replace_matviews.sql')
    with open(matviews_sql_path) as f:
        with connection.cursor() as c:
            c.execute(f.read())

    copy_tree(
        os.path.join(e2e_path, 'data-1'),
        os.path.join(e2e_path, 'data'),
    )

    runner.run_all(2017, 9, under_test=True)

    # We expect one MeasureGlobal per measure per month.
    assert_count_equal(num_measures, MeasureGlobal)

    # We expect one MeasureValue for each organisation per measure per month
    # (There are 4 practices, 2 CCGs, 2 STPs, and 2 regional teams).
    assert_count_equal(10 * num_measures, MeasureValue)

    # We expect one statistic per CCG per month
    assert_raw_count_equal(2, 'vw__ccgstatistics')

    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(2, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")

    # We expect one chemical summary per practice per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")

    # We expect one summary per practice per month
    assert_raw_count_equal(4, 'vw__practice_summary')

    # We expect one presentation summary per month
    assert_raw_count_equal(1, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")

    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(2, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")

    copy_tree(
        os.path.join(e2e_path, 'data-2'),
        os.path.join(e2e_path, 'data'),
    )

    runner.run_all(2017, 10, under_test=True)

    # We expect one MeasureGlobal per measure per month
    assert_count_equal(2 * num_measures, MeasureGlobal)

    # We expect one MeasureValue for each organisation per measure per month
    assert_count_equal(20 * num_measures, MeasureValue)

    # We expect one statistic per CCG per month
    assert_raw_count_equal(4, 'vw__ccgstatistics')

    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")

    # We expect one chemical summary per practice per month
    assert_raw_count_equal(8, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")

    # We expect one summary per practice per month
    assert_raw_count_equal(8, 'vw__practice_summary')

    # We expect one presentation summary per month
    assert_raw_count_equal(2, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")

    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(4, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
def run_end_to_end():
    """Run the pipeline end to end for two months and check row counts.

    Clears local pipeline state and the storage bucket, creates the BigQuery
    datasets and the ``hscic`` tables, rebuilds the materialised views from
    ``replace_matviews.sql``, then runs the pipeline for 2017-09 and 2017-10,
    asserting the cumulative row counts after each run.
    """
    print('BQ_NONCE: {}'.format(settings.BQ_NONCE))

    num_measures = 56

    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)

    # Start from an import log containing an empty JSON object.
    with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as log_file:
        log_file.write('{}')

    for stale_blob in StorageClient().bucket().list_blobs():
        stale_blob.delete()

    for key in DATASETS:
        BQClient(key).create_dataset()

    hscic_client = BQClient('hscic')
    hscic_tables = (
        ('bnf', schemas.BNF_SCHEMA),
        ('ccgs', schemas.CCG_SCHEMA),
        ('ppu_savings', schemas.PPU_SAVING_SCHEMA),
        ('practice_statistics', schemas.PRACTICE_STATISTICS_SCHEMA),
        ('practice_statistics_all_years', schemas.PRACTICE_STATISTICS_SCHEMA),
        ('practices', schemas.PRACTICE_SCHEMA),
        ('prescribing', schemas.PRESCRIBING_SCHEMA),
        ('presentation', schemas.PRESENTATION_SCHEMA),
        ('tariff', schemas.TARIFF_SCHEMA),
    )
    for table_name, table_schema in hscic_tables:
        hscic_client.create_table(table_name, table_schema)

    call_command('generate_presentation_replacements')

    matviews_sql_path = os.path.join(
        settings.SITE_ROOT, 'frontend', 'management', 'commands',
        'replace_matviews.sql')
    with open(matviews_sql_path) as sql_file, connection.cursor() as cursor:
        cursor.execute(sql_file.read())

    fixture_target = os.path.join(e2e_path, 'data')
    chemical_filter = "chemical_id = '1001030C0'"
    presentation_filter = "presentation_code = '1001030C0AAAAAA'"

    # Each run adds one month of data, so after the Nth run every expected
    # count is N times its single-month value.
    runs = ((9, 'data-1'), (10, 'data-2'))
    for months_loaded, (month, fixture_dir) in enumerate(runs, start=1):
        copy_tree(os.path.join(e2e_path, fixture_dir), fixture_target)

        runner.run_all(2017, month, under_test=True)

        # We expect one MeasureGlobal per measure per month.  If this assert
        # fails, check that num_measures is still correct.
        assert_count_equal(months_loaded * num_measures, MeasureGlobal)

        # One MeasureValue for each CCG or Practice per measure per month.
        assert_count_equal(6 * months_loaded * num_measures, MeasureValue)

        # One statistic per CCG per month.
        assert_raw_count_equal(2 * months_loaded, 'vw__ccgstatistics')

        # One chemical summary per CCG per month.
        assert_raw_count_equal(
            2 * months_loaded, 'vw__chemical_summary_by_ccg',
            chemical_filter)

        # One chemical summary per practice per month.
        assert_raw_count_equal(
            4 * months_loaded, 'vw__chemical_summary_by_practice',
            chemical_filter)

        # One summary per practice per month.
        assert_raw_count_equal(4 * months_loaded, 'vw__practice_summary')

        # One presentation summary per month.
        assert_raw_count_equal(
            1 * months_loaded, 'vw__presentation_summary',
            presentation_filter)

        # One presentation summary per CCG per month.
        assert_raw_count_equal(
            2 * months_loaded, 'vw__presentation_summary_by_ccg',
            presentation_filter)
def run_end_to_end():
    """Run the pipeline end to end for two months and check measure counts.

    Applies migrations, clears local pipeline state and the storage bucket,
    creates the BigQuery datasets and tables the pipeline expects, then runs
    the pipeline for 2017-09 and 2017-10, asserting the cumulative number of
    MeasureGlobal and MeasureValue rows after each run.
    """
    print("BQ_NONCE: {}".format(settings.BQ_NONCE))

    call_command("migrate")

    definitions_pattern = os.path.join(
        settings.MEASURE_DEFINITIONS_PATH, "*.json")

    # No MeasureGlobals or MeasureValues are generated for the ghost branded
    # generics measure, because both numerator and denominator are computed
    # from a view (vw__ghost_generic_measure) which has no data.  Rather than
    # populate this view, it is simpler to pretend it doesn't exist.
    num_measures = len(glob.glob(definitions_pattern)) - 1

    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)

    # Start from an import log containing an empty JSON object.
    with open(settings.PIPELINE_IMPORT_LOG_PATH, "w") as log_file:
        log_file.write("{}")

    for stale_blob in StorageClient().bucket().list_blobs():
        stale_blob.delete()

    for key in DATASETS:
        BQClient(key).create_dataset()

    hscic_client = BQClient("hscic")
    hscic_tables = (
        ("bnf", schemas.BNF_SCHEMA),
        ("ccgs", schemas.CCG_SCHEMA),
        ("stps", schemas.STP_SCHEMA),
        ("regional_teams", schemas.REGIONAL_TEAM_SCHEMA),
        ("ppu_savings", schemas.PPU_SAVING_SCHEMA),
        ("practice_statistics", schemas.PRACTICE_STATISTICS_SCHEMA),
        ("practice_statistics_all_years", schemas.PRACTICE_STATISTICS_SCHEMA),
        ("practices", schemas.PRACTICE_SCHEMA),
        ("prescribing", schemas.PRESCRIBING_SCHEMA),
        ("presentation", schemas.PRESENTATION_SCHEMA),
        ("tariff", schemas.TARIFF_SCHEMA),
        ("bdz_adq", schemas.BDZ_ADQ_SCHEMA),
    )
    for table_name, table_schema in hscic_tables:
        hscic_client.create_table(table_name, table_schema)

    measures_client = BQClient("measures")
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields.  Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields.  But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ("month", "DATE"),
        ("practice_id", "STRING"),
        ("numerator", "INTEGER"),
        ("denominator", "INTEGER"),
    )

    table_prefixes = ("practice_data_", "ccg_data_", "global_data_")
    for definition_path in glob.glob(definitions_pattern):
        definition_file = os.path.basename(definition_path)
        measure_id = os.path.splitext(definition_file)[0]
        for prefix in table_prefixes:
            measures_client.create_table(prefix + measure_id, measures_schema)

    # Although there are no model instances, we call upload_model to create the
    # dm+d tables in BQ that are required by certain measure views.
    dmd_client = BQClient("dmd")
    for model in apps.get_app_config("dmd2").get_models():
        dmd_client.upload_model(model)

    call_command("generate_presentation_replacements")

    fixture_target = os.path.join(e2e_path, "data")

    # Each run adds one month of data, so after the Nth run every expected
    # count is N times its single-month value.
    runs = ((9, "data-1"), (10, "data-2"))
    for months_loaded, (month, fixture_dir) in enumerate(runs, start=1):
        copy_tree(os.path.join(e2e_path, fixture_dir), fixture_target)

        runner.run_all(2017, month, under_test=True)

        # We expect one MeasureGlobal per measure per month.
        assert_count_equal(months_loaded * num_measures, MeasureGlobal)

        # We expect one MeasureValue for each organisation per measure per
        # month (there are 4 practices, 2 CCGs, 2 STPs, and 2 regional teams).
        assert_count_equal(10 * months_loaded * num_measures, MeasureValue)
def run_end_to_end():
    """Run the pipeline end to end for two months and check row counts.

    Clears local pipeline state and the storage bucket, creates the BigQuery
    datasets plus the ``hscic`` and per-measure tables, rebuilds the
    materialised views from ``replace_matviews.sql``, then runs the pipeline
    for 2017-09 and 2017-10, asserting the cumulative row counts after each
    run.
    """
    print('BQ_NONCE: {}'.format(settings.BQ_NONCE))

    num_measures = 57

    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)

    # Start from an import log containing an empty JSON object.
    with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as log_file:
        log_file.write('{}')

    for stale_blob in StorageClient().bucket().list_blobs():
        stale_blob.delete()

    for key in DATASETS:
        BQClient(key).create_dataset()

    hscic_client = BQClient('hscic')
    hscic_tables = (
        ('bnf', schemas.BNF_SCHEMA),
        ('ccgs', schemas.CCG_SCHEMA),
        ('ppu_savings', schemas.PPU_SAVING_SCHEMA),
        ('practice_statistics', schemas.PRACTICE_STATISTICS_SCHEMA),
        ('practice_statistics_all_years', schemas.PRACTICE_STATISTICS_SCHEMA),
        ('practices', schemas.PRACTICE_SCHEMA),
        ('prescribing', schemas.PRESCRIBING_SCHEMA),
        ('presentation', schemas.PRESENTATION_SCHEMA),
        ('tariff', schemas.TARIFF_SCHEMA),
    )
    for table_name, table_schema in hscic_tables:
        hscic_client.create_table(table_name, table_schema)

    measures_client = BQClient('measures')
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields.  Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields.  But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ('month', 'DATE'),
        ('practice_id', 'STRING'),
        ('numerator', 'INTEGER'),
        ('denominator', 'INTEGER'),
    )

    definitions_pattern = os.path.join(
        settings.SITE_ROOT, 'frontend', 'management', 'commands',
        'measure_definitions', '*.json')
    table_prefixes = ('practice_data_', 'ccg_data_', 'global_data_')
    for definition_path in glob.glob(definitions_pattern):
        definition_file = os.path.basename(definition_path)
        measure_id = os.path.splitext(definition_file)[0]
        for prefix in table_prefixes:
            measures_client.create_table(prefix + measure_id, measures_schema)

    call_command('generate_presentation_replacements')

    matviews_sql_path = os.path.join(
        settings.SITE_ROOT, 'frontend', 'management', 'commands',
        'replace_matviews.sql')
    with open(matviews_sql_path) as sql_file, connection.cursor() as cursor:
        cursor.execute(sql_file.read())

    fixture_target = os.path.join(e2e_path, 'data')
    chemical_filter = "chemical_id = '1001030C0'"
    presentation_filter = "presentation_code = '1001030C0AAAAAA'"

    # Each run adds one month of data, so after the Nth run every expected
    # count is N times its single-month value.
    runs = ((9, 'data-1'), (10, 'data-2'))
    for months_loaded, (month, fixture_dir) in enumerate(runs, start=1):
        copy_tree(os.path.join(e2e_path, fixture_dir), fixture_target)

        runner.run_all(2017, month, under_test=True)

        # We expect one MeasureGlobal per measure per month.  If this assert
        # fails, check that num_measures is still correct.
        assert_count_equal(months_loaded * num_measures, MeasureGlobal)

        # One MeasureValue for each CCG or Practice per measure per month.
        assert_count_equal(6 * months_loaded * num_measures, MeasureValue)

        # One statistic per CCG per month.
        assert_raw_count_equal(2 * months_loaded, 'vw__ccgstatistics')

        # One chemical summary per CCG per month.
        assert_raw_count_equal(
            2 * months_loaded, 'vw__chemical_summary_by_ccg',
            chemical_filter)

        # One chemical summary per practice per month.
        assert_raw_count_equal(
            4 * months_loaded, 'vw__chemical_summary_by_practice',
            chemical_filter)

        # One summary per practice per month.
        assert_raw_count_equal(4 * months_loaded, 'vw__practice_summary')

        # One presentation summary per month.
        assert_raw_count_equal(
            1 * months_loaded, 'vw__presentation_summary',
            presentation_filter)

        # One presentation summary per CCG per month.
        assert_raw_count_equal(
            2 * months_loaded, 'vw__presentation_summary_by_ccg',
            presentation_filter)