def test_cached_read(mock_read_gbq): mock_read_gbq.return_value = DataFrame([{"a": 3}]) sql = "select * from foobar" # Test identical SQL bypasses reading BQ with tempfile.NamedTemporaryFile() as csv_file: for _ in range(0, 2): df = bq.cached_read(sql, csv_path=csv_file.name) assert df.loc[0]["a"] == 3 assert mock_read_gbq.call_count == 1 # and now with `use_cache` param df = bq.cached_read(sql, csv_path=csv_file.name, use_cache=False) assert mock_read_gbq.call_count == 2
def get_data(self): print('Running all queries') if self.measure: for measure_name in self.measure_list: folder_name = os.path.join(self.name, measure_name) get_data_dir = self.get_working_dir(folder_name) self.create_dir(get_data_dir) query = self.get_measure_query(measure_name) csv_path = os.path.join(get_data_dir, 'bq_cache.csv') bq.cached_read(query, csv_path=csv_path) else: get_data_dir = self.get_working_dir(self.name) self.create_dir(get_data_dir) query = self.get_custom_query() csv_path = os.path.join(get_data_dir, 'bq_cache.csv') bq.cached_read(query, csv_path=csv_path) print('All queries done')
def get_data(self): if self.measure: for measure_name in self.measure_list: folder_name = os.path.join(self.name, measure_name) get_data_dir = self.get_working_dir(folder_name) self.create_dir(get_data_dir) if self.custom_measure: query = self.get_custom_query(measure_name=measure_name) else: query = self.get_measure_query(measure_name) csv_path = os.path.join(get_data_dir, self.csv_name) bq.cached_read(query, csv_path=csv_path, use_cache=self.use_cache) else: get_data_dir = self.get_working_dir(self.name) self.create_dir(get_data_dir) query = self.get_custom_query() csv_path = os.path.join(get_data_dir, self.csv_name) bq.cached_read(query, csv_path=csv_path, use_cache=self.use_cache)
def get_entity_names(entity_type): query = """ SELECT DISTINCT code, name FROM ebmdatalab.hscic.{}s WHERE name IS NOT NULL """.format(entity_type) entity_names = bq.cached_read( query, csv_path='data/{}_names.csv'.format(entity_type)) return entity_names.set_index('code')
def software_vendor(date="2019-08-01"): q = f""" SELECT ODS, Principal_Supplier FROM ebmdatalab.alex.vendors WHERE Date >= TIMESTAMP("{date}") ORDER BY ODS """ df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_list_size.csv") return df.set_index("ODS")
def prescribing_volume(year=2018): q = f""" SELECT practice, SUM(items) AS total_items FROM ebmdatalab.hscic.normalised_prescribing_standard WHERE month >= TIMESTAMP("{year}-01-01") AND month <= TIMESTAMP("{year}-12-01") GROUP BY practice """ df = bq.cached_read( q, csv_path=f"{py_file_dir}/cached_prescribing_volume.csv") return df.set_index("practice")
def get_measure_list(self): query = ''' SELECT table_id FROM ebmdatalab.{bq_folder}.__TABLES__ WHERE table_id LIKE "{name}" '''.format(bq_folder=self.bq_folder, name=self.name) dir_path = self.get_working_dir(self.name) csv_path = os.path.join(dir_path, "measure_list.csv") os.makedirs(dir_path, exist_ok=True) measure_list = bq.cached_read(query, csv_path=csv_path, use_cache=self.use_cache) return measure_list['table_id']
def urban_rural(): q = """ SELECT code AS practice, ru.LSOA11NM, SUBSTR(RUC11CD,1,1) AS ruc11cd, RUC11 FROM ebmdatalab.ONS.small_area_rural_urban ru INNER JOIN ebmdatalab.ONS.postcode_to_lsoa_map m ON ru.LSOA11CD = m.lsoa11cd INNER JOIN ebmdatalab.hscic.practices ON pcds = postcode """ df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_urban_rural.csv") return df.set_index("practice")
def practice_data(): q = """ SELECT DISTINCT practice, pct FROM ebmdatalab.hscic.normalised_prescribing_standard LEFT JOIN ebmdatalab.hscic.practices ON practice = code AND setting = 4 ORDER BY practice """ df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_practice.csv") return df.set_index("practice")
def list_size(year=2018): q = f""" SELECT practice, AVG(total_list_size) as list_size FROM ebmdatalab.hscic.practice_statistics WHERE month >= TIMESTAMP("{year}-01-01") AND month <= TIMESTAMP("{year}-12-01") GROUP BY practice ORDER BY practice """ df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_list_size.csv") return df.set_index("practice")
# # + sql1 = ''' SELECT month, pct_id, sum(numerator) as total_gaba, FROM `ebmdatalab.measures.ccg_data_gabapentinoidsddd` AS p GROUP BY month, pct_id ''' df_gaba = bq.cached_read(sql1, csv_path=os.path.join('..', 'data', 'df_gaba.csv')) df_gaba['month'] = pd.to_datetime(df_gaba['month']) df_gaba.rename( columns={'pct_id': 'pct'}, inplace=True) ##prep for maps library whe pct must be colum name df_gaba.head(5) # - ax = df_gaba.groupby(["month"])['total_gaba'].sum().plot( kind='line', title="Total Gabapentinoids DDD") ax.axvline(pd.to_datetime('2019-04-01'), color='black', linestyle='--', lw=2) ##law change plt.ylim(0, 30000000) # *Discussion* : Overall the changes to CD regulations fo not appear to have substantially changed prescribing of gabapentinoids. A visual inspection of the graph shows a possible levelling off of the total quanity prescribed.
) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, nm, bnf_code, id''' lama_single_codelist = bq.cached_read(sql, csv_path=os.path.join( '..', 'data', 'lama_single_codelist.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) lama_single_codelist # - # ## Multi Ingredent Preparations including LAMA <a id='multi'></a> # + sql = '''WITH bnf_codes AS ( SELECT DISTINCT bnf_code FROM measures.dmd_objs_with_form_route WHERE (bnf_code LIKE '0301020U0%' OR #BNF Aclidinium Brom/Formoterol bnf_code LIKE '0301040V0%' OR #BNF Aclidinium Brom/Formoterol bnf_code LIKE '0301040W0%' OR #BNF Umeclidinium bromide / Vilanterol bnf_code LIKE '0301040X0%' OR #BNF Tiotropium bromide / Olodaterol
# Ensure that DataFrames columns are not truncated pd.set_option('display.max_colwidth', -1) # ### Total items and net_cost for November 2019 # + # Query the prescribing data in BigQuery for the total items and net_cost sql = """ SELECT SUM(items) AS items, SUM(net_cost_pence) / 100 AS net_cost FROM public_draft.prescribing WHERE month = '2019-11-01' """ df = bq.cached_read(sql, csv_path='../bq-cache/2019_11_items_and_spending.csv') df # - # Save these off for later -- we'll want to see what proprtion the items and net_cost are affected by the changes total_items, total_net_cost = df.iloc[0] # --- # ## Change 1: BNF code changes # ### Affected presentations # Load the data from the spreadsheet bnf_mapping_raw = pd.read_excel('../data/MDR BNF to dm+d BNF Mapping.xlsx') bnf_mapping_raw.head()
from ebmdatalab import bq import os import pandas as pd # + sql = '''WITH bnf_codes AS ( SELECT DISTINCT(bnf_code), FROM measures.dmd_objs_with_form_route WHERE (bnf_code LIKE "05%" AND form_route LIKE '%.oral%') ) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, bnf_code, id''' oral_antibiotics_codelist = bq.cached_read(sql, csv_path=os.path.join('..','data','orale_antibiotics_codelist.csv')) pd.set_option('display.max_rows', None) oral_antibiotics_codelist # +
LEFT JOIN bnf_tab ON chemical_code=SUBSTR(presc.bnf_code,0,9) WHERE month BETWEEN TIMESTAMP('2019-01-01') AND TIMESTAMP('2019-12-01') ##user requested 2019 GROUP BY chemical_code, chemical ORDER BY items DESC LIMIT 20 ##limit to 20 ''' df_chemical_items = bq.cached_read(sql, csv_path='chemical_items.csv') df_chemical_items.head(11) # - # ## Top 10 Chemicals in 2019 by Cost # + sql = ''' WITH bnf_tab AS ( SELECT DISTINCT chemical, chemical_code FROM ebmdatalab.hscic.bnf ) SELECT
# language: python # name: python3 # --- # # The following notebook contains codes from the [NHS dictionary of medicines and devices](https://ebmdatalab.net/what-is-the-dmd-the-nhs-dictionary-of-medicines-and-devices/) for oral macrolide antibacterials. You can see the overall [prescribing of macrolides on OpenPrescribing here](https://openprescribing.net/bnf/050105/). from ebmdatalab import bq import os import pandas as pd # + sql = ''' WITH bnf_codes AS ( SELECT bnf_code FROM hscic.presentation WHERE bnf_code LIKE '050105%' #bnf code section macrolide ) SELECT * FROM measures.dmd_objs_with_form_route WHERE bnf_code IN (SELECT * FROM bnf_codes) AND obj_type IN ('vmp', 'amp') AND form_route LIKE '%.oral%' ORDER BY obj_type, bnf_code, snomed_id ''' macrolides = bq.cached_read(sql, csv_path=os.path.join('..','data','macrolides.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) macrolides
#import libraries from ebmdatalab import bq import pandas as pd import os # + sql = '''WITH bnf_codes AS ( SELECT bnf_code FROM hscic.presentation WHERE bnf_code LIKE "030401%" # antihistamine ) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, bnf_code, id''' antihistamine_codelist = bq.cached_read(sql, csv_path=os.path.join('..','data','antihistamine_codelist.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) antihistamine_codelist # -
) ) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, nm ''' immuno_meds = bq.cached_read(sql, csv_path=os.path.join('..', 'data', 'immuno_meds .csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) immuno_meds.count() # + [markdown] # ## Dm+d Additions # Now some of the medicines dn't appear in prescribing data as they are generally hospital only medicines. WE need to include them in case a GP has recorded on a TPP system or we get access to another database. We will manually select these from a list used in a previous study by LSHTM. # # # + sql = ''' WITH dmd_codes AS ( SELECT id FROM dmd.vmp WHERE LOWER(nm) LIKE 'afliber%' OR
UNION ALL SELECT "amp" AS type, amp.id as id, amp.bnf_code as bnf_code, amp.descr as nm FROM dmd.amp as amp INNER JOIN dmd.vpi as vpi ON amp.vmp = vpi.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) GROUP BY amp.id, amp.bnf_code, amp.descr HAVING COUNT(ing) > 1 ORDER BY type, bnf_code, id''' all_inhaler_ics_combi_agent = bq.cached_read( sql, csv_path=os.path.join('..', 'data', 'all_inhaler_ics_combi_agent.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) all_inhaler_ics_combi_agent.info() # - #import csv from other notebook dealing with high dose dose_high_ics_combi_agent = pd.read_csv( '../data/highdose_inhaledsteroid_combiagent_codelist.csv') dose_high_ics_combi_agent.info() ## here we merge and create an indicator to see which ones are in both combine = pd.merge(all_inhaler_ics_combi_agent, dose_high_ics_combi_agent, how='outer',
bnf_code LIKE "0302000C0%CA" OR # Beclomet/Formoterol_Inh 200/6 (120D) Dry (brands and generic)", bnf_code LIKE "0302000K0%AU" OR # Budesonide/Formoterol_InhaB/A 400/12(60D (brands and generic)", bnf_code LIKE "0302000N0%AZ" OR # Fluticasone/Salmeterol_Inh 500/50mcg 60D (brands and generic)", bnf_code LIKE "0302000N0%BG" OR # Fluticasone/Salmeterol_Inh 250/25mcg120D (brands and generic)", bnf_code LIKE "0302000N0%BK" OR # Fluticasone/Formoterol_Inh 250/10mcg120D (brands and generic)", bnf_code LIKE "0302000V0%AA") # Fluticasone/Vilanterol_Inha 184/22mcg30D (brands and generic)" AND (form_route LIKE '%pressurizedinhalation.inhalation' OR form_route LIKE 'powderinhalation.inhalation%') ) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, bnf_code, id''' highdose_inhaledsteroids_combiagent_codelist = bq.cached_read( sql, csv_path=os.path.join('..', 'data', 'highdose_inhaledsteroid_combiagent_codelist.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) highdose_inhaledsteroids_combiagent_codelist # -
AND (form_route LIKE '%pressurizedinhalation.inhalation' OR form_route LIKE 'powderinhalation.inhalation%') ) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, bnf_code, id''' sabutamol_asthma = bq.cached_read(sql, csv_path=os.path.join('..','data','sabutamol_asthma.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) sabutamol_asthma # + ##rule 2 ICS sql = ''' WITH bnf_codes AS ( SELECT DISTINCT bnf_code FROM measures.dmd_objs_with_form_route WHERE (bnf_code LIKE '0302000C0%' OR #BNF Beclometasone dipropionate bnf_code LIKE '0301011AB%' OR #BNF BeclometDiprop/Formoterol/Glycopyrronium", bnf_code LIKE '0302000K0%' OR #BNF budesonide bnf_code LIKE '0302000U0%' OR #BNF Ciclesonide bnf_code LIKE '0302000V0%' OR #BNF Fluticasone furoate
SELECT DISTINCT(bnf_code) FROM ebmdatalab.brian.amp_recommended ) GROUP BY rx.month, rx.pct, rx.bnf_code, rx.bnf_name ORDER BY month ''' df_amp_recommended = bq.cached_read(sql, csv_path=os.path.join( '..', 'data', 'df_amp_recommended.zip')) #df_amp_recommended = bq.cached_read(sql, csv_path=os.path.join('..','data','df_amp_recommended.csv')) df_amp_recommended['month'] = df_amp_recommended['month'].astype( 'datetime64[ns]') df_amp_recommended.head(3) # - #lets graph to see the total number of items over time df_amp_recommended.groupby("month")['total_items'].sum().plot( kind='line', title="Total items where AMP recommended by NHS dm+d") plt.ylim(0, ) #create dataframe with only generically prescribed items df_generic = df_amp_recommended.loc[ df_amp_recommended["bnf_code"].str.contains('AA\w{4}$')]
from ebmdatalab import bq import os import pandas as pd # + sql = '''WITH bnf_codes AS ( SELECT bnf_code FROM hscic.presentation WHERE bnf_code LIKE '0501050A0%' #BNF chmeical azithromycin not including eye drops which are chap 11 ) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, bnf_code, id''' azithromycin_codelist = bq.cached_read(sql, csv_path=os.path.join( '..', 'data', 'azithromycin_codelist.csv')) pd.set_option('display.max_rows', None) azithromycin_codelist
# This notebook lists oral preparations for hydroxychloroquine. from ebmdatalab import bq import os import pandas as pd # + sql = ''' WITH bnf_codes AS ( SELECT bnf_code FROM hscic.presentation WHERE bnf_code LIKE '1001030C0%' ##hydroxychloroquine sulfate - BNF sect drugs used in rheumatic disease ) SELECT * FROM measures.dmd_objs_with_form_route WHERE bnf_code IN (SELECT * FROM bnf_codes) AND obj_type IN ('vmp', 'amp') AND form_route LIKE '%.oral%' #include oral preparations only ORDER BY obj_type, bnf_code, snomed_id''' hydroxychloroquine_codelist = bq.cached_read( sql, csv_path=os.path.join('..', 'data', 'hydroxychloroquine_codelist.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) hydroxychloroquine_codelist # -
l.Is_LA, l.Is_High_LA, sum(itemsper1000) as items_per_1000, sum(quantityper1000) as quantity_per_1000, sum(quantityper1000*dose_per_unit*new_ome_multiplier) AS total_ome_per_1000, sum(Infl_corr_Cost_per1000) as cost_per_1000 FROM ebmdatalab.helen.trends_from_pca_final_2017 p INNER JOIN (SELECT distinct drug_name, chem_substance, Is_LA, Is_High_LA, dose_per_unit, new_ome_multiplier FROM ebmdatalab.richard.opioid_converter) l on l.drug_name = p.drug_name GROUP BY year, chem_substance, Is_LA, Is_High_LA""" dfl = bq.cached_read(q4, csv_path="chemical_summary.zip").fillna(0) dfl.head() # - # tidy data dfl.loc[dfl["Is_High_LA"]!="TRUE","Is_High_LA"] = "Others" dfl.loc[dfl["Is_High_LA"]=="TRUE","Is_High_LA"] = "High dose" dfl.drop("Is_LA",axis=1).groupby(["year","Is_High_LA"]).sum() # ## Summary results tables # + # Summary results table by year tab = dfl.fillna(0) tab.loc[tab["Is_High_LA"]=="High dose","OME per 1000_High Dose"] = tab["total_ome_per_1000"] tab.loc[tab["Is_High_LA"]=="High dose","Items per 1000_High Dose"] = tab["items_per_1000"]
SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, bnf_code, id ''' cvd_medcodes = bq.cached_read(sql, csv_path=os.path.join('..', 'data', 'cvd_medcodes.csv')) cvd_medcodes.head() # - # #### Find patients with coded events where code matches cvd disease # + codes_where = codes_to_sql_where("CTV3Code", chd_codes) query = f''' SELECT DISTINCT Patient_ID, 1 AS chd_code FROM CodedEvent WHERE {codes_where} ORDER BY Patient_ID '''
# + sql = ''' WITH bnf_codes AS ( SELECT bnf_code FROM hscic.presentation WHERE (bnf_code LIKE '0802010G0%' OR #bnf azathioprine bnf_code LIKE '0105010E0%' OR #bnf sulfasalazine bnf_code LIKE '0802020G0%' OR #bnf ciclosporin bnf_code LIKE '0801030L0%' OR #bnf mercaptopurine bnf_code LIKE '1001030F0%' OR #bnf penicillamine bnf_code LIKE '1001030L0%' OR #bnf leflunomide bnf_code LIKE '1001030J0%' OR #bnf gold bnf_code LIKE '1001030U0%' OR #bnf methotrexate - mostly oral bnf_code LIKE '0801030P0%' OR #bnf methotrexate - injections, prefilled syringes bnf_code LIKE '0802010M0%' #bnf mycophenolate mofetil - mmf ) ) SELECT * FROM measures.dmd_objs_with_form_route WHERE bnf_code IN (SELECT * FROM bnf_codes) AND obj_type IN ('vmp', 'amp') ORDER BY obj_type, bnf_code, snomed_id ''' dmards_primary_care_meds = bq.cached_read(sql, csv_path=os.path.join('..','data','dmards_primary_care_meds.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) dmards_primary_care_meds # -
INNER JOIN hscic.practices pract ON presc.practice = pract.code INNER JOIN hscic.ccgs AS ccg ON presc.pct=ccg.code WHERE ccg.org_type='CCG' AND pract.setting = 4 AND presc.bnf_code IN ( SELECT DISTINCT(bnf_code) FROM ebmdatalab.measures.dmd_objs_with_form_route WHERE form_route IN ('pressurizedinhalation.inhalation', 'powderinhalation.inhalation') ) AND bnf_code LIKE "050%" GROUP BY pct, month, bnf_name, bnf_code ORDER BY pct, month ''' df_inh_abx = bq.cached_read(sql, csv_path='df_inh_abx.csv') df_inh_abx['month'] = df_inh_abx['month'].astype('datetime64[ns]') df_inh_abx.head() # - SELECT CAST(month AS DATE) AS month, practice AS practice_id, SUM(items) AS denominator FROM hscic.normalised_prescribing_standard WHERE bnf_code IN ( SELECT DISTINCT(bnf_code) FROM measures.dmd_objs_with_form_route WHERE form_route IN ('pressurizedinhalation.inhalation', 'powderinhalation.inhalation') AND bnf_code LIKE '03%' AND bnf_code NOT LIKE '0301011R0%' ) GROUP BY month, practice_id
import os import pandas as pd # + sql = ''' WITH bnf_codes AS ( SELECT DISTINCT bnf_code FROM measures.dmd_objs_with_form_route WHERE (bnf_code LIKE '0301011R0%' OR ##BNF salbutamol - excluded bnf_code LIKE '0301040R0%' as it only has comination with ipratropium in it bnf_code LIKE '0301011V0%') ##terbutaline AND (form_route LIKE '%pressurizedinhalation.inhalation' OR form_route LIKE 'powderinhalation.inhalation%') ) SELECT "vmp" AS type, id, bnf_code, nm FROM dmd.vmp WHERE bnf_code IN (SELECT * FROM bnf_codes) UNION ALL SELECT "amp" AS type, id, bnf_code, descr FROM dmd.amp WHERE bnf_code IN (SELECT * FROM bnf_codes) ORDER BY type, nm, bnf_code, id''' saba_codelist = bq.cached_read(sql, csv_path=os.path.join('..', 'data', 'saba_codelist.csv')) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) saba_codelist
# ## Import results lidocaine = practice_class.concatenate_outputs() lidocaine.head() # ### Get list to filter closed practices query = """ SELECT DISTINCT code FROM ebmdatalab.hscic.practices WHERE status_code = "A" """ open_practices = bq.cached_read(query, csv_path='data/open_practices.csv') open_practices.head() # + ### Get practice list to filter small list sizes # - query = """ SELECT DISTINCT practice FROM ebmdatalab.hscic.practice_statistics WHERE total_list_size < 2000 """ small_list_size = bq.cached_read(query, csv_path='data/small_list_size.csv')