Esempio n. 1
0
def test_cached_read(mock_read_gbq):
    mock_read_gbq.return_value = DataFrame([{"a": 3}])
    sql = "select * from foobar"

    # Test identical SQL bypasses reading BQ
    with tempfile.NamedTemporaryFile() as csv_file:
        for _ in range(0, 2):
            df = bq.cached_read(sql, csv_path=csv_file.name)
        assert df.loc[0]["a"] == 3
        assert mock_read_gbq.call_count == 1

        # and now with `use_cache` param
        df = bq.cached_read(sql, csv_path=csv_file.name, use_cache=False)
        assert mock_read_gbq.call_count == 2
Esempio n. 2
0
 def get_data(self):
     print('Running all queries')
     if self.measure:
         for measure_name in self.measure_list:
             folder_name = os.path.join(self.name, measure_name)
             get_data_dir = self.get_working_dir(folder_name)
             self.create_dir(get_data_dir)
             query = self.get_measure_query(measure_name)
             csv_path = os.path.join(get_data_dir, 'bq_cache.csv')
             bq.cached_read(query, csv_path=csv_path)
     else:
         get_data_dir = self.get_working_dir(self.name)
         self.create_dir(get_data_dir)
         query = self.get_custom_query()
         csv_path = os.path.join(get_data_dir, 'bq_cache.csv')
         bq.cached_read(query, csv_path=csv_path)
     print('All queries done')
Esempio n. 3
0
 def get_data(self):
     if self.measure:
         for measure_name in self.measure_list:
             folder_name = os.path.join(self.name, measure_name)
             get_data_dir = self.get_working_dir(folder_name)
             self.create_dir(get_data_dir)
             if self.custom_measure:
                 query = self.get_custom_query(measure_name=measure_name)
             else:
                 query = self.get_measure_query(measure_name)
             csv_path = os.path.join(get_data_dir, self.csv_name)
             bq.cached_read(query,
                            csv_path=csv_path,
                            use_cache=self.use_cache)
     else:
         get_data_dir = self.get_working_dir(self.name)
         self.create_dir(get_data_dir)
         query = self.get_custom_query()
         csv_path = os.path.join(get_data_dir, self.csv_name)
         bq.cached_read(query, csv_path=csv_path, use_cache=self.use_cache)
Esempio n. 4
0
def get_entity_names(entity_type):
    query = """
    SELECT
      DISTINCT code,
      name
    FROM
      ebmdatalab.hscic.{}s
    WHERE
      name IS NOT NULL
    """.format(entity_type)
    entity_names = bq.cached_read(
        query, csv_path='data/{}_names.csv'.format(entity_type))
    return entity_names.set_index('code')
def software_vendor(date="2019-08-01"):
    q = f"""
	SELECT
	  ODS,
	  Principal_Supplier
	FROM
	  ebmdatalab.alex.vendors
	WHERE
	  Date >= TIMESTAMP("{date}")
	ORDER BY
	  ODS
	"""
    df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_list_size.csv")
    return df.set_index("ODS")
def prescribing_volume(year=2018):
    q = f"""
	SELECT
	  practice,
	  SUM(items) AS total_items
	FROM
	  ebmdatalab.hscic.normalised_prescribing_standard
	WHERE
	  month >= TIMESTAMP("{year}-01-01")
	  AND month <= TIMESTAMP("{year}-12-01")
	GROUP BY
	  practice
	"""
    df = bq.cached_read(
        q, csv_path=f"{py_file_dir}/cached_prescribing_volume.csv")
    return df.set_index("practice")
Esempio n. 7
0
 def get_measure_list(self):
     query = '''
     SELECT
       table_id
     FROM
       ebmdatalab.{bq_folder}.__TABLES__
     WHERE
       table_id LIKE "{name}"
     '''.format(bq_folder=self.bq_folder, name=self.name)
     dir_path = self.get_working_dir(self.name)
     csv_path = os.path.join(dir_path, "measure_list.csv")
     os.makedirs(dir_path, exist_ok=True)
     measure_list = bq.cached_read(query,
                                   csv_path=csv_path,
                                   use_cache=self.use_cache)
     return measure_list['table_id']
def urban_rural():
    q = """
	SELECT
	  code AS practice, ru.LSOA11NM, SUBSTR(RUC11CD,1,1) AS ruc11cd, RUC11
	FROM
	  ebmdatalab.ONS.small_area_rural_urban ru
	INNER JOIN
	  ebmdatalab.ONS.postcode_to_lsoa_map m
	ON
	  ru.LSOA11CD = m.lsoa11cd
	INNER JOIN
	  ebmdatalab.hscic.practices
	ON
	  pcds = postcode
	"""
    df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_urban_rural.csv")
    return df.set_index("practice")
def practice_data():
    q = """
	SELECT
	  DISTINCT practice,
	  pct
	FROM
	  ebmdatalab.hscic.normalised_prescribing_standard
	LEFT JOIN
	  ebmdatalab.hscic.practices
	ON
	  practice = code
	  AND setting = 4
	ORDER BY
	  practice
	"""
    df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_practice.csv")
    return df.set_index("practice")
def list_size(year=2018):
    q = f"""
	SELECT
	  practice,
	  AVG(total_list_size) as list_size
	FROM
	  ebmdatalab.hscic.practice_statistics
	WHERE
	  month >= TIMESTAMP("{year}-01-01")
	  AND month <= TIMESTAMP("{year}-12-01")
	GROUP BY
	  practice
	ORDER BY
	  practice
	"""
    df = bq.cached_read(q, csv_path=f"{py_file_dir}/cached_list_size.csv")
    return df.set_index("practice")
Esempio n. 11
0
#

# +
sql1 = '''
SELECT
  month,
  pct_id,
  sum(numerator) as total_gaba,
FROM
  `ebmdatalab.measures.ccg_data_gabapentinoidsddd` AS p
GROUP BY
month,
pct_id
'''

df_gaba = bq.cached_read(sql1,
                         csv_path=os.path.join('..', 'data', 'df_gaba.csv'))
df_gaba['month'] = pd.to_datetime(df_gaba['month'])
df_gaba.rename(
    columns={'pct_id': 'pct'},
    inplace=True)  ##prep for maps library whe  pct must be colum name
df_gaba.head(5)

# -

ax = df_gaba.groupby(["month"])['total_gaba'].sum().plot(
    kind='line', title="Total Gabapentinoids DDD")
ax.axvline(pd.to_datetime('2019-04-01'), color='black', linestyle='--',
           lw=2)  ##law change
plt.ylim(0, 30000000)

# *Discussion* : Overall the changes to CD regulations fo not appear to have substantially changed prescribing of gabapentinoids. A visual inspection of the graph shows a possible levelling off of the total quanity prescribed.
  )

SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, nm, bnf_code, id'''

lama_single_codelist = bq.cached_read(sql,
                                      csv_path=os.path.join(
                                          '..', 'data',
                                          'lama_single_codelist.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
lama_single_codelist
# -

# ## Multi Ingredent Preparations including LAMA <a id='multi'></a>

# +
sql = '''WITH bnf_codes AS (
 SELECT DISTINCT bnf_code FROM measures.dmd_objs_with_form_route WHERE 
    (bnf_code LIKE '0301020U0%'   OR        #BNF Aclidinium Brom/Formoterol
    bnf_code LIKE '0301040V0%'    OR        #BNF Aclidinium Brom/Formoterol
    bnf_code LIKE '0301040W0%'    OR        #BNF Umeclidinium bromide / Vilanterol
    bnf_code LIKE '0301040X0%'    OR        #BNF Tiotropium bromide  / Olodaterol
Esempio n. 13
0
# Ensure that DataFrames columns are not truncated
pd.set_option('display.max_colwidth', -1)

# ### Total items and net_cost for November 2019

# +
# Query the prescribing data in BigQuery for the total items and net_cost
sql = """
SELECT
    SUM(items) AS items,
    SUM(net_cost_pence) / 100 AS net_cost
FROM public_draft.prescribing
WHERE month = '2019-11-01'
"""

df = bq.cached_read(sql, csv_path='../bq-cache/2019_11_items_and_spending.csv')
df
# -

# Save these off for later -- we'll want to see what proprtion the items and net_cost are affected by the changes
total_items, total_net_cost = df.iloc[0]

# ---

# ## Change 1: BNF code changes

# ### Affected presentations

# Load the data from the spreadsheet
bnf_mapping_raw = pd.read_excel('../data/MDR BNF to dm+d BNF Mapping.xlsx')
bnf_mapping_raw.head()
Esempio n. 14
0
from ebmdatalab import bq
import os
import pandas as pd

# +
  sql = '''WITH bnf_codes AS (
  SELECT DISTINCT(bnf_code),
  FROM measures.dmd_objs_with_form_route
  WHERE (bnf_code LIKE "05%" AND form_route LIKE '%.oral%')
  )

SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, bnf_code, id'''

oral_antibiotics_codelist = bq.cached_read(sql, csv_path=os.path.join('..','data','orale_antibiotics_codelist.csv'))
pd.set_option('display.max_rows', None)
oral_antibiotics_codelist
# +


Esempio n. 15
0
LEFT JOIN
bnf_tab
ON
chemical_code=SUBSTR(presc.bnf_code,0,9)
WHERE
 month BETWEEN TIMESTAMP('2019-01-01')
 AND TIMESTAMP('2019-12-01') ##user requested 2019
GROUP BY
chemical_code,
chemical
ORDER BY
 items DESC
 LIMIT 20 ##limit to 20
  '''

df_chemical_items = bq.cached_read(sql, csv_path='chemical_items.csv')
df_chemical_items.head(11)
# -

# ## Top 10 Chemicals in 2019 by Cost

# +
sql = '''
WITH
  bnf_tab AS (
  SELECT
    DISTINCT chemical,
    chemical_code
  FROM
    ebmdatalab.hscic.bnf )
SELECT
#     language: python
#     name: python3
# ---

#
# The following notebook contains codes from the [NHS dictionary of medicines and devices](https://ebmdatalab.net/what-is-the-dmd-the-nhs-dictionary-of-medicines-and-devices/) for oral macrolide antibacterials. You can see the overall [prescribing of macrolides on OpenPrescribing here](https://openprescribing.net/bnf/050105/).

from ebmdatalab import bq
import os
import pandas as pd

# +
sql = '''
WITH bnf_codes AS (  
  SELECT bnf_code FROM hscic.presentation WHERE 
  bnf_code LIKE '050105%' #bnf code section macrolide
   )
SELECT *
FROM measures.dmd_objs_with_form_route
WHERE bnf_code IN (SELECT * FROM bnf_codes) 
AND 
obj_type IN ('vmp', 'amp')
AND
form_route LIKE '%.oral%' 
ORDER BY obj_type, bnf_code, snomed_id '''

macrolides = bq.cached_read(sql, csv_path=os.path.join('..','data','macrolides.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
macrolides
#import libraries
from ebmdatalab import bq
import pandas as pd
import os

# +
sql = '''WITH bnf_codes AS (
  SELECT bnf_code FROM hscic.presentation WHERE 
    bnf_code LIKE  "030401%" # antihistamine
    )

SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, bnf_code, id'''

antihistamine_codelist = bq.cached_read(sql, csv_path=os.path.join('..','data','antihistamine_codelist.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
antihistamine_codelist
# -


)
   )
SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, nm '''

immuno_meds = bq.cached_read(sql,
                             csv_path=os.path.join('..', 'data',
                                                   'immuno_meds .csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
immuno_meds.count()
# + [markdown]
# ## Dm+d Additions
# Now some of the medicines dn't appear in prescribing data as they are generally hospital only medicines. WE need to include them in case a GP has recorded on a TPP system or we get access to another database. We will manually select these from a list used in a previous study by LSHTM.
#
#

# +
sql = '''
WITH dmd_codes AS (  
  SELECT id FROM dmd.vmp WHERE
  LOWER(nm) LIKE 'afliber%'     OR
UNION ALL

SELECT "amp" AS type, amp.id as id, amp.bnf_code as bnf_code, amp.descr as nm
FROM dmd.amp as amp
INNER JOIN dmd.vpi as vpi
ON
amp.vmp = vpi.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)
GROUP BY amp.id, amp.bnf_code, amp.descr
HAVING COUNT(ing) > 1

ORDER BY type, bnf_code, id'''

all_inhaler_ics_combi_agent = bq.cached_read(
    sql,
    csv_path=os.path.join('..', 'data', 'all_inhaler_ics_combi_agent.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
all_inhaler_ics_combi_agent.info()
# -

#import csv from other notebook dealing with high dose
dose_high_ics_combi_agent = pd.read_csv(
    '../data/highdose_inhaledsteroid_combiagent_codelist.csv')
dose_high_ics_combi_agent.info()

## here we merge and create an indicator to see which ones are in both
combine = pd.merge(all_inhaler_ics_combi_agent,
                   dose_high_ics_combi_agent,
                   how='outer',
   bnf_code LIKE "0302000C0%CA" OR # Beclomet/Formoterol_Inh 200/6 (120D) Dry (brands and generic)",
   bnf_code LIKE "0302000K0%AU" OR # Budesonide/Formoterol_InhaB/A 400/12(60D (brands and generic)",
   bnf_code LIKE "0302000N0%AZ" OR # Fluticasone/Salmeterol_Inh 500/50mcg 60D (brands and generic)",
   bnf_code LIKE "0302000N0%BG" OR # Fluticasone/Salmeterol_Inh 250/25mcg120D (brands and generic)",
   bnf_code LIKE "0302000N0%BK" OR # Fluticasone/Formoterol_Inh 250/10mcg120D (brands and generic)",
   bnf_code LIKE "0302000V0%AA")   # Fluticasone/Vilanterol_Inha 184/22mcg30D (brands and generic)"
   AND
(form_route LIKE '%pressurizedinhalation.inhalation' OR form_route LIKE 'powderinhalation.inhalation%')
   )
   
SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, bnf_code, id'''

highdose_inhaledsteroids_combiagent_codelist = bq.cached_read(
    sql,
    csv_path=os.path.join('..', 'data',
                          'highdose_inhaledsteroid_combiagent_codelist.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
highdose_inhaledsteroids_combiagent_codelist
# -
  AND (form_route LIKE '%pressurizedinhalation.inhalation' OR form_route LIKE 'powderinhalation.inhalation%')
   )
   
SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, bnf_code, id'''

sabutamol_asthma = bq.cached_read(sql, csv_path=os.path.join('..','data','sabutamol_asthma.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
sabutamol_asthma

# +
##rule 2 ICS

sql = '''
WITH bnf_codes AS (  
  SELECT DISTINCT bnf_code FROM measures.dmd_objs_with_form_route WHERE 
  (bnf_code LIKE '0302000C0%' OR #BNF Beclometasone dipropionate
  bnf_code LIKE '0301011AB%'  OR #BNF BeclometDiprop/Formoterol/Glycopyrronium",
  bnf_code LIKE '0302000K0%'  OR #BNF budesonide
  bnf_code LIKE '0302000U0%'  OR #BNF Ciclesonide
  bnf_code LIKE '0302000V0%'  OR #BNF Fluticasone furoate 
  SELECT
    DISTINCT(bnf_code)
  FROM
    ebmdatalab.brian.amp_recommended
     )
GROUP BY
rx.month,
rx.pct,
rx.bnf_code,
rx.bnf_name
ORDER BY
month
'''

df_amp_recommended = bq.cached_read(sql,
                                    csv_path=os.path.join(
                                        '..', 'data',
                                        'df_amp_recommended.zip'))
#df_amp_recommended = bq.cached_read(sql, csv_path=os.path.join('..','data','df_amp_recommended.csv'))
df_amp_recommended['month'] = df_amp_recommended['month'].astype(
    'datetime64[ns]')
df_amp_recommended.head(3)
# -

#lets graph to see the total number of items over time
df_amp_recommended.groupby("month")['total_items'].sum().plot(
    kind='line', title="Total items where AMP recommended by NHS dm+d")
plt.ylim(0, )

#create dataframe with only generically prescribed items
df_generic = df_amp_recommended.loc[
    df_amp_recommended["bnf_code"].str.contains('AA\w{4}$')]
from ebmdatalab import bq
import os
import pandas as pd

# +
sql = '''WITH bnf_codes AS (
  SELECT bnf_code FROM hscic.presentation WHERE 
    bnf_code LIKE '0501050A0%' #BNF chmeical azithromycin not including eye drops which are chap 11
 
  )

SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, bnf_code, id'''

azithromycin_codelist = bq.cached_read(sql,
                                       csv_path=os.path.join(
                                           '..', 'data',
                                           'azithromycin_codelist.csv'))
pd.set_option('display.max_rows', None)
azithromycin_codelist
# This notebook lists oral preparations for hydroxychloroquine.

from ebmdatalab import bq
import os
import pandas as pd

# +
sql = '''
WITH bnf_codes AS (
  SELECT bnf_code FROM hscic.presentation WHERE 
  bnf_code LIKE '1001030C0%' ##hydroxychloroquine sulfate - BNF sect drugs used in rheumatic disease

)

SELECT *
FROM measures.dmd_objs_with_form_route
WHERE bnf_code IN (SELECT * FROM bnf_codes) 
AND 
obj_type IN ('vmp', 'amp')
AND
form_route LIKE '%.oral%' #include oral preparations only
ORDER BY obj_type, bnf_code, snomed_id'''

hydroxychloroquine_codelist = bq.cached_read(
    sql,
    csv_path=os.path.join('..', 'data', 'hydroxychloroquine_codelist.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
hydroxychloroquine_codelist
# -
  l.Is_LA, 
  l.Is_High_LA,
  sum(itemsper1000) as items_per_1000, 
  sum(quantityper1000) as quantity_per_1000,
  sum(quantityper1000*dose_per_unit*new_ome_multiplier) AS total_ome_per_1000,
  sum(Infl_corr_Cost_per1000) as cost_per_1000
FROM ebmdatalab.helen.trends_from_pca_final_2017 p
INNER JOIN (SELECT distinct drug_name, chem_substance, Is_LA, Is_High_LA, dose_per_unit, new_ome_multiplier FROM ebmdatalab.richard.opioid_converter) l on l.drug_name = p.drug_name

GROUP BY 
  year,
  chem_substance,
  Is_LA, 
  Is_High_LA"""

dfl = bq.cached_read(q4, csv_path="chemical_summary.zip").fillna(0)
dfl.head()
# -

# tidy data
dfl.loc[dfl["Is_High_LA"]!="TRUE","Is_High_LA"] = "Others"
dfl.loc[dfl["Is_High_LA"]=="TRUE","Is_High_LA"] = "High dose"
dfl.drop("Is_LA",axis=1).groupby(["year","Is_High_LA"]).sum()

# ## Summary results tables

# +
# Summary results table by year
tab = dfl.fillna(0)
tab.loc[tab["Is_High_LA"]=="High dose","OME per 1000_High Dose"] = tab["total_ome_per_1000"]
tab.loc[tab["Is_High_LA"]=="High dose","Items per 1000_High Dose"] = tab["items_per_1000"]
Esempio n. 26
0
SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, bnf_code, id
'''

cvd_medcodes = bq.cached_read(sql,
                              csv_path=os.path.join('..', 'data',
                                                    'cvd_medcodes.csv'))
cvd_medcodes.head()
# -

# #### Find patients with coded events where code matches cvd disease

# +
codes_where = codes_to_sql_where("CTV3Code", chd_codes)

query = f'''
SELECT DISTINCT Patient_ID, 1 AS chd_code
FROM CodedEvent
WHERE {codes_where}
ORDER BY Patient_ID
'''
Esempio n. 27
0
# +
sql = '''
WITH bnf_codes AS (  
  SELECT bnf_code FROM hscic.presentation WHERE 
  (bnf_code LIKE '0802010G0%'     OR #bnf azathioprine
  bnf_code LIKE '0105010E0%'      OR #bnf sulfasalazine
  bnf_code LIKE '0802020G0%'      OR #bnf ciclosporin   
  bnf_code LIKE '0801030L0%'      OR #bnf mercaptopurine
  bnf_code LIKE '1001030F0%'      OR #bnf penicillamine
  bnf_code LIKE '1001030L0%'      OR #bnf leflunomide
  bnf_code LIKE '1001030J0%'      OR #bnf gold
  bnf_code LIKE '1001030U0%'      OR #bnf methotrexate - mostly oral
  bnf_code LIKE '0801030P0%'      OR #bnf methotrexate - injections, prefilled syringes
  bnf_code LIKE '0802010M0%'         #bnf mycophenolate mofetil - mmf
) 
   )
SELECT *
FROM measures.dmd_objs_with_form_route
WHERE bnf_code IN (SELECT * FROM bnf_codes) 
AND 
obj_type IN ('vmp', 'amp')
ORDER BY obj_type, bnf_code, snomed_id '''

dmards_primary_care_meds = bq.cached_read(sql, csv_path=os.path.join('..','data','dmards_primary_care_meds.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
dmards_primary_care_meds
# -


INNER JOIN hscic.practices pract ON presc.practice = pract.code
INNER JOIN
  hscic.ccgs AS ccg
ON
  presc.pct=ccg.code
WHERE
    ccg.org_type='CCG' AND
    pract.setting = 4 AND
    presc.bnf_code IN (
        SELECT DISTINCT(bnf_code)
        FROM ebmdatalab.measures.dmd_objs_with_form_route
        WHERE form_route IN ('pressurizedinhalation.inhalation', 'powderinhalation.inhalation')
        )
AND bnf_code LIKE "050%"
GROUP BY pct, month, bnf_name, bnf_code
ORDER BY pct, month
'''

df_inh_abx = bq.cached_read(sql, csv_path='df_inh_abx.csv')
df_inh_abx['month'] = df_inh_abx['month'].astype('datetime64[ns]')
df_inh_abx.head()
# -

SELECT
     CAST(month AS DATE) AS month,
     practice AS practice_id,
     SUM(items) AS denominator
 FROM hscic.normalised_prescribing_standard
 WHERE bnf_code IN (   SELECT DISTINCT(bnf_code)   FROM measures.dmd_objs_with_form_route   WHERE form_route IN ('pressurizedinhalation.inhalation', 'powderinhalation.inhalation')   AND bnf_code LIKE '03%'    AND bnf_code NOT LIKE '0301011R0%'  )
 GROUP BY month, practice_id
Esempio n. 29
0
import os
import pandas as pd

# +
sql = '''
WITH bnf_codes AS (  
  SELECT DISTINCT bnf_code FROM measures.dmd_objs_with_form_route WHERE 
  (bnf_code LIKE '0301011R0%' OR ##BNF salbutamol - excluded bnf_code LIKE '0301040R0%' as it only has comination with ipratropium in it
   bnf_code LIKE '0301011V0%')    ##terbutaline
  AND (form_route LIKE '%pressurizedinhalation.inhalation' OR form_route LIKE 'powderinhalation.inhalation%')
   )
   
SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, nm, bnf_code, id'''

saba_codelist = bq.cached_read(sql,
                               csv_path=os.path.join('..', 'data',
                                                     'saba_codelist.csv'))
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
saba_codelist
# ## Import results

lidocaine = practice_class.concatenate_outputs()
lidocaine.head()

# ### Get list to filter closed practices

query = """
SELECT
  DISTINCT code
FROM
  ebmdatalab.hscic.practices
WHERE
  status_code = "A"
"""
open_practices = bq.cached_read(query, csv_path='data/open_practices.csv')
open_practices.head()

# +
### Get practice list to filter small list sizes
# -

query = """
SELECT
  DISTINCT practice
FROM
  ebmdatalab.hscic.practice_statistics
WHERE
  total_list_size < 2000
"""
small_list_size = bq.cached_read(query, csv_path='data/small_list_size.csv')