class CommandsFunctionalTestCase(TestCase):
    # Fixtures supply Presentation rows plus the ImportLog entry whose
    # `current_at` date determines the raw data table's name suffix below.
    fixtures = ['presentations', 'importlog']

    def setUp(self):
        """Create a raw_prescribing_data table such as is expected to
        exist by ADQ calculation code.
        """
        raw_data_path = 'frontend/tests/fixtures/commands/' +\
            'convert_hscic_prescribing/2016_01/' +\
            'Detailed_Prescribing_Information.csv'
        # The import_adqs command looks for a table named
        # raw_prescribing_data_YYYY_MM for the latest prescribing month.
        year_and_month = ImportLog.objects.latest_in_category(
            'prescribing').current_at.strftime("%Y_%m")
        self.table_name = 'raw_prescribing_data_{}'.format(year_and_month)
        self.client = Client('tmp_eu')
        t1 = self.client.get_or_create_table(
            self.table_name, RAW_PRESCRIBING_SCHEMA)
        # skip_leading_rows=1: the fixture CSV has a header row.
        t1.insert_rows_from_csv(raw_data_path, skip_leading_rows=1)
        call_command('import_adqs')

    def test_import_adqs_functional(self):
        # These all have ADQs in the raw data
        for p in Presentation.objects.filter(bnf_code__regex='0202010B0.*AB'):
            self.assertEqual(p.adq_per_quantity, 1.0)
        # This doesn't exist in the raw data
        p = Presentation.objects.get(bnf_code='0204000I0AAAZAZ')
        self.assertEqual(p.adq_per_quantity, None)

    def tearDown(self):
        # Best-effort cleanup: the table may already be gone if setUp
        # failed before creating it.
        try:
            self.client.delete_table(self.table_name)
        except NotFound:
            pass
def write_zero_prescribing_codes_table(level):
    """Given a BNF `level` (`section`, `chapter`, `paragraph`, etc),
    write a table in bigquery listing all such levels that have zero
    prescribing.

    Returns a bigquery Table.
    """
    logger.info("Scanning %s to see if it has zero prescribing" % level)
    sql = """
    SELECT
      bnf.%s
    FROM
      {hscic}.normalised_prescribing_standard AS prescribing
    RIGHT JOIN
      {hscic}.bnf bnf
    ON
      prescribing.bnf_code = bnf.presentation_code
    WHERE (
      bnf.presentation_code NOT LIKE '2%%'  -- appliances, etc
    )
    GROUP BY
      bnf.%s
    HAVING
      COUNT(prescribing.bnf_code) = 0
    """ % (level, level)
    bq_client = Client('tmp_eu')
    unused_codes_table = bq_client.get_table('unused_codes_%s' % level)
    unused_codes_table.insert_rows_from_query(sql)
    return unused_codes_table
class CommandsFunctionalTestCase(TestCase):
    # Fixtures supply Presentation rows plus the ImportLog entry whose
    # `current_at` date determines the raw data table's name suffix below.
    fixtures = ["presentations", "importlog"]

    def setUp(self):
        """Create a raw_prescribing_data table such as is expected to
        exist by ADQ calculation code.
        """
        raw_data_path = ("frontend/tests/fixtures/commands/" +
                         "convert_hscic_prescribing/2016_01/" +
                         "EPD_201601.csv")
        # The import_adqs command looks for a table named
        # raw_prescribing_data_YYYY_MM for the latest prescribing month.
        year_and_month = ImportLog.objects.latest_in_category(
            "prescribing").current_at.strftime("%Y_%m")
        self.table_name = "raw_prescribing_data_{}".format(year_and_month)
        self.client = Client("tmp_eu")
        t1 = self.client.get_or_create_table(self.table_name,
                                             RAW_PRESCRIBING_SCHEMA_V2)
        # skip_leading_rows=1: the fixture CSV has a header row.
        t1.insert_rows_from_csv(raw_data_path, RAW_PRESCRIBING_SCHEMA_V2,
                                skip_leading_rows=1)
        call_command("import_adqs")

    def test_import_adqs_functional(self):
        # These all have ADQs in the raw data
        for p in Presentation.objects.filter(bnf_code__regex="0202010B0.*AB"):
            self.assertEqual(p.adq_per_quantity, 1.0)
        # This doesn't exist in the raw data
        p = Presentation.objects.get(bnf_code="0204000I0AAAZAZ")
        self.assertEqual(p.adq_per_quantity, None)

    def tearDown(self):
        # Best-effort cleanup: the table may already be gone if setUp
        # failed before creating it.
        try:
            self.client.delete_table(self.table_name)
        except NotFound:
            pass
def handle(self, *args, **kwargs):
    """(Re)create the views in the `measures` dataset from the SQL files
    shipped under measure_sql/.
    """
    base_path = os.path.join(
        settings.APPS_ROOT, "frontend", "management", "commands", "measure_sql"
    )
    client = Client("measures")
    # Order matters: gaba_total_ddd references pregabalin_total_mg, so it
    # must come after it.
    table_names = [
        "dmd_objs_with_form_route",
        "opioid_total_ome",
        "practice_data_all_low_priority",
        "pregabalin_total_mg",
        "vw__median_price_per_unit",
        "vw__ghost_generic_measure",
        "gaba_total_ddd",
    ]
    for table_name in table_names:
        with open(os.path.join(base_path, table_name + ".sql"), "r") as sql_file:
            sql = sql_file.read()
        try:
            client.create_table_with_view(table_name, sql, False)
        except Conflict:
            # The view already exists: drop it and recreate it with the
            # freshly-read SQL.
            client.delete_table(table_name)
            client.create_table_with_view(table_name, sql, False)

    # cmpa_products is a table that has been created and managed by Rich.
    schema = build_schema(
        ("bnf_code", "STRING"), ("bnf_name", "STRING"), ("type", "STRING")
    )
    client.get_or_create_table("cmpa_products", schema)
def upload_dummy_prescribing(bnf_codes):
    """Upload enough dummy prescribing data to BQ to allow the BNF code
    simplification to be meaningful."""
    # Row layout: sha, pct, practice, bnf_code, bnf_name, items, net_cost,
    # actual_cost, quantity, month.  Only the BNF code needs a real value.
    prescribing_rows = [
        [None, None, None, bnf_code] + [None] * 6 for bnf_code in bnf_codes
    ]
    table = Client("hscic").get_table("normalised_prescribing")
    with tempfile.NamedTemporaryFile("wt") as f:
        csv.writer(f).writerows(prescribing_rows)
        # Rewind so the upload reads from the start of the file.
        f.seek(0)
        table.insert_rows_from_csv(f.name, schemas.PRESCRIBING_SCHEMA)
def fill_views(self):
    # Regenerate each "vw__" BigQuery view in parallel (one pool worker
    # per SQL file), then download the exported results and import them
    # locally, one table at a time.
    client = Client('hscic')
    pool = Pool(processes=len(self.view_paths))
    tables = []
    # All views are filled as of the most recently imported prescribing
    # month.
    prescribing_date = ImportLog.objects.latest_in_category(
        'prescribing').current_at.strftime('%Y-%m-%d')
    for path in self.view_paths:
        # Table name convention: vw__<sql filename without extension>
        table_name = "vw__%s" % os.path.basename(path).replace('.sql', '')
        table = client.get_table(table_name)
        tables.append(table)
        with open(path) as f:
            sql = f.read()
        substitutions = {'this_month': prescribing_date}
        args = [table.name, sql, substitutions]
        pool.apply_async(query_and_export, args)
    pool.close()
    pool.join()  # wait for all worker processes to exit
    for table in tables:
        self.download_and_import(table)
        self.log("-------------")
def fill_views(self):
    # Regenerate each "vw__" BigQuery view in parallel (one pool worker
    # per SQL file), import the exported results locally, then refresh
    # the Postgres materialized views that depend on them.
    client = Client('hscic')
    pool = Pool(processes=len(self.view_paths))
    tables = []
    # All views are filled as of the most recently imported prescribing
    # month.
    prescribing_date = ImportLog.objects.latest_in_category(
        'prescribing').current_at.strftime('%Y-%m-%d')
    for path in self.view_paths:
        # Table name convention: vw__<sql filename without extension>
        table_name = "vw__%s" % os.path.basename(path).replace('.sql', '')
        table = client.get_table(table_name)
        tables.append(table)
        with open(path) as f:
            sql = f.read()
        substitutions = {'this_month': prescribing_date}
        args = [table.table_id, sql, substitutions]
        pool.apply_async(query_and_export, args)
    pool.close()
    pool.join()  # wait for all worker processes to exit
    for table in tables:
        self.download_and_import(table)
        self.log("-------------")
    with connection.cursor() as cursor:
        for view_id in self.materialized_views:
            # This is quite slow! up to 10 mins.
            cursor.execute("REFRESH MATERIALIZED VIEW %s" % view_id)
def query_and_export(table_name, sql, substitutions):
    """Populate the BigQuery table `table_name` from `sql`, then export
    the resulting rows to cloud storage, replacing any earlier export.
    """
    storage_prefix = 'hscic/views/{}-'.format(table_name)
    try:
        bq_client = Client('hscic')
        target_table = bq_client.get_table(table_name)
        logger.info("Generating view %s and saving to %s" % (
            table_name, storage_prefix))
        logger.info("Running SQL for %s: %s" % (table_name, sql))
        target_table.insert_rows_from_query(sql, substitutions=substitutions)
        exporter = TableExporter(target_table, storage_prefix)
        logger.info('Deleting existing data in storage at %s' % storage_prefix)
        exporter.delete_from_storage()
        logger.info('Exporting data to storage at %s' % storage_prefix)
        exporter.export_to_storage()
        logger.info("View generation complete for %s" % table_name)
    except Exception:
        # The multiprocessing pool that invokes this only surfaces the
        # exception message (with no traceback), so log the fully
        # formatted traceback here before re-raising.
        logger.error(traceback.format_exc())
        raise
def ensure_stats_downloaded_for_date(date):
    """
    Download practice statistics for date, or do nothing if already downloaded
    """
    filename = get_practice_stats_filename(date)
    if os.path.exists(filename):
        # Already fetched on a previous run.
        return
    client = Client("hscic")
    check_stats_in_bigquery(date, client)
    logger.info("Downloading practice statistics for %s", date)
    # Write to a temp file first so an interrupted download never leaves
    # a valid-looking file behind; the final rename is atomic.
    temp_name = get_temp_filename(filename)
    sql = """
    SELECT *
    FROM {hscic}.practice_statistics_all_years
    WHERE month = TIMESTAMP("%s")
    """ % (date,)
    result = client.query(sql)
    with gzip.open(temp_name, "wt") as f:
        writer = csv.writer(f)
        writer.writerow(result.field_names)
        writer.writerows(result.rows)
    os.rename(temp_name, filename)
def upload_dummy_prescribing(bnf_codes):
    """Upload enough dummy prescribing data to BQ to allow the BNF code
    simplification to be meaningful."""
    # 13-column prescribing row; only the BNF code (column 7) needs a
    # real value for this purpose.
    prescribing_rows = [
        [None] * 6 + [bnf_code] + [None] * 6 for bnf_code in bnf_codes
    ]
    table = Client("hscic").get_table("normalised_prescribing_standard")
    with tempfile.NamedTemporaryFile("wt") as f:
        csv.writer(f).writerows(prescribing_rows)
        # Rewind so the upload reads from the start of the file.
        f.seek(0)
        table.insert_rows_from_csv(f.name, schemas.PRESCRIBING_SCHEMA)
def reimport_all(self):
    # Re-import the last five years of prescribing data one month at a
    # time: extract the month's rows from BigQuery into a temp table,
    # export to cloud storage, download, then rebuild the local Postgres
    # partition for that month inside a transaction.
    last_imported = ImportLog.objects.latest_in_category(
        'prescribing').current_at
    self.date = last_imported - relativedelta(years=5)
    client = Client('tmp_eu')
    while self.date <= last_imported:
        date_str = self.date.strftime('%Y-%m-%d')
        # Column aliases match the layout expected by
        # import_prescriptions; "%%Y" etc are doubled because the whole
        # string goes through %-formatting below.
        sql = ('SELECT pct AS pct_id, practice AS practice_id, '
               'bnf_code AS presentation_code, items AS total_items, '
               'net_cost, actual_cost, quantity, '
               'FORMAT_TIMESTAMP("%%Y_%%m_%%d", month) AS processing_date '
               'FROM {hscic}.normalised_prescribing_standard '
               "WHERE month = '%s'" % date_str)
        table_name = "prescribing_%s" % date_str.replace('-', '_')
        table = client.get_or_create_table(table_name)
        table.insert_rows_from_query(sql)
        exporter = TableExporter(table, 'tmp/{}-*'.format(table_name))
        exporter.export_to_storage()
        with tempfile.NamedTemporaryFile(mode='wb') as tmpfile:
            logger.info("Importing data for %s" % self.date)
            exporter.download_from_storage_and_unzip(tmpfile)
            # Rebuild the partition atomically so readers never see a
            # partially-imported month.
            with transaction.atomic():
                self.drop_partition()
                self.create_partition()
                self.import_prescriptions(tmpfile.name)
                self.create_partition_indexes()
                self.add_parent_trigger()
        self.date += relativedelta(months=1)
def handle(self, *args, **kwargs):
    """Regenerate the smoketest JSON expectations from live BigQuery
    data covering the five years up to `last_imported`.
    """
    last_imported = kwargs["last_imported"]
    prescribing_date = "-".join(last_imported.split("_")) + "-01"
    date_condition = ('month > TIMESTAMP(DATE_SUB(DATE "%s", '
                      "INTERVAL 5 YEAR))" % prescribing_date)
    path = os.path.join(settings.PIPELINE_METADATA_DIR, "smoketests")
    for sql_file in glob.glob(os.path.join(path, "*.sql")):
        test_name = os.path.splitext(os.path.basename(sql_file))[0]
        # Each .sql template carries a {{ date_condition }} placeholder.
        with open(sql_file) as f:
            query = f.read().replace("{{ date_condition }}", date_condition)
        print(query)
        results = Client().query(query)
        quantity, cost, items = [], [], []
        for row in results_to_dicts(results):
            quantity.append(row["quantity"])
            cost.append(row["actual_cost"])
            items.append(row["items"])
        print("Updating test expectations for %s" % test_name)
        json_path = os.path.join(path, "%s.json" % test_name)
        with open(json_path, "w") as f:
            obj = {"cost": cost, "items": items, "quantity": quantity}
            json.dump(obj, f, indent=2)
def handle(self, *args, **kwargs):
    """(Re)create the views in the `measures` dataset from the SQL files
    shipped under measure_sql/.
    """
    base_path = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                             'commands', 'measure_sql')
    client = Client("measures")
    for table_name in ('opioid_total_ome',
                       'practice_data_all_low_priority',
                       'pregabalin_total_mg',
                       'vw__median_price_per_unit',
                       'vw__ghost_generic_measure'):
        with open(os.path.join(base_path, table_name + '.sql'), "r") as sql_file:
            sql = sql_file.read()
        try:
            client.create_table_with_view(table_name, sql, False)
        except Conflict:
            # The view already exists: drop it and recreate it with the
            # freshly-read SQL.
            client.delete_table(table_name)
            client.create_table_with_view(table_name, sql, False)

    # cmpa_products is a table that has been created and managed by Rich.
    schema = build_schema(
        ('bnf_code', 'STRING'),
        ('bnf_name', 'STRING'),
        ('type', 'STRING'),
    )
    client.get_or_create_table('cmpa_products', schema)
def handle(self, *args, **kwargs):
    """Scrape the NHSBSA Drug Tariff Part VIIIA page, import any months
    not already recorded in ImportLog, upload the result to BigQuery and
    announce the outcome on Slack.
    """
    url = "https://www.nhsbsa.nhs.uk/pharmacies-gp-practices-and-appliance-contractors/drug-tariff/drug-tariff-part-viii/"
    rsp = requests.get(url)
    doc = bs4.BeautifulSoup(rsp.content, "html.parser")

    month_abbrs = [x.lower() for x in calendar.month_abbr]
    imported_months = []

    for a in doc.findAll("a", href=re.compile(r"Part%20VIIIA.+\.xlsx$")):
        # a.attrs['href'] typically has a filename part like
        # Part%20VIIIA%20September%202017.xlsx
        #
        # We split that into ['Part', 'VIIIA', 'September', '2017']
        base_filename = unquote(
            os.path.splitext(os.path.basename(a.attrs["href"]))[0])
        if base_filename == "Part VIIIA Nov 20 updated":
            # November 2020 has a different filename. In general we want to be
            # warned (through the scraper crashing) about updates (because we have
            # to delete all records for the month in question, and reimport) so
            # special-casing is appropriate here.
            year, month = "2020", 11
        else:
            words = re.split(r"[ -]+", base_filename)
            month_name, year = words[-2:]
            # We have seen the last token in `words` be "19_0". The year is
            # reported to us via Slack, so if we pull out some nonsense here we
            # *should* notice.
            # Raw string: "\d" in a plain string is an invalid escape
            # sequence (a SyntaxWarning from Python 3.12).
            year = re.match(r"\d+", year).group()
            if len(year) == 2:
                year = "20" + year
            # We have seen the month be `September`, `Sept`, and `Sep`.
            month_abbr = month_name.lower()[:3]
            month = month_abbrs.index(month_abbr)
        date = datetime.date(int(year), month, 1)
        if ImportLog.objects.filter(category="tariff", current_at=date).exists():
            continue
        xls_url = urljoin(url, a.attrs["href"])
        xls_file = BytesIO(requests.get(xls_url).content)
        import_month(xls_file, date)
        imported_months.append((year, month))

    if imported_months:
        client = Client("dmd")
        client.upload_model(TariffPrice)
        for year, month in imported_months:
            msg = "Imported Drug Tariff for %s_%s" % (year, month)
            notify_slack(msg)
    else:
        msg = "Found no new tariff data to import"
        notify_slack(msg)
def handle(self, *args, **kwargs):
    """Regenerate the smoketest JSON expectations from live BigQuery
    data covering the five years up to `last_imported`.

    Files are opened in text mode: the SQL template is string-replaced
    as str and `json.dump` writes str, so the previous binary modes
    ('rb'/'wb') raise TypeError under Python 3.
    """
    last_imported = kwargs['last_imported']
    prescribing_date = "-".join(last_imported.split('_')) + '-01'
    date_condition = ('month > TIMESTAMP(DATE_SUB(DATE "%s", '
                      'INTERVAL 5 YEAR))' % prescribing_date)
    path = os.path.join(settings.PIPELINE_METADATA_DIR, 'smoketests')
    for sql_file in glob.glob(os.path.join(path, '*.sql')):
        test_name = os.path.splitext(os.path.basename(sql_file))[0]
        # Text mode (was 'rb'): str.replace below needs str, not bytes.
        with open(sql_file) as f:
            query = f.read().replace(
                '{{ date_condition }}', date_condition)
            print(query)
        client = Client()
        results = client.query(query)
        quantity = []
        cost = []
        items = []
        for row in results_to_dicts(results):
            quantity.append(row['quantity'])
            cost.append(row['actual_cost'])
            items.append(row['items'])
        print("Updating test expectations for %s" % test_name)
        json_path = os.path.join(path, '%s.json' % test_name)
        # Text mode (was 'wb'): json.dump emits str, not bytes.
        with open(json_path, 'w') as f:
            obj = {'cost': cost, 'items': items, 'quantity': quantity}
            json.dump(obj, f, indent=2)
def backup_table(self, table_name):
    """Export the latest month of `table_name` to cloud storage, unless
    a backup for that month already exists under backups/<table>/.
    """
    client = Client("hscic")
    sql = "SELECT max(month) FROM {hscic}.%s" % table_name
    latest_date = client.query(sql).rows[0][0]
    latest_year_and_month = latest_date.strftime("%Y_%m")
    table = client.get_table(table_name)

    storage_client = StorageClient()
    bucket = storage_client.bucket()
    year_and_months = set()
    prefix_base = "backups/{}/".format(table_name)
    for blob in bucket.list_blobs(prefix=prefix_base):
        # Raw string: "\d" in a plain string is an invalid escape
        # sequence (a SyntaxWarning from Python 3.12).  Skip blobs that
        # don't follow the backups/<table>/<YYYY_MM>/ layout instead of
        # crashing with AttributeError on a failed match.
        match = re.search(r"/(\d{4}_\d{2})/", blob.name)
        if match:
            year_and_months.add(match.group(1))

    if latest_year_and_month in year_and_months:
        print("{} table already backed up for {}".format(
            table_name, latest_year_and_month))
        return

    storage_prefix = "{}/{}/{}-".format(
        prefix_base, latest_year_and_month, table_name)
    exporter = TableExporter(table, storage_prefix)
    exporter.export_to_storage()
def handle(self, *args, **kwargs):
    # Scrape the NHSBSA Drug Tariff Part VIIIA page and import any
    # months not already recorded in ImportLog, then upload to BigQuery
    # and announce the outcome on Slack.
    # NOTE(review): `urllib.unquote` and using `StringIO` for binary
    # response content are Python-2-only idioms — confirm which
    # interpreter runs this before reusing it.
    url = 'https://www.nhsbsa.nhs.uk/pharmacies-gp-practices-and-appliance-contractors/drug-tariff/drug-tariff-part-viii/'
    rsp = requests.get(url)
    doc = bs4.BeautifulSoup(rsp.content, 'html.parser')
    month_names = [x.lower() for x in calendar.month_name]
    month_abbrs = [x.lower() for x in calendar.month_abbr]
    imported_months = []
    for a in doc.findAll('a', href=re.compile('Part%20VIIIA')):
        # a.attrs['href'] typically has a filename part like
        # Part%20VIIIA%20September%202017.xlsx
        #
        # We split that into ['Part', 'VIIIA', 'September', '2017']
        words = re.split(
            r'[ -]+',
            urllib.unquote(os.path.splitext(
                os.path.basename(a.attrs['href']))[0]))
        month_name, year = words[-2:]
        # We have seen the last token in `words` be "19_0". The year is
        # reported to us via Slack, so if we pull out some nonsense here we
        # *should* notice.
        year = re.match('\d+', year).group()
        if len(year) == 2:
            year = "20" + year
        # Try the full month name first, then fall back to the
        # abbreviated form (we have seen both on the page).
        try:
            month = month_names.index(month_name.lower())
        except ValueError:
            month = month_abbrs.index(month_name.lower())
        date = datetime.date(int(year), month, 1)
        if ImportLog.objects.filter(
                category='tariff', current_at=date
        ).exists():
            continue
        xls_url = urljoin(url, a.attrs['href'])
        xls_file = StringIO(requests.get(xls_url).content)
        import_month(xls_file, date)
        imported_months.append((year, month))
    if imported_months:
        client = Client('dmd')
        client.upload_model(TariffPrice)
        for year, month in imported_months:
            msg = 'Imported Drug Tariff for %s_%s' % (year, month)
            notify_slack(msg)
    else:
        msg = 'Found no new tariff data to import'
        notify_slack(msg)
def upload_to_bq(self):
    """Push every dm+d model to BigQuery, then rebuild the denormalised
    `<obj_type>_full` tables for VMP, AMP, VMPP and AMPP.
    """
    client = Client("dmd")
    for model in apps.get_app_config("dmd").get_models():
        client.upload_model(model)

    for model in [VMP, AMP, VMPP, AMPP]:
        full_table = client.get_table("{}_full".format(model.obj_type))
        full_table.insert_rows_from_query(self.sql_for_cls(model))
def handle(self, *args, **options):
    """
    Compute and store cost savings for the specified month.

    Deletes any existing data for that month.
    """
    if not options["month"]:
        # No month given: default to the most recently imported
        # prescribing month, and refuse to proceed if PPU savings have
        # already been computed up to (or beyond) that month.
        last_prescribing = ImportLog.objects.latest_in_category(
            "prescribing"
        ).current_at
        options["month"] = last_prescribing
        log = ImportLog.objects.latest_in_category("ppu")
        if log is not None:
            if options["month"] <= log.current_at:
                raise argparse.ArgumentTypeError("Couldn't infer date")
    with transaction.atomic():
        # Create custom presentations for our overrides, if they don't exist.
        # See https://github.com/ebmdatalab/price-per-dose/issues/1.
        Presentation.objects.get_or_create(
            bnf_code="0601060D0AAA0A0",
            name="Glucose Blood Testing Reagents",
            is_generic=True,
        )
        Presentation.objects.get_or_create(
            bnf_code="0601060U0AAA0A0",
            name="Urine Testing Reagents",
            is_generic=True,
        )
        # Replace any previously-computed savings for this month.
        PPUSaving.objects.filter(date=options["month"]).delete()
        for entity_type in ["pct", "practice"]:
            result = get_savings(entity_type, options["month"])
            for row in result.itertuples():
                d = row._asdict()
                # Rows with no price-per-unit carry no saving; skip them.
                if d["price_per_unit"]:
                    PPUSaving.objects.create(
                        date=options["month"],
                        presentation_id=d["Index"],
                        lowest_decile=d["lowest_decile"],
                        quantity=d["quantity"],
                        price_per_unit=d["price_per_unit"],
                        possible_savings=d["possible_savings"],
                        formulation_swap=d["formulation_swap"] or None,
                        pct_id=d.get("pct", None),
                        practice_id=d.get("practice", None),
                    )
    # Record the import, then mirror the savings table to BigQuery.
    ImportLog.objects.create(
        category="ppu", filename="n/a", current_at=options["month"]
    )
    client = Client("hscic")
    table = client.get_or_create_table("ppu_savings", PPU_SAVING_SCHEMA)
    table.insert_rows_from_pg(
        PPUSaving, PPU_SAVING_SCHEMA, transformer=ppu_savings_transform
    )
def handle(self, *args, **kwargs):
    """Delete every empty BigQuery dataset.  Guarded so it can only run
    against the test settings.
    """
    settings_module = os.environ['DJANGO_SETTINGS_MODULE']
    if settings_module != 'openprescribing.settings.test':
        raise CommandError('Command must run with test settings')
    gcbq_client = Client().gcbq_client
    for dataset in gcbq_client.list_datasets():
        # Only datasets with no tables at all are removed.
        if not list(dataset.list_tables()):
            dataset.delete()
def tearDown(self):
    # Remove the test dataset, any storage blobs written under our
    # prefix, and finally the archive dataset created in setUp.
    Client("test").delete_dataset()
    bucket = StorageClient().bucket()
    for blob in bucket.list_blobs(prefix=self.storage_prefix):
        blob.delete()
    Client("archive").delete_dataset()
def update_bnf_table():
    """Update `bnf` table from cloud-stored CSV"""
    bucket = StorageClient().get_bucket()
    # Exports sort lexically by name, so the most recent one is the
    # maximum; equivalent to sorting descending and taking the first.
    blob = max(
        bucket.list_blobs(prefix="hscic/bnf_codes/"),
        key=lambda b: b.name,
    )
    table = BQClient("hscic").get_table("bnf")
    # skip_leading_rows=1: the CSV carries a header row.
    table.insert_rows_from_storage(blob.name, skip_leading_rows=1)
def create_pregabalin_total_mg_view():
    """Create the pregabalin_total_mg view in the `measures` dataset,
    doing nothing if it already exists.
    """
    sql_path = os.path.join(os.path.dirname(__file__),
                            "./measure_sql/pregabalin_total_mg.sql")
    with open(sql_path, "r") as sql_file:
        sql = sql_file.read()
    try:
        Client("measures").create_table_with_view(
            'pregabalin_total_mg', sql, False)
    except Conflict:
        # The view already exists; leave it untouched.
        pass
def create_omnibus_lp_view():
    """Create the low-priority omnibus view in the `measures` dataset,
    doing nothing if it already exists.
    """
    sql_path = os.path.join(os.path.dirname(__file__),
                            "./measure_sql/lpomnibusview.sql")
    with open(sql_path, "r") as sql_file:
        sql = sql_file.read()
    try:
        Client("measures").create_table_with_view(
            'practice_data_all_low_priority', sql, False)
    except Conflict:
        # The view already exists; leave it untouched.
        pass
def handle(self, *args, measure_ids, **kwargs):
    # Build (or rebuild) one public "measure_<id>" BigQuery table per
    # measure, by splicing each measure's numerator/denominator SQL into
    # the shared template.
    base_path = os.path.join(settings.APPS_ROOT, "bq_public_tables")
    client = Client("public")
    with open(os.path.join(base_path, "_measure_template.sql")) as f:
        measure_template_sql = f.read()
    # No explicit IDs means "all measures".
    if measure_ids:
        measures = Measure.objects.filter(id__in=measure_ids)
    else:
        measures = Measure.objects.all()
    for measure in measures:
        table_name = "measure_" + measure.id
        print(table_name)
        table = client.get_table(table_name)
        numerator_sql = """
        SELECT
          CAST(month AS DATE) AS month,
          practice AS practice_id,
          {numerator_columns}
        FROM {numerator_from}
        WHERE {numerator_where}
        GROUP BY month, practice_id
        """.format(
            numerator_columns=measure.numerator_columns,
            numerator_from=measure.numerator_from,
            numerator_where=measure.numerator_where,
        )
        denominator_sql = """
        SELECT
          CAST(month AS DATE) AS month,
          practice AS practice_id,
          {denominator_columns}
        FROM {denominator_from}
        WHERE {denominator_where}
        GROUP BY month, practice_id
        """.format(
            denominator_columns=measure.denominator_columns,
            denominator_from=measure.denominator_from,
            denominator_where=measure.denominator_where,
        )
        # partially_format presumably substitutes only the two SQL
        # placeholders, leaving the template's other braces intact —
        # verify against its definition.
        sql = partially_format(
            measure_template_sql,
            numerator_sql=numerator_sql,
            denominator_sql=denominator_sql,
        )
        table.insert_rows_from_query(sql)
def handle(self, *args, **kwargs):
    # Recreate the storage-backed raw prescribing tables plus the
    # derived views in the `hscic` and `measures` datasets.
    client = Client("hscic")
    try:
        client.create_storage_backed_table(
            "raw_prescribing_v1",
            RAW_PRESCRIBING_SCHEMA_V1,
            "hscic/prescribing_v1/20*Detailed_Prescribing_Information.csv",
        )
    except Conflict:
        # Table already exists; nothing to do.
        pass
    try:
        client.create_storage_backed_table(
            "raw_prescribing_v2",
            RAW_PRESCRIBING_SCHEMA_V2,
            # This pattern may change once the data is published via the
            # new Open Data Portal.
            "hscic/prescribing_v2/20*.csv",
        )
    except Conflict:
        pass
    for table_name in [
        "all_prescribing",
        "normalised_prescribing",
        "normalised_prescribing_standard",
        "raw_prescribing_normalised",
    ]:
        self.recreate_table(client, table_name)

    client = Client("measures")
    for table_name in [
        "dmd_objs_with_form_route",
        "dmd_objs_hospital_only",
        "opioid_total_ome",
        "practice_data_all_low_priority",
        "pregabalin_total_mg",
        "vw__median_price_per_unit",
        "vw__ghost_generic_measure",
        "vw__herbal_list",
        # This references pregabalin_total_mg, so must come afterwards
        "gaba_total_ddd",
    ]:
        self.recreate_table(client, table_name)

    # cmpa_products is a table that has been created and managed by Rich.
    schema = build_schema(
        ("bnf_code", "STRING"), ("bnf_name", "STRING"), ("type", "STRING")
    )
    client.get_or_create_table("cmpa_products", schema)
def handle(self, *args, **options):
    """Update Presentation.adq_per_quantity from the latest month of raw
    prescribing data held in BigQuery.
    """
    client = Client("tmp_eu")
    year_and_month = ImportLog.objects.latest_in_category(
        "prescribing").current_at.strftime("%Y_%m")
    raw_table = "raw_prescribing_data_{}".format(year_and_month)
    sql = SQL.format(detailed_raw_data_table="{tmp_eu}.%s" % raw_table)
    # Apply every update in one transaction so an interrupted run leaves
    # no partially-updated state.
    with transaction.atomic():
        for bnf_code_regex, adq_per_quantity in client.query(sql).rows:
            Presentation.objects.filter(
                bnf_code__regex=bnf_code_regex
            ).update(adq_per_quantity=adq_per_quantity)
def handle(self, *args, **kwargs):
    """Rebuild each public BigQuery table from its .sql file under
    bq_public_tables/ (table name = file name without extension).
    """
    base_path = os.path.join(settings.APPS_ROOT, "bq_public_tables")
    client = Client("public")
    for sql_path in glob.glob(os.path.join(base_path, "*.sql")):
        with open(sql_path) as f:
            sql = f.read()
        name = os.path.splitext(os.path.basename(sql_path))[0]
        client.get_table(name).insert_rows_from_query(sql)
def setUp(self):
    # Create fresh `test` and `archive` datasets, and remember the
    # storage prefix used by tests that export to cloud storage.
    main_client = Client('test')
    self.storage_prefix = 'test_bq_client/{}-'.format(main_client.dataset_id)
    main_client.create_dataset()
    Client('archive').create_dataset()
def setUp(self):
    # Set up the BigQuery datasets this test suite writes to, and record
    # the storage prefix used for exports.
    test_client = Client("test")
    test_client.create_dataset()
    self.storage_prefix = "test_bq_client/{}-".format(test_client.dataset_id)
    Client("archive").create_dataset()