コード例 #1
0
    def handle(self, *args, **kwargs):
        """Create the view-backed BigQuery tables used by measures, plus
        the cmpa_products table."""
        sql_dir = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                               'commands', 'measure_sql')

        client = Client("measures")

        table_names = [
            'opioid_total_ome',
            'practice_data_all_low_priority',
            'pregabalin_total_mg',
            'vw__median_price_per_unit',
            'vw__ghost_generic_measure',
        ]
        for table_name in table_names:
            with open(os.path.join(sql_dir, table_name + '.sql'), "r") as f:
                sql = f.read()

            try:
                client.create_table_with_view(table_name, sql, False)
            except Conflict:
                # Table already exists: drop it and recreate from the
                # current SQL so the view definition is up to date.
                client.delete_table(table_name)
                client.create_table_with_view(table_name, sql, False)

        # cmpa_products is a table that has been created and managed by Rich.
        cmpa_schema = build_schema(
            ('bnf_code', 'STRING'),
            ('bnf_name', 'STRING'),
            ('type', 'STRING'),
        )
        client.get_or_create_table('cmpa_products', cmpa_schema)
コード例 #2
0
    def handle(self, *args, **kwargs):
        """Create the view-backed BigQuery tables used by measures, plus
        the cmpa_products table."""
        sql_dir = os.path.join(
            settings.APPS_ROOT, "frontend", "management", "commands", "measure_sql"
        )

        client = Client("measures")

        table_names = [
            "dmd_objs_with_form_route",
            "opioid_total_ome",
            "practice_data_all_low_priority",
            "pregabalin_total_mg",
            "vw__median_price_per_unit",
            "vw__ghost_generic_measure",
            # This references pregabalin_total_mg, so must come afterwards
            "gaba_total_ddd",
        ]
        for table_name in table_names:
            with open(os.path.join(sql_dir, table_name + ".sql"), "r") as f:
                sql = f.read()

            try:
                client.create_table_with_view(table_name, sql, False)
            except Conflict:
                # Table already exists: drop it and recreate from the
                # current SQL so the view definition is up to date.
                client.delete_table(table_name)
                client.create_table_with_view(table_name, sql, False)

        # cmpa_products is a table that has been created and managed by Rich.
        cmpa_schema = build_schema(
            ("bnf_code", "STRING"), ("bnf_name", "STRING"), ("type", "STRING")
        )
        client.get_or_create_table("cmpa_products", cmpa_schema)
コード例 #3
0
    def handle(self, *args, **kwargs):
        """(Re)create the BigQuery tables that measures and the prescribing
        import depend on."""
        try:
            Client("hscic").create_storage_backed_table(
                "raw_prescribing",
                RAW_PRESCRIBING_SCHEMA,
                "hscic/prescribing/20*Detailed_Prescribing_Information.csv",
            )
        except Conflict:
            # Storage-backed table already exists; nothing to do.
            pass

        measures_client = Client("measures")

        table_names = (
            "dmd_objs_with_form_route",
            "opioid_total_ome",
            "practice_data_all_low_priority",
            "pregabalin_total_mg",
            "vw__median_price_per_unit",
            "vw__ghost_generic_measure",
            "vw__herbal_list",
            # This references pregabalin_total_mg, so must come afterwards
            "gaba_total_ddd",
        )
        for table_name in table_names:
            self.recreate_table(measures_client, table_name)

        self.recreate_table(Client("hscic"), "raw_prescribing_normalised")

        # cmpa_products is a table that has been created and managed by Rich.
        schema = build_schema(
            ("bnf_code", "STRING"),
            ("bnf_name", "STRING"),
            ("type", "STRING"),
        )
        measures_client.get_or_create_table("cmpa_products", schema)
コード例 #4
0
    def setUpClass(cls):
        """Load DB fixtures, mirror them into the test BigQuery project
        (unless SKIP_BQ_LOAD is set), and create the matview tables."""
        if 'SKIP_BQ_LOAD' not in os.environ:
            # Create local test data from fixtures, then upload this to a
            # test project in bigquery
            for fixture_name in ('ccgs.json', 'practices.json',
                                 'practice_listsizes.json'):
                call_command('loaddata',
                             'frontend/tests/fixtures/' + fixture_name,
                             verbosity=0)
            prescribing_fixture_path = os.path.join(
                'frontend', 'tests', 'fixtures', 'commands',
                'prescribing_bigquery_views_fixture.csv'
            )

            client = Client('hscic')

            # Both normalised prescribing tables receive the same fixture.
            for table_name in ('normalised_prescribing_standard',
                               'normalised_prescribing_legacy'):
                prescribing_table = client.get_or_create_table(
                    table_name, PRESCRIBING_SCHEMA)
                prescribing_table.insert_rows_from_csv(
                    prescribing_fixture_path)

            ccgs_table = client.get_or_create_table('ccgs', CCG_SCHEMA)
            ccg_columns = [field.name for field in CCG_SCHEMA]
            ccgs_table.insert_rows_from_pg(PCT, ccg_columns, ccgs_transform)

            stats_table = client.get_or_create_table(
                'practice_statistics', PRACTICE_STATISTICS_SCHEMA)
            stats_columns = [
                field.name for field in PRACTICE_STATISTICS_SCHEMA]
            # Override the first and last column names when copying from
            # Postgres (the model's field names differ from the BQ schema).
            stats_columns[0] = 'date'
            stats_columns[-1] = 'practice_id'
            stats_table.insert_rows_from_pg(
                PracticeStatistics, stats_columns, statistics_transform)

            # Remove any leftover view exports from cloud storage.
            bucket = StorageClient().get_bucket()
            for blob in bucket.list_blobs(prefix='hscic/views/vw__'):
                blob.delete()

        ImportLog.objects.create(
            category='prescribing', current_at='2015-10-01')
        # Create view tables and indexes
        with open(
                'frontend/management/commands/replace_matviews.sql', 'r') as f:
            with connection.cursor() as c:
                c.execute(f.read())
コード例 #5
0
    def setUpTestData(cls):
        """Upload fixture CSVs to the test BigQuery project and run the
        PPU-savings import command against them.

        Fix: removed an unused ``columns`` local that was computed from
        PRACTICE_SCHEMA but never passed to anything.
        """
        fixtures_base_path = os.path.join(
            'frontend', 'tests', 'fixtures', 'commands',
        )

        client = Client('hscic')

        prescribing_fixture_path = os.path.join(
            fixtures_base_path,
            'prescribing_bigquery_fixture.csv'
        )
        table = client.get_or_create_table(
            'normalised_prescribing_standard',
            bq_schemas.PRESCRIBING_SCHEMA
        )
        table.insert_rows_from_csv(prescribing_fixture_path)

        practices_fixture_path = os.path.join(
            fixtures_base_path,
            'practices.csv'
        )
        table = client.get_or_create_table(
            'practices',
            bq_schemas.PRACTICE_SCHEMA
        )
        table.insert_rows_from_csv(practices_fixture_path)

        tariff_path = os.path.join(fixtures_base_path, 'tariff_fixture.csv')
        table = client.get_or_create_table('tariff', bq_schemas.TARIFF_SCHEMA)
        table.insert_rows_from_csv(tariff_path)

        bnf_path = os.path.join(
            fixtures_base_path,
            'bnf_codes_for_ppu_savings.csv'
        )
        table = client.get_or_create_table('bnf', bq_schemas.BNF_SCHEMA)
        table.insert_rows_from_csv(bnf_path)

        month = date(2015, 9, 1)
        dummy_substitutions = pd.read_csv(
            os.path.join(fixtures_base_path, 'ppu_substitutions.csv'))
        # Patch read_csv inside the command module so it sees our fixture
        # substitutions rather than the real data file.
        with patch(
                'frontend.management.commands.import_ppu_savings.pd.read_csv',
                return_value=dummy_substitutions):
            import_ppu_savings.Command().handle(
                month=month,
                min_ccg_saving=0,
                min_practice_saving=0,
                limit=1
            )
コード例 #6
0
    def reimport_all(self):
        """Re-import the last five years of prescribing data, one month at
        a time: export each month from BigQuery to cloud storage, download
        it, and load it into a freshly rebuilt Postgres partition."""
        last_imported = ImportLog.objects.latest_in_category(
            'prescribing').current_at
        # Start five years back and walk forward month by month.
        self.date = last_imported - relativedelta(years=5)
        client = Client('tmp_eu')
        while self.date <= last_imported:
            date_str = self.date.strftime('%Y-%m-%d')
            # NB: %-formatting applies to the whole concatenated literal, so
            # the %% sequences come out as single % characters, yielding
            # FORMAT_TIMESTAMP("%Y_%m_%d", month) in the final SQL.
            sql = ('SELECT pct AS pct_id, practice AS practice_id, '
                   'bnf_code AS presentation_code, items AS total_items, '
                   'net_cost, actual_cost, quantity, '
                   'FORMAT_TIMESTAMP("%%Y_%%m_%%d", month) AS processing_date '
                   'FROM {hscic}.normalised_prescribing_standard '
                   "WHERE month = '%s'" % date_str)
            table_name = "prescribing_%s" % date_str.replace('-', '_')
            table = client.get_or_create_table(table_name)
            table.insert_rows_from_query(sql)
            # Export the month's rows to storage so they can be downloaded.
            exporter = TableExporter(table, 'tmp/{}-*'.format(table_name))
            exporter.export_to_storage()

            with tempfile.NamedTemporaryFile(mode='wb') as tmpfile:
                logger.info("Importing data for %s" % self.date)
                exporter.download_from_storage_and_unzip(tmpfile)
                # Drop and rebuild the month's partition within a single
                # transaction.
                with transaction.atomic():
                    self.drop_partition()
                    self.create_partition()
                    self.import_prescriptions(tmpfile.name)
                    self.create_partition_indexes()
                    self.add_parent_trigger()
            self.date += relativedelta(months=1)
コード例 #7
0
class CommandsFunctionalTestCase(TestCase):
    """Functional test for the import_adqs command, run against a
    raw_prescribing_data table uploaded to BigQuery."""

    fixtures = ["presentations", "importlog"]

    def setUp(self):
        """Create a raw_prescribing_data table such as is expected to exist by
        ADQ calculation code.
        """
        raw_data_path = (
            "frontend/tests/fixtures/commands/"
            "convert_hscic_prescribing/2016_01/"
            "EPD_201601.csv"
        )
        year_and_month = ImportLog.objects.latest_in_category(
            "prescribing").current_at.strftime("%Y_%m")
        self.table_name = "raw_prescribing_data_{}".format(year_and_month)
        self.client = Client("tmp_eu")
        table = self.client.get_or_create_table(
            self.table_name, RAW_PRESCRIBING_SCHEMA_V2)
        table.insert_rows_from_csv(
            raw_data_path, RAW_PRESCRIBING_SCHEMA_V2, skip_leading_rows=1)

        call_command("import_adqs")

    def test_import_adqs_functional(self):
        # These all have ADQs in the raw data
        for presentation in Presentation.objects.filter(
                bnf_code__regex="0202010B0.*AB"):
            self.assertEqual(presentation.adq_per_quantity, 1.0)
        # This doesn't exist in the raw data
        missing = Presentation.objects.get(bnf_code="0204000I0AAAZAZ")
        self.assertEqual(missing.adq_per_quantity, None)

    def tearDown(self):
        try:
            self.client.delete_table(self.table_name)
        except NotFound:
            # Table was never created; nothing to clean up.
            pass
コード例 #8
0
def set_up_bq():
    """Set up BQ datasets and tables."""

    try:
        Client("measures").create_dataset()
    except Conflict:
        # Dataset already exists; that's fine.
        pass

    client = Client("hscic")
    for table_name, schema in [
        ("ccgs", schemas.CCG_SCHEMA),
        ("practices", schemas.PRACTICE_SCHEMA),
        ("normalised_prescribing", schemas.PRESCRIBING_SCHEMA),
        ("practice_statistics", schemas.PRACTICE_STATISTICS_SCHEMA),
        ("presentation", schemas.PRESENTATION_SCHEMA),
    ]:
        client.get_or_create_table(table_name, schema)
コード例 #9
0
class CommandsFunctionalTestCase(TestCase):
    """Functional test for the import_adqs command, run against a
    raw_prescribing_data table uploaded to BigQuery."""

    fixtures = ['presentations', 'importlog']

    def setUp(self):
        """Create a raw_prescribing_data table such as is expected to exist by
        ADQ calculation code.
        """
        raw_data_path = (
            'frontend/tests/fixtures/commands/'
            'convert_hscic_prescribing/2016_01/'
            'Detailed_Prescribing_Information.csv'
        )
        year_and_month = ImportLog.objects.latest_in_category(
            'prescribing').current_at.strftime("%Y_%m")
        self.table_name = 'raw_prescribing_data_{}'.format(year_and_month)
        self.client = Client('tmp_eu')
        table = self.client.get_or_create_table(
            self.table_name, RAW_PRESCRIBING_SCHEMA)
        table.insert_rows_from_csv(raw_data_path, skip_leading_rows=1)

        call_command('import_adqs')

    def test_import_adqs_functional(self):
        # These all have ADQs in the raw data
        for presentation in Presentation.objects.filter(
                bnf_code__regex='0202010B0.*AB'):
            self.assertEqual(presentation.adq_per_quantity, 1.0)
        # This doesn't exist in the raw data
        missing = Presentation.objects.get(bnf_code='0204000I0AAAZAZ')
        self.assertEqual(missing.adq_per_quantity, None)

    def tearDown(self):
        try:
            self.client.delete_table(self.table_name)
        except NotFound:
            # Table was never created; nothing to clean up.
            pass
コード例 #10
0
def set_up_bq():
    '''Set up BQ datasets and tables.

    Fix: creating the measures dataset now tolerates it already existing
    (Conflict), making this function idempotent like the
    get_or_create_table calls below and consistent with how
    create_dataset is handled elsewhere in this codebase.
    '''
    try:
        Client('measures').create_dataset()
    except Conflict:
        # Dataset already exists; that's fine.
        pass

    client = Client('hscic')
    client.get_or_create_table('ccgs', schemas.CCG_SCHEMA)
    client.get_or_create_table('practices', schemas.PRACTICE_SCHEMA)
    client.get_or_create_table('normalised_prescribing_standard',
                               schemas.PRESCRIBING_SCHEMA)
    client.get_or_create_table('practice_statistics',
                               schemas.PRACTICE_STATISTICS_SCHEMA)
コード例 #11
0
    def handle(self, *args, **options):
        """
        Compute and store cost savings for the specified month.

        Deletes any existing data for that month.
        """
        if not options["month"]:
            # Default to the month of the most recent prescribing import.
            options["month"] = ImportLog.objects.latest_in_category(
                "prescribing"
            ).current_at

            log = ImportLog.objects.latest_in_category("ppu")
            if log is not None and options["month"] <= log.current_at:
                raise argparse.ArgumentTypeError("Couldn't infer date")
        with transaction.atomic():
            # Create custom presentations for our overrides, if they don't exist.
            # See https://github.com/ebmdatalab/price-per-dose/issues/1.
            for bnf_code, name in [
                ("0601060D0AAA0A0", "Glucose Blood Testing Reagents"),
                ("0601060U0AAA0A0", "Urine Testing Reagents"),
            ]:
                Presentation.objects.get_or_create(
                    bnf_code=bnf_code, name=name, is_generic=True
                )
            PPUSaving.objects.filter(date=options["month"]).delete()
            for entity_type in ["pct", "practice"]:
                savings = get_savings(entity_type, options["month"])
                for row in savings.itertuples():
                    fields = row._asdict()
                    if not fields["price_per_unit"]:
                        continue
                    PPUSaving.objects.create(
                        date=options["month"],
                        presentation_id=fields["Index"],
                        lowest_decile=fields["lowest_decile"],
                        quantity=fields["quantity"],
                        price_per_unit=fields["price_per_unit"],
                        possible_savings=fields["possible_savings"],
                        formulation_swap=fields["formulation_swap"] or None,
                        pct_id=fields.get("pct", None),
                        practice_id=fields.get("practice", None),
                    )
            ImportLog.objects.create(
                category="ppu", filename="n/a", current_at=options["month"]
            )

        # Mirror the savings into BigQuery.
        client = Client("hscic")
        table = client.get_or_create_table("ppu_savings", PPU_SAVING_SCHEMA)
        table.insert_rows_from_pg(
            PPUSaving, PPU_SAVING_SCHEMA, transformer=ppu_savings_transform
        )
コード例 #12
0
    def handle(self, *args, **options):
        '''
        Compute and store cost savings for the specified month.

        Deletes any existing data for that month.
        '''
        if not options['month']:
            # Default to the month of the most recent prescribing import.
            options['month'] = ImportLog.objects.latest_in_category(
                'prescribing').current_at

            log = ImportLog.objects.latest_in_category('ppu')
            if log is not None and options['month'] <= log.current_at:
                raise argparse.ArgumentTypeError("Couldn't infer date")
        with transaction.atomic():
            # Create custom presentations for our overrides, if they don't exist.
            # See https://github.com/ebmdatalab/price-per-dose/issues/1.
            for bnf_code, name in (
                    ('0601060D0AAA0A0', 'Glucose Blood Testing Reagents'),
                    ('0601060U0AAA0A0', 'Urine Testing Reagents')):
                Presentation.objects.get_or_create(
                    bnf_code=bnf_code, name=name, is_generic=True)
            PPUSaving.objects.filter(date=options['month']).delete()
            for entity_type in ('pct', 'practice'):
                savings = get_savings(entity_type, options['month'])
                for row in savings.itertuples():
                    fields = row._asdict()
                    if not fields['price_per_unit']:
                        continue
                    PPUSaving.objects.create(
                        date=options['month'],
                        presentation_id=fields['Index'],
                        lowest_decile=fields['lowest_decile'],
                        quantity=fields['quantity'],
                        price_per_unit=fields['price_per_unit'],
                        possible_savings=fields['possible_savings'],
                        formulation_swap=fields['formulation_swap'] or None,
                        pct_id=fields.get('pct', None),
                        practice_id=fields.get('practice', None))
            ImportLog.objects.create(category='ppu',
                                     filename='n/a',
                                     current_at=options['month'])

        # Mirror the savings into BigQuery.
        client = Client('hscic')
        table = client.get_or_create_table('ppu_savings', PPU_SAVING_SCHEMA)
        savings_columns = [field.name for field in PPU_SAVING_SCHEMA]
        table.insert_rows_from_pg(
            PPUSaving, savings_columns, ppu_savings_transform)
コード例 #13
0
def create_bigquery_table():
    """Create a table in bigquery of all BNF codes for presentations that
    are no longer current, along with the BNF code of their latest
    incarnation

    Fix: the temporary file is now opened in text mode ('r+' with
    newline='') rather than binary ('r+b').  csv.writer writes str, so
    writing to a binary-mode file raises TypeError on Python 3; newline=''
    is the documented requirement for files passed to csv.writer.
    """
    # output a row for each presentation and its ultimate replacement
    with tempfile.NamedTemporaryFile(mode='r+', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for p in Presentation.objects.filter(replaced_by__isnull=False):
            writer.writerow([p.bnf_code, p.current_version.bnf_code])
        # Flush rows to disk by rewinding; BigQuery reads the file by name.
        csv_file.seek(0)
        client = Client('hscic')
        table = client.get_or_create_table('bnf_map', schemas.BNF_MAP_SCHEMA)
        table.insert_rows_from_csv(csv_file.name)
コード例 #14
0
    def handle(self, *args, **kwargs):
        """Build the functional-measures test fixture.

        Starting from an empty database: creates an org hierarchy
        (2 regional teams x 2 STPs x 2 PCNs/2 CCGs x 2 practices), writes
        five measure-definition JSON files, uploads generated prescribing
        data to the test BigQuery project, runs import_measures, removes
        the JSON files again, and dumps the resulting DB state to
        frontend/tests/fixtures/functional-measures-dont-edit.json.
        """
        # Ensure that we'll use the test BQ instance
        assert settings.BQ_PROJECT == "ebmdatalabtest", settings.BQ_PROJECT

        # Ensure we won't pick up any unexpected models
        for model in [
                Measure,
                MeasureGlobal,
                MeasureValue,
                Practice,
                Prescription,
                PCT,
                STP,
                RegionalTeam,
        ]:
            assert model.objects.count() == 0, model

        # Delete any ImportLogs that were created by migrations
        ImportLog.objects.all().delete()

        # Create a bunch of RegionalTeams, STPs, CCGs, Practices
        for regtm_ix in range(2):
            regtm = RegionalTeam.objects.create(
                code="Y0{}".format(regtm_ix),
                name="Region {}".format(regtm_ix))

            for stp_ix in range(2):
                stp = STP.objects.create(
                    ons_code="E000000{}{}".format(regtm_ix, stp_ix),
                    name="STP {}/{}".format(regtm_ix, stp_ix),
                )

                pcns = []
                for pcn_ix in range(2):
                    pcn = PCN.objects.create(
                        code="E00000{}{}{}".format(regtm_ix, stp_ix, pcn_ix),
                        name="PCN {}/{}/{}".format(regtm_ix, stp_ix, pcn_ix),
                    )
                    pcns.append(pcn)
                # Function to return next PCN, looping round forever
                get_next_pcn = itertools.cycle(pcns).__next__

                for ccg_ix in range(2):
                    ccg = PCT.objects.create(
                        regional_team=regtm,
                        stp=stp,
                        # CCG codes can't contain "0"; swap for "A".
                        code="{}{}{}".format(regtm_ix, stp_ix,
                                             ccg_ix).replace("0", "A"),
                        name="CCG {}/{}/{}".format(regtm_ix, stp_ix, ccg_ix),
                        org_type="CCG",
                    )

                    for prac_ix in range(2):
                        Practice.objects.create(
                            ccg=ccg,
                            pcn=get_next_pcn(),
                            code="P0{}{}{}{}".format(regtm_ix, stp_ix, ccg_ix,
                                                     prac_ix),
                            name="Practice {}/{}/{}/{}".format(
                                regtm_ix, stp_ix, ccg_ix, prac_ix),
                            setting=4,
                            address1="",
                            address2="",
                            address3="",
                            address4="",
                            address5="",
                            postcode="",
                        )

        # import_measures uses this ImportLog to work out which months it
        # should import data.
        ImportLog.objects.create(category="prescribing",
                                 current_at="2018-08-01")
        # The practice, CCG etc dashboards use this date
        ImportLog.objects.create(category="dashboard_data",
                                 current_at="2018-08-01")

        # Set up BQ, and upload STPs, CCGs, Practices.
        Client("measures").create_dataset()
        client = Client("hscic")
        table = client.get_or_create_table("ccgs", schemas.CCG_SCHEMA)
        table.insert_rows_from_pg(PCT,
                                  schemas.CCG_SCHEMA,
                                  transformer=schemas.ccgs_transform)
        table = client.get_or_create_table("practices",
                                           schemas.PRACTICE_SCHEMA)
        table.insert_rows_from_pg(Practice, schemas.PRACTICE_SCHEMA)

        # Create measures definitions and record the BNF codes used
        bnf_codes = []

        # Measures 0-1 are "core", 2-3 "low priority", 4 the lpzomnibus.
        for ix in range(5):
            numerator_bnf_codes_filter = ["0{}01".format(ix)]
            denominator_bnf_codes_filter = ["0{}".format(ix)]

            if ix in [0, 1]:
                measure_id = "core_{}".format(ix)
                name = "Core measure {}".format(ix)
                tags = ["core"]
                tags_focus = None
            elif ix in [2, 3]:
                measure_id = "lp_{}".format(ix)
                name = "LP measure {}".format(ix)
                tags = ["lowpriority"]
                tags_focus = None
            else:
                assert ix == 4
                measure_id = "lpzomnibus"
                name = "LP omnibus measure"
                tags = ["core"]
                tags_focus = ["lowpriority"]
                numerator_bnf_codes_filter = ["0201", "0301"]
                denominator_bnf_codes_filter = ["02", "03"]

            measure_definition = {
                "name": name,
                "title": "{} Title".format(ix),
                "description": "{} description".format(name),
                "why_it_matters": "Why {} matters".format(name),
                "url": "http://example.com/measure-{}".format(measure_id),
                "numerator_short": "Numerator for {}".format(measure_id),
                "numerator_type": "bnf_quantity",
                "numerator_bnf_codes_filter": numerator_bnf_codes_filter,
                "denominator_short": "Denominator for {}".format(measure_id),
                "denominator_type": "bnf_quantity",
                "denominator_bnf_codes_filter": denominator_bnf_codes_filter,
                "is_cost_based": True,
                "is_percentage": True,
                "low_is_good": True,
                "tags": tags,
                "tags_focus": tags_focus,
            }

            # Written to disk so import_measures can pick it up; removed
            # again in the clean-up loop at the end of this method.
            path = os.path.join(settings.MEASURE_DEFINITIONS_PATH,
                                "{}.json".format(measure_id))
            with open(path, "w") as f:
                json.dump(measure_definition, f, indent=2)

            # One denominator-matching and one numerator-matching code
            # per measure.
            bnf_codes.append("0{}0000000000000".format(ix))
            bnf_codes.append("0{}0100000000000".format(ix))

        # Generate random prescribing data. We don't currently save this to the
        # database as it would make the fixture too big and isn't needed.
        # Later we create the minimal prescribing needed by the MatrixStore.
        prescribing_rows = []

        timestamps = [
            "2018-0{}-01 00:00:00 UTC".format(month)
            for month in [1, 2, 3, 4, 5, 6, 7, 8]
        ]

        for practice_ix, practice in enumerate(Practice.objects.all()):
            for month, timestamp in enumerate(timestamps, start=1):

                # 0 <= practice_ix <= 15; 1 <= month <= 8
                item_ratio = (22 + practice_ix - 2 * month +
                              randint(-5, 5)) / 43.0
                assert 0 < item_ratio < 1

                numerator_items = 100 + randint(0, 100)
                denominator_items = int(numerator_items / item_ratio)

                for bnf_code_ix, bnf_code in enumerate(bnf_codes):
                    # Even-indexed codes are the denominator-only ones
                    # appended first for each measure above.
                    if bnf_code_ix % 2 == 0:
                        items = denominator_items
                    else:
                        items = numerator_items

                    quantity = 28 * items
                    unit_cost = 1 + bnf_code_ix
                    actual_cost = unit_cost * quantity

                    # We don't care about net_cost.
                    net_cost = actual_cost

                    row = [
                        "sha",  # This value doesn't matter.
                        practice.ccg_id,
                        practice.code,
                        bnf_code,
                        "bnf_name",  # This value doesn't matter
                        items,
                        net_cost,
                        actual_cost,
                        quantity,
                        timestamp,
                    ]

                    prescribing_rows.append(row)

        # Create the minimal amount of prescribing necessary for the
        # MatrixStore to build and for the homepages to load. This means
        # at least one prescription for each practice.
        for practice in Practice.objects.all():
            bnf_code = bnf_codes[-1]
            timestamp = timestamps[-1]
            items = 10
            quantity = 500
            net_cost = 100
            actual_cost = 95

            row = [
                "sha",  # This value doesn't matter.
                practice.ccg_id,
                practice.code,
                bnf_code,
                "bnf_name",  # This value doesn't matter
                items,
                net_cost,
                actual_cost,
                quantity,
                timestamp,
            ]
            prescribing_rows.append(row)

            # Unlike the measure prescribing we created earlier this
            # prescribing needs to be written to the database so it gets
            # included in the fixture we create
            Prescription.objects.create(
                practice_id=row[2],
                pct_id=row[1],
                presentation_code=row[3],
                total_items=row[5],
                net_cost=row[6],
                actual_cost=row[7],
                quantity=row[8],
                processing_date=row[9][:10],
            )

        # Upload presentations to BigQuery: the new measures system requires them
        table = client.get_or_create_table("presentation",
                                           schemas.PRESENTATION_SCHEMA)
        with tempfile.NamedTemporaryFile(mode="wt",
                                         encoding="utf8",
                                         newline="") as f:
            writer = csv.DictWriter(
                f, [field.name for field in schemas.PRESENTATION_SCHEMA])
            # Only bnf_code is populated; the other schema fields are
            # left empty.
            for bnf_code in bnf_codes:
                writer.writerow({"bnf_code": bnf_code})
            f.seek(0)
            table.insert_rows_from_csv(f.name, schemas.PRESENTATION_SCHEMA)

        # In production, normalised_prescribing is actually a view,
        # but for the tests it's much easier to set it up as a normal table.
        table = client.get_or_create_table("normalised_prescribing",
                                           schemas.PRESCRIBING_SCHEMA)

        # Upload prescribing_rows to normalised_prescribing.
        with tempfile.NamedTemporaryFile(mode="wt",
                                         encoding="utf8",
                                         newline="") as f:
            writer = csv.writer(f)
            for row in prescribing_rows:
                writer.writerow(row)
            f.seek(0)
            table.insert_rows_from_csv(f.name, schemas.PRESCRIBING_SCHEMA)

        # Create some dummy prescribing data in the MatrixStore.
        factory = DataFactory()
        month = factory.create_months("2018-10-01", 1)[0]
        practice = factory.create_practices(1)[0]
        for bnf_code in bnf_codes:
            presentation = factory.create_presentation(bnf_code)
            factory.create_prescription(presentation, practice, month)

        # Do the work.
        with patched_global_matrixstore_from_data_factory(factory):
            call_command("import_measures",
                         measure="core_0,core_1,lp_2,lp_3,lpzomnibus")

        # Clean up.
        for ix in range(5):
            if ix in [0, 1]:
                measure_id = "core_{}".format(ix)
            elif ix in [2, 3]:
                measure_id = "lp_{}".format(ix)
            else:
                assert ix == 4
                measure_id = "lpzomnibus"

            path = os.path.join(settings.MEASURE_DEFINITIONS_PATH,
                                "{}.json".format(measure_id))
            os.remove(path)

        # Dump the fixtures.
        fixture_path = os.path.join("frontend", "tests", "fixtures",
                                    "functional-measures-dont-edit.json")
        call_command("dumpdata", "frontend", indent=2, output=fixture_path)
コード例 #15
0
    def handle(self, *args, **kwargs):
        """Generate the ``functional-measures`` test fixture.

        Builds a small RegionalTeam/STP/CCG/Practice hierarchy, uploads it
        (plus randomly generated prescribing data) to the test BigQuery
        project, writes five temporary measure definitions, runs
        ``import_measures`` over them, and finally dumps the resulting
        frontend models to a fixture file, removing the temporary
        definition files on the way out.
        """
        # Ensure that we'll use the test BQ instance
        assert settings.BQ_PROJECT == 'ebmdatalabtest', settings.BQ_PROJECT

        # Ensure we won't pick up any unexpected models
        for model in [
                Measure, MeasureGlobal, MeasureValue, Practice, PCT, STP,
                RegionalTeam
        ]:
            assert model.objects.count() == 0, model

        # Delete any ImportLogs that were created by migrations
        ImportLog.objects.all().delete()

        # Create a bunch of RegionalTeams, STPs, CCGs, Practices
        # (2 x 2 x 2 x 2 nesting gives 16 practices in total).
        for regtm_ix in range(2):
            regtm = RegionalTeam.objects.create(
                code='Y0{}'.format(regtm_ix),
                name='Region {}'.format(regtm_ix),
            )

            for stp_ix in range(2):
                stp = STP.objects.create(
                    ons_code='E000000{}{}'.format(regtm_ix, stp_ix),
                    name='STP {}/{}'.format(regtm_ix, stp_ix),
                )

                for ccg_ix in range(2):
                    ccg = PCT.objects.create(
                        regional_team=regtm,
                        stp=stp,
                        # The replace() maps any '0' in the generated code
                        # to 'A'.
                        code='{}{}{}'.format(regtm_ix, stp_ix,
                                             ccg_ix).replace('0', 'A'),
                        name='CCG {}/{}/{}'.format(regtm_ix, stp_ix, ccg_ix),
                        org_type='CCG',
                    )

                    for prac_ix in range(2):
                        Practice.objects.create(
                            ccg=ccg,
                            code='P0{}{}{}{}'.format(regtm_ix, stp_ix, ccg_ix,
                                                     prac_ix),
                            name='Practice {}/{}/{}/{}'.format(
                                regtm_ix, stp_ix, ccg_ix, prac_ix),
                            setting=4,
                            address1='',
                            address2='',
                            address3='',
                            address4='',
                            address5='',
                            postcode='',
                        )

        # import_measures uses this ImportLog to work out which months it
        # should import data.
        ImportLog.objects.create(
            category='prescribing',
            current_at='2018-08-01',
        )

        # Practice and CCG homepages need this to work out which PPU savings to
        # show.
        ImportLog.objects.create(
            category='ppu',
            current_at='2018-08-01',
        )

        # Set up BQ, and upload STPs, CCGs, Practices.
        Client('measures').create_dataset()
        client = Client('hscic')
        table = client.get_or_create_table('ccgs', schemas.CCG_SCHEMA)
        columns = [field.name for field in schemas.CCG_SCHEMA]
        table.insert_rows_from_pg(PCT, columns, schemas.ccgs_transform)
        table = client.get_or_create_table('practices',
                                           schemas.PRACTICE_SCHEMA)
        columns = [field.name for field in schemas.PRACTICE_SCHEMA]
        table.insert_rows_from_pg(Practice, columns)

        # Create measures definitions and record the BNF codes used
        measure_definitions_path = os.path.join(settings.APPS_ROOT, 'frontend',
                                                'management', 'commands',
                                                'measure_definitions')

        # lpzomnibus is a real measure, and we don't want to overwrite its
        # definition.
        os.rename(
            os.path.join(measure_definitions_path, 'lpzomnibus.json'),
            os.path.join(measure_definitions_path, 'lpzomnibus.json.bak'),
        )

        bnf_codes = []

        for ix in range(5):
            # NOTE(review): the trailing commas below make these 1-tuples of
            # strings rather than plain strings, so the JSON definitions will
            # contain arrays for the where clauses -- confirm that this is
            # what import_measures expects.
            numerator_where = "bnf_code LIKE '0{}01%'".format(ix),
            denominator_where = "bnf_code LIKE '0{}%'".format(ix),

            if ix in [0, 1]:
                measure_id = 'core_{}'.format(ix)
                name = 'Core measure {}'.format(ix)
                tags = ['core']
                tags_focus = None
            elif ix in [2, 3]:
                measure_id = 'lp_{}'.format(ix)
                name = 'LP measure {}'.format(ix)
                tags = ['lowpriority']
                tags_focus = None
            else:
                assert ix == 4
                measure_id = 'lpzomnibus'
                name = 'LP omnibus measure'
                tags = ['core']
                tags_focus = ['lowpriority']
                numerator_where = "bnf_code LIKE '0201%' OR bnf_code LIKE'0301%'",
                denominator_where = "bnf_code LIKE '02%' OR bnf_code LIKE'03%'",

            measure_definition = {
                'name': name,
                'title': '{} Title'.format(ix),
                'description': '{} description'.format(name),
                'why_it_matters': 'Why {} matters'.format(name),
                'url': 'http://example.com/measure-{}'.format(measure_id),
                'numerator_short': 'Numerator for {}'.format(measure_id),
                'numerator_from': '{hscic}.normalised_prescribing_standard',
                'numerator_where': numerator_where,
                'numerator_columns': 'SUM(quantity) AS numerator',
                'denominator_short': 'Denominator for {}'.format(measure_id),
                'denominator_from': '{hscic}.normalised_prescribing_standard',
                'denominator_where': denominator_where,
                'denominator_columns': 'SUM(quantity) AS denominator',
                'is_cost_based': True,
                'is_percentage': True,
                'low_is_good': True,
                'tags': tags,
                'tags_focus': tags_focus,
            }

            path = os.path.join(measure_definitions_path,
                                '{}.json'.format(measure_id))
            with open(path, 'w') as f:
                json.dump(measure_definition, f, indent=2)

            # One code matching only the denominator pattern ('0{ix}%') and
            # one also matching the numerator pattern ('0{ix}01%').
            bnf_codes.append('0{}0000000000000'.format(ix))
            bnf_codes.append('0{}0100000000000'.format(ix))

        # Generate random prescribing data.  This data is never saved to the
        # database.
        prescribing_rows = []

        for practice_ix, practice in enumerate(Practice.objects.all()):
            for month in [1, 2, 3, 4, 5, 6, 7, 8]:
                timestamp = '2018-0{}-01 00:00:00 UTC'.format(month)

                # 0 <= practice_ix <= 15; 1 <= month <= 8
                item_ratio = (22 + practice_ix - 2 * month +
                              randint(-5, 5)) / 43.0
                assert 0 < item_ratio < 1

                numerator_items = 100 + randint(0, 100)
                denominator_items = int(numerator_items / item_ratio)

                for bnf_code_ix, bnf_code in enumerate(bnf_codes):
                    # Even indices are the denominator-only BNF codes
                    # appended above.
                    if bnf_code_ix % 2 == 0:
                        items = denominator_items
                    else:
                        items = numerator_items

                    quantity = 28 * items
                    unit_cost = 1 + bnf_code_ix
                    actual_cost = unit_cost * quantity

                    # We don't care about net_cost.
                    net_cost = actual_cost

                    row = [
                        'sha',  # This value doesn't matter.
                        practice.ccg.regional_team_id,
                        practice.ccg.stp_id,
                        practice.ccg_id,
                        practice.code,
                        bnf_code,
                        'bnf_name',  # This value doesn't matter
                        items,
                        net_cost,
                        actual_cost,
                        quantity,
                        timestamp,
                    ]

                    prescribing_rows.append(row)

        # In production, normalised_prescribing_standard is actually a view,
        # but for the tests it's much easier to set it up as a normal table.
        table = client.get_or_create_table('normalised_prescribing_standard',
                                           schemas.PRESCRIBING_SCHEMA)

        # Upload prescribing_rows to normalised_prescribing_standard.
        # NOTE(review): NamedTemporaryFile defaults to binary mode, but
        # csv.writer needs a text-mode file on Python 3 -- confirm.
        with tempfile.NamedTemporaryFile() as f:
            writer = csv.writer(f)
            for row in prescribing_rows:
                writer.writerow(row)
            f.seek(0)
            table.insert_rows_from_csv(f.name)

        # Do the work.
        call_command('import_measures',
                     measure='core_0,core_1,lp_2,lp_3,lpzomnibus')

        # Clean up.
        for ix in range(5):
            # NOTE(review): these two assignments are never used below.
            numerator_where = "bnf_code LIKE '0{}01%'".format(ix),
            denominator_where = "bnf_code LIKE '0{}%'".format(ix),

            if ix in [0, 1]:
                measure_id = 'core_{}'.format(ix)
            elif ix in [2, 3]:
                measure_id = 'lp_{}'.format(ix)
            else:
                assert ix == 4
                measure_id = 'lpzomnibus'

            path = os.path.join(measure_definitions_path,
                                '{}.json'.format(measure_id))
            os.remove(path)

        # Restore the real lpzomnibus definition saved above.
        os.rename(
            os.path.join(measure_definitions_path, 'lpzomnibus.json.bak'),
            os.path.join(measure_definitions_path, 'lpzomnibus.json'),
        )

        # Dump the fixtures.
        fixture_path = os.path.join('frontend', 'tests', 'fixtures',
                                    'functional-measures.json')
        call_command('dumpdata', 'frontend', indent=2, output=fixture_path)
コード例 #16
0
    def setUpTestData(cls):
        """Create CCG/practice/chemical test data, load the BigQuery CSV
        fixtures, then run the PPU-savings import for September 2015.
        """
        bassetlaw = PCT.objects.create(code='02Q', org_type='CCG')
        lincs_west = PCT.objects.create(code='04D', org_type='CCG')
        # A CCG whose close_date is before the month imported below.
        lincs_east = PCT.objects.create(code='03T', org_type='CCG',
                                        open_date='2013-04-01',
                                        close_date='2015-01-01')
        Chemical.objects.create(bnf_code='0703021Q0',
                                chem_name='Desogestrel')
        Chemical.objects.create(bnf_code='0408010A0',
                                chem_name='Levetiracetam')
        Practice.objects.create(code='C84001', ccg=bassetlaw,
                                name='LARWOOD SURGERY', setting=4)
        Practice.objects.create(code='C84024', ccg=bassetlaw,
                                name='NEWGATE MEDICAL GROUP', setting=4)
        Practice.objects.create(code='B82005', ccg=bassetlaw,
                                name='PRIORY MEDICAL GROUP', setting=4,
                                open_date='2015-01-01')
        Practice.objects.create(code='B82010', ccg=bassetlaw,
                                name='RIPON SPA SURGERY', setting=4)
        Practice.objects.create(code='A85017', ccg=bassetlaw,
                                name='BEWICK ROAD SURGERY', setting=4)
        Practice.objects.create(code='A86030', ccg=bassetlaw,
                                name='BETTS AVENUE MEDICAL GROUP', setting=4)
        Practice.objects.create(code='C83051', ccg=lincs_west,
                                name='ABBEY MEDICAL PRACTICE', setting=4)
        Practice.objects.create(code='C83019', ccg=lincs_east,
                                name='BEACON MEDICAL PRACTICE', setting=4)
        # Ensure we only include open practices in our calculations.
        Practice.objects.create(code='B82008', ccg=bassetlaw,
                                name='NORTH SURGERY', setting=4,
                                open_date='2010-04-01',
                                close_date='2012-01-01')
        # Ensure we only include standard practices in our calculations.
        Practice.objects.create(code='Y00581', ccg=bassetlaw,
                                name='BASSETLAW DRUG & ALCOHOL SERVICE',
                                setting=1)

        fixtures_base_path = os.path.join(
            'frontend', 'tests', 'fixtures', 'commands',
        )

        client = Client('hscic')

        # Load each CSV fixture into its corresponding BigQuery table.
        prescribing_fixture_path = os.path.join(
            fixtures_base_path,
            'prescribing_bigquery_fixture.csv'
        )
        table = client.get_or_create_table(
            'normalised_prescribing_standard',
            bq_schemas.PRESCRIBING_SCHEMA
        )
        table.insert_rows_from_csv(prescribing_fixture_path)

        practices_fixture_path = os.path.join(
            fixtures_base_path,
            'practices.csv'
        )
        table = client.get_or_create_table(
            'practices',
            bq_schemas.PRACTICE_SCHEMA
        )
        table.insert_rows_from_csv(practices_fixture_path)

        tariff_path = os.path.join(fixtures_base_path, 'tariff_fixture.csv')
        table = client.get_or_create_table('tariff', bq_schemas.TARIFF_SCHEMA)
        table.insert_rows_from_csv(tariff_path)

        bnf_path = os.path.join(
            fixtures_base_path,
            'bnf_codes_for_ppu_savings.csv'
        )
        table = client.get_or_create_table('bnf', bq_schemas.BNF_SCHEMA)
        table.insert_rows_from_csv(bnf_path)

        month = date(2015, 9, 1)
        # Patch pandas.read_csv inside the command so it picks up our dummy
        # substitutions file instead of whatever it would read by default.
        dummy_substitutions = pd.read_csv(
            os.path.join(fixtures_base_path, 'ppu_substitutions.csv'))
        with patch(
                'frontend.management.commands.import_ppu_savings.pd.read_csv',
                return_value=dummy_substitutions):
            import_ppu_savings.Command().handle(month=month)
コード例 #17
0
 def setUp(self):
     """Make sure the 'prescribing' table exists before each test runs."""
     bq_client = BQClient("hscic")
     bq_client.get_or_create_table("prescribing", PRESCRIBING_SCHEMA)
コード例 #18
0
    def test_the_lot(self):
        """End-to-end exercise of the gcutils BigQuery wrapper.

        The steps are deliberately run as one sequence because later
        assertions rely on tables, datasets and rows created by earlier
        steps; each section is labelled with the API it covers.
        """
        client = Client("test")
        archive_client = Client("archive")

        orig_schema = build_schema(("a", "STRING"), ("b", "INTEGER"))

        schema = build_schema(("a", "INTEGER"), ("b", "STRING"))

        headers = ["a", "b"]
        rows = [(1, "apple"), (2, "banana"), (3, "coconut")]

        t1 = client.get_or_create_table("t1", orig_schema)
        t1_qname = t1.qualified_name

        # Test Table.insert_rows_from_csv
        t1.insert_rows_from_csv("gcutils/tests/test_table.csv", schema)

        self.assertEqual(sorted(t1.get_rows()), rows)

        # Test Table.insert_rows_from_query
        t2 = client.get_table("t2")

        sql = "SELECT * FROM {} WHERE a > 1".format(t1_qname)
        t2.insert_rows_from_query(sql)

        self.assertEqual(sorted(t2.get_rows()), rows[1:])

        # Test Client.query
        sql = "SELECT * FROM {} WHERE a > 2".format(t1_qname)
        results = client.query(sql)

        self.assertEqual(sorted(results.rows), rows[2:])

        # Test Client.query_into_dataframe
        sql = "SELECT * FROM {} WHERE a > 2".format(t1_qname)
        df = client.query_into_dataframe(sql)

        self.assertEqual(df.values.tolist(), [list(rows[2])])

        # Test TableExporter.export_to_storage and
        # TableExporter.download_from_storage_and_unzip
        t1_exporter = TableExporter(t1, self.storage_prefix + "test_table-")
        t1_exporter.export_to_storage()

        with tempfile.NamedTemporaryFile(mode="r+") as f:
            t1_exporter.download_from_storage_and_unzip(f)
            f.seek(0)
            reader = csv.reader(f)
            data = [next(reader)] + sorted(reader)

        self.assertEqual(data,
                         [list(map(str, row)) for row in [headers] + rows])

        # Test Table.insert_rows_from_storage
        storage_path = self.storage_prefix + "test_table.csv"
        self.upload_to_storage("gcutils/tests/test_table.csv", storage_path)

        t2.insert_rows_from_storage(storage_path)

        self.assertEqual(sorted(t2.get_rows()), rows)

        # Test Client.create_storage_backed_table
        storage_path = self.storage_prefix + "test_table_headers.csv"
        self.upload_to_storage("gcutils/tests/test_table_headers.csv",
                               storage_path)

        schema = build_schema(("a", "INTEGER"), ("b", "STRING"))

        t3 = client.create_storage_backed_table("t3", schema, storage_path)

        results = client.query("SELECT * FROM {}".format(t3.qualified_name))

        self.assertEqual(sorted(results.rows), rows)

        # Re-uploading to the same storage path changes what the
        # storage-backed table returns.
        self.upload_to_storage("gcutils/tests/test_table_headers_2.csv",
                               storage_path)

        results = client.query("SELECT * FROM {}".format(t3.qualified_name))

        self.assertEqual(sorted(results.rows), rows + [(4, "damson")])

        # Test Client.create_table_with_view
        sql = "SELECT * FROM {{project}}.{} WHERE a > 1".format(t1_qname)

        t4 = client.create_table_with_view("t4", sql, False)

        results = client.query("SELECT * FROM {}".format(t4.qualified_name))

        self.assertEqual(sorted(results.rows), rows[1:])

        # Test Table.copy_to_new_dataset (source table keeps its rows)
        t1.copy_to_new_dataset("archive")
        t1_archived = archive_client.get_table("t1")
        self.assertEqual(sorted(t1_archived.get_rows()), rows)
        self.assertEqual(sorted(t1.get_rows()), rows)

        # Test Table.move_to_new_dataset (source table no longer exists)
        t2.move_to_new_dataset("archive")
        t2_archived = archive_client.get_table("t2")
        self.assertEqual(sorted(t2_archived.get_rows()), rows)
        with self.assertRaises(NotFound):
            list(t2.get_rows())

        # Test Client.insert_rows_from_pg
        PCT.objects.create(code="ABC", name="CCG 1")
        PCT.objects.create(code="XYZ", name="CCG 2")

        def transformer(row):
            # Map the code's first character to its ordinal, keep the name.
            return [ord(row[0][0]), row[1]]

        t1.insert_rows_from_pg(
            PCT,
            build_schema(("code", "INTEGER"), ("name", "STRING")),
            transformer=transformer,
        )

        self.assertEqual(sorted(t1.get_rows()), [(65, "CCG 1"), (88, "CCG 2")])

        # Test Table.delete_all_rows
        t1.delete_all_rows()

        self.assertEqual(list(t1.get_rows()), [])
コード例 #19
0
    def setUpTestData(cls):
        """Create CCG/practice/chemical test data, load the BigQuery CSV
        fixtures, then run the PPU-savings import for September 2015.
        """
        bassetlaw = PCT.objects.create(code="02Q", org_type="CCG")
        lincs_west = PCT.objects.create(code="04D", org_type="CCG")
        # A CCG whose close_date is before the month imported below.
        lincs_east = PCT.objects.create(code="03T",
                                        org_type="CCG",
                                        open_date="2013-04-01",
                                        close_date="2015-01-01")
        Chemical.objects.create(bnf_code="0703021Q0", chem_name="Desogestrel")
        Chemical.objects.create(bnf_code="0408010A0",
                                chem_name="Levetiracetam")
        Practice.objects.create(code="C84001",
                                ccg=bassetlaw,
                                name="LARWOOD SURGERY",
                                setting=4)
        Practice.objects.create(code="C84024",
                                ccg=bassetlaw,
                                name="NEWGATE MEDICAL GROUP",
                                setting=4)
        Practice.objects.create(
            code="B82005",
            ccg=bassetlaw,
            name="PRIORY MEDICAL GROUP",
            setting=4,
            open_date="2015-01-01",
        )
        Practice.objects.create(code="B82010",
                                ccg=bassetlaw,
                                name="RIPON SPA SURGERY",
                                setting=4)
        Practice.objects.create(code="A85017",
                                ccg=bassetlaw,
                                name="BEWICK ROAD SURGERY",
                                setting=4)
        Practice.objects.create(code="A86030",
                                ccg=bassetlaw,
                                name="BETTS AVENUE MEDICAL GROUP",
                                setting=4)
        Practice.objects.create(code="C83051",
                                ccg=lincs_west,
                                name="ABBEY MEDICAL PRACTICE",
                                setting=4)
        Practice.objects.create(code="C83019",
                                ccg=lincs_east,
                                name="BEACON MEDICAL PRACTICE",
                                setting=4)
        # Ensure we only include open practices in our calculations.
        Practice.objects.create(
            code="B82008",
            ccg=bassetlaw,
            name="NORTH SURGERY",
            setting=4,
            open_date="2010-04-01",
            close_date="2012-01-01",
        )
        # Ensure we only include standard practices in our calculations.
        Practice.objects.create(
            code="Y00581",
            ccg=bassetlaw,
            name="BASSETLAW DRUG & ALCOHOL SERVICE",
            setting=1,
        )

        fixtures_base_path = os.path.join("frontend", "tests", "fixtures",
                                          "commands")

        client = Client("hscic")

        # Load each CSV fixture into its corresponding BigQuery table.
        prescribing_fixture_path = os.path.join(
            fixtures_base_path, "prescribing_bigquery_fixture.csv")
        table = client.get_or_create_table("normalised_prescribing_standard",
                                           bq_schemas.PRESCRIBING_SCHEMA)
        table.insert_rows_from_csv(prescribing_fixture_path,
                                   bq_schemas.PRESCRIBING_SCHEMA)

        practices_fixture_path = os.path.join(fixtures_base_path,
                                              "practices.csv")
        table = client.get_or_create_table("practices",
                                           bq_schemas.PRACTICE_SCHEMA)
        table.insert_rows_from_csv(practices_fixture_path,
                                   bq_schemas.PRACTICE_SCHEMA)

        tariff_path = os.path.join(fixtures_base_path, "tariff_fixture.csv")
        table = client.get_or_create_table("tariff", bq_schemas.TARIFF_SCHEMA)
        table.insert_rows_from_csv(tariff_path, bq_schemas.TARIFF_SCHEMA)

        bnf_path = os.path.join(fixtures_base_path,
                                "bnf_codes_for_ppu_savings.csv")
        table = client.get_or_create_table("bnf", bq_schemas.BNF_SCHEMA)
        table.insert_rows_from_csv(bnf_path, bq_schemas.BNF_SCHEMA)

        month = date(2015, 9, 1)
        # Patch pandas.read_csv inside the command so it picks up our dummy
        # substitutions file instead of whatever it would read by default.
        dummy_substitutions = pd.read_csv(
            os.path.join(fixtures_base_path, "ppu_substitutions.csv"))
        with patch(
                "frontend.management.commands.import_ppu_savings.pd.read_csv",
                return_value=dummy_substitutions,
        ):
            import_ppu_savings.Command().handle(month=month)
コード例 #20
0
 def setUp(self):
     """Ensure the 'prescribing' table exists in the test 'hscic' dataset.

     The table handle isn't used here, so the return value of
     get_or_create_table is discarded (the previously assigned local
     `table` was never read).
     """
     client = BQClient('hscic')
     client.get_or_create_table('prescribing', PRESCRIBING_SCHEMA)
コード例 #21
0
    def handle(self, *args, **kwargs):
        """Download and ingest any months of SCMD data missing from BigQuery.

        Compares the months available from the upstream dataset URLs with
        the distinct year_month values already in the scmd table, then
        downloads only the missing months, rewrites their year-month dates
        as full dates, and appends the rows to the table.
        """
        s = requests.Session()

        # Get URLs keyed by the date (year-month) they're for
        urls = set(self.iter_dataset_urls(s))
        urls_by_month = dict(self.iter_months(urls))

        # set up the BigQuery client, dataset, and table
        client = Client(dataset_key="scmd")
        self.ensure_dataset_exists(client)
        table = client.get_or_create_table("scmd", schema=SCHEMA)

        # look for existing months in BigQuery
        sql = "SELECT DISTINCT year_month FROM {};".format(
            table.qualified_name)
        known_dates = [r[0] for r in client.query(sql).rows]

        # convert the datetime.dates the query gives us to strings since
        # that's what we're dealing with elsewhere.
        known_months = {d.strftime("%Y-%m") for d in known_dates}
        # Debug output of the months already present.
        print(known_months)

        missing_months = set(urls_by_month.keys()) - known_months
        print("Missing months: {}".format(", ".join(sorted(missing_months))))
        pending_urls = {m: urls_by_month[m] for m in missing_months}

        # abort if there's nothing to get
        if not pending_urls:
            print("no pending urls, aborting")
            return

        # grab missing months
        for month, url in sorted(pending_urls.items()):
            print("{} | Getting: {}".format(month, url))
            r = s.get(url)
            r.raise_for_status()
            print("{} | Downloaded: {}".format(month, url))

            # read CSV into memory
            decoded_content = r.content.decode("utf-8")
            reader = csv.reader(decoded_content.splitlines(), delimiter=",")

            # remove headers
            next(reader)

            # use a tempfile so we can write the CSV to disk with necessary
            # changes (adding days to the year-month dates at time of writing),
            # before uploading from that file to BigQuery.
            with tempfile.NamedTemporaryFile(mode="w+") as f:
                writer = csv.writer(f, delimiter=",")
                for line in reader:
                    # Convert year-month dates to year-month-day
                    line[0] = line[0] + "-01"
                    writer.writerow(line)
                print("{} | Wrote: {}".format(month, f.name))

                # rewind the file so we can read it into BigQuery
                f.seek(0)

                # insert into BigQuery
                table.insert_rows_from_csv(f.name,
                                           SCHEMA,
                                           write_disposition="WRITE_APPEND")
                print("{} | Ingested into BigQuery".format(month))
コード例 #22
0
    def test_the_lot(self):
        """End-to-end exercise of the gcutils BigQuery wrapper (legacy API).

        The steps run as one sequence because later assertions rely on
        tables and rows created by earlier steps.  Fixed for Python 3:
        ``reader.next()`` no longer exists (use the ``next()`` builtin) and
        ``map()`` returns an iterator, so each row is materialised with
        ``list()`` before comparison.
        """
        client = Client('test')

        schema = build_schema(
            ('a', 'INTEGER'),
            ('b', 'STRING'),
        )

        headers = ['a', 'b']
        rows = [
            (1, 'apple'),
            (2, 'banana'),
            (3, 'coconut'),
        ]

        t1 = client.get_or_create_table('t1', schema)
        t1_qname = t1.qualified_name

        # Test Table.insert_rows_from_csv
        t1.insert_rows_from_csv('gcutils/tests/test_table.csv')

        self.assertEqual(sorted(t1.get_rows()), rows)

        # Test Table.insert_rows_from_query
        t2 = client.get_table('t2')

        sql = 'SELECT * FROM {} WHERE a > 1'.format(t1_qname)
        t2.insert_rows_from_query(sql)

        self.assertEqual(sorted(t2.get_rows()), rows[1:])

        # Test Client.query
        sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
        results = client.query(sql)

        self.assertEqual(sorted(results.rows), rows[2:])

        # Test Client.query_into_dataframe
        sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
        df = client.query_into_dataframe(sql)

        self.assertEqual(df.values.tolist(), [list(rows[2])])

        # Test TableExporter.export_to_storage and
        # TableExporter.download_from_storage_and_unzip
        t1_exporter = TableExporter(t1, self.storage_prefix + 'test_table-')
        t1_exporter.export_to_storage()

        with tempfile.NamedTemporaryFile(mode='r+') as f:
            t1_exporter.download_from_storage_and_unzip(f)
            f.seek(0)
            reader = csv.reader(f)
            # Python 3: iterators have no .next() method; use next().
            data = [next(reader)] + sorted(reader)

        # Python 3: map() yields an iterator, so build lists to compare.
        self.assertEqual(data,
                         [list(map(str, row)) for row in [headers] + rows])

        # Test Table.insert_rows_from_storage
        storage_path = self.storage_prefix + 'test_table.csv'
        self.upload_to_storage('gcutils/tests/test_table.csv', storage_path)

        t2.insert_rows_from_storage(storage_path)

        self.assertEqual(sorted(t2.get_rows()), rows)

        # Test Client.create_storage_backed_table
        storage_path = self.storage_prefix + 'test_table_headers.csv'
        self.upload_to_storage(
            'gcutils/tests/test_table_headers.csv',
            storage_path
        )

        schema = [
            {'name': 'a', 'type': 'integer'},
            {'name': 'b', 'type': 'string'},
        ]

        t3 = client.create_storage_backed_table(
            't3',
            schema,
            storage_path
        )

        results = client.query('SELECT * FROM {}'.format(t3.qualified_name))

        self.assertEqual(sorted(results.rows), rows)

        # Re-uploading to the same storage path changes what the
        # storage-backed table returns.
        self.upload_to_storage(
            'gcutils/tests/test_table_headers_2.csv',
            storage_path
        )

        results = client.query('SELECT * FROM {}'.format(t3.qualified_name))

        self.assertEqual(sorted(results.rows), rows + [(4, u'damson')])

        # Test Client.create_table_with_view
        sql = 'SELECT * FROM {{project}}.{} WHERE a > 1'.format(t1_qname)

        t4 = client.create_table_with_view('t4', sql, False)

        results = client.query('SELECT * FROM {}'.format(t4.qualified_name))

        self.assertEqual(sorted(results.rows), rows[1:])

        # Test Client.insert_rows_from_pg
        PCT.objects.create(code='ABC', name='CCG 1')
        PCT.objects.create(code='XYZ', name='CCG 2')

        def transformer(row):
            # Map the code's first character to its ordinal, keep the name.
            return [ord(row[0][0]), row[1]]
        t1.insert_rows_from_pg(PCT, ['code', 'name'], transformer)

        self.assertEqual(sorted(t1.get_rows()), [(65, 'CCG 1'), (88, 'CCG 2')])

        # Test Table.delete_all_rows
        t1.delete_all_rows()

        self.assertEqual(list(t1.get_rows()), [])
コード例 #23
0
    def _createData(cls):
        """Create org/chemical test data, optionally load BigQuery fixture
        tables, then run ``import_measures`` for the cerazette measure.

        Setting SKIP_BQ_LOAD in the environment skips the BigQuery uploads
        and reuses whatever data is already in the test datasets.
        """
        bassetlaw = PCT.objects.create(code='02Q', org_type='CCG')
        lincs_west = PCT.objects.create(code='04D', org_type='CCG')
        lincs_east = PCT.objects.create(code='03T', org_type='CCG',
                                        open_date='2013-04-01',
                                        close_date='2015-01-01')
        Chemical.objects.create(bnf_code='0703021Q0',
                                chem_name='Desogestrel')
        Chemical.objects.create(bnf_code='0408010A0',
                                chem_name='Levetiracetam')
        Practice.objects.create(code='C84001', ccg=bassetlaw,
                                name='LARWOOD SURGERY', setting=4)
        Practice.objects.create(code='C84024', ccg=bassetlaw,
                                name='NEWGATE MEDICAL GROUP', setting=4)
        Practice.objects.create(code='B82005', ccg=bassetlaw,
                                name='PRIORY MEDICAL GROUP', setting=4,
                                open_date='2015-01-01')
        Practice.objects.create(code='B82010', ccg=bassetlaw,
                                name='RIPON SPA SURGERY', setting=4)
        Practice.objects.create(code='A85017', ccg=bassetlaw,
                                name='BEWICK ROAD SURGERY', setting=4)
        Practice.objects.create(code='A86030', ccg=bassetlaw,
                                name='BETTS AVENUE MEDICAL GROUP', setting=4)
        Practice.objects.create(code='C83051', ccg=lincs_west,
                                name='ABBEY MEDICAL PRACTICE', setting=4)
        Practice.objects.create(code='C83019', ccg=lincs_east,
                                name='BEACON MEDICAL PRACTICE', setting=4)
        # Ensure we only include open practices in our calculations.
        Practice.objects.create(code='B82008', ccg=bassetlaw,
                                name='NORTH SURGERY', setting=4,
                                open_date='2010-04-01',
                                close_date='2012-01-01')
        # Ensure we only include standard practices in our calculations.
        Practice.objects.create(code='Y00581', ccg=bassetlaw,
                                name='BASSETLAW DRUG & ALCOHOL SERVICE',
                                setting=1)

        # NOTE(review): this assignment is shadowed by `args = []` below and
        # is never read in between.
        args = []
        if 'SKIP_BQ_LOAD' not in os.environ:
            fixtures_path = os.path.join(
                'frontend', 'tests', 'fixtures', 'commands')

            prescribing_fixture_path = os.path.join(
                fixtures_path,
                'prescribing_bigquery_fixture.csv'
            )
            # TODO Make this a table with a view (see
            # generate_presentation_replacements), and put it in the correct
            # dataset ('hscic', not 'measures').
            client = Client('measures')
            table = client.get_or_create_table(
                'normalised_prescribing_legacy',
                PRESCRIBING_SCHEMA
            )
            table.insert_rows_from_csv(prescribing_fixture_path)

            practices_fixture_path = os.path.join(
                fixtures_path,
                'practices.csv'
            )
            client = Client('hscic')
            table = client.get_or_create_table('practices', PRACTICE_SCHEMA)
            # NOTE(review): `columns` is computed here but never used.
            columns = [field.name for field in PRACTICE_SCHEMA]
            table.insert_rows_from_csv(practices_fixture_path)

            ccgs_fixture_path = os.path.join(
                fixtures_path,
                'ccgs.csv'
            )
            table = client.get_or_create_table('ccgs', CCG_SCHEMA)
            table.insert_rows_from_csv(ccgs_fixture_path)

        month = '2015-09-01'
        measure_id = 'cerazette'
        args = []
        opts = {
            'month': month,
            'measure': measure_id,
            'v': 3
        }
        # Replace parse_measures so the command sees our test measure
        # definitions instead of the real ones.
        with patch('frontend.management.commands.import_measures'
                   '.parse_measures',
                   new=MagicMock(return_value=test_measures())):
            call_command('import_measures', *args, **opts)
Code example #24
0
    # NOTE(review): takes `cls`, so upstream presumably decorates this with
    # @classmethod (standard for TestCase.setUpClass); the decorator is
    # outside this excerpt -- confirm against the full class.
    def setUpClass(cls):
        """Load Django fixtures and mirror them into BigQuery test tables.

        When SKIP_BQ_LOAD is present in the environment the BigQuery upload
        is skipped entirely and BQ_NONCE must be set so previously-uploaded
        data is reused. In all cases a prescribing ImportLog row is created
        and the local materialised views are (re)built from SQL.
        """
        if 'SKIP_BQ_LOAD' not in os.environ:
            # Create local test data from fixtures, then upload this to a
            # test project in bigquery
            call_command('loaddata',
                         'frontend/tests/fixtures/orgs.json',
                         verbosity=0)
            call_command('loaddata',
                         'frontend/tests/fixtures/practices.json',
                         verbosity=0)
            call_command('loaddata',
                         'frontend/tests/fixtures/practice_listsizes.json',
                         verbosity=0)

            client = Client('hscic')

            # Raw prescribing rows come from a CSV fixture rather than
            # Postgres.
            table = client.get_or_create_table('prescribing',
                                               PRESCRIBING_SCHEMA)
            prescribing_fixture_path = os.path.join(
                'frontend', 'tests', 'fixtures', 'commands',
                'prescribing_bigquery_views_fixture.csv')
            table.insert_rows_from_csv(prescribing_fixture_path)

            # CCG, practice and statistics tables are mirrored straight
            # from the Postgres models loaded above.
            table = client.get_or_create_table('ccgs', CCG_SCHEMA)
            columns = [field.name for field in CCG_SCHEMA]
            table.insert_rows_from_pg(PCT, columns, ccgs_transform)

            table = client.get_or_create_table('practices', PRACTICE_SCHEMA)
            columns = [field.name for field in PRACTICE_SCHEMA]
            table.insert_rows_from_pg(Practice, columns)

            table = client.get_or_create_table('practice_statistics',
                                               PRACTICE_STATISTICS_SCHEMA)
            columns = [field.name for field in PRACTICE_STATISTICS_SCHEMA]
            # The first/last BQ schema field names are overridden here,
            # presumably to match the Postgres column names expected by
            # statistics_transform -- confirm against that helper.
            columns[0] = 'date'
            columns[-1] = 'practice_id'
            table.insert_rows_from_pg(PracticeStatistics, columns,
                                      statistics_transform)

            # View joining prescribing rows to practices so each row also
            # carries the practice's CCG. The {project}/{hscic} placeholders
            # are presumably interpolated by create_table_with_view --
            # confirm against the Client implementation.
            sql = """
            SELECT
              prescribing.sha AS sha,
              practices.ccg_id AS pct,
              prescribing.practice AS practice,
              prescribing.bnf_code AS bnf_code,
              prescribing.bnf_name AS bnf_name,
              prescribing.items AS items,
              prescribing.net_cost AS net_cost,
              prescribing.actual_cost AS actual_cost,
              prescribing.quantity AS quantity,
              prescribing.month AS month
            FROM
              {project}.{hscic}.prescribing AS prescribing
            INNER JOIN
              {project}.{hscic}.practices  AS practices
            ON practices.code = prescribing.practice
            """

            # Recreate the view from scratch; a missing table on the first
            # run is expected and ignored.
            try:
                client.delete_table('normalised_prescribing_standard')
            except NotFound:
                pass

            client.create_table_with_view('normalised_prescribing_standard',
                                          sql, False)

            # Remove any stale view exports left behind in cloud storage.
            client = StorageClient()
            bucket = client.get_bucket()
            for blob in bucket.list_blobs(prefix='hscic/views/vw__'):
                blob.delete()
        else:
            assert 'BQ_NONCE' in os.environ, 'Set BQ_NONCE to reuse BQ data'

        ImportLog.objects.create(category='prescribing',
                                 current_at='2015-10-01')

        # Create view tables and indexes
        with open('frontend/management/commands/replace_matviews.sql') as f:
            with connection.cursor() as c:
                c.execute(f.read())
Code example #25
0
    # NOTE(review): takes `cls`, so upstream presumably decorates this with
    # @classmethod (standard for TestCase.setUpTestData); the decorator is
    # outside this excerpt -- confirm against the full class.
    def setUpTestData(cls):
        """Create the org hierarchy, measure fixtures and BigQuery data,
        then run the import_measures command under test.

        Builds regional teams -> STPs -> CCGs -> practices in Postgres,
        seeds MeasureValue rows in known states (too old / unchanged /
        to-be-updated), optionally uploads CSV fixtures to BigQuery
        (skipped when SKIP_BQ_LOAD is set, in which case BQ_NONCE must be
        set), and finally invokes import_measures for 2015-09 with the
        measure definitions patched in.
        """
        regional_team_54 = RegionalTeam.objects.create(code='Y54')
        regional_team_55 = RegionalTeam.objects.create(code='Y55')
        stp_1 = STP.objects.create(ons_code='E00000001')
        stp_2 = STP.objects.create(ons_code='E00000002')

        bassetlaw = PCT.objects.create(code='02Q',
                                       org_type='CCG',
                                       stp=stp_1,
                                       regional_team=regional_team_54)
        lincs_west = PCT.objects.create(code='04D',
                                        org_type='CCG',
                                        stp=stp_2,
                                        regional_team=regional_team_55)
        # A closed CCG, so calculations must cope with closed organisations.
        lincs_east = PCT.objects.create(code='03T',
                                        org_type='CCG',
                                        open_date='2013-04-01',
                                        close_date='2015-01-01',
                                        stp=stp_2,
                                        regional_team=regional_team_55)
        Chemical.objects.create(bnf_code='0703021Q0', chem_name='Desogestrel')
        Chemical.objects.create(bnf_code='0408010A0',
                                chem_name='Levetiracetam')
        # Standard GP practices (setting=4) across the three CCGs.
        Practice.objects.create(code='C84001',
                                ccg=bassetlaw,
                                name='LARWOOD SURGERY',
                                setting=4)
        Practice.objects.create(code='C84024',
                                ccg=bassetlaw,
                                name='NEWGATE MEDICAL GROUP',
                                setting=4)
        Practice.objects.create(code='B82005',
                                ccg=bassetlaw,
                                name='PRIORY MEDICAL GROUP',
                                setting=4,
                                open_date='2015-01-01')
        Practice.objects.create(code='B82010',
                                ccg=bassetlaw,
                                name='RIPON SPA SURGERY',
                                setting=4)
        Practice.objects.create(code='A85017',
                                ccg=bassetlaw,
                                name='BEWICK ROAD SURGERY',
                                setting=4)
        Practice.objects.create(code='A86030',
                                ccg=bassetlaw,
                                name='BETTS AVENUE MEDICAL GROUP',
                                setting=4)
        Practice.objects.create(code='C83051',
                                ccg=lincs_west,
                                name='ABBEY MEDICAL PRACTICE',
                                setting=4)
        Practice.objects.create(code='C83019',
                                ccg=lincs_east,
                                name='BEACON MEDICAL PRACTICE',
                                setting=4)
        # Ensure we only include open practices in our calculations.
        Practice.objects.create(code='B82008',
                                ccg=bassetlaw,
                                name='NORTH SURGERY',
                                setting=4,
                                open_date='2010-04-01',
                                close_date='2012-01-01')
        # Ensure we only include standard practices in our calculations.
        Practice.objects.create(code='Y00581',
                                ccg=bassetlaw,
                                name='BASSETLAW DRUG & ALCOHOL SERVICE',
                                setting=1)

        measure = Measure.objects.create(
            id='cerazette',
            name='Cerazette vs. Desogestrel',
            title='Prescribing of...',
            tags=['core'],
            numerator_bnf_codes=[],
        )

        # We expect this MeasureValue to be deleted because it is older than
        # five years.
        MeasureValue.objects.create(
            measure=measure,
            pct_id='02Q',
            month='2000-01-01',
        )

        # We expect this MeasureValue to be unchanged
        MeasureValue.objects.create(
            measure=measure,
            pct_id='02Q',
            month='2015-08-01',
        )

        # We expect this MeasureValue to be updated
        MeasureValue.objects.create(
            measure=measure,
            pct_id='02Q',
            month='2015-09-01',
        )

        ImportLog.objects.create(
            category='prescribing',
            current_at='2018-04-01',
            filename='/tmp/prescribing.csv',
        )
        # Complementary guards: reusing BQ data requires a nonce, and the
        # upload below only happens when not skipped.
        if 'SKIP_BQ_LOAD' in os.environ:
            assert 'BQ_NONCE' in os.environ, "Specify BQ_NONCE to reuse fixtures"

        if 'SKIP_BQ_LOAD' not in os.environ:
            fixtures_path = os.path.join('frontend', 'tests', 'fixtures',
                                         'commands')

            prescribing_fixture_path = os.path.join(
                fixtures_path, 'prescribing_bigquery_fixture.csv')
            # TODO Make this a table with a view (see
            # generate_presentation_replacements), and put it in the correct
            # dataset ('hscic', not 'measures').
            client = Client('measures')
            table = client.get_or_create_table('normalised_prescribing_legacy',
                                               PRESCRIBING_SCHEMA)
            table.insert_rows_from_csv(prescribing_fixture_path)

            practices_fixture_path = os.path.join(fixtures_path,
                                                  'practices.csv')
            client = Client('hscic')
            table = client.get_or_create_table('practices', PRACTICE_SCHEMA)
            table.insert_rows_from_csv(practices_fixture_path)

            ccgs_fixture_path = os.path.join(fixtures_path, 'ccgs.csv')
            table = client.get_or_create_table('ccgs', CCG_SCHEMA)
            table.insert_rows_from_csv(ccgs_fixture_path)

        # Run the command under test with the measure-definition lookup
        # patched to return the test measure files.
        opts = {'month': '2015-09-01', 'measure': 'cerazette', 'v': 3}
        with patch(MODULE + '.get_measure_definition_paths',
                   new=working_measure_files):
            call_command('import_measures', **opts)