def test_data_is_aggregated(self):
        # there are 11 rows in the input file; 2 are for the same
        # practice/presentation and should be collapsed, and 1 is for
        # an UNKNONWN SURGERY (see issue #349)

        raw_data_path = ("frontend/tests/fixtures/commands/" +
                         "convert_hscic_prescribing/2016_01/" +
                         "EPD_201601.csv")
        gcs_path = "hscic/prescribing_v2/2016_01/EPD_201601.csv"

        client = StorageClient()
        bucket = client.get_bucket()
        blob = bucket.blob(gcs_path)

        with open(raw_data_path, "rb") as f:
            blob.upload_from_file(f)

        call_command("convert_hscic_prescribing", filename=raw_data_path)

        # Test that data added to prescribing table
        client = BQClient()
        sql = """SELECT *
        FROM {hscic}.prescribing_v2
        WHERE month = TIMESTAMP('2016-01-01')"""

        rows = list(results_to_dicts(client.query(sql)))
        self.assertEqual(len(rows), 9)
        for row in rows:
            if row["practice"] == "P92042" and row[
                    "bnf_code"] == "0202010B0AAABAB":
                self.assertEqual(row["quantity"], 1288)
    def test_existing_files_deleted(self):
        # Create a dataset fragment which should end up being deleted
        client = StorageClient()
        bucket = client.get_bucket()
        blob_name = ('hscic/views/vw__presentation_summary_by_ccg'
                     '-000000009999.csv.gz')
        blob = bucket.blob(blob_name)
        blob.upload_from_string("test", content_type="application/gzip")

        # Run import command
        call_command('create_views')

        # Check the bucket is no longer there
        client = StorageClient()
        bucket = client.get_bucket()
        prefix, suffix = blob_name.split('-')
        for blob in bucket.list_blobs(prefix=prefix):
            self.assertNotIn(suffix, blob.path)
Ejemplo n.º 3
0
def update_bnf_table():
    """Update `bnf` table from cloud-stored CSV"""
    storage_client = StorageClient()
    bucket = storage_client.get_bucket()
    blobs = bucket.list_blobs(prefix="hscic/bnf_codes/")
    blobs = sorted(blobs, key=lambda blob: blob.name, reverse=True)
    blob = blobs[0]

    bq_client = BQClient("hscic")
    table = bq_client.get_table("bnf")
    table.insert_rows_from_storage(blob.name, skip_leading_rows=1)
Ejemplo n.º 4
0
def upload_task_input_files(task):
    storage_client = StorageClient()
    bucket = storage_client.get_bucket()

    for path in task.input_paths():
        assert path[0] == '/'
        assert settings.PIPELINE_DATA_BASEDIR[-1] == '/'
        name = 'hscic' + path.replace(settings.PIPELINE_DATA_BASEDIR, '/')
        blob = bucket.blob(name)
        if blob.exists():
            print("Skipping %s, already uploaded" % name)
            continue
        print("Uploading %s to %s" % (path, name))
        with open(path) as f:
            blob.upload_from_file(f)
    def test_data_is_aggregated(self):
        # there are 11 rows in the input file; 2 are for the same
        # practice/presentation and should be collapsed, and 1 is for
        # an UNKNONWN SURGERY (see issue #349)

        raw_data_path = 'frontend/tests/fixtures/commands/' +\
            'convert_hscic_prescribing/2016_01/' +\
            'Detailed_Prescribing_Information.csv'
        converted_data_path = 'frontend/tests/fixtures/commands/' +\
            'convert_hscic_prescribing/2016_01/' +\
            'Detailed_Prescribing_Information_formatted.CSV'
        gcs_path = 'hscic/prescribing/2016_01/' +\
            'Detailed_Prescribing_Information.csv'

        client = StorageClient()
        bucket = client.get_bucket()
        blob = bucket.blob(gcs_path)

        with open(raw_data_path) as f:
            blob.upload_from_file(f)

        call_command('convert_hscic_prescribing', filename=raw_data_path)

        # Test that data added to prescribing table
        client = BQClient()
        sql = '''SELECT *
        FROM {hscic}.prescribing
        WHERE month = TIMESTAMP('2016-01-01')'''

        rows = list(results_to_dicts(client.query(sql)))
        self.assertEqual(len(rows), 9)
        for row in rows:
            if row['practice'] == 'P92042' and \
                    row['bnf_code'] == '0202010B0AAABAB':
                self.assertEqual(row['quantity'], 1288)

        # Test that downloaded data is correct
        with open(converted_data_path) as f:
            rows = list(csv.reader(f))

        self.assertEqual(len(rows), 9)
        for row in rows:
            if row[1] == 'P92042' and row[2] == '0202010B0AAABAB':
                self.assertEqual(row[6], '1288')