コード例 #1
0
    def test_the_lot(self):
        """Exercise the BigQuery helpers end to end in a single test.

        The sections below share the tables t1..t4 and run strictly in
        order: later sections rely on rows inserted by earlier ones, so
        they cannot be reordered or run in isolation.
        """
        client = Client('test')

        schema = build_schema(
            ('a', 'INTEGER'),
            ('b', 'STRING'),
        )

        headers = ['a', 'b']
        rows = [
            (1, 'apple'),
            (2, 'banana'),
            (3, 'coconut'),
        ]

        t1 = client.get_or_create_table('t1', schema)
        t1_qname = t1.qualified_name

        # Test Table.insert_rows_from_csv
        t1.insert_rows_from_csv('gcutils/tests/test_table.csv')

        self.assertEqual(sorted(t1.get_rows()), rows)

        # Test Table.insert_rows_from_query
        t2 = client.get_table('t2')

        sql = 'SELECT * FROM {} WHERE a > 1'.format(t1_qname)
        t2.insert_rows_from_query(sql)

        self.assertEqual(sorted(t2.get_rows()), rows[1:])

        # Test Client.query
        sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
        results = client.query(sql)

        self.assertEqual(sorted(results.rows), rows[2:])

        # Test Client.query_into_dataframe
        sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
        df = client.query_into_dataframe(sql)

        self.assertEqual(df.values.tolist(), [list(rows[2])])

        # Test TableExporter.export_to_storage and
        # TableExporter.download_from_storage_and_unzip
        t1_exporter = TableExporter(t1, self.storage_prefix + 'test_table-')
        t1_exporter.export_to_storage()

        with tempfile.NamedTemporaryFile(mode='r+') as f:
            t1_exporter.download_from_storage_and_unzip(f)
            f.seek(0)
            reader = csv.reader(f)
            # The first line is the header row and must stay first; only the
            # data rows that follow are sorted.  The builtin next() is used
            # rather than reader.next(), which only exists on Python 2.
            data = [next(reader)] + sorted(reader)

        # csv.reader yields lists of strings, so the expected rows are
        # stringified.  list() is needed because map() returns an iterator
        # on Python 3, which would never compare equal to a list.
        self.assertEqual(
            data,
            [list(map(str, row)) for row in [headers] + rows]
        )

        # Test Table.insert_rows_from_storage
        storage_path = self.storage_prefix + 'test_table.csv'
        self.upload_to_storage('gcutils/tests/test_table.csv', storage_path)

        t2.insert_rows_from_storage(storage_path)

        self.assertEqual(sorted(t2.get_rows()), rows)

        # Test Client.create_storage_backed_table
        storage_path = self.storage_prefix + 'test_table_headers.csv'
        self.upload_to_storage(
            'gcutils/tests/test_table_headers.csv',
            storage_path
        )

        schema = [
            {'name': 'a', 'type': 'integer'},
            {'name': 'b', 'type': 'string'},
        ]

        t3 = client.create_storage_backed_table(
            't3',
            schema,
            storage_path
        )

        results = client.query('SELECT * FROM {}'.format(t3.qualified_name))

        self.assertEqual(sorted(results.rows), rows)

        # Overwrite the file behind t3 and query again: the second query is
        # expected to return the extra row from the new file.
        self.upload_to_storage(
            'gcutils/tests/test_table_headers_2.csv',
            storage_path
        )

        results = client.query('SELECT * FROM {}'.format(t3.qualified_name))

        self.assertEqual(sorted(results.rows), rows + [(4, u'damson')])

        # Test Client.create_table_with_view
        # The doubled braces survive .format() as a literal {project}
        # placeholder in the SQL handed to create_table_with_view.
        sql = 'SELECT * FROM {{project}}.{} WHERE a > 1'.format(t1_qname)

        t4 = client.create_table_with_view('t4', sql, False)

        results = client.query('SELECT * FROM {}'.format(t4.qualified_name))

        self.assertEqual(sorted(results.rows), rows[1:])

        # Test Client.insert_rows_from_pg
        PCT.objects.create(code='ABC', name='CCG 1')
        PCT.objects.create(code='XYZ', name='CCG 2')

        def transformer(row):
            # Replace the code with the ordinal of its first character so
            # the value fits t1's INTEGER column 'a'.
            return [ord(row[0][0]), row[1]]
        t1.insert_rows_from_pg(PCT, ['code', 'name'], transformer)

        self.assertEqual(sorted(t1.get_rows()), [(65, 'CCG 1'), (88, 'CCG 2')])

        # Test Table.delete_all_rows
        t1.delete_all_rows()

        self.assertEqual(list(t1.get_rows()), [])
コード例 #2
0
def get_savings(entity_type, month):
    """Execute SQL to calculate savings in BigQuery, and return as a
    DataFrame.

    References to issues below are for
    https://github.com/ebmdatalab/price-per-dose/issues

    Args:
        entity_type: 'pct' or 'practice'.  Selects the grouping columns and
            the minimum-saving threshold substituted into the SQL template.
        month: Object with a ``strftime`` method (e.g. a date).  It is passed
            to ``make_merged_table_for_month`` and formatted as ``%Y-%m-%d``
            for the template.

    Returns:
        A DataFrame indexed by ``bnf_code`` (taken from the query's
        ``generic_presentation`` column), with null categories set to
        'NP8', a ``formulation_swap`` column joined in from the
        substitutions spreadsheet, and NaNs replaced by None.

    Raises:
        AssertionError: If ``entity_type`` is not 'pct' or 'practice'.
    """
    # Generate variable SQL based on if we're interested in CCG or
    # practice-level data.  This is done before anything else so that an
    # unsupported entity_type fails before any other work is started.
    if entity_type == 'pct':
        select = 'savings.presentations.pct AS pct,'
        inner_select = 'presentations.pct, '
        group_by = 'presentations.pct, '
        min_saving = 1000
    elif entity_type == 'practice':
        select = ('savings.presentations.practice AS practice,'
                  'savings.presentations.pct AS pct,')
        inner_select = ('presentations.pct, ' 'presentations.practice,')
        group_by = ('presentations.practice, ' 'presentations.pct,')
        min_saving = 50
    else:
        # 7d21f9c6 (#769) removed 'product' as a possible entity_type.  We
        # may want to revisit this.
        #
        # Raised explicitly rather than via `assert False`, which is
        # stripped when Python runs with -O and would let execution fall
        # through to an unbound-local error further down.
        raise AssertionError(
            "Unsupported entity_type: %r" % (entity_type,))

    prescribing_table = "{hscic}.%s" % (make_merged_table_for_month(month))

    # This is interpolated into the SQL template as it is used multiple times.
    restricting_condition = (
        "AND LENGTH(RTRIM(p.bnf_code)) >= 15 "
        "AND p.bnf_code NOT LIKE '0302000C0____BE' "  # issue #10
        "AND p.bnf_code NOT LIKE '0302000C0____BF' "  # issue #10
        "AND p.bnf_code NOT LIKE '0302000C0____BH' "  # issue #10
        "AND p.bnf_code NOT LIKE '0302000C0____BG' "  # issue #10
        "AND p.bnf_code NOT LIKE '0904010H0%' "  # issue #9
        # NOTE(review): the next line repeats the one above verbatim; it is
        # harmless, but confirm it was not meant to be a different code.
        "AND p.bnf_code NOT LIKE '0904010H0%' "  # issue #9
        "AND p.bnf_code NOT LIKE '1311070S0____AA' "  # issue #9
        "AND p.bnf_code NOT LIKE '1311020L0____BS' "  # issue #9
        "AND p.bnf_code NOT LIKE '0301020S0____AA' "  # issue #12
        "AND p.bnf_code NOT LIKE '190700000BBCJA0' "  # issue #12
        "AND p.bnf_code NOT LIKE '0604011L0BGAAAH' "  # issue #12
        "AND p.bnf_code NOT LIKE '1502010J0____BY' "  # issue #12
        "AND p.bnf_code NOT LIKE '1201010F0AAAAAA' "  # issue #12
        "AND p.bnf_code NOT LIKE '0107010S0AAAGAG' "  # issue #12
        "AND p.bnf_code NOT LIKE '060016000BBAAA0' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABJBJ' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABKBK' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABLBL' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABMBM' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABNBN' "  # issue #14
        "AND p.bnf_code NOT LIKE '190202000AAADAD' "  # issue #14
    )

    # Load the SQL template that sits next to this module.  os.path.join is
    # used so that an empty dirname (module referenced by bare filename)
    # resolves relative to the current directory instead of to "/ppu_sql".
    sql_path = os.path.join(
        os.path.dirname(__file__), "ppu_sql", "savings_for_decile.sql")
    with open(sql_path, "r") as f:
        sql = f.read()

    # Plain textual replacement of each placeholder in the template.
    substitutions = (
        ('{{ restricting_condition }}', restricting_condition),
        ('{{ month }}', month.strftime('%Y-%m-%d')),
        ('{{ group_by }}', group_by),
        ('{{ select }}', select),
        ('{{ prescribing_table }}', prescribing_table),
        ('{{ inner_select }}', inner_select),
        ('{{ min_saving }}', min_saving),
    )
    for key, value in substitutions:
        sql = sql.replace(key, str(value))

    # Execute SQL and format results in a DataFrame
    client = Client()
    df = client.query_into_dataframe(sql, legacy=True)
    # Rename null values in category, so we can group by it
    df.loc[df['category'].isnull(), 'category'] = 'NP8'
    df = df.set_index('generic_presentation')
    df.index.name = 'bnf_code'
    # Add in substitutions column
    subs = pd.read_csv(SUBSTITUTIONS_SPREADSHEET).set_index('Code')
    subs = subs[subs['Really equivalent?'] == 'Y'].copy()
    subs['formulation_swap'] = (subs['Formulation'] + ' / ' +
                                subs['Alternative formulation'])
    df = df.join(subs[['formulation_swap']], how='left')
    # Convert nans to Nones
    df = df.where((pd.notnull(df)), None)
    return df
コード例 #3
0
    def test_the_lot(self):
        """Run every BigQuery helper through one sequential scenario.

        Each stage reuses the tables left behind by the stages before it,
        so the stages have to stay in this order.
        """
        bq = Client("test")
        bq_archive = Client("archive")

        # t1 is created as (a STRING, b INTEGER); the CSV load further down
        # is handed (a INTEGER, b STRING) instead.
        initial_schema = build_schema(("a", "STRING"), ("b", "INTEGER"))
        load_schema = build_schema(("a", "INTEGER"), ("b", "STRING"))

        column_names = ["a", "b"]
        expected = [(1, "apple"), (2, "banana"), (3, "coconut")]

        def sorted_rows(table):
            # Table contents in a deterministic order for comparison.
            return sorted(table.get_rows())

        def select_all(table):
            # Query every row of the given table through the test client.
            return bq.query("SELECT * FROM {}".format(table.qualified_name))

        first = bq.get_or_create_table("t1", initial_schema)
        first_qname = first.qualified_name

        # --- Table.insert_rows_from_csv
        first.insert_rows_from_csv("gcutils/tests/test_table.csv", load_schema)
        self.assertEqual(sorted_rows(first), expected)

        # --- Table.insert_rows_from_query
        second = bq.get_table("t2")
        second.insert_rows_from_query(
            "SELECT * FROM {} WHERE a > 1".format(first_qname)
        )
        self.assertEqual(sorted_rows(second), expected[1:])

        # --- Client.query
        above_two = "SELECT * FROM {} WHERE a > 2".format(first_qname)
        query_result = bq.query(above_two)
        self.assertEqual(sorted(query_result.rows), expected[2:])

        # --- Client.query_into_dataframe
        frame = bq.query_into_dataframe(above_two)
        self.assertEqual(frame.values.tolist(), [list(expected[2])])

        # --- TableExporter.export_to_storage and
        # --- TableExporter.download_from_storage_and_unzip
        exporter = TableExporter(first, self.storage_prefix + "test_table-")
        exporter.export_to_storage()

        with tempfile.NamedTemporaryFile(mode="r+") as scratch:
            exporter.download_from_storage_and_unzip(scratch)
            scratch.seek(0)
            csv_rows = csv.reader(scratch)
            # Header line stays first; only the data lines are sorted.
            header_line = next(csv_rows)
            exported = [header_line] + sorted(csv_rows)

        # csv.reader yields strings, so compare against stringified rows.
        stringified = [
            [str(cell) for cell in row] for row in [column_names] + expected
        ]
        self.assertEqual(exported, stringified)

        # --- Table.insert_rows_from_storage
        remote_csv = self.storage_prefix + "test_table.csv"
        self.upload_to_storage("gcutils/tests/test_table.csv", remote_csv)
        second.insert_rows_from_storage(remote_csv)
        self.assertEqual(sorted_rows(second), expected)

        # --- Client.create_storage_backed_table
        remote_csv = self.storage_prefix + "test_table_headers.csv"
        self.upload_to_storage(
            "gcutils/tests/test_table_headers.csv", remote_csv
        )

        backed_schema = build_schema(("a", "INTEGER"), ("b", "STRING"))
        third = bq.create_storage_backed_table("t3", backed_schema, remote_csv)
        self.assertEqual(sorted(select_all(third).rows), expected)

        # Replace the file behind t3; the next query is expected to return
        # the extra row.
        self.upload_to_storage(
            "gcutils/tests/test_table_headers_2.csv", remote_csv
        )
        self.assertEqual(
            sorted(select_all(third).rows), expected + [(4, "damson")]
        )

        # --- Client.create_table_with_view
        # Doubled braces leave a literal {project} placeholder in the SQL.
        view_sql = "SELECT * FROM {{project}}.{} WHERE a > 1".format(
            first_qname
        )
        fourth = bq.create_table_with_view("t4", view_sql, False)
        self.assertEqual(sorted(select_all(fourth).rows), expected[1:])

        # --- Table.copy_to_new_dataset: rows are expected in both places
        first.copy_to_new_dataset("archive")
        first_archived = bq_archive.get_table("t1")
        self.assertEqual(sorted_rows(first_archived), expected)
        self.assertEqual(sorted_rows(first), expected)

        # --- Table.move_to_new_dataset: the source table should be gone
        second.move_to_new_dataset("archive")
        second_archived = bq_archive.get_table("t2")
        self.assertEqual(sorted_rows(second_archived), expected)
        with self.assertRaises(NotFound):
            list(second.get_rows())

        # --- Client.insert_rows_from_pg
        PCT.objects.create(code="ABC", name="CCG 1")
        PCT.objects.create(code="XYZ", name="CCG 2")

        def code_to_ordinal(pg_row):
            # Swap the code for the ordinal of its first character.
            code = pg_row[0]
            name = pg_row[1]
            return [ord(code[0]), name]

        pg_schema = build_schema(("code", "INTEGER"), ("name", "STRING"))
        first.insert_rows_from_pg(PCT, pg_schema, transformer=code_to_ordinal)
        self.assertEqual(sorted_rows(first), [(65, "CCG 1"), (88, "CCG 2")])

        # --- Table.delete_all_rows
        first.delete_all_rows()
        self.assertEqual(list(first.get_rows()), [])