def test_the_lot(self):
    """End-to-end exercise of the gcutils BigQuery wrappers.

    Walks one dataset through every public operation: CSV load, query-based
    insert, direct query, DataFrame query, export/download via storage,
    storage-backed tables, view-backed tables, Postgres-sourced inserts and
    row deletion.  Each step both performs an operation and asserts on the
    resulting table contents.
    """
    client = Client('test')
    schema = build_schema(
        ('a', 'INTEGER'),
        ('b', 'STRING'),
    )
    headers = ['a', 'b']
    rows = [
        (1, 'apple'),
        (2, 'banana'),
        (3, 'coconut'),
    ]
    t1 = client.get_or_create_table('t1', schema)
    t1_qname = t1.qualified_name

    # Test Table.insert_rows_from_csv
    t1.insert_rows_from_csv('gcutils/tests/test_table.csv')
    self.assertEqual(sorted(t1.get_rows()), rows)

    # Test Table.insert_rows_from_query
    t2 = client.get_table('t2')
    sql = 'SELECT * FROM {} WHERE a > 1'.format(t1_qname)
    t2.insert_rows_from_query(sql)
    self.assertEqual(sorted(t2.get_rows()), rows[1:])

    # Test Client.query
    sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
    results = client.query(sql)
    self.assertEqual(sorted(results.rows), rows[2:])

    # Test Client.query_into_dataframe
    sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
    df = client.query_into_dataframe(sql)
    self.assertEqual(df.values.tolist(), [list(rows[2])])

    # Test TableExporter.export_to_storage and
    # TableExporter.download_from_storage_and_unzip
    t1_exporter = TableExporter(t1, self.storage_prefix + 'test_table-')
    t1_exporter.export_to_storage()

    with tempfile.NamedTemporaryFile(mode='r+') as f:
        t1_exporter.download_from_storage_and_unzip(f)
        f.seek(0)
        reader = csv.reader(f)
        # BUGFIX: `reader.next()` is Python-2-only; the `next()` builtin
        # works on both 2.6+ and 3.x.
        data = [next(reader)] + sorted(reader)

    # BUGFIX: under Python 3 `map()` returns an iterator, so the previous
    # `[map(str, row) for ...]` compared iterators against lists and could
    # never succeed; materialise each row with `list()`.
    self.assertEqual(data, [list(map(str, row)) for row in [headers] + rows])

    # Test Table.insert_rows_from_storage
    storage_path = self.storage_prefix + 'test_table.csv'
    self.upload_to_storage('gcutils/tests/test_table.csv', storage_path)
    t2.insert_rows_from_storage(storage_path)
    self.assertEqual(sorted(t2.get_rows()), rows)

    # Test Client.create_storage_backed_table
    storage_path = self.storage_prefix + 'test_table_headers.csv'
    self.upload_to_storage(
        'gcutils/tests/test_table_headers.csv',
        storage_path
    )
    schema = [
        {'name': 'a', 'type': 'integer'},
        {'name': 'b', 'type': 'string'},
    ]
    t3 = client.create_storage_backed_table(
        't3',
        schema,
        storage_path
    )
    results = client.query('SELECT * FROM {}'.format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows)
    # Re-uploading to the same path should be reflected in the
    # storage-backed table without recreating it.
    self.upload_to_storage(
        'gcutils/tests/test_table_headers_2.csv',
        storage_path
    )
    results = client.query('SELECT * FROM {}'.format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows + [(4, u'damson')])

    # Test Client.create_table_with_view
    sql = 'SELECT * FROM {{project}}.{} WHERE a > 1'.format(t1_qname)
    t4 = client.create_table_with_view('t4', sql, False)
    results = client.query('SELECT * FROM {}'.format(t4.qualified_name))
    self.assertEqual(sorted(results.rows), rows[1:])

    # Test Client.insert_rows_from_pg
    PCT.objects.create(code='ABC', name='CCG 1')
    PCT.objects.create(code='XYZ', name='CCG 2')

    def transformer(row):
        # Map the code's first character to its ordinal so the result fits
        # t1's INTEGER column ('A' -> 65, 'X' -> 88).
        return [ord(row[0][0]), row[1]]

    t1.insert_rows_from_pg(PCT, ['code', 'name'], transformer)
    self.assertEqual(sorted(t1.get_rows()), [(65, 'CCG 1'), (88, 'CCG 2')])

    # Test Table.delete_all_rows
    t1.delete_all_rows()
    self.assertEqual(list(t1.get_rows()), [])
def get_savings(entity_type, month):
    """Execute SQL to calculate savings in BigQuery, and return as a DataFrame.

    References to issues below are for
    https://github.com/ebmdatalab/price-per-dose/issues

    Args:
        entity_type: 'pct' for CCG-level savings, or 'practice' for
            practice-level savings.
        month: a date-like object; only its year/month (via ``strftime``)
            are used to select the merged prescribing table.

    Returns:
        A DataFrame indexed by ``bnf_code`` with one row per presentation,
        including a ``formulation_swap`` column derived from the
        substitutions spreadsheet. NaNs are converted to None.

    Raises:
        ValueError: if ``entity_type`` is not one of the supported values.
    """
    prescribing_table = "{hscic}.%s" % (make_merged_table_for_month(month))
    # This is interpolated into the SQL template as it is used multiple times.
    restricting_condition = (
        "AND LENGTH(RTRIM(p.bnf_code)) >= 15 "
        "AND p.bnf_code NOT LIKE '0302000C0____BE' "  # issue #10
        "AND p.bnf_code NOT LIKE '0302000C0____BF' "  # issue #10
        "AND p.bnf_code NOT LIKE '0302000C0____BH' "  # issue #10
        "AND p.bnf_code NOT LIKE '0302000C0____BG' "  # issue #10
        # BUGFIX: this pattern previously appeared twice; the duplicate
        # added nothing to the query and has been removed.
        "AND p.bnf_code NOT LIKE '0904010H0%' "       # issue #9
        "AND p.bnf_code NOT LIKE '1311070S0____AA' "  # issue #9
        "AND p.bnf_code NOT LIKE '1311020L0____BS' "  # issue #9
        "AND p.bnf_code NOT LIKE '0301020S0____AA' "  # issue #12
        "AND p.bnf_code NOT LIKE '190700000BBCJA0' "  # issue #12
        "AND p.bnf_code NOT LIKE '0604011L0BGAAAH' "  # issue #12
        "AND p.bnf_code NOT LIKE '1502010J0____BY' "  # issue #12
        "AND p.bnf_code NOT LIKE '1201010F0AAAAAA' "  # issue #12
        "AND p.bnf_code NOT LIKE '0107010S0AAAGAG' "  # issue #12
        "AND p.bnf_code NOT LIKE '060016000BBAAA0' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABJBJ' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABKBK' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABLBL' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABMBM' "  # issue #14
        "AND p.bnf_code NOT LIKE '190201000AABNBN' "  # issue #14
        "AND p.bnf_code NOT LIKE '190202000AAADAD' "  # issue #14
    )
    # Generate variable SQL based on if we're interested in CCG or
    # practice-level data
    if entity_type == 'pct':
        select = 'savings.presentations.pct AS pct,'
        inner_select = 'presentations.pct, '
        group_by = 'presentations.pct, '
        min_saving = 1000
    elif entity_type == 'practice':
        select = ('savings.presentations.practice AS practice,'
                  'savings.presentations.pct AS pct,')
        inner_select = ('presentations.pct, '
                        'presentations.practice,')
        group_by = ('presentations.practice, '
                    'presentations.pct,')
        min_saving = 50
    else:
        # 7d21f9c6 (#769) removed 'product' as a possible entity_type. We may
        # want to revisit this.
        # BUGFIX: was `assert False`, which is stripped under `python -O`
        # and would let execution fall through with select/group_by unbound;
        # raise explicitly instead.
        raise ValueError("Unknown entity_type: %s" % entity_type)
    fpath = os.path.dirname(__file__)
    # Execute SQL
    with open("%s/ppu_sql/savings_for_decile.sql" % fpath, "r") as f:
        sql = f.read()
    substitutions = (('{{ restricting_condition }}', restricting_condition),
                     ('{{ month }}', month.strftime('%Y-%m-%d')),
                     ('{{ group_by }}', group_by),
                     ('{{ select }}', select),
                     ('{{ prescribing_table }}', prescribing_table),
                     ('{{ inner_select }}', inner_select),
                     ('{{ min_saving }}', min_saving))
    # str() here also covers the integer min_saving value.
    for key, value in substitutions:
        sql = sql.replace(key, str(value))
    # Format results in a DataFrame
    client = Client()
    df = client.query_into_dataframe(sql, legacy=True)
    # Rename null values in category, so we can group by it
    df.loc[df['category'].isnull(), 'category'] = 'NP8'
    df = df.set_index('generic_presentation')
    df.index.name = 'bnf_code'
    # Add in substitutions column
    subs = pd.read_csv(SUBSTITUTIONS_SPREADSHEET).set_index('Code')
    subs = subs[subs['Really equivalent?'] == 'Y'].copy()
    subs['formulation_swap'] = (
        subs['Formulation'] + ' / ' + subs['Alternative formulation'])
    df = df.join(subs[['formulation_swap']], how='left')
    # Convert nans to Nones
    df = df.where((pd.notnull(df)), None)
    return df
def test_the_lot(self):
    """Run every gcutils BigQuery operation against one dataset in sequence.

    Covers CSV loads, query-based inserts, plain and DataFrame queries,
    storage export/import, storage-backed and view-backed tables, copying
    and moving tables between datasets, Postgres-sourced inserts, and row
    deletion.  Later steps reuse tables mutated by earlier ones, so the
    order of operations matters.
    """
    bq = Client("test")
    archive_bq = Client("archive")
    orig_schema = build_schema(("a", "STRING"), ("b", "INTEGER"))
    schema = build_schema(("a", "INTEGER"), ("b", "STRING"))
    header_row = ["a", "b"]
    expected = [(1, "apple"), (2, "banana"), (3, "coconut")]
    t1 = bq.get_or_create_table("t1", orig_schema)
    t1_name = t1.qualified_name

    # Table.insert_rows_from_csv (supplying a schema overrides orig_schema)
    t1.insert_rows_from_csv("gcutils/tests/test_table.csv", schema)
    self.assertEqual(sorted(t1.get_rows()), expected)

    # Table.insert_rows_from_query
    t2 = bq.get_table("t2")
    query_sql = "SELECT * FROM {} WHERE a > 1".format(t1_name)
    t2.insert_rows_from_query(query_sql)
    self.assertEqual(sorted(t2.get_rows()), expected[1:])

    # Client.query
    query_sql = "SELECT * FROM {} WHERE a > 2".format(t1_name)
    result = bq.query(query_sql)
    self.assertEqual(sorted(result.rows), expected[2:])

    # Client.query_into_dataframe
    query_sql = "SELECT * FROM {} WHERE a > 2".format(t1_name)
    frame = bq.query_into_dataframe(query_sql)
    self.assertEqual(frame.values.tolist(), [list(expected[2])])

    # TableExporter.export_to_storage /
    # TableExporter.download_from_storage_and_unzip
    exporter = TableExporter(t1, self.storage_prefix + "test_table-")
    exporter.export_to_storage()

    with tempfile.NamedTemporaryFile(mode="r+") as handle:
        exporter.download_from_storage_and_unzip(handle)
        handle.seek(0)
        rdr = csv.reader(handle)
        # First row is the header; sort the remainder for stable comparison.
        csv_contents = [next(rdr)] + sorted(rdr)
        self.assertEqual(
            csv_contents,
            [[str(value) for value in row] for row in [header_row] + expected],
        )

    # Table.insert_rows_from_storage
    gcs_path = self.storage_prefix + "test_table.csv"
    self.upload_to_storage("gcutils/tests/test_table.csv", gcs_path)
    t2.insert_rows_from_storage(gcs_path)
    self.assertEqual(sorted(t2.get_rows()), expected)

    # Client.create_storage_backed_table
    gcs_path = self.storage_prefix + "test_table_headers.csv"
    self.upload_to_storage("gcutils/tests/test_table_headers.csv", gcs_path)
    schema = build_schema(("a", "INTEGER"), ("b", "STRING"))
    t3 = bq.create_storage_backed_table("t3", schema, gcs_path)
    result = bq.query("SELECT * FROM {}".format(t3.qualified_name))
    self.assertEqual(sorted(result.rows), expected)
    # Replacing the backing file should change the table's contents.
    self.upload_to_storage("gcutils/tests/test_table_headers_2.csv", gcs_path)
    result = bq.query("SELECT * FROM {}".format(t3.qualified_name))
    self.assertEqual(sorted(result.rows), expected + [(4, "damson")])

    # Client.create_table_with_view
    query_sql = "SELECT * FROM {{project}}.{} WHERE a > 1".format(t1_name)
    t4 = bq.create_table_with_view("t4", query_sql, False)
    result = bq.query("SELECT * FROM {}".format(t4.qualified_name))
    self.assertEqual(sorted(result.rows), expected[1:])

    # Table.copy_to_new_dataset: source keeps its rows
    t1.copy_to_new_dataset("archive")
    archived_t1 = archive_bq.get_table("t1")
    self.assertEqual(sorted(archived_t1.get_rows()), expected)
    self.assertEqual(sorted(t1.get_rows()), expected)

    # Table.move_to_new_dataset: source disappears
    t2.move_to_new_dataset("archive")
    archived_t2 = archive_bq.get_table("t2")
    self.assertEqual(sorted(archived_t2.get_rows()), expected)
    with self.assertRaises(NotFound):
        list(t2.get_rows())

    # Client.insert_rows_from_pg
    PCT.objects.create(code="ABC", name="CCG 1")
    PCT.objects.create(code="XYZ", name="CCG 2")

    def row_transformer(row):
        # First character of the code as its ordinal ('A' -> 65, 'X' -> 88)
        # so the value fits t1's INTEGER column.
        return [ord(row[0][0]), row[1]]

    t1.insert_rows_from_pg(
        PCT,
        build_schema(("code", "INTEGER"), ("name", "STRING")),
        transformer=row_transformer,
    )
    self.assertEqual(sorted(t1.get_rows()), [(65, "CCG 1"), (88, "CCG 2")])

    # Table.delete_all_rows
    t1.delete_all_rows()
    self.assertEqual(list(t1.get_rows()), [])