Esempio n. 1
0
def upload_dummy_prescribing(bnf_codes):
    """Upload enough dummy prescribing data to BQ to allow the BNF code simplification
    to be meaningful."""

    prescribing_rows = []
    for bnf_code in bnf_codes:
        row = [
            None,
            None,
            None,
            None,
            None,
            None,
            bnf_code,
            None,
            None,
            None,
            None,
            None,
            None,
        ]
        prescribing_rows.append(row)

    table = Client("hscic").get_table("normalised_prescribing_standard")
    with tempfile.NamedTemporaryFile("wt") as f:
        writer = csv.writer(f)
        for row in prescribing_rows:
            writer.writerow(row)
        f.seek(0)
        table.insert_rows_from_csv(f.name, schemas.PRESCRIBING_SCHEMA)
Esempio n. 2
0
def upload_dummy_prescribing(bnf_codes):
    """Upload enough dummy prescribing data to BQ to allow the BNF code simplification
    to be meaningful."""

    prescribing_rows = []
    for bnf_code in bnf_codes:
        row = [
            None,  # sha
            None,  # pct
            None,  # practice
            bnf_code,  # bnf_code
            None,  # bnf_name
            None,  # items
            None,  # net_cost
            None,  # actual_cost
            None,  # quantity
            None,  # month
        ]
        prescribing_rows.append(row)

    table = Client("hscic").get_table("normalised_prescribing")
    with tempfile.NamedTemporaryFile("wt") as f:
        writer = csv.writer(f)
        for row in prescribing_rows:
            writer.writerow(row)
        f.seek(0)
        table.insert_rows_from_csv(f.name, schemas.PRESCRIBING_SCHEMA)
Esempio n. 3
0
def upload_presentations():
    """Upload presentations to BQ."""

    table = Client("hscic").get_table("presentation")

    presentations = [
        ("0703021Q0AAAAAA", "Desogestrel_Tab 75mcg"),
        ("0703021Q0BBAAAA", "Cerazette_Tab 75mcg"),
        ("076543210AAAAAA", "Etynodiol Diacet_Tab 500mcg"),
        ("0407010Q0AAAAAA", "Co-Proxamol_Tab 32.5mg/325mg"),
        ("0904010AUBBAAAA", "Mrs Crimble's_G/F W/F Cheese Bites Orig"),
    ]

    with tempfile.NamedTemporaryFile("wt") as f:
        writer = csv.writer(f)
        for presentation in presentations:
            row = presentation + (None, None)
            writer.writerow(row)
        f.seek(0)
        table.insert_rows_from_csv(f.name, schemas.PRESENTATION_SCHEMA)
Esempio n. 4
0
def upload_practice_statistics(randint):
    """Generate practice statistics data, and upload to BQ."""

    practice_statistics_rows = []
    seen_practice_with_no_statistics = False
    columns = [
        "month",
        "regional_team_id",
        "stp_id",
        "ccg_id",
        "pcn_id",
        "practice_id",
        "total_list_size",
    ]
    practice_stats = pd.DataFrame(columns=columns)

    for practice in Practice.objects.all():
        for month in [7, 8]:
            timestamp = "2018-0{}-01 00:00:00 UTC".format(month)

            if month == 8 and practice.code == "P00000":
                seen_practice_with_no_statistics = True
                continue

            total_list_size = randint(100, 200)

            row = [
                timestamp,  # month
                0,  # male_0_4
                0,  # female_0_4
                0,  # male_5_14
                0,  # male_15_24
                0,  # male_25_34
                0,  # male_35_44
                0,  # male_45_54
                0,  # male_55_64
                0,  # male_65_74
                0,  # male_75_plus
                0,  # female_5_14
                0,  # female_15_24
                0,  # female_25_34
                0,  # female_35_44
                0,  # female_45_54
                0,  # female_55_64
                0,  # female_65_74
                0,  # female_75_plus
                total_list_size,  # total_list_size
                0,  # astro_pu_cost
                0,  # astro_pu_items
                "{}",  # star_pu
                practice.ccg_id,  # pct_id
                practice.code,  # practice
            ]

            practice_statistics_rows.append(row)
            practice_stats = practice_stats.append(
                {
                    "month": timestamp[:10],
                    "practice_id": practice.code,
                    "pcn_id": practice.pcn_id,
                    "ccg_id": practice.ccg_id,
                    "stp_id": practice.ccg.stp_id,
                    "regional_team_id": practice.ccg.regional_team_id,
                    "thousand_patients": total_list_size / 1000.0,
                },
                ignore_index=True,
            )

    assert seen_practice_with_no_statistics

    # Upload practice_statistics_rows to BigQuery.
    table = Client("hscic").get_table("practice_statistics")

    with tempfile.NamedTemporaryFile("wt") as f:
        writer = csv.writer(f)
        for row in practice_statistics_rows:
            writer.writerow(row)
        f.seek(0)
        table.insert_rows_from_csv(f.name, schemas.PRACTICE_STATISTICS_SCHEMA)

    return practice_stats
Esempio n. 5
0
def upload_prescribing(randint):
    """Generate prescribing data, and upload to BQ."""

    prescribing_rows = []

    # These are for the desogestrel measure
    presentations = [
        ("0703021Q0AAAAAA", "Desogestrel_Tab 75mcg"),  # generic
        ("0703021Q0BBAAAA", "Cerazette_Tab 75mcg"),  # branded
        ("076543210AAAAAA", "Etynodiol Diacet_Tab 500mcg"),  # irrelevant
    ]

    seen_practice_with_no_prescribing = False
    seen_practice_with_no_relevant_prescribing = False
    seen_practice_with_no_generic_prescribing = False
    seen_practice_with_no_branded_prescribing = False

    for practice in Practice.objects.all():
        for month in [7, 8]:
            timestamp = "2018-0{}-01 00:00:00 UTC".format(month)

            for ix, (bnf_code, bnf_name) in enumerate(presentations):
                if practice.code == "P00000":
                    seen_practice_with_no_prescribing = True
                    continue
                elif practice.code == "P00010" and "0703021Q" in bnf_code:
                    seen_practice_with_no_relevant_prescribing = True
                    continue
                elif practice.code == "P00020" and bnf_code == "0703021Q0AAAAAA":
                    seen_practice_with_no_generic_prescribing = True
                    continue
                elif practice.code == "P00030" and bnf_code == "0703021Q0BBAAAA":
                    seen_practice_with_no_branded_prescribing = True
                    continue

                items = randint(0, 100)
                quantity = randint(6, 28) * items

                # Multiplying by (1 + ix) ensures that the branded cost is
                # always higher than the generic cost.
                actual_cost = (1 + ix) * randint(100, 200) * quantity * 0.01

                # We don't care about net_cost.
                net_cost = actual_cost

                row = [
                    "sha",  # This value doesn't matter.
                    practice.ccg.regional_team_id,
                    practice.ccg.stp_id,
                    practice.ccg_id,
                    practice.pcn_id,
                    practice.code,
                    bnf_code,
                    bnf_name,
                    items,
                    net_cost,
                    actual_cost,
                    quantity,
                    timestamp,
                ]

                prescribing_rows.append(row)

    assert seen_practice_with_no_prescribing
    assert seen_practice_with_no_relevant_prescribing
    assert seen_practice_with_no_generic_prescribing
    assert seen_practice_with_no_branded_prescribing

    # These are for the coproxamol and glutenfree measures
    presentations = [
        ("0407010Q0AAAAAA", "Co-Proxamol_Tab 32.5mg/325mg"),
        ("0904010AUBBAAAA", "Mrs Crimble's_G/F W/F Cheese Bites Orig"),
    ]

    for practice in Practice.objects.all():
        for month in [7, 8]:
            timestamp = "2018-0{}-01 00:00:00 UTC".format(month)

            for bnf_code, bnf_name in presentations:
                items = randint(0, 100)
                quantity = randint(6, 28) * items

                actual_cost = randint(100, 200) * quantity * 0.01

                # We don't care about net_cost.
                net_cost = actual_cost

                row = [
                    "sha",  # This value doesn't matter.
                    practice.ccg.regional_team_id,
                    practice.ccg.stp_id,
                    practice.ccg_id,
                    practice.pcn_id,
                    practice.code,
                    bnf_code,
                    bnf_name,
                    items,
                    net_cost,
                    actual_cost,
                    quantity,
                    timestamp,
                ]

                prescribing_rows.append(row)

    # In production, normalised_prescribing_standard is actually a view,
    # but for the tests it's much easier to set it up as a normal table.
    table = Client("hscic").get_table("normalised_prescribing_standard")

    with tempfile.NamedTemporaryFile("wt") as f:
        writer = csv.writer(f)
        for row in prescribing_rows:
            writer.writerow(row)
        f.seek(0)
        table.insert_rows_from_csv(f.name, schemas.PRESCRIBING_SCHEMA)

        headers = [
            "sha",
            "regional_team_id",
            "stp_id",
            "ccg_id",
            "pcn_id",
            "practice_id",
            "bnf_code",
            "bnf_name",
            "items",
            "net_cost",
            "actual_cost",
            "quantity",
            "month",
        ]
        prescriptions = pd.read_csv(f.name, names=headers)
        prescriptions["month"] = prescriptions["month"].str[:10]

    return prescriptions
def upload_practice_statistics():
    '''Generate practice statistics data, and upload to BQ.'''

    practice_statistics_rows = []
    seen_practice_with_no_statistics = False
    columns = [
        'month',
        'regional_team_id',
        'stp_id',
        'ccg_id',
        'practice_id',
        'total_list_size',
    ]
    practice_stats = pd.DataFrame(columns=columns)

    for practice in Practice.objects.all():
        for month in [7, 8]:
            timestamp = '2018-0{}-01 00:00:00 UTC'.format(month)

            if month == 8 and practice.code == 'P00000':
                seen_practice_with_no_statistics = True
                continue

            total_list_size = randint(100, 200)

            row = [
                timestamp,  #  month
                0,  #  male_0_4
                0,  #  female_0_4
                0,  #  male_5_14
                0,  #  male_15_24
                0,  #  male_25_34
                0,  #  male_35_44
                0,  #  male_45_54
                0,  #  male_55_64
                0,  #  male_65_74
                0,  #  male_75_plus
                0,  #  female_5_14
                0,  #  female_15_24
                0,  #  female_25_34
                0,  #  female_35_44
                0,  #  female_45_54
                0,  #  female_55_64
                0,  #  female_65_74
                0,  #  female_75_plus
                total_list_size,  #  total_list_size
                0,  #  astro_pu_cost
                0,  #  astro_pu_items
                '{}',  #  star_pu
                practice.ccg_id,  #  pct_id
                practice.code,  #  practice
            ]

            practice_statistics_rows.append(row)
            practice_stats = practice_stats.append(
                {
                    'month': timestamp[:10],
                    'practice_id': practice.code,
                    'ccg_id': practice.ccg_id,
                    'stp_id': practice.ccg.stp_id,
                    'regional_team_id': practice.ccg.regional_team_id,
                    'thousand_patients': total_list_size / 1000.0,
                },
                ignore_index=True,
            )

    assert seen_practice_with_no_statistics

    # Upload practice_statistics_rows to BigQuery.
    table = Client('hscic').get_table('practice_statistics')

    with tempfile.NamedTemporaryFile() as f:
        writer = csv.writer(f)
        for row in practice_statistics_rows:
            writer.writerow(row)
        f.seek(0)
        table.insert_rows_from_csv(f.name)

    return practice_stats
def upload_prescribing():
    '''Generate prescribing data, and upload to BQ.'''

    prescribing_rows = []

    # These are for the desogestrel measure
    presentations = [
        ('0703021Q0AAAAAA', 'Desogestrel_Tab 75mcg'),  # generic
        ('0703021Q0BBAAAA', 'Cerazette_Tab 75mcg'),  # branded
        ('076543210AAAAAA', 'Etynodiol Diacet_Tab 500mcg'),  # irrelevant
    ]

    seen_practice_with_no_prescribing = False
    seen_practice_with_no_relevant_prescribing = False
    seen_practice_with_no_generic_prescribing = False
    seen_practice_with_no_branded_prescribing = False

    for practice in Practice.objects.all():
        for month in [7, 8]:
            timestamp = '2018-0{}-01 00:00:00 UTC'.format(month)

            for ix, (bnf_code, bnf_name) in enumerate(presentations):
                if practice.code == 'P00000':
                    seen_practice_with_no_prescribing = True
                    continue
                elif practice.code == 'P00010' and '0703021Q' in bnf_code:
                    seen_practice_with_no_relevant_prescribing = True
                    continue
                elif practice.code == 'P00020' and bnf_code == '0703021Q0AAAAAA':
                    seen_practice_with_no_generic_prescribing = True
                    continue
                elif practice.code == 'P00030' and bnf_code == '0703021Q0BBAAAA':
                    seen_practice_with_no_branded_prescribing = True
                    continue

                items = randint(0, 100)
                quantity = randint(6, 28) * items

                # Multiplying by (1 + ix) ensures that the branded cost is
                # always higher than the generic cost.
                actual_cost = (1 + ix) * randint(100, 200) * quantity * 0.01

                # We don't care about net_cost.
                net_cost = actual_cost

                row = [
                    'sha',  #  This value doesn't matter.
                    practice.ccg.regional_team_id,
                    practice.ccg.stp_id,
                    practice.ccg_id,
                    practice.code,
                    bnf_code,
                    bnf_name,
                    items,
                    net_cost,
                    actual_cost,
                    quantity,
                    timestamp,
                ]

                prescribing_rows.append(row)

    assert seen_practice_with_no_prescribing
    assert seen_practice_with_no_relevant_prescribing
    assert seen_practice_with_no_generic_prescribing
    assert seen_practice_with_no_branded_prescribing

    # These are for the coproxamol and glutenfree measures
    presentations = [
        ('0407010Q0AAAAAA', 'Co-Proxamol_Tab 32.5mg/325mg'),
        ('0904010AUBBAAAA', "Mrs Crimble's_G/F W/F Cheese Bites Orig"),
    ]

    for practice in Practice.objects.all():
        for month in [7, 8]:
            timestamp = '2018-0{}-01 00:00:00 UTC'.format(month)

            for bnf_code, bnf_name in presentations:
                items = randint(0, 100)
                quantity = randint(6, 28) * items

                actual_cost = randint(100, 200) * quantity * 0.01

                # We don't care about net_cost.
                net_cost = actual_cost

                row = [
                    'sha',  #  This value doesn't matter.
                    practice.ccg.regional_team_id,
                    practice.ccg.stp_id,
                    practice.ccg_id,
                    practice.code,
                    bnf_code,
                    bnf_name,
                    items,
                    net_cost,
                    actual_cost,
                    quantity,
                    timestamp,
                ]

                prescribing_rows.append(row)

    # In production, normalised_prescribing_standard is actually a view,
    # but for the tests it's much easier to set it up as a normal table.
    table = Client('hscic').get_table('normalised_prescribing_standard')

    with tempfile.NamedTemporaryFile() as f:
        writer = csv.writer(f)
        for row in prescribing_rows:
            writer.writerow(row)
        f.seek(0)
        table.insert_rows_from_csv(f.name)

        headers = [
            'sha',
            'regional_team_id',
            'stp_id',
            'ccg_id',
            'practice_id',
            'bnf_code',
            'bnf_name',
            'items',
            'net_cost',
            'actual_cost',
            'quantity',
            'month',
        ]
        prescriptions = pd.read_csv(f.name, names=headers)
        prescriptions['month'] = prescriptions['month'].str[:10]

    return prescriptions