Beispiel #1
0
def to_sql(df: DataFrame) -> NoReturn:
    if df.size == 0:
        return None

    df_size = df.shape[0]
    n_chunks = ceil(df_size / DB_INSERT_MAX_ROWS)

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()

    except Exception as err:
        session.rollback()
        raise err

    finally:
        session.close()

    return None
def create_asset(release_date: str):

    partition_date = datetime.fromisoformat(release_date).strftime(
        "%Y_%-m_%-d")

    query = QUERY.format(date=partition_date)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query))
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    data = DataFrame(raw_data, columns=["properties", "geometry"])

    geo_data = {
        "type":
        "FeatureCollection",
        "features":
        (data.assign(type="Feature").reset_index().rename(columns={
            "index": "id"
        }).to_dict("records"))
    }

    return store_data(geo_data, "downloads", "maps/vax-data_latest.geojson")
Beispiel #3
0
def derive_publish_date_percentages(timestamp, area_type):
    query = PERCENTAGE_DATA.format(
        date=f"{timestamp:%Y_%-m_%-d}",
        area_type=area_type
    )

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query))
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    columns = [
        'area_id', 'partition_id', 'metric_id', 'metric', 'area_type',
        'area_code', 'release_id', 'date', 'payload'
    ]

    to_sql(
        DataFrame(raw_data, columns=columns)
        .pipe(lambda dt: dt.assign(hash=generate_row_hash(dt, hash_only=True)))
        .loc[:, ["metric_id", "area_id", "partition_id", "release_id", "hash", "date", "payload"]]
    )

    return True
Beispiel #4
0
def confirm_or_create_area(area_type: str, area_code: str, area_name: str):
    stmt = (
        insert(AreaReference.__table__)
        .values(
            area_type=area_type,
            area_code=area_code,
            area_name=area_name,
            unique_ref=f"{area_type}|{area_code}"
        )
        .on_conflict_do_nothing(
            index_elements=[
                AreaReference.area_type,
                AreaReference.area_code
            ]
        )
        .compile(dialect=postgres())
    )

    # session = Session(autocommit=True)
    session = Session()
    try:
        session.connection().execute(stmt)
        session.flush()
        # session.begin()
        # session.add(stmt)
        # session.commit()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
Beispiel #5
0
def to_sql(df: DataFrame):
    if df.size == 0:
        return None

    df_size = df.shape[0]
    n_chunks = df_size // DB_INSERT_MAX_ROWS + 1
    df.drop_duplicates(
        ["release_id", "area_id", "metric_id", "date"],
        keep="first",
        inplace=True
    )

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()

    except Exception as err:
        session.rollback()
        raise err

    finally:
        session.close()

    return None
Beispiel #6
0
def get_release(timestamp):
    insert_stmt = (
        insert(ReleaseReference.__table__)
        .values(timestamp=timestamp)
    )

    stmt = (
        insert_stmt
        .on_conflict_do_update(
            index_elements=[ReleaseReference.timestamp],
            set_={ReleaseReference.timestamp.name: insert_stmt.excluded.timestamp}
        )
        .returning(ReleaseReference.id)
    )

    # session = Session(autocommit=True)
    session = Session()
    try:
        response = session.execute(stmt)
        result = response.fetchone()[0]
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return result
Beispiel #7
0
def register_release(category: str, release_id: int):
    insert_stmt = (
        insert(ReleaseCategory.__table__)
        .values(
            release_id=release_id,
            process_name=category
        )
        .on_conflict_do_nothing(
            index_elements=[
                ReleaseCategory.release_id,
                ReleaseCategory.process_name,
            ]
        )
    )

    session = Session()
    try:
        session.begin()
        session.add(insert_stmt)
        session.commit()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
Beispiel #8
0
def get_timeseries(date: str, metric: str):
    ts = datetime.strptime(date, "%Y-%m-%d")
    partition = f"{ts:%Y_%-m_%-d}_other"
    partition_id = f"{ts:%Y_%-m_%-d}|other"
    values_query = queries.TIMESRIES_QUERY.format(partition=partition)
    change_query = queries.LATEST_CHANGE_QUERY.format(partition=partition)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(values_query),
                            partition_id=partition_id,
                            metric=metric)
        values = resp.fetchall()

        resp = conn.execute(text(change_query),
                            partition_id=partition_id,
                            datestamp=ts,
                            metric=metric + "Change")
        change = resp.fetchone()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    store_data(date, metric,
               plot_thumbnail(values, metric_name=metric, change=change))

    return True
Beispiel #9
0
def derive_publish_date_metrics(timestamp, area_type):
    query = PUBLISH_DATE_CALCULATION.format(
        today=f"{timestamp:%Y_%-m_%-d}",
        yesterday=get_previous_partition_date(),
        area_type=area_type
    )

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query), )
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    columns = [
        'partition_id', 'area_id', 'area_type', 'area_code',
        'metric_id', 'release_id', 'date', 'payload'
    ]

    to_sql(
        DataFrame(raw_data, columns=columns)
        .pipe(lambda dt: dt.assign(hash=generate_row_hash(dt, hash_only=True)))
        .loc[:, ["metric_id", "area_id", "partition_id", "release_id", "hash", "date", "payload"]]
    )

    return True
Beispiel #10
0
def get_vaccinations(date):
    ts = datetime.strptime(date, "%Y-%m-%d")
    partition = f"{ts:%Y_%-m_%-d}"

    vax_query = queries.VACCINATIONS_QUERY.format(partition_date=partition)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(vax_query), datestamp=ts)
        values = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    for item in values:
        store_data(date,
                   "vaccinations",
                   plot_vaccinations(item),
                   area_type=item["area_type"],
                   area_code=item["area_code"])

    return True
Beispiel #11
0
def create_partition(area_type, release):
    area_type = area_type.lower()
    partition_id = get_partition_id(area_type, release)

    if area_type in ["nhstrust", "utla", "ltla", "msoa"]:
        area_partition = f"{release:%Y_%-m_%-d}_{area_type}"
    else:
        area_partition = f"{release:%Y_%-m_%-d}_other"

    # session = Session(autocommit=True)
    session = Session()
    try:
        # session.begin()
        session.execute(
            f"""
            CREATE TABLE IF NOT EXISTS covid19.time_series_p{area_partition} 
            PARTITION OF covid19.time_series ( partition_id )
            FOR VALUES IN ('{partition_id}');
            """
        )
        session.flush()
    except ProgrammingError as e:
        session.rollback()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return partition_id
Beispiel #12
0
def get_release_id(datestamp: datetime, process_name: str) -> Tuple[int, datetime]:
    query = (
        select([
            ReleaseReference.id,
            ReleaseReference.timestamp
        ])
        .select_from(
            join(
                ReleaseReference, ReleaseCategory,
                ReleaseReference.id == ReleaseCategory.release_id
            )
        )
        .where(
            and_(
                func.DATE(ReleaseReference.timestamp) == func.DATE(datestamp.isoformat()),
                ReleaseCategory.process_name == process_name
            )
        )
    )

    session = Session()

    try:
        response = session.execute(query)
        result = response.fetchone()

        if result is not None:
            return result

    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    try:
        release = ReleaseReference(timestamp=datestamp)
        session.begin()
        session.add(release)
        session.commit()

        category = ReleaseCategory(release_id=release.id, process_name=process_name)
        session.begin()
        session.add(category)
        session.commit()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return get_release_id(datestamp, process_name)
def update_permissions():
    session = Session()
    connection = session.connection()
    try:
        connection.execute(text(PERMISSIONS_QUERY))
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
Beispiel #14
0
def get_previous_partition_date():
    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(PREVIOUS_PUBLICATION_DATE))
        data = resp.fetchone()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return f"{data[0]:%Y_%-m_%-d}"
def create_asset(data_type):
    params = PARAMETERS[data_type]

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(params["query"]),
                            process_name=params["process_name"])
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    store_data(dict(raw_data), params['container'], params['path'])
Beispiel #16
0
def get_area_data():
    session = Session()
    try:
        results = read_sql(
            f"""\
            SELECT c.id AS "area_id", c.area_type, c.area_code
            FROM covid19.area_reference AS c
            """,
            con=session.connection(),
            index_col=["area_type", "area_code"]
        )
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return results
Beispiel #17
0
def get_data(category: str, timestamp):
    query = text(CATEGORY_TIMESTAMPS)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(query, category=category, timestamp=timestamp)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    df = DataFrame(raw_data, columns=['timestamp'])

    df.timestamp = (df.timestamp.map(to_mins).astype({"timestamp": int}))
    return df
Beispiel #18
0
def get_metrics():
    session = Session()

    try:
        metrics = read_sql(
            f"""\
            SELECT ref.id AS "metric_id", ref.metric
            FROM covid19.metric_reference AS ref;
            """,
            con=session.connection(),
            index_col=["metric"]
        )
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return metrics
def update_stats(date, category):
    session = Session()
    connection = session.connection()
    try:
        connection.execute(
            text(
                STATS_QUERY.format(
                    datestamp=date,
                    partitions=
                    f'{{{str.join(",", get_partition_ids(date, category))}}}'))
        )
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
Beispiel #20
0
def generate_html(record_date: datetime):
    slug_id = get_record_id(record_date)

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(OUTPUT_DATA), slug_id=slug_id)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    df = DataFrame(raw_data, columns=["area_name", "metric", "date", "value"])
    df.value = df.value.map(format_number)

    area_names = [
        "United Kingdom", "England", "Northern Ireland", "Scotland", "Wales"
    ]

    result = list()

    for section in structure:
        result.append(
            [process_section(df, item, area_names) for item in section])

    template = env.get_template("base.html")

    html_data = template.render(
        results={
            "area_names": area_names,
            "data": result
        },
        timestamp=datetime.now().strftime(r"%A, %d %b %Y at %H:%M:%S GMT"))

    # Uncomment for testing
    # with open("sample.html", "w") as fp:
    #     print(html_data, file=fp)

    # Comment for testing
    store_html(html_data, slug_id=slug_id)
def get_latest_scale_records(payload):
    date = datetime.fromisoformat(payload["timestamp"])
    area_type = payload["area_type"]

    query = RATES.format(area_type=AREA_TYPE_PARTITION[area_type],
                         date=f"{date:%Y_%-m_%-d}")

    metric, attr = "newCasesBySpecimenDateRollingRate", "value"
    if area_type == "msoa":
        metric, attr = "newCasesBySpecimenDate", "rollingRate"

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query),
                            metric=metric,
                            attr=attr,
                            area_type=area_type)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    data = DataFrame(raw_data, columns=["area_type", "area_code", "rate"])

    results = {
        "records": data.to_dict(orient="records"),
        "percentiles": {
            "min": data.loc[:, "rate"].min(),
            "0.1": data.loc[:, "rate"].quantile(0.1).round(1),
            "0.4": data.loc[:, "rate"].quantile(0.4).round(1),
            "median": data.loc[:, "rate"].median().round(1),
            "0.6": data.loc[:, "rate"].quantile(0.6).round(1),
            "0.9": data.loc[:, "rate"].quantile(0.9).round(1),
            "max": data.loc[:, "rate"].max(),
        },
        "timestamp": payload["timestamp"]
    }

    return results
def register_file(filepath: str,
                  timestamp: datetime,
                  instance_id: str,
                  release_id=None) -> True:
    parsed_filepath = parse_filepath(filepath)

    processed_file = ProcessedFile(file_path=filepath,
                                   type=category_label(parsed_filepath),
                                   timestamp=timestamp,
                                   release_id=release_id,
                                   process_id=instance_id)

    session = Session()
    try:
        session.add(processed_file)
        session.flush()

    except IntegrityError as err:
        session.rollback()

        query = session.execute(
            select([
                ProcessedFile.id,
            ]).where(
                and_(ProcessedFile.file_path == filepath,
                     ProcessedFile.process_id == instance_id)))
        result = query.fetchone()

        if result is not None:
            return True

        logging.info("Record already exists.")
        raise err

    except Exception as err:
        session.rollback()
        raise err

    finally:
        session.close()

    return True
Beispiel #23
0
def retrieve_data(timestamp: datetime):
    with open(query_path) as fp:
        query = fp.read()

    query = Template(query).substitute(release_date=f"{timestamp:%Y_%-m_%-d}")

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(query))
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    df = DataFrame(raw_data, columns=["key", "value"])

    return df
Beispiel #24
0
def get_data(record_date: datetime) -> DataFrame:
    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(text(
            MAIN_QUERY.format(partition_date=f"{record_date:%Y_%-m_%-d}")),
                            metrics=METRICS)
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    dt = (DataFrame(raw_data,
                    columns=[
                        "release_id", "area_id", "metric", "date", "value"
                    ]).assign(slug_id=get_record_id(record_date)))

    return dt
Beispiel #25
0
def get_metric_id(metric: str):
    stmt = (
        select([
            MetricReference.id
        ])
        .where(
            MetricReference.metric == metric
        )
    )

    session = Session()
    try:
        result = session.execute(stmt)
        return result.fetchone()[0]

    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
Beispiel #26
0
def create_partition(area_type: str, release: datetime):
    """
    Creates new database partition - if one doesn't already exist - for
    the `time_series` table based on `area_type` and `release` datestamp.

    Parameters
    ----------
    area_type : str
        Area type, as defined in the `area_reference` table.

    release: datetime
        Release timestamp of the data.

    Returns
    -------
    NoReturn
    """
    partition_id = get_partition_id(area_type, release)

    if area_type in ["nhsTrust", "utla", "ltla", "msoa"]:
        area_partition = f"{release:%Y_%-m_%-d}_{area_type.lower()}"
    else:
        area_partition = f"{release:%Y_%-m_%-d}_other"

    session = Session()
    try:
        session.execute(
            f"""
            CREATE TABLE IF NOT EXISTS covid19.time_series_p{area_partition} 
            PARTITION OF covid19.time_series ( partition_id )
            FOR VALUES IN ('{partition_id}');
            """
        )
        session.flush()
    except ProgrammingError as e:
        session.rollback()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
Beispiel #27
0
def get_data(query, metric):
    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(
            text(query),
            metric=metric
        )
        raw_data = resp.fetchone()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    data = {
        f'{metric}Date': f'{raw_data[1]:%-d %b}',
        metric: format(int(raw_data[2]), ",d")
    }

    return data
Beispiel #28
0
def get_area_codes(area_type: str):
    query = (
        select([
            AreaReference.id.label("area_id"),
            AreaReference.area_code
        ])
        .where(
            AreaReference.area_type == area_type
        )
    )

    session = Session()
    try:
        result = read_sql(query, con=session.connection())
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return result
Beispiel #29
0
def add_metric(metric):
    stmt = (
        insert(MetricReference.__table__)
        .values(metric=metric)
        .on_conflict_do_nothing(index_elements=[MetricReference.metric])
        .compile(dialect=postgres())
    )

    session = Session()
    try:
        # session.begin()
        session.connection().execute(stmt)
        session.flush()
        # session.commit()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
Beispiel #30
0
def get_data(timestamp: datetime):
    query = MAIN.format(date=f"{timestamp:%Y_%-m_%-d}")
    metric = "newCasesBySpecimenDateRollingRate"

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(
            text(query),
            metric=metric,
            datestamp=f'{timestamp::%Y-%m-%d}'
        )
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    print(len(raw_data))
    return DataFrame(raw_data, columns=["areaType", "areaCode", metric])