Beispiel #1
0
def create_partition(area_type, release):
    """
    Create the `time_series` partition for an area type and a release date.

    Issues ``CREATE TABLE IF NOT EXISTS ... PARTITION OF covid19.time_series``
    for the partition ID returned by ``get_partition_id``.

    Parameters
    ----------
    area_type : str
        Area type; it is lower-cased before use. ``nhstrust``, ``utla``,
        ``ltla`` and ``msoa`` each get a partition table of their own, any
        other value is mapped to the shared ``other`` table.

    release : datetime
        Release datestamp. Its year, month and day (month and day without
        zero padding) form the prefix of the partition table name.

    Returns
    -------
    The value returned by ``get_partition_id`` for the lower-cased area
    type and the release.

    Raises
    ------
    Exception
        Any error other than ``ProgrammingError`` raised while executing
        the statement is re-raised after the session is rolled back.
    """
    import logging

    area_type = area_type.lower()
    partition_id = get_partition_id(area_type, release)

    if area_type in ("nhstrust", "utla", "ltla", "msoa"):
        suffix = area_type
    else:
        suffix = "other"

    # Same text as the `%Y_%-m_%-d` format, but without relying on the
    # `%-m` / `%-d` strftime flags, which are a platform-specific extension.
    area_partition = f"{release.year}_{release.month}_{release.day}_{suffix}"

    # NOTE(review): the table name and partition value are interpolated into
    # the DDL text; both are assumed to come from trusted internal helpers.
    session = Session()
    try:
        session.execute(
            f"""
            CREATE TABLE IF NOT EXISTS covid19.time_series_p{area_partition} 
            PARTITION OF covid19.time_series ( partition_id )
            FOR VALUES IN ('{partition_id}');
            """
        )
        session.flush()
    except ProgrammingError as err:
        # Kept non-fatal as before, but no longer silent.
        session.rollback()
        logging.warning(
            "Partition time_series_p%s was not created: %s",
            area_partition, err
        )
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

    return partition_id
Beispiel #2
0
def to_sql(df: DataFrame) -> None:
    """
    Upsert the rows of `df` into the `MainData` table in chunks.

    The frame is split into ``ceil(rows / DB_INSERT_MAX_ROWS)`` chunks and
    each chunk is sent as one ``INSERT ... ON CONFLICT DO UPDATE`` statement.
    On a conflict over ``(hash, partition_id)`` only the payload column is
    overwritten with the incoming value.

    Parameters
    ----------
    df : DataFrame
        Rows to store; each row is converted to a record dictionary, so the
        column names are presumably those of the `MainData` table.

    Returns
    -------
    None
        Also returned immediately, without touching the database, when
        `df` has no cells.

    Raises
    ------
    Exception
        Any error raised while acquiring the connection or executing a
        statement is re-raised after the session is rolled back.
    """
    if df.size == 0:
        return None

    n_chunks = ceil(df.shape[0] / DB_INSERT_MAX_ROWS)

    session = Session()
    try:
        # Acquired inside the `try` so that the session is still closed
        # when the connection cannot be obtained.
        connection = session.connection()

        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()

    except Exception:
        session.rollback()
        raise

    finally:
        session.close()

    return None
Beispiel #3
0
def confirm_or_create_area(area_type: str, area_code: str, area_name: str):
    """
    Insert an area into the `AreaReference` table unless it is already there.

    The insert does nothing when a row with the same area type and area
    code exists. The ``unique_ref`` column is set to ``"<type>|<code>"``.

    Parameters
    ----------
    area_type : str
        Area type of the row.

    area_code : str
        Area code of the row.

    area_name : str
        Area name of the row.

    Returns
    -------
    None
    """
    row = {
        "area_type": area_type,
        "area_code": area_code,
        "area_name": area_name,
        "unique_ref": f"{area_type}|{area_code}",
    }
    conflict_columns = [AreaReference.area_type, AreaReference.area_code]

    insertion = insert(AreaReference.__table__).values(**row)
    insertion = insertion.on_conflict_do_nothing(index_elements=conflict_columns)
    compiled = insertion.compile(dialect=postgres())

    db_session = Session()
    try:
        db_session.connection().execute(compiled)
        db_session.flush()
    except Exception as exc:
        db_session.rollback()
        raise exc
    finally:
        db_session.close()

    return None
Beispiel #4
0
def to_sql(df: DataFrame):
    """
    De-duplicate `df` and upsert its rows into the `MainData` table in chunks.

    Rows sharing the same ``release_id``, ``area_id``, ``metric_id`` and
    ``date`` are reduced to the first occurrence. The remaining rows are sent
    as ``INSERT ... ON CONFLICT DO UPDATE`` statements of at most
    ``DB_INSERT_MAX_ROWS`` rows each; on a conflict over
    ``(hash, partition_id)`` only the payload column is overwritten.

    Parameters
    ----------
    df : DataFrame
        Rows to store. NOTE: duplicates are dropped in place, so the
        caller's frame is modified.

    Returns
    -------
    None
        Also returned immediately, without touching the database, when
        `df` has no cells.

    Raises
    ------
    Exception
        Any error raised while acquiring the connection or executing a
        statement is re-raised after the session is rolled back.
    """
    if df.size == 0:
        return None

    df.drop_duplicates(
        subset=["release_id", "area_id", "metric_id", "date"],
        keep="first",
        inplace=True
    )

    # The chunk count must reflect the de-duplicated row count: counting
    # beforehand can yield more chunks than rows, and therefore empty chunks
    # that would be sent as INSERT statements without any values.
    # Ceiling division; at least one row survives, so the result is >= 1.
    n_chunks = -(-df.shape[0] // DB_INSERT_MAX_ROWS)

    session = Session()
    try:
        # Acquired inside the `try` so that the session is still closed
        # when the connection cannot be obtained.
        connection = session.connection()

        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()

    except Exception:
        session.rollback()
        raise

    finally:
        session.close()

    return None
def update_permissions():
    """
    Execute `PERMISSIONS_QUERY` on the connection of a fresh session.

    The session is rolled back and the error re-raised when execution
    fails; the session is closed in every case.

    Returns
    -------
    None
    """
    db_session = Session()
    conn = db_session.connection()
    try:
        statement = text(PERMISSIONS_QUERY)
        conn.execute(statement)
        db_session.flush()
    except Exception as exc:
        db_session.rollback()
        raise exc
    finally:
        db_session.close()

    return None
def update_stats(date, category):
    """
    Execute `STATS_QUERY` for the given date and category.

    The query template is filled with `date` as ``datestamp`` and with the
    partition IDs from ``get_partition_ids(date, category)``, joined by
    commas and wrapped in braces, as ``partitions``.

    Parameters
    ----------
    date
        Value substituted for ``datestamp`` and passed on to
        ``get_partition_ids``.

    category
        Passed on to ``get_partition_ids``.

    Returns
    -------
    None
    """
    db_session = Session()
    conn = db_session.connection()
    try:
        partition_ids = get_partition_ids(date, category)
        partitions = "{" + ",".join(partition_ids) + "}"
        sql = STATS_QUERY.format(datestamp=date, partitions=partitions)

        conn.execute(text(sql))
        db_session.flush()
    except Exception as exc:
        db_session.rollback()
        raise exc
    finally:
        db_session.close()

    return None
def register_file(filepath: str,
                  timestamp: datetime,
                  instance_id: str,
                  release_id=None) -> bool:
    """
    Record a file in the `ProcessedFile` table.

    When the insert violates an integrity constraint, the table is checked
    for a row with the same file path and process ID. If one exists, the
    file counts as already registered by this process and the call
    succeeds; otherwise the integrity error is re-raised.

    Parameters
    ----------
    filepath : str
        Path of the file; also parsed to derive the value of the ``type``
        column.

    timestamp : datetime
        Stored in the ``timestamp`` column.

    instance_id : str
        Stored in the ``process_id`` column.

    release_id : optional
        Stored in the ``release_id`` column. Default is ``None``.

    Returns
    -------
    bool
        Always ``True``; failures are signalled by exceptions.

    Raises
    ------
    IntegrityError
        When the insert fails and no row with the same file path and
        process ID exists.

    Exception
        Any other error is re-raised after the session is rolled back.
    """
    parsed_filepath = parse_filepath(filepath)

    processed_file = ProcessedFile(file_path=filepath,
                                   type=category_label(parsed_filepath),
                                   timestamp=timestamp,
                                   release_id=release_id,
                                   process_id=instance_id)

    session = Session()
    try:
        session.add(processed_file)
        session.flush()

    except IntegrityError:
        session.rollback()

        # Was the file already registered by this very process?
        query = session.execute(
            select([
                ProcessedFile.id,
            ]).where(
                and_(ProcessedFile.file_path == filepath,
                     ProcessedFile.process_id == instance_id)))
        result = query.fetchone()

        if result is not None:
            return True

        logging.info("Record already exists.")
        raise

    except Exception:
        session.rollback()
        raise

    finally:
        session.close()

    return True
Beispiel #8
0
def create_partition(area_type: str, release: datetime):
    """
    Creates new database partition - if one doesn't already exist - for
    the `time_series` table based on `area_type` and `release` datestamp.

    Parameters
    ----------
    area_type : str
        Area type, as defined in the `area_reference` table. The types
        ``nhsTrust``, ``utla``, ``ltla`` and ``msoa`` (matched
        case-sensitively) each get a partition table of their own; any
        other value is mapped to the shared ``other`` table.

    release: datetime
        Release timestamp of the data. Its year, month and day (month and
        day without zero padding) form the prefix of the partition table
        name.

    Returns
    -------
    None

    Raises
    ------
    Exception
        Any error other than ``ProgrammingError`` raised while executing
        the statement is re-raised after the session is rolled back.
    """
    import logging

    partition_id = get_partition_id(area_type, release)

    if area_type in ("nhsTrust", "utla", "ltla", "msoa"):
        suffix = area_type.lower()
    else:
        suffix = "other"

    # Same text as the `%Y_%-m_%-d` format, but without relying on the
    # `%-m` / `%-d` strftime flags, which are a platform-specific extension.
    area_partition = f"{release.year}_{release.month}_{release.day}_{suffix}"

    # NOTE(review): the table name and partition value are interpolated into
    # the DDL text; both are assumed to come from trusted internal helpers.
    session = Session()
    try:
        session.execute(
            f"""
            CREATE TABLE IF NOT EXISTS covid19.time_series_p{area_partition} 
            PARTITION OF covid19.time_series ( partition_id )
            FOR VALUES IN ('{partition_id}');
            """
        )
        session.flush()
    except ProgrammingError as err:
        # Kept non-fatal as before, but no longer silent.
        session.rollback()
        logging.warning(
            "Partition time_series_p%s was not created: %s",
            area_partition, err
        )
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
Beispiel #9
0
def add_metric(metric):
    """
    Insert `metric` into the `MetricReference` table unless it is present.

    The insert does nothing when a row with the same ``metric`` value
    already exists.

    Parameters
    ----------
    metric
        Value for the ``metric`` column.

    Returns
    -------
    None
    """
    insertion = insert(MetricReference.__table__).values(metric=metric)
    insertion = insertion.on_conflict_do_nothing(
        index_elements=[MetricReference.metric]
    )
    compiled = insertion.compile(dialect=postgres())

    db_session = Session()
    try:
        db_session.connection().execute(compiled)
        db_session.flush()
    except Exception as exc:
        db_session.rollback()
        raise exc
    finally:
        db_session.close()

    return None
def set_file_releaseid(filepath: str, release_id: int) -> bool:
    """
    Set the release ID on every `ProcessedFile` row with the given path.

    Parameters
    ----------
    filepath : str
        File path identifying the row(s) to update.

    release_id : int
        New value for the ``release_id`` column.

    Returns
    -------
    bool
        Always ``True``; failures are signalled by exceptions.

    Raises
    ------
    IntegrityError
        Re-raised, after a rollback and an info-level log entry, when the
        update violates an integrity constraint.

    Exception
        Any other error is re-raised after the session is rolled back.
    """
    session = Session()

    try:
        session.begin()

        session.query(ProcessedFile).filter(
            ProcessedFile.file_path == filepath).update(
                {"release_id": release_id})

        # Flush before committing; a flush issued after the commit can no
        # longer contribute anything to the transaction.
        session.flush()
        session.commit()
    except IntegrityError:
        session.rollback()
        logging.info("Record already exists.")
        raise
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()

    return True
Beispiel #11
0
def store_data(data: DataFrame):
    """
    Upsert the rows of `data` into the `PrivateReport` table.

    All rows are sent in a single ``INSERT ... ON CONFLICT DO UPDATE``
    statement. On a conflict over ``(slug_id, date, metric, area_id)`` only
    the value column is overwritten with the incoming value.

    Parameters
    ----------
    data : DataFrame
        Rows to store; nothing is done when the frame has no cells.

    Returns
    -------
    None
    """
    if data.size == 0:
        return None

    db_session = Session()
    conn = db_session.connection()
    try:
        rows = data.to_dict(orient="records")
        conflict_columns = [
            PrivateReport.slug_id,
            PrivateReport.date,
            PrivateReport.metric,
            PrivateReport.area_id,
        ]

        insertion = insert(PrivateReport.__table__).values(rows)
        upsert = insertion.on_conflict_do_update(
            index_elements=conflict_columns,
            set_={PrivateReport.value.name: insertion.excluded.value},
        )

        conn.execute(upsert)
        db_session.flush()
    except Exception as exc:
        db_session.rollback()
        raise exc
    finally:
        db_session.close()
Beispiel #12
0
def get_release_id(datestamp: datetime, process_name: str) -> Tuple[int, datetime]:
    """
    Retrieves the `release_id` for the process, creating it when missing.

    A release is looked up whose timestamp falls on the date of `datestamp`
    and which has a category for `process_name`. When none is found, a new
    release and its category are inserted, and the function calls itself
    again to read the stored row back.

    Parameters
    ----------
    datestamp : datetime
        Datestamp for the data.

    process_name : str
        Name of the process - must match the ENUM defined in the database.

    Returns
    -------
    Tuple[int, datetime]
        Tuple of `release_id` and the timestamp associated with the release.
    """
    release_with_category = join(
        ReleaseReference, ReleaseCategory,
        ReleaseReference.id == ReleaseCategory.release_id
    )
    same_day_and_process = and_(
        func.DATE(ReleaseReference.timestamp) == datestamp.date(),
        ReleaseCategory.process_name == process_name
    )
    lookup = (
        select([ReleaseReference.id, ReleaseReference.timestamp])
        .select_from(release_with_category)
        .where(same_day_and_process)
    )

    # Step 1: look for an existing release.
    reader = Session()
    try:
        existing = reader.execute(lookup).fetchone()
    except Exception as exc:
        reader.rollback()
        raise exc
    finally:
        reader.close()

    if existing is not None:
        return existing

    # Step 2: nothing found - insert the release, then its category.
    writer = Session(autocommit=True)
    try:
        new_release = ReleaseReference(timestamp=datestamp)
        writer.add(new_release)
        # Flushed first so that `new_release.id` is available below.
        writer.flush()

        new_category = ReleaseCategory(
            release_id=new_release.id,
            process_name=process_name
        )
        writer.add(new_category)
        writer.flush()
    except Exception as exc:
        writer.rollback()
        raise exc
    finally:
        writer.close()

    # Step 3: read the freshly stored row back through the lookup above.
    return get_release_id(datestamp, process_name)