def create_partition(area_type, release):
    """
    Create a database partition of ``covid19.time_series`` for the given
    area type and release date, if one does not already exist.

    Parameters
    ----------
    area_type : str
        Area type label; normalised to lower case before use.

    release : datetime
        Release timestamp used to derive the partition name.

    Returns
    -------
    Partition id as produced by ``get_partition_id``.
    """
    area_type = area_type.lower()
    partition_id = get_partition_id(area_type, release)

    # Main area types get a dedicated partition; everything else shares a
    # catch-all "other" partition for the release date.
    if area_type in ["nhstrust", "utla", "ltla", "msoa"]:
        area_partition = f"{release:%Y_%-m_%-d}_{area_type}"
    else:
        area_partition = f"{release:%Y_%-m_%-d}_other"

    session = Session()
    try:
        # NOTE: identifiers cannot be bound as query parameters; both
        # interpolated values are generated internally, not user input.
        session.execute(
            f"""
            CREATE TABLE IF NOT EXISTS covid19.time_series_p{area_partition}
            PARTITION OF covid19.time_series ( partition_id )
            FOR VALUES IN ('{partition_id}');
            """
        )
        session.flush()
    except ProgrammingError:
        # Best effort: swallow e.g. a concurrent "partition already
        # exists" failure — roll back and carry on.
        session.rollback()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return partition_id
def to_sql(df: DataFrame) -> None:
    """
    Upsert dataframe rows into the main data table in chunks.

    Rows are written in chunks of at most ``DB_INSERT_MAX_ROWS``; on a
    ``(hash, partition_id)`` conflict the existing row's payload is
    replaced with the incoming one.

    Parameters
    ----------
    df : DataFrame
        Records to store; an empty frame is a no-op.

    Returns
    -------
    None
    """
    if df.size == 0:
        return None

    df_size = df.shape[0]
    # Ceiling division so a partial final chunk is still written.
    n_chunks = ceil(df_size / DB_INSERT_MAX_ROWS)

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
def confirm_or_create_area(area_type: str, area_code: str, area_name: str):
    """
    Ensure an area reference row exists for ``(area_type, area_code)``.

    Inserts the row into ``area_reference``; if a row with the same
    ``(area_type, area_code)`` already exists, the insert is a no-op.

    Parameters
    ----------
    area_type : str
        Area type label.

    area_code : str
        Area code.

    area_name : str
        Human-readable area name.

    Returns
    -------
    None
    """
    stmt = (
        insert(AreaReference.__table__)
        .values(
            area_type=area_type,
            area_code=area_code,
            area_name=area_name,
            unique_ref=f"{area_type}|{area_code}"
        )
        .on_conflict_do_nothing(
            index_elements=[
                AreaReference.area_type,
                AreaReference.area_code
            ]
        )
        .compile(dialect=postgres())
    )

    session = Session()
    try:
        session.connection().execute(stmt)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
def to_sql(df: DataFrame):
    """
    Deduplicate and upsert dataframe rows into the main data table.

    Duplicate ``(release_id, area_id, metric_id, date)`` rows are dropped
    (first occurrence wins), then rows are written in chunks of at most
    ``DB_INSERT_MAX_ROWS``; on a ``(hash, partition_id)`` conflict the
    existing row's payload is replaced.

    Parameters
    ----------
    df : DataFrame
        Records to store; an empty frame is a no-op. Mutated in place by
        the deduplication step.

    Returns
    -------
    None
    """
    if df.size == 0:
        return None

    df_size = df.shape[0]
    # Ceiling division, matching the sibling `to_sql`. The previous
    # `df_size // DB_INSERT_MAX_ROWS + 1` produced an extra empty chunk
    # whenever df_size was an exact multiple of DB_INSERT_MAX_ROWS,
    # which led to an insert with an empty record list.
    n_chunks = ceil(df_size / DB_INSERT_MAX_ROWS)

    df.drop_duplicates(
        ["release_id", "area_id", "metric_id", "date"],
        keep="first",
        inplace=True
    )

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")

            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )

            connection.execute(stmt)
            session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
def update_permissions():
    """
    Execute the predefined permissions query against the database.

    Returns
    -------
    None
    """
    session = Session()
    connection = session.connection()
    try:
        statement = text(PERMISSIONS_QUERY)
        connection.execute(statement)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
def update_stats(date, category):
    """
    Run the stats query for `date` against the partitions of `category`.

    The partition ids are rendered as a Postgres array literal — e.g.
    ``{id1,id2}`` — and substituted into ``STATS_QUERY`` along with the
    datestamp.

    Returns
    -------
    None
    """
    session = Session()
    connection = session.connection()
    try:
        partition_ids = get_partition_ids(date, category)
        partitions = "{" + ",".join(partition_ids) + "}"
        statement = text(
            STATS_QUERY.format(datestamp=date, partitions=partitions)
        )
        connection.execute(statement)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
def register_file(filepath: str, timestamp: datetime, instance_id: str,
                  release_id=None) -> bool:
    """
    Record a processed file in the ``processed_file`` table.

    Parameters
    ----------
    filepath : str
        Path of the processed file.

    timestamp : datetime
        Processing timestamp.

    instance_id : str
        Identifier of the process instance that handled the file.

    release_id
        Optional release id to associate with the file.

    Returns
    -------
    bool
        Always ``True``: either the row was inserted, or a row with the
        same ``(file_path, process_id)`` pair already exists.

    Raises
    ------
    IntegrityError
        If the insert fails and no matching record can be found.
    """
    parsed_filepath = parse_filepath(filepath)

    processed_file = ProcessedFile(
        file_path=filepath,
        type=category_label(parsed_filepath),
        timestamp=timestamp,
        release_id=release_id,
        process_id=instance_id
    )

    session = Session()
    try:
        session.add(processed_file)
        session.flush()
    except IntegrityError as err:
        session.rollback()
        # Duplicate key: treat as success only when a record for this
        # (path, process) pair is confirmed to exist.
        query = session.execute(
            select([
                ProcessedFile.id,
            ]).where(
                and_(
                    ProcessedFile.file_path == filepath,
                    ProcessedFile.process_id == instance_id
                )
            )
        )
        result = query.fetchone()

        if result is not None:
            return True

        # NOTE(review): this log fires when NO matching record was found,
        # so the message looks inverted — confirm intended placement.
        logging.info("Record already exists.")
        raise err
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return True
def create_partition(area_type: str, release: datetime):
    """
    Creates new database partition - if one doesn't already exist - for the
    `time_series` table based on `area_type` and `release` datestamp.

    Parameters
    ----------
    area_type : str
        Area type, as defined in the `area_reference` table.

    release: datetime
        Release timestamp of the data.

    Returns
    -------
    NoReturn
    """
    partition_id = get_partition_id(area_type, release)

    # NOTE(review): this membership test is case-sensitive ("nhsTrust"),
    # unlike the partition name below which lower-cases the value —
    # confirm callers always pass the camel-cased form.
    if area_type in ["nhsTrust", "utla", "ltla", "msoa"]:
        area_partition = f"{release:%Y_%-m_%-d}_{area_type.lower()}"
    else:
        area_partition = f"{release:%Y_%-m_%-d}_other"

    session = Session()
    try:
        # Identifiers cannot be bound as query parameters; both
        # interpolated values are generated internally, not user input.
        session.execute(
            f"""
            CREATE TABLE IF NOT EXISTS covid19.time_series_p{area_partition}
            PARTITION OF covid19.time_series ( partition_id )
            FOR VALUES IN ('{partition_id}');
            """
        )
        session.flush()
    except ProgrammingError:
        # Best effort: swallow e.g. a concurrent "partition already
        # exists" failure — roll back and carry on.
        session.rollback()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
def add_metric(metric):
    """
    Ensure a metric exists in the ``metric_reference`` table.

    Inserts the metric name; if it is already present, the insert is a
    no-op.

    Parameters
    ----------
    metric : str
        Metric name to register.

    Returns
    -------
    None
    """
    stmt = (
        insert(MetricReference.__table__)
        .values(metric=metric)
        .on_conflict_do_nothing(index_elements=[MetricReference.metric])
        .compile(dialect=postgres())
    )

    session = Session()
    try:
        session.connection().execute(stmt)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return None
def set_file_releaseid(filepath: str, release_id: int) -> bool:
    """
    Associate an already-registered file with a release.

    Updates the ``release_id`` of the ``processed_file`` row whose
    ``file_path`` matches ``filepath``.

    Parameters
    ----------
    filepath : str
        Path of the previously registered file.

    release_id : int
        Release id to assign.

    Returns
    -------
    bool
        ``True`` on success.

    Raises
    ------
    IntegrityError
        If the update violates a constraint.
    """
    session = Session()
    try:
        session.begin()
        session.query(ProcessedFile).filter(
            ProcessedFile.file_path == filepath).update(
            {"release_id": release_id})
        session.commit()
        # NOTE: the flush that previously followed commit() was a no-op
        # (nothing pending after commit) and has been removed.
    except IntegrityError as err:
        session.rollback()
        logging.info("Record already exists.")
        raise err
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return True
def store_data(data: DataFrame):
    """
    Upsert private report rows into the database.

    On a ``(slug_id, date, metric, area_id)`` conflict the existing row's
    value is replaced with the incoming one.

    Parameters
    ----------
    data : DataFrame
        Rows to store; an empty frame is a no-op.
    """
    if not data.size:
        return None

    session = Session()
    connection = session.connection()
    try:
        payload = data.to_dict(orient="records")

        base_stmt = insert(PrivateReport.__table__).values(payload)
        upsert_stmt = base_stmt.on_conflict_do_update(
            index_elements=[
                PrivateReport.slug_id,
                PrivateReport.date,
                PrivateReport.metric,
                PrivateReport.area_id
            ],
            set_={PrivateReport.value.name: base_stmt.excluded.value}
        )

        connection.execute(upsert_stmt)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
def get_release_id(datestamp: datetime, process_name: str) -> Tuple[int, datetime]:
    """
    Generates or retrieves the `release_id` for the process.

    Parameters
    ----------
    datestamp : datetime
        Datestamp for the data.

    process_name : str
        Name of the process - must match the ENUM defined in the database.

    Returns
    -------
    Tuple[int, datetime]
        Tuple of `release_id` and the timestamp associated with the release.
    """
    # Look up an existing release for this date and process.
    query = (
        select([
            ReleaseReference.id,
            ReleaseReference.timestamp
        ])
        .select_from(
            join(
                ReleaseReference, ReleaseCategory,
                ReleaseReference.id == ReleaseCategory.release_id
            )
        )
        .where(
            and_(
                # Match on the calendar date, not the full timestamp.
                func.DATE(ReleaseReference.timestamp) == datestamp.date(),
                ReleaseCategory.process_name == process_name
            )
        )
    )

    session = Session()
    try:
        response = session.execute(query)
        result = response.fetchone()

        # Found: return the (id, timestamp) row directly.
        if result is not None:
            return result
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    # Not found: create the release and its category link, then recurse
    # to fetch the freshly committed row.
    # NOTE(review): this is the only function here using autocommit=True
    # (flush commits immediately); elsewhere that option is disabled —
    # presumably intentional so the recursive lookup can see the rows.
    session = Session(autocommit=True)
    try:
        release = ReleaseReference(timestamp=datestamp)
        session.add(release)
        # Flush assigns (and, under autocommit, persists) release.id,
        # which the category row below needs.
        session.flush()

        category = ReleaseCategory(
            release_id=release.id,
            process_name=process_name
        )
        session.add(category)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return get_release_id(datestamp, process_name)