def confirm_or_create_area(area_type: str, area_code: str, area_name: str):
    """Insert an area reference row if it does not already exist.

    The statement is compiled for the PostgreSQL dialect so that
    ``ON CONFLICT DO NOTHING`` silently skips pre-existing
    ``(area_type, area_code)`` pairs.

    Improvements: removed dead commented-out code (autocommit session,
    begin/add/commit experiments); added documentation.
    """
    stmt = (
        insert(AreaReference.__table__)
        .values(
            area_type=area_type,
            area_code=area_code,
            area_name=area_name,
            unique_ref=f"{area_type}|{area_code}"
        )
        .on_conflict_do_nothing(
            index_elements=[
                AreaReference.area_type,
                AreaReference.area_code
            ]
        )
        .compile(dialect=postgres())
    )
    session = Session()
    try:
        session.connection().execute(stmt)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
    return None
def get_email_recipients():
    """Return e-mail addresses of active, approved report recipients.

    Fix: the original leaked the session — no rollback on failure and no
    ``close()`` at all, unlike every other DB helper in this codebase.
    The rows are materialised into a list before the session is closed.
    """
    session = Session()
    try:
        # NOTE: `== False` / `== None` are SQLAlchemy column-operator
        # overloads generating `IS FALSE` / `IS NULL` SQL — not Python
        # comparisons, so they must not be "fixed" to `is`.
        query = session.execute(
            select([ReportRecipient.recipient]).where(
                and_(ReportRecipient.deactivated == False,
                     not_(ReportRecipient.approved_by == None))))
        return [item[0] for item in query]
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
def to_sql(df: DataFrame):
    """Upsert the frame into the main data table in bounded-size chunks.

    Rows are de-duplicated on (release_id, area_id, metric_id, date) and
    written with ``ON CONFLICT DO UPDATE`` keyed on (hash, partition_id),
    replacing only the payload.

    Fix: the chunk count used ``df_size // DB_INSERT_MAX_ROWS + 1``, which
    yields an extra *empty* chunk whenever ``df_size`` is an exact multiple
    of ``DB_INSERT_MAX_ROWS`` — ``insert(...).values([])`` then fails.
    Ceiling division is used instead, matching the other ``to_sql`` variant.
    """
    if df.size == 0:
        return None

    df_size = df.shape[0]
    # Integer ceiling division (no `math.ceil` needed).
    n_chunks = -(-df_size // DB_INSERT_MAX_ROWS)

    df.drop_duplicates(
        ["release_id", "area_id", "metric_id", "date"],
        keep="first",
        inplace=True
    )

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")
            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )
            connection.execute(stmt)
            session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
    return None
def to_sql(df: DataFrame) -> None:
    """Upsert the frame into the main data table in bounded-size chunks.

    Chunks never exceed ``DB_INSERT_MAX_ROWS`` rows; conflicts on
    (hash, partition_id) replace only the payload column.

    Fix: the return annotation was ``NoReturn``, which means "never returns
    normally" (e.g. always raises) — this function returns ``None`` on the
    happy path, so the correct annotation is ``-> None``.
    """
    if df.size == 0:
        return None

    df_size = df.shape[0]
    n_chunks = ceil(df_size / DB_INSERT_MAX_ROWS)

    session = Session()
    connection = session.connection()
    try:
        for chunk in df.pipe(array_split, n_chunks):
            records = chunk.to_dict(orient="records")
            insert_stmt = insert(MainData.__table__).values(records)
            stmt = insert_stmt.on_conflict_do_update(
                index_elements=[MainData.hash, MainData.partition_id],
                set_={MainData.payload.name: insert_stmt.excluded.payload}
            )
            connection.execute(stmt)
            session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
    return None
def update_permissions():
    """Execute the stored ``PERMISSIONS_QUERY`` against the database."""
    db_session = Session()
    cnx = db_session.connection()
    try:
        cnx.execute(text(PERMISSIONS_QUERY))
        db_session.flush()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
    return None
def derive_publish_date_metrics(timestamp, area_type):
    """Calculate publish-date metrics for one area type and upsert them.

    Runs ``PUBLISH_DATE_CALCULATION`` for today's and yesterday's
    partitions, hashes each row, and forwards the result to ``to_sql``.
    """
    sql = PUBLISH_DATE_CALCULATION.format(
        today=f"{timestamp:%Y_%-m_%-d}",
        yesterday=get_previous_partition_date(),
        area_type=area_type
    )

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(sql)).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    columns = [
        'partition_id', 'area_id', 'area_type', 'area_code',
        'metric_id', 'release_id', 'date', 'payload'
    ]
    frame = DataFrame(rows, columns=columns)
    frame = frame.assign(hash=generate_row_hash(frame, hash_only=True))
    to_sql(
        frame.loc[:, ["metric_id", "area_id", "partition_id",
                      "release_id", "hash", "date", "payload"]]
    )
    return True
def derive_publish_date_percentages(timestamp, area_type):
    """Calculate publish-date percentage metrics and upsert them.

    Runs ``PERCENTAGE_DATA`` for the given partition date, hashes each
    row, and forwards the relevant columns to ``to_sql``.
    """
    sql = PERCENTAGE_DATA.format(
        date=f"{timestamp:%Y_%-m_%-d}",
        area_type=area_type
    )

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(sql)).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    columns = [
        'area_id', 'partition_id', 'metric_id', 'metric', 'area_type',
        'area_code', 'release_id', 'date', 'payload'
    ]
    frame = DataFrame(rows, columns=columns)
    frame = frame.assign(hash=generate_row_hash(frame, hash_only=True))
    to_sql(
        frame.loc[:, ["metric_id", "area_id", "partition_id",
                      "release_id", "hash", "date", "payload"]]
    )
    return True
def get_timeseries(date: str, metric: str):
    """Fetch a metric's time series plus its latest change and store a thumbnail plot."""
    ts = datetime.strptime(date, "%Y-%m-%d")
    partition = f"{ts:%Y_%-m_%-d}_other"
    partition_id = f"{ts:%Y_%-m_%-d}|other"

    # NOTE(review): "TIMESRIES" typo lives in the `queries` module —
    # cannot be renamed from here without touching that module.
    values_query = queries.TIMESRIES_QUERY.format(partition=partition)
    change_query = queries.LATEST_CHANGE_QUERY.format(partition=partition)

    db_session = Session()
    cnx = db_session.connection()
    try:
        values = cnx.execute(
            text(values_query),
            partition_id=partition_id,
            metric=metric
        ).fetchall()
        change = cnx.execute(
            text(change_query),
            partition_id=partition_id,
            datestamp=ts,
            metric=metric + "Change"
        ).fetchone()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    store_data(date, metric, plot_thumbnail(values, metric_name=metric, change=change))
    return True
def get_vaccinations(date):
    """Fetch vaccination rows for the date's partition and store one plot per area."""
    ts = datetime.strptime(date, "%Y-%m-%d")
    partition = f"{ts:%Y_%-m_%-d}"
    vax_query = queries.VACCINATIONS_QUERY.format(partition_date=partition)

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(vax_query), datestamp=ts).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    for row in rows:
        store_data(
            date,
            "vaccinations",
            plot_vaccinations(row),
            area_type=row["area_type"],
            area_code=row["area_code"]
        )
    return True
def get_release(timestamp):
    """Upsert a release timestamp and return the row's id.

    Uses ``ON CONFLICT DO UPDATE`` (writing the same timestamp back) so
    that ``RETURNING id`` yields the row id whether or not it already
    existed.
    """
    upsert = (
        insert(ReleaseReference.__table__)
        .values(timestamp=timestamp)
    )
    stmt = (
        upsert
        .on_conflict_do_update(
            index_elements=[ReleaseReference.timestamp],
            set_={ReleaseReference.timestamp.name: upsert.excluded.timestamp}
        )
        .returning(ReleaseReference.id)
    )

    db_session = Session()
    try:
        release_id = db_session.execute(stmt).fetchone()[0]
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
    return release_id
def create_asset(release_date: str):
    """Build the latest vaccination GeoJSON asset and store it."""
    partition_date = datetime.fromisoformat(release_date).strftime("%Y_%-m_%-d")
    sql = QUERY.format(date=partition_date)

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(sql)).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    frame = DataFrame(rows, columns=["properties", "geometry"])
    features = (
        frame
        .assign(type="Feature")
        .reset_index()
        .rename(columns={"index": "id"})
        .to_dict("records")
    )
    geo_data = {
        "type": "FeatureCollection",
        "features": features
    }
    return store_data(geo_data, "downloads", "maps/vax-data_latest.geojson")
def add_metric(metric):
    """Insert ``metric`` into the metric reference table, ignoring duplicates.

    Compiled for the PostgreSQL dialect so that ``ON CONFLICT DO NOTHING``
    makes the call idempotent.

    Improvements: removed dead commented-out code (``session.begin()`` /
    ``session.commit()`` experiments); added documentation.
    """
    stmt = (
        insert(MetricReference.__table__)
        .values(metric=metric)
        .on_conflict_do_nothing(index_elements=[MetricReference.metric])
        .compile(dialect=postgres())
    )
    session = Session()
    try:
        session.connection().execute(stmt)
        session.flush()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()
    return None
def update_stats(date, category):
    """Run the statistics query for a date/category over its partitions.

    The partition ids are rendered as a Postgres array literal
    (``{id1,id2,...}``) before being substituted into the query.
    """
    partitions = f'{{{",".join(get_partition_ids(date, category))}}}'
    stats_sql = STATS_QUERY.format(datestamp=date, partitions=partitions)

    db_session = Session()
    cnx = db_session.connection()
    try:
        cnx.execute(text(stats_sql))
        db_session.flush()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
    return None
def get_previous_partition_date():
    """Return the previous publication date formatted as a partition suffix."""
    db_session = Session()
    cnx = db_session.connection()
    try:
        row = cnx.execute(text(PREVIOUS_PUBLICATION_DATE)).fetchone()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
    return f"{row[0]:%Y_%-m_%-d}"
def create_asset(data_type):
    """Fetch the configured dataset for ``data_type`` and store it as an asset.

    Looks up query/container/path configuration in ``PARAMETERS`` and
    stores the key-value result set.

    Fix: the ``store_data`` result is now returned, for consistency with
    the other ``create_asset`` helpers which return it.
    """
    params = PARAMETERS[data_type]

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(
            text(params["query"]),
            process_name=params["process_name"]
        )
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    return store_data(dict(raw_data), params['container'], params['path'])
def store_data(data: DataFrame):
    """Upsert private-report rows, replacing the value column on conflict.

    Conflicts are keyed on (slug_id, date, metric, area_id). Empty frames
    are a no-op.
    """
    if not data.size:
        return None

    db_session = Session()
    cnx = db_session.connection()
    try:
        records = data.to_dict(orient="records")
        base_stmt = insert(PrivateReport.__table__).values(records)
        upsert = base_stmt.on_conflict_do_update(
            index_elements=[
                PrivateReport.slug_id,
                PrivateReport.date,
                PrivateReport.metric,
                PrivateReport.area_id
            ],
            set_={PrivateReport.value.name: base_stmt.excluded.value}
        )
        cnx.execute(upsert)
        db_session.flush()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
def get_area_data():
    """Return area reference data indexed by (area_type, area_code)."""
    db_session = Session()
    try:
        frame = read_sql(
            """\
SELECT c.id AS "area_id", c.area_type, c.area_code
FROM covid19.area_reference AS c
""",
            con=db_session.connection(),
            index_col=["area_type", "area_code"]
        )
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
    return frame
def get_data(category: str, timestamp):
    """Return category timestamps as a DataFrame of integer minutes."""
    stmt = text(CATEGORY_TIMESTAMPS)

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(stmt, category=category, timestamp=timestamp).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    frame = DataFrame(rows, columns=['timestamp'])
    frame.timestamp = frame.timestamp.map(to_mins).astype({"timestamp": int})
    return frame
def get_metrics():
    """Return metric reference data indexed by metric name."""
    db_session = Session()
    try:
        frame = read_sql(
            """\
SELECT ref.id AS "metric_id", ref.metric
FROM covid19.metric_reference AS ref;
""",
            con=db_session.connection(),
            index_col=["metric"]
        )
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
    return frame
def get_latest_scale_records(payload):
    """Return latest case-rate records plus distribution percentiles.

    MSOA areas use the raw specimen-date metric with its ``rollingRate``
    attribute; all other area types use the pre-rolled rate metric with
    its ``value`` attribute.
    """
    date = datetime.fromisoformat(payload["timestamp"])
    area_type = payload["area_type"]

    sql = RATES.format(
        area_type=AREA_TYPE_PARTITION[area_type],
        date=f"{date:%Y_%-m_%-d}"
    )

    if area_type == "msoa":
        metric, attr = "newCasesBySpecimenDate", "rollingRate"
    else:
        metric, attr = "newCasesBySpecimenDateRollingRate", "value"

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(
            text(sql),
            metric=metric,
            attr=attr,
            area_type=area_type
        ).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    frame = DataFrame(rows, columns=["area_type", "area_code", "rate"])
    rate = frame.loc[:, "rate"]

    return {
        "records": frame.to_dict(orient="records"),
        "percentiles": {
            "min": rate.min(),
            "0.1": rate.quantile(0.1).round(1),
            "0.4": rate.quantile(0.4).round(1),
            "median": rate.median().round(1),
            "0.6": rate.quantile(0.6).round(1),
            "0.9": rate.quantile(0.9).round(1),
            "max": rate.max(),
        },
        "timestamp": payload["timestamp"]
    }
def generate_html(record_date: datetime):
    """Render the report template for ``record_date`` and store the HTML."""
    slug_id = get_record_id(record_date)

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(OUTPUT_DATA), slug_id=slug_id).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    frame = DataFrame(rows, columns=["area_name", "metric", "date", "value"])
    frame.value = frame.value.map(format_number)

    area_names = [
        "United Kingdom", "England", "Northern Ireland", "Scotland", "Wales"
    ]

    result = [
        [process_section(frame, item, area_names) for item in section]
        for section in structure
    ]

    template = env.get_template("base.html")
    html_data = template.render(
        results={
            "area_names": area_names,
            "data": result
        },
        timestamp=datetime.now().strftime(r"%A, %d %b %Y at %H:%M:%S GMT")
    )

    store_html(html_data, slug_id=slug_id)
def get_data(record_date: datetime) -> DataFrame:
    """Return main-query rows for the record date, tagged with its slug id."""
    sql = MAIN_QUERY.format(partition_date=f"{record_date:%Y_%-m_%-d}")

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(sql), metrics=METRICS).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    frame = DataFrame(
        rows,
        columns=["release_id", "area_id", "metric", "date", "value"]
    )
    return frame.assign(slug_id=get_record_id(record_date))
def retrieve_data(timestamp: datetime):
    """Load the query template from disk, bind the release date, and run it."""
    with open(query_path) as fp:
        raw_query = fp.read()

    sql = Template(raw_query).substitute(release_date=f"{timestamp:%Y_%-m_%-d}")

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(sql)).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    return DataFrame(rows, columns=["key", "value"])
def get_metric_id(metric: str):
    """Look up and return the primary-key id for ``metric``."""
    stmt = select([MetricReference.id]).where(MetricReference.metric == metric)

    db_session = Session()
    try:
        row = db_session.execute(stmt).fetchone()
        return row[0]
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
def get_data(query, metric):
    """Return a single metric's latest date and formatted value.

    Result keys: ``<metric>Date`` (e.g. ``"4 Jan"``) and ``<metric>``
    (thousands-separated integer).
    """
    db_session = Session()
    cnx = db_session.connection()
    try:
        row = cnx.execute(text(query), metric=metric).fetchone()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    return {
        f'{metric}Date': f'{row[1]:%-d %b}',
        metric: format(int(row[2]), ",d")
    }
def get_area_codes(area_type: str):
    """Return area ids and codes for the given area type as a DataFrame."""
    stmt = (
        select([
            AreaReference.id.label("area_id"),
            AreaReference.area_code
        ])
        .where(AreaReference.area_type == area_type)
    )

    db_session = Session()
    try:
        frame = read_sql(stmt, con=db_session.connection())
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
    return frame
def get_data(timestamp: datetime):
    """Return rolling-rate rows for the timestamp's partition.

    Fix: the ``datestamp`` bind used ``f'{timestamp::%Y-%m-%d}'`` — the
    doubled colon made the format spec ``:%Y-%m-%d``, so the bound value
    carried a spurious leading ``":"`` (e.g. ``":2021-05-01"``) and the
    date filter could never match. Corrected to a single colon.
    """
    query = MAIN.format(date=f"{timestamp:%Y_%-m_%-d}")
    metric = "newCasesBySpecimenDateRollingRate"

    session = Session()
    conn = session.connection()
    try:
        resp = conn.execute(
            text(query),
            metric=metric,
            datestamp=f'{timestamp:%Y-%m-%d}'
        )
        raw_data = resp.fetchall()
    except Exception as err:
        session.rollback()
        raise err
    finally:
        session.close()

    # NOTE(review): debug output — consider `logging` instead of print.
    print(len(raw_data))

    return DataFrame(raw_data, columns=["areaType", "areaCode", metric])
def get_announcements():
    """Return announcements as rendered-markdown bodies with display dates."""
    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(text(ANNOUNCEMENTS)).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    return [
        {
            "body": markdown(item["body"].strip()),
            "date": datetime.fromisoformat(item["date"]).strftime("%-d %B %Y")
        }
        for item in rows
    ]
def main(req: HttpRequest) -> HttpResponse:
    """Health-check endpoint: verifies database and storage connectivity.

    Raises on any failure; returns an ``ALIVE`` 200 response when both
    probes succeed.
    """
    logging.info("Running health checks")

    logging.info("Testing database connection and response")
    db_session = Session()
    cnx = db_session.connection()
    try:
        probe = cnx.execute(text("SELECT 1 AS result;"))
        _ = probe.fetchone()
        logging.info("Database is alive")
    except Exception:
        db_session.rollback()
        logging.warning("Database is dead")
        raise
    finally:
        db_session.close()

    storage_kws = dict(
        container="pipeline",
        path="healthchecks/etl",
        content_type="text/plain",
        compressed=False
    )

    logging.info("Testing storage connection")
    try:
        # Round-trip a tiny blob: upload, read back, delete.
        with StorageClient(**storage_kws) as storage:
            storage.upload("test")
            storage.download()
            storage.delete()
        logging.info("Storage is alive")
    except Exception:
        logging.warning("Storage is dead")
        raise

    logging.info("All tests passed.")
    return HttpResponse(b"ALIVE", status_code=200)
def create_asset(area_type: str, device: str, release_date: str):
    """Build a GeoJSON map asset for an area type / device and store it.

    Mobile devices only receive features for the most recent date in the
    result set.
    """
    params = PARAMETERS[area_type]
    partition_date = datetime.fromisoformat(release_date).strftime("%Y_%-m_%-d")

    sql = params['query'].format(
        date=partition_date,
        area_type=AREA_TYPE_PARTITION[area_type],
        attr=params['attribute']
    )

    db_session = Session()
    cnx = db_session.connection()
    try:
        rows = cnx.execute(
            text(sql),
            metric=params["metric"],
            area_type=area_type
        ).fetchall()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()

    frame = DataFrame(rows, columns=["date", "properties", "geometry"])

    if device == Device.mobile:
        frame = frame.loc[frame.date == frame.date.max(), :]

    features = (
        frame
        .drop(columns=["date"])
        .assign(type="Feature")
        .reset_index()
        .rename(columns={"index": "id"})
        .to_dict("records")
    )
    geo_data = {
        "type": "FeatureCollection",
        "features": features
    }
    return store_data(geo_data, params['container'], params['path'][device])