def time_limited_db_operation(timeout=None):
    """
    Context manager for performing some time limited DB operation.

    timeout: timeout for the operation in 's'; falls back to COUNT_TIMEOUT
             when not given (or given as 0, since 0 is falsy here).
    """
    # Postgres expects statement_timeout in milliseconds.
    timeout = int(timeout * 1000) if timeout else int(COUNT_TIMEOUT * 1000)
    session.execute(f"SET statement_timeout TO {timeout}; commit;")
    try:
        yield
    finally:
        # BUG FIX: always restore the unlimited timeout, even when the
        # caller's block raises -- the original skipped this on exception,
        # leaving the short timeout active on the (pooled) session.
        session.execute("SET statement_timeout TO 0; commit;")
def _get_results(report):
    """Limit the number of documents to REPORT_MAX_DOCUMENTS so as not to crash the server."""
    query = _build_query(report)
    try:
        # Bound the potentially expensive count() with a short timeout.
        session.execute(
            f"SET statement_timeout TO {int(REPORT_COUNT_TIMEOUT * 1000)}; commit;"
        )
        if query.count() == 0:
            return None
    except OperationalError:
        # count() timed out -- assume there are rows and fetch a page anyway.
        pass
    finally:
        # BUG FIX: the original skipped this reset on the early
        # `return None` path, leaving the short timeout active on the
        # session for every subsequent statement.
        session.execute("SET statement_timeout TO 0; commit;")
    results = query.order_by(
        Result.start_time.desc()).limit(REPORT_MAX_DOCUMENTS).all()
    return [result.to_dict() for result in results]
def prune_old_files(months=5):
    """Delete artifact files older than specified months (here defined as 30 days)."""
    try:
        months = int(months) if isinstance(months, str) else months
        # we don't want to remove files more recent than 2 months
        if months < 2:
            return
        cutoff = datetime.utcnow() - timedelta(days=months * DAYS_IN_MONTH)
        # delete artifact files older than the cutoff date
        session.execute(
            Artifact.__table__.delete().where(Artifact.upload_date < cutoff)
        )
        session.commit()
    except Exception:
        # we don't want to continually retry this task
        return
def prune_old_runs(months=12):
    """
    Remove runs older than specified months (here defined as 30 days).

    IMPORTANT NOTE: to avoid primary key errors, 'months' must be greater than
    what is used in 'prune_old_results'
    """
    try:
        months = int(months) if isinstance(months, str) else months
        # we don't want to remove files more recent than 10 months
        if months < 10:
            return
        cutoff = datetime.utcnow() - timedelta(days=months * DAYS_IN_MONTH)
        # delete runs that started before the cutoff date
        session.execute(
            Run.__table__.delete().where(Run.start_time < cutoff)
        )
        session.commit()
    except Exception:
        # we don't want to continually retry this task
        return
def get_count_estimate(query, no_filter=False, **kwargs):
    """
    Given tablename, return an estimated count of the number of rows in the table.

    query: the SQLAlchemy query to estimate (used when no_filter is False)
    no_filter: when True, read the fast pg_class statistics estimate for
               kwargs["tablename"] instead of explaining the query
    """
    if no_filter:
        # SECURITY FIX: bind the table name as a parameter instead of
        # interpolating it into the SQL string -- the f-string version
        # was vulnerable to SQL injection via 'tablename'.
        sql = "SELECT reltuples as approx_count FROM pg_class WHERE relname=:tablename"
        row = session.execute(sql, {"tablename": kwargs.get("tablename")}).fetchall()[0]
        return int(row[0])
    else:
        estimate = _get_count_from_explain(query)
        # if the estimate is < COUNT_ESTIMATE_LIMIT
        # then probably there aren't too many rows, just regularly count them
        if estimate < COUNT_ESTIMATE_LIMIT:
            return query.count()
        return estimate
def _get_count_from_explain(query):
    """Extract the planner's estimated row count from an EXPLAIN of *query*."""
    plan_line = session.execute(Explain(query)).fetchall()[0][0]
    # The plan line looks like "... (cost=... rows=<N> width=...)":
    # take the text after the last "rows", then the value between "=" and
    # the following space.
    after_rows = plan_line.split("rows")[-1]
    row_count = after_rows.split("=")[1].split(" ")[0]
    return int(row_count)