def init() -> None:
    try:
        db = SessionLocal()  # Try to create a session to check if the DB is awake
        db.execute("SELECT 1")
    except Exception as e:
        logger.error(e)
        raise e
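# Hedged sketch (not project code) of how init() might be used to wait for the database
# at startup; the retry helper, its name, and its parameters are assumptions.
import time

def wait_for_db(max_tries: int = 60, interval: float = 1.0) -> None:
    for attempt in range(1, max_tries + 1):
        try:
            init()  # raises if the database is not reachable yet
            return
        except Exception:
            logger.warning(f"Database not ready (attempt {attempt}/{max_tries}), retrying...")
            time.sleep(interval)
    raise RuntimeError("Database did not become available in time")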
def get_db() -> Generator:
    logger.debug("start db session")
    db = SessionLocal()
    try:
        yield db
    finally:
        logger.debug("close db session")
        db.close()
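# Minimal usage sketch, assuming a FastAPI app (the /health route and its response body
# are hypothetical, not taken from the project): get_db is wired in as a per-request dependency.
from fastapi import Depends, FastAPI
from sqlalchemy import text
from sqlalchemy.orm import Session

app = FastAPI()

@app.get("/health")
def health(db: Session = Depends(get_db)) -> dict:
    # get_db yields an open session and closes it once the request is finished
    db.execute(text("SELECT 1"))
    return {"status": "ok"}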
async def db_session_middleware(request: Request, call_next):
    '''Attaches a local session to each incoming request.'''
    response = Response("Internal server error", status_code=500)
    try:
        request.state.db = SessionLocal()
        response = await call_next(request)
    finally:
        request.state.db.close()
    return response
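# Hedged sketch of how such a middleware is typically registered; the `app` object and the
# FastAPI/Starlette registration call are assumptions, not taken from the project.
from fastapi import FastAPI

app = FastAPI()
app.middleware("http")(db_session_middleware)

# Request handlers can then read the per-request session from request.state.db.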
class EmailCoreService:
    def __init__(self):
        self._db = SessionLocal()
        self._jinja_env = Environment(
            loader=FileSystemLoader(searchpath="src/templates"))
        self._yag = SMTP(settings.EMAIL_FROM,
                         oauth2_file=settings.GMAIL_CREDENTIALS_PATH)

    def add_message_to_queue(self, to: str, subject_template_path: str,
                             html_template_path: str, environment: Any):
        subject = self._jinja_env.get_template(subject_template_path).render(environment)
        html = self._jinja_env.get_template(html_template_path).render(environment)
        self._db.add(
            MessageQueue(to=to,
                         subject=subject,
                         contents=html,
                         status=MessageStatus.NEW,
                         message=""))
        self._db.commit()

    def send_message_from_queue(self) -> bool:
        """Find a new message in the queue and send it.

        Returns False if the queue is empty.
        """
        message: MessageQueue = self._db.query(MessageQueue) \
            .filter(MessageQueue.status == MessageStatus.NEW) \
            .with_for_update(skip_locked=True, key_share=True) \
            .first()
        if not message:
            return False
        try:
            self.send_message(message.to, message.subject, message.contents)
            # TODO: Handle errors
        finally:
            message.status = MessageStatus.OK
            self._db.commit()
        return True

    def send_message(self, to: str, subject: str, contents: str, html=True):
        if html:
            # Prevent yagmail from replacing newlines with <br> tags
            contents = contents.replace("\n", " ")
        return self._yag.send(to, subject, contents)
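# Usage sketch (not project code): queue one message and then drain the queue.
# The recipient address, the environment dict, and the polling loop are illustrative;
# the template names are the ones referenced elsewhere in this codebase.
import time

email_service = EmailCoreService()
email_service.add_message_to_queue(
    to="user@example.com",
    subject_template_path="new_articles_for_mutations_subject.html",
    html_template_path="new_articles_for_mutations.html",
    environment={"username": "alice", "mutations": {}},
)

# send_message_from_queue returns False once no NEW messages are left
while email_service.send_message_from_queue():
    time.sleep(0.1)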
def db() -> Generator:
    yield SessionLocal()
def __init__(self):
    self.db: Session = SessionLocal()
class SubscriptionCoreService:
    def __init__(self):
        self._db = SessionLocal()
        self._email_service = EmailCoreService()

    def find_articles_to_send(self):
        never_sent = SubscriptionStatus.last_send.is_(None)
        subscription_interval_reached = (
            UserConfig.subscription_interval + SubscriptionStatus.last_send < now())
        article_not_sent = ArticleData.updated > SubscriptionStatus.last_send
        subscription_condition = never_sent | subscription_interval_reached
        article_condition = never_sent | article_not_sent
        fields_to_select = (User.id, User.username, User.email,
                            ArticleData.title, ArticleData.url,
                            Subscription.mutation)
        articles_to_send = self._db.query(*fields_to_select)\
            .outerjoin(UserConfig)\
            .outerjoin(SubscriptionStatus)\
            .join(Subscription)\
            .join(ArticleMutation, ArticleMutation.mutation == Subscription.mutation)\
            .join(ArticleData)\
            .filter(User.active & subscription_condition & article_condition) \
            .all()
        articles_by_mutation_by_user_id = defaultdict(
            lambda: {"mutations": defaultdict(list)})
        for row in articles_to_send:
            articles_by_mutation_by_user_id[row.id]["mutations"][row.mutation].append({
                "title": row.title,
                "url": row.url,
            })
            articles_by_mutation_by_user_id[row.id]["email"] = row.email
            articles_by_mutation_by_user_id[row.id]["username"] = row.username
        return articles_by_mutation_by_user_id

    def find_and_queue_articles_to_send(self) -> int:
        articles_to_send = self.find_articles_to_send()
        for user_id, articles in articles_to_send.items():
            self.add_new_articles_message_to_queue(articles["email"], articles)
            subscription_status = {
                "user_id": user_id,
                "last_send": datetime.utcnow()
            }
            insert_statement = insert(SubscriptionStatus).values(
                subscription_status).on_conflict_do_update(
                    index_elements=[SubscriptionStatus.user_id],
                    set_={"last_send": datetime.utcnow()})
            self._db.execute(insert_statement)
        self._db.commit()
        return len(articles_to_send)

    def add_new_articles_message_to_queue(self, to, articles):
        self._email_service.add_message_to_queue(
            to,
            "new_articles_for_mutations_subject.html",
            "new_articles_for_mutations.html",
            articles,
        )
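# Hedged sketch of a periodic driver for the service above; the entry point and the
# hourly schedule are assumptions, not part of the project.
import time

if __name__ == "__main__":
    service = SubscriptionCoreService()
    while True:
        queued = service.find_and_queue_articles_to_send()
        logger.info(f"Queued new-article digests for {queued} users")
        time.sleep(60 * 60)  # e.g. hourly; the real schedule may differ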
print("Commiting changes...") session.commit() else: print("Table already has data") def delete_tables(engine: Engine, session: Session): # noqa print("\nRemoving Tables...\n") session.close_all() with contextlib.closing(engine.connect()) as con: trans = con.begin() for table in reversed(Base.metadata.sorted_tables): con.execute(table.delete()) for table in reversed(Base.metadata.sorted_tables): con.execute( f"TRUNCATE TABLE public.{table.name} RESTART IDENTITY CASCADE;" ) trans.commit() print("\nTables removed") if __name__ == "__main__": from settings import settings # noqa from src.db.session import engine, SessionLocal session = SessionLocal() create_tables(engine, session) # delete_tables(engine, session) session.close() # noqa engine.dispose()
def get_db() -> Generator:
    try:
        db = SessionLocal()
        yield db
    finally:
        db.close()
class ArticleCoreService:
    def __init__(self, ncbi_db: str = "pmc"):
        self.db = SessionLocal()
        self.ncbi_db = ncbi_db
        Entrez.email = settings.ENTREZ_EMAIL
        self.entrez = Entrez

    def get_last_fetch_date(self) -> Optional[date]:
        log: ArticleFetchLog = self.db.query(ArticleFetchLog).order_by(desc(ArticleFetchLog.end_date)).first()
        return log.end_date if log else None

    def fetch_article_ids_for_period(self, start: date, end: date) -> List[str]:
        query = 'SARS-CoV-2 mutation AND "open access"[filter]'  # fewer articles but more relevant content
        # query = 'SARS-CoV-2 AND "open access"[filter]'  # more articles but less relevant content
        if end - start < timedelta(0):  # end == start is OK: it means fetch for exactly one day
            logger.warning(f'Cannot fetch article IDs: end {end} is less than start {start}')
            return []
        ids = []
        pagesize = 500
        offset = 0
        while True:
            page_ids = self._fetch_article_ids_for_period_paged(query, start, end, pagesize, offset)
            if not page_ids:
                break
            ids += page_ids
            offset += pagesize
        return ids

    def fetch_and_save_new_article_ids(self) -> int:
        """Fetch new external article IDs, save them, and return how many were fetched."""
        last_fetch_date = self.get_last_fetch_date()
        if last_fetch_date:
            start = last_fetch_date + timedelta(days=1)  # next day after last fetch
        else:
            start = date.min
        end = date.today() - timedelta(days=1)  # yesterday
        ids = self.fetch_article_ids_for_period(start, end)
        message = f"Fetched {len(ids)} new article IDs"
        if ids:
            bulk_insert = insert(Article).values([
                {
                    "id": str(uuid4()),
                    "external_id": id_,
                    "body": "",
                    "status": ArticleStatus.NEW,
                    "message": ""
                } for id_ in ids
            ]).on_conflict_do_update(
                index_elements=[Article.external_id],
                set_={"status": ArticleStatus.NEW, "updated": datetime.utcnow()}
            )
            self.db.execute(bulk_insert)
        self.db.add(ArticleFetchLog(id=str(uuid4()), start_date=start, end_date=end, message=message))
        self.db.commit()
        return len(ids)

    def fetch_and_save_article(self, id_: UUID):
        article = self.db.query(Article).filter(Article.id == id_).with_for_update(key_share=True).first()
        if not article:
            raise RuntimeError(f'Article {id_} not found')
        try:
            self._update_article_body(article)
        finally:
            self.db.commit()

    def fetch_and_save_new_article(self):
        article = self.db.query(Article)\
            .filter(Article.status == ArticleStatus.NEW)\
            .with_for_update(skip_locked=True, key_share=True)\
            .first()
        if not article:
            return
        try:
            self._update_article_body(article)
        finally:
            self.db.commit()

    def parse_article(self, id_):
        article = self.db.query(Article).filter(Article.id == id_).with_for_update(key_share=True).first()
        if not article:
            raise RuntimeError(f'Article {id_} not found')
        allowed_statuses = {ArticleStatus.FETCHED, ArticleStatus.PARSED}
        if article.status not in allowed_statuses:
            raise RuntimeError("Article body hasn't been fetched. Please fetch it first")
        try:
            self._parse_and_save_article_data(article)
        except Exception as e:
            logger.warning(f'Cannot parse article {article.id} body: {e}')
            article.status = ArticleStatus.ERROR
            article.message = str(e)
        else:
            article.status = ArticleStatus.PARSED
        self.db.commit()

    def parse_new_article(self):
        article = self.db.query(Article) \
            .filter(Article.status == ArticleStatus.FETCHED) \
            .with_for_update(skip_locked=True, key_share=True) \
            .first()
        if not article:
            return
        try:
            self._parse_and_save_article_data(article)
        except Exception as e:
            logger.warning(f'Cannot parse article {article.id} body: {e}')
            article.status = ArticleStatus.ERROR
            article.message = str(e)
        else:
            article.status = ArticleStatus.PARSED
        self.db.commit()

    def _parse_article(self, article) -> ArticleDataDict:
        article_xml = ArticleXml(article.body)  # catch exception
        title = article_xml.title()
        mutations = article_xml.mutations()
        abstract = article_xml.abstract()
        return ArticleDataDict(title=title, mutations=mutations, abstract=abstract)

    def _parse_and_save_article_data(self, article: Article):
        data = self._parse_article(article)
        data["url"] = f"{PMC_BASE_URL}{article.external_id}"
        logger.info(f'{len(data["mutations"])} mutations found for article {article.id}')
        logger.debug(f'Mutations for article {article.id}: ' + ', '.join(data["mutations"]))
        self._save_article_data(article, data)

    def _save_article_data(self, article: Article, data: ArticleDataDict):
        base_data_keys = {"title", "abstract", "url"}
        base_data = {key: value for (key, value) in data.items() if key in base_data_keys}
        insert_statement = insert(ArticleData).values({**base_data, **{"id": article.id}}).on_conflict_do_update(
            index_elements=[ArticleData.id],
            set_={**base_data, **{"updated": datetime.utcnow()}}
        )
        self.db.execute(insert_statement)
        self.db.flush()
        if data["mutations"]:
            self.db.query(ArticleMutation).filter(ArticleMutation.article_id == article.id).delete()
            self.db.add_all([
                ArticleMutation(article_id=article.id, mutation=m) for m in data["mutations"]
            ])

    def _fetch_article_ids_for_period_paged(self, query: str, start: date, end: date,
                                            limit=100000, offset=0) -> List[str]:
        mindate = start.strftime(ENTREZ_DATE_FORMAT)
        maxdate = end.strftime(ENTREZ_DATE_FORMAT)
        logger.info(f'Fetching article IDs from {mindate} till {maxdate}, limit: {limit}, offset: {offset}')
        handle = self.entrez.esearch(
            db=self.ncbi_db,
            term=query,
            mindate=mindate,
            maxdate=maxdate,
            datetype='pdat',
            retmax=limit,
            retstart=offset,
        )
        record = self.entrez.read(handle)
        handle.close()
        ids = record["IdList"]
        logger.info(f'Fetched {len(ids)} IDs')
        logger.debug('IDs: ' + ', '.join(ids))
        return ids

    def _update_article_body(self, article: Article) -> Article:
        response: Optional[HTTPResponse] = None
        try:
            response = self.entrez.efetch(
                db=self.ncbi_db,
                id=article.external_id,
                rettype="xml",
                retmode="text"
            )
            raw_body = response.read()
            charset = response.headers.get_content_charset('utf-8')
            body = raw_body.decode(charset)
        except Exception as e:
            logger.warning(f'Error while fetching article {article.id}: {e}')
            article.status = ArticleStatus.ERROR
            article.message = str(e)
        else:
            logger.info(f'Article {article.id} fetched')
            logger.debug(f'Article {article.id} body: {body}')
            article.status = ArticleStatus.FETCHED
            article.body = body
        finally:
            if response is not None:  # efetch may raise before a response object exists
                response.close()
        return article
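# Illustrative end-to-end driver for ArticleCoreService (an assumption, not project code):
# discover new IDs, download their bodies, then parse them into article data.
def run_article_pipeline() -> None:
    service = ArticleCoreService()

    # Stage 1: record new external article IDs since the last fetch
    new_ids = service.fetch_and_save_new_article_ids()
    logger.info(f"{new_ids} new article IDs queued")

    # Stage 2: download bodies for articles in status NEW, one row at a time;
    # SKIP LOCKED lets several workers run this loop concurrently
    for _ in range(new_ids):
        service.fetch_and_save_new_article()

    # Stage 3: parse fetched bodies into ArticleData / ArticleMutation rows
    for _ in range(new_ids):
        service.parse_new_article()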