Example #1
def init() -> None:
    try:
        db = SessionLocal()
        # Try to create a session to check if the DB is awake
        db.execute("SELECT 1")
    except Exception as e:
        logger.error(e)
        raise  # bare raise preserves the original traceback
Example #2
def get_db() -> Generator:
    logger.debug("start db session")
    db = SessionLocal()
    try:
        yield db
    finally:
        logger.debug("close db session")
        db.close()
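Not part of the original project, but for context: a generator like this is typically wired up as a FastAPI dependency, so each request gets its own session and the finally block closes it after the response. A minimal sketch, assuming a FastAPI app and a hypothetical Item model:

from fastapi import Depends, FastAPI
from sqlalchemy.orm import Session

app = FastAPI()

@app.get("/items/{item_id}")
def read_item(item_id: int, db: Session = Depends(get_db)):
    # `db` is the session yielded by get_db; FastAPI runs the generator's
    # finally block (closing the session) once the response is sent.
    return db.query(Item).filter(Item.id == item_id).first()  # Item is hypothetical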
Example #3
async def db_session_middleware(request: Request, call_next):
    response = Response("Internal server error", status_code=500)
    try:
        request.state.db = SessionLocal()
        response = await call_next(request)
    finally:
        request.state.db.close()
    return response
Example #4
File: main.py Project: crayzee/useful
async def db_session_middleware(request: Request, call_next):
    '''Attaches a local session to our requests.'''
    response = Response("Internal server error", status_code=500)
    try:
        request.state.db = SessionLocal()
        response = await call_next(request)
    finally:
        request.state.db.close()
    return response
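A middleware function like this still has to be registered on the application. A minimal sketch, assuming FastAPI (which the Request/Response signature suggests):

from fastapi import FastAPI

app = FastAPI()

# Register the function above as HTTP middleware: every request gets a fresh
# session on request.state.db, and the finally block always closes it.
app.middleware("http")(db_session_middleware)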
Example #5
class EmailCoreService:
    def __init__(self):
        self._db = SessionLocal()
        self._jinja_env = Environment(loader=FileSystemLoader(
            searchpath="src/templates"))
        self._yag = SMTP(settings.EMAIL_FROM,
                         oauth2_file=settings.GMAIL_CREDENTIALS_PATH)

    def add_message_to_queue(self, to: str, subject_template_path: str,
                             html_template_path: str, environment: Any):
        subject = self._jinja_env.get_template(subject_template_path).render(
            environment)
        html = self._jinja_env.get_template(html_template_path).render(
            environment)

        self._db.add(
            MessageQueue(to=to,
                         subject=subject,
                         contents=html,
                         status=MessageStatus.NEW,
                         message=""))
        self._db.commit()

    def send_message_from_queue(self) -> bool:
        """Find a new message in the queue and send it.

        Returns False if queue is empty
        """
        message: MessageQueue = self._db.query(MessageQueue) \
            .filter(MessageQueue.status == MessageStatus.NEW) \
            .with_for_update(skip_locked=True, key_share=True) \
            .first()

        if not message:
            return False

        try:
            self.send_message(message.to, message.subject,
                              message.contents)  # TODO: Handle errors
        finally:
            # NOTE: the message is marked OK even if send_message raised;
            # see the TODO above about proper error handling.
            message.status = MessageStatus.OK
            self._db.commit()

        return True

    def send_message(self, to: str, subject: str, contents: str, html=True):
        if html:
            contents = contents.replace(
                "\n",
                " ")  # Prevent yagmail from replacing newlines with <br> tags
        return self._yag.send(to, subject, contents)
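One plausible way to drive this service (a sketch, not taken from the repo) is a worker loop that drains the queue until send_message_from_queue reports it is empty:

def drain_email_queue() -> int:
    """Hypothetical worker: send queued messages until the queue is empty."""
    service = EmailCoreService()
    sent = 0
    while service.send_message_from_queue():  # returns False once the queue is empty
        sent += 1
    return sent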
Example #6
def db() -> Generator:
    # NOTE: the yielded session is never closed here; prefer the
    # try/finally pattern from Example #2 to avoid leaking sessions.
    yield SessionLocal()
Example #7
    def __init__(self):
        self.db: Session = SessionLocal()
Example #8
    def __init__(self):
        self._db = SessionLocal()
        self._email_service = EmailCoreService()
Example #9
class SubscriptionCoreService:
    def __init__(self):
        self._db = SessionLocal()
        self._email_service = EmailCoreService()

    def find_articles_to_send(self):
        never_sent = SubscriptionStatus.last_send.is_(None)
        subscription_interval_reached = (
            UserConfig.subscription_interval + SubscriptionStatus.last_send < now()
        )
        article_not_sent = ArticleData.updated > SubscriptionStatus.last_send

        subscription_condition = never_sent | subscription_interval_reached
        article_condition = never_sent | article_not_sent

        fields_to_select = (User.id, User.username, User.email,
                            ArticleData.title, ArticleData.url,
                            Subscription.mutation)

        articles_to_send = self._db.query(*fields_to_select)\
            .outerjoin(UserConfig)\
            .outerjoin(SubscriptionStatus)\
            .join(Subscription)\
            .join(ArticleMutation, ArticleMutation.mutation == Subscription.mutation)\
            .join(ArticleData)\
            .filter(User.active & subscription_condition & article_condition) \
            .all()

        articles_by_mutation_by_user_id = defaultdict(
            lambda: {"mutations": defaultdict(list)})
        for row in articles_to_send:
            articles_by_mutation_by_user_id[row.id]["mutations"][
                row.mutation].append({
                    "title": row.title,
                    "url": row.url,
                })
            articles_by_mutation_by_user_id[row.id]["email"] = row.email
            articles_by_mutation_by_user_id[row.id]["username"] = row.username

        return articles_by_mutation_by_user_id

    def find_and_queue_articles_to_send(self) -> int:
        articles_to_send = self.find_articles_to_send()
        for user_id, articles in articles_to_send.items():
            self.add_new_articles_message_to_queue(articles["email"], articles)
            subscription_status = {
                "user_id": user_id,
                "last_send": datetime.utcnow()
            }
            insert_statement = insert(SubscriptionStatus).values(
                subscription_status).on_conflict_do_update(
                    index_elements=[SubscriptionStatus.user_id],
                    set_={"last_send": datetime.utcnow()})
            self._db.execute(insert_statement)
        self._db.commit()

        return len(articles_to_send)

    def add_new_articles_message_to_queue(self, to, articles):
        self._email_service.add_message_to_queue(
            to,
            "new_articles_for_mutations_subject.html",
            "new_articles_for_mutations.html",
            articles,
        )
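A plausible entry point for this service (an assumption, e.g. a cron task) queues the pending digests and then reuses the email worker sketched after Example #5:

def run_subscription_job() -> None:
    # Hypothetical scheduler hook: queue digests, then let the mail worker send them.
    service = SubscriptionCoreService()
    queued = service.find_and_queue_articles_to_send()
    print(f"Queued article digests for {queued} user(s)")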
Example #10
        print("Commiting changes...")
        session.commit()
    else:
        print("Table already has data")


def delete_tables(engine: Engine, session: Session):  # noqa
    print("\nRemoving Tables...\n")
    session.close_all()  # deprecated since SQLAlchemy 1.3; close_all_sessions() is the replacement
    with contextlib.closing(engine.connect()) as con:
        trans = con.begin()
        for table in reversed(Base.metadata.sorted_tables):
            con.execute(table.delete())
        for table in reversed(Base.metadata.sorted_tables):
            con.execute(
                f"TRUNCATE TABLE public.{table.name} RESTART IDENTITY CASCADE;"
            )
        trans.commit()
    print("\nTables removed")


if __name__ == "__main__":
    from settings import settings  # noqa
    from src.db.session import engine, SessionLocal

    session = SessionLocal()
    create_tables(engine, session)
    # delete_tables(engine, session)
    session.close()  # noqa
    engine.dispose()
Example #11
def get_db() -> Generator:
    db = SessionLocal()  # create the session outside try so `db` is always bound in finally
    try:
        yield db
    finally:
        db.close()
Example #12
    def __init__(self, ncbi_db: str = "pmc"):
        self.db = SessionLocal()
        self.ncbi_db = ncbi_db

        Entrez.email = settings.ENTREZ_EMAIL
        self.entrez = Entrez
Example #13
class ArticleCoreService:
    def __init__(self, ncbi_db: str = "pmc"):
        self.db = SessionLocal()
        self.ncbi_db = ncbi_db

        Entrez.email = settings.ENTREZ_EMAIL
        self.entrez = Entrez

    def get_last_fetch_date(self) -> Optional[date]:
        log: ArticleFetchLog = self.db.query(ArticleFetchLog).order_by(desc(ArticleFetchLog.end_date)).first()
        return log.end_date if log else None

    def fetch_article_ids_for_period(self, start: date, end: date) -> List[str]:
        query = 'SARS-CoV-2 mutation AND "open access"[filter]'  # fewer articles but more relevant content
        # query = 'SARS-CoV-2 AND "open access"[filter]'  # more articles but less relevant content

        if end - start < timedelta(0):  # end == start is OK; it means fetch for exactly one day
            logger.warning(f'Cannot fetch article IDs: end {end} is earlier than start {start}')
            return []

        ids = []
        pagesize = 500
        offset = 0
        while True:
            page_ids = self._fetch_article_ids_for_period_paged(query, start, end, pagesize, offset)
            if not page_ids:
                break

            ids += page_ids
            offset += pagesize

        return ids

    def fetch_and_save_new_article_ids(self) -> int:
        """Fetches external article IDs and returns them"""
        last_fetch_date = self.get_last_fetch_date()
        if last_fetch_date:
            start = last_fetch_date + timedelta(days=1)  # next day after last fetch
        else:
            start = date.min

        end = date.today() - timedelta(days=1)  # yesterday

        ids = self.fetch_article_ids_for_period(start, end)
        message = f"Fetched {len(ids)} new article IDs"

        if ids:
            bulk_insert = insert(Article).values([
                {
                    "id": str(uuid4()),
                    "external_id": id_,
                    "body": "",
                    "status": ArticleStatus.NEW,
                    "message": ""
                } for id_ in ids
            ]).on_conflict_do_update(
                index_elements=[Article.external_id],
                set_={"status": ArticleStatus.NEW, "updated": datetime.utcnow()}
            )
            self.db.execute(bulk_insert)

        self.db.add(ArticleFetchLog(id=str(uuid4()), start_date=start, end_date=end, message=message))
        self.db.commit()

        return len(ids)

    def fetch_and_save_article(self, id_: UUID):
        article = self.db.query(Article).filter(Article.id == id_).with_for_update(key_share=True).first()

        if not article:
            raise RuntimeError(f'Article {id_} not found')

        try:
            self._update_article_body(article)
        finally:
            self.db.commit()

    def fetch_and_save_new_article(self):
        article = self.db.query(Article)\
            .filter(Article.status == ArticleStatus.NEW)\
            .with_for_update(skip_locked=True, key_share=True)\
            .first()

        if not article:
            return

        try:
            self._update_article_body(article)
        finally:
            self.db.commit()

    def parse_article(self, id_):
        article = self.db.query(Article).filter(Article.id == id_).with_for_update(key_share=True).first()

        if not article:
            raise RuntimeError(f'Article {id_} not found')

        allowed_statuses = {ArticleStatus.FETCHED, ArticleStatus.PARSED}
        if article.status not in allowed_statuses:
            raise RuntimeError(f"Article body hasn't been fetched. Please fetch it first")

        try:
            self._parse_and_save_article_data(article)
        except Exception as e:
            logger.warning(f'Cannot parse article {article.id} body: {e}')
            article.status = ArticleStatus.ERROR
            article.message = str(e)
        else:
            article.status = ArticleStatus.PARSED

        self.db.commit()

    def parse_new_article(self):
        article = self.db.query(Article) \
            .filter(Article.status == ArticleStatus.FETCHED) \
            .with_for_update(skip_locked=True, key_share=True) \
            .first()

        if not article:
            return

        try:
            self._parse_and_save_article_data(article)
        except Exception as e:
            logger.warning(f'Cannot parse article {article.id} body: {e}')
            article.status = ArticleStatus.ERROR
            article.message = str(e)
        else:
            article.status = ArticleStatus.PARSED

        self.db.commit()

    def _parse_article(self, article) -> ArticleDataDict:
        article_xml = ArticleXml(article.body)  # may raise if the body is not well-formed XML; callers handle this

        title = article_xml.title()
        mutations = article_xml.mutations()
        abstract = article_xml.abstract()

        return ArticleDataDict(title=title, mutations=mutations, abstract=abstract)

    def _parse_and_save_article_data(self, article: Article):
        data = self._parse_article(article)
        data["url"] = f"{PMC_BASE_URL}{article.external_id}"

        logger.info(f'{len(data["mutations"])} mutations found for article {article.id}')
        logger.debug(f'Mutations for article {article.id}: ' + ', '.join(data["mutations"]))

        self._save_article_data(article, data)

    def _save_article_data(self, article: Article, data: ArticleDataDict):
        base_data_keys = {"title", "abstract", "url"}
        base_data = {key: value for (key, value) in data.items() if key in base_data_keys}
        insert_statement = insert(ArticleData).values({**base_data, **{"id": article.id}}).on_conflict_do_update(
            index_elements=[ArticleData.id],
            set_={**base_data, **{"updated": datetime.utcnow()}}
        )
        self.db.execute(insert_statement)
        self.db.flush()

        if data["mutations"]:
            self.db.query(ArticleMutation).filter(ArticleMutation.article_id == article.id).delete()
            self.db.add_all([
                ArticleMutation(article_id=article.id, mutation=m) for m in data["mutations"]
            ])

    def _fetch_article_ids_for_period_paged(self,
                                            query: str, start: date, end: date, limit=100000, offset=0) -> List[str]:
        mindate = start.strftime(ENTREZ_DATE_FORMAT)
        maxdate = end.strftime(ENTREZ_DATE_FORMAT)

        logger.info(f'Fetching article IDs from {mindate} till {maxdate}, limit: {limit}, offset: {offset}')

        handle = self.entrez.esearch(
            db=self.ncbi_db,
            term=query,
            mindate=mindate,
            maxdate=maxdate,
            datetype='pdat',
            retmax=limit,
            retstart=offset,
        )
        record = self.entrez.read(handle)
        handle.close()

        ids = record["IdList"]

        logger.info(f'Fetched {len(ids)} IDs')
        logger.debug('IDs: ' + ', '.join(ids))

        return ids

    def _update_article_body(self, article: Article) -> Article:
        response: Optional[HTTPResponse] = None  # bound before try so finally can check it
        try:
            response = self.entrez.efetch(
                db=self.ncbi_db,
                id=article.external_id,
                rettype="xml",
                retmode="text"
            )
            raw_body = response.read()
            charset = response.headers.get_content_charset('utf-8')
            body = raw_body.decode(charset)
        except Exception as e:
            logger.warning(f'Error while fetching article {article.id}: {e}')
            article.status = ArticleStatus.ERROR
            article.message = str(e)
        else:
            logger.info(f'Article {article.id} fetched')
            logger.debug(f'Article {article.id} body: {body}')
            article.status = ArticleStatus.FETCHED
            article.body = body
        finally:
            if response is not None:  # efetch may have raised before `response` was assigned
                response.close()

        return article
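Taken together, these methods form a three-stage pipeline: discover IDs, fetch bodies, parse them. A minimal driver sketch, assuming the same module context (the function name is hypothetical):

def run_article_pipeline() -> None:
    # Hypothetical driver: one pass over the fetch/parse pipeline.
    service = ArticleCoreService()
    service.fetch_and_save_new_article_ids()  # stage 1: discover new external IDs
    service.fetch_and_save_new_article()      # stage 2: download one NEW article body
    service.parse_new_article()               # stage 3: parse one FETCHED article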
Example #14
    def __init__(self):
        self._db = SessionLocal()
        self._jinja_env = Environment(loader=FileSystemLoader(
            searchpath="src/templates"))
        self._yag = SMTP(settings.EMAIL_FROM,
                         oauth2_file=settings.GMAIL_CREDENTIALS_PATH)