import time
from typing import Dict, List, Tuple, Optional

from utils.logger_utils import LogManager
from utils.str_utils import check_is_json
from config import LOG_LEVEL, PROCESS_STATUS_FAIL
from utils.time_utils import datetime_str_change_fmt
from utils.exception_utils import LoginException, ParseDataException
from spiders import BaseSpider, BaseSpiderParseMethodType, CookieUtils
from utils.str_utils import check_is_phone_number, check_is_email_address

# Module-level logger; template id and level are passed through to LogManager.
logger = LogManager(__name__).get_logger_and_add_handlers(
    formatter_template=5,
    log_level_int=LOG_LEVEL,
)


class JuejinSpider(BaseSpider):
    """Spider bound to a single Juejin (juejin.im) account.

    NOTE(review): only the beginning of the constructor is visible in this
    excerpt; the class may define more state and methods further on.
    """

    def __init__(self, task_id: str, username: str, password: str):
        """Record the task identity, credentials, endpoints and empty login state.

        Args:
            task_id: Identifier of the crawl task this spider instance serves.
            username: Login name of the account; also embedded in the spider name.
            password: Login password of the account.

        NOTE(review): no call to ``BaseSpider.__init__`` appears in the visible
        portion of this constructor -- confirm it happens further down.
        """
        # Task identity and credentials supplied by the caller.
        self._task_id = task_id
        self._login_username = username
        self._login_password = password
        self._spider_name: str = f"juejin:{self._login_username}"

        # Remote endpoints referenced by this spider.
        self._main_url = "https://juejin.im/auth/type"
        self._blogs_url = (
            "https://timeline-merger-ms.juejin.im/v1/get_entry_by_self"
        )
        self._like_blogs_url = "https://user-like-wrapper-ms.juejin.im/v1/user"

        # Login/session values start out unset.
        self._login_cookies: Optional[str] = None
        self._login_token: Optional[str] = None
        self._login_uid: Optional[str] = None
        self._login_client_id: Optional[str] = None
from fastapi import Query, FastAPI, Request, BackgroundTasks

from utils.encrypt_utils import md5_str
from call_spider import spider_task_receiver
from utils.async_task_utils import MultiProcessQueue
from pipeline.redis_pipeline import RedisPipelineHandler
from utils.logger_utils import LogManager, UVICORN_LOGGING_CONFIG
from config import (
    LOG_LEVEL,
    SERVER_HOST,
    SERVER_PORT,
    SPIDER_SUPPORT_LIST,
    PROCESS_STATUS_FAIL,
)

# Logger for the API layer, registered under the fixed name "fastapi".
logger = LogManager("fastapi").get_logger_and_add_handlers(
    formatter_template=5,
    log_level_int=LOG_LEVEL,
)

# NOTE(review): Limiter, get_remote_address, RateLimitExceeded and
# _rate_limit_exceeded_handler are not imported in the visible excerpt;
# presumably they are imported earlier in the file -- confirm.
limiter = Limiter(key_func=get_remote_address)

# Application object; the OpenAPI schema and both documentation UIs are
# served under the "/fastapi" path prefix.
# NOTE(review): `openapi_prefix` is deprecated in FastAPI in favour of
# `root_path`; it is kept here (as an empty string) to leave behavior untouched.
# NOTE(review): "data_manger" may be a typo for "data_manager", but the URL is
# runtime-visible, so it is left as-is.
app = FastAPI(
    title="BlogsCrawler-Management",
    description="BlogsCrawler-API",
    version="1.0.0",
    openapi_prefix="",
    openapi_url="/fastapi/data_manger.json",
    docs_url="/fastapi/docs",
    redoc_url="/fastapi/redoc",
)

# Attach the limiter to the app state and register the handler invoked when
# RateLimitExceeded is raised.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Path prefix string for the versioned API.
app_api_router: str = "/api/v1"

# Module-level Redis pipeline handler instance.
app_redis_handler = RedisPipelineHandler()