Exemple #1
0
import time
from typing import Dict, List, Tuple, Optional

from utils.logger_utils import LogManager
from utils.str_utils import check_is_json
from config import LOG_LEVEL, PROCESS_STATUS_FAIL
from utils.time_utils import datetime_str_change_fmt
from utils.exception_utils import LoginException, ParseDataException
from spiders import BaseSpider, BaseSpiderParseMethodType, CookieUtils
from utils.str_utils import check_is_phone_number, check_is_email_address

# Module-level logger: LogManager is given this module's name and the
# project-wide LOG_LEVEL imported from config.
# NOTE(review): formatter_template=5 picks a template defined inside
# LogManager; its exact layout is not visible from this file.
logger = LogManager(__name__).get_logger_and_add_handlers(
    formatter_template=5, log_level_int=LOG_LEVEL)


class JuejinSpider(BaseSpider):
    """Spider for a single Juejin (juejin.im) account, derived from ``BaseSpider``.

    NOTE(review): no call to ``BaseSpider.__init__`` is visible in the lines
    shown here -- confirm whether the initializer continues beyond this
    excerpt or whether the base class needs explicit initialization.
    """

    def __init__(self, task_id: str, username: str, password: str) -> None:
        """Record endpoints, task id and credentials for one account.

        Args:
            task_id: Identifier of the task; stored unchanged.
            username: Login name; stored and also embedded in the spider name.
            password: Login password; stored unchanged.
        """
        # Endpoint URLs kept on the instance. The names suggest: auth entry
        # point, the account's own posts, and the account's liked posts --
        # presumably; verify against the methods that use them.
        self._main_url = "https://juejin.im/auth/type"
        self._blogs_url = "https://timeline-merger-ms.juejin.im/v1/get_entry_by_self"
        self._like_blogs_url = "https://user-like-wrapper-ms.juejin.im/v1/user"

        # Caller-supplied task id and credentials.
        self._task_id = task_id
        self._login_username = username
        self._login_password = password

        # Spider name has the fixed form "juejin:<username>".
        self._spider_name: str = f"juejin:{self._login_username}"
        # Cookie string starts out unset (None).
        self._login_cookies: Optional[str] = None

        # Session identifiers start out unset (None); where they get filled
        # in is not shown in this excerpt.
        self._login_token: Optional[str] = None
        self._login_uid: Optional[str] = None
        self._login_client_id: Optional[str] = None
Exemple #2
0
from fastapi import Query, FastAPI, Request, BackgroundTasks

from utils.encrypt_utils import md5_str
from call_spider import spider_task_receiver
from utils.async_task_utils import MultiProcessQueue
from pipeline.redis_pipeline import RedisPipelineHandler
from utils.logger_utils import LogManager, UVICORN_LOGGING_CONFIG
from config import (
    LOG_LEVEL,
    SERVER_HOST,
    SERVER_PORT,
    SPIDER_SUPPORT_LIST,
    PROCESS_STATUS_FAIL,
)

# Logger created under the fixed name "fastapi", at the project-wide LOG_LEVEL.
# NOTE(review): formatter_template=5 picks a template defined inside
# LogManager; its exact layout is not visible from this file.
logger = LogManager("fastapi").get_logger_and_add_handlers(
    formatter_template=5, log_level_int=LOG_LEVEL)

# NOTE(review): Limiter, get_remote_address, RateLimitExceeded and
# _rate_limit_exceeded_handler have no import in the visible lines (they look
# like slowapi names) -- confirm they are imported elsewhere in this module.
limiter = Limiter(key_func=get_remote_address)
# Application object; the OpenAPI schema and both documentation UIs are
# served under the "/fastapi/" path prefix.
# NOTE(review): openapi_prefix is reported as deprecated in newer FastAPI
# releases in favour of root_path -- check against the pinned version.
# NOTE(review): "data_manger.json" may be a typo for "data_manager", but it
# is a served URL; renaming it would change the public schema path.
app = FastAPI(
    title="BlogsCrawler-Management",
    description="BlogsCrawler-API",
    version="1.0.0",
    openapi_prefix="",
    openapi_url="/fastapi/data_manger.json",
    docs_url="/fastapi/docs",
    redoc_url="/fastapi/redoc",
)
# Attach the limiter to the app state and register the handler for
# RateLimitExceeded; both steps need `app` to exist, so order matters here.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
# Path prefix string for the v1 API; how it is applied to routes is not
# shown in this excerpt.
app_api_router: str = "/api/v1"
# Single module-level RedisPipelineHandler instance, created at import time.
app_redis_handler = RedisPipelineHandler()