Ejemplo n.º 1
0
        "headers": {
            "User-Agent": constants.USER_AGENT,
            "Content-Type": constants.CONTENT_TYPE
        },
        "cookies": {
            "language": constants.LANGUAGE,
            "vts_usr_lg": constants.USER_COOKIE
        },
        "meta": {
            "ticker": "",     # ticker
            "ReportType": ""  # document type, use this so we can use TickerCrawlSpiderMiddleware
        }
        }

# Logging configuration for this spider, built by the project's
# ``utilities.log_settings`` helper: INFO level, with the formatter given as
# a dotted path to ``TickerSpiderLogFormatter``.
log_settings = utilities.log_settings(
    spiderName=name,
    log_level="INFO",
    log_formatter="scraper_vietstock.spiders.models.utilities.TickerSpiderLogFormatter",
)

# Scrapy middleware configuration for this spider.
# Each value is the middleware's order number; in Scrapy, mapping a
# middleware path to ``None`` disables it.
middlewares_settings = {
    "DOWNLOADER_MIDDLEWARES": {
        # Proxy rotation followed by ban detection.
        "rotating_proxies.middlewares.RotatingProxyMiddleware": 610,
        "rotating_proxies.middlewares.BanDetectionMiddleware": 620,
        # Project middlewares, currently switched off:
        # "scraper_vietstock.middlewares.TickerCrawlDownloaderMiddleware": 901,
        # "scraper_vietstock.fad_stats.TickerCrawlerStats": 850,
        # Built-in downloader stats middleware is explicitly disabled.
        "scrapy.downloadermiddlewares.stats.DownloaderStats": None,
    },
    "SPIDER_MIDDLEWARES": {
        # Currently switched off:
        # "scraper_vietstock.middlewares.TickerCrawlSpiderMiddleware": 45,
    },
}
Ejemplo n.º 2
0
# Request template for the Vietstock industry-list endpoint.
industry_list = {
    "url": "https://finance.vietstock.vn/data/industrylist",
    # HTTP headers sent with the request.
    "headers": {
        "User-Agent": constants.USER_AGENT,
    },
    # Cookies sent with the request: language and request-verification token.
    "cookies": {
        "language": constants.LANGUAGE,
        "__RequestVerificationToken": constants.REQ_VER_TOKEN_COOKIE,
    },
    # Meta placeholders, empty here; presumably filled in per request by the
    # spider -- confirm against the caller.
    "meta": {
        "bizType_id": "",
        "bizType_title": "",
    },
}

# Logging configuration for the "regular" spider, at INFO level; no
# log_formatter is passed in this call.
log_settings_regular = utilities.log_settings(
    spiderName=name_regular,
    log_level="INFO",
)

# Scrapy downloader middlewares for this spider: proxy rotation followed by
# ban detection. Each value is the middleware's order number.
middlewares_settings = {
    "DOWNLOADER_MIDDLEWARES": {
        "rotating_proxies.middlewares.RotatingProxyMiddleware": 610,
        "rotating_proxies.middlewares.BanDetectionMiddleware": 620,
    },
}

# Proxy pool for the rotating-proxies middleware.
# NOTE(review): presumably PRIVOXY_LOCAL_PROXY is a list of proxy addresses --
# confirm in constants.
proxy_settings = {"ROTATING_PROXY_LIST": constants.PRIVOXY_LOCAL_PROXY}

redis_settings = {
    'REDIS_HOST': constants.REDIS_HOST,
    'REDIS_PORT': constants.REDIS_PORT