class SpiderQueen:
    scrapydo.setup()

    global rankings
    rankings = {"DPS": [], "Heal": [], "TDPS": []}

    @staticmethod
    def divide_chunks(l):
        # Sort entries by their 'average' field (highest first) and split into pages of 14.
        l.sort(key=lambda x: float(x['average']), reverse=True)
        n = 14
        divlist = []
        for i in range(0, len(l), n):
            divlist.append(l[i:i + n])
        return divlist

    def DPSSpiderCrawl(self):
        global DPS
        DPS = []
        scrapydo.run_spider(DPSSpider(), settings={
            'USER_AGENT': 'Mozilla/5.0',
        })
        DPSChunk = SpiderQueen.divide_chunks(DPS)
        return DPSChunk

    def TankSpiderCrawl(self):
        global Tank
        Tank = []
        scrapydo.run_spider(TankSpider(), settings={
            'USER_AGENT': 'Mozilla/5.0',
        })
        TankChunk = SpiderQueen.divide_chunks(Tank)
        return TankChunk

    def HealSpiderCrawl(self):
        global Heal
        Heal = []
        scrapydo.run_spider(HealSpider(), settings={
            'USER_AGENT': 'Mozilla/5.0',
        })
        HealChunk = SpiderQueen.divide_chunks(Heal)
        return HealChunk

    def Queen(self):
        # Run all three spiders and return the shared rankings dict they populate.
        global rankings
        rankings = {"DPS": [], "Heal": [], "TDPS": []}
        scrapydo.run_spider(DPSSpider(), settings={'USER_AGENT': 'Mozilla/5.0'})
        scrapydo.run_spider(TankSpider(), settings={'USER_AGENT': 'Mozilla/5.0'})
        scrapydo.run_spider(HealSpider(), settings={'USER_AGENT': 'Mozilla/5.0'})
        return rankings
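# A minimal usage sketch for the class above, assuming DPSSpider/TankSpider/HealSpider are
# Scrapy spiders defined elsewhere that append their scraped rows (dicts with an 'average'
# key) to the module-level DPS/Tank/Heal lists; none of these names are defined here.
queen = SpiderQueen()
dps_pages = queen.DPSSpiderCrawl()   # pages of up to 14 entries, sorted by 'average' descending
print(len(dps_pages), "pages of DPS rankings")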
def main(argv):
    scrapydo.setup()
    settings = Settings()
    settings.set("USER_AGENT", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)")
    settings.set("FEED_FORMAT", "json")
    settings.set("FEED_URI", "result.json")

    usage = 'cli_crawler.py -s <list of subreddits separated by ";", e.g. programming;dogs;brazil>'
    try:
        opts, args = getopt.getopt(argv, "hs:", ["subreddit="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    if len(opts) == 0:
        print(usage)
        sys.exit(2)  # without -s there is nothing to crawl

    for opt, arg in opts:
        if opt == '-s':
            subreddits = arg

    print("Starting crawler to fetch data for subreddits " + subreddits + "...")
    # Run the Reddit spider against the subreddits requested on the command line.
    data = scrapydo.run_spider(RedditSpider(), settings=settings, subreddits=subreddits)

    for item in data:
        if item["title"] == '':
            title = "_No Title_ :("
        else:
            title = item["title"]
        message = item["subreddit"] + ", votes " + str(item["upvote"]) + " " + \
            "[" + title + "](" + item["thread_link"] + ") \n"
        print(message)
    sys.exit()
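# A hedged entry-point sketch: main() expects sys.argv without the program name, which is
# the usual getopt convention; adjust to however the original cli_crawler.py wires it up.
if __name__ == '__main__':
    main(sys.argv[1:])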
def boot(self):
    self.logger = logging.getLogger("scrapy-x")

    scrapydo.setup()

    coloredlogs.install(
        fmt="[%(levelname)s] | %(asctime)s | %(message)s",
        logger=self.logger
    )

    self.settings = get_project_settings()

    self.queue_name = self.settings.get('X_QUEUE_NAME', 'SCRAPY_X_QUEUE')
    self.queue_workers_count = self.settings.getint(
        'X_QUEUE_WORKERS_COUNT', os.cpu_count()
    )
    self.server_workers_count = self.settings.getint(
        'X_SERVER_WORKERS_COUNT', os.cpu_count()
    )
    self.server_listen_port = self.settings.getint(
        'X_SERVER_LISTEN_PORT', 6800
    )
    self.server_listen_host = self.settings.get(
        'X_SERVER_LISTEN_HOST', '0.0.0.0'
    )
    self.enable_access_log = self.settings.getbool(
        'X_ENABLE_ACCESS_LOG', True
    )

    self.redis_config = {
        'host': self.settings.get('X_REDIS_HOST', 'localhost'),
        'port': self.settings.getint('X_REDIS_PORT', 6379),
        'db': self.settings.getint('X_REDIS_DB', 0),
        'password': self.settings.get('X_REDIS_PASSWORD', ''),
    }

    self.spiders = utils.discover_spiders(self.settings)
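# A sketch of the matching overrides in the Scrapy project's settings.py; the keys mirror
# the defaults read in boot() above and every value shown here is illustrative.
X_QUEUE_NAME = 'SCRAPY_X_QUEUE'
X_QUEUE_WORKERS_COUNT = 4
X_SERVER_WORKERS_COUNT = 4
X_SERVER_LISTEN_PORT = 6800
X_SERVER_LISTEN_HOST = '0.0.0.0'
X_ENABLE_ACCESS_LOG = True
X_REDIS_HOST = 'localhost'
X_REDIS_PORT = 6379
X_REDIS_DB = 0
X_REDIS_PASSWORD = ''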
import subprocess
import sys
import time

import scrapydo

import utils
from ipproxytool.spiders.validator.douban import DoubanSpider
from ipproxytool.spiders.validator.assetstore import AssetStoreSpider
from ipproxytool.spiders.validator.gather import GatherSpider
from ipproxytool.spiders.validator.httpbin import HttpBinSpider
from ipproxytool.spiders.validator.steam import SteamSpider
from ipproxytool.spiders.validator.boss import BossSpider
from ipproxytool.spiders.validator.lagou import LagouSpider
from ipproxytool.spiders.validator.liepin import LiepinSpider
from ipproxytool.spiders.validator.jd import JDSpider

scrapydo.setup()


def validator():
    validators = [
        HttpBinSpider,  # required
        # LagouSpider,
        # BossSpider,
        # LiepinSpider,
        JDSpider,
        # DoubanSpider,
    ]
    process_list = []
    for validator in validators:
        # Launch each enabled validator spider in its own child process.
        popen = subprocess.Popen(['python', 'run_spider.py', validator.name],
def setUp(self):
    super(APITest, self).setUp()
    scrapydo.setup()
def my_link():
    scrapydo.setup()
    my_crawl()
    return render_template('abc.html')
def jobhunt(event, context):
    scrapydo.setup()
    settings = get_project_settings()
    scrapydo.run_spider(spider_dictionary[event['name']], settings=settings)
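# A hedged local-invocation sketch: in AWS Lambda the event dict comes from the trigger;
# 'example_spider' is only a placeholder and must match a key in spider_dictionary.
if __name__ == '__main__':
    jobhunt({'name': 'example_spider'}, None)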
def real_response(url):
    scrapydo.setup()
    resp: Response = scrapydo.fetch(url, timeout=10)
    return resp if 200 <= resp.status < 300 else None
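# Usage sketch for the helper above: real_response() returns the fetched Response only for
# 2xx statuses, so callers must handle None; the URL is illustrative.
page = real_response('https://example.com')
if page is not None:
    print(page.status, len(page.body), "bytes")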
def __init__(self):
    self.proxy = ZapProxy()
    self.log_handlers = {}
    scrapydo.setup()