Example #1
class SpiderQueen:
    # scrapydo.setup() runs once, at class-definition time.
    scrapydo.setup()
    global rankings
    rankings = {"DPS": [], "Heal": [], "TDPS": []}

    @staticmethod
    def divide_chunks(l):
        # Sort descending by the 'average' field, then split into chunks of 14 entries.
        l.sort(key=lambda x: float(x['average']), reverse=True)
        n = 14
        divlist = []
        for i in range(0, len(l), n):
            divlist.append(l[i:i + n])
        return divlist

    def DPSSpiderCrawl(self):
        # DPSSpider is expected to append its scraped items to the module-level DPS list.
        global DPS
        DPS = []
        scrapydo.run_spider(DPSSpider(),
                            settings={
                                'USER_AGENT': 'Mozilla/5.0',
                            })
        DPSChunk = SpiderQueen.divide_chunks(DPS)
        return DPSChunk

    def TankSpiderCrawl(self):
        global Tank
        Tank = []
        scrapydo.run_spider(TankSpider(),
                            settings={
                                'USER_AGENT': 'Mozilla/5.0',
                            })
        TankChunk = SpiderQueen.divide_chunks(Tank)
        return TankChunk

    def HealSpiderCrawl(self):
        global Heal
        Heal = []
        scrapydo.run_spider(HealSpider(),
                            settings={
                                'USER_AGENT': 'Mozilla/5.0',
                            })
        HealChunk = SpiderQueen.divide_chunks(Heal)
        return HealChunk

    def Queen(self):
        # Run all three spiders; each presumably fills its part of the global rankings dict.
        global rankings
        rankings = {"DPS": [], "Heal": [], "TDPS": []}
        scrapydo.run_spider(DPSSpider(),
                            settings={
                                'USER_AGENT': 'Mozilla/5.0',
                            })
        scrapydo.run_spider(TankSpider(),
                            settings={
                                'USER_AGENT': 'Mozilla/5.0',
                            })
        scrapydo.run_spider(HealSpider(),
                            settings={
                                'USER_AGENT': 'Mozilla/5.0',
                            })
        return rankings
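
A minimal driver sketch, assuming DPSSpider, TankSpider and HealSpider are importable from the same project and populate the globals the class relies on:

if __name__ == '__main__':
    queen = SpiderQueen()
    dps_pages = queen.DPSSpiderCrawl()  # list of 14-item chunks, highest 'average' first
    print(len(dps_pages), "DPS chunks scraped")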
Example #2
def main(argv):
    scrapydo.setup()

    settings = Settings()
    settings.set("USER_AGENT",
                 "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)")
    settings.set("FEED_FORMAT", "json")
    settings.set("FEED_URI", "result.json")

    try:
        opts, args = getopt.getopt(argv, "hs:", ["subreddit="])
    except getopt.GetoptError:
        print(
            'cli_crawler.py -s <comma-separated list of subreddits, e.g. programming;dogs;brazil>'
        )
        sys.exit(2)

    if len(opts) == 0:
        print(
            'cli_crawler.py -s <comma-separated list of subreddits, e.g. programming;dogs;brazil>'
        )

    for opt, arg in opts:
        if opt == '-s':
            subreddits = arg
            print("Iniciando crawler para buscar dados dos subreddits " +
                  subreddits + "...")
            data = scrapydo.run_spider(RedditSpider(),
                                       settings=settings,
                                       subreddits='askreddit')
            for item in data:

                if item["title"] == '':
                    title = "_No Title_ :("
                else:
                    title = item["title"]

                message = item["subreddit"] + ", votes " + str(item["upvote"]) + " " + "[" + title + "](" + \
                           item["thread_link"] + ") \n"

                print(message)

    sys.exit()
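
An entry-point sketch matching the usage string above (the script name cli_crawler.py comes from that string):

if __name__ == '__main__':
    # e.g. python cli_crawler.py -s programming
    main(sys.argv[1:])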
Example #3
    def boot(self):
        self.logger = logging.getLogger("scrapy-x")

        scrapydo.setup()
        coloredlogs.install(
            fmt="[%(levelname)s] | %(asctime)s |  %(message)s",
            logger=self.logger
        )

        self.settings = get_project_settings()
        self.queue_name = self.settings.get('X_QUEUE_NAME', 'SCRAPY_X_QUEUE')

        self.queue_workers_count = self.settings.getint(
            'X_QUEUE_WORKERS_COUNT', os.cpu_count()
        )

        self.server_workers_count = self.settings.getint(
            'X_SERVER_WORKERS_COUNT', os.cpu_count()
        )

        self.server_listen_port = self.settings.getint(
            'X_SERVER_LISTEN_PORT', 6800
        )

        self.server_listen_host = self.settings.get(
            'X_SERVER_LISTEN_HOST', '0.0.0.0'
        )

        self.enable_access_log = self.settings.getbool(
            'X_ENABLE_ACCESS_LOG', True
        )

        self.redis_config = {
            'host': self.settings.get('X_REDIS_HOST', 'localhost'),
            'port': self.settings.getint('X_REDIS_PORT', 6379),
            'db': self.settings.getint('X_REDIS_DB', 0),
            'password': self.settings.get('X_REDIS_PASSWORD', ''),
        }

        self.spiders = utils.discover_spiders(self.settings)
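
The X_* keys read above come from the Scrapy project settings; a hypothetical settings.py fragment, with illustrative values that mirror the fallbacks in boot():

# settings.py -- hypothetical values for the scrapy-x options read in boot()
X_QUEUE_NAME = 'SCRAPY_X_QUEUE'
X_QUEUE_WORKERS_COUNT = 4
X_SERVER_LISTEN_PORT = 6800
X_SERVER_LISTEN_HOST = '0.0.0.0'
X_REDIS_HOST = 'localhost'
X_REDIS_PORT = 6379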
Example #4
import subprocess
import sys
import time
import scrapydo
import utils

from ipproxytool.spiders.validator.douban import DoubanSpider
from ipproxytool.spiders.validator.assetstore import AssetStoreSpider
from ipproxytool.spiders.validator.gather import GatherSpider
from ipproxytool.spiders.validator.httpbin import HttpBinSpider
from ipproxytool.spiders.validator.steam import SteamSpider
from ipproxytool.spiders.validator.boss import BossSpider
from ipproxytool.spiders.validator.lagou import LagouSpider
from ipproxytool.spiders.validator.liepin import LiepinSpider
from ipproxytool.spiders.validator.jd import JDSpider

scrapydo.setup()


def validator():
    validators = [
        HttpBinSpider,  # required
        # LagouSpider,
        # BossSpider,
        # LiepinSpider,
        JDSpider,
        # DoubanSpider,
    ]

    process_list = []
    for validator in validators:
        # Launch each validator spider in its own process and keep the handle.
        # (The original call is truncated at this point; it is closed minimally here.)
        popen = subprocess.Popen(['python', 'run_spider.py', validator.name])
        process_list.append(popen)
Example #5
    def setUp(self):
        super(APITest, self).setUp()
        scrapydo.setup()
Example #6
def my_link():
    # Presumably a Flask view: run the crawl, then render a template
    # (my_crawl and render_template are imported elsewhere in the project).
    scrapydo.setup()
    my_crawl()
    return render_template('abc.html')
Example #7
def jobhunt(event, context):
    # Serverless-style handler: look up the requested spider by name and run it
    # (spider_dictionary maps event names to spider classes elsewhere in the module).
    scrapydo.setup()
    settings = get_project_settings()
    scrapydo.run_spider(spider_dictionary[event['name']], settings=settings)
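
A hypothetical shape for the mapping the handler above expects (JobsSpider is illustrative only):

spider_dictionary = {
    'jobs': JobsSpider,  # hypothetical spider class defined in the project
}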
Example #8
    def setUp(self):
        super(APITest, self).setUp()
        scrapydo.setup()
Example #9
def real_response(url):
    # Fetch the URL through scrapydo (Response is scrapy.http.Response);
    # return the response only when the status is 2xx.
    scrapydo.setup()
    resp: Response = scrapydo.fetch(url, timeout=10)
    return resp if 200 <= resp.status < 300 else None
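
A quick check sketch, assuming network access; httpbin.org is only an illustrative target:

if __name__ == '__main__':
    resp = real_response('https://httpbin.org/status/200')
    print('2xx response' if resp is not None else 'non-2xx or no response')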
Example #10
    def __init__(self):
        self.proxy = ZapProxy()
        self.log_handlers = {}
        scrapydo.setup()