Ejemplo n.º 1
0
    def setUpClass(self):
        """Connect the AMQP test queues, then reset q2 and q3.

        The excerpt originally deleted and reconnected ``self.q2`` and
        ``self.q3`` without ever assigning them, which raises
        ``AttributeError`` unless they are provided elsewhere.  They are now
        connected the same way as ``q1``: q2 shares q1's queue name and q3
        uses a dedicated queue for the threading test.

        NOTE(review): unittest expects ``setUpClass`` to be a classmethod; no
        decorator is visible in this excerpt -- confirm it exists upstream.
        """
        from pyspider.message_queue import connect_message_queue

        amqp_url = "amqp://*****:*****@localhost:5672/"
        with utils.timeout(3):
            self.q1 = connect_message_queue("test_queue", amqp_url)
            self.q2 = connect_message_queue("test_queue", amqp_url)
            self.q3 = connect_message_queue("test_queue_for_threading_test", amqp_url)
        # Drop and re-open q2/q3 so each run starts from freshly declared queues.
        self.q2.delete()
        self.q2.reconnect()
        self.q3.delete()
        self.q3.reconnect()
Ejemplo n.º 2
0
 def setUpClass(self):
     """Open the three test queues on ``self.kombu_url`` and empty them.

     q1 and q2 both attach to 'test_queue' (capped at 5 items); q3 attaches
     to 'test_queue_for_threading_test'.  Everything runs inside
     ``utils.timeout(3)``.
     """
     from pyspider.message_queue import connect_message_queue
     with utils.timeout(3):
         self.q1 = connect_message_queue('test_queue', self.kombu_url, maxsize=5)
         self.q2 = connect_message_queue('test_queue', self.kombu_url, maxsize=5)
         self.q3 = connect_message_queue('test_queue_for_threading_test', self.kombu_url)
         # Discard leftovers from earlier runs, in q1 -> q2 -> q3 order.
         for pending in (self.q1, self.q2, self.q3):
             while not pending.empty():
                 pending.get()
Ejemplo n.º 3
0
 def setUpClass(self):
     """Open the test queues on ``self.kombu_url`` with ``lazy_limit=False`` and empty them.

     q1 and q2 both attach to 'test_queue' (capped at 5 items); q3 attaches
     to 'test_queue_for_threading_test'.  Everything runs inside
     ``utils.timeout(3)``.
     """
     from pyspider.message_queue import connect_message_queue
     with utils.timeout(3):
         self.q1 = connect_message_queue(
             'test_queue', self.kombu_url, maxsize=5, lazy_limit=False)
         self.q2 = connect_message_queue(
             'test_queue', self.kombu_url, maxsize=5, lazy_limit=False)
         self.q3 = connect_message_queue(
             'test_queue_for_threading_test', self.kombu_url, lazy_limit=False)
         # Discard leftovers from earlier runs, in q1 -> q2 -> q3 order.
         for pending in (self.q1, self.q2, self.q3):
             while not pending.empty():
                 pending.get()
Ejemplo n.º 4
0
 def setUpClass(self):
     """Connect the AMQP test queues (``lazy_limit=False``), then reset q2 and q3.

     The excerpt originally deleted and reconnected ``self.q2`` and
     ``self.q3`` without ever assigning them, which raises
     ``AttributeError`` unless they are provided elsewhere.  They are now
     connected the same way as ``q1``: q2 shares q1's queue name and q3
     uses a dedicated queue for the threading test.

     NOTE(review): unittest expects ``setUpClass`` to be a classmethod; no
     decorator is visible in this excerpt -- confirm it exists upstream.
     """
     from pyspider.message_queue import connect_message_queue
     amqp_url = 'amqp://*****:*****@localhost:5672/'
     with utils.timeout(3):
         self.q1 = connect_message_queue('test_queue', amqp_url, lazy_limit=False)
         self.q2 = connect_message_queue('test_queue', amqp_url, lazy_limit=False)
         self.q3 = connect_message_queue('test_queue_for_threading_test', amqp_url,
                                         lazy_limit=False)
     # Drop and re-open q2/q3 so each run starts from freshly declared queues.
     self.q2.delete()
     self.q2.reconnect()
     self.q3.delete()
     self.q3.reconnect()
Ejemplo n.º 5
0
 def setUpClass(self):
     """Connect the AMQP test queues (``lazy_limit=False``), then reset q2 and q3.

     ``self.q2`` and ``self.q3`` were previously used without being
     assigned in this method, so ``delete()`` would fail with
     ``AttributeError`` unless another class supplied them.  Both are now
     created next to ``q1`` with the same URL and options: q2 on
     'test_queue', q3 on 'test_queue_for_threading_test'.

     NOTE(review): unittest expects ``setUpClass`` to be a classmethod; no
     decorator is visible in this excerpt -- confirm it exists upstream.
     """
     from pyspider.message_queue import connect_message_queue
     amqp_url = 'amqp://*****:*****@localhost:5672/'
     with utils.timeout(3):
         self.q1 = connect_message_queue('test_queue', amqp_url, lazy_limit=False)
         self.q2 = connect_message_queue('test_queue', amqp_url, lazy_limit=False)
         self.q3 = connect_message_queue('test_queue_for_threading_test', amqp_url,
                                         lazy_limit=False)
     # Delete then reconnect so q2 and q3 start out freshly declared.
     self.q2.delete()
     self.q2.reconnect()
     self.q3.delete()
     self.q3.reconnect()
Ejemplo n.º 6
0
    def setUpClass(self):
        """Open three beanstalk-backed test queues and empty them.

        q1 and q2 both attach to "test_queue" (capped at 5 items); q3
        attaches to "test_queue_for_threading_test".  Everything runs inside
        ``utils.timeout(3)``.
        """
        from pyspider.message_queue import connect_message_queue

        beanstalk_url = "beanstalk://localhost:11300"
        with utils.timeout(3):
            self.q1 = connect_message_queue("test_queue", beanstalk_url, maxsize=5)
            self.q2 = connect_message_queue("test_queue", beanstalk_url, maxsize=5)
            self.q3 = connect_message_queue("test_queue_for_threading_test", beanstalk_url)
            # Discard leftovers from earlier runs, in q1 -> q2 -> q3 order.
            for pending in (self.q1, self.q2, self.q3):
                while not pending.empty():
                    pending.get()
Ejemplo n.º 7
0
 def setUpClass(self):
     """Open three beanstalk-backed test queues and empty them.

     q1 and q2 both attach to 'test_queue' (capped at 5 items); q3 attaches
     to 'test_queue_for_threading_test'.  Everything runs inside
     ``utils.timeout(3)``.
     """
     from pyspider.message_queue import connect_message_queue
     beanstalk_url = 'beanstalk://localhost:11300'
     with utils.timeout(3):
         self.q1 = connect_message_queue('test_queue', beanstalk_url, maxsize=5)
         self.q2 = connect_message_queue('test_queue', beanstalk_url, maxsize=5)
         self.q3 = connect_message_queue('test_queue_for_threading_test', beanstalk_url)
         # Discard leftovers from earlier runs, in q1 -> q2 -> q3 order.
         for pending in (self.q1, self.q2, self.q3):
             while not pending.empty():
                 pending.get()
Ejemplo n.º 8
0
 def setUpClass(self):
     """Open the redis-backed test queues and empty them.

     q1 and q2 are built directly with ``redis_queue.RedisQueue`` on
     'test_queue' (capped at 5 items, ``lazy_limit=False``); q3 goes through
     ``connect_message_queue`` with a redis URL.  Everything runs inside
     ``utils.timeout(3)``.
     """
     from pyspider.message_queue import connect_message_queue
     from pyspider.message_queue import redis_queue
     with utils.timeout(3):
         self.q1 = redis_queue.RedisQueue(
             'test_queue', maxsize=5, lazy_limit=False)
         self.q2 = redis_queue.RedisQueue(
             'test_queue', maxsize=5, lazy_limit=False)
         self.q3 = connect_message_queue(
             'test_queue_for_threading_test', 'redis://localhost:6379/')
         # Discard leftovers from earlier runs, in q1 -> q2 -> q3 order.
         for pending in (self.q1, self.q2, self.q3):
             while not pending.empty():
                 pending.get()
Ejemplo n.º 9
0
 def setUpClass(self):
     """Create the redis test queues and drain whatever they still hold.

     ``redis_queue.RedisQueue`` is instantiated twice on 'test_queue'
     (``maxsize=5``, ``lazy_limit=False``) for q1 and q2, while q3 is
     obtained from ``connect_message_queue`` with a redis URL.  All of it
     happens inside ``utils.timeout(3)``.
     """
     from pyspider.message_queue import connect_message_queue
     from pyspider.message_queue import redis_queue
     with utils.timeout(3):
         self.q1 = redis_queue.RedisQueue(
             'test_queue', maxsize=5, lazy_limit=False)
         self.q2 = redis_queue.RedisQueue(
             'test_queue', maxsize=5, lazy_limit=False)
         self.q3 = connect_message_queue(
             'test_queue_for_threading_test', 'redis://localhost:6379/')
         # Empty the queues one after another: q1, q2, then q3.
         for stale in (self.q1, self.q2, self.q3):
             while not stale.empty():
                 stale.get()
Ejemplo n.º 10
0
 def setUpClass(self):
     """Open the test queues on a remote redis server and drain them.

     q1 and q2 are ``RedisQueue`` instances on 'test_queue'; q3 is obtained
     through ``connect_message_queue`` with a redis URL (database 1).  Each
     item removed while draining is printed.

     The q3 URL previously contained the text ``[email protected]`` where
     the ``password@host`` part belongs (an e-mail obfuscation artifact), so
     it could not point at a real server.  It is rebuilt here from the same
     host and password that q1/q2 use.

     NOTE(review): the password value is a redacted placeholder and the
     server address is hard-coded -- confirm both before running.
     """
     from pyspider.message_queue import connect_message_queue
     from pyspider.message_queue import redis_queue
     redis_host = '119.27.185.96'
     redis_password = '******'
     with utils.timeout(3):
         self.q1 = redis_queue.RedisQueue('test_queue',
                                          host=redis_host,
                                          password=redis_password,
                                          maxsize=5,
                                          lazy_limit=False)
         self.q2 = redis_queue.RedisQueue('test_queue',
                                          host=redis_host,
                                          password=redis_password,
                                          maxsize=5,
                                          lazy_limit=False)
         self.q3 = connect_message_queue(
             'test_queue_for_threading_test',
             'redis://:%s@%s:6379/1' % (redis_password, redis_host))
         # Drain in q1 -> q2 -> q3 order, echoing every discarded item.
         for label, pending in (('q1', self.q1), ('q2', self.q2), ('q3', self.q3)):
             while not pending.empty():
                 print("get from %s: %s" % (label, pending.get()))
Ejemplo n.º 11
0
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as the command's help text -- confirm against the decorators.
    #
    # Fills in defaults for the databases, message queues and render proxies
    # in ``kwargs``, stores everything on ``ctx.obj`` and returns ``ctx``.
    # When no subcommand was given (and ``testing_mode`` is not set on
    # ``ctx.obj``) the ``all`` command is invoked.

    def _ensure_dir(path):
        # Create `path` including missing parents.  Plain os.mkdir failed for
        # './data/bench' when './data' did not exist yet, and the separate
        # exists()/mkdir() pair could race with another process.
        if os.path.exists(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            # Tolerate a concurrent creation; re-raise any real failure.
            if not os.path.isdir(path):
                raise

    if kwargs['add_sys_path']:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    # Explicitly configured databases are left alone; the rest are wrapped in
    # utils.Get around a factory (presumably for lazy connection -- confirm
    # in utils).  `db=db` pins the loop variable for each lambda.
    for db in ('taskdb', 'projectdb', 'resultdb'):
        if kwargs[db] is not None:
            continue
        if os.environ.get('MYSQL_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'sqlalchemy+mysql+%s://%s:%s/%s' % (
                    db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                    os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'mongodb+%s://%s:%s/%s' % (
                    db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                    os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            # With the default data path, bench runs in a wiped sub-directory.
            # After the first pass data_path no longer equals './data', so
            # the wipe happens only once.
            if kwargs['data_path'] == './data':
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                _ensure_dir(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s://' % (db)))
            elif db in ('projectdb', ):
                kwargs[db] = utils.Get(lambda db=db: connect_database('local+%s://%s' % (
                    db, os.path.join(os.path.dirname(__file__), 'libs/bench.py'))))
        else:
            # Fallback: one sqlite file per database under data_path
            # (db[:-2] strips the trailing 'db' from the name).
            _ensure_dir(kwargs['data_path'])
            kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                db, kwargs['data_path'], db[:-2])))
            kwargs['is_%s_default' % db] = True

    # create folder for counter.dump
    _ensure_dir(kwargs['data_path'])

    # message queue, compatible with old version
    if kwargs.get('message_queue'):
        pass
    elif kwargs.get('amqp_url'):
        kwargs['message_queue'] = kwargs['amqp_url']
    elif os.environ.get('RABBITMQ_NAME'):
        kwargs['message_queue'] = ("amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                                   ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
    elif kwargs.get('beanstalk'):
        kwargs['message_queue'] = "beanstalk://%s/" % kwargs['beanstalk']

    # With a queue URL the connection is wrapped in utils.Get; without one
    # connect_message_queue is called right away with a None URL.
    for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                 'fetcher2processor', 'processor2result'):
        if kwargs.get('message_queue'):
            kwargs[name] = utils.Get(lambda name=name: connect_message_queue(
                name, kwargs.get('message_queue'), kwargs['queue_maxsize']))
        else:
            kwargs[name] = connect_message_queue(name, kwargs.get('message_queue'),
                                                 kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        # Strip the leading 'tcp://' from the environment value.
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    # puppeteer-proxy
    if kwargs.get('puppeteer_proxy'):
        pass
    elif os.environ.get('PUPPETEER_NAME'):
        kwargs['puppeteer_proxy'] = os.environ['PUPPETEER_PORT_22222_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
Ejemplo n.º 12
0
 def setUpClass(self):
     """Create the test queues without a queue URL.

     q1 and q2 are the very same queue object ('test_queue', capped at 5
     items); q3 is a separate 'test_queue_for_threading_test' queue.
     Everything runs inside ``utils.timeout(3)``.
     """
     from pyspider.message_queue import connect_message_queue
     with utils.timeout(3):
         shared_queue = connect_message_queue('test_queue', maxsize=5)
         self.q1 = shared_queue
         self.q2 = shared_queue
         self.q3 = connect_message_queue('test_queue_for_threading_test')
Ejemplo n.º 13
0
 def setUpClass(self):
     """Set up q1/q2 as one shared queue and q3 as its own queue, with no URL.

     A single ``connect_message_queue('test_queue', maxsize=5)`` result is
     bound to both q1 and q2; q3 comes from a second call for
     'test_queue_for_threading_test'.  Both calls sit inside
     ``utils.timeout(3)``.
     """
     from pyspider.message_queue import connect_message_queue
     with utils.timeout(3):
         common = connect_message_queue('test_queue', maxsize=5)
         self.q1 = common
         self.q2 = common
         self.q3 = connect_message_queue('test_queue_for_threading_test')
Ejemplo n.º 14
0
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as the command's help text -- confirm against the decorators.
    #
    # Fills in defaults for the databases, message queues and the phantomjs
    # proxy in ``kwargs``, stores everything on ``ctx.obj`` and returns
    # ``ctx``.  When no subcommand was given (and ``testing_mode`` is not set
    # on ``ctx.obj``) the ``all`` command is invoked.

    def _ensure_dir(path):
        # Create `path` including missing parents.  Plain os.mkdir failed for
        # './data/bench' when './data' did not exist yet, and the separate
        # exists()/mkdir() pair could race with another process.
        if os.path.exists(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            # Tolerate a concurrent creation; re-raise any real failure.
            if not os.path.isdir(path):
                raise

    if kwargs['add_sys_path']:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    # Explicitly configured databases are left alone; the rest are wrapped in
    # utils.Get around a factory (presumably for lazy connection -- confirm
    # in utils).  `db=db` pins the loop variable for each lambda.
    for db in ('taskdb', 'projectdb', 'resultdb'):
        if kwargs[db] is not None:
            continue
        if os.environ.get('MYSQL_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'sqlalchemy+mysql+%s://%s:%s/%s' % (
                    db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                    os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'mongodb+%s://%s:%s/%s' % (
                    db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                    os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            # With the default data path, bench runs in a wiped sub-directory.
            # After the first pass data_path no longer equals './data', so
            # the wipe happens only once.
            if kwargs['data_path'] == './data':
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                _ensure_dir(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s://' % (db)))
            else:
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                    db, kwargs['data_path'], db[:-2])))
        else:
            # Fallback: one sqlite file per database under data_path
            # (db[:-2] strips the trailing 'db' from the name).
            _ensure_dir(kwargs['data_path'])
            kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                db, kwargs['data_path'], db[:-2])))
            kwargs['is_%s_default' % db] = True

    # create folder for counter.dump
    _ensure_dir(kwargs['data_path'])

    # message queue, compatible with old version
    if kwargs.get('message_queue'):
        pass
    elif kwargs.get('amqp_url'):
        kwargs['message_queue'] = kwargs['amqp_url']
    elif os.environ.get('RABBITMQ_NAME'):
        kwargs['message_queue'] = ("amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                                   ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
    elif kwargs.get('beanstalk'):
        kwargs['message_queue'] = "beanstalk://%s/" % kwargs['beanstalk']

    # With a queue URL the connection is wrapped in utils.Get; without one
    # connect_message_queue is called right away with a None URL.
    for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                 'fetcher2processor', 'processor2result'):
        if kwargs.get('message_queue'):
            kwargs[name] = utils.Get(lambda name=name: connect_message_queue(
                name, kwargs.get('message_queue'), kwargs['queue_maxsize']))
        else:
            kwargs[name] = connect_message_queue(name, kwargs.get('message_queue'),
                                                 kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        # Strip the leading 'tcp://' from the environment value.
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
Ejemplo n.º 15
0
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as the command's help text -- confirm against the decorators.
    #
    # Fills in defaults for the databases, message queues and the phantomjs
    # proxy in ``kwargs``, stores everything on ``ctx.obj`` and returns
    # ``ctx``.  When no subcommand was given (and ``testing_mode`` is not set
    # on ``ctx.obj``) the ``all`` command is invoked.

    def _ensure_dir(path):
        # Create `path` including missing parents.  Plain os.mkdir failed for
        # "./data/bench" when "./data" did not exist yet, and the separate
        # exists()/mkdir() pair could race with another process.
        if os.path.exists(path):
            return
        try:
            os.makedirs(path)
        except OSError:
            # Tolerate a concurrent creation; re-raise any real failure.
            if not os.path.isdir(path):
                raise

    if kwargs["add_sys_path"]:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs["logging_config"])

    # get db from env
    # Explicitly configured databases are left alone; the rest are wrapped in
    # utils.Get around a factory (presumably for lazy connection -- confirm
    # in utils).  `db=db` pins the loop variable for each lambda.
    for db in ("taskdb", "projectdb", "resultdb"):
        if kwargs[db] is not None:
            continue
        if os.environ.get("MYSQL_NAME"):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "sqlalchemy+mysql+%s://%s:%s/%s"
                    % (db, os.environ["MYSQL_PORT_3306_TCP_ADDR"], os.environ["MYSQL_PORT_3306_TCP_PORT"], db)
                )
            )
        elif os.environ.get("MONGODB_NAME"):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "mongodb+%s://%s:%s/%s"
                    % (db, os.environ["MONGODB_PORT_27017_TCP_ADDR"], os.environ["MONGODB_PORT_27017_TCP_PORT"], db)
                )
            )
        elif ctx.invoked_subcommand == "bench":
            # With the default data path, bench runs in a wiped sub-directory.
            # After the first pass data_path no longer equals "./data", so
            # the wipe happens only once.
            if kwargs["data_path"] == "./data":
                kwargs["data_path"] += "/bench"
                shutil.rmtree(kwargs["data_path"], ignore_errors=True)
                _ensure_dir(kwargs["data_path"])
            if db in ("taskdb", "resultdb"):
                kwargs[db] = utils.Get(lambda db=db: connect_database("sqlite+%s://" % (db)))
            else:
                kwargs[db] = utils.Get(
                    lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
                )
        else:
            # Fallback: one sqlite file per database under data_path
            # (db[:-2] strips the trailing "db" from the name).
            _ensure_dir(kwargs["data_path"])
            kwargs[db] = utils.Get(
                lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
            )
            kwargs["is_%s_default" % db] = True

    # create folder for counter.dump
    _ensure_dir(kwargs["data_path"])

    # message queue, compatible with old version
    if kwargs.get("message_queue"):
        pass
    elif kwargs.get("amqp_url"):
        kwargs["message_queue"] = kwargs["amqp_url"]
    elif os.environ.get("RABBITMQ_NAME"):
        kwargs["message_queue"] = (
            "amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s" ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ
        )
    elif kwargs.get("beanstalk"):
        kwargs["message_queue"] = "beanstalk://%s/" % kwargs["beanstalk"]

    # With a queue URL the connection is wrapped in utils.Get; without one
    # connect_message_queue is called right away with a None URL.
    for name in ("newtask_queue", "status_queue", "scheduler2fetcher", "fetcher2processor", "processor2result"):
        if kwargs.get("message_queue"):
            kwargs[name] = utils.Get(
                lambda name=name: connect_message_queue(name, kwargs.get("message_queue"), kwargs["queue_maxsize"])
            )
        else:
            kwargs[name] = connect_message_queue(name, kwargs.get("message_queue"), kwargs["queue_maxsize"])

    # phantomjs-proxy
    if kwargs.get("phantomjs_proxy"):
        pass
    elif os.environ.get("PHANTOMJS_NAME"):
        # Strip the leading "tcp://" from the environment value.
        kwargs["phantomjs_proxy"] = os.environ["PHANTOMJS_PORT_25555_TCP"][len("tcp://") :]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj["instances"] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get("testing_mode"):
        ctx.invoke(all)
    return ctx