def setUpClass(self):
    """Open ``q1`` on the local AMQP broker, then reset ``q2`` and ``q3``.

    ``q1`` is connected to ``"test_queue"`` inside ``utils.timeout(3)``.
    Afterwards ``q2`` and then ``q3`` are each deleted and reconnected.
    """
    from pyspider.message_queue import connect_message_queue

    broker_url = "amqp://*****:*****@localhost:5672/"
    with utils.timeout(3):
        self.q1 = connect_message_queue("test_queue", broker_url)

    # NOTE(review): q2 and q3 are never assigned in this method; presumably
    # they are provided elsewhere -- confirm.
    # Each attribute is looked up only when its turn comes, so q2 is fully
    # reset before q3 is touched.
    for attr_name in ("q2", "q3"):
        queue = getattr(self, attr_name)
        queue.delete()
        queue.reconnect()
def setUpClass(self):
    """Connect the three test queues via ``self.kombu_url`` and empty them.

    ``q1`` and ``q2`` are both opened on ``'test_queue'`` with ``maxsize=5``;
    ``q3`` is opened on ``'test_queue_for_threading_test'``.  Everything runs
    inside ``utils.timeout(3)``.
    """
    from pyspider.message_queue import connect_message_queue

    with utils.timeout(3):
        url = self.kombu_url
        queue_specs = (
            ('q1', 'test_queue', {'maxsize': 5}),
            ('q2', 'test_queue', {'maxsize': 5}),
            ('q3', 'test_queue_for_threading_test', {}),
        )
        for attr_name, queue_name, options in queue_specs:
            setattr(self, attr_name, connect_message_queue(queue_name, url, **options))

        # Discard whatever is still sitting in the queues, in q1, q2, q3 order.
        for queue in (self.q1, self.q2, self.q3):
            while not queue.empty():
                queue.get()
def setUpClass(self):
    """Connect the three test queues via ``self.kombu_url`` and empty them.

    Every queue is created with ``lazy_limit=False``.  ``q1`` and ``q2`` are
    both opened on ``'test_queue'`` with ``maxsize=5``; ``q3`` is opened on
    ``'test_queue_for_threading_test'``.  Everything runs inside
    ``utils.timeout(3)``.
    """
    from pyspider.message_queue import connect_message_queue

    with utils.timeout(3):
        url = self.kombu_url
        queue_specs = (
            ('q1', 'test_queue', {'maxsize': 5, 'lazy_limit': False}),
            ('q2', 'test_queue', {'maxsize': 5, 'lazy_limit': False}),
            ('q3', 'test_queue_for_threading_test', {'lazy_limit': False}),
        )
        for attr_name, queue_name, options in queue_specs:
            setattr(self, attr_name, connect_message_queue(queue_name, url, **options))

        # Discard whatever is still sitting in the queues, in q1, q2, q3 order.
        for queue in (self.q1, self.q2, self.q3):
            while not queue.empty():
                queue.get()
def setUpClass(self):
    """Open ``q1`` on the local AMQP broker, then reset ``q2`` and ``q3``.

    ``q1`` is connected to ``'test_queue'`` with ``lazy_limit=False`` inside
    ``utils.timeout(3)``.  Afterwards ``q2`` and then ``q3`` are each deleted
    and reconnected.
    """
    from pyspider.message_queue import connect_message_queue

    broker_url = 'amqp://*****:*****@localhost:5672/'
    with utils.timeout(3):
        self.q1 = connect_message_queue('test_queue', broker_url, lazy_limit=False)

    # NOTE(review): q2 and q3 are never assigned in this method; presumably
    # they are provided elsewhere -- confirm.
    # Each attribute is looked up only when its turn comes, so q2 is fully
    # reset before q3 is touched.
    for attr_name in ('q2', 'q3'):
        queue = getattr(self, attr_name)
        queue.delete()
        queue.reconnect()
def setUpClass(self):
    """Connect the three test queues to the local beanstalk URL and empty them.

    ``q1`` and ``q2`` are both opened on ``"test_queue"`` with ``maxsize=5``;
    ``q3`` is opened on ``"test_queue_for_threading_test"``.  Everything runs
    inside ``utils.timeout(3)``.
    """
    from pyspider.message_queue import connect_message_queue

    beanstalk_url = "beanstalk://localhost:11300"
    with utils.timeout(3):
        queue_specs = (
            ("q1", "test_queue", {"maxsize": 5}),
            ("q2", "test_queue", {"maxsize": 5}),
            ("q3", "test_queue_for_threading_test", {}),
        )
        for attr_name, queue_name, options in queue_specs:
            setattr(self, attr_name, connect_message_queue(queue_name, beanstalk_url, **options))

        # Discard whatever is still sitting in the queues, in q1, q2, q3 order.
        for queue in (self.q1, self.q2, self.q3):
            while not queue.empty():
                queue.get()
def setUpClass(self):
    """Connect the three test queues to the local beanstalk URL and empty them.

    ``q1`` and ``q2`` are both opened on ``'test_queue'`` with ``maxsize=5``;
    ``q3`` is opened on ``'test_queue_for_threading_test'``.  Everything runs
    inside ``utils.timeout(3)``.
    """
    from pyspider.message_queue import connect_message_queue

    beanstalk_url = 'beanstalk://localhost:11300'
    with utils.timeout(3):
        queue_specs = (
            ('q1', 'test_queue', {'maxsize': 5}),
            ('q2', 'test_queue', {'maxsize': 5}),
            ('q3', 'test_queue_for_threading_test', {}),
        )
        for attr_name, queue_name, options in queue_specs:
            setattr(self, attr_name, connect_message_queue(queue_name, beanstalk_url, **options))

        # Discard whatever is still sitting in the queues, in q1, q2, q3 order.
        for queue in (self.q1, self.q2, self.q3):
            while not queue.empty():
                queue.get()
def setUpClass(self):
    """Create the three redis test queues and empty them.

    ``q1`` and ``q2`` are built directly as ``redis_queue.RedisQueue`` objects
    on ``'test_queue'`` (``maxsize=5``, ``lazy_limit=False``); ``q3`` is
    obtained through ``connect_message_queue`` with a ``redis://`` URL.
    Everything runs inside ``utils.timeout(3)``.
    """
    from pyspider.message_queue import connect_message_queue
    from pyspider.message_queue import redis_queue

    redis_options = {'maxsize': 5, 'lazy_limit': False}
    with utils.timeout(3):
        self.q1 = redis_queue.RedisQueue('test_queue', **redis_options)
        self.q2 = redis_queue.RedisQueue('test_queue', **redis_options)
        self.q3 = connect_message_queue(
            'test_queue_for_threading_test',
            'redis://localhost:6379/',
        )

        # Discard whatever is still sitting in the queues, in q1, q2, q3 order.
        for queue in (self.q1, self.q2, self.q3):
            while not queue.empty():
                queue.get()
def setUpClass(self):
    """Create the three redis test queues and print-and-drain their contents.

    ``q1`` and ``q2`` are built directly as ``redis_queue.RedisQueue`` objects
    on ``'test_queue'``; ``q3`` is obtained through ``connect_message_queue``.
    Every item still present in a queue is fetched and printed.  Everything
    runs inside ``utils.timeout(3)``.
    """
    from pyspider.message_queue import connect_message_queue
    from pyspider.message_queue import redis_queue

    # NOTE(review): host and password are hard-coded literals here -- confirm
    # this is intended for a shared test fixture.
    redis_options = {
        'host': '119.27.185.96',
        'password': '******',
        'maxsize': 5,
        'lazy_limit': False,
    }
    with utils.timeout(3):
        self.q1 = redis_queue.RedisQueue('test_queue', **redis_options)
        self.q2 = redis_queue.RedisQueue('test_queue', **redis_options)
        # NOTE(review): this URL literal looks garbled; it is reproduced
        # verbatim -- confirm the intended value.
        self.q3 = connect_message_queue(
            'test_queue_for_threading_test',
            'redis://:[email protected]:6379/1',
        )

        # Pair each queue with its own message template; drained in q1, q2, q3 order.
        drain_plan = (
            (self.q1, "get from q1: %s"),
            (self.q2, "get from q2: %s"),
            (self.q3, "get from q3: %s"),
        )
        for queue, template in drain_plan:
            while not queue.empty():
                print(template % queue.get())
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # Fills in defaults in ``kwargs`` for the three databases, the five
    # message queues and the phantomjs/puppeteer proxies, stores the result on
    # ``ctx.obj`` and, when no subcommand was requested and ``testing_mode``
    # is not set, invokes ``all``.  Returns ``ctx``.
    #
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as command help text -- confirm before extending it.
    if kwargs['add_sys_path']:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    for db in ('taskdb', 'projectdb', 'resultdb'):
        # An explicitly supplied database always wins.
        if kwargs[db] is not None:
            continue
        if os.environ.get('MYSQL_NAME'):
            # ``db=db`` binds the current loop value; the connection itself is
            # deferred through utils.Get.
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'sqlalchemy+mysql+%s://%s:%s/%s' % (
                    db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                    os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'mongodb+%s://%s:%s/%s' % (
                    db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                    os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            if kwargs['data_path'] == './data':
                # The default data path gets a fresh 'bench' sub-directory.
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                # makedirs rather than mkdir: './data' itself may not exist
                # yet, in which case mkdir('./data/bench') would fail.
                os.makedirs(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s://' % (db)))
            elif db in ('projectdb', ):
                kwargs[db] = utils.Get(lambda db=db: connect_database('local+%s://%s' % (
                    db, os.path.join(os.path.dirname(__file__), 'libs/bench.py'))))
        else:
            if not os.path.exists(kwargs['data_path']):
                # makedirs so a nested data_path with a missing parent works.
                os.makedirs(kwargs['data_path'])
            kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                db, kwargs['data_path'], db[:-2])))
            # Record that this database fell back to the sqlite default.
            kwargs['is_%s_default' % db] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs['data_path']):
        os.makedirs(kwargs['data_path'])

    # message queue, compatible with old version
    if kwargs.get('message_queue'):
        pass
    elif kwargs.get('amqp_url'):
        kwargs['message_queue'] = kwargs['amqp_url']
    elif os.environ.get('RABBITMQ_NAME'):
        kwargs['message_queue'] = ("amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                                   ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
    elif kwargs.get('beanstalk'):
        kwargs['message_queue'] = "beanstalk://%s/" % kwargs['beanstalk']

    for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                 'fetcher2processor', 'processor2result'):
        if kwargs.get('message_queue'):
            # With a queue URL the connection is deferred through utils.Get.
            kwargs[name] = utils.Get(lambda name=name: connect_message_queue(
                name, kwargs.get('message_queue'), kwargs['queue_maxsize']))
        else:
            # Without a URL the queue is created immediately.
            kwargs[name] = connect_message_queue(name, kwargs.get('message_queue'),
                                                 kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        # Strip the leading 'tcp://' from the environment value.
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    # puppeteer-proxy
    if kwargs.get('puppeteer_proxy'):
        pass
    elif os.environ.get('PUPPETEER_NAME'):
        kwargs['puppeteer_proxy'] = os.environ['PUPPETEER_PORT_22222_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
def setUpClass(self):
    """Create the test queues without passing any connection URL.

    ``q1`` and ``q2`` refer to the same queue object, opened on
    ``'test_queue'`` with ``maxsize=5``; ``q3`` is a separate queue opened on
    ``'test_queue_for_threading_test'``.  Everything runs inside
    ``utils.timeout(3)``.
    """
    from pyspider.message_queue import connect_message_queue

    with utils.timeout(3):
        shared_queue = connect_message_queue('test_queue', maxsize=5)
        # q1 and q2 deliberately alias one object.
        self.q1 = shared_queue
        self.q2 = shared_queue
        self.q3 = connect_message_queue('test_queue_for_threading_test')
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # Fills in defaults in ``kwargs`` for the three databases, the five
    # message queues and the phantomjs proxy, stores the result on ``ctx.obj``
    # and, when no subcommand was requested and ``testing_mode`` is not set,
    # invokes ``all``.  Returns ``ctx``.
    #
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as command help text -- confirm before extending it.
    if kwargs['add_sys_path']:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs['logging_config'])

    # get db from env
    for db in ('taskdb', 'projectdb', 'resultdb'):
        # An explicitly supplied database always wins.
        if kwargs[db] is not None:
            continue
        if os.environ.get('MYSQL_NAME'):
            # ``db=db`` binds the current loop value; the connection itself is
            # deferred through utils.Get.
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'sqlalchemy+mysql+%s://%s:%s/%s' % (
                    db, os.environ['MYSQL_PORT_3306_TCP_ADDR'],
                    os.environ['MYSQL_PORT_3306_TCP_PORT'], db)))
        elif os.environ.get('MONGODB_NAME'):
            kwargs[db] = utils.Get(lambda db=db: connect_database(
                'mongodb+%s://%s:%s/%s' % (
                    db, os.environ['MONGODB_PORT_27017_TCP_ADDR'],
                    os.environ['MONGODB_PORT_27017_TCP_PORT'], db)))
        elif ctx.invoked_subcommand == 'bench':
            if kwargs['data_path'] == './data':
                # The default data path gets a fresh 'bench' sub-directory.
                kwargs['data_path'] += '/bench'
                shutil.rmtree(kwargs['data_path'], ignore_errors=True)
                # makedirs rather than mkdir: './data' itself may not exist
                # yet, in which case mkdir('./data/bench') would fail.
                os.makedirs(kwargs['data_path'])
            if db in ('taskdb', 'resultdb'):
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s://' % (db)))
            else:
                kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                    db, kwargs['data_path'], db[:-2])))
        else:
            if not os.path.exists(kwargs['data_path']):
                # makedirs so a nested data_path with a missing parent works.
                os.makedirs(kwargs['data_path'])
            kwargs[db] = utils.Get(lambda db=db: connect_database('sqlite+%s:///%s/%s.db' % (
                db, kwargs['data_path'], db[:-2])))
            # Record that this database fell back to the sqlite default.
            kwargs['is_%s_default' % db] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs['data_path']):
        os.makedirs(kwargs['data_path'])

    # message queue, compatible with old version
    if kwargs.get('message_queue'):
        pass
    elif kwargs.get('amqp_url'):
        kwargs['message_queue'] = kwargs['amqp_url']
    elif os.environ.get('RABBITMQ_NAME'):
        kwargs['message_queue'] = ("amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
                                   ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ)
    elif kwargs.get('beanstalk'):
        kwargs['message_queue'] = "beanstalk://%s/" % kwargs['beanstalk']

    for name in ('newtask_queue', 'status_queue', 'scheduler2fetcher',
                 'fetcher2processor', 'processor2result'):
        if kwargs.get('message_queue'):
            # With a queue URL the connection is deferred through utils.Get.
            kwargs[name] = utils.Get(lambda name=name: connect_message_queue(
                name, kwargs.get('message_queue'), kwargs['queue_maxsize']))
        else:
            # Without a URL the queue is created immediately.
            kwargs[name] = connect_message_queue(name, kwargs.get('message_queue'),
                                                 kwargs['queue_maxsize'])

    # phantomjs-proxy
    if kwargs.get('phantomjs_proxy'):
        pass
    elif os.environ.get('PHANTOMJS_NAME'):
        # Strip the leading 'tcp://' from the environment value.
        kwargs['phantomjs_proxy'] = os.environ['PHANTOMJS_PORT_25555_TCP'][len('tcp://'):]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj['instances'] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get('testing_mode'):
        ctx.invoke(all)
    return ctx
def cli(ctx, **kwargs):
    """
    A powerful spider system in python.
    """
    # Fills in defaults in ``kwargs`` for the three databases, the five
    # message queues and the phantomjs proxy, stores the result on ``ctx.obj``
    # and, when no subcommand was requested and ``testing_mode`` is not set,
    # invokes ``all``.  Returns ``ctx``.
    #
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as command help text -- confirm before extending it.
    if kwargs["add_sys_path"]:
        sys.path.append(os.getcwd())

    logging.config.fileConfig(kwargs["logging_config"])

    # get db from env
    for db in ("taskdb", "projectdb", "resultdb"):
        # An explicitly supplied database always wins.
        if kwargs[db] is not None:
            continue
        if os.environ.get("MYSQL_NAME"):
            # ``db=db`` binds the current loop value; the connection itself is
            # deferred through utils.Get.
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "sqlalchemy+mysql+%s://%s:%s/%s"
                    % (db, os.environ["MYSQL_PORT_3306_TCP_ADDR"], os.environ["MYSQL_PORT_3306_TCP_PORT"], db)
                )
            )
        elif os.environ.get("MONGODB_NAME"):
            kwargs[db] = utils.Get(
                lambda db=db: connect_database(
                    "mongodb+%s://%s:%s/%s"
                    % (db, os.environ["MONGODB_PORT_27017_TCP_ADDR"], os.environ["MONGODB_PORT_27017_TCP_PORT"], db)
                )
            )
        elif ctx.invoked_subcommand == "bench":
            if kwargs["data_path"] == "./data":
                # The default data path gets a fresh 'bench' sub-directory.
                kwargs["data_path"] += "/bench"
                shutil.rmtree(kwargs["data_path"], ignore_errors=True)
                # makedirs rather than mkdir: './data' itself may not exist
                # yet, in which case mkdir('./data/bench') would fail.
                os.makedirs(kwargs["data_path"])
            if db in ("taskdb", "resultdb"):
                kwargs[db] = utils.Get(lambda db=db: connect_database("sqlite+%s://" % (db)))
            else:
                kwargs[db] = utils.Get(
                    lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
                )
        else:
            if not os.path.exists(kwargs["data_path"]):
                # makedirs so a nested data_path with a missing parent works.
                os.makedirs(kwargs["data_path"])
            kwargs[db] = utils.Get(
                lambda db=db: connect_database("sqlite+%s:///%s/%s.db" % (db, kwargs["data_path"], db[:-2]))
            )
            # Record that this database fell back to the sqlite default.
            kwargs["is_%s_default" % db] = True

    # create folder for counter.dump
    if not os.path.exists(kwargs["data_path"]):
        os.makedirs(kwargs["data_path"])

    # message queue, compatible with old version
    if kwargs.get("message_queue"):
        pass
    elif kwargs.get("amqp_url"):
        kwargs["message_queue"] = kwargs["amqp_url"]
    elif os.environ.get("RABBITMQ_NAME"):
        kwargs["message_queue"] = (
            "amqp://*****:*****@%(RABBITMQ_PORT_5672_TCP_ADDR)s"
            ":%(RABBITMQ_PORT_5672_TCP_PORT)s/%%2F" % os.environ
        )
    elif kwargs.get("beanstalk"):
        kwargs["message_queue"] = "beanstalk://%s/" % kwargs["beanstalk"]

    for name in ("newtask_queue", "status_queue", "scheduler2fetcher", "fetcher2processor", "processor2result"):
        if kwargs.get("message_queue"):
            # With a queue URL the connection is deferred through utils.Get.
            kwargs[name] = utils.Get(
                lambda name=name: connect_message_queue(name, kwargs.get("message_queue"), kwargs["queue_maxsize"])
            )
        else:
            # Without a URL the queue is created immediately.
            kwargs[name] = connect_message_queue(name, kwargs.get("message_queue"), kwargs["queue_maxsize"])

    # phantomjs-proxy
    if kwargs.get("phantomjs_proxy"):
        pass
    elif os.environ.get("PHANTOMJS_NAME"):
        # Strip the leading 'tcp://' from the environment value.
        kwargs["phantomjs_proxy"] = os.environ["PHANTOMJS_PORT_25555_TCP"][len("tcp://") :]

    ctx.obj = utils.ObjectDict(ctx.obj or {})
    ctx.obj["instances"] = []
    ctx.obj.update(kwargs)

    if ctx.invoked_subcommand is None and not ctx.obj.get("testing_mode"):
        ctx.invoke(all)
    return ctx