Beispiel #1
0
 def start():
     """Remove stale log files, set the process title and run the crawler.

     Deletes ``./Worker.log`` and ``./Scrapy.log`` when they exist, names the
     process after the ``platform`` attribute of the ``WorkerModel`` row
     fetched with ``get(1)``, then re-initializes the reactor, creates a
     ``CrawlerManager`` and starts the reactor.
     """
     for stale_log in ('./Worker.log', './Scrapy.log'):
         if os.path.exists(stale_log):
             os.remove(stale_log)

     worker = DBSession.query(WorkerModel).get(1)
     setproctitle.setproctitle(worker.platform)

     # Re-run the reactor's constructor before using it (presumably so that a
     # reactor which was already started can be run again -- confirm).
     reactor.__init__()  # @UndefinedVariable
     CrawlerManager()
     reactor.run()  # @UndefinedVariable
Beispiel #2
0
def sdfg(callback):
  """Run ``callback`` under the reactor, giving up after three seconds.

  Logging is sent to stdout. The object returned by ``callback()`` is chained
  through ``promise.promise()`` and then through a handler that stops the
  reactor. A 3-second timer stops the reactor as well. Once ``reactor.run()``
  returns, the reactor's constructor is re-run.
  """
  log.startLogging(sys.stdout)

  # Stop the reactor once the test completes, whether by success or exception
  outcome = callback()
  relayed = outcome.then(promise.promise())
  relayed.then(lambda _result: reactor.stop())

  # Timeout: stop the reactor after 3 seconds regardless of the chain above.
  reactor.callLater(3, reactor.stop)
  reactor.run()
  # Reset the reactor's state (presumably so it can be run again -- confirm).
  reactor.__init__()
 def check_proxies(self):
     """Check every stored proxy and return the ones recorded as valid.

     Reads the proxy list stored under ``ProxyManager.all_list`` from
     ``self.db_conn``. When that list is missing or empty, returns ``[]``
     without touching the reactor. Otherwise resets the per-run state,
     schedules ``self.mutli_thread_check`` on the reactor and runs the
     reactor.

     Returns:
         ``self.valid_proxy`` as it stands once ``reactor.run()`` returns
         (presumably filled in by ``mutli_thread_check`` -- that code is not
         visible here).
     """
     all_list = self.db_conn.get_list(ProxyManager.all_list)
     # Covers both ``None`` and an empty list.
     if not all_list:
         return []

     # Fresh per-run state.
     self.valid_proxy = []
     self.unchecked_proxy = 0
     self.mutli_thread_lock = threading.Lock()
     self.stop_reactor_lock = threading.Lock()

     run_checks = functools.partial(self.mutli_thread_check, all_list)

     # A reactor that has already been started is re-initialized through its
     # constructor before it is run again.
     if reactor._startedBefore:
         reactor.__init__()

     # Hand the thread-spawning function to the reactor's main thread so that
     # the checks can be stopped from there (translated from the original
     # comment).
     reactor.callFromThread(run_checks)
     reactor.run()
     return self.valid_proxy
Beispiel #4
0
def main():
    """Start the crawler client.

    Sets the process title, re-initializes the reactor, suggests a thread
    pool size of 25, connects a ``KidClientFactory`` to the configured
    server over TCP, sets the socket's send buffer size, prints the
    connection settings and runs the reactor.
    """
    setproctitle.setproctitle(kid_setting.CLIENT_PROC_NAME)
    client_factory = KidClientFactory()

    reactor.__init__()  # @UndefinedVariable
    reactor.suggestThreadPoolSize(25)  # @UndefinedVariable

    connection = reactor.connectTCP(  # @UndefinedVariable
        kid_setting.SERVER_IP,
        kid_setting.SERVER_PORT,
        client_factory)

    # Set SO_SNDBUF on the underlying socket of the new connection.
    send_buffer_size = 4096 * 100
    sock = connection.transport.getHandle()
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, send_buffer_size)

    banner = 'IP:%s\tPort:%s\tHBase IP:%s\tCrawler Type:%d' % (
        kid_setting.SERVER_IP,
        kid_setting.SERVER_PORT,
        kid_setting.HBASE_HOST,
        kid_setting.CRAWLER_TYPE,
    )
    print_plus(banner)

    reactor.run()  # @UndefinedVariable
Beispiel #5
0
def craw_all():
    """Run ``GeneralSpider`` under the reactor and load its JSON output files.

    :return: list with one parsed JSON document per path in
        ``settings_ref.temp_file_name`` (the original docstring describes
        the result as dict[str title, str url] entries for all sites)
    """
    @defer.inlineCallbacks
    def crawl():
        # Stop the reactor after the crawl's Deferred has fired so that
        # reactor.run() below returns.
        yield runner.crawl(GeneralSpider)
        reactor.stop()

    runner = CrawlerRunner()
    crawl()
    # Runs until crawl() calls reactor.stop().
    reactor.run()
    # Re-run the reactor's constructor (presumably so that a later call can
    # start the reactor again -- confirm).
    reactor.__init__()

    ret = []
    for filename in settings_ref.temp_file_name:
        # The with-statement closes the file; no explicit close() is needed.
        with open(filename, 'r') as f:
            ret.append(json.load(f))
    return ret
Beispiel #6
0
 def Connection(self,host,port):
     """Re-initialize the reactor and start a TCP connection to the scheduler.

     The connection uses a ``MIGRobotBaseSchedulerFactory`` built from this
     object's ``platform_id``, ``uid`` and ``robot_id``. ``port`` is converted
     with ``int()`` before it is passed to the reactor.
     """
     scheduler_factory = MIGRobotBaseSchedulerFactory(
         self.platform_id, self.uid, self.robot_id)
     # A process pool is in use, so the worker process gets a copy of the main
     # process's reactor. That reactor has already been run in the main
     # process, so it has to be re-initialized here.
     reactor.__init__()
     target_port = int(port)
     reactor.connectTCP(host, target_port, scheduler_factory)
Beispiel #7
0
# coding=utf-8

import gevent.monkey; gevent.monkey.patch_all()
from twisted.internet import reactor
from ultron.cluster.central.central_engine import CentralEngine
from ultron.utilities.mlog import MLog

# Script entry point: configure MLog, reset the reactor, create the central
# engine and run the reactor.
if __name__ == "__main__":
    # 'central' is the name handed to MLog's configuration (presumably it
    # selects the log target for this process -- confirm).
    MLog.config('central')
    # Re-run the reactor's constructor before anything is registered on it.
    reactor.__init__()
    # Kept in a module-level name so the engine stays referenced while the
    # reactor runs.
    central_engine = CentralEngine()
    reactor.run()
Beispiel #8
0
 def start():
     """Re-initialize the reactor, create a ``CrawlerManager`` and run the reactor."""
     # Re-run the reactor's constructor before use (presumably so that a
     # reactor which was already started can be run again -- confirm).
     reactor.__init__()  # @UndefinedVariable
     # The instance is not kept here; NOTE(review): it presumably registers
     # itself with the reactor -- confirm.
     CrawlerManager()
     reactor.run()  # @UndefinedVariable
Beispiel #9
0
	def setUp( self ):
		"""Log the test's class name, reset the reactor and create a test session."""
		test_name = self.__class__.__name__
		debug( "Setting up %s test...", test_name )

		# Re-run the reactor's constructor (presumably so that every test
		# starts from a fresh reactor -- confirm).
		reactor.__init__()
		session_factory = ThousandParsecClientFactory()
		session_factory.makeTestSession( self )
 def Connection(self,host,port):
     """Re-initialize the reactor and start a TCP connection to the scheduler.

     The connection uses a ``MIGAssistantBaseSchedulerFactory`` built from
     this object's ``platform_id``, ``uid`` and ``nickname``.
     """
     scheduler_factory = MIGAssistantBaseSchedulerFactory(
         self.platform_id, self.uid, self.nickname)
     # A process pool is in use, so the worker process gets a copy of the main
     # process's reactor. That reactor has already been run in the main
     # process, so it has to be re-initialized here.
     reactor.__init__()
     reactor.connectTCP(host, port, scheduler_factory)
Beispiel #11
0
 def Connection(self,host,port):
     """Re-initialize the reactor and start a TCP connection to the scheduler.

     The connection uses a ``MIGBaseSchedulerFactory`` built from this
     object's ``platform_id``, ``uid``, ``token``, ``oppid`` and
     ``oppo_type``.
     """
     scheduler_factory = MIGBaseSchedulerFactory(
         self.platform_id,
         self.uid,
         self.token,
         self.oppid,
         self.oppo_type,
     )
     # A process pool is in use, so the worker process gets a copy of the main
     # process's reactor. That reactor has already been run in the main
     # process, so it has to be re-initialized here.
     reactor.__init__()
     reactor.connectTCP(host, port, scheduler_factory)