コード例 #1
0
    def setUp(self):
        """Create a client/server logger pair and start the log receiver.

        The receiver's ``serve_forever`` loop is launched on its own thread;
        the thread object itself is not kept.
        """
        self.client_logger = get_logger(
            name='cola_test_client', server='localhost')
        self.server_logger = get_logger(name='cola_test_server')

        receiver = LogRecordSocketReceiver(logger=self.server_logger)
        self.log_server = receiver
        serving_thread = threading.Thread(target=receiver.serve_forever)
        serving_thread.start()
コード例 #2
0
ファイル: parsers.py プロジェクト: friedvan/cola
 def __init__(self, opener=None, url=None, bundle=None, **kwargs):
     """Initialise the parser for a single bundle.

     ``opener``, ``url`` and any extra keyword arguments are forwarded to
     the parent constructor. ``bundle`` must expose a ``label`` attribute,
     which is stored as ``self.uid``.
     """
     super(WeiboParser, self).__init__(opener=opener, url=url, **kwargs)

     self.bundle = bundle
     self.uid = self.bundle.label

     self.opener.set_default_timeout(TIMEOUT)

     # Fall back to a dedicated logger when none is set on the instance.
     if getattr(self, 'logger', None) is None:
         self.logger = get_logger(name='weibo_parser')
コード例 #3
0
    def __init__(self, job, data_dir, master, local=None, nodes=None,
                 context=None, logger=None, copies=1, force=False):
        """Build a worker job loader attached to the master at ``master``.

        All arguments except ``master`` are forwarded to the parent
        constructor. When no logger was supplied, one is created that
        writes to ``job.log`` under ``self.root`` and is given the host
        part of ``master`` as its ``server``.
        """
        super(WorkerJobLoader, self).__init__(
            job, data_dir,
            context=context, logger=logger, local=local,
            nodes=nodes, copies=copies, force=force)

        # Debug jobs log at DEBUG level, everything else at INFO.
        log_level = logging.DEBUG if job.debug else logging.INFO
        if self.logger is None:
            logger_name = 'cola_worker_%s' % self.job.real_name
            log_path = os.path.join(self.root, 'job.log')
            master_host = master.split(':')[0]
            self.logger = get_logger(name=logger_name,
                                     filename=log_path,
                                     server=master_host,
                                     basic_level=log_level)

        self.master = master

        # The run lock is created already held.
        run_lock = threading.Lock()
        self.run_lock = run_lock
        run_lock.acquire()
コード例 #4
0
    def __init__(self, job, data_dir, master=None, local=None, nodes=None,
                 context=None, logger=None, copies=1, force=False):
        """Initialise both base loaders, then the logger and rate clearing.

        ``master`` is accepted but not used in this constructor. When no
        logger was supplied, a file logger writing to ``job.log`` under
        ``self.root`` is created.
        """
        BasicWorkerJobLoader.__init__(
            self, job, data_dir,
            context=context, logger=logger, local=local,
            nodes=nodes, copies=copies, force=force)
        LimitionJobLoader.__init__(self, self.job, context=context)

        # Debug jobs log at DEBUG level, everything else at INFO.
        log_level = logging.DEBUG if job.debug else logging.INFO
        if self.logger is None:
            logger_name = 'cola_worker_%s' % self.job.real_name
            log_path = os.path.join(self.root, 'job.log')
            self.logger = get_logger(name=logger_name,
                                     filename=log_path,
                                     basic_level=log_level)

        self.init_rate_clear()
コード例 #5
0
ファイル: master.py プロジェクト: laocheng/cola
    def __init__(self, ctx):
        """Set up the master's directories, trackers, logging and RPC.

        ``ctx`` must carry a non-None ``master_rpc_server`` and a
        ``working_dir``; the ``zip`` and ``jobs`` directories are created
        under ``<working_dir>/master`` when missing.
        """
        self.ctx = ctx
        self.rpc_server = self.ctx.master_rpc_server
        assert self.rpc_server is not None

        master_root = os.path.join(self.ctx.working_dir, 'master')
        self.working_dir = master_root
        self.zip_dir = os.path.join(master_root, 'zip')
        self.job_dir = os.path.join(master_root, 'jobs')
        for required_dir in (self.zip_dir, self.job_dir):
            if not os.path.exists(required_dir):
                os.makedirs(required_dir)

        self.worker_tracker = WorkerTracker()
        self.job_tracker = JobTracker()
        self.black_list = []
        self.stopped = threading.Event()

        self.logger = get_logger("cola_master")
        self._init_log_server(self.logger)

        self._register_rpc()
        self.load()
        # Constructed for its side effects only; the instance is not kept.
        FileTransportServer(self.rpc_server, self.zip_dir)
コード例 #6
0
ファイル: parsers.py プロジェクト: linVdcd/cola
 def __init__(self, opener=None, url=None, bundle=None, **kwargs):
     """Prepare the parser: keep the bundle, its label and a logger.

     The parent constructor receives ``opener``, ``url`` and the extra
     keyword arguments; ``bundle.label`` is stored as ``self.uid``.
     """
     super(WeiboParser, self).__init__(opener=opener, url=url, **kwargs)

     self.bundle = bundle
     self.uid = bundle.label
     self.opener.set_default_timeout(TIMEOUT)

     logger_missing = not hasattr(self, 'logger') or self.logger is None
     if logger_missing:
         # Nothing provided a logger, so create the parser's own.
         self.logger = get_logger(name='weibo_parser')
コード例 #7
0
ファイル: master.py プロジェクト: awai0707/cola
    def __init__(self, ctx):
        """Wire up the master from the shared context ``ctx``.

        Requires ``ctx.master_rpc_server`` to be set. Creates the ``zip``
        and ``jobs`` directories below ``<ctx.working_dir>/master`` if they
        do not exist, then initialises trackers, logging and RPC handlers.
        """
        self.ctx = ctx
        self.rpc_server = self.ctx.master_rpc_server
        assert self.rpc_server is not None

        self.working_dir = os.path.join(self.ctx.working_dir, 'master')
        self.zip_dir = os.path.join(self.working_dir, 'zip')
        self.job_dir = os.path.join(self.working_dir, 'jobs')
        missing = [d for d in (self.zip_dir, self.job_dir)
                   if not os.path.exists(d)]
        for directory in missing:
            os.makedirs(directory)

        self.worker_tracker = WorkerTracker()
        self.job_tracker = JobTracker()

        self.black_list = []
        self.stopped = threading.Event()

        master_logger = get_logger("cola_master")
        self.logger = master_logger
        self._init_log_server(master_logger)

        self._register_rpc()
        self.load()
        # The server object is not stored; it is built for its side effects.
        FileTransportServer(self.rpc_server, self.zip_dir)
コード例 #8
0
ファイル: login.py プロジェクト: brightgems/cola
 def __init__(self, opener, username, passwd):
     """Store the opener, the credentials and the login endpoint URLs."""
     # Collaborators
     self.opener = opener
     self.logger = get_logger("weibo.login")

     # Credentials
     self.username, self.passwd = username, passwd

     # Endpoints used by the login flow
     self.weibo_url = 'http://weibo.com/'
     self.prelogin_url = 'https://login.sina.com.cn/sso/prelogin.php'
     self.login_url = (
         'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)')
     self.captcha_url = 'http://login.sina.com.cn/cgi/pin.php'
コード例 #9
0
ファイル: loader.py プロジェクト: MwzkQmuUZkFLbXm/cola
 def __init__(self, job, data_dir, nodes, client=None,
              context=None, copies=1, force=False):
     """Set up the master-side loader for ``job`` across ``nodes``.

     The local address is ``<get_ip()>:<ctx.job.master_port>``. After the
     base loaders are initialised, this creates the message-queue client,
     two locks that start out held, the job logger (optionally attached to
     ``client``), the RPC endpoints and the SIGINT/SIGTERM handlers.
     """
     ctx = context or job.context
     port = ctx.job.master_port
     local = '%s:%s' % (get_ip(), port)

     JobLoader.__init__(self, job, data_dir, local,
                        context=ctx, copies=copies, force=force)
     LimitionJobLoader.__init__(self, job, context=ctx)

     self.check()

     # Independent copies: both lists start out as the full node list.
     self.nodes = nodes
     self.not_registered = self.nodes[:]
     self.not_finished = self.nodes[:]

     self.mq_client = MessageQueueClient(self.nodes, copies=copies)

     # Both locks are acquired right after creation.
     self.ready_lock = threading.Lock()
     self.ready_lock.acquire()
     self.finish_lock = threading.Lock()
     self.finish_lock.acquire()

     self.logger = get_logger(
         name='cola_master_%s' % self.job.real_name,
         filename=os.path.join(self.root, 'job.log'),
         is_master=True)
     self.client = client
     if client is None:
         self.client_handler = None
     else:
         self.client_handler = add_log_client(self.logger, client)

     self.init_rpc_server()
     self.init_rate_clear()
     self.init_logger_server(self.logger)

     # Each method is exposed over RPC under its own name.
     rpc_names = ('client_stop', 'ready', 'worker_finish', 'complete',
                  'error', 'get_nodes', 'apply', 'require', 'stop',
                  'add_node', 'remove_node')
     for rpc_name in rpc_names:
         self.rpc_server.register_function(getattr(self, rpc_name), rpc_name)

     for signum in (signal.SIGINT, signal.SIGTERM):
         signal.signal(signum, self.signal_handler)
コード例 #10
0
    def __init__(self, job, data_dir, context=None, logger=None,
                 local=None, nodes=None, copies=1, force=False):
        """Initialise the basic worker loader for ``job``.

        ``local`` defaults to ``<get_ip()>:<ctx.job.port>`` and ``nodes``
        defaults to a list holding only the local address. A supplied
        ``local`` must contain a ``:`` separator, otherwise unpacking it
        raises ``ValueError``.
        """
        self.job = job
        ctx = context or self.job.context

        self.local = local
        if self.local is not None:
            # Unpacking also checks that the address has a host:port shape.
            host, port = tuple(self.local.split(':', 1))
        else:
            host, port = get_ip(), ctx.job.port
            self.local = '%s:%s' % (host, port)

        self.nodes = [self.local] if nodes is None else nodes

        self.logger = logger
        self.info_logger = get_logger(
            name='cola_worker_info_%s' % self.job.real_name)

        super(BasicWorkerJobLoader, self).__init__(
            self.job, data_dir, self.local,
            context=ctx, copies=copies, force=force)

        # Number of instances run at the same time, clamped to
        # [1, MAX_THREADS_SIZE].
        self.instances = max(min(self.ctx.job.instances, MAX_THREADS_SIZE), 1)
        # Currently executing items.
        self.executings = []
        # Count of consecutively raised exceptions.
        self.error_times = 0
        # Budget counter.
        self.budget = 0

        self.check()
        self.init_rpc_server()
        self.init_mq()

        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.signal_handler)

        # Each method is exposed over RPC under its own name.
        for rpc_name in ('stop', 'add_node', 'remove_node', 'run'):
            self.rpc_server.register_function(getattr(self, rpc_name),
                                              name=rpc_name)
コード例 #11
0
ファイル: loader.py プロジェクト: friedvan/cola
 def __init__(self, job, data_dir, context=None, logger=None,
              local=None, nodes=None, copies=1, force=False):
     """Initialise the basic worker loader for ``job``.

     ``local`` defaults to ``<get_ip()>:<ctx.job.port>`` and ``nodes``
     defaults to a list holding only the local address. A supplied
     ``local`` must contain a ``:`` separator, otherwise unpacking it
     raises ``ValueError``.
     """
     self.job = job
     ctx = context or self.job.context

     self.local = local
     if self.local is not None:
         # Unpacking also checks that the address has a host:port shape.
         host, port = tuple(self.local.split(':', 1))
     else:
         host, port = get_ip(), ctx.job.port
         self.local = '%s:%s' % (host, port)

     self.nodes = [self.local] if nodes is None else nodes

     self.logger = logger
     info_name = 'cola_worker_info_%s' % self.job.real_name
     self.info_logger = get_logger(name=info_name)

     super(BasicWorkerJobLoader, self).__init__(
         self.job, data_dir, self.local,
         context=ctx, copies=copies, force=force)

     # Number of instances run at the same time, clamped to
     # [1, MAX_THREADS_SIZE].
     self.instances = max(min(self.ctx.job.instances, MAX_THREADS_SIZE), 1)
     # Currently executing items.
     self.executings = []
     # Count of consecutively raised exceptions.
     self.error_times = 0
     # Budget counter.
     self.budget = 0
     # Page counter.
     self.pages_size = 0

     # Held from construction on ("lock when not stopped").
     self.stop_lock = threading.Lock()
     self.stop_lock.acquire()

     self.check()
     self.init_rpc_server()
     self.init_mq()

     for signum in (signal.SIGINT, signal.SIGTERM):
         signal.signal(signum, self.signal_handler)

     # Each method is exposed over RPC under its own name.
     for rpc_name in ('stop', 'add_node', 'remove_node', 'run', 'pages'):
         self.rpc_server.register_function(getattr(self, rpc_name),
                                           name=rpc_name)
コード例 #12
0
 def __init__(self, content, base_url=None, logger=None, debug=False, **options):
     """Keep the raw content and configure logging and debug output.

     When ``logger`` is None the shared 'cola_extractor' logger is used.
     ``self.debug`` is the logger's ``info`` method when ``debug`` is
     truthy, and a no-op taking one argument otherwise.
     """
     self._content = content
     self.logger = logger
     self.base_url = base_url
     if logger is None:
         self.logger = get_logger('cola_extractor')

     self.on_debug = debug
     if debug:
         self.debug = self.logger.info
     else:
         self.debug = lambda s: None
     self.options = options

     # Filled in later; start out empty.
     self._title = None
     self._html = None
コード例 #13
0
ファイル: context.py プロジェクト: awai0707/cola
 def __init__(self, local_mode=False, is_master=False, master_addr=None, 
              is_client=False, working_dir=None, mkdirs=False, 
              ip=None, ips=None):
     """Resolve addresses and the working directory, then start the manager.

     Raises ValueError when ``local_mode`` is False and ``master_addr`` is
     None. ``mkdirs`` is only consulted when ``working_dir`` is None, i.e.
     for the default ``<tempdir>/cola`` directory.
     """
     self.is_local_mode = local_mode
     self.is_master = is_master
     self.is_client = is_client
     
     # master_ip starts out equal to master_addr and is narrowed to the
     # host part below when the address carries a port.
     self.master_addr = master_addr
     self.master_ip = self.master_addr
     if not self.is_local_mode:
         if self.master_addr is None:
             raise ValueError('Master address must be supplied when local_mode is False')
             
         if ':' not in self.master_addr:
             # Bare host: append the configured master port.
             self.master_addr = '%s:%s' % (self.master_addr, main_conf.master.port)
         else:
             self.master_ip = self.master_addr.split(':', 1)[0]
     
     self.working_dir = working_dir
     if self.working_dir is None:
         tmp = tempfile.gettempdir()
         self.working_dir = os.path.join(tmp, 'cola')
         if mkdirs and not os.path.exists(self.working_dir):
             os.makedirs(self.working_dir)
             
     # An explicit ip wins; otherwise a master uses master_ip and everything
     # else uses get_ip(), with a loopback fallback in local mode.
     self.ip = ip
     if self.ip is None:
         if self.is_master:
             self.ip = self.master_ip
         else:
             self.ip = get_ip()
             if self.is_local_mode and not self.ip:
                 self.ip = '127.0.0.1'
     # Only reachable with master_addr None in local mode (the non-local
     # path raised above).
     if self.master_addr is None: self.master_addr = '%s:%s' % (self.ip, main_conf.master.port)
     self.worker_addr = '%s:%s' % (self.ip, main_conf.worker.port)
     
     # NOTE: an empty list passed by the caller is appended to in place.
     self.ips = ips if ips is not None else []
     if not self.ips:
         self.ips.append(self.ip)
     self.addrs = [self.fix_addr(_ip) for _ip in self.ips]
         
     # env is a dict created through the started manager.
     self.manager = ContextManager()
     self.manager.start(manager_init)
     self.env = self.manager.dict({'ip': self.ip, 
                                   'root': self.working_dir,
                                   'is_local': self.is_local_mode, 
                                   'master_ip': self.master_ip,
                                   'job_desc' : {}
                                   })
     self.logger = get_logger('cola_context')
     
     # RPC servers are not created here; both start out unset.
     self.master_rpc_server = None
     self.worker_rpc_server = None
コード例 #14
0
ファイル: __init__.py プロジェクト: BUAA-DreamTeam/cola
 def __init__(self, content, base_url=None, logger=None, debug=False, **options):
     """Store the content to extract from and set up logging.

     A missing ``logger`` is replaced by the 'cola_extractor' logger.
     ``self.debug`` forwards to ``logger.info`` only when ``debug`` is
     truthy; otherwise it is a one-argument no-op.
     """
     self._content = content
     self.base_url = base_url
     self.logger = get_logger('cola_extractor') if logger is None else logger

     self.on_debug = debug
     debug_sink = self.logger.info if debug else (lambda s: None)
     self.debug = debug_sink
     self.options = options

     # Both start out unset.
     self._title, self._html = None, None
コード例 #15
0
ファイル: loader.py プロジェクト: bingyupj/cola
 def __init__(self, job, data_dir, master=None, local=None, nodes=None,
              context=None, logger=None, copies=1, force=False):
     """Initialise both base loaders, a default logger and rate clearing.

     ``master`` is accepted but not used in this constructor. Without a
     supplied logger, one writing to ``job.log`` under ``self.root`` is
     created.
     """
     BasicWorkerJobLoader.__init__(
         self, job, data_dir,
         context=context, logger=logger, local=local,
         nodes=nodes, copies=copies, force=force)
     LimitionJobLoader.__init__(self, self.job, context=context)

     if self.logger is None:
         logger_name = 'cola_worker_%s' % self.job.real_name
         log_path = os.path.join(self.root, 'job.log')
         self.logger = get_logger(name=logger_name, filename=log_path)

     self.init_rate_clear()
コード例 #16
0
ファイル: loader.py プロジェクト: bingyupj/cola
 def __init__(self, job, data_dir, master, local=None, nodes=None,
              context=None, logger=None, copies=1, force=False):
     """Build a worker loader that reports to the master at ``master``.

     Without a supplied logger, one is created that writes to ``job.log``
     under ``self.root`` and is given the host part of ``master`` as its
     ``server``.
     """
     super(WorkerJobLoader, self).__init__(
         job, data_dir,
         context=context, logger=logger, local=local,
         nodes=nodes, copies=copies, force=force)

     if self.logger is None:
         logger_name = 'cola_worker_%s' % self.job.real_name
         log_path = os.path.join(self.root, 'job.log')
         master_host = master.split(':')[0]
         self.logger = get_logger(name=logger_name,
                                  filename=log_path,
                                  server=master_host)

     self.master = master

     # The run lock is created already held.
     run_lock = threading.Lock()
     self.run_lock = run_lock
     run_lock.acquire()
コード例 #17
0
    def __init__(self, opener=None, url=None, bundle=None, **kwargs):
        """Initialise the parser, creating an opener and logger if missing.

        ``bundle`` is accepted but not used in this constructor.
        """
        super(DoubanMovieParser, self).__init__(
            opener=opener, url=url, **kwargs)

        # Fall back to a MechanizeOpener when the parent left none.
        if self.opener is None:
            self.opener = MechanizeOpener()
        self.url = url
        self.opener.set_default_timeout(TIMEOUT)

        if getattr(self, 'logger', None) is None:
            self.logger = get_logger(name='douban_parser')
コード例 #18
0
ファイル: loader.py プロジェクト: iswangheng/cola
def load_job(path, master=None):
    """Load the job defined at ``path`` and run it.

    Without ``master`` the job's start entries are queued and the loader
    runs directly. With ``master`` the node list is fetched from it, the
    local node reports 'ready', and a helper thread runs the loader once
    ``loader.stopped`` becomes truthy. The RPC server is shut down in both
    cases, including on error.

    Raises ValueError if ``path`` does not exist.
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    holder = os.path.join(root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)

    # NOTE(review): the log file path is passed as the first positional
    # argument of get_logger; confirm that is what its signature expects.
    logger = get_logger(os.path.join(holder, 'job.log'))

    standalone = master is None
    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    nodes = [local_node] if standalone else client_call(master, 'get_nodes')

    bloom_filter_file = os.path.join(holder, 'bloomfilter')
    bloom_filter_hook = create_bloom_filter_hook(bloom_filter_file, job)

    rpc_server = create_rpc_server(job)
    loader = JobLoader(job, rpc_server, logger=logger, master=master)
    loader.init_mq(nodes, local_node, mq_holder,
                   verify_exists_hook=bloom_filter_hook,
                   copies=2 if master else 1)

    def _wait_then_run():
        # Poll until the loader reports stopped, then run it.
        while not loader.stopped:
            time.sleep(TIME_SLEEP)
        loader.run()

    try:
        if standalone:
            loader.mq.put(job.starts)
            loader.run()
        else:
            client_call(master, 'ready', local_node)
            runner = threading.Thread(target=_wait_then_run)
            runner.start()
            runner.join()
    finally:
        rpc_server.shutdown()
コード例 #19
0
ファイル: loader.py プロジェクト: ballacky13/cola
def load_job(path, master=None):
    """Import the job at ``path``, prepare its loader and run it.

    In standalone mode (``master`` is None) the start entries are put on
    the queue and the loader runs in the calling thread. Otherwise the
    node list comes from the master, 'ready' is reported, and the loader
    runs on a helper thread after ``loader.stopped`` becomes truthy. The
    RPC server is always shut down afterwards.

    Raises ValueError if ``path`` does not exist.
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    holder = os.path.join(root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)

    # NOTE(review): the log file path is passed as the first positional
    # argument of get_logger; confirm that is what its signature expects.
    log_path = os.path.join(holder, 'job.log')
    logger = get_logger(log_path)

    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    if master is None:
        nodes = [local_node]
    else:
        nodes = client_call(master, 'get_nodes')

    bloom_filter_hook = create_bloom_filter_hook(
        os.path.join(holder, 'bloomfilter'), job)

    rpc_server = create_rpc_server(job)
    loader = JobLoader(job, rpc_server, logger=logger, master=master)
    copies = 2 if master else 1
    loader.init_mq(nodes, local_node, mq_holder,
                   verify_exists_hook=bloom_filter_hook,
                   copies=copies)

    try:
        if master is not None:
            client_call(master, 'ready', local_node)

            def _start():
                # Poll until the loader reports stopped, then run it.
                while not loader.stopped:
                    time.sleep(TIME_SLEEP)
                loader.run()

            worker_thread = threading.Thread(target=_start)
            worker_thread.start()
            worker_thread.join()
        else:
            loader.mq.put(job.starts)
            loader.run()
    finally:
        rpc_server.shutdown()
コード例 #20
0
ファイル: __init__.py プロジェクト: zzzz123321/cola
    def __init__(self, ctx, job_def_path, job_name, job_desc=None,
                 working_dir=None, rpc_server=None, manager=None,
                 job_offset=0):
        """Prepare a job: description, settings, working dir and RPC hooks.

        ``working_dir`` defaults to ``<ctx.working_dir>/<job_name>`` and is
        created if missing. ``job_desc`` is imported from ``job_def_path``
        when not supplied. ``manager`` must provide ``list()``; the None
        default fails when that method is called.
        """
        self.status = NOTSTARTED
        self.ctx = ctx
        self.shutdown_callbacks = []

        self.stopped = multiprocessing.Event()
        # The non-suspend event starts out set.
        self.nonsuspend = multiprocessing.Event()
        self.nonsuspend.set()

        self.job_def_path = job_def_path
        self.job_name = job_name
        if working_dir:
            self.working_dir = working_dir
        else:
            self.working_dir = os.path.join(self.ctx.working_dir,
                                            self.job_name)
        # The timestamp suffix makes the logger name differ per instance.
        self.logger = get_logger(name='cola_job' + str(time.time()))
        self.job_desc = job_desc or import_job_desc(job_def_path)

        self.settings = self.job_desc.settings
        self.is_bundle = (self.settings.job.mode == 'bundle')

        self.rpc_server = rpc_server

        # Containers: at least one, at most one per CPU.
        self.n_instances = self.job_desc.settings.job.instances
        wanted_containers = max(self.n_instances, 1)
        self.n_containers = min(get_cpu_count(), wanted_containers)
        self.job_offset = job_offset
        self.is_multi_process = self.n_containers > 1
        self.processes = []

        # One idle flag per container, held in a manager-backed list.
        self.idle_statuses = manager.list([False] * self.n_containers)
        self.manager = manager

        if not os.path.exists(self.working_dir):
            os.makedirs(self.working_dir)
        self.inited = False
        self._register_rpc()
コード例 #21
0
 def __init__(self, ctx):
     """Set up the worker's directories, RPC handlers and logger from ``ctx``.

     Requires ``ctx.worker_rpc_server`` to be set. The logger is given
     ``ctx.master_ip`` as its ``server``.
     """
     self.ctx = ctx
     self.master = self.ctx.master_addr

     worker_root = os.path.join(self.ctx.working_dir, 'worker')
     self.working_dir = worker_root
     self.job_dir = os.path.join(worker_root, 'jobs')
     self.zip_dir = os.path.join(worker_root, 'zip')
     self.running_jobs = {}

     self.rpc_server = self.ctx.worker_rpc_server
     assert self.rpc_server is not None
     self._register_rpc()

     self.stopped = threading.Event()
     self.logger = get_logger('cola_worker', server=self.ctx.master_ip)

     for directory in (self.job_dir, self.zip_dir):
         self._ensure_exists(directory)
     # Constructed for its side effects only; the instance is not kept.
     FileTransportServer(self.rpc_server, self.zip_dir)
コード例 #22
0
ファイル: container.py プロジェクト: awai0707/cola
 def init(self):
     """Initialise the container once; later calls return immediately.

     Under ``self.lock`` this sets up the log file path and logger, one
     counter/budget/speed client per task, the tasks themselves, counter
     syncing and the idle-status checker, then marks the container inited.
     """
     with self.lock:
         if self.inited:
             return

         self.log_file = os.path.join(self.working_dir, 'job.log')
         # Keep an existing logger; create one only when none is set.
         if not self.logger:
             self.logger = get_logger(name='cola_task',
                                      filename=self.log_file,
                                      server=self.master_ip)

         for offset in range(self.n_tasks):
             self.counter_clients[offset] = CounterClient(
                 self.counter_server, app_name=self.job_name)
             self.budget_clients[offset] = BudgetApplyClient(
                 self.budget_server, app_name=self.job_name)
             self.speed_clients[offset] = SpeedControlClient(
                 self.speed_server, self.ip, self.task_start_id+offset,
                 app_name=self.job_name)

         self.init_tasks()
         self._init_counter_sync()
         self._init_idle_status_checker()

         self.inited = True
コード例 #23
0
ファイル: __init__.py プロジェクト: Andelfin/cola
 def __init__(self, ctx, job_def_path, job_name,
              job_desc=None, working_dir=None, rpc_server=None,
              manager=None, job_offset=0):
     """Prepare a job: description, settings, working dir and RPC hooks.

     ``working_dir`` defaults to ``<ctx.working_dir>/<job_name>`` and is
     created if missing. ``job_desc`` is imported from ``job_def_path``
     when not supplied. ``manager`` must provide ``list()``; the None
     default fails when that method is called.
     """
     self.status = NOTSTARTED
     self.ctx = ctx
     self.shutdown_callbacks = []

     self.stopped = multiprocessing.Event()
     # The non-suspend event starts out set.
     self.nonsuspend = multiprocessing.Event()
     self.nonsuspend.set()

     self.job_def_path = job_def_path
     self.job_name = job_name
     if working_dir:
         self.working_dir = working_dir
     else:
         self.working_dir = os.path.join(self.ctx.working_dir, self.job_name)
     # The timestamp suffix makes the logger name differ per instance.
     logger_name = 'cola_job'+str(time.time())
     self.logger = get_logger(name=logger_name)
     self.job_desc = job_desc or import_job_desc(job_def_path)

     self.settings = self.job_desc.settings
     self.is_bundle = (self.settings.job.mode == 'bundle')

     self.rpc_server = rpc_server

     # Containers: at least one, at most one per CPU.
     self.n_instances = self.job_desc.settings.job.instances
     self.n_containers = min(get_cpu_count(), max(self.n_instances, 1))
     self.job_offset = job_offset
     self.is_multi_process = self.n_containers > 1
     self.processes = []

     # One idle flag per container, held in a manager-backed list.
     initial_flags = [False] * self.n_containers
     self.idle_statuses = manager.list(initial_flags)
     self.manager = manager

     if not os.path.exists(self.working_dir):
         os.makedirs(self.working_dir)
     self.inited = False
     self._register_rpc()
コード例 #24
0
ファイル: container.py プロジェクト: tmacmilan/cola
    def init(self):
        """Initialise the container once; later calls return immediately.

        Under ``self.lock`` this sets up the log file path and logger, one
        counter/budget/speed client per task, the tasks themselves, counter
        syncing and the idle-status checker, then marks the container
        inited.
        """
        with self.lock:
            if self.inited:
                return

            self.log_file = os.path.join(self.working_dir, 'job.log')
            # Keep an existing logger; create one only when none is set.
            if not self.logger:
                self.logger = get_logger(name='cola_task',
                                         filename=self.log_file,
                                         server=self.master_ip)

            app = self.job_name
            for idx in range(self.n_tasks):
                self.counter_clients[idx] = CounterClient(
                    self.counter_server, app_name=app)
                self.budget_clients[idx] = BudgetApplyClient(
                    self.budget_server, app_name=app)
                self.speed_clients[idx] = SpeedControlClient(
                    self.speed_server, self.ip, self.task_start_id + idx,
                    app_name=app)

            self.init_tasks()
            self._init_counter_sync()
            self._init_idle_status_checker()

            self.inited = True
コード例 #25
0
ファイル: startproject.py プロジェクト: zhangw/cola
 def __init__(self):
     """Attach the command's dedicated logger to the instance."""
     command_logger = get_logger("cola_startproject_command")
     self.logger = command_logger
コード例 #26
0
ファイル: test_log.py プロジェクト: 0pengl/cola
 def setUp(self):
     """Create the client and server loggers and start the log receiver.

     ``serve_forever`` of the receiver runs on a separate thread that is
     started here and not kept.
     """
     self.client_logger = get_logger(
         name='cola_test_client', server='localhost')
     self.server_logger = get_logger(name='cola_test_server')

     self.log_server = LogRecordSocketReceiver(logger=self.server_logger)
     receiver_thread = threading.Thread(
         target=self.log_server.serve_forever)
     receiver_thread.start()
コード例 #27
0
ファイル: preprocess.py プロジェクト: zzzz123321/cola
 def __init__(self, html, base_url=None, logger=None):
     """Store the HTML and base URL; default to the 'cola_extractor' logger."""
     if logger is None:
         self.logger = get_logger(name='cola_extractor')
     else:
         self.logger = logger
     self.html, self.base_url = html, base_url
コード例 #28
0
ファイル: coca.py プロジェクト: owengbs/cola_weido
'''

import argparse
import socket
import threading
import os
import tempfile
import shutil

from cola.core.logs import get_logger, LogRecordSocketReceiver
from cola.core.rpc import client_call, FileTransportClient, ColaRPCServer
from cola.core.utils import import_job, get_ip
from cola.core.zip import ZipHandler
from cola.core.config import main_conf

# Module-wide logger for this command-line tool.
logger = get_logger(name='coca')
# Command-line argument parser for the 'Coca' program.
parser = argparse.ArgumentParser('Coca')
# Maps a function's __name__ to the function; filled in by register().
registered_func = {}

def _client_call(*args):
    """Forward ``args`` to ``client_call`` and return its result.

    A ``socket.error`` is logged and turned into a ``None`` return value
    instead of propagating.
    """
    try:
        result = client_call(*args)
    except socket.error:
        logger.error('Cannot connect to cola master')
        return None
    return result

def register(func):
    """Record ``func`` in ``registered_func`` under its ``__name__``.

    A dash-style option name and a help string are also derived from the
    function; how they are used is not visible in this excerpt.
    """
    func_name = func.__name__
    # Underscores become dashes and a single leading dash is enforced,
    # e.g. '_start_job' -> '-start-job'.
    name = '-%s' % func_name.replace('_', '-').strip('-')
    # Requires func to have a docstring; None has no strip().
    help_ = func.__doc__.strip()

    registered_func[func_name] = func
コード例 #29
0
ファイル: parsers.py プロジェクト: renchaorevee/cola
 def __init__(self, opener=None, url=None, bundle=None, **kwargs):
     """Initialise the parser for ``bundle`` and ensure a logger exists.

     ``opener``, ``url`` and extra keyword arguments go to the parent
     constructor; ``bundle.label`` is stored as ``self.uid``.
     """
     super(WeiboParser, self).__init__(opener=opener, url=url, **kwargs)

     self.bundle = bundle
     self.uid = self.bundle.label

     # Create the parser's own logger only when none is set.
     if getattr(self, "logger", None) is None:
         self.logger = get_logger(name="weibo_parser")
コード例 #30
0
Created on 2013-6-22

@author: Chine
'''

import socket
import os

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.logs import get_logger
from cola.worker.recover import recover

from conf import user_config

# Module-wide logger for this stop script.
logger = get_logger(name='sina_stop')

if __name__ == '__main__':
    # Ask the locally running worker to stop over RPC. If it cannot be
    # reached, offer a forced stop that recovers the job directory instead.
    ip, port = get_ip(), user_config.job.port
    logger.info('Trying to stop single running worker')

    stopped = True
    try:
        client_call('%s:%s' % (ip, port), 'stop')
    except socket.error:
        # raw_input: this script targets Python 2.
        stop = raw_input("Force to stop? (y or n) ").strip()
        if stop in ('y', 'yes'):
            job_path = os.path.split(os.path.abspath(__file__))[0]
            recover(job_path)
        else:
            # The user declined, so nothing was stopped.
            stopped = False
            print('ignore')

    # Report success only when a stop (regular or forced) actually happened.
    if stopped:
        logger.info('Successfully stopped single running worker')
コード例 #31
0
ファイル: stop.py プロジェクト: 0pengl/cola
limitations under the License.

Created on 2013-6-22

@author: Chine
'''

import os
import socket

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.config import Config
from cola.core.logs import get_logger

# Module-wide logger for this stop script.
logger = get_logger(name='generic_stop')

def _client_call(*args):
    """Best-effort wrapper around ``client_call``.

    Returns whatever ``client_call(*args)`` returns. Any ordinary failure
    is logged and ``None`` is returned instead of raising.
    ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    try:
        return client_call(*args)
    except socket.error:
        logger.error('Cannot connect to single running worker.')
    except Exception as e:
        # Still best-effort, but no longer silent, and limited to Exception
        # so that interpreter-exit signals propagate.
        logger.error('Call to single running worker failed: %s' % e)
    return None

def get_user_conf(s):
    """Return the path of file ``s`` inside this script's directory."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, s)


# Prefer test.yaml when it exists next to the script.
user_conf = get_user_conf('test.yaml')
if not os.path.exists(user_conf):
    user_conf = get_user_conf('generic.yaml')
user_config = Config(user_conf)
コード例 #32
0
'''

import argparse
import socket
import threading
import os
import tempfile
import shutil

from cola.core.logs import get_logger, LogRecordSocketReceiver
from cola.core.rpc import client_call, FileTransportClient, ColaRPCServer
from cola.core.utils import import_job, get_ip
from cola.core.zip import ZipHandler
from cola.core.config import main_conf

# Module-wide logger for this command-line tool.
logger = get_logger(name='coca')
# Command-line argument parser for the 'Coca' program.
parser = argparse.ArgumentParser('Coca')
# Maps a function's __name__ to the function; filled in by register().
registered_func = {}

def _client_call(*args):
    """Call ``client_call`` with ``args``; log and return None on socket errors."""
    try:
        outcome = client_call(*args)
    except socket.error:
        logger.error('Cannot connect to cola master')
        outcome = None
    return outcome

def register(func):
    """Record ``func`` in ``registered_func`` under its ``__name__``.

    A dash-style option name and a help string are also derived from the
    function; how they are used is not visible in this excerpt.
    """
    func_name = func.__name__
    # Underscores become dashes and a single leading dash is enforced,
    # e.g. '_start_job' -> '-start-job'.
    name = '-%s' % func_name.replace('_', '-').strip('-')
    # Requires func to have a docstring; None has no strip().
    help_ = func.__doc__.strip()
    
    registered_func[func_name] = func
コード例 #33
0
ファイル: stop.py プロジェクト: 0pengl/cola
limitations under the License.

Created on 2013-6-22

@author: Chine
'''

import os
import socket

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.config import Config
from cola.core.logs import get_logger

# Module-wide logger for this stop script.
logger = get_logger(name='wiki_stop')

def _client_call(*args):
    try:
        return client_call(*args)
    except socket.error:
        logger.error('Cannot connect to single running worker.')
    except:
        pass

def get_user_conf(s):
    """Return the path of the file named ``s`` in this script's directory."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, s)


# 'test.yaml' is used when it exists; otherwise fall back to 'wiki.yaml'.
user_conf = get_user_conf('test.yaml')
user_conf = (user_conf if os.path.exists(user_conf)
             else get_user_conf('wiki.yaml'))
user_config = Config(user_conf)
コード例 #34
0
    def __init__(self,
                 job,
                 data_dir,
                 nodes,
                 local_ip=None,
                 client=None,
                 context=None,
                 copies=1,
                 force=False):
        """Resolve the local address, initialise both base loaders, then set
        up the message queue client, locks, logger, RPC endpoints and signal
        handlers.

        ``local_ip`` defaults to ``get_ip()``. An explicit value must be one
        of the addresses returned by ``get_ips()``; otherwise ``ValueError``
        is raised.
        """
        job_ctx = context if context else job.context
        port = job_ctx.job.master_port
        if local_ip is not None:
            known_ips = get_ips()
            if local_ip not in known_ips:
                raise ValueError('IP address must be one of (%s)' %
                                 ','.join(known_ips))
        else:
            local_ip = get_ip()
        local_addr = '%s:%s' % (local_ip, port)

        JobLoader.__init__(self, job, data_dir, local_addr,
                           context=job_ctx, copies=copies, force=force)
        LimitionJobLoader.__init__(self, job, context=job_ctx)

        # check
        self.check()

        # the two tracking attributes start as separate copies of ``nodes``
        self.nodes = nodes
        self.not_registered = self.nodes[:]
        self.not_finished = self.nodes[:]

        # mq
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # lock: both are created and acquired right away
        ready_lock = threading.Lock()
        ready_lock.acquire()
        self.ready_lock = ready_lock
        finish_lock = threading.Lock()
        finish_lock.acquire()
        self.finish_lock = finish_lock

        # logger
        logger_name = 'cola_master_%s' % self.job.real_name
        log_file = os.path.join(self.root, 'job.log')
        self.logger = get_logger(name=logger_name,
                                 filename=log_file,
                                 is_master=True)
        self.client = client
        self.client_handler = (add_log_client(self.logger, self.client)
                               if self.client is not None else None)

        self.init_rpc_server()
        self.init_rate_clear()
        self.init_logger_server(self.logger)

        # register rpc server: each method is exposed under its own name
        for rpc_name in ('client_stop', 'ready', 'worker_finish', 'complete',
                         'error', 'get_nodes', 'apply', 'require', 'stop',
                         'add_node', 'remove_node'):
            self.rpc_server.register_function(getattr(self, rpc_name),
                                              rpc_name)

        # register signal
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.signal_handler)
コード例 #35
0
ファイル: cookiepool.py プロジェクト: brightgems/cola
 def __init__(self):
     """Attach the "cookie_pool" logger and start ``count`` at -1."""
     self.logger = get_logger("cookie_pool")
     self.count = -1
コード例 #36
0
ファイル: preprocess.py プロジェクト: BUAA-DreamTeam/cola
 def __init__(self, html, base_url=None, logger=None):
     """Store the HTML and base URL; use ``logger`` when one is given,
     otherwise obtain the 'cola_extractor' logger.
     """
     self.html = html
     self.base_url = base_url
     self.logger = (logger if logger is not None
                    else get_logger(name='cola_extractor'))
コード例 #37
0
 def __init__(self):
     """Attach the 'cola_startproject_command' logger to this instance."""
     command_logger = get_logger('cola_startproject_command')
     self.logger = command_logger
コード例 #38
0
ファイル: context.py プロジェクト: ll2088/cola
    def __init__(self,
                 local_mode=False,
                 is_master=False,
                 master_addr=None,
                 is_client=False,
                 working_dir=None,
                 mkdirs=False,
                 ip=None,
                 ips=None):
        """Resolve addresses and the working directory, then start the
        shared manager and publish the environment dict.

        Raises ``ValueError`` when ``local_mode`` is False and no
        ``master_addr`` is supplied.
        """
        self.is_local_mode = local_mode
        self.is_master = is_master
        self.is_client = is_client

        # ``master_ip`` starts out equal to ``master_addr`` and is only
        # reduced to the host part inside the non-local branch below.
        # NOTE(review): in local mode a ``master_addr`` containing ':' is
        # never split, so ``master_ip`` would keep the port -- confirm.
        self.master_addr = master_addr
        self.master_ip = self.master_addr
        if not self.is_local_mode:
            if self.master_addr is None:
                raise ValueError(
                    'Master address must be supplied when local_mode is False')

            if ':' not in self.master_addr:
                # No port given: append the configured master port.
                self.master_addr = '%s:%s' % (self.master_addr,
                                              main_conf.master.port)
            else:
                # Keep only the part before the first ':' as the master IP.
                self.master_ip = self.master_addr.split(':', 1)[0]

        # Default working directory is '<system temp dir>/cola'.
        # NOTE(review): ``mkdirs`` is only honoured for this default; an
        # explicitly supplied ``working_dir`` is never created here.
        self.working_dir = working_dir
        if self.working_dir is None:
            tmp = tempfile.gettempdir()
            self.working_dir = os.path.join(tmp, 'cola')
            if mkdirs and not os.path.exists(self.working_dir):
                os.makedirs(self.working_dir)

        # Without an explicit ``ip``: a master uses ``master_ip``; anything
        # else asks ``get_ip()`` and, in local mode, falls back to
        # '127.0.0.1' when that returns a falsy value.
        self.ip = ip
        if self.ip is None:
            if self.is_master:
                self.ip = self.master_ip
            else:
                self.ip = get_ip()
                if self.is_local_mode and not self.ip:
                    self.ip = '127.0.0.1'
        # Only reachable with ``master_addr`` still None in local mode (the
        # non-local branch above raises): derive it from ``self.ip``.
        if self.master_addr is None:
            self.master_addr = '%s:%s' % (self.ip, main_conf.master.port)
        self.worker_addr = '%s:%s' % (self.ip, main_conf.worker.port)

        # NOTE(review): a caller-supplied empty list is appended to in
        # place, i.e. the caller's object is mutated -- confirm intended.
        self.ips = ips if ips is not None else []
        if not self.ips:
            self.ips.append(self.ip)
        self.addrs = [self.fix_addr(_ip) for _ip in self.ips]

        # Start the manager (with ``manager_init`` as initializer) and
        # create the ``env`` dict through it.
        self.manager = ContextManager()
        self.manager.start(manager_init)
        self.env = self.manager.dict({
            'ip': self.ip,
            'root': self.working_dir,
            'is_local': self.is_local_mode,
            'master_ip': self.master_ip,
            'job_desc': {}
        })
        self.logger = get_logger('cola_context')

        # RPC servers are not created here; both start as None.
        self.master_rpc_server = None
        self.worker_rpc_server = None
コード例 #39
0
ファイル: job.py プロジェクト: zzzz123321/cola
 def __init__(self):
     """Attach the 'cola_job_command' logger to this instance."""
     command_logger = get_logger('cola_job_command')
     self.logger = command_logger
コード例 #40
0
 def __init__(self):
     """Attach the 'cola_worker_command' logger to this instance."""
     command_logger = get_logger('cola_worker_command')
     self.logger = command_logger
コード例 #41
0
ファイル: stop.py プロジェクト: bingosummer/cola
Created on 2013-6-22

@author: Chine
'''


import os
import socket

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.config import Config
from cola.core.logs import get_logger

# Module-level logger used by the helper below.
logger = get_logger(name='sina_stop')

def _client_call(*args):
    """Forward ``args`` to ``client_call``, logging failures instead of raising.

    Returns whatever ``client_call`` returns, or ``None`` when the call
    fails with an ordinary exception.
    """
    try:
        return client_call(*args)
    except socket.error:
        logger.error('Cannot connect to single running worker.')
    except Exception as e:
        # Remain best-effort, but report the failure instead of hiding it,
        # and let KeyboardInterrupt/SystemExit propagate (a bare ``except``
        # would swallow those too).
        logger.error('Unexpected error while calling single running '
                     'worker: %s' % e)

def get_user_conf(s):
    """Return the path of the file named ``s`` in this script's directory."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, s)


# 'test.yaml' is used when it exists; otherwise fall back to 'sina.yaml'.
user_conf = get_user_conf('test.yaml')
user_conf = (user_conf if os.path.exists(user_conf)
             else get_user_conf('sina.yaml'))
user_config = Config(user_conf)
コード例 #42
0
ファイル: job.py プロジェクト: ll2088/cola
 def __init__(self):
     """Attach the 'cola_job_command' logger to this instance."""
     command_logger = get_logger('cola_job_command')
     self.logger = command_logger
コード例 #43
0
ファイル: stop.py プロジェクト: xren/cola
limitations under the License.

Created on 2013-6-22

@author: Chine
'''

import os
import socket

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.config import Config
from cola.core.logs import get_logger

# Module-level logger used by the helper below.
logger = get_logger(name='generic_stop')


def _client_call(*args):
    """Forward ``args`` to ``client_call``, logging failures instead of raising.

    Returns whatever ``client_call`` returns, or ``None`` when the call
    fails with an ordinary exception.
    """
    try:
        return client_call(*args)
    except socket.error:
        logger.error('Cannot connect to single running worker.')
    except Exception as e:
        # Remain best-effort, but report the failure instead of hiding it,
        # and let KeyboardInterrupt/SystemExit propagate (a bare ``except``
        # would swallow those too).
        logger.error('Unexpected error while calling single running '
                     'worker: %s' % e)


def get_user_conf(s):
    """Return the path of the file named ``s`` in this script's directory."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, s)


user_conf = get_user_conf('test.yaml')
if not os.path.exists(user_conf):
コード例 #44
0
ファイル: master.py プロジェクト: zhangw/cola
 def __init__(self):
     """Attach the "cola_master_command" logger to this instance."""
     command_logger = get_logger("cola_master_command")
     self.logger = command_logger
コード例 #45
0
Created on 2013-6-27

@author: Chine
'''

import socket
import os

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.logs import get_logger
from cola.worker.recover import recover

from conf import user_config

logger = get_logger(name='weibosearch_stop')

if __name__ == '__main__':
    ip, port = get_ip(), getattr(user_config.job, 'port')
    logger.info('Trying to stop single running worker')
    try:
        client_call('%s:%s' % (ip, port), 'stop')
    except socket.error:
        stop = raw_input("Force to stop? (y or n) ").strip()
        if stop == 'y' or stop == 'yes':
            job_path = os.path.split(os.path.abspath(__file__))[0]
            recover()
        else:
            print 'ignore'
    logger.info('Successfully stopped single running worker')
コード例 #46
0
limitations under the License.

Created on 2013-6-22

@author: Chine
'''

import os
import socket

from cola.core.rpc import client_call
from cola.core.utils import get_ip
from cola.core.config import Config
from cola.core.logs import get_logger

# Module-level logger used by the helper below.
logger = get_logger(name='wiki_stop')


def _client_call(*args):
    """Forward ``args`` to ``client_call``, logging failures instead of raising.

    Returns whatever ``client_call`` returns, or ``None`` when the call
    fails with an ordinary exception.
    """
    try:
        return client_call(*args)
    except socket.error:
        logger.error('Cannot connect to single running worker.')
    except Exception as e:
        # Remain best-effort, but report the failure instead of hiding it,
        # and let KeyboardInterrupt/SystemExit propagate (a bare ``except``
        # would swallow those too).
        logger.error('Unexpected error while calling single running '
                     'worker: %s' % e)


def get_user_conf(s):
    """Return the path of the file named ``s`` in this script's directory."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, s)


user_conf = get_user_conf('test.yaml')
if not os.path.exists(user_conf):
コード例 #47
0
 def setUp(self):
     self.client_logger = get_logger(name='cola_test_client', filename='file.log')