Ejemplo n.º 1
0
def put_starts(master=None):
    """Push every line of the ``keywords_f`` file onto the message queue.

    Lines are sent in batches of ``PUTSIZE`` entries; a final, smaller batch
    is sent for whatever is left over. Each line is forwarded exactly as
    read from the file (including its trailing newline, if any).

    :param master: address handed to ``client_call(master, 'get_nodes')`` to
        obtain the node list. When ``None``, a single node address is built
        from ``get_ip()`` and ``user_config.job.port``.
    """
    if master is None:
        nodes = ['%s:%s' % (get_ip(), user_config.job.port)]
    else:
        nodes = client_call(master, 'get_nodes')

    mq_client = MessageQueueClient(nodes)
    with open(keywords_f) as f:
        keys = []
        # Iterate the file object itself: ``xreadlines()`` is deprecated in
        # Python 2 and no longer exists in Python 3.
        for keyword in f:
            keys.append(keyword)
            if len(keys) >= PUTSIZE:
                mq_client.put(keys)
                keys = []
        # Flush the last, partially filled batch.
        if keys:
            mq_client.put(keys)
Ejemplo n.º 2
0
def put_starts(master=None):
    """Feed the lines of the ``keywords_f`` file into the message queue.

    The file is read line by line and the lines are put on the queue in
    batches of ``PUTSIZE``; the remainder (if any) is sent as a last batch.
    Lines are forwarded unmodified, so any trailing newline is kept.

    :param master: address passed to ``client_call(master, 'get_nodes')`` to
        look up the node list. When ``None``, the node list is a single
        address built from ``get_ip()`` and ``user_config.job.port``.
    """
    if master is None:
        nodes = ['%s:%s' % (get_ip(), user_config.job.port)]
    else:
        nodes = client_call(master, 'get_nodes')

    mq_client = MessageQueueClient(nodes)
    with open(keywords_f) as f:
        batch = []
        # A file object is its own line iterator; ``xreadlines()`` is
        # deprecated in Python 2 and was removed in Python 3.
        for keyword in f:
            batch.append(keyword)
            if len(batch) >= PUTSIZE:
                mq_client.put(batch)
                batch = []
        # Send whatever did not fill a complete batch.
        if batch:
            mq_client.put(batch)
Ejemplo n.º 3
0
def put_starts(master=None):
    """Put the ``starts`` URLs on the queue of the "douban movie" job.

    The function asks the master for its runnable jobs, picks the one whose
    value is ``"douban movie"``, builds the worker address list and pushes
    the URLs in batches of ``PUTSIZE``.

    :param master: ``host:port`` string of the master. When ``None`` it is
        built from ``get_ip()`` and ``user_config.master.port``.
    :raises Exception: if no runnable job is named ``"douban movie"``.
    """
    if master is None:
        # Must be a plain ``host:port`` string: it is split on ':' below.
        # The previous default wrapped it in a list, which made
        # ``master.split`` raise AttributeError.
        master = '%s:%s' % (get_ip(), user_config.master.port)
    print('master:%s' % master)

    jobs = client_call(master, 'runnable_jobs')
    app_name = ''
    for a, j in jobs.items():
        if j == "douban movie":
            app_name = a
            break
    if not app_name:
        raise Exception('douban movie job has not upload')

    nodes = client_call(master, 'list_workers')
    addrs = []
    default_addr = master.split(':')[0]
    for ap, _ in nodes:
        a, p = ap.split(':')
        # Workers reported as "localhost" are rewritten to the master's
        # host so the address is usable from this process.
        if a.lower() == 'localhost':
            addrs.append('%s:%s' % (default_addr, p))
        else:
            addrs.append(ap)

    mq_client = MessageQueueClient(addrs, app_name)
    # NOTE(review): this prints the result of a ``get()`` on the queue
    # before anything is put - looks like debugging output; confirm it is
    # intended.
    print('get:%s' % mq_client.get())

    urls = []
    for url in starts:
        urls.append(url)
        if len(urls) >= PUTSIZE:
            mq_client.put(urls)
            urls = []
    # Flush the last, partially filled batch.
    if urls:
        mq_client.put(urls)
Ejemplo n.º 4
0
class Test(unittest.TestCase):
    """Tests ``MessageQueue`` / ``MessageQueueClient`` on three local nodes."""

    def setUp(self):
        """Start one RPC server and one message queue per port."""
        ports = (11111, 11211, 11311)
        self.nodes = ['localhost:%s' % port for port in ports]
        # Two scratch directories per node, both handed to init_store().
        self.dirs = [tempfile.mkdtemp() for _ in range(2 * len(ports))]
        self.size = len(ports)

        for idx, port in enumerate(ports):
            server = ColaRPCServer(('localhost', port))
            setattr(self, 'rpc_server%s' % idx, server)

            queue = MessageQueue(self.nodes[:], self.nodes[idx], server)
            setattr(self, 'mq%s' % idx, queue)
            queue.init_store(self.dirs[2 * idx], self.dirs[2 * idx + 1])

            server_thread = threading.Thread(target=server.serve_forever)
            server_thread.setDaemon(True)
            server_thread.start()

        self.client = MessageQueueClient(self.nodes)

    def tearDown(self):
        """Shut down servers and queues, then always remove the temp dirs."""
        try:
            for idx in range(self.size):
                getattr(self, 'rpc_server%s' % idx).shutdown()
                getattr(self, 'mq%s' % idx).shutdown()
        finally:
            for path in self.dirs:
                shutil.rmtree(path)

    def _drain(self, mq):
        """Call ``mq.get()`` until it returns ``None``; return the items."""
        items = []
        item = mq.get()
        while item is not None:
            items.append(item)
            item = mq.get()
        return items

    def testMQ(self):
        """Everything put on a queue comes back, directly and via client."""
        mq = self.mq0
        data = [str(random.randint(10000, 50000)) for _ in range(20)]

        mq.put(data)
        self.assertEqual(sorted(data), sorted(self._drain(mq)))

        # test mq client
        data = str(random.randint(10000, 50000))
        self.client.put(data)
        self.assertEqual(data, self.client.get())

    def testAddOrRemoveNode(self):
        """Items stay retrievable after the third node is removed."""
        mq = self.mq0
        data = [str(i) for i in range(100)]

        mq.put(data)
        self.mq2.shutdown()
        # The queues were given copies of the node list, so removing a
        # node from them must leave self.nodes untouched.
        self.assertEqual(len(self.nodes), 3)
        self.mq0.remove_node(self.nodes[2])
        self.assertEqual(len(self.nodes), 3)
        self.mq1.remove_node(self.nodes[2])

        self.assertEqual(sorted(data), sorted(self._drain(mq)))
Ejemplo n.º 5
0
Archivo: loader.py Proyecto: Ganer/cola
class MasterJobLoader(LimitionJobLoader, JobLoader):
    """Master-side job loader: tracks the worker nodes and drives one job.

    Two locks are used as one-shot gates. Both are acquired in ``__init__``;
    ``ready_lock`` is released once every node has called ``ready`` and
    ``finish_lock`` once every node has called ``worker_finish``. ``run``
    blocks on each of them in turn.

    NOTE(review): ``self.job``, ``self.root``, ``self.force``,
    ``self.stopped``, ``self.rpc_server`` and the ``complete`` / ``error`` /
    ``apply`` / ``require`` methods are not defined here - presumably they
    come from the base classes; confirm.
    """

    def __init__(self, job, data_dir, nodes, local_ip=None, client=None,
                 context=None, copies=1, force=False):
        """Set up queue client, locks, logging, RPC functions and signals.

        :param job: job object; ``job.context`` is used when ``context`` is
            not given.
        :param data_dir: forwarded to ``JobLoader.__init__``.
        :param nodes: list of worker node addresses.
        :param local_ip: IP of this master; defaults to ``get_ip()``.
        :param client: optional client address; when given, a log handler
            is attached for it and it is sent ``'stop'`` on ``finish``.
        :param context: optional context overriding ``job.context``.
        :param copies: forwarded to ``JobLoader`` and ``MessageQueueClient``.
        :param force: forwarded to ``JobLoader.__init__``.
        :raises ValueError: if ``local_ip`` is not one of ``get_ips()``.
        :raises JobMasterRunning: if ``check_env`` reports a running master.
        """
        ctx = context or job.context
        master_port = ctx.job.master_port
        if local_ip is None:
            local_ip = get_ip()
        else:
            choices_ips = get_ips()
            if local_ip not in choices_ips:
                raise ValueError('IP address must be one of (%s)' % ','.join(choices_ips))
        local = '%s:%s' % (local_ip, master_port)

        JobLoader.__init__(self, job, data_dir, local,
                           context=ctx, copies=copies, force=force)
        LimitionJobLoader.__init__(self, job, context=ctx)

        # check
        self.check()

        self.nodes = nodes
        # Copies, so that ticking nodes off does not alter self.nodes.
        self.not_registered = self.nodes[:]
        self.not_finished = self.nodes[:]

        # mq
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # lock: both start out held, see the class docstring.
        self.ready_lock = threading.Lock()
        self.ready_lock.acquire()
        self.finish_lock = threading.Lock()
        self.finish_lock.acquire()

        # logger
        self.logger = get_logger(
            name='cola_master_%s'%self.job.real_name,
            filename=os.path.join(self.root, 'job.log'),
            is_master=True)
        self.client = client
        self.client_handler = None
        if self.client is not None:
            self.client_handler = add_log_client(self.logger, self.client)

        self.init_rpc_server()
        self.init_rate_clear()
        self.init_logger_server(self.logger)

        # register rpc server
        self.rpc_server.register_function(self.client_stop, 'client_stop')
        self.rpc_server.register_function(self.ready, 'ready')
        self.rpc_server.register_function(self.worker_finish, 'worker_finish')
        self.rpc_server.register_function(self.complete, 'complete')
        self.rpc_server.register_function(self.error, 'error')
        self.rpc_server.register_function(self.get_nodes, 'get_nodes')
        self.rpc_server.register_function(self.apply, 'apply')
        self.rpc_server.register_function(self.require, 'require')
        self.rpc_server.register_function(self.stop, 'stop')
        self.rpc_server.register_function(self.add_node, 'add_node')
        self.rpc_server.register_function(self.remove_node, 'remove_node')

        # register signal
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

    def init_logger_server(self, logger):
        """Start a ``LogRecordSocketReceiver`` for ``logger`` in a thread."""
        self.log_server = LogRecordSocketReceiver(host=get_ip(), logger=logger)
        threading.Thread(target=self.log_server.serve_forever).start()

    def stop_logger_server(self):
        """Shut down and stop the log server, if one was started."""
        if hasattr(self, 'log_server'):
            self.log_server.shutdown()
            self.log_server.stop()

    def client_stop(self):
        """Detach the client's log handler from the logger, if present."""
        if self.client_handler is not None:
            self.logger.removeHandler(self.client_handler)

    def check(self):
        """Raise ``JobMasterRunning`` unless ``check_env`` approves."""
        env_legal = self.check_env(force=self.force)
        if not env_legal:
            raise JobMasterRunning('There has been a running job master.')

    def release_lock(self, lock):
        """Release ``lock``, tolerating the case where it is not held."""
        try:
            lock.release()
        except (threading.ThreadError, RuntimeError):
            # Releasing an unlocked lock is expected here (the gate may
            # already be open); anything else should surface.
            pass

    def finish(self):
        """Open both gates, finish the base loaders and release resources."""
        self.release_lock(self.ready_lock)
        self.release_lock(self.finish_lock)

        LimitionJobLoader.finish(self)
        JobLoader.finish(self)
        self.stop_logger_server()

        try:
            for handler in self.logger.handlers:
                handler.close()
        except Exception:
            # Best effort: a handler that fails to close must not prevent
            # the rest of the shutdown.
            pass

        if self.client is not None:
            rpc_client = '%s:%s' % (
                self.client.split(':')[0],
                main_conf.client.port
            )
            client_call(rpc_client, 'stop', ignore=True)

        self.stopped = True

    def stop(self):
        """Send ``'stop'`` to every node (socket errors ignored), then finish."""
        for node in self.nodes:
            try:
                client_call(node, 'stop')
            except socket.error:
                pass
        self.finish()

    def signal_handler(self, signum, frame):
        """SIGINT/SIGTERM handler: stop the job."""
        self.stop()

    def get_nodes(self):
        """Return the list of node addresses."""
        return self.nodes

    def ready(self, node):
        """Mark ``node`` as registered; open the ready gate after the last."""
        if node in self.not_registered:
            self.not_registered.remove(node)
            if len(self.not_registered) == 0:
                self.ready_lock.release()

    def worker_finish(self, node):
        """Mark ``node`` as finished; open the finish gate after the last."""
        if node in self.not_finished:
            self.not_finished.remove(node)
            if len(self.not_finished) == 0:
                self.finish_lock.release()

    def add_node(self, node):
        """Announce ``node`` to every known node, register it and run it."""
        # The loop variable must not reuse the name ``node``: doing so
        # made each peer be told about itself and the wrong address be
        # appended afterwards.
        for existing in self.nodes:
            client_call(existing, 'add_node', node)
        self.nodes.append(node)
        client_call(node, 'run')

    def remove_node(self, node):
        """Tell every known node that ``node`` is removed, then drop it.

        :raises ValueError: if ``node`` is not in the node list.
        """
        # Distinct loop variable, see add_node().
        for existing in self.nodes:
            client_call(existing, 'remove_node', node)
        self.nodes.remove(node)

    def run(self):
        """Wait for all nodes, dispatch the job, then wait for completion."""
        # Blocks until ready() (or finish()) releases the gate.
        self.ready_lock.acquire()

        if not self.stopped and len(self.not_registered) == 0:
            self.mq_client.put(self.job.starts)
            for node in self.nodes:
                client_call(node, 'run')

        # Blocks until worker_finish() (or finish()) releases the gate.
        self.finish_lock.acquire()

        try:
            master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
            client_call(master_watcher, 'finish_job', self.job.real_name)
        except socket.error:
            pass

    def __enter__(self):
        return self

    def __exit__(self, type_, value, traceback):
        self.finish()
Ejemplo n.º 6
0
class MasterJobLoader(LimitionJobLoader, JobLoader):
    """Master-side job loader: tracks the worker nodes and drives one job.

    ``ready_lock`` and ``finish_lock`` act as one-shot gates: both are
    acquired in ``__init__``, released by ``ready`` / ``worker_finish`` once
    the last node has reported in, and waited on by ``run``.

    NOTE(review): ``self.job``, ``self.root``, ``self.force``,
    ``self.stopped``, ``self.rpc_server`` and the ``complete`` / ``error`` /
    ``apply`` / ``require`` methods are not defined here - presumably they
    come from the base classes; confirm.
    """

    def __init__(self,
                 job,
                 data_dir,
                 nodes,
                 local_ip=None,
                 client=None,
                 context=None,
                 copies=1,
                 force=False):
        """Set up queue client, locks, logging, RPC functions and signals.

        :param job: job object; ``job.context`` is used when ``context`` is
            not given.
        :param data_dir: forwarded to ``JobLoader.__init__``.
        :param nodes: list of worker node addresses.
        :param local_ip: IP of this master; defaults to ``get_ip()``.
        :param client: optional client address; when given, a log handler
            is attached for it and it is sent ``'stop'`` on ``finish``.
        :param context: optional context overriding ``job.context``.
        :param copies: forwarded to ``JobLoader`` and ``MessageQueueClient``.
        :param force: forwarded to ``JobLoader.__init__``.
        :raises ValueError: if ``local_ip`` is not one of ``get_ips()``.
        :raises JobMasterRunning: if ``check_env`` reports a running master.
        """
        ctx = context or job.context
        master_port = ctx.job.master_port
        if local_ip is None:
            local_ip = get_ip()
        else:
            choices_ips = get_ips()
            if local_ip not in choices_ips:
                raise ValueError('IP address must be one of (%s)' %
                                 ','.join(choices_ips))
        local = '%s:%s' % (local_ip, master_port)

        JobLoader.__init__(self,
                           job,
                           data_dir,
                           local,
                           context=ctx,
                           copies=copies,
                           force=force)
        LimitionJobLoader.__init__(self, job, context=ctx)

        # check
        self.check()

        self.nodes = nodes
        # Copies, so that ticking nodes off does not alter self.nodes.
        self.not_registered = self.nodes[:]
        self.not_finished = self.nodes[:]

        # mq
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # lock: both start out held, see the class docstring.
        self.ready_lock = threading.Lock()
        self.ready_lock.acquire()
        self.finish_lock = threading.Lock()
        self.finish_lock.acquire()

        # logger
        self.logger = get_logger(name='cola_master_%s' % self.job.real_name,
                                 filename=os.path.join(self.root, 'job.log'),
                                 is_master=True)
        self.client = client
        self.client_handler = None
        if self.client is not None:
            self.client_handler = add_log_client(self.logger, self.client)

        self.init_rpc_server()
        self.init_rate_clear()
        self.init_logger_server(self.logger)

        # register rpc server
        self.rpc_server.register_function(self.client_stop, 'client_stop')
        self.rpc_server.register_function(self.ready, 'ready')
        self.rpc_server.register_function(self.worker_finish, 'worker_finish')
        self.rpc_server.register_function(self.complete, 'complete')
        self.rpc_server.register_function(self.error, 'error')
        self.rpc_server.register_function(self.get_nodes, 'get_nodes')
        self.rpc_server.register_function(self.apply, 'apply')
        self.rpc_server.register_function(self.require, 'require')
        self.rpc_server.register_function(self.stop, 'stop')
        self.rpc_server.register_function(self.add_node, 'add_node')
        self.rpc_server.register_function(self.remove_node, 'remove_node')

        # register signal
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

    def init_logger_server(self, logger):
        """Start a ``LogRecordSocketReceiver`` for ``logger`` in a thread."""
        self.log_server = LogRecordSocketReceiver(host=get_ip(), logger=logger)
        threading.Thread(target=self.log_server.serve_forever).start()

    def stop_logger_server(self):
        """Shut down the log server, if one was started."""
        if hasattr(self, 'log_server'):
            self.log_server.shutdown()

    def client_stop(self):
        """Detach the client's log handler from the logger, if present."""
        if self.client_handler is not None:
            self.logger.removeHandler(self.client_handler)

    def check(self):
        """Raise ``JobMasterRunning`` unless ``check_env`` approves."""
        env_legal = self.check_env(force=self.force)
        if not env_legal:
            raise JobMasterRunning('There has been a running job master.')

    def release_lock(self, lock):
        """Release ``lock``, tolerating the case where it is not held."""
        try:
            lock.release()
        except (threading.ThreadError, RuntimeError):
            # Releasing an unlocked lock is expected here (the gate may
            # already be open); anything else should surface.
            pass

    def finish(self):
        """Open both gates, finish the base loaders and release resources."""
        self.release_lock(self.ready_lock)
        self.release_lock(self.finish_lock)

        LimitionJobLoader.finish(self)
        JobLoader.finish(self)
        self.stop_logger_server()

        try:
            for handler in self.logger.handlers:
                handler.close()
        except Exception:
            # Best effort: a handler that fails to close must not prevent
            # the rest of the shutdown.
            pass

        if self.client is not None:
            rpc_client = '%s:%s' % (self.client.split(':')[0],
                                    main_conf.client.port)
            client_call(rpc_client, 'stop', ignore=True)

        self.stopped = True

    def stop(self):
        """Send ``'stop'`` to every node, then finish."""
        for node in self.nodes:
            client_call(node, 'stop', ignore=True)
        self.finish()

    def signal_handler(self, signum, frame):
        """SIGINT/SIGTERM handler: stop the job."""
        self.stop()

    def get_nodes(self):
        """Return the list of node addresses."""
        return self.nodes

    def ready(self, node):
        """Mark ``node`` as registered; open the ready gate after the last."""
        if node in self.not_registered:
            self.not_registered.remove(node)
            if len(self.not_registered) == 0:
                self.ready_lock.release()

    def worker_finish(self, node):
        """Mark ``node`` as finished; open the finish gate after the last."""
        if node in self.not_finished:
            self.not_finished.remove(node)
            if len(self.not_finished) == 0:
                self.finish_lock.release()

    def add_node(self, node):
        """Announce ``node`` to every known node, register it and run it."""
        # The loop variable must not reuse the name ``node``: doing so
        # made each peer be told about itself and the wrong address be
        # appended afterwards.
        for existing in self.nodes:
            client_call(existing, 'add_node', node, ignore=True)
        self.nodes.append(node)
        client_call(node, 'run', ignore=True)

    def remove_node(self, node):
        """Tell every known node that ``node`` is removed, then drop it.

        Removing a node that is not in the list is a no-op for the list.
        """
        # Distinct loop variable, see add_node().
        for existing in self.nodes:
            client_call(existing, 'remove_node', node, ignore=True)
        if node in self.nodes:
            self.nodes.remove(node)

    def run(self):
        """Wait for all nodes, dispatch the job, then wait for completion."""
        # Blocks until ready() (or finish()) releases the gate.
        self.ready_lock.acquire()

        if not self.stopped and len(self.not_registered) == 0:
            self.mq_client.put(self.job.starts)
            for node in self.nodes:
                client_call(node, 'run')

        # Blocks until worker_finish() (or finish()) releases the gate.
        self.finish_lock.acquire()

        master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
        client_call(master_watcher,
                    'finish_job',
                    self.job.real_name,
                    ignore=True)

    def __enter__(self):
        return self

    def __exit__(self, type_, value, traceback):
        self.finish()
Ejemplo n.º 7
0
class Test(unittest.TestCase):
    """Tests ``MessageQueue`` / ``MessageQueueClient`` on three local nodes."""

    def setUp(self):
        """Start one RPC server and one message queue per port."""
        ports = (11111, 11211, 11311)
        self.nodes = ['localhost:%s' % port for port in ports]
        # Two scratch directories per node, both handed to init_store().
        self.dirs = [tempfile.mkdtemp() for _ in range(2 * len(ports))]
        self.size = len(ports)

        for idx, port in enumerate(ports):
            server = ColaRPCServer(('localhost', port))
            setattr(self, 'rpc_server%s' % idx, server)

            queue = MessageQueue(self.nodes[:], self.nodes[idx], server)
            setattr(self, 'mq%s' % idx, queue)
            first_dir, second_dir = self.dirs[2 * idx], self.dirs[2 * idx + 1]
            queue.init_store(first_dir, second_dir)

            server_thread = threading.Thread(target=server.serve_forever)
            server_thread.setDaemon(True)
            server_thread.start()

        self.client = MessageQueueClient(self.nodes)

    def tearDown(self):
        """Shut down servers and queues, then always remove the temp dirs."""
        try:
            for idx in range(self.size):
                getattr(self, 'rpc_server%s' % idx).shutdown()
                getattr(self, 'mq%s' % idx).shutdown()
        finally:
            for path in self.dirs:
                shutil.rmtree(path)

    def _drain(self, mq):
        """Call ``mq.get()`` until it returns ``None``; return the items."""
        items = []
        item = mq.get()
        while item is not None:
            items.append(item)
            item = mq.get()
        return items

    def testMQ(self):
        """Everything put on a queue comes back, directly and via client."""
        mq = self.mq0
        data = [str(random.randint(10000, 50000)) for _ in range(20)]

        mq.put(data)
        self.assertEqual(sorted(data), sorted(self._drain(mq)))

        # test mq client
        data = str(random.randint(10000, 50000))
        self.client.put(data)
        self.assertEqual(data, self.client.get())

    def testAddOrRemoveNode(self):
        """Items stay retrievable after the third node is removed."""
        mq = self.mq0
        data = [str(i) for i in range(100)]

        mq.put(data)
        self.mq2.shutdown()
        # The queues were given copies of the node list, so removing a
        # node from them must leave self.nodes untouched.
        self.assertEqual(len(self.nodes), 3)
        self.mq0.remove_node(self.nodes[2])
        self.assertEqual(len(self.nodes), 3)
        self.mq1.remove_node(self.nodes[2])

        self.assertEqual(sorted(data), sorted(self._drain(mq)))
Ejemplo n.º 8
0
class JobLoader(object):
    """Coordinates one job across worker nodes.

    Tracks which nodes have registered, optionally caps the number of
    completed items (``ctx.job.size``) and the number of items granted per
    minute (``ctx.job.limits``), and exposes its operations on the given
    RPC server.
    """

    def __init__(self, job, nodes, rpc_server,
                 context=None, copies=2):
        """Initialise state, register RPC functions and signal handlers.

        :param job: job object; ``job.context`` is used when ``context`` is
            not given, and ``job.starts`` is queued by ``run``.
        :param nodes: list of worker node addresses.
        :param rpc_server: object providing ``register_function``.
        :param context: optional context overriding ``job.context``.
        :param copies: forwarded to ``MessageQueueClient``.
        """
        self.job = job
        self.ctx = context or job.context

        self.nodes = nodes
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # Copy, so that ticking nodes off does not alter self.nodes.
        self.not_registered = self.nodes[:]
        self.is_ready = False
        self.stopped = False

        # destination size
        self.size = self.ctx.job.size
        self.limit_size = self.size > 0
        self.finishes = 0

        # speed limits
        self.limits = self.ctx.job.limits
        self.limit_speed = self.limits > 0
        self.in_minute = 0

        # register rpc server
        rpc_server.register_function(self.ready, 'ready')
        rpc_server.register_function(self.complete, 'complete')
        rpc_server.register_function(self.get_nodes, 'get_nodes')
        rpc_server.register_function(self.require, 'require')
        rpc_server.register_function(self.stop, 'stop')
        rpc_server.register_function(self.add_node, 'add_node')
        rpc_server.register_function(self.remove_node, 'remove_node')

        # register signal
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

    def ready(self, node):
        """Mark ``node`` as registered; set ``is_ready`` after the last."""
        if node in self.not_registered:
            self.not_registered.remove(node)
            if len(self.not_registered) == 0:
                self.is_ready = True

    def get_nodes(self):
        """Return the list of node addresses."""
        return self.nodes

    def require(self, count):
        """Return how many of the ``count`` requested items are granted.

        Nothing is granted once the job is stopped. With a speed limit, at
        most ``self.limits`` items are granted between two resets of
        ``self.in_minute``; without one, the full ``count`` is granted.
        """
        if self.stopped:
            return 0
        if not self.limit_speed:
            return count
        # The budget is ``self.limits`` (the per-minute cap), not the
        # boolean ``self.limit_size``; grant the smaller of the request and
        # what is left of the budget.
        remaining = self.limits - self.in_minute
        if remaining <= 0:
            return 0
        granted = min(count, remaining)
        self.in_minute += granted
        return granted

    def complete(self, obj):
        """Record one finished item and report whether the job is done.

        With a size limit, the job is stopped once ``self.size`` items have
        completed. ``obj`` is not used.
        """
        if self.limit_size:
            self.finishes += 1
            completed = self.finishes >= self.size
            if completed:
                self.stopped = True
            return completed
        return self.stopped

    def _in_minute_clear(self):
        """Start a daemon thread that resets ``in_minute`` every 60 s."""
        def _clear():
            # A loop rather than self-recursion: recursing once per minute
            # grows the stack until the recursion limit is hit.
            while True:
                self.in_minute = 0
                time.sleep(60)
                if self.stopped:
                    break
        thd = threading.Thread(target=_clear)
        thd.setDaemon(True)
        thd.start()

    def signal_handler(self, signum, frame):
        """SIGINT/SIGTERM handler: stop the job."""
        self.stop()

    def stop(self):
        """Send ``'stop'`` to every node (socket errors ignored)."""
        for node in self.nodes:
            try:
                client_call(node, 'stop')
            except socket.error:
                pass
        self.stopped = True

    def run(self):
        """Wait for all nodes, queue the start items and run until stopped."""
        # wait until all the workers initialized; sleep between checks
        # instead of spinning, and give up if the job is stopped meanwhile.
        while not self.is_ready:
            if self.stopped:
                return
            time.sleep(0.1)

        if self.limit_speed:
            self._in_minute_clear()

        self.mq_client.put(self.job.starts)
        for node in self.nodes:
            client_call(node, 'run')

        def _run():
            while not self.stopped:
                time.sleep(TIME_SLEEP)
        main_thread = threading.Thread(target=_run)
        main_thread.start()
        main_thread.join()

    def add_node(self, node):
        """Announce ``node`` to every known node, register it and run it."""
        # The loop variable must not reuse the name ``node``: doing so
        # made each peer be told about itself and the wrong address be
        # appended afterwards.
        for existing in self.nodes:
            client_call(existing, 'add_node', node)
        self.nodes.append(node)
        client_call(node, 'run')

    def remove_node(self, node):
        """Tell every known node that ``node`` is removed, then drop it.

        :raises ValueError: if ``node`` is not in the node list.
        """
        # Distinct loop variable, see add_node().
        for existing in self.nodes:
            client_call(existing, 'remove_node', node)
        self.nodes.remove(node)
Ejemplo n.º 9
0
class JobLoader(object):
    """Coordinates one job across worker nodes.

    Tracks which nodes have registered, optionally caps the number of
    completed items (``ctx.job.size``) and the number of items granted per
    minute (``ctx.job.limits``), and exposes its operations on the given
    RPC server.
    """

    def __init__(self, job, nodes, rpc_server, context=None, copies=2):
        """Initialise state, register RPC functions and signal handlers.

        :param job: job object; ``job.context`` is used when ``context`` is
            not given, and ``job.starts`` is queued by ``run``.
        :param nodes: list of worker node addresses.
        :param rpc_server: object providing ``register_function``.
        :param context: optional context overriding ``job.context``.
        :param copies: forwarded to ``MessageQueueClient``.
        """
        self.job = job
        self.ctx = context or job.context

        self.nodes = nodes
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # Copy, so that ticking nodes off does not alter self.nodes.
        self.not_registered = self.nodes[:]
        self.is_ready = False
        self.stopped = False

        # destination size
        self.size = self.ctx.job.size
        self.limit_size = self.size > 0
        self.finishes = 0

        # speed limits
        self.limits = self.ctx.job.limits
        self.limit_speed = self.limits > 0
        self.in_minute = 0

        # register rpc server
        rpc_server.register_function(self.ready, 'ready')
        rpc_server.register_function(self.complete, 'complete')
        rpc_server.register_function(self.get_nodes, 'get_nodes')
        rpc_server.register_function(self.require, 'require')
        rpc_server.register_function(self.stop, 'stop')
        rpc_server.register_function(self.add_node, 'add_node')
        rpc_server.register_function(self.remove_node, 'remove_node')

        # register signal
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

    def ready(self, node):
        """Mark ``node`` as registered; set ``is_ready`` after the last."""
        if node in self.not_registered:
            self.not_registered.remove(node)
            if len(self.not_registered) == 0:
                self.is_ready = True

    def get_nodes(self):
        """Return the list of node addresses."""
        return self.nodes

    def require(self, count):
        """Return how many of the ``count`` requested items are granted.

        Nothing is granted once the job is stopped. With a speed limit, at
        most ``self.limits`` items are granted between two resets of
        ``self.in_minute``; without one, the full ``count`` is granted.
        """
        if self.stopped:
            return 0
        if not self.limit_speed:
            return count
        # The budget is ``self.limits`` (the per-minute cap), not the
        # boolean ``self.limit_size``; grant the smaller of the request and
        # what is left of the budget.
        remaining = self.limits - self.in_minute
        if remaining <= 0:
            return 0
        granted = min(count, remaining)
        self.in_minute += granted
        return granted

    def complete(self, obj):
        """Record one finished item and report whether the job is done.

        With a size limit, the job is stopped once ``self.size`` items have
        completed. ``obj`` is not used.
        """
        if self.limit_size:
            self.finishes += 1
            completed = self.finishes >= self.size
            if completed:
                self.stopped = True
            return completed
        return self.stopped

    def _in_minute_clear(self):
        """Start a daemon thread that resets ``in_minute`` every 60 s."""
        def _clear():
            # A loop rather than self-recursion: recursing once per minute
            # grows the stack until the recursion limit is hit.
            while True:
                self.in_minute = 0
                time.sleep(60)
                if self.stopped:
                    break

        thd = threading.Thread(target=_clear)
        thd.setDaemon(True)
        thd.start()

    def signal_handler(self, signum, frame):
        """SIGINT/SIGTERM handler: stop the job."""
        self.stop()

    def stop(self):
        """Send ``'stop'`` to every node (socket errors ignored)."""
        for node in self.nodes:
            try:
                client_call(node, 'stop')
            except socket.error:
                pass
        self.stopped = True

    def run(self):
        """Wait for all nodes, queue the start items and run until stopped."""
        # wait until all the workers initialized; sleep between checks
        # instead of spinning, and give up if the job is stopped meanwhile.
        while not self.is_ready:
            if self.stopped:
                return
            time.sleep(0.1)

        if self.limit_speed:
            self._in_minute_clear()

        self.mq_client.put(self.job.starts)
        for node in self.nodes:
            client_call(node, 'run')

        def _run():
            while not self.stopped:
                time.sleep(TIME_SLEEP)

        main_thread = threading.Thread(target=_run)
        main_thread.start()
        main_thread.join()

    def add_node(self, node):
        """Announce ``node`` to every known node, register it and run it."""
        # The loop variable must not reuse the name ``node``: doing so
        # made each peer be told about itself and the wrong address be
        # appended afterwards.
        for existing in self.nodes:
            client_call(existing, 'add_node', node)
        self.nodes.append(node)
        client_call(node, 'run')

    def remove_node(self, node):
        """Tell every known node that ``node`` is removed, then drop it.

        :raises ValueError: if ``node`` is not in the node list.
        """
        # Distinct loop variable, see add_node().
        for existing in self.nodes:
            client_call(existing, 'remove_node', node)
        self.nodes.remove(node)