Esempio n. 1
0
    def __init__(self, server, port, rank, dir):
        """Connect to the scheduler and build the worker for this rank.

        Opens a TCP connection to ``server:port``, announces itself with a
        ``'rank <rank> <pid>'`` message, receives the pickled global options
        in reply and instantiates the matching worker object.

        Args:
            server: host name or IP address the scheduler listens on.
            port: TCP port the scheduler listens on.
            rank: rank of this communicator; compared against
                ``options.num_map_worker`` to choose between a map worker
                and a reduce worker.
            dir: directory in which the per-rank log file is created.

        Raises:
            Exception: whatever connecting or the handshake raises is
                re-raised unchanged after the socket has been closed.
        """
        self.options = None
        logfile = os.path.join(dir, 'log-mrlite-rank-%s.txt' % rank)
        config_logging(logfile)
        logging.debug('communicator %s started in %s:%s at %s',
                      rank, server, port, time.asctime())

        sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sockobj.connect((server, port))
            sock = SocketWrapper(sockobj)
            sock.send('rank %s %s' % (rank, os.getpid()))
            pickled_options = sock.recv()
            # NOTE(review): pickle.loads runs on bytes read from the network;
            # this is only safe if the scheduler endpoint is trusted.
            options = pickle.loads(pickled_options)
        except Exception:
            # Do not leak the descriptor when the handshake fails.
            sockobj.close()
            raise

        # 'start_mapper' / 'start_reducer' have no handler at this point;
        # presumably they are assigned elsewhere -- TODO confirm.
        self.task_callback = {
            'start_mapper': None,
            'start_reducer': None,
            'status': self.report_status,
            'quit': self.quit,
            'exit': self.quit,
        }
        self.server = server
        self.rank = rank
        self.sock = sock
        self.options = options
        self.process = None

        # Pick the worker flavour: map-only mode wins, otherwise the ranks
        # below num_map_worker are mappers and the rest are reducers.
        if options.maponly_mode:
            self.worker = MapOnlyWorker(options, rank, sock)
        elif rank < options.num_map_worker:
            self.worker = MapWorker(options, rank, sock)
        else:
            self.worker = ReduceWorker(options, rank, sock)
Esempio n. 2
0
    def __init__(self, server, port, rank, dir):
        """Connect to the scheduler and build the worker for this rank.

        Opens a TCP connection to ``server:port``, announces itself with a
        ``'rank <rank> <pid>'`` message, receives the pickled global options
        in reply and instantiates the matching worker object.

        Args:
            server: host name or IP address the scheduler listens on.
            port: TCP port the scheduler listens on.
            rank: rank of this communicator; compared against
                ``options.num_map_worker`` to choose between a map worker
                and a reduce worker.
            dir: directory in which the per-rank log file is created.

        Raises:
            Exception: whatever connecting or the handshake raises is
                re-raised unchanged after the socket has been closed.
        """
        self.options = None
        logfile = os.path.join(dir, 'log-mrlite-rank-%s.txt' % rank)
        config_logging(logfile)
        logging.debug('communicator %s started in %s:%s at %s',
                      rank, server, port, time.asctime())

        sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sockobj.connect((server, port))
            sock = SocketWrapper(sockobj)
            sock.send('rank %s %s' % (rank, os.getpid()))
            pickled_options = sock.recv()
            # NOTE(review): pickle.loads runs on bytes read from the network;
            # this is only safe if the scheduler endpoint is trusted.
            options = pickle.loads(pickled_options)
        except Exception:
            # Do not leak the descriptor when the handshake fails.
            sockobj.close()
            raise

        # 'start_mapper' / 'start_reducer' have no handler at this point;
        # presumably they are assigned elsewhere -- TODO confirm.
        self.task_callback = {
            'start_mapper': None,
            'start_reducer': None,
            'status': self.report_status,
            'quit': self.quit,
            'exit': self.quit,
        }
        self.server = server
        self.rank = rank
        self.sock = sock
        self.options = options
        self.process = None

        # Pick the worker flavour: map-only mode wins, otherwise the ranks
        # below num_map_worker are mappers and the rest are reducers.
        if options.maponly_mode:
            self.worker = MapOnlyWorker(options, rank, sock)
        elif rank < options.num_map_worker:
            self.worker = MapWorker(options, rank, sock)
        else:
            self.worker = ReduceWorker(options, rank, sock)
Esempio n. 3
0
    def start_communicators(self):
        """Start the scheduler's server socket and launch every communicator.

        A listening socket is bound to ``options.mapreduce_scheduler_ip`` on
        an OS-assigned port, then one worker script per rank is started
        through ``self.run_ssh_cmd``.  Each communicator connects back,
        identifies itself with a ``'<mesg> <rank> <pid>'`` message and is
        sent the pickled global options in reply.  The communicators are
        later used to control the map/reduce workers.

        On return these attributes are set:
            all_socks: connection wrappers indexed by rank.
            all_pids: reported process ids indexed by rank.
            map_socks: the first ``options.num_map_worker`` entries of
                ``all_socks``.
            reduce_socks: the remaining entries of ``all_socks``.

        The listening socket is closed before returning, whether or not the
        start-up succeeded; already accepted connections are unaffected.
        """
        options = self.options
        num_worker = options.num_worker

        # start server
        server_ip = options.mapreduce_scheduler_ip
        server_sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            server_sockobj.bind((server_ip, 0))
            # All workers are launched before the first accept() below, so
            # the backlog should be able to hold one pending connection per
            # worker (never less than the historical value of 5).
            server_sockobj.listen(max(5, num_worker))
            _, server_port = server_sockobj.getsockname()

            # start client
            for rank in range(num_worker):
                task = options.all_tasks[rank]
                machine = task['machine']
                tmp_dir = task['tmp_dir']
                worker_script = '%s/%s' % (tmp_dir, SCRIPT_WORKER)
                worker_cmd = 'python %s -s %s -p %s -r %s -d %s' % (
                    worker_script, server_ip, server_port, rank, tmp_dir)
                self.run_ssh_cmd(machine, worker_cmd)

            # send global options to each communicator
            pickled_options = pickle.dumps(options)
            all_socks = [None] * num_worker
            all_pids = [None] * num_worker
            # Connections arrive in arbitrary order; the rank announced by
            # the peer decides which slot the connection is stored in.
            for _ in range(num_worker):
                connection, address = server_sockobj.accept()
                sock = SocketWrapper(connection)
                mesg, rank_text, pid_text = sock.recv().split()
                if mesg == 'rank':
                    logging.debug(
                        'Socket communicator %s started at %s pid=%s',
                        rank_text, address, pid_text)
                sock.send(pickled_options)
                peer_rank = int(rank_text)
                all_socks[peer_rank] = sock
                all_pids[peer_rank] = int(pid_text)
        finally:
            # The listener is only needed during start-up.
            server_sockobj.close()

        self.map_socks = all_socks[0:options.num_map_worker]
        self.reduce_socks = all_socks[options.num_map_worker:]
        self.all_socks = all_socks
        self.all_pids = all_pids
Esempio n. 4
0
    def start_communicators(self):
        """Start the scheduler's server socket and launch every communicator.

        A listening socket is bound to ``options.mapreduce_scheduler_ip`` on
        an OS-assigned port, then one worker script per rank is started
        through ``self.run_ssh_cmd``.  Each communicator connects back,
        identifies itself with a ``'<mesg> <rank> <pid>'`` message and is
        sent the pickled global options in reply.  The communicators are
        later used to control the map/reduce workers.

        On return these attributes are set:
            all_socks: connection wrappers indexed by rank.
            all_pids: reported process ids indexed by rank.
            map_socks: the first ``options.num_map_worker`` entries of
                ``all_socks``.
            reduce_socks: the remaining entries of ``all_socks``.

        The listening socket is closed before returning, whether or not the
        start-up succeeded; already accepted connections are unaffected.
        """
        options = self.options
        num_worker = options.num_worker

        # start server
        server_ip = options.mapreduce_scheduler_ip
        server_sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            server_sockobj.bind((server_ip, 0))
            # All workers are launched before the first accept() below, so
            # the backlog should be able to hold one pending connection per
            # worker (never less than the historical value of 5).
            server_sockobj.listen(max(5, num_worker))
            _, server_port = server_sockobj.getsockname()

            # start client
            for rank in range(num_worker):
                task = options.all_tasks[rank]
                machine = task['machine']
                tmp_dir = task['tmp_dir']
                worker_script = '%s/%s' % (tmp_dir, SCRIPT_WORKER)
                worker_cmd = 'python %s -s %s -p %s -r %s -d %s' % (
                    worker_script, server_ip, server_port, rank, tmp_dir)
                self.run_ssh_cmd(machine, worker_cmd)

            # send global options to each communicator
            pickled_options = pickle.dumps(options)
            all_socks = [None] * num_worker
            all_pids = [None] * num_worker
            # Connections arrive in arbitrary order; the rank announced by
            # the peer decides which slot the connection is stored in.
            for _ in range(num_worker):
                connection, address = server_sockobj.accept()
                sock = SocketWrapper(connection)
                mesg, rank_text, pid_text = sock.recv().split()
                if mesg == 'rank':
                    logging.debug(
                        'Socket communicator %s started at %s pid=%s',
                        rank_text, address, pid_text)
                sock.send(pickled_options)
                peer_rank = int(rank_text)
                all_socks[peer_rank] = sock
                all_pids[peer_rank] = int(pid_text)
        finally:
            # The listener is only needed during start-up.
            server_sockobj.close()

        self.map_socks = all_socks[0:options.num_map_worker]
        self.reduce_socks = all_socks[options.num_map_worker:]
        self.all_socks = all_socks
        self.all_pids = all_pids