def __init__(self, server, port, rank, dir):
    """Establish communication with the scheduler.

    Configures the per-rank log file, connects to the scheduler's server
    socket, announces this communicator with a ``'rank <rank> <pid>'``
    message, receives the pickled global options in reply and finally
    selects the worker implementation for this rank.

    Args:
        server: Host of the scheduler's server socket.
        port: Port of the scheduler's server socket.
        rank: Rank of this communicator. Ranks below
            ``options.num_map_worker`` get a ``MapWorker``, the others a
            ``ReduceWorker`` (unless ``options.maponly_mode`` is set).
        dir: Directory in which ``log-mrlite-rank-<rank>.txt`` is written.

    Raises:
        Exception: Any error raised while connecting or during the
            handshake is re-raised after the raw socket has been closed.
    """
    self.options = None
    logfile = os.path.join(dir, 'log-mrlite-rank-%s.txt' % rank)
    config_logging(logfile)
    logging.debug('communicator %s started in %s:%s at %s',
                  rank, server, port, time.asctime())

    sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sockobj.connect((server, port))
        sock = SocketWrapper(sockobj)
        sock.send('rank %s %s' % (rank, os.getpid()))
        pickled_options = sock.recv()
        # NOTE(review): unpickling data read from a socket executes
        # arbitrary code if the peer is not trusted -- confirm that the
        # scheduler endpoint is only reachable from trusted hosts.
        options = pickle.loads(pickled_options)
    except Exception:
        # Do not leak the file descriptor when the handshake fails.
        sockobj.close()
        raise

    # Dispatch table for scheduler commands; the two 'start_*' entries are
    # left as None here.
    self.task_callback = {
        'start_mapper': None,
        'start_reducer': None,
        'status': self.report_status,
        'quit': self.quit,
        'exit': self.quit,
    }
    self.server = server
    self.rank = rank
    self.sock = sock
    self.options = options
    self.process = None

    # NOTE(review): the comparison below assumes rank is an int -- confirm
    # that the caller converts the command-line value before passing it.
    if options.maponly_mode:
        self.worker = MapOnlyWorker(options, rank, sock)
    elif rank < options.num_map_worker:
        self.worker = MapWorker(options, rank, sock)
    else:
        self.worker = ReduceWorker(options, rank, sock)
def __init__(self, server, port, rank, dir):
    """Establish communication with the scheduler.

    Configures the per-rank log file, connects to the scheduler's server
    socket, announces this communicator with a ``'rank <rank> <pid>'``
    message, receives the pickled global options in reply and finally
    selects the worker implementation for this rank.

    Args:
        server: Host of the scheduler's server socket.
        port: Port of the scheduler's server socket.
        rank: Rank of this communicator. Ranks below
            ``options.num_map_worker`` get a ``MapWorker``, the others a
            ``ReduceWorker`` (unless ``options.maponly_mode`` is set).
        dir: Directory in which ``log-mrlite-rank-<rank>.txt`` is written.

    Raises:
        Exception: Any error raised while connecting or during the
            handshake is re-raised after the raw socket has been closed.
    """
    self.options = None
    logfile = os.path.join(dir, 'log-mrlite-rank-%s.txt' % rank)
    config_logging(logfile)
    logging.debug('communicator %s started in %s:%s at %s',
                  rank, server, port, time.asctime())

    sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sockobj.connect((server, port))
        sock = SocketWrapper(sockobj)
        sock.send('rank %s %s' % (rank, os.getpid()))
        pickled_options = sock.recv()
        # NOTE(review): unpickling data read from a socket executes
        # arbitrary code if the peer is not trusted -- confirm that the
        # scheduler endpoint is only reachable from trusted hosts.
        options = pickle.loads(pickled_options)
    except Exception:
        # Do not leak the file descriptor when the handshake fails.
        sockobj.close()
        raise

    # Dispatch table for scheduler commands; the two 'start_*' entries are
    # left as None here.
    self.task_callback = {
        'start_mapper': None,
        'start_reducer': None,
        'status': self.report_status,
        'quit': self.quit,
        'exit': self.quit,
    }
    self.server = server
    self.rank = rank
    self.sock = sock
    self.options = options
    self.process = None

    # NOTE(review): the comparison below assumes rank is an int -- confirm
    # that the caller converts the command-line value before passing it.
    if options.maponly_mode:
        self.worker = MapOnlyWorker(options, rank, sock)
    elif rank < options.num_map_worker:
        self.worker = MapWorker(options, rank, sock)
    else:
        self.worker = ReduceWorker(options, rank, sock)
def start_communicators(self):
    """Start the scheduler's server socket and all communicators.

    Binds a listening socket on ``options.mapreduce_scheduler_ip`` (port 0,
    so the OS picks a free port), launches one worker script per rank via
    ``self.run_ssh_cmd`` and then accepts ``options.num_worker``
    connections. Each communicator that introduces itself with
    ``'rank <rank> <pid>'`` is sent the pickled global options and is
    recorded under its rank. The communicators are later used to control
    the map/reduce workers.

    On return the following attributes are set:
        all_socks: list of wrapped sockets indexed by rank (``None`` for a
            rank whose handshake was not understood).
        all_pids: list of communicator pids indexed by rank.
        map_socks: the first ``options.num_map_worker`` entries of
            ``all_socks``.
        reduce_socks: the remaining entries of ``all_socks``.

    Raises:
        ValueError: If a handshake message does not consist of exactly
            three whitespace-separated fields, or rank/pid are not integers.
    """
    options = self.options
    num_worker = options.num_worker

    # start server
    server_ip = options.mapreduce_scheduler_ip
    server_sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server_sockobj.bind((server_ip, 0))
        server_sockobj.listen(5)
        server_port = server_sockobj.getsockname()[1]

        # start client
        for rank in range(num_worker):
            task = options.all_tasks[rank]
            machine = task['machine']
            tmp_dir = task['tmp_dir']
            worker_script = '%s/%s' % (tmp_dir, SCRIPT_WORKER)
            worker_cmd = 'python %s -s %s -p %s -r %s -d %s' % (
                worker_script, server_ip, server_port, rank, tmp_dir)
            self.run_ssh_cmd(machine, worker_cmd)

        # send global options to each communicator
        pickled_options = pickle.dumps(options)
        all_socks = [None] * num_worker
        all_pids = [None] * num_worker
        for _ in range(num_worker):
            connection, address = server_sockobj.accept()
            sock = SocketWrapper(connection)
            mesg, rank, pid = sock.recv().split()
            if mesg != 'rank':
                # Unknown peer: leave its slot empty, but say so and
                # release the connection instead of dropping it silently.
                logging.warning(
                    'Unexpected handshake %r from %s; connection ignored',
                    mesg, address)
                connection.close()
                continue
            logging.debug('Socket communicator %s started at %s pid=%s' % (
                rank, address, pid))
            sock.send(pickled_options)
            rank = int(rank)
            all_socks[rank] = sock
            all_pids[rank] = int(pid)
    finally:
        # The listening socket is only needed during start-up; close it
        # explicitly on both the success and the error path.
        server_sockobj.close()

    self.map_socks = all_socks[0:options.num_map_worker]
    self.reduce_socks = all_socks[options.num_map_worker:]
    self.all_socks = all_socks
    self.all_pids = all_pids
def start_communicators(self):
    """Start the scheduler's server socket and all communicators.

    Binds a listening socket on ``options.mapreduce_scheduler_ip`` (port 0,
    so the OS picks a free port), launches one worker script per rank via
    ``self.run_ssh_cmd`` and then accepts ``options.num_worker``
    connections. Each communicator that introduces itself with
    ``'rank <rank> <pid>'`` is sent the pickled global options and is
    recorded under its rank. The communicators are later used to control
    the map/reduce workers.

    On return the following attributes are set:
        all_socks: list of wrapped sockets indexed by rank (``None`` for a
            rank whose handshake was not understood).
        all_pids: list of communicator pids indexed by rank.
        map_socks: the first ``options.num_map_worker`` entries of
            ``all_socks``.
        reduce_socks: the remaining entries of ``all_socks``.

    Raises:
        ValueError: If a handshake message does not consist of exactly
            three whitespace-separated fields, or rank/pid are not integers.
    """
    options = self.options
    num_worker = options.num_worker

    # start server
    server_ip = options.mapreduce_scheduler_ip
    server_sockobj = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server_sockobj.bind((server_ip, 0))
        server_sockobj.listen(5)
        server_port = server_sockobj.getsockname()[1]

        # start client
        for rank in range(num_worker):
            task = options.all_tasks[rank]
            machine = task['machine']
            tmp_dir = task['tmp_dir']
            worker_script = '%s/%s' % (tmp_dir, SCRIPT_WORKER)
            worker_cmd = 'python %s -s %s -p %s -r %s -d %s' % (
                worker_script, server_ip, server_port, rank, tmp_dir)
            self.run_ssh_cmd(machine, worker_cmd)

        # send global options to each communicator
        pickled_options = pickle.dumps(options)
        all_socks = [None] * num_worker
        all_pids = [None] * num_worker
        for _ in range(num_worker):
            connection, address = server_sockobj.accept()
            sock = SocketWrapper(connection)
            mesg, rank, pid = sock.recv().split()
            if mesg != 'rank':
                # Unknown peer: leave its slot empty, but say so and
                # release the connection instead of dropping it silently.
                logging.warning(
                    'Unexpected handshake %r from %s; connection ignored',
                    mesg, address)
                connection.close()
                continue
            logging.debug('Socket communicator %s started at %s pid=%s' % (
                rank, address, pid))
            sock.send(pickled_options)
            rank = int(rank)
            all_socks[rank] = sock
            all_pids[rank] = int(pid)
    finally:
        # The listening socket is only needed during start-up; close it
        # explicitly on both the success and the error path.
        server_sockobj.close()

    self.map_socks = all_socks[0:options.num_map_worker]
    self.reduce_socks = all_socks[options.num_map_worker:]
    self.all_socks = all_socks
    self.all_pids = all_pids