def __init__(self, fconf, handler):
    """
    Initialize a MasterServer instance

    @param fconf the path to the configuration file
    @param handler the handler object in charge of managing HTTP requests
    """
    Logger.__init__(self, "Manager")

    # Use a context manager so the config file handle is closed
    # deterministically instead of being leaked.
    with open(fconf) as conf_file:
        conf = json.load(conf_file)

    # Jinja2 initialization.
    tmpl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             'templates')
    self.env = Environment(loader=FileSystemLoader(tmpl_path))

    self.status = ApplicationStatus()

    # This is a dictionary structure in the form
    # reduce_dict["group-name"] = [
    #     [file list by unique integers, size in byte]  => Reduce-0
    #     [...]                                         => Reduce-1
    # ]
    self.reduce_mark = set()
    self.reduce_dict = defaultdict(list)
    self.dead_reduce_dict = defaultdict(list)

    # This is a dictionary nick => Handler instance
    self.masters = {}
    self.last_id = -1

    self.pending_works = defaultdict(list)  # nick => [work, ...]

    self.ping_max = int(conf["ping-max"])
    self.ping_interval = int(conf["ping-interval"])
    self.num_reducer = int(conf["num-reducer"])

    # This will just keep track of the name of the files.
    # "N/A" placeholders until retrieve_file() fills real entries in.
    self.reduce_files = ["N/A"] * self.num_reducer
    self.results_printed = False

    # Load the input module and assign the generator to the work_queue
    module = load_module(conf["input-module"])
    cls = getattr(module, "Input", None)
    generator = cls(fconf).input()

    # Some code for the DFS
    self.use_dfs = use_dfs = conf['dfs-enabled']

    if use_dfs:
        dfsconf = conf['dfs-conf']
        dfsconf['host'] = dfsconf['master']
        self.path = conf['output-prefix']
    else:
        dfsconf = None
        # A single join suffices; the nested os.path.join call was redundant.
        self.path = os.path.join(conf['datadir'], conf['output-prefix'])

    self.work_queue = WorkQueue(self.logger, generator, use_dfs, dfsconf)

    # Lock to synchronize access to the timestamps dictionary
    self.lock = Lock()
    self.timestamps = {}  # nick => (send_ts:enum, ts:float)

    # Ping thread (note: attribute name "hearthbeat" is part of the
    # public interface and therefore kept as-is)
    self.hb_thread = Thread(target=self.hearthbeat)
    # Event to mark the end of the server
    self.finished = Event()

    self.addrinfo = (conf['master-host'], conf['master-port'])
    Server.__init__(self, self.addrinfo[0], self.addrinfo[1], handler)
class MasterServer(Server, Logger):
    """HTTP master server coordinating map/reduce group workers.

    Tracks connected masters, distributes work through a WorkQueue,
    monitors liveness via a heartbeat thread and aggregates reduce
    output files.
    """

    def __init__(self, fconf, handler):
        """
        Initialize a MasterServer instance

        @param fconf the path to the configuration file
        @param handler the handler object in charge of managing HTTP requests
        """
        Logger.__init__(self, "Manager")

        # Use a context manager so the config file handle is closed
        # deterministically instead of being leaked.
        with open(fconf) as conf_file:
            conf = json.load(conf_file)

        # Jinja2 initialization.
        tmpl_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'templates')
        self.env = Environment(loader=FileSystemLoader(tmpl_path))

        self.status = ApplicationStatus()

        # This is a dictionary structure in the form
        # reduce_dict["group-name"] = [
        #     [file list by unique integers, size in byte]  => Reduce-0
        #     [...]                                         => Reduce-1
        # ]
        self.reduce_mark = set()
        self.reduce_dict = defaultdict(list)
        self.dead_reduce_dict = defaultdict(list)

        # This is a dictionary nick => Handler instance
        self.masters = {}
        self.last_id = -1

        self.pending_works = defaultdict(list)  # nick => [work, ...]

        self.ping_max = int(conf["ping-max"])
        self.ping_interval = int(conf["ping-interval"])
        self.num_reducer = int(conf["num-reducer"])

        # This will just keep track of the name of the files.
        # "N/A" placeholders until retrieve_file() fills real entries in.
        self.reduce_files = ["N/A"] * self.num_reducer
        self.results_printed = False

        # Load the input module and assign the generator to the work_queue
        module = load_module(conf["input-module"])
        cls = getattr(module, "Input", None)
        generator = cls(fconf).input()

        # Some code for the DFS
        self.use_dfs = use_dfs = conf['dfs-enabled']

        if use_dfs:
            dfsconf = conf['dfs-conf']
            dfsconf['host'] = dfsconf['master']
            self.path = conf['output-prefix']
        else:
            dfsconf = None
            # A single join suffices; the nested os.path.join was redundant.
            self.path = os.path.join(conf['datadir'], conf['output-prefix'])

        self.work_queue = WorkQueue(self.logger, generator, use_dfs, dfsconf)

        # Lock to synchronize access to the timestamps dictionary
        self.lock = Lock()
        self.timestamps = {}  # nick => (send_ts:enum, ts:float)

        # Ping thread (attribute name "hearthbeat" is kept as-is: it is
        # the public method name referenced by Thread below)
        self.hb_thread = Thread(target=self.hearthbeat)
        # Event to mark the end of the server
        self.finished = Event()

        self.addrinfo = (conf['master-host'], conf['master-port'])
        Server.__init__(self, self.addrinfo[0], self.addrinfo[1], handler)

    def run(self):
        "Start the server"

        # Just redirects every message logged to the application status object
        # in order to make it available through the web interface
        self.logger.addHandler(PushHandler(self.status.push_log))

        if self.work_queue.use_dfs:
            self.info("Starting Distributed Filesystem")
            self.work_queue.fs.start()

        self.info("Server started on http://%s:%d" % self.addrinfo)
        self.hb_thread.start()
        Server.run(self)

    def stop(self):
        "Stop the server"
        self.finished.set()

        if self.work_queue.use_dfs:
            self.work_queue.fs.stop()

    def retrieve_file(self, nick, reduce_idx, file):
        """Record the output file produced by a reducer.

        @param nick the nick of the group that produced the file
        @param reduce_idx the index of the reducer that produced the file
        @param file a (file id, file size in bytes) tuple
        """
        fid, fsize = file
        fname = get_file_name(self.path, reduce_idx, fid)
        self.reduce_files[reduce_idx] = (nick, fname, fsize)

    def print_results(self):
        """Log the final reduce output files, exactly once.

        Only fires when no reduce work is outstanding (no marks, no dead
        reducers) and the application has reached the merge phase.
        """
        if self.results_printed:
            return

        if not self.reduce_mark and not self.dead_reduce_dict and \
           self.status.phase == self.status.PHASE_MERGE:

            self.results_printed = True

            for nick, fname, fsize in self.reduce_files:
                self.info("Group %s produced %s [%d bytes] output file" % \
                          (nick, fname, fsize))

    def on_group_died(self, nick, is_error):
        """
        Called whenever a master disconnected from the server

        @param nick the nick of the master dying
        @param is_error a boolean indicating if this was an abnormal error
                        or whether the socket was safely shutted down.
        """
        # NB: Possibly we can restart the master through a bash script or
        # provide to the final user an overridable method in order to manage
        # the situation and apply different policies.

        self.status.update_master_status(nick, {'status': 'dead'})
        self.status.faults += 1

        # Remove any pending map activity and requeue it for other groups
        lst = self.pending_works[nick]
        del self.pending_works[nick]

        for wstatus in lst:
            self.status.map_faulted += 1
            self.work_queue.push(wstatus.state)

        # Remove any pending reduce activity; keep it in dead_reduce_dict so
        # it can be reassigned later
        lst = self.reduce_dict[nick]

        if lst:  # This might be None
            self.dead_reduce_dict[nick] = lst
            for reducer_lst in lst:
                if reducer_lst:
                    self.status.reduce_faulted += 1

        del self.reduce_dict[nick]
        del self.masters[nick]

    def hearthbeat(self):
        """
        This method is executed in an external thread namely the hearthbeat
        thread. The aim of the code is to periodically ping all the masters.
        """
        while not self.finished.is_set():
            # Ask every connected master for a ping round-trip measurement
            with self.lock:
                for nick in self.masters:
                    self.timestamps[nick] = (PING_EXECUTE, 0)

            time.sleep(self.ping_interval)

            # Here we do not do anything if a given master overflows
            # the specified limit but just warn the user about the violation.
            with self.lock:
                for nick in self.masters:
                    status, rtt = self.timestamps.get(nick, (None, None))

                    if status is not None and rtt > self.ping_max:
                        self.warning(
                            "RTT for %s is above the limit (%.2f > %.2f)" % \
                            (nick, rtt, self.ping_max)
                        )