def run(self):
    """
    Connect to the server given in the configuration file and start
    the HTTP client loop.

    The target host/port is extracted from ``self.url``; when the URL
    carries no explicit port, 80 is assumed.
    """
    parsed = urlparse.urlparse(self.url)
    endpoint = (parsed.hostname, parsed.port or 80)
    self.connect(endpoint)
    HTTPClient.run(self)
def __init__(self, nick, fconf):
    """
    Create a Master client/server.

    @param nick a friendly name string for identification
    @param fconf path to the JSON configuration file
    """
    Logger.__init__(self, "Master")
    HTTPClient.__init__(self)

    self.fconf = fconf
    # Use a context manager so the configuration file descriptor is
    # closed deterministically (the original json.load(open(...)) leaked
    # the handle until garbage collection).
    with open(fconf) as conf_file:
        self.conf = json.load(conf_file)

    # This keeps track of the statistics of the master. See status.py
    self.status = MasterStatus()

    # Set to True if the registration was successful
    self.registered = False
    self.unique_id = -1

    # Marks the end of the stream. The server has no more maps to execute.
    # Set to True whenever an end-of-stream message is received.
    self.end_of_stream = False

    self.comm = MPI.COMM_WORLD
    self.n_machines = count_machines(self.conf["machine-file"])

    # The mux object.
    self.communicators = None

    # The lock is used to synchronize the access to the units_to_kill
    # variable which will be accessed by two different threads, namely the
    # one interacting with the server and the one interacting with the
    # workers.
    self.kill_lock = Lock()
    self.units_to_kill = 0

    self.info("We have %d available slots" % (self.n_machines))

    self.nick = nick
    self.url = self.conf['master-url']
    self.sleep_inter = self.conf['sleep-interval']

    # Generic lock to synchronize the access to the instance variables of
    # the object itself. Its use should be minimized.
    self.lock = Lock()

    # Integer marking the number of maps which are currently being
    # executed. Incremented on assignment, decremented on finish.
    self.num_map = 0

    # Simple queue of WorkerStatus(TYPE_MAP, ..) objects. Filled whenever
    # the server returns us a compute-map message.
    self.map_queue = []

    # An event that, whenever set, marks the end of the computation; set
    # upon reception of the plz-die message.
    self.ev_finished = Event()

    # Maximum number of simultaneous files that the reduce may manage in
    # one row. Usually should be set to the MAX_FD of the system.
    self.threshold_nfile = int(self.conf["threshold-nfile"])

    # Simple lock that synchronizes access to reduc* instance variables.
    self.reduce_lock = Lock()

    # This holds the triples in the sense that for each reduce we have
    # a nested list with integers representing output of the mappers.
    # If we have two reducers we will have for example:
    # [
    #   [(0, 45), (1, 32), (3, 331)],
    #   [(5, 22), (6, 99)]
    # ]
    # Meaning:
    #  Reduce #1: -> output-reduce-000000-000000, 45 bytes
    #             -> output-reduce-000000-000001, 32 bytes
    #             -> output-reduce-000000-000003, 331 bytes
    #  Reduce #2: -> output-reduce-000001-000005, 22 bytes
    #             -> output-reduce-000001-000006, 99 bytes
    self.reducing_files = []

    # It will contain boolean values indicating the status of the reducers
    self.reduce_started = []

    for _ in xrange(int(self.conf['num-reducer'])):
        self.reduce_started.append(False)
        self.reducing_files.append([])

    # The timer will be used to unlock the semaphore that is used as
    # bounding mechanism for requesting new jobs to the server.
    self.timer = None
    self.num_pending_request = Semaphore(self.n_machines)

    # Here we start two simple threads, one in charge of executing
    # requests and the other in charge of executing the main loop. There
    # is also another thread executing asyncore.loop that manages the
    # http communication with the server.
    self.requester_thread = Thread(target=self.__requester_thread)
    self.main_thread = Thread(target=self.__main_loop)