class Master(Logger, HTTPClient):
    def __init__(self, nick, fconf):
        """
        Create a Master client/server
        @param nick a friendly name string for identification
        @param fconf path to the configuration file
        """

        Logger.__init__(self, "Master")
        HTTPClient.__init__(self)

        self.fconf = fconf
        with open(fconf) as fobj:
            self.conf = json.load(fobj)

        # This keeps track of the statistics of the master. See status.py
        self.status = MasterStatus()

        # Set to true if the registration was successful
        self.registered = False
        self.unique_id = -1

        # Marks the end of the stream: the server has no more maps to execute.
        # Set to true whenever an end-of-stream message is received.
        self.end_of_stream = False

        self.comm = MPI.COMM_WORLD
        self.n_machines = count_machines(self.conf["machine-file"])

        # The mux object.
        self.communicators = None

        # The lock is used to synchronize access to the units_to_kill
        # variable, which is accessed by two different threads, namely the one
        # interacting with the server and the one interacting with the workers
        self.kill_lock = Lock()
        self.units_to_kill = 0

        self.info("We have %d available slots" % (self.n_machines))

        self.nick = nick
        self.url = self.conf['master-url']
        self.sleep_inter = self.conf['sleep-interval']

        # Generic lock to synchronize the access to the instance variables of
        # the object itself. Its use should be minimized.
        self.lock = Lock()

        # Integer marking the number of maps which are currently being
        # executed. Incremented on assignment, decremented on finish.
        self.num_map = 0

        # Simple queue of WorkerStatus(TYPE_MAP, ..) objects. Filled whenever
        # the server returns us a compute-map message.
        self.map_queue = []

        # An event that, when set, marks the end of the computation. Set upon
        # reception of the plz-die message.
        self.ev_finished = Event()

        # Maximum number of files that a reduce may manage at once. Usually
        # this should be set to the MAX_FD of the system.
        self.threshold_nfile = int(self.conf["threshold-nfile"])

        # Simple lock that synchronizes access to the reduc* instance variables.
        self.reduce_lock = Lock()

        # For each reducer this holds a nested list of (fid, fsize) pairs
        # representing the outputs of the mappers.
        # If we have two reducers we will have for example:
        # [
        #  [(0, 45), (1, 32), (3, 331)],
        #  [(5, 22), (6, 99)]
        # ]
        # Meaning:
        #  Reduce #1: -> output-reduce-000000-000000, 45 bytes
        #             -> output-reduce-000000-000001, 32 bytes
        #             -> output-reduce-000000-000003, 331 bytes
        #  Reduce #2: -> output-reduce-000001-000005, 22 bytes
        #             -> output-reduce-000001-000006, 99 bytes
        self.reducing_files = []

        # Boolean values indicating, for each reducer, whether it has started
        self.reduce_started = []

        for _ in xrange(int(self.conf['num-reducer'])):
            self.reduce_started.append(False)
            self.reducing_files.append([])

        # The timer is used to unlock the semaphore that acts as a bounding
        # mechanism for requesting new jobs from the server.
        self.timer = None
        self.num_pending_request = Semaphore(self.n_machines)
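        # Flow control sketch: __requester_thread acquires one permit of this
        # semaphore for every work-request it sends; permits are given back by
        # _unblock_requester (after a try-later), by __map_finished, by the
        # merge-phase loop and by _on_plz_die on shutdown.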

        # Here we create two simple threads: one in charge of executing
        # requests and the other in charge of running the main loop. There is
        # also a third thread executing asyncore.loop that manages the HTTP
        # communication with the server.
        self.requester_thread = Thread(target=self.__requester_thread)
        self.main_thread = Thread(target=self.__main_loop)

    ##########################################################################
    # Events handling
    ##########################################################################
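
    # Each _on_* method below is meant to handle one type of message received
    # from the server (e.g. a registration reply ending up in
    # _on_registration_ok). The dispatch itself is performed by the HTTPClient
    # machinery, which is not shown here, so the exact mapping is an
    # assumption based on the handler names.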

    def _on_change_degree(self, nick, data):
        self.info("Requested a parallelism degree change")
        # TODO: Ignore in the final phase

        if data < 0:
            data = abs(data)
            if self.communicators.get_total() - data > 0:
                with self.kill_lock:
                    self.units_to_kill += data
        else:
            self.communicators.spawn_more(data)

            total = self.communicators.get_total()

            self.status.nproc = total
            self.__send_req('change-degree-ack', data=total)

    def _on_plz_die(self, nick, data):
        self.info("Exit message received. Sending termination messages")
        self.communicators.send_all(Message(MSG_QUIT, 0, None), True)

        self.ev_finished.set()
        self.num_pending_request.release()

        self.close()

    def _on_try_later(self, nick, data):
        #self.info("Waiting 1 second before unblocking requester.")

        if self.timer is None:
            self.timer = Timer(1, self._unblock_requester)
            self.timer.start()

    def _unblock_requester(self):
        self.num_pending_request.release()
        self.timer = None

    def _on_request_error(self, nick, data):
        self.error("Error in request")

    def _on_connected(self):
        self.info("Succesfully connected to the server. Trying registration.")
        self.__send_req('registration', immediate=True)

    def _on_reduce_recovery(self, nick, data):
        self.info("We need to recover something %s" % str(data))
        self.reducing_files = data

    def _on_registration_ok(self, nick, data):
        with self.lock:
            if self.registered:
                self.error("Already registered")
            else:
                self.registered = True
                self.unique_id = data

                self.info("Succesfully registered with ID=%d" % data)
                self.__inner_start()

    def _on_change_nick(self, nick, data):
        self.warning("Nick already used. Randomizing nick for your fun")
        self.nick = MPI.Get_processor_name() + str(random.randint(0, 100))
        self.warning("Your new nick is %s" % self.nick)
        self.__send_req('registration', immediate=True)

    def _on_end_of_stream(self, nick, data):
        with self.lock:
            self.end_of_stream = True

    def _on_compute_map(self, nick, data):
        self.__push_work(WorkerStatus(TYPE_MAP, nick, data))

    def _on_keep_alive(self, nick, data):
        msg = {'timeprobe': data, 'status': self.status.serialize()}
        self.__send_req('keep-alive', data=msg, immediate=True)

    def __send_req(self, type, nick=None, data='', immediate=True):
        """
        Put the request in a buffer of requests. The buffer is flushed in
        FIFO fashion whenever possible.

        @param type a string representing the type of the message
        @param nick set it to None to use the current nick
        @param data user data to send as payload to the message
        @param immediate True to insert the request in the first position of
                         the buffer in order to prioritize it
        """
        if nick is None:
            nick = self.nick

        url = urlparse.urlparse(self.url)
        data = json.dumps({'type': type, 'nick': nick, 'data': data})
        self._add_request("POST %s HTTP/1.1\r\n"               \
                          "Host: %s\r\n"                       \
                          "Connection: keep-alive\r\n"         \
                          "Content-Type: application/json\r\n" \
                          "Content-Length: %d\r\n\r\n%s" %     \
                          (url.path, url.hostname, len(data), data), immediate)
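
        # As a concrete illustration (with hypothetical values), calling
        # __send_req('work-request') with nick "master-1" and master-url
        # "http://server:8000/mapred" would enqueue a request such as (the
        # key order produced by json.dumps may vary):
        #
        #   POST /mapred HTTP/1.1
        #   Host: server
        #   Connection: keep-alive
        #   Content-Type: application/json
        #   Content-Length: 56
        #
        #   {"type": "work-request", "nick": "master-1", "data": ""}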

    def __requester_thread(self):
        while not self.ev_finished.is_set():
            self.num_pending_request.acquire()
            self.__send_req('work-request')

        self.info("Requester thread exited correctly")

    def __finished(self):
        """
        Check the status of the master
        @return True if the stream is finished, all the mappers returned and
                there is no reducer active
        """
        with self.lock:
            exit = self.end_of_stream and \
                   self.num_map == 0 and \
                   len(self.map_queue) == 0

        return exit and self.__finished_reduce()

    def __finished_reduce(self):
        "@return True if there is no reducer started"

        with self.reduce_lock:
            if not any(self.reduce_started):
                return True
        return False

    def __pop_work(self):
        """
        Extract a job from the map_queue
        @return a WorkerStatus instance
        """
        with self.lock:
            if len(self.map_queue) > 0:
                self.num_map += 1
                return self.map_queue.pop(0)

        return WorkerStatus(TYPE_DUMMY, 0, self.sleep_inter)

    def __push_work(self, wstatus):
        """
        Insert a WorkerStatus object in the map_queue.
        @param wstatus a WorkerStatus instance
        """
        with self.lock:
            self.map_queue.append(wstatus)

    def __map_finished(self, msg):
        """
        Update the status of the master and send back an ack to the server.

        The method is also responsible for pushing future reduce work into
        the reducing_files structure.

        @param msg the Message object returned by the generic worker
        """

        with self.lock:
            self.num_map -= 1

        self.num_pending_request.release()

        # Note: msg.result is a list of tuples representing output files
        #       in the form: (rid, fid, fsize)
        self.__send_req('map-ack', data=(msg.tag, msg.result))

        nfiles = 0
        filesize = 0

        for rid, fid, fsize in msg.result:
            nfiles += 1
            filesize += fsize
            self.reducing_files[rid].append((fid, fsize))

        self.status.increase(
            map_finished=1,
            map_ongoing=-1,
            map_file=nfiles,
            map_file_size=filesize,
            bandwidth=msg.info[0],
            time=msg.info[1],
        )

    def __reduce_finished(self, msg, skip=False):
        """
        Update the status of the master and send back an ack to the server.
        @param msg the Message object returned by the generic worker
        @param skip True to skip updating the reduce bookkeeping structures
        """

        if not skip:
            with self.reduce_lock:
                self.reduce_started[msg.tag] = False
                self.reducing_files[msg.tag].append(msg.result[0])

        # This contains the previously reduced input files and the result file.
        # data will be an array (<reduceidx>, (<output>, <inp1>, <inp2>, ...))
        self.__send_req('reduce-ack', data=(msg.tag, msg.result))

        self.status.increase(
            reduce_finished=1,
            reduce_ongoing=-1,
            reduce_file=1,
            reduce_file_size=msg.result[0][1],
            bandwidth=msg.info[0],
            time=msg.info[1],
        )

    ##########################################################################
    # Main loop
    ##########################################################################

    def __got_killed(self, comm):
        """
        This method is called in order to accommodate parallelism degree
        changes during the computation. It returns a boolean indicating
        whether the communicator has been killed or not.

        @return True if comm was killed
        """
        to_kill = False

        with self.kill_lock:
            if self.units_to_kill > 0:
                self.units_to_kill -= 1
                to_kill = True

        if to_kill:
            comm.send(Message(MSG_QUIT, 0, None))
            self.communicators.remove(comm)

            total = self.communicators.get_total()

            self.status.nproc = total

            # Note that we send this message on every kill. These acks could
            # be aggregated in order to avoid useless messages traveling over
            # the network.
            self.__send_req('change-degree-ack', data=total)

        return to_kill

    def __main_loop(self):
        """
        This is the main loop of the application. It is organized in three
        loops.

         1. The first continues until the stream is finished and all the
            assigned maps and reduces have returned.
         2. The second starts by assigning at most num-reducer reduce jobs
            to the generic workers. This is needed to reduce all the partial
            files that may have been produced during the first loop.
         3. Eventually a third merge cycle is started and runs until
            ev_finished is set. This implements a global merging scheme
            conducted with the assistance of the server, which is in charge
            of orchestrating this phase.
        """

        while not self.__finished():
            idx, comm = self.communicators.receive()

            if comm is None:
                break

            # Here we wait until all the maps are assigned and all the
            # assigned reduces are finished.
            msg = comm.recv()

            if msg.command == MSG_AVAILABLE:
                if not self.__got_killed(comm):
                    self.__assign_work(idx, comm)
                else:
                    self.info("Worker %d was killed as requested" % idx)

            elif msg.command == MSG_FINISHED_MAP:
                self.on_map_finished(msg.result)
                self.__map_finished(msg)

            elif msg.command == MSG_FINISHED_REDUCE:
                self.on_reduce_finished(msg.result)
                self.__reduce_finished(msg)

        to_assign = int(self.conf["num-reducer"])

        self.info("Final phase. Start all the reducers")
        self.info("We have to execute %d reducer works" % to_assign)

        while to_assign > 0:
            idx, comm = self.communicators.receive()

            if comm is None:
                break

            msg = comm.recv()

            if msg.command == MSG_AVAILABLE:
                if not self.__got_killed(comm):
                    # Here we need to check the type of assignment made. If
                    # we don't have any more reduces to assign, it is
                    # convenient to get out of the cycle.
                    if self.__assign_work(idx, comm, True) == TYPE_DUMMY:
                        to_assign -= 1
                else:
                    self.info("Worker %d was killed as requested" % idx)

            elif msg.command == MSG_FINISHED_REDUCE:
                self.on_reduce_finished(msg.result)
                self.__reduce_finished(msg, True)
                to_assign -= 1

        if not self.ev_finished.is_set():
            self.__merge_phase()
        else:
            self.info("Merge was not necessary")

    def __merge_phase(self):
        # Let's reset the status of all the reducers
        with self.reduce_lock:
            for reduce_idx in xrange(len(self.reducing_files)):
                self.reducing_files[reduce_idx] = []
                self.reduce_started[reduce_idx] = False

        for _ in xrange(self.n_machines):
            self.num_pending_request.release()

        self.info("Entering in the merge phase")

        while not self.ev_finished.is_set():
            idx, comm = self.communicators.receive()

            if comm is None:
                break

            msg = comm.recv()

            if msg.command == MSG_AVAILABLE:
                if not self.__got_killed(comm):
                    self.__assign_work(idx, comm, True)
                else:
                    self.info("Worker %d was killed as requested" % idx)

            elif msg.command == MSG_FINISHED_REDUCE:
                self.on_reduce_finished(msg.result)
                self.__reduce_finished(msg)
                self.num_pending_request.release()

        self.__send_req('all-finished')

    def __assign_work(self, idx, comm, final_phase=False):
        """
        Assign a job to a generic worker

        @param idx the index as returned by the communicators struct
        @param comm the MPI Intercommunicator
        @param final_phase True if we are approaching the last phase and have
                           to assign all the available files to the reducers
        @return an integer indicating the type of work assigned
        """

        wstatus = self._check_threshold(final_phase)

        if wstatus is None:
            wstatus = self.__pop_work()
        else:
            with self.reduce_lock:
                self.reduce_started[wstatus.tag] = True

        trans = {TYPE_MAP:    MSG_COMPUTE_MAP,
                 TYPE_REDUCE: MSG_COMPUTE_REDUCE,
                 TYPE_DUMMY:  MSG_SLEEP}

        msg = trans[wstatus.type]

        if msg == MSG_COMPUTE_MAP:
            self.status.increase(map_ongoing=1)
        elif msg == MSG_COMPUTE_REDUCE:
            self.status.increase(reduce_ongoing=1)

        self.debug("Assigning %s as role to worker %d" % (MSG_TO_STR[msg], idx))
        comm.send(Message(msg, wstatus.tag, wstatus.state), dest=0)

        return wstatus.type

    def _check_threshold(self, ignore_limits=False):
        """
        This function checks the reducing_files structure and, if a given
        threshold is met, returns a WorkerStatus representing a reduce
        operation.

        @param ignore_limits True if we have to skip threshold-nfile limit
                             check
        @return a WorkerStatus instance or None if a reduce is not required
        """

        overflow = False
        valid_found = False
        reduce_idx = 0

        num_files = 0

        with self.reduce_lock:
            for reduce_idx, reduce_list in enumerate(self.reducing_files):

                # Skip already started reducers
                if self.reduce_started[reduce_idx]:
                    continue

                for fid, _ in reduce_list:
                    num_files += 1

                    if ignore_limits:
                        continue

                    if num_files >= self.threshold_nfile:
                        overflow = True
                        break

                if num_files > 1:
                    # Try to fill the queue of input files as much as
                    # possible, up to the threshold limit. A reduce is worth
                    # assigning when the threshold was hit (overflow) or when
                    # limits are ignored (final phase).
                    if overflow or ignore_limits:
                        valid_found = True
                    break
                else:
                    num_files = 0
                    overflow = False
                    continue

        if valid_found:
            # The last valid reduce_idx will be our target
            files = self.reducing_files[reduce_idx]
            self.reducing_files[reduce_idx] = files[num_files:]
            assigned = files[:num_files]

            files_id = map(lambda x: x[0], assigned)
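
            # As a worked example (hypothetical values): if this reducer's
            # list were [(0, 45), (1, 32), (3, 331)] and num_files were 2,
            # then assigned would be [(0, 45), (1, 32)], files_id would be
            # [0, 1], and (3, 331) would stay queued for a later reduce.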

            self.info("Files to reduce %s [overflow check: %s]" % \
                     (str(files_id), str(overflow)))

            return WorkerStatus(TYPE_REDUCE, reduce_idx, (reduce_idx, files_id))
        else:
            return None

    ##########################################################################
    # Public functions
    ##########################################################################

    def run(self):
        """
        Start the master and connect it to the server indicated in the
        configuration file.
        """
        url = urlparse.urlparse(self.url)
        self.connect((url.hostname, url.port or 80))

        HTTPClient.run(self)

    def __inner_start(self):
        self.info("Starting requester and main thread")

        # Determine the number of processing elements, trying to maximize it
        num_machines = min(self.n_machines,
                           max(self.conf['num-mapper'],
                               self.conf['num-reducer']))

        self.info("We will use %d slots" % (num_machines))

        filename = 'worker' + __file__[__file__.rindex('.'):]
        filename = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), filename
        )

        self.debug("Using %s as spawner" % filename)

        self.communicators = Muxer(self.unique_id, num_machines, (filename, self.fconf))
        self.status.nproc = num_machines

        self.main_thread.start()
        self.requester_thread.start()

    def on_map_finished(self, result):
        "You are free to override this"
        pass

    def on_reduce_finished(self, result):
        "You are free to override this"
        self.info("Final result %s" % str(result))