Example #1
class DNACurveNode(ApplicationNode):
    """
    DNA Curve Analysis application
    """
    implements(IApp)
    
    factor = IntItem('dnasample', 'factor', 1,
        """How many workloads a single task gets assigned; in our case a workload is a row""")
    
    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(DNACurveNode, self).app_init()
    
    def app_main(self):
        """
        Applications main entry
        """
        return super(DNACurveNode, self).app_main()
    
    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are 
        able to send computation tasks over
        """
        self.start_time = time.time()
        self.dna_system = DNACurveTaskSystem("ATGCAAATTG"*1000, "trifonov", name="Example", maxlen=1024*1024, factor=self.factor)
        return self.dna_system
    
    def work_finished(self, result, task_system):
        """
        Called when the work has been done; the result is what our ITaskSystem
        sent back to us. Check result for more info
        """
        # Reassemble the result to be processed further
        try:
            print("Total time: {}".format(time.time() - self.start_time))
        except:
            traceback.print_exc()
        self.shutdown_main_loop()
    
    def push_tasksystem_response(self, result):
        """
        We just added an ITaskSystem to the framework. Check result for more info
        """
        self.log.info("Tasks system send to computation framework")
    
    def push_tasksystem_failed(self, result):
        """
        We failed to push an ITaskSystem onto the computation framework!
        """
        self.log.error("Task system could not be sent to the framework!")
        # Check if the result dict contains a traceback
        if "t" in result:
            self.log.error(result["t"])
Example #2
class Pickler(Component):
    """
    Class responsible for pickling and unpickling objects
    """
    implements(IPickler)
    pickle_protocol = IntItem(
        'pickler', 'protocol', pickle.HIGHEST_PROTOCOL,
        """Protocol used when pickling, by default pickle.HIGHEST_PROTOCOL""")

    secret = ConfigItem(
        'pickler', 'secret', 'JhTv535Vg385V',
        """Default secret used when encrypting/decrypting a pickle""")

    # salt size in bytes
    salt_size = IntItem('pickler', 'salt_size', 16,
                        """Size of the salt used in the encryption process""")

    # number of iterations in the key generation
    num_iterations = IntItem(
        'pickler', 'num_iterations', 20,
        """Number of iterations used in the key generation""")

    # the size multiple required for AES
    aes_padding = IntItem('pickler', 'aes_padding', 16,
                          """Padding used for AES encryption""")

    def __init__(self):
        super(Pickler, self).__init__()
        self.crypto_helper = CryptoHelper(self.salt_size, self.num_iterations,
                                          self.aes_padding)

        if self.secret == Pickler.secret.default.decode('utf-8'):
            self.log.warn(
                "Pickler using default secret, please setup you own to avoid security vulnerabilities!"
            )

    def pickle_f(self, fname, obj):
        """
        Pickle an object into a gzip-compressed file
        """
        try:
            with gzip.open(fname, "wb") as f:
                pickle.dump(obj=obj, file=f, protocol=self.pickle_protocol)
        except Exception:
            raise PickleException()

    def unpickle_f(self, fname):
        """
        Unpickle an object from a gzip-compressed file
        """
        try:
            with gzip.open(fname, "rb") as f:
                return pickle.load(f)
        except Exception:
            raise UnpickleException()

    def pickle_s(self, obj):
        """
        Pickle an object and return the pickled string
        """
        try:
            return pickle.dumps(obj, protocol=self.pickle_protocol)
        except:
            raise PickleException()

    def pickle_encode_s(self, obj):
        """
        Encode a pickled object
        """
        try:
            return base64.b64encode(
                self.crypto_helper.encrypt(self.pickle_s(obj), self.secret))
        except:
            raise PickleException()

    def unpickle_s(self, pickle_string):
        """
        Unpickle a string and return an object
        """
        try:
            return pickle.loads(pickle_string)
        except:
            raise UnpickleException()

    def unpickle_decode_s(self, pickle_string):
        """
        Unpickle a base64 string and return an object
        """
        try:
            return self.unpickle_s(
                self.crypto_helper.decrypt(base64.b64decode(pickle_string),
                                           self.secret))
        except:
            raise UnpickleException()
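A usage sketch of the encrypted round trip, assuming a context where the component framework can instantiate Pickler (the payload value is illustrative):

pickler = Pickler()
payload = {"rows": [1, 2, 3]}
token = pickler.pickle_encode_s(payload)   # pickle -> encrypt -> base64
assert pickler.unpickle_decode_s(token) == payload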
Example #3
class ZMQTaskManager(Component, threading.Thread):
    """
    Task manager that pulls tasks from the master's ZMQ backend and feeds
    them to local worker processes
    """
    implements(ITaskManager)
    
    num_workers = IntItem('ZMQTaskManager', 'num_workers', -1,
        """Number of worker processes to be created; -1 will spawn as many as there are physical cores.""")
    
    master_backend_port = HostItem('ZMQTaskManager', 'master_backend_port', 'localhost:5001',
        """Master's backend host:port from which we will request tasks.""")
    
    def __init__(self):
        threading.Thread.__init__(self)
        Component.__init__(self)
        
        # Some thread related stuff
        self.daemon = True
        self.kill_switch = False
        
        # Create the ZMQ context
        self.context = zmq.Context()
        
        # Initialize base manager stuff
        self._num_workers = 0
        self.results = multiprocessing.JoinableQueue()
        
    def init(self, identity, address):
        """
        Initialize the manager
        """
        self.identity = identity
        self.host = address[0]
        self.port = address[1]
        
        self._num_workers = self.num_workers
        if self._num_workers <= 0:
            self._num_workers = multiprocessing.cpu_count()
            
        # We now prepare our queues, both the joinable and the results
        # queues. Then we just create a process for each worker
        self.tasks = multiprocessing.JoinableQueue()
        self.processes = [TaskProcess(self.results, i, self.tasks, self.identity, self.host, self.port) for i in range(self._num_workers)]
        #self.processes = [TaskProcess(self.results, i) for i in range(self._num_workers)]
        
        context = zmq.Context()
        self.ventilator_send = context.socket(zmq.PUSH)
        self.ventilator_send.bind("tcp://127.0.0.1:%d" % WORKER_PORT)
    
    def get_num_workers(self):
        """
        Return the number of workers we use for our processing
        """
        return self._num_workers
    
    def start(self):
        """
        Start our worker processes
        """
        threading.Thread.start(self)
        for worker in self.processes:
            worker.daemon = True
            worker.start()

    def stop(self):
        """
        Stop our worker processes
        """
        self.log.info("Shutting down ZMQTaskManager")
        for i in xrange(self._num_workers):
            #send_to_zmq_zipped(self.ventilator_send, None)
            self.tasks.put(None)
        # Poison for result listener
        self.results.put(None)
        
        # Kill our own thread
        self.kill_switch = True
        self.context.term()
        self.join(5.0)  # join() timeout is in seconds
        self.log.info("ZMQTaskManager shutdown finished")
        
    def run(self):
        self.log.info("ZMQTaskManager started")
        
        # Create and connect to our scheduler socket
        self.socket = self.context.socket(zmq.PULL)
        self.socket.setsockopt(zmq.LINGER, 0)
        self.socket.set_hwm(0)
        self.socket.connect('tcp://{host}:{port}'.format(host=self.master_backend_port[0], port=self.master_backend_port[1]))
        
        # Start receiving messages
        while not self.kill_switch:
            try:
                next_task = receive_from_zmq_zipped(self.socket)
                self.push_task(next_task)
            except zmq.ContextTerminated:
                break
            except zmq.ZMQError as e:
                if e.errno == zmq.EAGAIN:
                    pass  # no message was ready
                else:
                    break
            except:
                traceback.print_exc()
            
        self.socket.close()
        self.log.info("ZMQTaskManager stopped")

    def update_pool(self, _num_workers=-1):
        """
        Set the number of workers the task manager should use
        """
        self.stop()
        # Note: init() re-reads num_workers from the config item, so the
        # _num_workers argument is not applied here
        self.init(self.identity, (self.host, self.port))
        self.start()

    def push_task(self, task):
        """
        Push a task that should be completed by the workers
        """
        try:
            #send_to_zmq_zipped(self.ventilator_send, task)
            self.tasks.put(task)
        except:
            traceback.print_exc()
        return True

    def wait_for_all(self):
        """
        Wait until all tasks have been finished
        """
        pass
    
    def get_results_queue(self):
        """
        Return a reference to the result queue
        """
        return self.results
    
    def task_finished(self, task, result, error):
        """
        Called once a task has been performed
        """
        task.finished(result, error)
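The manager's transport is the classic ZMQ pipeline pattern: the master PUSHes tasks and every manager PULLs from the same endpoint. A self-contained pyzmq sketch of that pattern (port and payload are illustrative, not the framework's):

import zmq

context = zmq.Context()
push = context.socket(zmq.PUSH)     # master side
push.bind("tcp://127.0.0.1:5555")
pull = context.socket(zmq.PULL)     # task manager side
pull.connect("tcp://127.0.0.1:5555")

push.send(b"task-1")
print(pull.recv())                  # b"task-1"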
Example #4
class ZMQTaskScheduler(Component, threading.Thread):
    """
    Task scheduler implementation using ZMQ push/pull sockets. Uses the PUSH
    socket's simple round-robin distribution to handle multiple slaves.
    """
    implements(ITaskScheduler)
    
    frontend_port = IntItem('ZMQTaskScheduler', 'frontend_port', 5000,
        """Frontend port used to send tasks to the scheduler""")
    
    backend_port = IntItem('ZMQTaskScheduler', 'backend_port', 5001,
        """Backend port used by the scheduler to hand out tasks. Slaves will receive tasks on it.""")
    
    def __init__(self):
        threading.Thread.__init__ (self)
        Component.__init__(self)
        self.stats = Stats.getInstance()
        
        # Some thread related stuff
        self.daemon = True
        self.kill_switch = False
        
        # The socket framework
        self.context = zmq.Context()
        self.frontend = self.context.socket(zmq.PULL)
        self.frontend.bind('tcp://*:{port}'.format(port=self.frontend_port))
        self.frontend.setsockopt(zmq.LINGER, 0)
        self.frontend.set_hwm(0)
        self.backend = self.context.socket(zmq.PUSH)
        self.backend.bind('tcp://*:{port}'.format(port=self.backend_port))
        self.backend.setsockopt(zmq.LINGER, 0)
        self.backend.set_hwm(0)
        
        # The poller is used to poll for incoming messages on the frontend;
        # the backend is push-only and needs no polling
        self.poll = zmq.Poller()
        self.poll.register(self.frontend, zmq.POLLIN)
        
        # Socket connected locally to the frontend to send tasks; a lock
        # guards this socket so pushes are thread-safe
        self.frontend_push = self.context.socket(zmq.PUSH)
        self.frontend_push.connect('tcp://localhost:{port}'.format(port=self.frontend_port))
        self.frontend_push.setsockopt(zmq.LINGER, 0)
        self.frontend_push.set_hwm(0)
        
        
        # Our lock used to protect the frontend_push socket
        self.lock = threading.Lock()
        
    def setup(self, master):
        self.master = master
        self.start()
    
    def run(self):
        self.log.info("ZMQTaskScheduler started")
        
        # Start receiving messages
        while not self.kill_switch:
            try:
                sockets = dict(self.poll.poll(1000))
                if self.frontend in sockets:
                    msg = self.frontend.recv(flags=zmq.NOBLOCK)
                    #tprint('Server received message from %s' % (ident))
                    self.backend.send(msg, flags=zmq.NOBLOCK)
            except zmq.Again:
                # Timeout just fired, no problem!
                pass
            except KeyboardInterrupt:
                break
            except zmq.ContextTerminated:
                break
            except zmq.ZMQError as e:
                if e.errno == zmq.EAGAIN:
                    pass  # no message was ready
                else:
                    break
            except Exception:
                traceback.print_exc()
                # Not great to just keep going, but safer for now!

        self.frontend.close()
        self.backend.close()
        with self.lock:
            self.frontend_push.close()
        self.context.term()
        self.log.info("ZMQTaskScheduler stopped")
    
    def stop(self):
        self.log.info("Shutting down ZMQTaskScheduler")
        self.kill_switch = True
        self.join(5.0)  # join() timeout is in seconds
        self.log.info("ZMQTaskScheduler shutdown finished")
            
    def start_system(self, task_system):
        """
        Start an incoming task system
        """
        self.push_tasks(task_system.generate_tasks(self.master))
    
    def _push_task(self, task):
        """
        Lock-free variant of the push_task method
        """
        send_to_zmq_zipped(self.frontend_push, task)
        
    def push_tasks(self, tasks):
        """
        Push all tasks on the global task queue
        """
        with self.lock:
            # DO NOT USE push_task to queue tasks! It would be a deadlock!
            for task in tasks:
                self._push_task(task)
                #self.tasks.put(task)
    
    def push_task(self, task):
        """
        Put a task on the global task queue
        """
        with self.lock:
            # Do not poison ourselves!
            if task:
                self._push_task(task)
                #self.tasks.put(task)
    
    def rate_slaves(self):
        """
        Update slaves
        """
        pass
    
    def _tasked_pushed(self, slave_id):
        """
        A slave has acquired a new task, update its rank
        """
        pass
    
    def task_finished(self, task, result, error):
        """
        A slave has finished a task, update its rank
        """
        task.finished(result, error)
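At its core, the scheduler's run() loop is a PULL-to-PUSH forwarder with shutdown handling. pyzmq ships a built-in device that performs the same forwarding, shown here as a comparison sketch (ports are illustrative, and the blocking call has none of the scheduler's shutdown logic):

import zmq

context = zmq.Context()
frontend = context.socket(zmq.PULL)
frontend.bind("tcp://*:5000")
backend = context.socket(zmq.PUSH)
backend.bind("tcp://*:5001")
zmq.proxy(frontend, backend)  # blocks forever, forwarding frontend -> backend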
Example #5
class MandlebrotSimpleNode(ApplicationNode):
    """
    Application node distributing the computation of the Mandelbrot set using just tasks
    """
    implements(IApp)

    use_optimized_task = BoolItem(
        'mandlebrotsample', 'use_optimized_task', True,
        """Should we use the data optimized task or the lazy task""")

    send_task_batch = BoolItem(
        'mandlebrotsample', 'task_batch', True,
        """Should we send all tasks one by one or batch them into one huge list"""
    )

    factor = IntItem(
        'mandlebrotsample', 'factor', 1,
        """How many workloads a single task gets assigned; in our case a workload is a row"""
    )

    iters = IntItem('mandlebrotsample', 'iters', 20,
                    """Mandelbrot iterations per pixel""")

    height = IntItem('mandlebrotsample', 'height', 1024,
                     """Height of the Mandelbrot set image""")

    width = IntItem('mandlebrotsample', 'width', 1536,
                    """Width of the Mandelbrot set image""")

    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(MandlebrotSimpleNode, self).app_init()

    def app_main(self):
        """
        Applications main entry
        """
        return super(MandlebrotSimpleNode, self).app_main()

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are 
        able to send computation tasks over
        """
        # Do not create a tasks system, we will handle tasks on our own
        return None

    def start_processing(self):
        """
        Called when the app is not using a ITaskSystem and will instead just add tasks and
        will take care of the task flow itself
        """
        self.log.info("Starting computation")
        if self.send_task_batch:
            self.log.info(" Task batching enabled")

        self.start_time = time.time()
        self.image = np.zeros((self.height, self.width), dtype=np.uint8)

        # Init task related stuff
        self.min_x = -2.0
        self.max_x = 1.0
        self.min_y = -1.0
        self.max_y = 1.0

        self.pixel_size_x = (self.max_x - self.min_x) / self.width
        self.pixel_size_y = (self.max_y - self.min_y) / self.height

        # Job handling (very optimistic :D)
        self.jobs = 0
        self.finished_jobs = 0

        job_list = []
        workload = []

        rows = 0
        x = 0

        if self.use_optimized_task:
            num_tasks, remainder = divmod(self.width, self.factor)
            self.jobs = num_tasks + remainder

            for i in xrange(0, self.jobs):
                if self.send_task_batch:
                    job_list.append(
                        MandlebrotTaskOptimized("m",
                                                None,
                                                self.node_id_str,
                                                iters=self.iters,
                                                start_x=i,
                                                rows=self.factor,
                                                cols=self.height,
                                                pixel_size_x=self.pixel_size_x,
                                                pixel_size_y=self.pixel_size_y,
                                                min_x=self.min_x,
                                                min_y=self.min_y))
                else:
                    self.push_task(
                        MandlebrotTaskOptimized("m",
                                                None,
                                                self.node_id_str,
                                                iters=self.iters,
                                                start_x=i,
                                                rows=self.factor,
                                                cols=self.height,
                                                pixel_size_x=self.pixel_size_x,
                                                pixel_size_y=self.pixel_size_y,
                                                min_x=self.min_x,
                                                min_y=self.min_y))
        else:
            for x in range(self.width):
                # Distribute using rows
                rows += 1

                real = self.min_x + x * self.pixel_size_x
                for y in range(self.height):
                    imag = self.min_y + y * self.pixel_size_y
                    workload.append((x, y, real, imag, self.iters))

                # Every self.factor rows, create a task with the workload. Note that in this
                # case we force the system_id to None while setting the client id.
                if rows == self.factor:
                    if self.send_task_batch:
                        job_list.append(
                            MandlebrotTask("mandle_{}".format(x),
                                           None,
                                           self.node_id_str,
                                           iters=self.iters,
                                           workload=workload))
                    else:
                        self.push_task(
                            MandlebrotTask("mandle_{}".format(x),
                                           None,
                                           self.node_id_str,
                                           iters=self.iters,
                                           workload=workload))
                        self.jobs += 1
                    workload = []
                    rows = 0

            # Add last task with rest of workload
            if len(workload) > 0:
                if self.send_task_batch:
                    job_list.append(
                        MandlebrotTask("mandle_{}".format(x),
                                       None,
                                       self.node_id_str,
                                       iters=self.iters,
                                       workload=workload))
                else:
                    self.push_task(
                        MandlebrotTask("mandle_{}".format(x),
                                       None,
                                       self.node_id_str,
                                       iters=self.iters,
                                       workload=workload))
                    self.jobs += 1

            if self.send_task_batch:
                self.jobs = len(job_list)

        # Send batch or check for eventual end condition
        if self.send_task_batch:
            self.push_tasks(job_list)
        else:
            # Check in case we are already done!
            self.check_finished()

    def task_finished(self, task, result, error):
        """
        Called when a task has been done
        """
        # Integrate results in our image
        if result:
            for x, column in result.iteritems():
                for y, value in column.iteritems():
                    self.image[y, x] = value

        self.finished_jobs += 1
        self.check_finished()

    def check_finished(self):
        """
        Check if we finished all computation or not
        """
        if self.finished_jobs == self.jobs:
            self.log.info("All tasks finished!!")
            print("Calculated in {} seconds!".format(time.time() -
                                                     self.start_time))
            self.shutdown_main_loop()
            imshow(self.image)
            show()

    def push_task_response(self, result):
        """
        We just added a Task to the computation framework
        """
        pass
        #self.log.info("Task send to computation framework")

    def push_task_failed(self, result):
        """
        We failed to add a Task to the computation framework
        """
        self.log.info("Failed to send task send to computation framework")

    def push_tasks_response(self, result):
        """
        We just added a set of Tasks to the computation framework
        """
        self.log.info("Tasks sent to computation framework")

    def push_tasks_failed(self, result):
        """
        We failed to add a set of Tasks to the computation framework
        """
        self.log.info("Failed to send tasks send to computation framework")
Example #6
class GenericTaskManager(Component):
    """
    Simple task manager used in simple single-job applications
    """
    implements(ITaskManager)
    
    num_workers = IntItem('GenericTaskManager', 'num_workers', -1,
        """Number of worker processes to be created; -1 will spawn as many as there are physical cores.""")
    
    def __init__(self, *args, **kwargs):
        Component.__init__(self, *args, **kwargs)
        # Initialize base manager stuff
        self._num_workers = 0
        self.results = multiprocessing.JoinableQueue()
        
    def init(self, identity, address):
        """
        Initialize the manager
        """
        self.identity = identity
        self.host = address[0]
        self.port = address[1]
        
        self._num_workers = self.num_workers
        if self._num_workers <= 0:
            self._num_workers = multiprocessing.cpu_count()
            
        # We now prepare our queues, both the joinable and the results
        # queues. Then we just create a process for each worker
        self.tasks = multiprocessing.JoinableQueue()
        self.processes = [TaskProcess(self.results, i, self.tasks, self.identity, self.host, self.port) for i in range(self._num_workers)]
        #self.processes = [TaskProcess(self.results, i) for i in range(self._num_workers)]
        
        context = zmq.Context()
        self.ventilator_send = context.socket(zmq.PUSH)
        self.ventilator_send.bind("tcp://127.0.0.1:%d" % WORKER_PORT)
    
    def get_num_workers(self):
        """
        Return the number of workers we use for our processing
        """
        return self._num_workers
    
    def start(self):
        """
        Start our worker processes
        """
        for worker in self.processes:
            worker.daemon = True
            worker.start()

    def stop(self):
        """
        Stop our worker processes
        """
        for i in xrange(self._num_workers):
            #send_to_zmq_zipped(self.ventilator_send, None)
            # Send one poison pill per worker
            self.tasks.put(None)
        # Poison for result listener
        self.results.put(None)

    def update_pool(self, _num_workers=-1):
        """
        Set the number of workers the task manager should use
        """
        self.stop()
        # Note: init() re-reads num_workers from the config item, so the
        # _num_workers argument is not applied here
        self.init(self.identity, (self.host, self.port))
        self.start()

    def push_task(self, task):
        """
        Push a task that should be completed by the workers
        """
        try:
            #send_to_zmq_zipped(self.ventilator_send, task)
            self.tasks.put(task)
        except:
            traceback.print_exc()
        return True

    def wait_for_all(self):
        """
        Wait until all tasks have been finished
        """
        pass
    
    def get_results_queue(self):
        """
        Return a reference to the result queue
        """
        return self.results
    
    def task_finished(self, task, result, error):
        """
        Called once a task has been performed
        """
        task.finished(result, error)
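stop() relies on the standard multiprocessing poison-pill idiom: one None per worker on a JoinableQueue. A self-contained sketch of that idiom (the worker body is illustrative; in the framework, TaskProcess plays this role):

import multiprocessing

def worker(tasks, results):
    while True:
        task = tasks.get()
        if task is None:      # poison pill, same convention as stop() above
            tasks.task_done()
            break
        results.put(task * 2)
        tasks.task_done()

if __name__ == "__main__":
    tasks = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()
    proc = multiprocessing.Process(target=worker, args=(tasks, results))
    proc.daemon = True
    proc.start()
    for i in range(3):
        tasks.put(i)
    tasks.put(None)
    tasks.join()              # blocks until every item was marked task_done()
    print(sorted(results.get() for _ in range(3)))  # [0, 2, 4]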
Example #7
class MandlebrotNode(ApplicationNode):
    """
    Application node distributing the computation of the Mandelbrot set using an autonomous task system
    """
    implements(IApp)

    use_optimized_task = BoolItem(
        'mandlebrotsample', 'use_optimized_task', True,
        """Should we use the data optimized task or the lazy task""")

    factor = IntItem(
        'mandlebrotsample', 'factor', 1,
        """How many workloads a single task gets assigned; in our case a workload is a row"""
    )

    iters = IntItem('mandlebrotsample', 'iters', 20,
                    """Mandelbrot iterations per pixel""")

    height = IntItem('mandlebrotsample', 'height', 1024,
                     """Height of the Mandelbrot set image""")

    width = IntItem('mandlebrotsample', 'width', 1536,
                    """Width of the Mandelbrot set image""")

    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(MandlebrotNode, self).app_init()

    def app_main(self):
        """
        Applications main entry
        """
        return super(MandlebrotNode, self).app_main()

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are 
        able to send computation tasks over
        """
        self.start_time = time.time()
        self.system = MandlebrotTaskSystem(-2.0, 1.0, -1.0, 1.0, self.height,
                                           self.width, self.iters, self.factor,
                                           self.use_optimized_task)
        return self.system

    def work_finished(self, result, task_system):
        """
        Called when the work has been done; the result is what our ITaskSystem
        sent back to us. Check result for more info
        """
        print("Total time: {}".format(time.time() - self.start_time))
        self.shutdown_main_loop()
        # Reassemble the result to be processed further
        try:
            self.system.image = np.zeros((self.height, self.width),
                                         dtype=np.uint8)
            self.system.do_post_run(result)
        except:
            traceback.print_exc()

    def push_tasksystem_response(self, result):
        """
        We just added an ITaskSystem to the framework. Check result for more info
        """
        self.log.info("Task system sent to computation framework")

    def push_tasksystem_failed(self, result):
        """
        We failed to push an ITaskSystem onto the computation framework!
        """
        self.log.error("Task system could not be sent to the framework!")
        # Check if the result dict contains a traceback
        if "t" in result:
            self.log.error(result["t"])
Example #8
class BaseNode(object):
    """
    Base node; all nodes are at least of this type.
    Responsible for hosting and exposing a simple API
    apart from listening on a TCP port for socket interactions.
    """

    port = IntItem('node', 'port', 8080,
                   """Port of the API interface with this node""")

    use_gzip = BoolItem(
        'node', 'use_gzip', True,
        """Check if we should gzip all interactions (recommended)""")

    pickler = ExtensionPointItem(
        'Node', 'pickler', IPickler, 'Pickler',
        """Pickler class used by the whole framework""")

    proxy_api = IntItem('node', 'proxy_api', 1,
                        """API version used for any client JSON RPC calls""")

    proxy_username = ConfigItem(
        'node', 'proxy_username', '',
        """Username used when performing API client calls""")

    proxy_password = ConfigItem(
        'node', 'proxy_password', '',
        """Password used when performing API client calls""")

    heartbeat_timer = FloatItem(
        'node', 'heartbeat_timer', 5.0,
        """Timer used to send periodically heartbeats to the master""")

    stats_dump_timer = FloatItem(
        'node', 'stats_dump_timer', 30.0,
        """Timer used to dump stats into the log. -1 will never dump stats.""")

    secret = ConfigItem(
        'node', 'crypot_secret', 'JhTv535Vg385V',
        """Default secret used when encrypting/decrypting a pickle""")

    # salt size in bytes
    salt_size = IntItem('node', 'crypot_salt_size', 16,
                        """Size of the salt used in the encryption process""")

    # number of iterations in the key generation
    num_iterations = IntItem(
        'node', 'crypot_num_iterations', 20,
        """Number of iterations used in the key generation""")

    # the size multiple required for AES
    aes_padding = IntItem('node', 'crypot_aes_padding', 16,
                          """Padding used for AES encryption""")

    urls = (
        # Get and basic API handling (not versioned!)
        '/', 'index_get',
        '/ping/', 'ping_get',
        '/ping', 'ping_get',
        '/status/', 'status_get',
        '/status', 'status_get',
        '/stats/', 'stats_get',
        '/stats', 'stats_get',
        # Post API handling of version 1
        '/api/1/', 'APIHandlerV1',
        '/api/1', 'APIHandlerV1')

    def app_init(self):
        """
        Initialize application just before running it
        """
        self.lock_cache = RWLockCache()

    def app_main(self):
        """
        Launch a concurrent application
        """
        # Generate rest API
        self.generate_api()

        # Now run our API listener
        self.log.debug("Hosting application on port %d" % (self.port))

        # Get a ref to our stats helper
        self.stats = Stats.getInstance()

        # Create crypto helper used for network communication
        self.crypto_helper = CryptoHelper(self.salt_size, self.num_iterations,
                                          self.aes_padding)

        # Make sure the URL proxy knows us
        global global_hook
        global_hook = GlobalHook({'node': self})

        # The API should only exist on the master node, used for node registration
        # and heartbeats. Each node will have a socket, while slave nodes will have
        # a local server too. These servers are not web servers because web servers
        # are too expensive!

        # TODO: Refactor the server and add a client that connects to the server
        # through a normal socket!

        # The master server will act as only that, a controller, and will
        # distribute work using a better performing mechanism: UDP?

        # Use async calls for heartbeats, for example.

        # Create the server the same way the PP guys do (see ppserver). Try using
        # a multithreaded pool to handle connections instead of threads!

        self.api_thread = api_thread(self.log, self.urls, self.port,
                                     self.use_gzip)
        self.api_thread.daemon = True
        self.api_thread.start()

        self.heartbeat_threshold = self.heartbeat_timer
        self.current_time = 0
        self.last_time = 0
        self.last_delta_time = 0

        self.stats_dump_threshold = self.stats_dump_timer

        # Bool flag used to control the main loop
        self.kill_received = False

        # Give it some time until it's up
        time.sleep(0.5)
        return APP_RET_CODE_SUCCESS

    def stop_api_thread(self):
        self.api_thread.stop()

    def main_loop(self):
        # Register with master before anything
        if self.has_master():
            self.register_with_master()
        self.last_time = time.time()
        while not self.kill_received:
            try:
                # Calculate delta time for this frame
                self.current_time = time.time()
                delta_time = self.current_time - self.last_time
                self.on_update(delta_time)

                # Save last time
                self.last_time = self.current_time
                self.last_delta_time = delta_time
            except KeyboardInterrupt:
                try:
                    if self.has_master():
                        self.unregister_from_master()
                except Exception as e:
                    traceback.print_exc()
                self.log.info("Exiting main loop")
                self.kill_received = True
            except Exception as e:
                traceback.print_exc()
                self.log.error("Mainloop exception: %s" % (e))
        self.log.info("Main loop exited!")

    def shutdown_main_loop(self):
        self.kill_received = True

    def on_update(self, delta_time):
        # Only dump if requested
        if self.stats_dump_timer > 0:
            self.stats_dump_threshold -= delta_time
            if self.stats_dump_threshold < 0:
                self.stats.dump_stats(self.log)
                self.stats_dump_threshold = self.stats_dump_timer

    def generate_api(self):
        # API service handler for version 1 (only version for now)
        self.api_service_v1 = SimpleJSONRPCService(api_version=1)

        @jsonremote(self.api_service_v1)
        def ping(request):
            return "pong"

        @jsonremote(self.api_service_v1)
        def status(request):
            return self.status()

        @jsonremote(self.api_service_v1)
        def api(request):
            return self.api_service_v1.api()

    def ping(self):
        return "pong"

    def index(self):
        return "OK"

    def status(self):
        status = {
            'node': self.__class__.__name__,
            'systeminfo': self.compmgr.systeminfo
        }
        return status

    def get_stats(self):
        return self.stats.dump_all()

    def create_node_proxy(self, url):
        """
        Create a new JSON proxy instance used by the node when acting in a client role
        """
        return NodeProxy(
            pyjsonrpc.HttpClient(
                url=("http://%s/api/%d") % (url, self.proxy_api),
                username=self.proxy_username,
                password=self.proxy_password), self.log, self.rpc_call_success,
            self.rpc_call_failed)

    def create_tcp_proxy(self, host, port):
        """
        Create a JSON TCP socket proxy instance to a server
        """
        #tcp_client = TCPClient(self.log, host, port, self)
        #return TCPProxy(tcp_client, self.log), tcp_client
        tcp_client = TCPServerProxyZMQ(self.node_id_str, host, port, self.log)
        return TCPProxy(tcp_client, self.log), tcp_client

    def create_tcp_client_proxy(self, sock, request):
        """
        Create a JSON TCP socket proxy instance to a client
        """
        return TCPProxyZMQ(sock, request, self.log)

    def create_tcp_client_proxy_zmq(self, context, identity):
        """
        Create a JSON TCP socket proxy instance to a client
        """
        return TCPProxy(TCPClientProxyZMQ(context, identity, self.log),
                        self.log)
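BaseNode drives all periodic work (heartbeats, stats dumps) with a countdown pattern: on_update() subtracts the frame's delta time from a threshold and fires when it drops below zero. A minimal standalone sketch of the same pattern (the interval value is illustrative):

import time

interval = 2.0                 # e.g. stats_dump_timer
threshold = interval
last = time.time()
for _ in range(8):
    time.sleep(0.5)
    now = time.time()
    threshold -= now - last    # same bookkeeping as main_loop()/on_update()
    last = now
    if threshold < 0:
        print("periodic action fires")
        threshold = interval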
Example #9
class ExpensiveSimpleNode(ApplicationNode):
    """
    Application node distributing the computation of expensive tasks using just tasks
    """
    implements(IApp)

    send_task_batch = BoolItem(
        'expensivesample', 'task_batch', True,
        """Should we send all tasks one by one or batch them into one huge list"""
    )

    time_per_task = IntItem(
        'expensivesample', 'time_per_task', 1,
        """Time each task will spend doing nothing (active wait) to simulate an expensive computation"""
    )

    num_tasks = IntItem('expensivesample', 'num_tasks', 8,
                        """Number of tasks that must be performed""")

    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(ExpensiveSimpleNode, self).app_init()

    def app_main(self):
        """
        Applications main entry
        """
        return super(ExpensiveSimpleNode, self).app_main()

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are 
        able to send computation tasks over
        """
        # Do not create a tasks system, we will handle tasks on our own
        return None

    def start_processing(self):
        """
        Called when the app is not using a ITaskSystem and will instead just add tasks and
        will take care of the task flow itself
        """
        self.log.info("Starting computation")
        if self.send_task_batch:
            self.log.info(" Task batching enabled")

        self.start_time = time.time()
        self.finished_jobs = 0
        if self.send_task_batch:
            self.push_tasks([
                ExpensiveTask("expensive_{}".format(i),
                              None,
                              self.node_id_str,
                              sleep_time=self.time_per_task)
                for i in range(self.num_tasks)
            ])
        else:
            for i in range(self.num_tasks):
                self.push_task(
                    ExpensiveTask("expensive_{}".format(i),
                                  None,
                                  self.node_id_str,
                                  sleep_time=self.time_per_task))
            self.check_finished()

    def task_finished(self, task, result, error):
        """
        Called when a task has been done
        """
        self.finished_jobs += 1
        self.check_finished()

    def check_finished(self):
        """
        Check if we finished all computation or not
        """
        self.log.info("%d -> %d" % (self.finished_jobs, self.num_tasks))
        if self.finished_jobs == self.num_tasks:
            self.log.info("All tasks finished!!")
            end_time = time.time() - self.start_time
            self.log.info("Total time: {}".format(end_time))

            # Print expected single threaded time and improvement
            expected_time = self.time_per_task * self.num_tasks
            self.log.info(
                "Plain python expected time: {}".format(expected_time))
            self.log.info("Concurrent improvememnet: {}%".format(
                (expected_time / end_time) * 100.0))
            self.shutdown_main_loop()

    def push_task_response(self, result):
        """
        We just added a Task to the computation framework
        """
        pass
        #self.log.info("Task send to computation framework")

    def push_task_failed(self, result):
        """
        We failed to add a Task to the computation framework
        """
        self.log.info("Failed to send task send to computation framework")

    def push_tasks_response(self, result):
        """
        We just added a set of Tasks to the computation framework
        """
        self.log.info("Tasks sent to computation framework")

    def push_tasks_failed(self, result):
        """
        We failed to add a set of Tasks to the computation framework
        """
        self.log.info("Failed to send tasks send to computation framework")
Example #10
class ExpensiveNode(ApplicationNode):
    """
    Application node distributing the computation of an expensive task
    """
    implements(IApp)

    time_per_task = IntItem(
        'expensivesample', 'time_per_task', 1,
        """Time each task will spend doing nothing (active wait) to simulate an expensive computation"""
    )

    num_tasks = IntItem('expensivesample', 'num_tasks', 8,
                        """Number of tasks that must be performed""")

    def app_init(self):
        """
        Called just before the main entry. Used as the initialization point instead of the ctor
        """
        super(ExpensiveNode, self).app_init()

    def app_main(self):
        """
        Applications main entry
        """
        return super(ExpensiveNode, self).app_main()

    def get_task_system(self):
        """
        Called from the base class when we are connected to a MasterNode and we are 
        able to send computation tasks over
        """
        self.start_time = time.time()
        self.system = ExpensiveNodeTaskSystem(self.time_per_task,
                                              self.num_tasks)
        return self.system

    def work_finished(self, result, task_system):
        """
        Called when the work has been done; the result is what our ITaskSystem
        sent back to us. Check result for more info
        """
        end_time = time.time() - self.start_time
        self.log.info("Total time: {}".format(end_time))

        # Print expected single threaded time and improvement
        expected_time = self.time_per_task * self.num_tasks
        self.log.info("Plain python expected time: {}".format(expected_time))
        self.log.info("Concurrent improvememnet: {}%".format(
            (expected_time / end_time) * 100.0))
        self.shutdown_main_loop()

    def push_tasksystem_response(self, result):
        """
        We just added an ITaskSystem to the framework. Check result for more info
        """
        self.log.info("Task system sent to computation framework")

    def push_tasksystem_failed(self, result):
        """
        We failed to push an ITaskSystem onto the computation framework!
        """
        self.log.error("Task system could not be sent to the framework!")
        # Check if the result dict contains a traceback
        if "t" in result:
            self.log.error(result["t"])
Example #11
class MasterNode(Component, BaseNode):
    """
    A MasterNode is a compute node that can act and be used in computation when in standalone
    mode, but is mainly used to distribute jobs among registered slaves. Once the jobs of a
    slave, or its own, are finished we redistribute the results to the responsible client nodes.
    """
    implements(IApp)
    is_standalone = BoolItem('masternode', 'is_standalone', False,
        """Master node is also a slave and a standalone application""")
    
    inactivity_time_multiplier = IntItem('node', 'inactivity_time_multiplier', 3,
        """Inactivity multiplier multiplies the heartbeat time to ensure inactivity always spans several heartbeats""")
    
    registry_mirror_timer = FloatItem('masternode', 'registry_mirror_timer', 30.0,
        """Timer used to update node registry mirror""")
    
    registry_cleanup_timer = FloatItem('masternode', 'registry_cleanup_timer', 60.0,
        """Timer used to cleanup the node registry""")
    
    task_scheduler = ExtensionPointItem('masternode', 'task_scheduler', ITaskScheduler, 'GenericTaskScheduler',
        """Task scheduler used by the master node""")
    
    master_port = IntItem('node', 'master_port', 8081,
        """Port used by the master node for high-performance communication and dedicated persistent connections""")
    
    def app_init(self):
        """
        Initialize application just before running it
        """
        super(MasterNode, self).app_init()
        
        # Start our TCPServer,
        #self.server = TCPServer("localhost", self.master_port, self)
        #self.server_thread = threading.Thread(name="tcp_server", target=self.server.serve_forever)
        #self.server_thread.daemon = True
        
        # Set up our ZeroMQ async server
        self.zmq_server = TCPServerZMQ(self.master_port, self.log, 5)
        
        # The node registry holds up-to-date info about slaves/clients and their
        # processing; we keep track of the number of tasks submitted to each slave,
        # how they perform, general statistics and more.
        self.node_registry = defaultdict(self._default_node)
        self.registry_lock = self.lock_cache.registry_lock
        self.node_cleanup_threshold = self.registry_cleanup_timer
        self.task_scheduler.setup(self)
        
        # Our client registry
        self.client_registry = defaultdict(self._default_node)
        self.client_registry_lock = self.lock_cache.client_registry_lock
        
        # The registry mirror is used to publish updates from time to time and cache them.
        # We use a separate dict so client status requests do not block
        self.node_registry_mirror = {}
        self.registry_mirror_lock = self.lock_cache.registry_mirror_lock
        self.registry_mirror_threshold = self.registry_mirror_timer
        self.registry_mirror_dirty = True
        
        # Client registry mirror
        self.client_registry_mirror = {}
        self.client_registry_mirror_lock = self.lock_cache.client_registry_mirror_lock
        
        # Timer which controls inactivity handling of a node, being it a slave or a client
        self.inactivity_timer = self.heartbeat_timer*self.inactivity_time_multiplier
        self.inactivity_unregister_timer = self.inactivity_timer * 3
        self.inactivity_threshold = self.inactivity_timer
        
        self.test_timer = 1
        self.test_app_id = uuid.uuid1()
        
        # Our task system registry
        self.tasksystem_registry = defaultdict(self._default_tasksystem)
        self.tasksystem_lock = self.lock_cache.tasksystem_lock
        
        # Create master thread
        #self.master_thread = master_thread(self.log)
    
    def app_main(self):
        """
        Launch a concurrent application
        """
        self.log.info("Initializing MasterNode")
        result = super(MasterNode, self).app_main()
        if result not in SUCCESS_RET_CODES:
            return result

        # Start the main server thread
        #self.server_thread.start()
        self.zmq_server.start()
            
        # Enter main loop
        self.main_loop()
        
        # Stop all threads and processes
        #self.server.shutdown()
        self.zmq_server.stop()
        self.notify_shutdown()
        self.stop_api_thread()
        #self.stop_master_thread()
        self.task_scheduler.stop()        
        
        # Now launch base node
        return result
    
    def handle_echo(self, sock, address):
        print(address)
        fp = sock.makefile()
        while True:
            line = fp.readline()
            if line:
                fp.write(line)
                fp.flush()
            else:
                break
    
    def stop_master_thread(self):
        self.master_thread.stop()
         
    def generate_api(self):
        """
        Create all rpc methods the node requires
        """
        super(MasterNode, self).generate_api()
        if not self.is_standalone:
            @jsonremote(self.api_service_v1)
            def register_slave(request, node_id, port, data):
                self.stats.add_avg('register_slave')
                return self.register_node(node_id, web.ctx['ip'], port, data, NodeType.slave)
            
            @tcpremote(self.zmq_server, name='register_slave')
            #@tcpremote(self.server, name='register_slave')
            def register_slave_tcp(handler, request, node_id):
                self.stats.add_avg('register_slave_tcp')
                return self.register_node_tcp(handler, request, node_id, NodeType.slave)
            
            @jsonremote(self.api_service_v1)
            def register_client(request, node_id, port, data):
                self.stats.add_avg('register_client')
                return self.register_node(node_id, web.ctx['ip'], port, data, NodeType.client)
            
            @tcpremote(self.zmq_server, name='register_client')
            #@tcpremote(self.server, name='register_client')
            def register_client_tcp(handler, request, node_id):
                self.stats.add_avg('register_client_tcp')
                return self.register_node_tcp(handler, request, node_id, NodeType.client)
            
            @jsonremote(self.api_service_v1)
            def unregister_slave(request, node_id):
                self.stats.add_avg('unregister_slave')
                return self.unregister_node(node_id, NodeType.slave)
            
            @jsonremote(self.api_service_v1)
            def unregister_client(request, node_id):
                self.stats.add_avg('unregister_client')
                return self.unregister_node(node_id, NodeType.client)
            
            @jsonremote(self.api_service_v1)
            def heartbeat_slave(request, node_id):
                self.stats.add_avg('heartbeat_slave')
                return self.heartbeat(node_id, NodeType.slave)
            
            @jsonremote(self.api_service_v1)
            def heartbeat_client(request, node_id):
                self.stats.add_avg('heartbeat_client')
                return self.heartbeat(node_id, NodeType.client)
            
            @tcpremote(self.zmq_server)
            #@tcpremote(self.server)
            def task_finished(handler, request, task, result, error):
                self.stats.add_avg('task_finished')
                self.task_finished(task, result, error)
                # This is an end method for the interaction
                raise NoResponseRequired()
            
            @tcpremote(self.zmq_server)
            #@tcpremote(self.server)
            def push_task_response(handler, request, result):
                # TODO: Handle failure when result is False!
                pass
            
            @tcpremote(self.zmq_server)
            #@tcpremote(self.server)
            def push_task_failed(handler, request, result):
                # TODO: Handle failure when pushing tasks failed!
                pass

        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def push_tasksystem(handler, request, tasksystem):
            """
            Push an application onto the computation framework
            """
            self.stats.add_avg('push_tasksystem')
            return self.push_tasksystem(request, tasksystem)
        
        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def push_task(handler, request, task):
            """
            Push a task onto the computation framework
            """
            self.stats.add_avg('push_task')
            return self.push_task(request, task)
        
        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def push_tasks(handler, request, tasks):
            """
            Push a set of tasks onto the computation framework
            """
            self.stats.add_avg('push_tasks')
            if isinstance(tasks, list):
                for task in tasks:
                    if not self.push_task(request, task):
                        return False
            return True
        
        @tcpremote(self.zmq_server)
        #@tcpremote(self.server)
        def test_method(handler, request):
            print("test_method from {}".format(request))
            raise NoResponseRequired()
    
    def _generate_status_dict(self, node):
        return {'type': node.type, 'state': node.state}
    
    def status(self):
        status = ComputeNode.status(self)
        with self.registry_mirror_lock.readlock:
            status['nodes'] = dict((k, self._generate_status_dict(v)) for k, v in self.node_registry_mirror.iteritems() if v)
        with self.client_registry_mirror_lock.readlock:
            status['clients'] = dict((k, self._generate_status_dict(v)) for k, v in self.client_registry_mirror.iteritems() if v)
        return status
    
    def on_update(self, delta_time):
        super(MasterNode, self).on_update(delta_time)
        
        # Update map
        self.registry_mirror_threshold -= delta_time
        if self.registry_mirror_threshold < 0:
            self.update_registry_mirror()
            self.registry_mirror_threshold = self.registry_mirror_timer
        
        # Handle inactive nodes or cleanup empty nodes
        self.inactivity_threshold -= delta_time
        self.node_cleanup_threshold -= delta_time
        if self.inactivity_threshold < 0:
            self.update_inactive_nodes()
            self.inactivity_threshold = self.inactivity_timer
        elif self.node_cleanup_threshold < 0:
            self.clean_node_map()
            self.node_cleanup_threshold = self.registry_cleanup_timer
    
    def has_master(self):
        """
        Check if the node has a master or not. Master node has no master itself
        """
        return False
    
    def _handle_timeout(self, node):
        """
        Handle state for a given node by checking the node's timestamp value
        """
        elapsed_time = self.current_time - node['heartbeat']
        if node['state'] == NodeState.active and elapsed_time > self.inactivity_timer:
            self.log.info("Node %s set to inactive (t:%f)" % (node['node_id'], elapsed_time))
            node['state'] = NodeState.inactive
            self.set_registry_dirty()
        elif node['state'] == NodeState.inactive and elapsed_time > self.inactivity_unregister_timer:
            # Delete node! Too much time inactive!
            self.log.info("Node %s kicked from system! Too much time of inactivity! (t:%f)" % (node['node_id'], elapsed_time))
            self.set_registry_dirty()
            return None
        return node
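
    # Worked numbers with the defaults above: heartbeat_timer (5.0 s, from
    # BaseNode) times inactivity_time_multiplier (3) gives inactivity_timer = 15 s,
    # and inactivity_unregister_timer is three times that, so a node is marked
    # inactive after 15 s without a heartbeat and kicked after 45 s.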
    
    def set_registry_dirty(self):
        """
        Set the registry dirty, this will force an update of the task scheduler
        """
        self.registry_mirror_dirty = True
        self.update_scheduler()
        
    def update_scheduler(self):
        """
        Update task scheduler with the current list of slaves
        """
        self.task_scheduler.rate_slaves()
        
    def update_inactive_nodes(self):
        """
        Called when we check for inactive nodes, those that have not sent any heartbeat for a while
        """
        self.log.info("Checking for inactive nodes...") 
        with self.registry_lock.writelock:
            self.node_registry = dict((k, self._handle_timeout(v)) for k, v in self.node_registry.iteritems() if v)
        with self.client_registry_lock.writelock:
            self.client_registry = dict((k, self._handle_timeout(v)) for k, v in self.client_registry.iteritems() if v)
    
    def update_registry_mirror(self):
        """
        Update the registry mirror with a copy of the registry. Used to expose a copy dict to the public.
        """
        if self.registry_mirror_dirty:
            self.log.info("Updating node registry mirror...")
            with self.registry_mirror_lock.writelock:
                self.node_registry_mirror = dict((k, v) for k, v in self.node_registry.iteritems() if v)
            with self.client_registry_mirror_lock.writelock:
                self.client_registry_mirror = dict((k, v) for k, v in self.client_registry.iteritems() if v)
            self.registry_mirror_dirty = False
    
    def clean_node_map(self):
        """
        Clean node map for any empty node values.
        """
        self.log.info("Cleaning node registry...")
        with self.registry_lock.writelock:
            self.node_registry = dict((k, v) for k, v in self.node_registry.iteritems() if v)
        with self.client_registry_lock.writelock:
            self.client_registry = dict((k, v) for k, v in self.client_registry.iteritems() if v)
    
    def get_node_id_no_lock(self, url):
        return next((k for k, v in self.node_registry.iteritems() if v and v.url == url), None)
    
    def get_node_id(self, url):
        """
        Return a node id given a URL
        """
        with self.registry_lock.readlock:
            node_id = self.get_node_id_no_lock(url)
        return node_id
    
    def get_client_id_no_lock(self, url):
        return next((k for k, v in self.client_registry.iteritems() if v and v.url == url), None)
    
    def get_client_id(self, url):
        """
        Return a client id given a URL
        """
        with self.client_registry_lock.readlock:
            client_id = self.get_client_id_no_lock(url)
        return client_id
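
    # The *_no_lock variants exist so that callers already holding the matching
    # registry lock can resolve an id without re-acquiring it; the public
    # get_node_id/get_client_id wrappers take the read lock themselves.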
    
    def get_node(self, url):
        """
        Get a node representation given a URL
        """
        node = None
        with self.registry_lock.readlock:
            node_id = self.get_node_id_no_lock(url)
            if node_id:
                node = self.node_registry[node_id]
        return node
    
    def get_client(self, url):
        """
        Get a client representation given a URL
        """
        node = None
        with self.client_registry_lock.readlock:
            node_id = self.get_client_id_no_lock(url)
            if node_id:
                node = self.client_registry[node_id]
        return node
    
    def _default_node(self):
        return {}
    
    def _default_tasksystem(self):
        return Bunch({})
    
    def _default_slave_bunch(self):
        return Bunch({'node_id': '', 'url': '', 'ip': '', 'port': 0,
                      'type': NodeType.slave, 'state': NodeState.inactive,
                      'heartbeat': 0, 'proxy': None, 'workers': 0, 'tasks': 0,
                      'rating': 0.0, 'handler': None})
    
    def _default_client_bunch(self):
        return Bunch({'node_id': '', 'url': '', 'ip': '', 'port': 0,
                      'type': NodeType.client, 'state': NodeState.inactive,
                      'heartbeat': 0, 'proxy': None, 'handler': None})
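
    # Bunch (assumed here to be a dict subclass exposing keys as attributes)
    # lets the registry code write node.state instead of node['state']:
    #
    #   b = self._default_slave_bunch()
    #   b.state = NodeState.active   # equivalent to b['state'] = NodeState.active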
    
    def register_node(self, node_id, ip, port, data, node_type):
        """
        Register a node within our node map
        """
        try:
            # TODO: CHECK ALL CLIENT DATA!
            url = ("%s:%d") % (ip, port)
            if NodeType.slave == node_type:
                with self.registry_lock.writelock:
                    # We already hold the write lock, so use the no-lock lookup;
                    # a node registering again reuses its existing entry
                    existing_id = self.get_node_id_no_lock(url)
                    node = self.node_registry[existing_id] if existing_id else None
                    if node is None:
                        # First registration for this url, create a fresh entry
                        node = self.node_registry[node_id] = self._default_slave_bunch()
                    
                    # Basic node values
                    node.node_id = node_id
                    node.url = url
                    node.ip = ip
                    node.port = port
                    node.type = node_type
                    node.proxy = self.create_node_proxy(url)
                    node.state = NodeState.pending
                    node.heartbeat = time.time()
                    
                    # Add slave data                   
                    node.workers = data['workers']
                    node.tasks = 0
                    # Rating goes from [0, ..); 0 is the best rating and thus a suitable candidate
                    node.rating = 0
                    node.handler = None
                    node.tcp_proxy = None
                    
                    # Make sure the mirror updates properly
                    self.set_registry_dirty()
                    
                    # Send back the node id and the master port
                    return {'id': node.node_id, 'port': self.master_port}
            elif NodeType.client == node_type:
                with self.client_registry_lock.writelock:
                    # Look up in the client registry (not the node registry) and
                    # reuse the entry if this client is registering again
                    existing_id = self.get_client_id_no_lock(url)
                    node = self.client_registry[existing_id] if existing_id else None
                    if node is None:
                        # First registration for this url, create a fresh entry
                        node = self.client_registry[node_id] = self._default_client_bunch()
                    
                    # Basic node values
                    node.node_id = node_id
                    node.url = url
                    node.ip = ip
                    node.port = port
                    node.type = node_type
                    node.proxy = self.create_node_proxy(url)
                    node.state = NodeState.pending
                    node.heartbeat = time.time()
                    
                    # Add client data
                    node.handler = None
                    node.tcp_proxy = None
                    
                    # Make sure the mirror updates properly
                    self.set_registry_dirty()
                    
                    # Send back the node id and the master port
                    return {'id': node.node_id, 'port': self.master_port}
            else:
                raise NotImplementedError("Unkown node")
        except Exception as e:
            traceback.print_exc()
            # Make sure to cleanup node from node map!
            if node_id:
                self.unregister_node(node_id, node_type)
            raise e
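
    # A slave would typically invoke register_node over its RPC proxy and keep
    # the returned id for the later TCP handshake. Hypothetical call, with
    # invented parameter values, purely for illustration:
    #
    #   reply = master_proxy.register_node('node-1', '10.0.0.5', 9000,
    #                                      {'workers': 4}, NodeType.slave)
    #   # reply -> {'id': 'node-1', 'port': <master port>}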
    
    def unregister_node(self, node_id, node_type):
        """
        Unregister a node within our node map
        """
        if NodeType.slave == node_type:
            with self.registry_lock.writelock:
                if node_id in self.node_registry:
                    self.node_registry[node_id] = None
                    # Make sure we let the mirror update (set_registry_dirty
                    # also flags registry_mirror_dirty)
                    self.set_registry_dirty()
                    return True
                return False
        elif NodeType.client == node_type:
            with self.client_registry_lock.writelock:
                if node_id in self.client_registry:
                    # if we had a socket close it now!
                    self.client_registry[node_id] = None
                    # Get rid of any registered task system
                    with self.tasksystem_lock.writelock:
                        if node_id in self.tasksystem_registry:
                            del self.tasksystem_registry[node_id]
                    # Make sure we let the mirror update (set_registry_dirty
                    # also flags registry_mirror_dirty)
                    self.set_registry_dirty()
                    return True
                return False
        else:
            raise NotImplementedError("Unkown node")
    
    def register_node_tcp(self, handler, request, node_id, node_type):
        """
        A node has just registered itself through the compute channel
        """
        if NodeType.slave == node_type:
            with self.registry_lock.writelock:
                if node_id in self.node_registry:
                    # The handler is shared between many client sockets!
                    self.node_registry[node_id].handler = handler
                    self.node_registry[node_id].socket = handler.worker
                    #self.node_registry[node_id].tcp_proxy = self.create_tcp_client_proxy(handler.worker, request)
                    self.node_registry[node_id].tcp_proxy = self.create_tcp_client_proxy_zmq(self.zmq_server.context, request)
                    self.node_registry[node_id].state = NodeState.active
                    # Let the slave know that the handshake worked
                    return True
                return False
        elif NodeType.client == node_type:
            with self.client_registry_lock.writelock:
                if node_id in self.client_registry:
                    # The handler is shared between many client sockets!
                    self.client_registry[node_id].handler = handler
                    self.client_registry[node_id].socket = handler.worker
                    #self.client_registry[node_id].tcp_proxy = self.create_tcp_client_proxy(handler.worker, request)
                    self.client_registry[node_id].tcp_proxy = self.create_tcp_client_proxy_zmq(self.zmq_server.context, request)
                    self.client_registry[node_id].state = NodeState.active
                    # Save some data within the handler itself
                    handler.node_id = node_id
                    handler.node_type = NodeType.client
                    # Let the client know that the handshake worked
                    return True
                return False
        else:
            raise NotImplementedError("Unknown node type")
    def notify_shutdown(self):
        """
        Notify a global shutdown to all nodes
        """
        with self.registry_lock.readlock:
            for node_id in self.node_registry:
                if self.node_registry[node_id] and self.node_registry[node_id].proxy:
                    try:
                        self.node_registry[node_id].proxy.master_disconnected()
                    except:
                        pass
        with self.client_registry_lock.readlock:
            for node_id in self.client_registry:
                if self.client_registry[node_id] and self.client_registry[node_id].proxy:
                    try:
                        self.client_registry[node_id].proxy.master_disconnected()
                    except:
                        pass
    
    def heartbeat(self, node_id, node_type):
        """
        We just received a beat from a node, so update its last heartbeat
        timestamp to prevent timeouts
        """
        if NodeType.slave == node_type:
            with self.registry_lock.writelock:
                if node_id in self.node_registry:
                    self.node_registry[node_id].heartbeat = time.time()
                    if self.node_registry[node_id].state == NodeState.inactive:
                        self.node_registry[node_id].state = NodeState.active
                    #self.log.info("Node %s just ticked" % (node_id))
                    return True
                return False
        elif NodeType.client == node_type:
            with self.client_registry_lock.writelock:
                if node_id in self.client_registry:
                    self.client_registry[node_id].heartbeat = time.time()
                    if self.client_registry[node_id].state == NodeState.inactive:
                        self.client_registry[node_id].state = NodeState.active
                    #self.log.info("Node %s just ticked" % (node_id))
                    return True
                return False
        else:
            raise NotImplementedError("Unkown node")
    
    def rpc_call_failed(self, proxy, method, reason):
        """
        Called when an RPC call failed for an unexpected reason
        """
        self.log.info("Method %s failed because of %s" % (method, reason))
    
    def rpc_call_success(self, proxy, method, result):
        """
        Called when an RPC call succeeded
        """
        self.log.info("Method %s succeeded with %s" % (method, result))
        return result
    
    def push_tasksystem(self, request, tasksystem):
        """
        We received a task system from a client. Get the first list of tasks and save the
        system itself for later access
        """
        
        # Easier access
        node_id = request
        
        # Register the system while holding the write lock
        with self.tasksystem_lock.writelock:
            # No re-registering!
            system_id = tasksystem.system_id
            if system_id in self.tasksystem_registry:
                return False
            
            # Save the entry in the registry
            system_entry = self.tasksystem_registry[system_id] = self._default_tasksystem()
            system_entry.system = tasksystem
            system_entry.client_id = node_id
            system_entry.system_id = system_id
            
            # Now gather task and push them to the system
            system_entry.system.log = self.log
            system_entry.system.init_system(self)
            self.task_scheduler.start_system(system_entry.system)
        return True
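
    # Lifecycle of a pushed ITaskSystem as driven by this class:
    #   1. system.init_system(master)        - the system prepares its tasks
    #   2. task_scheduler.start_system(...)  - tasks are handed to the slaves
    #   3. task_finished(...) per task       - results stream back (see below)
    #   4. is_complete() -> gather_result()  - final result goes to the client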
    
    def push_task(self, request, task):
        """
        We received a task from a client, add it to the system to be processed
        """
        if isinstance(task, Task):
            self.task_scheduler.push_task(task)
            return True
        return False
    
    def task_finished(self, task, result, error):
        """
        Called when a task has finished its computation; the result object contains the task,
        the result or an error, and additional information
        """
        # If the task does not specify an ITaskSystem id it is a single executed task
        # that is not controlled by a dedicated autonomous system on the master
        if task.system_id is None:
            client_id = task.client_id
            with self.client_registry_lock.readlock:
                if client_id in self.client_registry:
                    self.client_registry[client_id].tcp_proxy.task_finished(task.task_id, result, error)
        else:
            # If we do have a system id let it process it instead
            with self.tasksystem_lock.writelock:
                if task.system_id in self.tasksystem_registry:
                    system_entry = self.tasksystem_registry[task.system_id]
                    system_entry.system.task_finished(self, task, result, error)
                    
                    # Inform scheduler of the task
                    self.task_scheduler.task_finished(task, result, error)
                    
                    # Check for end
                    if system_entry.system.is_complete(self):
                        try:
                            # Gather results
                            final_results = system_entry.system.gather_result(self)
                            
                            # Send to client proxy the results
                            client_id = system_entry.client_id
                            with self.client_registry_lock.readlock:
                                if client_id in self.client_registry:
                                    self.client_registry[client_id].tcp_proxy.work_finished(final_results, system_entry.system.system_id)
                        finally:
                            del self.tasksystem_registry[task.system_id]
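
    # Result routing in task_finished: standalone tasks (no system_id) are
    # answered directly over the owning client's tcp_proxy, while
    # system-managed tasks are fed back into their ITaskSystem until it
    # reports is_complete(), at which point gather_result() builds the payload
    # for work_finished() and the registry entry is removed.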