Example #1
0
def master_main(options):
    create_pycurl_thread(ciel.engine)

    deferred_worker = DeferredWorkPlugin(ciel.engine)
    deferred_worker.subscribe()

    worker_pool = WorkerPool(ciel.engine, deferred_worker, None)
    worker_pool.subscribe()

    task_failure_investigator = TaskFailureInvestigator(
        worker_pool, deferred_worker)

    job_pool = JobPool(ciel.engine, options.journaldir, None,
                       task_failure_investigator, deferred_worker, worker_pool,
                       options.task_log_root)
    job_pool.subscribe()

    worker_pool.job_pool = job_pool

    backup_sender = BackupSender(cherrypy.engine)
    backup_sender.subscribe()

    if options.hostname is not None:
        local_hostname = options.hostname
    else:
        local_hostname = socket.getfqdn()
    local_port = cherrypy.config.get('server.socket_port')
    master_netloc = '%s:%d' % (local_hostname, local_port)
    ciel.log('Local port is %d' % local_port, 'STARTUP', logging.INFO)

    if options.blockstore is None:
        static_content_root = tempfile.mkdtemp(
            prefix=os.getenv('TEMP', default='/tmp/ciel-files-'))
        ciel.log(
            'Block store directory not specified: storing in %s' %
            static_content_root, 'STARTUP', logging.WARN)
    else:
        static_content_root = options.blockstore
    block_store_dir = os.path.join(static_content_root, "data")
    try:
        os.mkdir(block_store_dir)
    except:
        pass

    block_store = BlockStore(local_hostname, local_port, block_store_dir)
    block_store.build_pin_set()
    block_store.check_local_blocks()

    # TODO: Re-enable this and test thoroughly.
    # if options.master is not None:
    #    monitor = MasterRecoveryMonitor(cherrypy.engine, 'http://%s/' % master_netloc, options.master, job_pool)
    #    monitor.subscribe()
    # else:
    monitor = None

    recovery_manager = RecoveryManager(ciel.engine, job_pool, block_store,
                                       deferred_worker)
    recovery_manager.subscribe()

    root = MasterRoot(worker_pool, block_store, job_pool, backup_sender,
                      monitor)

    cherrypy.config.update({"server.thread_pool": 50})
    cherrypy.config.update({"checker.on": False})

    cherrypy_conf = dict()

    app = cherrypy.tree.mount(root, "", cherrypy_conf)

    lighty = LighttpdAdapter(ciel.engine, static_content_root, local_port)
    lighty.subscribe()
    # Zap CherryPy's original flavour server
    cherrypy.server.unsubscribe()
    server = cherrypy.process.servers.FlupFCGIServer(
        application=app, bindAddress=lighty.socket_path)
    adapter = cherrypy.process.servers.ServerAdapter(
        cherrypy.engine, httpserver=server, bind_addr=lighty.socket_path)
    # Insert a FastCGI server in its place
    adapter.subscribe()

    if hasattr(ciel.engine, "signal_handler"):
        ciel.engine.signal_handler.subscribe()
    if hasattr(ciel.engine, "console_control_handler"):
        ciel.engine.console_control_handler.subscribe()

    ciel.engine.start()
    ciel.engine.block()
Example #2
0
def master_main(options):

    create_pycurl_thread(ciel.engine)

    deferred_worker = DeferredWorkPlugin(ciel.engine)
    deferred_worker.subscribe()

    worker_pool = WorkerPool(ciel.engine, deferred_worker, None)
    worker_pool.subscribe()

    task_failure_investigator = TaskFailureInvestigator(worker_pool, deferred_worker)
    
    job_pool = JobPool(ciel.engine, options.journaldir, None, task_failure_investigator, deferred_worker, worker_pool, options.task_log_root)
    job_pool.subscribe()
    
    worker_pool.job_pool = job_pool
    
    mc_policy = PerformancePolicy('Performance')
    mission_controller = MissionController(ciel.engine, job_pool, mc_policy)
    mission_controller.subscribe()

    backup_sender = BackupSender(cherrypy.engine)
    backup_sender.subscribe()

    if options.hostname is not None:
        local_hostname = options.hostname
    else:
        local_hostname = socket.getfqdn()
    local_port = cherrypy.config.get('server.socket_port')
    master_netloc = '%s:%d' % (local_hostname, local_port)
    ciel.log('Local port is %d' % local_port, 'STARTUP', logging.INFO)
    
    if options.blockstore is None:
        static_content_root = tempfile.mkdtemp(prefix=os.getenv('TEMP', default='/tmp/ciel-files-'))
        ciel.log('Block store directory not specified: storing in %s' % static_content_root, 'STARTUP', logging.WARN)
    else:
        static_content_root = options.blockstore
    block_store_dir = os.path.join(static_content_root, "data")
    try:
        os.mkdir(block_store_dir)
    except:
        pass

    block_store = BlockStore(local_hostname, local_port, block_store_dir)
    block_store.build_pin_set()
    block_store.check_local_blocks()

    # TODO: Re-enable this and test thoroughly.
    #if options.master is not None:
    #    monitor = MasterRecoveryMonitor(cherrypy.engine, 'http://%s/' % master_netloc, options.master, job_pool)
    #    monitor.subscribe()
    #else:
    monitor = None

    recovery_manager = RecoveryManager(ciel.engine, job_pool, block_store, deferred_worker)
    recovery_manager.subscribe()
  
    root = MasterRoot(worker_pool, block_store, job_pool, backup_sender, monitor)

    cherrypy.config.update({"server.thread_pool" : 50})
    cherrypy.config.update({"checker.on" : False})

    cherrypy_conf = dict()
    
    app = cherrypy.tree.mount(root, "", cherrypy_conf)
    
    lighty = LighttpdAdapter(ciel.engine, static_content_root, local_port)
    lighty.subscribe()
    # Zap CherryPy's original flavour server
    cherrypy.server.unsubscribe()
    server = cherrypy.process.servers.FlupFCGIServer(application=app, bindAddress=lighty.socket_path)
    adapter = cherrypy.process.servers.ServerAdapter(cherrypy.engine, httpserver=server, bind_addr=lighty.socket_path)
    # Insert a FastCGI server in its place
    adapter.subscribe()

    if hasattr(ciel.engine, "signal_handler"):
        ciel.engine.signal_handler.subscribe()
    if hasattr(ciel.engine, "console_control_handler"):
        ciel.engine.console_control_handler.subscribe()

    ciel.engine.start()
    ciel.engine.block()
Example #3
0
    def __init__(self, bus, port, options):
        plugins.SimplePlugin.__init__(self, bus)

        create_pycurl_thread(bus)
        if options.aux_port is not None:
            create_tcp_server(options.aux_port)

        self.id = None
        self.port = port
        self.master_url = options.master
        self.master_proxy = MasterProxy(self, bus, self.master_url)
        self.master_proxy.subscribe()
        
        # This will now be set by the pinger when it attempts to contact the master.
        self.hostname = None
#        if options.hostname is None:
#            self.hostname = self.master_proxy.get_public_hostname()
#        else:
#            self.hostname = options.hostname

        if options.blockstore is None:
            self.static_content_root = tempfile.mkdtemp(prefix=os.getenv('TEMP', default='/tmp/ciel-files-'))
        else:
            self.static_content_root = options.blockstore
        block_store_dir = os.path.join(self.static_content_root, "data")
        try:
            os.mkdir(block_store_dir)
        except:
            pass
        
        # The hostname is None because we get this from the master.
        self.block_store = BlockStore(None, self.port, block_store_dir, ignore_blocks=False)
        self.block_store.build_pin_set()
        self.block_store.check_local_blocks()
        create_watcher_thread(bus, self.block_store)
        self.upload_deferred_work = DeferredWorkPlugin(bus, 'upload_work')
        self.upload_deferred_work.subscribe()
        self.upload_manager = UploadManager(self.block_store, self.upload_deferred_work)
        self.execution_features = ExecutionFeatures()
        #self.task_executor = TaskExecutorPlugin(bus, self, self.master_proxy, self.execution_features, 1)
        #self.task_executor.subscribe()
        
        self.scheduling_classes = parse_scheduling_class_option(None, options.num_threads)
        
        self.multiworker = MultiWorker(ciel.engine, self)
        self.multiworker.subscribe()
        self.process_pool = ProcessPool(bus, self, self.execution_features.process_cacheing_executors)
        self.process_pool.subscribe()
        self.runnable_executors = self.execution_features.runnable_executors.keys()
        self.server_root = WorkerRoot(self)
        self.pinger = Pinger(bus, self.master_proxy, None)
        self.pinger.subscribe()
        self.stopping = False
        self.event_log = []
        self.log_lock = Lock()
        self.log_condition = Condition(self.log_lock)

        self.cherrypy_conf = {}

        cherrypy.config.update({"server.thread_pool" : 20})
        cherrypy.config.update({"checker.on" : False})

        self.subscribe()
Example #4
0
class Worker(plugins.SimplePlugin):
    
    def __init__(self, bus, port, options):
        plugins.SimplePlugin.__init__(self, bus)

        create_pycurl_thread(bus)
        if options.aux_port is not None:
            create_tcp_server(options.aux_port)

        self.id = None
        self.port = port
        self.master_url = options.master
        self.master_proxy = MasterProxy(self, bus, self.master_url)
        self.master_proxy.subscribe()
        
        # This will now be set by the pinger when it attempts to contact the master.
        self.hostname = None
#        if options.hostname is None:
#            self.hostname = self.master_proxy.get_public_hostname()
#        else:
#            self.hostname = options.hostname

        if options.blockstore is None:
            self.static_content_root = tempfile.mkdtemp(prefix=os.getenv('TEMP', default='/tmp/ciel-files-'))
        else:
            self.static_content_root = options.blockstore
        block_store_dir = os.path.join(self.static_content_root, "data")
        try:
            os.mkdir(block_store_dir)
        except:
            pass
        
        # The hostname is None because we get this from the master.
        self.block_store = BlockStore(None, self.port, block_store_dir, ignore_blocks=False)
        self.block_store.build_pin_set()
        self.block_store.check_local_blocks()
        create_watcher_thread(bus, self.block_store)
        self.upload_deferred_work = DeferredWorkPlugin(bus, 'upload_work')
        self.upload_deferred_work.subscribe()
        self.upload_manager = UploadManager(self.block_store, self.upload_deferred_work)
        self.execution_features = ExecutionFeatures()
        #self.task_executor = TaskExecutorPlugin(bus, self, self.master_proxy, self.execution_features, 1)
        #self.task_executor.subscribe()
        
        self.scheduling_classes = parse_scheduling_class_option(None, options.num_threads)
        
        self.multiworker = MultiWorker(ciel.engine, self)
        self.multiworker.subscribe()
        self.process_pool = ProcessPool(bus, self, self.execution_features.process_cacheing_executors)
        self.process_pool.subscribe()
        self.runnable_executors = self.execution_features.runnable_executors.keys()
        self.server_root = WorkerRoot(self)
        self.pinger = Pinger(bus, self.master_proxy, None)
        self.pinger.subscribe()
        self.stopping = False
        self.event_log = []
        self.log_lock = Lock()
        self.log_condition = Condition(self.log_lock)

        self.cherrypy_conf = {}

        cherrypy.config.update({"server.thread_pool" : 20})
        cherrypy.config.update({"checker.on" : False})

        self.subscribe()

    def subscribe(self):
        self.bus.subscribe('stop', self.stop, priority=10)
        self.bus.subscribe("worker_event", self.add_log_entry)
        
    def unsubscribe(self):
        self.bus.unsubscribe('stop', self.stop)
        self.bus.unsubscribe("worker_event", self.add_log_entry)

    def netloc(self):
        return '%s:%d' % (self.hostname, self.port)

    def as_descriptor(self):
        return {'netloc': self.netloc(), 'features': self.runnable_executors, 'has_blocks': not self.block_store.is_empty(), 'scheduling_classes': self.scheduling_classes}

    def set_hostname(self, hostname):
        self.hostname = hostname
        self.block_store.set_hostname(hostname)

    def set_master(self, master_details):
        self.master_url = master_details['master']
        self.master_proxy.change_master(self.master_url)
        self.pinger.poke()

    def start_running(self):

        app = cherrypy.tree.mount(self.server_root, "", self.cherrypy_conf)

        lighty = LighttpdAdapter(ciel.engine, self.static_content_root, self.port)
        lighty.subscribe()
        # Zap CherryPy's original flavour server
        cherrypy.server.unsubscribe()
        server = cherrypy.process.servers.FlupFCGIServer(application=app, bindAddress=lighty.socket_path)
        adapter = cherrypy.process.servers.ServerAdapter(cherrypy.engine, httpserver=server, bind_addr=lighty.socket_path)
        # Insert a FastCGI server in its place
        adapter.subscribe()

        ciel.engine.start()
        if hasattr(ciel.engine, "signal_handler"):
            ciel.engine.signal_handler.subscribe()
        if hasattr(ciel.engine, "console_control_handler"):
            ciel.engine.console_control_handler.subscribe()
        ciel.engine.block()

    def stop(self):
        with self.log_lock:
            self.stopping = True
            self.log_condition.notify_all()
    
    def submit_task(self, task_descriptor):
        ciel.engine.publish("worker_event", "Start task " + repr(task_descriptor["task_id"]))
        ciel.engine.publish('execute_task', task_descriptor)
                
    def abort_task(self, task_id):
        ciel.engine.publish("worker_event", "Abort task " + repr(task_id))
        self.task_executor.abort_task(task_id)

    def add_log_entry(self, log_string):
        with self.log_lock:
            self.event_log.append((datetime.now(), log_string))
            self.log_condition.notify_all()

    def get_log_entries(self, start_index, end_index):
        with self.log_lock:
            return self.event_log[start_index:end_index]

    def await_log_entries_after(self, index):
        with self.log_lock:
            while len(self.event_log) <= int(index):
                if self.stopping == True:
                    break
                self.log_condition.wait()
            if self.stopping:
                raise Exception("Worker stopping")
Example #5
0
    def __init__(self, bus, port, options):
        plugins.SimplePlugin.__init__(self, bus)

        create_pycurl_thread(bus)
        if options.aux_port is not None:
            create_tcp_server(options.aux_port)

        self.id = None
        self.port = port
        self.master_url = options.master
        self.master_proxy = MasterProxy(self, bus, self.master_url)
        self.master_proxy.subscribe()

        # This will now be set by the pinger when it attempts to contact the master.
        self.hostname = None
        #        if options.hostname is None:
        #            self.hostname = self.master_proxy.get_public_hostname()
        #        else:
        #            self.hostname = options.hostname

        if options.blockstore is None:
            self.static_content_root = tempfile.mkdtemp(
                prefix=os.getenv('TEMP', default='/tmp/ciel-files-'))
        else:
            self.static_content_root = options.blockstore
        block_store_dir = os.path.join(self.static_content_root, "data")
        try:
            os.mkdir(block_store_dir)
        except:
            pass

        # The hostname is None because we get this from the master.
        self.block_store = BlockStore(None,
                                      self.port,
                                      block_store_dir,
                                      ignore_blocks=False)
        self.block_store.build_pin_set()
        self.block_store.check_local_blocks()
        create_watcher_thread(bus, self.block_store)
        self.upload_deferred_work = DeferredWorkPlugin(bus, 'upload_work')
        self.upload_deferred_work.subscribe()
        self.upload_manager = UploadManager(self.block_store,
                                            self.upload_deferred_work)
        self.execution_features = ExecutionFeatures()
        #self.task_executor = TaskExecutorPlugin(bus, self, self.master_proxy, self.execution_features, 1)
        #self.task_executor.subscribe()

        self.scheduling_classes = parse_scheduling_class_option(
            None, options.num_threads)

        self.multiworker = MultiWorker(ciel.engine, self)
        self.multiworker.subscribe()
        self.process_pool = ProcessPool(
            bus, self, self.execution_features.process_cacheing_executors)
        self.process_pool.subscribe()
        self.runnable_executors = self.execution_features.runnable_executors.keys(
        )
        self.server_root = WorkerRoot(self)
        self.pinger = Pinger(bus, self.master_proxy, None)
        self.pinger.subscribe()
        self.stopping = False
        self.event_log = []
        self.log_lock = Lock()
        self.log_condition = Condition(self.log_lock)

        self.cherrypy_conf = {}

        cherrypy.config.update({"server.thread_pool": 20})
        cherrypy.config.update({"checker.on": False})

        self.subscribe()
Example #6
0
class Worker(plugins.SimplePlugin):
    def __init__(self, bus, port, options):
        plugins.SimplePlugin.__init__(self, bus)

        create_pycurl_thread(bus)
        if options.aux_port is not None:
            create_tcp_server(options.aux_port)

        self.id = None
        self.port = port
        self.master_url = options.master
        self.master_proxy = MasterProxy(self, bus, self.master_url)
        self.master_proxy.subscribe()

        # This will now be set by the pinger when it attempts to contact the master.
        self.hostname = None
        #        if options.hostname is None:
        #            self.hostname = self.master_proxy.get_public_hostname()
        #        else:
        #            self.hostname = options.hostname

        if options.blockstore is None:
            self.static_content_root = tempfile.mkdtemp(
                prefix=os.getenv('TEMP', default='/tmp/ciel-files-'))
        else:
            self.static_content_root = options.blockstore
        block_store_dir = os.path.join(self.static_content_root, "data")
        try:
            os.mkdir(block_store_dir)
        except:
            pass

        # The hostname is None because we get this from the master.
        self.block_store = BlockStore(None,
                                      self.port,
                                      block_store_dir,
                                      ignore_blocks=False)
        self.block_store.build_pin_set()
        self.block_store.check_local_blocks()
        create_watcher_thread(bus, self.block_store)
        self.upload_deferred_work = DeferredWorkPlugin(bus, 'upload_work')
        self.upload_deferred_work.subscribe()
        self.upload_manager = UploadManager(self.block_store,
                                            self.upload_deferred_work)
        self.execution_features = ExecutionFeatures()
        #self.task_executor = TaskExecutorPlugin(bus, self, self.master_proxy, self.execution_features, 1)
        #self.task_executor.subscribe()

        self.scheduling_classes = parse_scheduling_class_option(
            None, options.num_threads)

        self.multiworker = MultiWorker(ciel.engine, self)
        self.multiworker.subscribe()
        self.process_pool = ProcessPool(
            bus, self, self.execution_features.process_cacheing_executors)
        self.process_pool.subscribe()
        self.runnable_executors = self.execution_features.runnable_executors.keys(
        )
        self.server_root = WorkerRoot(self)
        self.pinger = Pinger(bus, self.master_proxy, None)
        self.pinger.subscribe()
        self.stopping = False
        self.event_log = []
        self.log_lock = Lock()
        self.log_condition = Condition(self.log_lock)

        self.cherrypy_conf = {}

        cherrypy.config.update({"server.thread_pool": 20})
        cherrypy.config.update({"checker.on": False})

        self.subscribe()

    def subscribe(self):
        self.bus.subscribe('stop', self.stop, priority=10)
        self.bus.subscribe("worker_event", self.add_log_entry)

    def unsubscribe(self):
        self.bus.unsubscribe('stop', self.stop)
        self.bus.unsubscribe("worker_event", self.add_log_entry)

    def netloc(self):
        return '%s:%d' % (self.hostname, self.port)

    def as_descriptor(self):
        return {
            'netloc': self.netloc(),
            'features': self.runnable_executors,
            'has_blocks': not self.block_store.is_empty(),
            'scheduling_classes': self.scheduling_classes
        }

    def set_hostname(self, hostname):
        self.hostname = hostname
        self.block_store.set_hostname(hostname)

    def set_master(self, master_details):
        self.master_url = master_details['master']
        self.master_proxy.change_master(self.master_url)
        self.pinger.poke()

    def start_running(self):

        app = cherrypy.tree.mount(self.server_root, "", self.cherrypy_conf)

        lighty = LighttpdAdapter(ciel.engine, self.static_content_root,
                                 self.port)
        lighty.subscribe()
        # Zap CherryPy's original flavour server
        cherrypy.server.unsubscribe()
        server = cherrypy.process.servers.FlupFCGIServer(
            application=app, bindAddress=lighty.socket_path)
        adapter = cherrypy.process.servers.ServerAdapter(
            cherrypy.engine, httpserver=server, bind_addr=lighty.socket_path)
        # Insert a FastCGI server in its place
        adapter.subscribe()

        ciel.engine.start()
        if hasattr(ciel.engine, "signal_handler"):
            ciel.engine.signal_handler.subscribe()
        if hasattr(ciel.engine, "console_control_handler"):
            ciel.engine.console_control_handler.subscribe()
        ciel.engine.block()

    def stop(self):
        with self.log_lock:
            self.stopping = True
            self.log_condition.notify_all()

    def submit_task(self, task_descriptor):
        ciel.engine.publish("worker_event",
                            "Start task " + repr(task_descriptor["task_id"]))
        ciel.engine.publish('execute_task', task_descriptor)

    def abort_task(self, task_id):
        ciel.engine.publish("worker_event", "Abort task " + repr(task_id))
        self.task_executor.abort_task(task_id)

    def add_log_entry(self, log_string):
        with self.log_lock:
            self.event_log.append((datetime.now(), log_string))
            self.log_condition.notify_all()

    def get_log_entries(self, start_index, end_index):
        with self.log_lock:
            return self.event_log[start_index:end_index]

    def await_log_entries_after(self, index):
        with self.log_lock:
            while len(self.event_log) <= int(index):
                if self.stopping == True:
                    break
                self.log_condition.wait()
            if self.stopping:
                raise Exception("Worker stopping")