Example #1
def schedule(options, timeout):
    """
    This celery task wraps the main work queue, and schedules individual builds on worker machines.
    :param options: A dictionary containing build information to configure the worker with.
    :param timeout: A value passed from Chorus to maintain uniformity with timeout configuration.
    :return: A confirmation message indicating if the build was started successfully.
    """
    LOG.info("Received Schedule Task Order!")
    global SchedulerLock
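    # Busy-wait until the process-wide scheduler lock can be acquired.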
    while SchedulerLock.acquire(False) is False:
        continue
    filtered = []
    while len(filtered) == 0:
        filtered = []
        raw = broadcast('busy', arguments={}, reply=True)
        for item in raw:
            for key, value in item.iteritems():
                key = key.split('@')[1]
                if key[0:6] == 'feeder' or key[0:6] == 'master':
                    continue
                if value is False:
                    filtered.append(key)
                    break
        time.sleep(5)
    target = 'celery@' + filtered[0]
    LOG.info("Scheduling to target: %s" % target)
    options['host'] = target
    result = broadcast('start_build', arguments={'options': options}, destination=[target], reply=True)
    SchedulerLock.release()
    return result
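A note on the reply shape these examples parse: with reply=True, broadcast() returns a list of single-entry dicts, one per responding worker, mapping the worker's hostname to its reply, e.g. [{'celery@node1': True}, {'celery@node2': False}]. A minimal helper for flattening that structure might look like the sketch below (the name flatten_replies is ours, not from any example; written in the same Python 2 style as the examples):

def flatten_replies(raw):
    """Merge a broadcast(..., reply=True) result list into a single
    {hostname: reply} dict. Later entries win on duplicate hostnames."""
    merged = {}
    for item in raw or []:  # raw may be empty if no worker replied in time
        for hostname, reply in item.iteritems():
            merged[hostname] = reply
    return merged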
Example #2
    def capture(self, limit=None, timeout=None):
        """Open up a consumer capturing events.

        This has to run in the main process, and it will never
        stop unless forced via :exc:`KeyboardInterrupt` or :exc:`SystemExit`.

        """
        consumer = self.consumer()
        consumer.consume()
        if self.wakeup:
            from celery.task.control import broadcast
            broadcast("heartbeat")
        try:
            for iteration in count(0):
                if limit and iteration > limit:
                    break
                try:
                    consumer.connection.drain_events(timeout=timeout)
                except socket.timeout:
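                    # Re-raise only when the caller actually requested a timeout.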
                    if timeout:
                        raise
                except socket.error:
                    pass
        finally:
            consumer.close()
Example #3
    def handle(self, *args, **options):
        try:
            from celery.task.control import broadcast
        except ImportError:
            raise CommandError("Celery is not currently installed.")

        # Shut them all down.
        broadcast("shutdown")
Example #5
 def tearDownClass(cls):
     """
     Shut down the celeryd processes we created earlier.
     """
     for hostname, hostinfo in cls.celeryd_hosts.items():
         print "Requesting shutdown of celeryd [%s]..." % hostname
         control.broadcast("shutdown", destination=[hostname])
         cls.celeryd_hosts[hostname]["Popen"].wait()
         print "celeryd closed."
Example #6
 def test_death(self):
     """
     Test death broadcast command.
     @param self Object reference.
     """
     result = broadcast("death", arguments={"run_number":1}, reply=True)
     result.sort()
     print "death: %s " % result
     self.validate_status(result)
     # Expect subsequent attempt to succeed.
     result = broadcast("death", arguments={"run_number":1}, reply=True)
     self.validate_status(result)
Example #7
 def reset_worker(self): # pylint:disable = R0201
     """
     Reset the Celery workers, via a broadcast, to have no
     configuration and to use MapPyDoNothing. 
     @param self Object reference.
     @return status of update.
     """
     config_id = datetime.now().microsecond
     broadcast("birth", 
         arguments={"configuration":self.config_doc,
                    "transform":"MapPyDoNothing", 
                    "config_id":config_id}, reply=True)
Example #8
 def test_death(self):
     """
     Test death broadcast command.
     @param self Object reference.
     """
     result = broadcast("death", arguments={"run_number": 1}, reply=True)
     result.sort()
     print "death: %s " % result
     self.validate_status(result)
     # Expect subsequent attempt to succeed.
     result = broadcast("death", arguments={"run_number": 1}, reply=True)
     self.validate_status(result)
Example #9
def watcher():
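    # If the run-flag file already exists, do nothing; otherwise create it
    # and broadcast a shutdown when any worker reports no active tasks.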
    try:
        f = open(RUN_FLAG_FILE, 'r')
    except IOError:
        with open(RUN_FLAG_FILE, 'w') as f:
            i = inspect()
            print >>f, '1'
            workers = i.active()
            print >>f, '2'
            if not all(workers.itervalues()):
                print >>f, '3'
                broadcast('shutdown')
                f.write('%s%s' % ('true', os.linesep))
Example #10
 def reset_worker(self):  # pylint:disable = R0201
     """
     Reset the Celery workers, via a broadcast, to have no
     configuration and to use MapPyDoNothing. 
     @param self Object reference.
     @return status of update.
     """
     config_id = datetime.now().microsecond
     broadcast("birth",
               arguments={
                   "configuration": self.config_doc,
                   "transform": "MapPyDoNothing",
                   "config_id": config_id
               },
               reply=True)
Example #11
def shrink_worker_pool(request, name=None, num=1):
    """ Kills a running worker (celeryd).  However, no action will be taken 
        unless the request method is 'POST'.
        
        name:
            The name of the worker to stop.  The special value 'all' will stop 
            *all* workers.
    """
    if request.method != 'POST':
        return HttpResponseNotAllowed(['POST'])
    else:
        name = _resolve_name_param(name)
        dest = name and [name]  # dest will be None or a list of a single name
        broadcast('pool_shrink', destination=dest, arguments={'n':num})
        return HttpResponse('')
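Celery also ships the complementary 'pool_grow' control command, so the same view pattern can add pool processes. A sketch under the same assumptions as shrink_worker_pool above (the grow_worker_pool name is ours; it reuses the _resolve_name_param helper from that example):

def grow_worker_pool(request, name=None, num=1):
    """ Adds pool processes to a running worker (celeryd) via the
        'pool_grow' control command. Sketch only; mirrors shrink_worker_pool.
    """
    if request.method != 'POST':
        return HttpResponseNotAllowed(['POST'])
    name = _resolve_name_param(name)
    dest = name and [name]  # None broadcasts to all workers
    broadcast('pool_grow', destination=dest, arguments={'n': num})
    return HttpResponse('')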
Example #12
 def birth_celery(transform, config, config_id, run_number, timeout = 1000):
     """
     Set new configuration and transforms in Celery nodes, and
     birth the transforms. An initial ping is done to
     identify the number of live nodes. 
     Each node is given up to timeout seconds (1000 by default) to
     reply, but the method returns as soon as all nodes have replied.
     @param transform Either a single name - representing a single
     transform - or a list of transforms - representing a MapPyGroup.
     Sub-lists are treated as nested MapPyGroups. If None then the
     current transform is deathed and rebirthed.
     @param config Valid JSON configuration document
     @param config_id Configuration ID from client.
     @param timeout Time to wait for replies.
     @return results from Celery.
     @throws RabbitMQException if RabbitMQ cannot be contacted.
     @throws CeleryNodeException if one or more Celery 
     nodes fails to configure or birth.
     """
     num_nodes = CeleryUtilities.ping_celery_nodes()
     try:
         response = broadcast("birth", arguments={
             "transform": transform, 
             "configuration": config,
             "config_id": config_id,
             "run_number": run_number},
             reply=True, timeout=timeout, limit=num_nodes)
     except socket.error as exc:
         raise RabbitMQException(exc)
     CeleryUtilities.validate_celery_response(response)
     run_headers = []
     for machines in response:
         for name in machines.keys():
             run_headers += machines[name]["run_headers"]
     return run_headers
Example #13
 def birth(self,
           config_id,
           configuration=None,
           transform="MapPyDoNothing",
           merge_configuration=False):  # pylint:disable = R0201, C0301
     """
     Configure the Celery workers, via a broadcast.
     @param self Object reference.
     @param config_id Configuration ID.
     @param configuration Configuration.
     @param transform Transform specification.
     @return status of update.
     """
     if configuration == None:
         configuration = self.config_doc
     elif merge_configuration:
         my_config = json.loads(self.config_doc)
         config_user = json.loads(configuration)
         for key in config_user:
             my_config[key] = config_user[key]
         configuration = json.dumps(my_config)
     return broadcast("birth",
         arguments={"configuration":configuration, \
                    "transform":transform,
                    "config_id":config_id,
                    "run_number":1}, reply=True)
Example #14
def kill_worker(request, name=None):
    """ Kills a running worker (celeryd).  However, no action will be taken 
        unless the request method is 'POST'.
        
        name:
            The name of the worker to stop.  The special value 'all' will stop 
            *all* workers.
    """
    if request.method != 'POST':
        return HttpResponseNotAllowed(['POST'])
    else:
        name = _resolve_name_param(name)
        dest = name and [name]  # dest will be None or a list of a single name
        print 'name: {0}'.format(name)
        print 'dest: {0}'.format(dest)
        broadcast('shutdown', destination=dest)
        return HttpResponse('success')
Example #15
 def broadcast(self, name, *args, **kwargs):
     if 'reply' not in kwargs:
         kwargs['reply'] = True
     result = broadcast(name, *args, **kwargs)
     result = util._merge_broadcast_result(result)  # turn it into a single dict
     result = util._condense_broadcast_result(result)  # remove worker key
     for k,v in result.iteritems():
         if isinstance(v, dict) and 'error' in v:
             raise RuntimeError('Found error in broadcast()')
     return result
Example #16
 def test_death_exception(self):
     """
     Test death broadcast command where the transform throws
     an exception when death is called.
     @param self Object reference.
     """
     # Set up a transform that will fail when it is deathed.
     config_id = datetime.now().microsecond
     transform = "MapPyTestMap"
     configuration = """{"death_result":%s, "maus_version":"%s"}""" \
         % (MapPyTestMap.EXCEPTION, self.__version)
     result = self.birth(config_id, configuration, transform)
     # Check the status is OK.
     self.validate_status(result)
     # Now death the transform.
     result = broadcast("death", arguments={"run_number":1}, reply=True)
     print "death(transform.death exception): %s " % result
     self.validate_status(result, "error")
     # Expect subsequent attempt to succeed.
     result = broadcast("death", arguments={"run_number":1}, reply=True)
     self.validate_status(result)
Example #17
 def test_death_exception(self):
     """
     Test death broadcast command where the transform throws
     an exception when death is called.
     @param self Object reference.
     """
     # Set up a transform that will fail when it is deathed.
     config_id = datetime.now().microsecond
     transform = "MapPyTestMap"
     configuration = """{"death_result":%s, "maus_version":"%s"}""" \
         % (MapPyTestMap.EXCEPTION, self.__version)
     result = self.birth(config_id, configuration, transform)
     # Check the status is OK.
     self.validate_status(result)
     # Now death the transform.
     result = broadcast("death", arguments={"run_number": 1}, reply=True)
     print "death(transform.death exception): %s " % result
     self.validate_status(result, "error")
     # Expect subsequent attempt to succeed.
     result = broadcast("death", arguments={"run_number": 1}, reply=True)
     self.validate_status(result)
Example #18
def kill(host, bid):
    """
    A celery task that kills a specific container on a specific host.
    :param host: The host to send the kill message to.
    :param bid: The bid of the build that created the container.
    :return: A confirmation message from the stop_container control command.
    """
    LOG.info("KILL! bid: %s on host: %s" % (bid, host))
    raw = broadcast('stop_container', arguments={'bid': bid}, destination=[host], reply=True)
    for r in raw:
        ret = r.values()[0]
    return ret
Example #19
    def command(self): 
        from celery.task.control import broadcast
        self.parse_config()

        self.log_break()
        self.log('Starting celerystop...')
        
        #
        # Look for hosts to broadcast to in the command line arguments,
        # e.g.,
        # pecan celerystop config.py --hosts=8.17.172.226,8.17.12.225
        # If no arguments are found, defaults to the hostname of the executing machine.
        #
        
        hosts = []
        args = self.options.hosts
        if args:
            hosts = args.split(',')
        else:
            hosts = [gethostname()]
            
        broadcast('shutdown', destination=hosts)
Example #20
def get_worker_subprocesses(dest=None):
    """ Retrieve the number of subprocesses for each worker.  The return value 
        is a dict where the keys are worker names and the values are the number 
        of subprocesses. 
    """
    stats = {}
    for x in broadcast("stats", destination=dest, reply=True):
        stats.update(x)
    
    workercounts = {}
    for workername in stats.iterkeys():
        procs = stats[workername]['pool']['processes']
        workercounts[workername] = len(procs)
    
    return workercounts
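For context, a hypothetical call site (worker names illustrative):

workercounts = get_worker_subprocesses()
# e.g. {'celery@worker1': 4, 'celery@worker2': 8}
one = get_worker_subprocesses(dest=['celery@worker1'])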
Example #21
def retrieve_log_page(app_id, hostname, log_id):
    """
    Get a page of log content.
    """
    log_contents = broadcast(
        "distlog_get_log_content",
        destination=[hostname],
        arguments={"app_id": app_id, "hostname": hostname, "log_id": log_id},
        reply=True,
        timeout=taskconfig.LOGS_CELERY_BCAST_TIMEOUT,
    )
    for host_response in log_contents:
        for host, result in host_response.iteritems():
            if result:
                return result

    return ""
Example #22
    def test_birth_map_cpp(self):
        """
        Test birth can birth a MapCpp*. This is a regression of #1483

        @param self Object reference.
        """
        config_id = datetime.now().microsecond
        transform = "MapCppExampleMAUSDataInput"
        result = broadcast("birth",
            arguments={"configuration":Configuration().getConfigJSON(),
                       "transform":transform, 
                       "config_id":config_id,
                       "run_number":1}, reply=True)
        print "birth(OK): %s " % result
        for item in result:
            for value in item.values():
                self.assertEquals(value['status'], 'ok')
        return True
Example #23
 def death_celery(run_number):
     """
     Call death on transforms in Celery nodes.
     @throws RabbitMQException if RabbitMQ cannot be contacted.
     @throws CeleryNodeException if one or more Celery 
     nodes fails to death.
     """
     try:
         response = broadcast("death", arguments={"run_number":run_number},
                              reply=True)
     except socket.error as exc:
         raise RabbitMQException(exc)
     CeleryUtilities.validate_celery_response(response)
     run_footers = []
     for machines in response:
         for name in machines.keys():
             run_footers += machines[name]["run_footers"]
     return run_footers
Example #24
def ping():
    """
    Ping is one of the essential celery control commands Chorus uses to determine worker state.
    :return: A list of results from control pong commands that provides Chorus with detailed state information.
    """
    LOG.info("PING!")
    myself = System.get_celery_hostname().split('@')[1]
    State.master = True
    raw = broadcast('pong', arguments={'rabbitmq': os.environ['CHORUS_HOST']}, reply=True)
    filtered = []
    for result in raw:
        for key, value in result.iteritems():
            key = key.split('@')[1]
            if key == myself or key[0:6] == 'feeder':
                continue
            filtered.append([key] + value)
    sorted_results = sorted(filtered)
    return sorted_results
Example #25
 def validate_configuration(self, configuration, transform,
     config_id = None):
     """
     Validate workers have the given configuration using
     set_maus_configuration and the same MAUS version as the
     test class.
     @param self Object reference.
     @param configuration Expected configuration.
     @param transform Expected transform specification.
     @param config_id Expected configuration ID.
     """
     result = broadcast("get_maus_configuration", reply=True)
     print "get_maus_configuration: %s " % result
     # Use built-in Celery worker inspection command to get
     # worker names.
     check_workers = self.__inspection.stats()
     check_worker_names = check_workers.keys()
     self.assertEquals(len(check_worker_names), len(result), 
         "Number of worker entries does not match that expected")
     for worker in result:
         worker_name = worker.keys()[0]
         self.assertTrue(worker_name in check_worker_names, 
             "Cannot find entry for worker %s" % worker_name)
         worker_config = worker[worker_name]
         self.assertTrue(worker_config.has_key("config_id"),
             "Configuration has no config_id entry")
         if (config_id != None):
             self.assertEquals(config_id, worker_config["config_id"],
                 "Unexpected config_id value")
         self.assertTrue(worker_config.has_key("configuration"),
             "Configuration has no configuration entry")
         self.assertEquals(configuration,
             worker_config["configuration"],
             "Unexpected configuration value\n\n%s\n\n%s" % (configuration,
             worker_config["configuration"]))
         self.assertTrue(worker_config.has_key("transform"),
             "Configuration has no transform entry")
         self.assertEquals(transform, worker_config["transform"],
             "Unexpected transform value")
         self.assertTrue(worker_config.has_key("version"),
             "Configuration has no version entry")
         self.assertEquals(self.__version, 
             worker_config["version"],
             "Unexpected version value")
Example #26
 def death_celery(run_number):
     """
     Call death on transforms in Celery nodes.
     @throws RabbitMQException if RabbitMQ cannot be contacted.
     @throws CeleryNodeException if one or more Celery 
     nodes fails to death.
     """
     try:
         response = broadcast("death",
                              arguments={"run_number": run_number},
                              reply=True)
     except socket.error as exc:
         raise RabbitMQException(exc)
     CeleryUtilities.validate_celery_response(response)
     run_footers = []
     for machines in response:
         for name in machines.keys():
             run_footers += machines[name]["run_footers"]
     return run_footers
Example #27
 def validate_configuration(self, configuration, transform, config_id=None):
     """
     Validate workers have the given configuration using
     set_maus_configuration and the same MAUS version as the
     test class.
     @param self Object reference.
     @param configuration Expected configuration.
     @param transform Expected transform specification.
     @param config_id Expected configuration ID.
     """
     result = broadcast("get_maus_configuration", reply=True)
     print "get_maus_configuration: %s " % result
     # Use built-in Celery worker inspection command to get
     # worker names.
     check_workers = self.__inspection.stats()
     check_worker_names = check_workers.keys()
     self.assertEquals(
         len(check_worker_names), len(result),
         "Number of worker entries does not match that expected")
     for worker in result:
         worker_name = worker.keys()[0]
         self.assertTrue(worker_name in check_worker_names,
                         "Cannot find entry for worker %s" % worker_name)
         worker_config = worker[worker_name]
         self.assertTrue(worker_config.has_key("config_id"),
                         "Configuration has no config_id entry")
         if (config_id != None):
             self.assertEquals(config_id, worker_config["config_id"],
                               "Unexpected config_id value")
         self.assertTrue(worker_config.has_key("configuration"),
                         "Configuration has no configuration entry")
         self.assertEquals(
             configuration, worker_config["configuration"],
             "Unexpected configuration value\n\n%s\n\n%s" %
             (configuration, worker_config["configuration"]))
         self.assertTrue(worker_config.has_key("transform"),
                         "Configuration has no transform entry")
         self.assertEquals(transform, worker_config["transform"],
                           "Unexpected transform value")
         self.assertTrue(worker_config.has_key("version"),
                         "Configuration has no version entry")
         self.assertEquals(self.__version, worker_config["version"],
                           "Unexpected version value")
Example #28
 def test_process_after_death(self):
     """
     Test process command fails after death has been called.
     @param self Object reference.
     """
     config_id = datetime.now().microsecond
     transform = "MapPyTestMap"
     configuration = """{"maus_version":"%s"}""" % self.__version
     result = self.birth(config_id, configuration, transform)
     self.validate_status(result)
     result = broadcast("death", arguments={"run_number": 1}, reply=True)
     self.validate_status(result)
     # Call process.
     result = execute_transform.delay("{}", 1)
     # Wait for it to complete.
     try:
         result.wait()
     except Exception:  # pylint:disable = W0703
         pass
     self.assertTrue(result.failed(), "Expected failure")
Example #29
 def test_process_after_death(self):
     """
     Test process command fails after death has been called.
     @param self Object reference.
     """
     config_id = datetime.now().microsecond
     transform = "MapPyTestMap"
     configuration = """{"maus_version":"%s"}""" % self.__version
     result = self.birth(config_id, configuration, transform)
     self.validate_status(result)
     result = broadcast("death", arguments={"run_number":1}, reply=True)
     self.validate_status(result)
     # Call process.
     result = execute_transform.delay("{}", 1)
     # Wait for it to complete.
     try:
         result.wait()
     except Exception:  # pylint:disable = W0703
         pass
     self.assertTrue(result.failed(), "Expected failure")
Example #30
 def birth_celery(transform, config, config_id, run_number, timeout=1000):
     """
     Set new configuration and transforms in Celery nodes, and
     birth the transforms. An initial ping is done to
     identify the number of live nodes. 
     Each node is given up to timeout seconds (1000 by default) to
     reply, but the method returns as soon as all nodes have replied.
     @param transform Either a single name - representing a single
     transform - or a list of transforms - representing a MapPyGroup.
     Sub-lists are treated as nested MapPyGroups. If None then the
     current transform is deathed and rebirthed.
     @param config Valid JSON configuration document
     @param config_id Configuration ID from client.
     @param timeout Time to wait for replies.
     @return results from Celery.
     @throws RabbitMQException if RabbitMQ cannot be contacted.
     @throws CeleryNodeException if one or more Celery 
     nodes fails to configure or birth.
     """
     num_nodes = CeleryUtilities.ping_celery_nodes()
     try:
         response = broadcast("birth",
                              arguments={
                                  "transform": transform,
                                  "configuration": config,
                                  "config_id": config_id,
                                  "run_number": run_number
                              },
                              reply=True,
                              timeout=timeout,
                              limit=num_nodes)
     except socket.error as exc:
         raise RabbitMQException(exc)
     CeleryUtilities.validate_celery_response(response)
     run_headers = []
     for machines in response:
         for name in machines.keys():
             run_headers += machines[name]["run_headers"]
     return run_headers
Example #31
    def test_birth_map_cpp(self):
        """
        Test birth can birth a MapCpp*. This is a regression of #1483

        @param self Object reference.
        """
        config_id = datetime.now().microsecond
        transform = "MapCppExampleMAUSDataInput"
        result = broadcast("birth",
                           arguments={
                               "configuration":
                               Configuration().getConfigJSON(),
                               "transform": transform,
                               "config_id": config_id,
                               "run_number": 1
                           },
                           reply=True)
        print "birth(OK): %s " % result
        for item in result:
            for value in item.values():
                self.assertEquals(value['status'], 'ok')
        return True
Example #32
 def birth(self, config_id, configuration = None,
           transform = "MapPyDoNothing", merge_configuration = False): # pylint:disable = R0201, C0301
     """
     Configure the Celery workers, via a broadcast.
     @param self Object reference.
     @param config_id Configuration ID.
     @param configuration Configuration.
     @param transform Transform specification.
     @return status of update.
     """
     if configuration == None:
         configuration = self.config_doc
     elif merge_configuration:
         my_config = json.loads(self.config_doc)
         config_user = json.loads(configuration)
         for key in config_user:
             my_config[key] = config_user[key]
         configuration = json.dumps(my_config)
     return broadcast("birth", 
         arguments={"configuration":configuration, \
                    "transform":transform, 
                    "config_id":config_id,
                    "run_number":1}, reply=True)
Example #33
def find_available_logs(app_id):
    """
    Query all log-keeping servers for logs pertaining to the given app.
    """
    found_logs = broadcast(
        "distlog_get_available_logs",
        arguments={"app_id": app_id},
        reply=True,
        timeout=taskconfig.LOGS_CELERY_BCAST_TIMEOUT,
    )
    if not found_logs:
        return LOG_SERVICE_NOT_AVAILABLE

    result = []
    for nodedict in found_logs:
        for node, logs_from_node in nodedict.items():
            if logs_from_node:
                for loginfo in logs_from_node:
                    if not isinstance(loginfo, dict):
                        raise ValueError("Unexpected loginfo entry: " "%r in found_logs %r" % (loginfo, found_logs))
                    if "mod_time" in loginfo:
                        loginfo["mod_dt"] = datetime.datetime.fromtimestamp(loginfo["mod_time"])
                    result.append(loginfo)
    return result
Example #34
 def enable_events(self, request, queryset):
     broadcast("enable_events", destination=[n.hostname for n in queryset])
Example #35
 def enable_events(self, request, queryset):
     broadcast('enable_events',
               destination=[n.hostname for n in queryset])
Example #36
    def process(self, jobs, runner='unit2 $TEST', callback=None):
        """
        ``jobs`` is a list of path.to.TestCase strings to process.
        
        ``runner`` should be defined as a command executable in bash, where $TEST is
        the current job.
        
        ``callback`` will execute a callback after each result is returned, in
        addition to returning the aggregate of all results after completion.
        """
        self.logger.info("Processing build %s", self.build_id)

        self.logger.info("Provisioning (up to) %d worker(s)", self.max_workers)
        
        actual = None
        
        while not actual:
            # We need to determine which queues are available to use
            i = inspect()
            active_queues = i.active_queues() or {}
        
            if not active_queues:
                self.logger.error('No queue workers available, retrying in 1s')
                time.sleep(1)
                continue
            
            available = [host for host, queues in active_queues.iteritems() if conf.DEFAULT_QUEUE in [q['name'] for q in queues]]
        
            if not available:
                # TODO: we should probably sleep/retry (assuming there were *any* workers)
                self.logger.info('All workers are busy, retrying in 1s')
                time.sleep(1)
                continue
        
            # Attempt to provision workers which reported as available
            actual = []
            for su_response in broadcast('mule_setup',
                             arguments={'build_id': self.build_id,
                                        'workspace': self.workspace,
                                        'script': load_script(self.workspace, 'setup')},
                             destination=available[:self.max_workers],
                             reply=True,
                             timeout=0):
                for host, message in su_response.iteritems():
                    if message.get('error'):
                        self.logger.error('%s failed to setup: %s', host, message['error'])
                    elif message.get('status') == 'ok':
                        actual.append(host)
                    if message.get('stdout'):
                        self.logger.info('stdout from %s: %s', host, message['stdout'])
                    if message.get('stderr'):
                        self.logger.info('stderr from %s: %s', host, message['stderr'])
        
            if not actual:
                # TODO: we should probably sleep/retry (assuming there were *any* workers)
                self.logger.info('Failed to provision workers (busy), retrying in 1s')
                time.sleep(1)
                continue
        
        if len(actual) != len(available):
            # We should begin running tests and possibly add more, but it's not a big deal
            pass

        self.logger.info('%d worker(s) were provisioned', len(actual))
            
        self.logger.info("Building queue of %d test job(s)", len(jobs))
        
        try:
            taskset = TaskSet(run_test.subtask(
                build_id=self.build_id,
                runner=runner,
                workspace=self.workspace,
                job='%s.%s' % (job.__module__, job.__name__),
                options={
                    # 'routing_key': 'mule-%s' % self.build_id,
                    'queue': 'mule-%s' % self.build_id,
                    # 'exchange': 'mule-%s' % self.build_id,
                }) for job in jobs)
            
            result = taskset.apply_async()

            self.logger.info("Waiting for response...")
            # response = result.join()
            # propagate=False ensures we get *all* responses        
            response = []
            try:
                for task_response in result.iterate():
                    response.append(task_response)
                    if callback:
                        callback(task_response)
            except KeyboardInterrupt, e:
                print '\nReceived keyboard interrupt, closing workers.\n'
        
        finally:
            self.logger.info("Tearing down %d worker(s)", len(actual))

            # Send off teardown task to all workers in pool
            for td_response in broadcast('mule_teardown',
                                        arguments={'build_id': self.build_id,
                                                   'workspace': self.workspace,
                                                   'script': load_script(self.workspace, 'teardown')},
                                        destination=actual,
                                        reply=True
                                    ):
                for host, message in td_response.iteritems():
                    if message.get('error'):
                        self.logger.error('%s failed to teardown: %s', host, message['error'])
                    if message.get('stdout'):
                        self.logger.info('stdout from %s: %s', host, message['stdout'])
                    if message.get('stderr'):
                        self.logger.info('stderr from %s: %s', host, message['stderr'])
        
        self.logger.info('Finished')
        
        return response
Example #37
    def process(self, jobs, runner='unit2 $TEST', callback=None):
        """
        ``jobs`` is a list of path.to.TestCase strings to process.
        
        ``runner`` should be defined as a command executable in bash, where $TEST is
        the current job.
        
        ``callback`` will execute a callback after each result is returned, in
        addition to returning the aggregate of all results after completion.
        """
        self.logger.info("Processing build %s", self.build_id)

        self.logger.info("Provisioning (up to) %d worker(s)", self.max_workers)

        actual = None

        while not actual:
            # We need to determine which queues are available to use
            i = inspect()
            active_queues = i.active_queues() or {}

            if not active_queues:
                self.logger.error('No queue workers available, retrying in 1s')
                time.sleep(1)
                continue

            available = [
                host for host, queues in active_queues.iteritems()
                if conf.DEFAULT_QUEUE in [q['name'] for q in queues]
            ]

            if not available:
                # TODO: we should probably sleep/retry (assuming there were *any* workers)
                self.logger.info('All workers are busy, retrying in 1s')
                time.sleep(1)
                continue

            # Attempt to provision workers which reported as available
            actual = []
            for su_response in broadcast(
                    'mule_setup',
                    arguments={
                        'build_id': self.build_id,
                        'workspace': self.workspace,
                        'script': load_script(self.workspace, 'setup')
                    },
                    destination=available[:self.max_workers],
                    reply=True,
                    timeout=0):
                for host, message in su_response.iteritems():
                    if message.get('error'):
                        self.logger.error('%s failed to setup: %s', host,
                                          message['error'])
                    elif message.get('status') == 'ok':
                        actual.append(host)
                    if message.get('stdout'):
                        self.logger.info('stdout from %s: %s', host,
                                         message['stdout'])
                    if message.get('stderr'):
                        self.logger.info('stderr from %s: %s', host,
                                         message['stderr'])

            if not actual:
                # TODO: we should probably sleep/retry (assuming there were *any* workers)
                self.logger.info(
                    'Failed to provision workers (busy), retrying in 1s')
                time.sleep(1)
                continue

        if len(actual) != len(available):
            # We should begin running tests and possibly add more, but it's not a big deal
            pass

        self.logger.info('%d worker(s) were provisioned', len(actual))

        self.logger.info("Building queue of %d test job(s)", len(jobs))

        try:
            taskset = TaskSet(
                run_test.subtask(
                    build_id=self.build_id,
                    runner=runner,
                    workspace=self.workspace,
                    job='%s.%s' % (job.__module__, job.__name__),
                    options={
                        # 'routing_key': 'mule-%s' % self.build_id,
                        'queue': 'mule-%s' % self.build_id,
                        # 'exchange': 'mule-%s' % self.build_id,
                    }) for job in jobs)

            result = taskset.apply_async()

            self.logger.info("Waiting for response...")
            # response = result.join()
            # propagate=False ensures we get *all* responses
            response = []
            try:
                for task_response in result.iterate():
                    response.append(task_response)
                    if callback:
                        callback(task_response)
            except KeyboardInterrupt, e:
                print '\nReceived keyboard interrupt, closing workers.\n'

        finally:
            self.logger.info("Tearing down %d worker(s)", len(actual))

            # Send off teardown task to all workers in pool
            for td_response in broadcast('mule_teardown',
                                         arguments={
                                             'build_id':
                                             self.build_id,
                                             'workspace':
                                             self.workspace,
                                             'script':
                                             load_script(
                                                 self.workspace, 'teardown')
                                         },
                                         destination=actual,
                                         reply=True):
                for host, message in td_response.iteritems():
                    if message.get('error'):
                        self.logger.error('%s failed to teardown: %s', host,
                                          message['error'])
                    if message.get('stdout'):
                        self.logger.info('stdout from %s: %s', host,
                                         message['stdout'])
                    if message.get('stderr'):
                        self.logger.info('stderr from %s: %s', host,
                                         message['stderr'])

        self.logger.info('Finished')

        return response
Example #38
 def disable_events(self, request, queryset):
     broadcast("disable_events",
               destination=[n.hostname for n in queryset])
Example #39
 def disable_events(self, request, queryset):
     broadcast('disable_events', destination=[n.hostname for n in queryset])
Example #40
         nm.SetProp("_Name", str(i) + "_test")
         mol_list_2.append(nm)
     #removing duplicates and writing new generation
     new_mols = ga_setup_selection.check_previous_gens(mol_list_2)
     ga_setup_selection.new_gen_dumper(args.population_size, ng, new_mols)
 else:
     mol_list = cPickle.load(open(p_name, "rb"))
     print len(mol_list)
     run_mols = [
         mol for mol in mol_list if mol.GetProp("_Energy") == "None"
     ]
     print len(run_mols)
     if len(run_mols) <= 1:
         print "GA has hit generation of size 1, will quit now"
         print "Shutting down workers"
         broadcast("shutdown")
         exit()
     all_mol_coords = ga_setup_selection.coords_generator(run_mols)
     if args.gauss_calc == "reorg_en":
         gen_result = ga_gauss.reorg_en_calc(all_mol_coords)
     if args.gauss_calc == "hl_diff":
         gen_result = ga_gauss.hl_diff_calc(all_mol_coords)
     if args.gauss_calc == "max_dipole":
         gen_result = ga_gauss.max_dipole_calc(all_mol_coords)
     if args.gauss_calc == "lumo":
         gen_result = ga_gauss.lumo_calc(all_mol_coords)
     print gen_result
     if args.gauss_calc in ("lumo", "max_dipole", "hl_diff"):
         ml_sorted_by_en = ga_setup_selection.result_matcher(
             gen_result, mol_list, maximise="True")
     if args.gauss_calc == "reorg_en":
Example #41
 def test_broadcast_limit(self):
     control.broadcast("foobarbaz1", arguments=[], limit=None,
             destination=[1, 2, 3])
     self.assertIn("foobarbaz1", MockBroadcastPublisher.sent)
Example #42
 def test_broadcast(self):
     control.broadcast("foobarbaz", arguments=[])
     self.assertIn("foobarbaz", MockBroadcastPublisher.sent)
Example #43
 def shutdown_nodes(self, request, queryset):
     broadcast("shutdown", destination=[n.hostname for n in queryset])
Example #44
def ping_workers():
    from celery.task.control import broadcast
    broadcast('report_alive')
Example #45
 def shutdown_nodes(self, request, queryset):
     broadcast('shutdown', destination=[n.hostname for n in queryset])
Example #46
def shutdown_workers(workers):
    return control.broadcast('shutdown', destination=workers)
Example #47
 def handle(self, *args, **options):
     self.stdout.write("Sending shutdown event")
     broadcast("shutdown")
     self.stdout.write("Shutdown event sent")
Example #48
def set_rate_limit(task_name, rate_limit, workers=None):
    # Broadcast a runtime rate-limit change for the given task to the
    # selected workers (all workers when destination is None).
    return control.broadcast("rate_limit",
                             {"task_name": task_name,
                              "rate_limit": rate_limit}, reply=True,
                             destination=workers)
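A hypothetical call, reusing the '200/m' value the Celery documentation uses for rate_limit and assuming a registered task named 'myapp.mytask':

replies = set_rate_limit('myapp.mytask', '200/m')
# With reply=True this is the usual list of {hostname: reply} dicts.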