def setUp(self):
    django.setup()

    scheduler_mgr.config.is_paused = False

    self.agent_1 = Agent('agent_1', 'host_1')
    self.agent_2 = Agent('agent_2', 'host_2')
    self.agent_3 = Agent('agent_3', 'host_2')  # Will represent a new agent ID for host 2

    self.node_1 = node_test_utils.create_node(hostname='host_1')
def setUp(self):
    django.setup()

    reset_error_cache()

    self.framework_id = '1234'
    Scheduler.objects.initialize_scheduler()
    Scheduler.objects.update(num_message_handlers=0)  # Prevent message handler tasks from scheduling

    self._client = MagicMock()

    scheduler_mgr.sync_with_database()
    scheduler_mgr.update_from_mesos(framework_id=self.framework_id)
    resource_mgr.clear()
    job_exe_mgr.clear()

    self.agent_1 = Agent('agent_1', 'host_1')
    self.agent_2 = Agent('agent_2', 'host_2')
    self.agent_3 = Agent('agent_3', 'host_2')
    node_mgr.clear()
    node_mgr.register_agents([self.agent_1, self.agent_2])
    node_mgr.sync_with_database(scheduler_mgr.config)
    # Ignore initial cleanup, health check, and image pull tasks
    for node in node_mgr.get_nodes():
        node._last_health_task = now()
        node._initial_cleanup_completed()
        node._is_image_pulled = True
        node._update_state()
        if node.agent_id == 'agent_1':
            self.node_1_id = node.id
    cleanup_mgr.update_nodes(node_mgr.get_nodes())
    self.node_1 = Node.objects.get(id=self.node_1_id)
    # Ignore system tasks
    system_task_mgr._is_db_update_completed = True

    self.queue_1 = queue_test_utils.create_queue(cpus_required=4.0, mem_required=1024.0, disk_in_required=100.0,
                                                 disk_out_required=200.0, disk_total_required=300.0)
    self.queue_2 = queue_test_utils.create_queue(cpus_required=8.0, mem_required=512.0, disk_in_required=400.0,
                                                 disk_out_required=45.0, disk_total_required=445.0)
    self.queue_large = queue_test_utils.create_queue(resources=NodeResources([Cpus(125.0), Mem(12048.0),
                                                                              Disk(12048.0)]))

    job_type_mgr.sync_with_database()
def setUp(self):
    django.setup()

    resource_mgr.clear()

    self.agent_1 = Agent('agent_1', 'host_1')
    self.agent_2 = Agent('agent_2', 'host_2')
    self.framework_id = '1234'

    offer_1 = ResourceOffer('offer_1', self.agent_1.agent_id, self.framework_id,
                            NodeResources([Cpus(2.0), Mem(1024.0), Disk(1024.0)]), now(), None)
    offer_2 = ResourceOffer('offer_2', self.agent_2.agent_id, self.framework_id,
                            NodeResources([Cpus(25.0), Mem(2048.0), Disk(2048.0)]), now(), None)
    resource_mgr.add_new_offers([offer_1, offer_2])
    resource_mgr.refresh_agent_resources([], now())
def setUp(self):
    django.setup()

    self.maxDiff = None

    add_message_backend(AMQPMessagingBackend)

    from scheduler.models import Scheduler
    Scheduler.objects.create(id=1)
    scheduler_mgr.config.is_paused = False

    self.agent_1 = Agent('agent_1', 'host_1')
    self.agent_2 = Agent('agent_2', 'host_2')
    self.agent_3 = Agent('agent_3', 'host_3')
    self.agent_4 = Agent('agent_4', 'host_4')
    self.agent_5 = Agent('agent_5', 'host_5')
    self.agent_6 = Agent('agent_6', 'host_6')
    self.agent_7 = Agent('agent_7', 'host_7')
    self.agent_8 = Agent('agent_8', 'host_8')
    self.agent_9 = Agent('agent_9', 'host_9')
    self.agent_10 = Agent('agent_10', 'host_10')
def resourceOffers(self, driver, offers):
    """Invoked when resources have been offered to this framework. A single offer will only contain resources from a
    single slave. Resources associated with an offer will not be re-offered to _this_ framework until either (a) this
    framework has rejected those resources (see SchedulerDriver.launchTasks) or (b) those resources have been
    rescinded (see Scheduler.offerRescinded). Note that resources may be concurrently offered to more than one
    framework at a time (depending on the allocator being used). In that case, the first framework to launch tasks
    using those resources will be able to use them, while the other frameworks will have those resources rescinded
    (or if a framework has already launched tasks with those resources then those tasks will fail with a TASK_LOST
    status and a message saying as much).

    See documentation for :meth:`mesos_api.mesos.Scheduler.resourceOffers`.
    """

    started = now()

    agents = {}
    resource_offers = []
    total_resources = NodeResources()
    for offer in offers:
        offer_id = offer.id.value
        agent_id = offer.slave_id.value
        framework_id = offer.framework_id.value
        hostname = offer.hostname
        resource_list = []
        for resource in offer.resources:
            if resource.type == 0:  # This is the SCALAR type
                resource_list.append(ScalarResource(resource.name, resource.scalar.value))
        resources = NodeResources(resource_list)
        total_resources.add(resources)
        agents[agent_id] = Agent(agent_id, hostname)
        resource_offers.append(ResourceOffer(offer_id, agent_id, framework_id, resources, started))

    node_mgr.register_agents(agents.values())
    resource_mgr.add_new_offers(resource_offers)

    num_offers = len(resource_offers)
    logger.info('Received %d offer(s) with %s from %d node(s)', num_offers, total_resources, len(agents))
    scheduler_mgr.add_new_offer_count(num_offers)

    duration = now() - started
    msg = 'Scheduler resourceOffers() took %.3f seconds'
    if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
        logger.warning(msg, duration.total_seconds())
    else:
        logger.debug(msg, duration.total_seconds())
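# --- Illustrative sketch (not part of the scheduler code above) -------------------------------------------------
# The docstring of resourceOffers() explains that each Mesos offer carries SCALAR resources from a single agent and
# that the handler sums them into per-agent totals. The standalone sketch below mimics that aggregation step with
# plain Python stand-ins (FakeOffer, FakeResource, FakeScalar are hypothetical stubs, not Mesos API types), assuming
# type code 0 means SCALAR, as in the handler above.
from collections import namedtuple

FakeScalar = namedtuple('FakeScalar', ['value'])
FakeResource = namedtuple('FakeResource', ['name', 'type', 'scalar'])
FakeOffer = namedtuple('FakeOffer', ['agent_id', 'resources'])


def aggregate_scalar_resources(offers):
    """Sum SCALAR resources per agent, mirroring the per-offer loop in resourceOffers()."""
    totals = {}  # {agent_id: {resource_name: value}}
    for offer in offers:
        agent_totals = totals.setdefault(offer.agent_id, {})
        for resource in offer.resources:
            if resource.type == 0:  # SCALAR
                agent_totals[resource.name] = agent_totals.get(resource.name, 0.0) + resource.scalar.value
    return totals


# Example: two offers from the same agent are combined into one resource total.
fake_offers = [
    FakeOffer('agent_1', [FakeResource('cpus', 0, FakeScalar(2.0)), FakeResource('mem', 0, FakeScalar(1024.0))]),
    FakeOffer('agent_1', [FakeResource('cpus', 0, FakeScalar(1.5))]),
]
assert aggregate_scalar_resources(fake_offers) == {'agent_1': {'cpus': 3.5, 'mem': 1024.0}}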
def offers(self, offers):
    """Invoked when resources have been offered to this framework. A single offer will only contain resources from a
    single agent. Resources associated with an offer will not be re-offered to _this_ framework until either (a) this
    framework has rejected those resources or (b) those resources have been rescinded. Note that resources may be
    concurrently offered to more than one framework at a time (depending on the allocator being used). In that case,
    the first framework to launch tasks using those resources will be able to use them, while the other frameworks
    will have those resources rescinded (or if a framework has already launched tasks with those resources then those
    tasks will fail with a TASK_LOST status and a message saying as much).
    """

    started = now()

    agents = {}
    offered_nodes = []
    resource_offers = []
    total_resources = NodeResources()
    skipped_roles = set()
    for offer in offers:
        scale_offer = from_mesos_offer(offer)
        offer_id = scale_offer.id.value
        agent_id = scale_offer.agent_id.value
        framework_id = scale_offer.framework_id.value
        hostname = scale_offer.hostname
        offered_nodes.append(hostname)
        # Decline and ignore offers while the scheduler is paused
        if scheduler_mgr.config.is_paused:
            offer.decline()
            continue
        resource_list = []
        for resource in scale_offer.resources:
            # Only accept resources that are of SCALAR type and have a role matching our accept list
            if resource.type == RESOURCE_TYPE_SCALAR:
                if resource.role in settings.ACCEPTED_RESOURCE_ROLE:
                    logger.debug('Received scalar resource %s with value %i associated with role %s',
                                 resource.name, resource.scalar.value, resource.role)
                    resource_list.append(ScalarResource(resource.name, resource.scalar.value))
                else:
                    skipped_roles.add(resource.role)
                    offer.decline()
        logger.debug('Number of resources: %i', len(resource_list))

        # Only register the agent if the offer contains usable resources
        if len(resource_list) > 0:
            resources = NodeResources(resource_list)
            total_resources.add(resources)
            agents[agent_id] = Agent(agent_id, hostname)
            resource_offers.append(ResourceOffer(offer_id, agent_id, framework_id, resources, started, offer))

    logger.debug('Offer analysis complete with %i resource offers.', len(resource_offers))
    node_mgr.register_agents(agents.values())
    logger.debug('Agents registered.')
    resource_mgr.add_new_offers(resource_offers)
    logger.debug('Resource offers added.')
    Node.objects.update_node_offers(offered_nodes, now())
    logger.debug('Node offer times updated.')

    num_offers = len(resource_offers)
    logger.info('Received %d offer(s) with %s from %d node(s)', num_offers, total_resources, len(agents))
    if len(skipped_roles):
        logger.warning('Skipped offers from roles that are not marked as accepted: %s', ','.join(skipped_roles))
    scheduler_mgr.add_new_offer_count(num_offers)

    duration = now() - started
    msg = 'Scheduler resourceOffers() took %.3f seconds'
    if duration > ScaleScheduler.NORMAL_WARN_THRESHOLD:
        logger.warning(msg, duration.total_seconds())
    else:
        logger.debug(msg, duration.total_seconds())
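# --- Illustrative sketch (not part of the scheduler code above) -------------------------------------------------
# offers() keeps only SCALAR resources whose role is on the accepted list, declines the rest, and records which
# roles were skipped so they can be reported in a warning. The standalone sketch below reproduces that partitioning
# decision with plain Python stand-ins (ACCEPTED_ROLES and the tuples below are hypothetical examples, not Scale
# settings or Mesos types).
ACCEPTED_ROLES = {'*', 'scale'}  # assumed accept list, analogous to settings.ACCEPTED_RESOURCE_ROLE


def split_resources_by_role(resources, accepted_roles=frozenset(ACCEPTED_ROLES)):
    """Return (accepted, skipped_roles) the same way the offers() loop partitions an offer's resources."""
    accepted = []
    skipped_roles = set()
    for name, value, role in resources:  # each resource as a (name, value, role) tuple
        if role in accepted_roles:
            accepted.append((name, value))
        else:
            skipped_roles.add(role)
    return accepted, skipped_roles


# Example: the 'gpu-team' resource is skipped and its role reported, mirroring the warning log in offers().
accepted, skipped = split_resources_by_role([('cpus', 4.0, '*'), ('mem', 2048.0, 'scale'),
                                             ('cpus', 8.0, 'gpu-team')])
assert accepted == [('cpus', 4.0), ('mem', 2048.0)]
assert skipped == {'gpu-team'}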