def sync_with_database(self, master_hostname, master_port):
    """Refreshes the in-memory node models from the database and registers any agents that Mesos reports under an
    agent ID this object has not seen before

    :param master_hostname: The name of the Mesos master host
    :type master_hostname: str
    :param master_port: The port used by the Mesos master
    :type master_port: int
    """

    # Snapshot the shared state under the lock so the slow Mesos/database calls below run without holding it
    with self._lock:
        pending_agent_ids = set(self._new_agent_ids)
        known_node_ids = [known.id for known in self._nodes.values()]

    # Register every agent reported by Mesos whose ID is in the pending batch
    # TODO: refactor register_node() to handle multiple nodes at once
    registered_nodes = [
        Node.objects.register_node(agent.hostname, agent.port, agent.slave_id)
        for agent in api.get_slaves(master_hostname, master_port)
        if agent.slave_id in pending_agent_ids
    ]

    # Re-read the models of the nodes that were already known
    refreshed_nodes = list(Node.objects.filter(id__in=known_node_ids).iterator())

    # Publish the results: newly registered nodes first, then refreshed ones, so a refreshed model wins when both
    # share an agent ID
    with self._lock:
        self._new_agent_ids -= pending_agent_ids  # This batch of agent IDs has been handled
        self._nodes = {}
        for current in registered_nodes + refreshed_nodes:
            current.is_online = current.slave_id in self._online_nodes
            self._nodes[current.slave_id] = current
def get(self, request):
    """Retrieves the list of all nodes with execution status and returns it in JSON form

    The online flag of each node status is derived from the hostnames currently reported by the Mesos master. If the
    master cannot be queried, the failure is logged and every node is reported as offline rather than failing the
    request.

    :param request: the HTTP GET request
    :type request: :class:`rest_framework.request.Request`
    :rtype: :class:`rest_framework.response.Response`
    :returns: the HTTP response to send back to the user
    """

    # Get a list of all node status counts
    # NOTE(review): 'PT3H0M0S' is presumably the default look-back duration for 'started' - confirm against
    # rest_util.parse_timestamp()
    started = rest_util.parse_timestamp(request, 'started', 'PT3H0M0S')
    ended = rest_util.parse_timestamp(request, 'ended', required=False)
    node_statuses = Node.objects.get_status(started, ended)

    # Get the hostnames of the online nodes
    try:
        sched = Scheduler.objects.get_master()
        slaves = mesos_api.get_slaves(sched.master_hostname, sched.master_port)
        online_hostnames = {s.hostname for s in slaves}
    except Exception:
        # Best effort: catch Exception (not a bare except) so SystemExit/KeyboardInterrupt still propagate
        logger.exception('Unable to fetch nodes online status')
        online_hostnames = set()

    # Add the online status to each node
    for node_status in node_statuses:
        node_status.is_online = node_status.node.hostname in online_hostnames

    page = rest_util.perform_paging(request, node_statuses)
    serializer = NodeStatusListSerializer(page, context={'request': request})
    return Response(serializer.data, status=status.HTTP_200_OK)
def list(self, request):
    """Retrieves the list of all nodes with execution status and returns it in JSON form

    The online flag of each node status is derived from the hostnames currently reported by the Mesos master. If the
    master cannot be queried, the failure is logged and every node is reported as offline rather than failing the
    request.

    :param request: the HTTP GET request
    :type request: :class:`rest_framework.request.Request`
    :rtype: :class:`rest_framework.response.Response`
    :returns: the HTTP response to send back to the user
    :raises Http404: if the request was made against API version 'v5'
    """

    # This endpoint is not served for API version v5
    if request.version == 'v5':
        raise Http404

    # Get a list of all node status counts
    # NOTE(review): 'PT3H0M0S' is presumably the default look-back duration for 'started' - confirm against
    # rest_util.parse_timestamp()
    started = rest_util.parse_timestamp(request, 'started', 'PT3H0M0S')
    ended = rest_util.parse_timestamp(request, 'ended', required=False)
    node_statuses = Node.objects.get_status(started, ended)

    # Get the hostnames of the online nodes
    try:
        sched = Scheduler.objects.get_master()
        slaves = mesos_api.get_slaves(sched.master_hostname, sched.master_port)
        online_hostnames = {s.hostname for s in slaves}
    except Exception:
        # Best effort: catch Exception (not a bare except) so SystemExit/KeyboardInterrupt still propagate
        logger.exception('Unable to fetch nodes online status')
        online_hostnames = set()

    # Add the online status to each node
    for node_status in node_statuses:
        node_status.is_online = node_status.node.hostname in online_hostnames

    page = self.paginate_queryset(node_statuses)
    serializer = self.get_serializer(page, many=True)
    return self.get_paginated_response(serializer.data)
def sync_with_database(self, master_hostname, master_port):
    """Syncs with the database to retrieve updated node models and queries Mesos for unknown agent IDs

    The method works in three phases: it snapshots the shared state under the lock, performs the Mesos and database
    queries without holding the lock, and then re-acquires the lock to apply the results.

    :param master_hostname: The name of the Mesos master host
    :type master_hostname: string
    :param master_port: The port used by the Mesos master
    :type master_port: int
    """

    # Get existing node IDs and hostnames, and new/unknown agent IDs
    # Everything is copied here because it is read after the lock is released; in particular the hostnames must be
    # a real snapshot (a set), not a live view of self._nodes
    with self._lock:
        new_agent_ids = set(self._new_agent_ids)
        node_hostnames = set(self._nodes)
        node_ids = [node.id for node in self._nodes.values()]

    # Query Mesos to get node details for unknown agent IDs
    # Unknown agent IDs are either existing nodes with a new agent ID or entirely new nodes
    # TODO: refactor register_node() to handle multiple nodes at once
    # TODO: consider refactoring node model to remove port and agent/slave ID
    nodes_with_new_agent_id = {}  # {hostname: slave_info}
    new_node_models = []
    for slave_info in api.get_slaves(master_hostname, master_port):
        if slave_info.slave_id in new_agent_ids:
            node_model = Node.objects.register_node(slave_info.hostname, slave_info.port, slave_info.slave_id)
            if slave_info.hostname in node_hostnames:
                # New agent ID for existing node
                nodes_with_new_agent_id[slave_info.hostname] = slave_info
            else:
                # Entirely new node
                new_node_models.append(node_model)

    # Query database for existing node details (materialized here so the query runs outside the lock)
    existing_node_models = list(Node.objects.filter(id__in=node_ids).iterator())

    with self._lock:
        # Add new nodes
        for node_model in new_node_models:
            logger.info('New node %s registered with agent ID %s', node_model.hostname, node_model.slave_id)
            self._nodes[node_model.hostname] = SchedulerNode(node_model.slave_id, node_model)
            self._agent_ids[node_model.slave_id] = node_model.hostname

        # Update nodes with new agent IDs
        for hostname, slave_info in nodes_with_new_agent_id.items():
            old_agent_id = self._nodes[hostname].agent_id
            # For is_online, check if new agent ID is still in set or gone (i.e. removed by lost_node())
            self._nodes[hostname].update_from_mesos(agent_id=slave_info.slave_id, port=slave_info.port,
                                                    is_online=(slave_info.slave_id in self._new_agent_ids))
            # Re-key the agent ID -> hostname mapping from the old agent ID to the new one
            del self._agent_ids[old_agent_id]
            self._agent_ids[slave_info.slave_id] = hostname
            logger.info('Node %s registered with new agent ID %s', hostname, slave_info.slave_id)

        # Update nodes from database models
        for node_model in existing_node_models:
            if node_model.hostname in self._nodes:
                node = self._nodes[node_model.hostname]
                node.update_from_model(node_model)
                if node.should_be_removed():
                    del self._agent_ids[node.agent_id]
                    del self._nodes[node_model.hostname]
            else:
                # The database returned a model whose hostname is no longer tracked in self._nodes
                logger.error('Node %s appears to have been removed from the database', node_model.hostname)

        self._new_agent_ids -= new_agent_ids  # Batch of new agent IDs has been processed