Exemple #1
0
    def sync_with_database(self, master_hostname, master_port):
        """Refreshes the tracked node models from the database and registers agents reported by Mesos

        Agent IDs currently held in the new/unknown set are looked up in the Mesos agent list and registered as
        nodes. The node dict is then rebuilt, keyed by agent (slave) ID, from those registered nodes followed by
        the database models of the nodes that were already tracked.

        :param master_hostname: The name of the Mesos master host
        :type master_hostname: str
        :param master_port: The port used by the Mesos master
        :type master_port: int
        """

        # Copy the shared state while holding the lock; the Mesos and database calls below run without it
        with self._lock:
            pending_agent_ids = set(self._new_agent_ids)
            tracked_ids = [tracked.id for tracked in self._nodes.values()]

        # Register each agent reported by Mesos whose ID is in the pending batch
        # TODO: refactor register_node() to handle multiple nodes at once
        refreshed = [Node.objects.register_node(agent.hostname, agent.port, agent.slave_id)
                     for agent in api.get_slaves(master_hostname, master_port)
                     if agent.slave_id in pending_agent_ids]

        # Then append the database models for the nodes that were already tracked
        refreshed.extend(Node.objects.filter(id__in=tracked_ids).iterator())

        # Apply the results: drop the processed batch of agent IDs and rebuild the node dict
        with self._lock:
            self._new_agent_ids -= pending_agent_ids
            self._nodes = rebuilt = {}
            for model in refreshed:
                model.is_online = model.slave_id in self._online_nodes
                rebuilt[model.slave_id] = model
Exemple #2
0
    def get(self, request):
        """Retrieves the list of all nodes with execution status and returns it in JSON form

        The online status of each node is looked up on a best-effort basis: if the scheduler master or the Mesos
        agent list cannot be fetched, the failure is logged and every node is reported as offline.

        :param request: the HTTP GET request
        :type request: :class:`rest_framework.request.Request`
        :rtype: :class:`rest_framework.response.Response`
        :returns: the HTTP response to send back to the user
        """

        # Get a list of all node status counts ('started' is given 'PT3H0M0S' as its default, 'ended' is optional)
        started = rest_util.parse_timestamp(request, 'started', 'PT3H0M0S')
        ended = rest_util.parse_timestamp(request, 'ended', required=False)
        node_statuses = Node.objects.get_status(started, ended)

        # Get the hostnames of the online nodes
        try:
            sched = Scheduler.objects.get_master()
            slaves = mesos_api.get_slaves(sched.master_hostname, sched.master_port)
            online_hostnames = {s.hostname for s in slaves}
        except Exception:
            # Catch Exception rather than using a bare except so that SystemExit and KeyboardInterrupt still
            # propagate; any ordinary failure falls back to an empty set of online hosts
            logger.exception('Unable to fetch nodes online status')
            online_hostnames = set()

        # Add the online status to each node
        for node_status in node_statuses:
            node_status.is_online = node_status.node.hostname in online_hostnames

        page = rest_util.perform_paging(request, node_statuses)
        serializer = NodeStatusListSerializer(page, context={'request': request})
        return Response(serializer.data, status=status.HTTP_200_OK)
Exemple #3
0
    def list(self, request):
        """Retrieves the list of all nodes with execution status and returns it in JSON form

        The online status of each node is looked up on a best-effort basis: if the scheduler master or the Mesos
        agent list cannot be fetched, the failure is logged and every node is reported as offline.

        :param request: the HTTP GET request
        :type request: :class:`rest_framework.request.Request`
        :rtype: :class:`rest_framework.response.Response`
        :returns: the HTTP response to send back to the user
        :raises Http404: if the request's API version is 'v5'
        """

        if request.version == 'v5':
            raise Http404

        # Get a list of all node status counts ('started' is given 'PT3H0M0S' as its default, 'ended' is optional)
        started = rest_util.parse_timestamp(request, 'started', 'PT3H0M0S')
        ended = rest_util.parse_timestamp(request, 'ended', required=False)
        node_statuses = Node.objects.get_status(started, ended)

        # Get the hostnames of the online nodes
        try:
            sched = Scheduler.objects.get_master()
            slaves = mesos_api.get_slaves(sched.master_hostname,
                                          sched.master_port)
            online_hostnames = {s.hostname for s in slaves}
        except Exception:
            # Catch Exception rather than using a bare except so that SystemExit and KeyboardInterrupt still
            # propagate; any ordinary failure falls back to an empty set of online hosts
            logger.exception('Unable to fetch nodes online status')
            online_hostnames = set()

        # Add the online status to each node
        for node_status in node_statuses:
            node_status.is_online = node_status.node.hostname in online_hostnames

        page = self.paginate_queryset(node_statuses)
        serializer = self.get_serializer(page, many=True)
        return self.get_paginated_response(serializer.data)
Exemple #4
0
    def sync_with_database(self, master_hostname, master_port):
        """Registers agents reported by Mesos and reloads the tracked node models from the database

        Every agent returned by Mesos whose ID is in the new/unknown set gets registered as a node. Afterwards the
        node dict is replaced by a fresh one, keyed by agent (slave) ID, holding those registered nodes followed by
        the database models of the previously tracked nodes.

        :param master_hostname: The name of the Mesos master host
        :type master_hostname: str
        :param master_port: The port used by the Mesos master
        :type master_port: int
        """

        # Take copies of the shared state under the lock; the external queries below are made without it
        with self._lock:
            unknown_agent_ids = set(self._new_agent_ids)
            known_node_ids = [known.id for known in self._nodes.values()]

        # Register the agents from Mesos that belong to the unknown batch
        # TODO: refactor register_node() to handle multiple nodes at once
        register = Node.objects.register_node
        fresh_models = [
            register(info.hostname, info.port, info.slave_id)
            for info in api.get_slaves(master_hostname, master_port)
            if info.slave_id in unknown_agent_ids
        ]

        # Follow them with the database models of the nodes already being tracked
        known_models = Node.objects.filter(id__in=known_node_ids)
        fresh_models.extend(known_models.iterator())

        # Apply the results: remove the processed agent IDs and rebuild the node dict
        with self._lock:
            self._new_agent_ids -= unknown_agent_ids
            self._nodes = by_agent_id = {}
            for model in fresh_models:
                model.is_online = model.slave_id in self._online_nodes
                by_agent_id[model.slave_id] = model
Exemple #5
0
    def sync_with_database(self, master_hostname, master_port):
        """Syncs with the database to retrieve updated node models and queries Mesos for unknown agent IDs

        The work happens in three phases: the shared state is copied while holding the lock, Mesos and the database
        are queried without holding it, and the results are then applied while holding the lock again.

        :param master_hostname: The name of the Mesos master host
        :type master_hostname: string
        :param master_port: The port used by the Mesos master
        :type master_port: int
        """

        # Get existing node IDs and hostnames, and new/unknown agent IDs
        with self._lock:
            new_agent_ids = set(self._new_agent_ids)
            node_ids = [node.id for node in self._nodes.values()]
            # Copy the hostnames into a set: they are consulted below after the lock has been released, so this
            # must be a snapshot and not a live view of self._nodes; a set also keeps the membership test cheap
            node_hostnames = set(self._nodes)

        # Query Mesos to get node details for unknown agent IDs
        # Unknown agent IDs are either existing nodes with a new agent ID or entirely new nodes
        # TODO: refactor register_node() to handle multiple nodes at once
        # TODO: consider refactoring node model to remove port and agent/slave ID
        nodes_with_new_agent_id = {}  # {hostname: slave_info}
        new_node_models = []
        for slave_info in api.get_slaves(master_hostname, master_port):
            if slave_info.slave_id in new_agent_ids:
                node_model = Node.objects.register_node(
                    slave_info.hostname, slave_info.port, slave_info.slave_id)
                if slave_info.hostname in node_hostnames:
                    # New agent ID for existing node
                    nodes_with_new_agent_id[slave_info.hostname] = slave_info
                else:
                    # Entirely new node
                    new_node_models.append(node_model)

        # Query database for existing node details (materialized here so the query runs outside the lock)
        existing_node_models = list(
            Node.objects.filter(id__in=node_ids).iterator())

        with self._lock:
            # Add new nodes
            for node_model in new_node_models:
                logger.info('New node %s registered with agent ID %s',
                            node_model.hostname, node_model.slave_id)
                self._nodes[node_model.hostname] = SchedulerNode(
                    node_model.slave_id, node_model)
                self._agent_ids[node_model.slave_id] = node_model.hostname
            # Update nodes with new agent IDs
            for hostname, slave_info in nodes_with_new_agent_id.items():
                old_agent_id = self._nodes[hostname].agent_id
                # For is_online, check if new agent ID is still in set or gone (i.e. removed by lost_node())
                self._nodes[hostname].update_from_mesos(
                    agent_id=slave_info.slave_id,
                    port=slave_info.port,
                    is_online=(slave_info.slave_id in self._new_agent_ids))
                # Re-point the agent ID -> hostname mapping from the old agent ID to the new one
                del self._agent_ids[old_agent_id]
                self._agent_ids[slave_info.slave_id] = hostname
                logger.info('Node %s registered with new agent ID %s',
                            hostname, slave_info.slave_id)
            # Update nodes from database models
            for node_model in existing_node_models:
                if node_model.hostname in self._nodes:
                    node = self._nodes[node_model.hostname]
                    node.update_from_model(node_model)
                    if node.should_be_removed():
                        # Drop the node from both the agent ID mapping and the node dict
                        del self._agent_ids[node.agent_id]
                        del self._nodes[node_model.hostname]
                else:
                    logger.error(
                        'Node %s appears to have been removed from the database',
                        node_model.hostname)
            self._new_agent_ids -= new_agent_ids  # Batch of new agent IDs has been processed