def _stop_server(self, context, body=None):
        """Noout and stop all osd service, then stop the server.
           body = {u'servers': [{u'cluster_id': 1, u'id': u'1'},
                        {u'cluster_id': 1, u'id': u'2'}]}
        """
        LOG.info("DEBUG in stop server in scheduler manager.")

        server_list = body['servers']
        need_change_mds = False
        for ser in server_list:
            ser_ref = db.init_node_get(context, ser['id'])
            ser['host'] = ser_ref['host']
            if ser_ref['mds'] == 'yes':
                need_change_mds = True

        active_monitor = self._get_active_monitor(context)
        LOG.info("stop_server of scheduer manager %s" % server_list)
        for item in server_list:
            res = db.init_node_get(context, item['id'])
            self._start_stop(context, item['id'])
            res = self._agent_rpcapi.stop_server(context,
                                                 item['id'],
                                                 res['host'])

            self._agent_rpcapi.update_osd_state(context,
                                                active_monitor['host'])

        LOG.info('need_change_mds = %s' % need_change_mds)

        if need_change_mds:
            self.add_mds(context, server_list)
        return True
    def get_server(self, context, id):
        LOG.info('get_server in conductor manager')
        server = db.init_node_get(context, id)
        ret = self._set_error(context)
        if ret:
            server['status'] = 'unavailable'

        LOG.info("CEPH_LOG log server %s" % server)
        return server
    def _add_servers(self, context, body=None):
        """Add the servers into ceph cluster.

           It's notable that, the type of body['servers']
           looks as below:

                [
                    {u'is_storage': True,
                     u'is_monitor': True,
                     u'id': u'1',
                     u'zone_id': u'1'},
                    {u'is_storage': True,
                     u'is_monitor': False,
                     u'id': u'2',
                     u'zone_id': u'2'}
                ]

           Here we also need to fetch info from DB.
        """
        def _update_ssh_key():
            server_list = db.init_node_get_all(context)
            for ser in server_list:
                if ser['status'] == 'Active' or ser['status'] == 'available':
                    self._agent_rpcapi.update_ssh_keys(context, ser['host'])

        server_list = body['servers']
        for ser in server_list:
            ser_ref = db.init_node_get(context, ser['id'])
            ser['host'] = ser_ref['host']
            ser['cluster_id'] = self._agent_rpcapi.cluster_id(context,
                                                              ser['host'])
            # If the node is no longer a monitor, update the role
            # defined in server.manifest.
            if not ser['is_monitor']:
                if 'monitor' in ser_ref['type']:
                    values = {'type': 'storage'}
                    db.init_node_update(context, ser_ref['id'], values)

        self._update_server_list_status(context, server_list, 'running')
        _update_ssh_key()

        self.add_monitor(context, server_list)

        # Begin to add osds.
        LOG.info("start to add storage")
        self.add_osd(context, server_list)

        self._judge_drive_ext_threshold(context)
        return True
    def _remove_servers(self, context, body=None):
        """
                [
                    {u'remove_storage': True,
                     u'remove_monitor': True,
                     u'id': u'1',
                     u'zone_id': u'1'},
                    {u'remove_storage': True,
                     u'remove_monitor': False,
                     u'id': u'2',
                     u'zone_id': u'2'}
                ]

        """
        server_list = body['servers']
        LOG.info('remove_servers = %s ' % server_list)
        if len(server_list) <= 0:
            return True

        need_change_mds = False
        for ser in server_list:
            ser_ref = db.init_node_get(context, ser['id'])
            ser['host'] = ser_ref['host']
            ser['type'] = ser_ref['type']
            ser['remove_monitor'] = 'monitor' in ser['type']
            ser['status'] = ser_ref['status']
            ser['mds'] = ser_ref['mds']
            if ser['mds'] == 'yes':
                need_change_mds = True

        LOG.info("start to remove monitor")
        self.remove_monitors(context, server_list)

        LOG.info("start to remove storage")
        self.remove_osd(context, server_list)

        if need_change_mds:
            LOG.info("start to remove mds")
            self.remove_mds(context, server_list)
            self.add_mds(context, server_list)
        return True
    def _start_server(self, context, body=None):
        """Start all osd service, then start the server.
           body = {u'servers': [{u'cluster_id': 1, u'id': u'1'},
                        {u'cluster_id': 1, u'id': u'2'}]}
        """
        LOG.info("DEBUG in start server in scheduler manager.")
        server_list = body['servers']
        active_monitor = self._get_active_monitor(context)
        for item in server_list:
            #host = self._get_monitor_by_cluster_id(context, item['cluster_id'])
            res = db.init_node_get(context, item['id'])
            if not res:
                LOG.error("No available node for node id %s" % item['id'])
                e = StorageServerStartFailed()
                LOG.error("%s: %s" % (e.code, e.message))
                raise e

            self._start_start(context, item['id'])
            res = self._agent_rpcapi.start_server(context, item['id'], res['host'])
            self._agent_rpcapi.update_osd_state(context, active_monitor['host'])
    def create_cluster(self, context, server_list):
        """Add the servers into ceph cluster.

        It's notable that, the type of body['servers']
        looks as below:

           [
               {u'is_storage': True,
                u'is_monitor': True,
                u'id': u'1',
                u'zone_id': u'1'},
               {u'is_storage': True,
                u'is_monitor': False,
                u'id': u'2',
                u'zone_id': u'2'}
           ]

        Here we also need to fetch info from DB.
        """
        # Add hostname here.
        for ser in server_list:
            ser_ref = db.init_node_get(context, ser['id'])
            ser['host'] = ser_ref['host']

        # Use mkcephfs to set up ceph system.
        monitor_node = self._select_monitor(context, server_list)
        LOG.info('Choose monitor node = %s' % monitor_node)

        def _update(status):
            LOG.debug('status = %s' % status)
            self._update_server_list_status(context,
                                            server_list,
                                            status)
            if status.lower().find('error') != -1:
                raise

        # Set at least 3 mons when creating cluster
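        # For example, with four servers of which only one has
        # is_monitor=True, two more servers are promoted to monitors so the
        # cluster starts with three.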
        nums = len(server_list)
        if nums >= 3:
            count = 0
            rest_mon_num = 0
            for ser in server_list:
                if ser['is_monitor']:
                    count += 1
            if count < 3:
                rest_mon_num = 3 - count
            if rest_mon_num > 0:
                for ser in server_list:
                    if not ser['is_monitor']:
                        ser['is_monitor'] = True
                        rest_mon_num -= 1
                        if rest_mon_num <= 0:
                            break

        # Clean ceph data.
        def __clean_data(host):
            self._agent_rpcapi.update_ssh_keys(context, host)
            self._agent_rpcapi.clean_ceph_data(context, host)

        def __create_crushmap(context, server_list, host):
            self._agent_rpcapi.create_crushmap(context,
                                               server_list=server_list,
                                               host=host)
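        # The per-host steps below fan out through utils.MultiThread and
        # utils.start_threads, which are assumed to run one helper call per
        # host in its own thread and wait for all of them to finish.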

        try:
            _update("Cleaning")
            thd_list = []
            for ser in server_list:
                thd = utils.MultiThread(__clean_data, host=ser['host'])
                thd_list.append(thd)
            utils.start_threads(thd_list)
            _update("Clean success")
        except Exception:
            _update("ERROR: Cleaning")

        # After the data is cleaned, we also create ceph.conf
        # and initialize the OSD records in the DB.
        # Do not run this at the same time as clean_data, since the
        # files it writes may be wiped by clean_data.
        try:
            _update("Create ceph.conf")
            self._agent_rpcapi.inital_ceph_osd_db_conf(context,
                                                       server_list=server_list,
                                                       host=monitor_node['host'])
            _update("Create ceph.conf success")
        except Exception:
            _update("ERROR: ceph.conf")

        try:
            _update("create crushmap")
            # Then begin to create crush map file.
            create_crushmap = utils.MultiThread(__create_crushmap,
                                    context=context,
                                    server_list=server_list,
                                    host=monitor_node['host'])
            create_crushmap.start()
        except Exception:
            _update("ERROR: crushmap")

        try:
            # Begin to mount disks on the mount_point.
            _update("Mount disks")
            def __mount_disk(host):
                self._agent_rpcapi.mount_disks(context, host)

            thd_list = []
            for ser in server_list:
                thd = utils.MultiThread(__mount_disk, host=ser['host'])
                thd_list.append(thd)
            utils.start_threads(thd_list)
            _update("Mount disks success")
        except Exception:
            _update("ERROR: mount disk")

        # Generate monitor keyring file.
        _update("start montior")
        monitor_keyring = utils.gen_mon_keyring()
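        # The keyring returned by gen_mon_keyring() is assumed to be a
        # standard Ceph mon. keyring section, roughly:
        #   [mon.]
        #       key = <base64 secret>
        #       caps mon = "allow *"
        # Each agent writes it locally before the monitors are started.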
        def __write_monitor_keyring(host):
            self._agent_rpcapi.write_monitor_keyring(context,
                                                     monitor_keyring,
                                                     host)

        thd_list = []
        for ser in server_list:
            thd = utils.MultiThread(__write_monitor_keyring, host=ser['host'])
            thd_list.append(thd)
        utils.start_threads(thd_list)
        _update("start monitor success")

        self._track_monitors(context, server_list)

        self._agent_rpcapi.prepare_osds(context,
                                        server_list,
                                        host=monitor_node['host'])
        self._agent_rpcapi.upload_keyring_admin_into_db(context,
                host=monitor_node['host'])
        # Begin to start osd service.
        _update('Start osds')
        def __start_osd(host):
            self._agent_rpcapi.start_osd(context, host)

        thd_list = []
        for ser in server_list:
            thd = utils.MultiThread(__start_osd, host=ser['host'])
            thd_list.append(thd)
        utils.start_threads(thd_list)
        _update('OSD success')

        # add mds service
        try:
            _update("Start mds")
            LOG.info('start mds services, host = %s' % monitor_node['host'])
            self._agent_rpcapi.add_mds(context, host=monitor_node['host'])
        except Exception:
            _update("ERROR: mds")
        # Cluster created; now check ceph status.

        stat = False
        try:
            _update('Ceph status')
            stat = self._agent_rpcapi.get_ceph_health(context,
                                                      monitor_node['host'])
        except Exception:
            _update('ERROR: ceph -s')

        if not stat:
            self._update_server_list_status(context,
                                            server_list,
                                            "Ceph Start Error")
            LOG.error('Ceph starting failed!')
            raise RuntimeError('Ceph cluster failed to start')

        try:
            _update('Set crushmap')
            # Wait until the crushmap has been created.
            while create_crushmap.is_alive():
                time.sleep(1)

            def __set_crushmap(context, host):
                self._agent_rpcapi.set_crushmap(context,
                                                host)
                _update('Active')
            set_crushmap = utils.MultiThread(__set_crushmap,
                                             context=context,
                                             host=monitor_node['host'])
            set_crushmap.start()
        except Exception:
            _update('ERROR: set crushmap')

        self._update_init_node(context, server_list)
        while set_crushmap.is_alive():
            time.sleep(1)
        self._agent_rpcapi.update_all_status(context,
            host=monitor_node['host'])
        self._judge_drive_ext_threshold(context)
        self._update_drive_ext_threshold(context)
        return {'message':'res'}