def _stop_server(self, context, body=None):
    """Set noout and stop all osd services, then stop the server.

    body = {u'servers': [{u'cluster_id': 1, u'id': u'1'},
                         {u'cluster_id': 1, u'id': u'2'}]}
    """
    LOG.info("DEBUG in stop server in scheduler manager.")
    server_list = body['servers']
    need_change_mds = False
    for ser in server_list:
        ser_ref = db.init_node_get(context, ser['id'])
        ser['host'] = ser_ref['host']
        if ser_ref['mds'] == 'yes':
            need_change_mds = True

    active_monitor = self._get_active_monitor(context)
    LOG.info("stop_server of scheduler manager %s" % server_list)

    for item in server_list:
        res = db.init_node_get(context, item['id'])
        self._start_stop(context, item['id'])
        res = self._agent_rpcapi.stop_server(context,
                                             item['id'],
                                             res['host'])
    self._agent_rpcapi.update_osd_state(context, active_monitor['host'])

    LOG.info('need_change_mds = %s' % need_change_mds)
    if need_change_mds:
        self.add_mds(context, server_list)
    return True
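
# Both _stop_server and _start_server rely on _get_active_monitor to pick a
# reachable monitor node through which OSD state is refreshed. A minimal
# sketch of that contract (hypothetical name _get_active_monitor_sketch; the
# real lookup lives elsewhere in this manager and may choose differently):
def _get_active_monitor_sketch(self, context):
    """Return the first Active init_node whose type includes 'monitor'."""
    for node in db.init_node_get_all(context):
        if node['type'].find('monitor') != -1 and node['status'] == 'Active':
            return node
    raise Exception("No active monitor node found")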
def get_server(self, context, id):
    LOG.info('get_server in scheduler manager')
    server = db.init_node_get(context, id)
    ret = self._set_error(context)
    if ret:
        server['status'] = 'unavailable'
    LOG.info("CEPH_LOG log server %s" % server)
    return server
def _add_servers(self, context, body=None):
    """Add the servers into the ceph cluster.

    It's notable that the type of body['servers'] looks like:
        [{u'is_storage': True,
          u'is_monitor': True,
          u'id': u'1',
          u'zone_id': u'1'},
         {u'is_storage': True,
          u'is_monitor': False,
          u'id': u'2',
          u'zone_id': u'2'}]

    Here we also need to fetch info from the DB.
    """
    def _update_ssh_key():
        server_list = db.init_node_get_all(context)
        for ser in server_list:
            if ser['status'] in ('Active', 'available'):
                self._agent_rpcapi.update_ssh_keys(context, ser['host'])

    server_list = body['servers']
    for ser in server_list:
        ser_ref = db.init_node_get(context, ser['id'])
        ser['host'] = ser_ref['host']
        ser['cluster_id'] = self._agent_rpcapi.cluster_id(context,
                                                          ser['host'])
        # We need to change the role defined in server.manifest
        # when a former monitor node is re-added as storage only.
        if not ser['is_monitor']:
            if ser_ref['type'].find('monitor') != -1:
                values = {'type': 'storage'}
                db.init_node_update(context, ser_ref['id'], values)

    self._update_server_list_status(context, server_list, 'running')
    _update_ssh_key()
    self.add_monitor(context, server_list)

    # Begin to add osds.
    LOG.info("start to add storage")
    self.add_osd(context, server_list)
    self._judge_drive_ext_threshold(context)
    return True
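
# _update_server_list_status is used here and throughout create_cluster to
# persist a batch status change. A minimal sketch of the behavior this code
# appears to rely on (hypothetical name _update_server_list_status_sketch,
# so it does not shadow the real helper defined elsewhere in this manager):
def _update_server_list_status_sketch(self, context, server_list, status):
    """Persist the same status string for every server in the batch."""
    for ser in server_list:
        db.init_node_update(context, ser['id'], {'status': status})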
def _remove_servers(self, context, body=None):
    """Remove servers from the ceph cluster.

    body['servers'] looks like:
        [{u'remove_storage': True,
          u'remove_monitor': True,
          u'id': u'1',
          u'zone_id': u'1'},
         {u'remove_storage': True,
          u'remove_monitor': False,
          u'id': u'2',
          u'zone_id': u'2'}]
    """
    server_list = body['servers']
    LOG.info('remove_servers = %s' % server_list)
    if not server_list:
        return True

    need_change_mds = False
    for ser in server_list:
        ser_ref = db.init_node_get(context, ser['id'])
        ser['host'] = ser_ref['host']
        ser['type'] = ser_ref['type']
        ser['remove_monitor'] = ser['type'].find('monitor') != -1
        ser['status'] = ser_ref['status']
        ser['mds'] = ser_ref['mds']
        if ser['mds'] == 'yes':
            need_change_mds = True

    LOG.info("start to remove monitor")
    self.remove_monitors(context, server_list)

    LOG.info("start to remove storage")
    self.remove_osd(context, server_list)

    if need_change_mds:
        # One of the removed servers hosted the mds,
        # so tear it down and re-add the mds service.
        LOG.info("start to remove mds")
        self.remove_mds(context, server_list)
        self.add_mds(context, server_list)
    return True
def _start_server(self, context, body=None):
    """Start all osd services, then start the server.

    body = {u'servers': [{u'cluster_id': 1, u'id': u'1'},
                         {u'cluster_id': 1, u'id': u'2'}]}
    """
    LOG.info("DEBUG in start server in scheduler manager.")
    server_list = body['servers']
    active_monitor = self._get_active_monitor(context)
    for item in server_list:
        #host = self._get_monitor_by_cluster_id(context,
        #                                       item['cluster_id'])
        res = db.init_node_get(context, item['id'])
        if not res:
            LOG.error("No available node for node id %s" % item['id'])
            try:
                raise StorageServerStartFailed
            except Exception as e:
                LOG.error("%s: %s" % (e.code, e.message))
                raise
        self._start_start(context, item['id'])
        res = self._agent_rpcapi.start_server(context,
                                              item['id'],
                                              res['host'])
    self._agent_rpcapi.update_osd_state(context, active_monitor['host'])
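
# _start_server logs e.code and e.message after raising
# StorageServerStartFailed, which implies the exception class carries both
# attributes. A minimal sketch of such a class (hypothetical name with a
# Sketch suffix; the real definition lives in the project's exception
# module and its code value is assumed here):
class StorageServerStartFailedSketch(Exception):
    code = 'E_SERVER_START'                      # assumed error code
    message = "Storage server failed to start"   # assumed default message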
def create_cluster(self, context, server_list):
    """Add the servers into a new ceph cluster.

    It's notable that the type of server_list looks like:
        [{u'is_storage': True,
          u'is_monitor': True,
          u'id': u'1',
          u'zone_id': u'1'},
         {u'is_storage': True,
          u'is_monitor': False,
          u'id': u'2',
          u'zone_id': u'2'}]

    Here we also need to fetch info from the DB.
    """
    # Add hostname here.
    for ser in server_list:
        ser_ref = db.init_node_get(context, ser['id'])
        ser['host'] = ser_ref['host']

    # Use mkcephfs to set up the ceph system.
    monitor_node = self._select_monitor(context, server_list)
    LOG.info('Choose monitor node = %s' % monitor_node)

    def _update(status):
        LOG.debug('status = %s' % status)
        self._update_server_list_status(context, server_list, status)
        if status.lower().find('error') != -1:
            # Called from an except block below, so the bare raise
            # re-raises the exception currently being handled.
            raise

    # Set at least 3 monitors when creating the cluster.
    nums = len(server_list)
    if nums >= 3:
        count = 0
        rest_mon_num = 0
        for ser in server_list:
            if ser['is_monitor']:
                count += 1
        if count < 3:
            rest_mon_num = 3 - count
        if rest_mon_num > 0:
            for ser in server_list:
                if not ser['is_monitor']:
                    ser['is_monitor'] = True
                    rest_mon_num -= 1
                if rest_mon_num <= 0:
                    break

    # Clean ceph data.
    def __clean_data(host):
        self._agent_rpcapi.update_ssh_keys(context, host)
        self._agent_rpcapi.clean_ceph_data(context, host)

    def __create_crushmap(context, server_list, host):
        self._agent_rpcapi.create_crushmap(context,
                                           server_list=server_list,
                                           host=host)

    try:
        _update("Cleaning")
        thd_list = []
        for ser in server_list:
            thd = utils.MultiThread(__clean_data, host=ser['host'])
            thd_list.append(thd)
        utils.start_threads(thd_list)
        _update("Clean success")
    except:
        _update("ERROR: Cleaning")

    # After cleaning data, we also begin to create ceph.conf
    # and init osds in the DB.
    # Do not run this at the same time as clean_data:
    # its results may be wiped out by clean_data.
    try:
        _update("Create ceph.conf")
        self._agent_rpcapi.inital_ceph_osd_db_conf(
            context,
            server_list=server_list,
            host=monitor_node['host'])
        _update("Create ceph.conf success")
    except:
        _update("ERROR: ceph.conf")

    try:
        _update("create crushmap")
        # Then begin to create the crushmap file.
        create_crushmap = utils.MultiThread(__create_crushmap,
                                            context=context,
                                            server_list=server_list,
                                            host=monitor_node['host'])
        create_crushmap.start()
    except:
        _update("ERROR: crushmap")

    try:
        # Begin to mount disks on the mount_point.
        _update("Mount disks")

        def __mount_disk(host):
            self._agent_rpcapi.mount_disks(context, host)

        thd_list = []
        for ser in server_list:
            thd = utils.MultiThread(__mount_disk, host=ser['host'])
            thd_list.append(thd)
        utils.start_threads(thd_list)
        _update("Mount disks success")
    except:
        _update("ERROR: mount disk")

    # Generate monitor keyring file.
    _update("start monitor")
    monitor_keyring = utils.gen_mon_keyring()

    def __write_monitor_keyring(host):
        self._agent_rpcapi.write_monitor_keyring(context,
                                                 monitor_keyring,
                                                 host)

    thd_list = []
    for ser in server_list:
        thd = utils.MultiThread(__write_monitor_keyring,
                                host=ser['host'])
        thd_list.append(thd)
    utils.start_threads(thd_list)
    _update("start monitor success")

    self._track_monitors(context, server_list)
    self._agent_rpcapi.prepare_osds(context,
                                    server_list,
                                    host=monitor_node['host'])
    self._agent_rpcapi.upload_keyring_admin_into_db(
        context,
        host=monitor_node['host'])

    # Begin to start osd service.
    _update('Start osds')

    def __start_osd(host):
        self._agent_rpcapi.start_osd(context, host)

    thd_list = []
    for ser in server_list:
        thd = utils.MultiThread(__start_osd, host=ser['host'])
        thd_list.append(thd)
    utils.start_threads(thd_list)
    _update('OSD success')

    # Add the mds service.
    try:
        _update("Start mds")
        LOG.info('start mds services, host = %s' % monitor_node['host'])
        self._agent_rpcapi.add_mds(context, host=monitor_node['host'])
    except:
        _update("ERROR: mds")

    # Cluster created; begin to check ceph status.
    stat = None  # avoid a NameError if the health check itself fails
    try:
        _update('Ceph status')
        stat = self._agent_rpcapi.get_ceph_health(context,
                                                  monitor_node['host'])
    except:
        _update('ERROR: ceph -s')

    if stat == False:
        self._update_server_list_status(context,
                                        server_list,
                                        "Ceph Start Error")
        LOG.error('Ceph starting failed!')
        raise

    set_crushmap = None  # avoid a NameError if setting crushmap fails early
    try:
        _update('Set crushmap')
        # Wait until the crushmap file has been created.
        while create_crushmap.is_alive():
            time.sleep(1)

        def __set_crushmap(context, host):
            self._agent_rpcapi.set_crushmap(context, host)
            _update('Active')

        set_crushmap = utils.MultiThread(__set_crushmap,
                                         context=context,
                                         host=monitor_node['host'])
        set_crushmap.start()
    except:
        _update('ERROR: set crushmap')

    self._update_init_node(context, server_list)
    while set_crushmap and set_crushmap.is_alive():
        time.sleep(1)
    self._agent_rpcapi.update_all_status(context,
                                         host=monitor_node['host'])
    self._judge_drive_ext_threshold(context)
    self._update_drive_ext_threshold(context)
    return {'message': 'res'}
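
# create_cluster fans work out to the agents with utils.MultiThread and
# waits on the batch with utils.start_threads. A minimal sketch of helpers
# matching that usage (hypothetical names with a Sketch suffix; the real
# implementations live in the project's utils module), assuming MultiThread
# wraps threading.Thread with captured kwargs and start_threads starts then
# joins every thread:
import threading

class MultiThreadSketch(threading.Thread):
    def __init__(self, func, **kwargs):
        super(MultiThreadSketch, self).__init__()
        self._func = func
        self._kwargs = kwargs

    def run(self):
        # Invoke the wrapped callable with the captured keyword args.
        self._func(**self._kwargs)

def start_threads_sketch(thd_list):
    # Start every thread, then block until all of them finish; this is why
    # the "... success" status is only written after the whole batch ends.
    for thd in thd_list:
        thd.start()
    for thd in thd_list:
        thd.join()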