def check_ceph():
    """Check ceph cluster health and per-osd status.

    On the fuel node this delegates to check_all_nodes('ceph'); it only
    runs locally on controller or ceph-osd nodes.
    """
    # node role check (collapsed from three nested "if not" tests)
    if not (NODE_ROLE.is_fuel() or NODE_ROLE.is_controller()
            or NODE_ROLE.is_ceph_osd()):
        LOG.warn('This command can only run on fuel or controller or ceph-osd node !')
        return
    if NODE_ROLE.is_fuel():
        check_all_nodes('ceph')
        return
    # get cluster status
    LOG.info('%s%s Checking ceph cluster status' % ('=' * 5, '>'))
    ceph_check_health()
    # check osd status
    LOG.info('%s%s Checking ceph osd status' % ('=' * 5, '>'))
    check_success = True
    osd_status = get_ceph_osd_status()
    if not osd_status:
        LOG.error('Can not get ceph osd status !')
        check_success = False
    else:
        for line in osd_status.split('\n'):
            # skip header rows (they contain 'id'/'weigh'); data rows name 'osd.N'
            if 'id' not in line and 'weigh' not in line and 'osd.' in line:
                fields = line.split()
                osd, status = fields[2], fields[3]
                if status != 'up':
                    LOG.error('%s status is not correct, please check it !' % osd)
                    check_success = False
    if check_success:
        LOG.info('Ceph osd status check successfully !')
def write_db(backup_id, backup_file):
    """Append a "backup_id backup_file" record to /tmp/tools.db.

    :param backup_id: identifier of the backup
    :param backup_file: path/name of the backup file
    """
    # append
    try:
        with open('/tmp/tools.db', 'a') as db:
            # write() one record; the old writelines() on a plain string
            # iterated it char-by-char, which was misleading (same bytes,
            # wrong idiom)
            db.write('%s %s\n' % (backup_id, backup_file))
    except Exception:
        LOG.error('Write to db error!')
def setup(parser):
    """Set things up for the upgrade operation."""
    # only the fuel node may drive setup of the other nodes
    if not NODE_ROLE.is_fuel():
        LOG.error('This command can only be run on the fuel node.')
        return
    setup_rsyncd_config()
    setup_nodes(parser.MYIP)
def base_delete(self, resource_name, resource_ids, delete_func):
    """Delete every resource in resource_ids via delete_func, retrying
    forever on Conflict until the list is drained.

    :param resource_name: human-readable kind, used only in log messages
    :param resource_ids: list of ids; mutated in place as items complete
    :param delete_func: callable taking one resource id; expected to raise
                        Conflict (retry) or NotFound (treated as done)
    """
    no_log_resources = []
    while resource_ids:
        for resource_id in resource_ids:
            # avoid LOG delete info many times
            if resource_id not in no_log_resources:
                with log_disabled():
                    LOG.info('Delete %s [%s]' % (resource_name, resource_id))
                no_log_resources.append(resource_id)
            try:
                delete_func(resource_id)
                # delete successfully, break
                # (the break restarts the for loop so we never keep
                # iterating a list we just mutated)
                resource_ids.remove(resource_id)
                break
            except Conflict:
                # retry: deal with conflict.
                continue
            except NotFound:
                # when call destroy_volume(),
                # will delete volumes and snapshots,
                # if snapshots NotFound, do nothing.
                resource_ids.remove(resource_id)
                break
            except Exception as e:
                LOG.warn('Can not delete %s [%s]' % (resource_name, resource_id))
                LOG.error(e)
                # something else wrong, break, won't retry
                resource_ids.remove(resource_id)
                break
def _check_managed_status(resource):
    """Log whether a cluster resource is managed on its node."""
    rid = resource['@id']
    node_name = resource['node']['@name']
    if resource['@managed'] == 'true':
        LOG.info('Resource %s was managed on node %s' % (rid, node_name))
    else:
        LOG.error('Resource %s was unmanaged on node %s' % (rid, node_name))
def delete_instance(instance_id, delete_disk=False):
    """Delete an instance and its related resources.

    :param instance_id: nova instance uuid
    :param delete_disk: also remove the instance's disks when True
    """
    if not pc.nova_server_exist(instance_id):
        LOG.error('Instance "%s" is not exist !' % instance_id)
        return
    instance_status = get_instance_status(instance_id)
    if not determine_delete_instance(instance_id, instance_status):
        return
    LOG.info('Delete instance "%s".' % instance_id)
    if get_instance_power_state(instance_id) == 'running':
        LOG.info('Instance "%s" is running, try to destroy it.' % instance_id)
        # a running instance is only cleaned up if destroy succeeded
        if not destroy_instance(instance_id):
            return
    _delete_instance_cleanup(instance_id, delete_disk)


def _delete_instance_cleanup(instance_id, delete_disk):
    # shared teardown path, formerly duplicated verbatim in both branches
    delete_vnic_vbr(instance_id)
    delete_instance_dir(instance_id)
    undefine_instance(instance_id)
    delete_ports(instance_id)
    update_disk_state(instance_id)
    if delete_disk:
        delete_disks(instance_id)
    update_nova_db(instance_id)
def check_ntp():
    """Verify the ntpd service is running and an ntp server is configured."""
    check_service('ntpd')
    server = _get_ntpserver()
    if server:
        LOG.debug('ntpserver is %s' % server)
    else:
        LOG.error('Can not get ntp server, please check it.')
def push_yaml_to_node(host, src_path, dst_file_name):
    """Copy a yaml file into /etc/hiera/ on a remote node."""
    # make sure the target directory exists before copying
    (out, err) = ssh_connect2(host, 'test -d /etc/hiera || mkdir /etc/hiera')
    if err != '':
        LOG.error('Can not create "/etc/hiera/" on node %s .' % host)
        return
    LOG.debug('Push %s to node %s .' % (src_path, host))
    scp_connect(host, src_path, '/etc/hiera/%s' % dst_file_name)
def _network_remote_network_inf(cfg): nodes = cfg['nodes'] all_node_inf = [] for n in nodes: try: node_inf = {} if n['role'].endswith('controller'): # dont care primary-controller, consider it as normal # controller node_inf['role'] = 'controller' else: node_inf['role'] = n['role'] if n['role'].endswith('controller'): node_inf['public_address'] = n['public_address'] node_inf['internal_address'] = n['internal_address'] node_inf['host'] = n['fqdn'] if not n['role'].endswith('mongo'): node_inf['storage_address'] = n['storage_address'] if n['role'].endswith('ceph-osd'): node_inf['ceph_cluster_address'] = n['ceph_cluster_address'] all_node_inf.append(node_inf) except: LOG.error("failed to parse node:%s" % n['fqdn']) continue return all_node_inf
def _run(self):
    """Delete this tenant's neutron resources in dependency order:
    floating ips -> ports -> routers -> subnets -> networks.
    """
    self.base_delete('floating ip', self.floatingips,
                     neutronclient.delete_floatingip)
    for port_id in self.ports:
        try:
            with log_disabled():
                LOG.info('Delete port [%s]' % port_id)
            neutronclient.delete_port(port_id)
        except Conflict as e:
            # the port is still attached to a router: detach it first
            # (the port itself is then cleaned up when the router goes)
            with log_disabled():
                LOG.info(' Solving conflict: remove interface...')
            router_id = neutronclient.show_port(
                port_id)['port']['device_id']
            neutronclient.remove_interface_router(router_id,
                                                  {'port_id': port_id})
        except Exception as e:
            LOG.warn('Can not delete port [%s]' % port_id)
            LOG.error(e)
    # if firewall create with target router,
    # CAN NOT delete router before firewall is deleted.
    # NOTE: already add retry
    self.base_delete('router', self.routers, neutronclient.delete_router)
    self.base_delete('subnet', self.subnets, neutronclient.delete_subnet)
    self.base_delete('network', self.networks, neutronclient.delete_network)
def check_neutron_agents(agents_list):
    """Check admin_state_up and aliveness of every neutron agent.

    dhcp-agents are special-cased: the deployment is considered healthy as
    long as at least one of them is alive, so dead dhcp-agents are only
    reported as errors when none survive.
    """
    alive_dhcp_count = 0
    dead_dhcp_agents = []
    for agent in agents_list:
        binary, host = agent['binary'], agent['host']
        msg_admin_state = ('Neutron agent %s on %s admin_state_up is %s'
                           % (binary, host, str(agent['admin_state_up'])))
        msg_not_alive = ('Neutron agent %s on %s is not alive'
                         % (binary, host))
        # admin_state_up=False is only worth a warning
        log_admin = LOG.debug if agent['admin_state_up'] else LOG.warn
        log_admin(msg_admin_state)
        if agent['alive']:
            LOG.debug('Neutron agent %s on %s is alive' % (binary, host))
            if binary == 'neutron-dhcp-agent':
                alive_dhcp_count += 1
        elif binary == 'neutron-dhcp-agent':
            # defer judgement until we know whether any dhcp-agent is alive
            LOG.debug(msg_not_alive)
            dead_dhcp_agents.append(agent)
        else:
            LOG.error(msg_not_alive)
    # NOTE: at least one dhcp-agent alive is ok
    if alive_dhcp_count < 1:
        for agent in dead_dhcp_agents:
            LOG.error('Neutron agent %s on %s is not alive'
                      % (agent['binary'], agent['host']))
def ssh_connect(hostname, commands,
                key_file=os.environ['HOME'] + '/.ssh/id_rsa',
                ssh_port=22, username='******', timeout=2):
    """Run a command on a remote host over ssh using publickey auth.

    :param hostname: host to connect to
    :param commands: shell command string passed to exec_command
    :param key_file: path to the RSA private key
    :param ssh_port: ssh port number
    :param username: remote user name
    :param timeout: connect timeout in seconds
    :returns: (stdout_contents, stderr_contents); both '' when the
              connection failed (auth failure or timeout)
    """
    # Temporarily disable INFO level logging
    logging.disable(logging.INFO)
    # need use rsa key, if use dsa key replace 'RSA' to 'DSS'
    key = paramiko.RSAKey.from_private_key_file(key_file)
    s = paramiko.SSHClient()
    s.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        s.connect(hostname, ssh_port, username=username,
                  pkey=key, timeout=timeout)
        stdin, stdout, stderr = s.exec_command(commands)
        result_out = stdout.read()
        result_err = stderr.read()
    except paramiko.ssh_exception.AuthenticationException:
        result_out = result_err = ''
        LOG.error('Can not connect to %s, Authentication (publickey) '
                  'failed !' % (hostname))
    except socket.timeout:
        result_out = result_err = ''
        LOG.error('Can not connect to %s, Connect time out !'
                  % (hostname))
    finally:
        # always close the client and restore logging before returning
        s.close()
        logging.disable(logging.NOTSET)
    return result_out, result_err
def get_value_common(key, filepath):
    """Read the value of `key` from a flat `key=value` style config file.

    :param key: option name (matched at the start of a line)
    :param filepath: config file path
    :returns: the stripped value string, or None when the lookup failed
    """
    (s, o) = commands.getstatusoutput(
        'grep "^%s" %s | cut -d "=" -f 2' % (key, filepath))
    if s != 0 or o is None:
        LOG.error('Can not get %s\'s value ! Please check file: %s.'
                  % (key, filepath))
        # bug fix: previously fell through and returned grep's error
        # output; return None so callers can tell the lookup failed
        return None
    return o.strip()
def check_ceph():
    """Check ceph cluster health and the status of every osd."""
    # only fuel / controller / ceph-osd nodes may run this check
    allowed = (NODE_ROLE.is_fuel() or NODE_ROLE.is_controller()
               or NODE_ROLE.is_ceph_osd())
    if not allowed:
        LOG.warn(
            'This command can only run on fuel or controller or ceph-osd node !'
        )
        return
    if NODE_ROLE.is_fuel():
        check_all_nodes('ceph')
        return
    # cluster health
    LOG.info('%s%s Checking ceph cluster status' % ('=' * 5, '>'))
    ceph_check_health()
    # per-osd status
    LOG.info('%s%s Checking ceph osd status' % ('=' * 5, '>'))
    ok = True
    osd_status = get_ceph_osd_status()
    if not osd_status:
        LOG.error('Can not get ceph osd status !')
        ok = False
    else:
        for row in osd_status.split('\n'):
            # skip header rows; data rows mention 'osd.'
            if 'id' in row or 'weigh' in row or 'osd.' not in row:
                continue
            cols = row.split()
            if cols[3] != 'up':
                LOG.error('%s status is not correct, please check it !'
                          % cols[2])
                ok = False
    if ok:
        LOG.info('Ceph osd status check successfully !')
def check_ntp():
    """Check the ntpd service state and the configured ntp server."""
    check_service("ntpd")
    ntpserver = _get_ntpserver()
    if not ntpserver:
        LOG.error("Can not get ntp server, please check it.")
        return
    LOG.debug("ntpserver is %s" % ntpserver)
def detach_disk_on_compute_node(servers, volume_id):
    """Detach a volume's device from the instance it is attached to.

    :param servers: iterable of nova server ids the volume may belong to
    :param volume_id: cinder volume id
    :returns: True when the disk was detached (or already detached on the
              first attached server), False when virsh detach failed,
              None when no server matched (implicit fall-through)
    """
    for server_id in servers:
        LOG.info(' Detaching disk "%s" from instance "%s".'
                 % (volume_id, server_id))
        # silence client-library INFO chatter while querying nova
        logging.disable(logging.INFO)
        server = pc.nova_server(server_id)
        server_status = server._info['status']
        server_host = server._info['OS-EXT-SRV-ATTR:host']
        server_instance_name = server._info['OS-EXT-SRV-ATTR:instance_name']
        server_device = os.path.basename(
            pc.nova_volume(server_id, volume_id)._info['device'])
        logging.disable(logging.NOTSET)
        if disk_attached(server_host, server_instance_name, server_device):
            if server_status == 'ACTIVE':
                detach_disk_cmd = 'virsh detach-disk %s %s --persistent' \
                    % (server_instance_name, server_device)
                reval = ssh_connect(server_host, detach_disk_cmd)
                # NOTE(review): ssh_connect appears to return a tuple, so
                # this membership test expects one element to be exactly
                # 'Disk detached successfully\n\n' -- confirm against
                # ssh_connect's return contract
                if 'Disk detached successfully\n\n' in reval:
                    LOG.info(' Detach disk %s on instance %s successfully.' \
                             % (server_device, server_instance_name))
                    return True
                else:
                    LOG.error(' Detach disk %s on instance %s failed.' \
                              % (server_device, server_instance_name))
                    return False
            # attached but instance not ACTIVE: fall through to next server
        else:
            LOG.info(' Disk %s already detached from instance %s.' \
                     % (server_device, server_instance_name))
            return True
def stack(parser):
    """Dispatch the `stack` sub-command based on node role and CLI flags.

    On a non-fuel node only the local role's checks may run; on the fuel
    node the whole deployment is checked. PROFILE/SERVICE select which
    aspect to check; CHECK_ALL (or neither flag) means check everything.
    """
    # if node role is "unknow", go back
    if NODE_ROLE.is_unknown():
        LOG.error('Can not confirm the node role!')
        return
    if not NODE_ROLE.is_fuel():
        # refuse role flags that do not match the local node
        if parser.CONTROLLER:
            if not NODE_ROLE.is_controller():
                cmd_warn('controller')
                return
        if parser.COMPUTE:
            if not NODE_ROLE.is_compute():
                cmd_warn('compute')
                return
        if parser.MONGO:
            if not NODE_ROLE.is_mongo():
                cmd_warn('mongo')
                return
        if parser.CONTROLLER or parser.COMPUTE or parser.MONGO:
            # exactly PROFILE selected
            if parser.PROFILE and not parser.SERVICE and not parser.CHECK_ALL:
                if parser.CONTROLLER:
                    check('controller', 'profile')
                if parser.COMPUTE:
                    check('compute', 'profile')
                if parser.MONGO:
                    check('mongo', 'profile')
            # exactly SERVICE selected
            if parser.SERVICE and not parser.PROFILE and not parser.CHECK_ALL:
                if parser.CONTROLLER:
                    check('controller', 'service')
                if parser.COMPUTE:
                    check('compute', 'service')
                if parser.MONGO:
                    check('mongo', 'service')
            # both flags, CHECK_ALL, or no aspect flag: check everything
            if parser.SERVICE and parser.PROFILE or parser.CHECK_ALL or not parser.PROFILE and not parser.SERVICE:
                if parser.CONTROLLER:
                    check('controller', 'all')
                if parser.COMPUTE:
                    check('compute', 'all')
                if parser.MONGO:
                    check('mongo', 'all')
        return
    # check all
    if parser.CHECK_ALL and parser.PROFILE and parser.SERVICE:
        check_all()
        return
    elif parser.CHECK_ALL and parser.PROFILE:
        check_all_profile()
        return
    elif parser.CHECK_ALL and parser.SERVICE:
        check_all_service()
        return
    elif parser.CHECK_ALL:
        check_all()
        return
    # check profile or service
    if parser.PROFILE:
        check_all_profile()
    if parser.SERVICE:
        check_all_service()
def ping(peer, hostname, network_role):
    """Ping `peer` once and log whether it is reachable."""
    (status, out) = commands.getstatusoutput('ping -c 1 %s' % (peer))
    if status != 0:
        LOG.error('ping %s(%s) can not be reached --- %s network!'
                  % (peer, hostname, network_role))
    else:
        LOG.debug('ping %s(%s) reached --- %s network'
                  % (peer, hostname, network_role))
def protect_image(uuid):
    '''Protect kernel image and initrd image'''
    LOG.info('Image protecting...')
    cmd = 'source %s && glance image-update --is-protected True %s' \
        % (env_path, uuid)
    (stat, out) = commands.getstatusoutput(cmd)
    if stat == 0:
        LOG.info('Protected successfully.\n')
    else:
        LOG.error('%s' % out)
def init(parser):
    """Initialize the node list and node role files (fuel node only)."""
    if NODE_ROLE.is_unknown():
        LOG.error("Can not confirm the node role!")
        # bug fix: previously fell through after the error; nothing
        # sensible can be initialized without a confirmed role
        return
    if not NODE_ROLE.is_fuel():
        LOG.warn("This command can only run on fuel node !")
        return
    init_node_list_file()
    init_node_role_file()
def update_volume_table(volume_id):
    """Mark a volume deleted in the volumes table, then verify the update."""
    LOG.info(' [%s]Updating volumes table ...' % volume_id)
    update_sql = "UPDATE volumes SET deleted=1,status='deleted' WHERE id='%s';" % volume_id
    db_connect(update_sql)
    select_sql = "SELECT deleted,status FROM volumes WHERE id ='%s';" % volume_id
    row = db_connect(select_sql)
    # verify both columns actually changed
    if not (row[0] == 1 and row[1] == 'deleted'):
        LOG.error(' Database update faild !')
def run_command(cmd):
    """Run a shell command; return its output, or None on failure."""
    (status, out) = commands.getstatusoutput(cmd)
    if status == 0:
        return out
    LOG.error("run %s error: %s" % (cmd, out))
    return None
def check_file_resolvability(filepath):
    """Return True when `filepath` parses as an ini file, False otherwise."""
    tp = ConfigParser.ConfigParser()
    try:
        tp.read(filepath)
    except ConfigParser.ParsingError as msg:
        LOG.error(msg)
        LOG.error('Abort this check!')
        return False
    # bug fix: previously returned None on success, so truthiness checks
    # like `if not check_file_resolvability(f)` always treated the file
    # as unresolvable
    return True
def check_services(services_list):
    """Warn on disabled services; log errors for services that are down."""
    for svc in services_list:
        binary, host = svc['binary'], svc['host']
        if svc['status'] != 'enabled':
            LOG.warn('Service %s on %s status is %s'
                     % (binary, host, svc['status']))
        if svc['state'] != 'up':
            LOG.error('Service %s on %s state is %s'
                      % (binary, host, svc['state']))
def init(parser):
    """Create the node list and node role files from the fuel node."""
    if NODE_ROLE.is_unknown():
        LOG.error('Can not confirm the node role!')
    # note: execution deliberately continues even when the role is unknown
    if NODE_ROLE.is_fuel():
        init_node_list_file()
        init_node_role_file()
    else:
        LOG.warn('This command can only run on fuel node !')
def vrouter_get_l3_host(rid):
    """Return the host of the l3-agent hosting router `rid`, or None."""
    cmd = "neutron l3-agent-list-hosting-router -f csv %s" % (rid)
    out = run_command(cmd)
    # bug fix: run_command returns None on failure; the old code called
    # .strip() on it unconditionally and crashed
    if out:
        out = out.strip("\r\n")
    if out:
        hosts = csv2dict(out)
        return hosts[0]["host"]
    LOG.error("can not get l3 host for router %s" % (rid))
    return None
def go(parser):
    """Upgrade"""
    # the upgrade is orchestrated from the fuel node only
    if not NODE_ROLE.is_fuel():
        LOG.error('This command can only be run on the fuel node.')
        return
    if parser.CHECK_ONLY:
        check_upgrade_process()
    else:
        go_upgrade(parser.MYIP)
def restore_from_file(backup_path):
    """Restore the fuel master from a dockerctl backup file."""
    LOG.info('Starting Restore ...')
    LOG.info('Backup is in progress, Please wait ...\n')
    (stat, out) = commands.getstatusoutput('dockerctl restore %s' % backup_path)
    if stat == 0:
        LOG.info('Restore successfully completed!\n')
    else:
        LOG.error('%s' % out)
def get_check_list_common(filepath):
    """Return the option keys from a flat `key=value` config file.

    :param filepath: config file path
    :returns: list of stripped key names; [] when the file can't be read
    """
    (s, o) = commands.getstatusoutput(
        'grep -v "^$" %s | grep -v "^#" | cut -d "=" -f 1' % filepath)
    if s != 0:
        LOG.error('Can not get check options list ! Please check file: %s.'
                  % filepath)
        # bug fix: return an empty list instead of the implicit None so
        # callers can iterate the result unconditionally
        return []
    return [key.strip() for key in o.split('\n')]
def run_command(cmd):
    """Run a shell command with the openstack credentials sourced first.

    :returns: command output, or None when the command failed
    """
    run_cmd = 'source /root/openrc;' + cmd
    (status, out) = commands.getstatusoutput(run_cmd)
    if status == 0:
        return out
    LOG.error("run %s error: %s" % (run_cmd, out))
    return None
def delete_snapshots(snapshots_id, volume_id):
    """Delete snapshot(s) from the backend, then the image and db rows."""
    LOG.info('Deleting snapshot %s ...' % snapshots_id)
    if not delete_backend_snapshots(snapshots_id, volume_id):
        return
    try:
        delete_image(snapshots_id)
    except Exception as ex:
        # image removal is best-effort; db cleanup still proceeds
        LOG.error(' Delete image failed!\n %s' % ex)
    update_snapshots_db(snapshots_id, volume_id)
    return True
def get_config(section, key):
    """Read an option from /etc/cinder/cinder.conf.

    :param section: ini section name
    :param key: option name
    :returns: the option value, or None when it cannot be read
    """
    profile = '/etc/cinder/cinder.conf'
    try:
        cp = ConfigParser.ConfigParser()
        cp.read(profile)
        return cp.get(section, key)
    except Exception:
        # was a bare except, which also swallowed SystemExit and
        # KeyboardInterrupt
        LOG.error(' Can not get %s\'s value !' % key)
        return None
def get_haproxy_monitor_content(url):
    """Fetch the haproxy monitor page.

    :returns: the page body, or None when the connection failed
    """
    content = None
    try:
        wp = urllib.urlopen(url)
        content = wp.read()
    except IOError:
        LOG.error('Can not connect to %s.' % url)
    # bug fix: the return used to live in a `finally` block, which
    # silently swallowed any exception other than IOError
    return content
def delete_snapshots(snapshots_id, volume_id):
    """Delete snapshot(s) from the storage backend, then the corresponding
    glance image and database records.

    :param snapshots_id: snapshot id(s) to remove
    :param volume_id: id of the volume the snapshots belong to
    :returns: True when the backend deletion succeeded, otherwise None
    """
    LOG.info('Deleting snapshot %s ...' % snapshots_id)
    if delete_backend_snapshots(snapshots_id, volume_id):
        try:
            delete_image(snapshots_id)
        except Exception,ex:
            # image removal is best-effort; db cleanup still proceeds
            LOG.error(' Delete image failed!\n %s' % ex)
        update_snapshots_db(snapshots_id, volume_id)
        return True
def restore_from_file(backup_path):
    """Restore the fuel master node from a dockerctl backup archive.

    :param backup_path: path to the backup file to restore from
    """
    LOG.info('Starting Restore ...')
    LOG.info('Backup is in progress, Please wait ...\n')
    (stat, out) = commands.getstatusoutput(
        'dockerctl restore %s' % backup_path)
    if stat != 0:
        # surface dockerctl's own error output
        LOG.error('%s' % out)
    else:
        LOG.info('Restore successfully completed!\n')
def get_node_list(role):
    """Return the hosts of all nodes matching `role`; [] on any failure."""
    node_list = []
    try:
        for node in NODE_ROLE.nodes:
            if node['roles'] == role:
                node_list.append(node['host'])
    except Exception:
        # was a bare except; keep the best-effort contract but don't
        # swallow SystemExit/KeyboardInterrupt
        LOG.error('Can not get the node list !')
        node_list = []
    return node_list
def check_mysql_connect(server, user, pwd, dbname):
    """Verify a mysql server accepts connections and answers a query."""
    try:
        db = MySQLdb.connect(server, user, pwd, dbname)
        try:
            cursor = db.cursor()
            cursor.execute('SELECT VERSION()')
            cursor.fetchone()
        finally:
            # bug fix: close even when the query fails; the old code
            # leaked the connection on any error after connect()
            db.close()
        LOG.debug('Check Sucessfully.')
    except Exception:
        # was a bare except
        LOG.error('Check Faild.')
def protect_image(uuid):
    '''Protect kernel image and initrd image'''
    LOG.info('Image protecting...')
    # mark the glance image protected so it cannot be deleted accidentally
    (stat, out) = commands.getstatusoutput(
        'source %s && glance image-update --is-protected True %s'
        % (env_path, uuid))
    if stat != 0:
        # surface glance's own error output
        LOG.error('%s' % out)
    else:
        LOG.info('Protected successfully.\n')
def vrouter_get_l3_host(rid):
    """Return the host running the l3-agent for router `rid`, or None."""
    cmd = "neutron l3-agent-list-hosting-router -f csv %s" \
        % (rid)
    out = run_command(cmd)
    # bug fix: run_command returns None on failure, which used to crash
    # the unconditional .strip() call
    out = out.strip('\r\n') if out else None
    if out:
        hosts = csv2dict(out)
        return hosts[0]['host']
    LOG.error('can not get l3 host for router %s' % (rid))
    return None
def volume(parser):
    """Entry point for the volume sub-command (controller nodes only)."""
    if not NODE_ROLE.is_controller():
        LOG.warn('This command can only run on controller node !')
        return
    if not parser.DESTROY_VOLUME:
        return
    if parser.ID:
        destroy_volume(parser.ID)
    else:
        LOG.error('Please use [--id ID] to specify the volume ID !')
def delete_image(uuid):
    '''Delete tmp kernel file'''
    LOG.info('Image deleting...\n')
    cmd = 'source %s && glance image-delete %s' % (env_path, uuid)
    (stat, out) = commands.getstatusoutput(cmd)
    if stat == 0:
        LOG.info('The image was deleted.\n')
    else:
        LOG.error('%s' % out)
        # if delete failed, tell user the uuid and let user delete manually
        LOG.error('Please use "glance image-delete" to delete it. The uuid is %s\n' % uuid)
def check_profile(profile, role):
    """Compare a deployed service profile against its template and report
    missing or drifted options.

    :param profile: path to the deployed config file
    :param role: node role; mongo profiles use the flat key=value format,
                 every other role uses ini sections
    """
    # if the profile file is not exists, go back
    if not os.path.exists(profile):
        LOG.error('Can not find this profile. Abort this check!')
        return
    # get template path
    template = get_template_path(profile, role)
    # if the template file is not exists, go back
    if not os.path.exists(template):
        LOG.error('Template file is missing, Please check it by yourself.')
        return
    # bug fix: `role is not 'mongo'` compared object identity with a
    # string literal; use equality
    if role != 'mongo':
        _check_ini_profile(profile, template)
    else:
        _check_common_profile(profile, template)


def _check_ini_profile(profile, template):
    # ini-style profiles: verify both files parse, then diff section keys
    for filepath in (profile, template):
        if not check_file_resolvability(filepath):
            return
    # Check profile keys
    check_list = get_check_list(profile)
    miss_keys = []
    for section in sorted(check_list.keys()):
        for key in check_list[section]:
            (miss_key, current_value) = check_key(section, key,
                                                 profile, template)
            if miss_key:
                header = '[' + section + ']'
                # emit each section header once, before its keys
                if header not in miss_keys:
                    miss_keys.append(header)
                miss_keys.append(key + ' = ' + current_value)
    if miss_keys:
        LOG.warn('Can not check following option, please check it by yourself. ')
        for entry in miss_keys:
            fmt_print(entry)
    # some keys in template but not in profile(named lost keys)
    t_check_list = get_check_list(template)
    for t_section in sorted(t_check_list.keys()):
        for t_key in t_check_list[t_section]:
            check_lost_key(t_section, t_key, profile)


def _check_common_profile(profile, template):
    # flat key=value profiles (mongo): check present keys ...
    for key in get_check_list_common(profile):
        check_key_common(key, profile, template)
    # ... then keys in template but not in profile(named lost keys)
    for t_key in get_check_list_common(template):
        check_lost_key_common(t_key, profile)
def delete_backend_snapshots_eqlx(snapshots_id, volume_id):
    """Delete every snapshot of a volume on an eqlx backend.

    :param snapshots_id: iterable of snapshot ids to remove
    :param volume_id: id of the owning volume
    :returns: True when all snapshots were deleted, False on first failure
    """
    LOG.info(' Deleting backend(eqlx) snapshots ...')
    for snapshot_id in snapshots_id:
        LOG.info(' [%s]Deleting backend snapshot ...' % snapshot_id)
        cmd_delete_snapshot = ('volume select volume-%s snapshot delete snapshot-%s'
                               % (volume_id, snapshot_id))
        result = eqlx_ssh_execute(cmd_delete_snapshot)
        if 'Snapshot deletion succeeded.' not in result:
            LOG.error(' Can not delete snapshot "%s" !' % snapshot_id)
            return False
        # bug fix: the old code did `return True` right here after the
        # FIRST successful delete, leaving any remaining snapshots behind
    return True