def cts_findlogseg(self):
    '''Extract the log segment for a CTS run.

    Placeholder: the real extraction logic is not implemented yet.
    Returns an empty string so callers can treat the result as "no log".
    '''
    #TODO: implement CTS log segment extraction
    # log first: the early return used to make this debug call unreachable
    utillib.debug(
        'This is cts find log function, need to be finished later!:)')
    return ''
def dumplogset(self):
    '''
    Find the log, or set of logs, which are interesting for us.

    Collects the log files overlapping [FROM_TIME, TO_TIME] via
    arch_logs() (newest first), then extracts: the tail of the oldest
    file, every complete file in between, and the head of the newest.
    Returns the extracted text, or '' when no log matches.
    '''
    logf = envir.HA_LOG
    from_time = int(envir.FROM_TIME)
    to_time = int(envir.TO_TIME)
    logf_set = self.arch_logs(logf, from_time, to_time)
    if not logf_set:
        return ''
    # remember the original count: the old code re-tested len(logf_set)
    # *after* removing oldest/newest, so 3 files hit the 1-file branch
    num_logs = len(logf_set)
    newest = logf_set[0]
    oldest = logf_set[-1]
    if num_logs > 2:
        mid_logfiles = logf_set[1:-1]
    else:
        mid_logfiles = []
    if num_logs == 1:
        # everything lives in one file: cut both ends
        logseg = self.find_logseg(newest, from_time, to_time)
    else:
        # tail of the oldest file (to the end of that file) ...
        logseg = self.find_logseg(oldest, from_time, 0)
        # ... all complete files in between ...
        for f in mid_logfiles:
            # NOTE(review): presumably includes the whole file; confirm
            # the helper name (find_log vs findlog)
            self.find_log(f)
            utillib.debug('including complete ' + f + ' logfile')
        # ... and the head of the newest file; the old code overwrote
        # (discarded) the oldest file's segment here
        logseg = logseg + self.find_logseg(newest, 0, to_time)
    return logseg
def find_logseg(self, logf, from_time, to_time):
    '''Return the part of logf falling between from_time and to_time.

    A from_time/to_time of 0 means "from the start" / "to the end".
    Uses the external print_logseg helper when available; otherwise
    decompresses the log into a scratch file and cuts it by the line
    numbers located via findln_by_time().  Returns '' when a time
    cannot be found in the file (corrupt log).
    '''
    logseg_path = os.path.join(envir.HA_NOARCHBIN, 'print_logseg')
    # fixed typo: second test read 'logsef_path' (NameError)
    if os.access(logseg_path, os.F_OK) and os.access(logseg_path, os.X_OK):
        utillib.do_command([logseg_path, logf, from_time, to_time])
    # decompress (or plain cat) the log into a scratch file
    cat = self.find_decompressor(logf).split()
    cat.append(logf)
    srcstr = utillib.do_command(cat)
    srcf = tempfile.mkstemp()[1]
    self.RM_FILES.append(srcf)  # scratch file is removed at cleanup
    srcfd = open(srcf, 'w')
    try:
        srcfd.write(srcstr)
    finally:
        srcfd.close()
    if from_time == 0:
        FROM_LINE = 1
    else:
        FROM_LINE = utillib.findln_by_time(self, srcf, from_time)
        if not FROM_LINE:
            # was a bare warning() call and str+int concatenation
            utillib.warning("couldn't find line for time " +
                            str(from_time) + '; corrupt log file?')
            return ''
    TO_LINE = 0
    if to_time != 0:
        TO_LINE = utillib.findln_by_time(self, srcf, to_time)
        if not TO_LINE:
            utillib.warning("couldn't find for time " + str(to_time) +
                            '; corrupt log file?')
            return ''
    utillib.debug('including log segment[' + str(FROM_LINE) + '-' +
                  str(TO_LINE) + '] from' + logf)
    # TO_LINE == 0 means "to the end of file"; the old [FROM_LINE:0]
    # slice silently produced an empty segment
    end = TO_LINE if TO_LINE else None
    # NOTE(review): FROM_LINE is used as a 0-based slice index; confirm
    # findln_by_time's line numbering (off-by-one suspected)
    return '\n'.join(srcstr.split('\n')[FROM_LINE:end])
def findlog(self):
    '''
    First try syslog files, if none found then use the
    logfile/debugfile settings.

    Order of preference: syslog (when a log facility is configured),
    the user-supplied extra logs, the journal dump in WORKDIR, the
    pacemaker log, and finally the debugfile/logfile settings.
    '''
    logf = ''
    if len(envir.HA_LOGFACILITY):
        logf = utillib.findmsg()
    if os.path.isfile(logf):
        return logf
    if len(envir.EXTRA_LOGS):
        for l in envir.EXTRA_LOGS:
            if os.path.isfile(l) and l != envir.PCMK_LOG:
                return l
    journal = os.path.join(self.WORKDIR, envir.JOURNAL_F)
    if os.path.isfile(journal):
        return journal
    if os.path.isfile(envir.PCMK_LOG):
        return envir.PCMK_LOG
    # last resort: the configured debugfile, then the logfile
    if len(envir.HA_DEBUGFILE):
        snd_logf = envir.HA_DEBUGFILE
    else:
        snd_logf = envir.HA_LOGFILE
    # this debug used to be unreachable: both branches returned first
    if len(snd_logf):
        utillib.debug('will try with ' + snd_logf)
    return snd_logf
def getbacktraces(self):
    '''Collect backtraces from core dumps found under CORES_DIRS.

    Writes the backtraces to WORKDIR/BT_F via utillib.getbt().
    '''
    flist = []
    for f in utillib.find_files(self, envir.CORES_DIRS):
        # keep only core dumps; the old `expr match` call returned a
        # *string*, so its `> 0` test was always true
        if os.path.basename(f).startswith('core'):
            flist.append(f)
    if len(flist):
        utillib.getbt(flist, os.path.join(self.WORKDIR, envir.BT_F))
        utillib.debug('found backtraces: ' + ' '.join(flist))
def get_cib_dir2(self):
    '''
    Failed to get CIB_DIR from crmsh.

    HA_VARLIB is normally {localstatedir}/heartbeat, so probe the
    usual cib.xml locations under localstatedir instead.
    '''
    localstatedir = os.path.dirname(envir.HA_VARLIB)
    for p in ['pacemaker/cib', 'heartbeat/crm']:
        if os.path.isfile(localstatedir + '/' + p + '/cib.xml'):
            # was a literal string; log the actual path instead
            utillib.debug('setting CIB_DIR to ' + localstatedir + '/' + p)
            envir.CIB_DIR = localstatedir + '/' + p
            break
def pe2dot(self, path):
    '''Convert a PE input file to a .dot graph with the ptest tool.

    Returns False when no ptest tool is configured; otherwise runs the
    tool best-effort (failures are only logged) and returns None.
    '''
    pef = os.path.basename(path)
    if pef.endswith('.bz2'):
        dotf = pef[:-4]
    else:
        # dotf used to be unbound for non-.bz2 inputs (NameError)
        dotf = pef
    if not len(envir.PTEST):
        return False
    try:
        # was the literal string 'dotf' instead of the variable
        msg = utillib.do_command([envir.PTEST, '-D', dotf, '-x', pef])
    except Exception:
        # narrow from a bare except: keep best-effort behaviour
        utillib.debug(envir.PTEST + ' failed! ')
    return
def get_crm_daemon_dir2(self):
    '''
    Get_crm_daemon_dir function failed: probe well-known prefixes.

    Sets envir.CRM_DAEMON_DIR to the *directory* containing an
    executable crmd, consistent with get_crm_daemon_dir().
    '''
    for p in ['/usr', '/usr/local', '/opt']:
        for d in ['libexec', 'lib64', 'lib']:
            for d2 in ['pacemaker', 'heartbeat']:
                daemon_dir = p + '/' + d + '/' + d2
                if os.access(daemon_dir + '/crmd', os.X_OK):
                    utillib.debug('setting CRM_DAEMON_DIR to ' + daemon_dir)
                    # was set to the crmd binary itself, and `break`
                    # only left the innermost loop
                    envir.CRM_DAEMON_DIR = daemon_dir
                    return
def get_crm_daemon_dir(self):
    '''Locate crmd and record its directory in envir.CRM_DAEMON_DIR.

    Probes the pacemaker and heartbeat subdirectories next to HA_BIN.
    Returns 0 on success, 1 when no executable crmd was found.
    '''
    libdir = os.path.dirname(envir.HA_BIN)
    for subdir in ('/pacemaker', '/heartbeat'):
        candidate = libdir + subdir
        if not os.access(candidate + '/crmd', os.X_OK):
            continue
        utillib.debug("setting CRM_DAEMON_DIR to" + candidate)
        envir.CRM_DAEMON_DIR = candidate
        return 0
    return 1
def compabitility_pcmk(self):
    '''Fill in pacemaker-related paths.

    Resolves CRM_DAEMON_DIR, PE_STATE_DIR, CIB_DIR, then derives
    PCMK_LIB and PTEST.  Each primary lookup returns nonzero on
    failure, in which case the *2 probe variant is tried.
    '''
    if self.get_crm_daemon_dir():
        #have not tested carefully
        self.get_crm_daemon_dir2()
    if not len(envir.CRM_DAEMON_DIR):
        # neither lookup worked: cannot continue without the daemons
        utillib.fatal("cannot find pacemaker daemon directory!")
    if self.get_pe_state_dir():
        self.get_pe_state_dir2()
    if self.get_cib_dir():
        self.get_cib_dir2()
    utillib.debug("setting PCMK_LIB to `dirname $CIB_DIR`")
    # PCMK_LIB is the parent directory of CIB_DIR
    envir.PCMK_LIB = os.path.dirname(envir.CIB_DIR)
    envir.PTEST = self.echo_ptest_tool()
def run(master_flag): sla = collector() #if this is master node, then flag THIS_IS_NDOE is 1, else case it is 0 sla.THIS_IS_NODE = master_flag #who am i sla.WE = socket.gethostname() print 'start collector on ', sla.WE utillib.parse_xml() #get WORKDIR sla.WORKDIR = sla.mktemp(sla.WE) sla.WORKDIR = sla.WORKDIR + "/" + sla.WE sla.compabitility_pcmk() sla.cluster_type() support = __import__(sla.import_support()) support.get_log_var() utillib.debug('log setting :facility = ' + envir.HA_LOGFACILITY + ' logfile = ' + envir.HA_LOGFILE + ' debug file = ' + envir.HA_DEBUGFILE) #In order to avoid master node delete envirenv file before scp it to another node #Then master node donot need to delete here, it will be deleted before master node end of run try: if not sla.THIS_IS_NODE: if not utillib.do_rm(sla.WE, os.path.join(envir.XML_PATH, envir.XML_NAME)): raise IOError('NO Such file or directory') except IOError as msg: print msg sys.exit(1) sla.collect_info() sla.return_result() # #part 4: endgames: # remove tmpfile and logs we do not need # utillib.remove_files(sla)
def getratraces(self):
    '''Copy resource-agent trace files from HA_VARLIB/trace_ra into
    WORKDIR.  Returns False when there is no trace directory.
    '''
    trace_dir = os.path.join(envir.HA_VARLIB, 'trace_ra')
    if not os.path.isdir(trace_dir):
        return False
    utillib.debug('looking for RA trace files in ' + trace_dir)
    # strip the leading parent-of-trace_dir prefix from every name
    sed_pro = subprocess.Popen(
        ['sed', "s," + os.path.dirname(trace_dir) + "/,,g"],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # sed emits the space-joined names on ONE line, so split on any
    # whitespace; the old split('\n') left a single giant entry
    flist = sed_pro.communicate(
        ' '.join(utillib.find_file(trace_dir)))[0].split()
    i = 0
    for f in flist:
        # copyfile() needs a file destination, not a directory
        shutil.copyfile(f, os.path.join(self.WORKDIR, os.path.basename(f)))
        i = i + 1
    utillib.debug('found ' + str(i) + ' trace files in ' + trace_dir)
def get_log_var():
    '''
    Get log variables (facility, debug level, log settings).

    Fills in defaults, then reads the logd or corosync configuration
    depending on whether logd is in use.
    '''
    if not len(envir.HA_LOGFACILITY):
        envir.HA_LOGFACILITY = envir.DEFAULT_HA_LOGFACILITY
    envir.HA_DEBUGLEVEL = 'info'
    if iscfvartrue('debug'):
        # was `HA_LOGDEVEL = 'debug'`, a dead local assignment
        envir.HA_DEBUGLEVEL = 'debug'
    if uselogd():
        if not os.path.isfile(envir.LOGD_CF):
            #no configuration: use defaults
            return
        utillib.debug('reading log settings from ' + envir.LOGD_CF)
        utillib.get_logd_logvars()  #TODO
    else:
        utillib.debug('reading log setting from ' + envir.CONF)
        get_coro_logvars()
def get_log_var(self):
    '''
    Get log variables for the detected cluster stack.
    '''
    if not len(envir.HA_LOGFACILITY):
        envir.HA_LOGFACILITY = envir.DEFAULT_HA_LOGFACILITY
    envir.HA_DEBUGLEVEL = 'info'
    if envir.USER_CLUSTER_TYPE == 'heartbeat':
        # NOTE(review): result is never used; the heartbeat branch
        # looks unfinished — confirm intended handling
        cfdebug = ha_cf_support.getcfvar('debug')
    else:
        if corosync_conf_support.iscfvartrue('debug'):
            # was `HA_LOGDEVEL = 'debug'`, a dead local assignment
            envir.HA_DEBUGLEVEL = 'debug'
        if corosync_conf_support.uselogd():
            if not os.path.isfile(envir.LOGD_CF):
                #no configuration: use defaults
                return
            utillib.debug('reading log settings from ' + envir.LOGD_CF)
            corosync.get_logd_logvars()
def get_pe_state_dir2(self):
    '''
    Failed to get PE_STATE_DIR from crmsh.

    First look for a pengine directory under /var/lib containing a
    .last file; otherwise probe the usual locations under
    localstatedir.
    '''
    localstatedir = os.path.dirname(envir.HA_VARLIB)
    found = utillib.find_dir("pengine", "/var/lib")
    # lastf used to be unbound (NameError) when no ".last" file existed
    lastf = ''
    if found:
        for i in os.listdir(found):
            if i.find(".last") != -1:
                lastf = os.path.join(found, i)
    if os.path.isfile(lastf):
        envir.PE_STATE_DIR = os.path.dirname(lastf)
    else:
        for p in ['pacemaker/pengine', 'pengine', 'heartbeat/pengine']:
            if os.path.isdir(localstatedir + '/' + p):
                utillib.debug("setting PE_STATE_DIR to " +
                              localstatedir + '/' + p)
                envir.PE_STATE_DIR = localstatedir + '/' + p
                break
def getpeinputs(self, workdir):
    '''Symlink PE input files from PE_STATE_DIR into workdir and,
    when there are not too many of them, convert each to a .dot file.
    '''
    utillib.debug('looking for PE files in' + envir.PE_STATE_DIR)
    flist = utillib.find_files(self, envir.PE_STATE_DIR.split())
    # drop pengine's ".last" marker file; the old grep call was fed a
    # single space-joined line, so "[.]last$" never filtered reliably
    flist = [f for f in flist if not f.endswith('.last')]
    i = 0
    if len(flist):
        pengine_dir = os.path.join(workdir,
                                   os.path.basename(envir.PE_STATE_DIR))
        os.mkdir(pengine_dir)
        for f in flist:
            # symlink() wants the link's full path, not its directory
            os.symlink(f, os.path.join(pengine_dir, os.path.basename(f)))
            i = i + 1
        utillib.debug('found ' + str(i) + ' pengine input files in ' +
                      envir.PE_STATE_DIR)
    if i >= 20:
        # converting that many inputs would take too long
        utillib.debug('too many PE inputs to create dot files')
    else:
        # was inverted: conversion only *ran* for >= 20 inputs, which
        # contradicts the "too many" message above
        for f in flist:
            if not self.skip_lvl(1):
                path = os.path.join(workdir,
                                    os.path.basename(envir.PE_STATE_DIR),
                                    os.path.basename(f))
                self.pe2dot(path)
def start_slave_collector(self, nodes, port=22, username='******'):
    '''Run the collector on a remote node over ssh and append its
    stdout/stderr to output.txt / error.txt in WORKDIR.

    nodes is a single host name (despite the plural).
    '''
    utillib.debug(
        'running class collector function run to collect log on ' + nodes)
    paramiko.util.log_to_file('/tmp/paramiko.log')
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(nodes, port, username)
    path = os.path.join(envir.CRM_PATH, 'collector.py')
    utillib.debug(nodes + ' collector script path :' + path)
    command = 'python ' + envir.EXCUTE_PATH + '/hb_report __slave'
    try:
        stdin, stdout, stderr = client.exec_command(command)
        outmsg = nodes + ' output :' + stdout.read()
        errmsg = nodes + ' error: ' + stderr.read()
    finally:
        # was leaked: release the ssh session once the output is read
        client.close()
    fdout = open(os.path.join(self.WORKDIR, 'output.txt'), 'a')
    try:
        fdout.write(outmsg)
    finally:
        fdout.close()
    fderr = open(os.path.join(self.WORKDIR, 'error.txt'), 'a')
    try:
        fderr.write(errmsg)
    finally:
        fderr.close()
def arch_logs(self, logf, from_time, to_time):
    '''Return the log files related to logf that overlap
    [from_time, to_time].

    Looks for rotated/archived siblings such as ha-log-20090308,
    ha-log-20090308.gz(.bz2) or ha-log.0; dated names must match the
    user-supplied date or today.  Relies on is_our_log() codes:
    0 = irrelevant, 1 = overlaps, 2 = older than the interval,
    3 = keep (dated match).
    '''
    return_log = []
    if not os.path.isdir(logf):
        dirname = os.path.dirname(logf)
        next_log = os.listdir(dirname)
    else:
        dirname = logf
        next_log = os.listdir(logf)
    for n in next_log:
        ret = -1
        if re.search('^' + os.path.basename(logf) + '[0-9]*.*', n):
            if re.search('\d+', n):
                if n.find(envir.DATE) != -1:
                    # NOTE(review): called with the bare name here but
                    # the joined path below — confirm is_our_log's
                    # expected argument
                    ret = self.is_our_log(n, from_time, to_time)
            else:
                ret = self.is_our_log(os.path.join(dirname, n),
                                      from_time, to_time)
        if ret == 0:
            # irrelevant log: keep scanning
            pass
        elif ret == 1:
            # was 'found log ' + next_log: str + list is a TypeError
            utillib.debug('found log ' + n)
            # was os.path.join(dirs, n): `dirs` undefined (NameError)
            return_log.append(os.path.join(dirname, n))
        elif ret == 2:
            #do not have to go to older logs
            break
        elif ret == 3:
            return_log.append(os.path.join(dirname, n))
    return return_log
def findsshuser(self):
    '''
    If the user did not provide ssh users, find a working ssh user
    by ourselves.

    Tries each candidate user against every node; on success records
    the user in envir.SSH_USER.  Returns 0 when every remote node is
    reachable without a password, 1 otherwise.
    '''
    rc = 0
    # sentinel meaning "no working user found yet"; it was '******',
    # which could never equal the '__undef' check below
    ssh_user = '__undef'
    if not len(envir.SSH_USER):
        try_user_list = '__default ' + ' '.join(envir.TRY_SSH)
    else:
        try_user_list = ' '.join(envir.SSH_USER)
    utillib.debug('FROM FINDSSHUSER: node name is ' +
                  ' '.join(envir.USER_NODES))
    for n in envir.USER_NODES:
        rc = 1
        if n == self.WE:
            # Ahh, it's me, skip!
            continue
        for u in try_user_list.split(' '):
            if u != '__default':
                ssh_s = u + '@' + n
            else:
                ssh_s = n
            if self.testsshconn(ssh_s):
                utillib.debug('ssh ' + ssh_s + ' OK')
                ssh_user = u
                rc = 0
                break
            else:
                utillib.debug('ssh ' + ssh_s + ' failed')
        if rc:
            # separate node names so the warning below stays readable
            envir.SSH_PASSWD_NODES = envir.SSH_PASSWD_NODES + ' ' + n
    if len(envir.SSH_PASSWD_NODES):
        utillib.warning('passwordless ssh to node(s) ' +
                        envir.SSH_PASSWD_NODES + ' does not work')
    if ssh_user == '__undef':
        return 1
    if ssh_user != '__default':
        envir.SSH_USER = ssh_user
    #ssh user is default
    return 0
def get_cluster_type(self):
    '''
    The user did not specify a cluster type, so detect it ourselves
    and record it in envir.USER_CLUSTER_TYPE.
    '''
    # heartbeat only when corosync is running yet its config is
    # missing (or a heartbeat ha.cf is present); corosync otherwise
    is_heartbeat = (utillib.ps_grep("corosync")
                    and (not os.path.isfile('/etc/corosync/corosync.conf')
                         or os.path.isfile(envir.HA_CF)))
    if is_heartbeat:
        utillib.debug("this is Heartbeat cluster stack")
        envir.USER_CLUSTER_TYPE = 'heartbeat'
    else:
        utillib.debug("this is Corosync cluster stack")
        envir.USER_CLUSTER_TYPE = 'corosync'
def run():
    '''
    This method does most of the job that the master node should do:
    figure out the node list, set up ssh, collect logs from every
    node (including this one), then analyse the results and clean up.
    '''
    utillib.check_user()
    utillib.setvarsanddefaults()
    utillib.get_ocf_directories()
    mtr = master()
    mtr.analyzed_argvment(sys.argv)
    #who am i
    mtr.WE = socket.gethostname()
    envir.MASTER = mtr.WE
    #get WORKDIR
    mtr.WORKDIR = mtr.mktemp(envir.DEST)
    mtr.WORKDIR = os.path.join(mtr.WORKDIR, envir.DEST)
    envir.MASTER_WORKDIR = mtr.WORKDIR
    mtr.compabitility_pcmk()
    mtr.cluster_type()
    support = __import__(mtr.import_support())
    if len(envir.CTS):
        # CTS run: use the CTS log (the branches were swapped before:
        # get_cts_log() sat in the non-CTS branch)
        mtr.get_cts_log()
    else:
        support.get_log_var()
        utillib.debug('log setting :facility = ' + envir.HA_LOGFACILITY +
                      ' logfile = ' + envir.HA_LOGFILE + ' debug file = ' +
                      envir.HA_DEBUGFILE)
    #
    #part 1: get nodes
    #
    utillib.get_nodes()
    utillib.debug('nodes: ' + ' '.join(envir.USER_NODES))
    mtr.is_member()
    #this is node
    for n in envir.USER_NODES:
        if n == mtr.WE:
            mtr.THIS_IS_NODE = 1
    # was `mtr.is_node` — the flag set above is THIS_IS_NODE
    if not mtr.THIS_IS_NODE and envir.NODE_SOUECE != 'user':
        utillib.warning(
            'this is not a node and you didn\'t specify a list of nodes using -n'
        )
    #
    #part 2: ssh business
    #
    #find out if ssh works
    if not envir.NO_SSH:
        mtr.findsshuser()
        if len(envir.SSH_USER):
            # list.append() returns None, so the old
            # `envir.SSH_OPTS = envir.SSH_OPTS.append(...)` clobbered
            # SSH_OPTS; mutate in place instead
            envir.SSH_OPTS.append('-o User=' + envir.SSH_USER)
    euid = os.geteuid()
    if not len(envir.SSH_USER) and euid != 0:
        utillib.debug('ssh user other than root, use sudo')
        SUDO = 'sudo -u root'
    LOCAL_SUDO = ''
    if euid != 0:
        # was `if not euid:` (i.e. root), contradicting its own message
        utillib.debug('local user other than root, use sudo')
        LOCAL_SUDO = 'sudo -u root'
    #
    #part 4: find the logs and cut out the segment for the period
    #
    if mtr.THIS_IS_NODE:
        mtr.getlog()
    #create xml before collect
    utillib.creat_xml()
    #then scp the file to collector
    for n in envir.USER_NODES:
        p = Process(target=mtr.send_env, args=(n, ))
        p.start()
    if not envir.NO_SSH:
        mtr.collect_for_nodes(envir.USER_NODES)
    elif mtr.THIS_IS_NODE:
        # was bare `is_node` (NameError) and a `collecct_for_nodes` typo
        mtr.collect_for_nodes([mtr.WE])
    #
    #part 5:
    # slaves tar their result to stdout, send it to master,
    # then master analyses result, asks the user to edit the
    # problem description template, and print final words
    #
    mtr.get_result()
    Process(target=mtr.analyze).start()
    Process(target=mtr.events).start()
    #
    #part 6: endgame:
    # remove tmpfiles and logs we do not need
    utillib.remove_files(mtr)
def get_cts_log(self):
    '''Collect the CTS log.  Not implemented yet.'''
    #TODO
    utillib.debug('need to finish later')
def get_user_node_cts(self, ctslog):
    '''Derive the node list of a CTS run from ctslog.  Not implemented yet.'''
    #TODO
    utillib.debug('need to finish later')