def canPing(device):
    """
    Ping a device, retrying until it answers or the retry budget runs out.

    pingDevice() is called up to MAX_RETRIES times, with a 90 second pause
    after each failed attempt except the last. When the final attempt
    fails, the failure is recorded through setFlag() against errorFile.

    Returns True when a ping succeeds, False once MAX_RETRIES attempts
    have all failed.
    """
    failures = 0
    log.info("INFO: attempting to ping device")
    while failures < MAX_RETRIES:
        reachable, _ = pingDevice(device)
        if reachable:
            # device answered, nothing more to do
            return True

        failures += 1
        if failures == MAX_RETRIES:
            # out of attempts: flag the error and give up
            setFlag(errorFile,
                    "Automation Error: Unable to ping device after %s attempts" % MAX_RETRIES)
            return False

        log.info("INFO: Unable to ping device after %s try. Sleeping for 90s then retrying" % failures)
        time.sleep(90)
    return True
def canPing(device):
    """
    Report whether a device can be reached by ping.

    Makes at most MAX_RETRIES calls to pingDevice(), sleeping 90 seconds
    between failed attempts. If every attempt fails, setFlag() is called
    with errorFile and an "Automation Error" message before returning.

    Returns False on failure, True on success.
    """
    log.info("INFO: attempting to ping device")
    tries = 0
    while tries < MAX_RETRIES:
        pinged, _ = pingDevice(device)
        if pinged:
            break  # success, stop retrying

        tries += 1
        if tries != MAX_RETRIES:
            # still have attempts left: wait, then go around again
            log.info("INFO: Unable to ping device after %s try. Sleeping for 90s then retrying" % tries)
            time.sleep(90)
            continue

        # that was the last allowed attempt
        setFlag(errorFile,
                "Automation Error: Unable to ping device after %s attempts" % MAX_RETRIES)
        return False
    return True
def checkTegra(master, tegra):
    """
    Check the health of one tegra, record the result, and optionally act on it.

    Steps, in order:
      1. Ping the device; if it answers, open a TCP connection to port 20700
         on its IP, send 'info' and read the reply to decide whether the
         agent on the device is responding.
      2. Check the buildslave through checkSlaveAlive()/checkSlaveActive();
         it is reported INACTIVE when its last activity is more than an hour
         old. When no slave is alive, list any buildbot.tac* files instead.
      3. Append a timestamped status line to <tegra>_status.log and pass the
         same fields to summary().
      4. Depending on options.reset / options.reboot, stop the buildslave,
         send 'rebt' to the device, remove error.flg, and/or power cycle the
         device through reboot_device().

    Arguments:
      master -- None, or a mapping providing 'environment', 'hostname' and
                'http_port' (only the first element of 'environment' is used)
      tegra  -- device name; used for the directory under options.bbpath and
                handed to getIPAddress(), pingDevice() and reboot_device()

    Returns None.
    """
    tacName = 'buildbot.tac'

    tegraIP = getIPAddress(tegra)
    tegraPath = os.path.join(options.bbpath, tegra)
    exportFile = os.path.join(tegraPath, '%s_status.log' % tegra)
    errorFile = os.path.join(tegraPath, 'error.flg')
    errorFlag = os.path.isfile(errorFile)
    sTegra = 'OFFLINE'
    sutFound = False
    logTD = None

    status = {
        'tegra': tegra,
        'active': False,
        'cp': 'OFFLINE',
        'bs': 'OFFLINE',
        'msg': '',
    }

    log.debug('%s: %s' % (tegra, tegraIP))

    if master is None:
        status['environment'] = 's'
        status['master'] = 'localhost'
    else:
        status['environment'] = master['environment'][0]
        status['master'] = 'http://%s:%s' % (master['hostname'], master['http_port'])

    fPing, lPing = pingDevice(tegra)
    if fPing:
        hbSocket = None
        try:
            hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            hbSocket.settimeout(float(120))
            hbSocket.connect((tegraIP, 20700))

            # the connection was accepted, so something is listening
            sutFound = True

            time.sleep(2)

            # NOTE(review): str payload assumes Python 2 sockets - confirm
            # before running this under Python 3
            hbSocket.send('info\n')

            d = hbSocket.recv(4096)

            log.debug('socket data length %d' % len(d))
            log.debug(d)

            status['active'] = True
        except Exception:
            status['active'] = False
            dumpException('socket')
        finally:
            # always release the socket, including when connect/send/recv fail
            if hbSocket is not None:
                hbSocket.close()

        if status['active']:
            sTegra = 'online'
        else:
            sTegra = 'INACTIVE'

        if not sutFound:
            status['msg'] += 'SUTAgent not present;'
    else:
        # ping failed: report the first two entries pingDevice() handed back
        status['msg'] += '%s %s;' % (lPing[0], lPing[1])

    # Cheat until we have a better check solution for new watch_devices.sh
    status['cp'] = 'active'  # pretend all is well

    if checkSlaveAlive(tegraPath):
        logTD = checkSlaveActive(tegraPath)
        if logTD is not None:
            if (logTD.days > 0) or (logTD.days == 0 and logTD.seconds > 3600):
                status['bs'] = 'INACTIVE'
                status['msg'] += 'BS %dd %ds;' % (logTD.days, logTD.seconds)
            else:
                status['bs'] = 'active'
        else:
            status['bs'] = 'INACTIVE'
    else:
        # scan thru tegra-### dir and see if any buildbot.tac.bug#### files
        # exist but ignore buildbot.tac file itself (except to note that it is
        # missing)
        found = False
        for f in os.listdir(tegraPath):
            if f.startswith(tacName):
                found = True
                if f != tacName:
                    status['msg'] += '%s;' % f
        if not found:
            status['msg'] += 'buildbot.tac NOT found;'

    if errorFlag:
        status['msg'] += 'error.flg [%s] ' % getLastLine(errorFile)

    s = '%s %s %9s %8s %8s :: %s' % (status['tegra'], status['environment'],
                                     sTegra, status['cp'], status['bs'],
                                     status['msg'])
    ts = time.strftime('%Y-%m-%d %H:%M:%S')
    log.info(s)
    # close the log explicitly instead of relying on garbage collection
    with open(exportFile, 'a+') as statusLog:
        statusLog.write('%s %s\n' % (ts, s))
    summary(status['tegra'], status['environment'], sTegra, status['cp'],
            status['bs'], status['msg'], ts, status['master'])

    if errorFlag and options.reset:
        stopProcess(os.path.join(tegraPath, 'twistd.pid'), 'buildslave')

        if not options.reboot:
            hbSocket = None
            try:
                hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                hbSocket.settimeout(float(120))
                hbSocket.connect((tegraIP, 20700))
                hbSocket.send('rebt\n')
                log.info('rebooting tegra')
            except Exception:
                dumpException('socket')
            finally:
                if hbSocket is not None:
                    hbSocket.close()

        # errorFlag is already known to be true in this branch
        log.info('clearing error.flg')
        os.remove(errorFile)

    if options.reboot:
        if not sutFound and status['bs'] != 'active':
            log.info('power cycling tegra')
            reboot_device(tegra)
    else:
        if sTegra == 'OFFLINE' and status['bs'] != 'active':
            log.info('power cycling tegra')
            reboot_device(tegra)

    # Here we try to catch the state where sutagent and cp are inactive, that
    # is: sTegra == 'INACTIVE' and status['cp'] == 'INACTIVE'.
    # NOTE: status['cp'] is currently forced to 'active' above, so this branch
    # cannot fire until a real clientproxy check sets it to 'INACTIVE' again.
    if options.reset and sTegra == 'INACTIVE' and status['cp'] == 'INACTIVE':
        log.info('stopping hung clientproxy')
        stopDevice(tegra)
        time.sleep(5)
        log.info('starting clientproxy for %s' % tegra)
        os.chdir(tegraPath)
        runCommand(['python', 'clientproxy.py', '-b', '--device=%s' % tegra])
def checkTegra(master, tegra):
    """
    Check the health of one tegra, record the result, and optionally act on it.

    Steps, in order:
      1. Ping the device; if it answers, open a TCP connection to port 20700
         on its IP, send 'info' and read the reply to decide whether the
         agent on the device is responding.
      2. Check the buildslave through checkSlaveAlive()/checkSlaveActive();
         it is reported INACTIVE when its last activity is more than an hour
         old. When no slave is alive, list any buildbot.tac* files instead.
      3. Append a timestamped status line to <tegra>_status.log and pass the
         same fields to summary().
      4. Depending on options.reset / options.reboot, stop the buildslave,
         send 'rebt' to the device, remove error.flg, and/or power cycle the
         device through reboot_device().

    Arguments:
      master -- None, or a mapping providing 'environment', 'hostname' and
                'http_port' (only the first element of 'environment' is used)
      tegra  -- device name; used for the directory under options.bbpath and
                handed to getIPAddress(), pingDevice() and reboot_device()

    Returns None.
    """
    tacName = 'buildbot.tac'

    tegraIP = getIPAddress(tegra)
    tegraPath = os.path.join(options.bbpath, tegra)
    exportFile = os.path.join(tegraPath, '%s_status.log' % tegra)
    errorFile = os.path.join(tegraPath, 'error.flg')
    errorFlag = os.path.isfile(errorFile)
    sTegra = 'OFFLINE'
    sutFound = False
    logTD = None

    status = {
        'tegra': tegra,
        'active': False,
        'cp': 'OFFLINE',
        'bs': 'OFFLINE',
        'msg': '',
    }

    log.debug('%s: %s' % (tegra, tegraIP))

    if master is None:
        status['environment'] = 's'
        status['master'] = 'localhost'
    else:
        status['environment'] = master['environment'][0]
        status['master'] = 'http://%s:%s' % (master['hostname'], master['http_port'])

    fPing, lPing = pingDevice(tegra)
    if fPing:
        hbSocket = None
        try:
            hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            hbSocket.settimeout(float(120))
            hbSocket.connect((tegraIP, 20700))

            # the connection was accepted, so something is listening
            sutFound = True

            time.sleep(2)

            # NOTE(review): str payload assumes Python 2 sockets - confirm
            # before running this under Python 3
            hbSocket.send('info\n')

            d = hbSocket.recv(4096)

            log.debug('socket data length %d' % len(d))
            log.debug(d)

            status['active'] = True
        except Exception:
            status['active'] = False
            dumpException('socket')
        finally:
            # always release the socket, including when connect/send/recv fail
            if hbSocket is not None:
                hbSocket.close()

        if status['active']:
            sTegra = 'online'
        else:
            sTegra = 'INACTIVE'

        if not sutFound:
            status['msg'] += 'SUTAgent not present;'
    else:
        # ping failed: report the first two entries pingDevice() handed back
        status['msg'] += '%s %s;' % (lPing[0], lPing[1])

    # Cheat until we have a better check solution for new watch_devices.sh
    status['cp'] = 'active'  # pretend all is well

    if checkSlaveAlive(tegraPath):
        logTD = checkSlaveActive(tegraPath)
        if logTD is not None:
            if (logTD.days > 0) or (logTD.days == 0 and logTD.seconds > 3600):
                status['bs'] = 'INACTIVE'
                status['msg'] += 'BS %dd %ds;' % (logTD.days, logTD.seconds)
            else:
                status['bs'] = 'active'
        else:
            status['bs'] = 'INACTIVE'
    else:
        # scan thru tegra-### dir and see if any buildbot.tac.bug#### files
        # exist but ignore buildbot.tac file itself (except to note that it is
        # missing)
        found = False
        for f in os.listdir(tegraPath):
            if f.startswith(tacName):
                found = True
                if f != tacName:
                    status['msg'] += '%s;' % f
        if not found:
            status['msg'] += 'buildbot.tac NOT found;'

    if errorFlag:
        status['msg'] += 'error.flg [%s] ' % getLastLine(errorFile)

    s = '%s %s %9s %8s %8s :: %s' % (status['tegra'], status['environment'],
                                     sTegra, status['cp'], status['bs'],
                                     status['msg'])
    ts = time.strftime('%Y-%m-%d %H:%M:%S')
    log.info(s)
    # close the log explicitly instead of relying on garbage collection
    with open(exportFile, 'a+') as statusLog:
        statusLog.write('%s %s\n' % (ts, s))
    summary(status['tegra'], status['environment'], sTegra, status['cp'],
            status['bs'], status['msg'], ts, status['master'])

    if errorFlag and options.reset:
        stopProcess(os.path.join(tegraPath, 'twistd.pid'), 'buildslave')

        if not options.reboot:
            hbSocket = None
            try:
                hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                hbSocket.settimeout(float(120))
                hbSocket.connect((tegraIP, 20700))
                hbSocket.send('rebt\n')
                log.info('rebooting tegra')
            except Exception:
                dumpException('socket')
            finally:
                if hbSocket is not None:
                    hbSocket.close()

        # errorFlag is already known to be true in this branch
        log.info('clearing error.flg')
        os.remove(errorFile)

    if options.reboot:
        if not sutFound and status['bs'] != 'active':
            log.info('power cycling tegra')
            reboot_device(tegra)
    else:
        if sTegra == 'OFFLINE' and status['bs'] != 'active':
            log.info('power cycling tegra')
            reboot_device(tegra)

    # Here we try to catch the state where sutagent and cp are inactive, that
    # is: sTegra == 'INACTIVE' and status['cp'] == 'INACTIVE'.
    # NOTE: status['cp'] is currently forced to 'active' above, so this branch
    # cannot fire until a real clientproxy check sets it to 'INACTIVE' again.
    if options.reset and sTegra == 'INACTIVE' and status['cp'] == 'INACTIVE':
        log.info('stopping hung clientproxy')
        stopDevice(tegra)
        time.sleep(5)
        log.info('starting clientproxy for %s' % tegra)
        os.chdir(tegraPath)
        runCommand(['python', 'clientproxy.py', '-b', '--device=%s' % tegra])