コード例 #1
0
def getHostname():
    """Return the first item of the output of the ``hostname`` command.

    The command is run through runCommand() with echo logging disabled.
    'unknown' is returned when the command produced no output.
    """
    _proc, output = runCommand(['hostname'], logEcho=False)
    return output[0] if len(output) > 0 else 'unknown'
コード例 #2
0
ファイル: clientproxy.py プロジェクト: EkkiD/build-tools
def monitorEvents(options, events):
    """Run the main state machine for the Tegra monitor.

    Responds to the events sent via the ``events`` queue and also monitors
    the state of the buildslave if it has been started.  Loops until a
    ``('terminate',)`` event is received, then stops the buildslave if this
    monitor believes it is running.

    options -- object providing the ``bbpath``, ``tegra``, ``tegraIP`` and
               ``hangtime`` attributes
    events  -- queue of event tuples; the first item of each tuple is the
               event name ('reboot', 'stop', 'offline', 'active',
               'dialback', 'start' or 'terminate')
    """
    pidFile   = os.path.join(options.bbpath, 'twistd.pid')
    flagFile  = os.path.join(options.bbpath, 'proxy.flg')
    errorFile = os.path.join(options.bbpath, 'error.flg')
    bbEnv     = { 'PATH':     os.getenv('PATH'),
                  'SUT_NAME': options.tegra,
                  'SUT_IP':   options.tegraIP,
                }

    event         = None
    hbSocket      = None
    bbActive      = False
    tegraActive   = False
    connected     = False
    nChatty       = 0
    maxChatty     = 10
    hbFails       = 0
    maxFails      = 50
    sleepFails    = 5
    softCount     = 0    # how many times tegraActive is True
                         # but errorFlag is set
    softCountMax  = 5    # how many active events to wait before
                         # triggering a soft reset
    softResets    = 0
    softResetMax  = 5    # how many soft resets do we try before
                         # waiting for a hard reset
    hardResets    = 0
    hardResetsMax = 3
    lastHangCheck = time.time()

    log.info('monitoring started (process pid %s)' % current_process().pid)

    while True:
        if not connected:
            try:
                hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                hbSocket.settimeout(float(120))
                hbSocket.connect((options.tegraIP, sutDataPort))
                connected = True
            except Exception:
                connected = False
                hbFails  += 1
                # release the failed socket so that repeated retries do not
                # accumulate open descriptors
                if hbSocket is not None:
                    hbSocket.close()
                log.info('Error connecting to data port - sleeping for %d seconds' % sleepFails)
                time.sleep(sleepFails)

        try:
            event = events.get(False)
        except Empty:
            event = None

        if event is None:
            # no pending event: use the idle time to look for a heartbeat
            if connected:
                try:
                    hbData = hbSocket.recv(4096)
                except Exception:
                    hbData    = ''
                    connected = False
                    dumpException('hbSocket.recv()')
                    # a new socket is created on the next pass, close this one
                    hbSocket.close()

                if len(hbData) > 1:
                    log.debug('socket data [%s]' % hbData[:-1])

                    if 'ebooting ...' in hbData:
                        log.warning('device is rebooting')
                        events.put(('reboot',))
                        if os.path.isfile(flagFile):
                            time.sleep(5)
                        hbSocket.close()
                        connected = False
                    else:
                        log.info('heartbeat detected')
                        events.put(('active',))
                        hbFails = 0
                else:
                    hbFails += 1
        else:
            state = event[0]
            s     = 'event %s hbFails %d / %d' % (state, hbFails, maxFails)
            # only every (maxChatty + 1)th event is logged at info level
            nChatty += 1
            if nChatty > maxChatty:
                log.info(s)
                nChatty = 0
            else:
                log.debug(s)

            if state == 'reboot':
                tegraActive = False
                if not os.path.isfile(flagFile):
                    log.warning('Tegra rebooting, stopping buildslave')
                    events.put(('stop',))
            elif state == 'stop' or state == 'offline':
                stopSlave(pidFile)
                bbActive = False
                if connected and state == 'offline':
                    hbSocket.close()
                    connected = False
            elif state == 'active' or state == 'dialback':
                if state == 'dialback':
                    # A dialback restarts the reset escalation.  This used to
                    # live in a separate `elif state == 'dialback'` branch
                    # that could never run because this branch matches first.
                    softCount  = 0
                    softResets = 0
                    hardResets = 0
                tegraActive = True
                if not bbActive:
                    if os.path.isfile(errorFile):
                        log.warning('Tegra active but error flag set [%d/%d]' % (softCount, softResets))
                        softCount += 1
                        if softCount > softCountMax:
                            softCount = 0
                            if softResets < softResetMax:
                                softResets += 1
                                log.warning('removing error flag to see if tegra comes back')
                                os.remove(errorFile)
                            else:
                                hardResets += 1
                                log.warning('hard reset reboot check [%d/%d]' % (hardResets, hardResetsMax))
                                if hardResets < hardResetsMax:
                                    sendReboot(options.tegraIP, sutDataPort)
                                else:
                                    events.put(('offline',))
                    else:
                        events.put(('start',))
            elif state == 'start':
                if tegraActive and not bbActive:
                    log.debug('starting buildslave in %s' % options.bbpath)
                    bbProc, _ = runCommand(['twistd', '--no_save',
                                            '--rundir=%s' % options.bbpath,
                                            '--pidfile=%s' % pidFile,
                                            '--python=%s' % os.path.join(options.bbpath, 'buildbot.tac')],
                                           env=bbEnv)
                    log.info('buildslave start returned %s' % bbProc.returncode)
                    if bbProc.returncode == 0 or bbProc.returncode == 1:
                        # pause to give twistd a chance to generate the pidfile
                        # before the code that follows goes off killing it because
                        # it thinks that it didn't start properly
                        for attempt in range(20):
                            if os.path.isfile(pidFile):
                                log.debug('pidfile found, setting bbActive to True')
                                bbActive = True
                                break
                            time.sleep(5)
            elif state == 'terminate':
                break

        if hbFails > maxFails:
            # too many missed heartbeats: back off (capped at 300 seconds) and,
            # unless the install flag file is present, mark the device offline
            hbFails    = 0
            sleepFails = min(sleepFails + 5, 300)
            if os.path.isfile(flagFile):
                log.debug('install flag found, resetting error count')
            else:
                events.put(('offline',))
            if connected:
                hbSocket.close()
                connected = False

        log.debug('bbActive %s tegraActive %s' % (bbActive, tegraActive))

        if bbActive and os.path.isfile(errorFile):
            log.error('errorFile detected - sending stop request')
            events.put(('stop',))

        if bbActive:
            if os.path.isfile(pidFile):
                n = time.time()
                if not checkSlaveAlive(options.bbpath):
                    log.warning('buildslave should be active but pid is not alive')
                    # the hang check itself runs at most once every 300 seconds
                    if int(n - lastHangCheck) > 300:
                        lastHangCheck = n
                        logTD = checkSlaveActive(options.bbpath)
                        # NOTE(review): assumes checkSlaveActive() can return
                        # None when it has nothing to report - confirm
                        if logTD is not None and (
                                logTD.days > 0 or
                                (logTD.days == 0 and logTD.seconds > options.hangtime)):
                            log.error('last activity was %d days %d seconds ago - marking as hung slave' %
                                      (logTD.days, logTD.seconds))
                            events.put(('offline',))
            else:
                log.warning('buildslave should be active but pidfile not found, marking as offline')
                events.put(('offline',))
        else:
            if os.path.isfile(pidFile):
                if checkSlaveAlive(options.bbpath):
                    log.error('buildslave should NOT be active but pidfile found, killing buildbot')
                    events.put(('stop',))
                else:
                    log.warning('buildslave not active but pidfile found, removing pidfile')
                    os.remove(pidFile)

    if connected:
        # do not leave the heartbeat socket open once monitoring ends
        hbSocket.close()

    if bbActive:
        stopSlave(pidFile)

    log.info('monitor stopped')
コード例 #3
0
ファイル: check.py プロジェクト: magnyld/build-tools
def checkTegra(master, tegra):
    """Check a single tegra, record its status and optionally recover it.

    The status line is logged, appended to ``<tegra>_status.log`` in the
    tegra's directory and passed to summary().  Depending on the module
    level ``options`` (``reset`` / ``reboot``) the device may then be
    rebooted, power cycled or have its clientproxy restarted.

    master -- dict with 'environment', 'hostname' and 'http_port' keys, or
              None (reported as environment 's' on master 'localhost')
    tegra  -- name of the tegra; also the name of its directory below
              ``options.bbpath``
    """
    sutAgentPort = 20700
    tegraIP = getIPAddress(tegra)
    tegraPath = os.path.join(options.bbpath, tegra)
    exportFile = os.path.join(tegraPath, '%s_status.log' % tegra)
    errorFile = os.path.join(tegraPath, 'error.flg')
    errorFlag = os.path.isfile(errorFile)
    sTegra = 'OFFLINE'
    sutFound = False
    logTD = None

    status = {'tegra': tegra,
              'active': False,
              'cp': 'OFFLINE',
              'bs': 'OFFLINE',
              'msg': '',
              }

    log.debug('%s: %s' % (tegra, tegraIP))

    if master is None:
        status['environment'] = 's'
        status['master'] = 'localhost'
    else:
        status['environment'] = master['environment'][0]
        status['master'] = 'http://%s:%s' % (
            master['hostname'], master['http_port'])

    fPing, lPing = pingDevice(tegra)
    if fPing:
        hbSocket = None
        try:
            hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            hbSocket.settimeout(float(120))
            hbSocket.connect((tegraIP, sutAgentPort))

            # connecting is enough to know that something is listening
            sutFound = True

            time.sleep(2)

            hbSocket.send('info\n')

            d = hbSocket.recv(4096)

            log.debug('socket data length %d' % len(d))
            log.debug(d)

            status['active'] = True
        except Exception:
            status['active'] = False
            dumpException('socket')
        finally:
            # close on every path so a failed send/recv does not leak the
            # socket
            if hbSocket is not None:
                hbSocket.close()

        if status['active']:
            sTegra = 'online'
        else:
            sTegra = 'INACTIVE'

        if not sutFound:
            status['msg'] += 'SUTAgent not present;'
    else:
        status['msg'] += '%s %s;' % (lPing[0], lPing[1])

    # Cheat until we have a better check solution for new watch_devices.sh
    status['cp'] = 'active'  # pretend all is well

    if checkSlaveAlive(tegraPath):
        logTD = checkSlaveActive(tegraPath)
        if logTD is not None:
            if (logTD.days > 0) or (logTD.days == 0 and logTD.seconds > 3600):
                status['bs'] = 'INACTIVE'
                status['msg'] += 'BS %dd %ds;' % (logTD.days, logTD.seconds)
            else:
                status['bs'] = 'active'
        else:
            status['bs'] = 'INACTIVE'
    else:
        # scan thru tegra-### dir and see if any buildbot.tac.bug#### files
        # exist but ignore buildbot.tac file itself (except to note that it is
        # missing)
        found = False
        for f in os.listdir(tegraPath):
            if f.startswith('buildbot.tac'):
                found = True
                # longer than 'buildbot.tac' itself, i.e. it has a suffix
                if len(f) > len('buildbot.tac'):
                    status['msg'] += '%s;' % f
        if not found:
            status['msg'] += 'buildbot.tac NOT found;'

    if errorFlag:
        status['msg'] += 'error.flg [%s] ' % getLastLine(errorFile)

    s = '%s %s %9s %8s %8s :: %s' % (status['tegra'], status['environment'],
                                     sTegra, status['cp'], status['bs'],
                                     status['msg'])
    ts = time.strftime('%Y-%m-%d %H:%M:%S')
    log.info(s)
    # use a context manager so the status log is flushed and closed right away
    with open(exportFile, 'a+') as statusLog:
        statusLog.write('%s %s\n' % (ts, s))
    summary(status['tegra'], status['environment'], sTegra, status[
            'cp'], status['bs'], status['msg'], ts, status['master'])

    if errorFlag and options.reset:
        stopProcess(os.path.join(tegraPath, 'twistd.pid'), 'buildslave')

        if not options.reboot:
            # no power cycle was requested, so ask the agent to reboot instead
            hbSocket = None
            try:
                hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                hbSocket.settimeout(float(120))
                hbSocket.connect((tegraIP, sutAgentPort))
                hbSocket.send('rebt\n')
                log.info('rebooting tegra')
            except Exception:
                dumpException('socket')
            finally:
                if hbSocket is not None:
                    hbSocket.close()

        log.info('clearing error.flg')
        os.remove(errorFile)

    # Here we try to catch the state where sutagent and cp are inactive that
    # is determined by:
    #     sTegra == 'INACTIVE' and
    #     status['cp'] == 'INACTIVE'
    # status['cp'] will be set to INACTIVE only if logTD.seconds (last time
    # clientproxy updated its log file) is > 3600

    # sTegra is only 'OFFLINE' when the ping failed, in which case sutFound
    # is False as well, so the former extra "sTegra == 'OFFLINE'" check could
    # never trigger on its own and a single condition covers every case.
    if options.reboot and not sutFound and status['bs'] != 'active':
        log.info('power cycling tegra')
        reboot_device(tegra)

    # status['cp'] is currently forced to 'active' above, so this recovery
    # path stays dormant until a real clientproxy check is restored
    if options.reset and sTegra == 'INACTIVE' and status['cp'] == 'INACTIVE':
        log.info('stopping hung clientproxy')
        stopDevice(tegra)
        time.sleep(5)
        log.info('starting clientproxy for %s' % tegra)
        os.chdir(tegraPath)
        runCommand(['python', 'clientproxy.py', '-b', '--device=%s' % tegra])
コード例 #4
0
ファイル: check.py プロジェクト: magnyld/build-tools
def getHostname():
    """Ask the ``hostname`` command for this machine's name.

    Returns the first item of the command output as handed back by
    runCommand(), or 'unknown' if that output is empty.
    """
    _proc, cmdOutput = runCommand(['hostname'], logEcho=False)
    if len(cmdOutput) == 0:
        return 'unknown'
    return cmdOutput[0]
コード例 #5
0
ファイル: installApp.py プロジェクト: mjessome/tools
def one_time_setup(ip_addr, major_source):
    """One time setup of state.

    ip_addr - of the tegra we want to install app at
    major_source - we've hacked this script to install
            may-also-be-needed tools, but the source we're asked to
            install has the meta data we need

    Returns the ``(dm, devRoot)`` tuple: the connected device manager and
    the device root reported by checkDeviceRoot().  Exits the process with
    status 1 when the device root is unusable or when the resolution change
    cannot be verified.

    Side Effects:
        Sets two globals, needed for error reporting:
            errorFile, proxyFile
    """

    # set up the flag files, used throughout
    cwd = os.getcwd()
    global proxyFile, errorFile
    proxyFile = os.path.join(cwd, "..", "proxy.flg")
    errorFile = os.path.join(cwd, "..", "error.flg")

    proxyIP = getOurIP()
    proxyPort = calculatePort()

    workdir = os.path.dirname(major_source)
    inifile = os.path.join(workdir, "fennec", "application.ini")
    remoteappini = os.path.join(workdir, "talos", "remoteapp.ini")
    print("copying %s to %s" % (inifile, remoteappini))
    runCommand(["cp", inifile, remoteappini])

    print("connecting to: %s" % ip_addr)
    dm = devicemanager.DeviceManagerSUT(ip_addr)
    # Moar data!
    dm.debug = 3

    devRoot = checkDeviceRoot(dm)

    if devRoot is None or devRoot == "/tests":
        setFlag(errorFile, "Remote Device Error: devRoot from devicemanager [%s] is not correct - exiting" % devRoot)
        sys.exit(1)

    try:
        # the proxy flag stays set for the duration of the setup and is
        # always cleared again by the finally clause below
        setFlag(proxyFile)
        print("%s %s" % (proxyIP, proxyPort))
        getDeviceTimestamp(dm)
        setDeviceTimestamp(dm)
        getDeviceTimestamp(dm)
        dm.getInfo("process")
        dm.getInfo("memory")
        dm.getInfo("uptime")

        width, height = getResolution(dm)
        # adjust resolution down to allow fennec to install without memory issues
        if width >= 1050 or height >= 1050:
            dm.adjustResolution(1024, 768, "crt")
            print("calling reboot")
            dm.reboot(proxyIP, proxyPort)
            waitForDevice(dm)

            width, height = getResolution(dm)
            # the change failed if EITHER dimension is off; the previous
            # `and` only caught the case where both were wrong
            if width != 1024 or height != 768:
                clearFlag(proxyFile)
                setFlag(
                    errorFile,
                    "Remote Device Error: Resolution change failed.  Should be %d/%d but is %d/%d"
                    % (1024, 768, width, height),
                )
                sys.exit(1)

    finally:
        clearFlag(proxyFile)

    return dm, devRoot
コード例 #6
0
def one_time_setup(ip_addr, major_source):
    ''' One time setup of state

    ip_addr - of the device we want to install app at
    major_source - we've hacked this script to install
            may-also-be-needed tools, but the source we're asked to
            install has the meta data we need

    Returns the (dm, devRoot) tuple on success: the connected device
    manager and the device root reported by checkDeviceRoot().  Returns
    (None, None) when the device root is unusable, the reboot cannot be
    verified, the resolution change fails or the agent raises an
    AgentError.

    Side Effects:
        global, needed for error reporting:
            errorFile
    '''

    # set up the flag files, used throughout
    cwd = os.getcwd()
    global errorFile
    errorFile = os.path.join(cwd, '..', 'error.flg')
    deviceName = os.path.basename(cwd)

    proxyIP = getOurIP()
    proxyPort = calculatePort()

    workdir = os.path.dirname(major_source)
    inifile = os.path.join(workdir, 'fennec', 'application.ini')
    remoteappini = os.path.join(workdir, 'talos', 'remoteapp.ini')
    log.info('copying %s to %s' % (inifile, remoteappini))
    runCommand(['cp', inifile, remoteappini])

    log.info("connecting to: %s" % ip_addr)
    dm = devicemanager.DeviceManagerSUT(ip_addr)
    # Moar data!
    dm.debug = 3

    devRoot = checkDeviceRoot(dm)

    if devRoot is None or devRoot == '/tests':
        setFlag(
            errorFile,
            "Remote Device Error: devRoot from devicemanager [%s] is not correct - exiting"
            % devRoot)
        return None, None

    try:
        log.info("%s, %s" % (proxyIP, proxyPort))
        getDeviceTimestamp(dm)
        setDeviceTimestamp(dm)
        getDeviceTimestamp(dm)
        dm.getInfo('process')
        dm.getInfo('memory')
        dm.getInfo('uptime')

        width, height = getResolution(dm)
        # adjust resolution down to allow fennec to install without memory
        # issues
        if (width == 1600 or height == 1200):
            dm.adjustResolution(1024, 768, 'crt')
            log.info('forcing device reboot')
            if not powermanagement.soft_reboot_and_verify(
                    device=deviceName, dm=dm, ipAddr=proxyIP, port=proxyPort):
                return None, None

            width, height = getResolution(dm)
            # the change failed if EITHER dimension is off; the previous
            # `and` only caught the case where both were wrong
            if width != 1024 or height != 768:
                setFlag(
                    errorFile,
                    "Remote Device Error: Resolution change failed.  Should be %d/%d but is %d/%d"
                    % (1024, 768, width, height))
                return None, None

    except devicemanager.AgentError as err:
        log.error(
            "remoteDeviceError: while doing one time setup for installation: %s"
            % err)
        return None, None

    # every failure path returns a 2-tuple, so the success path must as
    # well instead of falling off the end and returning a bare None
    return dm, devRoot
コード例 #7
0
ファイル: installApp.py プロジェクト: B-Rich/build-tools
def one_time_setup(ip_addr, major_source):
    ''' One time setup of state

    ip_addr - of the device we want to install app at
    major_source - we've hacked this script to install
            may-also-be-needed tools, but the source we're asked to
            install has the meta data we need

    Returns (dm, devRoot) on success - the connected device manager and
    the device root reported by checkDeviceRoot() - and (None, None) when
    the device root is unusable, the reboot cannot be verified, the
    resolution change fails or the agent raises an AgentError.

    Side Effects:
        global, needed for error reporting:
            errorFile
    '''

    # set up the flag files, used throughout
    cwd = os.getcwd()
    global errorFile
    errorFile = os.path.join(cwd, '..', 'error.flg')
    deviceName = os.path.basename(cwd)

    proxyIP = getOurIP()
    proxyPort = calculatePort()

    workdir = os.path.dirname(major_source)
    inifile = os.path.join(workdir, 'fennec', 'application.ini')
    remoteappini = os.path.join(workdir, 'talos', 'remoteapp.ini')
    log.info('copying %s to %s' % (inifile, remoteappini))
    runCommand(['cp', inifile, remoteappini])

    log.info("connecting to: %s" % ip_addr)
    dm = devicemanager.DeviceManagerSUT(ip_addr)
    # Moar data!
    dm.debug = 3

    devRoot = checkDeviceRoot(dm)

    if devRoot is None or devRoot == '/tests':
        setFlag(errorFile, "Remote Device Error: devRoot from devicemanager [%s] is not correct - exiting" % devRoot)
        return None, None

    try:
        log.info("%s, %s" % (proxyIP, proxyPort))
        getDeviceTimestamp(dm)
        setDeviceTimestamp(dm)
        getDeviceTimestamp(dm)
        dm.getInfo('process')
        dm.getInfo('memory')
        dm.getInfo('uptime')

        width, height = getResolution(dm)
        # adjust resolution down to allow fennec to install without memory
        # issues
        if (width == 1600 or height == 1200):
            dm.adjustResolution(1024, 768, 'crt')
            log.info('forcing device reboot')
            if not powermanagement.soft_reboot_and_verify(device=deviceName, dm=dm, ipAddr=proxyIP, port=proxyPort):
                return None, None

            width, height = getResolution(dm)
            # a mismatch in either dimension means the change did not take;
            # `and` would let a half-applied resolution slip through
            if width != 1024 or height != 768:
                setFlag(errorFile, "Remote Device Error: Resolution change failed.  Should be %d/%d but is %d/%d" % (1024, 768, width, height))
                return None, None

    except devicemanager.AgentError as err:
        log.error("remoteDeviceError: while doing one time setup for installation: %s" % err)
        return None, None

    # failure paths return a 2-tuple; return one on success too rather than
    # implicitly returning None
    return dm, devRoot
コード例 #8
0
def one_time_setup(ip_addr, major_source):
    ''' One time setup of state

    ip_addr - of the tegra we want to install app at
    major_source - we've hacked this script to install
            may-also-be-needed tools, but the source we're asked to
            install has the meta data we need

    Returns (dm, devRoot): the connected device manager and the device
    root reported by checkDeviceRoot().  Exits the process with status 1
    when the device root is unusable or when the resolution change cannot
    be verified.

    Side Effects:
        Sets two globals, needed for error reporting:
            errorFile, proxyFile
    '''

    # set up the flag files, used throughout
    cwd = os.getcwd()
    global proxyFile, errorFile
    proxyFile = os.path.join(cwd, '..', 'proxy.flg')
    errorFile = os.path.join(cwd, '..', 'error.flg')

    proxyIP = getOurIP()
    proxyPort = calculatePort()

    workdir = os.path.dirname(major_source)
    inifile = os.path.join(workdir, 'fennec', 'application.ini')
    remoteappini = os.path.join(workdir, 'talos', 'remoteapp.ini')
    print('copying %s to %s' % (inifile, remoteappini))
    runCommand(['cp', inifile, remoteappini])

    print("connecting to: %s" % ip_addr)
    dm = devicemanager.DeviceManagerSUT(ip_addr)
    # Moar data!
    dm.debug = 3

    devRoot = checkDeviceRoot(dm)

    if devRoot is None or devRoot == '/tests':
        setFlag(
            errorFile,
            "Remote Device Error: devRoot from devicemanager [%s] is not correct - exiting"
            % devRoot)
        sys.exit(1)

    try:
        # the proxy flag is held while the setup runs; the finally clause
        # below clears it on every exit path, including sys.exit()
        setFlag(proxyFile)
        print('%s %s' % (proxyIP, proxyPort))
        getDeviceTimestamp(dm)
        setDeviceTimestamp(dm)
        getDeviceTimestamp(dm)
        dm.getInfo('process')
        dm.getInfo('memory')
        dm.getInfo('uptime')

        width, height = getResolution(dm)
        # adjust resolution down to allow fennec to install without memory issues
        if (width >= 1050 or height >= 1050):
            dm.adjustResolution(1024, 768, 'crt')
            print('calling reboot')
            dm.reboot(proxyIP, proxyPort)
            waitForDevice(dm)

            width, height = getResolution(dm)
            # either dimension being wrong means the change failed; with
            # `and` only a mismatch in both was reported
            if width != 1024 or height != 768:
                clearFlag(proxyFile)
                setFlag(
                    errorFile,
                    "Remote Device Error: Resolution change failed.  Should be %d/%d but is %d/%d"
                    % (1024, 768, width, height))
                sys.exit(1)

    finally:
        clearFlag(proxyFile)

    return dm, devRoot
コード例 #9
0
ファイル: installApp.py プロジェクト: lsblakk/tools
        width, height = getResolution(dm)
        #adjust resolution down to allow fennec to install without memory issues
        if (width >= 1050 or height >= 1050):
            dm.adjustResolution(1024, 768, 'crt')
            print 'calling reboot'
            dm.reboot(proxyIP, proxyPort)
            waitForDevice(dm)

            width, height = getResolution(dm)
            if width != 1024 and height != 768:
                clearFlag(proxyFile)
                setFlag(errorFile, "Remote Device Error: Resolution change failed.  Should be %d/%d but is %d/%d" % (1024,768,width,height))
                sys.exit(1)

        print 'copying %s to %s' % (inifile, remoteappini)
        runCommand(['cp', inifile, remoteappini])

        status = dm.installApp(target)
        if status is None:
            print '-'*42
            print 'installApp() done - gathering debug info'
            dm.getInfo('process')
            dm.getInfo('memory')
            dm.getInfo('uptime')
            try:
                print dm.sendCMD(['exec su -c "logcat -d -v time *:W"'])
            except devicemanager.DMError, e:
                print "Exception hit while trying to run logcat: %s" % str(e)
                setFlag(errorFile, "Remote Device Error: can't run logcat")
                sys.exit(1)
        else:
コード例 #10
0
ファイル: clientproxy.py プロジェクト: EkkiD/build-tools
def monitorEvents(options, events):
    """This is the main state machine for the Tegra monitor.

    Respond to the events sent via queue and also monitor the
    state of the buildslave if it's been started.  The loop ends when a
    ``('terminate', )`` event is read; the buildslave is then stopped if
    this monitor believes it is running.

    options -- object providing the ``bbpath``, ``tegra``, ``tegraIP`` and
               ``hangtime`` attributes
    events  -- queue of event tuples whose first item is the event name
               ('reboot', 'stop', 'offline', 'active', 'dialback',
               'start' or 'terminate')
    """
    pidFile = os.path.join(options.bbpath, 'twistd.pid')
    flagFile = os.path.join(options.bbpath, 'proxy.flg')
    errorFile = os.path.join(options.bbpath, 'error.flg')
    bbEnv = {
        'PATH': os.getenv('PATH'),
        'SUT_NAME': options.tegra,
        'SUT_IP': options.tegraIP,
    }

    event = None
    hbSocket = None
    bbActive = False
    tegraActive = False
    connected = False
    nChatty = 0
    maxChatty = 10
    hbFails = 0
    maxFails = 50
    sleepFails = 5
    # how many times tegraActive is True but errorFlag is set
    softCount = 0
    # how many active events to wait before triggering a soft reset
    softCountMax = 5
    softResets = 0
    # how many soft resets do we try before waiting for a hard reset
    softResetMax = 5
    hardResets = 0
    hardResetsMax = 3
    lastHangCheck = time.time()

    log.info('monitoring started (process pid %s)' % current_process().pid)

    while True:
        if not connected:
            try:
                hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                hbSocket.settimeout(float(120))
                hbSocket.connect((options.tegraIP, sutDataPort))
                connected = True
            except Exception:
                connected = False
                hbFails += 1
                # drop the failed socket, otherwise every retry would leave
                # another descriptor open
                if hbSocket is not None:
                    hbSocket.close()
                log.info(
                    'Error connecting to data port - sleeping for %d seconds' %
                    sleepFails)
                time.sleep(sleepFails)

        try:
            event = events.get(False)
        except Empty:
            event = None

        if event is None:
            # nothing queued: listen for a heartbeat instead
            if connected:
                try:
                    hbData = hbSocket.recv(4096)
                except Exception:
                    hbData = ''
                    connected = False
                    dumpException('hbSocket.recv()')
                    # the next pass opens a fresh socket, so close this one
                    hbSocket.close()

                if len(hbData) > 1:
                    log.debug('socket data [%s]' % hbData[:-1])

                    if 'ebooting ...' in hbData:
                        log.warning('device is rebooting')
                        events.put(('reboot', ))
                        if os.path.isfile(flagFile):
                            time.sleep(5)
                        hbSocket.close()
                        connected = False
                    else:
                        log.info('heartbeat detected')
                        events.put(('active', ))
                        hbFails = 0
                else:
                    hbFails += 1
        else:
            state = event[0]
            s = 'event %s hbFails %d / %d' % (state, hbFails, maxFails)
            # only every (maxChatty + 1)th event is logged at info level
            nChatty += 1
            if nChatty > maxChatty:
                log.info(s)
                nChatty = 0
            else:
                log.debug(s)

            if state == 'reboot':
                tegraActive = False
                if not os.path.isfile(flagFile):
                    log.warning('Tegra rebooting, stopping buildslave')
                    events.put(('stop', ))
            elif state == 'stop' or state == 'offline':
                stopSlave(pidFile)
                bbActive = False
                if connected and state == 'offline':
                    hbSocket.close()
                    connected = False
            elif state == 'active' or state == 'dialback':
                if state == 'dialback':
                    # A dialback restarts the reset escalation.  The reset
                    # used to sit in its own `elif state == 'dialback'`
                    # branch further down, which was unreachable because
                    # this branch already matches 'dialback'.
                    softCount = 0
                    softResets = 0
                    hardResets = 0
                tegraActive = True
                if not bbActive:
                    if os.path.isfile(errorFile):
                        log.warning('Tegra active but error flag set [%d/%d]' %
                                    (softCount, softResets))
                        softCount += 1
                        if softCount > softCountMax:
                            softCount = 0
                            if softResets < softResetMax:
                                softResets += 1
                                log.warning(
                                    'removing error flag to see if tegra comes back'
                                )
                                os.remove(errorFile)
                            else:
                                hardResets += 1
                                log.warning('hard reset reboot check [%d/%d]' %
                                            (hardResets, hardResetsMax))
                                if hardResets < hardResetsMax:
                                    sendReboot(options.tegraIP, sutDataPort)
                                else:
                                    events.put(('offline', ))
                    else:
                        events.put(('start', ))
            elif state == 'start':
                if tegraActive and not bbActive:
                    log.debug('starting buildslave in %s' % options.bbpath)
                    bbProc, _ = runCommand(
                        [
                            'twistd', '--no_save',
                            '--rundir=%s' % options.bbpath,
                            '--pidfile=%s' % pidFile,
                            '--python=%s' %
                            os.path.join(options.bbpath, 'buildbot.tac')
                        ],
                        env=bbEnv)
                    log.info('buildslave start returned %s' %
                             bbProc.returncode)
                    if bbProc.returncode == 0 or bbProc.returncode == 1:
                        # pause to give twistd a chance to generate the pidfile
                        # before the code that follows goes off killing it because
                        # it thinks that it didn't start properly
                        for attempt in range(20):
                            if os.path.isfile(pidFile):
                                log.debug(
                                    'pidfile found, setting bbActive to True')
                                bbActive = True
                                break
                            time.sleep(5)
            elif state == 'terminate':
                break

        if hbFails > maxFails:
            # too many missed heartbeats: back off (capped at 300 seconds)
            # and, unless the install flag file exists, go offline
            hbFails = 0
            sleepFails = min(sleepFails + 5, 300)
            if os.path.isfile(flagFile):
                log.debug('install flag found, resetting error count')
            else:
                events.put(('offline', ))
            if connected:
                hbSocket.close()
                connected = False

        log.debug('bbActive %s tegraActive %s' % (bbActive, tegraActive))

        if bbActive and os.path.isfile(errorFile):
            log.error('errorFile detected - sending stop request')
            events.put(('stop', ))

        if bbActive:
            if os.path.isfile(pidFile):
                n = time.time()
                if not checkSlaveAlive(options.bbpath):
                    log.warning(
                        'buildslave should be active but pid is not alive')
                    # the hang check itself runs at most every 300 seconds
                    if int(n - lastHangCheck) > 300:
                        lastHangCheck = n
                        logTD = checkSlaveActive(options.bbpath)
                        # NOTE(review): assumes checkSlaveActive() can return
                        # None when it has nothing to report - confirm
                        if logTD is not None and (
                                logTD.days > 0 or
                                (logTD.days == 0
                                 and logTD.seconds > options.hangtime)):
                            log.error(
                                'last activity was %d days %d seconds ago - marking as hung slave'
                                % (logTD.days, logTD.seconds))
                            events.put(('offline', ))
            else:
                log.warning(
                    'buildslave should be active but pidfile not found, marking as offline'
                )
                events.put(('offline', ))
        else:
            if os.path.isfile(pidFile):
                if checkSlaveAlive(options.bbpath):
                    log.error(
                        'buildslave should NOT be active but pidfile found, killing buildbot'
                    )
                    events.put(('stop', ))
                else:
                    log.warning(
                        'buildslave not active but pidfile found, removing pidfile'
                    )
                    os.remove(pidFile)

    if connected:
        # do not leave the heartbeat socket open once monitoring ends
        hbSocket.close()

    if bbActive:
        stopSlave(pidFile)

    log.info('monitor stopped')
コード例 #11
0
def checkTegra(master, tegra):
    """Check the health of one tegra device, record it and optionally recover it.

    The check pings the device, asks the agent on the data port for 'info',
    inspects the local buildslave state and the error.flg file, then logs a
    one-line status, appends it to '<tegra>_status.log' and passes it to
    summary(). Depending on the module-level ``options.reset`` and
    ``options.reboot`` flags it may also stop the buildslave, reboot or
    power cycle the device, and clear error.flg.

    Args:
        master: None for a local setup, otherwise a mapping providing the
            'environment', 'hostname' and 'http_port' keys.
        tegra: name of the device; also the name of its directory below
            ``options.bbpath``.

    Returns:
        None.
    """
    # Port used below for both the 'info' query and the 'rebt' request.
    dataPort = 20700

    tegraIP = getIPAddress(tegra)
    tegraPath = os.path.join(options.bbpath, tegra)
    exportFile = os.path.join(tegraPath, '%s_status.log' % tegra)
    errorFile = os.path.join(tegraPath, 'error.flg')
    errorFlag = os.path.isfile(errorFile)
    sTegra = 'OFFLINE'
    sutFound = False
    logTD = None

    status = {
        'tegra': tegra,
        'active': False,
        'cp': 'OFFLINE',
        'bs': 'OFFLINE',
        'msg': '',
    }

    log.debug('%s: %s' % (tegra, tegraIP))

    if master is None:
        status['environment'] = 's'
        status['master'] = 'localhost'
    else:
        status['environment'] = master['environment'][0]
        status['master'] = 'http://%s:%s' % (master['hostname'],
                                             master['http_port'])

    fPing, lPing = pingDevice(tegra)
    if fPing:
        hbSocket = None
        try:
            hbSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            hbSocket.settimeout(float(120))
            hbSocket.connect((tegraIP, dataPort))

            # A successful connect means the agent is listening, even if the
            # 'info' exchange below fails.
            sutFound = True

            time.sleep(2)

            hbSocket.send('info\n')

            d = hbSocket.recv(4096)

            log.debug('socket data length %d' % len(d))
            log.debug(d)

            status['active'] = True
        except Exception:
            status['active'] = False
            dumpException('socket')
        finally:
            # Always release the socket, including when connect/send/recv
            # raised part way through.
            if hbSocket is not None:
                hbSocket.close()

        if status['active']:
            sTegra = 'online'
        else:
            sTegra = 'INACTIVE'

        if not sutFound:
            status['msg'] += 'SUTAgent not present;'
    else:
        status['msg'] += '%s %s;' % (lPing[0], lPing[1])

    # Cheat until we have a better check solution for new watch_devices.sh
    status['cp'] = 'active'  # pretend all is well

    if checkSlaveAlive(tegraPath):
        logTD = checkSlaveActive(tegraPath)
        if logTD is None:
            status['bs'] = 'INACTIVE'
        elif logTD.days > 0 or logTD.seconds > 3600:
            # More than an hour since the last recorded activity.
            status['bs'] = 'INACTIVE'
            status['msg'] += 'BS %dd %ds;' % (logTD.days, logTD.seconds)
        else:
            status['bs'] = 'active'
    else:
        # scan thru tegra-### dir and see if any buildbot.tac.bug#### files
        # exist but ignore buildbot.tac file itself (except to note that it is
        # missing)
        found = False
        for f in os.listdir(tegraPath):
            if f.startswith('buildbot.tac'):
                found = True
                if len(f) > len('buildbot.tac'):
                    status['msg'] += '%s;' % f
        if not found:
            status['msg'] += 'buildbot.tac NOT found;'

    if errorFlag:
        status['msg'] += 'error.flg [%s] ' % getLastLine(errorFile)

    s = '%s %s %9s %8s %8s :: %s' % (status['tegra'], status['environment'],
                                     sTegra, status['cp'], status['bs'],
                                     status['msg'])
    ts = time.strftime('%Y-%m-%d %H:%M:%S')
    log.info(s)
    # Use a context manager so the status log handle is closed promptly.
    with open(exportFile, 'a+') as statusLog:
        statusLog.write('%s %s\n' % (ts, s))
    summary(status['tegra'], status['environment'], sTegra, status['cp'],
            status['bs'], status['msg'], ts, status['master'])

    if errorFlag and options.reset:
        stopProcess(os.path.join(tegraPath, 'twistd.pid'), 'buildslave')

        # Without the reboot option, ask the agent itself to reboot; with it,
        # the power-cycle logic further down handles the device.
        if not options.reboot:
            rebootSocket = None
            try:
                rebootSocket = socket.socket(socket.AF_INET,
                                             socket.SOCK_STREAM)
                rebootSocket.settimeout(float(120))
                rebootSocket.connect((tegraIP, dataPort))
                rebootSocket.send('rebt\n')
                log.info('rebooting tegra')
            except Exception:
                dumpException('socket')
            finally:
                if rebootSocket is not None:
                    rebootSocket.close()

        log.info('clearing error.flg')
        os.remove(errorFile)

    # Power cycle when the slave is not active and the device is either
    # missing its agent or did not answer the ping.
    if (options.reboot and status['bs'] != 'active'
            and (not sutFound or sTegra == 'OFFLINE')):
        log.info('power cycling tegra')
        reboot_device(tegra)

    # Here we try to catch the state where sutagent and cp are inactive, which
    # is determined by:
    #     sTegra == 'INACTIVE' and
    #     status['cp'] == 'INACTIVE'
    # status['cp'] was meant to be set to INACTIVE only if logTD.seconds (last
    # time clientproxy updated its log file) is > 3600.
    # NOTE: status['cp'] is unconditionally forced to 'active' above, so this
    # branch cannot currently trigger.
    if options.reset and sTegra == 'INACTIVE' and status['cp'] == 'INACTIVE':
        log.info('stopping hung clientproxy')
        stopDevice(tegra)
        time.sleep(5)
        log.info('starting clientproxy for %s' % tegra)
        os.chdir(tegraPath)
        runCommand(['python', 'clientproxy.py', '-b', '--device=%s' % tegra])