Example #1
0
def checkProcesses(liveInstances):
    '''Check that the agent process is running on each instance.

    Runs a ps/grep pipeline looking for 'agent.jar' on every given instance
    (via tellInstances) and collects the outcomes that count as true errors.

    Args:
        liveInstances: instance records to check; currently passed to
            tellInstances unfiltered.

    Returns:
        dict mapping instanceId to the full status record, for every
        instance whose status was an Exception or any other truthy value
        different from 1.
    '''
    logger.info('checking %d instance(s)', len(liveInstances))

    # the pipeline's exit status is that of the final grep
    agentCheckCmd = "ps -ef | grep -v grep | grep 'agent.jar' > /dev/null"
    results = tellInstances.tellInstances(
        liveInstances, agentCheckCmd,
        timeLimit=30*60,
        knownHostsOnly=True,
    )

    errorsByIid = {}
    for rec in results:
        outcome = rec['status']
        if not outcome:
            # falsy status (e.g. 0 or None) is never reported
            continue
        logger.info('statusRec: %s', rec)
        # a status of 1 is logged above but not treated as an error
        # (presumably it is just grep reporting "no match" -- TODO confirm)
        if isinstance(outcome, Exception) or outcome not in [0, 1]:
            errorsByIid[rec['instanceId']] = rec
    logger.info('errorsByIid: %s', errorsByIid)
    return errorsByIid
Example #2
0
def retrieveLogs(liveInstances, neoloadVersion):
    '''Request the neoload agent log files from each of the given instances.

    Files matching /root/.neotys/neoload/v<version>/logs/*.log are requested
    from every instance, where <version> is whatever truncateVersion returns
    for neoloadVersion. dataDirPath + '/agentLogs' is given to tellInstances
    as the download destination directory.

    Args:
        liveInstances: instance records to download from; currently used
            unfiltered.
        neoloadVersion: version value handed to truncateVersion to build the
            remote log path.

    Returns:
        None; the statuses reported by tellInstances are not examined.
    '''
    #TODO should get list of instances with good install, rather than all given instances
    logger.info('retrieving logs for %d instance(s)', len(liveInstances))

    remoteLogGlob = '/root/.neotys/neoload/v%s/logs/*.log' % truncateVersion(neoloadVersion)

    tellInstances.tellInstances(
        liveInstances,
        download=remoteLogGlob,
        downloadDestDir=dataDirPath + '/agentLogs',
        timeLimit=30*60,
        knownHostsOnly=True,
    )
Example #3
0
def retrieveLogs(goodInstances):
    '''Request the squid proxy log files from each of the given instances.

    Files matching /var/log/squid/*.log are requested from every instance,
    with dataDirPath + '/proxyLogs' given to tellInstances as the download
    destination directory.

    Args:
        goodInstances: instance records to download from.

    Returns:
        None; the statuses reported by tellInstances are not examined.
    '''
    logger.info('retrieving logs for %d instance(s)', len(goodInstances))

    tellInstances.tellInstances(
        goodInstances,
        download='/var/log/squid/*.log',
        downloadDestDir=dataDirPath + '/proxyLogs',
        timeLimit=30*60,
        knownHostsOnly=True,
    )
Example #4
0
def retrieveLogs(goodInstances):
    '''Request the lzAgent files from each of the given instances.

    Whatever matches the remote pattern 'lzAgent/agent.*s' is requested from
    every instance, with dataDirPath + '/agentLogs' given to tellInstances
    as the download destination directory.

    Args:
        goodInstances: instance records to download from.

    Returns:
        None; the statuses reported by tellInstances are not examined.
    '''
    logger.info('retrieving logs for %d instance(s)', len(goodInstances))

    # an earlier variant of this pattern was 'lzAgent/Logs/*'
    remotePattern = 'lzAgent/agent.*s'

    tellInstances.tellInstances(
        goodInstances,
        download=remotePattern,
        downloadDestDir=dataDirPath + '/agentLogs',
        timeLimit=30*60,
        knownHostsOnly=True,
    )
Example #5
0
def retrieveLogs(liveInstances, farmDirPath):
    '''Request the geth log from each node and report which requests failed.

    The remote file farmDirPath + '/geth.log' is requested from every
    instance, with dataDirPath + '/nodeLogs' given to tellInstances as the
    download destination directory.

    Args:
        liveInstances: instance records to download from; currently used
            unfiltered.
        farmDirPath: remote directory that holds geth.log.

    Returns:
        dict mapping instanceId to its status, for every instance whose
        reported status was truthy.
    '''
    logger.info('retrieving logs for %d instance(s)', len(liveInstances))

    results = tellInstances.tellInstances(
        liveInstances,
        download=farmDirPath + '/geth.log',
        downloadDestDir=dataDirPath + '/nodeLogs',
        timeLimit=15*60,
        knownHostsOnly=True,
    )

    # keep only the truthy (non-success) statuses
    errorsByIid = {}
    for rec in results:
        outcome = rec['status']
        if outcome:
            errorsByIid[rec['instanceId']] = outcome
    return errorsByIid
Example #6
0
def checkProcesses(liveInstances):
    '''Check for a running geth process on each instance.

    Runs a ps/grep pipeline looking for 'geth' on every given instance (via
    tellInstances), with results logged to
    dataDirPath + '/checkProcesses.jlog'.

    Args:
        liveInstances: instance records to check; currently used unfiltered.

    Returns:
        dict mapping instanceId to its status, for every instance whose
        reported status was truthy.
    '''
    logger.info('checking %d instance(s)', len(liveInstances))

    gethCheckCmd = "ps -ef | grep -v grep | grep 'geth' > /dev/null"
    results = tellInstances.tellInstances(
        liveInstances, gethCheckCmd,
        timeLimit=15*60,
        resultsLogFilePath=dataDirPath + '/checkProcesses.jlog',
        knownHostsOnly=True,
    )

    # keep only the truthy (non-success) statuses
    errorsByIid = {}
    for rec in results:
        outcome = rec['status']
        if outcome:
            errorsByIid[rec['instanceId']] = outcome
    return errorsByIid
Example #7
0
def checkProcesses(goodInstances):
    '''Check that a squid process is running on each instance.

    Runs a ps/grep pipeline looking for 'squid' on every given instance
    (via tellInstances).

    Args:
        goodInstances: instance records to check.

    Returns:
        dict mapping instanceId to the full status record, for every
        instance whose reported status was truthy.
    '''
    logger.info('checking %d instance(s)', len(goodInstances))

    squidCheckCmd = "ps -ef | grep -v grep | grep 'squid' > /dev/null"
    results = tellInstances.tellInstances(
        goodInstances, squidCheckCmd,
        timeLimit=30*60,
        knownHostsOnly=True,
    )

    # any truthy status counts as an error; might like to be more selective
    errorsByIid = {
        rec['instanceId']: rec
        for rec in results
        if rec['status']
    }
    logger.info('errorsByIid: %s', errorsByIid)
    return errorsByIid
Example #8
0
def checkInstanceClocks(liveInstances, dataDirPath):
    '''Compare the clock of each instance against the logged timestamps.

    Runs `date --iso-8601=seconds` on every instance via tellInstances, with
    results logged to dataDirPath + '/checkInstanceClocks.jlog'. That log is
    then read back line by line (one JSON record per line); for each record
    with non-empty 'stdout', the time printed by the instance is compared
    with the record's own 'dateTime' field (presumably the local timestamp
    of the log entry -- TODO confirm).

    Args:
        liveInstances: instance records; each must have an 'instanceId'.
        dataDirPath: directory in which the .jlog results file is kept.

    Returns:
        dict mapping instanceId to one of:
          - the truthy status reported by tellInstances,
          - {'exception': exc} when the instance's stdout could not be parsed,
          - {'discrep': seconds} when the difference is > 4 or < -1 seconds,
          - {'found': False} when no parseable time was seen for the instance
            and no other error was recorded for it.
    '''
    jlogFilePath = dataDirPath + '/checkInstanceClocks.jlog'
    # every instance counts as unfound until a parseable time is seen for it
    unfoundIids = {inst['instanceId'] for inst in liveInstances}

    # ask each instance for its current time
    results = tellInstances.tellInstances(
        liveInstances, "date --iso-8601=seconds",
        timeLimit=2*60,
        resultsLogFilePath=jlogFilePath,
        knownHostsOnly=True, sshAgent=False,
    )

    errorsByIid = {}
    for rec in results:
        if rec['status']:
            errorsByIid[rec['instanceId']] = rec['status']
    logger.info('preliminary errorsByIid: %s', errorsByIid)
    for iid, status in errorsByIid.items():
        logger.warning('instance %s gave error "%s"', iid, status)

    with open(jlogFilePath, 'rb') as jlogFile:
        for rawLine in jlogFile:
            entry = json.loads(rawLine)
            iid = entry['instanceId']
            nodeTimeStr = entry.get('stdout')
            if not nodeTimeStr:
                # records without output carry no time to compare
                continue
            masterDateTime = dateutil.parser.parse(entry['dateTime'])
            try:
                nodeDateTime = dateutil.parser.parse(nodeTimeStr)
            except Exception as exc:
                logger.warning('exception parsing %s', nodeTimeStr)
                errorsByIid[iid] = {'exception': exc}
                continue
            unfoundIids.discard(iid)
            discrep = (masterDateTime - nodeDateTime).total_seconds()
            logger.info('discrep: %.1f seconds on inst %s', discrep, iid)
            # tolerated window is asymmetric: -1 to +4 seconds
            if not -1 <= discrep <= 4:
                logger.warning('bad time discrep: %.1f', discrep)
                errorsByIid[iid] = {'discrep': discrep}

    if unfoundIids:
        logger.warning('unfoundIids: %s', unfoundIids)
        for iid in unfoundIids:
            # do not overwrite an error already recorded for this instance
            errorsByIid.setdefault(iid, {'found': False})
    logger.info('%d errorsByIid: %s', len(errorsByIid), errorsByIid)
    return errorsByIid
Example #9
0
def checkProcesses(liveInstances):
    '''Check that the load generator agent is running on each instance.

    Runs a ps/grep pipeline looking for 'LoadGeneratorAgent start' on every
    given instance (via tellInstances).

    Args:
        liveInstances: instance records to check; currently used unfiltered.

    Returns:
        dict mapping instanceId to the full status record, for every
        instance whose reported status was truthy.
    '''
    logger.info('checking %d instance(s)', len(liveInstances))

    agentCheckCmd = "ps -ef | grep -v grep | grep 'LoadGeneratorAgent start' > /dev/null"
    results = tellInstances.tellInstances(
        liveInstances, agentCheckCmd,
        timeLimit=30*60,
        knownHostsOnly=True,
    )

    # any truthy status counts as an error; might like to be more selective
    errorsByIid = {
        rec['instanceId']: rec
        for rec in results
        if rec['status']
    }
    logger.debug('errorsByIid: %s', errorsByIid)
    return errorsByIid
Example #10
0
        with open(launchedJsonFilePath, 'r') as jsonInFile:
            try:
                launchedInstances = json.load(jsonInFile)  # an array
            except Exception as exc:
                logger.warning('could not load json (%s) %s', type(exc), exc)
        startedInstances = [
            inst for inst in launchedInstances if inst['state'] == 'started'
        ]
        logger.info('%d instances were launched', len(startedInstances))

        starterCmd = 'geth --config netconfig/%s.config.toml --password pw.txt --unlock $(cat accountAddr.txt) >> ether/%s/stdout.txt 2>>ether/%s/geth.log </dev/null &' % \
            (configName, configName, configName)
        # start the client on each instance
        stepStatuses = tellInstances.tellInstances(
            startedInstances,
            command=starterCmd,
            resultsLogFilePath=outDataDir + '/startClients.jlog',
            timeLimit=30 * 60,
            knownHostsOnly=True)
        logger.debug('starter statuses: %s', stepStatuses)
        # make a list of instances where the client was started
        goodIids = []
        for status in stepStatuses:
            if isinstance(status['status'], int) and status['status'] == 0:
                goodIids.append(status['instanceId'])
            else:
                logger.warning('could not start agent on %s',
                               status['instanceId'][0:8])
        if launchedInstances:
            print(
                'when you want to terminate these instances, use %s terminateGethNodes.py "%s"'
                % (sys.executable, outDataDir))
Example #11
0
        startedInstances = [inst for inst in launchedInstances if inst['state'] == 'started' ]
        logger.info( '%d instances were launched', len(startedInstances) )

        installedInstances = []
        recruiterLog = readJLog( outDataDir +'/recruitInstances.jlog' )
        for logEntry in recruiterLog:
            if 'returncode' in logEntry and logEntry['returncode'] == 0:
                installedInstances.append( instancesByIid[ logEntry['instanceId'] ] )
        logger.info( '%d instances were installed', len(installedInstances) )
        installedIids = [inst['instanceId'] for inst in installedInstances ]

        # get the ip addr of each instance
        cmd = 'curl -s -S https://api.ipify.org > ipAddr.txt'
        stepStatuses = tellInstances.tellInstances( installedInstances, command=cmd,
            resultsLogFilePath=outDataDir +'/getIpAddr.jlog',
            download='ipAddr.txt', downloadDestDir=outDataDir +'/agentLogs',
            timeLimit=6*60,
            knownHostsOnly=True
            )
        logger.debug( 'download statuses: %s', stepStatuses )

        # merge the retrieved ip addrs into instances records in a new array
        goodInstances = []
        for iid in installedIids:
            ipFilePath = os.path.join( outDataDir, 'agentLogs', iid, 'ipAddr.txt' )
            if os.path.isfile( ipFilePath ):
                with open( ipFilePath, 'r' ) as ipFile:
                    ipAddr = ipFile.read().strip()
                    if ipAddr:
                        inst = instancesByIid.get( iid )
                        if inst:
                            inst['ipAddr'] = ipAddr
Example #12
0
                for index, inst in enumerate( startedInstances ):
                    iid = inst['instanceId']
                    portMap[iid] = index + portRangeStart
                logger.info( 'configuring agents')
                returnCodes = configureAgents( startedInstances, ports, timeLimit=600 )
                for index, code in enumerate( returnCodes ):
                    if code==0:
                        configuredInstances.append( startedInstances[index] )
                    else:
                        iid = startedInstances[index].get('instanceId')
                        logger.info( 'inst %s was not configured properly', iid[0:8] )

            # start the agent on each instance 
            stepStatuses = tellInstances.tellInstances( configuredInstances, command=starterCmd,
                resultsLogFilePath=outDataDir +'/startAgents.jlog',
                timeLimit=30*60,
                knownHostsOnly=True
                )
            logger.debug( 'starter statuses: %s', stepStatuses )
            # make a list of instances where the agent was started
            goodIids = []
            for status in stepStatuses:
                if isinstance( status['status'], int) and status['status'] == 0:
                    goodIids.append( status['instanceId'])
                else:
                    logger.warning( 'could not start agent on %s', status['instanceId'][0:8] )
            #COULD check bound ports again here
            #COULD download logs from all installed instances rather than just good-started instances
            goodInstances = [inst for inst in startedInstances if inst['instanceId'] in goodIids ]
            if goodInstances:
                time.sleep( 60 )