예제 #1
0
class TestStatus(object):
    """Supervisor and Phantom status information container.

    Attributes:
        state: str, human readable status.
        started_at: datetime obj, time when work was started.
        duration: int, time delta in seconds from start.
        uid: str, process owner.
        owner: str, responsible person
        total_dur: int, theoretical test duration in seconds
        wd: str, working directory if process.
        pid: int, supervisor PID.
        phantom_pid: int, Phantom process PID.
        answ_mtime: str, time when answ.txt was last time writen by Phantom.
        answ_mago: int, number of seconds since answ_mtime moment.
        retcode: int, Phantom process exit code.
        stdout: str, Phantom process STDOUT.
        stderr: str, Phantom process STDERR.
        eggs: An integer count of the eggs we have laid.
    """
    def __init__(self, test_cfg, main_cfg=None, state='Starting'):
        self.test_cfg = test_cfg
        self.logger = logging.getLogger('supervisor')
        self.pid = os.getpid()
        self.state = state
        self.started_at = datetime.datetime.utcnow()
        self.duration = 0
        self.uid = getpass.getuser()
        self.wd = os.getcwd()

        if main_cfg:
            self.main_cfg = main_cfg

        self.fires = {} 
        for idx, fire in enumerate(self.test_cfg['fire']):
            fire_up = {
                'state': 'Created',
                'end_status': None,
                'phantom_pid': None,
                'answ_mtime': None,
                'answ_mago': None,
                'retcode': None,
                'stdout': None,
                'stderr': None,
            }
            self.test_cfg['fire'][idx].update(fire_up)

    def jobs_completed(self):
        '''Check is there currentlly running phantom jobs or not.'''
        for fire in self.test_cfg['fire']:
            if fire['end_status'] == None:
                return False
        self.state = 'phantoms terminated'
        self.test_cfg['ended_at'] = datetime.datetime.utcnow()
        return True

    def run_jobs(self, sleep_time=1):
        '''Fork phantom jobs according to test_cfg
           and monitor them till the termination.
        '''
        # fork phantom for each fire in test.
        for idx, fire in enumerate(self.test_cfg['fire']):
            new_job = Popen(PHANTOM_CMD, stdout=PIPE, stderr=PIPE, cwd=fire['wd'])

            self.test_cfg['fire'][idx]['job'] = new_job 
            self.test_cfg['fire'][idx]['state'] = 'phantom launched'
            self.logger.info('%s > %s' % (fire['name'], 'Phantom launched'))
            self.test_cfg['fire'][idx]['phantom_pid'] = new_job.pid
            self.logger.info('%s > %s %s' % (fire['name'], 'Phantom PID:',
                                             new_job.pid))
            self.state = 'Phantoms forked'


        # sleep and pool each child process.
        end_status = 0  # by default All is ok.

        if self.main_cfg:
            self.oc = FirebatOverlordClient(self.main_cfg['FIERBAT_API_URL'],
                                            self.main_cfg['FIERBAT_API_TO'])

        if self.oc: # push updates to overlord if needed.
            retcode, resp = self.oc.push_test_updates(self.test_cfg,
                                                      status='running')
            self.logger.info('Push test status update to Overlord side.')
 
        while not self.jobs_completed():
            self.duration_update()
            self.log_mtime_update()

            for idx, fire in enumerate(self.test_cfg['fire']):
                if fire['end_status'] != None:  # fire allready terminated
                    continue
                job = self.test_cfg['fire'][idx]['job']
                retcode = job.poll()
                if retcode is not None:  # Process finished.
                    self.test_cfg['fire'][idx]['retcode'] = retcode
                    self.test_cfg['fire'][idx]['stdout'] = job.stdout.read()
                    self.test_cfg['fire'][idx]['stderr'] = job.stderr.read()
                    self.test_cfg['fire'][idx]['ended_at'] = datetime.datetime.utcnow()
                    self.test_cfg['fire'][idx]['state'] = 'phantom terminated'
                    self.logger.info(
                        '%s > phantom terminated with retcode: %s' %\
                        (fire['name'], retcode))
                    self.test_cfg['fire'][idx]['end_status'] = end_status
                    if self.oc: # push updates to overlord if needed.
                        f = self.test_cfg['fire'][idx]
                        #self.logger.info('Try to patch fire on Overlord side.') 
                        self.logger.info('%s > %s' % (f['name'],
                            'Try to patch fire on Overlord side.')) 
                        # Push fire termination details to remote side.
                        self.oc.push_fire_updates(f)

                        # Calculate metrics from phout and push them after.
                        result, msg = proc_whole_phout(f, oc=self.oc)
                        if result:
                            self.logger.info('%s > %s' % (f['name'],
                                'Fire updated on remote side successfully.')) 
                        else:
                            self.logger.info('%s > %s' % (f['name'],
                                'Remote call failed: %s' % msg)) 


                else:  # No process is done, wait a bit and check again.
                    self.state = 'Phantoms waiting'
                    time.sleep(sleep_time)
                    self.msg_update()
                    try:
                        astop_mtime =\
                            self.test_cfg['fire'][idx]['autostop']['astop_mtime']
                    except KeyError:
                        astop_mtime = None
                    if astop_mtime and \
                        self.test_cfg['fire'][idx]['answ_mago'] > astop_mtime:
                        self.logger.error('phantom answ file hasn\'t been' +
                                          ' updated to long')
                        self.logger.error('Try to stop the fire')
                        job.kill()
                        end_status = -1

        if self.oc: # push updates to overlord if needed.
            #self.test_cfg['ended_at'] = datetime.datetime.utcnow()
            retcode, resp = self.oc.push_test_updates(self.test_cfg,
                                                      status='finished')
            self.logger.info('Push test status update to Overlord side.')


    def msg_update(self):
        '''Update JSON obj translated to other process'''
        global msg
        msg = json.dumps(self.repr_as_dict())

    def log_result(self):
        '''Log info about how phantom was terminated.'''
        for idx, fire in enumerate(self.test_cfg['fire']):
            if fire['end_status'] == None:
                continue
            result = 'undefined'
            if fire['end_status'] == 0:
                result = 'phantom terminated normally'
                if fire['retcode'] == -9:
                    result = 'phantom killed by user with SIGNUM 9'
            elif fire['end_status'] == -1:
                result = 'phantom terminated by supervisor, reason: answ mtime'

            self.logger.info('%s > %s' % (fire['name'], result))


    def duration_update(self):
        delta = datetime.datetime.utcnow() - self.started_at
        self.duration = delta.seconds

    def log_mtime_update(self, log_name='answ.txt'):
        '''Check when Phantom write one of his log files last time,
        helps to check, is Phantom alive or not.'''
        for idx, fire in enumerate(self.test_cfg['fire']):
            log_path = '%s/%s' % (fire['wd'], log_name)
            try:
                mtime = os.path.getmtime(log_path)
                self.test_cfg['fire'][idx]['answ_mtime'] = time.ctime(mtime)
                delta = datetime.datetime.now() -\
                    datetime.datetime.fromtimestamp(int(mtime))
                self.test_cfg['fire'][idx]['answ_mago'] = delta.seconds
            except OSError:
                self.logger.warning('Can\'t read mtime of file: %s' % log_path)

    def repr_as_dict(self):
        '''Represent current test status as a dict.'''
        self.duration_update()
        self.log_mtime_update()
        result = {
            'test_name': self.test_cfg['title']['test_name'],
            'pid': self.pid,
            'id': self.test_cfg.get('id', None),
            'task': self.test_cfg['title']['task'],
            'duration': self.duration,
            'started_at': self.started_at.isoformat(),
            'state': self.state,
            'owner': self.test_cfg.get('owner', 'uid'),
            'uid': self.uid,
            'fires': [],
        }
        fire_allowed = [
            'answ_mago',
            'name',
            'phantom_pid',
            'total_dur',
        ]
        for idx, fire in enumerate(self.test_cfg['fire']):
            f = dict((key,value) for key, value in fire.iteritems() if key in\
                    fire_allowed)
            result['fires'].append(f)

        return result
예제 #2
0
    def run_jobs(self, sleep_time=1):
        '''Fork phantom jobs according to test_cfg
           and monitor them till the termination.
        '''
        # fork phantom for each fire in test.
        for idx, fire in enumerate(self.test_cfg['fire']):
            new_job = Popen(PHANTOM_CMD, stdout=PIPE, stderr=PIPE, cwd=fire['wd'])

            self.test_cfg['fire'][idx]['job'] = new_job 
            self.test_cfg['fire'][idx]['state'] = 'phantom launched'
            self.logger.info('%s > %s' % (fire['name'], 'Phantom launched'))
            self.test_cfg['fire'][idx]['phantom_pid'] = new_job.pid
            self.logger.info('%s > %s %s' % (fire['name'], 'Phantom PID:',
                                             new_job.pid))
            self.state = 'Phantoms forked'


        # sleep and pool each child process.
        end_status = 0  # by default All is ok.

        if self.main_cfg:
            self.oc = FirebatOverlordClient(self.main_cfg['FIERBAT_API_URL'],
                                            self.main_cfg['FIERBAT_API_TO'])

        if self.oc: # push updates to overlord if needed.
            retcode, resp = self.oc.push_test_updates(self.test_cfg,
                                                      status='running')
            self.logger.info('Push test status update to Overlord side.')
 
        while not self.jobs_completed():
            self.duration_update()
            self.log_mtime_update()

            for idx, fire in enumerate(self.test_cfg['fire']):
                if fire['end_status'] != None:  # fire allready terminated
                    continue
                job = self.test_cfg['fire'][idx]['job']
                retcode = job.poll()
                if retcode is not None:  # Process finished.
                    self.test_cfg['fire'][idx]['retcode'] = retcode
                    self.test_cfg['fire'][idx]['stdout'] = job.stdout.read()
                    self.test_cfg['fire'][idx]['stderr'] = job.stderr.read()
                    self.test_cfg['fire'][idx]['ended_at'] = datetime.datetime.utcnow()
                    self.test_cfg['fire'][idx]['state'] = 'phantom terminated'
                    self.logger.info(
                        '%s > phantom terminated with retcode: %s' %\
                        (fire['name'], retcode))
                    self.test_cfg['fire'][idx]['end_status'] = end_status
                    if self.oc: # push updates to overlord if needed.
                        f = self.test_cfg['fire'][idx]
                        #self.logger.info('Try to patch fire on Overlord side.') 
                        self.logger.info('%s > %s' % (f['name'],
                            'Try to patch fire on Overlord side.')) 
                        # Push fire termination details to remote side.
                        self.oc.push_fire_updates(f)

                        # Calculate metrics from phout and push them after.
                        result, msg = proc_whole_phout(f, oc=self.oc)
                        if result:
                            self.logger.info('%s > %s' % (f['name'],
                                'Fire updated on remote side successfully.')) 
                        else:
                            self.logger.info('%s > %s' % (f['name'],
                                'Remote call failed: %s' % msg)) 


                else:  # No process is done, wait a bit and check again.
                    self.state = 'Phantoms waiting'
                    time.sleep(sleep_time)
                    self.msg_update()
                    try:
                        astop_mtime =\
                            self.test_cfg['fire'][idx]['autostop']['astop_mtime']
                    except KeyError:
                        astop_mtime = None
                    if astop_mtime and \
                        self.test_cfg['fire'][idx]['answ_mago'] > astop_mtime:
                        self.logger.error('phantom answ file hasn\'t been' +
                                          ' updated to long')
                        self.logger.error('Try to stop the fire')
                        job.kill()
                        end_status = -1

        if self.oc: # push updates to overlord if needed.
            #self.test_cfg['ended_at'] = datetime.datetime.utcnow()
            retcode, resp = self.oc.push_test_updates(self.test_cfg,
                                                      status='finished')
            self.logger.info('Push test status update to Overlord side.')