Beispiel #1
0
    def run(self, state):
        """Run every stage in order and roll back hosts that fail.

        After each stage, if ``state.failed_hosts`` is non-empty, the
        ``rollback`` method of that stage and of every earlier stage is
        called in reverse order. The failed hosts are then added to
        ``state.all_failed_hosts`` and ``state.failed_hosts`` is cleared.

        Args:
            state: Shared run state. This method uses its ``log``,
                ``active_hosts``, ``failed_hosts`` and ``all_failed_hosts``
                attributes.

        Returns:
            ``False`` as soon as no active hosts remain after a stage,
            ``True`` once all stages were processed (even if some hosts
            failed along the way).
        """
        for index, stage in enumerate(self.stages):
            state.log.info('running stage "{}"'.format(stage))
            try:
                stage.run(state)
                state.log.info('finished "{}"'.format(stage))
                state.log.info('active hosts after this stage: {}'.format(
                    hosts.format_hosts(state.active_hosts)))
            # NOTE(review): KeyboardInterrupt is caught as well, so an
            # interrupt during a stage is handled like a stage failure --
            # presumably so that the rollback below still runs; confirm.
            except (KeyboardInterrupt, Exception) as e:
                state.log.exception(e)
                state.log.error('stage "{}" failed completely'.format(stage))
                # Iterate over a copy: presumably host.fail() changes
                # state.active_hosts -- verify against the host class.
                for host in set(state.active_hosts):
                    host.fail(stage, 'stage completely failed')

            if len(state.failed_hosts) > 0:
                state.log.warning('failed hosts after "{}": {}'.format(
                    stage, hosts.format_hosts(state.failed_hosts)))
                state.log.warning('doing rollback for those')
                # Undo the current stage first, then the earlier ones. A
                # separate loop variable keeps the outer ``stage`` intact.
                for done_stage in reversed(self.stages[:index + 1]):
                    try:
                        done_stage.rollback(state)
                    except Exception as e:
                        state.log.exception(e)
                        # Best effort: keep rolling back the remaining
                        # stages, but name the one that failed.
                        state.log.error(
                            'rollback of "{}" failed'.format(done_stage))
                state.all_failed_hosts.update(state.failed_hosts)
                state.failed_hosts.clear()

            if len(state.active_hosts) == 0:
                state.log.error('all the hosts failed, stopping now')
                return False

        if len(state.all_failed_hosts) > 0:
            state.log.warning('finished. Failed hosts are: ')
            for host in sorted(state.all_failed_hosts,
                               key=lambda host: host.name):
                failed_stage, reason = host.failure
                state.log.warning('{}, stage: {}, reason: {}'.format(
                    host.name, failed_stage, reason))
        else:
            state.log.info('finished.')
        return True
Beispiel #2
0
    def run(self, state):
        """Execute all stages sequentially, rolling back failed hosts.

        Whenever ``state.failed_hosts`` is non-empty after a stage, the
        ``rollback`` method of that stage and of all stages before it is
        invoked in reverse order. The failed hosts are then merged into
        ``state.all_failed_hosts`` and ``state.failed_hosts`` is emptied.

        Args:
            state: Shared run state. This method reads and updates its
                ``log``, ``active_hosts``, ``failed_hosts`` and
                ``all_failed_hosts`` attributes.

        Returns:
            ``False`` if a stage leaves no active hosts, otherwise ``True``
            after the last stage (a summary of failed hosts is logged).
        """
        for index, stage in enumerate(self.stages):
            state.log.info('running stage "{}"'.format(stage))
            try:
                stage.run(state)
                state.log.info('finished "{}"'.format(stage))
                state.log.info('active hosts after this stage: {}'.format(
                    hosts.format_hosts(state.active_hosts)))
            # NOTE(review): KeyboardInterrupt is handled like any other
            # stage failure -- presumably so an interrupted stage still
            # gets rolled back; confirm this is intended.
            except (KeyboardInterrupt, Exception) as e:
                state.log.exception(e)
                state.log.error('stage "{}" failed completely'.format(stage))
                # Work on a copy: presumably host.fail() modifies
                # state.active_hosts -- verify against the host class.
                for host in set(state.active_hosts):
                    host.fail(stage, 'stage completely failed')

            if len(state.failed_hosts) > 0:
                state.log.warning('failed hosts after "{}": {}'.format(
                    stage, hosts.format_hosts(state.failed_hosts)))
                state.log.warning('doing rollback for those')
                # Roll back newest-first, using a dedicated loop variable
                # so the outer ``stage`` is not overwritten.
                for rollback_stage in reversed(self.stages[:index + 1]):
                    try:
                        rollback_stage.rollback(state)
                    except Exception as e:
                        state.log.exception(e)
                        # Continue with the remaining rollbacks, but report
                        # which stage could not be rolled back.
                        state.log.error(
                            'rollback of "{}" failed'.format(rollback_stage))
                state.all_failed_hosts.update(state.failed_hosts)
                state.failed_hosts.clear()

            if len(state.active_hosts) == 0:
                state.log.error('all the hosts failed, stopping now')
                return False

        if len(state.all_failed_hosts) > 0:
            state.log.warning('finished. Failed hosts are: ')
            for host in sorted(state.all_failed_hosts,
                               key=lambda host: host.name):
                failed_stage, reason = host.failure
                state.log.warning('{}, stage: {}, reason: {}'.format(
                    host.name, failed_stage, reason))
        else:
            state.log.info('finished.')
        return True
Beispiel #3
0
 def run(self, state):
     """Poll ``self.step`` until no hosts are pending or tries run out.

     ``self.step(state)`` is called up to ``self.tries`` times, sleeping
     ``self.pause`` seconds before every attempt except the first. It is
     expected to return a ``(rv, failed)`` pair.

     * If ``rv`` is falsy, polling stops and every host in
       ``state.active_hosts`` is marked as failed.
     * If ``failed`` is empty, the method returns immediately.
     * Otherwise the hosts still listed in ``failed`` after the last
       attempt are marked as failed.

     Args:
         state: Shared run state; ``log`` and ``active_hosts`` are used.

     Returns:
         None.
     """
     # Bind ``failed`` up front so that ``self.tries == 0`` simply fails
     # nobody instead of raising NameError in the loop at the bottom.
     failed = ()
     for i in range(self.tries):
         if i != 0:
             time.sleep(self.pause)
         rv, failed = self.step(state)
         if not rv:
             state.log.warning('call to sinfo failed')
             failed = state.active_hosts
             break
         if len(failed) == 0:
             return
         message = ('still waiting for SLURM to come up on '
                    'the following hosts: {}')
         state.log.warning(message.format(hosts.format_hosts(failed)))
     # Iterate over a snapshot: ``failed`` may be ``state.active_hosts``
     # itself, and host.fail() presumably removes the host from it, which
     # would otherwise change the collection while it is being iterated.
     for host in list(failed):
         host.fail(self, 'timed out while waiting for SLURM to come up')
Beispiel #4
0
 def run(self, state):
     """Wait for ``self.step`` to report that no hosts are pending.

     Makes at most ``self.tries`` calls to ``self.step(state)``, pausing
     ``self.pause`` seconds between consecutive calls. Each call is
     expected to return a ``(rv, failed)`` pair.

     * A falsy ``rv`` aborts the polling and marks every host in
       ``state.active_hosts`` as failed.
     * An empty ``failed`` ends the method right away.
     * Hosts still in ``failed`` once the tries are used up are marked
       as failed.

     Args:
         state: Shared run state; ``log`` and ``active_hosts`` are used.

     Returns:
         None.
     """
     # Pre-bind ``failed`` so a configuration with ``self.tries == 0``
     # does not hit a NameError in the final loop.
     failed = ()
     for attempt in range(self.tries):
         if attempt != 0:
             time.sleep(self.pause)
         rv, failed = self.step(state)
         if not rv:
             state.log.warning('call to sinfo failed')
             failed = state.active_hosts
             break
         if len(failed) == 0:
             return
         pending = hosts.format_hosts(failed)
         state.log.warning(
             'still waiting for SLURM to come up on '
             'the following hosts: {}'.format(pending))
     # Take a snapshot first: ``failed`` can alias ``state.active_hosts``
     # and host.fail() presumably shrinks that collection, which must not
     # happen while it is being iterated.
     for host in list(failed):
         host.fail(self, 'timed out while waiting for SLURM to come up')