def revive_osd(self, osd, timeout=75):
    """
    Revive an osd, either by power cycling its host (if indicated by
    the config) or by restarting the daemon.

    :param osd: id of the osd to revive
    :param timeout: seconds to wait for the osd's admin socket to
                    answer after the restart
    """
    if self.config.get('powercycle'):
        (remote,) = self.ctx.cluster.only(
            'osd.{o}'.format(o=osd)).remotes.iterkeys()
        # Bug fix: the message used to say 'kill_osd' even though this
        # is the revive path.
        self.log('revive_osd on osd.{o} doing powercycle of {s}'.format(
            o=osd, s=remote.name))
        assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        if not remote.console.check_status(300):
            raise Exception(
                'Failed to revive osd.{o} via ipmi'.format(o=osd))
        # The host rebooted: re-establish the ssh connection and
        # remount the osd's data before restarting the daemon.
        teuthology.reconnect(self.ctx, 60, [remote])
        ceph_task.mount_osd_data(self.ctx, remote, str(osd))
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
        self.ctx.daemons.get_daemon('osd', osd).reset()
    self.ctx.daemons.get_daemon('osd', osd).restart()
    # wait for dump_ops_in_flight; this command doesn't appear
    # until after the signal handler is installed and it is safe
    # to stop the osd again without making valgrind leak checks
    # unhappy.  see #5924.
    self.wait_run_admin_socket(osd,
                               args=['dump_ops_in_flight'],
                               timeout=timeout)
def revive_mon(self, mon):
    """
    Bring a mon back up: power cycle its host first when the config
    asks for it, then restart the daemon.
    """
    if self.config.get('powercycle'):
        label = 'mon.{m}'.format(m=mon)
        (remote,) = self.ctx.cluster.only(label).remotes.iterkeys()
        self.log('revive_mon on mon.{m} doing powercycle of {s}'.format(m=mon, s=remote.name))
        assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
    self.ctx.daemons.get_daemon('mon', mon).restart()
def revive_mon(self, mon):
    """
    Restart a mon daemon, power cycling its host first when the
    config requests it.
    """
    if self.config.get('powercycle'):
        who = 'mon.{m}'.format(m=mon)
        (remote,) = self.ctx.cluster.only(who).remotes.iterkeys()
        msg = 'revive_mon on mon.{m} doing powercycle of {s}'.format(
            m=mon, s=remote.name)
        self.log(msg)
        assert remote.console is not None, \
            "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
    self.ctx.daemons.get_daemon('mon', mon).restart()
def revive_mds(self, mds, standby_for_rank=None):
    """
    Restart an mds, power cycling its host first if the config says
    so; pass --hot-standby when a standby rank is given.
    """
    if self.config.get('powercycle'):
        (node,) = self.ctx.cluster.only('mds.{m}'.format(m=mds)).remotes.iterkeys()
        self.log('revive_mds on mds.{m} doing powercycle of {s}'.format(m=mds, s=node.name))
        assert node.console is not None, "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        node.console.power_on()
        ceph_task.make_admin_daemon_dir(self.ctx, node)
    extra = ['--hot-standby', standby_for_rank] if standby_for_rank else []
    self.ctx.daemons.get_daemon('mds', mds).restart(*extra)
def revive_mds(self, mds, standby_for_rank=None):
    """
    Revive an mds: powercycle its host when configured to, then
    restart the daemon (optionally as a hot standby).
    """
    if self.config.get('powercycle'):
        role = 'mds.{m}'.format(m=mds)
        (remote,) = self.ctx.cluster.only(role).remotes.iterkeys()
        self.log('revive_mds on mds.{m} doing powercycle of {s}'.format(
            m=mds, s=remote.name))
        assert remote.console is not None, \
            "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
    restart_args = []
    if standby_for_rank:
        restart_args += ['--hot-standby', standby_for_rank]
    self.ctx.daemons.get_daemon('mds', mds).restart(*restart_args)
def revive_osd(self, osd, timeout=75):
    """
    Revive an osd, either by power cycling its host (when the config
    asks for it) or by restarting the daemon.

    :param osd: id of the osd to revive
    :param timeout: seconds to wait for the osd's admin socket to
                    answer after the restart
    """
    if self.config.get('powercycle'):
        (remote,) = self.ctx.cluster.only(
            'osd.{o}'.format(o=osd)).remotes.iterkeys()
        # Bug fix: the message used to say 'kill_osd' even though this
        # is the revive path.
        self.log('revive_osd on osd.{o} doing powercycle of {s}'.format(
            o=osd, s=remote.name))
        assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        if not remote.console.check_status(300):
            raise Exception('Failed to revive osd.{o} via ipmi'.format(o=osd))
        # The host rebooted: re-establish the ssh connection and
        # remount the osd's data before restarting the daemon.
        teuthology.reconnect(self.ctx, 60, [remote])
        ceph_task.mount_osd_data(self.ctx, remote, str(osd))
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
        self.ctx.daemons.get_daemon('osd', osd).reset()
    self.ctx.daemons.get_daemon('osd', osd).restart()
    # Consistency fix: the sibling revive_osd variants wait on
    # dump_ops_in_flight, which only appears after the signal handler
    # is installed and it is safe to stop the osd again without making
    # valgrind leak checks unhappy (see #5924).  This variant waited
    # without it.
    self.wait_run_admin_socket(osd,
                               args=['dump_ops_in_flight'],
                               timeout=timeout)
def revive_mon(self, mon):
    """
    Restart by either power cycling (if the config says so), or by
    doing a normal restart.
    """
    if self.config.get("powercycle"):
        target = self.ctx.cluster.only("mon.{m}".format(m=mon))
        (remote,) = target.remotes.iterkeys()
        self.log("revive_mon on mon.{m} doing powercycle of {s}".format(m=mon, s=remote.name))
        assert (
            remote.console is not None
        ), "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
    self.ctx.daemons.get_daemon("mon", mon).restart()
def revive_mds(self, mds, standby_for_rank=None):
    """
    Revive mds -- do an ipmi powercycle (if indicated by the config)
    and then restart (using --hot-standby if specified).
    """
    if self.config.get("powercycle"):
        (remote,) = self.ctx.cluster.only("mds.{m}".format(m=mds)).remotes.iterkeys()
        self.log("revive_mds on mds.{m} doing powercycle of {s}".format(m=mds, s=remote.name))
        assert (
            remote.console is not None
        ), "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
    hot_standby = ["--hot-standby", standby_for_rank] if standby_for_rank else []
    self.ctx.daemons.get_daemon("mds", mds).restart(*hot_standby)
def revive_osd(self, osd, timeout=75):
    """
    Revive an osd by power cycling its host (if indicated by the
    config) and restarting the daemon.

    :param osd: id of the osd to revive
    :param timeout: seconds to wait for the osd's admin socket to
                    answer after the restart
    """
    if self.config.get('powercycle'):
        (remote,) = self.ctx.cluster.only(
            'osd.{o}'.format(o=osd)).remotes.iterkeys()
        # Bug fix: the message used to say 'kill_osd' even though this
        # is the revive path.
        self.log('revive_osd on osd.{o} doing powercycle of {s}'.format(
            o=osd, s=remote.name))
        assert remote.console is not None, "powercycling requested but RemoteConsole is not initialized. Check ipmi config."
        remote.console.power_on()
        if not remote.console.check_status(300):
            raise Exception('Failed to revive osd.{o} via ipmi'.format(o=osd))
        # The host rebooted: re-establish the ssh connection and
        # remount the osd's data before restarting the daemon.
        teuthology.reconnect(self.ctx, 60, [remote])
        ceph_task.mount_osd_data(self.ctx, remote, str(osd))
        ceph_task.make_admin_daemon_dir(self.ctx, remote)
        self.ctx.daemons.get_daemon('osd', osd).reset()
    self.ctx.daemons.get_daemon('osd', osd).restart()
    # wait for dump_ops_in_flight; this command doesn't appear
    # until after the signal handler is installed and it is safe
    # to stop the osd again without making valgrind leak checks
    # unhappy.  see #5924.
    self.wait_run_admin_socket(osd,
                               args=['dump_ops_in_flight'],
                               timeout=timeout)