def pcocc_ckpt(jobid, jobname, force, ckpt_dir):
    """Checkpoint the current state of a cluster

    Both the disk image and memory of all VMs of the cluster are
    saved and the cluster is terminated. It is then possible to
    restart from this state using the --restart-ckpt option of the
    alloc and batch commands.

    CKPT_DIR should not already exist unless -F is specified. In that
    case, make sure you're not overwriting the checkpoint from which
    the cluster was restarted.

    \b
    Example usage:
        pcocc ckpt /path/to/checkpoints/mycheckpoint
    """
    try:
        # The configuration has to be loaded before the batch cluster
        # can be looked up.
        load_config(jobid, jobname, default_batchname='pcocc')
        batch_cluster = load_batch_cluster()

        # The destination is validated (honouring the force flag) before
        # the checkpoint is started.
        target_dir = validate_save_dir(ckpt_dir, force)
        batch_cluster.checkpoint(target_dir)

        success_msg = ('Cluster state succesfully checkpointed '
                       'to %s' % (target_dir))
        click.secho(success_msg, fg='green')
    except PcoccError as err:
        # Every pcocc-level failure goes through the common error handler.
        handle_error(err)
def pcocc_dump(jobid, jobname, vm, dumpfile):
    """Dump VM memory to a file

    The file is saved as ELF and includes the guest's memory
    mapping. It can be processed with crash or gdb.

    \b
    Example usage:
        pcocc dump vm1 output.bin
    """
    try:
        load_config(jobid, jobname, default_batchname='pcocc')
        batch_cluster = load_batch_cluster()

        # Translate the user supplied VM name into its index within the
        # cluster, then pick the matching VM object.
        vm_index = vm_name_to_index(vm)
        target_vm = batch_cluster.vms[vm_index]

        # The dump is requested with an absolute path, which is also the
        # path reported back to the user.
        dump_path = os.path.abspath(dumpfile)

        click.secho('Dumping vm memory...')
        target_vm.dump(dump_path)

        done_msg = 'vm%d has been dumped to %s' % (vm_index, dump_path)
        click.secho(done_msg, fg='green')
    except PcoccError as err:
        handle_error(err)
def handle_error(err):
    """Report a fatal error and terminate the command

    The error message is printed in red on stderr. When the debug
    setting of the configuration is enabled the exception is re-raised
    so that the interpreter prints a stack trace; otherwise the process
    exits with status -1.

    err: the exception to report. This function is meant to be called
         from the ``except`` clause which caught it.

    This function never returns: it either raises err or raises
    SystemExit.
    """
    click.secho(str(err), fg='red', err=True)

    if Config().debug:
        if sys.exc_info()[1] is err:
            # err is the exception currently being handled by our caller:
            # a bare raise re-raises it with its original traceback,
            # whereas "raise err" would restart the traceback from this
            # line on Python 2 and hide where the error really happened.
            raise
        # Not called from the matching except clause: there is no
        # original traceback to preserve.
        raise err

    sys.exit(-1)
def pcocc_tpl_show(template):
    # Display the definition of the template named `template`.
    #
    # An unknown template name is reported on stderr and the process
    # exits with status -1; pcocc errors go through handle_error().
    #
    # NOTE(review): documented with comments rather than a docstring on
    # purpose. The sibling commands look like click commands, whose
    # docstrings end up in the generated help text - confirm before
    # adding one here.
    try:
        config = load_config()

        try:
            selected_tpl = config.tpls[template]
        except KeyError:
            click.secho('Template not found: ' + template,
                        fg='red', err=True)
            sys.exit(-1)
        else:
            selected_tpl.display()
    except PcoccError as err:
        handle_error(err)
def pcocc_reset(jobid, jobname, vm):
    """Reset a VM

    The effect is similar to the reset button on a physical machine.

    \b
    Example usage:
        pcocc reset vm1
    """
    try:
        load_config(jobid, jobname, default_batchname='pcocc')
        batch_cluster = load_batch_cluster()

        # Translate the user supplied VM name into its index within the
        # cluster, then pick the matching VM object.
        vm_index = vm_name_to_index(vm)
        target_vm = batch_cluster.vms[vm_index]

        target_vm.reset()

        done_msg = 'vm%d has been reset' % (vm_index)
        click.secho(done_msg, fg='green')
    except PcoccError as err:
        handle_error(err)
def pcocc_save(jobid, jobname, dest, vm, safe):
    """Save the disk of a VM to a new disk image

    By default the output file only contains the differences between
    the current state of the disk and the template from which the VM
    was instantiated. Therefore, all incremental saves leading to an
    image have to be preserved.

    To save the disk to a new independent image file specify a new
    path with --dest.

    \b
    Example usage:
        pcocc save vm1
    """
    try:
        load_config(jobid, jobname, default_batchname='pcocc')
        batch_cluster = load_batch_cluster()
        vm_index = vm_name_to_index(vm)
        target_vm = batch_cluster.vms[vm_index]

        # Without an image directory there is nothing to save against.
        if target_vm.image_dir is None:
            click.secho('Template is not based on a CoW image',
                        fg='red', err=True)
            sys.exit(-1)

        # A destination requests a full, standalone image; otherwise an
        # incremental revision is written next to the current image.
        full = bool(dest)

        # The destination is validated before announcing the save.
        if full:
            dest_dir = validate_save_dir(dest, False)

        click.secho('Saving image...')

        if full:
            save_path = os.path.join(dest_dir, 'image')
        else:
            next_revision = target_vm.revision + 1
            save_path = os.path.join(target_vm.image_dir,
                                     'image-rev%d' % (next_revision))

        # "safe" makes freezing the VM mandatory (YES) instead of
        # merely attempted (TRY).
        freeze_opt = (Hypervisor.VM_FREEZE_OPT.YES if safe
                      else Hypervisor.VM_FREEZE_OPT.TRY)

        target_vm.save(save_path, full, freeze_opt)

        done_msg = ('vm%d disk '
                    'succesfully saved to %s' % (vm_index, save_path))
        click.secho(done_msg, fg='green')
    except PcoccError as err:
        handle_error(err)
def pcocc_console(jobid, jobname, log, vm):
    """Connect to a VM console

    Hit Ctrl-C 3 times to exit.

    \b
    Example usage:
        pcocc console vm1
    """
    try:
        # Installed first so that an interruption at any later point goes
        # through clean_exit.
        signal.signal(signal.SIGINT, clean_exit)
        signal.signal(signal.SIGTERM, clean_exit)

        config = load_config(jobid, jobname, default_batchname='pcocc')
        cluster = load_batch_cluster()
        index = vm_name_to_index(vm)
        vm = cluster.vms[index]
        vm.wait_start()
        remote_host = vm.get_host()

        if log:
            # Log mode: page the console log stored on the VM's host and
            # exit without attaching to the console.
            try:
                # FIXME: reading the whole log at once will not
                # work for large logs
                log_path = config.batch.get_vm_state_path(
                    vm.rank, 'qemu_console_log')
                console_log = subprocess_check_output(
                    shlex.split('ssh {0} cat {1}'.format(remote_host,
                                                         log_path)))
                click.echo_via_pager(console_log)
            except Exception:
                # Best effort: any failure is reported as an unreadable log.
                click.secho("Unable to read console log",
                            fg='red', err=True)

            # The exit status is 0 whether or not the log could be shown.
            sys.exit(0)

        socket_path = config.batch.get_vm_state_path(vm.rank,
                                                     'pcocc_console_socket')
        self_stdin = sys.stdin.fileno()

        # Raw terminal: turn off echo, signal generation and canonical
        # (line based) input so that every keystroke, Ctrl-C included,
        # is read here and forwarded to the console.
        old = termios.tcgetattr(self_stdin)
        new = list(old)
        new[3] = new[3] & ~termios.ECHO & ~termios.ISIG & ~termios.ICANON
        termios.tcsetattr(self_stdin, termios.TCSANOW, new)

        # The console socket lives on the VM's host: reach it with nc
        # over ssh, feeding it our keystrokes through a pipe.
        s_ctl = subprocess.Popen(
            shlex.split('ssh %s nc -U %s ' % (remote_host, socket_path)),
            stdin=subprocess.PIPE)

        # Restore terminal and cleanup children at exit
        atexit.register(cleanup, s_ctl, old)

        last_int = datetime.datetime.now()
        int_count = 0
        while True:
            readable, _, errored = select.select([sys.stdin, s_ctl.stdin],
                                                 [],
                                                 [s_ctl.stdin])

            # The pipe to ssh showing up as readable or in the
            # exceptional set is treated as the connection being gone.
            if s_ctl.stdin in errored or s_ctl.stdin in readable:
                sys.stderr.write('Connection closed\n')
                break

            if sys.stdin in readable:
                buf = os.read(self_stdin, 1024)
                if not buf:
                    # End of file on our own stdin: nothing more can be
                    # forwarded, and select() would report stdin as
                    # readable forever, so stop here.
                    break

                # Exit if Ctrl-C is pressed repeatedly: three Ctrl-C
                # (byte 0x03) with no more than 2 seconds since the
                # first of the series.
                if buf[0:1] == b'\x03':
                    now = datetime.datetime.now()
                    if (now - last_int).total_seconds() > 2:
                        # Too long since the last series: start a new one
                        last_int = now
                        int_count = 1
                    else:
                        int_count += 1

                    if int_count == 3:
                        print('\nDetaching ...')
                        break

                s_ctl.stdin.write(buf)
                # Popen pipes may be buffered (they are by default on
                # Python 3): push each keystroke out immediately.
                s_ctl.stdin.flush()

        # Restore terminal now to let user interrupt the wait if needed
        termios.tcsetattr(sys.stdin.fileno(), termios.TCSANOW, old)
        s_ctl.terminate()
        s_ctl.wait()

    except PcoccError as err:
        handle_error(err)