Esempio n. 1
0
File: cmd.py Progetto: ansiz/pcocc
def pcocc_setup(action, jobid, nolock, force):
    """Run a node-level setup action.

    Args:
        action: Name of the action to perform. 'init', 'cleanup', 'create'
            and 'delete' are handled; any other value performs no action.
        jobid: Job identifier forwarded to ``config.load`` for 'delete'.
            Only accepted together with 'delete'.
        nolock: When true, the node lock is neither taken nor released.
        force: Forwarded to ``config.batch.delete_resources`` for 'delete'.
            Only accepted together with 'delete'.

    Raises:
        click.UsageError: If ``jobid`` or ``force`` is given with an action
            other than 'delete'.
    """
    # Validate the arguments before touching the node lock so that a usage
    # error can never leave the lock held.
    if action != 'delete' and (jobid or force):
        raise click.UsageError('this option can only be used with delete')

    # Dont load user config, we run as a privileged user
    config = Config()

    if not nolock:
        config.lock_node()

    try:
        # Always raise verbosity for setup processes
        config.verbose = max(config.verbose, 1)

        if action == 'init':
            config.load(process_type=ProcessType.OTHER)
            config.batch.init_node()
            config.config_node()
        elif action == 'cleanup':
            config.load(process_type=ProcessType.OTHER)
            config.cleanup_node()
        elif action == 'create':
            config.load(process_type=ProcessType.SETUP)
            config.tracker.reclaim(config.batch.list_all_jobs())
            config.batch.create_resources()
            cluster = Cluster(config.batch.cluster_definition,
                              resource_only=True)
            cluster.alloc_node_resources()
        elif action == 'delete':
            config.load(jobid=jobid, process_type=ProcessType.SETUP)
            config.tracker.cleanup_ref(config.batch.batchid)
            config.batch.delete_resources(force)
            cluster = Cluster(config.batch.cluster_definition,
                              resource_only=True)
            cluster.free_node_resources()
    finally:
        # Release the lock on every exit path, including failures raised by
        # the action itself.
        if not nolock:
            config.release_node()
Esempio n. 2
0
File: cmd.py Progetto: ansiz/pcocc
def pcocc_alloc(restart_ckpt, alloc_script, batch_options, cluster_definition):
    """Request an allocation for a cluster and exit with its return value.

    The batch manager is given the user's batch options, the license options
    for the cluster and a task count equal to the number of VMs, together
    with the ``pcocc internal launcher`` command line to run.

    Args:
        restart_ckpt: Passed to ``gen_ckpt_opt`` to build the launcher's
            checkpoint options.
        alloc_script: Passed to ``gen_alloc_script_opt`` to build the
            launcher's allocation-script options.
        batch_options: Iterable of extra batch manager options.
        cluster_definition: Cluster definition used to build the ``Cluster``
            and forwarded to the launcher command line.

    A ``PcoccError`` raised along the way is passed to ``handle_error``.
    """
    try:
        config = load_config(process_type=ProcessType.OTHER)

        definition = ascii(cluster_definition)
        cluster = Cluster(definition)
        user_opts = list(batch_options)
        restart_args = gen_ckpt_opt(restart_ckpt)
        script_args = gen_alloc_script_opt(alloc_script)

        allocate = config.batch.alloc

        # Options handed to the batch manager itself
        batch_args = user_opts + get_license_opts(cluster)
        batch_args = batch_args + ['-n', str(len(cluster.vms))]

        # Command executed inside the allocation
        launcher_cmd = ['pcocc'] + build_verbose_opt()
        launcher_cmd = launcher_cmd + ['internal', 'launcher', definition]
        launcher_cmd = launcher_cmd + script_args + restart_args

        sys.exit(allocate(cluster, batch_args, launcher_cmd))

    except PcoccError as err:
        handle_error(err)
Esempio n. 3
0
File: cmd.py Progetto: ansiz/pcocc
def pcocc_launcher(restart_ckpt, wait, script, alloc_script,
                   cluster_definition):
    """Start a cluster, run the user's process next to it and supervise both.

    The function runs every executable file found in ``helperdir``, starts
    the cluster job through ``batch.run``, waits for the host configuration,
    then spawns the user process and loops until one of the monitored
    processes terminates. It always leaves through ``sys.exit``.

    Args:
        restart_ckpt: Checkpoint to restart from. When set, ``-r <ckpt>`` is
            added to the run command and ``pcocc exec`` is started without
            the ``-s`` script option.
        wait: When true, the end of the user process does not trigger the
            termination of the cluster.
        script: Script given to ``pcocc exec -s``. When set together with
            ``alloc_script``, the allocation script is started as a second,
            monitored process.
        alloc_script: Command line (split with ``shlex.split``) to run. It is
            the user process when ``script`` is not set.
        cluster_definition: Cluster definition used to build the ``Cluster``
            and stored under the "cluster/user" / "definition" key.

    When neither ``script`` nor ``alloc_script`` is given, the program named
    by ``$SHELL`` (default ``bash``) is started with a ``PROMPT_COMMAND``
    that prints the batch id.
    """
    config = load_config(process_type=ProcessType.LAUNCHER)
    batch = config.batch

    cluster_definition = ascii(cluster_definition)
    cluster = Cluster(cluster_definition)

    batch.populate_env()

    ckpt_opt = ['-r', restart_ckpt] if restart_ckpt else []

    # TODO: provide a way for the user to plugin his own pre-run scripts here
    os.mkdir(os.path.join(batch.cluster_state_dir, 'slurm'))
    for path in os.listdir(helperdir):
        path = os.path.abspath(os.path.join(helperdir, path))
        # Only regular files with the executable bit are run as helpers
        if os.path.isfile(path) and os.access(path, os.X_OK):
            subprocess.call(path, cwd=batch.cluster_state_dir)

    # TODO: This cmdline should be tunable
    s_pjob = batch.run(cluster, ['-Q', '-X', '--resv-port'], ['pcocc'] +
                       build_verbose_opt() + ['internal', 'run'] + ckpt_opt)
    try:
        cluster.wait_host_config()
    except PcoccError as err:
        s_pjob.kill()
        handle_error(err)
    except KeyboardInterrupt:
        s_pjob.kill()
        handle_error(PcoccError('Cluster launch was interrupted'))

    batch.write_key("cluster/user", "definition", cluster_definition)

    term_sigfd = fake_signalfd([signal.SIGTERM, signal.SIGINT])

    # The cluster job is always monitored; user processes are added below
    monitor_list = [s_pjob.pid]

    if script:
        if restart_ckpt:
            s_exec = subprocess.Popen(["pcocc", "exec"])
        else:
            s_exec = subprocess.Popen(["pcocc", "exec", "-s", script])
        if alloc_script:
            # Monitored as well, but its exit matches neither branch of the
            # supervision loop below.
            s_exec2 = subprocess.Popen(shlex.split(alloc_script))
            monitor_list.append(s_exec2.pid)
    elif alloc_script:
        s_exec = subprocess.Popen(shlex.split(alloc_script))
    else:
        # Work on a copy so the prompt override only reaches the child shell
        # instead of being written into this process's own environment.
        shell_env = os.environ.copy()
        shell_env['PROMPT_COMMAND'] = 'echo -n "(pcocc/%d) "' % (batch.batchid)
        shell = os.getenv('SHELL', default='bash')
        s_exec = subprocess.Popen(shell, env=shell_env)

    monitor_list.append(s_exec.pid)

    while True:
        status, pid, _ = wait_or_term_child(monitor_list, signal.SIGTERM,
                                            term_sigfd, 40)
        if pid == s_pjob.pid:
            if status != 0:
                sys.stderr.write("The cluster terminated unexpectedly\n")
            else:
                sys.stderr.write("The cluster has shut down\n")

            # This is racy but helps
            if s_exec.poll() is None:
                s_exec.terminate()

            # Give the user process a moment to exit before killing it
            time.sleep(1)
            if s_exec.poll() is None:
                s_exec.kill()

            sys.exit(status >> 8)
        elif pid == s_exec.pid and not wait:
            sys.stderr.write("Terminating the cluster...\n")
            # wait_timeout is called with the cluster job if it has not
            # exited 40 seconds after being interrupted.
            t = threading.Timer(40, wait_timeout, [s_pjob])
            t.start()
            s_pjob.send_signal(signal.SIGINT)
            s_pjob.wait()
            t.cancel()
            sys.exit(status >> 8)
Esempio n. 4
0
File: cmd.py Progetto: ansiz/pcocc
def pcocc_batch(restart_ckpt, batch_script, host_script, batch_options,
                cluster_definition):
    """Generate a wrapper batch script for a cluster and submit it.

    The wrapper is written to a file created with ``tempfile.mkstemp``. It
    optionally embeds the content of ``batch_script`` and ``host_script`` as
    here-documents, runs ``pcocc ... internal launcher`` in the background,
    waits for it and removes the embedded script copies. The process then
    exits with the value returned by ``config.batch.batch``.

    Args:
        restart_ckpt: Passed to ``gen_ckpt_opt`` to build the launcher's
            checkpoint options.
        batch_script: Readable file object whose content is passed to the
            launcher through ``-s``, or a false value.
        host_script: Readable file object whose content is passed to the
            launcher through ``-E``, or a false value.
        batch_options: Iterable of extra batch manager options.
        cluster_definition: Cluster definition used to build the ``Cluster``
            and appended to the launcher command line.

    When neither script is given the launcher is run with ``-w``.
    A ``PcoccError`` raised along the way is passed to ``handle_error``.

    NOTE(review): the wrapper file itself is not removed by this function.
    """

    try:
        config = load_config(process_type=ProcessType.OTHER)

        cluster_definition = ascii(cluster_definition)
        cluster = Cluster(cluster_definition)
        batch_options = list(batch_options)
        ckpt_opt = gen_ckpt_opt(restart_ckpt)

        (wrpfd, wrpname) = tempfile.mkstemp()

        if batch_script or host_script:
            launcher_opt = []
        else:
            launcher_opt = ['-w']

        # The context manager closes the wrapper even when reading one of
        # the user scripts or formatting the command line fails.
        with os.fdopen(wrpfd, 'w') as wrpfile:
            wrpfile.write("""#!/bin/bash
#SBATCH -o pcocc_%j.out
#SBATCH -e pcocc_%j.err
""")
            if batch_script:
                launcher_opt += ['-s', '"$TEMP_BATCH_SCRIPT"']
                wrpfile.write("""
TEMP_BATCH_SCRIPT="/tmp/pcocc.batch.$$"
cat <<"PCOCC_BATCH_SCRIPT_EOF" >> "${TEMP_BATCH_SCRIPT}"
""")
                wrpfile.write(batch_script.read())
                wrpfile.write("""
PCOCC_BATCH_SCRIPT_EOF
chmod u+x "$TEMP_BATCH_SCRIPT"
""")

            if host_script:
                launcher_opt += ['-E', '"$TEMP_HOST_SCRIPT"']
                wrpfile.write("""
TEMP_HOST_SCRIPT="/tmp/pcocc.host.$$"
cat <<"PCOCC_HOST_SCRIPT_EOF" >> "${TEMP_HOST_SCRIPT}"
""")
                wrpfile.write(host_script.read())
                wrpfile.write("""
PCOCC_HOST_SCRIPT_EOF
chmod u+x "$TEMP_HOST_SCRIPT"
""")

            # The launcher runs in the background and is waited for, then
            # the embedded script copies are removed.
            wrpfile.write("""
PYTHONUNBUFFERED=true pcocc %s internal launcher %s %s %s &
wait
rm "$TEMP_BATCH_SCRIPT" 2>/dev/null
rm "$TEMP_HOST_SCRIPT" 2>/dev/null
""" % (' '.join(build_verbose_opt()), ' '.join(launcher_opt),
            ' '.join(ckpt_opt), cluster_definition))

        ret = config.batch.batch(
            cluster, batch_options + get_license_opts(cluster) +
            ['-n', '%d' % (len(cluster.vms))], wrpname)
        sys.exit(ret)

    except PcoccError as err:
        handle_error(err)
Esempio n. 5
0
File: cmd.py Progetto: ansiz/pcocc
def load_batch_cluster():
    """Build a ``Cluster`` from the definition stored by the batch manager.

    The definition is read from the "cluster/user" / "definition" key with
    ``blocking=True``.
    """
    batch = Config().batch
    stored_definition = batch.read_key('cluster/user', 'definition',
                                       blocking=True)
    return Cluster(stored_definition)