Code example #1
def check_job_name_queued_or_running(job_name):
    with TemporaryDirectory() as temp_folder:
        user = getuser()

        # Launch job
        command = submit(job_command="sleep 600",
                         job_name=job_name,
                         time="700",
                         memory=500,
                         log_directory=temp_folder)
        job_id = _check_job_id(command)

        # Assert that the job has been launched
        try:
            running_jobs = queued_or_running_jobs(user=user)
            assert_in(job_name, running_jobs)
        finally:
            # Make sure to clean up even if there is a failure
            if _get_backend() == "slurm":
                subprocess.call(["scancel", job_id])

            elif _get_backend() == "sge":
                subprocess.call(["qdel", job_id])
            else:
                raise NotImplementedError("backend not implemented")
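For context, submit() does not launch anything by itself: it builds and returns the submission command as a string, which the test then executes (via the _check_job_id helper) to obtain the scheduler's job id. A minimal sketch of that round trip, assuming a SLURM backend; the exact string returned varies by backend and clusterlib version:

from clusterlib.scheduler import submit

# Build the submission command; nothing is launched yet.
script = submit(job_command="sleep 600", job_name="demo",
                time="700", memory=500, backend="slurm")
print(script)
# Roughly: echo '#!/bin/bash
# sleep 600' | sbatch --job-name=demo --time=700 --mem=500
# Launching is up to the caller, e.g. os.system(script).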
Code example #2
File: SamtoolsSort.py, Project: cacampbell/pythonmisc
    def make_scripts(self, cmds, job_names=None):
        sys.stdout.write("Writing Scripts...\n")
        scheduled_jobs = set(queued_or_running_jobs(user=self.user))
        scripts = []

        if job_names is None:
            job_names = [None] * len(cmds)

        for index, cmd in enumerate(cmds):
            job_name = job_names[index] or self.__get_job_name(cmd)
            if job_name not in scheduled_jobs:
                script = submit(cmd,
                                memory="%s" % self.memory,
                                job_name=job_name,
                                log_directory=self.slurm_logs,
                                backend='slurm',
                                time=0,
                                shell_script="#!/usr/bin/env bash")
                script = script + " --partition=%s" % self.cluster_partition
                script = script + " --cpus-per-task=%s" % self.threads
                scripts.append(script)

                if self.verbose:
                    sys.stdout.write(script + "\n")

        return scripts
Code example #3
File: Psme_map.py, Project: cacampbell/pythonmisc
def dispatch_to_slurm(commands):
    scripts = {}

    for job_name, command in commands.iteritems():
        script = submit(command, job_name=job_name, time="0",
                        memory="{}G".format(maxmem), backend="slurm",
                        shell_script="#!/usr/bin/env bash")
        script += " --partition={}".format(partition)
        script += " --ntasks=1"
        script += " --cpus-per-task={}".format(maxcpu)
        script += " --mail-type=END,FAIL"
        script += " --mail-user={}".format(email)
        scripts[job_name] = script

    scheduled_jobs = set(queued_or_running_jobs())

    for job_name, script in scripts.iteritems():
        if job_name not in scheduled_jobs:
            if verbose:
                print("{}".format(script), file=sys.stdout)

            if not dry_run:
                subprocess.call(script, shell=True)
        else:
            print("{} already running, skipping".format(job_name),
                  file=sys.stderr)
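The names maxmem, partition, maxcpu, email, verbose, and dry_run are module-level globals defined elsewhere in Psme_map.py and are not part of this snippet. A hypothetical set of definitions, only to make the sketch self-contained:

# Hypothetical module-level configuration (not from the original file).
maxmem = 32                    # GB per job
maxcpu = 8                     # CPUs per task
partition = "bigmemm"          # SLURM partition to target
email = "user@example.org"     # recipient for END,FAIL notifications
verbose = True                 # echo each script before submitting
dry_run = True                 # print only; do not call sbatch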
Code example #4
File: fastqc_all.py, Project: cacampbell/pythonmisc
def dispatch(scripts):
    running = set(queued_or_running_jobs())

    for job_name, script in scripts.iteritems():
        if job_name not in running and not output_exists(script):
            subprocess.call(script, shell=True)
        else:
            print("Job already underway or completed: {}".format(job_name),
                  file=sys.stderr)
Code example #5
def test_queued_or_running_jobs_nobackend():
    """Test queued or running whenever no backend is available."""
    # Note that we can't use _get_backend since the user might
    # have set the CLUSTERLIB_BACKEND environment variable.
    if _which('qmod') is None and _which('scontrol') is None:
        # No backend available, thus no running job
        assert_equal(queued_or_running_jobs(), [])
    else:
        raise SkipTest("A backend is installed")
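The _which helper used above is private to clusterlib and behaves like a PATH lookup for an executable. A minimal sketch of the same backend detection using only the standard library (Python 3's shutil.which; the function name guess_backend is illustrative):

from shutil import which

def guess_backend():
    # SLURM installs scontrol/sbatch; SGE installs qmod/qsub.
    if which("scontrol") is not None:
        return "slurm"
    if which("qmod") is not None:
        return "sge"
    return None  # no scheduler available on this machine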
Code example #6
def main():
    num_args = len(sys.argv) - 1
    if num_args < 1:
        config_path = '../config.ini'
    elif num_args > 1:
        raise Exception('too many arguments: %d. %d expected' % (num_args, 1))
    else:
        config_path = sys.argv[1]
    config_file = io.abspath2(config_path)

    np.random.seed(3463)

    config = io.load_config(config_file)

    model_list = io.get_model_list(config['input_path'], config['pkl_ext'])
    np.random.shuffle(
        model_list)  # If we don't finish, we still cover a random subset
    # model_list = model_list[:5]  # TODO remove, test only
    assert (all(io.is_safe_name(ss) for ss in model_list))
    print 'using models:'
    print model_list

    # Sort for reproducibility
    sampler_list = sorted(BUILD_STEP_PM.keys() + BUILD_STEP_MC.keys())
    print 'using samplers:'
    print sampler_list

    # Run n_chains in the outer loop: if the process gets killed, we end up
    # with fewer chains but still an even spread over models and samplers.
    scheduled_jobs = set(queued_or_running_jobs())
    for model_name in model_list:
        # Get the exact samples
        run_experiment(config, model_name, config['exact_name'])

        # Get the sampler samples
        for i in xrange(config['n_chains']):
            # TODO could put ADVI init here to keep it fixed across samplers
            for sampler in sampler_list:
                t = time()
                job_name = "slurm-%s-%s-%d" % (model_name, sampler, i)
                cmd_line_args = (config_file, model_name, sampler)
                if job_name in scheduled_jobs:
                    print '%s already in scheduled jobs, but running anyway' % job_name
                options = "-c 1 --job-name=%s -t 45:00 --mem=32gb --output %s.out" % (
                    job_name, job_name)
                end = "slurm_job_main.sh %s %s %s" % cmd_line_args
                command = "sbatch %s %s" % (options, end)
                print 'Executing:', command
                os.system(command)
                print 'wall time %fs' % (time() - t)
    print 'done'
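This script assembles the sbatch invocation by hand instead of going through clusterlib's submit(). A rough equivalent using submit(), under the same loop variables; the option mapping is approximate and --output would still need to be appended manually, as examples #2 and #3 do with their extra flags:

# Sketch: approximate submit()-based equivalent of the sbatch line above.
from clusterlib.scheduler import submit

script = submit("slurm_job_main.sh %s %s %s" % cmd_line_args,
                job_name=job_name, time="45:00", memory=32000,
                backend="slurm")
os.system(script + " --output %s.out" % job_name)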
Code example #7
def dispatch(commands):
    scripts = {}

    for job_name, command in commands.iteritems():
        script = submit(command, job_name=job_name,
                        time="0", memory=max_memory + "G", backend="slurm",
                        shell_script="#!/usr/bin/env bash")
        script = script + " --partition=bigmemm"
        scripts[job_name] = script

    scheduled_jobs = set(queued_or_running_jobs())

    for job_name, script in scripts.iteritems():
        if job_name not in scheduled_jobs:
            sys.stdout.write("\n{}\n".format(script))
            subprocess.call(script, shell=True)
        else:
            sys.stderr.write("{} running, skipping.\n".format(job_name))
Code example #8
File: sort.py, Project: cacampbell/pythonmisc
def dispatch(filelist, commands):
    scripts = []

    for i, command in enumerate(commands):
        script = submit(command, job_name="samtools_sort_%s" % filelist[i],
                        time="0", memory=240000, backend="slurm",
                        shell_script="#!/usr/bin/env bash")
        script = script + " --cpus-per-task=12"
        script = script + " --ntasks=1"
        script = script + " --partition=bigmemm"
        scripts.append(script)

    scheduled_jobs = set(queued_or_running_jobs())

    for i, script in enumerate(scripts):
        if "samtools_sort_%s" % filelist[i] not in scheduled_jobs:
            sys.stdout.write("\n%s\n" % script)
            subprocess.call(script, shell=True)
        else:
            sys.stderr.write("Job name 'samtools_sort_%s' found in queued or \
                             running jobs list" % filelist[i])
Code example #9
def test_log_output(n_trials=30):
    """Test that log output is uniform accross scheduler."""

    with TemporaryDirectory() as temp_folder:
        user = getuser()
        job_completed = False
        # Launch a quick job on whichever backend is available
        job_name = 'ok_job'
        command = submit(job_command="echo ok",
                         job_name=job_name,
                         time="700",
                         memory=500,
                         log_directory=temp_folder)
        job_id = _check_job_id(command)

        try:
            for _ in range(n_trials):
                if job_name not in queued_or_running_jobs(user=user):
                    # job has completed, let's check the output
                    job_completed = True
                    filename = "%s.%s.txt" % (job_name, job_id)
                    assert_equal(os.listdir(temp_folder), [filename])
                    with open(op.join(temp_folder, filename)) as fhandle:
                        assert_equal(fhandle.read().strip(), "ok")
                    break
                else:
                    # Let's wait a bit before retrying
                    sleep(5)

        finally:
            # Make sure to clean up even if there is a failure
            if not job_completed:
                if _get_backend('auto') == 'slurm':
                    subprocess.call(["scancel", job_id])
                else:
                    subprocess.call(["qdel", job_id])
                raise AssertionError(
                    "job %s (%s) has not completed after %d polls (~%d s)." %
                    (job_id, job_name, n_trials, 5 * n_trials))
Code example #10
File: notification.py, Project: jm-begon/clustertools
    def refresh(self):
        job_dict = load_notifications(self.exp_name)

        # Reconcile recorded job states with what the scheduler still queues
        queued = frozenset(queued_or_running_jobs(self.user))
        r_jobs = _filter(job_dict, __RUNNING__)
        p_jobs = _filter(job_dict, __PENDING__)
        i_jobs = _filter(job_dict, __PARTIAL__)
        launchables = {k for k in r_jobs.keys() if k not in queued}
        launchables.update({k for k in p_jobs.keys() if k not in queued})
        launchable_jobs_update(self.exp_name, launchables)
        incompletes = {k for k in i_jobs.keys() if k not in queued}
        incomplete_jobs_update(self.exp_name, incompletes)
        # Apply the same state changes to the local copy
        for comp_name in launchables:
            info = job_dict[comp_name]
            info[__STATE__] = __LAUNCHABLE__
        for comp_name in incompletes:
            info = job_dict[comp_name]
            info[__STATE__] = __INCOMPLETE__

        # Setting the refreshment
        self.job_dict = job_dict
        self.state_dict = _sort_by_state(self.job_dict)
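_filter and the __RUNNING__/__PENDING__/__PARTIAL__ constants come from elsewhere in clustertools and are not shown here. Judging from how its result is used, _filter plausibly selects the notifications that are in a given state; a hypothetical reading, for orientation only:

def _filter(job_dict, state):
    # Hypothetical: keep the entries whose recorded state matches.
    return {name: info for name, info in job_dict.items()
            if info[__STATE__] == state}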
Code example #11
# clusterlib_launcher.py

import sys
from clusterlib.scheduler import queued_or_running_jobs
from clusterlib.scheduler import submit
from clusterlib.storage import sqlite3_loads
from clusterlib_main import NOSQL_PATH

if __name__ == "__main__":
    scheduled_jobs = set(queued_or_running_jobs())
    done_jobs = sqlite3_loads(NOSQL_PATH)

    for param in range(100):
        job_name = "job-param=%s" % param
        job_command = "%s clusterlib_main.py --param %s" % (sys.executable,
                                                            param)

        if job_name not in scheduled_jobs and job_command not in done_jobs:
            script = submit(job_command, job_name=job_name)
            print(script)

            # Uncomment these lines to launch the jobs
            # import os
            # os.system(script)
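This launcher assumes a companion clusterlib_main.py that defines NOSQL_PATH and records each finished run with clusterlib.storage.sqlite3_dumps, keyed so that the launcher's done_jobs lookup matches job_command exactly. A minimal sketch of such a companion; everything beyond NOSQL_PATH and sqlite3_dumps is illustrative:

# clusterlib_main.py (sketch)
import os
import sys
import argparse
from clusterlib.storage import sqlite3_dumps

NOSQL_PATH = os.path.join(os.environ["HOME"], "job.sqlite3")

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--param", type=int, required=True)
    args = parser.parse_args()

    # ... do the real work for args.param here ...

    # Record completion under the exact command line the launcher builds,
    # so the "job_command not in done_jobs" check above finds it.
    cmd = "%s %s" % (sys.executable, " ".join(sys.argv))
    sqlite3_dumps({cmd: "JOB DONE"}, NOSQL_PATH)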
Code example #12
File: launcher.py, Project: arjoly/paper-connectomics
    # Argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--debug', default=False, action="store_true")
    parser.add_argument('-v', '--verbose', default=False, action="store_true")
    parser.add_argument('-s', '--scores', default=False, action="store_true",
                        help="compute scores")

    args = vars(parser.parse_args())

    # Create log directory if needed
    if not os.path.exists(LOG_DIRECTORY):
        os.makedirs(LOG_DIRECTORY)

    # Get running jobs
    all_jobs_running = set(queued_or_running_jobs())
    all_jobs_done = sqlite3_loads(get_sqlite3_path())

    # Initialize some counters for reporting
    n_jobs_running = 0
    n_jobs_done = 0
    n_jobs_launched = 0

    results = []

    # Launch experiments if necessary
    for parameters in PARAMETER_GRID:
        job_hash = make_hash(parameters)

        if job_hash in all_jobs_running:
            n_jobs_running += 1