Example #1
 def test_05_add_arguments_single(self):
     slurm = Slurm()
     slurm.add_arguments(
         array='3-11',
         cpus_per_task='15',
         job_name='name',
         dependency='after:65541,afterok:34987',
         output=r'%A_%a.out',
     )
     self.assertEqual(self.script, str(slurm))
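Examples #1-#5 and #8-#10 all build the same submission script in different ways and compare it against a self.script fixture. As a minimal sketch of what that rendering looks like (assuming the simple_slurm package; the exact spacing of the generated header depends on the installed version):

from simple_slurm import Slurm

slurm = Slurm(array='3-11', cpus_per_task='15', job_name='name')
print(slurm)  # one '#SBATCH --<option> <value>' header line per stored argument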
Example #2
 def test_10_parse_dict(self):
     slurm = Slurm(
         array=range(3, 12),
         cpus_per_task=15,
         job_name='name',
         dependency=dict(after=65541, afterok=34987),
         output=r'%A_%a.out',
     )
     self.assertEqual(self.script, str(slurm))
Example #3
 def test_11_filename_patterns(self):
     slurm = Slurm(
         array=range(3, 12),
         cpus_per_task=15,
         job_name='name',
         dependency=dict(after=65541, afterok=34987),
         output=f'{Slurm.JOB_ARRAY_MASTER_ID}_{Slurm.JOB_ARRAY_ID}.out',
     )
     self.assertEqual(self.script, str(slurm))
Example #4
 def test_04_kwargs(self):
     slurm = Slurm(
         array='3-11',
         cpus_per_task='15',
         job_name='name',
         dependency='after:65541,afterok:34987',
         output=r'%A_%a.out',
     )
     self.assertEqual(self.script, str(slurm))
Example #5
 def test_09_parse_int(self):
     slurm = Slurm(
         array=range(3, 12),
         cpus_per_task=15,
         job_name='name',
         dependency='after:65541,afterok:34987',
         output=r'%A_%a.out',
     )
     self.assertEqual(self.script, str(slurm))
Example #6
def deploy_get_behavior_summary_for_all_sessions():

    current_location = pathlib.Path(__file__).parent.resolve()
    python_script_to_run = os.path.join(
        current_location, 'cache_behavior_performance_for_one_session.py')

    python_executable = "{}/anaconda2/envs/{}/bin/python".format(
        os.path.expanduser('~'), 'visual_behavior_sdk')

    # define the job record output folder
    stdout_location = r"/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/cluster_jobs/behavior_metrics"

    slurm = Slurm(
        job_name='cache_performance',
        partition='braintv',
        cpus_per_task=1,
        mem='40g',
        time='01:00:00',
        output=f'{stdout_location}/{Slurm.JOB_ARRAY_MASTER_ID}_{Slurm.JOB_ARRAY_ID}.out',
    )

    # import visual_behavior.data_access.utilities as utilities
    # behavior_session_ids = utilities.get_behavior_session_ids_to_analyze()

    import pandas as pd
    # import visual_behavior.data_access.loading as loading
    # df = pd.read_csv(os.path.join(loading.get_platform_analysis_cache_dir(), 'behavior_only_sessions_without_nwbs.csv'))
    filepath = r"//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/platform_paper_cache/behavior_only_sessions_without_nwbs.csv"
    df = pd.read_csv(filepath)
    behavior_session_ids = df.behavior_session_id.values

    # methods = ['stimulus_based', 'trial_based', 'sdk']
    method = 'sdk'
    # for method in methods:
    # for behavior_session_id in behavior_session_ids:
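    # the loops above are left commented out, so only the first behavior session is submitted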
    behavior_session_id = behavior_session_ids[0]
    print('deploying job for bsid {}'.format(behavior_session_id))
    args_to_pass = '******'.format(
        behavior_session_id, method)
    job_title = 'behavior_session_id_{}'.format(behavior_session_id)

    slurm.sbatch(python_executable + ' ' + python_script_to_run + ' ' +
                 args_to_pass)
Example #7
    def test_15_sbatch_execution(self):
        with io.StringIO() as buffer:
            with contextlib.redirect_stdout(buffer):
                slurm = Slurm()
                if shutil.which('sbatch') is not None:
                    job_id = slurm.sbatch('echo Hello!')
                else:
                    with patch('subprocess.run', subprocess_sbatch):
                        job_id = slurm.sbatch('echo Hello!')
                stdout = buffer.getvalue()

        out_file = f'slurm-{job_id}.out'
        while True:  # wait for job to finalize
            if os.path.isfile(out_file):
                break
        with open(out_file, 'r') as fid:
            contents = fid.read()
        os.remove(out_file)

        self.assertIsInstance(job_id, int)
        self.assertIn('Hello!', contents)
        self.assertIn(f'Submitted batch job {job_id}', stdout)
Example #8
 def test_03_args_simple(self):
     slurm = Slurm(
         'array',
         '3-11',
         'cpus_per_task',
         '15',
         'job_name',
         'name',
         'dependency',
         'after:65541,afterok:34987',
         'output',
         r'%A_%a.out',
     )
     self.assertEqual(self.script, str(slurm))
Example #9
 def test_02_args_long(self):
     slurm = Slurm(
         '--array',
         '3-11',
         '--cpus_per_task',
         '15',
         '--job_name',
         'name',
         '--dependency',
         'after:65541,afterok:34987',
         '--output',
         r'%A_%a.out',
     )
     self.assertEqual(self.script, str(slurm))
Example #10
 def test_01_args_short(self):
     slurm = Slurm(
         '-a',
         '3-11',
         '-c',
         '15',
         '-J',
         'name',
         '-d',
         'after:65541,afterok:34987',
         '-o',
         r'%A_%a.out',
     )
     self.assertEqual(self.script, str(slurm))
Example #11
def main(args):

    with open(args.pipeline_config) as f:
        pipeline_config = yaml.load(f, Loader=yaml.FullLoader)

    libraries = pipeline_config["libraries"]

    with open(args.batch_config) as f:
        batch_config = yaml.load(f, Loader=yaml.FullLoader)

    # Make models available to the pipeline
    sys.path.append(
        libraries["model_library"])  #  !!  TEST WITHOUT THIS LINE IN MAIN()

    for stage in pipeline_config["stage_list"]:

        # Set resume_id if it is given, else it is None and new model is built
        resume_id = get_resume_id(stage)

        # Get config file, from given location OR from ckpnt
        model_config = load_config(stage, resume_id, libraries)
        logging.info("Single config: {}".format(model_config))
        model_config_combos = combo_config(
            model_config) if resume_id is None else [model_config]
        logging.info("Combo configs: {}".format(model_config_combos))
        for config in model_config_combos:
            if args.batch:
                command_line_args = dict_to_args(config)
                slurm = Slurm(**batch_config)
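                # the submitted batch body activates the conda environment and runs a single pipeline stage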
                slurm_command = """bash
                             conda activate exatrkx-test
                             python run_pipeline.py --run-stage """ + command_line_args
                logging.info(slurm_command)
                slurm.sbatch(slurm_command)
            else:
                run_stage(**config)
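Because batch_config is unpacked straight into Slurm(**batch_config), its keys have to name sbatch options. A purely hypothetical illustration of what the YAML file might deserialize to (all names and values invented):

batch_config = {
    'job_name': 'train_stage',
    'partition': 'gpu',
    'gres': 'gpu:1',
    'time': '04:00:00',
    'mem': '16g',
}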
Example #12
def map_slurm_jobs(cmds, slurm_config, odir, max_jobs_array, tmp_dir, threads):
    rfile = os.path.join(odir, "fastANI_out_jobs.txt")
    with open(rfile, "w") as outfile:
        for cmd in cmds:
            outfile.write("%s\n" % cmd)

    slurm = Slurm(
        **yaml.load(open(slurm_config), Loader=yaml.FullLoader),
        output=(
            # "{}/gderep-{}_{}.out".format(
            #     tmp_dir, Slurm.JOB_ARRAY_MASTER_ID, Slurm.JOB_ARRAY_ID
            "/dev/null"
            # )
        ))

    if len(cmds) > max_jobs_array:
        log.debug(
            "The number of jobs ({}) is larger than the allowed array size ({}). Splitting jobs.."
            .format(len(cmds), max_jobs_array))

    job_ranges = split_fixed_size(range(1, len(cmds) + 1), max_jobs_array)
    job_ids = []
    log.debug("Mapping {} jobs to SLURM in {} batch(es)".format(
        len(cmds), len(job_ranges)))

    df = pd.read_csv(rfile, header=None)
    pw = []
    for r in job_ranges:
        rfile_tmp = os.path.join(odir, "fastANI_out_jobs-tmp.txt")
        skrows = min(r) - 1
        nrows = max(r)
        df1 = df.iloc[(min(r) - 1):(max(r))]
        df1.columns = ["cmd"]

        ofiles = df1["cmd"].str.rsplit(" ", 1).str[-1].values

        df1.to_csv(rfile_tmp, index=False, sep="\t", header=False)
        new_r = range(1, df1.shape[0] + 1)

        slurm.set_array(new_r)
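        # passing wait="" adds the sbatch --wait flag, so submission blocks until the array job finishes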
        slurm.add_arguments(wait="")
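        # each array task uses awk to pick out the command on line $SLURM_ARRAY_TASK_ID of the jobs file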
        bjob = "$(awk -vJ=" + "$SLURM_ARRAY_TASK_ID " + "'NR==J' " + rfile_tmp + " )"
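        # temporarily swallow stdout so the submission message printed by sbatch is suppressed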
        text_trap = io.StringIO()
        sys.stdout = text_trap
        job_id = slurm.sbatch(bjob)
        sys.stdout = sys.__stdout__
        job_ids.append(job_id)
        pairwise_distances = reduce_slurm_jobs(ofiles, threads)
        pw.append(pairwise_distances)
    return pw
Example #13
 def test_07_setter_methods(self):
     slurm = Slurm()
     slurm.set_array('3-11')
     slurm.set_cpus_per_task('15')
     slurm.set_job_name('name')
     slurm.set_dependency('after:65541,afterok:34987')
     slurm.set_output(r'%A_%a.out')
     self.assertEqual(self.script, str(slurm))
Example #14
 def test_06_add_arguments_multiple(self):
     slurm = Slurm()
     slurm.add_arguments(array='3-11')
     slurm.add_arguments(cpus_per_task='15')
     slurm.add_arguments(job_name='name')
     slurm.add_arguments(dependency='after:65541,afterok:34987')
     slurm.add_arguments(output=r'%A_%a.out')
     self.assertEqual(self.script, str(slurm))
Example #15
 def test_12_output_env_vars_object(self):
     slurm = Slurm()
     self.assertEqual(slurm.SLURM_ARRAY_TASK_ID, r'$SLURM_ARRAY_TASK_ID')
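The test above shows that the object also exposes runtime SLURM environment-variable names (here SLURM_ARRAY_TASK_ID) as ready-to-interpolate strings. A hedged sketch of how that is typically used when submitting an array job (process_task.py is a hypothetical script name):

from simple_slurm import Slurm

slurm = Slurm(array=range(3, 12), job_name='demo')
# each array task is handed its own $SLURM_ARRAY_TASK_ID at run time
slurm.sbatch(f'python process_task.py --task {slurm.SLURM_ARRAY_TASK_ID}')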
Example #16
conda_environment = 'vba'

# define the job record output folder
job_dir = r"//allen/programs/braintv/workgroups/nc-ophys/visual_behavior/cluster_jobs/clustering"
# make the job record location if it doesn't already exist
os.mkdir(job_dir) if not os.path.exists(job_dir) else None

# env path
python_path = os.path.join(os.path.expanduser("~"), 'anaconda3', 'envs',
                           conda_environment, 'bin', 'python')

# create slurm instance
slurm = Slurm(
    cpus_per_task=8,
    job_name='glm_clustering',
    mem='5g',
    time='1:30:00',
    partition='braintv',
    output=f'{job_dir}/{Slurm.JOB_ARRAY_MASTER_ID}_{Slurm.JOB_ARRAY_ID}.out',
)

methods = ['kmeans', 'discretize']
metrics = [
    'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
    'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
    'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao',
    'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'
]
n_clusters = int(35)
n_boots = int(40)

for method in methods:
Example #17
 def test_14_srun_returncode(self):
     slurm = Slurm()
     code = slurm.srun('echo Hello!')
     self.assertEqual(code, 0)
Example #18
ophys_experiment_ids = experiments_table.index.values

if __name__ == "__main__":
    args = parser.parse_args()
    python_executable = "{}/anaconda2/envs/{}/bin/python".format(
        os.path.expanduser('~'), args.env)
    python_file = os.path.join(os.getcwd(), args.scriptname)

    # define the job record output folder
    stdout_location = r"/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/cluster_jobs/cell_metrics"

    # instantiate a Slurm object
    slurm = Slurm(
        mem='60g',  # '24g'
        cpus_per_task=1,
        time='60:00:00',
        partition='braintv',
        job_name='metrics_table',
        output=f'{stdout_location}/{Slurm.JOB_ARRAY_MASTER_ID}_{Slurm.JOB_ARRAY_ID}.out',
    )

    for ii, ophys_experiment_id in enumerate(ophys_experiment_ids):
        args_to_pass = '******'.format(ophys_experiment_id)
        print('experiment ID = {}, number {} of {}'.format(
            ophys_experiment_id, ii + 1, len(ophys_experiment_ids)))
        job_title = 'experiment_{}'.format(ophys_experiment_id)

        slurm.sbatch(python_executable + ' ' + python_file + ' ' +
                     args_to_pass)
Example #19
# define the job record output folder
stdout_location = r"/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/cluster_jobs/multi_session_dfs"

cache_dir = loading.get_platform_analysis_cache_dir()
cache = bpc.from_s3_cache(cache_dir=cache_dir)
print(cache_dir)

experiments_table = cache.get_ophys_experiment_table()

# call the `sbatch` command to run the jobs.
for project_code in experiments_table.project_code.unique():
    print(project_code)
    for session_number in experiments_table.session_number.unique():

        # instantiate a Slurm object
        slurm = Slurm(
            mem='120g',  # '24g'
            cpus_per_task=1,
            time='60:00:00',
            partition='braintv',
            job_name='multi_session_df_' + project_code + '_' +
            str(session_number),
            output=f'{stdout_location}/{Slurm.JOB_ARRAY_MASTER_ID}_{Slurm.JOB_ARRAY_ID}.out',
        )

        slurm.sbatch(python_path + ' ' + python_file + ' --project_code ' +
                     str(project_code) + ' --session_number' + ' ' +
                     str(session_number))