def test_depends():
    """Check dependencies passed as a Job object and as a job id.

    The first job is submitted twice, which must not raise.  Building and
    submitting a job whose ``depends`` is a plain string is expected to
    raise ``fyrd.ClusterError``.
    """
    common = dict(profile='default', clean_files=True, clean_outputs=True)

    parent = fyrd.Job('sleep 3', **common)
    parent.submit()
    parent.submit()  # Test submission abort

    # The bare string 'job' is not an acceptable dependency.
    with pytest.raises(fyrd.ClusterError):
        fyrd.Job('echo eggs', **dict(common, depends='job')).submit()

    # Dependency given as the Job object itself.
    eggs = fyrd.Job('echo eggs', **dict(common, depends=parent)).submit()
    eggs_out = eggs.get()
    assert eggs_out == 'eggs\n'
    assert eggs.stdout == 'eggs\n'
    assert eggs.stderr == ''

    # Dependency given as the id of the previous job.
    cheese = fyrd.Job('echo cheese', **dict(common, depends=eggs.id)).submit()
    cheese_out = cheese.get()
    assert cheese_out == 'cheese\n'
    assert cheese.stdout == 'cheese\n'
    assert cheese.stderr == ''
def test_function_submission():
    """Submit a Python function in local mode and verify its results.

    Returns:
        int: 1 if the job wrote anything to STDERR, otherwise 0.
    """
    status = 0
    fyrd.queue.MODE = 'local'

    func_job = fyrd.Job(write_to_file, ('42', 'bobfile'))
    func_job.submit()
    func_job.wait()
    func_job.fetch_outputs()
    result = func_job.get(delete_outfiles=False)
    func_job.function.clean(delete_output=True)
    func_job.clean()

    assert func_job.exitcode == 0
    assert result == 0
    assert func_job.out == 0
    assert func_job.stdout == '\n'

    # Unexpected STDERR is reported and reflected in the return code rather
    # than raised as an assertion.
    if func_job.stderr != '':
        sys.stderr.write('STDERR should be empty, but contains:\n')
        sys.stderr.write(func_job.stderr)
        status = 1

    # The submitted function should have written its argument to 'bobfile'.
    with open('bobfile') as handle:
        assert handle.read().rstrip() == '42'
    os.remove('bobfile')
    func_job.clean(delete_outputs=True)

    return status
def test_concat(delete=True):
    """Merge a one-row frame onto the generated frame through a fyrd job.

    Args:
        delete (bool): Forwarded to ``job.get`` as both ``cleanup`` and
            ``delete_outfiles``.
    """
    base = make_df()
    extra_row = [[1, 2, 3, 4, 'hi', 'there']]
    extra = pd.DataFrame(extra_row, columns=base.columns)

    merge_job = fyrd.Job(merge_two, (base, extra)).submit()
    merged = merge_job.get(cleanup=delete, delete_outfiles=delete)

    assert len(merged) == 101
def test_mean(delete=True):
    """Test getting the mean of the dataframe through a fyrd job.

    Args:
        delete (bool): Forwarded to ``job.get`` as both ``cleanup`` and
            ``delete_outfiles``.
    """
    df = make_df()
    cmean = df.mean()
    job = fyrd.Job(get_mean, (df, )).submit()
    mean = job.get(cleanup=delete, delete_outfiles=delete)
    # ``df.mean()`` is a pandas Series, so ``mean == cmean`` is an
    # element-wise comparison; asserting on it directly raises
    # "truth value of a Series is ambiguous".  Reduce it explicitly.
    assert (mean == cmean).all()
def test_job_execution_paths():
    """Run a job with explicit script/output paths and check auto-cleaning.

    A scratch directory ``out`` is created for the duration of the test and
    removed afterwards even when an assertion fails, so a failed run cannot
    make the next run fail on a leftover directory.
    """
    import shutil  # local import: only needed for the cleanup below

    os.makedirs('out')
    try:
        job = fyrd.Job('echo hi', profile='default', clean_files=True,
                       clean_outputs=True, scriptpath='..',
                       outpath='.').submit()
        job.wait()
        print(repr(job))
        print(str(job))
        print(repr(job.submission))
        print(str(job.submission))
        print(job.outfile)

        # Before get(): output, error and submission files must exist.
        assert os.path.isfile(job.outfile)
        assert os.path.isfile(job.errfile)
        assert os.path.isfile(job.submission.file_name)

        out = job.get()

        # After get(): the job was created with clean_files/clean_outputs,
        # so all three files must be gone.
        assert not os.path.isfile(job.outfile)
        assert not os.path.isfile(job.errfile)
        assert not os.path.isfile(job.submission.file_name)

        sys.stdout.write('{};\nSTDOUT: {}\nSTDERR: {}\n'.format(
            job.exitcode, job.stdout, job.stderr))
        assert job.exitcode == 0
        assert out == 'hi\n'
        assert job.stdout == 'hi\n'
        assert job.stderr == ''
        assert isinstance(job.start, dt)
        assert isinstance(job.end, dt)
        assert isinstance(job.runtime, td)
    finally:
        # Portable replacement for ``rm -rf out``; like ``rm -f`` it does
        # not complain if the directory is already gone.
        shutil.rmtree('out', ignore_errors=True)
def test_job_creation():
    """Build a job in local mode with explicit resources; check its qtype.

    Returns:
        int: Always 0 once the assertion has passed.
    """
    fyrd.queue.MODE = 'local'
    settings = dict(cores=2, time='00:02:00', mem='2000', threads=4,
                    clean_files=False, clean_outputs=False)
    local_job = fyrd.Job('echo hi', **settings)
    assert local_job.qtype == 'local'
    return 0
def test_job_execution():
    """Run ``echo hi`` with auto-cleaning and verify files and results.

    The job's output, error and submission files must exist after
    ``wait()`` and must be gone after ``get()``.
    """
    echo_job = fyrd.Job(
        'echo hi',
        profile='default',
        clean_files=True,
        clean_outputs=True,
    ).submit()
    echo_job.wait()

    # Exercise both string representations of the job and its submission.
    for shown in (echo_job, echo_job.submission):
        print(repr(shown))
        print(str(shown))
    print(echo_job.outfile)

    exists = os.path.isfile
    assert exists(echo_job.outfile)
    assert exists(echo_job.errfile)
    assert exists(echo_job.submission.file_name)

    result = echo_job.get()

    assert not exists(echo_job.outfile)
    assert not exists(echo_job.errfile)
    assert not exists(echo_job.submission.file_name)

    report = '{};\nSTDOUT: {}\nSTDERR: {}\n'
    sys.stdout.write(
        report.format(echo_job.exitcode, echo_job.stdout, echo_job.stderr)
    )

    assert echo_job.exitcode == 0
    assert result == 'hi\n'
    assert echo_job.stdout == 'hi\n'
    assert echo_job.stderr == ''

    # Timing attributes should be populated with datetime/timedelta values.
    assert isinstance(echo_job.start, dt)
    assert isinstance(echo_job.end, dt)
    assert isinstance(echo_job.runtime, td)
def test_function_submission():
    """Submit a Python function as a job and verify output and side effects.

    The submitted function is expected to return 0 and to leave a file
    ``bobfile`` containing ``42``; the file is removed at the end.
    """
    func_job = fyrd.Job(write_to_file, ('42', 'bobfile'), clean_files=False)
    func_job.submit()
    func_job.wait()
    func_job.fetch_outputs()
    result = func_job.get(delete_outfiles=False)
    func_job.function.clean(delete_output=True)
    func_job.clean()

    summary = '{};\nOut: {}\nSTDOUT: {}\nSTDERR: {}\n'
    sys.stdout.write(summary.format(
        func_job.exitcode, result, func_job.stdout, func_job.stderr))

    # Exercise the string representations of the job and its parts.
    for shown in (func_job, func_job.submission, func_job.function):
        print(repr(shown))
        print(str(shown))

    assert func_job.exitcode == 0
    assert result == 0
    assert func_job.out == 0
    assert func_job.stdout == '\n'
    assert func_job.stderr == ''

    print(func_job.runpath)
    assert os.path.isfile('bobfile')
    with open('bobfile') as handle:
        assert handle.read().rstrip() == '42'
    os.remove('bobfile')
    func_job.clean(delete_outputs=True)
def test_job_creation():
    """Create a job with ``qtype='local'`` and check the queue type sticks.

    ``fyrd.queue.MODE`` is forced to ``'local'`` for the test and is reset
    from ``fyrd.get_cluster_environment()`` afterwards, even if the job
    cannot be created or the assertion fails, so the global mode does not
    leak into later tests.
    """
    fyrd.queue.MODE = 'local'
    try:
        job = fyrd.Job('echo hi', cores=2, time='00:02:00', mem='2000',
                       threads=4, qtype='local')
        assert job.qtype == 'local'
    finally:
        # Restore the detected environment regardless of the test outcome.
        fyrd.queue.MODE = fyrd.get_cluster_environment()
def test_outfiles():
    """Run a job with the ``outfile`` and ``errfile`` parameters overridden."""
    options = dict(profile='default', clean_files=True, clean_outputs=True,
                   outfile='joe', errfile='john')
    named_job = fyrd.Job('echo ho', **options)
    named_job.submit()

    result = named_job.get()

    assert result == 'ho\n'
    assert named_job.stdout == 'ho\n'
    assert named_job.stderr == ''
def test_job_params():
    """Run a job with explicit ``cores``, ``mem`` and ``time`` values."""
    options = dict(profile='default', clean_files=True, clean_outputs=True,
                   cores=2, mem=2000, time='00:02:00')
    sized_job = fyrd.Job('echo ho', **options)
    sized_job.submit()

    result = sized_job.get()

    assert result == 'ho\n'
    assert sized_job.stdout == 'ho\n'
    assert sized_job.stderr == ''
def test_job_cleaning():
    """Clean up a job's files by hand when auto-cleaning is disabled."""
    kept_job = fyrd.Job(
        'echo hi',
        profile='default',
        clean_files=False,
        clean_outputs=False,
    ).submit()
    kept_job.wait()

    exists = os.path.isfile

    # With auto-cleaning off, all three files are present after the run.
    assert exists(kept_job.outfile)
    assert exists(kept_job.errfile)
    assert exists(kept_job.submission.file_name)

    kept_job.clean(delete_outputs=True)

    # An explicit clean with delete_outputs=True must remove all of them.
    assert not exists(kept_job.outfile)
    assert not exists(kept_job.errfile)
    assert not exists(kept_job.submission.file_name)
def test_resubmit():
    """Run a job, resubmit it, and check both runs give the same output."""
    options = dict(profile='default', clean_files=True, clean_outputs=True,
                   cores=2, mem=2000, time='00:02:00')
    repeat_job = fyrd.Job('echo ho', **options)
    repeat_job.submit()

    first = repeat_job.get()
    assert first == 'ho\n'
    assert repeat_job.stdout == 'ho\n'
    assert repeat_job.stderr == ''

    # job.command = 'echo hi'
    repeat_job.resubmit()

    second = repeat_job.get()
    assert second == 'ho\n'
    assert repeat_job.stdout == 'ho\n'
    # NOTE(review): this reads ``err`` whereas the first run checks
    # ``stderr`` -- confirm both attributes exist on fyrd.Job.
    assert repeat_job.err == ''
def test_job_execution():
    """Run ``echo hi`` in local mode with auto-cleaning and check results.

    Returns:
        int: Always 0 once every assertion has passed.
    """
    fyrd.queue.MODE = 'local'
    echo_job = fyrd.Job(
        'echo hi',
        profile='default',
        clean_files=True,
        clean_outputs=True,
    ).submit()
    echo_job.wait()

    exists = os.path.isfile

    # Files are still on disk after wait() ...
    assert exists(echo_job.outfile)
    assert exists(echo_job.errfile)
    assert exists(echo_job.submission.file_name)

    result = echo_job.get()

    # ... and have been removed once get() has run.
    assert not exists(echo_job.outfile)
    assert not exists(echo_job.errfile)
    assert not exists(echo_job.submission.file_name)

    assert echo_job.exitcode == 0
    assert result == 'hi\n'
    assert echo_job.stdout == 'hi\n'
    assert echo_job.stderr == ''
    assert isinstance(echo_job.start, dt)
    assert isinstance(echo_job.end, dt)
    assert isinstance(echo_job.runtime, td)
    return 0
def test_submission(delete=True):
    """Run ``make_df`` as a job and check that a DataFrame comes back.

    Args:
        delete (bool): Forwarded to ``job.get`` as both ``cleanup`` and
            ``delete_outfiles``.
    """
    frame_job = fyrd.Job(make_df).submit()
    frame = frame_job.get(cleanup=delete, delete_outfiles=delete)
    assert isinstance(frame, pd.DataFrame)
def test_function_keywords():
    """Submit a function with a positional and a keyword argument.

    Returns:
        int: Always 0 once the assertion has passed.
    """
    power_job = fyrd.Job(raise_me, (10,), kwargs={'power': 10}).submit()
    result = power_job.get()
    expected = 10 ** 10
    assert result == expected
    power_job.clean(delete_outputs=True)
    return 0
def test_method_submission():
    """Run a bound method as a job and check the returned object's output."""
    instance = TestMe()
    method_job = fyrd.Job(instance.do_math, (2,))
    # NOTE(review): there is no explicit submit() here; presumably get()
    # takes care of submission -- confirm against fyrd.Job.
    returned = method_job.get()
    actual = returned.get_out()
    expected = instance.me * 2
    assert actual == expected
def analyze_depict(sample_1, sample_2, prefix, cores=None, perms=100,
                   run_path=None, depict_path=DEPICT, **fyrd_args):
    """Run depict, run permutations, compare results.

    This function uses fyrd to submit cluster jobs, jobs will request 2*cores
    to run, and 12G of memory.

    For 100 permutations, this function takes about 3.5 hours to run.

    Besides the DEPICT outputs listed below, intermediate results are
    pickled to ``pgenes.bak``, ``ptissues.bak``, ``genes.bak`` and
    ``tissues.bak`` (relative paths), and all four frames are pickled
    together to ``<prefix>_completed_dfs.pickle``.

    Args:
        sample_1 (str):    File name or path to file with rsids for sample 1
        sample_2 (str):    File name or path to file with rsids for sample 2
        prefix (str):      Name for the output directory, input file names
                           will be used to set output files in this
                           directory.
        cores (int):       Number of cores to use *PER PROCESS* for DEPICT,
                           falls back to PARAM_NCORES when not given
                           (originally documented as 1/2 of the available
                           cores on the machine, so that all cores are used
                           for the run).
        perms (int):       Number of permutations.
        run_path (str):    Root directory to run in, defaults to current dir.
                           Created if it does not exist.
        depict_path (str): Path to the DEPICT package, default set in file.
        fyrd_args (dict):  Fyrd keyword arguments, not required.

    Outputs:
        <prefix>/<sample_name>.geneprioritization.txt
        <prefix>/<sample_name>.loci.txt
        <prefix>/<sample_name>.tissueenrichment.txt
        <prefix>/<sample_name>.genesetenrichment.txt
        <prefix>/<sample_name>.log

    Returns:
        The value returned by ``examine_data`` when called with the
        ``genes``, ``tissues``, ``pgenes`` and ``ptissues`` results.
        NOTE(review): this was previously documented as two DataFrames of
        permutation outputs -- confirm against ``examine_data``.
    """
    print('Submitting main DEPICT job to cluster')
    if not cores:
        cores = PARAM_NCORES
    # run_path defaults to None, and os.path.abspath(None) raises TypeError;
    # fall back to the current directory as the documentation promises.
    run_path = _os.path.abspath(run_path or _os.path.curdir)
    if not _os.path.isdir(run_path):
        _os.mkdir(run_path)
    # Remember where we started so the working directory can be restored
    # no matter how the run ends.
    startdir = _os.path.abspath(_os.path.curdir)
    try:
        job = _fyrd.Job(
            run_parse_depict,
            (sample_1, sample_2, prefix, cores, run_path, depict_path),
            name='main_DEPICT', cores=cores * 2, mem='12GB',
            scriptpath=run_path, outpath=run_path, runpath=run_path,
            **fyrd_args)
        job.submit()
        print('Job submitted.')

        # The permutations run while the main job is on the cluster.
        print('Run permutations')
        pgenes, ptissues = permute_depict(sample_1, sample_2, prefix, cores,
                                          perms, run_path, depict_path,
                                          **fyrd_args)
        # Back up intermediate results as soon as they are available.
        pgenes.to_pickle('pgenes.bak')
        ptissues.to_pickle('ptissues.bak')

        print('Permutations complete, getting main output')
        genes, tissues = job.get()
        genes.to_pickle('genes.bak')
        tissues.to_pickle('tissues.bak')
        print('Main job completed successfully, DataFrames saved.')

        data = {
            'genes': genes,
            'tissues': tissues,
            'pgenes': pgenes,
            'ptissues': ptissues,
        }
        with open(prefix + '_completed_dfs.pickle', 'wb') as fout:
            _pickle.dump(data, fout)

        data = examine_data(**data)
    finally:
        _os.chdir(startdir)
    return data