Esempio n. 1
0
def test_depends():
    """Chain jobs through the ``depends`` argument.

    Three dependency forms are exercised in turn: a plain string (which must
    raise ``fyrd.ClusterError``), a job object, and a job id.
    """
    def make(command, **extra):
        """Build a job with the options shared by every job in this test."""
        return fyrd.Job(command,
                        profile='default',
                        clean_files=True,
                        clean_outputs=True,
                        **extra)

    sleeper = make('sleep 3')
    sleeper.submit()
    sleeper.submit()  # Second call on the same job: tests submission abort

    # A bare string is expected to be refused as a dependency
    with pytest.raises(fyrd.ClusterError):
        make('echo eggs', depends='job').submit()

    # Dependency given as a job object
    eggs = make('echo eggs', depends=sleeper).submit()
    eggs_out = eggs.get()
    assert eggs_out == 'eggs\n'
    assert eggs.stdout == 'eggs\n'
    assert eggs.stderr == ''

    # Dependency given as a job id
    cheese = make('echo cheese', depends=eggs.id).submit()
    cheese_out = cheese.get()
    assert cheese_out == 'cheese\n'
    assert cheese.stdout == 'cheese\n'
    assert cheese.stderr == ''
Esempio n. 2
0
def test_function_submission():
    """Submit a Python function as a job in local mode.

    Runs ``write_to_file('42', 'bobfile')`` through fyrd, checks the exit
    code, return value and STDOUT, then verifies and removes ``bobfile``.

    Returns:
        int: 0 when STDERR was empty, 1 when it contained anything (the
             content is echoed to this process's STDERR in that case).
    """
    fyrd.queue.MODE = 'local'
    func_job = fyrd.Job(write_to_file, ('42', 'bobfile'))
    func_job.submit()
    func_job.wait()
    func_job.fetch_outputs()
    result = func_job.get(delete_outfiles=False)
    func_job.function.clean(delete_output=True)
    func_job.clean()

    assert func_job.exitcode == 0
    assert result == 0
    assert func_job.out == 0
    assert func_job.stdout == '\n'

    # Unexpected STDERR is reported through the return code, not an assert
    stderr_empty = func_job.stderr == ''
    if not stderr_empty:
        sys.stderr.write('STDERR should be empty, but contains:\n')
        sys.stderr.write(func_job.stderr)

    with open('bobfile') as handle:
        written = handle.read()
    assert written.rstrip() == '42'

    os.remove('bobfile')
    func_job.clean(delete_outputs=True)
    return 0 if stderr_empty else 1
Esempio n. 3
0
def test_concat(delete=True):
    """Merge two dataframes in a submitted job and check the row count.

    Args:
        delete (bool): Passed to ``job.get`` as both ``cleanup`` and
                       ``delete_outfiles``.
    """
    base = make_df()
    # One extra row that reuses the column labels of the generated frame
    extra = pd.DataFrame([[1, 2, 3, 4, 'hi', 'there']], columns=base.columns)
    pending = fyrd.Job(merge_two, (base, extra))
    running = pending.submit()
    merged = running.get(cleanup=delete, delete_outfiles=delete)
    # presumably make_df() yields 100 rows, so one more gives 101 -- confirm
    assert len(merged) == 101
Esempio n. 4
0
def test_mean(delete=True):
    """Compute a dataframe mean in a submitted job and compare it locally.

    Args:
        delete (bool): Passed to ``job.get`` as both ``cleanup`` and
                       ``delete_outfiles``.
    """
    df = make_df()
    cmean = df.mean()
    job = fyrd.Job(get_mean, (df, )).submit()
    mean = job.get(cleanup=delete, delete_outfiles=delete)
    # DataFrame.mean() returns a Series, so ``==`` is element-wise and the
    # result cannot be used directly as a truth value; every element must
    # match, hence the explicit reduction.
    assert (mean == cmean).all()
Esempio n. 5
0
def test_job_execution_paths():
    """Run a job and autoclean with defined paths.

    Creates an ``out`` directory, runs ``echo hi`` with ``scriptpath='..'``
    and ``outpath='.'``, checks that the output, error and submission files
    exist after ``wait()`` and are gone after ``get()``, then validates the
    job's exit code, streams and timing attributes.

    The ``out`` directory is removed even when an assertion fails, so a
    failed run does not make the next one die in ``os.makedirs``.
    """
    import shutil  # function-scope import; only the cleanup below needs it

    # Deliberately strict: fails if 'out' already exists, so the cleanup
    # below only ever removes a directory this test created.
    os.makedirs('out')
    try:
        job = fyrd.Job('echo hi',
                       profile='default',
                       clean_files=True,
                       clean_outputs=True,
                       scriptpath='..',
                       outpath='.').submit()
        job.wait()
        print(repr(job))
        print(str(job))
        print(repr(job.submission))
        print(str(job.submission))
        print(job.outfile)
        # Files must still be present after wait() ...
        assert os.path.isfile(job.outfile)
        assert os.path.isfile(job.errfile)
        assert os.path.isfile(job.submission.file_name)
        out = job.get()
        # ... and must be gone once get() has run
        assert not os.path.isfile(job.outfile)
        assert not os.path.isfile(job.errfile)
        assert not os.path.isfile(job.submission.file_name)
        sys.stdout.write('{};\nSTDOUT: {}\nSTDERR: {}\n'.format(
            job.exitcode, job.stdout, job.stderr))
        assert job.exitcode == 0
        assert out == 'hi\n'
        assert job.stdout == 'hi\n'
        assert job.stderr == ''
        assert isinstance(job.start, dt)
        assert isinstance(job.end, dt)
        assert isinstance(job.runtime, td)
    finally:
        # Portable replacement for shelling out to ``rm -rf``; like ``-f``,
        # a missing directory is not an error.
        shutil.rmtree('out', ignore_errors=True)
Esempio n. 6
0
def test_job_creation():
    """Build a job in local mode without submitting it.

    Sets ``fyrd.queue.MODE`` to ``'local'`` (not restored afterwards) and
    checks that the new job reports a ``'local'`` queue type.

    Returns:
        int: Always 0.
    """
    fyrd.queue.MODE = 'local'
    settings = {
        'cores': 2,
        'time': '00:02:00',
        'mem': '2000',
        'threads': 4,
        'clean_files': False,
        'clean_outputs': False,
    }
    local_job = fyrd.Job('echo hi', **settings)
    assert local_job.qtype == 'local'
    return 0
Esempio n. 7
0
def test_job_execution():
    """Run ``echo hi`` with autoclean on and inspect the finished job.

    The output, error and submission files must exist after ``wait()`` and
    must no longer exist after ``get()``.
    """
    def tracked_files(target):
        """Return the three file paths whose existence this test checks."""
        return (target.outfile, target.errfile, target.submission.file_name)

    hi_job = fyrd.Job('echo hi',
                      profile='default',
                      clean_files=True,
                      clean_outputs=True).submit()
    hi_job.wait()

    # Exercise repr/str on both the job and its submission script
    for shown in (hi_job, hi_job.submission):
        print(repr(shown))
        print(str(shown))
    print(hi_job.outfile)

    for path in tracked_files(hi_job):
        assert os.path.isfile(path)
    result = hi_job.get()
    for path in tracked_files(hi_job):
        assert not os.path.isfile(path)

    summary = '{};\nSTDOUT: {}\nSTDERR: {}\n'.format(
        hi_job.exitcode, hi_job.stdout, hi_job.stderr)
    sys.stdout.write(summary)

    assert hi_job.exitcode == 0
    assert result == 'hi\n'
    assert hi_job.stdout == 'hi\n'
    assert hi_job.stderr == ''
    assert isinstance(hi_job.start, dt)
    assert isinstance(hi_job.end, dt)
    assert isinstance(hi_job.runtime, td)
Esempio n. 8
0
def test_function_submission():
    """Submit a Python function as a job and verify its results.

    Runs ``write_to_file('42', 'bobfile')`` with ``clean_files=False``,
    prints the job, its submission and its function wrapper, checks the exit
    code, return value and both streams, then verifies and removes
    ``bobfile``.
    """
    func_job = fyrd.Job(write_to_file, ('42', 'bobfile'), clean_files=False)
    func_job.submit()
    func_job.wait()
    func_job.fetch_outputs()
    result = func_job.get(delete_outfiles=False)
    func_job.function.clean(delete_output=True)
    func_job.clean()

    report = '{};\nOut: {}\nSTDOUT: {}\nSTDERR: {}\n'.format(
        func_job.exitcode, result, func_job.stdout, func_job.stderr)
    sys.stdout.write(report)

    # Exercise repr/str on the job, its submission script and its function
    for shown in (func_job, func_job.submission, func_job.function):
        print(repr(shown))
        print(str(shown))

    assert func_job.exitcode == 0
    assert result == 0
    assert func_job.out == 0
    assert func_job.stdout == '\n'
    assert func_job.stderr == ''

    print(func_job.runpath)
    assert os.path.isfile('bobfile')
    with open('bobfile') as handle:
        written = handle.read()
    assert written.rstrip() == '42'

    os.remove('bobfile')
    func_job.clean(delete_outputs=True)
Esempio n. 9
0
def test_job_creation():
    """Make a job in local mode and check its queue type.

    ``fyrd.queue.MODE`` is forced to ``'local'`` for the duration of the
    test and afterwards reset to whatever ``fyrd.get_cluster_environment()``
    reports. The reset runs in a ``finally`` clause so that a failing
    assertion does not leave the module-level mode set to ``'local'``.
    """
    fyrd.queue.MODE = 'local'
    try:
        job = fyrd.Job('echo hi', cores=2, time='00:02:00', mem='2000',
                       threads=4, qtype='local')
        assert job.qtype == 'local'
    finally:
        # Re-detect the environment and put the global mode back
        fyrd.queue.MODE = fyrd.get_cluster_environment()
Esempio n. 10
0
def test_outfiles():
    """Run a job with the outfile and errfile parameters overridden.

    ``outfile`` is set to ``'joe'`` and ``errfile`` to ``'john'``; the test
    then checks the value returned by ``get()`` and both captured streams.
    """
    options = dict(profile='default',
                   clean_files=True,
                   clean_outputs=True,
                   outfile='joe',
                   errfile='john')
    named_job = fyrd.Job('echo ho', **options)
    named_job.submit()
    result = named_job.get()
    assert result == 'ho\n'
    assert named_job.stdout == 'ho\n'
    assert named_job.stderr == ''
Esempio n. 11
0
def test_job_params():
    """Run a job with explicit resource parameters.

    Requests 2 cores, ``mem=2000`` and a ``'00:02:00'`` time limit, then
    checks the value returned by ``get()`` and both captured streams.
    """
    options = dict(profile='default',
                   clean_files=True,
                   clean_outputs=True,
                   cores=2,
                   mem=2000,
                   time='00:02:00')
    sized_job = fyrd.Job('echo ho', **options)
    sized_job.submit()
    result = sized_job.get()
    assert result == 'ho\n'
    assert sized_job.stdout == 'ho\n'
    assert sized_job.stderr == ''
Esempio n. 12
0
def test_job_cleaning():
    """Clean up job files by hand with autoclean switched off.

    With ``clean_files`` and ``clean_outputs`` both False, the output, error
    and submission files must exist after ``wait()`` and must be gone after
    an explicit ``clean(delete_outputs=True)``.
    """
    def tracked_files(target):
        """Return the three file paths whose existence this test checks."""
        return (target.outfile, target.errfile, target.submission.file_name)

    manual_job = fyrd.Job('echo hi',
                          profile='default',
                          clean_files=False,
                          clean_outputs=False).submit()
    manual_job.wait()
    for path in tracked_files(manual_job):
        assert os.path.isfile(path)

    manual_job.clean(delete_outputs=True)
    for path in tracked_files(manual_job):
        assert not os.path.isfile(path)
Esempio n. 13
0
def test_resubmit():
    """Run a job, resubmit it unchanged, and check both results.

    The command is not altered between the two runs, so the second run is
    expected to produce the same output as the first.
    """
    options = dict(profile='default',
                   clean_files=True,
                   clean_outputs=True,
                   cores=2,
                   mem=2000,
                   time='00:02:00')
    job = fyrd.Job('echo ho', **options)

    # First run
    job.submit()
    first = job.get()
    assert first == 'ho\n'
    assert job.stdout == 'ho\n'
    assert job.stderr == ''

    # Second run of the very same job object
    job.resubmit()
    second = job.get()
    assert second == 'ho\n'
    assert job.stdout == 'ho\n'
    # NOTE(review): this reads ``err`` where the first run reads ``stderr``
    # -- presumably an alias; confirm against the Job class.
    assert job.err == ''
Esempio n. 14
0
def test_job_execution():
    """Run ``echo hi`` in local mode with autoclean on.

    Sets ``fyrd.queue.MODE`` to ``'local'`` (not restored afterwards). The
    output, error and submission files must exist after ``wait()`` and must
    no longer exist after ``get()``.

    Returns:
        int: Always 0.
    """
    def tracked_files(target):
        """Return the three file paths whose existence this test checks."""
        return (target.outfile, target.errfile, target.submission.file_name)

    fyrd.queue.MODE = 'local'
    hi_job = fyrd.Job('echo hi',
                      profile='default',
                      clean_files=True,
                      clean_outputs=True).submit()
    hi_job.wait()

    for path in tracked_files(hi_job):
        assert os.path.isfile(path)
    result = hi_job.get()
    for path in tracked_files(hi_job):
        assert not os.path.isfile(path)

    assert hi_job.exitcode == 0
    assert result == 'hi\n'
    assert hi_job.stdout == 'hi\n'
    assert hi_job.stderr == ''
    assert isinstance(hi_job.start, dt)
    assert isinstance(hi_job.end, dt)
    assert isinstance(hi_job.runtime, td)
    return 0
Esempio n. 15
0
def test_submission(delete=True):
    """Run ``make_df`` as a job and check that a DataFrame comes back.

    Args:
        delete (bool): Passed to ``job.get`` as both ``cleanup`` and
                       ``delete_outfiles``.
    """
    fetch_options = {'cleanup': delete, 'delete_outfiles': delete}
    frame_job = fyrd.Job(make_df).submit()
    frame = frame_job.get(**fetch_options)
    assert isinstance(frame, pd.DataFrame)
Esempio n. 16
0
def test_function_keywords():
    """Submit a function job that takes a keyword argument.

    Calls ``raise_me`` with the positional argument 10 and ``power=10`` and
    expects ``10**10`` back.

    Returns:
        int: Always 0.
    """
    call_kwargs = {'power': 10}
    power_job = fyrd.Job(raise_me, (10,), kwargs=call_kwargs).submit()
    answer = power_job.get()
    assert answer == 10 ** 10
    power_job.clean(delete_outputs=True)
    return 0
Esempio n. 17
0
def test_method_submission():
    """Submit a bound method as a job and check the returned object.

    The job is never submitted explicitly; ``get()`` is called directly on
    the freshly built job.
    """
    subject = TestMe()
    returned = fyrd.Job(subject.do_math, (2,)).get()
    observed = returned.get_out()
    expected = subject.me * 2
    assert observed == expected
Esempio n. 18
0
def analyze_depict(sample_1,
                   sample_2,
                   prefix,
                   cores=None,
                   perms=100,
                   run_path=None,
                   depict_path=DEPICT,
                   **fyrd_args):
    """Run depict, run permutations, compare results.

    This function uses fyrd to submit cluster jobs, jobs will request 2*cores
    to run, and 12GB of memory.

    For 100 permutations, this function takes about 3.5 hours to run.

    Args:
        sample_1 (str):    File name or path to file with rsids for sample 1
        sample_2 (str):    File name or path to file with rsids for sample 2
        prefix (str):      Name for the output directory, input file names will
                           be used to set output files in this directory.
        cores (int):       Number of cores to use *PER PROCESS* for DEPICT,
                           defaults to PARAM_NCORES when not given (or 0).
        perms (int):       Number of permutations.
        run_path (str):    Root directory to run in, defaults to current dir.
                           Created if it does not exist (its parent must).
        depict_path (str): Path to the DEPICT package, default set in file.
        fyrd_args (dict):  Fyrd keyword arguments, not required. Passed both
                           to the main job and to permute_depict.

    Outputs:
        <prefix>/<sample_name>.geneprioritization.txt
        <prefix>/<sample_name>.loci.txt
        <prefix>/<sample_name>.tissueenrichment.txt
        <prefix>/<sample_name>.genesetenrichment.txt
        <prefix>/<sample_name>.log

        In addition this function itself writes, relative to the current
        working directory: pgenes.bak, ptissues.bak, genes.bak, tissues.bak
        and <prefix>_completed_dfs.pickle.

    Returns:
        The result of ``examine_data(genes=..., tissues=..., pgenes=...,
        ptissues=...)`` for the main and permuted outputs.
    """
    print('Submitting main DEPICT job to cluster')
    if not cores:
        cores = PARAM_NCORES
    # run_path defaults to None, which abspath() cannot handle; fall back to
    # the current directory as documented.
    if run_path is None:
        run_path = _os.path.curdir
    run_path = _os.path.abspath(run_path)
    if not _os.path.isdir(run_path):
        _os.mkdir(run_path)

    # Remembered so the working directory can be restored on the way out.
    # Nothing in this function changes directory itself; presumably one of
    # the callees does -- confirm.
    startdir = _os.path.abspath(_os.path.curdir)

    try:
        # Main run: submitted first so it executes while permutations run
        job = _fyrd.Job(
            run_parse_depict,
            (sample_1, sample_2, prefix, cores, run_path, depict_path),
            name='main_DEPICT',
            cores=cores * 2,
            mem='12GB',
            scriptpath=run_path,
            outpath=run_path,
            runpath=run_path,
            **fyrd_args)
        job.submit()
        print('Job submitted.')

        print('Run permutations')
        pgenes, ptissues = permute_depict(sample_1, sample_2, prefix, cores,
                                          perms, run_path, depict_path,
                                          **fyrd_args)
        # Backups of the permutation results, saved before waiting on the
        # main job
        pgenes.to_pickle('pgenes.bak')
        ptissues.to_pickle('ptissues.bak')

        print('Permutations complete, getting main output')

        genes, tissues = job.get()

        genes.to_pickle('genes.bak')
        tissues.to_pickle('tissues.bak')

        print('Main job completed successfully, DataFrames saved.')

        data = {
            'genes': genes,
            'tissues': tissues,
            'pgenes': pgenes,
            'ptissues': ptissues
        }

        # Single combined pickle of all four results, written before the
        # comparison step
        with open(prefix + '_completed_dfs.pickle', 'wb') as fout:
            _pickle.dump(data, fout)

        data = examine_data(**data)
    finally:
        _os.chdir(startdir)

    return data