Example #1
0
    def test_newstyle_task(self):
        """Build the four-task pipeline with the new-style Pipeline API,
        run it, and compare the merged output against the expected text."""
        pipeline = Pipeline("test")

        # Two seed tasks, each creating a pair of files under tempdir.
        seed_a = pipeline.files(
            task1, [[None, tempdir + "a.1"], [None, tempdir + "b.1"]])
        seed_a.follows(mkdir(tempdir))

        seed_b = pipeline.files(
            task2, [[None, tempdir + "c.1"], [None, tempdir + "d.1"]])
        seed_b.follows(mkdir(tempdir))

        # task3 rewrites its input list via inputs(...) before transforming.
        pipeline.transform(task_func=task3,
                           input=task1,
                           filter=regex(r"(.+)"),
                           replace_inputs=ruffus.inputs(
                               ((r"\1"), task2,
                                "test_transform_inputs.*y")),
                           output=r"\1.output")
        pipeline.merge(task4, (task3), tempdir + "final.output")

        pipeline.run([task4], multiprocess=10, verbose=0)

        expected = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
            tempdir=tempdir)
        with open(tempdir + "final.output") as result_file:
            actual = result_file.read()
        self.assertEqual(expected, actual)
 def test_newstyle_no_re_match (self):
     """inputs(...) with more than one positional argument is invalid;
     building/running the pipeline must raise one of the two expected
     exception types, otherwise the test fails."""
     try:
         test_pipeline = Pipeline("test")
         # Deliberately pass two positional arguments to inputs().
         test_pipeline.transform(task_func = task_2,
                                 input = None,
                                 filter = regex(tempdir + "b"),
                                 replace_inputs = inputs(tempdir + "a", tempdir + "b"),
                                 output = "task_1.output")
         test_pipeline.run(multiprocess = 10, verbose = 0)
     except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
         print("\tExpected exception thrown 1")
         return
     except ruffus.ruffus_exceptions.error_inputs_multiple_args:
         print("\tExpected exception thrown 2")
         return
     raise Exception("Inputs(...) with multiple arguments should have thrown an exception")
 def test_newstyle_no_re_match(self):
     """Duplicate of the test above: inputs(...) with two positional
     arguments must raise one of the two expected exception types."""
     try:
         test_pipeline = Pipeline("test")
         # Deliberately invalid: inputs() takes a single argument.
         test_pipeline.transform(task_func=task_2,
                                 input=None,
                                 filter=regex(tempdir + "b"),
                                 replace_inputs=inputs(
                                     tempdir + "a", tempdir + "b"),
                                 output="task_1.output")
         test_pipeline.run(multiprocess=10, verbose=0)
     except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
         print("\tExpected exception thrown 1")
         return
     except ruffus.ruffus_exceptions.error_inputs_multiple_args:
         print("\tExpected exception thrown 2")
         return
     raise Exception(
         "Inputs(...) with multiple arguments should have thrown an exception"
     )
Example #4
0
    def test_newstyle_task(self):
        """Exercise the new-style Pipeline API end to end and verify the
        content of the merged final output file."""
        pipe = Pipeline("test")

        # Seed tasks: four files created under tempdir once it exists.
        pipe.files(
            task1,
            [[None, tempdir + "a.1"], [None, tempdir + "b.1"]]
        ).follows(mkdir(tempdir))

        pipe.files(
            task2,
            [[None, tempdir + "c.1"], [None, tempdir + "d.1"]]
        ).follows(mkdir(tempdir))

        # Replace task3's inputs with a mix of the matched path, a task
        # dependency and a glob before producing the .output files.
        pipe.transform(task_func=task3,
                       input=task1,
                       filter=regex(r"(.+)"),
                       replace_inputs=ruffus.inputs(
                           ((r"\1"), task2, "test_transform_inputs.*y")),
                       output=r"\1.output")
        pipe.merge(task4, (task3), tempdir + "final.output")

        pipe.run([task4], multiprocess=10, verbose=0)

        expected_output = "{tempdir}a.1.output:test_transform_inputs.py,{tempdir}a.1,{tempdir}c.1,{tempdir}d.1;{tempdir}b.1.output:test_transform_inputs.py,{tempdir}b.1,{tempdir}c.1,{tempdir}d.1;".format(
            tempdir=tempdir)
        with open(tempdir + "final.output") as final_file:
            produced_output = final_file.read()
        self.assertEqual(expected_output, produced_output)
@transform(task1, suffix(".1"), ".2")
def task2(infiles, outfiles, *extra_params):
    """Second task: log the job, run the test I/O, log completion.

    The same JSON record is appended to jobs.start before the work and
    to jobs.finish after it, so interrupted jobs can be detected.
    """
    record = 'job = %s\n' % json.dumps([infiles, outfiles])
    with open(tempdir + "jobs.start", "a") as log:
        log.write(record)
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as log:
        log.write(record)


#
#    task3
#
@transform(task2, regex('(.*).2'), inputs([r"\1.2", tempdir + "a.1"]), r'\1.3')
@posttask(lambda: do_write(test_file, "Task 3 Done\n"))
def task3(infiles, outfiles, *extra_params):
    """Third task: log the job, run the test I/O, log completion.

    Appends the same JSON job record to jobs.start before the work and
    to jobs.finish afterwards.
    """
    record = 'job = %s\n' % json.dumps([infiles, outfiles])
    with open(tempdir + "jobs.start", "a") as log:
        log.write(record)
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as log:
        log.write(record)


#
#    task4
#
    # NOTE(review): orphan fragment — the enclosing `def` header is missing
    # from this chunk; presumably the body of a create_intervals task (the
    # local_realignment task below consumes `create_intervals`). Verify
    # against the original file.
    cmd_dict = CMD_DICT.copy()
    cmd_dict['infile'] = input_file
    cmd_dict['outfile'] = output_file
    pmsg('Interval Creation', input_file, output_file)
    # GATK RealignerTargetCreator command; the %(...)s placeholders are
    # presumably interpolated from cmd_dict by call() — confirm.
    gatk_cmd = '%(gatk)s --analysis_type RealignerTargetCreator ' + \
            '--reference_sequence %(reference)s ' + \
            '--DBSNP %(dbsnp)s ' + \
            '--input_file %(infile)s ' + \
            '--out %(outfile)s'
    call(gatk_cmd, cmd_dict)

# Realign around possible indels
@follows(mkdir('realigned'))
@transform(create_intervals,
           regex(r'^intervals/(.+)\.intervals$'),
           inputs([r'deduped/\1.deduped.bam', r'intervals/\1.intervals']),
           r'realigned/\1.realigned.bam')
def local_realignment(input_files, output_file):
    '''Realign reads around candidate indels'''
    # input_files pairs the deduped BAM with its intervals file.
    params = CMD_DICT.copy()
    params['bam_file'] = input_files[0]
    params['indel_intervals'] = input_files[1]
    params['outfile'] = output_file
    pmsg('Local Realignment', ', '.join(input_files), output_file)
    # GATK IndelRealigner command template; placeholders filled from params.
    gatk_cmd = ('%(gatk)s --analysis_type IndelRealigner '
                '--reference_sequence %(reference)s '
                '--DBSNP %(dbsnp)s '
                '--input_file %(bam_file)s '
                '--targetIntervals %(indel_intervals)s '
                '--out %(outfile)s')
    call(gatk_cmd, params)
Example #7
0
# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
@follows(mkdir(tempdir))
@ruffus.files([[None, tempdir + "a.1"], [None, tempdir + "b.1"]])
def task1(i, o):
    """Seed task: create (touch) output file *o*; input *i* is None."""
    touch(o)


@follows(mkdir(tempdir))
@ruffus.files([[None, tempdir + "c.1"], [None, tempdir + "d.1"]])
def task2(i, o):
    """Seed task: create (touch) output file *o*; input *i* is None."""
    touch(o)


@transform(task1, regex(r"(.+)"),
           ruffus.inputs(((r"\1"), task2, "test_transform_inputs.*y")),
           r"\1.output")
def task3(i, o):
    """Write the sorted, comma-joined input file names to output *o*.

    Bug fix: the original looped ``for f in o`` — iterating the characters
    of the single output path string — never used the loop variable, and
    reopened/rewrote ``o`` once per character. The file is now written
    exactly once; the final contents are unchanged.
    """
    names = ",".join(sorted(i))
    with open(o, "w") as ff:
        ff.write(names)


@merge((task3), tempdir + "final.output")
def task4(i, o):
    """Concatenate every input file into *o* as ``name:contents;`` records,
    ordered by sorted input file name."""
    with open(o, "w") as out_file:
        for in_name in sorted(i):
            with open(in_name) as in_file:
                out_file.write(in_name + ":" + in_file.read() + ";")
@follows(mkdir('sai'), mkdir('logs'))
@transform(copy_sequence, regex(r'^fastq/(.+)_sequence\.fastq\.gz$'), r'sai/\1.sai')
def fastq_to_sai(input_file, output_file):
    '''Convert FASTQ files to SAI files.'''
    params = CMD_DICT.copy()
    params['infile'] = input_file
    params['outfile'] = output_file
    pmsg('Aligning sequences', params['infile'], params['outfile'])
    # bwa aln command template; placeholders filled from params by call().
    aln_cmd = '%(bwa)s aln -t %(threads)s -f %(outfile)s %(reference)s %(infile)s'
    call(aln_cmd, params)

# Merge paired ends to SAM
@follows(mkdir('sam'))
@transform(fastq_to_sai, regex(r'^sai/(\w+)_s_(\d)(_1)?\.sai$'),
           inputs([r'sai/\1_s_\2*.sai', r'fastq/\1_s_\2*.fastq.gz']),
           r'sam/\1_s_\2.sam')
def make_sam(input_files, output_file):
    '''Convert SAI files and FASTQ files to SAM files.'''

    # NOTE(review): this definition is truncated in this chunk — the body
    # stops after the assert below; a fuller copy later in the file goes on
    # to build and run the bwa command.
    def saicmp(x, y):
        '''Compare function for moving sai files to front of list'''
        # NOTE: cmp() is Python 2 only; this code predates Python 3.
        if x.endswith('sai') and not y.endswith('sai'):
            return - 1
        elif y.endswith('sai') and not x.endswith('sai'):
            return 1
        else:
            return cmp(x, y)

    cmd_dict = CMD_DICT.copy()
    assert type(input_files) is type([])
Example #9
0

# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
@follows(mkdir(tempdir))
@ruffus.files([[None, tempdir + "a.1"], [None, tempdir + "b.1"]])
def task1(i, o):
    """Seed task: create (touch) output file *o*; input *i* is None."""
    touch(o)


@follows(mkdir(tempdir))
@ruffus.files([[None, tempdir + "c.1"], [None, tempdir + "d.1"]])
def task2(i, o):
    """Seed task: create (touch) output file *o*; input *i* is None."""
    touch(o)


@transform(task1, regex(r"(.+)"), ruffus.inputs(((r"\1"), task2, "test_transform_inputs.*y")), r"\1.output")
def task3(i, o):
    """Write the sorted, comma-joined input file names to output *o*.

    Bug fix: the original looped ``for f in o`` — iterating the characters
    of the single output path string — never used the loop variable, and
    reopened/rewrote ``o`` once per character. The file is now written
    exactly once; the final contents are unchanged.
    """
    names = ",".join(sorted(i))
    with open(o, "w") as ff:
        ff.write(names)


@merge((task3), tempdir + "final.output")
def task4(i, o):
    """Merge all input files into *o*, writing one ``name:contents;``
    record per input, sorted by file name."""
    with open(o, "w") as merged:
        for source in sorted(i):
            with open(source) as src:
                merged.write(source + ":" + src.read() + ";")

# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   imports

# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   Tasks

# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# Applying @transform with a multi-argument inputs(...) is invalid; this
# module-level try/except asserts that one of the two expected exception
# types is raised while the decorator is evaluated at import time.
try:
    @transform(None, regex(tempdir + "b"),
               inputs(tempdir + "a", tempdir + "b"), "task_1.output")
    def task_1(i, o):
        # Create/truncate every output path listed in o.
        for out_name in o:
            open(out_name, 'w')
except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
    print("\tExpected exception thrown 1")
except ruffus.ruffus_exceptions.error_inputs_multiple_args:
    print("\tExpected exception thrown 2")


def task_2(i, o):
    """Create (or truncate) every file listed in *o*; *i* is ignored."""
    for out_name in o:
        open(out_name, 'w')


class Test_task_mkdir(unittest.TestCase):
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

import unittest

import json


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   Tasks


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
try:
    # inputs() with two positional arguments is deliberately invalid; one
    # of the two exception types below is expected while the decorator is
    # evaluated at import time.
    @transform(None, regex(tempdir + "b"), inputs(tempdir + "a", tempdir + "b"), "task_1.output")
    def task_1 (i, o):
        for f in o:
            open(f, 'w')
except ruffus.ruffus_exceptions.error_task_transform_inputs_multiple_args:
    print("\tExpected exception thrown 1")
except ruffus.ruffus_exceptions.error_inputs_multiple_args:
    print("\tExpected exception thrown 2")

def task_2(i, o):
    """Touch each output path in *o* by opening it for writing."""
    for path in o:
        open(path, 'w')


class Test_task_mkdir(unittest.TestCase):
Example #12
0
def main():
    """Drive the proforma ruffus pipeline: parse CLI options, wire up the
    test tasks, print the flowchart and run the pipeline."""

    #########
    # SETUP #
    #########

    # Catch JGI logon and password from the command line.
    arg_parser = ruffus.cmdline.get_argparse(
        description='5 accessions variant calling pipeline.')
    arg_parser.add_argument('--email', '-e',
                            help='Logon email address for JGI',
                            type=str,
                            dest='jgi_logon')
    arg_parser.add_argument('--password', '-p',
                            help='JGI password',
                            type=str,
                            dest='jgi_password')
    opts = arg_parser.parse_args()
    jgi_logon = opts.jgi_logon
    jgi_password = opts.jgi_password

    ##################
    # PIPELINE STEPS #
    ##################

    # Initialise the pipeline.
    pipeline = ruffus.Pipeline.pipelines["main"]

    # Test originate job.
    originate_outputs = ['ruffus/foo.txt', 'ruffus/bar.txt']
    originate_task = pipeline.originate(
        name='test_originate',
        task_func=functions.generate_job_function(
            job_script='src/test_originate',
            job_name='test_originate',
            job_type='originate'),
        output=originate_outputs)

    # Test download job — requires JGI credentials.
    if not (jgi_logon and jgi_password):
        raise ValueError('Supply jgi_logon and jgi_password')
    download_task = pipeline.originate(
        name='test_download',
        task_func=functions.generate_job_function(
            job_script='src/test_download',
            job_name='test_download',
            job_type='download'),
        output='ruffus/download.txt',
        extras=[jgi_logon, jgi_password])

    # Test transform with multiple outputs (e.g. bamfile, FASTA etc).
    transform_task = pipeline.transform(
        name="test_transform",
        task_func=functions.generate_job_function(
            job_script='src/test_transform',
            job_name='test_transform',
            job_type='transform'),
        input=originate_task,
        filter=ruffus.suffix(".txt"),
        output=["_transformed.txt", "_transformed.bam"])

    # Transform ONLY the bam files produced by test_transform.
    #
    # NB: `input` has to be an object, not ruffus.output_from().
    # `replace_inputs` uses `ruffus.inputs()` to match the files, but
    # `filter` has to match the FIRST file produced by the previous step,
    # NOT necessarily the file that will be transformed!
    selective_task = pipeline.transform(
        name="test_selective_transform",
        task_func=functions.generate_job_function(
            job_script='src/test_selective_transform',
            job_name='test_selective_transform',
            job_type='transform'),
        input=transform_task,
        replace_inputs=ruffus.inputs(r"\1.bam"),
        filter=ruffus.suffix(".txt"),
        output=".bof")

    merge_task = pipeline.merge(
        name='test_merge',
        task_func=functions.generate_job_function(
            job_script='src/test_merge',
            job_name='test_merge',
            job_type='merge'),
        input=transform_task,
        output='ruffus/foobar_merge.txt')

    ###################
    # RUFFUS COMMANDS #
    ###################

    # Print the flowchart.
    ruffus.pipeline_printout_graph(
        "ruffus/flowchart.pdf", "pdf",
        pipeline_name="Ruffus proforma pipeline")

    # Run the pipeline.
    ruffus.cmdline.run(opts, multithread=8)
Example #13
0
@transform(copy_sequence, regex(r'^fastq/(.+)_sequence\.fastq\.gz$'),
           r'sai/\1.sai')
def fastq_to_sai(input_file, output_file):
    '''Convert FASTQ files to SAI files.'''
    params = CMD_DICT.copy()
    params['infile'] = input_file
    params['outfile'] = output_file
    pmsg('Aligning sequences', params['infile'], params['outfile'])
    # bwa aln command template; placeholders filled from params by call().
    aln_cmd = '%(bwa)s aln -t %(threads)s -f %(outfile)s %(reference)s %(infile)s'
    call(aln_cmd, params)


# Merge paired ends to SAM
@follows(mkdir('sam'))
@transform(fastq_to_sai, regex(r'^sai/(\w+)_s_(\d)(_1)?\.sai$'),
           inputs([r'sai/\1_s_\2*.sai', r'fastq/\1_s_\2*.fastq.gz']),
           r'sam/\1_s_\2.sam')
def make_sam(input_files, output_file):
    '''Convert SAI files and FASTQ files to SAM files.'''
    # NOTE(review): this definition is truncated in this chunk — the body
    # ends at the pmsg call below; the remainder is not visible here.
    def saicmp(x, y):
        '''Compare function for moving sai files to front of list'''
        # NOTE: cmp() is Python 2 only; this code predates Python 3.
        if x.endswith('sai') and not y.endswith('sai'):
            return -1
        elif y.endswith('sai') and not x.endswith('sai'):
            return 1
        else:
            return cmp(x, y)

    cmd_dict = CMD_DICT.copy()
    assert type(input_files) is type([])
    pmsg('Generating SAM file', ', '.join(input_files), output_file)
def task2(infiles, outfiles, *extra_params):
    """Second task: record the job, run the test I/O, record completion.

    The same JSON job record is appended to jobs.start before the work
    and to jobs.finish after it.
    """
    record = 'job = %s\n' % json.dumps([infiles, outfiles])
    with open(tempdir + "jobs.start", "a") as log:
        log.write(record)
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as log:
        log.write(record)



#
#    task3
#
@transform(task2, regex('(.*).2'), inputs([r"\1.2", tempdir + "a.1"]), r'\1.3')
@posttask(lambda: do_write(test_file, "Task 3 Done\n"))
def task3(infiles, outfiles, *extra_params):
    """Third task: record the job, run the test I/O, record completion.

    The same JSON job record is appended to jobs.start before the work
    and to jobs.finish after it.
    """
    record = 'job = %s\n' % json.dumps([infiles, outfiles])
    with open(tempdir + "jobs.start", "a") as log:
        log.write(record)
    test_job_io(infiles, outfiles, extra_params)
    with open(tempdir + "jobs.finish", "a") as log:
        log.write(record)



#
#    task4
    # NOTE(review): orphan fragment — the enclosing `def` header is missing
    # from this chunk; presumably the body of a create_intervals task (the
    # local_realignment task below consumes `create_intervals`). Verify
    # against the original file.
    cmd_dict = CMD_DICT.copy()
    cmd_dict['infile'] = input_file
    cmd_dict['outfile'] = output_file
    pmsg('Interval Creation', input_file, output_file)
    # GATK RealignerTargetCreator command; the %(...)s placeholders are
    # presumably interpolated from cmd_dict by call() — confirm.
    gatk_cmd = '%(gatk)s --analysis_type RealignerTargetCreator ' + \
            '--reference_sequence %(reference)s ' + \
            '--DBSNP %(dbsnp)s ' + \
            '--input_file %(infile)s ' + \
            '--out %(outfile)s'
    call(gatk_cmd, cmd_dict)


# Realign around possible indels
@follows(mkdir('realigned'))
@transform(create_intervals, regex(r'^intervals/(.+)\.intervals$'),
           inputs([r'deduped/\1.deduped.bam', r'intervals/\1.intervals']),
           r'realigned/\1.realigned.bam')
def local_realignment(input_files, output_file):
    '''Realign reads around candidate indels'''
    # input_files pairs the deduped BAM with its intervals file.
    params = CMD_DICT.copy()
    params['bam_file'] = input_files[0]
    params['indel_intervals'] = input_files[1]
    params['outfile'] = output_file
    pmsg('Local Realignment', ', '.join(input_files), output_file)
    # GATK IndelRealigner command template; placeholders filled from params.
    gatk_cmd = ('%(gatk)s --analysis_type IndelRealigner '
                '--reference_sequence %(reference)s '
                '--DBSNP %(dbsnp)s '
                '--input_file %(bam_file)s '
                '--targetIntervals %(indel_intervals)s '
                '--out %(outfile)s')
    call(gatk_cmd, params)