Esempio n. 1
0
def make_bigwig_files(bam, bigwig):
    """Create strand-specific, CPM-scaled BigWig coverage tracks from a BAM.

    :param bam: coordinate-sorted BAM file to compute coverage from.
    :param bigwig: output path of the plus-strand BigWig; the minus-strand
        path is derived by replacing '.plus.bw' with '.minus.bw'.
    """
    def bam_to_bigwig(bam, scale, strand, bw):
        # bedGraph intermediates are named after the final BigWig.
        bg, bg_sort = bw.replace('.bw', '.bg'), bw.replace('.bw', '.sort.bg')
        cmd = f'genomeCoverageBed -ibam {bam} -bg -scale {scale} -strand {strand} -du -split > {bg}'
        cmding(cmd)
        cmd = f'bedSort {bg} {bg_sort}'
        cmding(cmd)
        cmd = f'bedGraphToBigWig {bg_sort} {options.genome}/chrNameLength.txt {bw}'
        cmding(cmd)
        # NOTE(review): only the unsorted bedGraph is removed; the sorted one
        # is left behind — confirm whether that is intentional.
        cmding(f'rm {bg}')

    message, start_time = f'Make BigWig files for {bam} ...', time.perf_counter()
    logger.info(message)
    pos_bw, neg_bw = bigwig, bigwig.replace('.plus.bw', '.minus.bw')
    with pysam.AlignmentFile(bam, 'rb') as sam:
        total_reads = sam.mapped
    # Presence of a saved R2 fastq marks the library as paired-end, in which
    # case mapped reads are halved to approximate fragments — TODO confirm.
    r2 = bam.replace('.bam', '.r2.fastq.gz')
    total_reads = total_reads / 2 if os.path.exists(r2) else total_reads
    try:
        # Counts-per-million scale factor.
        scale = 1000000.0 / total_reads
    except ZeroDivisionError:
        logger.error(f'No reads were found in BAM {bam}')
        # Bug fix: ruffus.touch_file() only builds a decorator-helper object
        # and never touches the file system; create the placeholder output
        # explicitly so downstream tasks still see the expected file.
        open(bigwig, 'w').close()
        return
    if options.strand_direction in ('f', 'forward'):
        bam_to_bigwig(bam, scale, '+', pos_bw)
        bam_to_bigwig(bam, -1 * scale, '-', neg_bw)
    else:
        # Reverse-stranded library: swap which strand feeds each track.
        bam_to_bigwig(bam, -1 * scale, '-', pos_bw)
        bam_to_bigwig(bam, scale, '+', neg_bw)
    run_time = int(time.perf_counter() - start_time)
    message = message.replace(' ...', f' completed in [{str(datetime.timedelta(seconds=run_time))}].')
    logger.info(message)
Esempio n. 2
0
    def test_newstyle_ruffus (self):
        """Wire up the numbers pipeline with the new-style Pipeline API and run it."""
        pipeline = Pipeline("test")

        # Seed task: writes the random-numbers list into a fresh tempdir.
        head = pipeline.files(create_random_numbers, None,
                              tempdir + "random_numbers.list")
        head.follows(mkdir(tempdir))

        # Fan the list out into chunk files.
        pipeline.split(task_func=step_4_split_numbers_into_chunks,
                       input=tempdir + "random_numbers.list",
                       output=tempdir + "*.chunks").follows(create_random_numbers)

        # Per-chunk sums of squares.
        pipeline.transform(task_func=step_5_calculate_sum_of_squares,
                           input=step_4_split_numbers_into_chunks,
                           filter=suffix(".chunks"),
                           output=".sums")

        # Merge into the final variance, celebrating on completion.
        merged = pipeline.merge(task_func=step_6_calculate_variance,
                                input=step_5_calculate_sum_of_squares,
                                output=os.path.join(tempdir, "variance.result"))
        merged.posttask(lambda: sys.stdout.write("     hooray\n"))
        merged.posttask(print_hooray_again, print_whoppee_again,
                        touch_file(os.path.join(tempdir, "done")))

        pipeline.run(multiprocess=50, verbose=0)
        output_file = os.path.join(tempdir, "variance.result")
        if not os.path.exists(output_file):
            raise Exception("Missing %s" % output_file)
Esempio n. 3
0
    def test_newstyle_ruffus(self):
        """Assemble the random-numbers pipeline via the Pipeline API and run it."""
        pl = Pipeline("test")

        pl.files(create_random_numbers, None,
                 tempdir + "random_numbers.list").follows(mkdir(tempdir))

        pl.split(task_func=step_4_split_numbers_into_chunks,
                 input=tempdir + "random_numbers.list",
                 output=tempdir + "*.chunks").follows(create_random_numbers)

        pl.transform(task_func=step_5_calculate_sum_of_squares,
                     input=step_4_split_numbers_into_chunks,
                     filter=suffix(".chunks"),
                     output=".sums")

        # Final merge; the post-tasks print the celebratory messages and
        # drop a "done" marker file.
        variance_task = pl.merge(task_func=step_6_calculate_variance,
                                 input=step_5_calculate_sum_of_squares,
                                 output=os.path.join(tempdir, "variance.result"))
        variance_task.posttask(lambda: sys.stdout.write("     hooray\n"))
        variance_task.posttask(print_hooray_again, print_whoppee_again,
                               touch_file(os.path.join(tempdir, "done")))

        pl.run(multiprocess=50, verbose=0)

        result = os.path.join(tempdir, "variance.result")
        if not os.path.exists(result):
            raise Exception("Missing %s" % result)
Esempio n. 4
0
def make_pipeline1(
        pipeline_name,  # Pipelines need to have a unique name
        starting_file_names):
    """Build a three-stage sub-pipeline (originate -> .22 -> .33 -> .44).

    Head and tail tasks are registered so callers can join other pipelines
    to this one without knowing its internal task names.
    """
    pipeline = Pipeline(pipeline_name)

    # Starting files are passed in here for convenience, though they could
    # also be changed later via set_input()/set_output() on the tasks.
    origin = pipeline.originate(task_originate, starting_file_names)
    origin.follows(mkdir(tempdir),
                   mkdir(tempdir + "/testdir", tempdir + "/testdir2"))
    origin.posttask(touch_file(tempdir + "/testdir/whatever.txt"))

    pipeline.transform(
        task_func=task_m_to_1,
        name="add_input",
        # task_originate() is unique in this pipeline, so the function
        # itself identifies the upstream task.
        input=task_originate,
        # regex requires an anchor from Python 3.7 onwards, see
        # https://bugs.python.org/issue34982
        filter=regex(r"^(.*)"),
        add_inputs=add_inputs(tempdir + "/testdir/whatever.txt"),
        output=r"\1.22")

    pipeline.transform(
        task_func=task_1_to_1,
        name="22_to_33",
        # task_1_to_1 is reused below, so look the upstream task up by name.
        input=output_from("add_input"),
        filter=suffix(".22"),
        output=".33")

    tail_task = pipeline.transform(
        task_func=task_1_to_1,
        name="33_to_44",
        # Indexing the pipeline object by task name is another lookup option.
        input=pipeline["22_to_33"],
        filter=suffix(".33"),
        output=".44")

    # Publish the tail Task object so dependants need not know task names.
    pipeline.set_tail_tasks([tail_task])

    # With no tail tasks registered, callers must name the exact internal
    # task, otherwise Ruffus cannot resolve the dependency and raises.
    if DEBUG_do_not_define_tail_task:
        pipeline.set_tail_tasks([])

    # Likewise publish the head so external input can be fed in.
    pipeline.set_head_tasks([pipeline[task_originate]])

    return pipeline
Esempio n. 5
0
def make_pipeline1(pipeline_name,   # Pipelines need to have a unique name
                   starting_file_names):
    """Assemble a sub-pipeline (originate -> add_input -> 22_to_33 -> 33_to_44)
    and expose its head/tail tasks for composition with other pipelines."""
    pl = Pipeline(pipeline_name)

    # Starting files could also be injected later with set_input() /
    # set_output(); passing them in here is simply more convenient.
    pl.originate(task_originate, starting_file_names)\
        .follows(mkdir(tempdir), mkdir(tempdir + "/testdir", tempdir + "/testdir2"))\
        .posttask(touch_file(tempdir + "/testdir/whatever.txt"))

    pl.transform(
        task_func=task_m_to_1,
        name="add_input",
        # task_originate() is unique in this pipeline, so the function
        # itself identifies the upstream task.
        input=task_originate,
        # regex requires an anchor from Python 3.7 onwards, see
        # https://bugs.python.org/issue34982
        filter=regex(r"^(.*)"),
        add_inputs=add_inputs(tempdir + "/testdir/whatever.txt"),
        output=r"\1.22")

    pl.transform(
        task_func=task_1_to_1,
        name="22_to_33",
        # task_1_to_1 is reused below, so look the upstream task up by name.
        input=output_from("add_input"),
        filter=suffix(".22"),
        output=".33")

    tail_task = pl.transform(
        task_func=task_1_to_1,
        name="33_to_44",
        # Indexing the pipeline by task name is another lookup option.
        input=pl["22_to_33"],
        filter=suffix(".33"),
        output=".44")

    # Register the tail Task object so dependants need not know task names.
    pl.set_tail_tasks([tail_task])

    # With no tail tasks registered, callers must name the exact internal
    # task, otherwise Ruffus cannot resolve the dependency and raises.
    if DEBUG_do_not_define_tail_task:
        pl.set_tail_tasks([])

    # Register the head task so external input can be fed into the pipeline.
    pl.set_head_tasks([pl[task_originate]])

    return pl
    def test_newstyle_mkdir (self):
        """Build and run a pipeline whose directories come from mkdir() prerequisites."""
        pipeline = Pipeline("test")

        # Directories c, d and e are requested via several mkdir() forms;
        # "e" is deliberately listed twice.
        dir_task = pipeline.follows(task_which_makes_directories,
                                    mkdir(directories),
                                    mkdir(unicode(tempdir + "c")),
                                    mkdir(unicode(tempdir + "d"),
                                          unicode(tempdir + "e")),
                                    mkdir(unicode(tempdir + "e")))
        dir_task.posttask(touch_file(unicode(tempdir + "f")))

        pipeline.originate(task_which_makes_files,
                           [tempdir + "g", tempdir + "h"])
        pipeline.run(multiprocess=10, verbose=0)

        # Every directory a-e and file f-h must now exist.
        for name in 'abcdefgh':
            full_path = os.path.join(os.path.dirname(__file__), tempdir, name)
            self.assertTrue(os.path.exists(full_path))
Esempio n. 7
0
    def test_newstyle_mkdir(self):
        """Exercise new-style mkdir()/originate() wiring, then verify results."""
        pl = Pipeline("test")

        # "e" is requested twice on purpose; ruffus must tolerate duplicates.
        pl.follows(
            task_which_makes_directories,
            mkdir(directories),
            mkdir(unicode(tempdir + "c")),
            mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")),
            mkdir(unicode(tempdir + "e")),
        ).posttask(touch_file(unicode(tempdir + "f")))

        pl.originate(task_which_makes_files, [tempdir + "g", tempdir + "h"])
        pl.run(multiprocess=10, verbose=0)

        # Directories a-e and files f-h must all have been created.
        expected = [os.path.join(os.path.dirname(__file__), tempdir, c)
                    for c in 'abcdefgh']
        for path in expected:
            self.assertTrue(os.path.exists(path))
Esempio n. 8
0
import unittest
import shutil
# Python 2/3 compatibility: StringIO moved into the io module in Python 3.
try:
    from StringIO import StringIO
except ImportError:  # only catch the expected failure, not every error
    from io import StringIO



def sentinel_file_exists(output_file):
    """check_if_uptodate hook: report whether *output_file* still needs creating."""
    missing = not os.path.exists(output_file)
    if missing:
        return True, "Missing file %s" % output_file
    return False, "File %s exists" % output_file
        
# Flag-file pattern: sentinel_file_exists() reports "needs update" while the
# completion flag is missing, and @posttask creates the flag once the task
# body finishes, so the task runs exactly until its flag exists.
@posttask(touch_file(os.path.join(tempdir, "task1_completed.flag")))
@parallel([[os.path.join(tempdir, "task1_completed.flag")]])
@check_if_uptodate(sentinel_file_exists)
def task1(x):
    """No-op body; the completion flag written by @posttask is the real output."""
    pass

# Same flag-file pattern as task1, ordered after it via @follows.
@follows(task1)
@posttask(touch_file(os.path.join(tempdir, "task2_completed.flag")))
@parallel([[os.path.join(tempdir, "task2_completed.flag")]])
@check_if_uptodate(sentinel_file_exists)
def task2(x):
    """No-op body; the completion flag written by @posttask is the real output."""
    pass
    


Esempio n. 9
0
        oo.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values))


def print_hooray_again():
    """Write the '     hooray again' celebration line to stdout."""
    sys.stdout.write("     hooray again\n")


def print_whoppee_again():
    """Write the '     whoppee again' celebration line to stdout."""
    sys.stdout.write("     whoppee again\n")


#---------------------------------------------------------------
#
#   Calculate sum and sum of squares for each chunk
#
@posttask(lambda: sys.stdout.write("     hooray\n"))
@posttask(print_hooray_again, print_whoppee_again, touch_file(os.path.join(tempdir, "done")))
@merge(step_5_calculate_sum_of_squares, os.path.join(tempdir, "variance.result"))
def step_6_calculate_variance (input_file_names, output_file_name):
    """
    Calculate variance naively
    """
    output = open(output_file_name,  "w")
    #
    #   initialise variables
    #
    all_sum_squared = 0.0
    all_sum         = 0.0
    all_cnt_values  = 0.0
    #
    # added up all the sum_squared, and sum and cnt_values from all the chunks
    #
Esempio n. 10
0
def print_hooray_again():
    """Print the '     hooray again' celebration line."""
    line = "     hooray again"
    print(line)


def print_whoppee_again():
    """Print the '     whoppee again' celebration line."""
    line = "     whoppee again"
    print(line)


#---------------------------------------------------------------
#
#   Calculate sum and sum of squares for each chunk
#
@posttask(lambda: sys.stdout.write("     hooray\n"))
@posttask(print_hooray_again, print_whoppee_again,
          touch_file(os.path.join(tempdir, "done")))
@merge(step_5_calculate_sum_of_squares, os.path.join(tempdir,
                                                     "variance.result"))
def step_6_calculate_variance(input_file_names, output_file_name):
    """
    Calculate variance naively
    """
    output = open(output_file_name, "w")
    #
    #   initialise variables
    #
    all_sum_squared = 0.0
    all_sum = 0.0
    all_cnt_values = 0.0
    #
    # added up all the sum_squared, and sum and cnt_values from all the chunks
Esempio n. 11
0
# Python 2/3 compatibility shim: Python 3 has no ``unicode`` builtin, so
# alias it to ``str`` to keep the unicode(...) calls below working.
if sys.hexversion >= 0x03000000:
    unicode = str

# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   Tasks

# 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# Target directories for the mkdir tests; the first is made absolute on
# purpose so both absolute and relative paths are exercised.
directories = [os.path.abspath(unicode(tempdir + "a")), unicode(tempdir + "b")]


# Directories a/b (from ``directories``), c, d and e are created before the
# task runs; "e" is listed twice, apparently to check that duplicate mkdir()
# requests are tolerated — TODO confirm intent.
@follows(mkdir(directories), mkdir(unicode(tempdir + "c")),
         mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")),
         mkdir(unicode(tempdir + "e")))
@posttask(touch_file(unicode(tempdir + "f")))
def task_which_makes_directories():
    """No-op body: the directories come from the mkdir() prerequisites and
    file "f" from the touch_file() post-task."""
    pass


@originate([tempdir + "g", tempdir + "h"])
def task_which_makes_files(o):
    """Originate task: create output file *o* (empty) via the touch() helper."""
    touch(o)


class Test_task_mkdir(unittest.TestCase):
    def setUp(self):
        """Create the scratch directory the mkdir/originate tasks write into."""
        # Dropped the empty docstring and the dead ``pass`` statement.
        os.makedirs(tempdir)
Esempio n. 12
0
'''


@follows('generate_frames_and_map')
@files('./params.ini', './params.h5')
def convert_params_to_h5(input_file, output_file):
    """Convert ./params.ini into ./params.h5 by shelling out to the
    params_to_h5 helper script.

    NOTE(review): input_file/output_file are ignored here — the helper script
    presumably hard-codes ./params.ini and ./params.h5; confirm.
    """
    converter = '%s/mapping/pipeline/params_to_h5.py' % SAIL_CAR_LOG_PATH
    cmd = 'python %s' % converter
    check_call(cmd, shell=True)


# TODO Also have to run the new bag file extractor for mark2

@follows('convert_params_to_h5')
@files(None, '%s/sentinel' % LDR_DIR)
@posttask(touch_file('%s/sentinel' % LDR_DIR))
def align_ldr(dummy, sentinel):
    """Run LidarAlign.py on the dataset's video; @posttask drops a sentinel
    file afterwards so the step is not repeated.

    :param dummy: unused (the task has no real input file).
    :param sentinel: sentinel path; created by the @posttask, not here.
    """
    cmd = 'python %s/process/LidarAlign.py %s %s' % (SAIL_CAR_LOG_PATH, DSET_DIR, '%s%d.avi' % (DSET, CAMERA))
    # print() works on both Python 2 and 3; the bare ``print cmd`` statement
    # was Python-2-only syntax.
    print(cmd)
    check_call(cmd, shell=True)


@follows('align_ldr')
#@files('params.ini', '%s/sentinel' % POINTS_H5_DIR)
@transform('%s/*.ldr' % LDR_DIR,
           regex('%s/(.*?).ldr' % LDR_DIR),
           r'%s/\1.h5' % POINTS_H5_DIR)
def convert_ldr_to_h5(ldr_file, h5_file):
    exporter = '%s/mapping/pipeline/ldr_to_h5.py' % SAIL_CAR_LOG_PATH
    cmd = 'python {exporter} {fgps} {ldr_file} {h5_file}'.format(exporter=exporter, fgps=GPS_FILE, ldr_file=ldr_file, h5_file=h5_file)
    if NO_TRANSFORM:
Esempio n. 13
0
def touch(filename):
    """Create *filename* as an empty file, truncating any existing content."""
    handle = open(filename, "w")
    handle.close()

# Python 2/3 compatibility shim: Python 3 has no ``unicode`` builtin, so
# alias it to ``str`` to keep the unicode(...) calls below working.
if sys.hexversion >= 0x03000000:
    unicode = str

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   Tasks


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# Target directories for the mkdir tests; the first is made absolute on
# purpose so both absolute and relative paths are exercised.
directories = [os.path.abspath(unicode(tempdir + "a")), unicode(tempdir + "b")]
# Directories a/b (from ``directories``), c, d and e are created by the
# mkdir() prerequisites; "e" is deliberately requested twice — TODO confirm
# this is meant to test duplicate-request tolerance.
@follows(mkdir(directories), mkdir(unicode(tempdir + "c")), mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")), mkdir(unicode(tempdir + "e")))
@posttask(touch_file(unicode(tempdir + "f")))
def task_which_makes_directories ():
    """No-op body: directories come from mkdir() prerequisites and file "f"
    from the touch_file() post-task."""
    pass

@originate([tempdir + "g", tempdir + "h"])
def task_which_makes_files (o):
    """Originate task: create empty output file *o* via the touch() helper."""
    touch(o)

import unittest

class Test_task_mkdir(unittest.TestCase):

    def setUp (self):
        """
        """
        os.makedirs(tempdir)
Esempio n. 14
0

@follows('generate_frames_and_map')
@files('./params.ini', './params.h5')
def convert_params_to_h5(input_file, output_file):
    """Convert ./params.ini into ./params.h5 by shelling out to the
    params_to_h5 helper script.

    NOTE(review): input_file/output_file are ignored here — the helper script
    presumably hard-codes ./params.ini and ./params.h5; confirm.
    """
    converter = '%s/mapping/pipeline/params_to_h5.py' % SAIL_CAR_LOG_PATH
    cmd = 'python %s' % converter
    check_call(cmd, shell=True)


# TODO Also have to run the new bag file extractor for mark2


@follows('convert_params_to_h5')
@files(None, '%s/sentinel' % LDR_DIR)
@posttask(touch_file('%s/sentinel' % LDR_DIR))
def align_ldr(dummy, sentinel):
    """Run LidarAlign.py on the dataset's video; @posttask drops a sentinel
    file afterwards so the step is not repeated.

    :param dummy: unused (the task has no real input file).
    :param sentinel: sentinel path; created by the @posttask, not here.
    """
    cmd = 'python %s/process/LidarAlign.py %s %s' % (SAIL_CAR_LOG_PATH,
                                                     DSET_DIR, '%s%d.avi' %
                                                     (DSET, CAMERA))
    # print() works on both Python 2 and 3; the bare ``print cmd`` statement
    # was Python-2-only syntax.
    print(cmd)
    check_call(cmd, shell=True)


@follows('align_ldr')
#@files('params.ini', '%s/sentinel' % POINTS_H5_DIR)
@transform('%s/*.ldr' % LDR_DIR, regex('%s/(.*?).ldr' % LDR_DIR),
           r'%s/\1.h5' % POINTS_H5_DIR)
def convert_ldr_to_h5(ldr_file, h5_file):
    exporter = '%s/mapping/pipeline/ldr_to_h5.py' % SAIL_CAR_LOG_PATH
    cmd = 'python {exporter} {fgps} {ldr_file} {h5_file}'.format(
Esempio n. 15
0
import unittest
import shutil
# Python 2/3 compatibility: StringIO moved into the io module in Python 3.
try:
    from StringIO import StringIO
except ImportError:  # only catch the expected failure, not every error
    from io import StringIO


def sentinel_file_exists(output_file):
    """check_if_uptodate hook: True (re-run needed) while *output_file* is missing."""
    if os.path.exists(output_file):
        return False, "File %s exists" % output_file
    return True, "Missing file %s" % output_file


# Flag-file pattern: sentinel_file_exists() reports "needs update" while the
# completion flag is missing, and @posttask creates the flag once the task
# body finishes, so the task runs exactly until its flag exists.
@posttask(touch_file(os.path.join(tempdir, "task1_completed.flag")))
@parallel([[os.path.join(tempdir, "task1_completed.flag")]])
@check_if_uptodate(sentinel_file_exists)
def task1(x):
    """No-op body; the completion flag written by @posttask is the real output."""
    pass


# Same flag-file pattern as task1, ordered after it via @follows.
@follows(task1)
@posttask(touch_file(os.path.join(tempdir, "task2_completed.flag")))
@parallel([[os.path.join(tempdir, "task2_completed.flag")]])
@check_if_uptodate(sentinel_file_exists)
def task2(x):
    """No-op body; the completion flag written by @posttask is the real output."""
    pass


class Test_ruffus(unittest.TestCase):