def make_bigwig_files(bam, bigwig):
    """Generate strand-specific BigWig coverage tracks from a BAM file.

    :param bam: path to a coordinate-sorted, indexed BAM file.
    :param bigwig: path of the plus-strand BigWig output; the minus-strand
        path is derived by replacing '.plus.bw' with '.minus.bw'.

    Shells out (via ``cmding``) to genomeCoverageBed, bedSort and
    bedGraphToBigWig, so those tools must be on PATH.
    """

    def bam_to_bigwig(bam, scale, strand, bw):
        # BAM -> bedGraph -> sorted bedGraph -> BigWig, then remove the
        # intermediate bedGraph.
        bg, bg_sort = bw.replace('.bw', '.bg'), bw.replace('.bw', '.sort.bg')
        cmd = f'genomeCoverageBed -ibam {bam} -bg -scale {scale} -strand {strand} -du -split > {bg}'
        cmding(cmd)
        cmd = f'bedSort {bg} {bg_sort}'
        cmding(cmd)
        cmd = f'bedGraphToBigWig {bg_sort} {options.genome}/chrNameLength.txt {bw}'
        cmding(cmd)
        cmding(f'rm {bg}')

    message, start_time = f'Make BigWig files for {bam} ...', time.perf_counter()
    logger.info(message)
    pos_bw, neg_bw = bigwig, bigwig.replace('.plus.bw', '.minus.bw')
    with pysam.AlignmentFile(bam, 'rb') as sam:
        total_reads = sam.mapped
    r2 = bam.replace('.bam', '.r2.fastq.gz')
    # Paired-end runs leave an R2 fastq next to the BAM: count fragments,
    # not individual mates.
    total_reads = total_reads / 2 if os.path.exists(r2) else total_reads
    try:
        # Reads-per-million scale factor.
        scale = 1000000.0 / total_reads
    except ZeroDivisionError:
        logger.error(f'No reads was found in BAM {bam}')
        # BUG FIX: the original called ruffus.touch_file(bigwig), which only
        # constructs a ruffus post-task marker object and never creates the
        # file.  Create the empty placeholder output directly so downstream
        # tasks see it.
        open(bigwig, 'w').close()
        return
    if options.strand_direction in ('f', 'forward'):
        bam_to_bigwig(bam, scale, '+', pos_bw)
        bam_to_bigwig(bam, -1 * scale, '-', neg_bw)
    else:
        # Reverse-stranded library: swap which genomic strand feeds each track.
        bam_to_bigwig(bam, -1 * scale, '-', pos_bw)
        bam_to_bigwig(bam, scale, '+', neg_bw)
    run_time = int(time.perf_counter() - start_time)
    message = message.replace(' ...', f' completed in [{str(datetime.timedelta(seconds=run_time))}].')
    logger.info(message)
def test_newstyle_ruffus(self):
    """Run the whole variance pipeline through the new-style Pipeline API."""
    test_pipeline = Pipeline("test")

    # Seed task: create the list of random numbers (after making tempdir).
    test_pipeline.files(create_random_numbers, None,
                        tempdir + "random_numbers.list")\
        .follows(mkdir(tempdir))

    # Fan the list out into chunk files.
    test_pipeline.split(task_func=step_4_split_numbers_into_chunks,
                        input=tempdir + "random_numbers.list",
                        output=tempdir + "*.chunks")\
        .follows(create_random_numbers)

    # Per-chunk sums and sums of squares.
    test_pipeline.transform(task_func=step_5_calculate_sum_of_squares,
                            input=step_4_split_numbers_into_chunks,
                            filter=suffix(".chunks"),
                            output=".sums")

    # Final merge, with celebratory post-tasks.
    merge_task = test_pipeline.merge(task_func=step_6_calculate_variance,
                                     input=step_5_calculate_sum_of_squares,
                                     output=os.path.join(tempdir, "variance.result"))
    merge_task.posttask(lambda: sys.stdout.write(" hooray\n"))
    merge_task.posttask(print_hooray_again, print_whoppee_again,
                        touch_file(os.path.join(tempdir, "done")))

    test_pipeline.run(multiprocess=50, verbose=0)

    result = os.path.join(tempdir, "variance.result")
    if not os.path.exists(result):
        raise Exception("Missing %s" % result)
def test_newstyle_ruffus(self):
    """New-style API smoke test: build, run, and verify variance.result exists."""
    pipeline = Pipeline("test")

    (pipeline
     .files(create_random_numbers, None, tempdir + "random_numbers.list")
     .follows(mkdir(tempdir)))

    (pipeline
     .split(task_func=step_4_split_numbers_into_chunks,
            input=tempdir + "random_numbers.list",
            output=tempdir + "*.chunks")
     .follows(create_random_numbers))

    pipeline.transform(task_func=step_5_calculate_sum_of_squares,
                       input=step_4_split_numbers_into_chunks,
                       filter=suffix(".chunks"),
                       output=".sums")

    (pipeline
     .merge(task_func=step_6_calculate_variance,
            input=step_5_calculate_sum_of_squares,
            output=os.path.join(tempdir, "variance.result"))
     .posttask(lambda: sys.stdout.write(" hooray\n"))
     .posttask(print_hooray_again, print_whoppee_again,
               touch_file(os.path.join(tempdir, "done"))))

    pipeline.run(multiprocess=50, verbose=0)

    output_file = os.path.join(tempdir, "variance.result")
    if not os.path.exists(output_file):
        raise Exception("Missing %s" % output_file)
def make_pipeline1(pipeline_name,          # Pipelines need to have a unique name
                   starting_file_names):
    """Assemble a sub-pipeline: originate -> add_input -> 22_to_33 -> 33_to_44.

    Head and tail tasks are registered so callers can join this pipeline to
    others without knowing its internal task names.
    """
    test_pipeline = Pipeline(pipeline_name)

    # Starting files could also be changed later via set_input() (transform
    # etc.) or set_output() (originate), but passing them straight into this
    # factory is more convenient.
    originate_task = test_pipeline.originate(task_originate, starting_file_names)
    originate_task.follows(mkdir(tempdir),
                           mkdir(tempdir + "/testdir", tempdir + "/testdir2"))
    originate_task.posttask(touch_file(tempdir + "/testdir/whatever.txt"))

    # Lookup Task from the function name task_originate()
    # (valid so long as that name is unique in the pipeline).
    test_pipeline.transform(
        task_func=task_m_to_1,
        name="add_input",
        input=task_originate,
        # requires an anchor from 3.7 onwards, see
        # https://bugs.python.org/issue34982
        filter=regex(r"^(.*)"),
        add_inputs=add_inputs(tempdir + "/testdir/whatever.txt"),
        output=r"\1.22")

    # Lookup Task from the Task name: the function name is not unique here.
    test_pipeline.transform(
        task_func=task_1_to_1,
        name="22_to_33",
        input=output_from("add_input"),
        filter=suffix(".22"),
        output=".33")

    # Ask the Pipeline itself to look up a Task by name.
    tail_task = test_pipeline.transform(
        task_func=task_1_to_1,
        name="33_to_44",
        input=test_pipeline["22_to_33"],
        filter=suffix(".33"),
        output=".44")

    # Publish the tail task (as a Task() object, no lookup needed) so users
    # of this sub-pipeline can depend on it without knowing task names.
    test_pipeline.set_tail_tasks([tail_task])

    # Without tail tasks, callers must name the exact task inside the
    # Pipeline; otherwise Ruffus cannot tell which task is meant and throws
    # an exception.
    if DEBUG_do_not_define_tail_task:
        test_pipeline.set_tail_tasks([])

    # Publish the head task so callers can feed input in via the Pipeline
    # alone.
    test_pipeline.set_head_tasks([test_pipeline[task_originate]])

    return test_pipeline
def make_pipeline1(pipeline_name,   # Pipelines need to have a unique name
                   starting_file_names):
    """Create the first sub-pipeline and expose its head and tail tasks."""
    pipeline = Pipeline(pipeline_name)

    # The starting files could be changed later with set_input() for
    # transform etc. or set_output() for originate, but it is convenient to
    # pass them into this factory directly.
    pipeline.originate(task_originate, starting_file_names)\
            .follows(mkdir(tempdir),
                     mkdir(tempdir + "/testdir", tempdir + "/testdir2"))\
            .posttask(touch_file(tempdir + "/testdir/whatever.txt"))

    pipeline.transform(task_func=task_m_to_1,
                       # Lookup Task from function name task_originate(),
                       # so long as this is unique in the pipeline.
                       name="add_input",
                       input=task_originate,
                       # requires an anchor from 3.7 onwards, see
                       # https://bugs.python.org/issue34982
                       filter=regex(r"^(.*)"),
                       add_inputs=add_inputs(tempdir + "/testdir/whatever.txt"),
                       output=r"\1.22")

    pipeline.transform(task_func=task_1_to_1,
                       # Lookup Task from Task name; the function name is
                       # not unique in the pipeline.
                       name="22_to_33",
                       input=output_from("add_input"),
                       filter=suffix(".22"),
                       output=".33")

    tail_task = pipeline.transform(task_func=task_1_to_1,
                                   # Ask Pipeline to look up the Task by name.
                                   name="33_to_44",
                                   input=pipeline["22_to_33"],
                                   filter=suffix(".33"),
                                   output=".44")

    # Expose the tail (a Task() object, no lookup required) so dependents
    # need not know internal task names.
    pipeline.set_tail_tasks([tail_task])

    # With no tail tasks defined, callers must specify the exact task within
    # the Pipeline or Ruffus raises an exception.
    if DEBUG_do_not_define_tail_task:
        pipeline.set_tail_tasks([])

    # Expose the head so callers can send input in via the Pipeline alone.
    pipeline.set_head_tasks([pipeline[task_originate]])

    return pipeline
def test_newstyle_mkdir(self):
    """Directories from @follows(mkdir(...)) and originate outputs must all exist."""
    pipeline = Pipeline("test")
    maker = pipeline.follows(task_which_makes_directories,
                             mkdir(directories),
                             mkdir(unicode(tempdir + "c")),
                             mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")),
                             mkdir(unicode(tempdir + "e")))
    maker.posttask(touch_file(unicode(tempdir + "f")))
    pipeline.originate(task_which_makes_files, [tempdir + "g", tempdir + "h"])
    pipeline.run(multiprocess=10, verbose=0)
    # Every expected directory/file a..h ends up under tempdir.
    for name in 'abcdefgh':
        fullpath = os.path.join(os.path.dirname(__file__), tempdir, name)
        self.assertTrue(os.path.exists(fullpath))
def test_newstyle_mkdir(self):
    """Run the mkdir pipeline and check every expected path a..h exists."""
    test_pipeline = Pipeline("test")
    test_pipeline.follows(
        task_which_makes_directories,
        mkdir(directories),
        mkdir(unicode(tempdir + "c")),
        mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")),
        mkdir(unicode(tempdir + "e")),
    ).posttask(touch_file(unicode(tempdir + "f")))
    test_pipeline.originate(task_which_makes_files,
                            [tempdir + "g", tempdir + "h"])
    test_pipeline.run(multiprocess=10, verbose=0)
    base = os.path.dirname(__file__)
    for letter in 'abcdefgh':
        self.assertTrue(os.path.exists(os.path.join(base, tempdir, letter)))
import unittest
import shutil

# Python 2/3 compatibility: StringIO moved into the io module in Python 3.
try:
    from StringIO import StringIO
except ImportError:  # BUG FIX: was a bare 'except', which also swallowed
    from io import StringIO  # SystemExit/KeyboardInterrupt.


def sentinel_file_exists(output_file):
    """Ruffus up-to-date check: the task needs to run iff its flag is missing.

    :param output_file: path of the sentinel flag file.
    :returns: (needs_update, reason) as expected by @check_if_uptodate.
    """
    if not os.path.exists(output_file):
        return True, "Missing file %s" % output_file
    else:
        return False, "File %s exists" % output_file


# Dummy task: its only observable effect is the completion flag written by
# the posttask; sentinel_file_exists keeps it from re-running.
@posttask(touch_file(os.path.join(tempdir, "task1_completed.flag")))
@parallel([[os.path.join(tempdir, "task1_completed.flag")]])
@check_if_uptodate(sentinel_file_exists)
def task1(x):
    pass


# Second dummy task, ordered after task1 via @follows.
@follows(task1)
@posttask(touch_file(os.path.join(tempdir, "task2_completed.flag")))
@parallel([[os.path.join(tempdir, "task2_completed.flag")]])
@check_if_uptodate(sentinel_file_exists)
def task2(x):
    pass
oo.write("%s\n%s\n%d\n" % (repr(sum_squared), repr(sum), cnt_values)) def print_hooray_again(): print(" hooray again") def print_whoppee_again(): print(" whoppee again") #--------------------------------------------------------------- # # Calculate sum and sum of squares for each chunk # @posttask(lambda: sys.stdout.write(" hooray\n")) @posttask(print_hooray_again, print_whoppee_again, touch_file(os.path.join(tempdir, "done"))) @merge(step_5_calculate_sum_of_squares, os.path.join(tempdir, "variance.result")) def step_6_calculate_variance (input_file_names, output_file_name): """ Calculate variance naively """ output = open(output_file_name, "w") # # initialise variables # all_sum_squared = 0.0 all_sum = 0.0 all_cnt_values = 0.0 # # added up all the sum_squared, and sum and cnt_values from all the chunks #
def print_hooray_again():
    # Post-task hook registered on step_6_calculate_variance below.
    print(" hooray again")


def print_whoppee_again():
    # Second post-task hook, registered alongside print_hooray_again.
    print(" whoppee again")


#---------------------------------------------------------------
#
#   Calculate the variance from the per-chunk sums
#
@posttask(lambda: sys.stdout.write(" hooray\n"))
@posttask(print_hooray_again, print_whoppee_again,
          touch_file(os.path.join(tempdir, "done")))
@merge(step_5_calculate_sum_of_squares, os.path.join(tempdir, "variance.result"))
def step_6_calculate_variance(input_file_names, output_file_name):
    """
    Calculate variance naively

    Merges the per-chunk (sum_squared, sum, count) files produced by
    step_5_calculate_sum_of_squares into a single variance.result file.
    """
    output = open(output_file_name, "w")
    #
    #   initialise variables
    #
    all_sum_squared = 0.0
    all_sum = 0.0
    all_cnt_values = 0.0
    #
    # added up all the sum_squared, and sum and cnt_values from all the chunks
    # NOTE(review): the function body is truncated in this chunk; the
    # accumulation loop over input_file_names follows beyond this view.
if sys.hexversion >= 0x03000000: unicode = str # 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 # Tasks # 88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 directories = [os.path.abspath(unicode(tempdir + "a")), unicode(tempdir + "b")] @follows(mkdir(directories), mkdir(unicode(tempdir + "c")), mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")), mkdir(unicode(tempdir + "e"))) @posttask(touch_file(unicode(tempdir + "f"))) def task_which_makes_directories(): pass @originate([tempdir + "g", tempdir + "h"]) def task_which_makes_files(o): touch(o) class Test_task_mkdir(unittest.TestCase): def setUp(self): """ """ os.makedirs(tempdir) pass
''' @follows('generate_frames_and_map') @files('./params.ini', './params.h5') def convert_params_to_h5(input_file, output_file): converter = '%s/mapping/pipeline/params_to_h5.py' % SAIL_CAR_LOG_PATH cmd = 'python %s' % converter check_call(cmd, shell=True) # TODO Also have to run the new bag file extractor for mark2 @follows('convert_params_to_h5') @files(None, '%s/sentinel' % LDR_DIR) @posttask(touch_file('%s/sentinel' % LDR_DIR)) def align_ldr(dummy, sentinel): cmd = 'python %s/process/LidarAlign.py %s %s' % (SAIL_CAR_LOG_PATH, DSET_DIR, '%s%d.avi' % (DSET, CAMERA)) print cmd check_call(cmd, shell=True) @follows('align_ldr') #@files('params.ini', '%s/sentinel' % POINTS_H5_DIR) @transform('%s/*.ldr' % LDR_DIR, regex('%s/(.*?).ldr' % LDR_DIR), r'%s/\1.h5' % POINTS_H5_DIR) def convert_ldr_to_h5(ldr_file, h5_file): exporter = '%s/mapping/pipeline/ldr_to_h5.py' % SAIL_CAR_LOG_PATH cmd = 'python {exporter} {fgps} {ldr_file} {h5_file}'.format(exporter=exporter, fgps=GPS_FILE, ldr_file=ldr_file, h5_file=h5_file) if NO_TRANSFORM:
def touch(filename):
    # Create an empty file (or truncate an existing one).
    with open(filename, "w"):
        pass


# On Python 3 alias `unicode` to `str` so the tasks below can exercise
# unicode path names on both major versions.
if sys.hexversion >= 0x03000000:
    unicode = str


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#   Tasks
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

# Directory targets: one absolute and one relative path, to cover both forms.
directories = [os.path.abspath(unicode(tempdir + "a")), unicode(tempdir + "b")]


# mkdir() is exercised with a list, a single path, multiple paths, and a
# deliberately repeated path (tempdir + "e") in one @follows.
@follows(mkdir(directories), mkdir(unicode(tempdir + "c")),
         mkdir(unicode(tempdir + "d"), unicode(tempdir + "e")),
         mkdir(unicode(tempdir + "e")))
@posttask(touch_file(unicode(tempdir + "f")))
def task_which_makes_directories():
    pass


@originate([tempdir + "g", tempdir + "h"])
def task_which_makes_files(o):
    touch(o)


import unittest


class Test_task_mkdir(unittest.TestCase):
    # NOTE(review): the class continues beyond this chunk.

    def setUp(self):
        """Create the scratch directory used by the pipeline tasks."""
        os.makedirs(tempdir)
@follows('generate_frames_and_map') @files('./params.ini', './params.h5') def convert_params_to_h5(input_file, output_file): converter = '%s/mapping/pipeline/params_to_h5.py' % SAIL_CAR_LOG_PATH cmd = 'python %s' % converter check_call(cmd, shell=True) # TODO Also have to run the new bag file extractor for mark2 @follows('convert_params_to_h5') @files(None, '%s/sentinel' % LDR_DIR) @posttask(touch_file('%s/sentinel' % LDR_DIR)) def align_ldr(dummy, sentinel): cmd = 'python %s/process/LidarAlign.py %s %s' % (SAIL_CAR_LOG_PATH, DSET_DIR, '%s%d.avi' % (DSET, CAMERA)) print cmd check_call(cmd, shell=True) @follows('align_ldr') #@files('params.ini', '%s/sentinel' % POINTS_H5_DIR) @transform('%s/*.ldr' % LDR_DIR, regex('%s/(.*?).ldr' % LDR_DIR), r'%s/\1.h5' % POINTS_H5_DIR) def convert_ldr_to_h5(ldr_file, h5_file): exporter = '%s/mapping/pipeline/ldr_to_h5.py' % SAIL_CAR_LOG_PATH cmd = 'python {exporter} {fgps} {ldr_file} {h5_file}'.format(
import unittest import shutil try: from StringIO import StringIO except: from io import StringIO def sentinel_file_exists(output_file): if not os.path.exists(output_file): return True, "Missing file %s" % output_file else: return False, "File %s exists" % output_file @posttask(touch_file(os.path.join(tempdir, "task1_completed.flag"))) @parallel([[os.path.join(tempdir, "task1_completed.flag")]]) @check_if_uptodate(sentinel_file_exists) def task1(x): pass @follows(task1) @posttask(touch_file(os.path.join(tempdir, "task2_completed.flag"))) @parallel([[os.path.join(tempdir, "task2_completed.flag")]]) @check_if_uptodate(sentinel_file_exists) def task2(x): pass class Test_ruffus(unittest.TestCase):