'''
Authors: Bernie Pope, Gayle Philip, Clare Sloggett

Description:

Simple pipeline to demonstrate how to use the base tools.
Counts the number of lines in a set of files and then sums
them up.
'''

import sys
from ruffus import *
from utils import (runStageCheck, getOptions, initLog)
from cmdline_args import get_cmdline_args

# Parse the command line and initialise options/logging before any
# stage is defined, because the stage bodies close over them.
args = get_cmdline_args()
options = getOptions(args)
logDir = options.pipeline['logDir']
logger = initLog(options)

# the input files
data_files = ['test_data/data1.txt', 'test_data/data2.txt']


# count the number of lines in a file
@transform(data_files, suffix('.txt'), ['.count', '.count.Success'])
def countLines(file, outputs):
    """Run the 'countLines' stage on *file*.

    outputs is a pair: the .count result file and the .count.Success
    flag file that marks the stage as completed.
    """
    output, flagFile = outputs
    runStageCheck('countLines', flagFile, logger, options, file, output)


# sum the counts from the previous stage
@merge(countLines, ['test_data/total.txt', 'test_data/total.Success'])
def total(files, outputs):
    """Run the 'total' stage, summing the per-file counts into one file.

    NOTE(review): the original source ended with a dangling @merge
    decorator and no decorated function — a SyntaxError. This body is
    reconstructed by analogy with countLines (same outputs unpacking,
    same runStageCheck call shape); confirm against the upstream
    example pipeline.
    """
    output, flagFile = outputs
    runStageCheck('total', flagFile, logger, options, ' '.join(files), output)
'''
Authors: Bernie Pope, Gayle Philip, Clare Sloggett

Description:

Simple pipeline to demonstrate how to use the base tools.
Counts the number of lines in a set of files and then sums
them up.
'''

import sys
from ruffus import *
from utils import (runStageCheck, getOptions, initLog)
from cmdline_args import get_cmdline_args

# Command-line parsing and logger setup happen at import time: the
# stage functions below refer to `options` and `logger` directly.
args = get_cmdline_args()
options = getOptions(args)
logDir = options.pipeline['logDir']
logger = initLog(options)

# the input files
data_files = ['test_data/data1.txt', 'test_data/data2.txt']


# count the number of lines in a file
@transform(data_files, suffix('.txt'), ['.count', '.count.Success'])
def countLines(file, outputs):
    """Run the 'countLines' stage on *file*.

    outputs is a pair: the .count result file and the .count.Success
    flag file used to mark the stage as completed.
    """
    output, flagFile = outputs
    runStageCheck('countLines', flagFile, logger, options, file, output)
def main():
    '''Drive the pipeline named on the command line.

    Imports the pipeline module so its ruffus stages are defined, then
    executes it in the requested style: 'run' / 'touchfiles' (execute,
    or just bring outputs up to date), 'flowchart' (draw an SVG
    diagram) or 'print' (describe what would run without running it).
    '''
    args = get_cmdline_args()
    # We want to look for modules in the directory local to the pipeline,
    # just as if the pipeline script had been called directly.
    # This includes the script itself and the config files imported by
    # getOptions.
    sys.path.insert(0, os.path.dirname(args.pipeline))
    # options must be set before the pipeline is imported
    options = getOptions(args)
    setOptions(options)
    # import the pipeline so its stages are defined;
    # the name of the pipeline is given on the command line
    __import__(drop_py_suffix(args.pipeline))
    logDir = options.pipeline['logDir']
    startLogger()
    pipelineOptions = options.pipeline
    endTasks = pipelineOptions['end']
    forcedTasks = pipelineOptions['force']
    style = pipelineOptions['style']
    # Only an explicit 'fromend' turns maximal rebuild mode off;
    # 'fromstart' and any unrecognised value leave it on (the original
    # if/elif/else set True in both the 'fromstart' and fallback arms).
    rebuildMode = pipelineOptions['rebuild'] != 'fromend'
    if style in ['run', 'touchfiles']:
        # If the style was touchfiles, we will set a flag to bring
        # files up to date without running anything.
        touchfiles_flag = (style == 'touchfiles')
        # Perform the pipeline steps (run the pipeline).
        pipeline_run(
            # End points of the pipeline.
            endTasks,
            # How many ruffus tasks to run.
            multiprocess=pipelineOptions['procs'],
            logger=black_hole_logger,
            # Force the pipeline to start from here, regardless of whether
            # the stage is up-to-date or not.
            forcedtorun_tasks=forcedTasks,
            touch_files_only=touchfiles_flag,
            # Choose the mode in which ruffus decides how much work needs
            # to be done.
            gnu_make_maximal_rebuild_mode=rebuildMode)
    elif style == 'flowchart':
        # Draw the pipeline as a diagram.
        pipeline_printout_graph('flowchart.svg', 'svg', endTasks,
                                no_key_legend=False)
    elif style == 'print':
        # Print a textual description of what the pipeline would do,
        # but don't actually run it.
        pipeline_printout(sys.stdout, endTasks, verbose=5,
                          wrap_width=100000,
                          forcedtorun_tasks=forcedTasks,
                          gnu_make_maximal_rebuild_mode=rebuildMode)
def main():
    '''Entry point: load the pipeline module given on the command line
    and execute it in the requested style (run / touchfiles /
    flowchart / print).'''
    cmdline = get_cmdline_args()
    # Search for modules in the pipeline's own directory, just as if the
    # pipeline script had been invoked directly — this covers both the
    # script itself and the config files that getOptions imports.
    sys.path.insert(0, os.path.dirname(cmdline.pipeline))
    # Options must be registered before the pipeline module is imported.
    opts = getOptions(cmdline)
    setOptions(opts)
    # Importing the pipeline module defines its stages; its name comes
    # from the command line.
    __import__(drop_py_suffix(cmdline.pipeline))
    logDir = opts.pipeline['logDir']
    startLogger()
    pipeline_opts = opts.pipeline
    end_tasks = pipeline_opts['end']
    forced_tasks = pipeline_opts['force']
    style = pipeline_opts['style']
    # Decide ruffus' rebuild mode: 'fromend' disables maximal rebuild,
    # while 'fromstart' (or anything else) keeps it enabled.
    if pipeline_opts['rebuild'] == 'fromend':
        rebuild_mode = False
    else:
        rebuild_mode = True
    if style == 'run' or style == 'touchfiles':
        # Execute the pipeline; with 'touchfiles' ruffus only brings
        # output files up to date without doing any real work.
        pipeline_run(
            end_tasks,
            # Number of ruffus tasks to run concurrently.
            multiprocess=pipeline_opts['procs'],
            logger=black_hole_logger,
            # Stages forced to run even when they look up to date.
            forcedtorun_tasks=forced_tasks,
            touch_files_only=(style == 'touchfiles'),
            # How much work ruffus decides needs redoing.
            gnu_make_maximal_rebuild_mode=rebuild_mode)
    elif style == 'flowchart':
        # Render the pipeline as an SVG diagram.
        pipeline_printout_graph(
            'flowchart.svg', 'svg', end_tasks, no_key_legend=False)
    elif style == 'print':
        # Describe what the pipeline would do, without running it.
        pipeline_printout(
            sys.stdout, end_tasks, verbose=5, wrap_width=100000,
            forcedtorun_tasks=forced_tasks,
            gnu_make_maximal_rebuild_mode=rebuild_mode)