workflow.add_task(
    "src/analysis.R -o [targets[0]] -d " + args.metadata,  #Command 
    depends=[TrackedExecutable("src/analysis.R")
             ],  #Tracking executable dependencies
    targets=args.output,  #Output target directory
    args=[args.metadata])  #Additional arguments
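# With the placeholders filled in, this task runs a single command of the form
# (directory and file names here are hypothetical):
#   src/analysis.R -o output_dir -d metadata.tsv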

# Task3 add_task_group - AnADAMA2 example that executes a task over multiple input files/dependencies
multiple_input_files = glob(os.path.join(
    args.output, '*.txt'))  # collect the multiple input files
output_files = [
    os.path.join(args.output, 'data', os.path.basename(input_file) + "_backup")
    for input_file in multiple_input_files
]
workflow.add_task_group(
    "cp [depends[0]] [targets[0]]",  # command run once per depends/targets pair
    depends=multiple_input_files,  # input file dependencies, one per task
    targets=output_files)  # output targets, one per task
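# For illustration (file names are hypothetical): if the glob above finds
# <output>/sample1.txt and <output>/sample2.txt, the task group expands the
# placeholders into one copy command per file, roughly:
#   cp <output>/sample1.txt <output>/data/sample1.txt_backup
#   cp <output>/sample2.txt <output>/data/sample2.txt_backup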


# private python function definition
def remove_end_tabs_function(task):
    with open(task.depends[0].name) as file_handle_in, \
            open(task.targets[0].name, "w") as file_handle_out:
        for line in file_handle_in:
            file_handle_out.write(line.rstrip() + "\n")


# Task4 add_task - AnADAMA2 example of using a python task function
workflow.add_task(
    remove_end_tabs_function,  # python function to call for this task
    depends=args.input,  # track the input file dependency
    targets=args.output + "/data/data.tsv.notabs")  # target output file

# create a workflow instance, providing the version number and description
# the version number will appear when running this script with the "--version" option
# the description will appear when running this script with the "--help" option
workflow = Workflow(version="0.1", description="A workflow to run KneadData")

# add the custom arguments to the workflow
workflow.add_argument("kneaddata-db", desc="the kneaddata database", default="/work/code/kneaddata/db/")
workflow.add_argument("input-extension", desc="the input file extension", default="fastq")
workflow.add_argument("threads", desc="number of threads for knead_data to use", default=1)

# get the arguments from the command line
args = workflow.parse_args()

# get all input files with the input extension provided on the command line
in_files = workflow.get_input_files(extension=args.input_extension)

# get a list of output files, one for each input file, with the kneaddata tag
out_files = workflow.name_output_files(name=in_files, tag="kneaddata")
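# For example (hypothetical file names), an input file like input_dir/sample1.fastq
# is paired with an output name carrying the tag, e.g. something like
# output_dir/sample1_kneaddata.fastq (the exact name follows AnADAMA2's
# name_output_files convention).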

# create a task for each set of input and output files to run kneaddata
workflow.add_task_group(
    "kneaddata --input [depends[0]] --output [output_folder] --reference-db [kneaddata_db] --threads [threads]",
    depends=in_files,
    targets=out_files,
    output_folder=args.output,
    kneaddata_db=args.kneaddata_db,
    threads=args.threads)
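# Each task in the group runs one kneaddata command with the placeholders filled
# in from the keyword arguments above, e.g. (hypothetical input/output paths,
# defaults from the arguments above):
#   kneaddata --input input_dir/sample1.fastq --output output_dir \
#       --reference-db /work/code/kneaddata/db/ --threads 1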

workflow.go()
Example #3
# add a required custom argument for the input metadata file
workflow.add_argument("input-metadata", desc="the input metadata file",
                      required=True)
args = workflow.parse_args()

# get all of the input files
input_files = utilities.find_files(args.input,
                                   extension=args.input_extension,
                                   exit_if_not_found=True)
sample_names = utilities.sample_names(input_files, args.input_extension)
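# sample_names typically strips the path and the input extension, so (hypothetical
# paths) an input file like input_dir/sampleA.fastq yields the sample name
# "sampleA", which is used below to build the per-sample output file names.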

# for each raw input file, generate an md5sum file
md5sum_outputs = [
    os.path.join(args.output, output_file_name) + ".md5sum"
    for output_file_name in sample_names
]
workflow.add_task_group("md5sum [depends[0]] > [targets[0]]",
                        depends=input_files,
                        targets=md5sum_outputs)
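# Each task in this group runs one shell command of the form (hypothetical
# sample name and paths):
#   md5sum input_dir/sampleA.fastq > output_dir/sampleA.md5sum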

# for each file, verify the checksum
md5sum_checks = [
    os.path.join(args.output, check_file_name) + ".check"
    for check_file_name in sample_names
]
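# A minimal sketch (assumption) of what the verify_checksum task function used
# below might look like; the real function is defined elsewhere in the source
# workflow and may instead validate against checksums listed in the metadata
# file. This sketch recomputes the md5 of the raw input file, compares it to
# the .md5sum file generated above, and writes the result to the .check target.
import hashlib

def verify_checksum(task):
    input_file = task.depends[0].name
    sum_file = task.depends[1].name
    md5 = hashlib.md5()
    with open(input_file, "rb") as file_handle:
        for chunk in iter(lambda: file_handle.read(1024 * 1024), b""):
            md5.update(chunk)
    recorded_sum = open(sum_file).readline().split()[0]
    status = "OK" if md5.hexdigest() == recorded_sum else "FAILED"
    with open(task.targets[0].name, "w") as file_handle_out:
        file_handle_out.write(input_file + ": " + status + "\n")
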
for in_file, sum_file, check_file in zip(input_files, md5sum_outputs,
                                         md5sum_checks):
    workflow.add_task(verify_checksum,
                      depends=[in_file, sum_file, args.input_metadata],
                      targets=[check_file])

workflow.go()