Exemple #1
0
# create the workflow instance that all options below are registered on
workflow = Workflow(
    version="0.1",
    description="A workflow to run strainphlan",
)

# register the workflow-specific command line options
workflow_config = config.ShotGun()
workflow.add_argument(
    "input-extension",
    desc="the input file extension",
    default="fastq.gz",
    choices=["fastq.gz", "fastq", "fq.gz", "fq", "fasta", "fasta.gz"],
)
workflow.add_argument(
    "threads",
    desc="number of threads/cores for each task to use",
    default=1,
)
workflow.add_argument(
    "bypass-taxonomic-profiling",
    desc="do not run the taxonomic profiling tasks (a tsv profile for each sequence file must be included in the input folder using the same sample name)",
    action="store_true",
)
workflow.add_argument(
    "strain-profiling-options",
    desc="additional options when running the strain profiling step",
    default="",
)
workflow.add_argument(
    "max-strains",
    desc="the max number of strains to profile",
    default=20,
    type=int,
)

# parse the command line; the resulting namespace drives the rest of the script
args = workflow.parse_args()

# collect every file in the input folder that carries the requested extension;
# exit_if_not_found=True asks the helper to stop the run when nothing matches
input_files = utilities.find_files(
    args.input,
    extension=args.input_extension,
    exit_if_not_found=True,
)

### STEP #1: Run taxonomic profiling on all of the filtered files ###
if not args.bypass_taxonomic_profiling:
    # profile the sequence files found in the input folder
    merged_taxonomic_profile, taxonomy_tsv_files, taxonomy_sam_files = shotgun.taxonomic_profile(
        workflow, input_files, args.output, args.threads, args.input_extension)
else:
    # profiling is bypassed: a tsv profile named after each sample is expected
    # in the input folder (see the bypass-taxonomic-profiling help text and the
    # error message below), so the expected paths are built from args.input
    sample_names = utilities.sample_names(input_files, args.input_extension)
    tsv_profiles = utilities.name_files(sample_names, args.input, tag="taxonomic_profile", extension="tsv")
    # check all of the expected profiles are found
    if not all(os.path.isfile(profile) for profile in tsv_profiles):
        sys.exit("ERROR: Bypassing taxonomic profiling but all of the tsv taxonomy profile files are not found in the input folder. Expecting the following input files:\n"+"\n".join(tsv_profiles))
    # run taxonomic profile steps bypassing metaphlan2
    merged_taxonomic_profile, taxonomy_tsv_files, taxonomy_sam_files = shotgun.taxonomic_profile(
        workflow, tsv_profiles, args.output, args.threads, "tsv", already_profiled=True)
    # look for the sam profiles
Exemple #2
0
                      default=20,
                      type=int)
workflow.add_argument(
    "strain-list",
    desc="input file with list of strains to profile",
    default="",
)
workflow.add_argument(
    "assembly-options",
    desc="additional options when running the assembly step",
    default="",
)

# parse the command line into the args namespace
args = workflow.parse_args()

# collect every file in the input folder that carries the requested extension;
# exit_if_not_found=True asks the helper to stop the run when nothing matches
input_files = utilities.find_files(
    args.input,
    extension=args.input_extension,
    exit_if_not_found=True,
)

# look for index files, i.e. files whose extension is
# "<index identifier>.<input extension>"; a missing match is not an error here
index_files = utilities.find_files(
    args.input,
    extension=args.index_identifier + "." + args.input_extension,
)

# keep only the input files that are not index files
input_files = [path for path in input_files if path not in index_files]

# if a dual index file is provided, then demultiplex dual indexing
if args.dual_barcode_file:
    # an input extension containing ".bz2" is rejected up front: the script
    # exits with an error message instead of going on to demultiplex
    if ".bz2" in args.input_extension:
        sys.exit(
            "ERROR: Bz2 formatted files are not supported with demultiplexing")