def parse_cli_arguments():
    """Create the HMP2 Proteomics workflow and declare its command-line options.

    Args:
        None

    Requires:
        None

    Returns:
        AnaDAMA2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='0.1',
        description='A workflow to handle HMP2 Proteomics data.',
        remove_options=['input', 'output'],
    )

    # (option name, help text) pairs, registered in the order listed.
    option_specs = (
        ('manifest-file',
         'Manifest file containing files to process in this workflow run.'),
        ('config-file',
         'Configuration file containing parameters required by the workflow.'),
        ('checksums-file',
         'MD5 checksums for files found in the supplied input directory.'),
        ('data_specific_metadata',
         'A collection of dataset specific metadata that should be '
         'integrated with any analysis output (creating a PCL file).'),
    )
    for option_name, help_text in option_specs:
        workflow.add_argument(option_name, desc=help_text)

    return workflow
def parse_cli_arguments():
    """Create the DCC upload workflow and declare its command-line options.

    Args:
        None

    Requires:
        None

    Returns:
        anadama2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='0.1',
        description=('A workflow to handle uploading metadata and data files '
                     'to the Data Coordination Center (DCC)'),
        remove_options=['input', 'output'],
    )

    # Each entry: (option name, keyword arguments handed to add_argument).
    option_specs = [
        ('manifest-file', {
            'desc': 'Manifest file containing files to process in this '
                    'workflow run.',
        }),
        ('metadata-file', {
            'desc': 'Accompanying metadata file for the provided data files.',
            'default': None,
        }),
        ('baseline-metadata-file', {
            'desc': 'Metadata file containing baseline visit metadata per '
                    'subject.',
        }),
        ('config-file', {
            'desc': 'Configuration file containing parameters required by '
                    'the workflow.',
        }),
    ]
    for option_name, option_kwargs in option_specs:
        workflow.add_argument(option_name, **option_kwargs)

    return workflow
def parse_cli_arguments():
    """Create the HMP2 host exome workflow and declare its command-line options.

    Args:
        None

    Requires:
        None

    Returns:
        AnaDAMA2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='0.1',
        description='A workflow to handle HMP2 host exome data.',
        remove_options=['input', 'output'],
    )

    # File-path style options; neither is given an explicit default.
    workflow.add_argument(
        'manifest-file',
        desc='Manifest file containing files to process in this workflow run.',
    )
    workflow.add_argument(
        'config-file',
        desc='Configuration file containing parameters required by the '
             'workflow.',
    )

    # Processing falls back to a single thread when the option is not given.
    workflow.add_argument(
        'threads',
        default=1,
        desc='Number of threads to use in workflow processing',
    )

    return workflow
def parse_cli_arguments():
    """Create the HMP2 16S visualization workflow and declare its options.

    Args:
        None

    Requires:
        None

    Returns:
        anadama2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='1.0',
        description='A workflow to handle visualization of HMP2 16S data.',
    )

    workflow.add_argument(
        'config-file',
        desc='Configuration file containing parameters required by the '
             'workflow.',
    )
    workflow.add_argument(
        'metadata-file',
        default=None,
        desc='Accompanying metadata file for the provided data files.',
    )

    # The source option is restricted to the two listed producers.
    workflow.add_argument(
        'source',
        default='biobakery',
        choices=['biobakery', 'CMMR'],
        desc='The source of the output files generated. [biobakery, CMMR]',
    )

    return workflow
def parse_cli_arguments():
    """Create the assembly / gene-calling workflow and declare its options.

    Args:
        None

    Requires:
        None

    Returns:
        anadama2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='0.1',
        description=('A workflow to assemble metagenomic data and run a gene '
                     'caller on the resulting contigs.'),
    )

    # Each entry: (option name, keyword arguments handed to add_argument).
    # NOTE(review): the memory default is the string '10240' while the help
    # text says the value is in GB -- confirm the intended unit with callers.
    option_specs = [
        ('contaminant-db', {
            'desc': 'KneadData DNA contaminants database.',
        }),
        ('file-extension', {
            'desc': 'Extension of input files to assemble and gene call on.',
            'default': '.fastq.gz',
        }),
        ('threads', {
            'desc': 'number of threads/cores for each task to use',
            'default': 1,
        }),
        ('memory', {
            'desc': 'The amount of memory to use for each assembly job. '
                    'Provided in GB',
            'default': '10240',
        }),
    ]
    for option_name, option_kwargs in option_specs:
        workflow.add_argument(option_name, **option_kwargs)

    return workflow
def parse_cli_arguments():
    """Create the HMP2 Metabolomics workflow and declare its command-line options.

    Args:
        None

    Requires:
        None

    Returns:
        AnaDAMA2.Workflow: The workflow object for this pipeline with the
            custom options registered. Only the workflow is returned; the
            options are declared here but not parsed.
    """
    workflow = Workflow(
        version='0.1',
        description='A workflow to handle HMP2 Metabolomic data.',
        remove_options=['input', 'output'],
    )

    workflow.add_argument(
        'manifest-file',
        desc='Manifest file containing files to process in this workflow run.',
    )
    workflow.add_argument(
        'config-file',
        desc='Configuration file containing parameters required by the '
             'workflow.',
    )
    workflow.add_argument(
        'metadata-file',
        desc='Accompanying metadata file for the provided data files.',
        default=None,
    )
    # Help text previously read "ouptut"; corrected so the generated help
    # output is spelled properly. The option name itself is unchanged.
    workflow.add_argument(
        'aux_metadata',
        desc='Any additional metadata files that can supply metadata for our '
             'output PCL files.',
    )

    return workflow
def parse_cli_arguments():
    """Create the 16S amplicon analysis workflow and declare its options.

    Args:
        None

    Requires:
        None

    Returns:
        anadama2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='0.1',
        description='A workflow to handle analysis of 16S amplicon data.',
        remove_options=['input', 'output'],
    )

    # Options without an explicit default.
    for option_name, help_text in (
            ('manifest-file',
             'Manifest file containing files to process in this workflow '
             'run.'),
            ('config-file',
             'Configuration file containing parameters required by the '
             'workflow.')):
        workflow.add_argument(option_name, desc=help_text)

    # Options that carry an explicit default.
    workflow.add_argument(
        'metadata-file',
        default=None,
        desc='Accompanying metadata file for the provided data files.',
    )
    workflow.add_argument(
        'threads',
        default=1,
        desc='number of threads/cores for each task to use',
    )

    return workflow
def parse_cli_arguments():
    """Create the HMP2 Metaviromics visualization workflow and declare its options.

    Args:
        None

    Requires:
        None

    Returns:
        anadama2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='1.0',
        description=('A workflow to handle visualization of HMP2 '
                     'Metaviromics data.'),
    )

    workflow.add_argument(
        'config-file',
        desc='Configuration file containing parameters required by the '
             'workflow.',
    )
    workflow.add_argument(
        'metadata-file',
        default=None,
        desc='Accompanying metadata file for the provided data files.',
    )

    return workflow
THE SOFTWARE. """ # import the workflow class from anadama2 from anadama2 import Workflow # import the document templates from biobakery_workflows from biobakery_workflows import document_templates, utilities # import the files for descriptions and paths from biobakery_workflows import files import os # create a workflow instance, providing the version number and description # remove the input folder option as it will be replaced with multiple input files workflow = Workflow(version="0.1", remove_options=["input"], description="A workflow for 16S visualization") # add the custom arguments to the workflow # create a custom description for the input argument listing all expected input files input_desc="A folder containing the final products from the 16s data workflow.\n\nThe input folder must include the following:\n\n" workflow.add_argument("input",desc=input_desc,required=True) # add the custom arguments to the workflow workflow.add_argument("project-name",desc="the name of the project",required=True) workflow.add_argument("input-metadata",desc="the metadata file (samples as columns or rows)") workflow.add_argument("input-picard",desc="the folder of picard quality score files") workflow.add_argument("input-picard-extension",desc="the extensions for the picard quality score files", default="quality_by_cycle_metrics") workflow.add_argument("metadata-categorical",desc="the categorical features", action="append", default=[]) workflow.add_argument("metadata-continuous",desc="the continuous features", action="append", default=[]) workflow.add_argument("metadata-exclude",desc="the features to exclude", action="append", default=[])
import os # import the workflow class from anadama2 from anadama2 import Workflow # import the document templates from biobakery_workflows from biobakery_workflows import utilities # import the files for descriptions and paths from biobakery_workflows import files # create a workflow instance, providing the version number and description # remove the input folder option as it will be replaced with multiple input files workflow = Workflow( version="0.1", remove_options=["input"], description= "A workflow for whole metagenome and metatranscriptome shotgun sequence visualization" ) # list the required and optional files for the workflow # these are expected to be included in the input folder wmgx_input_files = { "required": ["kneaddata_read_counts", "taxonomic_profile", "pathabundance_relab"], "optional": ["humann2_read_counts", "feature_counts"] } wmtx_input_files = { "required": ["kneaddata_read_counts"], "optional": ["humann2_read_counts", "feature_counts"] } norm_input_files = {
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ from anadama2 import Workflow import os, sys, fnmatch from biobakery_workflows.tasks import sixteen_s, dadatwo, general from biobakery_workflows import utilities, config, files # create a workflow instance, providing the version number and description workflow = Workflow(version="0.1", description="A workflow for 16S sequencing data") # add the custom arguments to the workflow workflow_config = config.SixteenS() workflow.add_argument("method", desc="method to process 16s workflow", default="vsearch", choices=["usearch", "dada2", "vsearch", "its"]) workflow.add_argument("dada-db", desc="reference database for dada2 workflow", default="silva", choices=["gg", "rdp", "silva", "unite"]) workflow.add_argument( "usearch-db", desc= "full paths for the reference databases (fna and taxonomy, comma delimited) for the usearch workflow",
# -*- coding: utf-8 -*-
from anadama2 import Workflow

# This script takes no input/output folder options.
workflow = Workflow(remove_options=["input", "output"])

# Shell commands queued in order. In AnADAMA2's `do` syntax, [t:...] tags a
# file the command writes and [d:...] tags a file it reads.
shell_commands = (
    "ls /usr/bin/ | sort > [t:global_exe.txt]",
    "ls $HOME/.local/bin/ | sort > [t:local_exe.txt]",
    "join [d:global_exe.txt] [d:local_exe.txt] > [t:match_exe.txt]",
)
for shell_command in shell_commands:
    workflow.do(shell_command)

# run the queued tasks
workflow.go()
#!/usr/bin/env python from anadama2 import Workflow import os workflow = Workflow(version="0.0.2", description="A workflow to run PanPhlAn") workflow.add_argument("threads", default=1, desc="number of threads for panphlan to use") workflow.add_argument("dbfolder", default=None, desc="folder containing database") workflow.add_argument("filesfile", default=None, desc="file with filepaths to run on (relative to input)") workflow.add_argument("ref", default=None, desc="name of reference db") workflow.add_argument( "refs", default=None, desc="file with list of references (relative to dbfolder)") args = workflow.parse_args() in_files = workflow.get_input_files(".fastq.gz") out_files = workflow.name_output_files(name=in_files, tag="panphlan_map", extension="csv.bz2") if args.filesfile: with open(args.filesfile) as f:
# $ python pull_out_reads_by_species_metaphlan2_results.py --input input_sam --output output_fastq # This will look for the metaphlan2 sam output files named *_bowtie2.sam in the input folder and write # files *_metaphlan2_marker_aligned_subset.fasta to the output folder (one for each input sam file). # The fasta reads will be any of the sample reads that map to a marker associated with one of the # species in the "--species-list" file. This file should have one species per line and be formatted # with the metaphlan2 species naming convention. More specifically, the species file should list # one per line with metaphlan2 format (ie "s__Gemella_sanguinis") and for unknown species # include the genus in this file (ie "s__Gemella_unclassified" should be included in the file as "g__Gemella"). # The metaphlan2 pkl database is also required for this script to run and can be provided # with the option "--pkl-database". SAM_READ_NAME_INDEX = 0 SAM_REFERENCE_NAME_INDEX = 2 SAM_SEQ_INDEX = 9 workflow = Workflow() # input folder should have sam alignment files from metaphlan2 run workflow.add_argument("pkl-database", desc="MetaPhlAn2 pkl database", default="metaphlan2_db/mpa_v20_m200.pkl") workflow.add_argument("species-list", desc="the list of species to pull reads for", default="species_list.txt") workflow.add_argument("input-tag-extension", desc="the file name tag and extension", default="_bowtie2.sam") args = workflow.parse_args() def find_reads(task): # read in the species with open(args.species_list) as file_handle: species_list = [taxon.rstrip() for taxon in file_handle.readlines()] db = pickle.load(bz2.BZ2File(args.pkl_database, 'r')) marker_to_species={}
import os

from anadama2 import Workflow

# to run provide the new workflow run input and output folders
# $ python anadama2_add_files_to_database.py --input $NEW_INPUT_FOLDER --output $NEW_OUTPUT_FOLDER

workflow = Workflow()

# option holding the comma-delimited extensions that select input files
workflow.add_argument(
    "input-extensions",
    default="txt,tsv,fastq,fastq.gz,log,sam",
    desc="the comma-delimited list of extensions of the input files")

args = workflow.parse_args()


def get_files_to_add(input_folder):
    """Collect files under ``input_folder`` whose names end with a requested extension.

    The extensions come from the module-level ``args.input_extensions``
    (comma-delimited). Any directory whose path contains ".anadama" is
    skipped.

    Args:
        input_folder: Root folder to walk.

    Returns:
        list: Paths (folder joined with file name) of the matching files, in
            the order ``os.walk`` yields them.
    """
    # str.endswith accepts a tuple, so one call tests every extension.
    suffixes = tuple(set(args.input_extensions.split(",")))

    matches = []
    for folder, _subfolders, filenames in os.walk(input_folder):
        if ".anadama" in folder:
            continue
        matches.extend(
            os.path.join(folder, filename)
            for filename in filenames
            if filename.endswith(suffixes))
    return matches
with open(task.depends[1].name) as file_handle: new_sum = file_handle.readline().strip().split(" ")[0] if new_sum.lower() == md5sum.lower(): file_handle = open(task.targets[0].name, "w") file_handle.write("Match") file_handle.close() else: error_msg = "ERROR: Sums do not match for file {0}\nComputed Sum: {1}\nExpected Sum: {2}".format( task.depends[1].name, new_sum, md5sum) sys.stderr.write(error_msg) raise Exception(error_msg) # create a workflow and get the arguments workflow = Workflow() workflow.add_argument("input-metadata", desc="the metadata file", required=True) workflow.add_argument("input-extension", desc="the input file extension", required=True) args = workflow.parse_args() # get all of the input files input_files = utilities.find_files(args.input, extension=args.input_extension, exit_if_not_found=True) sample_names = utilities.sample_names(input_files, args.input_extension) # for each raw input file, generate an md5sum file
def parse_cli_arguments():
    """Create the HMP2 metadata refresh workflow and declare its command-line options.

    Args:
        None

    Requires:
        None

    Returns:
        AnaDAMA2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='0.1',
        description=('A workflow to handle refreshing and disseminating HMP2 '
                     'metadata.'),
        remove_options=['input', 'output'],
    )

    # (option name, help text) pairs for the options that take no extra
    # keyword arguments, registered in the order listed.
    simple_options = (
        ('manifest-file',
         'Manifest file containing files to process in this workflow run.'),
        ('config-file',
         'Configuration file containing parameters required by the '
         'workflow.'),
        ('metadata-file',
         'If an existing metadata file exists it can be supplied here. This '
         'metadata file will be appended to instead of a whole new metadata '
         'file being generated.'),
        ('studytrax-metadata-file',
         'Accompanying StudyTrax data all corresponding samples in the HMP2 '
         'project.'),
        ('broad-sample-tracking-file',
         'Broad Institute sample tracking spreadsheet containing status of '
         'sequence products generated.'),
        ('proteomics-metadata',
         'PNNL-supplied metadata spreadsheet.'),
    )
    for option_name, help_text in simple_options:
        workflow.add_argument(option_name, desc=help_text)

    # The option name keeps its historical spelling ("auxillary") so existing
    # command lines keep working; only the help text spelling is corrected
    # (previously "auxillary" / "appeneded").
    workflow.add_argument(
        'auxillary-metadata',
        action='append',
        default=[],
        desc='Any auxiliary metadata to be appended to the final metadata '
             'table.',
    )

    return workflow
# -*- coding: utf-8 -*-
from anadama2 import Workflow

# This script takes no input/output folder options.
ctx = Workflow(remove_options=["input", "output"])

# Shell commands queued in order. In AnADAMA2's `do` syntax, [t:...] tags a
# file the command writes and [d:...] tags a file it reads.
shell_commands = (
    # fetch the page that reports the caller's address
    "wget -qO- checkip.dyndns.com > [t:my_ip.txt]",
    # keep only the text captured after "Address: " (raw string: the sed
    # expression contains backslashes)
    r"sed 's|.*Address: \(.*[0-9]\)<.*|\1|' [d:my_ip.txt] > [t:ip.txt]",
    # look the extracted address up with whois
    "whois $(cat [d:ip.txt]) > [t:whois.txt]",
)
for shell_command in shell_commands:
    ctx.do(shell_command)

# run the queued tasks
ctx.go()
import os
import fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# Build the workflow object. The version is what "--version" reports and the
# description is what "--help" shows.
workflow = Workflow(
    description="A workflow for whole metagenome shotgun sequences",
    version="0.1")

# shotgun configuration object from biobakery_workflows
workflow_config = config.ShotGun()

# custom command-line options for this workflow
workflow.add_argument(
    "input-extension",
    default="fastq.gz",
    choices=[
        "fastq.gz", "fastq", "fq.gz", "fq",
        "fasta", "fasta.gz", "fastq.bz2", "fq.bz2",
    ],
    desc="the input file extension")
workflow.add_argument(
    "barcode-file",
    default="",
    desc="the barcode file")
workflow.add_argument(
    "dual-barcode-file",
    default="",
    desc="the string to identify the dual barcode file")
import anadama2.tracked
from anadama2 import Workflow

# This script takes no input/output folder options.
workflow = Workflow(remove_options=["input", "output"])

# container holding the single tracked value "a"
container = anadama2.tracked.Container(a=20)

# first task: its dependency is the container's "a" value and it writes
# echo.txt
task1 = workflow.add_task(
    "echo [depends[0]] > [targets[0]]",
    name="task1",
    depends=container.a,
    targets="echo.txt")

# second task: its dependency is the first target of task1 and it writes
# echo2.txt
task2 = workflow.add_task(
    "p=$(cat [depends[0]]); echo $p > [targets[0]]",
    name="task2",
    depends=task1.targets[0],
    targets="echo2.txt")

# run the queued tasks
workflow.go()
import os
from glob import glob

from anadama2 import Workflow
from anadama2.tracked import TrackedExecutable

# Workflow identity -- update the version and the description as needed.
workflow = Workflow(
    description="Analysis Template",
    version="0.0.1",
)

# Additional custom arguments for the workflow (run.py)
workflow.add_argument(
    name="lines",
    default="10",
    desc="Number of lines to trim [default: 10]")
workflow.add_argument(
    name="metadata",
    default="input/metadata.tsv",
    desc="Metadata for performing analysis [default: input/metadata.tsv]")

# Parse the workflow arguments
args = workflow.parse_args()

# Attach the config file location to the parsed arguments
args.config = 'etc/config.ini'

# AnADAMA2 example workflow.do commands
workflow.do("ls /usr/bin/ | sort > [t:output/global_exe.txt]")
workflow.do("ls $HOME/.local/bin/ | sort > [t:output/local_exe.txt]")
def parse_cli_arguments():
    """Create the HMP2 WGS workflow and declare its command-line options.

    Args:
        None

    Requires:
        None

    Returns:
        anadama2.Workflow: The workflow object for this pipeline with the
            custom options registered. The options are only declared here;
            nothing is parsed by this function.
    """
    workflow = Workflow(
        version='1.0',
        description='A workflow to handle HMP2 WGS data.',
        remove_options=['input', 'output'],
    )

    # General options.
    workflow.add_argument(
        'manifest-file',
        desc='Manifest file containing files to process in this workflow run.',
    )
    workflow.add_argument(
        'config-file',
        desc='Configuration file containing parameters required by the '
             'workflow.',
    )
    workflow.add_argument(
        'metadata-file',
        default=None,
        desc='Accompanying metadata file for the provided data files.',
    )
    workflow.add_argument(
        'threads',
        default=1,
        desc='number of threads/cores for each task to use',
    )

    # Per-tool thread overrides share one help-text template; each defaults
    # to None.
    override_help = ('OPTIONAL. A specific number of threads/cores to use '
                     'just for the {0} task.')
    tool_overrides = (
        ('threads-kneaddata', 'kneaddata'),
        ('threads-metaphlan', 'metaphlan2'),
        ('threads-humann', 'humann2'),
    )
    for option_name, tool_name in tool_overrides:
        workflow.add_argument(
            option_name,
            default=None,
            desc=override_help.format(tool_name),
        )

    return workflow
import sys
import os
import fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# Build the workflow object. The version is what "--version" reports and the
# description is what "--help" shows.
workflow = Workflow(
    description="A workflow to run strainphlan",
    version="0.1")

# shotgun configuration object from biobakery_workflows
workflow_config = config.ShotGun()

# custom command-line options for this workflow
workflow.add_argument(
    "input-extension",
    default="fastq.gz",
    choices=["fastq.gz", "fastq", "fq.gz", "fq", "fasta", "fasta.gz"],
    desc="the input file extension")
workflow.add_argument(
    "threads",
    default=1,
    desc="number of threads/cores for each task to use")
workflow.add_argument(
    "bypass-taxonomic-profiling",
    action="store_true",
    desc="do not run the taxonomic profiling tasks (a tsv profile for each sequence file must be included in the input folder using the same sample name)")
workflow.add_argument(
    "strain-profiling-options",
    default="",
    desc="additional options when running the strain profiling step")
workflow.add_argument(
    "max-strains",
    type=int,
    default=20,
    desc="the max number of strains to profile")

# read the options from the command line
args = workflow.parse_args()

# locate every input file with the requested extension;
# exit_if_not_found=True asks the helper to stop when nothing matches
input_files = utilities.find_files(
    args.input,
    extension=args.input_extension,
    exit_if_not_found=True)
# import the workflow class from anadama2
from anadama2 import Workflow

# import the utilities from biobakery_workflows
from biobakery_workflows import utilities

# import the task to convert from biom to tsv
from biobakery_workflows.tasks.sixteen_s import convert_from_biom_to_tsv_list

# import the files for descriptions and paths
from biobakery_workflows import files

# Build the workflow object; the stock "input" option is removed so it can be
# re-declared below as a required option with its own help text.
workflow = Workflow(
    description="A workflow for stats on wmgx and 16s data sets",
    remove_options=["input"],
    version="0.1")

# required options, registered in the order listed: (name, help text)
required_options = (
    ("input", "the folder containing taxonomy and functional data files"),
    ("project-name", "the name of the project"),
    ("input-metadata", "the metadata file (samples as columns or rows)"),
)
for option_name, help_text in required_options:
    workflow.add_argument(option_name, desc=help_text, required=True)
from anadama2 import Workflow

# This script takes no input/output folder options.
workflow = Workflow(remove_options=["input", "output"])

# files to fetch, one download task per entry
downloads = [
    "ftp://public-ftp.hmpdacc.org/HM16STR/by_sample/SRS011275.fsa.gz",
    "ftp://public-ftp.hmpdacc.org/HM16STR/by_sample/SRS011273.fsa.gz",
    "ftp://public-ftp.hmpdacc.org/HM16STR/by_sample/SRS011180.fsa.gz",
]

for link in downloads:
    # the target is named after the last path component of the link
    target_name = link.rsplit("/", 1)[-1]
    workflow.add_task(
        "wget -O [targets[0]] [args[0]]",
        args=link,
        targets=target_name)

# run the queued tasks
workflow.go()
import os, fnmatch # import the workflow class from anadama2 from anadama2 import Workflow from anadama2.tracked import TrackedExecutable # import the library of biobakery_workflow tasks for shotgun sequences from biobakery_workflows.tasks import shotgun, general # import the utilities functions and config settings from biobakery_workflows from biobakery_workflows import utilities, config # create a workflow instance, providing the version number and description # the version number will appear when running this script with the "--version" option # the description will appear when running this script with the "--help" option workflow = Workflow(version="0.1", description="A workflow for isolate assembly") # add the custom arguments to the workflow workflow_config = config.ShotGun() workflow.add_argument("species-name", desc="the species name", required=True) workflow.add_argument("input-extension", desc="the input file extension", default="fastq.gz", choices=["fastq.gz", "fastq", "fq.gz", "fq"]) workflow.add_argument("threads", desc="number of threads/cores for each task to use", default=1) workflow.add_argument("pair-identifier", desc="the string to identify the first file in a pair", default="_R1_001") workflow.add_argument(
# -*- coding: utf-8 -*-
from anadama2 import Workflow

# Build the workflow object. The version is what "--version" reports and the
# description is what "--help" shows.
workflow = Workflow(
    description="A workflow to run KneadData",
    version="0.1")

# custom command-line options for this workflow
workflow.add_argument(
    "kneaddata-db",
    default="/work/code/kneaddata/db/",
    desc="the kneaddata database")
workflow.add_argument(
    "input-extension",
    default="fastq",
    desc="the input file extension")
workflow.add_argument(
    "threads",
    default=1,
    desc="number of threads for knead_data to use")

# read the options from the command line
args = workflow.parse_args()

# input files selected by the extension given on the command line
in_files = workflow.get_input_files(extension=args.input_extension)

# output file names derived from the input files with the "kneaddata" tag
out_files = workflow.name_output_files(name=in_files, tag="kneaddata")

# one kneaddata task per input/output pair; the extra keyword arguments fill
# the matching [placeholders] in the command template
kneaddata_command = (
    "kneaddata --input [depends[0]] --output [output_folder] "
    "--reference-db [kneaddata_db] --threads [threads]")
workflow.add_task_group(
    kneaddata_command,
    depends=in_files,
    targets=out_files,
    output_folder=args.output,
    kneaddata_db=args.kneaddata_db,
    threads=args.threads)
from anadama2 import Workflow
from biobakery_workflows.tasks import dadatwo

workflow = Workflow()

# both primers are mandatory: (option name, help text)
primer_options = (
    ("fwd-primer", "forward primer, required for its workflow"),
    ("rev-primer", "reverse primer, required for its workflow"),
)
for option_name, help_text in primer_options:
    workflow.add_argument(option_name, desc=help_text, required=True)

# optional settings with defaults
workflow.add_argument(
    "pair-identifier",
    default="_R1_001",
    desc="the string to identify the first file in a pair")
workflow.add_argument(
    "threads",
    default=1,
    desc="number of threads/cores for each task to use")

# read the options from the command line
args = workflow.parse_args()

# hand the parsed settings to the dadatwo primer-removal tasks
dadatwo.remove_primers(
    workflow,
    args.fwd_primer,
    args.rev_primer,
    args.input,
    args.output,
    args.pair_identifier,
    args.threads)

# run the queued tasks
workflow.go()
from anadama2 import Workflow

workflow = Workflow(remove_options=["input", "output"])

# add a task to download the file
workflow.add_task(
    "wget ftp://public-ftp.hmpdacc.org/HMMCP/finalData/hmp1.v35.hq.otu.counts.bz2 -O [targets[0]]",
    targets="hmp1.v35.hq.otu.counts.bz2")

# add a task to decompress the file
workflow.add_task(
    "bzip2 -d < [depends[0]] > [targets[0]]",
    depends="hmp1.v35.hq.otu.counts.bz2",
    targets="hmp1.v35.hq.otu.counts")


def remove_end_tabs_function(task):
    """Copy the task's first dependency to its first target without trailing whitespace.

    Each line is right-stripped (which removes trailing tabs along with any
    other trailing whitespace) and written back followed by a single newline.

    Args:
        task: Task object exposing ``depends[0].name`` (file to read) and
            ``targets[0].name`` (file to write).

    Returns:
        None
    """
    # The target is opened first, as before; both handles are now managed by
    # `with` so the input file is closed deterministically instead of being
    # left for the garbage collector.
    with open(task.targets[0].name, 'w') as file_handle_out:
        with open(task.depends[0].name) as file_handle_in:
            for line in file_handle_in:
                file_handle_out.write(line.rstrip() + "\n")


# add a task with a function to remove the end tabs from the file
workflow.add_task(
    remove_end_tabs_function,
    depends="hmp1.v35.hq.otu.counts",
    targets="hmp1.v35.hq.otu.counts.notabs",
    name="remove_end_tabs")

workflow.go()
import os
import sys
import datetime

# constants
ARCHIVE_FOLDER = "/opt/archive_folder/"
COUNT_FILE = os.path.join(ARCHIVE_FOLDER, "data_deposition_counts.csv")
PUBLIC_COUNT_FILE = os.path.join(
    ARCHIVE_FOLDER, "data_deposition_counts_public.csv")

# create a workflow to check the md5sums for each file
from anadama2 import Workflow
from biobakery_workflows import utilities

# build the workflow without the stock "input" option
workflow = Workflow(remove_options=["input"])

# every custom option is mandatory: (option name, help text), registered in
# the order listed
required_options = (
    ("input-upload", "the folder of raw uploaded data"),
    ("input-processed", "the folder of processed data"),
    ("key", "the key file to use for the transfer"),
    ("user", "the user id for the transfer"),
    ("remote", "the remote host name for the transfer"),
)
for option_name, help_text in required_options:
    workflow.add_argument(option_name, desc=help_text, required=True)