Python Workflow.Workflowの例、anadama2.Workflow.Workflow Pythonの例

コード例 #1

0

ファイルを表示

def parse_cli_arguments():
    """Create the HMP2 host exome workflow and declare its custom options.

    ``input`` and ``output`` are passed in ``remove_options``; the options
    ``manifest-file``, ``config-file`` and ``threads`` are then registered
    in that order.

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    option_table = (
        ("manifest-file", {
            "desc": "Manifest file containing files to process in this "
                    "workflow run.",
        }),
        ("config-file", {
            "desc": "Configuration file containing parameters required by "
                    "the workflow.",
        }),
        ("threads", {
            "desc": "Number of threads to use in workflow processing",
            "default": 1,
        }),
    )

    pipeline = Workflow(
        version="0.1",
        description="A workflow to handle HMP2 host exome data.",
        remove_options=["input", "output"],
    )
    for option_name, option_settings in option_table:
        pipeline.add_argument(option_name, **option_settings)

    return pipeline

コード例 #2

0

ファイルを表示

ファイル: 16s_vis.py プロジェクト: biobakery/hmp2_workflows

def parse_cli_arguments():
    """Create the HMP2 16S visualization workflow and declare its options.

    Registers ``config-file``, ``metadata-file`` and ``source`` (restricted
    to ``biobakery`` or ``CMMR``, defaulting to ``biobakery``).

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    vis_workflow = Workflow(
        version="1.0",
        description="A workflow to handle visualization of HMP2 16S data.",
    )

    config_help = ("Configuration file containing parameters required by "
                   "the workflow.")
    vis_workflow.add_argument("config-file", desc=config_help)

    metadata_help = "Accompanying metadata file for the provided data files."
    vis_workflow.add_argument("metadata-file",
                              desc=metadata_help,
                              default=None)

    source_help = ("The source of the output files generated. "
                   "[biobakery, CMMR]")
    vis_workflow.add_argument("source",
                              desc=source_help,
                              default="biobakery",
                              choices=["biobakery", "CMMR"])

    return vis_workflow

コード例 #3

0

ファイルを表示

ファイル: prot.py プロジェクト: biobakery/hmp2_workflows

def parse_cli_arguments():
    """Create the HMP2 proteomics workflow and declare its custom options.

    ``input`` and ``output`` are passed in ``remove_options``. Four options
    are registered, each with only a description: ``manifest-file``,
    ``config-file``, ``checksums-file`` and ``data_specific_metadata``.

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    # (option name, help text) pairs, registered in this order.
    described_options = [
        ("manifest-file",
         "Manifest file containing files to process in this workflow run."),
        ("config-file",
         "Configuration file containing parameters required by the "
         "workflow."),
        ("checksums-file",
         "MD5 checksums for files found in the supplied input directory."),
        ("data_specific_metadata",
         "A collection of dataset specific metadata that should be "
         "integrated with any analysis output (creating a PCL file)."),
    ]

    pipeline = Workflow(
        version="0.1",
        description="A workflow to handle HMP2 Proteomics data.",
        remove_options=["input", "output"],
    )
    for option_name, help_text in described_options:
        pipeline.add_argument(option_name, desc=help_text)

    return pipeline

コード例 #4

0

ファイルを表示

def parse_cli_arguments():
    """Create the DCC upload workflow and declare its custom options.

    ``input`` and ``output`` are passed in ``remove_options``. The options
    ``manifest-file``, ``metadata-file``, ``baseline-metadata-file`` and
    ``config-file`` are registered in that order.

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    option_table = (
        ("manifest-file", {
            "desc": "Manifest file containing files to process in this "
                    "workflow run.",
        }),
        ("metadata-file", {
            "desc": "Accompanying metadata file for the provided data "
                    "files.",
            "default": None,
        }),
        ("baseline-metadata-file", {
            "desc": "Metadata file containing baseline visit metadata per "
                    "subject.",
        }),
        ("config-file", {
            "desc": "Configuration file containing parameters required by "
                    "the workflow.",
        }),
    )

    upload_workflow = Workflow(
        version="0.1",
        description="A workflow to handle uploading metadata and data "
                    "files to the Data Coordination Center (DCC)",
        remove_options=["input", "output"],
    )
    for option_name, option_settings in option_table:
        upload_workflow.add_argument(option_name, **option_settings)

    return upload_workflow

コード例 #5

0

ファイルを表示

ファイル: wmgx_wmtx.py プロジェクト: biobakery/hmp2_workflows

def parse_cli_arguments():
    """Create the metatranscriptomics workflow and declare its options.

    ``input`` and ``output`` are passed in ``remove_options``. Besides the
    manifest, config and metadata file options, a general ``threads``
    option (default 1) and three per-task thread options (default None)
    are registered.

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    wf = Workflow(
        version="0.1",
        description="A workflow to handle analysis of metatranscriptomic "
                    "data.",
        remove_options=["input", "output"],
    )

    wf.add_argument(
        "manifest-file",
        desc="Manifest file containing files to process in this workflow "
             "run.")
    wf.add_argument(
        "config-file",
        desc="Configuration file containing parameters required by the "
             "workflow.")
    wf.add_argument(
        "metadata-file",
        desc="Accompanying metadata file for the provided data files.",
        default=None)
    wf.add_argument(
        "threads",
        desc="number of threads/cores for each task to use",
        default=1)

    # The per-task overrides differ only in the option suffix and in the
    # task name mentioned in the help text.
    per_task_overrides = (
        ("kneaddata", "kneaddata"),
        ("metaphlan", "metaphlan2"),
        ("humann", "humann2"),
    )
    for option_suffix, task_label in per_task_overrides:
        wf.add_argument(
            "threads-" + option_suffix,
            desc="OPTIONAL. A specific number of threads/cores to use just "
                 "for the " + task_label + " task.",
            default=None)

    return wf

コード例 #6

0

ファイルを表示

def parse_cli_arguments():
    """Create the MetaWIBELE prioritization workflow and declare its options.

    The workflow is built with the module-level ``VERSION`` and a fixed
    description (the original notes say these are shown by ``--version``
    and ``--help`` respectively). ``output`` is passed in
    ``remove_options`` and then registered again with a default of the
    absolute path of ``config.working_dir``.

    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    default_output_dir = os.path.abspath(config.working_dir)

    wf = Workflow(
        version=VERSION,
        description="A workflow for MetaWIBELE prioritization",
        remove_options=["output"],
    )

    # Custom options, registered in exactly this order.
    option_table = (
        ("threads", {
            "desc": "number of threads/cores for each task to use",
            "default": None,
        }),
        ("prioritization-config", {
            "desc": "the configuration file for prioritization",
            "default": None,
        }),
        ("vignette-config", {
            "desc": "the file with specific functions of interest used as "
                    "binary filtering for prioritization",
            "default": "none",
        }),
        ("bypass-mandatory", {
            "desc": "do not prioritize protein families based on "
                    "quantitative criteria (mandatory prioritization)",
            "action": "store_true",
        }),
        ("bypass-optional", {
            "desc": "do not prioritize protein families based on selecting "
                    "our for interested annotations (optional "
                    "prioritization)",
            "action": "store_true",
        }),
        ("bypass-finalized", {
            "desc": "do not finalize prioritized protein families",
            "action": "store_true",
        }),
        ("selected-output", {
            "desc": "the output file name for the prioritized protein "
                    "families by binary filtering",
            "default": None,
        }),
        ("basename", {
            "desc": "specify the basename for output files",
            "default": None,
        }),
        ("input-annotation", {
            "desc": "provide the annotation file for protein families",
            "required": True,
        }),
        ("input-attribute", {
            "desc": "provide the annotation attribute file for protein "
                    "families",
            "required": True,
        }),
        ("output", {
            "desc": "provide an output folder which the workflow database "
                    "and log is written. By default, thet be written to "
                    "the anadama2 folder of users' workding directory",
            "default": default_output_dir,
        }),
    )
    for option_name, option_settings in option_table:
        wf.add_argument(option_name, **option_settings)

    return wf

コード例 #7

0

ファイルを表示

def parse_cli_arguments():
    """Create the metagenome preprocessing workflow and declare its options.

    The workflow is built with the module-level ``VERSION`` and a fixed
    description (the original notes say these are shown by ``--version``
    and ``--help`` respectively).

    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    wf = Workflow(
        version=VERSION,
        description="A workflow to preprocess shotgun sequencing reads of "
                    "metagenomes with tasks of metagenomic assembly, gene "
                    "calling, building gene catalogs and generating gene "
                    "abundance for each sample.",
    )

    def add_bypass_flag(flag_name, help_text):
        # All bypass switches share the "store_true" action.
        wf.add_argument(flag_name, desc=help_text, action="store_true")

    wf.add_argument("threads",
                    desc="number of threads/cores for each task to use",
                    default=None)
    wf.add_argument("extension-paired",
                    desc="provide the extension for paired fastq files "
                         "using comma to separate, e.g. "
                         ".R1.fastq.gz,.R2.fastq.gz | .R1.fastq,.R2.fastq",
                    default=None)
    wf.add_argument("extension",
                    desc="provide the extension for all fastq files",
                    choices=[".fastq.gz", ".fastq"],
                    default=".fastq.gz")
    wf.add_argument("gene-call-type",
                    desc="specify which type of gene calls will be used",
                    choices=["prokka", "prodigal", "both"],
                    default="prodigal")

    add_bypass_flag("bypass-assembly", "do not run assembly")
    add_bypass_flag("bypass-gene-calling", "do not call ORFs")
    add_bypass_flag("bypass-gene-catalog", "do not build gene catalogs")

    wf.add_argument("output-basename",
                    desc="provide the basename for output files",
                    default=None)

    return wf

コード例 #8

0

ファイルを表示

ファイル: metadata.py プロジェクト: biobakery/hmp2_workflows

def parse_cli_arguments():
    """Create the HMP2 metadata workflow and declare its custom options.

    ``input`` and ``output`` are passed in ``remove_options``. Six options
    are registered with only a description; ``auxillary-metadata`` is
    registered last with the ``append`` action and an empty-list default.

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered.
    """
    # (option name, help text) pairs for options that only carry a
    # description, in registration order.
    described_options = [
        ("manifest-file",
         "Manifest file containing files to process in this workflow run."),
        ("config-file",
         "Configuration file containing parameters required by the "
         "workflow."),
        ("metadata-file",
         "If an existing metadata file exists it can be supplied here. "
         "This metadata file will be appended to instead of a whole new "
         "metadata file being generated."),
        ("studytrax-metadata-file",
         "Accompanying StudyTrax data all corresponding samples in the "
         "HMP2 project."),
        ("broad-sample-tracking-file",
         "Broad Institute sample tracking spreadsheet containing status "
         "of sequence products generated."),
        ("proteomics-metadata",
         "PNNL-supplied metadata spreadsheet."),
    ]

    metadata_workflow = Workflow(
        version="0.1",
        description="A workflow to handle refreshing and disseminating "
                    "HMP2 metadata.",
        remove_options=["input", "output"],
    )
    for option_name, help_text in described_options:
        metadata_workflow.add_argument(option_name, desc=help_text)

    metadata_workflow.add_argument(
        "auxillary-metadata",
        action="append",
        default=[],
        desc="Any auxillary metadata to be appeneded to the final metadata "
             "table.",
    )

    return metadata_workflow

コード例 #9

0

ファイルを表示

ファイル: mvx_vis.py プロジェクト: biobakery/hmp2_workflows

def parse_cli_arguments():
    """Create the HMP2 metaviromics visualization workflow and its options.

    Registers ``config-file`` and then ``metadata-file`` (default None).

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    vis_workflow = Workflow(
        version="1.0",
        description="A workflow to handle visualization of HMP2 "
                    "Metaviromics data.",
    )

    config_help = ("Configuration file containing parameters required by "
                   "the workflow.")
    metadata_help = "Accompanying metadata file for the provided data files."

    vis_workflow.add_argument("config-file", desc=config_help)
    vis_workflow.add_argument("metadata-file",
                              desc=metadata_help,
                              default=None)

    return vis_workflow

コード例 #10

0

ファイルを表示

ファイル: wmgx_assembly.py プロジェクト: biobakery/hmp2_workflows

def parse_cli_arguments():
    """Create the metagenomic assembly workflow and declare its options.

    Registers ``contaminant-db``, ``file-extension``, ``threads`` and
    ``memory`` in that order. Note that the ``threads`` default is the
    integer 1 while the ``memory`` default is the string '10240'.

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    option_table = (
        ("contaminant-db", {
            "desc": "KneadData DNA contaminants database.",
        }),
        ("file-extension", {
            "desc": "Extension of input files to assemble and gene call "
                    "on.",
            "default": ".fastq.gz",
        }),
        ("threads", {
            "desc": "number of threads/cores for each task to use",
            "default": 1,
        }),
        ("memory", {
            "desc": "The amount of memory to use for each assembly job. "
                    "Provided in GB",
            "default": "10240",
        }),
    )

    assembly_workflow = Workflow(
        version="0.1",
        description="A workflow to assemble metagenomic data and run a "
                    "gene caller on the resulting contigs.",
    )
    for option_name, option_settings in option_table:
        assembly_workflow.add_argument(option_name, **option_settings)

    return assembly_workflow

コード例 #11

0

ファイルを表示

def parse_cli_arguments():
    """Create the HMP2 metabolomics workflow and declare its custom options.

    ``input`` and ``output`` are passed in ``remove_options``. The options
    ``manifest-file``, ``config-file``, ``metadata-file`` and
    ``aux_metadata`` are registered in that order.

    Args:
        None
    Requires:
        None
    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    option_table = (
        ("manifest-file", {
            "desc": "Manifest file containing files to process in this "
                    "workflow run.",
        }),
        ("config-file", {
            "desc": "Configuration file containing parameters required by "
                    "the workflow.",
        }),
        ("metadata-file", {
            "desc": "Accompanying metadata file for the provided data "
                    "files.",
            "default": None,
        }),
        ("aux_metadata", {
            "desc": "Any additional metadata files that can supply "
                    "metadata for our ouptut PCL files.",
        }),
    )

    pipeline = Workflow(
        version="0.1",
        description="A workflow to handle HMP2 Metabolomic data.",
        remove_options=["input", "output"],
    )
    for option_name, option_settings in option_table:
        pipeline.add_argument(option_name, **option_settings)

    return pipeline

コード例 #12

0

ファイルを表示

import sys
import os, fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# Build the workflow object. Per the original notes, the version is what the
# "--version" option reports and the description is what "--help" reports.
workflow = Workflow(
    version="0.1",
    description="A workflow to run strainphlan",
)

# Instantiate the shotgun configuration, then register the custom options.
workflow_config = config.ShotGun()
workflow.add_argument(
    "input-extension",
    desc="the input file extension",
    default="fastq.gz",
    choices=["fastq.gz", "fastq", "fq.gz", "fq", "fasta", "fasta.gz"],
)
workflow.add_argument(
    "threads",
    desc="number of threads/cores for each task to use",
    default=1,
)
workflow.add_argument(
    "bypass-taxonomic-profiling",
    desc="do not run the taxonomic profiling tasks (a tsv profile for each "
         "sequence file must be included in the input folder using the same "
         "sample name)",
    action="store_true",
)
workflow.add_argument(
    "strain-profiling-options",
    desc="additional options when running the strain profiling step",
    default="",
)
workflow.add_argument(
    "max-strains",
    desc="the max number of strains to profile",
    default=20,
    type=int,
)

# Read the options supplied on the command line.
args = workflow.parse_args()

# Collect the input files carrying the requested extension.
# exit_if_not_found=True is passed; the original note says an error is
# returned when no files are found.
input_files = utilities.find_files(
    args.input,
    extension=args.input_extension,
    exit_if_not_found=True,
)

コード例 #13

0

ファイルを表示

import os, fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# Build the workflow object. Per the original notes, the version is what the
# "--version" option reports and the description is what "--help" reports.
workflow = Workflow(version="0.1",
                    description="A workflow for whole metagenome shotgun sequences")

# Instantiate the shotgun configuration, then register the custom options.
workflow_config = config.ShotGun()
workflow.add_argument(
    "input-extension",
    desc="the input file extension",
    default="fastq.gz",
    choices=[
        "fastq.gz",
        "fastq",
        "fq.gz",
        "fq",
        "fasta",
        "fasta.gz",
        "fastq.bz2",
        "fq.bz2",
    ],
)
workflow.add_argument(
    "barcode-file",
    desc="the barcode file",
    default="",
)
workflow.add_argument(
    "dual-barcode-file",
    desc="the string to identify the dual barcode file",
    default="",
)

コード例 #14

0

ファイルを表示

ファイル: 16s_vis.py プロジェクト: zhaoxia413/biobakery_workflows

THE SOFTWARE.
"""

# import the workflow class from anadama2
from anadama2 import Workflow

# import the document templates from biobakery_workflows
from biobakery_workflows import document_templates, utilities

# import the files for descriptions and paths
from biobakery_workflows import files
import os

# Build the workflow object. "input" is listed in remove_options because,
# per the original note, the input folder option is replaced below.
workflow = Workflow(
    version="0.1",
    remove_options=["input"],
    description="A workflow for 16S visualization",
)

# Help text for the replacement "input" option (only the header lines are
# assembled here).
input_desc = (
    "A folder containing the final products from the 16s data workflow.\n\n"
    "The input folder must include the following:\n\n"
)

workflow.add_argument("input", desc=input_desc, required=True)

# Remaining custom options.
workflow.add_argument(
    "project-name",
    desc="the name of the project",
    required=True,
)
workflow.add_argument(
    "input-metadata",
    desc="the metadata file (samples as columns or rows)",
)
workflow.add_argument(
    "input-picard",
    desc="the folder of picard quality score files",
)
workflow.add_argument(
    "input-picard-extension",
    desc="the extensions for the picard quality score files",
    default="quality_by_cycle_metrics",
)
workflow.add_argument(
    "metadata-categorical",
    desc="the categorical features",
    action="append",
    default=[],
)
workflow.add_argument(
    "metadata-continuous",
    desc="the continuous features",
    action="append",
    default=[],
)
workflow.add_argument(
    "metadata-exclude",
    desc="the features to exclude",
    action="append",
    default=[],
)

コード例 #15

0

ファイルを表示

IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

from anadama2 import Workflow
import os, sys, fnmatch

from biobakery_workflows.tasks import sixteen_s, dadatwo, general
from biobakery_workflows import utilities, config, files

# Build the workflow object with its version number and description.
workflow = Workflow(
    version="0.1",
    description="A workflow for 16S sequencing data",
)

# Instantiate the 16S configuration, then register the custom options.
workflow_config = config.SixteenS()
workflow.add_argument(
    "method",
    desc="method to process 16s workflow",
    default="vsearch",
    choices=["usearch", "dada2", "vsearch", "its"],
)
workflow.add_argument(
    "dada-db",
    desc="reference database for dada2 workflow",
    default="silva",
    choices=["gg", "rdp", "silva", "unite"],
)
workflow.add_argument(
    "usearch-db",
    desc=
    "full paths for the reference databases (fna and taxonomy, comma delimited) for the usearch workflow",

コード例 #16

0

ファイルを表示

def parse_cli_arguments():
    """Create the MetaWIBELE characterization workflow and declare its options.

    The workflow is built with the module-level ``VERSION`` and a fixed
    description (the original notes say these are shown by ``--version``
    and ``--help`` respectively). ``output`` is passed in
    ``remove_options`` and then registered again with a default of the
    absolute path of ``config.working_dir``.

    Returns:
        Workflow: The workflow object with its options registered. The
            command line itself is not parsed here.
    """
    default_output_dir = os.path.abspath(config.working_dir)

    wf = Workflow(
        version=VERSION,
        description="A workflow for MetaWIBELE characterization",
        remove_options=["output"],
    )

    # Shorthand for keyword sets that repeat many times below.
    def optional(help_text):
        return {"desc": help_text, "default": None}

    def bypass(help_text):
        return {"desc": help_text, "action": "store_true"}

    def mandatory(help_text):
        return {"desc": help_text, "required": True}

    # Custom options, registered in exactly this order.
    option_table = (
        ("threads",
         optional("number of threads/cores for each task to use")),
        ("characterization-config",
         optional("the configuration file of characterization analysis")),
        ("mspminer-config",
         optional("the configuration file used by mspminer; [no] skip "
                  "MSPminer-based taxonomic assignment")),
        ("bypass-clustering",
         bypass("do not cluster proteins into protein families")),
        ("bypass-global-homology",
         bypass("do not annotate protein families based on global "
                "homology information")),
        ("bypass-domain-motif",
         bypass("do not annotate protein families based on domain/motif "
                "information")),
        ("bypass-interproscan",
         bypass("do not annotate protein families based on interproscan")),
        ("bypass-pfam_to_go",
         bypass("do not annotate protein families based on pfam2go")),
        ("bypass-domine",
         bypass("do not annotate protein families based on DOMINE "
                "database")),
        ("bypass-sifts",
         bypass("do not annotate protein families based on SIFTS "
                "database")),
        ("bypass-expatlas",
         bypass("do not annotate protein families based on Expression "
                "Atlas database")),
        ("bypass-psortb",
         bypass("do not annotate protein families based on psortb")),
        ("bypass-abundance",
         bypass("do not annotate protein families based on abundance "
                "information")),
        ("bypass-mspminer",
         bypass("do not annotate protein families based on MSPminer")),
        ("bypass-maaslin",
         bypass("do not annotate protein families based on MaAsLin2")),
        ("split-number",
         optional("indicates number of spliting files for annotation "
                  "based on sequence information")),
        ("bypass-integration",
         bypass("do not integrate annotations for protein families")),
        ("study",
         optional("specify the study name")),
        ("basename",
         optional("specify the basename for output files")),
        ("input-sequence",
         mandatory("input the sequence file for gene families "
                   "(non-redundant catalogs)")),
        ("input-count",
         mandatory("input the count file for gene families "
                   "(non-redundant catalogs)")),
        ("input-metadata",
         mandatory("input the metadata file")),
        ("output", {
            "desc": "provide an output folder which the workflow database "
                    "and log is written. By default, thet be written to "
                    "the anadama2 folder of users' working directory",
            "default": default_output_dir,
        }),
    )
    for option_name, option_settings in option_table:
        wf.add_argument(option_name, **option_settings)

    return wf

コード例 #17

0

ファイルを表示

ファイル: wmgx_wmtx_vis.py プロジェクト: zhaoxia413/biobakery_workflows

import os

# import the workflow class from anadama2
from anadama2 import Workflow

# import the document templates from biobakery_workflows
from biobakery_workflows import utilities

# import the files for descriptions and paths
from biobakery_workflows import files

# Build the workflow object. "input" is listed in remove_options because,
# per the original note, the input folder option is replaced by multiple
# input files.
workflow = Workflow(version="0.1",
                    remove_options=["input"],
                    description="A workflow for whole metagenome and "
                                "metatranscriptome shotgun sequence "
                                "visualization")

# Names of the required and optional files for each data type; the original
# note says these are expected to be present in the input folder.
wmgx_input_files = {
    "required": [
        "kneaddata_read_counts",
        "taxonomic_profile",
        "pathabundance_relab",
    ],
    "optional": [
        "humann2_read_counts",
        "feature_counts",
    ],
}
wmtx_input_files = {
    "required": [
        "kneaddata_read_counts",
    ],
    "optional": [
        "humann2_read_counts",
        "feature_counts",
    ],
}
norm_input_files = {

コード例 #18

0

ファイルを表示

#!/usr/bin/env python

from anadama2 import Workflow
import os

# Build the workflow object with its version number and description.
workflow = Workflow(
    version="0.0.2",
    description="A workflow to run PanPhlAn",
)

# Register the custom options; only "threads" has a non-None default.
workflow.add_argument(
    "threads",
    default=1,
    desc="number of threads for panphlan to use",
)
workflow.add_argument(
    "dbfolder",
    default=None,
    desc="folder containing database",
)
workflow.add_argument(
    "filesfile",
    default=None,
    desc="file with filepaths to run on (relative to input)",
)
workflow.add_argument(
    "ref",
    default=None,
    desc="name of reference db",
)
workflow.add_argument(
    "refs",
    default=None,
    desc="file with list of references (relative to dbfolder)",
)

# Read the options supplied on the command line.
args = workflow.parse_args()

# Ask the workflow for the ".fastq.gz" inputs, then derive one output name
# per input using the "panphlan_map" tag and the "csv.bz2" extension.
in_files = workflow.get_input_files(".fastq.gz")
out_files = workflow.name_output_files(
    name=in_files,
    tag="panphlan_map",
    extension="csv.bz2",
)

if args.filesfile:
    with open(args.filesfile) as f:

コード例 #19

0

ファイルを表示

ファイル: stats.py プロジェクト: wanliu2019/biobakery_workflows

# import the workflow class from anadama2
from anadama2 import Workflow

# import the document templates and utilities from biobakery_workflows
from biobakery_workflows import utilities

# import the task to convert from biom to tsv
from biobakery_workflows.tasks.sixteen_s import convert_from_biom_to_tsv_list

# import the files for descriptions and paths
from biobakery_workflows import files

# Build the workflow object; "input" is listed in remove_options and is
# registered again below as a required option.
workflow = Workflow(version="0.1",
                    remove_options=["input"],
                    description="A workflow for stats on wmgx and 16s data sets")

# Replacement "input" option.
workflow.add_argument("input",
                      desc="the folder containing taxonomy and functional data files",
                      required=True)

# Remaining custom options, both required.
workflow.add_argument(
    "project-name",
    desc="the name of the project",
    required=True,
)
workflow.add_argument(
    "input-metadata",
    desc="the metadata file (samples as columns or rows)",
    required=True,
)

コード例 #20

0

ファイルを表示

ファイル: run.py プロジェクト: biobakery/Analysis-workflows-template

import os
from glob import glob
from anadama2 import Workflow
from anadama2.tracked import TrackedExecutable

# Workflow version and short description (update both as needed).
workflow = Workflow(version="0.0.1", description="Analysis Template")

# Additional custom options for run.py. Both defaults are strings.
workflow.add_argument(
    name="lines",
    desc="Number of lines to trim [default: 10]",
    default="10",
)
workflow.add_argument(
    name="metadata",
    desc="Metadata for performing analysis [default: input/metadata.tsv]",
    default="input/metadata.tsv",
)

# Read the options supplied on the command line.
args = workflow.parse_args()

# Record the config file path on the parsed arguments.
args.config = 'etc/config.ini'

# Example workflow.do commands: each lists a directory, sorts the listing and
# redirects it to the file named inside the [t:...] marker.
workflow.do(
    "ls /usr/bin/ | sort > [t:output/global_exe.txt]"
)
workflow.do(
    "ls $HOME/.local/bin/ | sort > [t:output/local_exe.txt]"
)

コード例 #21

0

ファイルを表示

ファイル: kneaddata_workflow.py プロジェクト: biobakery/anadama2

# -*- coding: utf-8 -*-
from anadama2 import Workflow

# build the workflow object: the version is what "--version" reports and
# the description is what "--help" reports when this script is run
workflow = Workflow(
    description="A workflow to run KneadData",
    version="0.1")

# custom command-line arguments for this workflow
workflow.add_argument(
    "kneaddata-db",
    default="/work/code/kneaddata/db/",
    desc="the kneaddata database")
workflow.add_argument(
    "input-extension",
    default="fastq",
    desc="the input file extension")
workflow.add_argument(
    "threads",
    default=1,
    desc="number of threads for knead_data to use")

# parse the command line
args = workflow.parse_args()

# collect the input files that have the requested extension
in_files = workflow.get_input_files(extension=args.input_extension)

# name one output file per input file, tagged with "kneaddata"
out_files = workflow.name_output_files(tag="kneaddata", name=in_files)

# add a group of tasks, pairing each input file with its output file,
# that run kneaddata with the database and thread count from the arguments
workflow.add_task_group(
    "kneaddata --input [depends[0]] --output [output_folder] --reference-db [kneaddata_db] --threads [threads]",
    targets=out_files,
    depends=in_files,
    threads=args.threads,
    kneaddata_db=args.kneaddata_db,
    output_folder=args.output)

コード例 #22

0

ファイルを表示

ファイル: isolate_assembly.py プロジェクト: zhaoxia413/biobakery_workflows

import os, fnmatch

# import the workflow class from anadama2
from anadama2 import Workflow
from anadama2.tracked import TrackedExecutable

# import the library of biobakery_workflow tasks for shotgun sequences
from biobakery_workflows.tasks import shotgun, general

# import the utilities functions and config settings from biobakery_workflows
from biobakery_workflows import utilities, config

# build the workflow object: the version is what "--version" reports and
# the description is what "--help" reports when this script is run
workflow = Workflow(
    description="A workflow for isolate assembly",
    version="0.1")

# instantiate the shotgun config settings, then add the custom arguments
workflow_config = config.ShotGun()
workflow.add_argument(
    "species-name",
    required=True,
    desc="the species name")
workflow.add_argument(
    "input-extension",
    choices=["fastq.gz", "fastq", "fq.gz", "fq"],
    default="fastq.gz",
    desc="the input file extension")
workflow.add_argument(
    "threads",
    default=1,
    desc="number of threads/cores for each task to use")
workflow.add_argument(
    "pair-identifier",
    default="_R1_001",
    desc="the string to identify the first file in a pair")
workflow.add_argument(

コード例 #23

0

ファイルを表示

ファイル: has_a_function.py プロジェクト: biobakery/anadama2

from anadama2 import Workflow

# workflow created with the "input" and "output" options removed
workflow = Workflow(remove_options=["input", "output"])

# first task: download the bzip2-compressed counts file with wget
workflow.add_task(
    "wget ftp://public-ftp.hmpdacc.org/HMMCP/finalData/hmp1.v35.hq.otu.counts.bz2 -O [targets[0]]",
    targets="hmp1.v35.hq.otu.counts.bz2")

# second task: decompress the downloaded file
workflow.add_task(
    "bzip2 -d < [depends[0]] > [targets[0]]",
    targets="hmp1.v35.hq.otu.counts",
    depends="hmp1.v35.hq.otu.counts.bz2")


def remove_end_tabs_function(task):
    """Copy the task's first dependency to its first target without trailing whitespace.

    Every line of the file at ``task.depends[0].name`` has its trailing
    whitespace (tabs, spaces and the line ending) removed with
    ``str.rstrip()`` and is then written, followed by a single newline, to
    the file at ``task.targets[0].name``.

    Args:
        task: object with ``depends`` and ``targets`` sequences whose first
            items expose the file path as a ``name`` attribute.
    """
    # The target is opened first (as before), then the input; managing both
    # handles in the same "with" closes the input file as soon as the copy
    # finishes instead of leaving it open until it is garbage-collected.
    with open(task.targets[0].name, 'w') as file_handle_out, \
            open(task.depends[0].name) as file_handle_in:
        for line in file_handle_in:
            file_handle_out.write(line.rstrip() + "\n")


# third task: run the python function above on the decompressed file to
# write a copy with the end tabs removed
workflow.add_task(
    remove_end_tabs_function,
    name="remove_end_tabs",
    targets="hmp1.v35.hq.otu.counts.notabs",
    depends="hmp1.v35.hq.otu.counts")

# run the workflow
workflow.go()

コード例 #24

0

ファイルを表示

ファイル: remove_primers.py プロジェクト: wanliu2019/biobakery_workflows

from anadama2 import Workflow

from biobakery_workflows.tasks import dadatwo

# create the workflow and declare its custom arguments
workflow = Workflow()
workflow.add_argument(
    "fwd-primer",
    required=True,
    desc="forward primer, required for its workflow")
workflow.add_argument(
    "rev-primer",
    required=True,
    desc="reverse primer, required for its workflow")
workflow.add_argument(
    "pair-identifier",
    default="_R1_001",
    desc="the string to identify the first file in a pair")
workflow.add_argument(
    "threads",
    default=1,
    desc="number of threads/cores for each task to use")

# parse the command line
args = workflow.parse_args()

# hand the workflow and the parsed argument values to the dadatwo
# remove_primers task function
dadatwo.remove_primers(
    workflow,
    args.fwd_primer,
    args.rev_primer,
    args.input,
    args.output,
    args.pair_identifier,
    args.threads)

# run the workflow
workflow.go()