Example #1
0
def main():
    """Parse the Ruffus command line and run the "Trench Run" pipeline.

    If target tasks were given on the command line, the parsed arguments are
    handed to ``cmdline.run``; otherwise ``pipeline_run`` is called on
    ``publish_data``.
    """
    args = cmdline.get_argparse(description="Trench Run pipeline").parse_args()

    # Guard clause: no explicit targets means running up to publish_data.
    if not args.target_tasks:
        pipeline_run(publish_data)
        return

    cmdline.run(args)
Example #2
0
def configure(config, args):
    """
    Build the runtime configuration and parse the command line arguments.

    Parameters
    ----------
    config: dict or Namespace
        Settings used to construct the ApusConfig object; a dict is expanded
        into keyword arguments, anything else is passed as ``config=``
    args: list
        Arguments handed to the parser created by ruffus.cmdline

    Returns
    -------
    apusconf: ApusConfig
        The configuration object, with ``logger`` and ``logger_mutex`` set
    option: Namespace
        The parsed command line arguments
    """
    apusconf = (ApusConfig(**config)
                if isinstance(config, dict) else ApusConfig(config=config))

    banner = """
+- Astronomy Pipeline Using ruffuS, specifically tweaked for PostCalib -+
"""
    parser = cmdline.get_argparse(description=banner,
                                  version=ruffus.__version__,
                                  prog='postcalib run ... -a ')
    parser.add_argument('-r', '--redo-all',
                        help='force redo all tasks',
                        action='store_true')
    parser.add_argument('-l', '--list-tasks',
                        help='list the task names and exit',
                        action='store_true')

    # Log and history files default to locations inside the configured logdir.
    parser.set_defaults(
        verbose=['0'],
        log_file=os.path.join(apusconf.logdir, apusconf.log_file),
        history_file=os.path.join(apusconf.logdir, apusconf.history_file))
    option = parser.parse_args(args)

    # Attach the shared logger and its mutex to the configuration object.
    apusconf.logger, apusconf.logger_mutex = make_shared_logger_and_proxy(
        logger_factory, apusconf.jobkey, [option.log_file, option.verbose])
    return apusconf, option
Example #3
0
def parse_command_line():
    '''Build the variant calling pipeline parser and return the parsed arguments'''
    config_help = ('Pipeline configuration file in YAML format, defaults to {}'
                   .format(DEFAULT_CONFIG_FILE))
    jobscripts_help = ('Directory to store cluster job scripts created by the '
                       'pipeline, defaults to {}'.format(DEFAULT_JOBSCRIPT_DIR))

    # "version" is listed in ignored_args; --version is added explicitly below.
    parser = cmdline.get_argparse(ignored_args=["version"],
                                  description='Variant calling pipeline')
    parser.add_argument('--config',
                        default=DEFAULT_CONFIG_FILE,
                        type=str,
                        help=config_help)
    parser.add_argument('--jobscripts',
                        default=DEFAULT_JOBSCRIPT_DIR,
                        type=str,
                        help=jobscripts_help)
    parser.add_argument('--version',
                        version='%(prog)s ' + version,
                        action='version')
    return parser.parse_args()
Example #4
0
def parse_command_line(version):
    '''Parse the RAD-Seq pipeline command line.

    version is the string appended to the program name in the --version output.
    Returns the namespace produced by parse_args().
    '''
    parser = cmdline.get_argparse(
        description='RAD-Seq pipeline',
        ignored_args=["version"],
    )

    # Pipeline configuration file
    parser.add_argument(
        '--config',
        type=str,
        help='Pipeline configuration file in YAML format, defaults to {}'.format(
            DEFAULT_CONFIG_FILE),
        default=DEFAULT_CONFIG_FILE,
    )

    # Location of the generated cluster job scripts
    jobscripts_text = ('Directory to store cluster job scripts created by the '
                       'pipeline, defaults to {}')
    parser.add_argument(
        '--jobscripts',
        type=str,
        help=jobscripts_text.format(DEFAULT_JOBSCRIPT_DIR),
        default=DEFAULT_JOBSCRIPT_DIR,
    )

    # Explicit --version option
    parser.add_argument(
        '--version',
        action='version',
        version='%(prog)s ' + version,
    )

    parsed = parser.parse_args()
    return parsed
Example #5
0
def parse_command_line():
    '''Build the variant discovery pipeline parser and return the parsed arguments'''
    parser = cmdline.get_argparse(ignored_args=["version"],
                                  description='A variant discovery pipeline')

    config_help = 'Pipeline configuration file in YAML format, defaults to {}'
    parser.add_argument('--config',
                        default=DEFAULT_CONFIG_FILE,
                        help=config_help.format(DEFAULT_CONFIG_FILE),
                        type=str)

    jobscripts_help = ('Directory to store cluster job scripts created by the '
                       'pipeline, defaults to {}')
    parser.add_argument('--jobscripts',
                        default=DEFAULT_JOBSCRIPT_DIR,
                        help=jobscripts_help.format(DEFAULT_JOBSCRIPT_DIR),
                        type=str)

    # --mode is restricted to the two values named in its help text.
    mode_help = ('set to "map" to process fastq files up to haplotype caller'
                 ', or "process" to process multiple map directories.  Default is map')
    parser.add_argument('--mode',
                        choices=['map', 'process'],
                        default='map',
                        help=mode_help,
                        type=str)

    parser.add_argument('--version',
                        version='%(prog)s ' + version,
                        action='version')
    return parser.parse_args()
Example #6
0
    def test_argparse(self):
        """
        Same as above but setting up options using ruffus.cmdline.get_argparse
            --verbose on its own increases the verbosity by one
            --verbose NNN (re)sets the verbosity to NNN whatever the previous state
            --verbose NNN:MMM sets the verbose_abbreviated_path to MMM

        Each case sets sys.argv, parses it, runs handle_verbose() and checks the
        resulting ``verbose`` and ``verbose_abbreviated_path`` values.
        sys.argv is restored afterwards so other tests are not affected.
        """
        import sys

        parser = cmdline.get_argparse(description='WHAT DOES THIS PIPELINE DO?')

        # (arguments after the program name, expected verbose,
        #  expected verbose_abbreviated_path)
        cases = [
            (["--verbose", "--verbose=2"], 2, None),
            (["--verbose", "--verbose=3", "--verbose"], 4, None),
            (["--verbose", "--verbose=5:3", "--verbose"], 6, 3),
            (["--verbose", "--verbose=5:3", "--verbose", "--verbose=7",
              "--verbose"], 8, 3),
            (["--verbose", "--verbose=5:3", "--verbose", "--verbose=7:5",
              "--verbose"], 8, 5),
        ]

        saved_argv = sys.argv
        try:
            for cmd_args, expected_verbose, expected_path in cases:
                sys.argv = ["test"] + cmd_args
                options = parser.parse_args()
                handle_verbose(options)
                # assertEqual reports both values on failure, unlike
                # assertTrue(a == b)
                self.assertEqual(options.verbose, expected_verbose)
                self.assertEqual(options.verbose_abbreviated_path,
                                 expected_path)
        finally:
            sys.argv = saved_argv
Example #7
0
    )
    sys.exit(ExitCode.missing_dependency)


# Encoder checks for the two image formats, run at import time
check_pil_encoder('jpg', 'JPEG')
check_pil_encoder('zlib', 'PNG')


# -------------
# Parser

# The names in ignored_args are passed to get_argparse as-is.
parser = cmdline.get_argparse(
    prog="ocrmypdf",
    version=VERSION,
    description="Generate searchable PDF file from an image-only PDF file.",
    fromfile_prefix_chars='@',
    ignored_args=[
        'touch_files_only',
        'recreate_database',
        'checksum_file_name',
        'key_legend_in_graph',
        'draw_graph_horizontally',
        'flowchart_format',
        'forced_tasks',
        'target_tasks',
        'use_threads',
        'jobs',
        'log_file',
    ])

# Positional arguments: input first, then output
parser.add_argument('input_file',
                    help="PDF file containing the images to be OCRed")
parser.add_argument('output_file', help="output searchable PDF file")

# -l may be repeated; values are collected by the 'append' action
parser.add_argument('-l', '--language',
                    help="languages of the file to be OCRed",
                    action='append')
parser.add_argument(
Example #8
0
        "See installation instructions for your platform here:\n"
        "    https://pillow.readthedocs.org/installation.html")
    sys.exit(ExitCode.missing_dependency)


# Encoder checks for the two image formats, run at import time
check_pil_encoder('jpg', 'JPEG')
check_pil_encoder('zlib', 'PNG')

# -------------
# Parser

# The names in ignored_args are passed to get_argparse as-is.
parser = cmdline.get_argparse(
    description="Generate searchable PDF file from an image-only PDF file.",
    prog="ocrmypdf",
    version=VERSION,
    fromfile_prefix_chars='@',
    ignored_args=[
        'touch_files_only', 'recreate_database', 'checksum_file_name',
        'key_legend_in_graph', 'draw_graph_horizontally', 'flowchart_format',
        'forced_tasks', 'target_tasks', 'use_threads', 'jobs', 'log_file'])

# Positional arguments: input first, then output
parser.add_argument(
    'input_file',
    help="PDF file containing the images to be OCRed")
parser.add_argument(
    'output_file',
    help="output searchable PDF file")

# -l may be repeated; values are collected by the 'append' action
parser.add_argument(
    '-l', '--language',
    help="languages of the file to be OCRed",
    action='append')
parser.add_argument(
    '-j',
    '--jobs',
Example #9
0
        elif not params['alignments'].has_key('genome_index') or params['alignments']['genome_index'] is None or\
             type(params['alignments']['genome_index']) is not list or len(params['alignments']['genome_index']) != 2:
            error = 'no proper GMAP index provided'
        elif not params['annotations'].has_key('genome_fasta') or params['annotations']['genome_fasta'] is None:
            error = 'no genome fasta provided'
        elif not params['annotations'].has_key('gtf') or params['annotations']['gtf'] is None:
            error = 'no gtf provided'

        if not args.only_splicing:
            if not params['alignments'].has_key('transcripts_fasta') or params['alignments']['transcripts_fasta'] is None:
                error = 'no transcripts fasta provided'

    if error is not None:
        sys.exit('TAP ABORTED: %s' % error.upper())

# Command-line definition for the TAP pipeline
parser = cmdline.get_argparse(description='TAP pipeline')

# Positional arguments
parser.add_argument('sample', help='sample name', type=str)
parser.add_argument('outdir', help='output directory', type=str)

# Inputs and resources
parser.add_argument('--bf', help='path to bloomfilter', type=str)
parser.add_argument('--fq', help='input gzipped fastqs', type=str, nargs='+')
parser.add_argument('--fq_list', help='text file of input fastq paths',
                    type=str)
parser.add_argument('--nprocs', help='number of threads/processes. Default=32',
                    type=int, default=32)
parser.add_argument('--remove_fq', help='remove intermediate fastqs',
                    action='store_true')

# Switches that restrict the run to a single stage
parser.add_argument('--only_assembly', action='store_true')
parser.add_argument('--only_sv', action='store_true')
parser.add_argument('--only_splicing', action='store_true')

parser.add_argument('--genome_bam',
                    help='genome bam(for detecting splice-site variants)',
                    type=str)
parser.add_argument('--params', help='parameters file', type=str)

# Options shown under their own "assembly" group in the help output
assembly = parser.add_argument_group('assembly')
assembly.add_argument('--k', help='k sizes for assembly', type=int, nargs='+')
assembly.add_argument('--readlen', help='read length', type=int)
Example #10
0
#!/usr/bin/env python
from glob import glob
import glob
import sys, os, fnmatch
import re
from ruffus import *
import ruffus.cmdline as cmdline
from subprocess import check_call
from ruffus.drmaa_wrapper import run_job, error_drmaa_job
import drmaa

### parse command line for arguments

parser = cmdline.get_argparse(description="Chela's Pipeline")

# Input locations
parser.add_argument('-i', '--input',
                    help="Fastq files", metavar="FILE")
parser.add_argument('--cuffdiff_file',
                    help="cuffdiff comparison instructions", metavar="FILE")
parser.add_argument('--basedir',
                    help="base directory", metavar="DIR")

# Tool and annotation choices
parser.add_argument('--aligner',
                    help="choice of aligner; enter hisat or star",
                    metavar="choice")
parser.add_argument('--gtf',
                    help="choice of gtf; enter all_transcripts, all_coding or ercc",
                    metavar="choice")
#parser.add_argument('--flat_gff', metavar="choice", help = "flat gff for DEXseq")
parser.add_argument('--kallisto', help="use kallisto?", metavar="choice")
parser.add_argument('--species', help="species", metavar="choice")
parser.add_argument('--stringtie', help="use stringtie?", metavar="choice")

options = parser.parse_args()

# Copy the options used below into module-level names.
cuffdiff_file = options.cuffdiff_file
basedir = options.basedir
aligner = options.aligner
kallisto = options.kallisto
stringtie = options.stringtie

# True only when the aligner option is exactly "hisat"
hisat_check = (aligner == "hisat")
Example #11
0
import sys
import os.path
import logging
import os
from Bio import SeqIO
import re
import argparse

# Environment-modules setup, attempted only when MODULESHOME is set
MODULES_KEY = 'MODULESHOME'
if MODULES_KEY in os.environ:
    modules_init = os.path.join(os.environ[MODULES_KEY], 'init/python.py')
    execfile(modules_init)
    # need this for faToTwoBit
    module('load', 'blat/default')

# Command line: Ruffus options plus a working directory and two input files
parser = cmdline.get_argparse(description='Use Ruffus to process .out files from genblastA')
parser.add_argument('--working_directory', '-W', default='.')
parser.add_argument('genome_filename', help='FASTA format genome file, must end in .fa or .fasta')
parser.add_argument('hints_filename', help='Augustus hints filename, generate from exonerate')
args = parser.parse_args()

logger, logger_mutex = cmdline.setup_logging(__name__, args.log_file, args.verbose)

# Pattern for the accepted FASTA extensions; the script then moves into the
# requested working directory.
FASTA_RE = r'\.(fa|fasta)$'
os.chdir(args.working_directory)


def safe_open(filename, mode='r'):
Example #12
0
Ruffus pipeline for simple bowtie alignment

"""
from ruffus import *
from big_wig_extras import BigWigExtras
import ruffus.cmdline as cmdline
import subprocess
import logging
import os
import pprint
import re
import time

parser = cmdline.get_argparse(description='Given a directory of sorted bam files, convert them to adjusted bigWigs')

# Program arguments  -- Most go straight to bowtie
parser.add_argument("--dir", required=True, help="Fullpath to the directory where the BAMS are located")
parser.add_argument("--size", help="Fullpath to size file")
#parser.add_argument("--reads", help="Fullpath to read stats file", required=True)
parser.add_argument("--output", default="./", help="Fullpath to output dir")

# Parse the command line into the options namespace
options = parser.parse_args()

# Kenny loggins
Example #13
0
    pipe.transform(
        name="convert_csv_files_to_tsv",
        task_func=csv_to_tsv,
        input=output_from("create_three_new_files"),
        filter=suffix(".csv"),
        output=".tsv",
    )

    pipe.transform(
        name="calculate_md5",
        task_func=md5,
        input=output_from("convert_csv_files_to_tsv"),
        filter=suffix(".tsv"),
        output=".md5sum",
    )

    return pipe


if __name__ == "__main__":
    # "jobs" is passed in ignored_args; the thread count is set in the
    # cmdline.run call below.
    parser = cmdline.get_argparse(description="CNV Calling",
                                  ignored_args=["jobs"])

    options = parser.parse_args()
    # Keep the Ruffus history database inside the work directory.
    options.history_file = os.path.join(WORK_DIR, ".ruffus_history.sqlite")

    pipeline = build_pipeline()

    # Fixed: the keyword was misspelled "multithead". The correct pipeline_run
    # option name is "multithread".
    # NOTE(review): the misspelled keyword was presumably dropped without
    # effect, so this now actually requests 3 threads -- confirm intended.
    cmdline.run(options, multithread=3)
import sys
import os.path
import logging
import os
from Bio import SeqIO
import re
import argparse

# Environment-modules setup, attempted only when MODULESHOME is set
MODULES_KEY = 'MODULESHOME'
if MODULES_KEY in os.environ:
    modules_init = os.path.join(os.environ[MODULES_KEY], 'init/python.py')
    execfile(modules_init)
    # need this for faToTwoBit
    module('load', 'blat/default')

# Command line: Ruffus options plus a working directory and two input files
parser = cmdline.get_argparse(
    description='Use Ruffus to process .out files from genblastA')
parser.add_argument('--working_directory', '-W', default='.')
parser.add_argument(
    'genome_filename',
    help='FASTA format genome file, must end in .fa or .fasta')
parser.add_argument(
    'hints_filename',
    help='Augustus hints filename, generate from exonerate')
args = parser.parse_args()

logger, logger_mutex = cmdline.setup_logging(
    __name__, args.log_file, args.verbose)

# Pattern for the accepted FASTA extensions; the script then moves into the
# requested working directory.
FASTA_RE = r'\.(fa|fasta)$'
os.chdir(args.working_directory)

def safe_open(filename, mode='r'):
    try:
        file_obj = open(filename, mode)
    except IOError as e:
Example #15
0
# ruffus imports
from ruffus import *
import ruffus.cmdline as cmdline
import logging, time, os, subprocess
from ebseq_extras import EbseqExtras

# EMAIL
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders

parser = cmdline.get_argparse(description="This is a pipeline for RSEM alignment and subsequent DE gene analysis with EBseq")

# Program arguments
parser.add_argument("--dir", required=True, help="Fullpath to the directory where the FASTQ reads are located")
# --cores defaults to the string '10'
parser.add_argument("--cores", default='10', help="Number of cores to run RSEM on")
# --index carries a default but is also marked required
parser.add_argument("--index", required=True, default="/data/refs/hg19/hg19-RSEM",
                    help="Fullpath to the RSEM index in: /full/file/path/basename form")
    #
    #   symbolic link relative to original directory so that the entire path
    #       can be moved around with breaking everything
    #
    os.symlink(
        os.path.relpath(os.path.abspath(input_file),
                        os.path.abspath(os.path.dirname(soft_link_name))),
        soft_link_name)


# Host-specific settings are read from "<hostname>.sys.cfg"
hostname = socket.gethostname()
sys_cfg = ConfigParser()
sys_cfg.read('{}.sys.cfg'.format(hostname))


parser = cmdline.get_argparse(description='Perform exome analysis on alignment files in bam format using GATK.')
# The GATK default comes from the [program] section of the config read above
parser.add_argument('--gatk', help='path to GATK jar file', default=sys_cfg.get('program', 'gatk'))
parser.add_argument("input_bams", nargs='*')
parser.add_argument('--ref', required=True,
                    help='specify which reference to use. It should be consistent with the reference used in alignment.')
parser.add_argument(
    '--intervals',
    help=
    "One or more genomic intervals over which to operate. GATK engine parameter."
Example #17
0
from ruffus import *
from ruffus.cmdline import MESSAGE
from collections import defaultdict
import ruffus.cmdline as cmdline
import subprocess
import glob
import re
import os
import argparse
import sys
import time

DEFAULTADAPT = "AGATCGGAAGAGCACACG"

parser = cmdline.get_argparse(description="ChemModSeqPipeline, a pipeline for processing ChemModSeq data")

# Forward and reverse read files: zero or more paths each, None when omitted
parser.add_argument("-f", dest="forwardreads", nargs="*", default=None,
                    metavar="data_1.fastq",
                    help="the path to your fastq files with forward reads.")
parser.add_argument("-r", dest="reversereads", nargs="*", default=None,
                    metavar="data_2.fastq",
                    help="the path to your fastq files with reverse reads.")
parser.add_argument("-g",
                    "--gtf",
Example #18
0
    return sip_pipe


# test_org_list = ["/current_projects/genomic_purity/test_files/Acaryochloris_marina_MBIC11017_uid12997"]
# pipeline1a = make_sipp(org_list = test_org_list)

###############################################################################
##
## Command-line interface
##
###############################################################################

import ruffus.cmdline as cmdline

# Ruffus options plus one option naming the YAML parameter file
parser = cmdline.get_argparse(
    version="genomic_contamination_study_pipe.py v. 0.0.0.9000",
    description='Pipelines for genomic contaminant study')

# parser.add_argument('--pipeline', "-p",
#                     type=str,
#                     choices = ['sim_pure', 'sim_contam','real_pure','test_pipe'],
#                     help="Defining which pipeline to run")

parser.add_argument('--config_file', "-cf",
                    #metavar="config_file",
                    help="yaml file with pipeline parameters",
                    type=str)

options = parser.parse_args()
Example #19
0
##########################################################
# Input: output of runDifferentialExpression function (.tsv files)
# Output: a venn diagram showing overlap between DEGs
# Type of operation: many-to-1
# Ruffus decorator used: merge


@follows(volcanoPlot)  # volcanoPlot has to run before this step
@merge(runDifferentialExpression, 'data/differential_genes-venn_diagram.png')
def testPipelineStep(infiles, outfile):
    """Merge the differential expression outputs into one venn diagram.

    infiles are the outputs of runDifferentialExpression; outfile is the
    single png path named in the @merge decorator. The plotting itself is
    delegated to run_r_job.
    """
    # Announce the step
    message = '##### Step 4. Plotting venn diagram ({})...'.format(outfile)
    print(message)

    # Hand over to the R job, run locally
    run_r_job('plot_venn_diagram', infiles, outfile, run_locally=True)


##################################################
##################################################
########## Run pipeline
##################################################
##################################################
# Get the options specified on the command line.
# This is a module-level statement, so the arguments are parsed whenever the
# file is executed, including on import.
options = cmdline.get_argparse().parse_args()

# Run the indicated steps, but only when executed as a script
if __name__ == '__main__':
    cmdline.run(options)
# Outside the __main__ guard: printed every time the module is executed
print('Done!')
Example #20
0
# ruffus imports
from ruffus import *
import ruffus.cmdline as cmdline
import logging, time, os, subprocess
from ebseq_extras import EbseqExtras 

# EMAIL
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders

parser = cmdline.get_argparse(
    description=
    "This is a pipeline for RSEM alignment and subsequent DE gene analysis with EBseq"
)

# Program arguments
parser.add_argument(
    "--dir",
    required=True,
    help="Fullpath to the directory where the FASTQ reads are located")
parser.add_argument(
    "--cores",
    default='10',
    help="Number of cores to run RSEM on")
parser.add_argument(
    "--index",
    required=True,
    default="/data/refs/hg19/hg19-RSEM",
    help="Fullpath to the RSEM index in: /full/file/path/basename form")

parser.add_argument(
    "--output",
    default="./",
    help="Fullpath to output directory")
parser.add_argument(
    "--conf",
    required=True,
    help="Fullpath to conf tsv file, (<sample><frag-mean><frag-sd><cond>)")

# No type= is given, so a value from the command line arrives as a string
# while the default is a float.
parser.add_argument(
    "--fdr",
    default=0.05,
    help=" false discovery rate to use for DE")

# reporting
parser.add_argument(
    "--emails",
    nargs="+",
    default="*****@*****.**",
    help="Emails to send results too")
Example #21
0
import os
import glob

from ruffus import *
import ruffus.cmdline as cmdline

# Directory holding the paired-end fastq files (string ends with a slash)
DATA_FP="/media/8TB_PLAYGROUND/home/ecl/ext/100_SCID/103_Virome/data_files/"

# glob.glob() returns matches in arbitrary order. Both lists are sorted so
# that the zip() below pairs each R1 file with the R2 file of the same sample
# and does not depend on directory listing order.
fwd_files = sorted(glob.glob(DATA_FP + "*R1.fastq.gz"))
rev_files = sorted(glob.glob(DATA_FP + "*R2.fastq.gz"))

# NOTE(review): zip() stops at the shorter list, so an unmatched R1 or R2
# file is silently dropped -- confirm that is acceptable.
starting_files = list(zip(fwd_files, rev_files))

parser = cmdline.get_argparse(description="Pairs reads using PEAR")
options = parser.parse_args()
    
@transform(starting_files, suffix("R1.fastq.gz"), "assembled.fastq")
def pair_reads(input_files, output_file):
    """Run PEAR on one forward/reverse read pair.

    input_files is indexed as (forward, reverse); output_file is passed to
    PEAR's -o option.

    Raises subprocess.CalledProcessError when pear exits with a non-zero
    status, so a failed run is reported and not silently ignored.
    """
    # Imported locally so the block does not rely on a module-level import.
    import subprocess

    forward_reads = str(input_files[0])
    reverse_reads = str(input_files[1])

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    subprocess.check_call([
        "pear", "-j", "4",
        "-f", forward_reads,
        "-r", reverse_reads,
        "-o", str(output_file),
    ])

cmdline.run(options)
    
Example #22
0
#   The third pipeline is a clone of pipeline1b, registered under its own name
pipeline1c = pipeline1b.clone(new_name="pipeline1c")

#   Set the "originate" files for pipeline1c to ("e.1" and "f.1")
#       Otherwise they would use the original ("c.1", "d.1")
#   NOTE(review): set_output is first called with an empty list, presumably to
#       clear the cloned outputs before the new ones are set -- confirm
pipeline1c.set_output(output=[])
pipeline1c.set_output(output=[tempdir + "/" + ss for ss in ("e.1", "f.1")])

#   Join all pipeline1a-c to pipeline2: the three pipelines become its input
pipeline2 = make_pipeline2()
pipeline2.set_input(input=[pipeline1a, pipeline1b, pipeline1c])

import ruffus.cmdline as cmdline

parser = cmdline.get_argparse(description='Demonstrates the new Ruffus syntax in version 2.6')

# Optional switch, off unless given
parser.add_argument('--cleanup', "-C", help="Cleanup before and after.", action="store_true")

options = parser.parse_args()

#  standard python logger which can be synchronised across concurrent Ruffus tasks
logger, logger_mutex = cmdline.setup_logging(
    __file__, options.log_file, options.verbose)

logger.debug("\tRuffus Version = " + ruffus.__version__)

if options.cleanup:
                    logger.debug("Can't unlink %s" % (soft_link_name))
        with logging_mutex:
            logger.debug("os.symlink(%s, %s)" % (input_file, soft_link_name))
        #
        #   symbolic link relative to original directory so that the entire path
        #       can be moved around with breaking everything
        #
        os.symlink( os.path.relpath(os.path.abspath(input_file),
                    os.path.abspath(os.path.dirname(soft_link_name))), soft_link_name)

# Host-specific settings are read from "<hostname>.sys.cfg"
hostname = socket.gethostname()
sys_cfg = ConfigParser()
sys_cfg.read('{}.sys.cfg'.format(hostname))

parser = cmdline.get_argparse(
    description=
    'Perform exome analysis on alignment files in bam format using GATK.')
# The GATK default comes from the [program] section of the config read above
parser.add_argument(
    '--gatk',
    help='path to GATK jar file',
    default=sys_cfg.get('program', 'gatk'))
parser.add_argument("input_bams", nargs='*')
parser.add_argument(
    '--ref',
    help=
    'specify which reference to use. It should be consistent with the reference used in alignment.',
    required=True)
parser.add_argument(
    '--intervals',
    help=
    "One or more genomic intervals over which to operate. GATK engine parameter."
)
parser.add_argument(
    '--output-dir',
    help="directory for output files.",
    default='.',
    dest='working_dir')

# A fixed argument list is parsed here, not sys.argv.
options = parser.parse_args(
    "--verbose 1 --ref b37 --intervals test_data/sample_target.intervals "
    "-T remove_realign_interval -T remove_realigned_bam -T remove_read_group_file "
    "-T remove_intermediate_vcfs "
    "--output-dir test_output".split())

#  standard python logger which can be synchronised across concurrent Ruffus tasks
logger, logging_mutex = cmdline.setup_logging(
    __name__, options.log_file, options.verbose)
Example #24
0
import sys
import os, errno
import glob
import ruffus as rf
import ruffus.cmdline as cmdline
import pandas as pd
import numpy as np
import os, errno
import yaml
# import logging
from trio import triodb
# import warnings
# warnings.filterwarnings("ignore", message=".*deprecation.*")

# Configuration and command line options
# Ruffus options plus --config, the path of the YAML configuration file
parser = cmdline.get_argparse(description='Pipeline using the TPC-H example.')
parser.add_argument("--config")
options = parser.parse_args()
lg, lm = cmdline.setup_logging(__name__, options.log_file, options.verbose)
# lg.setLevel(logging.INFO)

# Without --config there is nothing to run: show the help text and exit.
if options.config is None:
    # Parenthesised form prints the same text under Python 2 and Python 3
    # (the bare print statement is a SyntaxError on Python 3).
    print("No config supplied.")
    parser.print_help()
    sys.exit()
with open(options.config, 'r') as f:
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects; prefer yaml.safe_load if the config only
    # contains plain YAML types.
    cfg = yaml.load(f)
lg.info('pipeline:: ::config %s' % str(cfg))

SQL_PRINT_MAX = 1000

# =============================================================
Example #25
0
# Load the pipeline configuration; the with-block closes the file handle,
# which the previous bare open() call never did.
with open("aws-config.yml") as config_fh:
    config = yaml.safe_load(config_fh)

#REFERENCE
reference_list = config['reference']
human_decoy = reference_list['human_decoy']

bed_list = config['bed']
fh_bed = bed_list['FH']

software_list = config['software']
gatk_picard = software_list['gatk_picard']
#picard_SortSam = software_list['picard_SortSam']
#mark_duplicates = software_list['mark_duplicates']
#gatk_jar = software_list['gatk']

parser = cmdline.get_argparse(description='Small pipeline for aws')

parser.add_argument("--input")
options = parser.parse_args()

# Run directory taken from --input; it is None when the option is omitted.
run_directory = options.input

#check valid input dir provided
if os.path.isdir(run_directory):
    os.chdir(run_directory)
    log_dir = os.path.join(run_directory, "logs")
    if not os.path.exists(log_dir):
        os.mkdir(log_dir, 0o755)

    extension = ("*.fastq.gz")
    input_files = []
Example #26
0
# :) so i never have to touch excel
import pandas as pd

# for cummerbund
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr

# EMAIL
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders

parser = cmdline.get_argparse(
    description=
    'This pipeline provides a number of funtionalities for working with RNAseq data'
)

# Program arguments
parser.add_argument(
    "--dir",
    required=True,
    help="Fullpath to the directory where the bams are located")
parser.add_argument(
    "--cores",
    default='10',
    help="Number of cores to run cuffdiff on")
parser.add_argument(
    "--output",
    default="./",
    help="Fullpath to output directory")
parser.add_argument("--size", help="Fullpath to size file")
parser.add_argument("--gtf", required=True, help="Fullpath to gtf file")
parser.add_argument(
    "--de-conf",
    required=True,
    help="fullpath to differential expresssion configuration file")

# Gene annotation sources
parser.add_argument(
    "--annotation-db",
    help="fullpath to the sqlite db file, <id><name><desc>")
parser.add_argument(
    "--annotation-file",
    help="fullpath to a tsv file of gene annotations, will create sqlite db")

# reporting
parser.add_argument(
    "--emails",
    nargs="+",
    default="*****@*****.**",
    help="Emails to send DE results too")
Example #27
0
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   options

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

import sys, os
import os.path
# add self to search path for testing
exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
sys.path.append(os.path.abspath(os.path.join(exe_path, "..", "..")))
from ruffus import *
import ruffus.cmdline as cmdline

# argparse only expands the "%(prog)s" placeholder; the optparse-style
# "%prog" used before would be printed literally in the version text.
parser = cmdline.get_argparse(
    description='Tests legacy @files_re with combine()',
    version="%(prog)s 1.0")

parser.add_argument("-D",
                    "--debug",
                    action="store_true",
                    help="Make sure output is correct and clean up.")
options = parser.parse_args()

#  standard python logger which can be synchronised across concurrent Ruffus tasks
logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file,
                                             options.verbose)

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   imports
Example #28
0
Kyle McChesney

Ruffus pipeline for simple bowtie alignment

"""
from ruffus import *
from big_wig_extras import BigWigExtras
import ruffus.cmdline as cmdline
import subprocess
import logging
import os
import pprint
import re
import time

parser = cmdline.get_argparse(
    description=
    'Given a directory of sorted bam files, convert them to adjusted bigWigs')

# Program arguments  -- Most go straight to bowtie
parser.add_argument(
    "--dir",
    required=True,
    help="Fullpath to the directory where the BAMS are located")
parser.add_argument("--size", help="Fullpath to size file")
#parser.add_argument("--reads", help="Fullpath to read stats file", required=True)
parser.add_argument("--output", default="./", help="Fullpath to output dir")

# Parse the command line into the options namespace
options = parser.parse_args()

# Module logger at INFO level, with the line format used for its messages
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
log_formatter = logging.Formatter(
    '%(asctime)s {%(levelname)s}: %(message)s')
Example #29
0
There is a PowerPoint presentation with details of all the ruffus decorators
and which goes through the functions in this pipeline one by one.

No input is required to run the pipeline.

'''
# ruffus imports
from ruffus import *
from ruffus.combinatorics import *
import ruffus.cmdline as cmdline

import sys
import os

# this block is essential to run ruffus from the command line
parser = cmdline.get_argparse(description='filter_transcripts')
options = parser.parse_args()


@originate(['a_originate.tsv', 'b_originate.tsv'])
def exampleOriginate(outfile):
    '''
    Example of ruffus originate decorator.

    @originate generates new files from scratch (0 to many operation)

    Here, this function generates two new files:
    a_originate.tsv
    b_originate.tsv

    '''
Example #30
0
import ruffus.cmdline as cmdline

# custom functions
#from tophat_extras import TophatExtras

# system imports
import subprocess, logging, os, re, time
import pandas as pd
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders

parser = cmdline.get_argparse(
    description=
    'seq_pipe: A pipeline for performing various NGS analysis tasks')

# this is the only 'required' argument
# will control which pipeline is run
parser.add_argument(
    "--analysis",
    required=True,
    choices=["bowtie", "tophat", "cuffdiff", "DE", "bigWig"],
    help=" What type of analysis to perform")

# Program arguments
parser.add_argument(
    "--input-dir",
    required=True,
    help="Fullpath to the directory where the input files are located")
parser.add_argument(
    "--cores",
    default='3',
    help="Number of cores to run multi-threaded programs on")
parser.add_argument(
    "--index",
    default="/data/refs/hg19/hg19",
    help="Fullpath to the bowtie2 index in: /full/file/path/basename form")
parser.add_argument(
    "--output",
    default="./",
    help="Fullpath to output directory")
parser.add_argument("--size", help="Fullpath to size file")
parser.add_argument("--gtf", help="Fullpath to gtf file")
# No action= is given, so --paired takes a value; False is only the default.
parser.add_argument(
    "--paired",
    default=False,
    help=
    "Indicates whether the reads in --dir are paired_end. MUST FOLLOW _1 _2 convention"
)
Example #31
0
- DE with DESEQ
"""
import logging
import os
import subprocess
import time
from functools import partial

import pandas as pd
import ruffus.cmdline as cmdline
import sh
from ruffus import *

from utils import guess_simple_design_matrix, log_line, make_fastq_list, send_report

# Command-line interface: the parser returned by ruffus.cmdline plus the
# pipeline-specific options registered below.
parser = cmdline.get_argparse(description="RSEM and deseq2 pipeline")

# Required input location.
parser.add_argument(
    "--dir",
    required=True,
    help="Fullpath to the directory where the FASTQ reads are located",
)

# Optional settings, each with a default.
parser.add_argument(
    "--index",
    default="/data/refs/hg19/hg19-rsem",
    help="Fullpath to the bowtie2 index in: /full/file/path/basename form",
)
parser.add_argument(
    "--output",
    default="./",
    help="Fullpath to output directory",
)

# Stored on the parsed namespace as `exp_name`, not `name`.
parser.add_argument(
    "--name",
    dest="exp_name",
    default="rsem-deseq",
    help="Optional experiment name",
)
Example #32
0
import logging
import os
import pprint
import re
import time

# EMAIL
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders

# Parser from ruffus.cmdline, extended with the alignment options below.
parser = cmdline.get_argparse(
    description="Given a directory of NON-paired end reads -- Align them with bowtie"
)

# Program arguments  -- Most go straight to bowtie
# One or more read directories; always required.
parser.add_argument(
    "--dir",
    nargs="+",
    required=True,
    help="Fullpath to the directory where the FASTQ reads are located",
)
# NOTE: the default is the int 10, but no type= is given, so a value supplied
# on the command line arrives as a string.
parser.add_argument(
    "--cores",
    default=10,
    help="Number of cores to run bowtie on",
)
parser.add_argument(
    "--index",
    default="/data/refs/hg19/hg19",
    help="Fullpath to the bowtie2 index in: /full/file/path/basename form",
)
#   The third pipeline is a clone of pipeline1b, created under the new name
#   "pipeline1c".
pipeline1c = pipeline1b.clone(new_name = "pipeline1c")

#   Set the "originate" files for pipeline1c to ("e.1" and "f.1")
#       Otherwise they would use the original ("c.1", "d.1")
#   NOTE(review): set_output is called twice in a row, first with an empty
#       list and then with the two paths under tempdir. Presumably the second
#       call replaces the first -- confirm whether the empty call is
#       intentional before removing it.
pipeline1c.set_output(output = [])
pipeline1c.set_output(output = [tempdir + "/" + ss for ss in ("e.1", "f.1")])

#   Join all pipeline1a-c to pipeline2: the three pipelines are passed as
#   the input of the pipeline returned by make_pipeline2().
pipeline2 = make_pipeline2()
pipeline2.set_input(input = [pipeline1a, pipeline1b, pipeline1c])


import ruffus.cmdline as cmdline
# Command-line handling: the ruffus.cmdline parser plus a --cleanup/-C switch.
parser = cmdline.get_argparse(
    description="Demonstrates the new Ruffus syntax in version 2.6",
)
parser.add_argument(
    "--cleanup",
    "-C",
    help="Cleanup before and after.",
    action="store_true",
)
options = parser.parse_args()

# Standard Python logger that can be synchronised across concurrent Ruffus
# tasks; it is configured from the parsed log_file and verbose options and
# comes with its mutex.
logger, logger_mutex = cmdline.setup_logging(
    __file__,
    options.log_file,
    options.verbose,
)

logger.debug("\tRuffus Version = " + ruffus.__version__)
import subprocess
import platform
import glob
import re
import os
import argparse
import sys
import time

# Field names of a GFF record (presumably in column order).
GFF_FIELDS = [
    "seqname",
    "source",
    "feature",
    "start",
    "end",
    "score",
    "strand",
    "frame",
    "attributes",
]

# Parser from ruffus.cmdline, extended with the CRAC-specific options below.
parser = cmdline.get_argparse(
    description="CRAC pipeline for processing single-end multiplexed CRAC data",
)

# Zero or more FASTQ paths, stored as `forwardreads`; None when -f is absent.
parser.add_argument(
    "-f",
    nargs="*",
    default=None,
    dest="forwardreads",
    metavar="data_1.fastq data_2.fastq ...",
    help="the path to your fastq read files.",
)

# Annotation file, stored as `gtf`; None when not supplied.
parser.add_argument(
    "-g",
    "--gtf",
    default=None,
    dest="gtf",
    metavar="rRNA.gtf",
    help="the path to your gtf annotation file",
)
parser.add_argument("-c",
                    "--chromosome",
Example #35
0
# slurm options
#SBATCH --ntasks=1
#SBATCH --job-name="pipeline"
#SBATCH --mail-type=ALL
#SBATCH --output=ruffus/pipeline.%j.log

# imports
import os
import re
import datetime
from ruffus import *
import ruffus.cmdline as cmdline
from subprocess import Popen, PIPE

# command-line options: the ruffus.cmdline parser plus the JGI credentials
parser = cmdline.get_argparse(description="Run LMD analysis pipeline.")
parser.add_argument(
    "--email", "-e",
    dest="jgiLogon",
    type=str,
    help="Logon email address for JGI",
)
parser.add_argument(
    "--password", "-p",
    dest="jgiPassword",
    type=str,
    help="JGI password",
)
options = parser.parse_args()

# Module-level copies of the two parsed credential values.
jgiLogon, jgiPassword = options.jgiLogon, options.jgiPassword

# parse SLURM job-id; `slurm_jobid` is only bound when the environment
# variable is present.
if "SLURM_JOBID" in os.environ:
    slurm_jobid = os.environ["SLURM_JOBID"]
Example #36
0
						suffix("-appendAlign.sam"),"-sam-report.tsv")
						
	sip_pipe.set_head_tasks([sip_pipe[task_originate]])

	return sip_pipe
# test_org_list = ["/current_projects/genomic_purity/test_files/Acaryochloris_marina_MBIC11017_uid12997"]
# pipeline1a = make_sipp(org_list = test_org_list)

###############################################################################
##
## Command-line interface
##
###############################################################################

import ruffus.cmdline as cmdline
# Parser from ruffus.cmdline, given this script's own version string.
parser = cmdline.get_argparse(
    description="Pipelines for genomic contaminant study",
    version="genomic_contamination_study_pipe.py v. 0.0.0.9000",
)

# Disabled option, kept for reference:
# parser.add_argument(
#     "--pipeline", "-p",
#     type=str,
#     choices=["sim_pure", "sim_contam", "real_pure", "test_pipe"],
#     help="Defining which pipeline to run",
# )

# Path to the parameter file, stored on the namespace as `config_file`.
parser.add_argument(
    "--config_file",
    "-cf",
    type=str,
    # metavar="config_file",
    help="yaml file with pipeline parameters",
)

options = parser.parse_args()


Example #37
0
import os
import glob

from ruffus import *
import ruffus.cmdline as cmdline

# Directory that is searched for the paired-end FASTQ files (hard-coded).
DATA_FP = "/media/8TB_PLAYGROUND/home/ecl/ext/100_SCID/103_Virome/data_files/"

# glob.glob() returns matches in arbitrary order, so the two lists are sorted
# before being zipped; otherwise an R1 file could be paired with the R2 file
# of a different sample. This assumes mates share the same name apart from
# "R1"/"R2", so that both lists sort into the same sample order.
fwd_files = sorted(glob.glob(DATA_FP + "*R1.fastq.gz"))
rev_files = sorted(glob.glob(DATA_FP + "*R2.fastq.gz"))

# List of (forward, reverse) path pairs. zip() stops at the shorter list, so a
# file without a mate at the end of the longer list is dropped.
starting_files = list(zip(fwd_files, rev_files))

# Parser from ruffus.cmdline; no script-specific options are added here.
parser = cmdline.get_argparse(
    description="Pairs reads using PEAR",
)
options = parser.parse_args()


@transform(starting_files, suffix("R1.fastq.gz"), "assembled.fastq")
def pair_reads(input_files, output_file):
    """Run the external ``pear`` program on one forward/reverse read pair.

    input_files: sequence whose item 0 is passed to ``-f`` and item 1 to
        ``-r``.
    output_file: value passed to ``-o``.

    The command is executed as an argument list rather than a shell string,
    so paths containing spaces or shell metacharacters reach ``pear``
    unchanged. As before, the exit status of ``pear`` is not checked. One
    difference is that a missing ``pear`` executable now raises OSError
    instead of only printing a shell error.

    NOTE(review): PEAR appears to treat ``-o`` as a base name and append its
    own suffixes -- confirm that the file named by ``output_file`` is really
    what gets created.
    """
    # Local import keeps this block self-contained within the file.
    import subprocess

    command = [
        "pear",
        "-j", "4",
        "-f", str(input_files[0]),
        "-r", str(input_files[1]),
        "-o", str(output_file),
    ]
    subprocess.call(command)


# Hand the parsed command-line options to ruffus.cmdline to run the pipeline.
cmdline.run(options)
Example #38
0
# Command-line parser for the "ocrmypdf" program, created through
# ruffus.cmdline.get_argparse. The description and epilog below are the
# user-facing --help text.
parser = cmdline.get_argparse(
    prog="ocrmypdf",
    version=VERSION,
    # Presumably forwarded to argparse, where an argument starting with '@'
    # names a file holding further arguments -- confirm against
    # ruffus.cmdline.get_argparse.
    fromfile_prefix_chars='@',
    # Names of stock Ruffus options handed over as "ignored"; presumably they
    # are left out of the resulting parser -- TODO confirm.
    ignored_args=[
        'touch_files_only', 'recreate_database', 'checksum_file_name',
        'key_legend_in_graph', 'draw_graph_horizontally', 'flowchart_format',
        'forced_tasks', 'target_tasks', 'use_threads', 'jobs', 'log_file'
    ],
    # Keeps the line breaks of description/epilog as written in the help
    # output instead of re-wrapping them.
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="""\
Generates a searchable PDF or PDF/A from a regular PDF.

OCRmyPDF rasterizes each page of the input PDF, optionally corrects page
rotation and performs image processing, runs the Tesseract OCR engine on the
image, and then creates a PDF from the OCR information.
""",
    epilog="""\
OCRmyPDF attempts to keep the output file at about the same size.  If a file
contains losslessly compressed images, and output file will be losslessly
compressed as well.

PDF is a page description file that attempts to preserve a layout exactly.
A PDF can contain vector objects (such as text or lines) and raster objects
(images).  A page might have multiple images.  OCRmyPDF is prepared to deal
with the wide variety of PDFs that exist in the wild.

When a PDF page contains text, OCRmyPDF assumes that the page has already
been OCRed or is a "born digital" page that should not be OCRed.  The default
behavior is to exit in this case without producing a file.  You can use the
option --skip-text to ignore pages with text, or --force-ocr to rasterize
all objects on the page and produce an image-only PDF as output.

    ocrmypdf --skip-text file_with_some_text_pages.pdf output.pdf

    ocrmypdf --force-ocr word_document.pdf output.pdf

If you are concerned about long-term archiving of PDFs, use the default option
--output-type pdfa which converts the PDF to a standardized PDF/A-2b.  This
converts images to sRGB colorspace, removes some features from the PDF such
as Javascript or forms. If you want to minimize the number of changes made to
your PDF, use --output-type pdf.

If OCRmyPDF is given an image file as input, it will attempt to convert the
image to a PDF before processing.  For more control over the conversion of
images to PDF, use the Python package img2pdf or other image to PDF software.

For example, this command uses img2pdf to convert all .png files beginning
with the 'page' prefix to a PDF, fitting each image on A4-sized paper, and
sending the result to OCRmyPDF through a pipe.  img2pdf is a dependency of
ocrmypdf so it is already installed.

    img2pdf --pagesize A4 page*.png | ocrmypdf - myfile.pdf

""")
Example #39
0
import subprocess
import logging
import os
import pprint
import re
import time

# EMAIL
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
from email.MIMEText import MIMEText
from email.Utils import COMMASPACE, formatdate
from email import Encoders

def _str2bool(value):
    """Convert a command-line token such as 'true', 'no' or '1' to a bool.

    ``type=bool`` must not be used with argparse: bool() of any non-empty
    string is True, so "--wig False" used to enable wig generation. This
    converter maps the usual spellings explicitly.

    Raises ValueError for unrecognised text; argparse turns that into a normal
    usage error.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0"):
        return False
    raise ValueError("expected a boolean value, got %r" % (value,))


# Parser from ruffus.cmdline, extended with the alignment options below.
parser = cmdline.get_argparse(description='Given a directory of NON-paired end reads -- Align them with bowtie')

# Program arguments  -- Most go straight to bowtie
parser.add_argument("--dir", help="Fullpath to the directory where the FASTQ reads are located", required=True, nargs='+')
# NOTE: the default is the int 10, but no type= is given, so a value supplied
# on the command line arrives as a string.
parser.add_argument("--cores", help="Number of cores to run bowtie on", default=10)
parser.add_argument("--index", help="Fullpath to the bowtie2 index in: /full/file/path/basename form", default="/data/refs/hg19/hg19")
parser.add_argument("--output", help="Fullpath to output directory", default="./")
parser.add_argument("--size", help="Fullpath to size file")

# optional arguments to control turning on and off tasks
# Both are parsed through _str2bool so that an explicit "False" really
# disables the task.
parser.add_argument("--wig", help="Whether or not wig files should be generated", type=_str2bool, default=False)
parser.add_argument("--bowtie", help="Whether to use bowtie one instead of two", type=_str2bool, default=False)

# parse the args
options = parser.parse_args()