def parse_arguments():
    """
    Create the PEPATAC collator parser and parse the command line.

    The parser is a ``VersionInHelpParser`` extended with the 'pypiper' and
    'looper' argument groups, plus the collator's own options.

    :return argparse.Namespace: parsed arguments namespace
    """
    base_parser = VersionInHelpParser(prog="PEPATAC_collator",
                                      description='PEPATAC collator',
                                      version=__version__)
    parser = pypiper.add_pypiper_args(base_parser,
                                      groups=['pypiper', 'looper'])

    # String-valued options.
    string_options = (
        ("-n", "--name", "Name of the project to use."),
        ("-r", "--results", "Output results sub directory path."),
    )
    for short_flag, long_flag, help_text in string_options:
        parser.add_argument(short_flag, long_flag, help=help_text, type=str)

    # Boolean switches; each one is off unless given on the command line.
    switches = (
        ("--skip-consensus", "skip_consensus",
         "Do not calculate consensus peaks."),
        ("--skip-table", "skip_table",
         "Do not calculate peak counts table."),
    )
    for flag, dest_name, help_text in switches:
        parser.add_argument(flag,
                            action='store_true',
                            dest=dest_name,
                            default=False,
                            help=help_text)

    return parser.parse_args()
Exemple #2
0
def _parse_cmdl(cmdl):
    """
    Parse the command line of the GEO SRA data downloader.

    :param cmdl: command-line tokens handed to ``parse_args``
    :return: the namespace produced by ``parse_args``
    """
    parser = ArgumentParser(description="Automatic GEO SRA data downloader")

    # Storage folders: both the default and the value shown in the help text
    # come from safe_echo(<variable name>).
    # NOTE(review): safe_echo presumably looks up an environment variable --
    # confirm against its definition.
    folder_options = (
        ("-b", "--bamfolder", "SRABAM", "bam"),
        ("-s", "--srafolder", "SRARAW", "sra"),
    )
    for short_flag, long_flag, var_name, file_kind in folder_options:
        parser.add_argument(
            short_flag, long_flag,
            default=safe_echo(var_name),
            help="Optional: Specify a location to store {} files "
                 "[Default: ${}:".format(file_kind, var_name)
                 + safe_echo(var_name) + "]")

    # A "--picard" option (path to the picard jar, default taken from
    # safe_echo("PICARD")) is currently disabled.

    parser.add_argument("-r", "--srr", nargs="+", required=True,
                        help="SRR files")

    parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "config"])
    return parser.parse_args(cmdl)
Exemple #3
0
def main():
    """
    Run the Hi-C pipeline for one sample.

    Parses the command line, loads the sample YAML found at
    ``args.sample_config``, builds a ``HiChIPSample`` or ``HiCSample`` from
    it, fills in derived sample attributes, starts a pypiper
    ``PipelineManager`` and hands everything to ``process``.

    :raises KeyError: if the sample has neither a 'protocol' nor a
        'library' attribute
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="hic-pipeline", description="Hi-C pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml configs. The file is closed deterministically, and
    # safe_load is used because yaml.load without an explicit Loader is
    # rejected by PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!")

    # Create Sample object
    if series[key] != "HiChIP":
        sample = HiCSample(series)
    else:
        sample = HiChIPSample(series)

    # A sample counts as merged when its data path holds several
    # space-separated entries.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="hic", outfolder=sample.paths.sample_root, args=args)
    # Helper scripts are looked up in the "tools" folder next to this file.
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
Exemple #4
0
def _parse_args(cmdl):
    """
    Build the pipeline parser, parse ``cmdl`` and post-process the result.

    Besides the pypiper arguments (added with ``all_args=True``; see
    http://github.com/epigen/pypiper/command_line_args.md), the parser
    accepts the pipeline-specific options defined below. After parsing, a
    boolean ``paired_end`` attribute is derived from ``single_or_paired``.

    :param cmdl: command-line tokens handed to ``parse_args``
    :return: the parsed (and augmented) namespace
    :raises ValueError: if ``--rrbs-fill`` is negative
    :raises SystemExit: after printing the help text, if no input was given
    """
    from argparse import ArgumentParser

    parser = pypiper.add_pypiper_args(
        ArgumentParser(description='Pipeline'), all_args=True)

    # Pipeline-specific boolean switches.
    switches = (
        (("-t", "--trimgalore"), "trimgalore",
         'Use trimgalore instead of trimmomatic?'),
        (("-e", "--epilog"), 'epilog',
         'Use epilog for meth calling?'),
        (("--pdr",), "pdr",
         'Calculate Proportion of Discordant Reads (PDR)?'),
    )
    for flags, dest_name, help_text in switches:
        parser.add_argument(*flags,
                            dest=dest_name,
                            action="store_true",
                            help=help_text)

    # Pipeline-specific integer options.
    parser.add_argument(
        "--rrbs-fill",
        dest="rrbs_fill",
        type=int,
        default=4,
        help="Number of bases from read end to regard as unreliable and "
             "ignore due to RRBS chemistry")
    parser.add_argument(
        "--dark-bases",
        type=int,
        default=None,
        help="Number of bases from to prepend to R1 from R2 for dark "
             "sequencing")

    args = parser.parse_args(cmdl)

    fill = args.rrbs_fill
    if fill < 0:
        raise ValueError("Negative RRBS fill-in value: {}".format(fill))

    # Translate pypiper method of read type specification into flag-like option.
    args.paired_end = True if args.single_or_paired == "paired" else False

    # Input is required.
    if args.input:
        return args

    parser.print_help()
    raise SystemExit
Exemple #5
0
def main():
    """
    Run the ATAC-seq / DNase-seq pipeline for one sample.

    Parses the command line, loads the sample YAML found at
    ``args.sample_config``, builds a ``DNaseSample`` (protocol "DNase-seq")
    or an ``ATACseqSample`` (anything else), fills in derived sample
    attributes, starts a pypiper ``PipelineManager`` and calls ``process``.

    :return: 1 if the sample config or the output parent is missing (after
        printing the help text), otherwise None
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="atacseq-pipeline",
                            description="ATAC-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None or args.output_parent is None:
        parser.print_help()
        return 1

    # Read in yaml configs; the context manager closes the file handle.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))
    series["sample_root"] = args.output_parent
    print(series)

    # Create Sample object
    if series["protocol"] != "DNase-seq":
        sample = ATACseqSample(series)
    else:
        sample = DNaseSample(series)

    print(sample)

    # A sample is merged when its data source is a list with several entries.
    # `and` short-circuits, so len() is only evaluated for actual lists.
    sample.merged = (isinstance(sample.data_source, list)
                     and len(sample.data_source) > 1)
    sample.paths = AttributeDict(sample.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="atacseq",
                                           outfolder=sample.sample_root,
                                           args=args)
    # Helper scripts are looked up in the "tools" folder next to this file.
    pipe_manager.config.tools.scripts_dir = pjoin(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
Exemple #6
0
def main():
    """
    Run the ChIP-seq / ChIPmentation pipeline for one sample.

    Parses the command line, loads the sample YAML found at
    ``args.sample_config``, builds a ``ChIPmentation`` sample (protocol
    "ChIPmentation") or a ``ChIPseqSample`` (anything else), fills in derived
    sample attributes, starts a pypiper ``PipelineManager`` and calls
    ``process``.

    :return: 1 if no sample config was given (after printing the help text),
        otherwise None
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="chipseq-pipeline",
                            description="ChIP-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs; the context manager closes the file handle.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # Create Sample object
    if series["protocol"] == "ChIPmentation":
        sample = ChIPmentation(series)
    else:
        sample = ChIPseqSample(series)

    # A sample counts as merged when its data source holds several
    # space-separated entries.
    sample.merged = len(sample.data_source.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths(sample.prj)

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="chipseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)

    # Start main function
    process(sample, pipe_manager, args)
Exemple #7
0
def main():
    """
    Run the Kallisto pipeline for one sample.

    Parses the command line, loads the sample YAML found at
    ``args.sample_config`` and the pipeline configuration YAML
    (``args.config_file``, resolved relative to this file's directory), wraps
    both in ``AttributeDict`` and calls ``process``.
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="rnaKallisto",
                            description="Kallisto pipeline")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, all_args=True)
    args = parser.parse_args()

    # Read in yaml configs. Files are closed deterministically, and safe_load
    # is used because yaml.load without an explicit Loader is rejected by
    # PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        sample = AttributeDict(yaml.safe_load(handle))

    path_conf_file = os.path.join(os.path.dirname(__file__), args.config_file)
    # The file mode belongs to open(); it used to be passed to yaml.load,
    # where it was taken as the Loader argument.
    with open(path_conf_file, "r") as handle:
        pipeline_config = AttributeDict(yaml.safe_load(handle))

    # Start main function
    process(sample, pipeline_config, args)
def main():
    """
    Run the STARR-seq pipeline for one sample.

    Parses the command line, builds a ``STARRSeqSample`` from the sample YAML
    found at ``args.sample_config``, fills in derived sample attributes,
    starts a pypiper ``PipelineManager`` and calls ``process``.
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="starrseq-pipeline",
                            description="STARR-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml configs. The file is closed deterministically, and
    # safe_load is used because yaml.load without an explicit Loader is
    # rejected by PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        sample = STARRSeqSample(pd.Series(yaml.safe_load(handle)))

    # A sample counts as merged when its data path holds several
    # space-separated entries.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="starrseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)
    # Helper scripts are looked up in the "tools" folder next to this file.
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
Exemple #9
0
def main():
    """
    Run the RNA-seq pipeline for one sample.

    Parses the command line, builds an ``RNASeqSample`` from the sample YAML
    found at ``args.sample_config``, fills in derived sample attributes,
    starts a pypiper ``PipelineManager`` and calls ``process``.
    """
    # Parse command-line arguments
    parser = ArgumentParser(
        prog="rnaseq-pipeline",
        description="RNA-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()

    # Read in yaml configs. The file is closed deterministically, and
    # safe_load is used because yaml.load without an explicit Loader is
    # rejected by PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        sample = RNASeqSample(pd.Series(yaml.safe_load(handle)))

    # A sample counts as merged when its data path holds several
    # space-separated entries.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="rnaseq", outfolder=sample.paths.sample_root, args=args)
    # Helper scripts are looked up in the "tools" folder next to this file.
    pipe_manager.config.tools.scripts_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "tools")

    # Start main function
    process(sample, pipe_manager, args)
Exemple #10
0
def main():
    """
    Run the Drop-seq pipeline for one sample.

    Parses the command line, loads the sample YAML found at
    ``args.sample_config`` and the pipeline configuration YAML
    (``args.config_file``, resolved relative to this file's directory), wraps
    both in ``AttributeDict`` and calls ``process``.

    :return: 1 if no sample config was given (after printing the help text),
        otherwise None
    """
    # Parse command-line arguments
    parser = ArgumentParser(
        prog="dropseq-pipeline",
        description="Drop-seq pipeline."
    )
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs. Files are closed deterministically, and safe_load
    # is used because yaml.load without an explicit Loader is rejected by
    # PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        sample = AttributeDict(yaml.safe_load(handle))

    config_path = os.path.join(os.path.dirname(__file__), args.config_file)
    with open(config_path, "r") as handle:
        pipeline_config = AttributeDict(yaml.safe_load(handle))

    # Start main function
    process(sample, pipeline_config, args)
Exemple #11
0
def main():
    """
    Run the STARR-seq pipeline for one sample.

    Parses the command line, builds a ``STARRseqSample`` from the sample YAML
    found at ``args.sample_config``, fills in derived sample attributes,
    creates the sample directories, starts a pypiper ``PipelineManager`` and
    calls ``process``.

    :return: 1 if no sample config was given (after printing the help text),
        otherwise None
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="starrseq-pipeline",
                            description="STARR-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, all_args=True)
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml config and create Sample object. The file is closed
    # deterministically, and safe_load is used because yaml.load without an
    # explicit Loader is rejected by PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        sample = STARRseqSample(pd.Series(yaml.safe_load(handle)))

    # A sample counts as merged when its data source holds several
    # space-separated entries.
    sample.merged = len(sample.data_source.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()
    sample.make_sample_dirs()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="starrseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)

    # Start main function
    process(sample, pipe_manager, args)
Exemple #12
0
def _parse_args(cmdl):
    """
    Build the pipeline parser, parse ``cmdl`` and post-process the result.

    Besides the pypiper arguments (added with ``all_args=True``; see
    http://github.com/epigen/pypiper/command_line_args.md), the parser
    accepts ``--epilog`` and ``--single2``. After parsing, a boolean
    ``paired_end`` attribute is derived from ``single_or_paired``.

    :param cmdl: command-line tokens handed to ``parse_args``
    :return: the parsed (and augmented) namespace
    :raises SystemExit: after printing the help text, if no input was given
    """
    from argparse import ArgumentParser

    parser = pypiper.add_pypiper_args(
        ArgumentParser(description='Pipeline'), all_args=True)

    parser.add_argument('-e', '--epilog',
                        action="store_true",
                        default=False,
                        dest='epilog',
                        help='Use epilog for meth calling?')

    # The help literal below is kept verbatim, including the whitespace that
    # follows each line continuation.
    parser.add_argument(
        '--single2',
        action="store_true",
        default=False,
        dest='single2',
        help=
        'Single secondary mode: any reads not mapping in paired-end mode will \
				be aligned using single-end mode, and then analyzed. Only valid for \
				paired-end mode. ')

    args = parser.parse_args(cmdl)

    # Flag-like shorthand for the read type.
    args.paired_end = True if args.single_or_paired == "paired" else False

    # Input is required.
    if args.input:
        return args

    parser.print_help()
    raise SystemExit
Exemple #13
0
def main():
    """
    Run the amplicon pipeline for one sample.

    Parses the command line, makes sure the output directory exists, counts
    pattern-match sizes with ``count_sizes``, derives the editing efficiency
    (percentage of the total count that falls on a non-zero index) and
    writes it to ``<sample_name>.efficiency.csv`` in the output folder.

    :raises OSError: if an output directory cannot be created
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="amplicon-pipeline",
                            description="Amplicon pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser,
                                      groups=["ngs", "looper", "pypiper"])
    args = parser.parse_args()

    print(args)

    print("Processing sample {}.".format(args.sample_name))

    output_folder = os.path.abspath(args.output_parent)

    # Create output directories if not existing
    for path in [args.output_parent, output_folder]:
        if not os.path.exists(path):
            try:
                os.mkdir(path)
            except OSError as err:
                # An `except` clause must name an exception class; the
                # intended message is attached to the error raised here.
                raise OSError(
                    "Cannot create directory '{}': {}".format(path, err))

    # Count length of pattern matches
    # NOTE(review): `sizes` is presumably a pandas Series indexed by size --
    # confirm against count_sizes.
    sizes = count_sizes(fastq_file=args.input,
                        amplicon=args.amplicon,
                        guide_rna=args.guide_rna)

    # Calculate efficiency
    efficiency = (sizes[sizes.index != 0].sum() / float(sizes.sum())) * 100

    # Save
    report_path = os.path.join(output_folder,
                               args.sample_name + ".efficiency.csv")
    with open(report_path, "w") as handle:
        handle.write("{},{}\n".format(args.sample_name, efficiency))

    print("Sample {} has an editing efficiency of {}.".format(
        args.sample_name, efficiency))
    print("Finished processing sample {}.".format(args.sample_name))
def build_argparser():
    """
    Create the argument parser of the read-counting pipeline.

    No pipeline-specific options are added; every argument comes from
    ``pypiper.add_pypiper_args`` (see the "command-line arguments" section
    of the pypiper docs for what the groups provide).

    :return: the parser returned by ``pypiper.add_pypiper_args``
    """
    summary = ("A pipeline to count the number of reads and file size. "
               "Accepts BAM, fastq, or fastq.gz files.")
    base_parser = ArgumentParser(description=summary)

    # Standard argument groups, two individually requested arguments, and
    # the arguments that are marked as required.
    standard_groups = ["pypiper", "common", "ngs", "logmuse"]
    extra_args = ["output-parent", "config"]
    mandatory = ['sample-name', 'output-parent']

    return pypiper.add_pypiper_args(base_parser,
                                    groups=standard_groups,
                                    args=extra_args,
                                    required=mandatory)
Exemple #15
0
RNA NucSeq pipeline
"""

from argparse import ArgumentParser
import os
import sys
import yaml
import subprocess
import re
import pypiper

# Command-line interface
# #######################################################################################
# Start from a plain parser and let pypiper add all of its arguments.
parser = pypiper.add_pypiper_args(
    ArgumentParser(description='Pypiper arguments.'), all_args=True)

# Pipeline-specific options.
parser.add_argument(
    '-e', '--ercc',
    dest='ERCC_assembly',
    type=str,
    default="ERCC92",
    help='ERCC Assembly')
# The default of the ERCC mix is the string "False", not the boolean.
parser.add_argument(
    '-em', '--ercc-mix',
    dest='ERCC_mix',
    default="False",
    help='ERCC mix. If False no ERCC analysis will be performed.')
Exemple #16
0
def _parse_cmdl(cmdl):
    """
    Parse the command line of the SRA data converter.

    :param cmdl: command-line tokens handed to ``parse_args``
    :return: the namespace produced by ``parse_args``
    """
    about = """ The SRA data converter is a wrapper around sra-tools that
    provides convenience functions for converting or deleting sra data in
    various formats.
    """
    parser = ArgumentParser(description=about)

    # Options restricted to a fixed set of choices.
    choice_options = (
        ("-m", "--mode", "convert",
         ["convert", "delete_sra", "delete_bam", "delete_fq"],
         "What do you want to do? Default: convert"),
        ("-f", "--format", "fastq",
         ["fastq", "bam"],
         "Convert to what format? Default: fastq"),
    )
    for short_flag, long_flag, fallback, allowed, help_text in choice_options:
        parser.add_argument(short_flag, long_flag,
                            default=fallback,
                            choices=allowed,
                            help=help_text)

    # Storage folders: both the default and the value shown in the help text
    # come from safe_echo(<variable name>).
    # NOTE(review): safe_echo presumably looks up an environment variable --
    # confirm against its definition.
    folder_options = (
        ("-b", "--bamfolder", "SRABAM", "bam files"),
        ("-q", "--fqfolder", "SRAFQ", "fastq files"),
        ("-s", "--srafolder", "SRARAW", "pipeline output"),
    )
    for short_flag, long_flag, var_name, stored in folder_options:
        parser.add_argument(
            short_flag, long_flag,
            default=safe_echo(var_name),
            help="Optional: Specify a location to store {} "
                 "[Default: ${}:".format(stored, var_name)
                 + safe_echo(var_name) + "]")

    parser.add_argument("--keep-sra",
                        default=False,
                        action="store_true",
                        help="On convert mode, keep original sra data?")

    parser.add_argument("-S", "--sample-name",
                        nargs="+",
                        required=False,
                        metavar="SAMPLE_NAME",
                        help="Name for sample to run")

    parser.add_argument("-r", "--srr",
                        nargs="+",
                        required=True,
                        help="SRR files")

    parser = pypiper.add_pypiper_args(parser,
                                      groups=["config", "logmuse"],
                                      args=["output-parent", "recover"])
    return parser.parse_args(cmdl)
Exemple #17
0
import os.path
import sys
from subprocess import call
import subprocess
import re
import pypiper
import yaml 
from datetime import datetime




# Command-line interface
# #######################################################################################
# Start from a plain parser and let pypiper add all of its arguments.
parser = pypiper.add_pypiper_args(
    ArgumentParser(description='Pypiper arguments.'), all_args=True)

# Pipeline-specific arguments are currently all disabled:
#   -e / --ercc, -em / --ercc-mix, -f (filter) and -cs / --core-seq
#   (CORE-seq mode as an optional parameter).
args = parser.parse_args()

# Flag-like shorthand for the read type.
args.paired_end = True if args.single_or_paired == "paired" else False

###
Exemple #18
0
def main():
    """
    Run the ChIP-seq / ChIPmentation pipeline, including peak calling.

    Parses the command line, loads the sample YAML found at
    ``args.sample_config``, builds a ``ChIPmentation`` or ``ChIPseqSample``
    object, fills in derived sample attributes and starts a pypiper
    ``PipelineManager``. Unless ``args.only_peaks`` is set, the sample is
    processed with ``process``. If the sample has a ``compare_sample``
    attribute, the pipeline then waits for the comparison sample's filtered
    file and runs ``call_peaks``.

    :return: 1 if no sample config was given (after printing the help text),
        otherwise None
    :raises KeyError: if the sample has neither a 'protocol' nor a
        'library' attribute
    """
    # Parse command-line arguments
    parser = ArgumentParser(prog="chipseq-pipeline",
                            description="ChIP-seq pipeline.")
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(parser, groups=["all"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs. The file is closed deterministically, and
    # safe_load is used because yaml.load without an explicit Loader is
    # rejected by PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!"
        )

    # Create Sample object
    if series[key] != "ChIPmentation":
        sample = ChIPseqSample(series)
    else:
        sample = ChIPmentation(series)

    # A sample counts as merged when its data path holds several
    # space-separated entries.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Check read type if not provided
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if not hasattr(sample, "read_type"):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(name="chipseq",
                                           outfolder=sample.paths.sample_root,
                                           args=args)

    # Start main function
    if not args.only_peaks:
        pipe_manager = process(sample, pipe_manager, args)
    else:
        print("Skipped processing sample '{}'.".format(sample.name))

    # If sample does not have a "compare_sample" attribute, finish processing it.
    if not hasattr(sample, "compare_sample"):
        pipe_manager.stop_pipeline()
        print("Finished processing sample '{}'.".format(sample.name))
        return

    # The pipeline will now wait for the comparison sample file to be
    # completed; its path is this sample's filtered file with the sample
    # name swapped for the comparison sample's name.
    pipe_manager._wait_for_file(
        sample.filtered.replace(sample.name, sample.compare_sample))

    # Start peak calling function
    call_peaks(sample, pipe_manager, args)
Exemple #19
0
def main():
    """
    Run the ChIP-seq / ChIPmentation pipeline, including peak calling.

    Parses the command line, loads the sample YAML found at
    ``args.sample_config``, builds a ``ChIPmentation`` or ``ChIPseqSample``
    object, fills in derived sample attributes and starts a pypiper
    ``PipelineManager``. Unless ``args.only_peaks`` is set, the sample is
    processed with ``process``. If the sample has a non-empty
    ``compare_sample`` attribute, the pipeline then waits for the comparison
    sample's filtered file and runs ``call_peaks``.

    :return: 1 if no sample config was given (after printing the help text),
        otherwise None
    :raises KeyError: if the sample has neither a 'protocol' nor a
        'library' attribute
    """
    # Parse command-line arguments
    parser = ArgumentParser(
        prog="chipseq-pipeline",
        description="ChIP-seq pipeline."
    )
    parser = arg_parser(parser)
    parser = pypiper.add_pypiper_args(
        parser, groups=["ngs", "looper", "resource", "pypiper"])
    args = parser.parse_args()
    if args.sample_config is None:
        parser.print_help()
        return 1

    # Read in yaml configs. The file is closed deterministically, and
    # safe_load is used because yaml.load without an explicit Loader is
    # rejected by PyYAML >= 6.
    with open(args.sample_config, "r") as handle:
        series = pd.Series(yaml.safe_load(handle))

    # looper 0.6/0.7 compatibility:
    if "protocol" in series.index:
        key = "protocol"
    elif "library" in series.index:
        key = "library"
    else:
        raise KeyError(
            "Sample does not contain either a 'protocol' or 'library' attribute!")

    # Create Sample object
    if series[key] != "ChIPmentation":
        sample = ChIPseqSample(series)
    else:
        sample = ChIPmentation(series)

    # A sample counts as merged when its data path holds several
    # space-separated entries.
    sample.merged = len(sample.data_path.split(" ")) > 1
    sample.prj = AttributeDict(sample.prj)
    sample.paths = AttributeDict(sample.paths.__dict__)

    # Determine the read type when it is missing or not a recognised value.
    if not hasattr(sample, "ngs_inputs"):
        sample.ngs_inputs = [sample.data_source]
    if (not hasattr(sample, "read_type")
            or sample.read_type not in ['single', 'paired']):
        sample.set_read_type()

    # Shorthand for read_type
    sample.paired = sample.read_type == "paired"

    # Set file paths
    sample.set_file_paths()

    # Start Pypiper object
    # Best practice is to name the pipeline with the name of the script;
    # or put the name in the pipeline interface.
    pipe_manager = pypiper.PipelineManager(
        name="chipseq", outfolder=sample.paths.sample_root, args=args)

    # Start main function
    if not args.only_peaks:
        pipe_manager = process(sample, pipe_manager, args)
    else:
        print("Skipped processing sample '{}'.".format(sample.name))

    # Without a comparison sample (attribute missing or empty string) there
    # is nothing to call peaks against, so processing ends here.
    if not hasattr(sample, "compare_sample") or sample.compare_sample == "":
        pipe_manager.stop_pipeline()
        print("Finished processing sample '{}'.".format(sample.name))
        return

    # The pipeline will now wait for the comparison sample file to be
    # completed; its path is this sample's filtered file with the sample
    # name swapped for the comparison sample's name.
    pipe_manager._wait_for_file(
        sample.filtered.replace(sample.name, sample.compare_sample))

    # Start peak calling function
    call_peaks(sample, pipe_manager, args)
import re
import pandas
import sys
import string

#' Looper setup. Besides the standard pypiper/looper options, the parser takes
#' two extra arguments: the id of the sample currently being processed and the
#' path to the bam file holding its mapped reads (preferably from bsmap).
#' config/pipeline_interface.yaml maps column names of the sample annotation
#' sheet onto these two argument names.

parser = argparse.ArgumentParser(description="Pipeline")
parser.add_argument(
    "--sample_id", "-o",
    help="id of sample to be analyzed")
parser.add_argument(
    "--bam_name",
    help="path to bam file of sample to be analyzed")
parser = pypiper.add_pypiper_args(parser, groups=["pypiper", "looper"])
args = parser.parse_args()

manager = pypiper.PipelineManager(
    name="HETEROGENEITY",
    outfolder=args.output_parent,
    args=args)

#' Folder of the running script, with a trailing slash.
pipe_folder = os.path.dirname(sys.argv[0]) + "/"

#####################################################################################################
#' PART I: Preprocessing
#####################################################################################################
#' Per-sample output folder (with trailing slash); created if missing.
sample_folder = args.output_parent + "/" + args.sample_id + "/"

if not os.path.exists(args.output_parent + "/" + args.sample_id):
    os.makedirs(args.output_parent + "/" + args.sample_id)
Exemple #21
0
def build_argparser():
    """
    Builds argument parser.

    Creates the top-level ``refgenie`` parser, registers one subparser for
    every command listed in SUBPARSER_MESSAGES and attaches the
    command-specific options to each of them.

    :return argparse.ArgumentParser: parser with all subcommands configured
    """

    banner = "%(prog)s - reference genome asset manager"
    additional_description = "\nhttps://refgenie.databio.org"

    parser = VersionInHelpParser(prog="refgenie",
                                 version=__version__,
                                 description=banner,
                                 epilog=additional_description)

    subparsers = parser.add_subparsers(dest="command")

    def add_subparser(cmd, description):
        # The same message serves as the long description and the short help
        return subparsers.add_parser(cmd,
                                     description=description,
                                     help=description)

    sps = {}
    for cmd, desc in SUBPARSER_MESSAGES.items():
        sps[cmd] = add_subparser(cmd, desc)
        # It's required for init
        sps[cmd].add_argument(
            '-c',
            '--genome-config',
            required=(cmd == INIT_CMD),
            dest="genome_config",
            help=
            "Path to local genome configuration file. Optional if {} environment variable is set."
            .format(", ".join(refgenconf.CFG_ENV_VARS)))

    # NOTE(review): with nargs='+' a user-supplied value arrives as a list,
    # while argparse does not wrap this default in a list -- confirm that
    # callers accept both forms.
    sps[INIT_CMD].add_argument(
        '-s',
        '--genome-server',
        nargs='+',
        default=DEFAULT_SERVER,
        # First placeholder is the config attribute name, second the default
        # URL; the arguments were previously passed in the opposite order.
        help="URL(s) to use for the {} attribute in config file. Default: {}.".
        format(CFG_SERVERS_KEY, DEFAULT_SERVER))
    sps[BUILD_CMD] = pypiper.add_pypiper_args(
        sps[BUILD_CMD], groups=None, args=["recover", "config", "new-start"])

    # Add any arguments specific to subcommands.

    sps[BUILD_CMD].add_argument(
        '--tag-description',
        required=False,
        default=None,
        type=str,
        help="Add tag level description (e.g. built with version 0.3.2).")

    sps[BUILD_CMD].add_argument(
        '--genome-description',
        required=False,
        default=None,
        type=str,
        help=
        "Add genome level description (e.g. The mouse mitochondrial genome, released in Dec 2013)."
    )

    sps[BUILD_CMD].add_argument(
        "-d",
        "--docker",
        action="store_true",
        help="Run all commands in the refgenie docker container.")

    sps[BUILD_CMD].add_argument(
        '--assets',
        nargs="+",
        action='append',
        required=False,
        default=None,
        help='Override the default genome, asset and tag of the parents'
        ' (e.g. fasta=hg38/fasta:default gtf=mm10/gencode_gtf:default).')

    sps[BUILD_CMD].add_argument(
        '--files',
        nargs="+",
        action='append',
        required=False,
        default=None,
        help=
        'Provide paths to the required files (e.g. fasta=/path/to/file.fa.gz).'
    )

    sps[BUILD_CMD].add_argument(
        '--params',
        nargs="+",
        action='append',
        required=False,
        default=None,
        help='Provide required parameter values (e.g. param1=value1).')

    sps[BUILD_CMD].add_argument(
        '-v',
        '--volumes',
        nargs="+",
        required=False,
        default=None,
        help='If using docker, also mount these folders as volumes.')

    sps[BUILD_CMD].add_argument(
        '-o',
        '--outfolder',
        dest='outfolder',
        required=False,
        default=None,
        help='Override the default path to genomes folder, which is the '
        'genome_folder attribute in the genome configuration file.')

    sps[BUILD_CMD].add_argument(
        "-q",
        "--requirements",
        action="store_true",
        help="Show the build requirements for the specified asset and exit.")

    sps[BUILD_CMD].add_argument("-r",
                                "--recipe",
                                required=False,
                                default=None,
                                type=str,
                                help="Provide a recipe to use.")

    # add 'genome' argument to many commands
    for cmd in [
            PULL_CMD, GET_ASSET_CMD, BUILD_CMD, INSERT_CMD, REMOVE_CMD,
            GETSEQ_CMD, TAG_CMD, ID_CMD
    ]:
        # genome is mandatory only for the sequence command; compare by
        # equality, since `in` would run a containment test against the
        # GETSEQ_CMD constant instead
        sps[cmd].add_argument("-g",
                              "--genome",
                              required=(cmd == GETSEQ_CMD),
                              help="Reference assembly ID, e.g. mm10.")

    # listing commands accept zero or more genomes
    for cmd in LIST_REMOTE_CMD, LIST_LOCAL_CMD:
        sps[cmd].add_argument("-g",
                              "--genome",
                              required=False,
                              type=str,
                              nargs="*",
                              help="Reference assembly ID, e.g. mm10.")

    for cmd in [
            PULL_CMD, GET_ASSET_CMD, BUILD_CMD, INSERT_CMD, REMOVE_CMD,
            TAG_CMD, ID_CMD
    ]:
        # the help example is extended for the command that returns asset paths
        sps[cmd].add_argument(
            "asset_registry_paths",
            metavar="asset-registry-paths",
            type=str,
            nargs='+',
            help=
            "One or more registry path strings that identify assets  (e.g. hg38/fasta or hg38/fasta:tag"
            + (" or hg38/fasta.fai:tag)." if cmd == GET_ASSET_CMD else ")."))

    for cmd in [PULL_CMD, REMOVE_CMD, INSERT_CMD]:
        sps[cmd].add_argument(
            "-f",
            "--force",
            action="store_true",
            help="Do not prompt before action, approve upfront.")

    sps[PULL_CMD].add_argument("-u",
                               "--no-untar",
                               action="store_true",
                               help="Do not extract tarballs.")

    sps[INSERT_CMD].add_argument("-p",
                                 "--path",
                                 required=True,
                                 help="Relative local path to asset.")

    sps[GETSEQ_CMD].add_argument(
        "-l",
        "--locus",
        required=True,
        help="Coordinates of desired sequence; e.g. 'chr1:50000-50200'.")

    sps[GET_ASSET_CMD].add_argument(
        "-e",
        "--check-exists",
        required=False,
        action="store_true",
        help="Whether the returned asset path should be checked for existence "
        "on disk.")

    # exactly one of --tag / --default has to be given to the tag command
    group = sps[TAG_CMD].add_mutually_exclusive_group(required=True)

    group.add_argument("-t",
                       "--tag",
                       type=str,
                       help="Tag to assign to an asset.")

    group.add_argument("-d",
                       "--default",
                       action="store_true",
                       help="Set the selected asset tag as the default one.")

    sps[SUBSCRIBE_CMD].add_argument(
        "-r",
        "--reset",
        action="store_true",
        help="Overwrite the current list of server URLs.")

    for cmd in [SUBSCRIBE_CMD, UNSUBSCRIBE_CMD]:
        sps[cmd].add_argument(
            "-s",
            "--genome-server",
            nargs='+',
            required=True,
            help=
            "One or more URLs to {action} the {key} attribute in config file.".
            format(action="add to" if cmd == SUBSCRIBE_CMD else "remove from",
                   key=CFG_SERVERS_KEY))

    return parser
Exemple #22
0
def build_argparser():
    """
    Assemble the command-line interface: a top-level parser plus one
    subparser for each supported command.

    :return argparse.ArgumentParser: fully configured parser
    """
    parser = _VersionInHelpParser(
        description="%(prog)s - builds and manages reference genome assemblies",
        epilog="\nhttps://refgenie.databio.org")
    parser.add_argument(
        "-V", "--version",
        action="version",
        version="%(prog)s {v}".format(v=__version__))

    command_parsers = parser.add_subparsers(dest="command")

    # Command name -> message used both as its description and its help text
    command_messages = {
        INIT_CMD: "Initialize a genome configuration.",
        LIST_LOCAL_CMD: "List available local genomes.",
        LIST_REMOTE_CMD: "List available genomes and assets on server.",
        PULL_CMD: "Download assets.",
        BUILD_CMD: "Build genome assets.",
        GET_ASSET_CMD: "Get the path to a local asset.",
        INSERT_CMD: "Insert a local asset into the configuration file."
    }

    sps = {}
    for name, message in command_messages.items():
        sub = command_parsers.add_parser(
            name, description=message, help=message)
        sps[name] = sub
        # Every command takes a config path; only init insists on it
        sub.add_argument(
            '-c', '--genome-config',
            dest="genome_config",
            required=(name == INIT_CMD),
            help="Path to local genome configuration file.")

    server_help = ("URL to use for the genome_server attribute in config file."
                   " Defaults : {}").format(DEFAULT_SERVER)
    sps[INIT_CMD].add_argument(
        '-s', '--genome-server',
        default=DEFAULT_SERVER,
        help=server_help)

    # Let pypiper attach its own options to the build command
    sps[BUILD_CMD] = pypiper.add_pypiper_args(
        sps[BUILD_CMD], groups=None, args=["recover", "config", "new-start"])
    build = sps[BUILD_CMD]

    # Options that only the build command understands
    build.add_argument(
        "-d", "--docker",
        action="store_true",
        help="Run all commands in the refgenie docker container.")
    build.add_argument(
        '-v', '--volumes',
        nargs="+",
        default=None,
        required=False,
        help='If using docker, also mount these folders as volumes')
    build.add_argument(
        '-o', '--outfolder',
        dest='outfolder',
        default=None,
        required=False,
        help='Override the default path to genomes folder, which is the '
        'genome_folder attribute in the genome configuration file.')

    # Commands that operate on specific assets of a specific genome
    for name in (PULL_CMD, GET_ASSET_CMD, BUILD_CMD, INSERT_CMD):
        target = sps[name]
        target.add_argument(
            "-g", "--genome",
            required=True,
            help="Reference assembly ID, e.g. mm10")
        target.add_argument(
            "-a", "--asset",
            nargs='+',
            required=True,
            help="Name of one or more assets (keys in genome config file)")

    sps[PULL_CMD].add_argument(
        "-u", "--no-untar",
        action="store_true",
        help="Do not extract tarballs.")

    sps[INSERT_CMD].add_argument(
        "-p", "--path",
        required=True,
        help="Relative path to asset")

    # Input-file options of the build command are still enumerated in
    # BUILD_SPECIFIC_ARGS and registered here one by one (hidden from the
    # help output); a more flexible mechanism that does not need them to be
    # hard-coded should eventually replace this.
    for extra in BUILD_SPECIFIC_ARGS:
        build.add_argument(
            "--{arg}".format(arg=extra),
            required=False,
            help=SUPPRESS)

    return parser
Exemple #23
0
import sys
import subprocess
import yaml
import pypiper

parser = ArgumentParser(
    description=(
        "A pipeline to count the number of reads and file size. Accepts"
        " BAM, fastq, or fastq.gz files."
    )
)

# Standard pypiper arguments come first. The "pypiper" group carries the
# options pypiper itself uses; per the original note, "common" contributes
# --input, --sample-name and output_parent. The pypiper docs (section
# "command-line arguments") describe the available groups in detail.
parser = pypiper.add_pypiper_args(
    parser,
    groups=["pypiper", "common", "ngs"],
    args=["output-parent", "config"],
    required=['sample-name', 'output-parent'],
)

# Pipeline-specific arguments, if any, belong here.

args = parser.parse_args()

# Both an input and an output location are needed; otherwise show usage and
# terminate.
if not (args.input and args.output_parent):
    parser.print_help()
    raise SystemExit

# Boolean convenience flag derived from the single_or_paired setting.
args.paired_end = args.single_or_paired == "paired"
Exemple #24
0
def build_argparser():
    """
    Builds argument parser.

    Creates the top-level ``refgenie`` parser, one subparser per command in
    SUBPARSER_MESSAGES, a nested set of subparsers below the alias command
    (one per entry in ALIAS_SUBPARSER_MESSAGES), and registers the
    command-specific options on each of them.

    :return argparse.ArgumentParser: parser with all subcommands configured
    """

    banner = "%(prog)s - reference genome asset manager"
    additional_description = "\nhttps://refgenie.databio.org"

    parser = VersionInHelpParser(
        prog="refgenie",
        version=f"{__version__} | refgenconf {rgc_version}",
        description=banner,
        epilog=additional_description,
    )

    subparsers = parser.add_subparsers(dest="command")

    def add_subparser(cmd, msg, subparsers):
        # The same message is used as description and help; the formatter
        # is configured with a wider help column and line width
        return subparsers.add_parser(
            cmd,
            description=msg,
            help=msg,
            formatter_class=lambda prog: HelpFormatter(
                prog, max_help_position=40, width=90),
        )

    sps = {}
    for cmd, desc in SUBPARSER_MESSAGES.items():
        sps[cmd] = add_subparser(cmd, desc, subparsers)
        # alias is nested and alias subcommands require config path
        if cmd == ALIAS_CMD:
            continue
        # It's required for init
        sps[cmd].add_argument(
            "-c",
            "--genome-config",
            required=(cmd == INIT_CMD),
            dest="genome_config",
            metavar="C",
            help=
            "Path to local genome configuration file. Optional if {} environment variable is set."
            .format(", ".join(CFG_ENV_VARS)),
        )
        sps[cmd].add_argument(
            "--skip-read-lock",
            required=False,
            action="store_true",
            help="Whether the config file should not be locked for reading",
        )

    # upgrade: upgrade config and alter file structure to the target version
    sps[UPGRADE_CMD].add_argument(
        "-v",
        "--target-version",
        required=True,
        metavar="V",
        help="Target config version for the upgrade.",
    )
    sps[UPGRADE_CMD].add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Do not prompt before action, approve upfront.",
    )

    sps[INIT_CMD].add_argument(
        "-s",
        "--genome-server",
        nargs="+",
        default=[DEFAULT_SERVER],
        help=
        f"URL(s) to use for the {CFG_SERVERS_KEY} attribute in config file. Default: {DEFAULT_SERVER}.",
    )
    sps[INIT_CMD].add_argument(
        "-f",
        "--genome-folder",
        help="Absolute path to parent folder refgenie-managed assets.",
    )
    sps[INIT_CMD].add_argument(
        "-a",
        "--genome-archive-folder",
        help=
        "Absolute path to parent archive folder refgenie-managed assets; used by refgenieserver.",
    )
    sps[INIT_CMD].add_argument(
        "-b",
        "--genome-archive-config",
        help=
        "Absolute path to desired archive config file; used by refgenieserver.",
    )
    sps[INIT_CMD].add_argument(
        "-u",
        "--remote-url-base",
        help=
        "URL to use as an alternative, remote archive location; used by refgenieserver.",
    )
    sps[INIT_CMD].add_argument(
        "-j",
        "--settings-json",
        help="Absolute path to a JSON file with the key "
        "value pairs to inialize the configuration "
        "file with. Overwritten by itemized specifications.",
    )
    sps[BUILD_CMD] = pypiper.add_pypiper_args(
        sps[BUILD_CMD], groups=None, args=["recover", "config", "new-start"])

    # Add any arguments specific to subcommands.

    sps[BUILD_CMD].add_argument(
        "--tag-description",
        required=False,
        default=None,
        type=str,
        help="Add tag level description (e.g. built with version 0.3.2).",
    )

    sps[BUILD_CMD].add_argument(
        "--genome-description",
        required=False,
        default=None,
        type=str,
        help=
        "Add genome level description (e.g. The mouse mitochondrial genome, released in Dec 2013).",
    )

    sps[BUILD_CMD].add_argument(
        "-d",
        "--docker",
        action="store_true",
        help="Run all commands in the refgenie docker container.",
    )

    sps[BUILD_CMD].add_argument(
        "--map",
        action="store_true",
        help=
        "Run the map procedure: build assets and store the metadata in separate configs.",
    )

    sps[BUILD_CMD].add_argument(
        "--pull-parents",
        action="store_true",
        help=
        "Automatically pull the default parent asset if required but not provided",
    )

    sps[BUILD_CMD].add_argument(
        "--preserve-map-configs",
        action="store_true",
        help=
        "Do not remove the genome configuration files produced in the potential map step of building",
    )

    sps[BUILD_CMD].add_argument(
        "--reduce",
        action="store_true",
        help=
        "Run the reduce procedure: gather the metadata produced with `refgenie build --map`.",
    )

    sps[BUILD_CMD].add_argument(
        "--assets",
        nargs="+",
        action="append",
        required=False,
        default=None,
        help="Override the default genome, asset and tag of the parents"
        " (e.g. fasta=hg38/fasta:default gtf=mm10/gencode_gtf:default).",
    )

    sps[BUILD_CMD].add_argument(
        "--files",
        nargs="+",
        action="append",
        required=False,
        default=None,
        help=
        "Provide paths to the required files (e.g. fasta=/path/to/file.fa.gz).",
    )

    sps[BUILD_CMD].add_argument(
        "--params",
        nargs="+",
        action="append",
        required=False,
        default=None,
        help="Provide required parameter values (e.g. param1=value1).",
    )

    sps[BUILD_CMD].add_argument(
        "-v",
        "--volumes",
        nargs="+",
        required=False,
        default=None,
        help="If using docker, also mount these folders as volumes.",
    )

    sps[BUILD_CMD].add_argument(
        "-q",
        "--requirements",
        action="store_true",
        help="Show the build requirements for the specified asset and exit.",
    )

    sps[BUILD_CMD].add_argument(
        "-r",
        "--recipe",
        required=False,
        default=None,
        type=str,
        help="Provide a recipe to use.",
    )

    # alias: second level of subcommands, selected through "subcommand"
    alias_subparser = sps[ALIAS_CMD]
    alias_subsubparsers = alias_subparser.add_subparsers(dest="subcommand")

    alias_sps = {}
    for cmd, desc in ALIAS_SUBPARSER_MESSAGES.items():
        alias_sps[cmd] = add_subparser(cmd, desc, alias_subsubparsers)
        # the config options skipped for the alias command itself are
        # attached to each of its subcommands instead
        alias_sps[cmd].add_argument(
            "-c",
            "--genome-config",
            required=False,
            dest="genome_config",
            metavar="C",
            help=
            "Path to local genome configuration file. Optional if {} environment variable is set."
            .format(", ".join(CFG_ENV_VARS)),
        )
        alias_sps[cmd].add_argument(
            "--skip-read-lock",
            required=False,
            action="store_true",
            help="Whether the config file should not be locked for reading",
        )

    alias_sps[ALIAS_SET_CMD].add_argument(
        "-a",
        "--aliases",
        metavar="A",
        required=False,
        default=None,
        type=str,
        nargs="+",
        help=
        "Aliases to set; single if the digest is to be retrieved from the server.",
    )
    alias_sps[ALIAS_SET_CMD].add_argument(
        "-d",
        "--digest",
        metavar="D",
        required=False,
        type=str,
        help=
        "Digest to set; leave out if the digest is to be retrieved from the server.",
    )
    alias_sps[ALIAS_SET_CMD].add_argument(
        "-r",
        "--reset",
        action="store_true",
        help=
        "Whether all the aliases should be removed prior to setting new ones.",
    )
    alias_sps[ALIAS_SET_CMD].add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Whether the action should be forced, if genome does not exist.",
    )

    alias_sps[ALIAS_REMOVE_CMD].add_argument(
        "-a",
        "--aliases",
        metavar="A",
        required=False,
        default=None,
        type=str,
        nargs="+",
        help="Aliases to remove.",
    )
    alias_sps[ALIAS_REMOVE_CMD].add_argument("-d",
                                             "--digest",
                                             metavar="D",
                                             required=True,
                                             type=str,
                                             help="Digest to remove.")

    alias_sps[ALIAS_GET_CMD].add_argument(
        "-a",
        "--aliases",
        metavar="A",
        required=False,
        type=str,
        nargs="+",
        help="Aliases to get the digests for.",
    )

    # compare: two positional genomes (each parsed as a one-element list
    # because of nargs=1) and two display switches
    sps[COMPARE_CMD].add_argument(
        "genome1",
        metavar="GENOME1",
        type=str,
        nargs=1,
        help="First genome for compatibility check.",
    )
    sps[COMPARE_CMD].add_argument(
        "genome2",
        metavar="GENOME2",
        type=str,
        nargs=1,
        help="Second genome for compatibility check.",
    )
    sps[COMPARE_CMD].add_argument(
        "-e",
        "--no-explanation",
        action="store_true",
        help="Do not print compatibility code explanation.",
    )
    sps[COMPARE_CMD].add_argument(
        "-f",
        "--flag-meanings",
        action="store_true",
        help="Display compatibility flag meanings.",
    )

    # add 'genome' argument to many commands
    for cmd in [
            PULL_CMD,
            GET_ASSET_CMD,
            GET_REMOTE_ASSET_CMD,
            BUILD_CMD,
            INSERT_CMD,
            REMOVE_CMD,
            GETSEQ_CMD,
            TAG_CMD,
            ID_CMD,
    ]:
        # genome is mandatory only for the sequence command; compare by
        # equality, since `in` would run a containment test against the
        # GETSEQ_CMD constant instead
        sps[cmd].add_argument(
            "-g",
            "--genome",
            required=(cmd == GETSEQ_CMD),
            metavar="G",
            help="Reference assembly ID, e.g. mm10.",
        )

    # listing commands accept zero or more genomes
    for cmd in LIST_REMOTE_CMD, LIST_LOCAL_CMD:
        sps[cmd].add_argument(
            "-g",
            "--genome",
            required=False,
            type=str,
            metavar="G",
            nargs="*",
            help="Reference assembly ID, e.g. mm10.",
        )

    for cmd in [
            PULL_CMD,
            GET_ASSET_CMD,
            GET_REMOTE_ASSET_CMD,
            BUILD_CMD,
            INSERT_CMD,
            REMOVE_CMD,
            TAG_CMD,
            ID_CMD,
    ]:
        # keyword arguments of the positional registry-path argument; the
        # help example is extended for the commands that return asset paths
        build_arg_kwargs = dict(
            metavar="asset-registry-paths",
            type=str,
            nargs="+",
            help=
            "One or more registry path strings that identify assets  (e.g. hg38/fasta or hg38/fasta:tag"
            + (" or hg38/fasta.fai:tag)."
               if cmd in [GET_ASSET_CMD, GET_REMOTE_ASSET_CMD] else ")."),
        )
        # make asset-registry-path argument optional for build command
        # and require it manually in CLI when running a non-reduce build
        if cmd == BUILD_CMD:
            build_arg_kwargs.update({"nargs": "*", "default": None})
        sps[cmd].add_argument("asset_registry_paths", **build_arg_kwargs)

    sps[LIST_LOCAL_CMD].add_argument("-r",
                                     "--recipes",
                                     action="store_true",
                                     help="List available recipes.")

    for cmd in [REMOVE_CMD, INSERT_CMD]:
        sps[cmd].add_argument(
            "-f",
            "--force",
            action="store_true",
            help="Do not prompt before action, approve upfront.",
        )

    sps[REMOVE_CMD].add_argument(
        "-a",
        "--aliases",
        action="store_true",
        help=
        "Remove the genome alias if last asset for that genome is removed.",
    )

    # pull: prompt-related flags are gathered in a dedicated help section
    force_group = sps[PULL_CMD].add_argument_group(
        title="Prompt handling",
        description="These flags configure the pull prompt responses.",
    )

    # --no-overwrite and --force-overwrite cannot be combined
    overwrite_group = force_group.add_mutually_exclusive_group()

    overwrite_group.add_argument("--no-overwrite",
                                 action="store_true",
                                 help="Do not overwrite if asset exists.")

    overwrite_group.add_argument("--force-overwrite",
                                 action="store_true",
                                 help="Overwrite if asset exists.")

    # --no-large and --pull-large cannot be combined
    large_group = force_group.add_mutually_exclusive_group()

    large_group.add_argument("--no-large",
                             action="store_true",
                             help="Do not pull archives over 5GB.")

    large_group.add_argument(
        "--pull-large",
        action="store_true",
        help="Pull any archive, regardless of its size.",
    )

    force_group.add_argument(
        "--size-cutoff",
        type=float,
        default=10,
        metavar="S",
        help=
        "Maximum archive file size to download with no confirmation required (in GB, default: 10)",
    )

    force_group.add_argument(
        "-b",
        "--batch",
        action="store_true",
        help="Use batch mode: pull large archives, do no overwrite",
    )

    sps[INSERT_CMD].add_argument("-p",
                                 "--path",
                                 required=True,
                                 metavar="P",
                                 help="Relative local path to asset.")

    sps[INSERT_CMD].add_argument(
        "-s",
        "--seek-keys",
        required=False,
        type=str,
        metavar="S",
        help="""
        String representation of a JSON object with seek_keys,
        e.g. '{"seek_key1": "file.txt"}'
        """,
    )

    sps[GETSEQ_CMD].add_argument(
        "-l",
        "--locus",
        required=True,
        help="Coordinates of desired sequence; e.g. 'chr1:50000-50200'.",
    )

    sps[GET_ASSET_CMD].add_argument(
        "-e",
        "--check-exists",
        required=False,
        action="store_true",
        help=
        "Whether the returned asset path should be checked for existence on disk.",
    )

    sps[TAG_CMD].add_argument(
        "-f",
        "--force",
        action="store_true",
        help="Do not prompt before action, approve upfront.",
    )

    # exactly one of --tag / --default has to be given to the tag command
    group = sps[TAG_CMD].add_mutually_exclusive_group(required=True)

    group.add_argument("-t",
                       "--tag",
                       type=str,
                       help="Tag to assign to an asset.")

    group.add_argument(
        "-d",
        "--default",
        action="store_true",
        help="Set the selected asset tag as the default one.",
    )

    sps[SUBSCRIBE_CMD].add_argument(
        "-r",
        "--reset",
        action="store_true",
        help="Overwrite the current list of server URLs.",
    )

    for cmd in [SUBSCRIBE_CMD, UNSUBSCRIBE_CMD]:
        sps[cmd].add_argument(
            "-s",
            "--genome-server",
            nargs="+",
            required=True,
            metavar="S",
            help=
            "One or more URLs to {action} the {key} attribute in config file.".
            format(
                action="add to" if cmd == SUBSCRIBE_CMD else "remove from",
                key=CFG_SERVERS_KEY,
            ),
        )

    for cmd in [LIST_REMOTE_CMD, GET_REMOTE_ASSET_CMD, POPULATE_REMOTE_CMD]:
        sps[cmd].add_argument(
            "-s",
            "--genome-server",
            nargs="+",
            required=False,
            metavar="S",
            help="One or more URLs to use. "
            "This information will not persist in the genome config file.",
        )
        sps[cmd].add_argument(
            "-p",
            "--append-server",
            action="store_true",
            help="Whether the provided servers should be appended to the list.",
        )

    for cmd in [POPULATE_REMOTE_CMD, GET_REMOTE_ASSET_CMD]:
        sps[cmd].add_argument(
            "--remote-class",
            metavar="RC",
            type=str,
            default="http",
            help="Remote data provider class, e.g. 'http' or 's3'",
        )

    for cmd in [POPULATE_REMOTE_CMD, POPULATE_CMD]:
        sps[cmd].add_argument("-f",
                              "--file",
                              metavar="F",
                              help="File with registry paths to populate")

    return parser
Exemple #25
0
         'specificity plots')
parser.add_argument(
    "--bedbase-config", dest="bedbase_config", type=str, default=None,
    help="a path to the bedbase configuratiion file")
parser.add_argument(
    "-y", "--sample-yaml", dest="sample_yaml", type=str, required=False,
    help="a yaml config file with sample attributes to pass on more metadata "
         "into the database")
# The two database-commit switches cannot be given together.
exclusive_group = parser.add_mutually_exclusive_group()
exclusive_group.add_argument(
    '--no-db-commit', action='store_true',
    help='whether the JSON commit to the database should be skipped')
exclusive_group.add_argument(
    '--just-db-commit', action='store_true',
    help='whether just to commit the JSON to the database')
parser = pypiper.add_pypiper_args(parser,
                                  groups=["pypiper", "common", "looper", "ngs"])

args = parser.parse_args()

bbc = bbconf.BedBaseConf(filepath=bbconf.get_bedbase_cfg(args.bedbase_config))

# MD5 digest of the raw BED file bytes; the context manager closes the file
# handle right after reading instead of leaving it open.
with open(args.bedfile, 'rb') as bed_fh:
    bed_digest = md5(bed_fh.read()).hexdigest()
bedfile_name = os.path.split(args.bedfile)[1]
# need to split twice since there are 2 exts
fileid = os.path.splitext(os.path.splitext(bedfile_name)[0])[0]
# Outputs go to a digest-named folder below the configured output path.
outfolder = os.path.abspath(os.path.join(
    bbc[CFG_PATH_KEY][CFG_BEDSTAT_OUTPUT_KEY], bed_digest))
json_file_path = os.path.abspath(os.path.join(outfolder, fileid + ".json"))

if not args.just_db_commit:
    pm = pypiper.PipelineManager(name="bedstat-pipeline", outfolder=outfolder,
Exemple #26
0
    action="store",
    type=str,
    nargs="*",
    help=
    "path to the chain file(s) ffacilitating conversion from one assembly to the other"
)
parser.add_argument(
    "-f", "--outfolder",
    required=True,
    type=str,
    help="path to folder where pipeline logs and lifted files will be stored",
)

# Extend the parser with the "pypiper" argument group.
parser = pypiper.add_pypiper_args(
    parser,
    groups=["pypiper"],
    required=["--bedfile", "--genome"],
)
args = parser.parse_args()

# Logs are kept in a dedicated sub-directory of the output folder.
logs_name = "bedlifter_logs"
logs_dir = os.path.join(args.outfolder, logs_name)

# Create the logs directory on first use and tell the user about it.
if not os.path.exists(logs_dir):
    print("bedlifter logs directory doesn't exist. Creating one...")
    os.makedirs(logs_dir)


def main():
    pm = pypiper.PipelineManager(name="bedlifter",
                                 outfolder=logs_dir,
__author__ = "Martin Jaeger"
__copyright__ = "Copyright 2018, Martin Jaeger"
__credits__ = []
__license__ = "GPL3"
__version__ = "0.3"
__maintainer__ = "Martin Jaeger"
__email__ = "*****@*****.**"
__status__ = "development"

########################
### Argument Parsing ###
########################
# Start from a plain parser and let pypiper contribute its "pypiper",
# "resource" and "config" argument groups.
parser = pypiper.add_pypiper_args(
    ArgumentParser(description='Pypiper arguments.'),
    groups=["pypiper", "resource", "config"],
)

# Sample attribute file; stored on the namespace as `sample_config`.
parser.add_argument(
    '-y', '--sample_yaml',
    dest='sample_config',
    type=str,
    help=
    'yaml config file with sample attributes; this file will be generated by looper if submitting multiple jobs in parallel',
)
parser.add_argument(
    '-i',
    '--input_file',
    dest='input_file',
    help=
    'Path to input raw read BAM file(s). Space-separated paths will be merged before processing.',
    nargs='+',
Exemple #28
0
from argparse import ArgumentParser
import os, re
import sys
import os.path
import subprocess
import pypiper
import yaml
import shutil
from datetime import datetime

# Argument Parsing from yaml file
# #######################################################################################
# Start from a plain parser and let pypiper add its "config" group plus a
# handful of individually selected options.
parser = pypiper.add_pypiper_args(
    ArgumentParser(description='Pipeline'),
    groups=["config"],
    args=["sample-name", "recover", "new-start", "output-parent", "genome"],
)

# Pipeline-specific arguments follow.
# The mandatory input directory is stored on the namespace as `input`.
parser.add_argument(
    '-I', '--input-dir',
    dest='input',
    type=str,
    required=True,
    help=
    "path to directory containing input bam files (and narrowpeak files if applicable) (required)",
)
parser.add_argument('-gs',
                    '--genome-size',
                    default="hs",