def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; each one attaches its own
    # arguments to this parser. Registration order is preserved.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    InputOptions().add_options(self)
    GeneralOptions().add_options(self)

    # Pipeline-specific options.
    pipeline = self.add_argument_group("pipeline")
    pipeline.add_argument("--TODO", dest="TODO", default=4, type=int)
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; registration order is kept.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    # NOTE: the InputOptions group is not registered here (it was disabled
    # in the original code); --input-pattern is declared by hand below.
    GeneralOptions().add_options(self)

    # Pipeline-specific options.
    pipeline = self.add_argument_group("pipeline")
    pipeline.add_argument(
        "--flowcell-paths",
        dest="flowcell_paths",
        required=True,
        nargs="+",
        default=[],
    )
    pipeline.add_argument(
        "--input-pattern",
        dest="input_pattern",
        default="*fastq.gz",
        help="pattern for the input FastQ files (default: *fastq.gz)",
    )
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    The numeric options ``--double-threshold``, ``--high-threshold``,
    ``--low-threshold`` and ``--cnv-clustering`` declare an explicit
    ``type`` so that a value given on the command line is converted to a
    number, like their numeric defaults, instead of being stored as a raw
    string.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; registration order is kept.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    InputOptions(input_pattern="*.bed").add_options(self)
    GeneralOptions().add_options(self)

    # Pipeline-specific options.
    pipeline = self.add_argument_group("pipeline")
    pipeline.add_argument("-o", "--circular", action="store_true")
    # Without type=, argparse would keep user-supplied values as str.
    pipeline.add_argument("--double-threshold", default=0.5, type=float)
    pipeline.add_argument(
        "--genbank",
        default=None,
        help="the genbank to annotate the events found",
    )
    pipeline.add_argument(
        "--reference",
        default=None,
        help="the genome reference used to plot GC content",
    )
    pipeline.add_argument("--high-threshold", default=4, type=float)
    pipeline.add_argument("--low-threshold", default=-4, type=float)
    pipeline.add_argument(
        "--mixture-models",
        default=2,
        type=int,
        help=(
            "Number of models to use in the mixture model. (default 2). "
            "No need to change this value. Possibly, you may want to set "
            "to 1 or 3 in some rate occasions. "
        ),
    )
    pipeline.add_argument(
        "--window",
        default=20000,
        type=int,
        help=(
            "Length of the running median window. Keep to 20000 as much "
            "as possible. This allows the detection of CNV up to 10kb. "
            "If longer event are present, increase this window size."
        ),
    )
    pipeline.add_argument("--chunksize", default=5000000, type=int)
    pipeline.add_argument("--binning", default=-1, type=int)
    # Integer like --binning, which shares the same -1 default.
    # NOTE(review): confirm downstream code expects an integer here.
    pipeline.add_argument("--cnv-clustering", default=-1, type=int)
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; registration order is kept.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    InputOptions().add_options(self)
    GeneralOptions().add_options(self)

    # Pipeline-specific options, declared as (flag, keyword-arguments)
    # pairs and registered in this exact order.
    pipeline_specs = (
        (
            "--mapper",
            dict(
                default='bwa',
                choices=['bwa', 'minimap2', 'bowtie2'],
                help="Choose one of the valid mapper",
            ),
        ),
        (
            "--reference-file",
            dict(
                required=True,
                help="You input reference file in fasta format",
            ),
        ),
        (
            "--annotation-file",
            dict(help="Used by the sequana_coverage tool if provided"),
        ),
        (
            "--do-coverage",
            dict(
                action="store_true",
                help="Use sequana_coverage (prokaryotes)",
            ),
        ),
        (
            "--pacbio",
            dict(
                action="store_true",
                help=(
                    "If set, automatically set the input-readtag to None "
                    "and set minimap2 options to -x map-pb"
                ),
            ),
        ),
        (
            "--create-bigwig",
            dict(
                action="store_true",
                help="create the bigwig files from the BAM files",
            ),
        ),
    )
    pipeline = self.add_argument_group("pipeline")
    for flag, settings in pipeline_specs:
        pipeline.add_argument(flag, **settings)
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; registration order is kept.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    InputOptions(add_input_readtag=False).add_options(self)
    GeneralOptions().add_options(self)

    # Pipeline-specific options.
    fastqc_group = self.add_argument_group("sequana_fastqc")
    fastqc_group.add_argument(
        "--method",
        dest="method",
        default="fastqc",
        choices=['fastqc', 'falco'],
        help=(
            "Software to be used to perform QC of input data set, "
            "Standard tool is fastqc (default), but one can use falco, "
            "which is 3-4 faster and produces same plots"
        ),
    )
    fastqc_group.add_argument(
        "--skip-multiqc",
        default=False,
        action="store_true",
        help=(
            "It may happen that multiqc requires lots of memory. "
            "For local run, you may want to swithc multiqc off with "
            "this option"
        ),
    )
    # NOTE: a --data-type option (illumina/nanopore/pacbio/mgi/others) was
    # disabled in the original code and is not registered.

    # Registered directly on the parser rather than on the group above.
    self.add_argument(
        "--run",
        default=False,
        action="store_true",
        help="execute the pipeline directly",
    )
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    The help text of ``--bgzf-compression`` names the flag correctly
    (it previously pointed users to ``--bgzl--compression``, which is not
    a registered option).

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Demultiplexing sometimes requires lots of memory, hence the 64G
    # Slurm settings.
    SlurmOptions(
        queue="biomicspole", memory="64000", cores=16
    ).add_options(self)
    SnakemakeOptions(working_directory="fastq").add_options(self)
    GeneralOptions().add_options(self)

    # Pipeline-specific options.
    pipeline = self.add_argument_group("pipeline")
    pipeline.add_argument(
        "--threads",
        dest="threads",
        default=4,
        type=int,
        help="Number of threads to use during the demultiplexing. ",
    )
    pipeline.add_argument(
        "--barcode-mismatch", dest="mismatch", default=0, type=int
    )
    pipeline.add_argument(
        "--merging-strategy",
        required=True,
        dest="merging_strategy",
        choices=["merge", "none", "none_and_force"],
        help=(
            "Merge Lanes or not. options are : merge, none, "
            "none_and_force. The 'merge' choice merges all lanes. The "
            "'none' choice do NOT merge the lanes. For NextSeq runs, we "
            "should merge the lanes; if users demultiplex NextSeq and "
            "set this option to none, an error is raised. If you still "
            "want to skip the merging step, then set this option to "
            "'none_and_force'"
        ),
    )
    pipeline.add_argument(
        "--bcl-directory",
        dest="bcl_directory",
        required=True,
        help=(
            "Directory towards the raw BCL files. This directory should "
            "contains files such as RunParameters.xml, RunInfo.xml "
        ),
    )
    # NOTE: the option is required, so `default` is never applied when
    # parsing; it only appears in the generated help text.
    pipeline.add_argument(
        "--sample-sheet",
        dest="samplesheet",
        required=True,
        default="SampleSheet.csv",
        help="Sample sheet filename to be used",
    )
    pipeline.add_argument(
        "--no-ignore-missing-bcls",
        dest="no_ignore_missing_bcls",
        action="store_true",
        default=False,
        help=(
            "In bcl2fastq, the option --ignore-missing-bcls implies that "
            "we assume 'N'/'#' for missing calls. \n"
            "In Sequana_demultiplex, we use that option by default. If "
            "you do not want that behviour, but the one from bcl2fastq, "
            "use this flag(--no-ignore-missing-bcls)"
        ),
    )
    pipeline.add_argument(
        "--bgzf-compression",
        dest="bgzf_compression",
        action="store_true",
        default=False,
        help=(
            "turn on BGZF compression for FASTQ files. By default, "
            "bcl2fastq uses this option; By default we don't. Set "
            "--bgzf-compression flag to set it back"
        ),
    )

    # Registered directly on the parser rather than on the group above.
    self.add_argument(
        "--mars-seq",
        default=False,
        action="store_true",
        help=(
            "Set options to --minimum-trimmed-read-length 15 "
            "--mask-short-adapter-reads 15 and do not merge lanes"
        ),
    )
    self.add_argument(
        "--run",
        default=False,
        action="store_true",
        help="execute the pipeline directly",
    )
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; registration order is kept.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    InputOptions().add_options(self)
    GeneralOptions().add_options(self)

    # --- general pipeline options -----------------------------------
    general = self.add_argument_group("pipeline_general")
    general.add_argument(
        "--genome-directory",
        dest="genome_directory",
        default=".",
        required=True,
    )
    general.add_argument(
        "--aligner",
        dest="aligner",
        required=True,
        choices=['bowtie2', 'bowtie1', 'star', "salmon"],
        help="a mapper in bowtie, bowtie2, star",
    )
    general.add_argument(
        "--force-indexing",
        action="store_true",
        default=False,
        help=(
            "If indexing files exists already, but you wish to create "
            "them again, use this option. Note that you will need "
            "permissions for that"
        ),
    )
    general.add_argument(
        "--rRNA-feature",
        default="rRNA",
        help=(
            "Feature name corresponding to the rRNA to be identified in "
            "the input GFF/GTF files"
        ),
    )
    general.add_argument(
        "--contaminant-file",
        default=None,
        help=(
            "A fasta file. If used, the rRNA-feature is not used This "
            "option is useful if you have a dedicated list of rRNA "
            "feature or a dedicated fasta file to search for contaminants"
        ),
    )

    # Cutadapt-related options (registered between the general options,
    # as in the original ordering).
    CutadaptOptions().add_options(self)

    general.add_argument(
        "--skip-gff-check",
        action="store_true",
        default=False,
        help=(
            "By default we check the coherence between the input GFF "
            "file and related options (e.g. "
            "--feature_counts_feature_type and "
            "--feature_counts_attribute options). This may take time "
            "e.g. for mouse or human. \n"
            "Using this option skips the sanity checks"
        ),
    )

    # Feature-counts-related options.
    FeatureCountsOptions().add_options(self)

    # Registered directly on the parser rather than on a named group.
    self.add_argument(
        "--run",
        default=False,
        action="store_true",
        help="execute the pipeline directly",
    )

    # --- miscellaneous options --------------------------------------
    others = self.add_argument_group("pipeline_others")
    others.add_argument(
        '--do-igvtools',
        action="store_true",
        help=(
            "if set, this will compute TDF files that can be imported in "
            "IGV browser. TDF file allows to quickly visualise the "
            "coverage of the mapped reads."
        ),
    )
    others.add_argument(
        '--do-bam-coverage',
        action="store_true",
        help="Similar to --do-igvtools using bigwig",
    )
    others.add_argument(
        '--do-mark-duplicates',
        action="store_true",
        help="Mark duplicates. To be used e.g. with QCs",
    )

    # --- RNAseQC options --------------------------------------------
    rnaseqc = self.add_argument_group("pipeline_RNAseQC")
    rnaseqc.add_argument(
        '--do-rnaseqc',
        action="store_true",
        help="do RNA-seq QC using RNAseQC v2",
    )
    rnaseqc.add_argument(
        '--rnaseqc-gtf-file',
        help=(
            "The GTF file to be used for RNAseQC. Without a valid GTF, "
            "RNAseqQC will not work. Again, yu may try sequana.gff3 "
            "module to build the gtf"
        ),
    )

    # --- RNADIFF options --------------------------------------------
    rnadiff = self.add_argument_group("section_rnadiff")
    rnadiff.add_argument(
        '--rnadiff-mode',
        type=str,
        required=False,
        choices=["one_factor", "GLM"],
        default="one_factor",
        help=(
            "Fix the type of analyis (one_factor or GLM). By default "
            "uses one_factor"
        ),
    )
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; registration order is kept.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    InputOptions().add_options(self)
    GeneralOptions().add_options(self)

    # --- general pipeline options -----------------------------------
    general = self.add_argument_group("pipeline_general")
    general.add_argument(
        "--genome-directory",
        dest="genome_directory",
        default=".",
        required=True,
    )
    general.add_argument(
        "--aligner",
        dest="aligner",
        required=True,
        choices=["bowtie2", "bowtie1", "star", "salmon"],
        help="a mapper in bowtie, bowtie2, star",
    )
    general.add_argument(
        "--rRNA-feature",
        default="rRNA",
        help=(
            "Feature name corresponding to the rRNA to be identified in "
            "the input GFF/GTF files"
        ),
    )
    general.add_argument(
        "--contaminant-file",
        default=None,
        help=(
            "A fasta file. If used, the rRNA-feature is not used This "
            "option is useful if you have a dedicated list of rRNA "
            "feature or a dedicated fasta file to search for contaminants"
        ),
    )

    # Trimming-related options (registered between the general options,
    # as in the original ordering).
    TrimmingOptions().add_options(self)

    general.add_argument(
        "--skip-gff-check",
        action="store_true",
        default=False,
        help=(
            "By default we check the coherence between the input GFF "
            "file and related options (e.g. "
            "--feature_counts_feature_type and "
            "--feature_counts_attribute options). This may take time "
            "e.g. for mouse or human. \n"
            "Using this option skips the sanity checks"
        ),
    )

    # Feature-counts-related options.
    FeatureCountsOptions().add_options(self)

    # Registered directly on the parser rather than on a named group.
    self.add_argument(
        "--run",
        default=False,
        action="store_true",
        help="execute the pipeline directly",
    )

    # --- miscellaneous and QC options -------------------------------
    others = self.add_argument_group("pipeline_others")
    others.add_argument(
        "--do-igvtools",
        action="store_true",
        help=(
            "if set, this will compute TDF files that can be imported in "
            "IGV browser. TDF file allows to quickly visualise the "
            "coverage of the mapped reads."
        ),
    )
    others.add_argument(
        "--do-bam-coverage",
        action="store_true",
        help="Similar to --do-igvtools using bigwig",
    )
    others.add_argument(
        "--do-mark-duplicates",
        action="store_true",
        help="Mark duplicates. To be used e.g. with QCs",
    )
    others.add_argument(
        "--do-rnaseqc",
        action="store_true",
        help="do RNA-seq QC using RNAseQC v2",
    )
    others.add_argument(
        "--rnaseqc-gtf-file",
        help=(
            "The GTF file to be used for RNAseQC. Without a valid GTF, "
            "RNAseqQC will not work. You may try sequana.gff3 module to "
            "build the gtf from the GFF file"
        ),
    )
    others.add_argument(
        "--do-rseqc",
        action="store_true",
        help=(
            "do RNA-seq QC using RseQC. This will need a BED file "
            "corresponding to your GFF file. For prokaryotes, the BED "
            "file is created on the fly."
        ),
    )
    others.add_argument(
        "--rseqc-bed-file",
        help="The rseQC input bed file.",
    )
def __init__(self, prog=NAME, epilog=None):
    """Create the parser and register all option groups.

    :param prog: program name forwarded to the parent constructor
        (defaults to the module-level ``NAME``).
    :param epilog: epilog text forwarded to the parent constructor.
    """
    banner = col.purple(sequana_prolog.format(name=NAME))
    super(Options, self).__init__(
        usage=banner,
        prog=prog,
        description="",
        epilog=epilog,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Option groups supplied by helper classes; registration order is kept.
    SlurmOptions().add_options(self)
    SnakemakeOptions(working_directory=NAME).add_options(self)
    InputOptions().add_options(self)
    GeneralOptions().add_options(self)

    # Pipeline-specific options, declared as (flag, keyword-arguments)
    # pairs and registered in this exact order.
    downsampling_specs = (
        (
            "--downsampling-input-format",
            dict(
                default="fastq",
                type=str,
                choices=["fasta", "fastq", "sam"],
                help=(
                    "set input format (only 'fastq', 'fasta', 'sam' "
                    "supported for now)"
                ),
            ),
        ),
        (
            "--downsampling-method",
            dict(
                default="random",
                type=str,
                choices=["random", "random_pct"],
                help=(
                    "set the downsampling method to be random based on "
                    "read counts (random) on read percentage (random_pct))"
                ),
            ),
        ),
        (
            "--downsampling-percent",
            dict(
                default=10,
                type=float,
                help=(
                    "Percentage of reads to select. Use with method "
                    "*random_pct* only"
                ),
            ),
        ),
        (
            "--downsampling-max-entries",
            dict(
                default=1000,
                type=int,
                help=(
                    "max entries (reads, alignement) to select. Use with "
                    "method *random* only"
                ),
            ),
        ),
        (
            "--downsampling-threads",
            dict(
                default=4,
                type=int,
                help="max threads to use with pigz",
            ),
        ),
        (
            "--run",
            dict(
                default=False,
                action="store_true",
                help="Execute the pipeline",
            ),
        ),
    )
    pipeline = self.add_argument_group("pipeline")
    for flag, settings in downsampling_specs:
        pipeline.add_argument(flag, **settings)