def get_argument_parser():
    """Build the command-line parser for the SRA experiment/run lookup.

    The parser requires an input file of SRA experiment IDs and an output
    file; it is then passed to `cli.add_reporting_args`.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.
    """
    # Both options are mandatory and share the same type and metavar.
    file_option = dict(type=str, required=True, metavar=cli.file_mv)

    arg_parser = cli.get_argument_parser(
        desc='Find all runs (SRR..) associated with an SRA experiment '
             '(SRX...).'
    )

    arg_parser.add_argument(
        '-e', '--experiment-file',
        help='File with SRA experiment IDs (starting with "SRX").',
        **file_option
    )
    arg_parser.add_argument(
        '-o', '--output-file',
        help='The output file.',
        **file_option
    )

    cli.add_reporting_args(arg_parser)
    return arg_parser
def get_gtf_argument_parser(desc, default_field_name='gene'):
    """Create a parser pre-populated with the basic GTF-reading options.

    Parameters
    ----------
    desc: str
        Description handed to the underlying argument parser.
    default_field_name: str, optional
        Default value of the ``--field-name`` option.

    Returns
    -------
    `argparse.ArgumentParser` object
        The argument parser.
    """
    gtf_parser = cli.get_argument_parser(desc=desc)

    # Input / output locations.
    annotation_help = textwrap.dedent("""\
        Path of Ensembl gene annotation file (in GTF format). The file
        may be gzip'ed. If set to ``-``, read from ``stdin``.""")
    gtf_parser.add_argument(
        '-a', '--annotation-file',
        type=str, default='-', help=annotation_help
    )

    output_help = textwrap.dedent("""\
        Path of output file. If set to ``-``, print to ``stdout``,
        and redirect logging messages to ``stderr``.""")
    gtf_parser.add_argument(
        '-o', '--output-file',
        type=str, required=True, help=output_help
    )

    # Chromosome selection: either by species or by an explicit pattern.
    species_help = textwrap.dedent("""\
        Species for which to extract genes. (This parameter is ignored
        if ``--chromosome-pattern`` is specified.)""")
    known_species = sorted(ensembl.species_chrompat.keys())
    gtf_parser.add_argument(
        '-s', '--species',
        type=str, default='human', choices=known_species,
        help=species_help
    )

    pattern_help = textwrap.dedent("""\
        Regular expression that chromosome names have to match.
        If not specified, determine pattern based on ``--species``.""")
    gtf_parser.add_argument(
        '-c', '--chromosome-pattern',
        type=str, default=None, required=False, help=pattern_help
    )

    # Row filter on the third GTF column.
    field_help = textwrap.dedent("""\
        Rows in the GTF file that do not contain this value
        in the third column are ignored.""")
    gtf_parser.add_argument(
        '-f', '--field-name',
        type=str, default=default_field_name, help=field_help
    )

    cli.add_reporting_args(gtf_parser)
    return gtf_parser
def get_argument_parser():
    """Function to obtain the argument parser.

    Builds the command-line interface of the script that extracts
    gene-level expression data from StringTie output.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.
    """
    file_mv = cli.file_mv

    desc = 'Extracts gene-level expression data from StringTie output.'
    parser = cli.get_argument_parser(desc)

    parser.add_argument(
        '-s', '--stringtie-file', type=str, required=True, metavar=file_mv,
        help='Path of the StringTie output file.'
    )

    parser.add_argument(
        '-g', '--gene-file', type=str, required=True, metavar=file_mv,
        help='File containing a list of protein-coding genes.'
    )

    parser.add_argument(
        '--no-novel-transcripts', action='store_true',
        help='Ignore novel transcripts.'
    )

    # TODO: add an '--ambiguous-transcripts' option (default 'ignore') that
    # selects the strategy for counting expression of ambiguous novel
    # transcripts. Possible strategies: 'ignore', 'highest', 'all'.

    parser.add_argument(
        '-o', '--output-file', type=str, required=True, metavar=file_mv,
        help='Path of output file.'
    )

    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Function to obtain the argument parser.

    Builds the command-line interface of the script that extracts
    gene-level expression data from StringTie output.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.
    """
    file_mv = cli.file_mv

    desc = "Extracts gene-level expression data from StringTie output."
    parser = cli.get_argument_parser(desc)

    parser.add_argument(
        "-s",
        "--stringtie-file",
        type=str,
        required=True,
        metavar=file_mv,
        help="Path of the StringTie output file.",
    )

    parser.add_argument(
        "-g",
        "--gene-file",
        type=str,
        required=True,
        metavar=file_mv,
        help="File containing a list of protein-coding genes.",
    )

    parser.add_argument(
        "--no-novel-transcripts",
        action="store_true",
        help="Ignore novel transcripts.",
    )

    # TODO: add an "--ambiguous-transcripts" option (default "ignore") that
    # selects the strategy for counting expression of ambiguous novel
    # transcripts. Possible strategies: "ignore", "highest", "all".

    parser.add_argument(
        "-o",
        "--output-file",
        type=str,
        required=True,
        metavar=file_mv,
        help="Path of output file.",
    )

    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Creates the argument parser for the extract_entrez2gene.py script.

    The parser takes the gene2accession input file and the output file
    (both required), plus ``--log-file``, ``--quiet`` and ``--verbose``.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.
    """
    arg_parser = cli.get_argument_parser(
        desc='Generate a mapping of Entrez IDs to gene symbols.'
    )

    # Required input and output paths.
    gene2acc_help = textwrap.dedent("""\
        Path of gene2accession.gz file (from
        ftp://ftp.ncbi.nlm.nih.gov/gene/DATA), or a filtered version
        thereof.""")
    arg_parser.add_argument(
        '-f', '--gene2acc-file',
        required=True, type=str, help=gene2acc_help
    )

    output_help = textwrap.dedent("""\
        Path of output file. If set to ``-``, print to ``stdout``,
        and redirect logging messages to ``stderr``.""")
    arg_parser.add_argument(
        '-o', '--output-file',
        required=True, type=str, help=output_help
    )

    # Logging destination.
    arg_parser.add_argument(
        '-l', '--log-file',
        default=None, type=str,
        help='Path of log file. If not specified, print to stdout.'
    )

    # Verbosity switches; both are plain boolean flags.
    verbosity_flags = (
        ('-q', '--quiet',
         'Suppress all output except warnings and errors.'),
        ('-v', '--verbose',
         'Enable verbose output. Ignored if ``--quiet`` is specified.'),
    )
    for short_flag, long_flag, help_text in verbosity_flags:
        arg_parser.add_argument(
            short_flag, long_flag, action='store_true', help=help_text
        )

    return arg_parser
def get_argument_parser():
    """Build the argument parser for the FASTQ trimming script.

    Returns
    -------
    The parser obtained from `cli.get_argument_parser`, extended with the
    integer options ``--left`` and ``--right`` (both defaulting to 0).
    """
    arg_parser = cli.get_argument_parser(
        desc='Trim FASTQ file (read from stdin, write to stdout).'
    )

    # The two trimming options differ only in their flags and help text.
    trim_options = (
        ('-l', '--left', 'Number of base pairs to trim from the left.'),
        ('-r', '--right', 'Number of base pairs to trim from the right.'),
    )
    for short_flag, long_flag, help_text in trim_options:
        arg_parser.add_argument(
            short_flag, long_flag, type=int, default=0, help=help_text
        )

    return arg_parser
def get_argument_parser():
    """Returns an argument parser object for the script.

    The parser describes the command-line interface for filtering a FASTA
    file by chromosome names: the input file, the species or an explicit
    chromosome pattern, and the output file.

    Returns
    -------
    The parser created by `cli.get_argument_parser`, with the options
    above added and passed through `cli.add_reporting_args`.
    """
    desc = 'Filter FASTA file by chromosome names.'
    parser = cli.get_argument_parser(desc=desc)

    parser.add_argument(
        '-f', '--fasta-file', default='-', type=str,
        help=textwrap.dedent("""\
            Path of the FASTA file. The file may be gzip'ed.
            If set to ``-``, read from ``stdin``.""")
    )

    parser.add_argument(
        '-s', '--species', type=str,
        choices=sorted(ensembl.SPECIES_CHROMPAT.keys()),
        default='human',
        help=textwrap.dedent("""\
            Species for which to extract genes. (This parameter is ignored
            if ``--chromosome-pattern`` is specified.)""")
    )

    parser.add_argument(
        '-c', '--chromosome-pattern', type=str, required=False,
        default=None,
        help=textwrap.dedent("""\
            Regular expression that chromosome names have to match.
            If not specified, determine pattern based on the setting of
            ``--species``.""")
    )

    parser.add_argument(
        '-o', '--output-file', type=str, required=True,
        help=textwrap.dedent("""\
            Path of output file. If set to ``-``, print to ``stdout``,
            and redirect logging messages to ``stderr``.""")
    )

    # Called for its effect on `parser` only. The return value is
    # deliberately not assigned back to `parser`, so that this function
    # always returns the parser object created above.
    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Returns an argument parser object for the script.

    The parser describes the command-line interface for filtering a FASTA
    file by chromosome names: the input file, the species or an explicit
    chromosome pattern, and the output file.

    Returns
    -------
    The parser created by `cli.get_argument_parser`, with the options
    above added and passed through `cli.add_reporting_args`.
    """
    desc = 'Filter FASTA file by chromosome names.'
    parser = cli.get_argument_parser(desc=desc)

    parser.add_argument(
        '-f', '--fasta-file', default='-', type=str,
        help=textwrap.dedent("""\
            Path of the FASTA file. The file may be gzip'ed.
            If set to ``-``, read from ``stdin``.""")
    )

    parser.add_argument(
        '-s', '--species', type=str,
        choices=sorted(ensembl.species_chrompat.keys()),
        default='human',
        help=textwrap.dedent("""\
            Species for which to extract genes. (This parameter is ignored
            if ``--chromosome-pattern`` is specified.)""")
    )

    parser.add_argument(
        '-c', '--chromosome-pattern', type=str, required=False,
        default=None,
        help=textwrap.dedent("""\
            Regular expression that chromosome names have to match.
            If not specified, determine pattern based on the setting of
            ``--species``.""")
    )

    parser.add_argument(
        '-o', '--output-file', type=str, required=True,
        help=textwrap.dedent("""\
            Path of output file. If set to ``-``, print to ``stdout``,
            and redirect logging messages to ``stderr``.""")
    )

    # Called for its effect on `parser` only. The return value is
    # deliberately not assigned back to `parser`, so that this function
    # always returns the parser object created above.
    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Function to obtain the argument parser.

    Builds the command-line interface of the script that converts
    Entrez IDs to gene symbols. Options are organized into two groups:
    "Input and output files" and "Conversion options".

    Returns
    -------
    `argparse.ArgumentParser`
        A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function can also be used by the `sphinx-argparse` extension for
    sphinx to generate documentation for this script.
    """
    desc = "Convert Entrez IDs to gene symbols."

    parser = cli.get_argument_parser(desc=desc)

    file_mv = cli.file_mv

    g = parser.add_argument_group("Input and output files")

    g.add_argument(
        "-e",
        "--expression-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help="The expression file.",
    )

    g.add_argument(
        "-g",
        "--gene-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help=textwrap.dedent(
            """\
            The gene file (e.g., generated by the
            ensembl_extract_protein_coding_genes.py script)."""
        ),
    )

    g.add_argument(
        "-c",
        "--entrez2gene-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help=textwrap.dedent(
            """\
            The entrez2gene file (e.g., generated by the
            ncbi_extract_entrez2gene.py script)."""
        ),
    )

    g.add_argument(
        "-o",
        "--output-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help="The output file.",
    )

    g = parser.add_argument_group("Conversion options")

    g.add_argument(
        "-s",
        "--strip-affy-suffix",
        action="store_true",
        help=textwrap.dedent(
            """\
            Strip the suffix "_at" from all Entrez IDs.
            (For use in affymetrix microarray pipeline.)"""
        ),
    )

    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Function to obtain the argument parser.

    Builds the command-line interface of the script that converts
    Entrez IDs to gene symbols. Options are organized into two groups:
    "Input and output files" and "Conversion options".

    Returns
    -------
    `argparse.ArgumentParser`
        A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function can also be used by the `sphinx-argparse` extension for
    sphinx to generate documentation for this script.
    """
    desc = 'Convert Entrez IDs to gene symbols.'

    parser = cli.get_argument_parser(desc=desc)

    file_mv = cli.file_mv

    g = parser.add_argument_group('Input and output files')

    g.add_argument(
        '-e', '--expression-file', required=True,
        type=cli.str_type, metavar=file_mv,
        help='The expression file.'
    )

    g.add_argument(
        '-g', '--gene-file', required=True,
        type=cli.str_type, metavar=file_mv,
        help=textwrap.dedent('''\
            The gene file (e.g., generated by the
            ensembl_extract_protein_coding_genes.py script).''')
    )

    g.add_argument(
        '-c', '--entrez2gene-file', required=True,
        type=cli.str_type, metavar=file_mv,
        help=textwrap.dedent('''\
            The entrez2gene file (e.g., generated by the
            ncbi_extract_entrez2gene.py script).''')
    )

    g.add_argument(
        '-o', '--output-file', required=True,
        type=cli.str_type, metavar=file_mv,
        help='The output file.'
    )

    g = parser.add_argument_group('Conversion options')

    g.add_argument(
        '-s', '--strip-affy-suffix', action='store_true',
        help=textwrap.dedent('''\
            Strip the suffix "_at" from all Entrez IDs.
            (For use in affymetrix microarray pipeline.)''')
    )

    cli.add_reporting_args(parser)

    return parser