def get_argument_parser():
    """Build the command-line parser for the SRA run lookup script.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.

    """
    # Keyword arguments shared by both (mandatory) file options.
    file_option = {'type': str, 'required': True, 'metavar': cli.file_mv}

    arg_parser = cli.get_argument_parser(
        desc='Find all runs (SRR..) associated with an SRA experiment (SRX...).'
    )

    option_specs = (
        ('-e', '--experiment-file',
         'File with SRA experiment IDs (starting with "SRX").'),
        ('-o', '--output-file', 'The output file.'),
    )
    for short_flag, long_flag, help_text in option_specs:
        arg_parser.add_argument(
            short_flag, long_flag, help=help_text, **file_option
        )

    cli.add_reporting_args(arg_parser)
    return arg_parser
Example #2
0
def get_gtf_argument_parser(desc, default_field_name='gene'):
    """Create an argument parser with the common options for GTF scripts.

    Parameters
    ----------
    desc: str
        Description handed to `cli.get_argument_parser`.
    default_field_name: str, optional
        Default value of the ``--field-name`` option.

    Returns
    -------
    `argparse.ArgumentParser` object
        The argument parser.
    """
    gtf_parser = cli.get_argument_parser(desc=desc)
    add_option = gtf_parser.add_argument

    # Input and output locations.
    add_option(
        '-a', '--annotation-file',
        type=str,
        default='-',
        help=textwrap.dedent("""\
            Path of Ensembl gene annotation file (in GTF format). The file
            may be gzip'ed. If set to ``-``, read from ``stdin``."""),
    )
    add_option(
        '-o', '--output-file',
        type=str,
        required=True,
        help=textwrap.dedent("""\
            Path of output file. If set to ``-``, print to ``stdout``,
            and redirect logging messages to ``stderr``."""),
    )

    # Chromosome selection: either by species or by an explicit pattern.
    add_option(
        '-s', '--species',
        type=str,
        default='human',
        choices=sorted(ensembl.species_chrompat.keys()),
        help=textwrap.dedent("""\
            Species for which to extract genes. (This parameter is ignored
            if ``--chromosome-pattern`` is specified.)"""),
    )
    add_option(
        '-c', '--chromosome-pattern',
        type=str,
        default=None,
        help=textwrap.dedent("""\
            Regular expression that chromosome names have to match.
            If not specified, determine pattern based on
            ``--species``."""),
    )

    # Row filter on the third GTF column.
    add_option(
        '-f', '--field-name',
        type=str,
        default=default_field_name,
        help=textwrap.dedent("""\
            Rows in the GTF file that do not contain this value
            in the third column are ignored."""),
    )

    cli.add_reporting_args(gtf_parser)
    return gtf_parser
def get_argument_parser():
    """Build the argument parser for the StringTie gene expression script.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.

    """
    file_mv = cli.file_mv

    desc = 'Extracts gene-level expression data from StringTie output.'
    parser = cli.get_argument_parser(desc)

    parser.add_argument(
        '-s', '--stringtie-file', type=str, required=True, metavar=file_mv,
        help='Path of the StringTie output file.'
    )

    parser.add_argument(
        '-g', '--gene-file', type=str, required=True, metavar=file_mv,
        help='File containing a list of protein-coding genes.'
    )

    parser.add_argument(
        '--no-novel-transcripts', action='store_true',
        help='Ignore novel transcripts.'
    )

    # TODO: not implemented yet -- an ``--ambiguous-transcripts`` option
    # (default 'ignore') selecting the strategy for counting expression of
    # ambiguous novel transcripts. Possible strategies: 'ignore', 'highest',
    # 'all'.

    parser.add_argument(
        '-o', '--output-file', type=str, required=True, metavar=file_mv,
        help='Path of output file.'
    )

    # Registers the reporting options on `parser` in place.
    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Build the argument parser for the StringTie gene expression script.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.

    """
    file_mv = cli.file_mv

    desc = "Extracts gene-level expression data from StringTie output."
    parser = cli.get_argument_parser(desc)

    parser.add_argument(
        "-s",
        "--stringtie-file",
        type=str,
        required=True,
        metavar=file_mv,
        help="Path of the StringTie output file.",
    )

    parser.add_argument(
        "-g",
        "--gene-file",
        type=str,
        required=True,
        metavar=file_mv,
        help="File containing a list of protein-coding genes.",
    )

    parser.add_argument(
        "--no-novel-transcripts",
        action="store_true",
        help="Ignore novel transcripts.",
    )

    # TODO: not implemented yet -- an ``--ambiguous-transcripts`` option
    # (default 'ignore') selecting the strategy for counting expression of
    # ambiguous novel transcripts. Possible strategies: 'ignore', 'highest',
    # 'all'.

    parser.add_argument(
        "-o",
        "--output-file",
        type=str,
        required=True,
        metavar=file_mv,
        help="Path of output file.",
    )

    # Registers the reporting options on `parser` in place.
    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Build the argument parser for the extract_entrez2gene.py script.

    Returns
    -------
    A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function is used by the `sphinx-argparse` extension for sphinx.

    """
    entrez_parser = cli.get_argument_parser(
        desc='Generate a mapping of Entrez IDs to gene symbols.'
    )
    add_option = entrez_parser.add_argument

    # Mandatory input and output paths.
    add_option(
        '-f', '--gene2acc-file',
        required=True,
        type=str,
        help=textwrap.dedent("""\
            Path of gene2accession.gz file (from
            ftp://ftp.ncbi.nlm.nih.gov/gene/DATA), or a filtered version
            thereof."""),
    )
    add_option(
        '-o', '--output-file',
        required=True,
        type=str,
        help=textwrap.dedent("""\
            Path of output file. If set to ``-``, print to ``stdout``,
            and redirect logging messages to ``stderr``."""),
    )

    # Logging destination.
    add_option(
        '-l', '--log-file',
        default=None,
        type=str,
        help='Path of log file. If not specified, print to stdout.',
    )

    # Verbosity switches, both plain boolean flags.
    verbosity_flags = (
        ('-q', '--quiet',
         'Suppress all output except warnings and errors.'),
        ('-v', '--verbose',
         'Enable verbose output. Ignored if ``--quiet`` is specified.'),
    )
    for short_flag, long_flag, help_text in verbosity_flags:
        add_option(short_flag, long_flag, action='store_true', help=help_text)

    return entrez_parser
Example #6
0
def get_argument_parser():
    """Build the argument parser for the FASTQ trimming script.

    Returns
    -------
    The parser obtained from `cli.get_argument_parser`, extended with the
    integer options ``--left`` and ``--right`` (both defaulting to 0).
    """
    trim_parser = cli.get_argument_parser(
        desc='Trim FASTQ file (read from stdin, write to stdout).'
    )

    # The two options differ only in their flags and help text.
    trim_options = (
        ('-l', '--left', 'Number of base pairs to trim from the left.'),
        ('-r', '--right', 'Number of base pairs to trim from the right.'),
    )
    for short_flag, long_flag, help_text in trim_options:
        trim_parser.add_argument(
            short_flag, long_flag, default=0, type=int, help=help_text
        )

    return trim_parser
Example #7
0
def get_argument_parser():
    """Return an argument parser object for the FASTA filtering script.

    Returns
    -------
    The parser created by `cli.get_argument_parser`, with the FASTA input,
    species / chromosome pattern, output file, and reporting options
    registered.
    """

    desc = 'Filter FASTA file by chromosome names.'
    parser = cli.get_argument_parser(desc=desc)

    parser.add_argument(
        '-f', '--fasta-file', default='-', type=str,
        help=textwrap.dedent("""\
            Path of the FASTA file. The file may be gzip'ed.
            If set to ``-``, read from ``stdin``.""")
    )

    parser.add_argument(
        '-s', '--species', type=str,
        choices=sorted(ensembl.SPECIES_CHROMPAT.keys()), default='human',
        help=textwrap.dedent("""\
            Species for which to extract genes. (This parameter is ignored
            if ``--chromosome-pattern`` is specified.)""")
    )

    parser.add_argument(
        '-c', '--chromosome-pattern', type=str, required=False,
        default=None, help=textwrap.dedent("""\
            Regular expression that chromosome names have to match.
            If not specified, determine pattern based on the setting of
            ``--species``.""")
    )

    parser.add_argument(
        '-o', '--output-file', type=str, required=True,
        help=textwrap.dedent("""\
            Path of output file. If set to ``-``, print to ``stdout``,
            and redirect logging messages to ``stderr``.""")
    )

    # Register the reporting options in place. The helper's return value is
    # deliberately NOT rebound to `parser`: whatever it returns (the parser,
    # an argument group, or None), this function must hand back the parser.
    cli.add_reporting_args(parser)

    return parser
Example #8
0
def get_argument_parser():
    """Return an argument parser object for the FASTA filtering script.

    Returns
    -------
    The parser created by `cli.get_argument_parser`, with the FASTA input,
    species / chromosome pattern, output file, and reporting options
    registered.
    """

    desc = 'Filter FASTA file by chromosome names.'
    parser = cli.get_argument_parser(desc=desc)

    parser.add_argument(
        '-f', '--fasta-file', default='-', type=str,
        help=textwrap.dedent("""\
            Path of the FASTA file. The file may be gzip'ed.
            If set to ``-``, read from ``stdin``.""")
    )

    parser.add_argument(
        '-s', '--species', type=str,
        choices=sorted(ensembl.species_chrompat.keys()),
        default='human', help=textwrap.dedent("""\
            Species for which to extract genes. (This parameter is ignored
            if ``--chromosome-pattern`` is specified.)""")
    )

    parser.add_argument(
        '-c', '--chromosome-pattern', type=str, required=False,
        default=None, help=textwrap.dedent("""\
            Regular expression that chromosome names have to match.
            If not specified, determine pattern based on the setting of
            ``--species``.""")
    )

    parser.add_argument(
        '-o', '--output-file', type=str, required=True,
        help=textwrap.dedent("""\
            Path of output file. If set to ``-``, print to ``stdout``,
            and redirect logging messages to ``stderr``.""")
    )

    # Register the reporting options in place. The helper's return value is
    # deliberately NOT rebound to `parser`: whatever it returns (the parser,
    # an argument group, or None), this function must hand back the parser.
    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Function to obtain the argument parser.

    Returns
    -------
    `argparse.ArgumentParser`
        A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function can also be used by the `sphinx-argparse` extension for
    sphinx to generate documentation for this script.
    """
    desc = "Convert Entrez IDs to gene symbols."
    parser = cli.get_argument_parser(desc=desc)

    file_mv = cli.file_mv

    # All four file options are mandatory and share type and metavar.
    g = parser.add_argument_group("Input and output files")

    g.add_argument(
        "-e",
        "--expression-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help="The expression file.",
    )

    g.add_argument(
        "-g",
        "--gene-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help=textwrap.dedent(
            """\
            The gene file (e.g., generated by the
            ensembl_extract_protein_coding_genes.py script)."""
        ),
    )

    g.add_argument(
        "-c",
        "--entrez2gene-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help=textwrap.dedent(
            """\
            The entrez2gene file (e.g., generated by the
            ncbi_extract_entrez2gene.py script)."""
        ),
    )

    g.add_argument(
        "-o",
        "--output-file",
        required=True,
        type=cli.str_type,
        metavar=file_mv,
        help="The output file.",
    )

    g = parser.add_argument_group("Conversion options")

    g.add_argument(
        "-s",
        "--strip-affy-suffix",
        action="store_true",
        help=textwrap.dedent(
            """\
            Strip the suffix "_at" from all Entrez IDs.
            (For use in affymetrix microarray pipeline.)"""
        ),
    )

    # Registers the reporting options on `parser` in place.
    cli.add_reporting_args(parser)

    return parser
def get_argument_parser():
    """Function to obtain the argument parser.

    Returns
    -------
    `argparse.ArgumentParser`
        A fully configured `argparse.ArgumentParser` object.

    Notes
    -----
    This function can also be used by the `sphinx-argparse` extension for
    sphinx to generate documentation for this script.
    """
    desc = 'Convert Entrez IDs to gene symbols.'
    parser = cli.get_argument_parser(desc=desc)

    file_mv = cli.file_mv

    # All four file options are mandatory and share type and metavar.
    g = parser.add_argument_group('Input and output files')

    g.add_argument(
        '-e', '--expression-file', required=True, type=cli.str_type,
        metavar=file_mv, help='The expression file.'
    )

    g.add_argument(
        '-g', '--gene-file', required=True, type=cli.str_type,
        metavar=file_mv, help=textwrap.dedent('''\
            The gene file (e.g., generated by the
            ensembl_extract_protein_coding_genes.py script).''')
    )

    g.add_argument(
        '-c', '--entrez2gene-file', required=True, type=cli.str_type,
        metavar=file_mv, help=textwrap.dedent('''\
            The entrez2gene file (e.g., generated by the
            ncbi_extract_entrez2gene.py script).''')
    )

    g.add_argument(
        '-o', '--output-file', required=True, type=cli.str_type,
        metavar=file_mv, help='The output file.'
    )

    g = parser.add_argument_group('Conversion options')

    g.add_argument(
        '-s', '--strip-affy-suffix', action='store_true',
        help=textwrap.dedent('''\
            Strip the suffix "_at" from all Entrez IDs.
            (For use in affymetrix microarray pipeline.)''')
    )

    # Registers the reporting options on `parser` in place.
    cli.add_reporting_args(parser)

    return parser