def test_eq(self):
    '''Check equality and inequality between ``OptionParam`` objects.'''
    # Same flag, different names: the two are expected to compare equal.
    left = OptionParam(flag='-i', name='input')
    right = OptionParam(flag='-i', name='i')
    self.assertEqual(left, right)

    # After ``on`` is called with different arguments on each side,
    # the two must no longer compare equal.
    left.on(2)
    right.on(1)
    self.assertNotEqual(left, right)

    # Same name, different flags: the two are expected to differ.
    left = OptionParam(flag='-o', name='i')
    right = OptionParam(flag='-i', name='i')
    self.assertNotEqual(left, right)
def run_cmpress(cm, force=False):
    '''Build a ``cmpress`` command for a CM database and invoke it.

    Parameters
    ----------
    cm : str
        The file path to the CM database to press.
    force : boolean
        Value given to the ``-F`` (force overwrite) option.

    Returns
    -------
    Dumpling
        The ``cmpress`` wrapper object, after it has been called.
    '''
    # Declare the command-line interface: one option followed by the
    # positional argument, in that order.
    force_opt = OptionParam('-F', name='force', help='force overwrite')
    cm_arg = ArgmntParam(name='cm', help='cm file to press')

    runner = Dumpling('cmpress', params=Parameters(force_opt, cm_arg))
    # Fill in the caller-supplied values, then invoke the wrapper.
    runner.update(cm=cm, force=force)
    runner()
    return runner
def run_hmmpress(hmm, force=False):
    '''Build an ``hmmpress`` command for an HMM database and invoke it.

    Parameters
    ----------
    hmm : str
        The file path to the HMM database to press.
    force : boolean
        Value given to the ``-f`` (force overwrite) option.

    Returns
    -------
    Dumpling
        The ``hmmpress`` wrapper object, after it has been called.
    '''
    # Declare the command-line interface: one option followed by the
    # positional argument, in that order.
    force_opt = OptionParam('-f', name='force', help='force overwrite')
    hmm_arg = ArgmntParam(name='hmm', help='hmm file to press')

    runner = Dumpling('hmmpress', params=Parameters(force_opt, hmm_arg))
    # Fill in the caller-supplied values, then invoke the wrapper.
    runner.update(hmm=hmm, force=force)
    runner()
    return runner
from os.path import join from os import makedirs from logging import getLogger import re from skbio import read from skbio.metadata import Feature from skbio.io.format.genbank import _parse_features from dumpling import (check_choice, Dumpling, OptionParam, Parameters) from ..parsers.embl import _parse_records params = [ OptionParam('-i', 'query', help='FASTA/Genbank input file (default reads from stdin).'), OptionParam('-a', help='File to store protein translations.'), OptionParam('-d', help='File to store nuc sequence of predicted gene.'), OptionParam( '-s', help='Write all potential genes (with scores) to the selected file.'), OptionParam('-t', help=('Write a training file (if none exists); ' 'otherwise, read and use the specified training file.')), OptionParam('-o', 'output', help='output file (default writes to stdout).'), OptionParam('-f', 'fmt', action=check_choice(('gbk', 'gff', 'sco')), help='output format (gbk, gff, or sco). Default is gbk.'),
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file COPYING.txt, distributed with this software. # ---------------------------------------------------------------------------- from logging import getLogger from os.path import exists, join, basename, splitext from subprocess import CalledProcessError from dumpling import check_range, Dumpling, ArgmntParam, OptionParam, Parameters params = [ OptionParam('-searchWL', action=check_range(6, 9), help='Length of search window used to discover CRISPRs (range: 6-9). Default: 8'), OptionParam('-minNR', help='Minimum number of repeats a CRISPR must contain. Default: 3'), OptionParam('-minRL', help='Minimum length of the CRISPR repeats. Default: 23'), OptionParam('-maxRL', help='Maximum length of the CRISPR repeats. Default: 47'), OptionParam('-minSL', help='Minimum length of the CRISPR spacers. Default: 26'), OptionParam('-maxSL', help='Maximum length of the CRISPR spacers. Default: 50'), OptionParam('-gff', help='Output summary results in gff format containing only the positions of the CRISPR arrays. Default: false'), OptionParam('-gffFull', help='Output detailed results in gff format containing positions of CRISPR arrays and all repeat units. Default: false'), OptionParam('-spacers', help='Output a fasta formatted file containing the spacers. Default: false'), OptionParam('-h', help='Output this handy help message'), ArgmntParam(name='query', help='input file of fna sequence'),
# ----------------------------------------------------------------------------
# Copyright (c) 2015--, micronota development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# ----------------------------------------------------------------------------

from dumpling import OptionParam, ArgmntParam


# Parameter declarations for the scan commands: two options first, then
# the two positional arguments (database, then query).
_scan_params = [
    OptionParam(
        '--tblout',
        help='save parseable table of hits to file',
        name='out'),
    # set default to 1 instead of all available cores.
    OptionParam(
        '--cpu',
        help='number of parallel CPU workers to use for multithreads',
        name='cpus',
        value=1),
    ArgmntParam(help='HMM/CM database file', name='db'),
    ArgmntParam(help='input sequence to scan', name='query'),
]
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file COPYING.txt, distributed with this software. # ---------------------------------------------------------------------------- from logging import getLogger from os.path import join, splitext, basename from dumpling import Dumpling, OptionParam, ArgmntParam, Parameters from ._base import _scan_params cmscan_params = [ OptionParam('--rfam', help='Set all filter thresholds as if the search space were more than 20 Gb.'), OptionParam('--noali', value=True, help='Omit the alignment section from the main output. This can greatly reduce the output volume.')] cmscan_params.extend(_scan_params) def run_cmpress(cm, force=False): '''Compress the CM database. Parameters ---------- cm : str The file path to CM database. force : boolean Whether to overwrite.''' params = [
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file COPYING.txt, distributed with this software. # ---------------------------------------------------------------------------- from os.path import join, basename, splitext from logging import getLogger import re import pandas as pd from dumpling import (check_choice, Dumpling, OptionParam, Parameters) blast_params = [ OptionParam('--threads', 'cpus', help='number of cpu threads.'), OptionParam('--gapopen', help='Gap open penalty.'), OptionParam('--gapextend', help='Gap extension penalty.'), OptionParam('--matrix', help='Scoring matrix.'), OptionParam('--seg', help='Enable SEG masking.'), OptionParam( '--max-target-seqs', '-k', help='The maximum number of hits per query to keep alignments for.'), OptionParam( '--top', help= 'Keep alignments within the given percentage range of the top alignment' ), OptionParam('--evalue', '-e',