예제 #1
0
    def __init__(self, params, logfile):
        """
        Set up the sieve registered under the name 'SGA'.

        Every entry below is handed to ``Sieve.__init__`` as a
        ``(name, default)`` pair.

        Optional parameters:
            * parse_output (bool, True): Whether the sieve should write its own output file.

        Optional parameters passed to SGA:
            * max_edges (int, 400)
            * numcpu (int, 4)
            * min_branch_length (int, 35)
            * min_merge_overlap (int, 15)
            * min_assembly_overlap (int, 5)
            * max_gap_divergence (int, 0)
            * resolve_small (int, 500)
            * error_rate (float, 0.02)
        """

        # Setting consumed by the sieve itself.
        sieve_options = [('parse_output', True)]

        # Settings documented above as being passed on to SGA.
        sga_options = [
            ('max_edges', 400),
            ('numcpu', 4),
            ('min_branch_length', 35),
            ('min_merge_overlap', 15),
            ('min_assembly_overlap', 5),
            ('max_gap_divergence', 0),
            ('resolve_small', 500),
            ('error_rate', 0.02),
        ]

        # Concatenation keeps the original ordering: parse_output first,
        # followed by the SGA settings.
        Sieve.__init__(self, params, logfile, name='SGA',
                       param_names=sieve_options + sga_options)
예제 #2
0
 def __init__(self, params, logfile):
     """
     Set up the sieve registered under the name 'FASTA translator'.

     Optional parameters:
         * item_limit (int, default 0): Maximum number of sequences to parse. 0 means no limit.
     """
     # The only setting, given as a (name, default) pair.
     accepted = [('item_limit', 0)]
     Sieve.__init__(self, params, logfile,
                    name='FASTA translator', param_names=accepted)
예제 #3
0
    def __init__(self, params, logfile):
        """
        Set up the sieve registered under the name 'MultiRunner'.

        Mandatory parameters:
            * sieve (module): Sieve to run
            * params (dict): Parameter set giving the values to try for each parameter, on the following form::

                {
                    'param1': [1, 2],
                    'param2': ['A', 'B', 'C']
                }

            The supplied sieve is run once for every possible combination of parameter values, so the example above results in 2*3=6 runs.
        """
        # Both names are bare strings (no default attached), matching the
        # "mandatory" listing in the docstring.
        Sieve.__init__(self, params, logfile, name='MultiRunner',
                       param_names=['sieve', 'params'])
예제 #4
0
    def __init__(self, params, logfile):
        """
        Set up the sieve registered under the name 'HMMer hmmsearch'.

        Mandatory parameters:
            * model_path (str): Path to .hmm model file to use for hmmsearch.
        Optional parameters:
            * hmmsearch_out (str, 'hmmsearch_out'): Path to temporary hmmsearch output file.
            * numcpu (int, 4): Number of threads to use for hmmsearch.
            * write_only_domain (bool, True): If output FASTA file should contain entire input sequence or just matching domain.
            * use_heuristics (bool, True): Heuristics on/off; there is little reason not to use heuristics, HMMer has a higher propensity for crashing if not used and it only increases the number of really low scoring hits anyway.
            * longseqdef (int, 151): Sequences longer than this are considered "long" and are compared to `longseqcutoff` instead of `classificationfunction`.
            * longseqcutoff (float, 109.64): Classification cutoff (minimum domain score) for long sequences; "long" sequences only pass with a score higher than this.
            * minscore (int, 0): Minimum domain score, regardless of length, to pass a sequence.
            * min_sequence_length (int, 20): Only consider sequences longer than this.
            * max_domain_length (int, 21844): Do not include sequences where the maximum domain is longer than this.
            * max_sequence_length (int, 21844000): Do not include sequences longer than this.
            * classificationfunction (function, lambda L: self.classifyK*L + self.classifyM): Classification function. The domain score of sequences shorter than `longseqdef` is compared to the result of this function, called with the sequence length. Only sequences with a higher domain score pass.
            * classifyK (float, 0.7778): Parameter to the default `classificationfunction`.
            * classifyM (float, -7.89): Parameter to the default `classificationfunction`.
        """

        # Bare string: listed as mandatory above, so no default is attached.
        required = ['model_path']

        # (name, default) pairs for everything listed as optional above.
        optional = [
            ('hmmsearch_out', 'hmmsearch_out'),
            ('numcpu', 4),
            ('write_only_domain', True),
            ('use_heuristics', True),
            ('longseqdef', 151),
            ('longseqcutoff', 109.64),
            ('minscore', 0),
            ('min_sequence_length', 20),  # minimum fragment length allowed
            ('max_domain_length', 21844),
            ('max_sequence_length', 21844000),
            ('classifyK', 0.7778),
            ('classifyM', -7.89),
            # The default closes over self, so classifyK/classifyM are looked
            # up on the instance each time the function is called.
            # NOTE(review): presumably Sieve.__init__ stores the parameters as
            # instance attributes -- confirm against Sieve.
            ('classificationfunction',
             lambda L: self.classifyK*L + self.classifyM),
        ]

        Sieve.__init__(self, params, logfile, name='HMMer hmmsearch',
                       param_names=required + optional)