def __init__(self, params, logfile):
    """
    Set up the SGA sieve and declare the parameters it accepts.

    All parameters are optional; the type and the default value are
    given in parentheses.

    Parameters used by the sieve itself:

    * parse_output (bool, True): Whether the sieve should write its own
      output file.

    Parameters passed on to SGA:

    * max_edges (int, 400)
    * numcpu (int, 4)
    * min_branch_length (int, 35)
    * min_merge_overlap (int, 15)
    * min_assembly_overlap (int, 5)
    * max_gap_divergence (int, 0)
    * resolve_small (int, 500)
    * error_rate (float, 0.02)
    """
    # One (name, default) pair per optional parameter, in the same order
    # as the docstring above.
    sga_defaults = [
        ('parse_output', True),
        ('max_edges', 400),
        ('numcpu', 4),
        ('min_branch_length', 35),
        ('min_merge_overlap', 15),
        ('min_assembly_overlap', 5),
        ('max_gap_divergence', 0),
        ('resolve_small', 500),
        ('error_rate', 0.02),
    ]
    Sieve.__init__(
        self,
        params,
        logfile,
        name='SGA',
        param_names=sga_defaults,
    )
def __init__(self, params, logfile):
    """
    Set up the FASTA translator sieve and declare its parameters.

    Optional parameters:

    * item_limit (int, default 0): Upper bound on the number of
      sequences to parse. A value of 0 disables the limit.
    """
    # Single optional parameter, given as a (name, default) pair.
    translator_defaults = [('item_limit', 0)]
    Sieve.__init__(
        self,
        params,
        logfile,
        name='FASTA translator',
        param_names=translator_defaults,
    )
def __init__(self, params, logfile):
    """
    Set up the MultiRunner sieve and declare its parameters.

    Mandatory parameters:

    * sieve (module): The sieve to run.
    * params (dict): Parameter set that lists the values to try for
      each parameter, on the following form::

          {
              'param1': [1, 2],
              'param2': ['A', 'B', 'C']
          }

      The supplied sieve is run once for every possible combination of
      parameter values, so the example above results in 2*3=6 runs.
    """
    # Both entries are bare names (no default value): they are the
    # mandatory parameters documented above.
    Sieve.__init__(
        self,
        params,
        logfile,
        name='MultiRunner',
        param_names=['sieve', 'params'],
    )
def __init__(self, params, logfile):
    """
    Set up the HMMer hmmsearch sieve and declare its parameters.

    Mandatory parameters:

    * model_path (str): Path to the .hmm model file to use for
      hmmsearch.

    Optional parameters (type and default in parentheses):

    * hmmsearch_out (str, 'hmmsearch_out'): Path to the temporary
      hmmsearch output file.
    * numcpu (int, 4): Number of threads to use for hmmsearch.
    * write_only_domain (bool, True): Whether the output FASTA file
      should contain only the matching domain rather than the entire
      input sequence.
    * use_heuristics (bool, True): Heuristics on/off. There is little
      reason not to use heuristics: HMMer has a higher propensity for
      crashing without them, and turning them off only increases the
      number of really low scoring hits anyway.
    * longseqdef (int, 151): Sequences longer than this are considered
      "long" and are compared to `longseqcutoff` instead of
      `classificationfunction`.
    * longseqcutoff (float, 109.64): Classification cutoff (minimum
      domain score) for long sequences; a "long" sequence only passes
      if its score is higher than this.
    * minscore (int, 0): Minimum domain score, regardless of length,
      required to pass a sequence.
    * min_sequence_length (int, 20): Only consider sequences longer
      than this.
    * max_domain_length (int, 21844): Do not include sequences whose
      maximum domain is longer than this.
    * max_sequence_length (int, 21844000): Do not include sequences
      longer than this.
    * classificationfunction (function,
      lambda L: self.classifyK*L + self.classifyM): Classification
      function. The domain score of a sequence shorter than
      `longseqdef` is compared to the result of this function, called
      with the sequence length as its argument. Only sequences with a
      higher domain score pass.
    * classifyK (float, 0.7778): Parameter to the default
      `classificationfunction`.
    * classifyM (float, -7.89): Parameter to the default
      `classificationfunction`.
    """
    # Bare names carry no default value: mandatory parameters.
    mandatory = ['model_path']

    # (name, default) pairs for everything that may be left out.
    # NOTE(review): the default classificationfunction reads
    # self.classifyK and self.classifyM when it is called, so it
    # presumably relies on Sieve.__init__ storing the parameters as
    # instance attributes -- confirm against Sieve.
    optional = [
        ('hmmsearch_out', 'hmmsearch_out'),
        ('numcpu', 4),
        ('write_only_domain', True),
        ('use_heuristics', True),
        ('longseqdef', 151),
        ('longseqcutoff', 109.64),
        ('minscore', 0),
        ('min_sequence_length', 20),  # minimum fragment length allowed.
        ('max_domain_length', 21844),
        ('max_sequence_length', 21844000),
        ('classifyK', 0.7778),
        ('classifyM', -7.89),
        ('classificationfunction',
         lambda L: self.classifyK*L + self.classifyM),
    ]

    Sieve.__init__(
        self,
        params,
        logfile,
        name='HMMer hmmsearch',
        param_names=mandatory + optional,
    )