def test_get_reference_sequences(self):
    """Check the path and the content of the default reference sequences.

    The path returned by ``get_reference_sequences`` must contain the
    expected relative location, point to an existing file, be absolute,
    and the file's MD5 digest must match the pinned checksum.
    """
    ref_path = get_reference_sequences()
    expected_tail = os.path.join('gg_13_8_otus', 'rep_set', '97_otus.fasta')
    self.assertIn(expected_tail, ref_path)
    self.assertTrue(os.path.isfile(ref_path))
    self.assertTrue(os.path.isabs(ref_path))

    # The file is opened in binary mode and handed to safe_md5 for hashing.
    with open(ref_path, 'rb') as handle:
        digest = safe_md5(handle).hexdigest()
    self.assertEqual(digest, '50b2269712b3738afb41892bed936c29')
def test_get_reference_sequences(self):
    """Verify the location and checksum of the default reference sequences.

    Asserts that the returned path contains
    ``gg_13_8_otus/rep_set/97_otus.fasta`` (joined with the platform
    separator), that it is an absolute path to an existing file, and that
    the MD5 digest of the file equals the pinned value.
    """
    seqs_fp = get_reference_sequences()
    relative_fp = os.path.join('gg_13_8_otus', 'rep_set', '97_otus.fasta')
    self.assertIn(relative_fp, seqs_fp)
    self.assertTrue(os.path.isfile(seqs_fp))
    self.assertTrue(os.path.isabs(seqs_fp))

    # Read the file in binary mode and compute its digest via safe_md5.
    with open(seqs_fp, 'rb') as seqs_f:
        observed_md5 = safe_md5(seqs_f).hexdigest()
    self.assertEqual(observed_md5, '50b2269712b3738afb41892bed936c29')
def get_reference_set():
    """Return the reference files to use for OTU picking.

    In a test environment (as reported by ``ag.is_test_env()``) the files
    under ``tests/data`` in the repository directory are used; otherwise
    the paths come from ``qdr``.

    Returns
    -------
    str
        The file path to the reference sequences.
    str
        The file path to the reference taxonomy.
    """
    if not ag.is_test_env():
        return qdr.get_reference_sequences(), qdr.get_reference_taxonomy()

    # Test environment: point at the files kept inside the repository.
    repo_dir = get_repository_dir()
    seqs_fp = os.path.join(repo_dir, 'tests/data/otus.fna')
    tax_fp = os.path.join(repo_dir, 'tests/data/otus.txt')
    return seqs_fp, tax_fp
def defineTaxSeqsFile(uploadedSeqsFile):
    """Compare the uploaded sequences against the default reference set.

    ``uploadedSeqsFile`` is passed to ``compareWithDB`` together with the
    reference sequences and the reference taxonomy obtained from ``qdr``;
    the result of that call is returned unchanged.
    """
    # NOTE: the uploadSeqsFile(uploadedSeqsFile) step is currently disabled.
    reference_seqs = qdr.get_reference_sequences()
    reference_tax = qdr.get_reference_taxonomy()
    return compareWithDB(uploadedSeqsFile, reference_seqs, reference_tax)
def main():
    """Run the closed-reference OTU picking workflow from the command line.

    Parses the command line options described by ``script_info``, loads the
    optional parameters file, creates the output directory and calls
    ``run_pick_closed_reference_otus``. When the reference sequences in use
    equal the path returned by ``get_reference_sequences()``, the matching
    reference tree is copied into the output directory.

    Raises
    ------
    IOError
        If the parameters file given on the command line cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    print_only = opts.print_only
    assign_taxonomy = opts.assign_taxonomy

    if opts.suppress_taxonomy_assignment:
        assign_taxonomy = False
        taxonomy_fp = None

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        # Close the handle even when parsing raises.
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        # empty list returns empty defaultdict for now
        params = parse_qiime_parameters([])

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    try:
        makedirs(output_dir)
    except OSError:
        # A failure to create the directory is tolerated only with --force.
        if not opts.force:
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        assign_taxonomy=assign_taxonomy,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)

    # The default reference sequences come with a matching tree; copy it
    # next to the results so it is available in the output directory.
    if get_reference_sequences() == reference_fp:
        reference_tree_fp = get_reference_tree()
        fn = basename(reference_tree_fp)
        copyfile(reference_tree_fp, join(output_dir, fn))
from os.path import basename, join from qiime_default_reference import (get_reference_sequences, get_reference_tree) from qiime.util import (load_qiime_config, parse_command_line_parameters, get_options_lookup, make_option) from qiime.parse import parse_qiime_parameters from qiime.workflow.upstream import run_pick_closed_reference_otus from qiime.workflow.util import (print_commands, call_commands_serially, print_to_stdout, no_status_updates, validate_and_set_jobs_to_start) qiime_config = load_qiime_config() options_lookup = get_options_lookup() if get_reference_sequences() == qiime_config['pick_otus_reference_seqs_fp']: reference_fp_help = ( "The reference sequences [default: %default]. " + "NOTE: If you do not pass -r to this script, you will be using " "QIIME's default reference sequences. In this case, QIIME will " "copy the corresponding reference tree to the output directory. " "This is the tree that should be used to perform phylogenetic " "diversity analyses (e.g., with core_diversity_analyses.py).") else: reference_fp_help = "The reference sequences [default: %default]." script_info = {} script_info[ 'brief_description'] = "Closed-reference OTU picking/Shotgun UniFrac workflow." script_info['script_description'] = """ This script picks OTUs using a closed reference and constructs an OTU table.
def main():
    """Run the closed-reference OTU picking workflow from the command line.

    Parses the command line options described by ``script_info``, loads the
    optional parameters file, creates the output directory and calls
    ``run_pick_closed_reference_otus``. When the reference sequences in use
    equal the path returned by ``get_reference_sequences()``, the matching
    reference tree is copied into the output directory.

    Raises
    ------
    IOError
        If the parameters file given on the command line cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    verbose = opts.verbose
    input_fp = opts.input_fp
    reference_fp = opts.reference_fp
    taxonomy_fp = opts.taxonomy_fp
    output_dir = opts.output_dir
    print_only = opts.print_only
    assign_taxonomy = opts.assign_taxonomy

    if opts.suppress_taxonomy_assignment:
        assign_taxonomy = False
        taxonomy_fp = None

    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError(
                "Can't open parameters file (%s). Does it exist? Do you have read access?"
                % opts.parameter_fp)
        # Close the handle even when parsing raises.
        with parameter_f:
            params = parse_qiime_parameters(parameter_f)
    else:
        # empty list returns empty defaultdict for now
        params = parse_qiime_parameters([])

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    try:
        makedirs(output_dir)
    except OSError:
        # A failure to create the directory is tolerated only with --force.
        if not opts.force:
            option_parser.error(
                "Output directory already exists. Please choose"
                " a different directory, or force overwrite with -f.")

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_pick_closed_reference_otus(
        input_fp,
        reference_fp,
        output_dir,
        taxonomy_fp,
        assign_taxonomy=assign_taxonomy,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback)

    # The default reference sequences come with a matching tree; copy it
    # next to the results so it is available in the output directory.
    if get_reference_sequences() == reference_fp:
        reference_tree_fp = get_reference_tree()
        fn = basename(reference_tree_fp)
        copyfile(reference_tree_fp, join(output_dir, fn))
from qiime_default_reference import (get_reference_sequences, get_reference_tree) from qiime.util import (load_qiime_config, parse_command_line_parameters, get_options_lookup, make_option) from qiime.parse import parse_qiime_parameters from qiime.workflow.upstream import run_pick_closed_reference_otus from qiime.workflow.util import (print_commands, call_commands_serially, print_to_stdout, no_status_updates, validate_and_set_jobs_to_start) qiime_config = load_qiime_config() options_lookup = get_options_lookup() if get_reference_sequences() == qiime_config['pick_otus_reference_seqs_fp']: reference_fp_help = ( "The reference sequences [default: %default]. " + "NOTE: If you do not pass -r to this script, you will be using " "QIIME's default reference sequences. In this case, QIIME will " "copy the corresponding reference tree to the output directory. " "This is the tree that should be used to perform phylogenetic " "diversity analyses (e.g., with core_diversity_analyses.py).") else: reference_fp_help = "The reference sequences [default: %default]." script_info = {} script_info[ 'brief_description'] = "Closed-reference OTU picking/Shotgun UniFrac workflow." script_info['script_description'] = """ This script picks OTUs using a closed reference and constructs an OTU table.
from itertools import islice

from qiime_default_reference import get_template_alignment, get_reference_sequences
from skbio import SequenceCollection

# Only the first 500 records of each file are kept. islice stops after
# them instead of building an (id, sequence) tuple for every record and
# slicing the finished list afterwards.
gapped_sequences = [
    (s.id, str(s))
    for s in islice(SequenceCollection.read(get_template_alignment()), 500)
]
sequences = [
    (s.id, str(s))
    for s in islice(SequenceCollection.read(get_reference_sequences()), 500)
]

motif_1 = "GGTGCAAGCCGGTGGAAACA"


def pairwise(l):
    """Pair up consecutive items of *l*, trimmed to a common length.

    Items are consumed two at a time (first with second, third with
    fourth, ...); a trailing item without a partner is dropped. Both
    members of each pair are sliced to the length of the shorter one.

    Returns a list of ``(a, b)`` tuples.
    """
    pairs = []
    # zip over the same iterator twice yields non-overlapping pairs.
    items = iter(l)
    for first, second in zip(items, items):
        common = min(len(first), len(second))
        pairs.append((first[:common], second[:common]))
    return pairs
from itertools import islice

from qiime_default_reference import get_template_alignment, get_reference_sequences
from skbio import SequenceCollection

# Only the first 500 records of each file are kept. islice stops after
# them instead of building an (id, sequence) tuple for every record and
# slicing the finished list afterwards.
gapped_sequences = [
    (s.id, str(s))
    for s in islice(SequenceCollection.read(get_template_alignment()), 500)
]
sequences = [
    (s.id, str(s))
    for s in islice(SequenceCollection.read(get_reference_sequences()), 500)
]

motif_1 = "GGTGCAAGCCGGTGGAAACA"