def run(options):
    """Task entry point: validate CLI options and build the ARIBA reference directory.

    Exits via sys.exit() with a message when mutually exclusive cd-hit
    options were both supplied; otherwise delegates all work to
    ref_preparer.RefPreparer and writes output to options.outdir.
    """
    # --no_cdhit and --cdhit_clusters are mutually exclusive: one disables
    # clustering entirely, the other supplies precomputed clusters.
    if options.no_cdhit and options.cdhit_clusters is not None:
        sys.exit('Cannot use both --no_cdhit and --cdhit_clusters. Neither or exactly one of those options must be used')

    extern_progs, version_report_lines = versions.get_all_versions(using_spades=False)
    if options.verbose:
        print(*version_report_lines, sep='\n')

    ref_prep = ref_preparer.RefPreparer(
        options.fasta_files,
        extern_progs,
        metadata_tsv_files=options.tsv_files,
        all_coding=options.all_coding,
        version_report_lines=version_report_lines,
        min_gene_length=options.min_gene_length,
        max_gene_length=options.max_gene_length,
        genetic_code=options.genetic_code,
        cdhit_min_id=options.cdhit_min_id,
        cdhit_min_length=options.cdhit_min_length,
        cdhit_max_memory=options.cdhit_max_memory,
        run_cdhit=not options.no_cdhit,
        clusters_file=options.cdhit_clusters,
        threads=options.threads,
        verbose=options.verbose,
        force=options.force,
    )
    ref_prep.run(options.outdir)
def test_run_all_noncoding(self):
    '''test run with no metadata input, all sequences are noncoding'''
    # Three input fastas, no metadata TSVs: every sequence should be
    # auto-annotated as noncoding.
    input_fastas = [
        os.path.join(data_dir, 'ref_preparer_test_run.in.' + str(i) + '.fa')
        for i in (1, 2, 3)
    ]
    refprep = ref_preparer.RefPreparer(
        input_fastas, external_progs.ExternalProgs(), all_coding='no', genetic_code=1
    )
    outdir = 'tmp.ref_preparer_test_run'
    refprep.run(outdir)

    expected_dir = os.path.join(data_dir, 'ref_preparer_test_run_all_noncoding.out')
    # Every expected output file must match the produced one byte-for-byte.
    expected_files = [
        '00.auto_metadata.tsv',
        '01.filter.check_metadata.tsv',
        '01.filter.check_genes.log',
        '01.filter.check_noncoding.log',
        '01.filter.check_metadata.log',
        '02.cdhit.all.fa',
        '02.cdhit.clusters.tsv',
        '02.cdhit.gene.fa',
        '02.cdhit.gene.varonly.fa',
        '02.cdhit.noncoding.fa',
        '02.cdhit.noncoding.varonly.fa',
    ]
    for name in expected_files:
        self.assertTrue(
            filecmp.cmp(
                os.path.join(expected_dir, name),
                os.path.join(outdir, name),
                shallow=False,
            )
        )
    common.rmtree(outdir)
def test_run(self):
    '''test run'''
    # Input sequences and their variant metadata.
    fasta_in = [
        os.path.join(data_dir, 'ref_preparer_test_run.in.1.fa'),
        os.path.join(data_dir, 'ref_preparer_test_run.in.2.fa'),
        os.path.join(data_dir, 'ref_preparer_test_run.in.3.fa'),
    ]
    tsv_in = [
        os.path.join(data_dir, 'ref_preparer_test_run.in.1.tsv'),
        os.path.join(data_dir, 'ref_preparer_test_run.in.2.tsv'),
    ]
    extern_progs = external_progs.ExternalProgs()
    refprep = ref_preparer.RefPreparer(fasta_in, extern_progs, metadata_tsv_files=tsv_in, genetic_code=1)
    tmp_out = 'tmp.ref_preparer_test_run'
    refprep.run(tmp_out)

    expected_outdir = os.path.join(data_dir, 'ref_preparer_test_run.out')
    # Each produced file must be byte-identical to the expected copy.
    test_files = [
        '01.filter.check_metadata.tsv',
        '01.filter.check_genes.log',
        '01.filter.check_metadata.log',
        '02.cdhit.all.fa',
        '02.cdhit.clusters.tsv',
        '02.cdhit.gene.fa',
        '02.cdhit.gene.varonly.fa',
        '02.cdhit.noncoding.fa',
        '02.cdhit.noncoding.varonly.fa',
    ]
    for filename in test_files:
        expected = os.path.join(expected_outdir, filename)
        got = os.path.join(tmp_out, filename)
        self.assertTrue(filecmp.cmp(expected, got, shallow=False))

    # Clean up with common.rmtree, consistent with the other tests in this
    # file (previously used shutil.rmtree here only).
    common.rmtree(tmp_out)
def make_prepareref_dir(outdir):
    """Create an ARIBA prepareref directory at outdir from generated input files.

    Builds the input fasta/tsv in a temporary working directory, runs
    RefPreparer on them (without cd-hit clustering), then writes a
    00.params.json marking the reference as TB. Raises if outdir exists.
    """
    if os.path.exists(outdir):
        raise Exception('Output directory ' + outdir + ' already exists. Cannot continue')

    # Stage the generated input files in a throwaway directory next to cwd.
    work_dir = tempfile.mkdtemp(prefix=outdir + '.tmp', dir=os.getcwd())
    file_prefix = os.path.join(work_dir, 'out')
    make_prepareref_files(file_prefix)

    ref_prep = ref_preparer.RefPreparer(
        [file_prefix + '.fa'],
        None,
        metadata_tsv_files=[file_prefix + '.tsv'],
        run_cdhit=False,
        threads=1,
    )
    ref_prep.run(outdir)
    common.rmtree(work_dir)

    # Record that this reference directory was built for TB mode.
    with open(os.path.join(outdir, '00.params.json'), 'w') as f:
        json.dump({'tb': True}, f)
def run(self):
    """Download MLST data from pubmlst for self.species and build an ARIBA db directory.

    Fetches the species' allele fastas and profile, writes a clusters file,
    runs ref_preparer on the result, and copies the MLST profile into the
    prepared directory.

    Raises:
        Error: if the output directory cannot be created.
    """
    try:
        os.mkdir(self.outdir)
    except OSError as e:
        # A bare "except:" here would also swallow KeyboardInterrupt/SystemExit
        # and discard the real cause; catch OSError and chain it instead.
        raise Error('Error making output directory ' + self.outdir) from e

    pubmlst = pubmlst_getter.PubmlstGetter(debug=self.debug, verbose=self.verbose)
    pubmlst.get_species_files(self.species, self.mlst_download_dir)
    if self.verbose:
        print('Downloaded data from pubmlst')

    profile_file = os.path.join(self.mlst_download_dir, 'profile.txt')
    self.profile = mlst_profile.MlstProfile(profile_file, duplicate_warnings=True)
    if self.verbose:
        print('Loaded mlst profile file', profile_file)

    self._load_fasta_files_and_write_clusters_file(self.mlst_download_dir)
    if self.verbose:
        print('Loaded fasta files and wrote clusters file')
        print('Putting data in ariba db directory', self.prepareref_dir)

    refprep = ref_preparer.RefPreparer(
        self.fasta_files,
        self.extern_progs,
        all_coding='no',
        clusters_file=self.clusters_file,
        verbose=self.verbose,
    )
    refprep.run(self.prepareref_dir)
    shutil.copy(profile_file, os.path.join(self.prepareref_dir, 'pubmlst.profile.txt'))
    print('ariba db directory prepared. You can use it like this:')
    print('ariba run', self.prepareref_dir, 'reads_1.fq reads_2.fq output_directory')
def test_run_noncoding_checks(self):
    '''test run with noncoding sequences that are outside of the allowed size range'''
    fasta_files = [os.path.join(data_dir, 'ref_preparer_test_run.in.4.fa')]
    tsv_files = [os.path.join(data_dir, 'ref_preparer_test_run.in.4.tsv')]
    # Restrict noncoding lengths to 6..20 so out-of-range sequences get filtered.
    refprep = ref_preparer.RefPreparer(
        fasta_files,
        external_progs.ExternalProgs(),
        min_noncoding_length=6,
        max_noncoding_length=20,
        metadata_tsv_files=tsv_files,
        genetic_code=1,
    )
    outdir = 'tmp.ref_preparer_test_run_noncoding_checks'
    refprep.run(outdir)

    expected_dir = os.path.join(data_dir, 'ref_preparer_test_run_noncoding_checks.out')
    # Every expected output file must match the produced one byte-for-byte.
    expected_files = [
        '01.filter.check_metadata.tsv',
        '01.filter.check_genes.log',
        '01.filter.check_noncoding.log',
        '01.filter.check_metadata.log',
        '02.cdhit.all.fa',
        '02.cdhit.clusters.tsv',
        '02.cdhit.gene.fa',
        '02.cdhit.gene.varonly.fa',
        '02.cdhit.noncoding.fa',
        '02.cdhit.noncoding.varonly.fa',
    ]
    for name in expected_files:
        self.assertTrue(
            filecmp.cmp(
                os.path.join(expected_dir, name),
                os.path.join(outdir, name),
                shallow=False,
            )
        )
    common.rmtree(outdir)