def test_acba_simple_linear(self):
    """
    Run integron_finder with ``--linear`` on the acba.007.p01.13 replicon and
    compare the produced .integrons and .summary files with the reference
    files located through ``self.find_data``.
    """
    name = 'acba.007.p01.13'
    reference_dir = 'Results_Integron_Finder_acba.007.p01.13.linear'
    produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(name))

    fasta = self.find_data(os.path.join('Replicons', name + '.fst'))
    command = "integron_finder --outdir {} --linear {}".format(self.out_dir, fasta)
    with self.catch_io(out=True, err=True):
        # the leading program name is dropped before the call
        main(command.split()[1:], loglevel='WARNING')

    # .integrons file
    integrons_name = '{}.integrons'.format(name)
    reference_integrons = self.find_data(os.path.join(reference_dir, integrons_name))
    produced_integrons = os.path.join(produced_dir, integrons_name)
    self.assertIntegronResultEqual(reference_integrons, produced_integrons)

    # .summary file, compared as tab separated tables
    summary_name = '{}.summary'.format(name)
    reference_summary = pd.read_csv(self.find_data(os.path.join(reference_dir, summary_name)),
                                    sep="\t")
    produced_summary = pd.read_csv(os.path.join(produced_dir, summary_name), sep="\t")
    pdt.assert_frame_equal(reference_summary, produced_summary)
def test_acba_annot(self):
    """
    Run integron_finder with functional annotation (``--func-annot``),
    ``--promoter-attI``, ``--gbk`` and ``--keep-tmp`` on acba.007.p01.13, then
    compare the genbank file, the .integrons file and the Resfams table kept
    in the tmp directory with the reference ``.annot`` results.
    """
    name = 'acba.007.p01.13'
    seq_id = 'ACBA.007.P01_13'
    reference_dir = 'Results_Integron_Finder_{}.annot'.format(name)

    fasta = self.find_data(os.path.join('Replicons', '{}.fst'.format(name)))
    command = ("integron_finder --outdir {out_dir} --func-annot --path-func-annot {annot_bank} "
               "--promoter-attI --gbk --keep-tmp {replicon}").format(out_dir=self.out_dir,
                                                                     annot_bank=self.resfams_dir,
                                                                     replicon=fasta)
    # only stdout is captured in this test
    with self.catch_io(out=True, err=False):
        main(command.split()[1:], loglevel='WARNING')

    produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(name))

    # genbank output, compared as parsed records
    gbk_name = '{}.gbk'.format(seq_id)
    reference_gbk_path = self.find_data(os.path.join(reference_dir, gbk_name))
    produced_gbk_path = os.path.join(produced_dir, gbk_name)
    reference_record = SeqIO.read(reference_gbk_path, 'gb')
    produced_record = SeqIO.read(produced_gbk_path, 'gb')
    self.assertSeqRecordEqual(reference_record, produced_record)

    # .integrons file
    integrons_name = '{}.integrons'.format(name)
    reference_integrons = self.find_data(os.path.join(reference_dir, integrons_name))
    produced_integrons = os.path.join(produced_dir, integrons_name)
    self.assertIntegronResultEqual(reference_integrons, produced_integrons)

    # Resfams table stored in the tmp directory
    resfams_table = os.path.join('tmp_{}'.format(seq_id), seq_id + '_Resfams_fa_table.res')
    reference_table = self.find_data(os.path.join(reference_dir, resfams_table))
    produced_table = os.path.join(produced_dir, resfams_table)
    self.assertHmmEqual(reference_table, produced_table)
def test_acba_simple_with_gbk_without_promoter(self):
    """
    Run integron_finder with ``--gbk`` only (no ``--promoter-attI``) on
    acba.007.p01.13 and compare the genbank and .integrons outputs with the
    reference ``.wo_promoter`` results.
    """
    name = 'acba.007.p01.13'
    seq_id = 'ACBA.007.P01_13'

    fasta = self.find_data(os.path.join('Replicons', '{}.fst'.format(name)))
    command = "integron_finder --outdir {} --gbk {}".format(self.out_dir, fasta)
    with self.catch_io(out=True, err=True):
        main(command.split()[1:], loglevel='WARNING')

    result_dirname = 'Results_Integron_Finder_{}'.format(name)
    produced_dir = os.path.join(self.out_dir, result_dirname)
    reference_dir = result_dirname + ".wo_promoter"

    # genbank output, compared as parsed records
    gbk_name = '{}.gbk'.format(seq_id)
    reference_gbk_path = self.find_data(os.path.join(reference_dir, gbk_name))
    produced_gbk_path = os.path.join(produced_dir, gbk_name)
    reference_record = SeqIO.read(reference_gbk_path, 'gb')
    produced_record = SeqIO.read(produced_gbk_path, 'gb')
    self.assertSeqRecordEqual(reference_record, produced_record)

    # .integrons file
    integrons_name = '{}.integrons'.format(name)
    reference_integrons = self.find_data(os.path.join(reference_dir, integrons_name))
    produced_integrons = os.path.join(produced_dir, integrons_name)
    self.assertIntegronResultEqual(reference_integrons, produced_integrons)
def test_acba_simple_gembase(self):
    """
    Run integron_finder with ``--gembase`` on the ACBA.0917.00019 replicon and
    compare the produced .integrons and .summary files with the reference
    ``.gembase`` results located through ``self.find_data``.
    """
    # the unused 'contig_id' local of the previous version has been dropped
    replicon_filename = 'ACBA.0917.00019'
    reference_dir = 'Results_Integron_Finder_{}.gembase'.format(replicon_filename)
    test_result_dir = os.path.join(self.out_dir,
                                   'Results_Integron_Finder_{}'.format(replicon_filename))

    replicon_path = self.find_data(os.path.join('Gembase', 'Replicons', replicon_filename + '.fna'))
    command = "integron_finder --outdir {out_dir} --gembase {replicon}".format(out_dir=self.out_dir,
                                                                               replicon=replicon_path)
    with self.catch_io(out=True, err=True):
        # the leading program name is dropped before the call
        main(command.split()[1:], loglevel='WARNING')

    # .integrons file
    output_filename = '{}.integrons'.format(replicon_filename)
    expected_result_path = self.find_data(os.path.join(reference_dir, output_filename))
    test_result_path = os.path.join(test_result_dir, output_filename)
    self.assertIntegronResultEqual(expected_result_path, test_result_path)

    # .summary file, compared as tab separated tables
    summary_file_name = '{}.summary'.format(replicon_filename)
    exp_summary_path = self.find_data(os.path.join(reference_dir, summary_file_name))
    exp_summary = pd.read_csv(exp_summary_path, sep="\t")
    test_summary_path = os.path.join(test_result_dir, summary_file_name)
    test_summary = pd.read_csv(test_summary_path, sep="\t")
    pdt.assert_frame_equal(exp_summary, test_summary)
def test_acba_simple_no_gbk_no_pdf(self):
    """
    Run integron_finder with ``--promoter-attI`` but without ``--gbk`` nor
    ``--pdf`` on acba.007.p01.13.

    The .integrons and .summary files are compared with the reference results,
    then the test checks that neither a genbank file nor a pdf was written in
    the result directory.
    """
    replicon_filename = 'acba.007.p01.13'
    replicon_id = 'ACBA.007.P01_13'
    reference_dir = 'Results_Integron_Finder_acba.007.p01.13'
    test_result_dir = os.path.join(self.out_dir,
                                   'Results_Integron_Finder_{}'.format(replicon_filename))

    replicon_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_filename)))
    command = "integron_finder --outdir {out_dir} --promoter-attI {replicon}".format(
        out_dir=self.out_dir,
        replicon=replicon_path)
    with self.catch_io(out=True, err=True):
        main(command.split()[1:], loglevel='WARNING')

    # .integrons file
    output_filename = '{}.integrons'.format(replicon_filename)
    expected_result_path = self.find_data(os.path.join(reference_dir, output_filename))
    test_result_path = os.path.join(test_result_dir, output_filename)
    self.assertIntegronResultEqual(expected_result_path, test_result_path)

    # .summary file, compared as tab separated tables
    summary_file_name = '{}.summary'.format(replicon_filename)
    exp_summary_path = self.find_data(os.path.join(reference_dir, summary_file_name))
    exp_summary = pd.read_csv(exp_summary_path, sep="\t")
    test_summary_path = os.path.join(test_result_dir, summary_file_name)
    test_summary = pd.read_csv(test_summary_path, sep="\t")
    pdt.assert_frame_equal(exp_summary, test_summary)

    # The other tests of this file look for '<replicon_id>.gbk' and
    # '<replicon_id>_1.pdf', so the absence checks must use replicon_id too.
    # Built from replicon_filename (as before) they could never fail.
    gbk_test = os.path.join(test_result_dir, '{}.gbk'.format(replicon_id))
    self.assertFalse(os.path.exists(gbk_test))
    pdf_test = os.path.join(test_result_dir, '{}_1.pdf'.format(replicon_id))
    self.assertFalse(os.path.exists(pdf_test))
def test_contig_with_empty_prot(self):
    """
    Run integron_finder with default options on the 'empty_prot_file' replicon
    and compare the produced .integrons and .summary files with the reference
    results. Lines starting with '#' are skipped when the summaries are read.
    """
    name = 'empty_prot_file'
    result_dirname = 'Results_Integron_Finder_{}'.format(name)
    produced_dir = os.path.join(self.out_dir, result_dirname)

    fasta = self.find_data(os.path.join('Replicons', name + '.fna'))
    command = "integron_finder --outdir {} {}".format(self.out_dir, fasta)
    with self.catch_io(out=True, err=True):
        main(command.split()[1:], loglevel='WARNING')

    reference_dir = self.find_data(result_dirname)

    # .integrons file
    integrons_name = '{}.integrons'.format(name)
    self.assertIntegronResultEqual(os.path.join(reference_dir, integrons_name),
                                   os.path.join(produced_dir, integrons_name))

    # .summary file: the produced table is read first, then the reference one
    summary_name = '{}.summary'.format(name)
    produced_summary = pd.read_csv(os.path.join(produced_dir, summary_name),
                                   sep='\t', comment="#")
    reference_summary = pd.read_csv(os.path.join(reference_dir, summary_name),
                                    sep='\t', comment="#")
    pdt.assert_frame_equal(produced_summary, reference_summary)
def test_acba_sequential_eq_isolate(self):
    """
    Check that a run on the whole ACBA.0917.00019 file gives the same results
    as a run on its first contig alone.

    ACBA.0917.00019 contains 2 contigs, 0001 and 0002; 0002 does not contain
    any integron.

    - the .integrons files of both runs must be identical
    - in the .summary of the whole run, the 0001 row must equal the summary of
      the isolated run, and the 0002 row must report 0 CALIN, 0 complete and
      0 In0

    NOTE(review): another method with this same name is defined further down
    in this file; if both belong to the same class this one is shadowed and
    never runs -- to be confirmed.

    :return:
    """
    whole_name = 'ACBA.0917.00019'
    first_contig = 'ACBA.0917.00019.0001'
    second_contig = 'ACBA.0917.00019.0002'

    # run on the file holding all the contigs
    whole_result_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(whole_name))
    whole_fasta = self.find_data(os.path.join('Gembase', 'Replicons', whole_name + '.fna'))
    whole_cmd = "integron_finder --outdir {} --keep-tmp {}".format(self.out_dir, whole_fasta)
    with self.catch_io(out=True, err=True):
        main(whole_cmd.split()[1:], loglevel='WARNING')

    # run on the first contig alone
    contig_result_dir = os.path.join(self.out_dir,
                                     'Results_Integron_Finder_{}'.format(first_contig))
    contig_fasta = self.find_data(os.path.join('Replicons', first_contig + '.fst'))
    contig_cmd = "integron_finder --outdir {} --keep-tmp --lin {}".format(self.out_dir,
                                                                          contig_fasta)
    with self.catch_io(out=True, err=True):
        main(contig_cmd.split()[1:], loglevel='WARNING')

    # .integrons files
    self.assertIntegronResultEqual(os.path.join(whole_result_dir, whole_name + '.integrons'),
                                   os.path.join(contig_result_dir, first_contig + '.integrons'))

    # .summary files ('#' lines are skipped)
    whole_summary = pd.read_csv(os.path.join(whole_result_dir, whole_name + '.summary'),
                                sep="\t", comment='#')
    contig_summary = pd.read_csv(os.path.join(contig_result_dir, first_contig + '.summary'),
                                 sep="\t", comment='#')

    first_rows = whole_summary.loc[whole_summary['ID_replicon'] == first_contig]
    pdt.assert_frame_equal(first_rows, contig_summary)

    # The positional indexes of the two frames differ (0/1), so both are
    # re-indexed on 'ID_replicon' before being compared.
    second_rows = whole_summary.loc[whole_summary['ID_replicon'] == second_contig]
    second_rows = second_rows.set_index(['ID_replicon'])
    expected_second = pd.DataFrame([[second_contig, 0, 0, 0]],
                                   columns=['ID_replicon', 'CALIN', 'complete', 'In0'])
    expected_second = expected_second.set_index(['ID_replicon'])
    pdt.assert_frame_equal(second_rows, expected_second)
def test_get_version(self):
    """
    test on having the version message when integron_finder is installed

    ``sys.exit`` is replaced by ``self.fake_exit`` while ``finder.main`` runs
    with ``-V``; the test expects this to surface as a ``TypeError`` whose
    message is the return code ('0'), and reads the banner from the captured
    stdout.

    The banner is compared token by token (whitespace separated), so the check
    pins its content (versions, authors, citation) and does not depend on line
    breaks or indentation.
    """
    # Imports are done before sys.exit is patched, so that a failing import
    # cannot leave the fake exit installed.
    from numpy import __version__ as np_vers
    from pandas import __version__ as pd_vers
    from matplotlib import __version__ as mplt_vers
    from Bio import __version__ as bio_vers
    from integron_finder import __version__ as if_vers

    version = None
    real_exit = sys.exit
    with self.catch_io(out=True, err=False):
        sys.exit = self.fake_exit
        try:
            finder.main(['integron_finder', '-V'])
        except TypeError as err:
            version = sys.stdout.getvalue()
            # program exit with returncode = 0
            self.assertEqual(str(err), '0')
        finally:
            sys.exit = real_exit

    # Without this guard a run that does not go through the fake exit would
    # end in a NameError on 'version' instead of a readable failure.
    self.assertIsNotNone(version, "'integron_finder -V' did not exit through sys.exit")

    # 'matplolib' is spelled as in the original expected text.
    exp_version = """integron_finder version {i_f}
Using:
 - Python {py}
 - numpy {np}
 - pandas {pd}
 - matplolib {mplt}
 - biopython {bio}

Authors:
 - Jean Cury, Bertrand Neron, Eduardo Rocha,

Citation:

 Identification and analysis of integrons and cassette arrays in bacterial genomes
 Jean Cury; Thomas Jove; Marie Touchon; Bertrand Neron; Eduardo PC Rocha
 Nucleic Acids Research 2016; doi: 10.1093/nar/gkw319
""".format(i_f=if_vers,
           py=sys.version.replace('\n', ' '),
           np=np_vers,
           pd=pd_vers,
           mplt=mplt_vers,
           bio=bio_vers)
    self.assertEqual(exp_version.split(), version.split())
def test_acba_simple_with_pdf(self):
    """
    Run integron_finder with ``--pdf`` and ``--promoter-attI`` on
    acba.007.p01.13 and check that 'ACBA.007.P01_13_1.pdf' exists in the
    result directory.
    """
    name = 'acba.007.p01.13'
    seq_id = 'ACBA.007.P01_13'
    produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(name))

    fasta = self.find_data(os.path.join('Replicons', '{}.fst'.format(name)))
    command = "integron_finder --outdir {} --pdf --promoter-attI {}".format(self.out_dir, fasta)
    with self.catch_io(out=True, err=True):
        main(command.split()[1:], loglevel='WARNING')

    produced_pdf = os.path.join(produced_dir, '{}_1.pdf'.format(seq_id))
    self.assertTrue(os.path.exists(produced_pdf))
def test_outdir_is_file(self):
    """
    Give integron_finder an ``--outdir`` that already exists as a regular file
    and check that ``IsADirectoryError`` is raised with the expected message.
    """
    name = 'acba.007.p01.13'

    # create an empty regular file where the output directory is expected
    bad_out_dir = os.path.join(self.out_dir, 'bad_out_dir')
    with open(bad_out_dir, 'w'):
        pass

    fasta = self.find_data(os.path.join('Replicons', '{}.fst'.format(name)))
    command = "integron_finder --outdir {} {}".format(bad_out_dir, fasta)
    with self.assertRaises(IsADirectoryError) as ctx:
        with self.catch_io(out=True):
            # stdout is captured so that, if the error is not raised, the
            # program output does not mess up the test report.
            # The logs cannot be caught because the loggers are reinitialized
            # in main (note kept from the original author).
            main(command.split()[1:])

    expected_msg = "outdir '{}' already exists and is not a directory".format(bad_out_dir)
    self.assertEqual(expected_msg, str(ctx.exception))
def test_no_integron(self):
    """
    Run integron_finder with default options on the 'fake_seq' replicon and
    check that the produced .integrons file equals the reference one.

    NOTE(review): another method with this same name is defined further down
    in this file; if both belong to the same class this one is shadowed and
    never runs -- to be confirmed.
    """
    # the unused 'replicon_id' local of the previous version has been dropped
    replicon_filename = 'fake_seq'
    test_result_dir = os.path.join(self.out_dir,
                                   'Results_Integron_Finder_{}'.format(replicon_filename))

    replicon_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_filename)))
    command = "integron_finder --outdir {out_dir} {replicon}".format(out_dir=self.out_dir,
                                                                     replicon=replicon_path)
    with self.catch_io(out=True, err=True):
        # the leading program name is dropped before the call
        main(command.split()[1:], loglevel='WARNING')

    output_filename = 'fake_seq.integrons'
    expected_result_path = self.find_data(os.path.join('Results_Integron_Finder_fake_seq',
                                                       output_filename))
    test_result_path = os.path.join(test_result_dir, output_filename)
    self.assertFileEqual(expected_result_path, test_result_path)
def test_acba_local_max(self):
    """
    Run integron_finder with ``--local-max``, functional annotation, ``--gbk``,
    ``--keep-tmp`` and ``--promoter-attI`` on acba.007.p01.13, then compare
    with the reference ``.local_max`` results:

    - the genbank file
    - the .integrons file
    - the Resfams table kept in the tmp directory
    - the '13825_1014' subseq attC table kept in the tmp directory, line by
      line, up to the first produced line starting with '# Program: '
    """
    name = 'acba.007.p01.13'
    seq_id = 'ACBA.007.P01_13'
    reference_dir = 'Results_Integron_Finder_{}.local_max'.format(name)
    tmp_dirname = 'tmp_{}'.format(seq_id)

    fasta = self.find_data(os.path.join('Replicons', '{}.fst'.format(name)))
    command = ("integron_finder --outdir {out_dir} --func-annot --path-func-annot {annot_bank} "
               "--local-max --gbk --keep-tmp --promoter-attI {replicon}").format(
        out_dir=self.out_dir,
        annot_bank=self.resfams_dir,
        replicon=fasta)
    with self.catch_io(out=True, err=True):
        main(command.split()[1:], loglevel='WARNING')

    produced_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(name))

    # genbank output, compared as parsed records
    gbk_name = '{}.gbk'.format(seq_id)
    reference_gbk_path = self.find_data(os.path.join(reference_dir, gbk_name))
    produced_gbk_path = os.path.join(produced_dir, gbk_name)
    reference_record = SeqIO.read(reference_gbk_path, 'gb')
    produced_record = SeqIO.read(produced_gbk_path, 'gb')
    self.assertSeqRecordEqual(reference_record, produced_record)

    # .integrons file
    integrons_name = '{}.integrons'.format(name)
    reference_integrons = self.find_data(os.path.join(reference_dir, integrons_name))
    produced_integrons = os.path.join(produced_dir, integrons_name)
    self.assertIntegronResultEqual(reference_integrons, produced_integrons)

    # Resfams table
    resfams_table = os.path.join(tmp_dirname, '{}_Resfams_fa_table.res'.format(seq_id))
    reference_resfams = self.find_data(os.path.join(reference_dir, resfams_table))
    produced_resfams = os.path.join(produced_dir, resfams_table)
    self.assertHmmEqual(reference_resfams, produced_resfams)

    # subseq attC table
    attc_table = os.path.join(tmp_dirname, '{}_13825_1014_subseq_attc_table.res'.format(seq_id))
    reference_attc = self.find_data(os.path.join(reference_dir, attc_table))
    produced_attc = os.path.join(produced_dir, attc_table)
    with open(reference_attc) as reference_file, open(produced_attc) as produced_file:
        for reference_line, produced_line in zip(reference_file, produced_file):
            # nothing is compared from the '# Program: ' line onwards
            if produced_line.startswith('# Program: '):
                break
            self.assertEqual(reference_line, produced_line)
def test_acba_sequential_eq_isolate(self):
    """
    Check that a run on the whole ACBA.0917.00019 file gives the same results
    as a run on its first contig alone.

    ACBA.0917.00019 contains 2 contigs, 0001 and 0002; 0002 does not contain
    any integron. Both the .integrons and the .summary files of the two runs
    are expected to be equal.

    NOTE(review): a method with this same name is also defined earlier in this
    file; if both belong to the same class this definition replaces the
    earlier one -- to be confirmed.

    :return:
    """
    whole_name = 'ACBA.0917.00019'
    first_contig = 'ACBA.0917.00019.0001'

    # run on the file holding all the contigs
    whole_result_dir = os.path.join(self.out_dir, 'Results_Integron_Finder_{}'.format(whole_name))
    whole_fasta = self.find_data(os.path.join('Gembase', 'Replicons', whole_name + '.fna'))
    whole_cmd = "integron_finder --outdir {} --keep-tmp {}".format(self.out_dir, whole_fasta)
    with self.catch_io(out=True, err=True):
        main(whole_cmd.split()[1:], loglevel='WARNING')

    # run on the first contig alone
    contig_result_dir = os.path.join(self.out_dir,
                                     'Results_Integron_Finder_{}'.format(first_contig))
    contig_fasta = self.find_data(os.path.join('Replicons', first_contig + '.fst'))
    contig_cmd = "integron_finder --outdir {} --keep-tmp --lin {}".format(self.out_dir,
                                                                          contig_fasta)
    with self.catch_io(out=True, err=True):
        main(contig_cmd.split()[1:], loglevel='WARNING')

    # .integrons files
    self.assertIntegronResultEqual(os.path.join(whole_result_dir, whole_name + '.integrons'),
                                   os.path.join(contig_result_dir, first_contig + '.integrons'))

    # .summary files, compared as tab separated tables
    whole_summary = pd.read_csv(os.path.join(whole_result_dir, whole_name + '.summary'),
                                sep="\t")
    contig_summary = pd.read_csv(os.path.join(contig_result_dir, first_contig + '.summary'),
                                 sep="\t")
    pdt.assert_frame_equal(whole_summary, contig_summary)
def test_acba_no_cmsearch(self):
    """
    Hide the 'cmsearch' executable from ``finder.distutils.spawn.find_executable``
    and check that integron_finder raises ``RuntimeError`` with the expected
    message.

    The patched ``find_executable`` is put back in a ``finally`` clause, so
    the patch cannot leak into other tests whatever the outcome of this one.
    """
    replicon_filename = 'acba.007.p01.13'

    original_find_executable = finder.distutils.spawn.find_executable
    decorator = hide_executable('cmsearch')
    finder.distutils.spawn.find_executable = decorator(original_find_executable)
    try:
        replicon_path = self.find_data(
            os.path.join('Replicons', '{}.fst'.format(replicon_filename)))
        command = "integron_finder --outdir {out_dir} {replicon}".format(out_dir=self.out_dir,
                                                                         replicon=replicon_path)
        with self.assertRaises(RuntimeError) as ctx:
            with self.catch_io(out=True):
                # stdout is captured so that, if the error is not raised, the
                # program output does not mess up the test report.
                # The logs cannot be caught because the loggers are
                # reinitialized in main (note kept from the original author).
                main(command.split()[1:])
    finally:
        # restore the real lookup function
        finder.distutils.spawn.find_executable = original_find_executable

    err_msg = "cannot find 'cmsearch' in PATH.\n" \
              "Please install infernal package or setup 'cmsearch' binary path with --cmsearch option"
    self.assertEqual(err_msg, str(ctx.exception))
def test_no_integron(self):
    """
    Run integron_finder with default options on the 'fake_seq' replicon and
    check the first two lines of the produced .integrons file: the first must
    start with '# cmd: integron_finder', the second must be
    '# No Integron found'.

    NOTE(review): a method with this same name is also defined earlier in this
    file; if both belong to the same class this definition replaces the
    earlier one -- to be confirmed.
    """
    # the unused 'replicon_id' local of the previous version has been dropped
    replicon_filename = 'fake_seq'
    test_result_dir = os.path.join(self.out_dir,
                                   'Results_Integron_Finder_{}'.format(replicon_filename))

    replicon_path = self.find_data(os.path.join('Replicons', '{}.fst'.format(replicon_filename)))
    command = "integron_finder --outdir {out_dir} {replicon}".format(out_dir=self.out_dir,
                                                                     replicon=replicon_path)
    with self.catch_io(out=True, err=True):
        # the leading program name is dropped before the call
        main(command.split()[1:], loglevel='WARNING')

    test_result_path = os.path.join(test_result_dir, 'fake_seq.integrons')
    with open(test_result_path) as tested_file:
        # readline() returns '' at end of file, so a file that is too short
        # gives a plain assertion failure and not a StopIteration error.
        first_line = tested_file.readline()
        second_line = tested_file.readline()
    self.assertTrue(first_line.startswith('# cmd: integron_finder'))
    self.assertEqual(second_line.strip(), '# No Integron found')
def test_resultdir_not_writable(self):
    """
    Create the result directory beforehand with mode 0o500 (no write
    permission) and check that integron_finder raises ``PermissionError``
    with the expected message.
    """
    name = 'acba.007.p01.13'

    # a Config built from a minimal namespace gives the result directory path
    namespace = argparse.Namespace()
    namespace.replicon = self.find_data(os.path.join('Replicons', '{}.fst'.format(name)))
    namespace.outdir = self.out_dir
    cfg = config.Config(namespace)
    os.mkdir(cfg.result_dir, mode=0o500)

    fasta = self.find_data(os.path.join('Replicons', '{}.fst'.format(name)))
    command = "integron_finder --outdir {} {}".format(self.out_dir, fasta)
    with self.assertRaises(PermissionError) as ctx:
        with self.catch_io(out=True):
            # stdout is captured so that, if the error is not raised, the
            # program output does not mess up the test report.
            # The logs cannot be caught because the loggers are reinitialized
            # in main (note kept from the original author).
            main(command.split()[1:])

    # NOTE(review): the expected message interpolates self.out_dir although the
    # directory made read-only is cfg.result_dir; presumably this mirrors the
    # message built by the program -- to be confirmed.
    expected_msg = "result dir '{}' already exists and is not writable".format(self.out_dir)
    self.assertEqual(expected_msg, str(ctx.exception))