class TestScreen(unittest.TestCase):
    """Exercise ``Screen.screenshot`` and ``Screen._import_libs`` on disk fixtures."""

    def setUp(self):
        # Build the scratch folder tree, write the FASTA fixture and create
        # the Screen instance under test.
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.output = os.path.join(self.test_folder, "output")
        self.tex_wig = os.path.join(self.test_folder, "tex")
        self.frag_wig = os.path.join(self.test_folder, "frag")
        # Create every folder that is missing, so a half-built tree left
        # behind by an aborted run cannot break the fixture set-up.
        for folder in (self.test_folder, self.tex_wig,
                       self.frag_wig, self.output):
            if not os.path.exists(folder):
                os.mkdir(folder)
        self.fasta = os.path.join(self.test_folder, "aaa.fa")
        gen_file(self.fasta, self.example.fasta)
        args = self.mock_args.mock()
        args.output_folder = self.output
        args.fasta = self.fasta
        self.screen = Screen(args)

    def tearDown(self):
        # Remove the whole scratch tree created by setUp and the tests.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_screenshot(self):
        # Write wiggle and GFF fixtures, run screenshot() and check the
        # generated folders plus the content of the reverse batch file.
        wig_fixtures = (
            (self.tex_wig, "tex_1_f.wig", self.example.wig_f),
            (self.tex_wig, "notex_1_f.wig", self.example.wig_f),
            (self.frag_wig, "frag_f.wig", self.example.wig_f),
            (self.tex_wig, "tex_1_r.wig", self.example.wig_r),
            (self.tex_wig, "notex_1_r.wig", self.example.wig_r),
            (self.frag_wig, "frag_r.wig", self.example.wig_r),
        )
        for folder, name, content in wig_fixtures:
            gen_file(os.path.join(folder, name), content)
        args = self.mock_args.mock()
        args.fasta = self.fasta
        args.main_gff = os.path.join(self.test_folder, "main.gff")
        gen_file(args.main_gff, self.example.main_gff)
        side_gff = os.path.join(self.test_folder, "side.gff")
        args.side_gffs = [side_gff]
        gen_file(side_gff, self.example.side_gff)
        args.frag_wigs = self.frag_wig
        args.tex_wigs = self.tex_wig
        args.height = 1000
        args.tlibs = ["tex_1_f.wig:tex:1:a:+", "tex_1_r.wig:tex:1:a:-",
                      "notex_1_f.wig:notex:1:a:+",
                      "notex_1_r.wig:notex:1:a:-"]
        args.flibs = ["frag_f.wig:frag:1:a:+", "frag_r.wig:frag:1:a:-"]
        args.present = "expand"
        args.output_folder = self.output
        self.screen.screenshot(args)
        shot_dir = os.path.join(self.output, "screenshots", "aaa")
        self.assertTrue(os.path.exists(os.path.join(shot_dir, "forward")))
        self.assertTrue(os.path.exists(os.path.join(shot_dir, "reverse")))
        # The forward batch is only read, not compared; the result used to be
        # stored and immediately overwritten.
        # NOTE(review): add a content check if a forward fixture exists.
        import_data(os.path.join(shot_dir, "forward.txt"))
        datas = import_data(os.path.join(shot_dir, "reverse.txt"))
        self.assertEqual("\n".join(datas), self.example.out_r)

    def test_import_libs(self):
        # Forward-strand TEX / noTEX libraries must land in the "ft" / "fn"
        # slots, prefixed with the wiggle folder.
        texs = [["tex_1.wig", "tex", "1", "a", "+"],
                ["notex_1.wig", "notex", "1", "a", "+"]]
        lib_dict = {"ft": [], "fn": [], "rt": [],
                    "rn": [], "ff": [], "rf": []}
        self.screen._import_libs(texs, "+", self.tex_wig, lib_dict)
        # Expected paths are derived from self.tex_wig instead of a literal
        # "test_folder/tex/..." so the separator matches the platform.
        # NOTE(review): presumes _import_libs joins with os.path.join.
        expected = {
            "fn": [os.path.join(self.tex_wig, "notex_1.wig")],
            "rn": [],
            "rt": [],
            "ft": [os.path.join(self.tex_wig, "tex_1.wig")],
            "rf": [],
            "ff": [],
        }
        self.assertDictEqual(lib_dict, expected)
class TestExtractRBS(unittest.TestCase):
    """Tests for ``er.detect_site`` and ``er.extract_seq``."""

    def setUp(self):
        # Fixtures plus an (initially empty) scratch folder.
        self.example = Example()
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        # Drop the scratch folder again.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_detect_site(self):
        # Of the three candidate sequences only the second one is expected
        # to be returned by detect_site().
        sequences = ("ATGGTGACCCAGGAGGTTGATCCCAGACGTAGGACCTGTTT",
                     "TTAGGACGTACTCCTCGAATGATCAACTGATACTTA",
                     "TTTTTTTTTAAAAAAAAAATATATATTTTTTTTTTT")
        inters = [{"seq": sequence} for sequence in sequences]
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.end_rbs = 14
        args.start_rbs = 5
        args.fuzzy_rbs = 2
        ribos = er.detect_site(inters, args)
        expected = [{'seq': 'TTAGGACGTACTCCTCGAATGATCAACTGATACTTA'}]
        self.assertListEqual(ribos, expected)

    def test_extract_seq(self):
        # The helper used by the module is swapped for Mock_Helper before
        # extract_seq() is called.
        er.helper = Mock_Helper
        inters = er.extract_seq(self.example.gffs, self.example.seq,
                                self.example.tsss, self.example.tas, 5, 300)
        # Both results share everything except the start coordinate.
        common = {'protein': 'AAA_00001', 'strain': 'aaa',
                  'seq': 'AAAATTAT', 'end': 3, 'strand': '+'}
        for index, start in enumerate((2, 1)):
            self.assertDictEqual(inters[index], dict(common, start=start))
class TestGenScreenshots(unittest.TestCase):
    """Tests for ``gs.set_data_range``, ``gs.print_batch`` and ``gs.gen_batch``."""

    def setUp(self):
        # Fixtures plus a scratch folder for files written by the tests.
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        # Remove the scratch folder and everything in it.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    @staticmethod
    def _cds_gff():
        # Build the single forward-strand CDS entry shared by the tests.
        gff_dict = {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
                    "start": 3, "end": 6, "phase": ".", "strand": "+",
                    "score": "."}
        attributes_gff = {"ID": "CDS0", "Name": "CDS_0",
                          "locus_tag": "AAA_00001"}
        return Create_generator(gff_dict, attributes_gff, "gff")

    def test_set_data_range(self):
        # Low-coverage and high-coverage wiggle fixtures yield different
        # setDataRange commands.
        gff = self._cds_gff()
        low_out = StringIO()
        gs.set_data_range(low_out, gff, self.example.wigs_low, "+")
        self.assertEqual(low_out.getvalue(), "setDataRange 0,20\n")
        low_out.close()
        high_out = StringIO()
        gs.set_data_range(high_out, gff, self.example.wigs_high, "+")
        self.assertEqual(high_out.getvalue(), "setDataRange 0,510\n")

    def test_print_batch(self):
        # print_batch() output is compared with the stored fixture text.
        out = StringIO()
        lib_t, lib_n, lib_f = "wig1 wig2", "wig3 wig4", "wig5"
        args = self.mock_args.mock()
        args.fasta = "fasta"
        args.main_gff = "main_gff"
        args.present = "expend"
        args.height = 1000
        args.side_gffs = ["test_folder/side1", "test_folder/side2"]
        for side_path in ("test_folder/side1", "test_folder/side2"):
            gen_file(side_path, "test")
        args.output_folder = self.test_folder
        gs.print_batch(args, out, "+", lib_t, lib_n, lib_f, "test")
        self.assertEqual(out.getvalue(), self.example.out)

    def test_gen_batch(self):
        # import_wig is replaced by a mock before gen_batch() runs.
        gs.import_wig = Mock_func().mock_import_wig
        out = StringIO()
        lib_t, lib_n, lib_f = "wig1 wig2", "wig3 wig4", "wig5"
        gff = self._cds_gff()
        seq = {"aaa": "ATATGGCCGACGAGTTCGACGATACAACCCGTGGGG"}
        gs.gen_batch(lib_t, lib_n, lib_f, "+", [gff], out, seq)
        self.assertEqual(out.getvalue(), self.example.out_print_wig)
class TestOptimizeTSS(unittest.TestCase):
    """Run ``opt.optimize_tss`` with its collaborators replaced by mocks."""

    def setUp(self):
        # Lay out the input folders (each with a "tmp" sub-folder) that the
        # test fills with fixture files.
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fastas = os.path.join(self.test_folder, "fasta")
        self.wigs = os.path.join(self.test_folder, "wigs")
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.manuals = os.path.join(self.test_folder, "manuals")
        folders = [self.test_folder]
        for parent in (self.fastas, self.wigs, self.gffs, self.manuals):
            folders.append(parent)
            folders.append(os.path.join(parent, "tmp"))
        # Create whatever is missing; a stale root left by an aborted run
        # no longer causes the sub-folders to be skipped.
        for folder in folders:
            if not os.path.exists(folder):
                os.mkdir(folder)

    def tearDown(self):
        # Remove the whole scratch tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_optimize_tss(self):
        # Swap Helper, Multiparser and optimization for test doubles, run
        # optimize_tss() and check that test.csv exists afterwards.
        opt.Helper = Mock_helper
        opt.Multiparser = Mock_multiparser
        opt.optimization = Mock_func().mock_optimization
        gen_file(os.path.join(self.gffs, "tmp", "test.gff"), "test")
        gen_file(os.path.join(self.fastas, "tmp", "test.fa"), "test")
        args = self.mock_args.mock()
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.wigs = self.wigs
        args.tsspredator_path = "test"
        args.manuals = self.manuals
        gen_file(os.path.join(self.manuals, "tmp", "test.gff"), "test")
        args.output_folder = self.test_folder
        args.project_strain = "test"
        args.height = 9
        args.height_reduction = 9
        args.factor = 9
        args.factor_reduction = 9
        args.base_height = 9
        args.enrichment = 9
        args.processing = 9
        args.utr = 200
        args.libs = "test"
        args.replicate_name = "test"
        args.cluster = 2
        args.strain_lengths = {"test": 100}
        args.cores = 4
        args.program = "TSS"
        args.replicate = 2
        args.steps = 2000
        # The context manager closes the log even when optimize_tss raises,
        # so tearDown never has to delete a folder with an open handle in it.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            opt.optimize_tss(args, log)
        self.assertTrue(
            os.path.exists(os.path.join(self.test_folder, "test.csv")))
class TestOptimizeTSS(unittest.TestCase):
    """Run ``opt.optimize_tss`` with its collaborators replaced by mocks."""

    def setUp(self):
        # Lay out the input folders (each with a "tmp" sub-folder) that the
        # test fills with fixture files.
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fastas = os.path.join(self.test_folder, "fasta")
        self.wigs = os.path.join(self.test_folder, "wigs")
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.manuals = os.path.join(self.test_folder, "manuals")
        folders = [self.test_folder]
        for parent in (self.fastas, self.wigs, self.gffs, self.manuals):
            folders.append(parent)
            folders.append(os.path.join(parent, "tmp"))
        # Create whatever is missing; a stale root left by an aborted run
        # no longer causes the sub-folders to be skipped.
        for folder in folders:
            if not os.path.exists(folder):
                os.mkdir(folder)

    def tearDown(self):
        # Remove the whole scratch tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_optimize_tss(self):
        # Swap Helper, Multiparser and optimization for test doubles, run
        # optimize_tss() and check that test.csv exists afterwards.
        opt.Helper = Mock_helper
        opt.Multiparser = Mock_multiparser
        opt.optimization = Mock_func().mock_optimization
        gen_file(os.path.join(self.gffs, "tmp", "test.gff"), "test")
        gen_file(os.path.join(self.fastas, "tmp", "test.fa"), "test")
        args = self.mock_args.mock()
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.wigs = self.wigs
        args.tsspredator_path = "test"
        args.manuals = self.manuals
        gen_file(os.path.join(self.manuals, "tmp", "test.gff"), "test")
        args.output_folder = self.test_folder
        args.project_strain = "test"
        args.height = 9
        args.height_reduction = 9
        args.factor = 9
        args.factor_reduction = 9
        args.base_height = 9
        args.enrichment = 9
        args.processing = 9
        args.utr = 200
        args.libs = "test"
        args.replicate_name = "test"
        args.cluster = 2
        args.strain_lengths = {"test": 100}
        args.cores = 4
        args.program = "TSS"
        args.replicate = 2
        args.steps = 2000
        # The context manager closes the log even when optimize_tss raises,
        # so tearDown never has to delete a folder with an open handle in it.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            opt.optimize_tss(args, log)
        self.assertTrue(
            os.path.exists(os.path.join(self.test_folder, "test.csv")))
class TestMEME(unittest.TestCase):
    """Tests for ``MEME._move_and_merge_fasta`` and ``MEME._split_fasta_by_strain``."""

    def setUp(self):
        # Create the scratch folders and a MEME instance pointing at them.
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.out_folder = "test_folder/output"
        self.tss_folder = os.path.join(self.test_folder, "tss_folder")
        self.gff_folder = os.path.join(self.test_folder, "gff_folder")
        self.fa_folder = os.path.join(self.test_folder, "fa_folder")
        # Every folder is checked on its own so that a stale root folder
        # does not leave the output sub-folders uncreated.
        for folder in (self.test_folder,
                       self.out_folder,
                       os.path.join(self.out_folder, "fasta_output"),
                       self.tss_folder,
                       self.gff_folder,
                       self.fa_folder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.tsss = self.tss_folder
        args.fastas = self.fa_folder
        args.gffs = self.gff_folder
        args.output_folder = self.out_folder
        self.meme = MEME(args)

    def tearDown(self):
        # Remove the scratch tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_move_and_merge_fasta(self):
        # Per-type FASTA files are placed in ./tmp; afterwards one
        # "<prefix>_allstrain_<type>.fa" file per type must exist.
        me.del_repeat_fasta = Mock_func().mock_del_repeat_fasta
        if not os.path.exists("tmp"):
            os.mkdir("tmp")
        gen_file("tmp/primary.fa", "primary")
        gen_file("tmp/secondary.fa", "secondary")
        gen_file("tmp/internal.fa", "internal")
        gen_file("tmp/antisense.fa", "antisense")
        gen_file("tmp/orphan.fa", "orphan")
        self.meme._move_and_merge_fasta(self.test_folder, "test")
        for name in ("test_allstrain_all_types.fa",
                     "test_allstrain_primary.fa",
                     "test_allstrain_secondary.fa",
                     "test_allstrain_internal.fa",
                     "test_allstrain_antisense.fa",
                     "test_allstrain_orphan.fa",
                     "test_allstrain_without_orphan.fa"):
            self.assertTrue(
                os.path.exists(os.path.join(self.test_folder, name)))

    def test_split_fasta_by_strain(self):
        # A two-record FASTA file must be split into one file per strain.
        # NOTE(review): the fixture was a triple-quoted literal; the record
        # line breaks are restored here on the assumption that each header
        # and each sequence sat on its own line.
        records = "\n".join([">aaa_aaa_aaa", "ATTATATATA",
                             ">bbb_bbb_bbb", "AATTAATTAA"])
        with open(os.path.join(self.fa_folder, "allstrain.fa"), "w") as fh:
            fh.write(records)
        self.meme._split_fasta_by_strain(self.fa_folder)
        # assertTrue(os.path.join(...)) only tested a non-empty string and
        # could never fail; check that the files really exist.
        for name in ("aaa.fa", "bbb.fa"):
            self.assertTrue(
                os.path.exists(os.path.join(self.fa_folder, name)))
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files``.

    NOTE(review): the class name mentions PolyT although the object under
    test is ``GoTermFinding``; the name is kept so test IDs stay stable.
    """

    def setUp(self):
        # Create the scratch folders and the GoTermFinding instance.
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        if (not os.path.exists(self.gffs)):
            os.mkdir(self.gffs)
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        if (not os.path.exists(self.go_folder)):
            os.mkdir(self.go_folder)
        self.all_strain = "all_genomes_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        if (not os.path.exists(self.trans)):
            os.mkdir(self.trans)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        # Remove the scratch tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_files(self):
        # Two per-genome GFF/CSV pairs are written, merged, and the merged
        # table is compared with the expected header plus both rows.
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        for folder in (gff_folder, test1_folder, test2_folder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        fixtures = ((gff_folder, "test1.gff", "test1"),
                    (gff_folder, "test2.gff", "test2"),
                    (test1_folder, "test1_uniprot.csv", "test1"),
                    (test2_folder, "test2_uniprot.csv", "test2"))
        for folder, name, content in fixtures:
            with open(os.path.join(folder, name), "w") as fh:
                fh.write(content)
        # The context manager closes the log even if _merge_files raises,
        # instead of relying on a close() call after the assertions.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.go._merge_files(self.gffs, self.go_folder,
                                 self.test_folder, log)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(
            merged,
            "Genome Strand Start End Protein_id Go_term\ntest1\ntest2\n")
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files``.

    NOTE(review): the class name mentions PolyT although the object under
    test is ``GoTermFinding``; the name is kept so test IDs stay stable.
    """

    def setUp(self):
        # Create the scratch folders and the GoTermFinding instance.
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
        self.gffs = os.path.join(self.test_folder, "gff_folder")
        if (not os.path.exists(self.gffs)):
            os.mkdir(self.gffs)
        self.go_folder = os.path.join(self.test_folder, "go_folder")
        if (not os.path.exists(self.go_folder)):
            os.mkdir(self.go_folder)
        self.all_strain = "all_genomes_uniprot.csv"
        self.trans = os.path.join(self.test_folder, "tran_folder")
        if (not os.path.exists(self.trans)):
            os.mkdir(self.trans)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        # Remove the scratch tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_files(self):
        # Two per-genome GFF/CSV pairs are written, merged, and the merged
        # table is compared with the expected header plus both rows.
        gff_folder = os.path.join(self.gffs, "test.gff_folder")
        test1_folder = os.path.join(self.go_folder, "test1")
        test2_folder = os.path.join(self.go_folder, "test2")
        for folder in (gff_folder, test1_folder, test2_folder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        fixtures = ((gff_folder, "test1.gff", "test1"),
                    (gff_folder, "test2.gff", "test2"),
                    (test1_folder, "test1_uniprot.csv", "test1"),
                    (test2_folder, "test2_uniprot.csv", "test2"))
        for folder, name, content in fixtures:
            with open(os.path.join(folder, name), "w") as fh:
                fh.write(content)
        # The context manager closes the log even if _merge_files raises,
        # instead of relying on a close() call after the assertions.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.go._merge_files(self.gffs, self.go_folder,
                                 self.test_folder, log)
        out_file = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(out_file))
        with open(out_file) as fh:
            merged = fh.read()
        self.assertEqual(
            merged,
            "Genome Strand Start End Protein_id Go_term\ntest1\ntest2\n")
class TestPotentialTarget(unittest.TestCase):
    """Tests for the ``pt`` module: file reading and target extraction."""

    def setUp(self):
        # Fixtures plus a scratch folder for generated files.
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        # Remove the scratch folder.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_file(self):
        # read_file() must return the sequence, the CDS entries split into
        # two lists, and the gene list.
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        fasta, cdss_f, cdss_r, genes = pt.read_file(
            seq_file, gff_file, "test", ["CDS"])
        self.assertEqual(
            fasta, "AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC")
        self.assertEqual(cdss_f[0].start, 1)
        self.assertEqual(cdss_f[0].feature, "CDS")
        self.assertEqual(cdss_r[0].start, 14)
        self.assertEqual(cdss_r[0].feature, "CDS")
        self.assertEqual(len(genes), 2)
        self.assertEqual(genes[0].start, 1)
        self.assertEqual(genes[1].start, 14)

    def test_deal_cds_forward(self):
        # The written target file is compared with the fixture.
        pt.deal_cds_forward(self.example.cdss_f, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # assertTrue(a, b) treats b as the failure message and passed for
        # any non-empty output; assertEqual performs the comparison.
        self.assertEqual("\n".join(data), self.example.cdsf_result)

    def test_deal_cds_reverse(self):
        # The written target file is compared with the fixture.
        pt.deal_cds_reverse(self.example.cdss_r, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # NOTE(review): the fixture referenced here is "cdsf_result", the
        # same one as in the forward test; now that the values are really
        # compared, confirm it is the intended expectation for reverse.
        self.assertEqual("\n".join(data), self.example.cdsf_result)

    def test_potential_target(self):
        # End-to-end run of potential_target() on the seq/gff fixtures.
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.tar_start = 2
        args.tar_end = 10
        args.features = ["CDS"]
        pt.potential_target(gff_file, seq_file, self.test_folder, args)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # Was assertTrue(a, b), which never compared the two values.
        self.assertEqual("\n".join(data), self.example.all_result)
class TestGenScreenshots(unittest.TestCase):
    """Tests for ``gs.set_data_range``, ``gs.print_batch`` and ``gs.gen_batch``."""

    def setUp(self):
        # Fixtures plus a scratch folder.
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        # Remove the scratch folder and everything in it.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    @staticmethod
    def _cds_gff():
        # Build the single forward-strand CDS entry shared by the tests.
        gff_dict = {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
                    "start": 3, "end": 6, "phase": ".", "strand": "+",
                    "score": "."}
        attributes_gff = {"ID": "CDS0", "Name": "CDS_0",
                          "locus_tag": "AAA_00001"}
        return Create_generator(gff_dict, attributes_gff, "gff")

    def test_set_data_range(self):
        # Low-coverage and high-coverage wiggle fixtures yield different
        # setDataRange commands.
        gff = self._cds_gff()
        low_out = StringIO()
        gs.set_data_range(low_out, gff, self.example.wigs_low, "+")
        self.assertEqual(low_out.getvalue(), "setDataRange 0,20\n")
        low_out.close()
        high_out = StringIO()
        gs.set_data_range(high_out, gff, self.example.wigs_high, "+")
        self.assertEqual(high_out.getvalue(), "setDataRange 0,510\n")

    def test_print_batch(self):
        # print_batch() output is compared with the stored fixture text.
        out = StringIO()
        lib_t, lib_n, lib_f = "wig1 wig2", "wig3 wig4", "wig5"
        args = self.mock_args.mock()
        args.fasta = "fasta"
        args.main_gff = "main_gff"
        args.present = "expend"
        args.height = 1000
        args.side_gffs = "side1 side2"
        args.output_folder = self.test_folder
        gs.print_batch(args, out, "+", lib_t, lib_n, lib_f, "test")
        self.assertEqual(out.getvalue(), self.example.out)

    def test_gen_batch(self):
        # import_wig is replaced by a mock before gen_batch() runs.
        gs.import_wig = Mock_func().mock_import_wig
        out = StringIO()
        lib_t, lib_n, lib_f = "wig1 wig2", "wig3 wig4", "wig5"
        gff = self._cds_gff()
        seq = {"aaa": "ATATGGCCGACGAGTTCGACGATACAACCCGTGGGG"}
        gs.gen_batch(lib_t, lib_n, lib_f, "+", [gff], out, seq)
        self.assertEqual(out.getvalue(), self.example.out_print_wig)
class TestPotentialTarget(unittest.TestCase):
    """Tests for the ``pt`` module: file reading and target extraction."""

    def setUp(self):
        # Fixtures plus a scratch folder for generated files.
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)

    def tearDown(self):
        # Remove the scratch folder.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_file(self):
        # read_file() must return the sequence, the CDS entries split into
        # two lists, and the gene list.
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        fasta, cdss_f, cdss_r, genes = pt.read_file(
            seq_file, gff_file, "test", ["CDS"])
        self.assertEqual(
            fasta, "AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC")
        self.assertEqual(cdss_f[0].start, 1)
        self.assertEqual(cdss_f[0].feature, "CDS")
        self.assertEqual(cdss_r[0].start, 14)
        self.assertEqual(cdss_r[0].feature, "CDS")
        self.assertEqual(len(genes), 2)
        self.assertEqual(genes[0].start, 1)
        self.assertEqual(genes[1].start, 14)

    def test_deal_cds_forward(self):
        # The written target file is compared with the fixture.
        pt.deal_cds_forward(self.example.cdss_f, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # assertTrue(a, b) treats b as the failure message and passed for
        # any non-empty output; assertEqual performs the comparison.
        self.assertEqual("\n".join(data), self.example.cdsf_result)

    def test_deal_cds_reverse(self):
        # The written target file is compared with the fixture.
        pt.deal_cds_reverse(self.example.cdss_r, self.test_folder,
                            self.example.fasta, self.example.genes, 2, 10)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # NOTE(review): the fixture referenced here is "cdsf_result", the
        # same one as in the forward test; now that the values are really
        # compared, confirm it is the intended expectation for reverse.
        self.assertEqual("\n".join(data), self.example.cdsf_result)

    def test_potential_target(self):
        # End-to-end run of potential_target() on the seq/gff fixtures.
        seq_file = os.path.join(self.test_folder, "seq")
        gff_file = os.path.join(self.test_folder, "gff")
        gen_file(seq_file, self.example.seq_file)
        gen_file(gff_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.tar_start = 2
        args.tar_end = 10
        args.features = ["CDS"]
        pt.potential_target(gff_file, seq_file, self.test_folder, args)
        data = import_data(os.path.join(self.test_folder, "aaa_target.fa"))
        # Was assertTrue(a, b), which never compared the two values.
        self.assertEqual("\n".join(data), self.example.all_result)
class TestGetPolyT(unittest.TestCase):
    """Test ``GoTermFinding._merge_files`` (three-argument variant)."""

    @staticmethod
    def _ensure_dir(path):
        # Create *path* unless it already exists, then hand it back.
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    def setUp(self):
        # Create the scratch folders and the GoTermFinding instance.
        self.mock_args = MockClass()
        self.test_folder = self._ensure_dir("test_folder")
        self.gffs = self._ensure_dir(
            os.path.join(self.test_folder, "gff_folder"))
        self.go_folder = self._ensure_dir(
            os.path.join(self.test_folder, "go_folder"))
        self.all_strain = "all_strains_uniprot.csv"
        self.trans = self._ensure_dir(
            os.path.join(self.test_folder, "tran_folder"))
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        args.gffs = self.gffs
        args.trans = self.trans
        self.go = GoTermFinding(args)

    def tearDown(self):
        # Remove the scratch tree.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_files(self):
        # Write two GFF/CSV pairs, merge them and check every line of the
        # merged file.
        gff_folder = self._ensure_dir(
            os.path.join(self.gffs, "test.gff_folder"))
        test1_folder = self._ensure_dir(
            os.path.join(self.go_folder, "test1"))
        test2_folder = self._ensure_dir(
            os.path.join(self.go_folder, "test2"))
        fixtures = ((gff_folder, "test1.gff", "test1"),
                    (gff_folder, "test2.gff", "test2"),
                    (test1_folder, "test1_uniprot.csv", "test1"),
                    (test2_folder, "test2_uniprot.csv", "test2"))
        for folder, name, content in fixtures:
            with open(os.path.join(folder, name), "w") as handle:
                handle.write(content)
        self.go._merge_files(self.gffs, self.go_folder, self.test_folder)
        merged_path = os.path.join(self.go_folder, "test", self.all_strain)
        self.assertTrue(os.path.exists(merged_path))
        with open(merged_path) as handle:
            for row in handle:
                self.assertEqual(row, "test1test2")
class TestCoverageTerminator(unittest.TestCase):
    """Tests for the terminator coverage functions of the ``dct`` module."""

    def setUp(self):
        # Fixtures plus a scratch folder.
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        # Remove the scratch folder.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_compare_ta(self):
        # compare_ta() fills in the "express" flag of each terminator.
        trans = read_dict(3, self.example.tran_dict,
                          self.example.attributes_tran)
        dct.compare_ta(self.example.term_dict, trans, 5)
        flags = [entry["express"] for entry in self.example.term_dict]
        self.assertListEqual(flags, ["True", "True", "False"])

    def test_compare_transtermhp(self):
        # Positions and detection methods of the merged terminators,
        # ordered by strain and start.
        hps = read_dict(3, self.example.hp_dict,
                        self.example.attributes_term)
        merged = dct.compare_transtermhp(hps, self.example.term_dict)
        ordered = sorted(merged, key=lambda x: (x["strain"], x["start"]))
        spans = ["_".join([str(entry["start"]), str(entry["end"])])
                 for entry in ordered]
        methods = [entry["method"] for entry in ordered]
        self.assertListEqual(
            spans, ['30_40', '350_367', '420_432', '1420_2429'])
        self.assertListEqual(
            methods, ['TransTermHP', 'forward_reverse&TransTermHP',
                      'forward_reverse&TransTermHP', 'forward_reverse'])

    def test_compare_replicates(self):
        # First a TEX/noTEX pair, then a single fragmented library.
        texs = {"track_tex_track_notex": 0}
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.tex_notex = 2
        tex_cover = {"track": "track_tex", "high": 300, "low": 50,
                     "detect": "True", "diff": 250, "type": "tex"}
        notex_cover = {"track": "track_notex", "high": 200, "low": 50,
                       "detect": "True", "diff": 150, "type": "notex"}
        # Expected values are independent copies taken before the call.
        expected_tex = dict(tex_cover)
        expected_notex = dict(notex_cover)
        diff_cover, diff, term_datas, detect_num = dct.compare_replicates(
            [tex_cover, notex_cover], texs, "texnotex", args)
        self.assertEqual(diff_cover, 250)
        self.assertDictEqual(diff, expected_tex)
        for index, expected in enumerate([expected_notex, expected_tex]):
            self.assertDictEqual(expected, term_datas[index])
        self.assertEqual(detect_num, 1)

        args.replicates = {"tex": 1, "frag": 1}
        frag_cover = {"track": "frag", "high": 10, "low": 0,
                      "detect": "False", "diff": 10, "type": "frag"}
        expected_frag = dict(frag_cover)
        diff_cover, diff, term_datas, detect_num = dct.compare_replicates(
            [frag_cover], texs, "frag", args)
        self.assertEqual(diff_cover, 10)
        self.assertDictEqual(diff, expected_frag)
        self.assertDictEqual(term_datas[0], expected_frag)
        self.assertEqual(detect_num, 1)

    def test_coverage2term(self):
        # coverage_comparison is mocked; one entry is appended to
        # term_covers for the track.
        dct.coverage_comparison = Mock_coverage().coverage_comparison
        hl_covers = {"low": 20, "high": 30}
        hl_poss = {"low": 1, "high": 2}
        term = {"start": 2, "end": 4}
        covers = [{"coverage": depth, "pos": position, "type": "frag"}
                  for position, depth in enumerate((100, 30, 23, 21, 21), 1)]
        term_covers = []
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        dct.coverage2term(covers, term, hl_covers, hl_poss, "+",
                          term_covers, "track_1", args)
        self.assertDictEqual(
            term_covers[0],
            {'diff': 70, 'track': 'track_1', 'type': 'frag',
             'high': 100, 'low': 30, 'detect': 'True'})

    def test_get_coverage(self):
        # get_coverage() on one fragmented track of strain "aaa".
        term = {"start": 2, "end": 4, "strain": "aaa", "strand": "+"}
        texs = {"track_tex_track_notex": 0}
        track = [{"pos": position, "coverage": depth, "type": "frag"}
                 for position, depth in enumerate((100, 30, 23, 21, 21), 1)]
        wigs = {"aaa": {"frag_1": {"track_1": track}}}
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        args.replicates = {"tex": 1, "frag": 1}
        args.tex_notex = 2
        diff_cover, diff, term_datas, detect_nums = dct.get_coverage(
            term, wigs, "+", texs, args)
        expected = {'track': 'track_1', 'high': 100, 'type': 'frag',
                    'detect': 'True', 'diff': 70, 'low': 30}
        self.assertEqual(diff_cover, 70)
        self.assertDictEqual(diff, expected)
        self.assertDictEqual(term_datas["frag_1"][0], dict(expected))
        self.assertDictEqual(detect_nums, {'frag_1': 1})

    def test_compare_term(self):
        # Feed candidates one by one; the second column is the expected
        # first entry afterwards (None means "the candidate itself").
        steps = (
            ({"miss": 5, "diff_cover": 30, "ut": 4}, None),
            ({"miss": 4, "diff_cover": 30, "ut": 4}, None),
            ({"miss": 6, "diff_cover": 80, "ut": 4},
             {"miss": 4, "diff_cover": 30, "ut": 4}),
            ({"miss": 4, "diff_cover": 80, "ut": 4}, None),
            ({"miss": 4, "diff_cover": 80, "ut": 6}, None),
        )
        terms = []
        for term, expected_first in steps:
            terms = dct.compare_term(term, terms)
            if expected_first is None:
                expected_first = term
            self.assertDictEqual(terms[0], expected_first)
        # The last candidate is submitted a second time; both leading
        # entries must then equal it.
        terms = dct.compare_term(term, terms)
        self.assertDictEqual(terms[0], term)
        self.assertDictEqual(terms[1], term)

    def test_first_term(self):
        # "+" looks at detect_p, "-" at detect_m.
        detect_terms = {"detect": [], "undetect": []}
        term = {"detect_p": True, "detect_m": False}
        detect = dct.first_term("+", term, detect_terms, False)
        self.assertTrue(detect)
        self.assertDictEqual(detect_terms["detect"][0], term)
        detect = dct.first_term("-", term, detect_terms, False)
        self.assertFalse(detect)
        self.assertDictEqual(detect_terms["undetect"][0], term)

    def test_print_table(self):
        # Four cases: best track only, all tracks, not expressed, and
        # a negative diff_cover.
        def make_term(express, diff_cover):
            # Fresh terminator record for every case.
            return {"express": express, "diff_cover": diff_cover,
                    "diff": {"high": 100, "low": 30, "track": "track_1"},
                    "datas": {"data": [{"track": "track_1", "diff": 70,
                                        "high": 100, "low": 30},
                                       {"track": "track_2", "diff": 39,
                                        "high": 99, "low": 60}]}}

        def table_lines(term, args):
            # Run print_table() into a fresh buffer and return its lines.
            buffer = StringIO()
            dct.print_table(term, buffer, args)
            lines = set(buffer.getvalue().split("\n"))
            buffer.close()
            return lines

        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True
        term = make_term("True", 70)
        self.assertEqual(table_lines(term, args),
                         {" True track_1(diff=70;high=100;low=30)"})
        args.table_best = False
        self.assertEqual(
            table_lines(term, args),
            {" True track_1(diff=70;high=100;low=30);track_2(diff=39;high=99;low=60)"})
        self.assertEqual(table_lines(make_term("False", 70), args),
                         {" False NA"})
        self.assertEqual(table_lines(make_term("True", -1), args),
                         {" False No_coverage_decreasing"})

    def test_print2file(self):
        # GFF and table output of print2file() for one terminator.
        gff_out = StringIO()
        table_out = StringIO()
        term = {"strain": "aaa", "express": "True", "diff_cover": 70,
                "strand": "+", "start": 2, "end": 4,
                "method": "TransTermHP",
                "diff": {"high": 100, "low": 30, "track": "track_1"},
                "datas": {"data": [{"track": "track_1", "diff": 70,
                                    "high": 100, "low": 30},
                                   {"track": "track_2", "diff": 39,
                                    "high": 99, "low": 60}]}}
        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True
        dct.print2file(0, term, "70", "test", gff_out, table_out,
                       "test_method", args)
        gff_lines = set(gff_out.getvalue().split("\n")[:-1])
        table_lines = set(table_out.getvalue().split("\n")[:-1])
        self.assertEqual(gff_lines, {self.example.gff_file})
        self.assertEqual(table_lines, {self.example.table})
        gff_out.close()
        table_out.close()
class TestMergesRNA(unittest.TestCase):
    """Unit tests for the sRNA merging helpers exposed by the ``ms`` module."""

    @staticmethod
    def _feature(start, end, feature="sRNA"):
        # Build a fresh GFF column dict; every fixture in this class lives on
        # the "+" strand of "aaa" and differs only in coordinates/feature.
        return {"seq_id": "aaa", "source": "Refseq", "feature": feature,
                "start": start, "end": end, "phase": ".",
                "strand": "+", "score": "."}

    def setUp(self):
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_modify_attributes(self):
        # Expectation: differing sRNA_type values are combined on the entry
        # named by the last argument ("pre" or "current"); identical values
        # are left untouched.
        pre_dict = self._feature(3, 33)
        attributes_pre = {"ID": "sRNA0", "Name": "srna_0",
                          "sRNA_type": "5utr"}
        tar1_dict = self._feature(3, 33)
        attributes_tar1 = {"ID": "sRNA0", "Name": "srna_0",
                           "sRNA_type": "3utr"}
        tar2_dict = self._feature(3, 33)
        attributes_tar2 = {"ID": "sRNA0", "Name": "srna_0",
                           "sRNA_type": "5utr"}

        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        ms.modify_attributes(pre, tar1, "UTR", "pre")
        self.assertEqual(pre.attributes["sRNA_type"], "3utr,5utr")

        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar2 = Create_generator(tar2_dict, attributes_tar2, "gff")
        ms.modify_attributes(pre, tar2, "UTR", "pre")
        self.assertEqual(pre.attributes["sRNA_type"], "5utr")

        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        ms.modify_attributes(pre, tar1, "UTR", "current")
        self.assertEqual(pre.attributes["sRNA_type"], "5utr")
        self.assertEqual(tar1.attributes["sRNA_type"], "3utr,5utr")

    def test_detect_overlap(self):
        # tar1 shares the 3-33 interval with pre; tar2 (53-233) does not.
        pre_dict = self._feature(3, 33)
        attributes_pre = {"ID": "sRNA0", "Name": "srna_0",
                          "sRNA_type": "5utr"}
        tar1_dict = self._feature(3, 33)
        attributes_tar1 = {"ID": "sRNA0", "Name": "srna_0",
                           "sRNA_type": "3utr"}
        tar2_dict = self._feature(53, 233)
        attributes_tar2 = {"ID": "sRNA0", "Name": "srna_0",
                           "sRNA_type": "5utr"}
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar1 = Create_generator(tar1_dict, attributes_tar1, "gff")
        tar2 = Create_generator(tar2_dict, attributes_tar2, "gff")

        overlap = False
        overlap = ms.detect_overlap(tar1, pre, "UTR", overlap)
        self.assertTrue(overlap)

        overlap = False
        overlap = ms.detect_overlap(tar2, pre, "UTR", overlap)
        self.assertFalse(overlap)

    def test_modify_overlap(self):
        # Expectation: TSS/cleavage annotations of the nested entry are
        # merged into the outer one while the outer coordinates are kept.
        pre_dict = self._feature(3, 33)
        attributes_pre = {"ID": "sRNA0", "Name": "srna_0",
                          "sRNA_type": "5utr", "with_TSS": "NA",
                          "start_cleavage": "cleavage_1,cleavage_2",
                          "end_cleavage": "NA"}
        tar_dict = self._feature(5, 30)
        attributes_tar = {"ID": "sRNA0", "Name": "srna_0",
                          "sRNA_type": "3utr", "with_TSS": "TSS_1",
                          "start_cleavage": "cleavage3",
                          "end_cleavage": "cleavage10"}
        pre = Create_generator(pre_dict, attributes_pre, "gff")
        tar = Create_generator(tar_dict, attributes_tar, "gff")
        pre_srna = ms.modify_overlap(pre, tar)
        self.assertEqual(pre_srna.attributes["with_TSS"], "TSS_1")
        self.assertEqual(pre_srna.attributes["start_cleavage"],
                         "cleavage_1,cleavage_2,cleavage3")
        self.assertEqual(pre_srna.attributes["end_cleavage"], "cleavage10")
        self.assertEqual(pre_srna.start, 3)
        self.assertEqual(pre_srna.end, 33)

    def test_merge_srna(self):
        srnas = ms.merge_srna(self.example.srnas_utr, "UTR")
        self.assertEqual(len(srnas), 2)
        self.assertEqual(srnas[0].start, 3)
        self.assertEqual(srnas[1].start, 54)
        self.assertEqual(srnas[0].attributes["with_TSS"], "TSS_1")
        self.assertEqual(srnas[1].attributes["with_TSS"], "TSS_3")
        self.assertEqual(srnas[0].attributes["start_cleavage"],
                         "cleavage_1,cleavage_2,cleavage_3")
        self.assertEqual(srnas[1].attributes["start_cleavage"], "cleavage_4")
        srnas = ms.merge_srna(self.example.srnas_int, "inter")
        self.assertEqual(srnas[0].attributes["with_TSS"], "TSS_1")
        self.assertEqual(srnas[1].attributes["with_TSS"], "NA")

    def test_merge_srna_gff(self):
        out_file = os.path.join(self.test_folder, "test_out")
        gff_path = os.path.join(self.test_folder, "aaa.gff")
        gen_file(gff_path, self.example.gff_file)
        # The GFF reader is replaced by a mock, so the merged output is
        # determined by the mock's entries.
        ms.read_gff = Mock_func().mock_read_gff
        gffs = {"merge": out_file, "utr": "UTR", "normal": "inter"}
        ms.merge_srna_gff(gffs, False, 0.5, gff_path)
        datas, attributes = extract_info(out_file, "file")
        self.assertListEqual(datas, [
            'aaa\tANNOgesic\tncRNA\t54\t254\t.\t+\t.',
            'aaa\tANNOgesic\tncRNA\t54\t254\t.\t+\t.',
        ])
        self.assertEqual(
            set(attributes[0]),
            set(['overlap_percent=NA', 'end_cleavage=cleavage_40',
                 'start_cleavage=cleavage_4', 'Name=sRNA_00000',
                 'with_TSS=TSS_3', 'ID=srna0', 'sRNA_type=interCDS',
                 'overlap_cds=NA']))
        self.assertEqual(
            set(attributes[1]),
            set(['overlap_percent=NA', 'end_cleavage=NA',
                 'Name=sRNA_00001', 'with_TSS=NA', 'ID=srna1',
                 'sRNA_type=intergenic', 'overlap_cds=NA']))

    def test_compare_table(self):
        ms.replicate_comparison = Mock_func().mock_replicate_comparison
        wigs = {"aaa": {"frag_1": {"track_1|+|frag": [100, 30, 23, 21, 21]}}}
        tables = [{"strain": "aaa", "name": "sRNA_1", "start": 3, "end": 4,
                   "strand": "+", "libs": "track_1", "detect": "True",
                   "avg": 30, "high": 100, "low": 20, "detail": "detail"}]
        srna_dict = self._feature(3, 4)
        attributes_srna = {"ID": "sRNA0", "Name": "srna_0",
                           "sRNA_type": "3utr", "with_TSS": "TSS_1",
                           "start_cleavage": "cleavage3",
                           "end_cleavage": "cleavage10",
                           "overlap_cds": "CDS1",
                           "overlap_percent": "0.01415"}
        srna = Create_generator(srna_dict, attributes_srna, "gff")
        tss_dict = self._feature(3, 3, feature="TSS")
        attributes_tss = {"ID": "tss0", "Name": "TSS_0", "type": "Orphan"}
        tss = Create_generator(tss_dict, attributes_tss, "gff")
        out = StringIO()
        gen_file("tmp_median", "aaa\t3utr\ttrack_1\t10")
        # Registered right after creation so the scratch file is removed
        # from the working directory even when an assertion below fails.
        self.addCleanup(os.remove, "tmp_median")
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.texs = texs = {"track_tex_track_notex": 0}
        args.out_folder = os.getcwd()
        args.table_best = True
        args.tex_notex = 2
        ms.compare_table(srna, tables, "utr", wigs, wigs, texs, out,
                         [tss], args)
        self.assertEqual(
            out.getvalue(),
            "aaa\tsrna_0\t3\t4\t+\tfrag_1\t1\tTSS_1;cleavage3\tcleavage10\t"
            "22.0\t23\t21\ttrack_1(avg=22.0;high=23;low=21)\tCDS1\t0.01415\n")

    def test_get_coverage(self):
        wigs = {"aaa": {"frag_1": {"track_1|+|frag": [100, 30, 23, 21, 21]}}}
        srna_cover = ms.get_coverage(wigs, self.example.srnas_int[0])
        self.assertEqual(srna_cover["frag_1"], [{
            'low': 21, 'track': 'track_1', 'avg': 1.3548387096774193,
            'final_end': 33, 'high': 21, 'pos': 0, 'final_start': 3,
            'type': 'frag'}])

    def test_get_tss_pro(self):
        srna_dict = self._feature(3, 4)
        attributes_srna = {"ID": "sRNA0", "Name": "srna_0",
                           "sRNA_type": "3utr", "with_TSS": "TSS_1",
                           "start_cleavage": "cleavage3",
                           "end_cleavage": "cleavage10"}
        srna = Create_generator(srna_dict, attributes_srna, "gff")
        tss_pro = ms.get_tss_pro("utr", srna)
        self.assertEqual(tss_pro, ('TSS_1;cleavage3', 'cleavage10'))
class TestsTSSpredator(unittest.TestCase):
    """Unit tests for the private pipeline steps of ``TSSpredator``."""

    def setUp(self):
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.wigs = "test_folder/wigs"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.fastas = "test_folder/fastas"
        self.manual = "test_folder/manuals"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
            os.mkdir(self.trans)
            os.mkdir(self.out)
            os.mkdir(self.wigs)
            os.mkdir(self.gffs)
            os.mkdir(self.tsss)
            os.mkdir(self.fastas)
            os.mkdir(self.manual)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.ta_files = self.trans
        args.gffs = self.gffs
        args.wig_folder = self.wigs
        args.fastas = self.fastas
        args.manual = self.manual
        self.tss = TSSpredator(args)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        if os.path.exists("tmp"):
            shutil.rmtree("tmp")

    def _open_log(self):
        # Scratch log handed to the methods under test; callers use it in a
        # ``with`` block so the handle is closed before tearDown removes the
        # folder.
        return open(os.path.join(self.test_folder, "test.log"), "w")

    def test_print_lib(self):
        out = StringIO()
        lib_list = [{"condition": 1, "replicate": "a", "wig": "test_1.wig"},
                    {"condition": 2, "replicate": "a", "wig": "test_2.wig"}]
        self.tss._print_lib(2, lib_list, out, self.wigs, "test", ["a"])
        self.assertEqual(out.getvalue(),
                         ("test_1a = test_folder/wigs/test_1.wig\n"
                          "test_2a = test_folder/wigs/test_2.wig\n"))

    def test_import_lib(self):
        out = StringIO()
        libs = ["test1_forward.wig:notex:1:a:+",
                "test1_reverse.wig:notex:1:a:-",
                "test1_TEX_forward.wig:tex:1:a:+",
                "test1_TEX_reverse.wig:tex:1:a:-"]
        for wig in ("test1_forward.wig_STRAIN_test.wig",
                    "test1_reverse.wig_STRAIN_test.wig",
                    "test1_TEX_forward.wig_STRAIN_test.wig",
                    "test1_TEX_reverse.wig_STRAIN_test.wig"):
            gen_file(os.path.join(self.wigs, wig), "test")
        self.tss._import_lib(
            libs, self.wigs, "test", out, "test.gff", "TSS", "test.fa")
        self.assertListEqual(
            out.getvalue().split("\n"),
            ["annotation_1 = test.gff",
             "fivePrimeMinus_1a = test_folder/wigs/test1_TEX_reverse.wig",
             "fivePrimePlus_1a = test_folder/wigs/test1_TEX_forward.wig",
             "genome_1 = test.fa",
             ""])

    def test_gen_config(self):
        os.mkdir(os.path.join(self.out, "MasterTables"))
        os.mkdir(os.path.join(self.wigs, "tmp"))
        config_file = os.path.join(self.test_folder, "config")
        libs = ["test1_forward.wig:notex:1:a:+",
                "test1_reverse.wig:notex:1:a:-",
                "test1_TEX_forward.wig:tex:1:a:+",
                "test1_TEX_reverse.wig:tex:1:a:-"]
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.program = "TSS"
        args.height = 0.3
        args.height_reduction = 0.2
        args.factor = 2.0
        args.factor_reduction = 0.5
        args.base_height = 0.00
        args.enrichment_factor = 2.0
        args.processing_factor = 1.5
        args.utr_length = 300
        args.cluster = 3
        args.repmatch = ["all_2"]
        args.libs = libs
        args.output_prefixs = ["test1"]
        args.specify_strains = None
        with self._open_log() as log:
            self.tss._gen_config(
                "test", args, self.gffs + "/tmp/test.gff",
                self.wigs + "/tmp", self.fastas + "/tmp/test.fa",
                config_file, log)
        datas = import_data(config_file)
        self.assertEqual("\n".join(datas), self.example.config)

    def test_set_gen_config(self):
        os.mkdir(os.path.join(self.fastas, "tmp"))
        os.mkdir(os.path.join(self.gffs, "tmp"))
        os.mkdir(os.path.join(self.wigs, "tmp"))
        os.mkdir(os.path.join(self.out, "MasterTables"))
        gen_file(os.path.join(self.fastas, "tmp/test.fa"), "test")
        gen_file(os.path.join(self.gffs, "tmp/test.gff"), "test")
        for wig in ("tmp/test1_forward.wig_STRAIN_test.wig",
                    "tmp/test1_reverse.wig_STRAIN_test.wig",
                    "tmp/test1_TEX_forward.wig_STRAIN_test.wig",
                    "tmp/test1_TEX_reverse.wig_STRAIN_test.wig"):
            gen_file(os.path.join(self.wigs, wig), "test")
        libs = ["test1_forward.wig:notex:1:a:+",
                "test1_reverse.wig:notex:1:a:-",
                "test1_TEX_forward.wig:tex:1:a:+",
                "test1_TEX_reverse.wig:tex:1:a:-"]
        args = self.mock_args.mock()
        args.program = "TSS"
        args.height = 0.3
        args.height_reduction = 0.2
        args.factor = 2.0
        args.factor_reduction = 0.5
        args.base_height = 0.00
        args.enrichment_factor = 2.0
        args.processing_factor = 1.5
        args.utr_length = 300
        args.libs = libs
        args.out_folder = self.out
        args.cluster = 3
        args.repmatch = ["all_2"]
        args.specify_strains = None
        args.output_prefixs = ["test1"]
        with self._open_log() as log:
            self.tss._set_gen_config(args, self.test_folder, log)
        datas = import_data(os.path.join(self.test_folder, "config_test.ini"))
        self.assertEqual("\n".join(datas), self.example.config)

    def test_convert_gff(self):
        os.mkdir(os.path.join(self.out, "gffs"))
        os.mkdir(os.path.join(self.out, "MasterTables"))
        os.mkdir(os.path.join(self.out, "MasterTables/MasterTable_test"))
        gen_file(os.path.join(
            self.out, "MasterTables/MasterTable_test/MasterTable.tsv"),
            self.example.master)
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.program = "TSS"
        with self._open_log() as log:
            self.tss._convert_gff(["test"], args, log)
        datas = import_data(os.path.join(self.out, "gffs/test_TSS.gff"))
        self.assertEqual("\n".join(datas), self.example.master_gff)

    def test_merge_wigs(self):
        gen_file(os.path.join(self.wigs, "test1_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_reverse.wig"), "test_r")
        gen_file(os.path.join(self.wigs, "test1_TEX_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_TEX_reverse.wig"), "test_r")
        libs = ["test1_forward.wig:notex:1:a:+",
                "test1_reverse.wig:notex:1:a:-",
                "test1_TEX_forward.wig:tex:1:a:+",
                "test1_TEX_reverse.wig:tex:1:a:-"]
        self.tss._merge_wigs(self.wigs, "test", libs)
        datas = import_data(os.path.join("tmp", "merge_forward.wig"))
        self.assertEqual("\n".join(datas), "test_ftest_f")
        datas = import_data(os.path.join("tmp", "merge_reverse.wig"))
        self.assertEqual("\n".join(datas), "test_rtest_r")
        shutil.rmtree("tmp")

    def test_check_orphan(self):
        os.mkdir(os.path.join(self.out, "gffs"))
        gen_file(os.path.join(self.wigs, "test1_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_reverse.wig"), "test_r")
        gen_file(os.path.join(self.wigs, "test1_TEX_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_TEX_reverse.wig"), "test_r")
        ts.check_orphan = self.mock.mock_check_orphan
        libs = ["test1_TEX_forward.wig:tex:1:a:+",
                "test1_TEX_reverse.wig:tex:1:a:-",
                "test1_forward.wig:notex:1:a:+",
                "test1_reverse.wig:notex:1:a:-"]
        args = self.mock_args.mock()
        args.program = "TSS"
        args.gffs = self.gffs
        args.libs = libs
        self.tss._check_orphan(["test"], self.wigs, args)
        self.assertTrue(os.path.exists(
            os.path.join(self.out, "gffs/test_TSS.gff")))

    def test_low_expression(self):
        ts.filter_low_expression = self.mock.mock_filter_low_expression
        gen_file(os.path.join(self.wigs, "test1_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_reverse.wig"), "test_r")
        gen_file(os.path.join(self.wigs, "test1_TEX_forward.wig"), "test_f")
        gen_file(os.path.join(self.wigs, "test1_TEX_reverse.wig"), "test_r")
        gen_file(os.path.join(self.gffs, "test_TSS.gff"),
                 self.example.tss_file)
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        libs = ["test1_TEX_forward.wig:tex:1:a:+",
                "test1_TEX_reverse.wig:tex:1:a:-",
                "test1_forward.wig:notex:1:a:+",
                "test1_reverse.wig:notex:1:a:-"]
        args = self.mock_args.mock()
        args.manual = "manual"
        args.libs = libs
        args.wig_folder = self.wigs
        args.program = "TSS"
        args.cluster = 3
        self.tss._low_expression(args, self.gffs)
        shutil.rmtree("tmp")
        datas = import_data(os.path.join(
            self.out, "statistics/test/stat_test_low_expression_cutoff.csv"))
        self.assertEqual("\n".join(datas),
                         "Genome\tCutoff_coverage\ntest\t100")

    def test_merge_manual(self):
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.tss_file)
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        os.mkdir(os.path.join(self.out, "gffs"))
        ts.merge_manual_predict_tss = self.mock.mock_merge_manual_predict_tss
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.manual = "test_folder/manuals/tmp"
        os.mkdir(args.manual)
        gen_file("test_folder/manuals/tmp/test.gff", "test")
        args.wig_folder = self.wigs
        args.out_folder = self.out
        args.program = "TSS"
        args.utr_length = 300
        args.libs = "libs"
        args.cluster = 3
        self.tss._merge_manual(["test"], args)
        self.assertTrue(os.path.exists(os.path.join(
            self.out,
            "statistics/test/stat_compare_TSSpredator_manual_test.csv")))
        self.assertTrue(os.path.exists(
            os.path.join(self.out, "gffs/test_TSS.gff")))
        shutil.rmtree(args.manual)

    def test_deal_with_overlap(self):
        ts.filter_tss_pro = self.mock.mock_filter_tss_pro
        gen_file(os.path.join(self.out, "test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.test_folder, "test_processing.gff"),
                 self.example.tss_file)
        args = self.mock_args.mock()
        args.overlap_feature = "overlap"
        args.program = "TSS"
        args.cluster = 3
        args.overlap_gffs = self.test_folder
        self.tss._deal_with_overlap(self.out, args)
        self.assertTrue(os.path.exists(os.path.join(self.out, "test_TSS.gff")))

    def test_stat_tss(self):
        ts.stat_tsspredator = self.mock.mock_stat_tsspredator
        ts.plot_venn = self.mock.mock_plot_venn
        os.mkdir(os.path.join(self.out, "statistics"))
        os.mkdir(os.path.join(self.out, "statistics/test"))
        with self._open_log() as log:
            self.tss._stat_tss(["test"], "TSS", log)
        self.assertTrue(os.path.exists(os.path.join(
            self.out, "statistics/test/test_venn.png")))
        self.assertTrue(os.path.exists(os.path.join(
            self.out, "statistics/test/test_class.png")))

    def test_validate(self):
        # Smoke test: passes as long as _validate() runs without raising.
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.tss_file)
        os.mkdir(os.path.join(self.out, "gffs"))
        ts.validate_gff = self.mock.mock_validate_gff
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.utr_length = 300
        args.out_folder = self.out
        args.program = "tss"
        with self._open_log() as log:
            self.tss._validate(["test"], args, log)

    def test_compare_ta(self):
        self.tss.multiparser = self.mock_parser
        ts.stat_ta_tss = self.mock.mock_stat_ta_tss
        ta_path = os.path.join(self.trans, "tmp")
        os.mkdir(ta_path)
        os.mkdir(os.path.join(self.out, "gffs"))
        gen_file(os.path.join(ta_path, "test_transcript.gff"),
                 self.example.tran_file)
        args = self.mock_args.mock()
        args.fuzzy = 3
        args.trans = self.trans
        args.gffs = self.gffs
        with self._open_log() as log:
            self.tss._compare_ta(["test"], args, log)
        self.assertTrue(os.path.exists(os.path.join(
            self.trans, "test_transcript.gff")))
        self.assertTrue(os.path.exists(os.path.join(
            self.out, "gffs/test_TSS.gff")))
class TestsRNATargetPrediction(unittest.TestCase):
    """Unit tests for the private steps of ``sRNATargetPrediction``."""

    def setUp(self):
        self.mock_args = MockClass()
        self.example = Example()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = "test_folder/gffs"
        self.srnas = "test_folder/srnas"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.seq = "test_folder/output/sRNA_seqs"
        self.rnaup = "test_folder/output/RNAup"
        self.rnaplex = "test_folder/output/RNAplex"
        self.merge = "test_folder/output/merge"
        if not os.path.exists(self.test_folder):
            # Parents are listed ahead of their children.
            for folder in (self.test_folder, self.gffs, self.out,
                           self.srnas, self.fastas, self.rnaup,
                           self.rnaplex, self.seq, self.merge,
                           os.path.join(self.rnaup, "test")):
                os.mkdir(folder)
        options = self.mock_args.mock()
        options.out_folder = self.out
        options.srnas = self.srnas
        options.fastas = self.fastas
        options.gffs = self.gffs
        self.star = sRNATargetPrediction(options)

    def tearDown(self):
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_sort_srna_fasta(self):
        # Expected output order is bbb, aaa, ccc.
        fasta_path = os.path.join(self.fastas, "test.fa")
        gen_file(fasta_path, ">aaa\nAAAAAAAA\n>bbb\nCCCC\n>ccc\nGGGGGGGGGGGG")
        self.star._sort_srna_fasta(fasta_path, "test", self.test_folder)
        sorted_path = os.path.join(self.test_folder, "tmp_test_sRNA.fa")
        self.assertListEqual(
            import_data(sorted_path),
            ['>bbb', 'CCCC', '>aaa', 'AAAAAAAA', '>ccc', 'GGGGGGGGGGGG'])

    def test_read_fasta(self):
        fasta_path = os.path.join(self.fastas, "test.fa")
        gen_file(fasta_path, ">aaa\nAAAAAAAA")
        self.assertEqual(self.star._read_fasta(fasta_path), "AAAAAAAA")

    def test_get_specific_seq(self):
        gff_path = os.path.join(self.test_folder, "aaa_sRNA.gff")
        genome_path = os.path.join(self.test_folder, "aaa.fa")
        result_path = os.path.join(self.test_folder, "out")
        gen_file(gff_path, self.example.srna_file)
        gen_file(genome_path, self.example.seq_file)
        self.star._get_specific_seq(gff_path, genome_path, result_path,
                                    ["aaa:+:5:8"])
        lines = import_data(result_path)
        self.assertEqual("\n".join(lines), '>srna0|aaa|5|8|+\nTAAT')

    def test_gen_seq(self):
        srna_seq = os.path.join(self.out, "sRNA_seqs")
        tar_seq = os.path.join(self.out, "target_seqs")
        for parent in (self.srnas, self.fastas, self.gffs):
            os.mkdir(os.path.join(parent, "tmp"))
        os.mkdir(tar_seq)
        gen_file(os.path.join(self.srnas, "tmp", "aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.fastas, "tmp", "aaa.fa"),
                 self.example.seq_file)
        gen_file(os.path.join(self.gffs, "tmp", "aaa.gff"),
                 self.example.gff_file)
        options = self.mock_args.mock()
        options.query = ["aaa:+:5:8"]
        options.features = ["CDS"]
        options.tar_start = 3
        options.tar_end = 5
        self.star._gen_seq(["aaa"], options)
        srna_lines = import_data(os.path.join(srna_seq, "aaa_sRNA.fa"))
        self.assertEqual("\n".join(srna_lines), '>srna0|aaa|5|8|+\nTAAT')
        target_lines = import_data(os.path.join(tar_seq, "aaa_target_1.fa"))
        self.assertEqual("\n".join(target_lines),
                         '>AAA_000001|CDS_00000\nTAAATTCC')

    def test_rna_plex(self):
        # Both external runners are mocked; the pre-made result file is
        # expected to still read "test" afterwards.
        self.star._run_rnaplex = self.mock.mock_run_rnaplex
        self.star._run_rnaplfold = self.mock.mock_run_rnaplfold
        os.mkdir("test_folder/test")
        gen_file("test_folder/test/test_RNAplex.txt", "test")
        gen_file(os.path.join(self.test_folder, "aaa_RNAplex.txt"),
                 self.example.rnaplex)
        options = self.mock_args.mock()
        options.vienna_path = "test"
        options.win_size_s = 5
        options.win_size_t = 5
        options.span_s = 5
        options.span_t = 5
        options.unstr_region_rnaplex_s = 5
        options.unstr_region_rnaplex_t = 5
        self.star._rna_plex(["test"], options)
        lines = import_data("test_folder/test/test_RNAplex.txt")
        self.assertEqual("\n".join(lines), "test")

    def test_get_continue(self):
        rnaup_path = os.path.join(self.test_folder, "rnaup.txt")
        gen_file(rnaup_path, self.example.rnaup)
        self.assertListEqual(self.star._get_continue(rnaup_path),
                             ["srna437"])

    def test_rnaup(self):
        self.star._run_rnaup = self.mock.mock_run_rnaup
        srna_fasta = ">srna0|aaa|5|8|+\nAAATTAATTAAATTCCGGCCGGCCGG"
        gen_file(os.path.join(self.out, "sRNA_seqs/tmp_test_sRNA.fa"),
                 srna_fasta)
        gen_file(os.path.join(self.gffs, "test_target.fa"),
                 ">AAA_000001|CDS_00000\nAAATTAATTAAATTCCGGCCGGCCGG")
        options = self.mock_args.mock()
        options.srnas = self.srnas
        options.fastas = self.fastas
        options.gffs = self.gffs
        options.vienna_path = "test"
        options.out_folder = self.out
        options.core_up = 4
        self.star._rnaup(["test"], options)
        lines = import_data(os.path.join(self.out, "tmp1.fa"))
        self.assertEqual("\n".join(lines), srna_fasta)

    def test_merge_rnaplex_rnaup(self):
        st.merge_srna_target = self.mock.mock_merge_srna_target
        options = self.mock_args.mock()
        options.srnas = self.srnas
        options.fastas = self.fastas
        options.gffs = self.gffs
        options.program = "both"
        options.out_folder = self.out
        options.top = "top"
        self.star._merge_rnaplex_rnaup(["test"], options)
        lines = import_data(os.path.join(self.test_folder, "out"))
        self.assertEqual(
            "\n".join(lines),
            "test_folder/output/RNAplex/test/test_RNAplex.txt"
            "test_folder/output/RNAup/test/test_RNAup.txt")
class TestTerminator(unittest.TestCase):
    """Unit tests for the private pipeline steps of ``Terminator``."""

    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.srnas = "test_folder/srnas"
        self.trans = "test_folder/trans"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
            os.mkdir(self.out)
            os.mkdir(self.fastas)
            os.mkdir(self.gffs)
            os.mkdir(self.srnas)
            os.mkdir(self.trans)
            os.mkdir(os.path.join(self.out, "tables"))
            os.mkdir(os.path.join(self.out, "gffs"))
            os.mkdir(os.path.join(self.gffs, "tmp"))
            os.mkdir(os.path.join(self.fastas, "tmp"))
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.trans = self.trans
        args.out_folder = self.out
        args.srnas = self.srnas
        self.term = Terminator(args)

    def tearDown(self):
        # Besides the scratch tree, remove the working-directory folders
        # that individual tests create or expect.
        for folder in (self.test_folder, "tmp_transterm",
                       "tmp_term_table", "tmp_merge_gff"):
            if os.path.exists(folder):
                shutil.rmtree(folder)

    def test_convert_gff2rntptt(self):
        os.mkdir(os.path.join(self.srnas, "tmp"))
        gen_file(os.path.join(self.gffs, "aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.srnas, "aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.fastas, "aaa.fa"),
                 self.example.fasta_file)
        file_types, prefixs = self.term._convert_gff2rntptt(
            self.gffs, self.fastas, self.srnas)
        self.assertDictEqual(file_types, {'aaa': 'srna'})
        self.assertListEqual(prefixs, ['aaa'])

    def test_combine_annotation(self):
        test1 = os.path.join(self.test_folder, "test1.ptt")
        test2 = os.path.join(self.test_folder, "test2.ptt")
        gen_file(test1, self.example.ptt)
        gen_file(test2, self.example.ptt)
        files = [test1, test2]
        combine_file = os.path.join(self.test_folder, "combine")
        self.term._combine_annotation(combine_file, files)
        datas = import_data(combine_file)
        # The expected result drops the first three lines of each input.
        result = self.example.ptt.split("\n")[3:]
        self.assertEqual("\n".join(datas), "\n".join(result + result))

    def test_run_TransTermHP(self):
        self.term._TransTermHP = self.mock.mock_TransTermHP
        os.mkdir(os.path.join(self.gffs, "tmp/combine"))
        gen_file(os.path.join(self.gffs, "tmp/combine/aaa.ptt"),
                 self.example.ptt)
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.hp_folder = self.out
        self.term._run_TransTermHP(args)
        self.assertTrue(os.path.exists(os.path.join(self.out, "aaa")))

    def test_convert_to_gff(self):
        self.term.multiparser = self.mock_parser
        hp_folder = os.path.join(self.out, "aaa")
        os.mkdir(hp_folder)
        gen_file(os.path.join(hp_folder,
                              "aaa_best_terminator_after_gene.bag"),
                 self.example.bag)
        os.mkdir("tmp_transterm")
        args = self.mock_args.mock()
        args.hp_folder = self.out
        args.gffs = self.gffs
        self.term._convert_to_gff(["aaa"], args)
        # Read the result from the "tmp_transterm" folder created above,
        # relative to the working directory, rather than from a
        # machine-specific absolute path.
        datas = import_data(
            os.path.join("tmp_transterm", "aaa_transtermhp.gff"))
        self.assertEqual("\n".join(datas), self.example.gff_bag)

    def test_merge_sRNA(self):
        os.mkdir(os.path.join(self.srnas, "tmp"))
        self.term.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "aaa.gff"), self.example.gff_file)
        gen_file(os.path.join(self.srnas, "tmp/aaa_sRNA.gff"),
                 self.example.srna_file)
        merge = self.term._merge_sRNA(self.srnas, ["aaa"], self.gffs)
        self.assertEqual(merge.split("/")[-1], "tmp_merge_gff")
        shutil.rmtree("tmp_merge_gff")

    def test_move_file(self):
        term_outfolder = self.gffs
        csv_outfolder = self.out
        gen_file(os.path.join(term_outfolder, "aaa_term.gff"),
                 self.example.term_file)
        if not os.path.exists("tmp_term_table"):
            os.mkdir("tmp_term_table")
        gen_file("tmp_term_table/aaa_term_raw.csv", "test")
        self.term._move_file(term_outfolder, csv_outfolder)
        shutil.rmtree("tmp_term_table")
        # NOTE(review): assertTrue() on a non-empty string can never fail,
        # so the two checks below verify nothing. They were presumably
        # meant to be os.path.exists(...) checks; confirm the real output
        # paths of _move_file() before tightening them.
        self.assertTrue(
            "test_folder/output/gffs/all_candidates/aaa_term_all.gff")
        self.assertTrue(
            "test_folder/output/tables/all_candidates/aaa_term_all.csv")

    def test_compute_intersection_forward_reverse(self):
        self.term.multiparser = self.mock_parser
        te.intergenic_seq = self.mock.mock_intergenic_seq
        te.poly_t = self.mock.mock_poly_t
        te.detect_coverage = self.mock.mock_detect_coverage
        self.term._run_rnafold = self.mock.mock_run_rnafold
        args = self.mock_args.mock()
        args.trans = self.trans
        args.fastas = self.fastas
        args.tex_notex = "tex_notex"
        args.libs = "libs"
        args.replicates = "rep"
        args.RNAfold_path = "test"
        self.term._compute_intersection_forward_reverse(
            ["aaa"], self.test_folder, "wig_path", "merge_wigs", args)
        # NOTE(review): os.path.join() returns a non-empty string, so these
        # assertions always pass; they only show that the call above did
        # not raise. Confirm which files the mocked run is expected to
        # produce before replacing them with existence checks.
        self.assertTrue(os.path.join(self.out, "inter_seq_aaa"))
        self.assertTrue(os.path.join(self.out, "inter_sec_aaa"))

    def test_compute_stat(self):
        term_outfolder = os.path.join(self.out, "gffs")
        csv_outfolder = os.path.join(self.out, "tables")
        te.stat_term = self.mock.mock_stat_term
        gen_file(
            os.path.join(term_outfolder, "all_candidates/aaa_term_all.gff"),
            self.example.term_file)
        gen_file(os.path.join(term_outfolder, "best/aaa_term.csv"),
                 self.example.term_file)
        gen_file(os.path.join(term_outfolder, "express/aaa_term.csv"),
                 self.example.term_file)
        gen_file(os.path.join(term_outfolder, "non_express/aaa_term.csv"),
                 self.example.term_file)
        args = self.mock_args.mock()
        args.stat = True
        args.out_folder = self.out
        self.term._compute_stat(args)
        self.assertTrue(os.path.exists(
            os.path.join(csv_outfolder, "express/aaa_term.csv")))
        self.assertTrue(os.path.exists(
            os.path.join(csv_outfolder, "best/aaa_term.csv")))
        self.assertTrue(os.path.exists(
            os.path.join(csv_outfolder, "non_express/aaa_term.csv")))

    def test_run_terminator(self):
        # End-to-end run with the module functions and instance methods
        # below replaced by mocks.
        te.stat_term = self.mock.mock_stat_term
        te.intergenic_seq = self.mock.mock_intergenic_seq
        te.poly_t = self.mock.mock_poly_t
        te.detect_coverage = self.mock.mock_detect_coverage
        self.term.multiparser = self.mock_parser
        self.term._run_rnafold = self.mock.mock_run_rnafold
        self.term._TransTermHP = self.mock.mock_TransTermHP
        self.term._compare_term_tran = self.mock.mock_compare_term_tran
        self.term._remove_tmp_file = self.mock.mock_remove_tmp_file
        os.mkdir(os.path.join(self.srnas, "tmp"))
        os.mkdir(os.path.join(self.trans, "tmp"))
        gen_file(os.path.join(self.gffs, "tmp/aaa.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.srnas, "tmp/aaa_sRNA.gff"),
                 self.example.srna_file)
        gen_file(os.path.join(self.trans, "tmp/aaa_transcript.gff"),
                 self.example.tran_file)
        tex_wigs = os.path.join(self.test_folder, "tex")
        frag_wigs = os.path.join(self.test_folder, "frag")
        os.mkdir(tex_wigs)
        os.mkdir(frag_wigs)
        gen_file(os.path.join(frag_wigs, "frag.wig"), "text")
        gen_file(os.path.join(tex_wigs, "tex.wig"), "text")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.fastas = self.fastas
        args.gffs = self.gffs
        args.trans = self.trans
        args.srnas = self.srnas
        args.tex_wigs = tex_wigs
        args.frag_wigs = frag_wigs
        args.hp_folder = self.test_folder
        args.tex_notex = "tex_notex"
        args.wig_path = self.test_folder
        args.merge_wigs = self.test_folder
        args.RNAfold_path = "RNAfold_path"
        args.stat = True
        args.fuzzy_up_ta = 2
        args.fuzzy_up_gene = 2
        args.fuzzy_down_ta = 2
        args.fuzzy_down_gene = 2
        self.term.run_terminator(args)
        self.assertTrue(
            os.path.exists(os.path.join(self.out, "tables/all_candidates")))
        self.assertTrue(
            os.path.exists(os.path.join(self.out, "tables/express")))
        self.assertTrue(os.path.exists(os.path.join(self.out, "tables/best")))
        self.assertTrue(
            os.path.exists(os.path.join(self.out, "gffs/all_candidates")))
        self.assertTrue(os.path.exists(os.path.join(self.out, "gffs/express")))
        self.assertTrue(os.path.exists(os.path.join(self.out, "gffs/best")))
class TestsRNAIntergenic(unittest.TestCase):
    """Tests for the intergenic sRNA detection helpers exposed by ``si``.

    Every test works inside ``test_folder``, which is created in ``setUp``
    and removed in ``tearDown``.

    NOTE(review): several tests replace attributes of the ``si`` module
    (``replicate_comparison``, ``coverage_comparison``, ``get_tss_type``,
    ``check_break_tran``, ``read_libs``, ``read_wig``) and never restore
    them, so those mocks stay installed for later tests.  Only
    ``get_coverage`` is restored.  Confirm whether other tests rely on
    that before changing it.
    """

    def setUp(self):
        """Create fixtures and a fresh, empty working directory."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.wig_folder = "test_folder/wigs"
        # A folder left behind by an aborted run would otherwise make the
        # directory creation below be skipped; always start from scratch.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        os.mkdir(self.test_folder)
        os.mkdir(self.wig_folder)

    def tearDown(self):
        """Remove the working directory."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_data(self):
        """read_data counts and parses the annotation/transcript/processing files."""
        gff_file = os.path.join(self.test_folder, "anno.gff")
        tran_file = os.path.join(self.test_folder, "tran.gff")
        pro_file = os.path.join(self.test_folder, "pro.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.ex_srna = False
        nums, cdss, tas, pros, genes, ncs = si.read_data(args)
        self.assertDictEqual(nums, {'ta': 3, 'cds': 3, 'pro': 3, 'uni': 0})
        self.assertEqual(cdss[0].start, 140)
        self.assertEqual(tas[0].start, 140)
        self.assertEqual(pros[0].start, 140)

    def test_read_tss(self):
        """read_tss returns the parsed entries of the TSS file."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        tsss, num_tss = si.read_tss(tss_file)
        self.assertEqual(tsss[0].start, 140)

    def test_compare_ta_cds(self):
        """compare_ta_cds flags the overlap in ``detects``."""
        detects = {"overlap": False}
        gffs = copy.deepcopy(self.example.gffs)
        tas = copy.deepcopy(self.example.tas)
        si.compare_ta_cds(gffs, tas[0], detects)
        self.assertDictEqual(detects, {'overlap': True})

    def test_compare_ta_tss(self):
        """compare_ta_tss writes one GFF line and one table line."""
        out_table = StringIO()
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        # Register the restore first so the mock is removed even when an
        # assertion below fails.
        self.addCleanup(setattr, si, "get_coverage", get_coverage)
        si.get_coverage = self.mock.mock_get_coverage
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        tsss = copy.deepcopy(self.example.tsss)
        si.compare_ta_tss(10, 2, 15, tas[0], tsss[0], 50, "cutoff", 20,
                          "", args)
        self.assertEqual(
            output.getvalue(),
            ("aaa\tANNOgesic\tncRNA\t10\t15\t.\t+\t.\t"
             "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
             "with_TSS=TSS:170_+\n"))
        self.assertEqual(
            out_table.getvalue(),
            ("aaa\t00000\t10\t15\t+\tNA\tNA\t"
             "NA\tNA\tNA\tTSS:170_+\n"))

    def test_print_file(self):
        """print_file renders coverage details into the table and GFF outputs."""
        string = "aaa\tintergenic\tsRNA\t10\t15\t.\t+\t."
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        srna_datas = {"high": 20, "low": 5, "best": 13,
                      "conds": {"cond1": "test1"},
                      "detail": [{"track": "test1", "high": 30,
                                  "low": 10, "avg": 15},
                                 {"track": "test2", "high": 25,
                                  "low": 13, "avg": 20}]}
        args = self.mock_args.mock()
        args.nums = nums
        args.out_table = out_table
        args.output = output
        args.table_best = False
        si.print_file(string, "TSS_160+", srna_datas, "intergenic",
                      args, "aaa")
        self.assertEqual(
            out_table.getvalue(),
            ("aaa\t00000\t10\t15\t+\tcond1\ttest1\t13\t20\t5\t"
             "TSS_160+\ttest1(avg=15;high=30;low=10);"
             "test2(avg=20;high=25;low=13)\n"))
        self.assertEqual(
            output.getvalue(),
            ("aaa\tintergenic\tsRNA\t10\t15\t.\t+\t.\t"
             "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
             "with_TSS=TSS_160+;best_avg_coverage=13;"
             "best_high_coverage=20;best_low_coverage=5\n"))

    def test_detect_include_tss(self):
        """detect_include_tss reports the sRNA starting at the contained TSS."""
        # Restore is registered up front so a failure cannot leak the mock.
        self.addCleanup(setattr, si, "get_coverage", get_coverage)
        si.get_coverage = self.mock.mock_get_coverage
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = coverage
        args.file_type = "frag"
        args.break_tran = False
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        si.detect_include_tss(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(
            output.getvalue(),
            ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
             "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
             "with_TSS=TSS:170_+\n"))
        self.assertEqual(
            out_table.getvalue(),
            ("aaa\t00000\t170\t230\t+\tNA\tNA\tNA"
             "\tNA\tNA\tTSS:170_+\n"))

    def test_get_differential_cover(self):
        """get_differential_cover updates ``cover_sets`` and ``poss`` in place."""
        checks = {"detect_diff": True, "first": True}
        cover_sets = {"diff": 30, "low": 5, "high": 35}
        cover = 20
        poss = {"stop_point": 100}
        args = self.mock_args.mock()
        args.fuzzy_inter = 10
        args.decrease_inter = 200
        si.get_differential_cover(0, checks, cover_sets, poss, cover,
                                  args, 80)
        self.assertDictEqual(cover_sets,
                             {'diff': 20, 'low': 20, 'high': 35})
        cover = 50
        poss = {"stop_point": 100}
        num = 20
        args.fuzzy_inter = 20
        si.get_differential_cover(num, checks, cover_sets, poss, cover,
                                  args, 80)
        self.assertDictEqual(poss, {"stop_point": 80})

    def test_check_coverage_pos(self):
        """check_coverage_pos returns a falsy result and leaves ``poss`` as is."""
        si.coverage_comparison = self.mock.mock_coverage_comparison
        cover_sets = {"low": 20, "high": 30, "total": 90, "diff": 50}
        poss = {"high": 20, "low": 70, "stop_point": 70}
        checks = {"detect_diff": True, "first": True}
        cover = {"coverage": 50, "pos": 80}
        detect = si.check_coverage_pos(30, 100, cover, 80, cover_sets,
                                       checks, poss, "+", 5)
        self.assertFalse(detect)
        self.assertDictEqual(poss,
                             {'high': 20, 'stop_point': 70, 'low': 70})

    def test_get_best(self):
        """get_best returns the per-condition coverage summary."""
        args = self.mock_args.mock()
        args.tolerance = 5
        args.fuzzy_inter = 5
        args.decrease_inter = 50
        datas = si.get_best(self.example.wigs, "aaa", "+", 2, 20,
                            "normal", args, 10)
        self.assertDictEqual(datas, {'frag_1': [
            {'low': -1, 'high': -1, 'avg': 30.7, 'pos': 21,
             'type': 'frag', 'track': 'track_1'}]})

    def test_get_attribute_string(self):
        """get_attribute_string builds the GFF attribute column."""
        srna_datas = {'best': 23, 'low': 20, 'high': 35}
        data = si.get_attribute_string(srna_datas,
                                       "TSS_100+;Cleavage_150+", 1,
                                       "sRNA_00001", "3utr", "aaa")
        self.assertEqual(
            data,
            ("ID=aaa_srna1;Name=sRNA_sRNA_00001;sRNA_type=3utr;"
             "with_TSS=TSS_100+;end_cleavage=Cleavage_150+;"
             "best_avg_coverage=23;best_high_coverage=35;"
             "best_low_coverage=20"))

    def test_check_pro(self):
        """check_pro returns the processing-site position, data and label."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        srna_datas = {"pos": 50}
        texs = {"track_1@AND@track_2"}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.replicates = "rep"
        args.texs = texs
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        pro_pos, new_srna_datas, detect_pro = si.check_pro(
            tas[0], 20, 70, srna_datas, "within", 5,
            self.example.wigs, 20, args)
        self.assertEqual(pro_pos, 190)
        self.assertDictEqual(new_srna_datas, {
            'best': 40, 'high': 50, 'low': 10, "pos": 5,
            "conds": {"cond1": "test1"}, "detail": None})
        self.assertEqual(detect_pro, "Cleavage:190_+")

    def test_exchange_to_pro(self):
        """exchange_to_pro swaps the sRNA end for the processing site."""
        srna_datas = {"pos": 50, "best": 10, "high": 12}
        args = self.mock_args.mock()
        args.max_len = 300
        args.min_len = 30
        args.table_best = True
        args.replicates = "rep"
        args.tex_notex = "tex_notex"
        args.texs = "texs"
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.tolerance = 5
        si.replicate_comparison = self.mock.mock_replicate_comparison
        detect, srna_datas, pro = si.exchange_to_pro(
            args, srna_datas, tas[0], 20, 70, 10, self.example.wigs, 20)
        self.assertTrue(detect)
        self.assertDictEqual(srna_datas, {
            'best': 40, 'high': 50, 'low': 10, 'pos': 190,
            "conds": {"cond1": "test1"}, "detail": None})
        self.assertEqual(pro, "Cleavage:190_+")

    def test_get_tss_type(self):
        """get_tss_type returns 10 for the first example TSS."""
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        si.check_break_tran = self.mock.mock_check_break_tran
        cover = si.get_tss_type(self.example.tsss[0], coverage,
                                None, None, None, False)
        self.assertEqual(cover, 10)

    def test_detect_wig_pos(self):
        """detect_wig_pos writes the sRNA ending at the processing site."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        args = self.mock_args.mock()
        args.texs = "texs"
        args.replicates = "rep"
        args.max_len = 300
        args.min_len = 30
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tex_notex = "tex_notex"
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.table_best = True
        args.nums = nums
        args.out_table = out_table
        args.output = output
        args.tolerance = 5
        si.detect_wig_pos(self.example.wigs, tas[0], 20, 70, "TSS_160+",
                          10, 20, args)
        self.assertEqual(
            output.getvalue(),
            ("aaa\tANNOgesic\tncRNA\t20\t190\t.\t+\t.\t"
             "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
             "with_TSS=TSS_160+;end_cleavage=Cleavage:190_+;"
             "best_avg_coverage=40;best_high_coverage=50;"
             "best_low_coverage=10\n"))
        self.assertEqual(
            out_table.getvalue(),
            ("aaa\t00000\t20\t190\t+\tcond1\t"
             "test1\t40\t50\t10\t\n"))

    def test_detect_longer(self):
        """detect_longer writes the expected GFF and table lines."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = nums
        args.fuzzy = 20
        args.file_type = "frag"
        args.break_tran = False
        args.detects = detects
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = 20
        args.output = output
        args.out_table = out_table
        si.get_tss_type = self.mock.mock_get_tss_type
        si.detect_longer(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(
            output.getvalue(),
            ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
             "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
             "with_TSS=TSS:170_+\n"))
        self.assertEqual(
            out_table.getvalue(),
            ("aaa\t00000\t170\t230\t+\tNA\tNA\tNA"
             "\tNA\tNA\tTSS:170_+\n"))

    def test_get_proper_tss(self):
        """get_proper_tss returns the parsed entries of the TSS file."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        tsss, num_tss = si.get_proper_tss(tss_file, coverage)
        self.assertEqual(tsss[0].start, 140)

    def test_check_srna_condition(self):
        """check_srna_condition writes the expected GFF and table lines."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        notex = {"primary": 0, "secondary": 0, "internal": 0,
                 "antisense": 30, "orphan": 10}
        coverage = {"primary": 0, "secondary": 0, "internal": 0,
                    "antisense": 50, "orphan": 10}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.file_type = "frag"
        args.break_tran = False
        args.notex = notex
        args.output = output
        args.cutoff_coverage = coverage
        args.out_table = out_table
        si.check_srna_condition(tas[0], args, None, args.wigs_f,
                                args.wigs_r)
        self.assertEqual(
            output.getvalue(),
            ("aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\t"
             "ID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;"
             "with_TSS=TSS:170_+\n"))
        self.assertEqual(
            out_table.getvalue(),
            ("aaa\t00000\t170\t230\t+\tNA\tNA\t"
             "NA\tNA\tNA\tTSS:170_+\n"))

    def test_intergenic_srna(self):
        """intergenic_srna creates both the output GFF and the output table."""
        si.read_libs = self.mock.mock_read_libs
        si.read_wig = self.mock.mock_read_wig
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
        tran_file = os.path.join(self.test_folder, "aaa_tran.gff")
        pro_file = os.path.join(self.test_folder, "aaa_processing.gff")
        wig_f_file = os.path.join(self.wig_folder, "wig_f.wig")
        wig_r_file = os.path.join(self.wig_folder, "wig_r.wig")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tss_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        output_file = os.path.join(self.test_folder, "output")
        output_table = os.path.join(self.test_folder, "table")
        coverage = [0, 0, 0, 50, 10]
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.tss_file = tss_file
        args.table_best = True
        args.cutoffs = coverage
        args.out_folder = self.test_folder
        args.file_type = "frag"
        args.cut_notex = coverage
        args.input_libs = "input_libs"
        args.wig_folder = self.wig_folder
        args.wig_f_file = wig_f_file
        args.wig_r_file = wig_r_file
        args.tss_source = True
        args.output_file = output_file
        args.output_table = output_table
        args.in_cds = False
        args.wigs_f = None
        args.wigs_r = None
        args.ex_srna = False
        si.intergenic_srna(args, args.input_libs, None, args.wigs_f,
                           args.wigs_r, tss_file)
        self.assertTrue(os.path.exists(output_file))
        self.assertTrue(os.path.exists(output_table))
class TestRibos(unittest.TestCase):
    """Tests for the ``Ribos`` scanning and merging steps.

    ``setUp`` builds the directory layout under ``test_folder`` that the
    tests write their fixture files into; ``tearDown`` removes it.
    """

    def setUp(self):
        """Create fixtures, a fresh directory tree and the ``Ribos`` object."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.fastas = os.path.join(self.test_folder, "fastas")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.database = os.path.join(self.test_folder, "database")
        self.seq_path = os.path.join(self.test_folder, "seqs")
        self.tables = os.path.join(self.out_folder, "tables")
        self.stat = os.path.join(self.out_folder, "statistics")
        self.scan = os.path.join(self.test_folder, "scan")
        self.tsss = os.path.join(self.test_folder, "tsss")
        self.trans = os.path.join(self.test_folder, "trans")
        self.out_gff = os.path.join(self.out_folder, "gffs")
        # A folder left behind by an aborted run would otherwise make the
        # whole directory layout below be skipped; always start from scratch.
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        # Parents are listed before their children because os.mkdir does
        # not create intermediate directories.
        for folder in (
                self.test_folder,
                self.tsss,
                os.path.join(self.tsss, "tmp"),
                self.trans,
                os.path.join(self.trans, "tmp"),
                self.gffs,
                os.path.join(self.gffs, "tmp"),
                self.fastas,
                os.path.join(self.fastas, "tmp"),
                self.out_folder,
                self.database,
                self.seq_path,
                os.path.join(self.out_folder, "tmp_table"),
                os.path.join(self.out_folder, "tmp_scan"),
                os.path.join(self.out_folder, "tmp_fasta"),
                os.path.join(self.out_folder, "scan_Rfam"),
                self.tables,
                self.scan,
                self.stat,
                self.out_gff):
            os.mkdir(folder)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.ribos_out_folder = self.out_folder
        args.database = self.database
        args.tsss = self.tsss
        args.trans = self.trans
        # NOTE(review): 'riboswtich' looks like a misspelling of
        # 'riboswitch'; left untouched because it changes which value
        # Ribos receives -- confirm the intended program name.
        args.program = 'riboswtich'
        self.ribo = Ribos(args)

    def tearDown(self):
        """Remove the working directory."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_scan_extract_rfam(self):
        """_scan_extract_rfam records the prefix and leaves a regenerated fasta."""
        self.ribo._run_cmscan = self.mock.mock_run_cmscan
        rb.modify_table = self.mock.mock_modify_table
        rb.regenerate_seq = self.mock.mock_regenerate_seq
        rb.reextract_rbs = self.mock.mock_reextract_rbs
        prefixs = []
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.seq_path, "test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tsss, "tmp/test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.trans, "tmp/test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.out_folder, "tmp_fasta", "test.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.fuzzy = 5
        args.fuzzy_rbs = 2
        args.utr = True
        args.output_all = "test"
        tmp_files = {
            "fasta": os.path.join(self.out_folder, "tmp_fasta"),
            "scan": "tmp_scan",
            "table": os.path.join(self.out_folder, "tmp_table"),
        }
        rfam = "Rfam_.cm"
        suffixs = {
            "csv": "test.csv",
            "txt": "test_prescan.txt",
            "re_txt": "test_scan.txt",
            "re_csv": "test_scan.csv",
        }
        self.ribo._scan_extract_rfam(prefixs, args, tmp_files, suffixs,
                                     "test", rfam)
        self.assertListEqual(prefixs, ["test"])
        self.assertTrue(
            os.path.exists(
                os.path.join(self.out_folder, "tmp_fasta",
                             "test_regenerate.fa")))

    def test_merge_results(self):
        """_merge_results runs on prepared scan/table fixtures.

        There are no assertions: the test only checks that the call
        completes without raising.
        """
        rb.stat_and_covert2gff = self.mock.mock_stat_and_covert2gff
        gen_file(os.path.join(self.gffs, "test.gff"),
                 self.example.gff_file)
        gen_file(
            os.path.join(self.out_folder, "tmp_table/test_riboswitch.csv"),
            self.example.table)
        gen_file(
            os.path.join(self.out_folder,
                         "tmp_scan/test_riboswitch_prescan.txt"),
            self.example.rescan_file)
        gen_file(
            os.path.join(self.out_folder,
                         "tmp_scan/test_riboswitch_scan.txt"),
            self.example.rescan_file)
        gen_file(os.path.join(self.test_folder, "ids"), self.example.ids)
        gen_file(os.path.join(self.tables, "test_riboswitch.csv"),
                 self.example.table)
        gen_file('test_folder/output/tmp_table/test_test_scan.csv', "test")
        gen_file(
            os.path.join("test_folder/output", "tmp_fasta",
                         "test_regenerate.fa"),
            "test")
        gen_file('test_folder/output/tmp_scan/test_test_prescan.txt',
                 "test")
        gen_file('test_folder/output/tmp_scan/test_test_scan.txt', "test")
        if not os.path.exists('test_folder/output/tmp_table/test_test.csv'):
            gen_file('test_folder/output/tmp_table/test_test.csv', "test")
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.ribos_id = os.path.join(self.test_folder, "ids")
        args.fuzzy = 3
        suffixs = {
            "csv": "test.csv",
            "txt": "test_prescan.txt",
            "re_txt": "test_scan.txt",
            "re_csv": "test_scan.csv",
        }
        tmp_files = {
            "fasta": os.path.join(self.out_folder, "tmp_fasta"),
            "scan": os.path.join(self.out_folder, "tmp_scan"),
            "table": os.path.join(self.out_folder, "tmp_table"),
        }
        self.ribo._merge_results(
            args,
            os.path.join(self.out_folder, "tmp_scan"),
            suffixs,
            tmp_files,
            os.path.join(self.out_folder, "tmp_scan"),
            os.path.join(self.out_folder, "scan_Rfam"),
            os.path.join(self.out_folder, "scan_Rfam"),
            os.path.join(self.out_folder, "gffs"),
            "riboswitch")
class TestCoverageTerminator(unittest.TestCase):
    """Tests for the coverage-based terminator helpers exposed by ``dct``."""

    def setUp(self):
        """Create the fixtures and make sure ``test_folder`` exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        folder_missing = not os.path.exists(self.test_folder)
        if folder_missing:
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete ``test_folder`` when it is present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_compare_ta(self):
        """compare_ta sets the ``express`` flag on every terminator."""
        transcripts = read_dict(3, self.example.tran_dict,
                                self.example.attributes_tran)
        dct.compare_ta(self.example.term_dict, transcripts, 5)
        flags = [entry["express"] for entry in self.example.term_dict]
        self.assertListEqual(flags, ["True", "True", "False"])

    def test_compare_transtermhp(self):
        """compare_transtermhp merges both sources and labels the method."""
        hairpins = read_dict(3, self.example.hp_dict,
                             self.example.attributes_term)
        merged = dct.compare_transtermhp(hairpins, self.example.term_dict)
        merged = sorted(merged,
                        key=lambda item: (item["strain"], item["start"]))
        positions = []
        found_by = []
        for item in merged:
            span = [str(item["start"]), str(item["end"])]
            positions.append("_".join(span))
            found_by.append(item["method"])
        self.assertListEqual(
            positions, ['30_40', '350_367', '420_432', '1420_2429'])
        self.assertListEqual(
            found_by,
            ['TransTermHP', 'forward_reverse,TransTermHP',
             'forward_reverse,TransTermHP', 'forward_reverse'])

    def test_compare_replicates(self):
        """compare_replicates picks the best track for tex/notex and frag."""
        texs = {"track_tex_track_notex": 0}
        args = self.mock_args.mock()
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        args.tex_notex = 2

        # tex/notex pair
        tex_cover = {"track": "track_tex", "high": 300, "low": 50,
                     "detect": "True", "diff": 250, "type": "tex"}
        notex_cover = {"track": "track_notex", "high": 200, "low": 50,
                       "detect": "True", "diff": 150, "type": "notex"}
        (diff_cover, diff, term_datas,
         detect_num) = dct.compare_replicates(
             [tex_cover, notex_cover], texs, "texnotex", args)
        self.assertEqual(diff_cover, 250)
        self.assertDictEqual(
            diff,
            {'track': 'track_tex', 'detect': 'True', 'high': 300,
             'low': 50, 'type': 'tex', 'diff': 250})
        expected_datas = [
            {'track': 'track_notex', 'detect': 'True', 'high': 200,
             'low': 50, 'type': 'notex', 'diff': 150},
            {'track': 'track_tex', 'detect': 'True', 'high': 300,
             'low': 50, 'type': 'tex', 'diff': 250},
        ]
        for position, expected in enumerate(expected_datas):
            self.assertDictEqual(expected, term_datas[position])
        self.assertEqual(detect_num, 1)

        # single fragmented track
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        frag_cover = {"track": "frag", "high": 10, "low": 0,
                      "detect": "False", "diff": 10, "type": "frag"}
        (diff_cover, diff, term_datas,
         detect_num) = dct.compare_replicates(
             [frag_cover], texs, "frag", args)
        self.assertEqual(diff_cover, 10)
        expected_frag = {'detect': 'False', 'type': 'frag', 'low': 0,
                         'diff': 10, 'track': 'frag', 'high': 10}
        self.assertDictEqual(diff, expected_frag)
        self.assertDictEqual(
            term_datas[0],
            {'detect': 'False', 'type': 'frag', 'low': 0, 'diff': 10,
             'track': 'frag', 'high': 10})
        self.assertEqual(detect_num, 1)

    def test_coverage2term(self):
        """coverage2term appends one coverage record for the track."""
        # The replacement is left installed on ``dct`` after the test,
        # exactly as before.
        dct.coverage_comparison = Mock_coverage().coverage_comparison
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        collected = []
        dct.coverage2term(
            [100, 30, 23, 21, 21],
            {"start": 2, "end": 4},
            {"low": 20, "high": 30},
            {"low": 1, "high": 2},
            "+", collected, "track_1", args, 0, 4, "frag")
        self.assertDictEqual(
            collected[0],
            {'diff': 70, 'track': 'track_1', 'type': 'frag',
             'high': 100, 'low': 30, 'detect': 'True'})

    def test_get_coverage(self):
        """get_coverage summarises the wiggle data around a terminator."""
        args = self.mock_args.mock()
        args.fuzzy = 1
        args.decrease = 0.5
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        args.tex_notex = 2
        term = {"start": 2, "end": 4, "strain": "aaa", "strand": "+"}
        texs = {"track_tex_track_notex": 0}
        track_values = {"track_1|+|frag": [100, 30, 23, 21, 21]}
        wigs = {"aaa": {"frag_1": track_values}}
        result = dct.get_coverage(term, wigs, "+", texs, args)
        diff_cover, diff, term_datas, detect_nums = result
        self.assertEqual(diff_cover, 70)
        self.assertDictEqual(
            diff,
            {'track': 'track_1', 'high': 100, 'type': 'frag',
             'detect': 'True', 'diff': 70, 'low': 30})
        self.assertDictEqual(
            term_datas["frag_1"][0],
            {'track': 'track_1', 'high': 100, 'type': 'frag',
             'detect': 'True', 'diff': 70, 'low': 30})
        self.assertDictEqual(detect_nums, {'frag_1': 1})

    def test_compare_term(self):
        """compare_term keeps the expected candidate at the head of the list."""
        first = {"miss": 5, "diff_cover": 30, "ut": 4}
        second = {"miss": 4, "diff_cover": 30, "ut": 4}
        third = {"miss": 6, "diff_cover": 80, "ut": 4}
        fourth = {"miss": 4, "diff_cover": 80, "ut": 4}
        fifth = {"miss": 4, "diff_cover": 80, "ut": 6}
        # (candidate handed to compare_term, expected head afterwards);
        # the last candidate is submitted twice.
        steps = (
            (first, first),
            (second, second),
            (third, {"miss": 4, "diff_cover": 30, "ut": 4}),
            (fourth, fourth),
            (fifth, fifth),
            (fifth, fifth),
        )
        kept = []
        for candidate, expected_head in steps:
            kept = dct.compare_term(candidate, kept)
            self.assertDictEqual(kept[0], expected_head)
        self.assertDictEqual(kept[1], fifth)

    def test_first_term(self):
        """first_term files the terminator by the strand's detect flag."""
        buckets = {"detect": [], "undetect": []}
        term = {"detect_p": True, "detect_m": False}
        found = dct.first_term("+", term, buckets, False)
        self.assertTrue(found)
        self.assertDictEqual(buckets["detect"][0], term)
        found = dct.first_term("-", term, buckets, False)
        self.assertFalse(found)
        self.assertDictEqual(buckets["undetect"][0], term)

    def test_print_table(self):
        """print_table output for expressed, unexpressed and flat coverage."""
        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True

        def build_term(express, diff_cover):
            # Fresh terminator record; only these two fields vary.
            return {"express": express, "diff_cover": diff_cover,
                    "diff": {"high": 100, "low": 30, "track": "track_1"},
                    "datas": {"data": [
                        {"track": "track_1", "diff": 70,
                         "high": 100, "low": 30},
                        {"track": "track_2", "diff": 39,
                         "high": 99, "low": 60}]}}

        def rendered_lines(record):
            # Run print_table into a throw-away buffer, return its lines.
            buffer = StringIO()
            dct.print_table(record, buffer, args)
            lines = set(buffer.getvalue().split("\n"))
            buffer.close()
            return lines

        detail_line = ("\tTrue\ttrack_1(diff=70;high=100;low=30);"
                       "track_2(diff=39;high=99;low=60)"
                       "track_1(diff=70;high=100;low=30)")
        expressed = build_term("True", 70)
        self.assertEqual(rendered_lines(expressed), {detail_line})
        args.table_best = False
        self.assertEqual(rendered_lines(expressed), {detail_line})
        self.assertEqual(rendered_lines(build_term("False", 70)),
                         {" False NA"})
        self.assertEqual(rendered_lines(build_term("True", -1)),
                         {" False No_coverage_decreasing"})

    def test_print2file(self):
        """print2file writes the expected GFF line and table line."""
        gff_out = StringIO()
        table_out = StringIO()
        args = self.mock_args.mock()
        args.cutoff_coverage = 5
        args.table_best = True
        term = {
            "strain": "aaa", "express": "True", "diff_cover": 70,
            "strand": "+", "start": 2, "end": 4,
            "method": "TransTermHP",
            "diff": {"high": 100, "low": 30, "track": "track_1"},
            "datas": {"data": [
                {"track": "track_1", "diff": 70, "high": 100, "low": 30},
                {"track": "track_2", "diff": 39, "high": 99, "low": 60}]},
        }
        dct.print2file(0, term, "70", "test", gff_out, table_out,
                       "test_method", args)
        gff_lines = gff_out.getvalue().split("\n")[:-1]
        table_lines = table_out.getvalue().split("\n")[:-1]
        self.assertEqual(set(gff_lines), {self.example.gff_file})
        self.assertEqual(set(table_lines), {self.example.table})
        gff_out.close()
        table_out.close()
class TestMergeRNAplexRNAup(unittest.TestCase):
    """Tests for merging RNAplex and RNAup target predictions (``mrr``)."""

    def setUp(self):
        """Start from an empty ``test_project`` directory and load fixtures."""
        self.test_folder = "test_project"
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        os.mkdir(self.test_folder)
        self.example = Example()
        self.mock_args = MockClass()

    def tearDown(self):
        """Delete the working directory when it is present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_detect_energy(self):
        """detect_energy updates the ``energy`` entry of the sRNA record."""
        record = {"energy": -2}
        mrr.detect_energy(self.example.out_rna_txt, record)
        self.assertDictEqual(record, {'energy': -5.3})
        record = {"energy": -8}
        mrr.detect_energy(self.example.out_rna_txt, record)
        self.assertDictEqual(record, {'energy': -8.0})

    def test_print_rank_one(self):
        """print_rank_one output matches the reference listing."""
        buffer = StringIO()
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        mrr.print_rank_one(self.example.srnas, buffer, "RNAplex",
                           self.example.gffs, self.example.srna_gffs,
                           args_tar)
        produced = convert_dict(buffer.getvalue().split("\n"))
        expected = convert_dict(self.example.out_print.split("\n"))
        self.assertDictEqual(produced, expected)

    def test_read_table(self):
        """read_table parses the RNAplex and RNAup result files."""
        rnaplex = os.path.join(self.test_folder, "rnaplex")
        rnaup = os.path.join(self.test_folder, "rnaup")
        gen_file(rnaplex, self.example.rnaplex)
        gen_file(rnaup, self.example.rnaup)
        srnas = mrr.read_table(self.example.gffs, rnaplex, rnaup)
        expected_rnaup = {
            'srna352': [
                {'target': 'srna1023', 'energy': 0},
                {'tar_pos': '571,576', 'target': 'SAOUHSC_00001|dnaA',
                 'energy': -4.87, 'srna_pos': '20,25'},
                {'tar_pos': '14,30', 'target': 'SAOUHSC_00002',
                 'energy': -5.91, 'srna_pos': '11,26'},
            ],
        }
        expected_rnaplex = {
            'srna1023': [
                {'tar_pos': '571,576', 'target': 'SAOUHSC_00001|dnaA',
                 'energy': -5.3, 'srna_pos': '20,25'},
            ],
            'srna352': [
                {'tar_pos': '163,170', 'target': 'SAOUHSC_00001|dnaA',
                 'energy': -1.91, 'srna_pos': '24,31'},
            ],
        }
        self.assertDictEqual(
            srnas, {'RNAup': expected_rnaup, 'RNAplex': expected_rnaplex})

    def test_get_srna_name(self):
        """get_srna_name returns the name and the matching entry."""
        found = mrr.get_srna_name(self.example.srna_gffs, "srna0")
        self.assertEqual(found[0], 'sRNA_0')
        self.assertEqual(found[1].start, 6)

    def test_get_target_info(self):
        """get_target_info returns the entry for the requested target."""
        target = mrr.get_target_info(self.example.gffs, "AAA_00001")
        self.assertEqual(target.start, 100)

    def test_merge_base_rnaplex(self):
        """merge_base_rnaplex yields the four expected rows in both outputs."""
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        merges = []
        overlap = mrr.merge_base_rnaplex(self.example.srnas,
                                         self.example.srna_gffs,
                                         args_tar, self.example.gffs,
                                         merges)
        expected_rows = [
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+',
             'AAA_00001', '100-150', '89-94', '89-94', '+',
             '-6.5', '1', '-6.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-12', '7-15', '+',
             'AAA_00002|dnaA', '2348-2934', '2330-2342', '2337-2342',
             '+', '-3.5', '2', '-3.5', '2'],
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267',
             '+', 'AAA_00003', '5544-5597', '5550-5545', '5550-5545',
             '-', '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516',
             '-', 'AAA_00001', '100-150', '89-94', '89-94', '+',
             '-23.5', '1', '-23.5', '1'],
        ]
        # Count every (expected row, produced row) pair that is equal.
        hits_in_overlap = sum(
            1 for row in expected_rows for got in overlap if row == got)
        self.assertEqual(hits_in_overlap, 4)
        hits_in_merges = sum(
            1 for row in expected_rows for got in merges if row == got)
        self.assertEqual(hits_in_merges, 4)

    def test_merge_base_rnaup(self):
        """merge_base_rnaup appends the four expected rows to ``merges``."""
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15

        def hit(target, energy, rank):
            # One prediction record; the positions are the same everywhere.
            return {"target": target, "energy": energy, "rank": rank,
                    "srna_pos": "2,10", "tar_pos": "10,15"}

        srnas = {
            "RNAplex": {
                "srna0": [hit("AAA_00001", -6.5, 1),
                          hit("AAA_00002|dnaA", -3.5, 2)],
                "srna1": [hit("AAA_00003", -10.5, 1)],
                "srna2": [hit("AAA_00001", -23.5, 1),
                          hit("AAA_00002|dnaA", -3.43, 3),
                          hit("AAA_00003", -6.5, 2)],
            },
            "RNAup": {
                "srna0": [hit("AAA_00001", -6.5, 1),
                          hit("AAA_00002|dnaA", -3.5, 2)],
                "srna1": [hit("AAA_00003", -10.5, 1)],
                "srna2": [hit("AAA_00001", -23.5, 1)],
            },
        }
        merges = []
        mrr.merge_base_rnaup(srnas, self.example.srna_gffs, args_tar,
                             self.example.gffs, merges)
        expected_rows = [
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267',
             '+', 'AAA_00003', '5544-5597', '5550-5545', '5550-5545',
             '-', '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516',
             '-', 'AAA_00001', '100-150', '89-94', '89-94', '+',
             '-23.5', '1', '-23.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+',
             'AAA_00001', '100-150', '89-94', '89-94', '+',
             '-6.5', '1', '-6.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+',
             'AAA_00002|dnaA', '2348-2934', '2337-2342', '2337-2342',
             '+', '-3.5', '2', '-3.5', '2'],
        ]
        # Count every (expected row, produced row) pair that is equal.
        hits = sum(
            1 for row in expected_rows for got in merges if row == got)
        self.assertEqual(hits, 4)
class TestsRNAClass(unittest.TestCase):
    """Tests for the sRNA classification functions reached through ``sc``."""

    @staticmethod
    def _class_index():
        """Return a fresh copy of the class-index mapping used by the tests."""
        return {'sRNA_hit': 5, '2d_energy': 1, 'sRNA_no_hit': 4,
                'nr_no_hit': 3, 'with_TSS': 2}

    def setUp(self):
        """Create the fixtures and the scratch folder for one test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_initiate(self):
        """sc.initiate writes the expected single line to the stream."""
        stream = StringIO()
        sc.initiate("test", ["test"], "test_name", 0, {}, stream,
                    "testtest")
        self.assertEqual(stream.getvalue(), "1testtest\n")

    def test_print_stat_title(self):
        """sc.print_stat_title writes the legend and returns num/index."""
        stat_stream = StringIO()
        checks = {"limit": False, "first": True,
                  "utr": False, "inter": False}
        srna_datas = {"aaa": self.example.srnas, "all": self.example.srnas}
        args = self.mock_args.mock()
        args.energy = 0
        args.nr_hits_num = 0
        args.import_info = ["tss", "sec_str", "blast_nr", "blast_srna"]
        class_num, index = sc.print_stat_title(
            checks, stat_stream, "aaa", srna_datas, 1, args)
        # The literal below is kept exactly as it appears in the source.
        expected_title = """1 - the normalized(by length of sRNA) free energy change of secondary structure below to 0 2 - sRNA candidates start with TSS (3'UTR derived and interCDS sRNA also includes the sRNA candidates which start with processing site.) 3 - blast can not find the homology from nr database (the cutoff is 0). 4 - blast can not find the homology from sRNA database. 5 - blast can find the homology from sRNA database. 
All strains: """
        self.assertEqual(stat_stream.getvalue(), expected_title)
        self.assertEqual(class_num, 5)
        self.assertDictEqual(index, self._class_index())

    def test_import_class(self):
        """sc.import_class returns 1 and fills ``class_4`` for a 5'UTR."""
        collected = {}
        source = {"aaa": self.example.srnas}
        num = sc.import_class(5, collected, source, self._class_index(), 0,
                              "aaa", "UTR_derived", "5utr", 0, 0)
        self.assertEqual(num, 1)
        self.assertEqual(collected["class_4"][0].start, 230)

    def test_import_data(self):
        """sc.import_data groups the example sRNAs by type and class."""
        source = {"aaa": self.example.srnas, "all": self.example.srnas}
        num_srna = {"total": 0, "intergenic": 0, "5'UTR_derived": 0,
                    "3'UTR_derived": 0, "interCDS": 0, "in_CDS": 0}
        checks = {"limit": False, "first": True, "utr": True,
                  "inter": True, "in_CDS": True, "antisense": False}
        datas_rna = sc.import_data(5, source, self._class_index(), num_srna,
                                   "aaa", checks, 0, 0)
        expected_starts = (
            ("5'UTR_derived", "class_4", 230),
            ("interCDS", "class_1", 140),
            ("in_CDS", "class_1", 6166),
            ("intergenic", "class_5", 5166),
        )
        for srna_type, class_key, start in expected_starts:
            self.assertEqual(datas_rna[srna_type][class_key][0].start,
                             start)

    def test_print_intersection(self):
        """sc.print_intersection writes the stat line and the gff file."""
        num_srna = {"total": 3, "intergenic": 1, "5'UTR_derived": 1,
                    "3'UTR_derived": 0, "interCDS": 1}
        gff_name = os.path.join(self.test_folder, "test")
        stat_stream = StringIO()
        keys = ["class_1", "class_4", "class_2", "class_3", "class_5"]
        datas = dict.fromkeys(
            ["class_1", "class_2", "class_3", "class_4", "class_5"],
            self.example.srnas)
        sc.print_intersection(datas, keys, 3, gff_name, "total",
                              stat_stream)
        self.assertEqual(
            stat_stream.getvalue(),
            "\tclass_1 and class_4 and class_2 and class_3 and class_5 = 4(1.3333333333333333)\n")
        results, attributes = extract_info(
            os.path.join(self.test_folder, "test"), "file")
        self.assertEqual("\n".join(results), self.example.gff_info)

    def test_read_file(self):
        """sc.read_file returns sRNAs per strain, strain names and checks."""
        srna_file = os.path.join(self.test_folder, "srna.gff")
        gen_file(srna_file, self.example.gff_file)
        srna_datas, strains, checks = sc.read_file(srna_file)
        for strain, position, start in (("aaa", 0, 140),
                                        ("aaa", 1, 230),
                                        ("bbb", 0, 5166)):
            self.assertEqual(srna_datas[strain][position].start, start)
        self.assertListEqual(strains, ['all', 'aaa', 'bbb'])
        self.assertDictEqual(
            checks,
            {'inter': True, 'limit': False, 'utr': True,
             'antisense': False, 'in_CDS': True, 'first': True})

    def test_sort_keys(self):
        """sc.sort_keys returns the class keys in ascending order."""
        ordered = sc.sort_keys(["class_3", "class_1", "class_5"])
        self.assertListEqual(ordered, ['class_1', 'class_3', 'class_5'])

    def test_classify_srna(self):
        """sc.classify_srna runs on the example gff (no assertions made)."""
        stat_path = os.path.join(self.test_folder, "stat")
        srna_file = os.path.join(self.test_folder, "srna.gff")
        gen_file(srna_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.energy = 0
        args.nr_hits_num = 0
        args.in_cds = True
        args.import_info = ["tss", "sec_str"]
        sc.classify_srna(srna_file, self.test_folder, stat_path, args)
class TestTranscripSNP(unittest.TestCase):
    """Tests for the SNP detection functions reached through ``ts``."""

    # ------------------------------------------------------------------
    # Fixture helpers. Each one returns a brand-new object per call so
    # that no record is shared between the inputs and the expectations.
    # ------------------------------------------------------------------
    @staticmethod
    def _mq20_snp(pos, all_info, ref='C', alt='A', **extra):
        """Build a SNP record whose ``info`` field is ``'MQ=20'``."""
        record = {'filter': '.', 'pos': pos, 'alt': alt, 'frac': -1,
                  'depth': 89, 'indel': -1, 'info': 'MQ=20', 'id': '.',
                  'qual': 98.0, 'ref': ref, 'strain': 'NC_007795.1',
                  'all_info': all_info}
        record.update(extra)
        return record

    @staticmethod
    def _snp_pos1():
        """Build the printable multi-allele record at position 1."""
        return {'all_info': ("NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;"
                             "VDB=8.46526e-15\tGT:PL:DP\t1/1:125,"
                             "184,0:87"),
                'filter': '.', 'id': '.', 'frac': -1, 'indel': -1,
                'alt': 'A,GT', 'info': 'VDB=8.46526e-15', 'qual': 98.0,
                'ref': 'CA', 'strain': 'NC_007795.1', 'depth': 89,
                'pos': 1, 'print': True}

    @staticmethod
    def _snp_pos2():
        """Build the printable indel record at position 2."""
        return {'all_info': ("NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\t"
                             "INDEL;IDV=22;IMF=0.536585;DP=41;"
                             "VDB=9.36323e-14 GT:PL:DP\t0/1:60,0,"
                             "55:40"),
                'filter': '.', 'id': '.', 'frac': 0.536585, 'indel': 22,
                'alt': 'AA', 'info': 'VDB=9.36323e-14 GT:PL:DP',
                'qual': 26.9515, 'ref': 'A', 'strain': 'NC_007795.1',
                'depth': 41, 'pos': 2, 'print': True}

    @staticmethod
    def _snp_pos7():
        """Build the printable single-base record at position 7."""
        return {'all_info': ("NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;"
                             "VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87"),
                'filter': '.', 'id': '.', 'frac': -1, 'indel': -1,
                'alt': 'A', 'info': 'VDB=8.46526e-15', 'qual': 98.0,
                'ref': 'C', 'strain': 'NC_007795.1', 'depth': 89,
                'pos': 7, 'print': True}

    def _filter_args(self):
        """Return mock args carrying the filter settings the tests share."""
        args = self.mock_args.mock()
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2
        return args

    def _assert_created(self, *names):
        """Assert that every named file exists inside the test folder."""
        for name in names:
            self.assertTrue(os.path.exists(os.path.join(
                self.test_folder, name)))

    # ------------------------------------------------------------------
    # unittest hooks
    # ------------------------------------------------------------------
    def setUp(self):
        """Create the fixtures and the scratch folder for one test."""
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_import_data(self):
        """ts.import_data returns the max qualities and the parsed SNPs."""
        snp_file = os.path.join(self.test_folder, "snp")
        gen_file(snp_file, self.example.snp_file)
        depth_file = os.path.join(self.test_folder, "depth")
        gen_file(depth_file, self.example.depth_file)
        args = self._filter_args()
        max_quals, snps, dess, raw_snps = ts.import_data(
            snp_file, args, 2, depth_file, 2)
        self.assertDictEqual(max_quals, {
            'NC_007795.1': 98.0, 'All_genome': 98.0})
        expected_snps = [
            {'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.',
             'indel': -1, 'pos': 1, 'id': '.',
             'all_info': ("NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;DP4=0,0,"
                          "60,9;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87"),
             'qual': 98.0,
             'info': ['DP=89', 'DP4=0,0,60,9', 'VDB=8.46526e-15'],
             'alt': 'A', 'ref': 'C', 'frac': -1, 'depth': 89,
             'dp4_sum': 69},
            {'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.',
             'indel': 22, 'pos': 6, 'id': '.',
             'all_info': ("NC_007795.1\t6\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;"
                          "IMF=0.536585;DP=41;VDB=9.36323e-14;DP4=0,0,40,0\t"
                          "GT:PL:DP\t0/1:60,0,55:40"),
             'qual': 26.9515,
             'info': ['INDEL', 'IDV=22', 'IMF=0.536585', 'DP=41',
                      'VDB=9.36323e-14', 'DP4=0,0,40,0'],
             'alt': 'AA', 'ref': 'A', 'frac': 0.536585, 'depth': 41,
             'dp4_sum': 40},
        ]
        self.assertListEqual(snps, expected_snps)

    def test_check_overlap(self):
        """ts.check_overlap flags the overlap and stores it in ``snps``."""
        snps = {"test": []}
        overlaps = [{"test": []}]
        ts.check_overlap(snps, overlaps)
        self.assertListEqual(overlaps, [{'test': [], 'print': True}])
        self.assertDictEqual(snps, {'test': [{'test': [], 'print': True}]})

    def test_overlap_position(self):
        """ts.overlap_position separates conflicting and compatible SNPs."""
        all_infos = {
            22181: ("NC_007795.1\t22181\t.\tC\tA\t98\t.\t"
                    "DP=89;VDB=8.46526e-15;SGB=-0.693147\t"
                    "GT:PL:DP\t1/1:125,184,0:87"),
            22182: ("NC_007795.1\t22182\t.\tC\tA\t98\t.\t"
                    "DP=89;VDB=8.46526e-15;SGB=-0.693147\t"
                    "GT:PL:DP\t1/1:125,184,0:87"),
            30000: ("NC_007795.1\t30000\t.\tC\tA\t98\t.\t"
                    "DP=89;VDB=8.46526e-15;SGB=-0.693147\t"
                    "GT:PL:DP\t1/1:125,184,0:87"),
        }

        def first(**extra):
            return self._mq20_snp(22181, all_infos[22181],
                                  ref='CA', alt='A', **extra)

        def second(**extra):
            return self._mq20_snp(22182, all_infos[22182],
                                  ref='A', alt='C', **extra)

        def third(**extra):
            return self._mq20_snp(30000, all_infos[30000],
                                  ref='C', alt='A', **extra)

        qual_snps = [first(), second(), third()]
        conflicts, nooverlap = ts.overlap_position(qual_snps)
        self.assertListEqual(
            conflicts, [[first(print=True), second(print=True)]])
        self.assertDictEqual(nooverlap, {
            1: [first(print=True), third(print=True)],
            2: [second(print=True), third(print=True)]})

    def test_stat(self):
        """ts.stat writes the ``_best.csv`` statistics file."""
        stat_file = os.path.join(self.test_folder, "stat")
        max_quals = {'NC_007795.1': 98.0, 'All_genome': 98.0}
        trans_snps = [self._mq20_snp(
            22181,
            ("NC_007795.1\t22181\t.\tC\tA\t98\t.\t"
             "DP=89;VDB=8.46526e-15\tGT:PL:DP\t"
             "1/1:125,184,0:87"))]
        args = self.mock_args.mock()
        args.depth = 50
        args.fraction = 0.3
        args.quality = 20
        ts.stat(max_quals, trans_snps, 2, stat_file,
                self.test_folder + "/test", args, "best.csv")
        lines = import_data(stat_file + "_best.csv")
        self.assertEqual("\n".join(lines), self.example.stat)

    def test_plot_bar(self):
        """ts.plot_bar creates the quality plot image."""
        ts.plot_bar([3, 10, 30, 45, 50], "NC_007795.1",
                    self.test_folder + "/test", "best.png")
        self._assert_created("test_NC_007795.1_SNP_QUAL_best.png")

    def test_read_fasta(self):
        """ts.read_fasta returns one name-to-sequence mapping."""
        fasta_file = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta_file, self.example.fasta_file)
        seqs = ts.read_fasta(fasta_file)
        self.assertListEqual(seqs, [{
            'NC_007795.1': 'AAATATATCAGCACCGTAGACGATAGAGTAGTAC'}])

    def test_gen_ref(self):
        """ts.gen_ref builds reference labels over two successive calls."""
        def build_snps():
            return [
                self._mq20_snp(
                    22181,
                    ("NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;"
                     "VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87")),
                self._mq20_snp(
                    22500,
                    ("NC_007795.1\t22500\t.\tC\tA\t98\t.\tDP=89;"
                     "VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87")),
            ]

        refs = ts.gen_ref(build_snps(), 1, [], 1)
        self.assertListEqual(refs, ['1:A', '1:A'])
        # The second call receives the list produced by the first one.
        refs = ts.gen_ref(build_snps(), 1, refs, 2)
        self.assertListEqual(refs, [
            '1:A_1:A', '1:A_1:A', '1:A_1:A', '1:A_1:A'])

    def test_change(self):
        """ts.change rewrites the sequence in place for one SNP."""
        snp = self._mq20_snp(
            1,
            ("NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;"
             "VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87"))
        seq = {"num_mod": 3, "seq": "CCCCATATCAGCACCGTAGACGATAGAGTAGTAC"}
        ts.change(snp, seq)
        self.assertDictEqual(seq, {
            'num_mod': 3, 'seq': 'CCCaATATCAGCACCGTAGACGATAGAGTAGTAC'})

    def test_print_file(self):
        """ts.print_file writes the reference line and two fasta files."""
        refs = {'NC_007795.1': ['1:A', '1:GT']}
        conflicts = [[self._snp_pos1(), self._snp_pos2()]]
        values = [self._snp_pos1(), self._snp_pos7()]
        mod_seq_init = {'genome': 'NC_007795.1', 'num_mod': 0,
                        'seq': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC'}
        mod_seqs = [{'genome': 'NC_007795.1', 'num_mod': -1,
                     'seq': 'aGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'},
                    {'genome': 'NC_007795.1', 'num_mod': 0,
                     'seq': 'gtGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'}]
        out_ref = StringIO()
        out_seq = os.path.join(self.test_folder, "seq")
        ts.print_file(refs, out_ref, conflicts, 1, values, mod_seq_init,
                      mod_seqs, out_seq, "NC_007795.1")
        self.assertEqual(
            out_ref.getvalue(), "1\t1\t1\tNC_007795.1\tNC_007795.1\n")
        self._assert_created("seq_NC_007795.1_1_1.fa",
                             "seq_NC_007795.1_1_2.fa")

    def test_gen_new_fasta(self):
        """ts.gen_new_fasta writes three reference lines and fasta files."""
        out_ref = StringIO()
        out_seq = os.path.join(self.test_folder, "seq")
        nooverlap = {1: [self._snp_pos1(), self._snp_pos7()],
                     2: [self._snp_pos2(), self._snp_pos7()]}
        seqs = [{'NC_007795.1': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC'}]
        conflicts = [[self._snp_pos1(), self._snp_pos2()]]
        ts.gen_new_fasta(nooverlap, seqs, out_ref, conflicts, out_seq)
        self.assertEqual(out_ref.getvalue(),
                         ("1\t1\t1\t1:A\tNC_007795.1\n"
                          "1\t1\t2\t1:GT\tNC_007795.1\n"
                          "2\t2\t1\tAll\tNC_007795.1\n"))
        self._assert_created("seq_NC_007795.1_1_1.fa",
                             "seq_NC_007795.1_1_2.fa",
                             "seq_NC_007795.1_2_1.fa")

    def test_snp_detect(self):
        """ts.snp_detect produces the fasta, csv, vcf and png outputs."""
        depth_file = os.path.join(self.test_folder, "depth")
        gen_file(depth_file, self.example.depth_file)
        fasta_file = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta_file, self.example.fasta_final)
        snp_file = os.path.join(self.test_folder, "NC_007795.1.snp")
        gen_file(snp_file, self.example.snp_final)
        out_seq = os.path.join(self.test_folder, "seq")
        out_snp = os.path.join(self.test_folder, "snp")
        stat_file = os.path.join(self.test_folder, "stat")
        args = self._filter_args()
        args.depth = 5
        args.fraction = 0.3
        args.quality = 5
        ts.snp_detect(fasta_file, snp_file, depth_file, out_snp, out_seq,
                      2, stat_file, args, 2)
        self._assert_created(
            "seq_NC_007795.1_1_1.fa",
            "seq_NC_007795.1_1_2.fa",
            "seq_NC_007795.1_2_1.fa",
            "snp_seq_reference.csv",
            "snp_best.vcf",
            "snp_NC_007795.1_SNP_QUAL_best.png",
            "snp_NC_007795.1_SNP_QUAL_raw.png")
class TestsRNAIntergenic(unittest.TestCase):
    """Tests for the intergenic sRNA detection functions reached via ``si``.

    Many tests swap attributes of the ``si`` module for mocks from
    ``Mock_func``. Only ``si.get_coverage`` is put back afterwards (through
    ``addCleanup``, so the restore also happens when a test fails).

    NOTE(review): the replacements of ``si.replicate_comparison``,
    ``si.coverage_comparison``, ``si.check_break_tran``, ``si.get_tss_type``,
    ``si.read_libs`` and ``si.read_wig`` are never undone in this class, so
    tests that run later may see the mocks. This is left untouched because
    restoring them could change which implementation those tests exercise;
    confirm the intent before tightening it.
    """

    def setUp(self):
        """Create the fixtures and the scratch folders for one test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.wig_folder = "test_folder/wigs"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
            os.mkdir(self.wig_folder)

    def tearDown(self):
        """Remove the scratch folder if it is still present."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_data(self):
        """si.read_data parses annotation, transcript and processing gffs."""
        gff_file = os.path.join(self.test_folder, "anno.gff")
        tran_file = os.path.join(self.test_folder, "tran.gff")
        pro_file = os.path.join(self.test_folder, "pro.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        nums, cdss, tas, pros, genes, ncs = si.read_data(args)
        self.assertDictEqual(nums, {'ta': 3, 'cds': 3, 'pro': 3, 'uni': 0})
        self.assertEqual(cdss[0].start, 140)
        self.assertEqual(tas[0].start, 140)
        self.assertEqual(pros[0].start, 140)

    def test_read_tss(self):
        """si.read_tss returns the TSS entries of the gff file."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        tsss, num_tss = si.read_tss(tss_file)
        self.assertEqual(tsss[0].start, 140)

    def test_compare_ta_cds(self):
        """si.compare_ta_cds marks the transcript as overlapping."""
        detects = {"overlap": False}
        gffs = copy.deepcopy(self.example.gffs)
        tas = copy.deepcopy(self.example.tas)
        si.compare_ta_cds(gffs, tas[0], detects)
        self.assertDictEqual(detects, {'overlap': True})

    def test_compare_ta_tss(self):
        """si.compare_ta_tss writes the gff line and the table line."""
        out_table = StringIO()
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        si.get_coverage = self.mock.mock_get_coverage
        # Registered right after patching so the original function is put
        # back even when one of the assertions below fails.
        self.addCleanup(setattr, si, "get_coverage", get_coverage)
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        tsss = copy.deepcopy(self.example.tsss)
        si.compare_ta_tss(10, 2, 15, tas[0], tsss[0], 50, "cutoff", 20, "",
                          args)
        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t10\t15\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t10\t15\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n")

    def test_print_file(self):
        """si.print_file writes detailed table and gff lines."""
        string = "aaa\tintergenic\tsRNA\t10\t15\t.\t+\t."
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        srna_datas = {
            "high": 20,
            "low": 5,
            "best": 13,
            "conds": {
                "cond1": "test1"
            },
            "detail": [{
                "track": "test1",
                "high": 30,
                "low": 10,
                "avg": 15
            }, {
                "track": "test2",
                "high": 25,
                "low": 13,
                "avg": 20
            }]
        }
        args = self.mock_args.mock()
        args.nums = nums
        args.out_table = out_table
        args.output = output
        args.table_best = False
        si.print_file(string, "TSS_160+", srna_datas, "intergenic", args,
                      "aaa")
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t10\t15\t+\tcond1\ttest1\t13\t20\t5\tTSS_160+\ttest1(avg=15;high=30;low=10);test2(avg=20;high=25;low=13)\n"
        )
        self.assertEqual(
            output.getvalue(),
            "aaa\tintergenic\tsRNA\t10\t15\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS_160+;best_avg_coverage=13;best_high_coverage=20;best_low_coverage=5\n"
        )

    def test_detect_include_tss(self):
        """si.detect_include_tss reports the sRNA starting at the TSS."""
        si.get_coverage = self.mock.mock_get_coverage
        # Restore the real function even if the call under test raises.
        self.addCleanup(setattr, si, "get_coverage", get_coverage)
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        coverage = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10
        }
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = coverage
        args.file_type = "frag"
        args.break_tran = False
        args.output = output
        args.out_table = out_table
        tas = copy.deepcopy(self.example.tas)
        si.detect_include_tss(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t170\t230\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n")

    def test_get_differential_cover(self):
        """si.get_differential_cover updates cover sets and stop point."""
        checks = {"detect_diff": True, "first": True}
        cover_sets = {"diff": 30, "low": 5, "high": 35}
        cover = 20
        poss = {"stop_point": 100}
        args = self.mock_args.mock()
        args.fuzzy_inter = 10
        args.decrease_inter = 200
        si.get_differential_cover(0, checks, cover_sets, poss, cover, args,
                                  80)
        self.assertDictEqual(cover_sets,
                             {'diff': 20, 'low': 20, 'high': 35})
        cover = 50
        poss = {"stop_point": 100}
        num = 20
        args.fuzzy_inter = 20
        si.get_differential_cover(num, checks, cover_sets, poss, cover,
                                  args, 80)
        self.assertDictEqual(poss, {"stop_point": 80})

    def test_check_coverage_pos(self):
        """si.check_coverage_pos returns False and keeps ``poss`` as is."""
        si.coverage_comparison = self.mock.mock_coverage_comparison
        cover_sets = {"low": 20, "high": 30, "total": 90, "diff": 50}
        poss = {"high": 20, "low": 70, "stop_point": 70}
        checks = {"detect_diff": True, "first": True}
        cover = {"coverage": 50, "pos": 80}
        detect = si.check_coverage_pos(30, 100, cover, 80, cover_sets,
                                       checks, poss, "+", 5)
        self.assertFalse(detect)
        self.assertDictEqual(poss,
                             {'high': 20, 'stop_point': 70, 'low': 70})

    def test_get_best(self):
        """si.get_best returns the expected entry for ``frag_1``."""
        args = self.mock_args.mock()
        args.tolerance = 5
        args.fuzzy_inter = 5
        args.decrease_inter = 50
        datas = si.get_best(self.example.wigs, "aaa", "+", 2, 20, "normal",
                            args, 10)
        self.assertDictEqual(
            datas, {
                'frag_1': [{
                    'low': -1,
                    'high': -1,
                    'avg': 30.7,
                    'pos': 21,
                    'type': 'frag',
                    'track': 'track_1'
                }]
            })

    def test_get_attribute_string(self):
        """si.get_attribute_string formats the gff attribute column."""
        srna_datas = {'best': 23, 'low': 20, 'high': 35}
        data = si.get_attribute_string(srna_datas, "TSS_100+;Cleavage_150+",
                                       1, "sRNA_00001", "3utr", "aaa")
        self.assertEqual(
            data,
            "ID=aaa_srna1;Name=sRNA_sRNA_00001;sRNA_type=3utr;with_TSS=TSS_100+;end_cleavage=Cleavage_150+;best_avg_coverage=23;best_high_coverage=35;best_low_coverage=20"
        )

    def test_check_pro(self):
        """si.check_pro returns the processing site and new coverage data."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        srna_datas = {"pos": 50}
        texs = {"track_1@AND@track_2"}
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.replicates = "rep"
        args.texs = texs
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        pro_pos, new_srna_datas, detect_pro = si.check_pro(
            tas[0], 20, 70, srna_datas, "within", 5, self.example.wigs, 20,
            args)
        self.assertEqual(pro_pos, 190)
        self.assertDictEqual(
            new_srna_datas, {
                'best': 40,
                'high': 50,
                'low': 10,
                "pos": 5,
                "conds": {
                    "cond1": "test1"
                },
                "detail": None
            })
        self.assertEqual(detect_pro, "Cleavage:190_+")

    def test_exchange_to_pro(self):
        """si.exchange_to_pro swaps the sRNA end for the processing site."""
        srna_datas = {"pos": 50, "best": 10, "high": 12}
        args = self.mock_args.mock()
        args.max_len = 300
        args.min_len = 30
        args.table_best = True
        args.replicates = "rep"
        args.tex_notex = "tex_notex"
        args.texs = "texs"
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.tolerance = 5
        si.replicate_comparison = self.mock.mock_replicate_comparison
        detect, srna_datas, pro = si.exchange_to_pro(
            args, srna_datas, tas[0], 20, 70, 10, self.example.wigs, 20)
        self.assertTrue(detect)
        self.assertDictEqual(
            srna_datas, {
                'best': 40,
                'high': 50,
                'low': 10,
                'pos': 190,
                "conds": {
                    "cond1": "test1"
                },
                "detail": None
            })
        self.assertEqual(pro, "Cleavage:190_+")

    def test_get_tss_type(self):
        """si.get_tss_type returns 10 for the first example TSS."""
        coverage = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10
        }
        si.check_break_tran = self.mock.mock_check_break_tran
        cover = si.get_tss_type(self.example.tsss[0], coverage, None, None,
                                None, False)
        self.assertEqual(cover, 10)

    def test_detect_wig_pos(self):
        """si.detect_wig_pos writes the sRNA ending at the cleavage site."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        args = self.mock_args.mock()
        args.texs = "texs"
        args.replicates = "rep"
        args.max_len = 300
        args.min_len = 30
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tex_notex = "tex_notex"
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.table_best = True
        args.nums = nums
        args.out_table = out_table
        args.output = output
        args.tolerance = 5
        si.detect_wig_pos(self.example.wigs, tas[0], 20, 70, "TSS_160+", 10,
                          20, args)
        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t20\t190\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS_160+;end_cleavage=Cleavage:190_+;best_avg_coverage=40;best_high_coverage=50;best_low_coverage=10\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t20\t190\t+\tcond1\ttest1\t40\t50\t10\t\n")

    def test_detect_longer(self):
        """si.detect_longer reports the sRNA for a long transcript."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        coverage = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10
        }
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = nums
        args.fuzzy = 20
        args.file_type = "frag"
        args.break_tran = False
        args.detects = detects
        args.cutoff_coverage = coverage
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.notex = 20
        args.output = output
        args.out_table = out_table
        si.get_tss_type = self.mock.mock_get_tss_type
        si.detect_longer(tas[0], args, None, args.wigs_f, args.wigs_r)
        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t170\t230\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n")

    def test_get_proper_tss(self):
        """si.get_proper_tss returns the TSS entries of the gff file."""
        tss_file = os.path.join(self.test_folder, "tss.gff")
        gen_file(tss_file, self.example.gff_file)
        coverage = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10
        }
        tsss, num_tss = si.get_proper_tss(tss_file, coverage)
        self.assertEqual(tsss[0].start, 140)

    def test_check_srna_condition(self):
        """si.check_srna_condition reports the sRNA starting at the TSS."""
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        nums = {'pro': 3, 'tss': 3, 'uni': 0, 'cds': 3, 'ta': 3}
        out_table = StringIO()
        output = StringIO()
        detects = {"overlap": False, "uni_with_tss": False}
        notex = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 30,
            "orphan": 10
        }
        coverage = {
            "primary": 0,
            "secondary": 0,
            "internal": 0,
            "antisense": 50,
            "orphan": 10
        }
        args = self.mock_args.mock()
        args.tex_notex = "tex_notex"
        args.min_len = 30
        args.max_len = 300
        args.decrease_inter = 50
        args.fuzzy_inter = 5
        args.tolerance = 5
        args.tsss = copy.deepcopy(self.example.tsss)
        args.pros = copy.deepcopy(self.example.pros)
        tas = copy.deepcopy(self.example.tas)
        args.nums = nums
        args.fuzzy = 20
        args.detects = detects
        args.texs = "texs"
        args.replicates = "rep"
        args.table_best = True
        args.wigs_f = ""
        args.wigs_r = ""
        args.file_type = "frag"
        args.break_tran = False
        args.notex = notex
        args.output = output
        args.cutoff_coverage = coverage
        args.out_table = out_table
        si.check_srna_condition(tas[0], args, None, args.wigs_f,
                                args.wigs_r)
        self.assertEqual(
            output.getvalue(),
            "aaa\tANNOgesic\tncRNA\t170\t230\t.\t+\t.\tID=aaa_srna0;Name=sRNA_00000;sRNA_type=intergenic;with_TSS=TSS:170_+\n"
        )
        self.assertEqual(
            out_table.getvalue(),
            "aaa\t00000\t170\t230\t+\tNA\tNA\tNA\tNA\tNA\tTSS:170_+\n")

    def test_intergenic_srna(self):
        """si.intergenic_srna creates the output gff and the output table."""
        si.read_libs = self.mock.mock_read_libs
        si.read_wig = self.mock.mock_read_wig
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
        tran_file = os.path.join(self.test_folder, "aaa_tran.gff")
        pro_file = os.path.join(self.test_folder, "aaa_processing.gff")
        wig_f_file = os.path.join(self.wig_folder, "wig_f.wig")
        wig_r_file = os.path.join(self.wig_folder, "wig_r.wig")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tss_file, self.example.gff_file)
        gen_file(tran_file, self.example.gff_file)
        gen_file(pro_file, self.example.gff_file)
        output_file = os.path.join(self.test_folder, "output")
        output_table = os.path.join(self.test_folder, "table")
        coverage = [0, 0, 0, 50, 10]
        si.replicate_comparison = self.mock.mock_replicate_comparison
        si.coverage_comparison = self.mock.mock_coverage_comparison
        args = self.mock_args.mock()
        args.gff_file = gff_file
        args.tran_file = tran_file
        args.pro_file = pro_file
        args.tss_file = tss_file
        args.table_best = True
        args.cutoffs = coverage
        args.out_folder = self.test_folder
        args.file_type = "frag"
        args.cut_notex = coverage
        args.input_libs = "input_libs"
        args.wig_folder = self.wig_folder
        args.wig_f_file = wig_f_file
        args.wig_r_file = wig_r_file
        args.tss_source = True
        args.output_file = output_file
        args.output_table = output_table
        args.in_cds = False
        args.wigs_f = None
        args.wigs_r = None
        si.intergenic_srna(args, args.input_libs, None, args.wigs_f,
                           args.wigs_r)
        self.assertTrue(os.path.exists(output_file))
        self.assertTrue(os.path.exists(output_table))
class TestRibos(unittest.TestCase):
    """Tests for the private scan/merge steps of ``Ribos``.

    Every test works inside a scratch ``test_folder`` tree that is created
    in ``setUp`` and removed again in ``tearDown``.
    """

    def setUp(self):
        """Build the scratch folder tree and the ``Ribos`` object under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = os.path.join(self.test_folder, "gffs")
        self.fastas = os.path.join(self.test_folder, "fastas")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.database = os.path.join(self.test_folder, "database")
        self.seq_path = os.path.join(self.test_folder, "seqs")
        self.tables = os.path.join(self.out_folder, "tables")
        self.stat = os.path.join(self.out_folder, "statistics")
        self.scan = os.path.join(self.test_folder, "scan")
        self.tsss = os.path.join(self.test_folder, "tsss")
        self.trans = os.path.join(self.test_folder, "trans")
        self.out_gff = os.path.join(self.out_folder, "gffs")
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.tsss)
            os.mkdir(os.path.join(self.tsss, "tmp"))
            os.mkdir(self.trans)
            os.mkdir(os.path.join(self.trans, "tmp"))
            os.mkdir(self.gffs)
            os.mkdir(os.path.join(self.gffs, "tmp"))
            os.mkdir(self.fastas)
            os.mkdir(os.path.join(self.fastas, "tmp"))
            os.mkdir(self.out_folder)
            os.mkdir(self.database)
            os.mkdir(self.seq_path)
            os.mkdir(os.path.join(self.out_folder, "tmp_table"))
            os.mkdir(os.path.join(self.out_folder, "tmp_scan"))
            os.mkdir(os.path.join(self.out_folder, "tmp_fasta"))
            os.mkdir(os.path.join(self.out_folder, "scan_Rfam"))
            os.mkdir(self.tables)
            os.mkdir(self.scan)
            os.mkdir(self.stat)
            os.mkdir(self.out_gff)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.ribos_out_folder = self.out_folder
        args.database = self.database
        args.tsss = self.tsss
        args.trans = self.trans
        # NOTE(review): 'riboswtich' looks like a typo for 'riboswitch'; it is
        # left untouched because Ribos may branch on this value -- confirm.
        args.program = 'riboswtich'
        self.ribo = Ribos(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_scan_extract_rfam(self):
        """``_scan_extract_rfam`` records the prefix and writes the fasta."""
        # The heavy steps are replaced by mocks on the instance and on ``rb``.
        self.ribo._run_cmscan = self.mock.mock_run_cmscan
        rb.modify_table = self.mock.mock_modify_table
        rb.regenerate_seq = self.mock.mock_regenerate_seq
        rb.reextract_rbs = self.mock.mock_reextract_rbs
        prefixs = []
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.seq_path, "test.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tsss, "tmp/test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.trans, "tmp/test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.out_folder, "tmp_fasta", "test.fa"),
                 self.example.fasta_file)
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.fuzzy = 5
        args.fuzzy_rbs = 2
        args.utr = True
        args.without_rbs = False
        args.rbs_seq = ["AGGAGG"]
        args.output_all = "test"
        args.cutoff = "e_0.01"
        tmp_files = {"fasta": os.path.join(self.out_folder, "tmp_fasta"),
                     "scan": "tmp_scan",
                     "table": os.path.join(self.out_folder, "tmp_table")}
        rfam = "Rfam_.cm"
        suffixs = {"csv": "test.csv", "txt": "test_prescan.txt",
                   "re_txt": "test_scan.txt", "re_csv": "test_scan.csv"}
        # The context manager closes the log even when the call raises, so
        # tearDown never has to delete a folder holding an open handle.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ribo._scan_extract_rfam(prefixs, args, tmp_files, suffixs,
                                         "test", rfam, log)
        self.assertListEqual(prefixs, ["test"])
        self.assertTrue(os.path.exists(os.path.join(
            self.out_folder, "tmp_fasta", "test_regenerate.fa")))

    def test_merge_results(self):
        """``_merge_results`` runs to completion on prepared scan outputs."""
        rb.stat_and_covert2gff = self.mock.mock_stat_and_covert2gff
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(
            self.out_folder, "tmp_table/test_riboswitch.csv"),
            self.example.table)
        gen_file(os.path.join(
            self.out_folder, "tmp_scan/test_riboswitch_prescan.txt"),
            self.example.rescan_file)
        gen_file(os.path.join(
            self.out_folder, "tmp_scan/test_riboswitch_scan.txt"),
            self.example.rescan_file)
        gen_file(os.path.join(
            self.test_folder, "ids"), self.example.ids)
        gen_file(os.path.join(
            self.tables, "test_riboswitch.csv"), self.example.table)
        gen_file('test_folder/output/tmp_table/test_test_scan.csv', "test")
        gen_file(os.path.join("test_folder/output", "tmp_fasta",
                              "test_regenerate.fa"), "test")
        gen_file('test_folder/output/tmp_scan/test_test_prescan.txt', "test")
        gen_file('test_folder/output/tmp_scan/test_test_scan.txt', "test")
        if not os.path.exists('test_folder/output/tmp_table/test_test.csv'):
            gen_file('test_folder/output/tmp_table/test_test.csv', "test")
        args = self.mock_args.mock()
        args.start_codons = ["ATG"]
        args.fastas = self.fastas
        args.out_folder = self.out_folder
        args.gffs = self.gffs
        args.ribos_id = os.path.join(self.test_folder, "ids")
        args.fuzzy = 3
        suffixs = {"csv": "test.csv", "txt": "test_prescan.txt",
                   "re_txt": "test_scan.txt", "re_csv": "test_scan.csv"}
        tmp_files = {"fasta": os.path.join(self.out_folder, "tmp_fasta"),
                     "scan": os.path.join(self.out_folder, "tmp_scan"),
                     "table": os.path.join(self.out_folder, "tmp_table")}
        # Closed deterministically instead of being left to the garbage
        # collector.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.ribo._merge_results(
                args, os.path.join(self.out_folder, "tmp_scan"),
                suffixs, tmp_files,
                os.path.join(self.out_folder, "tmp_scan"),
                os.path.join(self.out_folder, "scan_Rfam"),
                os.path.join(self.out_folder, "scan_Rfam"),
                os.path.join(self.out_folder, "gffs"), "riboswitch", log)
class TestsORFDetection(unittest.TestCase):
    """Tests for ``sORFDetection`` with the detection steps mocked out.

    Every test works inside a scratch ``test_folder`` tree that is created
    in ``setUp`` and removed again in ``tearDown``.
    """

    def setUp(self):
        """Create the scratch folders and the ``sORFDetection`` instance."""
        self.mock = Mock_func()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.tsss = "test_folder/tsss"
        self.srnas = "test_folder/sRNA"
        self.out = "test_folder/output"
        self.trans = "test_folder/trans"
        self.fastas = "test_folder/fastas"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.gffs = "test_folder/gffs"
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.tsss)
            os.mkdir(self.out)
            os.mkdir(self.trans)
            os.mkdir(self.fastas)
            os.mkdir(self.tex)
            os.mkdir(self.frag)
            os.mkdir(self.srnas)
            os.mkdir(self.gffs)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.srnas = self.srnas
        args.out_folder = self.out
        args.trans = self.trans
        args.fastas = self.fastas
        self.sorf = sORFDetection(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_start_stop_codon(self):
        """``_start_stop_codon`` leaves the four ``test_sORF`` result files."""
        gff_path = os.path.join(self.out, "gffs")
        table_path = os.path.join(self.out, "tables")
        os.mkdir(gff_path)
        os.mkdir(table_path)
        os.mkdir(os.path.join(gff_path, "all_candidates"))
        os.mkdir(os.path.join(table_path, "all_candidates"))
        os.mkdir(os.path.join(gff_path, "best_candidates"))
        os.mkdir(os.path.join(table_path, "best_candidates"))
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_all.gff"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_all.csv"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_best.gff"),
                 "test")
        gen_file(os.path.join(gff_path, "all_candidates/test_sORF_best.csv"),
                 "test")
        so.sorf_detection = self.mock.mock_sorf_detection
        args = self.mock_args.mock()
        args.libs = "libs"
        args.tex_notex = "tex_notex"
        args.replicates = "replicates"
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.background = "background"
        args.wig_path = "wig_path"
        args.merge_wigs = "merge_wigs"
        # ``with`` guarantees the log is closed even if the call raises or an
        # assertion below fails; a trailing ``log.close()`` would be skipped.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.sorf._start_stop_codon(["test"], args, log)
        self.assertTrue(os.path.exists(os.path.join(
            gff_path, "best_candidates/test_sORF.gff")))
        self.assertTrue(os.path.exists(os.path.join(
            gff_path, "all_candidates/test_sORF.gff")))
        self.assertTrue(os.path.exists(os.path.join(
            table_path, "best_candidates/test_sORF.csv")))
        self.assertTrue(os.path.exists(os.path.join(
            table_path, "all_candidates/test_sORF.csv")))

    def test_compare_tran_cds(self):
        """``_compare_tran_cds`` returns the prefix of the gff it finds."""
        so.get_intergenic = self.mock.mock_get_intergenic
        gen_file(os.path.join(self.test_folder, "test.gff"), "test")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.gffs = self.test_folder
        args.hypo = False
        args.utr_detect = True
        args.extend_5 = 5
        args.extend_3 = 75
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            prefixs = self.sorf._compare_tran_cds(args, log)
        self.assertListEqual(prefixs, ["test"])

    def test_run_sorf_detection(self):
        """``run_sorf_detection`` completes when all collaborators are mocked."""
        gff_path = os.path.join(self.out, "gffs")
        table_path = os.path.join(self.out, "tables")
        os.mkdir(gff_path)
        os.mkdir(table_path)
        os.mkdir(os.path.join(gff_path, "all_candidates"))
        os.mkdir(os.path.join(table_path, "all_candidates"))
        os.mkdir(os.path.join(gff_path, "best"))
        os.mkdir(os.path.join(table_path, "best"))
        so.get_intergenic = self.mock.mock_get_intergenic
        so.sorf_detection = self.mock.mock_sorf_detection
        self.sorf._remove_tmp = self.mock.mock_remove_tmp
        self.sorf._check_gff = self.mock.mock_check_gff
        self.sorf._check_necessary_files = self.mock.mock_check_necessary_files
        self.sorf.multiparser = Mock_Multiparser()
        args = self.mock_args.mock()
        args.trans = self.trans
        args.gffs = self.gffs
        args.tsss = self.tsss
        args.out_folder = self.out
        args.libs = "libs"
        args.tex_notex = "tex_notex"
        args.replicates = "replicates"
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.background = "background"
        args.wig_path = "wig_path"
        args.merge_wigs = "merge_wigs"
        args.fuzzy_rbs = 2
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.sorf.run_sorf_detection(args, log)
class TestTranscriptAssembly(unittest.TestCase):
    """Checks for the transcript-assembly functions reached through ``ta``."""

    def setUp(self):
        """Load the fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder if it is still there."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_wig(self):
        """``read_wig`` parses the forward wig into the reference dict."""
        frag_lib = {"name": "test1", "type": "frag", "cond": "1",
                    "strand": "+", "rep": "a"}
        wig_path = os.path.join(self.test_folder, "test_f.wig")
        gen_file(wig_path, self.example.wig_f)
        parsed = ta.read_wig(wig_path, [frag_lib], "+")
        self.assertDictEqual(parsed, self.example.wigs_f)

    def test_detect_hight_toler(self):
        """A coverage above the current best replaces ``best`` only."""
        running = {"best": 10, "toler": 2}
        ta.detect_hight_toler({"coverage": 100, "track": "test_1"},
                              5, running, [])
        self.assertDictEqual(running, {'best': 100, 'toler': 2})

    def test_check_tex_conds(self):
        """Each track contributes one count to its ``cond_type`` key."""
        frag_lib = {"name": "test1", "type": "frag", "cond": "1",
                    "strand": "+", "rep": "a"}
        tex_lib = {"name": "test2", "type": "tex", "cond": "2",
                   "strand": "+", "rep": "a"}
        seen = []
        counted = {}
        ta.check_tex_conds(["test1", "test2"], [frag_lib, tex_lib],
                           {"test1": 2, "test2": 2}, seen, counted, 1)
        self.assertDictEqual(counted, {'1_frag': 1, '2_tex': 1})

    def test_elongation(self):
        """``elongation`` reports the last position and stores the first."""
        coverage_rows = [
            {"coverage": 10, "pos": 10, "track": "test1"},
            {"coverage": 1, "pos": 10, "track": "test2"},
            {"coverage": 100, "pos": 11, "track": "test1"},
            {"coverage": 20, "pos": 11, "track": "test2"},
        ]
        lib_rows = [
            {"name": "test1", "type": "tex", "cond": "1",
             "strand": "+", "rep": "a"},
            {"name": "test2", "type": "notex", "cond": "1",
             "strand": "+", "rep": "a"},
        ]
        tex_pairs = {"test1_test2": 2}
        toler_list = []
        transcripts = {"aaa": []}
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.height = 5
        args.tex = 2
        result = ta.elongation(coverage_rows, tex_pairs, lib_rows, "+",
                               transcripts, args, "aaa", toler_list)
        cover_best, conds, tracks, texs, pos = result
        self.assertEqual(cover_best, 100)
        self.assertListEqual(tracks, ['test1', 'test2'])
        self.assertDictEqual(texs, {'test1_test2': 2})
        self.assertEqual(pos, 11)
        self.assertDictEqual(
            transcripts,
            {'aaa': [{'coverage': 10, 'cond': 1, 'strand': '+', 'pos': 10}]})

    def test_transfer_to_tran(self):
        """``transfer_to_tran`` yields the expected tolerances and hits."""
        lib_rows = [{"name": "test1", "type": "frag", "cond": "1",
                     "strand": "+", "rep": "a"}]
        args = self.mock_args.mock()
        args.height = 10
        args.tex = 1
        args.replicates = {"tex": 1, "frag": 1}
        tolers, trans = ta.transfer_to_tran(self.example.wigs_f, lib_rows,
                                            {"test1": 2}, "+", args)
        expected_trans = {'aaa': [
            {'pos': 3, 'cond': 1, 'strand': '+', 'coverage': 41.0},
            {'pos': 4, 'cond': 1, 'strand': '+', 'coverage': 47.0},
            {'pos': 6, 'cond': 1, 'strand': '+', 'coverage': 47.0},
            {'pos': 8, 'cond': 1, 'strand': '+', 'coverage': 47.0},
        ]}
        self.assertDictEqual(tolers,
                             {'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 7.0]})
        self.assertDictEqual(trans, expected_trans)

    def test_fill_gap_and_print(self):
        """``fill_gap_and_print`` writes the reference transcript text."""
        hits = {'aaa': [
            {'pos': 3, 'cond': 1, 'strand': '+', 'coverage': 41.0},
            {'pos': 4, 'cond': 1, 'strand': '+', 'coverage': 47.0},
            {'pos': 6, 'cond': 1, 'strand': '+', 'coverage': 47.0},
            {'pos': 8, 'cond': 1, 'strand': '+', 'coverage': 47.0},
        ]}
        buffer = StringIO()
        tolerances = {'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 20]}
        args = self.mock_args.mock()
        args.tolerance = 3
        args.low_cutoff = 5
        args.width = 1
        ta.fill_gap_and_print(hits, "+", buffer, tolerances, "TEX", args)
        self.assertEqual(buffer.getvalue(), self.example.out_tran + "\n")

    def test_print_transctipt(self):
        """``print_transctipt`` emits a single GFF3 transcript line."""
        buffer = StringIO()
        ta.print_transctipt(100, 200, 20, 1, 40, "TEX", 20, buffer,
                            "aaa", "+")
        expected_line = (
            "aaa\tANNOgesic\tTranscript\t100\t200\t.\t+\t.\t"
            "ID=tran_1;Name=Transcript_00001;high_coverage=40;"
            "low_coverage=20;detect_lib=TEX\n")
        self.assertEqual(buffer.getvalue(), expected_line)

    def test_assembly(self):
        """``assembly`` writes a GFF3 file matching the reference output."""
        # Two forward/reverse wig pairs, each seeded from the same fixtures.
        wig_f_file = os.path.join(self.test_folder, "aaa_forward.wig")
        wig_r_file = os.path.join(self.test_folder, "aaa_reverse.wig")
        wig_f2_file = os.path.join(self.test_folder, "aaa2_forward.wig")
        wig_r2_file = os.path.join(self.test_folder, "aaa2_reverse.wig")
        gen_file(wig_f_file, self.example.wig_f)
        gen_file(wig_r_file, self.example.wig_r)
        gen_file(wig_f2_file, self.example.wig_f)
        gen_file(wig_r2_file, self.example.wig_r)
        out_file = os.path.join(self.test_folder, "out")
        input_lib = [
            "aaa_forward.wig:frag:1:a:+",
            "aaa_reverse.wig:frag:1:a:-",
            "aaa2_forward.wig:tex:1:a:+",
            "aaa2_reverse.wig:tex:1:a:-",
        ]
        args = self.mock_args.mock()
        args.replicates = {"tex": 1, "frag": 1}
        args.height = 10
        args.width = 1
        args.tolerance = 3
        args.tex = 2
        args.low_cutoff = 5
        ta.assembly(wig_f_file, wig_r_file, self.test_folder, input_lib,
                    out_file, "TEX", args)
        written = "\n".join(import_data(out_file))
        self.assertEqual(written,
                         "##gff-version 3\n" + self.example.out_tran)
class TestMergeRNAplexRNAup(unittest.TestCase):
    """Checks for the RNAplex/RNAup merging functions reached through ``mrr``."""

    def setUp(self):
        """Start every test from a freshly created, empty scratch folder."""
        self.test_folder = "test_project"
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        os.mkdir(self.test_folder)
        self.example = Example()
        self.mock_args = MockClass()

    def tearDown(self):
        """Delete the scratch folder if it is still there."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_detect_energy(self):
        """``detect_energy`` updates ``energy`` for both starting values."""
        weak = {"energy": -2}
        mrr.detect_energy(self.example.out_rna_txt, weak)
        self.assertDictEqual(weak, {'energy': -5.3})
        strong = {"energy": -8}
        mrr.detect_energy(self.example.out_rna_txt, strong)
        self.assertDictEqual(strong, {'energy': -8.0})

    def test_print_rank_one(self):
        """``print_rank_one`` output matches the reference, ignoring blanks."""
        buffer = StringIO()
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        mrr.print_rank_one(self.example.srnas, buffer, "RNAplex",
                           self.example.gffs, self.example.srna_gffs,
                           args_tar, 50)
        parsed = convert_dict(buffer.getvalue().split("\n"))
        # Entries with an empty key are dropped before comparing.
        non_empty = {key: value for key, value in parsed.items()
                     if len(key) != 0}
        expected = convert_dict(self.example.out_print.split("\n"))
        self.assertDictEqual(non_empty, expected)

    def test_read_table(self):
        """``read_table`` groups the parsed interactions by tool and sRNA."""
        rnaplex = os.path.join(self.test_folder, "rnaplex")
        rnaup = os.path.join(self.test_folder, "rnaup")
        gen_file(rnaplex, self.example.rnaplex)
        gen_file(rnaup, self.example.rnaup)
        srnas = mrr.read_table(self.example.srna_gffs, rnaplex, rnaup,
                               self.example.genes, self.example.gffs,
                               ["CDS"])
        expected_rnaup = {
            'srna0': [
                {'srna_pos': '20,25', 'energy': -4.87,
                 'tar_pos': '571,576', 'gene_id': 'gene0',
                 'target_id': 'cds0', 'target_locus': 'AAA_00001',
                 'detail': '100-150_+'},
                {'srna_pos': '11,26', 'energy': -5.91,
                 'tar_pos': '14,30', 'gene_id': 'NA',
                 'target_id': 'cds1', 'target_locus': 'AAA_00003',
                 'detail': '2348-2934_+'},
            ],
        }
        expected_rnaplex = {
            'srna0': [
                {'srna_pos': '20,25', 'energy': -5.3,
                 'tar_pos': '571,576', 'gene_id': 'gene0',
                 'target_id': 'cds0', 'target_locus': 'AAA_00001',
                 'detail': '100-150_+'},
            ],
            'srna1': [
                {'srna_pos': '24,31', 'energy': -1.91,
                 'tar_pos': '163,170', 'gene_id': 'gene0',
                 'target_id': 'cds0', 'target_locus': 'AAA_00001',
                 'detail': '100-150_+'},
            ],
        }
        self.assertDictEqual(
            srnas, {'RNAup': expected_rnaup, 'RNAplex': expected_rnaplex})

    def test_get_srna_name(self):
        """``get_srna_name`` returns the name and the matching entry."""
        found = mrr.get_srna_name(self.example.srna_gffs, "srna0")
        self.assertEqual(found[0], 'sRNA_0')
        self.assertEqual(found[1].start, 6)

    def test_get_target_info(self):
        """``get_target_info`` finds the gff entry of the target."""
        target = {
            "gene_id": "gene0",
            "detail": "100-150_+",
            "target_id": "cds0",
            "target_locus": "AAA_00001",
            "energy": -6.5,
        }
        found = mrr.get_target_info(self.example.gffs, target)
        self.assertEqual(found.start, 100)

    def test_merge_base_rnaplex(self):
        """All three expected rows appear in the overlap and merge lists."""
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15
        merges = []
        overlap = mrr.merge_base_rnaplex(self.example.srnas,
                                         self.example.srna_gffs, args_tar,
                                         self.example.gffs, merges, 50)
        expected_rows = [
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'gene0', 'cds0',
             'AAA_00001', '100-150', '89-50', '89-50', '+', '-6.5', '1',
             '-6.5', '1'],
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+',
             'gene2', 'cds2', 'AAA_00003', '2348-2934', '2337-50',
             '2337-50', '+', '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-',
             'gene0', 'cds0', 'AAA_00001', '100-150', '89-50', '89-50',
             '+', '-23.5', '1', '-23.5', '1'],
        ]
        # Count every (expected, produced) pair that compares equal.
        overlap_hits = sum(1 for row in expected_rows
                           for produced in overlap if row == produced)
        self.assertEqual(overlap_hits, 3)
        merge_hits = sum(1 for row in expected_rows
                         for produced in merges if row == produced)
        self.assertEqual(merge_hits, 3)

    def test_merge_base_rnaup(self):
        """All three expected rows appear in the merge list."""
        args_tar = self.mock_args.mock()
        args_tar.top = 2
        args_tar.tar_start = 20
        args_tar.tar_end = 15

        def gene0_target(energy, rank):
            # A fresh dict per call, so no two entries share an object.
            return {"gene_id": "gene0", "detail": "100-150_+",
                    "target_id": "cds0", "target_locus": "AAA_00001",
                    "energy": energy, "rank": rank,
                    "srna_pos": "2,10", "tar_pos": "10,15"}

        def gene2_target(energy, rank):
            return {"gene_id": "gene2", "detail": "2348-2934_+",
                    "target_id": "cds2", "target_locus": "AAA_00003",
                    "energy": energy, "rank": rank,
                    "srna_pos": "2,10", "tar_pos": "10,15"}

        srnas = {
            "RNAplex": {
                "srna0": [gene0_target(-6.5, 1)],
                "srna1": [gene2_target(-10.5, 1)],
                "srna2": [gene0_target(-23.5, 1), gene2_target(-6.5, 2)],
            },
            "RNAup": {
                "srna0": [gene0_target(-6.5, 1)],
                "srna1": [gene2_target(-10.5, 1)],
                "srna2": [gene0_target(-23.5, 1)],
            },
        }
        merges = []
        mrr.merge_base_rnaup(srnas, self.example.srna_gffs, args_tar,
                             self.example.gffs, merges, 50)
        expected_rows = [
            ['sRNA_1', 'aaa', '1258-2234', '1259-1267', '1259-1267', '+',
             'gene2', 'cds2', 'AAA_00003', '2348-2934', '2337-50',
             '2337-50', '+', '-10.5', '1', '-10.5', '1'],
            ['sRNA_2', 'aaa', '3544-6517', '6508-6516', '6508-6516', '-',
             'gene0', 'cds0', 'AAA_00001', '100-150', '89-50', '89-50',
             '+', '-23.5', '1', '-23.5', '1'],
            ['sRNA_0', 'aaa', '6-15', '7-15', '7-15', '+', 'gene0', 'cds0',
             'AAA_00001', '100-150', '89-50', '89-50', '+', '-6.5', '1',
             '-6.5', '1'],
        ]
        merge_hits = sum(1 for row in expected_rows
                         for produced in merges if row == produced)
        self.assertEqual(merge_hits, 3)
class TestSubLocal(unittest.TestCase):
    """Checks for the private steps of ``SubLocal`` with psortb mocked out."""

    def setUp(self):
        """Create the scratch folders and the ``SubLocal`` instance."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.stat = "test_folder/stat"
        self.trans = "test_folder/tran"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
            os.mkdir(self.out)
            os.mkdir(self.fastas)
            os.mkdir(os.path.join(self.fastas, "tmp"))
            os.mkdir(self.gffs)
            os.mkdir(os.path.join(self.gffs, "tmp"))
            os.mkdir(self.stat)
            os.mkdir(self.trans)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.out_folder = self.out
        args.trans = self.trans
        self.sub = SubLocal(args)

    def tearDown(self):
        """Delete the scratch folder if it is still there."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_protein_seq(self):
        """``_get_protein_seq`` returns the prefix of the gff it is given."""
        gff_name = "aaa.gff"
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.gffs, "tmp", gff_name),
                 self.example.gff_file)
        gen_file(os.path.join(self.trans, "aaa_transcript.gff"),
                 self.example.tran_file)
        prefix = self.sub._get_protein_seq(gff_name, self.test_folder,
                                           self.trans)
        self.assertEqual(prefix, "aaa")

    def test_run_psortb(self):
        """``_run_psortb`` creates the log and the raw result file."""
        self.sub._psortb = self.mock.mock_psortb
        tmp_result = os.path.join(self.out, "tmp_results")
        os.mkdir(tmp_result)
        args = self.mock_args.mock()
        args.psortb_path = "psortb_path"
        args.gram = "positive"
        self.sub._run_psortb(args, "aaa", self.out, self.test_folder,
                             tmp_result)
        log_path = os.path.join(self.out, "tmp_log")
        raw_path = os.path.join(tmp_result, "_".join(["aaa", "raw.txt"]))
        self.assertTrue(os.path.exists(log_path))
        self.assertTrue(os.path.exists(raw_path))

    def test_merge_and_stat(self):
        """``_merge_and_stat`` creates the per-genome output entries."""
        su.stat_sublocal = self.mock.mock_stat_sublocal
        os.mkdir(os.path.join(self.gffs, "aaa.gff_folder"))
        gen_file(os.path.join(self.gffs, "aaa.gff_folder/aaa.gff"), "test")
        os.mkdir(os.path.join(self.out, "psortb_results"))
        gen_file(os.path.join(self.out, "aaa_raw.txt"), "test")
        gen_file(os.path.join(self.out, "aaa_table.csv"), "test")
        self.sub._merge_and_stat(self.gffs, self.out, self.test_folder,
                                 self.stat)
        for parent in (self.stat, self.test_folder):
            self.assertTrue(os.path.exists(os.path.join(parent, "aaa")))

    def test_compare_cds_tran(self):
        """``_compare_cds_tran`` writes the expected CDS to ``tmp_cds.gff``."""
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tran_file = os.path.join(self.test_folder, "aaa_transcript.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.tran_file)
        self.sub._compare_cds_tran(gff_file, tran_file)
        # Only the first element returned by ``extract_info`` is checked.
        rows, _ = extract_info("test_folder/output/all_CDS/tmp_cds.gff",
                               "file")
        self.assertEqual("\n".join(rows),
                         'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')
class TestCircRNA(unittest.TestCase):
    """Checks for the circRNA detection functions reached through ``circ``."""

    def setUp(self):
        """Load the fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder if it is still there."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_feature(self):
        """``get_feature`` falls back from locus_tag to protein_id to ID."""
        cases = [
            ({"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001",
              "protein_id": "YP_918384.3"}, "AAA_00001"),
            ({"ID": "cds0", "Name": "CDS_0",
              "protein_id": "YP_918384.3"}, "YP_918384.3"),
            ({"ID": "cds0", "Name": "CDS_0"}, "cds0:122-267_f"),
        ]
        for attributes_cds, expected in cases:
            entry = Create_generator(self.example.cds_dict, attributes_cds,
                                     "gff")
            self.assertEqual(circ.get_feature(entry), expected)

    def test_detect_conflict(self):
        """``detect_conflict`` writes one line for the overlapping circRNA."""
        circ_dict = {
            "seq_id": "aaa", "source": "Refseq", "feature": "circRNA",
            "start": 100, "end": 467, "phase": ".", "strand": "+",
            "score": ".", "support": 30, "start_site": 30, "end_site": 35,
            "situation": "P", "splice_type": "C",
        }
        circrna = Create_generator(
            circ_dict, {"ID": "circrna0", "Name": "circRNA_0"}, "circ")
        cds_entry = Create_generator(self.example.cds_dict,
                                     self.example.attributes_cds, "gff")
        args = self.mock_args.mock()
        args.start_ratio = 0.3
        args.end_ratio = 0.3
        args.support = 5
        out = StringIO()
        out_best = StringIO()
        circ.detect_conflict([cds_entry], circrna, 0, out, out_best, args)
        # NOTE(review): field separators are kept exactly as they appear in
        # this file -- confirm they match what detect_conflict emits.
        self.assertEqual(out.getvalue(), "circRNA_0 aaa + 100 467 AAA_00001 30 1.0 0.8571428571428571\n")
        out.close()

    def test_get_circrna(self):
        """``get_circrna`` returns the expected per-strain statistics."""
        circs = [Create_generator(self.example.circ_dict[idx],
                                  self.example.attributes_circ[idx], "circ")
                 for idx in range(0, 5)]
        gffs = [Create_generator(self.example.gffs_dict[idx],
                                 self.example.attributes_gffs[idx], "gff")
                for idx in range(0, 3)]
        out = StringIO()
        out_best = StringIO()
        args = self.mock_args.mock()
        args.start_ratio = 0.3
        args.end_ratio = 0.3
        args.support = 5
        nums = circ.get_circrna(circs, gffs, 50, out, out_best, args)
        expected_support = {
            'aaa': {0: 2, 20: 1, 5: 2, 25: 1, 10: 2, 30: 1, 15: 1},
            'all': {0: 3, 20: 1, 5: 3, 25: 1, 10: 2, 30: 1, 15: 1},
            'bbb': {0: 1, 5: 1},
        }
        expected_conflict = {
            'bbb': {0: 1, 5: 1},
            'aaa': {},
            'all': {0: 1, 5: 1},
        }
        self.assertDictEqual(nums["support"], expected_support)
        self.assertDictEqual(nums["circular"],
                             {'bbb': 1, 'aaa': 2, 'all': 3})
        self.assertDictEqual(nums["conflict"], expected_conflict)

    def test_detect_circrna(self):
        """``detect_circrna`` writes the expected table and statistics."""
        out_file = os.path.join(self.test_folder, "out_all.csv")
        stat_file = os.path.join(self.test_folder, "stat.csv")
        circ.read_file = Mock_read_file().read_file
        args = self.mock_args.mock()
        args.start_ratio = 0.5
        args.end_ratio = 0.5
        args.support = 5
        args.hypo = True
        circ.detect_circrna("test.circ", "test.gff", out_file, args,
                            stat_file)
        circs = import_data(out_file)
        stats = import_data(stat_file)
        # Line order is not significant, so both sides are compared as sets.
        expected_circs = set(self.example.out_file.split("\n"))
        expected_stats = set(self.example.stat_file.split("\n"))
        self.assertEqual(set(circs), expected_circs)
        self.assertEqual(set(stats), expected_stats)
class TestTranscriptAssembly(unittest.TestCase):
    """Checks for the transcript-detection functions reached through ``ta``."""

    def setUp(self):
        """Load the fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder if it is still there."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_read_wig(self):
        """``read_wig`` reproduces the reference track value by value."""
        lib_rows = [{
            "name": "test1",
            "type": "frag",
            "cond": "frag_1",
            "strand": "+",
            "rep": "a",
        }]
        wig_path = os.path.join(self.test_folder, "test_f.wig")
        gen_file(wig_path, self.example.wig_f)
        wigs = ta.read_wig(wig_path, "+", lib_rows)
        parsed_track = wigs["aaa"]['frag_1']["test1|+|frag"]
        # Compared element-wise, indexing the reference by position.
        for idx, value in enumerate(parsed_track):
            self.assertEqual(
                value,
                self.example.wigs_nf["aaa"]['frag_1']["test1|+|frag"][idx])

    def test_detect_hight_toler(self):
        """A coverage above the current best replaces ``best`` only."""
        running = {"best": 10, "toler": 2}
        ta.detect_hight_toler(100, 5, running, [], "test_1|+|frag")
        self.assertDictEqual(running, {'best': 100, 'toler': 2})

    def test_check_tex_conds(self):
        """Each track contributes one count to its condition key."""
        lib_rows = [
            {"name": "test1", "type": "frag", "cond": "1",
             "strand": "+", "rep": "a"},
            {"name": "test2", "type": "tex", "cond": "2",
             "strand": "+", "rep": "a"},
        ]
        seen = []
        counted = {}
        ta.check_tex_conds(["test1", "test2"], lib_rows,
                           {"test1": 2, "test2": 2}, seen, counted, 1)
        self.assertDictEqual(counted, {'1': 1, '2': 1})

    def test_elongation(self):
        """``elongation`` fills ``trans`` with -1 except the covered tail."""
        covers = {
            "texnotex_1": {
                "test1|+|texnotex_1": [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 20],
                "test2|+|texnotex_1": [0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 100],
            },
        }
        lib_rows = [
            {"name": "test1", "type": "tex", "cond": "texnotex_1",
             "strand": "+", "rep": "a"},
            {"name": "test2", "type": "notex", "cond": "texnotex_1",
             "strand": "+", "rep": "a"},
        ]
        tex_pairs = {"test1_test2": 2}
        tolers = []
        transcripts = {"aaa": []}
        args = self.mock_args.mock()
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        args.height = 5
        args.tex = 2
        ta.elongation(covers, tex_pairs, lib_rows, "+", transcripts, args,
                      "aaa", [])
        self.assertDictEqual(
            transcripts,
            {'aaa': [-1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 100]})

    def test_transfer_to_tran(self):
        """``transfer_to_tran`` yields the expected tolerances and hits."""
        lib_rows = [{
            "name": "test1",
            "type": "frag",
            "cond": "frag_1",
            "strand": "+",
            "rep": "a",
        }]
        args = self.mock_args.mock()
        args.height = 10
        args.tex = 1
        args.replicates = {"tex": ["all_1"], "frag": ["all_1"]}
        tolers, trans = ta.transfer_to_tran(self.example.wigs_f, lib_rows,
                                            {"test1": 2}, "+", args)
        self.assertDictEqual(
            tolers, {'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 7.0, 20]})
        self.assertDictEqual(
            trans, {'aaa': [-1, -1, 41.0, 47.0, -1, 47.0, -1, 47.0]})

    def test_fill_gap_and_print(self):
        """``fill_gap_and_print`` collects two transcripts for ``aaa``."""
        hits = {'aaa': [-1, -1, 41.0, 47.0, -1, 47.0, -1, 47.0]}
        tolerances = {
            'aaa': [0.0, 2.0, 20, 20, 4.0, 20, 7, 7, 7, 7, 7, 7, 7, 20],
        }
        args = self.mock_args.mock()
        args.tolerance = 3
        args.low_cutoff = 5
        args.width = 1
        finals = {}
        ta.fill_gap_and_print(hits, "+", finals, tolerances, "TEX", args)
        first = {'strand': '+', 'high': 47.0, 'end': 4, 'wig': 'TEX',
                 'low': 41.0, 'start': 3}
        second = {'strand': '+', 'high': 47.0, 'end': 8, 'wig': 'TEX',
                  'low': 47.0, 'start': 6}
        self.assertDictEqual(finals, {'aaa': [first, second]})

    def test_print_transctipt(self):
        """``print_transcript`` emits a single GFF3 transcript line."""
        buffer = StringIO()
        transcript = {"start": 100, "end": 200, "strand": "+",
                      "high": 40, "low": 20, "wig": "TEX"}
        ta.print_transcript({"aaa": [transcript]}, buffer)
        expected_line = (
            "aaa\tANNOgesic\ttranscript\t100\t200\t.\t+\t.\t"
            "ID=aaa_transcript0;Name=transcript_00000;high_coverage=40;"
            "low_coverage=20;detect_lib=TEX\n")
        self.assertEqual(buffer.getvalue(), expected_line)

    def test_assembly(self):
        """``detect_transcript`` writes a GFF3 file matching the reference."""
        # Two forward/reverse wig pairs, each seeded from the same fixtures.
        wig_f_file = os.path.join(self.test_folder, "aaa_forward.wig")
        wig_r_file = os.path.join(self.test_folder, "aaa_reverse.wig")
        wig_f2_file = os.path.join(self.test_folder, "aaa2_forward.wig")
        wig_r2_file = os.path.join(self.test_folder, "aaa2_reverse.wig")
        gen_file(wig_f_file, self.example.wig_f)
        gen_file(wig_r_file, self.example.wig_r)
        gen_file(wig_f2_file, self.example.wig_f)
        gen_file(wig_r2_file, self.example.wig_r)
        out_file = os.path.join(self.test_folder, "out")
        input_lib = [
            "aaa_forward.wig:frag:1:a:+",
            "aaa_reverse.wig:frag:1:a:-",
            "aaa2_forward.wig:tex:1:a:+",
            "aaa2_reverse.wig:tex:1:a:-",
        ]
        args = self.mock_args.mock()
        args.replicates = {"tex": "all_1", "frag": "all_1"}
        args.height = 10
        args.width = 1
        args.tolerance = 3
        args.tex = 2
        args.low_cutoff = 5
        ta.detect_transcript(wig_f_file, wig_r_file, self.test_folder,
                             input_lib, out_file, "TEX", args)
        written = "\n".join(import_data(out_file))
        self.assertEqual(written,
                         "##gff-version 3\n" + self.example.out_tran)
class TestsRNAUTR(unittest.TestCase):
    """Tests for the UTR-derived sRNA detection helpers exposed by ``sud``.

    Module-level functions of ``sud`` are replaced through ``_patch_sud`` so
    the originals are put back even when a test fails part-way through.
    """

    def setUp(self):
        self.example = Example()
        self.mock = Mock_func()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        os.makedirs(self.test_folder, exist_ok=True)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _patch_sud(self, name, replacement):
        """Replace ``sud.<name>`` for the current test only.

        The previous value is restored (or the attribute removed if it did
        not exist) by a cleanup that runs whether the test passes or fails.
        """
        if hasattr(sud, name):
            self.addCleanup(setattr, sud, name, getattr(sud, name))
        else:
            self.addCleanup(delattr, sud, name)
        setattr(sud, name, replacement)

    def _make_args(self, **attrs):
        """Return a fresh mock argument container with ``attrs`` set on it."""
        args = self.mock_args.mock()
        for key, value in attrs.items():
            setattr(args, key, value)
        return args

    @staticmethod
    def _frag_cover(**extra):
        """Return the coverage record the tests expect for ``track_1``.

        ``extra`` adds or overrides keys (e.g. ``conds``).
        """
        cover = {
            'track': 'track_1',
            'type': 'frag',
            'final_start': 2,
            'final_end': 20,
            'avg': 41.36842105263158,
            'ori_avg': 27.52,
            'high': 50,
            'low': 10,
        }
        cover.update(extra)
        return cover

    @classmethod
    def _record(cls, utr, start, end, start_tss='NA', start_cleavage='NA',
                end_cleavage='NA', **cover_extra):
        """Build one sRNA/UTR record on strain ``aaa``, strand ``+``."""
        return {
            'strain': 'aaa',
            'strand': '+',
            'utr': utr,
            'start': start,
            'end': end,
            'start_tss': start_tss,
            'start_cleavage': start_cleavage,
            'end_cleavage': end_cleavage,
            'datas': {'frag_1': [cls._frag_cover(**cover_extra)]},
        }

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_import_data(self):
        pos = {"start": 4, "end": 40, "ori_start": 2, "ori_end": 3}
        datas = sud.import_data("+", "aaa", pos, "3UTR", "TSS", "cds",
                                "srna_cover", "test")
        self.assertDictEqual(datas, {
            'start_cleavage': 'NA',
            'strand': '+',
            'end_cleavage': 'test',
            'start_tss': 'cds',
            'end': 40,
            'start': 4,
            'utr': '3UTR',
            'strain': 'aaa',
            'datas': 'srna_cover',
        })

    def test_read_data(self):
        gff_path = os.path.join(self.test_folder, "test.gff")
        args = self._make_args(
            gff_file=gff_path,
            ta_file=gff_path,
            tss_file=gff_path,
            pro_file=gff_path,
            seq_file=os.path.join(self.test_folder, "test.fa"))
        gen_file(args.gff_file, self.example.gff_file)
        gen_file(args.seq_file, self.example.seq_file)
        args.hypo = False
        cdss, tas, tsss, pros, seq = sud.read_data(args)
        self.assertEqual(cdss[0].start, 4)
        self.assertEqual(tas[0].start, 4)
        self.assertEqual(tsss[0].start, 4)
        self.assertEqual(pros[0].start, 4)
        self.assertDictEqual(seq, {
            'aaa': 'ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT'
        })

    def test_get_terminal(self):
        inters = []
        seq = {
            "aaa": "ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT"
        }
        sud.get_terminal(self.example.gffs, inters, seq, "start")
        self.assertListEqual(inters, [{
            'end': 4,
            'len_CDS': 0,
            'strand': '+',
            'strain': 'aaa',
            'start': 1,
        }])

    def test_get_inter(self):
        inters = []
        sud.get_inter(self.example.gffs, inters)
        self.assertListEqual(inters, [{
            'start': 14,
            'strand': '+',
            'end': 20,
            'strain': 'aaa',
            'len_CDS': 10,
        }])

    def test_set_cover_and_point(self):
        covers = [2, 3, 4, 1, 6, 2, 8, 3, 5, 6, 7, 5, 2, 1]
        cover_results = {"covers": None, "check_point": None}
        pos = {"start": 2, "end": 6, "ori_start": 2, "ori_end": 3}
        sud.set_cover_and_point(cover_results, self.example.inters[0],
                                covers, pos, 5)
        self.assertListEqual(cover_results["covers"],
                             [2, 3, 4, 1, 6, 2, 8, 3, 5])
        self.assertDictEqual(cover_results["check_point"], {
            'srna_start': 0,
            'utr_start': 2,
            'utr_end': 3,
            'srna_end': 12,
        })

    def test_check_import_srna_covers(self):
        args = self.mock_args.mock()
        cover = {"type": "5utr"}
        datas = {
            "num": 0,
            "cover_tmp": {"total": 100, "ori_total": 200},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 3, "end": 23},
        }
        cover_results = {
            "cover_sets": {"high": 50, "low": 10},
            "srna_covers": {"cond_1": []},
            "utr_covers": {"cond_1": []},
            "type": "5utr",
            "intercds": "TSS",
        }
        args.min_len = 30
        args.max_len = 500
        pos = {"start": 1, "end": 25, "ori_start": 1, "ori_end": 25}
        sud.check_import_srna_covers(datas, cover_results,
                                     self.example.inters[0], "cond_1",
                                     "track", cover, pos, args, "5utr")
        self.assertDictEqual(datas["final_poss"], {'end': 23, 'start': 3})
        self.assertDictEqual(cover_results["srna_covers"], {
            'cond_1': [{
                'final_start': 3,
                'high': 50,
                'ori_avg': 8.0,
                'final_end': 23,
                'low': 10,
                'type': '5utr',
                'avg': 4,
                'track': 'track',
            }]
        })
        self.assertDictEqual(cover_results["utr_covers"],
                             cover_results["srna_covers"])
        # Second pass: with detect_decrease off nothing is expected to be
        # imported.
        datas["checks"] = {"detect_decrease": False}
        cover_results["srna_covers"] = {"cond_1": []}
        cover_results["utr_covers"] = {"cond_1": []}
        sud.check_import_srna_covers(datas, cover_results,
                                     self.example.inters[0], "cond_1",
                                     "track", cover, pos, args, "5utr")
        self.assertDictEqual(cover_results["srna_covers"], {'cond_1': []})

    def test_check_pos(self):
        cover = {"pos": 4}
        check_point = {
            "utr_start": 1,
            "utr_end": 29,
            "srna_start": 3,
            "srna_end": 11,
        }
        checks = {"srna": False, "utr": False}
        sud.check_pos(cover, check_point, checks, 4)
        self.assertDictEqual(checks, {'srna': True, 'utr': True})

    def test_get_cover_5utr(self):
        args = self.mock_args.mock()
        datas = {
            "num": 0,
            "cover_tmp": {"5utr": 0},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 1, "end": 26},
        }
        cover = 20
        cover_sets = {"high": 50, "low": 10}
        args.decrease_utr = 50
        args.fuzzy_utr = 2
        go_out = sud.get_cover_5utr(datas, cover_sets, cover,
                                    self.example.inters[0], args, 10)
        self.assertDictEqual(datas["final_poss"], {'start': 1, 'end': 10})
        self.assertEqual(datas["num"], 0)
        self.assertTrue(go_out)
        self.assertDictEqual(datas["cover_tmp"], {'5utr': 0})
        self.assertDictEqual(cover_sets, {'high': 50, 'low': 10})

        cover = 20
        datas = {
            "num": 0,
            "cover_tmp": {"5utr": 30},
            "checks": {"detect_decrease": True},
            "final_poss": {"start": 1, "end": 26},
        }
        cover_sets = {"low": 10, "high": 50}
        args.decrease_utr = 0.5
        go_out = sud.get_cover_5utr(datas, cover_sets, cover,
                                    self.example.inters[0], args, 10)
        self.assertEqual(datas["num"], 1)
        self.assertFalse(go_out)
        self.assertDictEqual(datas["final_poss"], {'start': 1, 'end': 26})
        self.assertDictEqual(datas["cover_tmp"], {'5utr': 20})
        self.assertDictEqual(cover_sets, {'low': 20, 'high': 50})

    def test_detect_cover_utr_srna(self):
        self._patch_sud("coverage_comparison",
                        self.mock.mock_coverage_comparison)
        cover_results = {
            "cover_sets": {"low": 10, "high": 50},
            "pos": {"low": 10, "high": 50},
            "covers": [20],
            "type": "5utr",
            "srna_covers": {"frag_1": []},
            "utr_covers": {"frag_1": []},
            "intercds": "TSS",
            "check_point": {
                "utr_start": 1,
                "utr_end": 29,
                "srna_start": 2,
                "srna_end": 25,
            },
        }
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 23}
        args = self._make_args(min_len=30, max_len=500, decrease_utr=0.5,
                               fuzzy_utr=2)
        sud.detect_cover_utr_srna(cover_results, pos,
                                  self.example.inters[0], "frag_1",
                                  "track_1", args, "frag", 2, 20, "+")
        self.assertDictEqual(cover_results["srna_covers"], {
            'frag_1': [{
                'low': 20,
                'high': 50,
                'track': 'track_1',
                'final_start': 2,
                'ori_avg': 0.8695652173913043,
                'type': 'frag',
                'final_end': 20,
                'avg': 1.0526315789473684,
            }]
        })
        self.assertDictEqual(cover_results["utr_covers"],
                             cover_results["srna_covers"])
        self.assertDictEqual(cover_results["cover_sets"], {
            'best': 20,
            'low': 20,
            'high': 50,
        })

    def test_get_coverage(self):
        self._patch_sud("coverage_comparison",
                        self.mock.mock_coverage_comparison)
        self._patch_sud("detect_cover_utr_srna",
                        self.mock.mock_detect_cover_utr_srna)
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self._make_args(min_len=30, max_len=500, decrease_utr=0.5,
                               fuzzy_utr=2)
        srna_covers, utr_covers = sud.get_coverage(
            self.example.wigs, self.example.inters[0], pos, "3utr", "TSS",
            args)
        self.assertDictEqual(srna_covers, {
            'frag_1': [{
                'track': 'track_1',
                'high': 50,
                'final_start': 2,
                'type': 'frag',
                'avg': 8.052631578947368,
                'low': 10,
                'final_end': 3,
                'ori_avg': 2.12,
            }]
        })
        self.assertDictEqual(utr_covers, srna_covers)

    def test_get_utr_cutoff(self):
        mediandict = {"aaa": {"5utr": {"bbb": {}}}}
        avgs = [30, 60, 550, 302, 44]
        sud.get_utr_cutoff("p_0.5", mediandict, avgs, "aaa", "5utr", "bbb")
        self.assertDictEqual(mediandict, {
            'aaa': {'5utr': {'bbb': {'mean': 197.2, 'median': 60}}}
        })

    def test_detect_normal(self):
        self._patch_sud("get_coverage", self.mock.mock_get_coverage)
        diff = 50
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        args = self._make_args(min_len=30, max_len=500, decrease_utr=0.5,
                               fuzzy_utr=2, utrs=[], srnas=[])
        sud.detect_normal(diff, self.example.wigs, self.example.inters[0],
                          pos, "3utr", self.example.tsss[0], args)
        self.assertListEqual(
            args.srnas, [self._record('3utr', 2, 20, start_tss='TSS:1_+')])
        self.assertListEqual(args.utrs, [self._record('3utr', 2, 20)])

        # Second scenario: processing sites available and a tighter length
        # window.
        args.utrs = []
        args.srnas = []
        args.pros = self.example.pros
        args.min_len = 3
        args.max_len = 20
        pos = {"start": 2, "end": 24, "ori_start": 1, "ori_end": 25}
        sud.detect_normal(diff, self.example.wigs, self.example.inters[0],
                          pos, "3utr", self.example.tsss[0], args)
        self.assertListEqual(args.srnas, [
            self._record('3utr', 1, 18, start_tss='TSS:1_+',
                         end_cleavage='Cleavage:18_+')
        ])

    def test_detect_3utr_pro(self):
        self._patch_sud("get_coverage", self.mock.mock_get_coverage)
        args = self._make_args(min_len=1, max_len=300, decrease_utr=0.5,
                               fuzzy_utr=1, fuzzy_tsss={"3utr": 3},
                               pros=self.example.pros, utrs=[], srnas=[])
        pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25}
        sud.detect_3utr_pro(self.example.inters[0], pos, self.example.wigs,
                            "3utr", args)
        self.assertListEqual(args.srnas, [
            self._record('3utr', 18, 20, start_cleavage='Cleavage:18_+')
        ])
        self.assertListEqual(args.utrs, [self._record('3utr', 18, 20)])

    def test_detect_twopro(self):
        self._patch_sud("get_coverage", self.mock.mock_get_coverage)
        pro_dict = [{
            "seq_id": "aaa",
            "source": "tsspredator",
            "feature": "processing",
            "start": 18,
            "end": 18,
            "phase": ".",
            "strand": "+",
            "score": ".",
        }, {
            "seq_id": "aaa",
            "source": "tsspredator",
            "feature": "processing",
            "start": 38,
            "end": 38,
            "phase": ".",
            "strand": "+",
            "score": ".",
        }]
        attributes_pro = [{
            "ID": "processing0",
            "Name": "Processing_0",
        }, {
            "ID": "processing1",
            "Name": "Processing_1",
        }]
        pros = [
            Create_generator(pro_dict[index], attributes_pro[index], "gff")
            for index in range(0, 2)
        ]
        args = self._make_args(min_len=1, max_len=300, decrease_utr=0.5,
                               fuzzy_utr=3, fuzzy_tsss={"3utr": 3},
                               pros=pros, utrs=[], srnas=[])
        pos = {"start": 2, "end": 50, "ori_start": 1, "ori_end": 25}
        sud.detect_twopro(self.example.inters[0], pos, self.example.wigs,
                          "interCDS", "interCDS", args)
        self.assertListEqual(args.srnas, [
            self._record('interCDS', 18, 38,
                         start_cleavage='Cleavage:18_+',
                         end_cleavage='Cleavage:38_+')
        ])
        self.assertListEqual(args.utrs, [
            self._record('interCDS', 18, 38, end_cleavage='Cleavage:38_+')
        ])

    def test_run_utr_detection(self):
        args = self._make_args(min_len=1, max_len=300, decrease_utr=0.5,
                               fuzzy_utr=2, fuzzy_tsss={"5utr": "n_3"},
                               utrs=[], srnas=[],
                               tsss=self.example.tsss,
                               pros=self.example.pros)
        self._patch_sud("get_coverage", self.mock.mock_get_coverage)
        sud.run_utr_detection(self.example.wigs, self.example.inters[0],
                              2, 50, "5utr", args)
        self.assertListEqual(
            args.srnas, [self._record('5utr', 1, 50, start_tss='TSS:1_+')])
        self.assertListEqual(args.utrs, [self._record('5utr', 1, 50)])

    def test_class_utr(self):
        args = self._make_args(min_len=1, max_len=300, decrease_utr=0.5,
                               fuzzy_utr=2, fuzzy_tsss={"3utr": "p_3"},
                               utrs=[], srnas=[],
                               tsss=self.example.tsss,
                               pros=self.example.pros,
                               wig_fs=self.example.wigs)
        self._patch_sud("get_coverage", self.mock.mock_get_coverage)
        sud.class_utr(self.example.inters[0], self.example.tas[0], args,
                      args.wig_fs, args.wig_fs)
        self.assertListEqual(args.srnas, [
            self._record('3utr', 1, 20, start_tss='TSS:1_+'),
            self._record('3utr', 18, 20, start_cleavage='Cleavage:18_+'),
        ])
        self.assertListEqual(args.utrs, [
            self._record('3utr', 1, 20),
            self._record('3utr', 18, 20),
        ])

    def test_get_utr_coverage(self):
        utrs = [self._record('3utr', 18, 20)]
        covers = sud.get_utr_coverage(utrs)
        self.assertDictEqual(covers, {
            'aaa': {
                'interCDS': {},
                '3utr': {'track_1': [27.52]},
                '5utr': {},
            }
        })

    def test_set_cutoff(self):
        args = self.mock_args.mock()
        args.texs = {"track_4@AND@track_6": 0}
        covers = {
            'aaa': {
                '5utr': {'track_4': [52, 11, 23]},
                'inter': {'track_3': [111]},
                'total': {'track_1': [27.52, 111]},
                '3utr': {'track_1': [27.52, 111]},
                'interCDS': {'track_2': [12, 0]},
            }
        }
        args.coverages = {"5utr": "p_0.3", "3utr": "n_10", "interCDS": "p_0.5"}
        args.cover_notex = {
            "5utr": "p_0.3",
            "3utr": "n_10",
            "interCDS": "p_0.5",
        }
        mediandict = sud.set_cutoff(covers, args)
        self.assertDictEqual(mediandict, {
            'aaa': {
                '5utr': {
                    'track_4': {'median': 11, 'mean': 28.666666666666668}
                },
                'interCDS': {'track_2': {}},
                '3utr': {'track_1': {}},
            }
        })
        args.cover_notex = None
        mediandict = sud.set_cutoff(covers, args)
        self.assertDictEqual(mediandict, {
            'aaa': {
                '3utr': {'track_1': {'mean': 69.26, 'median': 10.0}},
                '5utr': {
                    'track_4': {'mean': 28.666666666666668, 'median': 11}
                },
                'interCDS': {'track_2': {'mean': 6.0, 'median': 0}},
            }
        })

    def test_mean_score(self):
        lst = [1, 3, 5, 6, 7, 8]
        mean = sud.mean_score(lst)
        self.assertEqual(mean, 5.0)

    def test_median_score(self):
        lst = [1, 3, 5, 6, 7, 8]
        median = sud.median_score(lst, 0.5)
        self.assertEqual(median, 5)

    def _output_args(self):
        """Arguments shared by the GFF/table output tests."""
        return self._make_args(min_len=1, max_len=300, decrease_utr=0.5,
                               fuzzy_utr=2, coverages="cover",
                               texs="template_texs", tex_notex="tex_notex",
                               replicates="rep", table_best=True,
                               out=StringIO(), out_t=StringIO())

    def test_detect_srna(self):
        self._patch_sud("replicate_comparison",
                        self.mock.mock_replicate_comparison)
        args = self._output_args()
        median = {"aaa": {"3utr": 555}}
        args.srnas = [
            self._record('3utr', 18, 20, start_cleavage='Cleavage:18_+',
                         conds=["frag"])
        ]
        sud.detect_srna(median, args)
        self.assertEqual(
            args.out.getvalue(),
            "aaa\tANNOgesic\tncRNA\t18\t20\t.\t+\t.\tID=srna_utr0;Name=UTR_sRNA_00000;sRNA_type=3utr;best_avg_coverage=500;best_high_coverage=700;best_low_coverage=400;with_TSS=NA;start_cleavage=Cleavage:18_+;end_cleavage=NA\n"
        )
        self.assertEqual(
            args.out_t.getvalue(),
            "aaa\t00000\t18\t20\t+\tfrag_1\ttrack_1\t500\t700\t400\tfrag(avg=500;high=700;low=400)\n"
        )

    def test_print_file(self):
        args = self._output_args()
        srna = self._record('3utr', 18, 20, start_cleavage='Cleavage:18_+',
                            conds=["frag"])
        srna_datas = {
            "best": 500,
            "track": "frag",
            "high": 700,
            "low": 400,
            "start": 100,
            "end": 202,
            "conds": {"frag_1": "track_1"},
        }
        sud.print_file(0, srna, 2, 50, srna_datas, args)
        self.assertEqual(
            args.out.getvalue(),
            "aaa\tANNOgesic\tncRNA\t2\t50\t.\t+\t.\tID=srna_utr0;Name=UTR_sRNA_00000;sRNA_type=3utr;best_avg_coverage=500;best_high_coverage=700;best_low_coverage=400;with_TSS=NA;start_cleavage=Cleavage:18_+;end_cleavage=NA\n"
        )
        self.assertEqual(
            args.out_t.getvalue(),
            "aaa\t00000\t2\t50\t+\tfrag_1\ttrack_1\t500\t700\t400\tfrag(avg=500;high=700;low=400)\n"
        )
class TestCircRNA(unittest.TestCase):
    """Tests for the circular-RNA detection helpers exposed by ``circ``."""

    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        os.makedirs(self.test_folder, exist_ok=True)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _make_args(self, **attrs):
        """Return a fresh mock argument container with ``attrs`` set on it."""
        args = self.mock_args.mock()
        for key, value in attrs.items():
            setattr(args, key, value)
        return args

    def _new_buffer(self):
        """Return a ``StringIO`` that is closed automatically after the test."""
        buffer = StringIO()
        self.addCleanup(buffer.close)
        return buffer

    def _feature_of(self, attributes_cds):
        """Run ``circ.get_feature`` on the example CDS with given attributes."""
        return circ.get_feature(
            Create_generator(self.example.cds_dict, attributes_cds, "gff"))

    def test_get_feature(self):
        # Expected label: locus_tag when present, then protein_id, and
        # otherwise an ID/position string.
        self.assertEqual(
            self._feature_of({
                "ID": "cds0",
                "Name": "CDS_0",
                "locus_tag": "AAA_00001",
                "protein_id": "YP_918384.3",
            }),
            "AAA_00001")
        self.assertEqual(
            self._feature_of({
                "ID": "cds0",
                "Name": "CDS_0",
                "protein_id": "YP_918384.3",
            }),
            "YP_918384.3")
        self.assertEqual(
            self._feature_of({"ID": "cds0", "Name": "CDS_0"}),
            "cds0:122-267_f")

    def test_detect_conflict(self):
        circ_dict = {
            "seq_id": "aaa",
            "source": "Refseq",
            "feature": "circRNA",
            "start": 100,
            "end": 467,
            "phase": ".",
            "strand": "+",
            "score": ".",
            "support": 30,
            "start_site": 30,
            "end_site": 35,
            "situation": "P",
            "splice_type": "C",
        }
        attributes_circ = {"ID": "circrna0", "Name": "circRNA_0"}
        circrna = Create_generator(circ_dict, attributes_circ, "circ")
        gffs = [
            Create_generator(self.example.cds_dict,
                             self.example.attributes_cds, "gff")
        ]
        args = self._make_args(start_ratio=0.3, end_ratio=0.3, support=5)
        out = self._new_buffer()
        out_best = self._new_buffer()
        circ.detect_conflict(gffs, circrna, 0, out, out_best, args)
        # NOTE(review): field separators are reproduced exactly as they
        # appear in the source; confirm they were not literal tab characters
        # before the file's whitespace was collapsed.
        self.assertEqual(
            out.getvalue(),
            "circRNA_0 aaa + 100 467 AAA_00001 30 1.0 0.8571428571428571\n")

    def test_get_circrna(self):
        circs = [
            Create_generator(self.example.circ_dict[index],
                             self.example.attributes_circ[index], "circ")
            for index in range(0, 5)
        ]
        gffs = [
            Create_generator(self.example.gffs_dict[index],
                             self.example.attributes_gffs[index], "gff")
            for index in range(0, 3)
        ]
        out = self._new_buffer()
        out_best = self._new_buffer()
        args = self._make_args(start_ratio=0.3, end_ratio=0.3, support=5)
        nums = circ.get_circrna(circs, gffs, 50, out, out_best, args)
        self.assertDictEqual(nums["support"], {
            'aaa': {0: 2, 20: 1, 5: 2, 25: 1, 10: 2, 30: 1, 15: 1},
            'all': {0: 3, 20: 1, 5: 3, 25: 1, 10: 2, 30: 1, 15: 1},
            'bbb': {0: 1, 5: 1},
        })
        self.assertDictEqual(nums["circular"],
                             {'bbb': 1, 'aaa': 2, 'all': 3})
        self.assertDictEqual(nums["conflict"], {
            'bbb': {0: 1, 5: 1},
            'aaa': {},
            'all': {0: 1, 5: 1},
        })

    def test_detect_circrna(self):
        out_file = os.path.join(self.test_folder, "out_all.csv")
        stat_file = os.path.join(self.test_folder, "stat.csv")
        # Swap in the mock reader for this test only; the original used to
        # stay replaced for every test that ran afterwards.
        if hasattr(circ, "read_file"):
            self.addCleanup(setattr, circ, "read_file", circ.read_file)
        else:
            self.addCleanup(delattr, circ, "read_file")
        circ.read_file = Mock_read_file().read_file
        args = self._make_args(start_ratio=0.5, end_ratio=0.5, support=5,
                               hypo=True)
        circ.detect_circrna("test.circ", "test.gff", out_file, args,
                            stat_file)
        circs = import_data(out_file)
        stats = import_data(stat_file)
        # Order of the output lines is not checked, only their content.
        self.assertEqual(set(circs), set(self.example.out_file.split("\n")))
        self.assertEqual(set(stats), set(self.example.stat_file.split("\n")))
class TestsTranscriptAssembly(unittest.TestCase):
    """Tests for ``TranscriptDetection`` with its ``tr`` helpers mocked.

    Every replacement of a ``tr`` module attribute goes through
    ``_patch_tr`` so that it is undone after the test, pass or fail.
    """

    def setUp(self):
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.terms = "test_folder/terms"
        self.stat = "test_folder/output/statistics"
        self.out_gff = "test_folder/output/gffs"
        self.out_table = "test_folder/output/tables"
        # exist_ok keeps setUp working when a previous, interrupted run
        # left (part of) the tree behind.
        for folder in (self.test_folder,
                       self.trans,
                       self.out,
                       self.tex,
                       self.frag,
                       os.path.join(self.frag, "tmp"),
                       self.gffs,
                       os.path.join(self.gffs, "tmp"),
                       self.tsss,
                       self.terms,
                       self.stat,
                       self.out_gff,
                       self.out_table):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.out_folder = self.out
        self.tran = TranscriptDetection(args)

    def tearDown(self):
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _patch_tr(self, name, replacement):
        """Replace ``tr.<name>`` for the current test only.

        The previous value is restored (or the attribute removed if it did
        not exist) by a cleanup that runs whether the test passes or fails.
        """
        if hasattr(tr, name):
            self.addCleanup(setattr, tr, name, getattr(tr, name))
        else:
            self.addCleanup(delattr, tr, name)
        setattr(tr, name, replacement)

    def _mock_detection(self):
        """Mock the transcript detection entry point of ``tr``.

        The tests used to save ``tr.detect_transcript`` but assign or
        restore ``tr.assembly``; the single name ``detect_transcript`` is
        now patched consistently.
        """
        self._patch_tr("detect_transcript", self.mock.mock_assembly)

    def _make_args(self, **attrs):
        """Return a fresh mock argument container with ``attrs`` set on it."""
        args = self.mock_args.mock()
        for key, value in attrs.items():
            setattr(args, key, value)
        return args

    def _log_path(self):
        return os.path.join(self.test_folder, "test.log")

    def _assert_gff_content(self, path, body):
        """Check that ``path`` holds the GFF3 header followed by ``body``."""
        datas = import_data(path)
        self.assertEqual("\n".join(datas), "##gff-version 3\n" + body)

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_compute(self):
        self._mock_detection()
        gen_file(os.path.join(self.frag, "tmp/test_forward.wig"), "test")
        args = self._make_args(replicates="rep", out_folder=self.out)
        strains = self.tran._compute("frag", self.frag, "libs", args)
        self.assertListEqual(strains, ['test'])

    def test_for_one_wig(self):
        self._mock_detection()
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.frag, "tmp/test_forward.wig"), "test")
        gen_file(os.path.join(self.out, "test_frag"), self.example.tran_file)
        args = self._make_args(replicates="rep", libs="libs", gffs=self.gffs,
                               out_folder=self.out, frag_wigs=self.frag,
                               flibs="flibs")
        strains = self.tran._for_one_wig("frag", args)
        self.assertListEqual(strains, ['test'])
        self._assert_gff_content(
            os.path.join(self.out_gff, "test_transcript_frag.gff"),
            self.example.tran_file)

    def test_for_two_wigs(self):
        self._patch_tr("combine", self.mock.mock_combine)
        gen_file(os.path.join(self.out_gff, "test_transcript_fragment.gff"),
                 "test")
        gen_file(os.path.join(self.out_gff, "test_transcript_tex_notex.gff"),
                 "test")
        args = self._make_args(frag_wigs=self.frag, tex_wigs=self.tex,
                               gffs=self.gffs, tolerance=5)
        with open(self._log_path(), "w") as log:
            self.tran._for_two_wigs(["test"], args, log)
        self.assertTrue(os.path.exists(
            os.path.join(self.out_gff, "test_transcript.gff")))

    def test_post_modify(self):
        self._patch_tr("longer_ta", self.mock.mock_longer_ta)
        self._patch_tr("fill_gap", self.mock.mock_fill_gap)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gff_out = os.path.join(self.out, "gffs")
        os.mkdir(os.path.join(self.out, "tmp_tran"))
        gen_file(os.path.join(gff_out, "tmp_uni"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_overlap"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "final_test"), self.example.tran_file)
        args = self._make_args(gffs=self.gffs, out_folder=self.out,
                               length=20, modify="merge_overlap")
        self.tran._post_modify(["test"], args)
        self.assertTrue(os.path.exists(
            os.path.join(gff_out, "test_transcript.gff")))

    def test_compare_cds(self):
        self._patch_tr("stat_ta_gff", self.mock.mock_stat_ta_gff)
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.gffs, "tmp/test.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.out_gff, "test_transcript.gff"),
                 self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(os.path.join(gff_out, "tmp_ta_gff"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_gff_ta"), self.example.gff_file)
        args = self._make_args(out_folder=self.out, trans=self.trans,
                               gffs=self.gffs, c_feature=["CDS"])
        with open(self._log_path(), "w") as log:
            self.tran._compare_cds(["test"], args, log)
        self._assert_gff_content(os.path.join(self.gffs, "test.gff"),
                                 self.example.gff_file)
        self._assert_gff_content(
            os.path.join(self.out_gff, "test_transcript.gff"),
            self.example.tran_file)

    def test_compare_tss(self):
        self._patch_tr("stat_ta_tss", self.mock.mock_stat_ta_tss)
        self.tran.multiparser = self.mock_parser
        gen_file(os.path.join(self.gffs, "test_TSS.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.gffs, "tmp/test_TSS.gff"),
                 self.example.gff_file)
        gen_file(os.path.join(self.out_gff, "test_transcript.gff"),
                 self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(os.path.join(gff_out, "tmp_ta_tss"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_tss_ta"), self.example.gff_file)
        args = self._make_args(out_folder=self.out, trans=self.trans,
                               compare_tss=self.gffs, fuzzy=2)
        with open(self._log_path(), "w") as log:
            self.tran._compare_tss(["test"], args, log)
        self._assert_gff_content(os.path.join(self.gffs, "test_TSS.gff"),
                                 self.example.gff_file)
        self._assert_gff_content(
            os.path.join(self.out_gff, "test_transcript.gff"),
            self.example.tran_file)

    def test_run_transcript_assembly(self):
        self._patch_tr("stat_ta_tss", self.mock.mock_stat_ta_tss)
        self._patch_tr("stat_ta_gff", self.mock.mock_stat_ta_gff)
        self._patch_tr("longer_ta", self.mock.mock_longer_ta)
        self._patch_tr("fill_gap", self.mock.mock_fill_gap)
        self._patch_tr("combine", self.mock.mock_combine)
        self._mock_detection()
        self._patch_tr("gen_table_transcript", self.mock.mock_gen_table_tran)
        gen_file(os.path.join(self.frag, "tmp/test1_forward.wig"),
                 self.example.wig_f)
        gen_file(os.path.join(self.frag, "tmp/test1_reverse.wig"),
                 self.example.wig_r)
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(self.terms, "test_term.gff"),
                 self.example.term_file)
        gen_file("test_folder/output/test1_fragment", self.example.tran_file)
        gff_out = os.path.join(self.out, "gffs")
        gen_file(
            os.path.join(gff_out, "test_transcript_assembly_fragment.gff"),
            self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_uni"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_overlap"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "final_test"), self.example.tran_file)
        args = self._make_args(out_folder=self.out, frag_wigs=self.frag,
                               tex_wigs=None, flibs="flibs", tlibs="tlibs",
                               gffs=self.gffs, terms=None, compare_tss=None,
                               c_feature=None, fuzzy_term=1, max_dist=2000,
                               modify="merge_overlap")
        with open(self._log_path(), "w") as log:
            self.tran.run_transcript(args, log)
class TestCoverageDetection(unittest.TestCase):
    """Tests for the coverage comparison helpers exposed by ``cover_detect``."""

    def setUp(self):
        self.example = Example()
        self.mock_args = MockClass()

    @staticmethod
    def _median():
        """Return a fresh per-track median/mean table used as cutoff input."""
        return {
            "track1_tex": {"median": 100, "mean": 200},
            "track1_notex": {"median": 30, "mean": 80},
            "track2_tex": {"median": 150, "mean": 200},
            "track2_notex": {"median": 10, "mean": 20},
            "frag": {"median": 80, "mean": 100},
        }

    def _compare(self, cover, cover_sets, poss, first, strand, pos,
                 expected_sets, expected_poss):
        """Run ``coverage_comparison`` once and check both dicts it updates."""
        cover_detect.coverage_comparison(cover, cover_sets, poss, first,
                                         strand, pos)
        self.assertDictEqual(cover_sets, expected_sets)
        self.assertDictEqual(poss, expected_poss)

    def test_coverage_comparison_first(self):
        cover_sets = {"high": -1, "low": -1}
        poss = {"high": -1, "low": -1}
        self._compare(100, cover_sets, poss, True, "+", 50,
                      {"high": 100, "low": 100}, {"high": 50, "low": 50})

    def test_coverage_comparison_forward(self):
        cover_sets = {"high": 50, "low": 20}
        poss = {"high": 10, "low": 30}
        self._compare(100, cover_sets, poss, False, "+", 50,
                      {"high": 100, "low": 100}, {"high": 50, "low": 50})
        self._compare(30, cover_sets, poss, False, "+", 51,
                      {"high": 100, "low": 30}, {"high": 50, "low": 51})

    def test_coverage_comparison_reverse(self):
        cover_sets = {"high": 50, "low": 20}
        poss = {"high": 30, "low": 10}
        self._compare(100, cover_sets, poss, False, "-", 50,
                      {"high": 100, "low": 100}, {"high": 50, "low": 50})
        self._compare(30, cover_sets, poss, False, "-", 49,
                      {"high": 100, "low": 30}, {"high": 50, "low": 49})

    def test_define_cutoff_median(self):
        coverages = {"3utr": "mean", "5utr": "median"}
        median = {
            "track_a": {"median": 100, "mean": 200},
            "track_b": {"median": 30, "mean": 80},
        }
        cutoff = cover_detect.define_cutoff(coverages, median, "5utr")
        self.assertDictEqual(cutoff, {'track_a': 100, 'track_b': 30})
        cutoff = cover_detect.define_cutoff(coverages, median, "3utr")
        self.assertDictEqual(cutoff, {'track_a': 200, 'track_b': 80})

    def test_check_tex(self):
        template_texs = self.example.texs
        covers = self.example.cover_datas
        coverages = {"3utr": 100, "5utr": 600}
        poss = {"high": 30, "low": 10}
        median = self._median()
        target_datas = []
        detect_num_lib = cover_detect.check_tex(
            template_texs, covers, target_datas, 20, None, poss, median,
            coverages, "3utr", 200, 2)
        self.assertEqual(detect_num_lib, 2)
        num_frag = sum(1 for target in target_datas
                       if target["type"] == "frag")
        num_tex = len(target_datas) - num_frag
        self.assertEqual(num_frag, 1)
        self.assertEqual(num_tex, 2)
        detect_num_lib = cover_detect.check_tex(
            template_texs, covers, target_datas, 20, "sRNA_utr_derived",
            poss, median, coverages, "5utr", 200, 2)
        self.assertEqual(detect_num_lib, 2)
        self.assertDictEqual(poss, {
            'start': 100,
            'high': 30,
            'end': 202,
            'low': 10,
        })

    def test_replicate_comparison(self):
        # Mock check_tex for this test only; it used to stay replaced for
        # every test that ran afterwards in the same process.
        if hasattr(cover_detect, "check_tex"):
            self.addCleanup(setattr, cover_detect, "check_tex",
                            cover_detect.check_tex)
        else:
            self.addCleanup(delattr, cover_detect, "check_tex")
        cover_detect.check_tex = Mock_func().mock_check_tex
        template_texs = self.example.texs
        srna_covers = {"texnotex": self.example.cover_datas}
        coverages = {"3utr": 100, "5utr": 600}
        median = self._median()
        args = self.mock_args.mock()
        args.replicates = {"tex": ["all_2"], "frag": ["all_1"]}
        args.tex_notex = 2
        srna_datas = cover_detect.replicate_comparison(
            args, srna_covers, "+", "sRNA_utr_derived", median, coverages,
            "3utr", 100, 200, template_texs)
        self.assertEqual(srna_datas["best"], 500)
        self.assertEqual(srna_datas["track"], "frag")
        self.assertEqual(srna_datas["high"], 700)
        self.assertEqual(srna_datas["low"], 400)
        self.assertEqual(srna_datas["start"], 100)
        self.assertEqual(srna_datas["end"], 202)
class TestGensRNAOutput(unittest.TestCase):
    """Unit tests for the sRNA output helpers reached through ``gso``."""

    def setUp(self):
        """Create the fixtures and make sure the scratch folder exists."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Remove the scratch folder once a test has finished."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_merge_info(self):
        """Check the first two records returned by ``gso.merge_info``."""
        blast_hits = [
            {"strain": "aaa", "strand": "+", "start": 20, "end": 70,
             "hits": hit}
            for hit in ("111", "222", "333", "444")
        ]
        blast_hits.append({"strain": "bbb", "strand": "+", "start": 20,
                           "end": 70, "hits": "555"})

        merged = gso.merge_info(blast_hits)

        # Four "aaa" hits go in; the expected record lists only three.
        expected_aaa = {'hits': '111;222;333', 'start': 20, 'strand': '+',
                        'strain': 'aaa', 'end': 70}
        expected_bbb = {'hits': '555', 'start': 20, 'strand': '+',
                        'strain': 'bbb', 'end': 70}
        self.assertDictEqual(merged[0], expected_aaa)
        self.assertDictEqual(merged[1], expected_bbb)

    def test_compare_srna_table(self):
        """Check the record built by ``gso.compare_srna_table``."""
        seed = {"energy": -23, "utr": "3UTR"}
        srna_entry = Create_generator(
            {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA",
             "start": 300, "end": 367, "phase": ".", "strand": "+",
             "score": "."},
            {"ID": "srna0", "Name": "sRNA_0"},
            "gff")
        options = self.mock_args.mock()
        options.min_len = 30
        options.max_len = 500

        result = gso.compare_srna_table(
            self.example.srna_tables, srna_entry, seed, options)

        self.assertDictEqual(result, {
            'end_pro': 'NA', 'strand': '+', 'strain': 'aaa', 'avg': 100,
            'type': 'TEX+/-;Fragmented', 'conds': 'tex_frag',
            'candidates': '300-367', 'tss_pro': 'TSS:300_+', 'start': 300,
            'utr': '3UTR', 'energy': -23, 'end': 367})

    def test_compare(self):
        """Compare every ``gso.compare`` result with the example data."""
        options = self.mock_args.mock()
        options.min_len = 30
        options.max_len = 500

        finals = gso.compare(
            self.example.srnas, self.example.srna_tables,
            self.example.nr_blasts, self.example.srna_blasts, options)

        for position, final in enumerate(finals):
            self.assertDictEqual(final, self.example.finals[position])

    def test_gen_best_srna(self):
        """Check the data rows ``gso.gen_best_srna`` writes to its output."""
        # The GFF reader is swapped for a mock, so "test.srna" below is
        # only a name handed to the function.
        gso.read_gff = Mock_func().mock_read_gff
        options = self.mock_args.mock()
        options.min_len = 30
        options.max_len = 500
        options.nr_hits_num = 0
        options.energy = 0
        options.import_info = ["tss", "sec_str"]
        options.all_hit = True
        options.best_sorf = True
        options.best_promoter = True
        options.best_term = True
        out_file = os.path.join(self.test_folder, "test.out")

        gso.gen_best_srna("test.srna", out_file, options)

        # NOTE(review): every non-comment row is checked, with its last
        # tab-separated column dropped; confirm that per-row checking is
        # the intended scope of the assertion.
        expected_row = "aaa\tUTR_derived\tsRNA\t300\t367\t.\t+\t."
        with open(out_file) as handle:
            for row in handle:
                if row.startswith("#"):
                    continue
                trimmed = "\t".join(row.split("\t")[:-1])
                self.assertEqual(trimmed, expected_row)
class TestsORFDetection(unittest.TestCase):
    """Tests for ``sORFDetection`` with the ``so`` helpers mocked out."""

    def setUp(self):
        """Create the folder layout and the ``sORFDetection`` instance."""
        self.mock = Mock_func()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.tsss = "test_folder/tsss"
        self.srnas = "test_folder/sRNA"
        self.out = "test_folder/output"
        self.trans = "test_folder/trans"
        self.fastas = "test_folder/fastas"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.gffs = "test_folder/gffs"
        if not os.path.exists(self.test_folder):
            layout = (self.test_folder, self.tsss, self.out, self.trans,
                      self.fastas, self.tex, self.frag, self.srnas,
                      self.gffs)
            for folder in layout:
                os.mkdir(folder)
        options = self.mock_args.mock()
        options.tsss = self.tsss
        options.srnas = self.srnas
        options.out_folder = self.out
        options.trans = self.trans
        options.fastas = self.fastas
        self.sorf = sORFDetection(options)

    def tearDown(self):
        """Remove the scratch folder once a test has finished."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_start_stop_codon(self):
        """Check the files present after ``_start_stop_codon`` has run."""
        gff_dir = os.path.join(self.out, "gffs")
        table_dir = os.path.join(self.out, "tables")
        os.mkdir(gff_dir)
        os.mkdir(table_dir)
        for subset in ("all_candidates", "best_candidates"):
            os.mkdir(os.path.join(gff_dir, subset))
            os.mkdir(os.path.join(table_dir, subset))
        seeded = ("all_candidates/test_sORF_all.gff",
                  "all_candidates/test_sORF_all.csv",
                  "all_candidates/test_sORF_best.gff",
                  "all_candidates/test_sORF_best.csv")
        for relative in seeded:
            gen_file(os.path.join(gff_dir, relative), "test")
        # The detection routine itself is replaced by a mock.
        so.sorf_detection = self.mock.mock_sorf_detection
        options = self.mock_args.mock()
        options.libs = "libs"
        options.tex_notex = "tex_notex"
        options.replicates = "replicates"
        options.start_codon = ["ATG"]
        options.stop_codon = ["TTA"]
        options.background = "background"
        options.wig_path = "wig_path"
        options.merge_wigs = "merge_wigs"

        self.sorf._start_stop_codon(["test"], options)

        expected = ((gff_dir, "best_candidates/test_sORF.gff"),
                    (gff_dir, "all_candidates/test_sORF.gff"),
                    (table_dir, "best_candidates/test_sORF.csv"),
                    (table_dir, "all_candidates/test_sORF.csv"))
        for base, relative in expected:
            self.assertTrue(os.path.exists(os.path.join(base, relative)))

    def test_compare_tran_cds(self):
        """``_compare_tran_cds`` should return the prefix of the one GFF."""
        so.get_intergenic = self.mock.mock_get_intergenic
        gen_file(os.path.join(self.test_folder, "test.gff"), "test")
        options = self.mock_args.mock()
        options.out_folder = self.out
        options.gffs = self.test_folder
        options.hypo = False
        options.utr_detect = True

        found = self.sorf._compare_tran_cds(options)

        self.assertListEqual(found, ["test"])

    def test_run_sorf_detection(self):
        """Run ``run_sorf_detection`` end to end with its helpers mocked."""
        gff_dir = os.path.join(self.out, "gffs")
        table_dir = os.path.join(self.out, "tables")
        os.mkdir(gff_dir)
        os.mkdir(table_dir)
        for subset in ("all_candidates", "best"):
            os.mkdir(os.path.join(gff_dir, subset))
            os.mkdir(os.path.join(table_dir, subset))
        so.get_intergenic = self.mock.mock_get_intergenic
        so.sorf_detection = self.mock.mock_sorf_detection
        self.sorf._remove_tmp = self.mock.mock_remove_tmp
        self.sorf._check_gff = self.mock.mock_check_gff
        self.sorf._check_necessary_files = (
            self.mock.mock_check_necessary_files)
        self.sorf.multiparser = Mock_Multiparser()
        options = self.mock_args.mock()
        options.trans = self.trans
        options.gffs = self.gffs
        options.tsss = self.tsss
        options.out_folder = self.out
        options.libs = "libs"
        options.tex_notex = "tex_notex"
        options.replicates = "replicates"
        options.start_codon = ["ATG"]
        options.stop_codon = ["TTA"]
        options.background = "background"
        options.wig_path = "wig_path"
        options.merge_wigs = "merge_wigs"
        options.fuzzy_rbs = 2

        # No assertion follows: the test passes when the call does not
        # raise.
        self.sorf.run_sorf_detection(options)
class TestRibos(unittest.TestCase):
    """Tests for ``Ribos`` helper methods with external steps mocked."""

    def setUp(self):
        """Create the folder layout and the ``Ribos`` instance."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        root = "test_folder"
        self.test_folder = root
        self.gffs = os.path.join(root, "gffs")
        self.fastas = os.path.join(root, "fastas")
        self.out_folder = os.path.join(root, "output")
        self.database = os.path.join(root, "database")
        self.seq_path = os.path.join(root, "seqs")
        self.tables = os.path.join(self.out_folder, "tables")
        self.stat = os.path.join(self.out_folder, "statistics")
        self.scan = os.path.join(root, "scan")
        self.tsss = os.path.join(root, "tsss")
        self.trans = os.path.join(root, "trans")
        self.out_gff = os.path.join(self.out_folder, "gffs")
        if not os.path.exists(self.test_folder):
            # Parents are listed before their children.
            layout = (
                self.test_folder,
                self.tsss,
                os.path.join(self.tsss, "tmp"),
                self.trans,
                os.path.join(self.trans, "tmp"),
                self.gffs,
                os.path.join(self.gffs, "tmp"),
                self.fastas,
                os.path.join(self.fastas, "tmp"),
                self.out_folder,
                self.database,
                self.seq_path,
                os.path.join(self.out_folder, "tmp_table"),
                os.path.join(self.out_folder, "tmp_scan"),
                os.path.join(self.out_folder, "tmp_fasta"),
                os.path.join(self.out_folder, "scan_Rfam"),
                self.tables,
                self.scan,
                self.stat,
                self.out_gff,
            )
            for folder in layout:
                os.mkdir(folder)
        options = self.mock_args.mock()
        options.gffs = self.gffs
        options.fastas = self.fastas
        options.out_folder = self.out_folder
        options.database = self.database
        options.tsss = self.tsss
        options.trans = self.trans
        self.ribo = Ribos(options)

    def tearDown(self):
        """Remove the scratch folder once a test has finished."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_scan_extract_rfam(self):
        """Check the prefix list and FASTA left by ``_scan_extract_rfam``."""
        self.ribo._run_infernal = self.mock.mock_run_infernal
        rb.modify_table = self.mock.mock_modify_table
        collected = []
        fixtures = (
            (os.path.join(self.gffs, "tmp/test.gff"),
             self.example.gff_file),
            (os.path.join(self.fastas, "tmp/test.fa"),
             self.example.fasta_file),
            (os.path.join(self.seq_path, "test.fa"),
             self.example.fasta_file),
            (os.path.join(self.tsss, "tmp/test_TSS.gff"),
             self.example.tss_file),
            (os.path.join(self.trans, "tmp/test_transcript.gff"),
             self.example.tran_file),
            (os.path.join(self.out_folder, "tmp_fasta", "test.fa"),
             self.example.fasta_file),
        )
        for path, content in fixtures:
            gen_file(path, content)
        options = self.mock_args.mock()
        options.start_codons = ["ATG"]
        options.fastas = self.fastas
        options.out_folder = self.out_folder
        options.gffs = self.gffs
        options.fuzzy = 5
        options.fuzzy_rbs = 2
        options.utr = True
        options.output_all = "test"

        self.ribo._scan_extract_rfam(collected, options)

        # The method is expected to fill the list passed in.
        self.assertListEqual(collected, ["test"])
        regenerated = os.path.join(
            self.out_folder, "tmp_fasta", "test_regenerate.fa")
        self.assertTrue(os.path.exists(regenerated))

    def test_merge_results(self):
        """Run ``_merge_results`` on prepared table and scan files."""
        ids_path = os.path.join(self.test_folder, "ids")
        fixtures = (
            (os.path.join(self.gffs, "test.gff"),
             self.example.gff_file),
            (os.path.join(self.out_folder,
                          "tmp_table/test_riboswitch.csv"),
             self.example.table),
            (os.path.join(self.out_folder,
                          "tmp_scan/test_riboswitch_prescan.txt"),
             self.example.rescan_file),
            (os.path.join(self.out_folder,
                          "tmp_scan/test_riboswitch_scan.txt"),
             self.example.rescan_file),
            (ids_path, self.example.ids),
            (os.path.join(self.tables, "test_riboswitch.csv"),
             self.example.table),
        )
        for path, content in fixtures:
            gen_file(path, content)
        options = self.mock_args.mock()
        options.start_codons = ["ATG"]
        options.fastas = self.fastas
        options.out_folder = self.out_folder
        options.gffs = self.gffs
        options.ribos_id = ids_path
        options.fuzzy = 3

        # No assertion follows: the test passes when the call does not
        # raise.
        self.ribo._merge_results(options)
class TestsORFDetection(unittest.TestCase):
    """Tests for the sORF detection functions reached through ``sd``.

    Several tests replace attributes of the ``sd`` module with mocks.
    Where the test restores such an attribute, the restore runs in a
    ``finally`` clause so that a failing call or assertion cannot leave
    the mock in place for later tests.
    """

    def setUp(self):
        """Create the fixtures and the scratch folder layout."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.fasta = "test_folder/fasta"
        self.wigs = "test_folder/wig"
        self.gff = "test_folder/gff"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
            os.mkdir(self.fasta)
            os.mkdir(self.wigs)
            os.mkdir(self.gff)

    def tearDown(self):
        """Remove the scratch folder once a test has finished."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_get_coverage(self):
        """Check the coverage record ``sd.get_coverage`` returns."""
        coverages = {"3utr": "median", "5utr": "median",
                     "inter": 5, "interCDS": "median"}
        medianlist = {"aaa": {"3utr": {"track_1": {"median": 3}},
                              "5utr": {"track_1": {"median": 6}},
                              "interCDS": {"track_1": {"median": 2}},
                              "inter": {"track_1": {"median": 5}}}}
        cutoffs = {"track_1": 0}
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": [1]}
        covers = sd.get_coverage(sorf, self.example.wigs, "+",
                                 coverages, medianlist, cutoffs)
        self.assertDictEqual(covers, {'frag_1': [
            {'low': 2, 'avg': 33.4, 'high': 100, 'pos': 2,
             'track': 'track_1', 'type': 'frag'}]})

    def test_detect_rbs_site(self):
        """Check the position list ``sd.detect_rbs_site`` returns."""
        args = self.mock_args.mock()
        args.max_len = 20
        args.min_len = 3
        args.fuzzy_rbs = 2
        detect = sd.detect_rbs_site("AGGAGGCCGCTATGCCACACGT", 2,
                                    self.example.tas[0], args)
        self.assertListEqual(detect, [1])

    def test_detect_start_stop(self):
        """Check ``sd.detect_start_stop`` on both strands."""
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        args = self.mock_args.mock()
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.max_len = 20
        args.min_len = 3
        args.fuzzy_rbs = 2
        sorf = sd.detect_start_stop(self.example.tas, seq, args)
        self.assertListEqual(sorf, [
            {'strand': '+', 'type': 'intergenic', 'starts': ['13'],
             'print': False, 'seq': 'ATGCCATTA', 'ends': ['21'],
             'end': 21, 'start': 13, 'rbs': [2], 'strain': 'aaa'}])

        seq = {"aaa": "TTAAAGGCATTATCCTCCTA"}
        # The example transcript is flipped to the reverse strand for
        # the second half; the finally clause puts it back even when
        # the call or the assertion fails.
        self.example.tas[0].strand = "-"
        try:
            sorf = sd.detect_start_stop(self.example.tas, seq, args)
            self.assertListEqual(sorf, [
                {'end': 10, 'starts': ['2'], 'strain': 'aaa',
                 'ends': ['10'], 'type': 'intergenic', 'print': False,
                 'seq': 'TAAAGGCAT', 'rbs': [19], 'strand': '-',
                 'start': 2}])
        finally:
            self.example.tas[0].strand = "+"

    def test_read_data(self):
        """Check the intergenic entries and sequence ``read_data`` loads."""
        inter = os.path.join(self.test_folder, "inter")
        fasta = os.path.join(self.test_folder, "fa")
        gen_file(inter, self.example.inter)
        gen_file(fasta, ">aaa\nATATACCGATC")
        inters, tsss, srnas, seq = sd.read_data(inter, None, None,
                                                fasta, True)
        self.assertEqual(inters[0].start, 2)
        self.assertDictEqual(seq, {'aaa': 'ATATACCGATC'})

    def test_check_tss(self):
        """``sd.check_tss`` should update the ``checks`` dict in place."""
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": [1],
                "with_TSS": []}
        checks = {"start": False, "rbs": False, "import": False}
        sd.check_tss(sorf, self.example.tsss[0], 300, checks)
        self.assertDictEqual(checks, {'start': True, 'rbs': [1],
                                      'import': True})

    def test_compare_sorf_tss(self):
        """Check both lists returned by ``sd.compare_sorf_tss``."""
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                  "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": [1]}]
        args = self.mock_args.mock()
        args.utr_length = 300
        args.noafter_tss = False
        args.no_tss = False
        sorfs_all, sorfs_best = sd.compare_sorf_tss(
            sorfs, self.example.tsss, "tss", args)
        self.assertListEqual(sorfs_all, [
            {'print': False, 'ends': ['10'], 'strand': '+', 'end': 6,
             'type': '3utr', 'starts': ['2'], 'seq': 'ATGTA',
             'strain': 'aaa', 'start': 2, 'rbs': [1],
             'start_TSS': '1_+', 'with_TSS': ['TSS:1_+']}])
        self.assertListEqual(sorfs_best, [
            {'print': False, 'ends': ['10'], 'strand': '+', 'end': 6,
             'type': '3utr', 'starts': ['2'], 'seq': 'ATGTA',
             'strain': 'aaa', 'start': 2, 'rbs': [1],
             'with_TSS': ['TSS:1_+'], 'start_TSS': '1_+'}])

    def test_compare_sorf_srna(self):
        """``sd.compare_sorf_srna`` should add ``srna`` to the sORF."""
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                  "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": [1]}]
        sd.compare_sorf_srna(sorfs, self.example.srnas, "test")
        self.assertListEqual(sorfs, [
            {'print': False, 'starts': ['2'], 'seq': 'ATGTA',
             'strand': '+', 'srna': ['sRNA:5-8_+'], 'end': 6,
             'rbs': [1], 'ends': ['10'], 'start': 2, 'strain': 'aaa',
             'type': '3utr'}])

    def test_import_overlap(self):
        """``sd.import_overlap`` should fold ``sorf2`` into ``final``."""
        sorf1 = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [1],
                 "start_TSS": "1"}
        sorf2 = {"strain": "aaa", "strand": "+", "start": 5, "end": 15,
                 "starts": ["5"], "ends": ["15"], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [2],
                 "start_TSS": "2"}
        final = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                 "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                 "type": "3utr", "print": False, "rbs": [1],
                 "start_TSS": "1"}
        sd.import_overlap(sorf2, final, sorf1, True)
        self.assertDictEqual(final, {
            'end': 15,
            'candidate': ['2-6_TSS:1_RBS:1', '5-15_TSS:2_RBS:2'],
            'start': 2, 'rbs': [1, 2], 'strand': '+', 'strain': 'aaa',
            'print': False, 'seq': 'ATGTA', 'ends': ['10', '15'],
            'start_TSS': '1', 'type': '3utr', 'starts': ['2', '5']})

    def test_merge(self):
        """Check the first merged record returned by ``sd.merge``."""
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                  "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": [1],
                  "start_TSS": "1"},
                 {"strain": "aaa", "strand": "+", "start": 5, "end": 15,
                  "starts": ["5"], "ends": ["15"], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": [2],
                  "start_TSS": "2"}]
        finals = sd.merge(sorfs, seq)
        self.assertDictEqual(finals[0], {
            'start_TSS': '1', 'rbs': [1, 2], 'strand': '+',
            'strain': 'aaa', 'start': 2,
            'candidate': ['2-6_TSS:1_RBS:1', '5-15_TSS:2_RBS:2'],
            'ends': ['10', '6', '15'], 'starts': ['2', '5'],
            'type': '3utr', 'end': 15, 'seq': 'AGGAGGCCGCTATG'})

    def test_assign_utr_cutoff(self):
        """A ``"median"`` setting should select the median value."""
        coverages = {"3utr": "median", "5utr": 20, "interCDS": 11,
                     "intergenic": 59}
        medians = {"median": 50, "mean": 20}
        cutoff = sd.assign_utr_cutoff(coverages, "3utr", medians)
        self.assertEqual(cutoff, 50)

    def test_get_cutoff(self):
        """Check the cutoff ``sd.get_cutoff`` returns for a 3'UTR sORF."""
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": [1],
                "start_TSS": "1"}
        coverages = {"3utr": "median", "5utr": 20, "interCDS": 11,
                     "intergenic": 59}
        medians = {"aaa": {"3utr": {"track_1": {"median": 50,
                                                "mean": 20}}}}
        cutoff = sd.get_cutoff(sorf, "track_1", coverages, medians)
        self.assertEqual(cutoff, 50)

    def test_get_attribute(self):
        """Check the attribute string ``sd.get_attribute`` builds."""
        # "with_TSS" and "srna" are plain strings here, and the expected
        # output shows them comma-joined per character ("N,A").
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"],
                "start_TSS": "1", "with_TSS": "NA", "srna": "NA",
                "shift": 1}
        string = sd.get_attribute(1, "sORF_1", "4", sorf, "utr")
        self.assertEqual(
            string,
            "ID=aaa_sorf1;Name=sORF_sORF_1;start_TSS=4;with_TSS=N,A;"
            "sORF_type=3utr;sRNA=N,A;rbs=1;frame_shift=1")

    def test_print_file(self):
        """Check the GFF and table lines written by ``sd.print_file``."""
        out_g = StringIO()
        out_t = StringIO()
        sorf = {"strain": "aaa", "strand": "+", "start": 10, "end": 15,
                "starts": ["10"], "ends": ["15"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["3"],
                "start_TSS": "1", "with_TSS": ["NA"], "srna": ["NA"],
                "candidate": ["AAA"], "shift": 1}
        sorf_datas = {"best": 20, "high": 50, "low": 10, "start": 1,
                      "end": 10, "track": "track_1", "detail": [],
                      "conds": {"frag": "track_1"}}
        args = self.mock_args.mock()
        args.table_best = True
        args.print_all = True
        sd.print_file(sorf, sorf_datas, 1, out_g, out_t, "best", args)
        self.assertEqual(
            out_g.getvalue(),
            "aaa\tANNOgesic\tsORF\t10\t15\t.\t+\t.\t"
            "ID=aaa_sorf1;Name=sORF_00001;start_TSS=1;with_TSS=NA;"
            "sORF_type=3utr;sRNA=NA;rbs=RBS_3;frame_shift=1\n")
        self.assertEqual(
            out_t.getvalue(),
            "aaa\tsORF_00001\t10\t15\t+\t3'UTR_derived\tNA\tRBS_3\t"
            "10\t15\tNA\t1\tFragmented\t20\t50\t10\t"
            "track_1(avg=20;high=50;low=10)\tATGTA\tAAA\n")

    def test_print_table(self):
        """Check the table line written by ``sd.print_table``."""
        out_t = StringIO()
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"],
                "start_TSS": "1", "with_TSS": ["NA"], "srna": ["NA"],
                "candidate": ["AAA"], "shift": 1}
        sorf_datas = {"best": 20, "high": 50, "low": 10, "start": 1,
                      "end": 10, "track": "track_1", "detail": [],
                      "conds": {"frag": "track_1"}}
        args = self.mock_args.mock()
        args.table_best = True
        args.print_all = True
        sd.print_table(out_t, sorf, "test", "3utr", "frag",
                       sorf_datas, args)
        self.assertEqual(
            out_t.getvalue(),
            "aaa\tsORF_test\t2\t6\t+\t3utr\tNA\t1\t2\t10\tNA\t1\tfrag\t"
            "20\t50\t10\ttrack_1(avg=20;high=50;low=10)\tATGTA\tAAA\n")

    def test_get_inter_coverage(self):
        """``sd.get_inter_coverage`` should group averages by track."""
        inter_covers = {}
        inters = [{"frag": [{"track": "track_1", "avg": 22}]}]
        sd.get_inter_coverage(inters, inter_covers)
        self.assertDictEqual(inter_covers, {'track_1': [22]})

    def test_detect_utr_type(self):
        """``sd.detect_utr_type`` should fill ``med_inters`` in place."""
        ta_dict = {"seq_id": "aaa", "source": "intergenic",
                   "feature": "Transcript", "start": 1, "end": 23,
                   "phase": ".", "strand": "+", "score": "."}
        attributes_tas = {"ID": "tran0", "Name": "Transcript_0",
                          "UTR_type": "intergenic"}
        ta = Create_generator(ta_dict, attributes_tas, "gff")
        med_inters = {"aaa": {"intergenic": []}}
        sd.get_coverage = self.mock.mock_get_coverage
        try:
            sd.detect_utr_type(ta, "intergenic", med_inters, "wigs",
                               "+", "test")
        finally:
            # Put the real function back even if the call raised.
            sd.get_coverage = get_coverage
        self.assertDictEqual(med_inters,
                             {'aaa': {'intergenic': ["2"]}})

    def test_median_score(self):
        """Check ``sd.median_score`` with the ``"p_0.5"`` setting."""
        num = sd.median_score([1, 3, 11, 42, 2, 32, 111], "p_0.5")
        self.assertEqual(num, 11)

    def test_mean_score(self):
        """Check the mean ``sd.mean_score`` returns for seven values."""
        num = sd.mean_score([1, 3, 11, 42, 2, 32, 111])
        self.assertEqual(num, 28.857142857142858)

    def test_validate_tss(self):
        """Check the tuple returned by ``sd.validate_tss``."""
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"],
                "start_TSS": "3", "with_TSS": ["TSS:3_+"],
                "srna": ["NA"], "candidate": ["AAA"]}
        datas = sd.validate_tss([2], [6], sorf, 300)
        self.assertEqual(datas, (['TSS:3_+'], 'NA'))

    def test_validate_srna(self):
        """Check the sRNA list returned by ``sd.validate_srna``."""
        sorf = {"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                "type": "3utr", "print": False, "rbs": ["1"],
                "start_TSS": "1", "with_TSS": ["TSS:3_+"],
                "srna": ["sRNA:2-5_+"], "candidate": ["AAA"]}
        srnas = sd.validate_srna([2], [6], sorf)
        self.assertListEqual(srnas, ['sRNA:2-5_+'])

    def test_get_best(self):
        """Check the candidate list returned by ``sd.get_best``."""
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                  "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": ["1"],
                  "start_TSS": "1", "with_TSS": ["TSS:3_+"],
                  "srna": ["sRNA:2-5_+"],
                  "candidate": ["2-6_TSS:3_RBS:1"]}]
        args = self.mock_args.mock()
        args.table_best = True
        args.no_srna = True
        args.utr_length = 300
        data = sd.get_best(sorfs, "tss", "srna", args)
        self.assertListEqual(data, [
            {'type': '3utr', 'strand': '+', 'print': False,
             'with_TSS': ['TSS:3_+'], 'starts': ['2'], 'start': 2,
             'srna': ['sRNA:2-5_+'], 'rbs': ['1'], 'end': 6,
             'seq': 'ATGTA', 'start_TSS': '1', 'strain': 'aaa',
             'ends': ['10'], 'candidate': ['2-6_TSS:3_RBS:1']}])

    def test_coverage_and_output(self):
        """Check the GFF and table text from ``coverage_and_output``."""
        out_t = StringIO()
        out_g = StringIO()
        sorfs = [{"strain": "aaa", "strand": "+", "start": 10,
                  "end": 15, "starts": ["10"], "ends": ["15"],
                  "seq": "ATGTA", "type": "3utr", "print": False,
                  "rbs": [1], "start_TSS": "1",
                  "with_TSS": ["TSS:3_+"], "srna": ["sRNA:2-5_+"],
                  "candidate": ["2-6_TSS:3_RBS:1"]}]
        seq = {"aaa": "TAGGAGGCCGCTATGCCATTA"}
        wigs = {"forward": "wigs_f", "reverse": "wigs_r"}
        args = self.mock_args.mock()
        args.print_all = True
        args.min_rbs = 0
        args.max_rbs = 20
        args.min_len = 0
        args.max_len = 300
        args.table_best = True
        sd.get_coverage = self.mock.mock_get_coverage
        # NOTE(review): this mock is never restored, and
        # test_sorf_detection originally depended on that.
        sd.replicate_comparison = self.mock.mock_replicate_comparison
        try:
            sd.coverage_and_output(sorfs, "median", wigs, out_g, out_t,
                                   "best", seq, "cover", args, "texs",
                                   "final")
        finally:
            sd.get_coverage = copy.deepcopy(get_coverage)
        self.assertEqual(out_g.getvalue(), (
            "##gff-version 3\naaa\tANNOgesic\tsORF\t10\t15\t."
            "\t+\t.\tID=aaa_sorf0;Name=sORF_00000;start_TSS=1;"
            "with_TSS=TSS:3_+;sORF_type=3utr;sRNA=NA;rbs=RBS_1;"
            "frame_shift=1\n"))
        self.assertEqual(out_t.getvalue().split("\n")[1], (
            "aaa\tsORF_00000\t10\t15\t+\t"
            "3'UTR_derived\tTSS:3_+\tRBS_1\t10\t15\tNA\t1"
            "\tFragmented\t20\t50\t10\ttrack_1(avg=20;"
            "high=50;low=10)\tGCTATG\t10-15_TSS:3_+_RBS:1"))

    def test_detect_inter_type(self):
        """Check the per-type lists from ``sd.detect_inter_type``."""
        inter_dict = {"seq_id": "aaa", "source": "UTR_derived",
                      "feature": "Transcript", "start": 1, "end": 23,
                      "phase": ".", "strand": "+", "score": "."}
        attributes_inter = {"ID": "tran0", "Name": "Transcript_0",
                            "UTR_type": "3utr"}
        inters = [Create_generator(inter_dict, attributes_inter, "gff")]
        wigs = {"forward": "wigs_f", "reverse": "wigs_r"}
        sd.get_coverage = self.mock.mock_get_coverage
        try:
            data = sd.detect_inter_type(inters, wigs, "test")
        finally:
            # Restored before asserting so a failed assertion cannot
            # leave the mock installed.
            sd.get_coverage = copy.deepcopy(get_coverage)
        self.assertDictEqual(data, {'aaa': {
            'interCDS': [], '5utr': [], '3utr': ['2']}})

    def test_set_median(self):
        """``sd.set_median`` should fill ``mediandict`` in place."""
        mediandict = {}
        covers = {"aaa": {"3utr": {"track_1": [1, 3, 4, 2, 55]}}}
        coverages = {"3utr": "p_0.5", "5utr": "p_0.5",
                     "interCDS": "n_100"}
        sd.set_median(covers, mediandict, coverages)
        self.assertDictEqual(mediandict, {'aaa': {
            '5utr': {}, 'interCDS': {},
            '3utr': {'track_1': {'median': 3}}}})

    def test_compute_candidate_best(self):
        """``compute_candidate_best`` should add a ``candidate`` entry."""
        sorfs = [{"strain": "aaa", "strand": "+", "start": 2, "end": 6,
                  "starts": ["2"], "ends": ["10"], "seq": "ATGTA",
                  "type": "3utr", "print": False, "rbs": ["1"],
                  "start_TSS": "1", "with_TSS": ["TSS:3_+"],
                  "srna": ["sRNA:2-5_+"]}]
        sd.compute_candidate_best(sorfs)
        self.assertListEqual(sorfs, [
            {'starts': ['2'], 'seq': 'ATGTA', 'strain': 'aaa',
             'ends': ['10'], 'print': False, 'rbs': ['1'],
             'type': '3utr', 'end': 6, 'start': 2,
             'srna': ['sRNA:2-5_+'], 'candidate': ['2-6_TSS:1_RBS:1'],
             'start_TSS': '1', 'strand': '+',
             'with_TSS': ['TSS:3_+']}])

    def test_sorf_detection(self):
        """Run ``sd.sorf_detection`` and check its four output files."""
        fasta = os.path.join(self.fasta, "fasta")
        srna_gff = os.path.join(self.gff, "srna.gff")
        inter_gff = os.path.join(self.gff, "inter.gff")
        tss_file = os.path.join(self.gff, "tss.gff")
        gen_file(fasta, ">aaa\nTAGGAGGCCGCTATGCCATTA")
        gen_file(srna_gff, self.example.srna)
        gen_file(inter_gff, self.example.inter)
        gen_file(tss_file, self.example.tss)
        args = self.mock_args.mock()
        args.start_codon = ["ATG"]
        args.stop_codon = ["TTA"]
        args.cutoff_5utr = "p_0.5"
        args.cutoff_intercds = "n_20"
        args.cutoff_3utr = "n_11"
        args.cutoff_inter = 50
        args.cutoff_anti = 50
        args.libs = ["frag:frag:1:a:+"]
        args.merge_wigs = "wig_folder"
        args.utr_detect = True
        args.background = 10
        args.print_all = True
        sd.get_coverage = self.mock.mock_get_coverage
        sd.read_libs = self.mock.mock_read_libs
        sd.read_wig = self.mock.mock_read_wig
        sd.get_inter_coverage = self.mock.mock_get_inter_coverage
        # Installed before the call so the test does not depend on an
        # earlier test having left this mock in place. As before, it is
        # not restored afterwards.
        sd.replicate_comparison = self.mock.mock_replicate_comparison
        try:
            sd.sorf_detection(fasta, srna_gff, inter_gff, tss_file,
                              "wig_f_file", "wig_r_file",
                              "test_folder/test", args)
        finally:
            # NOTE(review): only get_coverage is restored here, matching
            # the original; read_libs, read_wig and get_inter_coverage
            # stay mocked.
            sd.get_coverage = copy.deepcopy(get_coverage)
        for produced in ("test_folder/test_all.csv",
                         "test_folder/test_all.gff",
                         "test_folder/test_best.csv",
                         "test_folder/test_best.gff"):
            self.assertTrue(os.path.exists(produced))
class TestSNPCalling(unittest.TestCase):
    """Tests for ``SNPCalling`` with its external tool calls mocked.

    Tests that need a log file open it in a ``with`` block so the handle
    is closed before ``tearDown`` deletes the scratch folder.
    """

    def setUp(self):
        """Create the folder layout and the ``SNPCalling`` instance."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
            os.mkdir(self.fasta)
            os.mkdir(self.snp_folder)
            os.mkdir(self.table)
            # Parents are listed before their children.
            for relative in (
                    "compare_related_and_reference_genomes",
                    "compare_related_and_reference_genomes/seqs",
                    "compare_related_and_reference_genomes/seqs/with_BAQ",
                    "compare_related_and_reference_genomes/statistics",
                    "compare_related_and_reference_genomes/SNP_raw_outputs"):
                os.mkdir(os.path.join(self.test_folder, relative))
        args = self.mock_args.mock()
        args.types = "related_genome"
        args.out_folder = self.test_folder
        args.fastas = self.fasta
        self.snp = SNPCalling(args)
        self.mock = Mock_func()

    def tearDown(self):
        """Remove the scratch folder once a test has finished."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_transcript_snp(self):
        """Check the statistics and sequence files of ``_transcript_snp``."""
        fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta, self.example.fasta)
        snp_folder = os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/SNP_raw_outputs/test")
        os.mkdir(snp_folder)
        snp = os.path.join(snp_folder, "test_with_BAQ_NC_007795.1.vcf")
        gen_file(snp, self.example.snp)
        args = self.mock_args.mock()
        args.depth = 5
        args.fraction = 0.3
        args.quality = 2
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2
        os.mkdir(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test"))
        depth_file = os.path.join(self.test_folder,
                                  "tmp_depthNC_007795.1")
        gen_file(depth_file, self.example.depth_file)
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 1,
                      "bams": "test", "rep": 1}]

        self.snp._transcript_snp(fasta, "test", "with", "test",
                                 bam_datas, self.table, args)

        datas = import_data(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/statistics/"
            "stat_test_with_BAQ_NC_007795.1_SNP_best.csv"))
        self.assertEqual("\n".join(datas), self.example.out_stat)
        datas = import_data(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test/"
            "test_NC_007795.1_NC_007795.1_1_1.fa"))
        self.assertEqual("\n".join(datas),
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        # These files are removed from the working directory rather than
        # the scratch folder, so tearDown would not delete them.
        os.remove("test_NC_007795.1_seq_reference.csv")
        os.remove("test_NC_007795.1_best.vcf")
        os.remove("test_NC_007795.1_NC_007795.1_SNP_QUAL_best.png")
        os.remove("test_NC_007795.1_NC_007795.1_SNP_QUAL_raw.png")

    def test_run_program(self):
        """``_run_program`` should leave ``test_folder/test`` behind."""
        self.snp._run_sub = self.mock.mock_run_sub
        args = self.mock_args.mock()
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 1,
                      "bams": "test", "rep": 1}]
        args.program = ["with_BAQ"]
        log_path = os.path.join(self.test_folder, "test.log")
        with open(log_path, "w") as log:
            self.snp._run_program("fasta", bam_datas, args, log)
        self.assertTrue(
            os.path.exists(os.path.join(self.test_folder, "test")))

    def test_merge_bams(self):
        """``_merge_bams`` should update ``bam_number`` from 0 to 1."""
        args = self.mock_args.mock()
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 0,
                      "bams": "test", "rep": 1}]
        self.snp._run_bam = self.mock.mock_run_bam
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.bams = [args.frag_bams, args.normal_bams]
        args.samtools_path = "test"
        log_path = os.path.join(self.test_folder, "test.log")
        with open(log_path, "w") as log:
            self.snp._merge_bams(args, bam_datas, log)
        self.assertEqual(bam_datas[0]["bam_number"], 1)

    def test_modify_header(self):
        """``_modify_header`` should reduce the header to ``>DDD``."""
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        datas = import_data(os.path.join(self.fasta, "test.fa"))
        self.assertEqual("\n".join(datas), ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        """Run ``_get_genome_name`` with the header lookup mocked."""
        self.snp._get_header = self.mock.mock_get_header
        gen_file(os.path.join(self.test_folder, "header"),
                 self.example.bam)
        args = self.mock_args.mock()
        args.samtools_path = "test"
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 0,
                      "bams": "test", "rep": 1}]
        # No assertion on the return value: the test passes when the
        # call does not raise.
        self.snp._get_genome_name(args, bam_datas)

    def test_run_snp_calling(self):
        """Run ``run_snp_calling`` end to end with its helpers mocked."""
        self.snp._get_header = self.mock.mock_get_header
        self.snp._run_bam = self.mock.mock_run_bam
        self.snp._run_sub = self.mock.mock_run_sub
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"),
                 self.example.bam)
        gen_file(os.path.join(self.test_folder, "whole_reads.bam"),
                 "test")
        gen_file(os.path.join(self.test_folder,
                              "whole_reads_sorted.bam"), "test")
        gen_file(os.path.join(self.test_folder, "tmp_bcf"), "test")
        gen_file(os.path.join(self.fasta, "all.fa.fai"), "test")
        args = self.mock_args.mock()
        args.types = "reference"
        args.program = ["with_BAQ"]
        args.bams = [
            "a1:" + os.path.join(self.test_folder, "frag_bams"),
            "a2:" + os.path.join(self.test_folder, "tex_bams")]
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.samtools_path = "test"
        log_path = os.path.join(self.test_folder, "test.log")
        # No assertion follows: the test passes when the call does not
        # raise.
        with open(log_path, "w") as log:
            self.snp.run_snp_calling(args, log)
class TestsRNADetection(unittest.TestCase):
    """Tests for ``sRNADetection`` with its external-tool hooks mocked out.

    Each test works inside a scratch ``test_folder`` tree that ``setUp``
    creates and ``tearDown`` removes. Methods that would launch external
    programs are replaced on the instance (or on the ``sr`` module) by
    functions from ``Mock_func`` before the method under test is called.
    """

    def setUp(self):
        """Create the scratch folder tree and the detector under test."""
        self.mock_args = MockClass()
        self.example = Example()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.sorf = "test_folder/sORF"
        self.out = "test_folder/output"
        self.trans = "test_folder/trans"
        self.fastas = "test_folder/fastas"
        self.tex = "test_folder/tex"
        self.frag = "test_folder/frag"
        self.pros = "test_folder/pros"
        self.terms = "test_folder/terms"
        # Single location for the log file handed to the methods under test.
        self.log_path = os.path.join(self.test_folder, "test.log")
        # Parents are listed before children. Each folder is checked on its
        # own so a leftover ``test_folder`` from an aborted run does not
        # leave the sub-folders missing.
        folders = (
            self.test_folder,
            self.gffs,
            self.tsss,
            os.path.join(self.tsss, "tmp"),
            self.out,
            self.trans,
            os.path.join(self.trans, "tmp"),
            self.fastas,
            os.path.join(self.fastas, "tmp"),
            self.tex,
            self.frag,
            self.pros,
            os.path.join(self.pros, "tmp"),
            self.sorf,
            os.path.join(self.sorf, "tmp"),
            self.terms,
        )
        for folder in folders:
            if not os.path.exists(folder):
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.tss_folder = self.tsss
        args.pro_folder = self.pros
        args.out_folder = self.out
        args.sorf_file = self.sorf
        args.fastas = self.fastas
        args.trans = self.trans
        args.terms = self.terms
        self.srna = sRNADetection(args)

    def tearDown(self):
        """Remove the scratch tree and stray temp files in the start folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        os.chdir(current_path)
        if os.path.exists("tmp"):
            shutil.rmtree("tmp")
        for leftover in ("tmp_srna.csv", "tmp_srna.gff", "tmp_blast.txt"):
            if os.path.exists(leftover):
                os.remove(leftover)

    def test_check_folder_exist(self):
        path_ = self.srna._check_folder_exist(self.sorf)
        self.assertEqual(path_, "test_folder/sORF/tmp")

    def test_formatdb(self):
        database = "test_folder/test.fa"
        gen_file(database, "test")
        sr.change_format = self.mock.mock_change_format
        self.srna._run_format = self.mock.mock_run_format
        # ``with`` closes the log handle even when the call raises.
        with open(self.log_path, "w") as log:
            self.srna._formatdb(database, "type_", self.out,
                                "blast_path", "sRNA", log)
        self.assertTrue(os.path.exists(os.path.join(self.out, "log.txt")))

    def test_check_necessary_file(self):
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._check_database = self.mock.mock_check_database
        args = self.mock_args.mock()
        args.trans = self.trans
        args.tsss = self.tsss
        args.pros = self.pros
        args.import_info = ["tss", "blast_nr", "blast_srna",
                            "sec_str", "sorf"]
        args.fastas = self.fastas
        args.terms = self.terms
        args.sorf_file = self.sorf
        args.gffs = self.gffs
        args.tex_wigs = self.tex
        args.frag_wigs = self.frag
        args.utr_srna = True
        args.nr_format = True
        args.srna_format = True
        args.nr_database = "test"
        args.srna_database = "test"
        # No assertion follows: the test passes when the call does not raise.
        with open(self.log_path, "w") as log:
            self.srna._check_necessary_file(args, log)

    def test_run_program(self):
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._run_normal = self.mock.mock_run_normal
        self.srna._run_utrsrna = self.mock.mock_run_utrsrna
        self.srna._merge_tex_frag_datas = self.mock.mock_merge_tex_frag_datas
        sr.filter_frag = self.mock.mock_run_filter_frag
        sr.merge_srna_gff = self.mock.mock_merge_srna_gff
        sr.merge_srna_table = self.mock.mock_merge_srna_table
        gen_file(os.path.join(self.gffs, "test.gff"),
                 self.example.sorf_file)
        gen_file(os.path.join(self.trans, "test_transcript.gff"),
                 self.example.sorf_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.sorf_file)
        gen_file(os.path.join(self.tsss, "test_processing.gff"),
                 self.example.sorf_file)
        fuzzy_tsss = {"inter": 3}
        args = self.mock_args.mock()
        args.import_info = ["tss", "blast_nr", "blast_srna",
                            "sec_str", "sorf"]
        args.trans = self.trans
        args.tsss = self.tsss
        args.pros = self.pros
        args.max_len = 300
        args.min_len = 30
        args.tex_notex = "tex_notex"
        args.fuzzy_tsss = fuzzy_tsss
        args.out_folder = self.out
        args.table_best = True
        args.wig_path = "wig_path"
        args.merge_wigs = "merge"
        args.libs = "libs"
        args.gffs = self.gffs
        args.in_cds = False
        args.utr_srna = True
        args.ex_srna = False
        args.cutoff_overlap = 0.5
        args.source = True
        with open(self.log_path, "w") as log:
            prefixs = self.srna._run_program(args, log)
        self.assertListEqual(prefixs, ['test'])

    def test_get_seq_sec(self):
        sr.extract_energy = self.mock.mock_extract_energy
        self.srna.helper.get_seq = self.mock.mock_get_seq
        self.srna._run_RNAfold = self.mock.mock_run_RNAfold
        os.mkdir(os.path.join(self.out, "tmp_srna"))
        gen_file(os.path.join(self.fastas, "test.fa"),
                 ">test\nAAATTTGGGCCC")
        with open(self.log_path, "w") as log:
            datas = self.srna._get_seq_sec(
                self.fastas, self.out, "test", self.test_folder,
                self.test_folder, "vienna_path", log)
        self.assertEqual(datas["sec"].split("/")[-1], "test_folder")
        self.assertEqual(datas["dot"].split("/")[-1], "test_folder")
        self.assertEqual(datas["main"].split("/")[-1],
                         datas["tmp"].split("/")[-4])
        self.assertEqual(datas["tmp"].split("/")[-1], "tmp_srna")

    def test_replot_sec(self):
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        gen_file(os.path.join(self.tsss, "test.rss.ps"), "test")
        gen_file(os.path.join(self.tsss, "test.dp.ps"), "test")
        tmp_paths = {"dot": self.out, "sec": self.fastas, "tmp": self.tsss}
        with open(self.log_path, "w") as log:
            self.srna._replot_sec("vienna_util", tmp_paths, "test", log)
        self.assertTrue(os.path.exists(os.path.join(
            tmp_paths["dot"], "test/test.dp.ps")))
        self.assertTrue(os.path.exists(os.path.join(
            tmp_paths["sec"], "test/test.rss.ps")))

    def test_plot_mountain(self):
        self.srna._run_mountain = self.mock.mock_run_mountain
        tmp_paths = {"main": self.test_folder, "tmp": self.tsss,
                     "dot": self.sorf}
        moun_path = "fastas"
        os.mkdir(os.path.join(tmp_paths["dot"], "test"))
        gen_file(os.path.join(tmp_paths["dot"], "test/test.dp.ps"), "test")
        with open(self.log_path, "w") as log:
            self.srna._plot_mountain(True, moun_path, tmp_paths, "test",
                                     "vienna_util", log)
        # NOTE(review): this asserts a non-empty string literal, which is
        # always true. ``os.path.exists`` on that path was presumably
        # intended; confirm the mocked run actually produces the PDF
        # before tightening the assertion.
        self.assertTrue("test_folder/fastas/test/test.mountain.pdf")

    def test_compute_2d_and_energy(self):
        sr.extract_energy = self.mock.mock_extract_energy
        sr.change_format = self.mock.mock_change_format
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        self.srna._run_mountain = self.mock.mock_run_mountain
        sec_path = os.path.join(self.out, "figs")
        os.mkdir(sec_path)
        os.mkdir(os.path.join(sec_path, "sec_plots"))
        os.mkdir(os.path.join(sec_path, "dot_plots"))
        os.mkdir(os.path.join(sec_path, "mountain_plots"))
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_energy_test"), "test")
        args = self.mock_args.mock()
        args.out_folder = self.out
        args.fastas = self.fastas
        args.rnafold = "test"
        args.relplot_pl = "test"
        args.mountain_pl = "test"
        args.mountain = True
        args.ps2pdf14_path = "test"
        with open(self.log_path, "w") as log:
            self.srna._compute_2d_and_energy(args, ["test"], log)
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")

    def test_blast(self):
        self.srna.helper.merge_blast_out = self.mock.mock_merge_blast_out
        sr.extract_blast = self.mock.mock_extract_blast
        self.srna._run_blast = self.mock.mock_run_blast
        self.srna._run_format = self.mock.mock_run_format
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_nr_test"), "test")
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        args = self.mock_args.mock()
        args.blast_path = "test"
        args.para_blast = 1
        args.fastas = self.fastas
        args.out_folder = self.out
        args.blast_score_s = 0
        args.blast_score_n = 0
        with open(self.log_path, "w") as log:
            self.srna._blast("database", False, "dna", args, ["test"],
                             "blast_all", "nr", 0.0001, "tss", log)
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")

    def test_class_srna(self):
        sr.classify_srna = self.mock.mock_classify_srna
        sr.gen_srna_table = self.mock.mock_gen_srna_table
        gff_out = os.path.join(self.out, "gffs")
        table_out = os.path.join(self.out, "tables")
        stat_out = os.path.join(self.out, "stat")
        os.mkdir(gff_out)
        os.mkdir(table_out)
        os.mkdir(stat_out)
        os.mkdir(os.path.join(table_out, "for_classes"))
        os.mkdir(os.path.join(gff_out, "for_classes"))
        args = self.mock_args.mock()
        args.max_len = 300
        args.min_len = 30
        args.import_info = ["tss", "blast_nr", "blast_srna",
                            "sec_str", "sorf"]
        with open(self.log_path, "w") as log:
            self.srna._class_srna(["test"], args, log)
        self.assertTrue(os.path.exists(os.path.join(
            gff_out, "for_classes/test")))
        self.assertTrue(os.path.exists(os.path.join(
            table_out, "for_classes/test")))

    def test_filter_srna(self):
        # Replace every hook that ``_filter_srna`` may reach.
        self.srna.helper.merge_blast_out = self.mock.mock_merge_blast_out
        sr.classify_srna = self.mock.mock_classify_srna
        sr.gen_srna_table = self.mock.mock_gen_srna_table
        sr.extract_blast = self.mock.mock_extract_blast
        self.srna._run_blast = self.mock.mock_run_blast
        self.srna._run_format = self.mock.mock_run_format
        sr.extract_energy = self.mock.mock_extract_energy
        sr.change_format = self.mock.mock_change_format
        self.srna._run_replot = self.mock.mock_run_replot
        self.srna._convert_pdf = self.mock.mock_convert_pdf
        self.srna._run_mountain = self.mock.mock_run_mountain
        self.srna.multiparser = Mock_multiparser
        self.srna._check_gff = self.mock.mock_check_gff
        self.srna._run_normal = self.mock.mock_run_normal
        self.srna._run_utrsrna = self.mock.mock_run_utrsrna
        sr.merge_srna_gff = self.mock.mock_merge_srna_gff
        sr.merge_srna_table = self.mock.mock_merge_srna_table
        self.srna.helper.get_seq = self.mock.mock_get_seq
        self.srna._run_RNAfold = self.mock.mock_run_RNAfold
        if "mountain_plot" not in os.listdir(self.out):
            os.mkdir(os.path.join(self.out, "mountain_plot"))
        sec_path = os.path.join(self.out, "sec_structure")
        if "sec_structure" not in os.listdir(self.out):
            os.mkdir(sec_path)
            os.mkdir(os.path.join(sec_path, "sec_plot"))
            os.mkdir(os.path.join(sec_path, "dot_plot"))
        gen_file(os.path.join(self.fastas, "tmp/test.fa"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "sRNA_seq_test"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "sRNA_index_test"),
                 ">test\nAAATTTGGGCCC")
        gen_file(os.path.join(self.out, "tmp_basic_test"),
                 self.example.srna_file)
        gen_file(os.path.join(self.out, "tmp_energy_test"), "test")
        gen_file(os.path.join(self.out, "tmp_nr_test"), "test")
        gen_file(os.path.join(self.out, "tmp_sRNA_test"), "test")
        gen_file(os.path.join(self.out, "tmp_sRNA_test.csv"), "test")
        gen_file(os.path.join(self.test_folder, "srna"), "test")
        gen_file(os.path.join(self.test_folder, "nr"), "test")
        sr.blast_class = self.mock.mock_blast_class
        sr.srna_sorf_comparison = self.mock.mock_srna_sorf_comparison
        args = self.mock_args.mock()
        args.import_info = ["tss", "blast_nr", "blast_srna",
                            "sec_str", "sorf"]
        args.out_folder = self.out
        args.fastas = self.fastas
        args.rnafold = "test"
        args.relplot_pl = "test"
        args.mountain_pl = "test"
        args.table_best = True
        args.in_cds = False
        args.ps2pdf14_path = "test"
        args.sorf_file = self.sorf
        args.mountain = True
        args.nr_database = os.path.join(self.test_folder, "nr")
        args.srna_database = os.path.join(self.test_folder, "srna")
        args.blastx = "blast_path"
        args.blastn = "blast_path"
        args.nr_format = False
        args.srna_format = False
        args.compute_sec_str = False
        args.e_nr = 0
        args.e_srna = 0
        args.para_blast = 1
        args.blast_score_s = 0
        args.blast_score_n = 0
        with open(self.log_path, "w") as log:
            self.srna._filter_srna(args, ["test"], log)
        datas = import_data(os.path.join(self.out, "tmp_basic_test"))
        self.assertEqual("\n".join(datas), "test")
class TestCircRNADetection(unittest.TestCase):
    """Tests for ``CircRNADetection`` with segemehl/samtools calls mocked.

    All fixtures live under a scratch ``test_folder`` that ``setUp`` creates
    and ``tearDown`` removes.
    """

    def setUp(self):
        """Create the scratch folders and the detector under test."""
        self.segemehl = Mock_segemehl()
        self.samtools = Mock_samtools()
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        self.fasta_folder = os.path.join(self.test_folder, "fasta")
        self.gff_folder = os.path.join(self.test_folder, "gff")
        self.out_folder = os.path.join(self.test_folder, "output")
        self.read_folder = os.path.join(self.test_folder, "read")
        self.splice_folder = os.path.join(self.test_folder, "splice")
        self.alignment_path = os.path.join(self.out_folder,
                                           "segemehl_align")
        # Single location for the log file handed to the methods under test.
        self.log_path = os.path.join(self.test_folder, "test.log")
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)
        if not os.path.exists(self.fasta_folder):
            os.mkdir(self.fasta_folder)
            os.mkdir(os.path.join(self.fasta_folder, "tmp"))
        for folder in (self.gff_folder, self.out_folder,
                       self.read_folder, self.splice_folder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.output_folder = self.out_folder
        args.gffs = self.gff_folder
        args.align = True
        args.fastas = self.fasta_folder
        self.circ = CircRNADetection(args)

    def tearDown(self):
        """Remove the scratch tree and any stray ``test1``/``test2`` entry."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)
        for stray in ("test1", "test2"):
            if os.path.isfile(stray):
                os.remove(stray)
            elif os.path.isdir(stray):
                shutil.rmtree(stray)

    def test_deal_zip_file(self):
        out1 = os.path.join(self.test_folder, "test1.fa")
        out2 = os.path.join(self.test_folder, "test2")
        gen_file(out1, self.example.fasta_file)
        gen_file(out2, self.example.fasta_file)
        # Relies on the gzip and bzip2 executables being on PATH.
        os.system("gzip " + out1)
        os.system("bzip2 -z " + out2)
        # ``with`` closes the log handle even when the call raises.
        with open(self.log_path, "w") as log:
            reads = self.circ._deal_zip_file(
                [{"sample": "all",
                  "files": [out1 + ".gz", out2 + ".bz2"]}], log)
        self.assertEqual(reads, [{
            'files': ['test_folder/test1.fa.gz',
                      'test_folder/test2.bz2',
                      'test_folder/test1.fa',
                      'test_folder/test2.fa'],
            'zips': ['test_folder/test1.fa', 'test_folder/test2.fa'],
            'sample': 'all'}])
        self.assertTrue(os.path.exists(out1))
        self.assertTrue(os.path.exists(out2 + ".fa"))

    def test_align(self):
        self.circ._run_segemehl_fasta_index = self.segemehl.mock_fasta_index
        self.circ._run_segemehl_align = self.segemehl.mock_align
        self.circ._wait_process = self.segemehl.mock_wait_processes
        fasta1 = os.path.join(self.fasta_folder, "tmp/test1.fa")
        fasta2 = os.path.join(self.fasta_folder, "tmp/test2.fa")
        read1 = os.path.join(self.read_folder, "read1.fa")
        read2 = os.path.join(self.read_folder, "read2.fa")
        for path in (fasta1, fasta2, read1, read2):
            gen_file(path, self.example.fasta_file)
        os.mkdir(os.path.join(self.out_folder, "segemehl_alignment_files"))
        args = self.mock_args.mock()
        args.output_folder = self.out_folder
        args.gffs = self.gff_folder
        args.align = True
        args.fastas = self.fasta_folder
        args.segemehl_path = None
        args.read_files = [read1, read2]
        args.cores = 2
        read_datas = [{"sample": "test", "files": [read1, read2]}]
        with open(self.log_path, "w") as log:
            align_results, prefixs = self.circ._align(args, read_datas, log)
        self.assertEqual(
            set(align_results),
            {'read1_test1', 'read2_test1', 'read1_test2', 'read2_test2'})
        self.assertEqual(set(prefixs), {'test1', 'test2'})

    def test_convert_sam2bam(self):
        self.circ._run_samtools_convert_bam = self.samtools.mock_covert_bam
        sam1 = os.path.join(self.test_folder, "test1.sam")
        sam2 = os.path.join(self.test_folder, "test2.sam")
        bam = os.path.join(self.test_folder, "test3.bam")
        gen_file(sam1, self.example.align_file)
        gen_file(sam2, self.example.align_file)
        gen_file(bam, self.example.align_file)
        bam1 = sam1.replace("sam", "bam")
        bam2 = sam2.replace("sam", "bam")
        with open(self.log_path, "w") as log:
            # First pass: "test1" is listed as an alignment file.
            bam_files, convert_ones, remove_ones = \
                self.circ._convert_sam2bam(
                    self.test_folder, None, ["test1"], log)
            self.assertEqual(set(bam_files), {bam, bam1, bam2})
            self.assertEqual(set(convert_ones), {bam2})
            self.assertEqual(set(remove_ones), {sam1})
            # Second pass: only "test3" is listed.
            bam_files, convert_ones, remove_ones = \
                self.circ._convert_sam2bam(
                    self.test_folder, None, ["test3"], log)
            self.assertEqual(set(convert_ones), {bam2, bam1})
            self.assertEqual(set(remove_ones), set())

    def test_merge_bed(self):
        fasta1 = os.path.join(self.fasta_folder, "test1.fa")
        fasta2 = os.path.join(self.fasta_folder, "test2.fa")
        header1 = os.path.join(self.splice_folder,
                               "Staphylococcus_aureus_HG003")
        header2 = os.path.join(self.splice_folder, "aaa")
        header3 = os.path.join(self.splice_folder, "bbb")
        os.mkdir(header1)
        os.mkdir(header2)
        os.mkdir(header3)
        splice1 = os.path.join(
            header1, "Staphylococcus_aureus_HG003_a1_splicesites.bed")
        splice2 = os.path.join(header2, "aaa_a1_splicesites.bed")
        splice3 = os.path.join(header3, "bbb_a1_splicesites.bed")
        tran1 = os.path.join(
            header1, "Staphylococcus_aureus_HG003_a1_transrealigned.bed")
        tran2 = os.path.join(header2, "aaa_a1_transrealigned.bed")
        tran3 = os.path.join(header3, "bbb_a1_transrealigned.bed")
        gen_file(fasta1, self.example.fasta_file)
        gen_file(fasta2, self.example.multi_fasta_file)
        for splice in (splice1, splice2, splice3):
            gen_file(splice, self.example.splice_file)
        for tran in (tran1, tran2, tran3):
            gen_file(tran, self.example.tran_file)
        prefixs = self.circ._merge_bed(self.fasta_folder,
                                       self.splice_folder, self.out_folder)
        self.assertEqual(set(prefixs[1]), {"test1", "test2"})
        self.assertEqual(prefixs[0][0], "_a1_")
        expected_outputs = (
            ("test1", "test1_a1_splicesites.bed"),
            ("test1", "test1_a1_transrealigned.bed"),
            ("test2", "test2_a1_splicesites.bed"),
            ("test2", "test2_a1_transrealigned.bed"),
            ("test2", "tmp_bbb_a1_splicesites.bed"),
            ("test2", "tmp_aaa_a1_splicesites.bed"),
            ("test2", "tmp_aaa_a1_transrealigned.bed"),
            ("test2", "tmp_bbb_a1_transrealigned.bed"),
        )
        for prefix, bed_name in expected_outputs:
            self.assertTrue(os.path.exists(
                os.path.join(self.out_folder, prefix, bed_name)))

    def test_combine_read_bam(self):
        align_folder = os.path.join(self.out_folder,
                                    "segemehl_alignment_files")
        bam_datas = [
            {"sample": "aaa",
             "files": [os.path.join(align_folder, "aaa1.bam"), "aaa2.bam"]},
            {"sample": "bbb", "files": ["bbb1.bam", "bbb2.bam"]}]
        read_datas = [{"sample": "aaa",
                       "files": ["aaa1.fa", "aaa3.fa", "aaa4.fa"]}]
        bam_files = [os.path.join(align_folder, "aaa1.bam"),
                     os.path.join(align_folder, "aaa3.bam")]
        self.circ._combine_read_bam(bam_files, bam_datas, read_datas)
        self.assertDictEqual(bam_datas[0], {
            'files': ['test_folder/output/segemehl_alignment_files/aaa1.bam',
                      'aaa2.bam',
                      'test_folder/output/segemehl_alignment_files/aaa3.bam'],
            'sample': 'aaa'})
class TestRATT(unittest.TestCase):
    """Tests for ``RATT`` using the call signatures without a log argument.

    Fixtures live under a scratch ``test_folder`` that ``setUp`` creates and
    ``tearDown`` removes; the RATT runner is mocked where a test would
    otherwise invoke it.
    """

    def setUp(self):
        """Create the scratch folders and the ``RATT`` instance under test."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.ref_embls = "test_folder/embls"
        self.output_path = "test_folder/output"
        self.tar_fastas = "test_folder/tar_fasta"
        self.ref_fastas = "test_folder/ref_fasta"
        self.gff_outfolder = "test_folder/gffs"
        self.ref_gbk = "test_folder/gbk"
        # Each folder is checked on its own so a leftover ``test_folder``
        # from an aborted run does not leave the sub-folders missing.
        for folder in (self.test_folder, self.ref_embls, self.ref_gbk,
                       self.output_path, self.tar_fastas, self.ref_fastas,
                       self.gff_outfolder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.ref_embls = self.ref_embls
        args.ref_gbk = self.ref_gbk
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.gff_outfolder = self.gff_outfolder
        self.ratt = RATT(args)
        self.example = Example()

    def tearDown(self):
        """Remove the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_convert_to_pttrnt(self):
        files = ["aaa.gff"]
        gen_file(os.path.join(self.test_folder, "aaa.gff"),
                 self.example.gff_file)
        os.mkdir(os.path.join(self.tar_fastas, "tmp"))
        gen_file(os.path.join(self.tar_fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        self.ratt._convert_to_pttrnt(self.test_folder, files)
        data = import_data(os.path.join(self.test_folder, "aaa.rnt"))
        self.assertEqual("\n".join(data), self.example.rnt_file)
        data = import_data(os.path.join(self.test_folder, "aaa.ptt"))
        self.assertEqual("\n".join(data), self.example.ptt_file)

    def test_convert_to_gff(self):
        files = ["aaa.gff"]
        ratt_result = "chromosome.aaa.final.embl"
        gen_file(os.path.join(self.output_path, ratt_result),
                 self.example.embl_file)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        self.ratt._convert_to_gff(ratt_result, args, files)
        # The converted GFF is expected in both output locations.
        data = import_data(os.path.join(self.output_path, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)
        data = import_data(os.path.join(self.gff_outfolder, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)

    def test_parser_embl_gbk(self):
        files = [os.path.join(self.test_folder, "aaa.gbk")]
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file)
        self.ratt._parser_embl_gbk(files)
        # The example file holds two records separated by "//"; each one is
        # expected in its own file named after its accession.
        data = import_data(
            os.path.join(self.ref_gbk, "gbk_tmp/NC_007795.1.gbk"))
        self.assertEqual(
            "\n".join(data), self.example.gbk_file.split("//")[0] + "//")
        data = import_data(
            os.path.join(self.ref_gbk, "gbk_tmp/NC_007799.1.gbk"))
        self.assertEqual(
            "\n".join(data),
            self.example.gbk_file.split("//")[1].strip() + "\n//")

    def test_convert_embl(self):
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        out = self.ratt._convert_embl(self.test_folder)
        self.assertEqual(out, "test_folder/gbk/gbk_tmp")
        self.assertTrue(os.path.exists("test_folder/gbk/gbk_tmp"))

    def test_format_and_run(self):
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        # The original line was the bare expression ``args.output_path``,
        # which does nothing; the assignment is the evident intent.
        args.output_path = self.output_path
        args.pairs = ["NC_007795.1:Staphylococcus_aureus_HG003"]
        args.element = "chromosome"
        # No assertion follows: the test passes when the call does not raise.
        self.ratt._format_and_run(args)

    def test_annotation_transfer(self):
        gen_file(os.path.join(self.ref_fastas, "aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tar_fastas, "bbb.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.ref_embls, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        args.element = "element"
        args.ref_embls = self.ref_embls
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        args.pairs = ["aaa:bbb"]
        args.convert = True
        self.ratt.annotation_transfer(args)
        for suffix in ("gff", "rnt", "ptt"):
            self.assertTrue(os.path.exists(
                os.path.join(self.gff_outfolder, "bbb." + suffix)))
class TestRATT(unittest.TestCase):
    """Tests for ``RATT`` using the call signatures that take a log handle.

    Fixtures live under a scratch ``test_folder`` that ``setUp`` creates and
    ``tearDown`` removes; the RATT runner is mocked where a test would
    otherwise invoke it.
    """

    def setUp(self):
        """Create the scratch folders and the ``RATT`` instance under test."""
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.ref_embls = "test_folder/embls"
        self.output_path = "test_folder/output"
        self.tar_fastas = "test_folder/tar_fasta"
        self.ref_fastas = "test_folder/ref_fasta"
        self.gff_outfolder = "test_folder/gffs"
        self.ref_gbk = "test_folder/gbk"
        # Single location for the log file handed to the methods under test.
        self.log_path = os.path.join(self.test_folder, "test.log")
        # Each folder is checked on its own so a leftover ``test_folder``
        # from an aborted run does not leave the sub-folders missing.
        for folder in (self.test_folder, self.ref_embls, self.ref_gbk,
                       self.output_path, self.tar_fastas, self.ref_fastas,
                       self.gff_outfolder):
            if not os.path.exists(folder):
                os.mkdir(folder)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.ref_embls = self.ref_embls
        args.ref_gbk = self.ref_gbk
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.gff_outfolder = self.gff_outfolder
        self.ratt = RATT(args)
        self.example = Example()

    def tearDown(self):
        """Remove the scratch tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_convert_to_pttrnt(self):
        files = ["aaa.gff"]
        gen_file(os.path.join(self.test_folder, "aaa.gff"),
                 self.example.gff_file)
        os.mkdir(os.path.join(self.tar_fastas, "tmp"))
        gen_file(os.path.join(self.tar_fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        # ``with`` closes the log handle even when the call raises.
        with open(self.log_path, "w") as log:
            self.ratt._convert_to_pttrnt(self.test_folder, files, log)
        data = import_data(os.path.join(self.test_folder, "aaa.rnt"))
        self.assertEqual("\n".join(data), self.example.rnt_file)
        data = import_data(os.path.join(self.test_folder, "aaa.ptt"))
        self.assertEqual("\n".join(data), self.example.ptt_file)

    def test_convert_to_gff(self):
        files = ["aaa.gff"]
        ratt_result = "chromosome.aaa.final.embl"
        gen_file(os.path.join(self.output_path, ratt_result),
                 self.example.embl_file)
        args = self.mock_args.mock()
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        with open(self.log_path, "w") as log:
            self.ratt._convert_to_gff(ratt_result, args, files, log)
        # The converted GFF is expected in both output locations.
        data = import_data(os.path.join(self.output_path, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)
        data = import_data(os.path.join(self.gff_outfolder, "aaa.gff"))
        self.assertEqual("\n".join(data), self.example.embl_gff)

    def test_parser_embl_gbk(self):
        files = [os.path.join(self.test_folder, "aaa.gbk")]
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file)
        self.ratt._parser_embl_gbk(files)
        # The example file holds two records separated by "//"; each one is
        # expected in its own file named after its accession.
        data = import_data(
            os.path.join(self.ref_gbk, "gbk_tmp/NC_007795.1.gbk"))
        self.assertEqual(
            "\n".join(data), self.example.gbk_file.split("//")[0] + "//")
        data = import_data(
            os.path.join(self.ref_gbk, "gbk_tmp/NC_007799.1.gbk"))
        self.assertEqual(
            "\n".join(data),
            self.example.gbk_file.split("//")[1].strip() + "\n//")

    def test_convert_embl(self):
        gen_file(os.path.join(self.test_folder, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        with open(self.log_path, "w") as log:
            out = self.ratt._convert_embl(self.test_folder, log)
        self.assertEqual(out, "test_folder/gbk/gbk_tmp")
        self.assertTrue(os.path.exists("test_folder/gbk/gbk_tmp"))

    def test_format_and_run(self):
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        # The original line was the bare expression ``args.output_path``,
        # which does nothing; the assignment is the evident intent.
        args.output_path = self.output_path
        args.pairs = ["NC_007795.1:Staphylococcus_aureus_HG003"]
        args.element = "chromosome"
        # No assertion follows: the test passes when the call does not raise.
        with open(self.log_path, "w") as log:
            self.ratt._format_and_run(args, log)

    def test_annotation_transfer(self):
        gen_file(os.path.join(self.ref_fastas, "aaa.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.tar_fastas, "bbb.fa"),
                 self.example.fasta_file)
        gen_file(os.path.join(self.ref_embls, "aaa.gbk"),
                 self.example.gbk_file.split("//")[0])
        self.ratt._run_ratt = Mock_func().mock_run_ratt
        args = self.mock_args.mock()
        args.element = "element"
        args.ref_embls = self.ref_embls
        args.tar_fastas = self.tar_fastas
        args.ref_fastas = self.ref_fastas
        args.output_path = self.output_path
        args.gff_outfolder = self.gff_outfolder
        args.pairs = ["aaa:bbb"]
        args.convert = True
        with open(self.log_path, "w") as log:
            self.ratt.annotation_transfer(args, log)
        for suffix in ("gff", "rnt", "ptt"):
            self.assertTrue(os.path.exists(
                os.path.join(self.gff_outfolder, "bbb." + suffix)))
class TestTranscripSNP(unittest.TestCase):
    """Tests for the SNP helpers exposed by the ``ts`` module.

    Every test works inside a scratch ``test_folder`` directory that is
    created in ``setUp`` and removed again in ``tearDown``.
    """

    def setUp(self):
        """Create the mock/example fixtures and the scratch folder."""
        self.mock_args = MockClass()
        self.example = Example()
        self.test_folder = "test_folder"
        if not os.path.exists(self.test_folder):
            os.mkdir(self.test_folder)

    def tearDown(self):
        """Delete the scratch folder and everything a test wrote into it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _in_folder(self, name):
        """Return the path of ``name`` inside the scratch folder."""
        return os.path.join(self.test_folder, name)

    def _assert_files_exist(self, names):
        """Assert, in order, that each of ``names`` exists in the scratch folder."""
        for name in names:
            self.assertTrue(os.path.exists(self._in_folder(name)))

    def test_import_data(self):
        """``ts.import_data`` yields the max qualities and parsed SNP records."""
        vcf_path = self._in_folder("snp")
        gen_file(vcf_path, self.example.snp_file)
        depth_path = self._in_folder("depth")
        gen_file(depth_path, self.example.depth_file)

        args = self.mock_args.mock()
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2

        max_quals, snps, dess, raw_snps = ts.import_data(
            vcf_path, args, 2, depth_path)

        self.assertDictEqual(
            max_quals, {'NC_007795.1': 98.0, 'All_strain': 98.0})
        expected_snps = [
            {
                'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.',
                'indel': -1, 'pos': 1, 'id': '.',
                'all_info': 'NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;DP4=0,0,60,9;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                'qual': 98.0,
                'info': ['DP=89', 'DP4=0,0,60,9', 'VDB=8.46526e-15'],
                'alt': 'A', 'ref': 'C', 'frac': -1, 'depth': 89,
                'dp4_sum': 69,
            },
            {
                'dp4_frac': 1.0, 'strain': 'NC_007795.1', 'filter': '.',
                'indel': 22, 'pos': 6, 'id': '.',
                'all_info': 'NC_007795.1\t6\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;IMF=0.536585;DP=41;VDB=9.36323e-14;DP4=0,0,40,0\tGT:PL:DP\t0/1:60,0,55:40',
                'qual': 26.9515,
                'info': ['INDEL', 'IDV=22', 'IMF=0.536585', 'DP=41',
                         'VDB=9.36323e-14', 'DP4=0,0,40,0'],
                'alt': 'AA', 'ref': 'A', 'frac': 0.536585, 'depth': 41,
                'dp4_sum': 40,
            },
        ]
        self.assertListEqual(snps, expected_snps)

    def test_check_overlap(self):
        """``ts.check_overlap`` flags the overlaps and stores them in ``snps``."""
        snps = {"test": []}
        overlaps = [{"test": []}]
        ts.check_overlap(snps, overlaps)
        self.assertListEqual(overlaps, [{'test': [], 'print': True}])
        self.assertDictEqual(snps, {'test': [{'test': [], 'print': True}]})

    def test_overlap_position(self):
        """``ts.overlap_position`` separates conflicting SNPs from the rest."""
        qual_snps = [
            {
                'filter': '.', 'pos': 22181, 'alt': 'A', 'frac': -1,
                'depth': 89, 'indel': -1, 'info': 'MQ=20', 'id': '.',
                'qual': 98.0, 'ref': 'CA', 'strain': 'NC_007795.1',
                'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87',
            },
            {
                'filter': '.', 'pos': 22182, 'alt': 'C', 'frac': -1,
                'depth': 89, 'indel': -1, 'info': 'MQ=20', 'id': '.',
                'qual': 98.0, 'ref': 'A', 'strain': 'NC_007795.1',
                'all_info': 'NC_007795.1\t22182\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87',
            },
            {
                'filter': '.', 'pos': 30000, 'alt': 'A', 'frac': -1,
                'depth': 89, 'indel': -1, 'info': 'MQ=20', 'id': '.',
                'qual': 98.0, 'ref': 'C', 'strain': 'NC_007795.1',
                'all_info': 'NC_007795.1\t30000\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87',
            },
        ]

        conflicts, nooverlap = ts.overlap_position(qual_snps)

        # Expected records: the three inputs, each carrying 'print': True.
        at_22181 = {
            'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1,
            'qual': 98.0, 'ref': 'CA', 'frac': -1, 'alt': 'A', 'depth': 89,
            'print': True, 'pos': 22181, 'filter': '.', 'id': '.',
            'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87',
        }
        at_22182 = {
            'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1,
            'qual': 98.0, 'ref': 'A', 'frac': -1, 'alt': 'C', 'depth': 89,
            'print': True, 'pos': 22182, 'filter': '.', 'id': '.',
            'all_info': 'NC_007795.1\t22182\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87',
        }
        at_30000 = {
            'strain': 'NC_007795.1', 'info': 'MQ=20', 'indel': -1,
            'qual': 98.0, 'ref': 'C', 'frac': -1, 'alt': 'A', 'depth': 89,
            'print': True, 'pos': 30000, 'filter': '.', 'id': '.',
            'all_info': 'NC_007795.1\t30000\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15;SGB=-0.693147\tGT:PL:DP\t1/1:125,184,0:87',
        }
        self.assertListEqual(conflicts, [[at_22181, at_22182]])
        self.assertDictEqual(
            nooverlap,
            {1: [at_22181, at_30000], 2: [at_22182, at_30000]})

    def test_stat(self):
        """``ts.stat`` writes the ``_best.csv`` statistics file."""
        stat_prefix = self._in_folder("stat")
        max_quals = {'NC_007795.1': 98.0, 'All_strain': 98.0}
        trans_snps = [{
            'filter': '.', 'pos': 22181, 'alt': 'A', 'frac': -1,
            'depth': 89, 'indel': -1, 'info': 'MQ=20', 'id': '.',
            'qual': 98.0, 'ref': 'C', 'strain': 'NC_007795.1',
            'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
        }]
        args = self.mock_args.mock()
        args.depth = 50
        args.fraction = 0.3
        args.quality = 20

        ts.stat(max_quals, trans_snps, 2, stat_prefix,
                self.test_folder + "/test", args, "best.csv")

        written = import_data(stat_prefix + "_best.csv")
        self.assertEqual("\n".join(written), self.example.stat)

    def test_plot_bar(self):
        """``ts.plot_bar`` produces the per-strain quality plot."""
        ts.plot_bar([3, 10, 30, 45, 50], "NC_007795.1",
                    self.test_folder + "/test", "best.png")
        self._assert_files_exist(["test_NC_007795.1_SNP_QUAL_best.png"])

    def test_read_fasta(self):
        """``ts.read_fasta`` returns one ``{name: sequence}`` dict per record."""
        fasta_path = self._in_folder("NC_007795.1.fa")
        gen_file(fasta_path, self.example.fasta_file)
        seqs = ts.read_fasta(fasta_path)
        self.assertListEqual(
            seqs, [{'NC_007795.1': 'AAATATATCAGCACCGTAGACGATAGAGTAGTAC'}])

    def test_gen_ref(self):
        """``ts.gen_ref`` extends the reference labels on successive calls."""

        def fresh_snps():
            # Build new records for every call so the two runs share nothing.
            return [
                {
                    'filter': '.', 'pos': 22181, 'alt': 'A', 'frac': -1,
                    'depth': 89, 'indel': -1, 'info': 'MQ=20', 'id': '.',
                    'qual': 98.0, 'ref': 'C', 'strain': 'NC_007795.1',
                    'all_info': 'NC_007795.1\t22181\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                },
                {
                    'filter': '.', 'pos': 22500, 'alt': 'A', 'frac': -1,
                    'depth': 89, 'indel': -1, 'info': 'MQ=20', 'id': '.',
                    'qual': 98.0, 'ref': 'C', 'strain': 'NC_007795.1',
                    'all_info': 'NC_007795.1\t22500\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                },
            ]

        refs = []
        refs = ts.gen_ref(fresh_snps(), 1, refs, 1)
        self.assertListEqual(refs, ['1:A', '1:A'])

        refs = ts.gen_ref(fresh_snps(), 1, refs, 2)
        self.assertListEqual(
            refs, ['1:A_1:A', '1:A_1:A', '1:A_1:A', '1:A_1:A'])

    def test_change(self):
        """``ts.change`` rewrites the sequence in place for a single SNP."""
        snp = {
            'filter': '.', 'pos': 1, 'alt': 'A', 'frac': -1, 'depth': 89,
            'indel': -1, 'info': 'MQ=20', 'id': '.', 'qual': 98.0,
            'ref': 'C', 'strain': 'NC_007795.1',
            'all_info': 'NC_007795.1\t1\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
        }
        seq = {"num_mod": 3, "seq": "CCCCATATCAGCACCGTAGACGATAGAGTAGTAC"}
        ts.change(snp, seq)
        self.assertDictEqual(
            seq,
            {'num_mod': 3, 'seq': 'CCCaATATCAGCACCGTAGACGATAGAGTAGTAC'})

    def test_print_file(self):
        """``ts.print_file`` writes the reference table and the fasta files."""

        def multi_alt_snp():
            # Appears in both ``conflicts`` and ``values``; a new dict each
            # time keeps the two containers independent.
            return {
                'all_info': 'NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                'filter': '.', 'id': '.', 'frac': -1, 'indel': -1,
                'alt': 'A,GT', 'info': 'VDB=8.46526e-15', 'qual': 98.0,
                'ref': 'CA', 'strain': 'NC_007795.1', 'depth': 89,
                'pos': 1, 'print': True,
            }

        refs = {'NC_007795.1': ['1:A', '1:GT']}
        conflicts = [[
            multi_alt_snp(),
            {
                'all_info': 'NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;IMF=0.536585;DP=41;VDB=9.36323e-14 GT:PL:DP\t0/1:60,0,55:40',
                'filter': '.', 'id': '.', 'frac': 0.536585, 'indel': 22,
                'alt': 'AA', 'info': 'VDB=9.36323e-14 GT:PL:DP',
                'qual': 26.9515, 'ref': 'A', 'strain': 'NC_007795.1',
                'depth': 41, 'pos': 2, 'print': True,
            },
        ]]
        values = [
            multi_alt_snp(),
            {
                'all_info': 'NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
                'filter': '.', 'id': '.', 'frac': -1, 'indel': -1,
                'alt': 'A', 'info': 'VDB=8.46526e-15', 'qual': 98.0,
                'ref': 'C', 'strain': 'NC_007795.1', 'depth': 89,
                'pos': 7, 'print': True,
            },
        ]
        mod_seq_init = {
            'genome': 'NC_007795.1', 'num_mod': 0,
            'seq': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC',
        }
        mod_seqs = [
            {'genome': 'NC_007795.1', 'num_mod': -1,
             'seq': 'aGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'},
            {'genome': 'NC_007795.1', 'num_mod': 0,
             'seq': 'gtGTACaCTCAGCACCGTAGACGATAGAGTAGTAC'},
        ]
        out_ref = StringIO()
        out_seq = self._in_folder("seq")

        ts.print_file(refs, out_ref, conflicts, 1, values,
                      mod_seq_init, mod_seqs, out_seq)

        self.assertEqual(
            out_ref.getvalue(),
            "1\t1\t1\t1:A\tNC_007795.1\n1\t1\t2\t1:GT\tNC_007795.1\n")
        self._assert_files_exist([
            "seq_NC_007795.1_1_1.fa",
            "seq_NC_007795.1_1_2.fa",
        ])

    def test_gen_new_fasta(self):
        """``ts.gen_new_fasta`` emits one fasta per SNP combination."""

        def multi_alt_snp():
            return {
                'strain': 'NC_007795.1', 'print': True, 'id': '.',
                'alt': 'A,GT', 'filter': '.', 'frac': -1, 'ref': 'CA',
                'depth': 89, 'info': 'VDB=8.46526e-15', 'indel': -1,
                'qual': 98.0, 'pos': 1,
                'all_info': 'NC_007795.1\t1\t.\tCA\tA,GT\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
            }

        def insertion_snp():
            return {
                'strain': 'NC_007795.1', 'print': True, 'id': '.',
                'alt': 'AA', 'filter': '.', 'frac': 0.536585, 'ref': 'A',
                'depth': 41, 'info': 'VDB=9.36323e-14 GT:PL:DP',
                'indel': 22, 'qual': 26.9515, 'pos': 2,
                'all_info': 'NC_007795.1\t2\t.\tA\tAA\t26.9515\t.\tINDEL;IDV=22;IMF=0.536585;DP=41;VDB=9.36323e-14 GT:PL:DP\t0/1:60,0,55:40',
            }

        def plain_snp():
            return {
                'strain': 'NC_007795.1', 'print': True, 'id': '.',
                'alt': 'A', 'filter': '.', 'frac': -1, 'ref': 'C',
                'depth': 89, 'info': 'VDB=8.46526e-15', 'indel': -1,
                'qual': 98.0, 'pos': 7,
                'all_info': 'NC_007795.1\t7\t.\tC\tA\t98\t.\tDP=89;VDB=8.46526e-15\tGT:PL:DP\t1/1:125,184,0:87',
            }

        out_ref = StringIO()
        out_seq = self._in_folder("seq")
        nooverlap = {
            1: [multi_alt_snp(), plain_snp()],
            2: [insertion_snp(), plain_snp()],
        }
        seqs = [{'NC_007795.1': 'CAGTACCCTCAGCACCGTAGACGATAGAGTAGTAC'}]
        conflicts = [[multi_alt_snp(), insertion_snp()]]

        ts.gen_new_fasta(nooverlap, seqs, out_ref, conflicts, out_seq)

        self.assertEqual(
            out_ref.getvalue(),
            "1\t1\t1\t1:A\tNC_007795.1\n1\t1\t2\t1:GT\tNC_007795.1\n")
        self._assert_files_exist([
            "seq_NC_007795.1_1_1.fa",
            "seq_NC_007795.1_1_2.fa",
            "seq_NC_007795.1_2_1.fa",
        ])

    def test_snp_detect(self):
        """``ts.snp_detect`` runs end to end and leaves all output files."""
        depth_path = self._in_folder("depth")
        gen_file(depth_path, self.example.depth_file)
        fasta_path = self._in_folder("NC_007795.1.fa")
        gen_file(fasta_path, self.example.fasta_final)
        vcf_path = self._in_folder("NC_007795.1.snp")
        gen_file(vcf_path, self.example.snp_final)
        out_seq = self._in_folder("seq")
        out_snp = self._in_folder("snp")
        stat_prefix = self._in_folder("stat")

        args = self.mock_args.mock()
        args.depth = 5
        args.fraction = 0.3
        args.quality = 5
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2

        ts.snp_detect(fasta_path, vcf_path, depth_path, out_snp, out_seq,
                      2, stat_prefix, args)

        self._assert_files_exist([
            "seq_NC_007795.1_1_1.fa",
            "seq_NC_007795.1_1_2.fa",
            "seq_NC_007795.1_2_1.fa",
            "snp_seq_reference.csv",
            "snp_best.vcf",
            "snp_NC_007795.1_SNP_QUAL_best.png",
            "snp_NC_007795.1_SNP_QUAL_raw.png",
        ])
class TestOptimizeTSSpredator(unittest.TestCase):
    """Tests for the parameter-optimisation helpers in the ``ot`` module.

    Each test runs against a scratch ``test_folder`` directory that is
    created in ``setUp`` and removed in ``tearDown``. Tests that replace
    functions on ``ot`` do so through ``_patch_ot`` so the originals are
    restored once the test has finished.
    """

    # NOTE(review): a second class named ``TestOptimizeTSSpredator`` is
    # defined further down in this file; the later definition rebinds the
    # name at import time, so confirm which of the two should be collected.

    # Placeholder wiggle files written by the library-import tests.
    _DUMMY_WIGS = (
        "GSM1649587_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
        "GSM1649587_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
        "GSM1649588_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
        "GSM1649588_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
    )

    def setUp(self):
        """Create the example/mock fixtures and the scratch folder."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        os.makedirs(self.test_folder, exist_ok=True)

    def tearDown(self):
        """Remove the scratch folder and everything written into it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch_ot(self, name, replacement):
        """Swap ``ot.<name>`` for ``replacement`` until the current test ends."""
        missing = object()
        original = getattr(ot, name, missing)
        setattr(ot, name, replacement)
        if original is missing:
            self.addCleanup(delattr, ot, name)
        else:
            self.addCleanup(setattr, ot, name, original)

    def _mock_tsspredator(self):
        """Replace the TSSpredator runner and the gff converter with mocks."""
        self._patch_ot("run_TSSpredator_paralle",
                       Mock_func().mock_run_TSSpredator_paralle)
        self._patch_ot("convert2gff", Mock_func().mock_convert2gff)

    def _make_wig_folder(self):
        """Create ``<test_folder>/wigs/tmp`` and return its path."""
        wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
        os.makedirs(wig_folder, exist_ok=True)
        return wig_folder

    def _write_dummy_wigs(self, wig_folder):
        """Write the placeholder wiggle files into ``wig_folder``."""
        for name in self._DUMMY_WIGS:
            gen_file(os.path.join(wig_folder, name), "test")

    def test_initiate(self):
        """``ot.initiate`` builds the limits, zeroed parameters and indexes."""
        args = self.mock_args.mock()
        args.height = 0.9
        args.height_reduction = 0.8
        args.factor = 0.9
        args.factor_reduction = 0.8
        args.base_height = 0.01
        args.enrichment = 0.5
        args.processing = 0.5
        max_num, best_para, current_para, indexs = ot.initiate(args)
        self.assertDictEqual(max_num, {
            're_factor': 0.8, 'processing': 0.5, 'enrichment': 0.5,
            'height': 0.9, 'base_height': 0.01, 're_height': 0.8,
            'factor': 0.9})
        self.assertDictEqual(best_para, {
            're_factor': 0, 'processing': 0, 'enrichment': 0, 'height': 0,
            'base_height': 0, 're_height': 0, 'factor': 0})
        self.assertDictEqual(current_para, {
            're_factor': 0, 'processing': 0, 'enrichment': 0, 'height': 0,
            'base_height': 0, 're_height': 0, 'factor': 0})
        self.assertDictEqual(indexs, {
            'step': 0, 'change': False, 'num': 0, 'first': True,
            'length': 0, 'exist': False, 'switch': 0, 'extend': False,
            'count': 0})

    def test_get_gene_length(self):
        """``ot.get_gene_length`` reports the length of the named sequence."""
        fasta = os.path.join(self.test_folder, "test.fa")
        gen_file(fasta, self.example.fasta)
        seq_len = ot.get_gene_length(fasta, "aaa")
        self.assertEqual(seq_len, 102)

    def test_read_predict_manual_gff(self):
        """``ot.read_predict_manual_gff`` returns the count and the entries."""
        gff = os.path.join(self.test_folder, "test.gff")
        gen_file(gff, self.example.gff_file)
        args = self.mock_args.mock()
        args.gene_length = 1000
        num, gffs = ot.read_predict_manual_gff(gff, args)
        self.assertEqual(num, 1)
        self.assertEqual(gffs[0].start, 633)

    def test_scoring_function(self):
        """``ot.scoring_function`` sets ``indexs['change']`` only on improvement."""
        stat_value = {"tp_rate": 0.8, "fp_rate": 0.0003, "tp": 100, "fp": 3}
        best = {"tp_rate": 0.8, "fp_rate": 0.0005, "tp": 100, "fp": 31,
                "fn": 45, "missing_ratio": 0.004}
        ot.scoring_function(best, stat_value, self.example.indexs, 1000)
        self.assertTrue(self.example.indexs["change"])

        self.example.indexs["change"] = False
        stat_value = {"tp_rate": 0.8, "fp_rate": 0.0004, "tp": 100, "fp": 13}
        best = {"tp_rate": 0.8, "fp_rate": 0.0003, "tp": 100, "fp": 3}
        ot.scoring_function(best, stat_value, self.example.indexs, 1000)
        self.assertFalse(self.example.indexs["change"])

    def test_load_stat_csv(self):
        """``ot.load_stat_csv`` reads back the step, statistics and parameters."""
        gen_file(os.path.join(self.test_folder, "stat.csv"),
                 self.example.stat)
        list_num = []
        best_para = {}
        datas = ot.load_stat_csv(self.test_folder, list_num,
                                 self.example.best, best_para,
                                 self.example.indexs, 1000)
        self.assertEqual(datas[0], 2)
        self.assertDictEqual(datas[1], {
            'fp': 230.0, 'tp': 789.0,
            'missing_ratio': 0.29991126885536823,
            'fp_rate': 8.15542105020548e-05,
            'tp_rate': 0.7000887311446318, 'fn': 338.0})
        self.assertDictEqual(datas[2], {
            'processing': 5.2, 'base_height': 0.086, 'factor': 7.6,
            're_height': 2.3, 're_factor': 5.5, 'enrichment': 3.1,
            'height': 2.4})

    def test_reload_data(self):
        """``ot.reload_data`` restores the parameters and the statistics."""
        gen_file(os.path.join(self.test_folder, "stat.csv"),
                 self.example.stat)
        list_num = []
        best_para = {}
        datas = ot.reload_data(self.test_folder, list_num,
                               self.example.best, best_para,
                               self.example.indexs, 1000)
        self.assertDictEqual(datas[0], {
            'base_height': 0.086, 'processing': 5.2, 'height': 2.4,
            'enrichment': 3.1, 're_factor': 5.5, 're_height': 2.3,
            'factor': 7.6})
        self.assertDictEqual(datas[1], {
            'tp_rate': 0.7000887311446318, 'tp': 789.0, 'fn': 338.0,
            'fp': 230.0, 'fp_rate': 8.15542105020548e-05,
            'missing_ratio': 0.29991126885536823})

    def test_extend_data(self):
        """``ot.extend_data`` returns parameters equal to the best ones."""
        best_para = copy.deepcopy(self.example.best_para)
        current_para = ot.extend_data(self.test_folder, self.example.best,
                                      best_para, 100)
        self.assertDictEqual(current_para, best_para)

    def test_run_random_part(self):
        """``ot.run_random_part`` moves away from the reference parameters."""
        list_num = []
        current_para = copy.deepcopy(self.example.ref_para)
        para = ot.run_random_part(current_para, list_num,
                                  self.example.max_nums, 1000,
                                  self.example.indexs)
        self.assertNotEqual(para, self.example.ref_para)

    def test_run_large_change_part(self):
        """``ot.run_large_change_part`` yields a new parameter set."""
        list_num = []
        seeds = {"seed": 0, "pre_seed": []}
        features = {"feature": "r", "pre_feature": ""}
        current_para = copy.deepcopy(self.example.ref_para)
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.run_large_change_part(seeds, features, self.example.indexs,
                                        current_para, self.example.max_nums,
                                        best_para, list_num)
        self.assertNotEqual(para, self.example.ref_para)
        self.assertNotEqual(para, best_para)

    def test_gen_large_random(self):
        """``ot.gen_large_random`` keeps ``height`` above ``re_height``."""
        list_num = []
        index_large = {0: "height", 1: "re_height", 2: "factor",
                       3: "re_factor", 4: "base_height", 5: "enrichment",
                       6: "processing"}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.gen_large_random(self.example.max_nums, "height", 0.2,
                                   list_num, 0.3, best_para, index_large,
                                   self.example.indexs)
        self.assertNotEqual(para, best_para)
        self.assertGreater(para["height"], para["re_height"])

    def test_run_small_change_part(self):
        """``ot.run_small_change_part`` yields parameters unlike the best set."""
        seeds = {"seed": 0, "pre_seed": []}
        features = {"feature": "l", "pre_feature": ""}
        current_para = copy.deepcopy(self.example.ref_para)
        list_num = []
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.run_small_change_part(seeds, features, self.example.indexs,
                                        current_para, best_para, list_num,
                                        self.example.max_nums)
        self.assertNotEqual(para, best_para)

    def test_small_change(self):
        """``ot.small_change`` returns a changed value above the lower bound."""
        list_num = []
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.small_change(0.9, "height", 0.2, list_num, 0.5, best_para)
        self.assertNotEqual(para, 0.5)
        self.assertGreater(para, 0.2)

    def test_plus_process(self):
        """``ot.plus_process`` returns the expected adjusted value."""
        list_num = []
        actions = {"plus": False, "minus": False}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.plus_process("height", best_para, 0.9, 0.5, actions,
                               list_num, 0.2)
        self.assertEqual(para, 0.4)

    def test_minus_process(self):
        """``ot.minus_process`` returns the expected adjusted value."""
        list_num = []
        actions = {"plus": False, "minus": False}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.minus_process("height", best_para, 0.9, 0.5, actions,
                                list_num, 0.1)
        self.assertEqual(para, 0.2)

    def test_compare_manual_predict(self):
        """``ot.compare_manual_predict`` writes one statistics line per step."""
        out = StringIO()
        manual = os.path.join(self.test_folder, "manual.gff")
        predict = os.path.join(self.test_folder, "predict.gff")
        gen_file(manual, self.example.manual_file)
        gen_file(predict, self.example.gff_file)
        para_list = [copy.deepcopy(self.example.best_para)]
        args = self.mock_args.mock()
        args.manual = manual
        args.cores = 1
        args.gene_length = 2000
        args.cluster = 3
        ot.compare_manual_predict(1000, para_list, [predict],
                                  self.test_folder, out, args)
        self.assertEqual(
            out.getvalue(),
            "1000\the_0.3_rh_0.2_fa_0.7_rf_0.3_bh_0.0_ef_2.5_pf_3.3\tTP\t1\tTP_rate\t0.5\tFP\t1\tFP_rate\t0.0005005005005005005\tFN\t1\tmissing_ratio\t0.5\n")

    def test_compute_stat(self):
        """``ot.compute_stat`` adopts the new best parameters and statistics."""
        list_num = [self.example.best_para]
        best_para = {'re_factor': 0.3, 'processing': 3.3, 'enrichment': 2.5,
                     'height': 0.5, 'base_height': 0.0, 're_height': 0.2,
                     'factor': 0.7}
        self.example.indexs["change"] = True
        best = {"tp_rate": 0.6, "fp_rate": 0.0025, "tp": 40, "fp": 32,
                "fn": 45, "missing_ratio": 0.004}
        datas = ot.compute_stat(self.example.best, best, best_para, 1,
                                list_num, self.test_folder,
                                self.example.indexs)
        self.assertDictEqual(datas[0], self.example.best_para)
        self.assertDictEqual(datas[1], self.example.best)

    def test_run_tss_and_stat(self):
        """``ot.run_tss_and_stat`` runs with TSSpredator replaced by mocks."""
        list_num = [self.example.best_para]
        seeds = {"seed": 0, "pre_seed": []}
        best_para = {'re_factor': 0.3, 'processing': 3.3, 'enrichment': 2.5,
                     'height': 0.5, 'base_height': 0.0, 're_height': 0.2,
                     'factor': 0.7}
        current_para = {'re_factor': 0.3, 'processing': 2.3,
                        'enrichment': 2.5, 'height': 0.5, 'base_height': 0.2,
                        're_height': 0.2, 'factor': 0.7}
        stat_out = StringIO()
        wig = os.path.join(self.test_folder, "wig")
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        os.makedirs(wig, exist_ok=True)
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        self._mock_tsspredator()
        args = self.mock_args.mock()
        args.steps = 2000
        args.cores = 1
        args.tsspredator_path = "test"
        args.libs = self.example.libs
        args.program = "TSS"
        args.cluster = 3
        args.utr = 200
        args.replicate = 2
        args.replicate_name = "rep"
        args.project_strain = "aaa"
        args.manual = os.path.join(self.test_folder, "manual.gff")
        args.gene_length = 2000
        gen_file(args.manual, self.example.manual_file)
        datas = ot.run_tss_and_stat(
            self.example.indexs, list_num, seeds, 0.4, 0.3,
            self.test_folder, stat_out, best_para, current_para, wig,
            fasta, gff, self.example.best, 3, args)
        self.assertFalse(datas[0])

    def test_gen_config(self):
        """``ot.gen_config`` writes the expected TSSpredator config file."""
        wig = os.path.join(self.test_folder, "wig")
        os.makedirs(wig, exist_ok=True)
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = 1
        args.utr = 200
        args.replicate_name = "test"
        filename = ot.gen_config(self.example.best_para, self.test_folder, 1,
                                 wig, fasta, gff, args)
        self.assertEqual(filename, "test_folder/config_1.ini")
        data = import_data("test_folder/config_1.ini")
        self.assertEqual("\n".join(data), self.example.config)

    def test_comparison(self):
        """``ot.comparison`` counts overlapping, predicted-only and manual-only."""
        nums = {"overlap": 0, "predict": 0, "manual": 0}
        for index in range(3):
            self.example.mans[index].attributes["print"] = False
            self.example.gffs[index].attributes["print"] = False
        args = self.mock_args.mock()
        args.cluster = 3
        args.gene_length = 2000
        ot.comparison(self.example.mans, self.example.gffs, nums, args)
        self.assertDictEqual(nums, {'manual': 1, 'predict': 2, 'overlap': 1})

    def test_check_overlap(self):
        """``ot.check_overlap`` returns the overlap flag and a position."""
        nums = {"overlap": 0, "predict": 0, "manual": 0}
        datas = ot.check_overlap(True, None, nums, 2000,
                                 self.example.mans[0], self.example.gffs[0],
                                 100)
        self.assertFalse(datas[0])
        self.assertEqual(datas[1], 140)

    def test_print_lib(self):
        """``ot.print_lib`` writes one ``<strain>_<cond><rep>`` line per lib."""
        libs = [{"condition": 1, "replicate": "a", "wig": "test_1.wig"},
                {"condition": 2, "replicate": "a", "wig": "test_2.wig"}]
        out = StringIO()
        ot.print_lib(2, libs, out, self.test_folder, "aaa")
        self.assertEqual(
            out.getvalue(),
            "aaa_1a = test_folder/test_1.wig\naaa_2a = test_folder/test_2.wig\n")

    def test_import_lib(self):
        """``ot.import_lib`` reports the number of libraries it found."""
        out = StringIO()
        wig_folder = self._make_wig_folder()
        lib_dict = {"fp": [], "fm": [], "np": [], "nm": []}
        self._write_dummy_wigs(wig_folder)
        args = self.mock_args.mock()
        args.project_strain = "aaa"
        args.program = "TSS"
        args.libs = self.example.libs
        lib_num = ot.import_lib(wig_folder, set(), lib_dict, out, "aaa.gff",
                                [], "aaa.fa", args)
        self.assertEqual(lib_num, 1)

    def test_optimization_process(self):
        """``ot.optimization_process`` keeps the best parameters and stats."""
        current_para = copy.deepcopy(self.example.ref_para)
        best_ref_para = copy.deepcopy(self.example.best_para)
        list_num = [best_ref_para]
        indexs = copy.deepcopy(self.example.indexs)
        best_para = {'re_factor': 0.3, 'processing': 3.3, 'enrichment': 2.5,
                     'height': 0.6, 'base_height': 0.0, 're_height': 0.2,
                     'factor': 0.7}
        stat_out = StringIO()
        gen_file(os.path.join(self.test_folder, "manual.gff"),
                 self.example.manual_file)
        wig_folder = self._make_wig_folder()
        self._write_dummy_wigs(wig_folder)
        self._mock_tsspredator()
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = 1
        args.utr = 200
        args.replicate_name = "test"
        args.steps = 2
        args.tsspredator_path = "test"
        args.gene_length = 2000
        args.manual = os.path.join(self.test_folder, "manual.gff")
        ot.optimization_process(
            indexs, current_para, list_num, self.example.max_nums,
            best_para, self.test_folder, stat_out, self.example.best,
            wig_folder, "aaa.fa", "aaa.gff", 2000, True, args)
        self.assertDictEqual(best_para, {
            're_height': 0.2, 'factor': 0.7, 'processing': 3.3,
            'height': 0.6, 'base_height': 0.0, 're_factor': 0.3,
            'enrichment': 2.5})
        self.assertDictEqual(self.example.best, {
            'missing_ratio': 0.29991126885536823, 'tp': 789.0,
            'tp_rate': 0.7000887311446318, 'fp': 230.0, 'fn': 338.0,
            'fp_rate': 8.15542105020548e-05})

    def test_optimization(self):
        """``ot.optimization`` writes ``optimized_TSSpredator/stat.csv``."""
        self._mock_tsspredator()
        wig_folder = self._make_wig_folder()
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        args = self.mock_args.mock()
        args.libs = self.example.libs
        args.cores = 1
        args.cluster = 3
        args.program = "TSS"
        args.project_strain = "aaa"
        args.replicate = 1
        args.utr = 200
        args.steps = 2
        args.gene_length = 2000
        args.height = 0.9
        args.height_reduction = 0.8
        args.factor = 0.9
        args.factor_reduction = 0.8
        args.base_height = 0.01
        args.enrichment = 0.5
        args.processing = 0.5
        args.length = None
        args.replicate_name = "test"
        args.tsspredator_path = "test"
        args.manual = os.path.join(self.test_folder, "manual.gff")
        gen_file(args.manual, self.example.manual_file)
        args.output_folder = self.test_folder
        ot.optimization(wig_folder, fasta, gff, args)
        self.assertTrue(os.path.exists(os.path.join(
            self.test_folder, "optimized_TSSpredator", "stat.csv")))
class TestsTSSpredator(unittest.TestCase):
    """Tests for ``UTRDetection``.

    Despite the class name, every test here drives a ``UTRDetection``
    instance and the ``ut`` module. The tests work inside a scratch
    ``test_folder`` tree that is created in ``setUp`` and removed in
    ``tearDown``; functions replaced on ``ut`` are restored when the test
    ends.
    """

    def setUp(self):
        """Create the fixtures, the folder tree and the object under test."""
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.mock_parser = Mock_Multiparser()
        self.example = Example()
        self.test_folder = "test_folder"
        self.trans = "test_folder/trans"
        self.out = "test_folder/output"
        self.gffs = "test_folder/gffs"
        self.tsss = "test_folder/tsss"
        self.terms = "test_folder/terms"
        # Create every folder idempotently so a partly populated folder left
        # over from an aborted run neither skips nor breaks the setup.
        for folder in (self.test_folder, self.trans,
                       os.path.join(self.trans, "tmp"), self.out, self.gffs,
                       self.tsss, os.path.join(self.tsss, "tmp"),
                       self.terms):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.trans = self.trans
        args.out_folder = self.out
        self.utr = UTRDetection(args)

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _patch_ut(self, name, replacement):
        """Swap ``ut.<name>`` for ``replacement`` until the current test ends."""
        missing = object()
        original = getattr(ut, name, missing)
        setattr(ut, name, replacement)
        if original is missing:
            self.addCleanup(delattr, ut, name)
        else:
            self.addCleanup(setattr, ut, name, original)

    def _mock_utr_detectors(self):
        """Replace the 5'UTR and 3'UTR detectors on ``ut`` with mocks."""
        self._patch_ut("detect_5utr", self.mock.mock_detect_5utr)
        self._patch_ut("detect_3utr", self.mock.mock_detect_3utr)

    def _make_stat_folders(self):
        """Create the 5UTR/3UTR ``statistics`` folders and return both paths."""
        stat_paths = []
        for kind in ("5UTR", "3UTR"):
            stat_path = os.path.join(self.out, kind, "statistics")
            os.makedirs(stat_path, exist_ok=True)
            stat_paths.append(stat_path)
        return stat_paths[0], stat_paths[1]

    def _write_inputs(self, term_folder):
        """Write the annotation, transcript, TSS and terminator gff files."""
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gen_file(os.path.join(self.trans, "test_transcript.gff"),
                 self.example.tran_file)
        gen_file(os.path.join(self.tsss, "test_TSS.gff"),
                 self.example.tss_file)
        gen_file(os.path.join(term_folder, "test_term.gff"),
                 self.example.term_file)

    def test_compute_utr(self):
        """``_compute_utr`` produces both UTR length plots."""
        self._mock_utr_detectors()
        term_path = os.path.join(self.terms, "tmp")
        os.mkdir(term_path)
        utr5_stat_path, utr3_stat_path = self._make_stat_folders()
        self._write_inputs(term_path)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.tsss = self.tsss
        args.trans = self.trans
        args.terms = self.terms
        self.utr._compute_utr(args)
        self.assertTrue(os.path.exists(
            os.path.join(utr5_stat_path, "test_5utr_length.png")))
        self.assertTrue(os.path.exists(
            os.path.join(utr3_stat_path, "test_3utr_length.png")))

    def test_run_utr_detection(self):
        """``run_utr_detection`` produces both UTR length plots."""
        # Instance attribute, so it disappears with the instance.
        self.utr._check_gff = self.mock.mock_check_gff
        self._mock_utr_detectors()
        utr5_stat_path, utr3_stat_path = self._make_stat_folders()
        self._write_inputs(self.terms)
        args = self.mock_args.mock()
        args.tsss = self.tsss
        args.gffs = self.gffs
        args.trans = self.trans
        args.terms = self.terms
        args.out_folder = self.out
        self.utr.run_utr_detection(args)
        self.assertTrue(os.path.exists(
            os.path.join(utr5_stat_path, "test_5utr_length.png")))
        self.assertTrue(os.path.exists(
            os.path.join(utr3_stat_path, "test_3utr_length.png")))
class TestOptimizeTSSpredator(unittest.TestCase):
    """Tests for the TSSpredator parameter-optimization module ``ot``.

    Every test works inside a scratch ``test_folder`` that is created in
    ``setUp`` and removed in ``tearDown``.
    """

    # Wiggle files written by the tests that need a populated wig folder.
    _WIG_NAMES = (
        "GSM1649587_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
        "GSM1649587_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
        "GSM1649588_Hp26695_ML_B1_HS1_-TEX_forward_STRAIN_aaa.wig",
        "GSM1649588_Hp26695_ML_B1_HS1_-TEX_reverse_STRAIN_aaa.wig",
    )

    # Values the tests expect to be read back from ``self.example.stat``.
    _STAT_CSV_BEST = {
        'fp': 230.0, 'tp': 789.0,
        'missing_ratio': 0.29991126885536823,
        'fp_rate': 8.15542105020548e-05,
        'tp_rate': 0.7000887311446318, 'fn': 338.0,
    }
    _STAT_CSV_PARA = {
        'processing': 5.2, 'base_height': 0.086, 'factor': 7.6,
        're_height': 2.3, 're_factor': 5.5, 'enrichment': 3.1,
        'height': 2.4,
    }

    def setUp(self):
        """Create the fixtures and the scratch folder."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        os.makedirs(self.test_folder, exist_ok=True)

    def tearDown(self):
        """Remove the scratch folder."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _patch_attr(self, owner, name, replacement):
        """Replace ``owner.name`` for this test only.

        The previous value is restored (or the attribute removed again if
        it did not exist) through ``addCleanup``, so the patch cannot leak
        into tests that run afterwards.
        """
        missing = object()
        original = getattr(owner, name, missing)
        if original is missing:
            self.addCleanup(delattr, owner, name)
        else:
            self.addCleanup(setattr, owner, name, original)
        setattr(owner, name, replacement)

    def _mock_tsspredator(self):
        """Swap the TSSpredator runner and GFF converter for mocks."""
        self._patch_attr(ot, "run_TSSpredator_paralle",
                         Mock_func().mock_run_TSSpredator_paralle)
        self._patch_attr(ot, "convert2gff", Mock_func().mock_convert2gff)

    def _make_args(self, **fields):
        """Return a mocked argument container with ``fields`` set on it."""
        args = self.mock_args.mock()
        for name, value in fields.items():
            setattr(args, name, value)
        return args

    def _open_log(self):
        """Open the scratch log file; use it in a ``with`` statement."""
        return open(os.path.join(self.test_folder, "test.log"), "w")

    def _make_wig_folder(self, with_wigs):
        """Create ``test_folder/wigs/tmp`` and optionally fill it.

        Returns:
            str: path of the created folder.
        """
        wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
        os.makedirs(wig_folder, exist_ok=True)
        if with_wigs:
            for wig_name in self._WIG_NAMES:
                gen_file(os.path.join(wig_folder, wig_name), "test")
        return wig_folder

    def _gen_genome_files(self):
        """Write the example fasta and gff; return their paths."""
        fasta = os.path.join(self.test_folder, "aaa.fa")
        gff = os.path.join(self.test_folder, "aaa.gff")
        gen_file(fasta, self.example.fasta)
        gen_file(gff, self.example.gff_file)
        return fasta, gff

    @staticmethod
    def _best_para(height):
        """Return a fresh parameter set with the given ``height``."""
        return {'re_factor': 0.3, 'processing': 3.3, 'enrichment': 2.5,
                'height': height, 'base_height': 0.0, 're_height': 0.2,
                'factor': 0.7}

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_initiate(self):
        """``initiate`` returns the maxima, zeroed parameters and indexes."""
        args = self._make_args(
            height=0.9, height_reduction=0.8, factor=0.9,
            factor_reduction=0.8, base_height=0.01, enrichment=0.5,
            processing=0.5)
        max_num, best_para, current_para, indexs = ot.initiate(args)
        zero_para = {'re_factor': 0, 'processing': 0, 'enrichment': 0,
                     'height': 0, 'base_height': 0, 're_height': 0,
                     'factor': 0}
        self.assertDictEqual(max_num, {
            're_factor': 0.8, 'processing': 0.5, 'enrichment': 0.5,
            'height': 0.9, 'base_height': 0.01, 're_height': 0.8,
            'factor': 0.9})
        self.assertDictEqual(best_para, zero_para)
        self.assertDictEqual(current_para, zero_para)
        self.assertDictEqual(indexs, {
            'step': 0, 'change': False, 'num': 0, 'first': True,
            'length': 0, 'exist': False, 'switch': 0, 'extend': False,
            'count': 0})

    def test_read_predict_manual_gff(self):
        """``read_predict_manual_gff`` returns one entry starting at 633."""
        gff = os.path.join(self.test_folder, "test.gff")
        gen_file(gff, self.example.gff_file)
        num, gffs = ot.read_predict_manual_gff(gff, 1000)
        self.assertEqual(num, 1)
        self.assertEqual(gffs[0].start, 633)

    def test_scoring_function(self):
        """``scoring_function`` flags a change only for a better result."""
        stat_value = {"tp_rate": 0.8, "fp_rate": 0.0003,
                      "tp": 100, "fp": 3}
        best = {"tp_rate": 0.8, "fp_rate": 0.0005, "tp": 100, "fp": 31,
                "fn": 45, "missing_ratio": 0.004}
        ot.scoring_function(best, stat_value, self.example.indexs, 1000)
        self.assertTrue(self.example.indexs["change"])
        # Reset the flag before checking the "no improvement" case.
        self.example.indexs["change"] = False
        stat_value = {"tp_rate": 0.8, "fp_rate": 0.0004,
                      "tp": 100, "fp": 13}
        best = {"tp_rate": 0.8, "fp_rate": 0.0003, "tp": 100, "fp": 3}
        ot.scoring_function(best, stat_value, self.example.indexs, 1000)
        self.assertFalse(self.example.indexs["change"])

    def test_load_stat_csv(self):
        """``load_stat_csv`` reads the count, best stats and parameters."""
        stat_file = os.path.join(self.test_folder, "stat.csv")
        gen_file(stat_file, self.example.stat)
        datas = ot.load_stat_csv(
            self.test_folder, [], self.example.best, {},
            self.example.indexs, 1000, stat_file)
        self.assertEqual(datas[0], 2)
        self.assertDictEqual(datas[1], self._STAT_CSV_BEST)
        self.assertDictEqual(datas[2], self._STAT_CSV_PARA)

    def test_reload_data(self):
        """``reload_data`` returns the stored parameters and best stats."""
        stat_file = os.path.join(self.test_folder, "stat.csv")
        gen_file(stat_file, self.example.stat)
        with self._open_log() as log:
            datas = ot.reload_data(
                self.test_folder, [], self.example.best, {},
                self.example.indexs, 1000, stat_file, log)
        self.assertDictEqual(datas[0], self._STAT_CSV_PARA)
        self.assertDictEqual(datas[1], self._STAT_CSV_BEST)

    def test_extend_data(self):
        """``extend_data`` returns parameters equal to the best ones."""
        best_para = copy.deepcopy(self.example.best_para)
        current_para = ot.extend_data(
            self.test_folder, self.example.best, best_para, 100, "aaa")
        self.assertDictEqual(current_para, best_para)

    def test_run_random_part(self):
        """``run_random_part`` moves away from the reference parameters."""
        current_para = copy.deepcopy(self.example.ref_para)
        para = ot.run_random_part(
            current_para, [], self.example.max_nums, 1000,
            self.example.indexs)
        self.assertNotEqual(para, self.example.ref_para)

    def test_run_large_change_part(self):
        """``run_large_change_part`` yields a new parameter set."""
        seeds = {"seed": 0, "pre_seed": []}
        features = {"feature": "r", "pre_feature": ""}
        current_para = copy.deepcopy(self.example.ref_para)
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.run_large_change_part(
            seeds, features, self.example.indexs, current_para,
            self.example.max_nums, best_para, [])
        self.assertNotEqual(para, self.example.ref_para)
        self.assertNotEqual(para, best_para)

    def test_gen_large_random(self):
        """``gen_large_random`` keeps ``height`` above ``re_height``."""
        index_large = {0: "height", 1: "re_height", 2: "factor",
                       3: "re_factor", 4: "base_height", 5: "enrichment",
                       6: "processing"}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.gen_large_random(
            self.example.max_nums, "height", 0.2, [], 0.3, best_para,
            index_large, self.example.indexs)
        self.assertNotEqual(para, best_para)
        self.assertGreater(para["height"], para["re_height"])

    def test_run_small_change_part(self):
        """``run_small_change_part`` yields a new parameter set."""
        seeds = {"seed": 0, "pre_seed": []}
        features = {"feature": "l", "pre_feature": ""}
        current_para = copy.deepcopy(self.example.ref_para)
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.run_small_change_part(
            seeds, features, self.example.indexs, current_para,
            best_para, [], self.example.max_nums)
        self.assertNotEqual(para, best_para)

    def test_small_change(self):
        """``small_change`` returns a value other than 0.5 and above 0.2."""
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.small_change(0.9, "height", 0.2, [], 0.5, best_para)
        self.assertNotEqual(para, 0.5)
        self.assertGreater(para, 0.2)

    def test_plus_process(self):
        """``plus_process`` returns 0.4 for the fixture values."""
        actions = {"plus": False, "minus": False}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.plus_process(
            "height", best_para, 0.9, 0.5, actions, [], 0.2)
        self.assertEqual(para, 0.4)

    def test_minus_process(self):
        """``minus_process`` returns 0.2 for the fixture values."""
        actions = {"plus": False, "minus": False}
        best_para = copy.deepcopy(self.example.best_para)
        para = ot.minus_process(
            "height", best_para, 0.9, 0.5, actions, [], 0.1)
        self.assertEqual(para, 0.2)

    def test_compare_manual_predict(self):
        """``compare_manual_predict`` writes one statistics line."""
        out = StringIO()
        manual = os.path.join(self.test_folder, "manual.gff")
        predict = os.path.join(self.test_folder, "predict.gff")
        gen_file(manual, self.example.manual_file)
        gen_file(predict, self.example.gff_file)
        para_list = [copy.deepcopy(self.example.best_para)]
        args = self._make_args(
            manual=manual, cores=1, gene_length=2000, cluster=3)
        ot.compare_manual_predict(
            1000, para_list, [predict], self.test_folder, out, args,
            self.example.mans, 3, 2000)
        self.assertEqual(
            out.getvalue(),
            "1000\the_0.3_rh_0.2_fa_0.7_rf_0.3_bh_0.0_ef_2.5_pf_3.3"
            "\tTP=0\tTP_rate=0.0\tFP=2\tFP_rate=0.00100150225338007"
            "\tFN=2\tmissing_ratio=0.6666666666666666\n")

    def test_compute_stat(self):
        """``compute_stat`` returns the example parameters and stats."""
        list_num = [self.example.best_para]
        best_para = self._best_para(0.5)
        self.example.indexs["change"] = True
        best = {"tp_rate": 0.6, "fp_rate": 0.0025, "tp": 40, "fp": 32,
                "fn": 45, "missing_ratio": 0.004}
        datas = ot.compute_stat(
            self.example.best, best, best_para, 1, list_num,
            self.test_folder, self.example.indexs, "aaa")
        self.assertDictEqual(datas[0], self.example.best_para)
        self.assertDictEqual(datas[1], self.example.best)

    def test_run_tss_and_stat(self):
        """``run_tss_and_stat`` returns a falsy first result element."""
        list_num = [self.example.best_para]
        seeds = {"seed": 0, "pre_seed": []}
        best_para = self._best_para(0.5)
        current_para = {
            're_factor': 0.3, 'processing': 2.3, 'enrichment': 2.5,
            'height': 0.5, 'base_height': 0.2, 're_height': 0.2,
            'factor': 0.7}
        stat_out = StringIO()
        wig = os.path.join(self.test_folder, "wig")
        os.makedirs(wig, exist_ok=True)
        fasta, gff = self._gen_genome_files()
        self._mock_tsspredator()
        args = self._make_args(
            steps=2000, cores=1, tsspredator_path="test",
            libs=self.example.libs, program="TSS", cluster=3, utr=200,
            replicate="all_2", replicate_name="rep",
            project_strain="aaa",
            manual=os.path.join(self.test_folder, "manual.gff"),
            gene_length=2000)
        gen_file(args.manual, self.example.manual_file)
        with self._open_log() as log:
            datas, set_config, run_tss = ot.run_tss_and_stat(
                self.example.indexs, list_num, seeds, 0.4, 0.3,
                self.test_folder, stat_out, best_para, current_para, wig,
                fasta, gff, self.example.best, 3, args, "aaa",
                self.example.mans, 2000, log, True, True)
        self.assertFalse(datas[0])

    def test_gen_config(self):
        """``gen_config`` writes the expected config file."""
        wig = os.path.join(self.test_folder, "wig")
        os.makedirs(wig, exist_ok=True)
        fasta, gff = self._gen_genome_files()
        args = self._make_args(
            libs=self.example.libs, cores=1, cluster=3, program="TSS",
            project_strain="aaa", replicate="all_1", utr=200,
            replicate_name="test")
        filename = ot.gen_config(
            self.example.best_para, self.test_folder, 1, wig, fasta, gff,
            args, "aaa")
        self.assertEqual(filename, "test_folder/config_1.ini")
        data = import_data("test_folder/config_1.ini")
        self.assertEqual("\n".join(data), self.example.config)

    def test_comparison(self):
        """``comparison`` fills in the manual/predict/overlap counts."""
        nums = {"overlap": 0, "predict": 0, "manual": 0}
        for index in range(3):
            self.example.mans[index].attributes["print"] = False
            self.example.gffs[index].attributes["print"] = False
        args = self._make_args(cluster=3, gene_length=2000)
        ot.comparison(
            self.example.mans, self.example.gffs, nums, args, 2000)
        self.assertDictEqual(
            nums, {'manual': 1, 'predict': 2, 'overlap': 1})

    def test_check_overlap(self):
        """``check_overlap`` returns a falsy flag and 140."""
        nums = {"overlap": 0, "predict": 0, "manual": 0}
        datas = ot.check_overlap(
            True, None, nums, 2000, self.example.mans[0],
            self.example.gffs[0], 100)
        self.assertFalse(datas[0])
        self.assertEqual(datas[1], 140)

    def test_print_lib(self):
        """``print_lib`` writes one config line per library."""
        libs = [
            {"condition": 1, "replicate": "a", "wig": "test_1.wig"},
            {"condition": 2, "replicate": "a", "wig": "test_2.wig"},
        ]
        out = StringIO()
        ot.print_lib(2, libs, out, self.test_folder, "aaa", ["a"])
        self.assertEqual(
            out.getvalue(),
            "aaa_1a = test_folder/test_1.wig\n"
            "aaa_2a = test_folder/test_2.wig\n")

    def test_import_lib(self):
        """``import_lib`` reports one library for the example libs."""
        out = StringIO()
        wig_folder = self._make_wig_folder(with_wigs=True)
        lib_dict = {"fp": [], "fm": [], "np": [], "nm": []}
        args = self._make_args(
            project_strain="aaa", program="TSS", libs=self.example.libs)
        lib_num = ot.import_lib(
            wig_folder, set(), lib_dict, out, "aaa.gff", [], "aaa.fa",
            args, "aaa")
        self.assertEqual(lib_num, 1)

    def test_optimization_process(self):
        """``optimization_process`` leaves ``best_para`` unchanged."""
        current_para = copy.deepcopy(self.example.ref_para)
        list_num = [copy.deepcopy(self.example.best_para)]
        indexs = copy.deepcopy(self.example.indexs)
        best_para = self._best_para(0.6)
        stat_out = StringIO()
        gen_file(os.path.join(self.test_folder, "manual.gff"),
                 self.example.manual_file)
        wig_folder = self._make_wig_folder(with_wigs=True)
        self._mock_tsspredator()
        args = self._make_args(
            libs=self.example.libs, cores=1, cluster=3, program="TSS",
            project_strain="aaa", replicate="all_1", utr=200,
            replicate_name="test", steps=2, tsspredator_path="test",
            gene_length=2000,
            manual=os.path.join(self.test_folder, "manual.gff"))
        with self._open_log() as log:
            ot.optimization_process(
                indexs, current_para, list_num, self.example.max_nums,
                best_para, self.test_folder, stat_out, self.example.best,
                wig_folder, "aaa.fa", "aaa.gff", 2, True, args, "aaa",
                self.example.mans, 2000, log)
        self.assertDictEqual(best_para, self._best_para(0.6))

    def test_optimization(self):
        """``optimization`` writes ``stat_aaa.csv`` to the output folder."""
        self._mock_tsspredator()
        wig_folder = self._make_wig_folder(with_wigs=False)
        fasta, gff = self._gen_genome_files()
        args = self._make_args(
            libs=self.example.libs, cores=1, cluster=3, program="TSS",
            project_strain="aaa", replicate="all_1", utr=200, steps=2,
            gene_length=2000, height=0.9, height_reduction=0.8,
            factor=0.9, factor_reduction=0.8, base_height=0.01,
            enrichment=0.5, processing=0.5, length=None,
            replicate_name="test", tsspredator_path="test",
            manual=os.path.join(self.test_folder, "manual.gff"),
            output_folder=self.test_folder)
        gen_file(args.manual, self.example.manual_file)
        os.makedirs(
            os.path.join(self.test_folder, "optimized_TSSpredator"),
            exist_ok=True)
        with self._open_log() as log:
            ot.optimization(
                wig_folder, fasta, gff, args, args.manual, 2000, "aaa",
                log)
        self.assertTrue(os.path.exists(os.path.join(
            self.test_folder, "optimized_TSSpredator", "stat_aaa.csv")))
class TestPPI(unittest.TestCase):
    """Tests for ``PPINetwork`` with the download step mocked out.

    Every test works inside a scratch ``test_folder`` tree that is created
    in ``setUp`` and removed in ``tearDown``.
    """

    # Gene records the tests expect to be parsed from
    # ``self.example.ptt_file``.
    _EXPECTED_GENES = [
        {'gene': 'SAOUHSC_00001', 'locus_tag': 'dnaA',
         'strain': 'Staphylococcus_aureus_HG003'},
        {'gene': 'SAOUHSC_00002', 'locus_tag': '-',
         'strain': 'Staphylococcus_aureus_HG003'},
        {'gene': 'SAOUHSC_00003', 'locus_tag': '-',
         'strain': 'Staphylococcus_aureus_HG003'},
    ]

    def setUp(self):
        """Create the scratch folder tree and the object under test."""
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        # "tmp_nospecific" is deliberately not created as a folder:
        # test_get_pubmed writes a plain file at that path.
        # exist_ok keeps setUp working even when a previous, aborted run
        # left (part of) the tree behind.
        for sub_folder in ("tmp_id_list",
                           "with_strain",
                           "with_strain/test_ptt",
                           "without_strain",
                           "without_strain/test_ptt",
                           "all_results",
                           "best_results",
                           "figures"):
            os.makedirs(os.path.join(self.test_folder, sub_folder),
                        exist_ok=True)
        self.ppi = PPINetwork(self.test_folder)
        self.mock = Mock_func()
        self.example = Example()

    def tearDown(self):
        """Remove the scratch folder tree."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _open_log(self):
        """Open the scratch log file; use it in a ``with`` statement."""
        return open(os.path.join(self.test_folder, "test.log"), "w")

    def _make_args(self, **fields):
        """Return a mocked argument container with ``fields`` set on it."""
        args = self.mock_args.mock()
        for name, value in fields.items():
            setattr(args, name, value)
        return args

    @staticmethod
    def _strain_id(file_name):
        """Return a fresh strain description using ``file_name``."""
        return {"file": file_name, "ptt": "test_ptt",
                "string": "test_string", "pie": "test_pie"}

    def _result_paths(self):
        """Return the all/fig/best result folders below the scratch dir."""
        return {"all": os.path.join(self.test_folder, "all_results"),
                "fig": os.path.join(self.test_folder, "figures"),
                "best": os.path.join(self.test_folder, "best_results")}

    @staticmethod
    def _blank_files(id_list):
        """Return a ``files`` dict whose handles are empty placeholders."""
        return {"id_list": id_list, "id_log": "", "pubmed_log": "",
                "all_specific": "", "best_specific": "",
                "all_nospecific": "", "best_nospecific": "",
                "action_log": ""}

    def _buffer_files(self):
        """Return a ``files`` dict whose outputs are in-memory buffers."""
        return {"id_list": self.test_folder, "id_log": "test",
                "pubmed_log": "test", "all_specific": StringIO(),
                "best_specific": StringIO(),
                "all_nospecific": StringIO(),
                "best_nospecific": StringIO()}

    @staticmethod
    def _first_output():
        """Return the "first output" flags, all set."""
        return {"specific_all": True, "specific_best": True,
                "nospecific_all": True, "nospecific_best": True}

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------
    def test_wget_id(self):
        """``_wget_id`` reports a hit when the download is mocked."""
        self.ppi._run_wget = self.mock.mock_run_wget
        strain_id = {"ptt": "test_strain", "string": "string_test",
                     "file": "file_test"}
        files = {"id_list": "test", "id_log": "test"}
        with self._open_log() as log:
            detect = self.ppi._wget_id(
                "test_strain", "test_locus", strain_id, files, log)
        self.assertTrue(detect)

    def test_retrieve_id(self):
        """``_retrieve_id`` runs for a single gene without raising."""
        self.ppi._run_wget = self.mock.mock_run_wget
        strain_id = {"ptt": "test_strain", "string": "string_test",
                     "file": "file_test"}
        files = {"id_list": "test", "id_log": "test"}
        genes = [{"strain": "test_strain", "locus_tag": "test_locus",
                  "gene": "dnaA"}]
        with self._open_log() as log:
            self.ppi._retrieve_id(strain_id, genes, files, log)

    def test_get_prefer_name(self):
        """``_get_prefer_name`` returns the name stored in the id file."""
        files = {"id_list": self.test_folder}
        gen_file(os.path.join(self.test_folder, "aaa"),
                 "999.aaa\t222\t333\ttest_aaa")
        with self._open_log() as log:
            name = self.ppi._get_prefer_name(
                "999.aaa", "test", files, "test", log)
        self.assertEqual(name, "test_aaa")

    def test_get_pubmed(self):
        """``_get_pubmed`` writes the with/without-strain csv files."""
        self.ppi._run_wget = self.mock.mock_run_wget
        files = self._buffer_files()
        row = self.example.ppi_line.split("\t")
        strain_id = self._strain_id("test_file")
        gen_file(
            os.path.join(self.test_folder, "SAOUHSC_01684"),
            "93061.SAOUHSC_01684\t93061.SAOUHSC_01683\t333\ttest_aaa")
        gen_file(
            os.path.join(self.test_folder, "SAOUHSC_01683"),
            "93061.SAOUHSC_01683\t93061.SAOUHSC_01684\t333\ttest_bbb")
        gen_file(os.path.join(self.test_folder, "tmp_specific"), "")
        gen_file(os.path.join(self.test_folder, "tmp_nospecific"),
                 "12949105\t0.151711\n1404401\t-0.210303")
        paths = {"all": self.test_folder, "fig": self.test_folder,
                 "best": self.test_folder}
        args = self._make_args(
            out_folder=self.test_folder, querys="all", no_specific=True,
            score=0)
        with self._open_log() as log:
            self.ppi._get_pubmed(
                row, strain_id, "interaction", "test_A", "SAOUHSC_01684",
                self._first_output(), "test_ptt", files, paths, args,
                log)
        data = import_data(
            "test_folder/without_strain/test_ptt/test_aaa_test_bbb.csv")
        self.assertEqual("\n".join(data), self.example.without_out)
        data = import_data(
            "test_folder/with_strain/test_ptt/test_aaa_test_bbb.csv")
        self.assertEqual("\n".join(data), self.example.with_out)

    def test_merge_information(self):
        """``_merge_information`` writes the same line to both outputs."""
        out_all = StringIO()
        out_best = StringIO()
        row_a = self.example.ppi_line.split("\t")
        all_folder = os.path.join(self.test_folder, "with_strain")
        best_folder = os.path.join(self.test_folder, "without_strain")
        filename = os.path.join(self.test_folder, "SAOUHSC_01684")
        gen_file(filename, "93061.SAOUHSC_01684\t1000\t333\ttest_aaa")
        self.ppi._merge_information(
            self._first_output(), filename, out_all, out_best, row_a,
            111, "SAOUHSC_01684", self.test_folder, "specific",
            all_folder, best_folder, "test_ptt")
        expected = self.example.merge_out + "\n"
        self.assertEqual(out_all.getvalue(), expected)
        self.assertEqual(out_best.getvalue(), expected)

    def test_detect_protein(self):
        """``_detect_protein`` parses the genes from the ptt file."""
        gen_file(os.path.join(self.test_folder, "test"),
                 self.example.ptt_file)
        args = self._make_args(ptts=self.test_folder, querys="all")
        genes = self.ppi._detect_protein(self._strain_id("test"), args)
        self.assertListEqual(genes, self._EXPECTED_GENES)

    def test_setup_nospecific(self):
        """``_setup_nospecific`` creates the without_strain folders."""
        files = self._buffer_files()
        self.ppi._setup_nospecific(
            self._result_paths(), self._strain_id("test"), files)
        files["all_nospecific"].close()
        files["best_nospecific"].close()
        for folder in ("test_folder/all_results/without_strain/test_ptt",
                       "test_folder/best_results/without_strain/test_ptt",
                       "test_folder/figures/without_strain/test_ptt"):
            self.assertTrue(os.path.exists(folder))

    def test_setup_folder_and_read_file(self):
        """``_setup_folder_and_read_file`` makes folders, returns genes."""
        files = self._blank_files(self.test_folder)
        gen_file(os.path.join(self.test_folder, "test.ptt"),
                 self.example.ptt_file)
        args = self._make_args(
            querys="all", no_specific=True, out_folder=self.test_folder,
            ptts=self.test_folder)
        genes = self.ppi._setup_folder_and_read_file(
            self._strain_id("test.ptt"), "", files, self._result_paths(),
            args)
        # The entries are closed here as file-like objects, so the call
        # above is expected to have replaced the "" placeholders.
        for index in ("all_specific", "all_nospecific", "best_specific",
                      "best_nospecific", "id_log", "action_log",
                      "pubmed_log"):
            files[index].close()
        self.assertTrue(os.path.exists("test_folder/best_results/test"))
        self.assertTrue(os.path.exists("test_folder/all_results/test"))
        self.assertListEqual(genes, self._EXPECTED_GENES)

    def test_wget_actions(self):
        """``_wget_actions`` runs with a mocked download without raising."""
        gen_file(os.path.join(self.test_folder, "test.txt"),
                 "93061\ttest")
        self.ppi._run_wget = self.mock.mock_run_wget
        files = self._blank_files(self.test_folder)
        with self._open_log() as log:
            self.ppi._wget_actions(
                files, "test.txt", self._strain_id("test.ptt"),
                self.test_folder, log)

    def test_retrieve_actions(self):
        """``_retrieve_actions`` runs with a mocked download."""
        self.ppi._run_wget = self.mock.mock_run_wget
        files = self._blank_files(
            os.path.join(self.test_folder, "tmp_id_list"))
        gen_file(os.path.join(self.test_folder, "tmp_id_list/test.txt"),
                 "93061\ttest")
        gen_file(os.path.join(self.test_folder, "tmp_action"),
                 self.example.ppi_line)
        args = self._make_args(
            no_specific=True, querys="all", out_folder=self.test_folder)
        with self._open_log() as log:
            self.ppi._retrieve_actions(
                files, self._strain_id("test.ptt"), self._result_paths(),
                args, log)
class TestsRNAUTR(unittest.TestCase): def setUp(self): self.example = Example() self.mock = Mock_func() self.mock_args = MockClass() self.test_folder = "test_folder" if (not os.path.exists(self.test_folder)): os.mkdir(self.test_folder) def tearDown(self): if os.path.exists(self.test_folder): shutil.rmtree(self.test_folder) def test_import_data(self): pos = {"start": 4, "end": 40, "ori_start": 2, "ori_end": 3} datas = sud.import_data("+", "aaa", pos, "3UTR", "TSS", "cds", "srna_cover", "test") self.assertDictEqual(datas, { 'start_cleavage': 'NA', 'strand': '+', 'end_cleavage': 'test', 'start_tss': 'cds', 'end': 40, 'start': 4, 'utr': '3UTR', 'strain': 'aaa', 'datas': 'srna_cover'}) def test_read_data(self): args = self.mock_args.mock() args.gff_file = os.path.join(self.test_folder, "test.gff") args.ta_file = os.path.join(self.test_folder, "test.gff") args.tss_file = os.path.join(self.test_folder, "test.gff") args.pro_file = os.path.join(self.test_folder, "test.gff") args.seq_file = os.path.join(self.test_folder, "test.fa") gen_file(args.gff_file, self.example.gff_file) gen_file(args.seq_file, self.example.seq_file) args.hypo = False cdss, tas, tsss, pros, seq = sud.read_data(args) self.assertEqual(cdss[0].start, 4) self.assertEqual(tas[0].start, 4) self.assertEqual(tsss[0].start, 4) self.assertEqual(pros[0].start, 4) self.assertDictEqual( seq, {'aaa': 'ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT'}) def test_get_terminal(self): inters = [] seq = {"aaa": "ATATGACGATACGTAAACCGACCGAATATATCTTTTCACAACCAGATTACGATCGTCAT"} sud.get_terminal(self.example.gffs, inters, seq, "start") self.assertListEqual(inters, [{'end': 4, 'len_CDS': 0, 'strand': '+', 'strain': 'aaa', 'start': 1}]) def test_get_inter(self): inters = [] sud.get_inter(self.example.gffs, inters) self.assertListEqual(inters, [{'start': 14, 'strand': '+', 'end': 20, 'strain': 'aaa', 'len_CDS': 10}]) def test_set_cover_and_point(self): covers = [2, 3, 4, 1, 6, 2, 8, 3, 5, 6, 7, 5, 2, 1] cover_results 
= {"covers": None, "check_point": None} pos = {"start": 2, "end": 6, "ori_start": 2, "ori_end": 3} sud.set_cover_and_point(cover_results, self.example.inters[0], covers, pos, 5) self.assertListEqual(cover_results["covers"], [2, 3, 4, 1, 6, 2, 8, 3, 5]) self.assertDictEqual(cover_results["check_point"], {'srna_start': 0, 'utr_start': 2, 'utr_end': 3, 'srna_end': 12}) def test_check_import_srna_covers(self): args = self.mock_args.mock() cover = {"type": "5utr"} datas = {"num": 0, "cover_tmp": {"total": 100, "ori_total": 200}, "checks": {"detect_decrease": True}, "final_poss": {"start": 3, "end": 23}} cover_results = {"cover_sets": {"high": 50, "low": 10}, "srna_covers": {"cond_1": []}, "utr_covers": {"cond_1": []}, "type": "5utr", "intercds": "TSS"} args.min_len = 30 args.max_len = 500 pos = {"start": 1, "end": 25, "ori_start": 1, "ori_end": 25} sud.check_import_srna_covers(datas, cover_results, self.example.inters[0], "cond_1", "track", cover, pos, args, "5utr") self.assertDictEqual(datas["final_poss"], {'end': 23, 'start': 3}) self.assertDictEqual(cover_results["srna_covers"], { 'cond_1': [{'final_start': 3, 'high': 50, 'ori_avg': 8.0, 'final_end': 23, 'low': 10, 'type': '5utr', 'avg': 4, 'track': 'track'}]}) self.assertDictEqual(cover_results["utr_covers"], cover_results["srna_covers"]) datas["checks"] = {"detect_decrease": False} cover_results["srna_covers"] = {"cond_1": []} cover_results["utr_covers"] = {"cond_1": []} sud.check_import_srna_covers( datas, cover_results, self.example.inters[0], "cond_1", "track", cover, pos, args, "5utr") self.assertDictEqual(cover_results["srna_covers"], {'cond_1': []}) def test_check_pos(self): cover = {"pos": 4} check_point = {"utr_start": 1, "utr_end": 29, "srna_start": 3, "srna_end": 11} checks = {"srna": False, "utr": False} sud.check_pos(cover, check_point, checks, 4) self.assertDictEqual(checks, {'srna': True, 'utr': True}) def test_get_cover_5utr(self): args = self.mock_args.mock() datas = {"num": 0, "cover_tmp": {"5utr": 
0}, "checks": {"detect_decrease": True}, "final_poss": {"start": 1, "end": 26}} cover = 20 cover_sets = {"high": 50, "low": 10} args.decrease_utr = 50 args.fuzzy_utr = 2 go_out = sud.get_cover_5utr(datas, cover_sets, cover, self.example.inters[0], args, 10) self.assertDictEqual(datas["final_poss"], {'start': 1, 'end': 10}) self.assertEqual(datas["num"], 0) self.assertTrue(go_out) self.assertDictEqual(datas["cover_tmp"], {'5utr': 0}) self.assertDictEqual(cover_sets, {'high': 50, 'low': 10}) cover = 20 datas = {"num": 0, "cover_tmp": {"5utr": 30}, "checks": {"detect_decrease": True}, "final_poss": {"start": 1, "end": 26}} cover_sets = {"low": 10, "high": 50} args.decrease_utr = 0.5 go_out = sud.get_cover_5utr(datas, cover_sets, cover, self.example.inters[0], args, 10) self.assertEqual(datas["num"], 1) self.assertFalse(go_out) self.assertDictEqual(datas["final_poss"], {'start': 1, 'end': 26}) self.assertDictEqual(datas["cover_tmp"], {'5utr': 20}) self.assertDictEqual(cover_sets, {'low': 20, 'high': 50}) def test_detect_cover_utr_srna(self): sud.coverage_comparison = self.mock.mock_coverage_comparison cover_results = {"cover_sets": {"low": 10, "high": 50}, "pos": {"low": 10, "high": 50}, "covers": [20], "type": "5utr", "srna_covers": {"frag_1": []}, "utr_covers": {"frag_1": []}, "intercds": "TSS", "check_point": {"utr_start": 1, "utr_end": 29, "srna_start": 2, "srna_end": 25}} datas = {"num": 0, "cover_tmp": {"total": 100, "ori_total": 200}, "checks": {"detect_decrease": True}, "final_poss": {"start": 3, "end": 23}} pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 23} args = self.mock_args.mock() args.min_len = 30 args.max_len = 500 args.decrease_utr = 0.5 args.fuzzy_utr = 2 sud.detect_cover_utr_srna(cover_results, pos, self.example.inters[0], "frag_1", "track_1", args, "frag", 2, 20, "+") self.assertDictEqual( cover_results["srna_covers"], {'frag_1': [{'low': 20, 'high': 50, 'track': 'track_1', 'final_start': 2, 'ori_avg': 0.8695652173913043, 'type': 'frag', 
'final_end': 20, 'avg': 1.0526315789473684}]}) self.assertDictEqual(cover_results["utr_covers"], cover_results["srna_covers"]) self.assertDictEqual(cover_results["cover_sets"], {'best': 20, 'low': 20, 'high': 50}) def test_get_coverage(self): sud.coverage_comparison = self.mock.mock_coverage_comparison sud.detect_cover_utr_srna = self.mock.mock_detect_cover_utr_srna pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25} args = self.mock_args.mock() args.min_len = 30 args.max_len = 500 args.decrease_utr = 0.5 args.fuzzy_utr = 2 srna_covers, utr_covers = sud.get_coverage( self.example.wigs, self.example.inters[0], pos, "3utr", "TSS", args) self.assertDictEqual( srna_covers, {'frag_1': [{'track': 'track_1', 'high': 50, 'final_start': 2, 'type': 'frag', 'avg': 8.052631578947368, 'low': 10, 'final_end': 3, 'ori_avg': 2.12}]}) self.assertDictEqual(utr_covers, srna_covers) def test_get_utr_cutoff(self): mediandict = {"aaa": {"5utr": {"bbb": {}}}} avgs = [30, 60, 550, 302, 44] sud.get_utr_cutoff("p_0.5", mediandict, avgs, "aaa", "5utr", "bbb") self.assertDictEqual( mediandict, {'aaa': {'5utr': {'bbb': {'mean': 197.2, 'median': 60}}}}) def test_detect_normal(self): sud.get_coverage = self.mock.mock_get_coverage diff = 50 pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25} args = self.mock_args.mock() args.min_len = 30 args.max_len = 500 args.decrease_utr = 0.5 args.fuzzy_utr = 2 args.utrs = [] args.srnas = [] sud.detect_normal(diff, self.example.wigs, self.example.inters[0], pos, "3utr", self.example.tsss[0], args) self.assertListEqual( args.srnas, [{'end': 20, 'strand': '+', 'datas': {'frag_1': [{ 'track': 'track_1', 'final_start': 2, 'avg': 41.36842105263158, 'high': 50, 'type': 'frag', 'final_end': 20, 'ori_avg': 27.52, 'low': 10}]}, 'end_cleavage': 'NA', 'utr': '3utr', 'start_cleavage': 'NA', 'strain': 'aaa', 'start': 2, 'start_tss': 'TSS:1_+'}]) self.assertListEqual( args.utrs, [{'end': 20, 'strand': '+', 'datas': {'frag_1': [{ 'track': 'track_1', 
'final_start': 2, 'avg': 41.36842105263158, 'high': 50, 'type': 'frag', 'final_end': 20, 'ori_avg': 27.52, 'low': 10}]}, 'end_cleavage': 'NA', 'utr': '3utr', 'start_cleavage': 'NA', 'strain': 'aaa', 'start': 2, 'start_tss': 'NA'}]) args.utrs = [] args.srnas = [] args.pros = self.example.pros args.min_len = 3 args.max_len = 20 pos = {"start": 2, "end": 24, "ori_start": 1, "ori_end": 25} sud.detect_normal(diff, self.example.wigs, self.example.inters[0], pos, "3utr", self.example.tsss[0], args) self.assertListEqual( args.srnas, [ {'start': 1, 'end': 18, 'start_tss': 'TSS:1_+', 'datas': {'frag_1': [{ 'ori_avg': 27.52, 'track': 'track_1', 'high': 50, 'low': 10, 'type': 'frag', 'final_end': 20, 'avg': 41.36842105263158, 'final_start': 2}]}, 'start_cleavage': 'NA', 'end_cleavage': 'Cleavage:18_+', 'utr': '3utr', 'strand': '+', 'strain': 'aaa'}]) sud.get_coverage = get_coverage def test_detect_3utr_pro(self): sud.get_coverage = self.mock.mock_get_coverage args = self.mock_args.mock() args.min_len = 1 args.max_len = 300 args.decrease_utr = 0.5 args.fuzzy_utr = 1 args.fuzzy_tsss = {"3utr": 3} args.pros = self.example.pros args.utrs = [] args.srnas = [] pos = {"start": 2, "end": 20, "ori_start": 1, "ori_end": 25} sud.detect_3utr_pro(self.example.inters[0], pos, self.example.wigs, "3utr", args) self.assertListEqual( args.srnas, [{'end_cleavage': 'NA', 'end': 20, 'start_cleavage': 'Cleavage:18_+', 'utr': '3utr', 'datas': {'frag_1': [{ 'low': 10, 'final_start': 2, 'track': 'track_1', 'type': 'frag', 'final_end': 20, 'avg': 41.36842105263158, 'ori_avg': 27.52, 'high': 50}]}, 'strand': '+', 'start_tss': 'NA', 'start': 18, 'strain': 'aaa'}]) self.assertListEqual( args.utrs, [{'end_cleavage': 'NA', 'end': 20, 'start_cleavage': 'NA', 'utr': '3utr', 'datas': {'frag_1': [{ 'low': 10, 'final_start': 2, 'track': 'track_1', 'type': 'frag', 'final_end': 20, 'avg': 41.36842105263158, 'ori_avg': 27.52, 'high': 50}]}, 'strand': '+', 'start_tss': 'NA', 'start': 18, 'strain': 'aaa'}]) 
sud.get_coverage = get_coverage def test_detect_twopro(self): sud.get_coverage = self.mock.mock_get_coverage pro_dict = [{"seq_id": "aaa", "source": "tsspredator", "feature": "processing", "start": 18, "end": 18, "phase": ".", "strand": "+", "score": "."}, {"seq_id": "aaa", "source": "tsspredator", "feature": "processing", "start": 38, "end": 38, "phase": ".", "strand": "+", "score": "."}] attributes_pro = [{"ID": "processing0", "Name": "Processing_0"}, {"ID": "processing1", "Name": "Processing_1"}] pros = [] for index in range(0, 2): pros.append(Create_generator( pro_dict[index], attributes_pro[index], "gff")) args = self.mock_args.mock() args.min_len = 1 args.max_len = 300 args.decrease_utr = 0.5 args.fuzzy_utr = 3 args.fuzzy_tsss = {"3utr": 3} args.pros = pros args.utrs = [] args.srnas = [] pos = {"start": 2, "end": 50, "ori_start": 1, "ori_end": 25} sud.detect_twopro(self.example.inters[0], pos, self.example.wigs, "interCDS", "interCDS", args) self.assertListEqual( args.srnas, [{'start_cleavage': 'Cleavage:18_+', 'utr': 'interCDS', 'datas': {'frag_1': [{ 'type': 'frag', 'low': 10, 'final_start': 2, 'high': 50, 'avg': 41.36842105263158, 'final_end': 20, 'track': 'track_1', 'ori_avg': 27.52}]}, 'start_tss': 'NA', 'end_cleavage': 'Cleavage:38_+', 'strand': '+', 'end': 38, 'strain': 'aaa', 'start': 18}]) self.assertListEqual( args.utrs, [{'start_cleavage': 'NA', 'utr': 'interCDS', 'datas': {'frag_1': [{ 'type': 'frag', 'low': 10, 'final_start': 2, 'high': 50, 'avg': 41.36842105263158, 'final_end': 20, 'track': 'track_1', 'ori_avg': 27.52}]}, 'start_tss': 'NA', 'end_cleavage': 'Cleavage:38_+', 'strand': '+', 'end': 38, 'strain': 'aaa', 'start': 18}]) sud.get_coverage = get_coverage def test_run_utr_detection(self): args = self.mock_args.mock() args.min_len = 1 args.max_len = 300 args.decrease_utr = 0.5 args.fuzzy_utr = 2 args.fuzzy_tsss = {"5utr": "n_3"} args.utrs = [] args.srnas = [] args.tsss = self.example.tsss args.pros = self.example.pros sud.get_coverage = 
self.mock.mock_get_coverage sud.run_utr_detection(self.example.wigs, self.example.inters[0], 2, 50, "5utr", args) sud.get_coverage = get_coverage self.assertListEqual( args.srnas, [{'start': 1, 'end': 50, 'start_cleavage': 'NA', 'datas': {'frag_1': [{ 'high': 50, 'final_end': 20, 'avg': 41.36842105263158, 'low': 10, 'ori_avg': 27.52, 'final_start': 2, 'type': 'frag', 'track': 'track_1'}]}, 'start_tss': 'TSS:1_+', 'strain': 'aaa', 'strand': '+', 'utr': '5utr', 'end_cleavage': 'NA'}]) self.assertListEqual( args.utrs, [{'start': 1, 'end': 50, 'start_cleavage': 'NA', 'datas': {'frag_1': [{ 'high': 50, 'final_end': 20, 'avg': 41.36842105263158, 'low': 10, 'ori_avg': 27.52, 'final_start': 2, 'type': 'frag', 'track': 'track_1'}]}, 'start_tss': 'NA', 'strain': 'aaa', 'strand': '+', 'utr': '5utr', 'end_cleavage': 'NA'}]) def test_class_utr(self): args = self.mock_args.mock() args.min_len = 1 args.max_len = 300 args.decrease_utr = 0.5 args.fuzzy_utr = 2 args.fuzzy_tsss = {"3utr": "p_3"} args.utrs = [] args.srnas = [] args.tsss = self.example.tsss args.pros = self.example.pros args.wig_fs = self.example.wigs sud.get_coverage = self.mock.mock_get_coverage sud.class_utr(self.example.inters[0], self.example.tas[0], args, args.wig_fs, args.wig_fs) sud.get_coverage = get_coverage self.assertListEqual( args.srnas, [{'end_cleavage': 'NA', 'start_tss': 'TSS:1_+', 'utr': '3utr', 'start_cleavage': 'NA', 'end': 20, 'start': 1, 'datas': {'frag_1': [{ 'ori_avg': 27.52, 'final_start': 2, 'avg': 41.36842105263158, 'track': 'track_1', 'type': 'frag', 'final_end': 20, 'low': 10, 'high': 50}]}, 'strain': 'aaa', 'strand': '+'}, {'end_cleavage': 'NA', 'start_tss': 'NA', 'utr': '3utr', 'start_cleavage': 'Cleavage:18_+', 'end': 20, 'start': 18, 'datas': {'frag_1': [{ 'ori_avg': 27.52, 'final_start': 2, 'avg': 41.36842105263158, 'track': 'track_1', 'type': 'frag', 'final_end': 20, 'low': 10, 'high': 50}]}, 'strain': 'aaa', 'strand': '+'}]) self.assertListEqual( args.utrs, [{'end_cleavage': 'NA', 
'start_tss': 'NA', 'utr': '3utr', 'start_cleavage': 'NA', 'end': 20, 'start': 1, 'datas': {'frag_1': [{ 'ori_avg': 27.52, 'final_start': 2, 'avg': 41.36842105263158, 'track': 'track_1', 'type': 'frag', 'final_end': 20, 'low': 10, 'high': 50}]}, 'strain': 'aaa', 'strand': '+'}, {'end_cleavage': 'NA', 'start_tss': 'NA', 'utr': '3utr', 'start_cleavage': 'NA', 'end': 20, 'start': 18, 'datas': {'frag_1': [{ 'ori_avg': 27.52, 'final_start': 2, 'avg': 41.36842105263158, 'track': 'track_1', 'type': 'frag', 'final_end': 20, 'low': 10, 'high': 50}]}, 'strain': 'aaa', 'strand': '+'}]) def test_get_utr_coverage(self): utrs = [{'strand': '+', 'utr': '3utr', 'end': 20, 'start': 18, 'start_tss': 'NA', 'datas': {'frag_1': [{ 'final_end': 20, 'track': 'track_1', 'final_start': 2, 'ori_avg': 27.52, 'avg': 41.36842105263158, 'type': 'frag', 'low': 10, 'high': 50}]}, 'end_cleavage': 'NA', 'strain': 'aaa', 'start_cleavage': 'NA'}] covers = sud.get_utr_coverage(utrs) self.assertDictEqual(covers, {'aaa': {'interCDS': {}, '3utr': {'track_1': [27.52]}, '5utr': {}}}) def test_set_cutoff(self): args = self.mock_args.mock() args.texs = {"track_4@AND@track_6": 0} covers = {'aaa': {'5utr': {'track_4': [52, 11, 23]}, 'inter': {'track_3': [111]}, 'total': {'track_1': [27.52, 111]}, '3utr': {'track_1': [27.52, 111]}, 'interCDS': {'track_2': [12, 0]}}} args.coverages = {"5utr": "p_0.3", "3utr": "n_10", "interCDS": "p_0.5"} args.cover_notex = {"5utr": "p_0.3", "3utr": "n_10", "interCDS": "p_0.5"} mediandict = sud.set_cutoff(covers, args) self.assertDictEqual(mediandict, {'aaa': {'5utr': {'track_4': { 'median': 11, 'mean': 28.666666666666668}}, 'interCDS': {'track_2': {}}, '3utr': {'track_1': {}}}}) args.cover_notex = None mediandict = sud.set_cutoff(covers, args) self.assertDictEqual(mediandict, {'aaa': { '3utr': {'track_1': {'mean': 69.26, 'median': 10.0}}, '5utr': {'track_4': {'mean': 28.666666666666668, 'median': 11}}, 'interCDS': {'track_2': {'mean': 6.0, 'median': 0}}}}) def 
test_mean_score(self): lst = [1, 3, 5, 6, 7, 8] mean = sud.mean_score(lst) self.assertEqual(mean, 5.0) def test_median_score(self): lst = [1, 3, 5, 6, 7, 8] median = sud.median_score(lst, 0.5) self.assertEqual(median, 5) def test_detect_srna(self): sud.replicate_comparison = self.mock.mock_replicate_comparison args = self.mock_args.mock() args.min_len = 1 args.max_len = 300 args.decrease_utr = 0.5 args.fuzzy_utr = 2 args.coverages = "cover" args.texs = "template_texs" args.tex_notex = "tex_notex" args.replicates = "rep" gffs = [] ncs = [] args.table_best = True args.out = StringIO() args.out_t = StringIO() median = {"aaa": {"3utr": 555}} args.srnas = [{'strand': '+', 'utr': '3utr', 'end': 20, 'start': 18, 'start_tss': 'NA', 'datas': {'frag_1': [{ 'final_end': 20, 'track': 'track_1', 'final_start': 2, 'ori_avg': 27.52, 'avg': 41.36842105263158, 'type': 'frag', 'low': 10, 'high': 50, "conds": ["frag"]}]}, 'end_cleavage': 'NA', 'strain': 'aaa', 'start_cleavage': 'Cleavage:18_+', 'detail': {'avg': 41.36842105263158, 'type': 'frag', 'low': 10, 'high': 50, "conds": ["frag"]}}] sud.detect_srna(median, args) self.assertEqual(args.out.getvalue(), ("aaa\tANNOgesic\tncRNA\t18\t20\t.\t+\t.\t" "ID=aaa_srna_utr0;Name=UTR_sRNA_00000;" "sRNA_type=3utr;best_avg_coverage=500;" "best_high_coverage=700;best_low_coverage=400;" "with_TSS=NA;start_cleavage=Cleavage:18_+;" "end_cleavage=NA\n")) self.assertEqual(args.out_t.getvalue(), ("aaa\t00000\t18\t20\t+\tfrag_1\ttrack_1\t500\t700\t400\tfrag(avg=200;high=700;low=400)\n")) def test_print_file(self): args = self.mock_args.mock() args.min_len = 1 args.max_len = 300 args.decrease_utr = 0.5 args.fuzzy_utr = 2 args.coverages = "cover" args.texs = "template_texs" args.tex_notex = "tex_notex" args.replicates = "rep" args.table_best = True args.out = StringIO() args.out_t = StringIO() srna = {'strand': '+', 'utr': '3utr', 'end': 20, 'start': 18, 'start_tss': 'NA', 'datas': {'frag_1': [{ 'final_end': 20, 'track': 'track_1', 'final_start': 2, 
'ori_avg': 27.52, 'avg': 41.36842105263158, 'type': 'frag', 'low': 10, 'high': 50, "conds": ["frag"]}]}, 'end_cleavage': 'NA', 'strain': 'aaa', 'start_cleavage': 'Cleavage:18_+'} srna_datas = {"detail": [{"best": 500, "track": "frag", "high": 700, "low": 400, "start": 100, "end": 202, "conds": {"frag_1": "track_1"}, "avg": 200}], "conds": {"frag_1": "track_1"}, "best": 500, "track": "frag", "high": 700, "low": 400, "start": 100, "end": 202} sud.print_file(0, srna, 2, 50, srna_datas, args) self.assertEqual(args.out.getvalue(), ("aaa\tANNOgesic\tncRNA\t2\t50\t.\t+\t.\t" "ID=aaa_srna_utr0;Name=UTR_sRNA_00000;" "sRNA_type=3utr;best_avg_coverage=500;" "best_high_coverage=700;best_low_coverage=400;" "with_TSS=NA;start_cleavage=Cleavage:18_+;" "end_cleavage=NA\n")) self.assertEqual(args.out_t.getvalue(), ("aaa\t00000\t2\t50\t+\tfrag_1\ttrack_1\t500\t700\t400\tfrag(avg=200;high=700;low=400)\n"))
class TestConverter(unittest.TestCase):
    """Tests for ``Converter`` using mocked parsers and in-memory fixtures.

    Every test works inside ``test_folder``, which ``setUp`` creates and
    ``tearDown`` removes again.
    """

    def setUp(self):
        """Build a ``Converter`` wired to mocks and cache the fixture texts."""
        self.converter = Converter()
        self.example = Example()
        # Swap the real parsers/readers for mocks so the tests only
        # exercise the conversion logic itself.
        self.converter.gff3parser = Mock_gff3_parser
        self.converter._print_rntptt_title = Mock_func().print_rntptt_title
        self.converter.tsspredator = Mock_TSSPredatorReader()
        self.converter._read_file = Mock_func().mock_read_file
        self.gff_file = self.example.gff_file
        self.ptt_out = self.example.ptt_out
        self.rnt_out = self.example.rnt_out
        self.srna_out = self.example.srna_out
        self.embl_file = self.example.embl_file
        self.embl_out = self.example.embl_out
        self.multi_embl = self.example.multi_embl
        self.gff_out = self.example.gff_out
        self.mastertable = self.example.mastertable
        self.tss_file = self.example.tss_file
        self.fasta_file = self.example.fasta_file
        self.transterm = self.example.transterm
        self.term_file = self.example.term_file
        self.circ_file = self.example.circrna_table
        self.circ_all = self.example.circrna_all
        self.circ_best = self.example.circrna_best
        self.test_folder = "test_folder"
        self.mock_args = MockClass()
        # exist_ok tolerates a folder left behind by an aborted run.
        os.makedirs(self.test_folder, exist_ok=True)

    def tearDown(self):
        """Remove the scratch folder and everything the test wrote into it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    @staticmethod
    def get_info(datas):
        """Return the first eight tab-separated columns of each data line.

        Lines starting with ``#`` are dropped; the remaining lines are cut
        to their first eight columns and re-joined with tabs.
        """
        return ["\t".join(data.split("\t")[:8])
                for data in datas if not data.startswith("#")]

    def _write_embl_fixture(self, path):
        """Write the EMBL fixture to ``path``, one newline-terminated line each."""
        with open(path, "w") as eh:
            eh.writelines(line + "\n" for line in self.embl_file.split("\n"))

    def test_print_rntptt_file(self):
        cdss = []
        genes = []
        rnas = []
        # Route every fixture entry into the list matching its feature type.
        targets = {"gene": genes, "CDS": cdss, "tRNA": rnas}
        for gff in Example().gff_dict:
            target = targets.get(gff["feature"])
            if target is not None:
                target.append(self.converter.gff3parser.entries(self, gff))
        # The buffers are closed even when an assertion fails.
        with StringIO() as out_p, StringIO() as out_r:
            self.converter._print_rntptt_file(out_p, cdss, genes)
            self.converter._print_rntptt_file(out_r, rnas, genes)
            self.assertEqual(out_p.getvalue().split("\n")[:-1],
                             self.example.ptt_out_list)
            self.assertEqual(out_r.getvalue().split("\n")[:-1],
                             self.example.rnt_out_list)

    def test_srna2pttrnt(self):
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        srnas = []
        self.converter._srna2rntptt(srna_input_file, srna_output_file,
                                    srnas, 1234567)
        datas = import_data(srna_output_file)
        self.assertEqual(set(datas), set(self.srna_out.split("\n")))

    def test_multi_embl_pos(self):
        embls = []
        for line in self.embl_file.split("\n"):
            datas = self.converter._multi_embl_pos(line.strip())
            # "Wrong" is the marker the parser returns for lines it rejects.
            if datas != "Wrong":
                embls.append(datas)
        for index in range(7):
            self.assertDictEqual(embls[index], self.embl_out[index])
        for index in range(2):
            self.assertDictEqual(embls[-1]["pos"][index],
                                 self.multi_embl[index])

    def test_parser_embl_data(self):
        embl_file = os.path.join(self.test_folder, "test.embl")
        self._write_embl_fixture(embl_file)
        with StringIO() as out:
            info = self.converter._parser_embl_data(embl_file, out)
            datas = out.getvalue().split("\n")
        self.assertEqual(set(datas[:-1]), set(self.gff_out.split("\n")))
        self.assertEqual(info[0], "NC_007795.1")
        for index in range(2):
            self.assertDictEqual(info[1]["pos"][index],
                                 self.multi_embl[index])

    def test_multi_tss_class(self):
        nums = {"tss": 0, "tss_uni": 0, "class": 1}
        utrs = {"total": [], "pri": [], "sec": []}
        tss_features = {"tss_types": [], "locus_tags": [], "utr_lengths": []}
        tss_index = defaultdict(int)
        with StringIO(self.mastertable) as fh:
            for tss in self.converter.tsspredator.entries(fh):
                self.converter._multi_tss_class(tss, tss_index, tss_features,
                                                nums, utrs)
        self.assertDictEqual(nums, {'tss_uni': 0, 'class': 5, 'tss': 2})

    def test_convert_mastertable2gff(self):
        master_file = os.path.join(self.test_folder, "test.tsv")
        with open(master_file, "w") as th:
            th.write(self.mastertable)
        out_gff = os.path.join(self.test_folder, "test.tsv_out")
        self.converter.convert_mastertable2gff(master_file, "ANNOgesic",
                                               "TSS", "aaa", out_gff)
        datas = import_data(out_gff)
        self.assertEqual(set(datas), set(self.tss_file.split("\n")))

    def test_convert_gff2rntptt(self):
        srna_input_file = os.path.join(self.test_folder, "srna.gff")
        srna_output_file = os.path.join(self.test_folder, "srna.out")
        gff_file = os.path.join(self.test_folder, "test.gff")
        rnt_file = os.path.join(self.test_folder, "test.rnt")
        ptt_file = os.path.join(self.test_folder, "test.ptt")
        fasta_file = os.path.join(self.test_folder, "test.fa")
        with open(srna_input_file, "w") as fh:
            fh.write(self.gff_file)
        with open(gff_file, "w") as fh:
            fh.write(self.gff_file)
        with open(fasta_file, "w") as fh:
            fh.write(self.fasta_file)
        self.converter.convert_gff2rntptt(gff_file, fasta_file, ptt_file,
                                          rnt_file, srna_input_file,
                                          srna_output_file)
        # Check that the outputs exist on disk: asserting on the path
        # strings themselves is always true and verifies nothing.
        self.assertTrue(os.path.exists(srna_output_file))
        self.assertTrue(os.path.exists(rnt_file))
        self.assertTrue(os.path.exists(ptt_file))

    def test_convert_embl2gff(self):
        embl_file = os.path.join(self.test_folder, "test.embl")
        gff_file = os.path.join(self.test_folder, "test.embl_out")
        self._write_embl_fixture(embl_file)
        self.converter.convert_embl2gff(embl_file, gff_file)
        datas = import_data(gff_file)
        # The first and the last two entries are excluded from the comparison.
        self.assertEqual(set(datas[1:-2]), set(self.gff_out.split("\n")))

    def test_convert_transtermhp2gff(self):
        transterm_file = os.path.join(
            self.test_folder, "test_best_terminator_after_gene.bag")
        gff_file = os.path.join(self.test_folder, "transterm.gff")
        with open(transterm_file, "w") as th:
            th.write(self.transterm)
        self.converter.convert_transtermhp2gff(transterm_file, gff_file)
        datas = import_data(gff_file)
        self.assertEqual(set(datas), set(self.term_file.split("\n")))

    def test_convert_circ2gff(self):
        circ_file = os.path.join(self.test_folder, "circ.csv")
        out_all = os.path.join(self.test_folder, "all.gff")
        out_filter = os.path.join(self.test_folder, "best.gff")
        with open(circ_file, "w") as ch:
            ch.write(self.circ_file)
        args = self.mock_args.mock()
        args.start_ratio = 0.5
        args.end_ratio = 0.5
        args.support = 5
        self.converter.convert_circ2gff(circ_file, args, out_all, out_filter)
        # Only the first eight columns of the non-comment lines are compared.
        self.assertListEqual(self.get_info(import_data(out_all)),
                             self.get_info(self.circ_all.split("\n")))
        self.assertListEqual(self.get_info(import_data(out_filter)),
                             self.get_info(self.circ_best.split("\n")))
class TestSubLocal(unittest.TestCase):
    """Tests for ``SubLocal`` run against a scratch directory tree.

    ``setUp`` creates ``test_folder`` with the sub-folders the tests write
    into, and ``tearDown`` removes the whole tree again.
    """

    def setUp(self):
        """Create the scratch folders and a ``SubLocal`` pointing at them."""
        self.example = Example()
        self.mock_args = MockClass()
        self.mock = Mock_func()
        self.test_folder = "test_folder"
        self.out = "test_folder/output"
        self.fastas = "test_folder/fastas"
        self.gffs = "test_folder/gffs"
        self.stat = "test_folder/stat"
        self.trans = "test_folder/tran"
        # Create every folder unconditionally: exist_ok tolerates a tree
        # left behind (possibly incomplete) by an aborted earlier run.
        for folder in (self.test_folder,
                       self.out,
                       self.fastas,
                       os.path.join(self.fastas, "tmp"),
                       self.gffs,
                       os.path.join(self.gffs, "tmp"),
                       self.stat,
                       self.trans):
            os.makedirs(folder, exist_ok=True)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.fastas = self.fastas
        args.out_folder = self.out
        args.trans = self.trans
        self.sub = SubLocal(args)

    def tearDown(self):
        """Remove the scratch folder and everything the test wrote into it."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def _log_path(self):
        """Return the path of the log file handed to the code under test."""
        return os.path.join(self.test_folder, "test.log")

    def test_get_protein_seq(self):
        gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
                 self.example.fasta_file)
        gff = "aaa.gff"
        gen_file(os.path.join(self.gffs, "tmp", gff), self.example.gff_file)
        gen_file(os.path.join(self.trans, "aaa_transcript.gff"),
                 self.example.tran_file)
        args = self.mock_args.mock()
        args.out_folder = self.test_folder
        # Close the log before tearDown deletes the folder it lives in.
        with open(self._log_path(), "w") as log:
            prefix = self.sub._get_protein_seq(gff, self.test_folder,
                                               self.trans, args, log)
        self.assertEqual(prefix, "aaa")

    def test_run_psortb(self):
        self.sub._psortb = self.mock.mock_psortb
        tmp_result = os.path.join(self.out, "tmp_results")
        os.mkdir(tmp_result)
        args = self.mock_args.mock()
        args.psortb_path = "psortb_path"
        args.gram = "positive"
        with open(self._log_path(), "w") as log:
            self.sub._run_psortb(args, "aaa", self.out, self.test_folder,
                                 tmp_result, log)
        self.assertTrue(os.path.exists(os.path.join(self.out, "tmp_log")))
        self.assertTrue(
            os.path.exists(os.path.join(tmp_result, "aaa_raw.txt")))

    def test_merge_and_stat(self):
        # Put the real function back afterwards so the module-level patch
        # does not leak into tests that run later.
        self.addCleanup(setattr, su, "stat_sublocal", su.stat_sublocal)
        su.stat_sublocal = self.mock.mock_stat_sublocal
        os.mkdir(os.path.join(self.gffs, "aaa.gff_folder"))
        gen_file(os.path.join(self.gffs, "aaa.gff_folder/aaa.gff"), "test")
        os.mkdir(os.path.join(self.out, "psortb_results"))
        gen_file(os.path.join(self.out, "aaa_raw.txt"), "test")
        gen_file(os.path.join(self.out, "aaa_table.csv"), "test")
        with open(self._log_path(), "w") as log:
            self.sub._merge_and_stat(self.gffs, self.out, self.test_folder,
                                     self.stat, log)
        self.assertTrue(os.path.exists(os.path.join(self.stat, "aaa")))
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "aaa")))

    def test_compare_cds_tran(self):
        gff_file = os.path.join(self.test_folder, "aaa.gff")
        tran_file = os.path.join(self.test_folder, "aaa_transcript.gff")
        gen_file(gff_file, self.example.gff_file)
        gen_file(tran_file, self.example.tran_file)
        with open(self._log_path(), "w") as log:
            self.sub._compare_cds_tran(gff_file, tran_file, log)
        # Only the first value returned by extract_info is checked here.
        datas, _ = extract_info("test_folder/output/all_CDSs/tmp_cds.gff",
                                "file")
        self.assertEqual("\n".join(datas),
                         'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')