def test_get_proper_tss(self):
    """Write the example GFF, run si.get_proper_tss and check the first start."""
    cutoffs = {
        "primary": 0,
        "secondary": 0,
        "internal": 0,
        "antisense": 50,
        "orphan": 10,
    }
    tss_path = os.path.join(self.test_folder, "tss.gff")
    gen_file(tss_path, self.example.gff_file)
    found, _count = si.get_proper_tss(tss_path, cutoffs)
    first_tss = found[0]
    self.assertEqual(first_tss.start, 140)
def test_low_expression(self):
    """Run _low_expression with a mocked filter and check the cutoff table.

    The four wig fixtures and the TSS gff are generated first; afterwards the
    written stat_test_low_expression_cutoff.csv is compared with the expected
    two-line content.
    """
    ts.filter_low_expression = self.mock.mock_filter_low_expression
    wig_fixtures = (
        ("test1_forward.wig", "test_f"),
        ("test1_reverse.wig", "test_r"),
        ("test1_TEX_forward.wig", "test_f"),
        ("test1_TEX_reverse.wig", "test_r"),
    )
    for wig_name, content in wig_fixtures:
        gen_file(os.path.join(self.wigs, wig_name), content)
    gen_file(os.path.join(self.gffs, "test_TSS.gff"), self.example.tss_file)
    for sub_dir in ("statistics", "statistics/test"):
        os.mkdir(os.path.join(self.out, sub_dir))

    args = self.mock_args.mock()
    args.manual = "manual"
    args.libs = [
        "test1_TEX_forward.wig:tex:1:a:+",
        "test1_TEX_reverse.wig:tex:1:a:-",
        "test1_forward.wig:notex:1:a:+",
        "test1_reverse.wig:notex:1:a:-",
    ]
    args.wig_folder = self.wigs
    args.program = "TSS"
    args.cluster = 3

    self.tss._low_expression(args, self.gffs)
    # The call leaves a "tmp" directory in the working directory; remove it.
    shutil.rmtree("tmp")

    stat_path = os.path.join(
        self.out, "statistics/test/stat_test_low_expression_cutoff.csv")
    content = "\n".join(import_data(stat_path))
    self.assertEqual(content, "Genome\tCutoff_coverage\ntest\t100")
def test_gene_expression(self):
    """Run gea.gene_expression with mocked readers and compare aaa_CDS.csv.

    The produced lines and the expected lines are compared as dictionaries
    keyed by line content.
    """
    gea.read_wig = MockFunc().mock_read_wig
    gea.read_libs = MockFunc().mock_read_libs
    gea.read_data = MockFunc().mock_read_data

    stat_folder = os.path.join(self.test_folder, "stat")
    gff_folder = os.path.join(self.test_folder, "gff")
    out_gff_folder = os.path.join(self.test_folder, "out_gff")

    # The gff folder is always recreated from scratch.
    if os.path.exists(gff_folder):
        shutil.rmtree(gff_folder)
    os.mkdir(gff_folder)
    gen_file(os.path.join(gff_folder, "aaa.gff"), "test")
    # The other two folders are only created when missing.
    for folder in (stat_folder, out_gff_folder):
        if not os.path.exists(folder):
            os.mkdir(folder)

    replicates = {"tex": 1, "frag": 1}
    gea.gene_expression(None, gff_folder, "all", "all", "test_wig",
                        "test_wig", ["CDS"], "test_wig_folder", 5, 2,
                        replicates, stat_folder, out_gff_folder,
                        "high", 100, 0)

    produced = import_data(os.path.join(stat_folder, "aaa_CDS.csv"))
    got = {line: line for line in produced}
    expected = {line: line for line in self.example.out_stat.split("\n")}
    self.assertDictEqual(got, expected)
def test_compare_tss(self):
    """Run _compare_tss with mocked statistics and check the rewritten gffs.

    After the call, both the TSS gff and the transcript gff are expected to
    equal their example content prefixed with a "##gff-version 3" line.
    """
    tr.stat_ta_tss = self.mock.mock_stat_ta_tss
    self.tran.multiparser = self.mock_parser
    gen_file(os.path.join(self.gffs, "test_TSS.gff"), self.example.gff_file)
    gen_file(os.path.join(self.gffs, "tmp/test_TSS.gff"),
             self.example.gff_file)
    gen_file(os.path.join(self.out_gff, "test_transcript.gff"),
             self.example.tran_file)
    gff_out = os.path.join(self.out, "gffs")
    gen_file(os.path.join(gff_out, "tmp_ta_tss"), self.example.tran_file)
    gen_file(os.path.join(gff_out, "tmp_tss_ta"), self.example.gff_file)
    args = self.mock_args.mock()
    args.out_folder = self.out
    args.trans = self.trans
    args.compare_tss = self.gffs
    args.fuzzy = 2
    # Use a context manager so the log handle is closed even when the
    # call under test raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        self.tran._compare_tss(["test"], args, log)
    datas = import_data(os.path.join(self.gffs, "test_TSS.gff"))
    self.assertEqual("\n".join(datas),
                     "##gff-version 3\n" + self.example.gff_file)
    datas = import_data(os.path.join(self.out_gff, "test_transcript.gff"))
    self.assertEqual("\n".join(datas),
                     "##gff-version 3\n" + self.example.tran_file)
def test_convert_embl(self):
    """Run _convert_embl on one GenBank record and check the output folder."""
    gen_file(os.path.join(self.test_folder, "aaa.gbk"),
             self.example.gbk_file.split("//")[0])
    # Context manager closes the log handle even if the call raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        out = self.ratt._convert_embl(self.test_folder, log)
    self.assertEqual(out, "test_folder/gbk/gbk_tmp")
    self.assertTrue(os.path.exists("test_folder/gbk/gbk_tmp"))
def test_plot_ppi(self):
    """Plot the example PPI file and expect the HP0001_nusB figure to exist."""
    ppi_path = os.path.join(self.test_folder, "test_ppi")
    gen_file(ppi_path, self.example.ppi_file)
    pp.plot_ppi(ppi_path, 0, self.test_folder, 1000)
    strain = "Helicobacter pylori 26695 chromosome"
    figure_path = "test_folder/" + strain + "/HP0001_nusB.png"
    self.assertTrue(os.path.exists(figure_path))
def test_stat(self):
    """Run _stat with a mocked op.stat and expect stat_operon_test.csv."""
    op.stat = self.mock.mock_stat
    table_dir = os.path.join(self.output, "tables")
    table_file = os.path.join(self.output, "tables", "operon_test.csv")
    # Only create the input table when it is not already present.
    if not os.path.exists(table_file):
        gen_file(table_file, "test")
    self.operon._stat(table_dir, self.stat)
    expected_stat = os.path.join(self.stat, "stat_operon_test.csv")
    self.assertTrue(os.path.exists(expected_stat))
def test_read_wig(self):
    """Read a generated forward wig with ta.read_wig and compare the result."""
    wig_path = os.path.join(self.test_folder, "test_f.wig")
    gen_file(wig_path, self.example.wig_f)
    lib = {
        "name": "test1",
        "type": "frag",
        "cond": "1",
        "strand": "+",
        "rep": "a",
    }
    parsed = ta.read_wig(wig_path, [lib], "+")
    self.assertDictEqual(parsed, self.example.wigs_f)
def test_stat_ta_tss(self):
    """Run stc.stat_ta_tss and check the statistics and both output gffs.

    The statistics file, the transcript output (including its
    associated_tss attribute) and the TSS output (including its
    Parent_tran attribute) are compared with expected values.
    """
    tss_file = os.path.join(self.test_folder, "aaa_TSS.gff")
    ta_file = os.path.join(self.test_folder, "aaa_transcript.gff")
    gen_file(tss_file, self.example.tss)
    gen_file(ta_file, self.example.ta)
    stat_file = os.path.join(self.test_folder, "stat")
    out_ta_file = os.path.join(self.test_folder, "out_ta.gff")
    out_tss_file = os.path.join(self.test_folder, "out_tss.gff")
    stc.stat_ta_tss(ta_file, tss_file, stat_file, out_ta_file,
                    out_tss_file, 5)
    datas = import_data(stat_file)
    self.assertEqual("\n".join(datas),
                     "All genomes:\n" + self.example.print_tas)
    datas, attributes = extract_info(out_ta_file, "file")
    self.assertListEqual(
        datas,
        ['aaa\tfragmented_and_normal\tTranscript\t313\t3344\t.\t+\t.'])
    for attribute in attributes:
        if "associated_tss" in attribute:
            # The original call passed only the expected string, which
            # raises TypeError; compare the attribute against it instead.
            self.assertEqual(attribute, "associated_tss=TSS:2131_f")
    datas, attributes = extract_info(out_tss_file, "file")
    self.assertListEqual(datas,
                         ['aaa\tTSSpredator\tTSS\t2131\t2131\t.\t+\t.'])
    for attribute in attributes:
        if "Parent_tran" in attribute:
            self.assertEqual(attribute, "Parent_tran=tran0")
def test_import_data(self):
    """Import the example mutation table and compare the parsed records."""
    table_path = os.path.join(self.test_folder, "mod")
    gen_file(table_path, self.example.mutation)
    expected_changes = [
        {'ref_nt': 'c', 'tar_nt': '', 'position': '3'},
        {'ref_nt': '-', 'tar_nt': 'deletion', 'position': '6'},
    ]
    expected = [{
        'target_id': 'test_NC_000915.1',
        'datas': expected_changes,
        'ref_id': 'NC_000915.1',
    }]
    parsed = self.seq._import_data(table_path, "test")
    self.assertListEqual(parsed, expected)
def test_sort_srna_fasta(self):
    """Run _sort_srna_fasta on three records and check the output order."""
    fasta_path = os.path.join(self.fastas, "test.fa")
    gen_file(fasta_path, ">aaa\nAAAAAAAA\n>bbb\nCCCC\n>ccc\nGGGGGGGGGGGG")
    self.star._sort_srna_fasta(fasta_path, "test", self.test_folder)
    sorted_path = os.path.join(self.test_folder,
                               "tmp_srna_target_test_sRNA.fa")
    expected = [
        '>bbb', 'CCCC',
        '>aaa', 'AAAAAAAA',
        '>ccc', 'GGGGGGGGGGGG',
    ]
    self.assertListEqual(import_data(sorted_path), expected)
def test_import_bam(self):
    """Create two bam files and check what _import_bam counts and collects."""
    for bam_name in ("test_1.bam", "test_2.bam"):
        gen_file(os.path.join(self.test_folder, bam_name), "test")
    collected = []
    count = self.snp._import_bam(self.test_folder, collected)
    self.assertEqual(count, 2)
    expected = ['test_folder/test_1.bam', 'test_folder/test_2.bam']
    self.assertListEqual(collected, expected)
def test_read_gff(self):
    """Read the example TSS gff with ptv.read_gff and check entries/counts."""
    gff_path = os.path.join(self.test_folder, "test.gff")
    gen_file(gff_path, self.example.tss_file)
    by_strain, counts = ptv.read_gff(gff_path)
    for strain in ("all", "aaa"):
        self.assertEqual(by_strain[strain][0].start, 140)
    self.assertDictEqual(counts, {'all': 1, 'aaa': 1})
def test_change_format(self):
    """Run cdf.change_format on one record and check the rewritten header."""
    src = os.path.join(self.test_folder, "input")
    dst = os.path.join(self.test_folder, "output")
    record = (">srna_1|Staphylococcus|Aar|12314|12444|forward\n"
              "ATAGATTCCCGCGTATAGTCATCATTGTAC")
    gen_file(src, record)
    cdf.change_format(src, dst)
    expected = ['>srna_1|Staphylococcus|Aar',
                'ATAGATTCCCGCGTATAGTCATCATTGTAC']
    self.assertListEqual(import_data(dst), expected)
def test_setup_folder_and_read_file(self):
    """Run _setup_folder_and_read_file and check folders and parsed genes.

    The handles stored in ``files`` by the call are closed before the
    assertions run.
    """
    paths = {
        key: os.path.join(self.test_folder, folder)
        for key, folder in (("all", "all_results"),
                            ("fig", "figures"),
                            ("best", "best_results"))
    }
    strain_id = {"file": "test.ptt", "ptt": "test_ptt",
                 "string": "test_string", "pie": "test_pie"}
    files = {"id_list": self.test_folder}
    files.update(dict.fromkeys(
        ("id_log", "pubmed_log", "all_specific", "best_specific",
         "all_nospecific", "best_nospecific", "action_log"), ""))
    gen_file(os.path.join(self.test_folder, "test.ptt"),
             self.example.ptt_file)

    args = self.mock_args.mock()
    args.querys = "all"
    args.no_specific = True
    args.out_folder = self.test_folder
    args.ptts = self.test_folder
    genes = self.ppi._setup_folder_and_read_file(strain_id, "", files,
                                                 paths, args)
    opened_keys = ("all_specific", "all_nospecific", "best_specific",
                   "best_nospecific", "id_log", "action_log", "pubmed_log")
    for key in opened_keys:
        files[key].close()

    for folder in ("test_folder/best_results/test",
                   "test_folder/all_results/test"):
        self.assertTrue(os.path.exists(folder))
    expected_genes = [
        {'strain': 'Staphylococcus_aureus_HG003',
         'locus_tag': locus_tag, 'gene': gene}
        for locus_tag, gene in (('dnaA', 'SAOUHSC_00001'),
                                ('-', 'SAOUHSC_00002'),
                                ('-', 'SAOUHSC_00003'))
    ]
    self.assertListEqual(genes, expected_genes)
def test_stat_ta_gff(self):
    """Run stc.stat_ta_gff for genes and check statistics and output gffs."""
    folder = self.test_folder
    gff_file = os.path.join(folder, "aaa.gff")
    ta_file = os.path.join(folder, "aaa_transcript.gff")
    gen_file(gff_file, self.example.gff)
    gen_file(ta_file, self.example.ta)
    stat_file = os.path.join(folder, "stat")
    out_ta_file = os.path.join(folder, "out_ta.gff")
    out_gff_file = os.path.join(folder, "out.gff")
    stc.stat_ta_gff(ta_file, gff_file, stat_file, out_ta_file,
                    out_gff_file, ["gene"])

    expected_stat = ("For gene:\n\tAll genomes:\n\tThe transcript "
                     "information compares with gene:\n"
                     + self.example.print_tag)
    self.assertEqual("\n".join(import_data(stat_file)), expected_stat)

    # Transcript output: one entry plus type / associated_cds attributes.
    entries, attrs = extract_info(out_ta_file, "file")
    self.assertListEqual(
        entries,
        ['aaa\tfragmented_and_normal\tTranscript\t313\t3344\t.\t+\t.'])
    for attr in attrs:
        if "type" in attr:
            self.assertEqual(attr, "type=cover_CDS")
        if "associated_cds=" in attr:
            self.assertEqual(attr, "associated_cds=YP_498609.1")

    # Annotation output: gene and CDS entries with a Parent_tran attribute.
    entries, attrs = extract_info(out_gff_file, "file")
    self.assertListEqual(entries,
                         ['aaa\tRefseq\tgene\t517\t1878\t.\t+\t.',
                          'aaa\tRefseq\tCDS\t517\t1878\t.\t+\t.'])
    for attr in attrs:
        if "Parent_tran" in attr:
            self.assertEqual(attr, "Parent_tran=tran0")
def test_read_file(self):
    """Read generated riboswitch and Rfam tables and compare both results."""
    ribo_path = os.path.join(self.test_folder, "table")
    rfam_path = os.path.join(self.test_folder, "rfam")
    for path, content in ((ribo_path, self.example.table),
                          (rfam_path, self.example.rfam)):
        gen_file(path, content)
    parsed_ribos, parsed_rfams = rg.read_file(ribo_path, rfam_path)
    self.assertListEqual(parsed_ribos, self.example.ribos)
    self.assertListEqual(parsed_rfams, self.example.rfams)
def test_compute(self):
    """Run _compute with a mocked assembly and expect the strain list."""
    tr.assembly = self.mock.mock_assembly
    wig_path = os.path.join(self.frag, "tmp/test_forward.wig")
    gen_file(wig_path, "test")
    args = self.mock_args.mock()
    args.replicates = "rep"
    # NOTE(review): "out_foler" looks like a typo for "out_folder"; kept
    # as-is because changing it could alter what the test exercises.
    args.out_foler = self.out
    found_strains = self.tran._compute("frag", self.frag, "libs", args)
    self.assertListEqual(found_strains, ['test'])
def test_get_genome_name(self):
    """Call _get_genome_name with a mocked header reader.

    NOTE(review): the result is not asserted on, so this only checks that
    the call completes without raising.
    """
    self.snp._get_header = self.mock.mock_get_header
    header_path = os.path.join(self.test_folder, "header")
    gen_file(header_path, self.example.bam)
    args = self.mock_args.mock()
    args.samtools_path = "test"
    bam_entry = {
        "sample": "NC_007795.1",
        "bam_number": 0,
        "bams": "test",
        "rep": 1,
    }
    _seq_names = self.snp._get_genome_name(args, [bam_entry])
def test_merge_sRNA(self):
    """Run _merge_sRNA and check the name of the returned merge folder."""
    os.mkdir(os.path.join(self.srnas, "tmp"))
    self.term.multiparser = self.mock_parser
    fixtures = (
        (os.path.join(self.gffs, "aaa.gff"), self.example.gff_file),
        (os.path.join(self.srnas, "tmp/aaa_sRNA.gff"),
         self.example.srna_file),
    )
    for path, content in fixtures:
        gen_file(path, content)
    merged = self.term._merge_sRNA(self.srnas, ["aaa"], self.gffs)
    last_component = merged.split("/")[-1]
    self.assertEqual(last_component, "tmp_merge_gff")
    # Clean up the folder created in the working directory.
    shutil.rmtree("tmp_merge_gff")
def test_read_data(self):
    """Read an intergenic gff and a fasta with sd.read_data and check both."""
    inter_path = os.path.join(self.test_folder, "inter")
    fasta_path = os.path.join(self.test_folder, "fa")
    gen_file(inter_path, self.example.inter)
    gen_file(fasta_path, ">aaa\nATATACCGATC")
    result = sd.read_data(inter_path, None, None, fasta_path, True)
    inters, _tsss, _srnas, seq = result
    self.assertEqual(inters[0].start, 2)
    self.assertDictEqual(seq, {'aaa': 'ATATACCGATC'})
def test_parser_embl_gbk(self):
    """Split a two-record GenBank file and check each per-record output."""
    gbk_path = os.path.join(self.test_folder, "aaa.gbk")
    gen_file(gbk_path, self.example.gbk_file)
    self.ratt._parser_embl_gbk([gbk_path])
    # Records in the example are separated by the "//" terminator.
    records = self.example.gbk_file.split("//")

    first = import_data(
        os.path.join(self.ref_embls, "gbk_tmp/NC_007795.1.gbk"))
    self.assertEqual("\n".join(first), records[0] + "//")

    second = import_data(
        os.path.join(self.ref_embls, "gbk_tmp/NC_007799.1.gbk"))
    self.assertEqual("\n".join(second), records[1].strip() + "\n//")
def test_get_prefer_name(self):
    """Look up the preferred name for "999.aaa" in a generated id list."""
    row_a = "999.aaa"
    files = {"id_list": self.test_folder}
    gen_file(os.path.join(self.test_folder, "aaa"),
             "999.aaa\t222\t333\ttest_aaa")
    # Context manager closes the log handle even if the call raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        name = self.ppi._get_prefer_name(row_a, "test", files, "test", log)
    self.assertEqual(name, "test_aaa")
def mock_run_infernal(self, e_value, seq, type_, prefix):
    """Write a canned scan or rescan result and return the written path.

    ``type_ == "txt"`` selects the first-scan fixture; any other value
    selects the rescan fixture. The remaining arguments are ignored.
    """
    first_scan = type_ == "txt"
    out_path = ("test_folder/output/test_RBS.txt" if first_scan
                else "test_folder/output/test_RBS_rescan.txt")
    content = (self.example.scan_file if first_scan
               else self.example.rescan_file)
    gen_file(out_path, content)
    return out_path
def test_read_gff(self):
    """Read annotation and TSS gffs with vg.read_gff and check first starts."""
    anno_path = os.path.join(self.test_folder, "test.gff")
    tss_path = os.path.join(self.test_folder, "test_TSS.gff")
    for path, content in ((anno_path, self.example.gff_file),
                          (tss_path, self.example.tss_file)):
        gen_file(path, content)
    annotations, tss_entries = vg.read_gff(anno_path, tss_path)
    self.assertEqual(annotations[0].start, 5)
    self.assertEqual(tss_entries[0].start, 3)
def test_compare_cds_tran(self):
    """Run _compare_cds_tran and check the CDS written to tmp_cds.gff."""
    anno_path = os.path.join(self.test_folder, "aaa.gff")
    tran_path = os.path.join(self.test_folder, "aaa_transcript.gff")
    gen_file(anno_path, self.example.gff_file)
    gen_file(tran_path, self.example.tran_file)
    self.sub._compare_cds_tran(anno_path, tran_path)
    entries, _attrs = extract_info(
        "test_folder/output/all_CDS/tmp_cds.gff", "file")
    joined = "\n".join(entries)
    self.assertEqual(joined, 'aaa\tRefSeq\tCDS\t3\t17\t.\t+\t.')
def test_read_predict_manual_gff(self):
    """Read the example gff with gene_length=1000 and check count and start."""
    gff_path = os.path.join(self.test_folder, "test.gff")
    gen_file(gff_path, self.example.gff_file)
    args = self.mock_args.mock()
    args.gene_length = 1000
    count, entries = ot.read_predict_manual_gff(gff_path, args)
    self.assertEqual(count, 1)
    first_entry = entries[0]
    self.assertEqual(first_entry.start, 633)
def test_modify_seq(self):
    """Apply the example mutation table and compare the modified fasta."""
    table_path = os.path.join(self.test_folder, "mod")
    fasta_path = os.path.join(self.fasta, "NC_000915.1.fa")
    gen_file(table_path, self.example.mutation)
    gen_file(fasta_path, self.example.fasta)
    self.seq.modify_seq(self.fasta, table_path, self.test_folder, "test")
    result_path = os.path.join(self.test_folder, "test_NC_000915.1.fa")
    result = "\n".join(import_data(result_path))
    self.assertEqual(result, self.example.out_1)
def test_formatdb(self):
    """Run _formatdb with mocked helpers and expect log.txt in the output."""
    database = "test_folder/test.fa"
    gen_file(database, "test")
    sr.change_format = self.mock.mock_change_format
    self.srna._run_format = self.mock.mock_run_format
    # Context manager closes the log handle even if the call raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        self.srna._formatdb(database, "type_", self.out,
                            "blast_path", "sRNA", log)
    self.assertTrue(os.path.exists(os.path.join(self.out, "log.txt")))
def test_get_specific_seq(self):
    """Extract the sRNA queried as "aaa:+:5:8" and check the fasta output."""
    folder = self.test_folder
    srna_path = os.path.join(folder, "aaa_sRNA.gff")
    seq_path = os.path.join(folder, "aaa.fa")
    out_path = os.path.join(folder, "out")
    gen_file(srna_path, self.example.srna_file)
    gen_file(seq_path, self.example.seq_file)
    query = ["aaa:+:5:8"]
    self.star._get_specific_seq(srna_path, seq_path, out_path, query)
    written = "\n".join(import_data(out_path))
    self.assertEqual(written, '>srna0|aaa|5|8|+\nTAAT')
def test_intergenic_srna(self):
    """Run si.intergenic_srna with mocked readers/comparisons.

    All four gff inputs are generated from the same example gff; the test
    only checks that the output gff and the output table are created.
    """
    si.read_libs = self.mock.mock_read_libs
    si.read_wig = self.mock.mock_read_wig

    folder = self.test_folder
    gff_file = os.path.join(folder, "aaa.gff")
    tss_file = os.path.join(folder, "aaa_TSS.gff")
    tran_file = os.path.join(folder, "aaa_tran.gff")
    pro_file = os.path.join(folder, "aaa_processing.gff")
    wig_f_file = os.path.join(self.wig_folder, "wig_f.wig")
    wig_r_file = os.path.join(self.wig_folder, "wig_r.wig")
    for path in (gff_file, tss_file, tran_file, pro_file):
        gen_file(path, self.example.gff_file)
    output_file = os.path.join(folder, "output")
    output_table = os.path.join(folder, "table")
    coverage = [0, 0, 0, 50, 10]

    si.replicate_comparison = self.mock.mock_replicate_comparison
    si.coverage_comparison = self.mock.mock_coverage_comparison

    args = self.mock_args.mock()
    settings = {
        "gff_file": gff_file,
        "tran_file": tran_file,
        "pro_file": pro_file,
        "tss_file": tss_file,
        "table_best": True,
        "cutoffs": coverage,
        "out_folder": self.test_folder,
        "file_type": "frag",
        "cut_notex": coverage,
        "input_libs": "input_libs",
        "wig_folder": self.wig_folder,
        "wig_f_file": wig_f_file,
        "wig_r_file": wig_r_file,
        "tss_source": True,
        "output_file": output_file,
        "output_table": output_table,
        "in_cds": False,
        "wigs_f": None,
        "wigs_r": None,
    }
    for attr_name, value in settings.items():
        setattr(args, attr_name, value)

    si.intergenic_srna(args, args.input_libs, None,
                       args.wigs_f, args.wigs_r)
    for produced in (output_file, output_table):
        self.assertTrue(os.path.exists(produced))
def test_post_modify(self):
    """Run _post_modify with mocked helpers and expect test_transcript.gff.

    tr.longer_ta and tr.fill_gap are replaced by mocks for the duration of
    the test and restored afterwards, also when the test fails.
    """
    pre_longer = tr.longer_ta
    pre_fill = tr.fill_gap
    tr.longer_ta = self.mock.mock_longer_ta
    tr.fill_gap = self.mock.mock_fill_gap
    try:
        gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
        gff_out = os.path.join(self.out, "gffs")
        os.mkdir(os.path.join(self.out, "tmp_tran"))
        gen_file(os.path.join(gff_out, "tmp_uni"), self.example.tran_file)
        gen_file(os.path.join(gff_out, "tmp_overlap"),
                 self.example.tran_file)
        gen_file(os.path.join(gff_out, "final_test"),
                 self.example.tran_file)
        args = self.mock_args.mock()
        args.gffs = self.gffs
        args.out_folder = self.out
        args.length = 20
        self.tran._post_modify(["test"], args)
        self.assertTrue(
            os.path.exists(os.path.join(gff_out, "test_transcript.gff")))
    finally:
        # Restore the real functions even when the call or assertion
        # fails, so the mocks do not leak out of this test.
        tr.longer_ta = pre_longer
        tr.fill_gap = pre_fill
def test_get_protein_seq(self):
    """Run _get_protein_seq on generated inputs and expect prefix "aaa"."""
    gen_file(os.path.join(self.fastas, "tmp/aaa.fa"),
             self.example.fasta_file)
    gff = "aaa.gff"
    gen_file(os.path.join(self.gffs, "tmp", gff), self.example.gff_file)
    gen_file(os.path.join(self.trans, "aaa_transcript.gff"),
             self.example.tran_file)
    args = self.mock_args.mock()
    args.out_folder = self.test_folder
    # Context manager closes the log handle even if the call raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        prefix = self.sub._get_protein_seq(gff, self.test_folder,
                                           self.trans, args, log)
    self.assertEqual(prefix, "aaa")
def test_merge_bams(self):
    """Run _merge_bams with a mocked bam runner over three bam files."""
    args = self.mock_args.mock()
    args.frag_bams = os.path.join(self.test_folder, "frag_bams")
    args.normal_bams = os.path.join(self.test_folder, "tex_bams")
    for bam_dir in (args.normal_bams, args.frag_bams):
        os.mkdir(bam_dir)
    self.snp._run_bam = self.mock.mock_run_bam
    bam_fixtures = (
        (args.normal_bams, "tex.bam"),
        (args.normal_bams, "notex.bam"),
        (args.frag_bams, "farg.bam"),
    )
    for bam_dir, bam_name in bam_fixtures:
        gen_file(os.path.join(bam_dir, bam_name), "test")
    args.samtools_path = "test"
    merged_count = self.snp._merge_bams(args)
    self.assertEqual(merged_count, 3)
def test_map2goslim(self):
    """Run go.map2goslim with plotting mocked and compare the stat lines.

    The comparison is done on sets, so line order and duplicates are
    ignored.
    """
    go.plot = Mock_func().mock_plot
    folder = self.test_folder
    stat_path = os.path.join(folder, "stat.txt")
    term_path = os.path.join(folder, "term.txt")
    slim_path = os.path.join(folder, "slim.txt")
    table_path = os.path.join(folder, "go.csv")
    gen_file(term_path, self.example.obo)
    gen_file(slim_path, self.example.slim)
    gen_file(table_path, "aaa\t+\t150\t200\tYP_031579.1\tGO:0000003")
    go.map2goslim(slim_path, term_path, table_path, stat_path,
                  self.test_folder)
    produced = set(import_data(stat_path))
    expected = set(self.example.out_stat.split("\n"))
    self.assertEqual(produced, expected)
def test_run_program(self):
    """Run _run_program with all heavy steps mocked and expect ['test']."""
    self.srna.multiparser = Mock_multiparser
    self.srna._check_gff = self.mock.mock_check_gff
    self.srna._run_normal = self.mock.mock_run_normal
    self.srna._run_utrsrna = self.mock.mock_run_utrsrna
    self.srna._merge_tex_frag_datas = self.mock.mock_merge_tex_frag_datas
    sr.filter_frag = self.mock.mock_run_filter_frag
    sr.merge_srna_gff = self.mock.mock_merge_srna_gff
    sr.merge_srna_table = self.mock.mock_merge_srna_table
    gen_file(os.path.join(self.gffs, "test.gff"), self.example.sorf_file)
    gen_file(os.path.join(self.trans, "test_transcript.gff"),
             self.example.sorf_file)
    gen_file(os.path.join(self.tsss, "test_TSS.gff"),
             self.example.sorf_file)
    gen_file(os.path.join(self.tsss, "test_processing.gff"),
             self.example.sorf_file)
    fuzzy_tsss = {"inter": 3}
    args = self.mock_args.mock()
    args.import_info = ["tss", "blast_nr", "blast_srna", "sec_str", "sorf"]
    args.trans = self.trans
    args.tsss = self.tsss
    args.pros = self.pros
    args.max_len = 300
    args.min_len = 30
    args.tex_notex = "tex_notex"
    args.fuzzy_tsss = fuzzy_tsss
    args.out_folder = self.out
    args.table_best = True
    args.wig_path = "wig_path"
    args.merge_wigs = "merge"
    args.libs = "libs"
    args.gffs = self.gffs
    args.in_cds = False
    args.utr_srna = True
    args.ex_srna = False
    args.cutoff_overlap = 0.5
    args.source = True
    # Context manager closes the log handle even if the call raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        prefixs = self.srna._run_program(args, log)
    self.assertListEqual(prefixs, ['test'])
def test_start_stop_codon(self):
    """Run _start_stop_codon with detection mocked and check renamed files.

    Expects test_sORF.gff / test_sORF.csv under both the best_candidates
    and all_candidates folders.
    """
    gff_path = os.path.join(self.out, "gffs")
    table_path = os.path.join(self.out, "tables")
    roots = (gff_path, table_path)
    for root in roots:
        os.mkdir(root)
    for candidate_dir in ("all_candidates", "best_candidates"):
        for root in roots:
            os.mkdir(os.path.join(root, candidate_dir))
    for fixture in ("all_candidates/test_sORF_all.gff",
                    "all_candidates/test_sORF_all.csv",
                    "all_candidates/test_sORF_best.gff",
                    "all_candidates/test_sORF_best.csv"):
        gen_file(os.path.join(gff_path, fixture), "test")

    so.sorf_detection = self.mock.mock_sorf_detection
    args = self.mock_args.mock()
    settings = {
        "libs": "libs",
        "tex_notex": "tex_notex",
        "replicates": "replicates",
        "start_codon": ["ATG"],
        "stop_codon": ["TTA"],
        "background": "background",
        "wig_path": "wig_path",
        "merge_wigs": "merge_wigs",
    }
    for attr_name, value in settings.items():
        setattr(args, attr_name, value)
    self.sorf._start_stop_codon(["test"], args)

    expected_outputs = (
        (gff_path, "best_candidates/test_sORF.gff"),
        (gff_path, "all_candidates/test_sORF.gff"),
        (table_path, "best_candidates/test_sORF.csv"),
        (table_path, "all_candidates/test_sORF.csv"),
    )
    for root, relative in expected_outputs:
        self.assertTrue(os.path.exists(os.path.join(root, relative)))
def test_retrieve_uniprot(self):
    """Run go.retrieve_uniprot with a mocked gff parser and compare output.

    Output lines are compared as a set against the expected lines.
    """
    go.Gff3Parser = Mock_gff_parser
    folder = self.test_folder
    database_path = os.path.join(folder, "database")
    gen_file(database_path, self.example.idmapping)
    gff_path = os.path.join(folder, "test.gff")
    gen_file(gff_path, "test")
    out_path = os.path.join(folder, "out.gff")
    tran_path = os.path.join(folder, "test_transcript.gff")
    gen_file(tran_path, "test")
    go.retrieve_uniprot(database_path, gff_path, out_path, tran_path,
                        "express")
    produced = set(import_data(out_path))
    expected = set(self.example.out_retrieve.split("\n"))
    self.assertEqual(produced, expected)
def test_detect_operon(self):
    """Run _detect_operon with op.operon mocked and expect operon_test.csv."""
    op.operon = self.mock.mock_operon
    fixtures = (
        os.path.join(self.tsss, "tmp", "test_TSS.gff"),
        os.path.join(self.trans, "tmp", "test_transcript.gff"),
        os.path.join(self.gffs, "test.gff"),
    )
    for path in fixtures:
        gen_file(path, "test")
    args = self.mock_args.mock()
    args.gffs = self.out_gff
    args.term_fuzzy = 3
    args.tss_fuzzy = 3
    args.length = 100
    self.operon._detect_operon(["test"], args)
    table = os.path.join(self.output, "tables", "operon_test.csv")
    self.assertTrue(os.path.exists(table))
def test_read_data(self):
    """Read three copies of the example gff via si.read_data and check them."""
    folder = self.test_folder
    inputs = {
        "gff_file": os.path.join(folder, "anno.gff"),
        "tran_file": os.path.join(folder, "tran.gff"),
        "pro_file": os.path.join(folder, "pro.gff"),
    }
    for path in inputs.values():
        gen_file(path, self.example.gff_file)
    args = self.mock_args.mock()
    for attr_name, path in inputs.items():
        setattr(args, attr_name, path)
    nums, cdss, tas, pros, _genes, _ncs = si.read_data(args)
    self.assertDictEqual(nums, {'ta': 3, 'cds': 3, 'pro': 3, 'uni': 0})
    for entries in (cdss, tas, pros):
        self.assertEqual(entries[0].start, 140)
def test_compute_utr(self):
    """Run _compute_utr with UTR detection mocked and expect both plots.

    The 5'UTR and 3'UTR output folders are removed again at the end.
    """
    ut.detect_5utr = self.mock.mock_detect_5utr
    ut.detect_3utr = self.mock.mock_detect_3utr
    term_path = os.path.join(self.terms, "tmp")
    os.mkdir(term_path)
    utr5_path = os.path.join(self.out, "5UTRs")
    utr3_path = os.path.join(self.out, "3UTRs")
    os.mkdir(utr5_path)
    os.mkdir(utr3_path)
    os.mkdir(os.path.join(utr5_path, "gffs"))
    os.mkdir(os.path.join(utr3_path, "gffs"))
    utr5_stat_path = os.path.join(utr5_path, "statistics")
    utr3_stat_path = os.path.join(utr3_path, "statistics")
    os.mkdir(utr5_stat_path)
    os.mkdir(utr3_stat_path)
    gen_file(os.path.join(self.gffs, "test.gff"), self.example.gff_file)
    gen_file(os.path.join(self.trans, "test_transcript.gff"),
             self.example.tran_file)
    gen_file(os.path.join(self.tsss, "test_TSS.gff"),
             self.example.tss_file)
    gen_file(os.path.join(term_path, "test_term.gff"),
             self.example.term_file)
    args = self.mock_args.mock()
    args.gffs = self.gffs
    args.tsss = self.tsss
    args.trans = self.trans
    args.terms = self.terms
    # Context manager closes the log handle even if the call raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        self.utr._compute_utr(args, log)
    self.assertTrue(
        os.path.exists(os.path.join(utr5_stat_path,
                                    "test_5utr_length.png")))
    self.assertTrue(
        os.path.exists(os.path.join(utr3_stat_path,
                                    "test_3utr_length.png")))
    shutil.rmtree(utr5_path)
    shutil.rmtree(utr3_path)
def test_snp_detect(self):
    """Run ts.snp_detect on example inputs and check the produced files."""
    folder = self.test_folder
    depth_file = os.path.join(folder, "depth")
    gen_file(depth_file, self.example.depth_file)
    fasta_file = os.path.join(folder, "NC_007795.1.fa")
    gen_file(fasta_file, self.example.fasta_final)
    snp_file = os.path.join(folder, "NC_007795.1.snp")
    gen_file(snp_file, self.example.snp_final)
    out_seq = os.path.join(folder, "seq")
    out_snp = os.path.join(folder, "snp")
    stat_file = os.path.join(folder, "stat")

    args = self.mock_args.mock()
    settings = {
        "depth": 5,
        "fraction": 0.3,
        "quality": 5,
        "depth_s": "n_10",
        "depth_b": "a_2",
        "dp4_sum": "n_10",
        "dp4_frac": 0.5,
        "idv": "n_10",
        "imf": 0.5,
        "filters": ["VDB_s0.1"],
        "min_sample": 2,
    }
    for attr_name, value in settings.items():
        setattr(args, attr_name, value)

    ts.snp_detect(fasta_file, snp_file, depth_file, out_snp, out_seq,
                  2, stat_file, args, 2)

    expected_files = (
        "seq_NC_007795.1_1_1.fa",
        "seq_NC_007795.1_1_2.fa",
        "seq_NC_007795.1_2_1.fa",
        "snp_seq_reference.csv",
        "snp_best.vcf",
        "snp_NC_007795.1_SNP_QUAL_best.png",
        "snp_NC_007795.1_SNP_QUAL_raw.png",
    )
    for file_name in expected_files:
        self.assertTrue(
            os.path.exists(os.path.join(self.test_folder, file_name)))
def test_blast(self):
    """Run _blast with blast helpers mocked and check tmp_basic_test."""
    sr.extract_blast = self.mock.mock_extract_blast
    self.srna._run_blast = self.mock.mock_run_blast
    self.srna._run_format = self.mock.mock_run_format
    basic_path = os.path.join(self.out, "tmp_basic_test")
    fixtures = (
        (basic_path, self.example.srna_file),
        (os.path.join(self.out, "tmp_nr_test"), "test"),
        (os.path.join(self.fastas, "tmp/test.fa"), ">test\nAAATTTGGGCCC"),
    )
    for path, content in fixtures:
        gen_file(path, content)
    args = self.mock_args.mock()
    args.blast_path = "test"
    args.fastas = self.fastas
    args.out_folder = self.out
    self.srna._blast("database", False, "dna", args, ["test"],
                     "blast_all", "nr", 0.0001)
    content = "\n".join(import_data(os.path.join(self.out,
                                                 "tmp_basic_test")))
    self.assertEqual(content, "test")
def test_optimization(self):
    """Run ot.optimization with TSSpredator mocked and expect stat_aaa.csv."""
    ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
    ot.convert2gff = Mock_func().mock_convert2gff
    if not os.path.exists(os.path.join(self.test_folder, "wigs")):
        os.mkdir(os.path.join(self.test_folder, "wigs"))
    wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
    if not os.path.exists(wig_folder):
        os.mkdir(wig_folder)
    fasta = os.path.join(self.test_folder, "aaa.fa")
    gff = os.path.join(self.test_folder, "aaa.gff")
    gen_file(fasta, self.example.fasta)
    gen_file(gff, self.example.gff_file)
    args = self.mock_args.mock()
    args.libs = self.example.libs
    args.cores = 1
    args.cluster = 3
    args.program = "TSS"
    args.project_strain = "aaa"
    args.replicate = "all_1"
    args.utr = 200
    args.steps = 2
    args.gene_length = 2000
    args.height = 0.9
    args.height_reduction = 0.8
    args.factor = 0.9
    args.factor_reduction = 0.8
    args.base_height = 0.01
    args.enrichment = 0.5
    args.processing = 0.5
    args.length = None
    args.replicate_name = "test"
    args.tsspredator_path = "test"
    args.manual = os.path.join(self.test_folder, "manual.gff")
    gen_file(args.manual, self.example.manual_file)
    args.output_folder = self.test_folder
    os.mkdir(os.path.join(self.test_folder, "optimized_TSSpredator"))
    # Context manager closes the log handle even if the call raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        ot.optimization(wig_folder, fasta, gff, args, args.manual,
                        2000, "aaa", log)
    self.assertTrue(
        os.path.exists(
            os.path.join(self.test_folder, "optimized_TSSpredator",
                         "stat_aaa.csv")))
def test_reextract_rbs(self):
    """Run rr.reextract_rbs twice and compare the output with the inputs.

    NOTE(review): the triple-quoted literals are reproduced exactly as they
    appear in the available source; confirm whether the spaces inside them
    were tabs or newlines in the upstream file.
    """
    align_path = os.path.join(self.test_folder, "align")
    first_path = os.path.join(self.test_folder, "first")
    out_path = os.path.join(self.test_folder, "output")
    gen_file(align_path, self.example.scan_file)

    # First pass: the output is expected to equal the first-scan content.
    first_content = """riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t10\t16 RF00162 6.2e-18 5 12"""
    gen_file(first_path, first_content)
    rr.reextract_rbs(align_path, first_path, out_path)
    self.assertEqual("\n".join(import_data(out_path)), first_content)

    # Second pass with a different first-scan entry.
    first_content = """riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t10\t16 RF00178 6.2e-20 13 17"""
    gen_file(first_path, first_content)
    rr.reextract_rbs(align_path, first_path, out_path)
    expected = """riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t10\t16 RF00162 6.2e-18 5 12 riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t10\t16 RF00178 6.2e-20 13 17"""
    self.assertEqual("\n".join(import_data(out_path)), expected)
def test_gen_seq(self):
    """Run _gen_seq for strain "aaa" and check sRNA and target fasta files."""
    srna_seq = os.path.join(self.out, "sRNA_seqs")
    tar_seq = os.path.join(self.out, "target_seqs")
    for folder in (os.path.join(self.srnas, "tmp"),
                   os.path.join(self.fastas, "tmp"),
                   os.path.join(self.gffs, "tmp"),
                   tar_seq):
        os.mkdir(folder)
    fixtures = (
        (os.path.join(self.srnas, "tmp", "aaa_sRNA.gff"),
         self.example.srna_file),
        (os.path.join(self.fastas, "tmp", "aaa.fa"),
         self.example.seq_file),
        (os.path.join(self.gffs, "tmp", "aaa.gff"),
         self.example.gff_file),
    )
    for path, content in fixtures:
        gen_file(path, content)

    args = self.mock_args.mock()
    args.query = ["aaa:+:5:8"]
    args.features = ["CDS"]
    args.tar_start = 3
    args.tar_end = 5
    self.star._gen_seq(["aaa"], args)

    srna_lines = import_data(os.path.join(srna_seq, "aaa_sRNA.fa"))
    self.assertEqual("\n".join(srna_lines), '>srna0|aaa|5|8|+\nTAAT')
    target_lines = import_data(os.path.join(tar_seq, "aaa_target_1.fa"))
    self.assertEqual("\n".join(target_lines),
                     '>AAA_000001|CDS_00000\nTAAATTCC')
def test_merge_bams(self):
    """Run _merge_bams(args, bam_datas) and check the updated bam_number."""
    args = self.mock_args.mock()
    args.frag_bams = os.path.join(self.test_folder, "frag_bams")
    args.normal_bams = os.path.join(self.test_folder, "tex_bams")
    for bam_dir in (args.normal_bams, args.frag_bams):
        os.mkdir(bam_dir)
    sample = {
        "sample": "NC_007795.1",
        "bam_number": 0,
        "bams": "test",
        "rep": 1,
    }
    bam_datas = [sample]
    self.snp._run_bam = self.mock.mock_run_bam
    for bam_dir, bam_name in ((args.normal_bams, "tex.bam"),
                              (args.normal_bams, "notex.bam"),
                              (args.frag_bams, "farg.bam")):
        gen_file(os.path.join(bam_dir, bam_name), "test")
    args.bams = [args.frag_bams, args.normal_bams]
    args.samtools_path = "test"
    self.snp._merge_bams(args, bam_datas)
    self.assertEqual(bam_datas[0]["bam_number"], 1)
def test_transcript_snp(self):
    """Run _transcript_snp and check the statistics and one sequence file.

    Files the call leaves in the working directory are removed at the end.
    """
    fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
    gen_file(fasta, self.example.fasta)
    snp = os.path.join(self.test_folder, "NC_007795.1.csv")
    gen_file(snp, self.example.snp)

    args = self.mock_args.mock()
    settings = {
        "depth": 5,
        "fraction": 0.3,
        "quality": 2,
        "depth_s": "n_10",
        "depth_b": "a_2",
        "dp4_sum": "n_10",
        "dp4_frac": 0.5,
        "idv": "n_10",
        "imf": 0.5,
        "filters": ["VDB_s0.1"],
        "min_sample": 2,
    }
    for attr_name, value in settings.items():
        setattr(args, attr_name, value)

    os.mkdir(os.path.join(self.test_folder,
                          "compare_reference/seqs/with_BAQ/test"))
    depth_file = os.path.join(self.test_folder, "tmp_depth")
    gen_file(depth_file, self.example.depth_file)
    self.snp._transcript_snp(fasta, snp, "test", "with", "test", 10,
                             self.table, args)

    stat_lines = import_data(os.path.join(
        self.test_folder,
        "compare_reference/statistics/stat_test_with_BAQ_SNP_best.csv"))
    self.assertEqual("\n".join(stat_lines), self.example.out_stat)
    seq_lines = import_data(os.path.join(
        self.test_folder,
        "compare_reference/seqs/with_BAQ/test/test_NC_007795.1_1_1.fa"))
    self.assertEqual("\n".join(seq_lines),
                     ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")

    for leftover in ("test_seq_reference.csv",
                     "test_best.vcf",
                     "test_NC_007795.1_SNP_QUAL_best.png",
                     "test_NC_007795.1_SNP_QUAL_raw.png"):
        os.remove(leftover)
def test_convert_sam2bam(self):
    """Check the three lists returned by ``_convert_sam2bam``.

    The samtools conversion is replaced by a mock.  The method is called
    twice on the same folder, first with "test1" and then with "test3" as
    the alignment file name.
    """
    self.circ._run_samtools_convert_bam = self.samtools.mock_covert_bam

    sam1 = os.path.join(self.test_folder, "test1.sam")
    sam2 = os.path.join(self.test_folder, "test2.sam")
    bam = os.path.join(self.test_folder, "test3.bam")
    for path in (sam1, sam2, bam):
        gen_file(path, self.example.align_file)

    # Expected bam names are derived from the sam paths by substitution.
    bam1 = sam1.replace("sam", "bam")
    bam2 = sam2.replace("sam", "bam")

    bam_files, convert_ones, remove_ones = self.circ._convert_sam2bam(
        self.test_folder, None, ["test1"])
    self.assertEqual(set(bam_files), {bam, bam1, bam2})
    self.assertEqual(set(convert_ones), {bam2})
    self.assertEqual(set(remove_ones), {sam1})

    _, convert_ones, remove_ones = self.circ._convert_sam2bam(
        self.test_folder, None, ["test3"])
    self.assertEqual(set(convert_ones), {bam2, bam1})
    self.assertEqual(set(remove_ones), set())
def test_blast(self):
    """Run ``_blast`` with mocked helpers and check ``tmp_basic_test``.

    ``merge_blast_out``, ``extract_blast``, ``_run_blast`` and
    ``_run_format`` are replaced by mocks before the call.  The log file is
    opened in a ``with`` block so the handle is closed even when ``_blast``
    raises.
    """
    self.srna.helper.merge_blast_out = self.mock.mock_merge_blast_out
    sr.extract_blast = self.mock.mock_extract_blast
    self.srna._run_blast = self.mock.mock_run_blast
    self.srna._run_format = self.mock.mock_run_format

    gen_file(os.path.join(self.out, "tmp_basic_test"),
             self.example.srna_file)
    gen_file(os.path.join(self.out, "tmp_nr_test"), "test")
    gen_file(os.path.join(self.fastas, "tmp/test.fa"),
             ">test\nAAATTTGGGCCC")

    args = self.mock_args.mock()
    args.blast_path = "test"
    args.para_blast = 1
    args.fastas = self.fastas
    args.out_folder = self.out
    args.blast_score_s = 0
    args.blast_score_n = 0

    # The original left this handle open for the rest of the test run.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        self.srna._blast("database", False, "dna", args, ["test"],
                         "blast_all", "nr", 0.0001, "tss", log)

    datas = import_data(os.path.join(self.out, "tmp_basic_test"))
    self.assertEqual("\n".join(datas), "test")
def test_optimize_tss(self):
    """Run ``opt.optimize_tss`` with mocked collaborators and check ``test.csv``.

    ``opt.Helper``, ``opt.Multiparser`` and ``opt.optimization`` are
    replaced by mocks.  The log file is managed by a ``with`` block so it is
    closed even if ``optimize_tss`` raises or the assertion fails (the
    original only closed it after a passing assertion).
    """
    opt.Helper = Mock_helper
    opt.Multiparser = Mock_multiparser
    opt.optimization = Mock_func().mock_optimization

    gen_file(os.path.join(self.gffs, "tmp", "test.gff"), "test")
    gen_file(os.path.join(self.fastas, "tmp", "test.fa"), "test")

    args = self.mock_args.mock()
    args.fastas = self.fastas
    args.gffs = self.gffs
    args.wigs = self.wigs
    args.tsspredator_path = "test"
    args.manuals = self.manuals
    gen_file(os.path.join(self.manuals, "tmp", "test.gff"), "test")
    args.output_folder = self.test_folder
    args.project_strain = "test"
    args.height = 9
    args.height_reduction = 9
    args.factor = 9
    args.factor_reduction = 9
    args.base_height = 9
    args.enrichment = 9
    args.processing = 9
    args.utr = 200
    args.libs = "test"
    args.replicate_name = "test"
    args.cluster = 2
    args.strain_lengths = {"test": 100}
    args.cores = 4
    args.program = "TSS"
    args.replicate = 2
    args.steps = 2000

    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        opt.optimize_tss(args, log)

    self.assertTrue(
        os.path.exists(os.path.join(self.test_folder, "test.csv")))
def test_annotation_transfer(self):
    """Check that ``annotation_transfer`` leaves gff, rnt and ptt files for "bbb".

    ``self.ratt._run_ratt`` is replaced by a mock before the call.
    """
    gen_file(os.path.join(self.ref_fastas, "aaa.fa"),
             self.example.fasta_file)
    gen_file(os.path.join(self.tar_fastas, "bbb.fa"),
             self.example.fasta_file)
    # Only the part of the GenBank text before the first "//" is written.
    first_record = self.example.gbk_file.split("//")[0]
    gen_file(os.path.join(self.ref_embls, "aaa.gbk"), first_record)

    self.ratt._run_ratt = Mock_func().mock_run_ratt

    args = self.mock_args.mock()
    args.element = "element"
    args.ref_embls = self.ref_embls
    args.tar_fastas = self.tar_fastas
    args.ref_fastas = self.ref_fastas
    args.output_path = self.output_path
    args.gff_outfolder = self.gff_outfolder
    args.pairs = ["aaa:bbb"]
    args.convert = True

    self.ratt.annotation_transfer(args)

    for produced in ("bbb.gff", "bbb.rnt", "bbb.ptt"):
        self.assertTrue(
            os.path.exists(os.path.join(self.gff_outfolder, produced)))
def mock_detect_3utr(self, tran, gff, term, fuzzy, utr3):
    """Mock that ignores all arguments and generates ``test_3utr_length.png``.

    The relative file name and the content "test" are handed to
    ``gen_file``.
    """
    plot_name = "test_3utr_length.png"
    placeholder = "test"
    gen_file(plot_name, placeholder)
def mock_detect_5utr(self, tss, gff, tran, source, utr5):
    """Mock that ignores all arguments and generates ``test_5utr_length.png``.

    The relative file name and the content "test" are handed to
    ``gen_file``.
    """
    plot_name = "test_5utr_length.png"
    placeholder = "test"
    gen_file(plot_name, placeholder)
def test_read_fasta(self):
    """Check that ``ts.read_fasta`` returns one dict mapping the id to its sequence."""
    expected = [{'NC_007795.1': 'AAATATATCAGCACCGTAGACGATAGAGTAGTAC'}]
    fasta_path = os.path.join(self.test_folder, "NC_007795.1.fa")
    gen_file(fasta_path, self.example.fasta_file)
    self.assertListEqual(ts.read_fasta(fasta_path), expected)
def test_convert_embl(self):
    """Check the folder path returned by ``_convert_embl`` and that it exists."""
    # Only the part of the GenBank text before the first "//" is written.
    first_record = self.example.gbk_file.split("//")[0]
    gen_file(os.path.join(self.test_folder, "aaa.gbk"), first_record)

    returned_dir = self.ratt._convert_embl(self.test_folder)

    self.assertEqual(returned_dir, "test_folder/gbk/gbk_tmp")
    self.assertTrue(os.path.exists("test_folder/gbk/gbk_tmp"))
def mock_run_ratt(self, ratt_path, tar, ref, out):
    """Mock that ignores all arguments and generates tmp.gff, tmp.ptt and tmp.rnt.

    Each file under ``test_folder/gffs`` receives the matching example text
    from ``self.example``, in gff, ptt, rnt order.
    """
    outputs = (
        ("test_folder/gffs/tmp.gff", "gff_file"),
        ("test_folder/gffs/tmp.ptt", "ptt_file"),
        ("test_folder/gffs/tmp.rnt", "rnt_file"),
    )
    for path, example_attr in outputs:
        gen_file(path, getattr(self.example, example_attr))
def test_screenshot(self):
    """Run ``screenshot`` on example wig/gff data and check its outputs.

    Verifies that the ``forward`` and ``reverse`` entries exist under
    ``<output>/aaa`` and that ``reverse.txt`` matches the expected text.
    The log file is opened in a ``with`` block so the handle is closed even
    when ``screenshot`` raises (the original never closed it).
    """
    # Forward-strand wig files, then reverse-strand ones.
    gen_file(os.path.join(self.tex_wig, "tex_1_f.wig"), self.example.wig_f)
    gen_file(os.path.join(self.tex_wig, "notex_1_f.wig"),
             self.example.wig_f)
    gen_file(os.path.join(self.frag_wig, "frag_f.wig"), self.example.wig_f)
    gen_file(os.path.join(self.tex_wig, "tex_1_r.wig"), self.example.wig_r)
    gen_file(os.path.join(self.tex_wig, "notex_1_r.wig"),
             self.example.wig_r)
    gen_file(os.path.join(self.frag_wig, "frag_r.wig"), self.example.wig_r)

    args = self.mock_args.mock()
    args.fasta = self.fasta
    args.main_gff = os.path.join(self.test_folder, "main.gff")
    gen_file(args.main_gff, self.example.main_gff)
    side_gff = os.path.join(self.test_folder, "side.gff")
    args.side_gffs = [side_gff]
    gen_file(side_gff, self.example.side_gff)
    args.frag_wigs = self.frag_wig
    args.tex_wigs = self.tex_wig
    args.height = 1000
    args.tlibs = ["test_folder/tex/tex_1_f.wig:tex:1:a:+",
                  "test_folder/tex/tex_1_r.wig:tex:1:a:-",
                  "test_folder/tex/notex_1_f.wig:notex:1:a:+",
                  "test_folder/tex/notex_1_r.wig:notex:1:a:-"]
    args.flibs = ["test_folder/frag/frag_f.wig:frag:1:a:+",
                  "test_folder/frag/frag_r.wig:frag:1:a:-"]
    args.present = "expand"
    args.output_folder = self.output

    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        self.screen.screenshot(args, log)

    self.assertTrue(os.path.exists(os.path.join(
        self.output, "aaa", "forward")))
    self.assertTrue(os.path.exists(os.path.join(
        self.output, "aaa", "reverse")))

    # NOTE(review): forward.txt is loaded but its content is not compared
    # with anything -- confirm whether an expected fixture should be
    # asserted here.  The read is kept so the call sequence is unchanged.
    import_data(os.path.join(self.output, "aaa", "forward.txt"))
    datas = import_data(os.path.join(self.output, "aaa", "reverse.txt"))
    self.assertEqual("\n".join(datas), self.example.out_r)
def mock_run_blast(self, program, database, e, seq_file, blast_file,
                   strand, para, test, log):
    """Mock that ignores all arguments and generates ``tmp_blast.txt``.

    The relative file name and the content "test" are handed to
    ``gen_file``.
    """
    result_name = 'tmp_blast.txt'
    placeholder = "test"
    gen_file(result_name, placeholder)
def mock_get_seq(self, gff, fasta, seq_file):
    """Mock that ignores all arguments and generates two sRNA files.

    ``sRNA_index_test`` gets a sequence record and ``sRNA_2d_test`` gets a
    dot-bracket record, both under ``test_folder/output``.
    """
    outputs = (
        ('test_folder/output/sRNA_index_test', ">test\nAAATTTGGGCCC"),
        ('test_folder/output/sRNA_2d_test', ">test\n...()()..."),
    )
    for path, content in outputs:
        gen_file(path, content)