class Example(object):
    """Fixture data: two TSS sets and one gene set on sequence "aaa".

    Entry ``i`` of every ``*_dict`` list is paired with entry ``i`` of the
    matching ``attributes_*`` list and passed to ``Create_generator`` to
    fill ``tsss``, ``tsss2`` and ``genes``.
    """

    # First TSS set: positional fields, then the attribute column.
    tss_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 3,
         "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 16,
         "end": 16, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 54,
         "end": 54, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tss = [
        {"ID": "CDS0", "Name": "CDS_0", "type": "Primary",
         "associated_gene": "AAA_00001", "utr_length": "Primary_25"},
        {"ID": "CDS1", "Name": "CDS_1", "type": "Internal",
         "associated_gene": "AAA_00002", "utr_length": "Internal_NA"},
        {"ID": "CDS2", "Name": "CDS_2", "type": "Primary,Antisense",
         "associated_gene": "AAA_00004,AAA_00006",
         "utr_length": "Primary_25,Internal_NA"},
    ]

    # Second TSS set: same attributes, different positions for entries 1-2.
    tss2_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 3,
         "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 18,
         "end": 18, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 23,
         "end": 23, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tss2 = [
        {"ID": "CDS0", "Name": "CDS_0", "type": "Primary",
         "associated_gene": "AAA_00001", "utr_length": "Primary_25"},
        {"ID": "CDS1", "Name": "CDS_1", "type": "Internal",
         "associated_gene": "AAA_00002", "utr_length": "Internal_NA"},
        {"ID": "CDS2", "Name": "CDS_2", "type": "Primary,Antisense",
         "associated_gene": "AAA_00004,AAA_00006",
         "utr_length": "Primary_25,Internal_NA"},
    ]

    # Gene annotations.
    gff_dict = [
        {"start": 6, "end": 15, "phase": ".", "strand": "+",
         "seq_id": "aaa", "score": ".", "source": "Refseq",
         "feature": "gene"},
        {"start": 1258, "end": 2234, "phase": ".", "strand": "+",
         "seq_id": "aaa", "score": ".", "source": "Refseq",
         "feature": "gene"},
        {"start": 3544, "end": 6517, "phase": ".", "strand": "-",
         "seq_id": "aaa", "score": ".", "source": "Refseq",
         "feature": "gene"},
    ]
    # NOTE(review): the first two entries share the ID "gene0"; kept exactly
    # as found because tests may depend on it -- confirm whether intended.
    attributes_gff = [
        {"ID": "gene0", "Name": "gene_0", "locus_tag": "AAA_00001"},
        {"ID": "gene0", "Name": "gene_1", "locus_tag": "AAA_00002"},
        {"ID": "gene1", "Name": "gene_2", "locus_tag": "AAA_00003"},
    ]

    tsss = []
    tsss2 = []
    genes = []
    # A plain index loop is used on purpose: comprehensions inside a class
    # body cannot see the class-level lists defined above.
    for index in range(3):
        tsss.append(
            Create_generator(tss_dict[index], attributes_tss[index], "gff"))
        tsss2.append(
            Create_generator(tss2_dict[index], attributes_tss2[index], "gff"))
        genes.append(
            Create_generator(gff_dict[index], attributes_gff[index], "gff"))
def test_check_overlap(self):
    """Check the tuples returned by ``mm.check_overlap`` for both flags.

    With the first argument ``True`` the expected result is
    ``(False, 3, 1)``; with ``False`` it is ``(False, 1000, 0)``.
    """

    def build_tss(position):
        # Every TSS in this test shares the same attributes and differs
        # only by its coordinate; fresh dicts are built for each entry.
        fields = {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                  "start": position, "end": position, "phase": ".",
                  "strand": "+", "score": "."}
        attrs = {"ID": "TSS0", "Name": "TSS_0", "type": "Primary,Internal",
                 "associated_gene": "AAA_00001,AAA_00004",
                 "UTR_length": "Primary_25,Internal_NA"}
        return Create_generator(fields, attrs, "gff")

    m_site = build_tss(7)
    p_site = build_tss(7)
    pre_site = build_tss(3)

    counters = {"tss_p": 0, "tss_m": 0, "tss": 0}
    collected = {"tsss_p":[], "tsss_m": [], "merge": []}
    per_strain = {"aaa": {"overlap": 0, "tsspredator": 0, "manual": 0}}
    overlap_count = 0
    gene_list = self.example.genes

    first = mm.check_overlap(
        True, pre_site, counters, False, per_strain, overlap_count,
        m_site, p_site, collected, 1000, gene_list, self.example.genes)
    self.assertEqual(first, (False, 3, 1))

    second = mm.check_overlap(
        False, pre_site, counters, 100, per_strain, overlap_count,
        m_site, p_site, collected, 1000, self.example.genes,
        self.example.genes)
    self.assertEqual(second, (False, 1000, 0))
class Example(object): gff_file = """test RefSeq CDS 5 10 . + . ID=cds0;Name=CDS_0""" tss_file = """test RefSeq TSS 3 3 . + . ID=tss0;Name=TSS_0""" tss_dict = [{ "seq_id": "test", "source": "intergenic", "feature": "TSS", "start": 170, "end": 170, "phase": ".", "strand": "+", "score": "." }] attributes_tsss = [{"ID": "tss0", "Name": "TSS_0"}] tsss = [] tsss.append(Create_generator(tss_dict[0], attributes_tsss[0], "gff")) gff_dict = [{ "seq_id": "test", "source": "RefSeq", "feature": "CDS", "start": 200, "end": 270, "phase": ".", "strand": "+", "score": "." }] attributes_gff = [{"ID": "cds0", "Name": "CDS_0"}] gffs = [] gffs.append(Create_generator(gff_dict[0], attributes_gff[0], "gff")) out_stat_test = """All genomes: The number of cds which is start from TSS: 250 (0.4166666666666667) The number of tRNA which is start from TSS: 20 (0.6666666666666666) The number of rRNA which is start from TSS: 20 (0.6666666666666666)""" out_stat = """All genomes:
def test_compare(self):
    """Check ``cft.compare`` against three other transcripts.

    The first transcript is compared in turn with three others; after each
    call its ``start``/``end`` are checked, and the returned flags are
    expected to be true, true and false respectively.
    """
    # One attribute dict is deliberately shared by all four transcripts.
    shared_attrs = {"ID": "tran0", "Name": "Tran_0",
                    "locus_tag": "AAA_00001"}

    def build_transcript(begin, finish):
        fields = {"seq_id": "aaa", "source": "Refseq",
                  "feature": "transcript", "start": begin, "end": finish,
                  "phase": ".", "strand": "+", "score": "."}
        return Create_generator(fields, shared_attrs, "gff")

    initial_flag = False
    base = build_transcript(140, 367)
    second = build_transcript(180, 400)
    third = build_transcript(50, 138)
    fourth = build_transcript(5650, 7100)

    flag_second = cft.compare(base, second, initial_flag, 5)
    self.assertEqual(base.start, 140)
    self.assertEqual(base.end, 400)

    flag_third = cft.compare(base, third, initial_flag, 5)
    self.assertEqual(base.start, 50)
    self.assertEqual(base.end, 400)

    flag_fourth = cft.compare(base, fourth, initial_flag, 5)
    self.assertEqual(base.start, 50)
    self.assertEqual(base.end, 400)

    self.assertTrue(flag_second)
    self.assertTrue(flag_third)
    self.assertFalse(flag_fourth)
def test_sub_operon(self):
    """Check the two sub-operon dicts returned by ``op.sub_operon``.

    Called with strand "+", the range 141-800 and a final argument of 30,
    the first two results must be 141-199 and 200-799 on the "+" strand.
    """
    tss_fields = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 140, "end": 140, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 200, "end": 200, "phase": ".", "strand": "+",
         "score": "."},
    ]
    tss_attrs = [
        {"ID": "tss0", "Name": "TSS_0", "locus_tag": "AAA_00001"},
        {"ID": "tss1", "Name": "TSS_1", "locus_tag": "BBB_00001"},
    ]
    gene_fields = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 540, "end": 640, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 166, "end": 198, "phase": ".", "strand": "+",
         "score": "."},
    ]
    gene_attrs = [
        {"ID": "tss0", "Name": "TSS_0", "locus_tag": "AAA_00001"},
        {"ID": "tss1", "Name": "TSS_1", "locus_tag": "BBB_00001"},
    ]

    tsss = {"with_feature": True, "num_feature": 2, "data_list": []}
    genes = {"data_list": []}
    # Build gene then TSS for each position, in the same interleaved order.
    rows = zip(gene_fields, gene_attrs, tss_fields, tss_attrs)
    for g_fields, g_attrs, t_fields, t_attrs in rows:
        genes["data_list"].append(
            Create_generator(g_fields, g_attrs, "gff"))
        tsss["data_list"].append(
            Create_generator(t_fields, t_attrs, "gff"))

    operons = op.sub_operon("+", tsss, 141, 800, genes, 30)
    self.assertDictEqual(
        operons[0], {'end': 199, 'start': 141, 'strand': '+'})
    self.assertDictEqual(
        operons[1], {'end': 799, 'start': 200, 'strand': '+'})
class Example(object):
    """Fixture data: GFF text lines, wiggle coverage and parsed entries.

    ``tas``, ``tsss`` and ``srnas`` each hold one ``Create_generator``
    object built from the corresponding ``*_dict`` / ``attributes_*`` pair.
    """

    # NOTE(review): the column separators in these three strings appear as
    # single spaces in the reviewed text -- confirm against the test that
    # consumes them (GFF columns are normally tab-separated).
    inter = """aaa UTR_derived sORF 2 6 . + . ID=inter0;Name=inter_00000;UTR_type=3utr"""
    srna = """aaa UTR_derived sRNA 5 8 . + . ID=aaa_srna0;Name=srna_00000;UTR_type=3utr"""
    tss = """aaa tsspredator TSS 1 1 . + . ID=aaa_tss0;Name=TSS_00000"""

    # 24 coverage values: the same six-value pattern repeated four times.
    wigs = {"aaa": {"frag_1": {
        "track_1|+|frag": [100, 30, 23, 21, 21, 2] * 4}}}

    ta_dict = [
        {"seq_id": "aaa", "source": "intergenic", "feature": "Transcript",
         "start": 1, "end": 23, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tas = [{"ID": "tran0", "Name": "Transcript_0"}]
    tas = [Create_generator(ta_dict[0], attributes_tas[0], "gff")]

    tss_dict = [
        {"seq_id": "aaa", "source": "tsspredator", "feature": "TSS",
         "start": 1, "end": 1, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tss = [{"ID": "tss0", "Name": "TSS_0"}]
    tsss = [Create_generator(tss_dict[0], attributes_tss[0], "gff")]

    srna_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA",
         "start": 5, "end": 8, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_srna = [{"ID": "srna0", "Name": "sRNA_0"}]
    srnas = [Create_generator(srna_dict[0], attributes_srna[0], "gff")]
def test_diff_strand_tss_gene(self):
    """Check the attribute string and dict from ``mm.diff_strand_tss_gene``.

    A "+" strand TSS at position 3 is combined with a "-" strand CDS at
    6-12; both returned elements must carry the added "Antisense" entries.
    """
    tss = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 3, "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"ID": "TSS0", "Name": "TSS_0", "type": "Primary,Internal",
         "associated_gene": "AAA_00001,AAA_00004",
         "utr_length": "Primary_25,Internal_NA"},
        "gff")
    entry = [
        tss.attribute_string,
        {"utr_length": "Primary_25", "type": "Primary",
         "associated_gene": "AAA_00001"},
    ]
    anti_ends = {"forward": 1, "reverse": -1}
    gene_ends = {"forward": -1, "reverse": -1}
    gene = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 6, "end": 12, "phase": ".", "strand": "-", "score": "."},
        {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00005"},
        "gff")
    checks = {"orphan": False, "int_anti": False}

    result = mm.diff_strand_tss_gene(
        gene, tss, anti_ends, gene_ends, checks, entry)

    expected_string = 'utr_length=Primary_25,Antisense_NA;associated_gene=AAA_00001,AAA_00005;type=Primary,Antisense;Name=TSS_3+'
    expected_attrs = {
        'Name': 'TSS_3+',
        'utr_length': 'Primary_25,Antisense_NA',
        'type': 'Primary,Antisense',
        'associated_gene': 'AAA_00001,AAA_00005',
    }
    self.assertEqual(result[0], expected_string)
    self.assertDictEqual(result[1], expected_attrs)
def test_get_circrna(self):
    """Check the counters returned by ``circ.get_circrna``.

    Five "circ" entries and three "gff" entries from the shared fixture are
    passed in with ratios of 0.3 and a support of 5; the "support",
    "circular" and "conflict" tables of the result are then verified.
    """
    fixture = self.example
    circs = [
        Create_generator(fixture.circ_dict[pos],
                         fixture.attributes_circ[pos], "circ")
        for pos in range(5)
    ]
    gffs = [
        Create_generator(fixture.gffs_dict[pos],
                         fixture.attributes_gffs[pos], "gff")
        for pos in range(3)
    ]
    out = StringIO()
    out_best = StringIO()
    args = self.mock_args.mock()
    args.start_ratio = 0.3
    args.end_ratio = 0.3
    args.support = 5

    nums = circ.get_circrna(circs, gffs, 50, out, out_best, args)

    expected_support = {
        'aaa': {0: 2, 5: 2, 10: 2, 15: 1, 20: 1, 25: 1, 30: 1},
        'all': {0: 3, 5: 3, 10: 2, 15: 1, 20: 1, 25: 1, 30: 1},
        'bbb': {0: 1, 5: 1},
    }
    expected_circular = {'aaa': 2, 'bbb': 1, 'all': 3}
    expected_conflict = {'aaa': {}, 'bbb': {0: 1, 5: 1},
                         'all': {0: 1, 5: 1}}
    self.assertDictEqual(nums["support"], expected_support)
    self.assertDictEqual(nums["circular"], expected_circular)
    self.assertDictEqual(nums["conflict"], expected_conflict)
def test_detect_coverage(self):
    """Check the two values returned by ``co.detect_coverage``.

    For a TSS at position 2 and a reference TSS at position 3 on the
    fixture's ``wigs_f`` data, the expected results are 100 and 50.
    """

    def tss_fields(position):
        return {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                "start": position, "end": position, "phase": ".",
                "strand": "+", "score": "."}

    target_attrs = {"type": "Primary", "ID": "tss0", "Name": "TSS:2_+"}
    reference_attrs = {"type": "Primary", "ID": "tss1", "Name": "TSS:3_+"}

    tss_diff, ref_diff = co.detect_coverage(
        self.example.wigs_f,
        Create_generator(tss_fields(2), target_attrs, "gff"),
        Create_generator(tss_fields(3), reference_attrs, "gff"))

    self.assertEqual(tss_diff, 100)
    self.assertEqual(ref_diff, 50)
def test_detect_conflict(self):
    """Check the line ``circ.detect_conflict`` writes to ``out``.

    A single circRNA entry (100-467, support 30) is checked against the
    fixture CDS with ratios of 0.3 and a support threshold of 5.
    """
    circrna = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "circRNA",
         "start": 100, "end": 467, "phase": ".", "strand": "+",
         "score": ".", "support": 30, "start_site": 30, "end_site": 35,
         "situation": "P", "splice_type": "C"},
        {"ID": "circrna0", "Name": "circRNA_0"},
        "circ")
    cds = Create_generator(
        self.example.cds_dict, self.example.attributes_cds, "gff")
    gffs = [cds]

    args = self.mock_args.mock()
    args.start_ratio = 0.3
    args.end_ratio = 0.3
    args.support = 5
    out = StringIO()
    out_best = StringIO()

    circ.detect_conflict(gffs, circrna, 0, out, out_best, args)

    written = out.getvalue()
    self.assertEqual(
        written,
        "circRNA_0 aaa + 100 467 AAA_00001 30 1.0 0.8571428571428571\n")
    out.close()
class Example(object):
    """Fixture data: CDS, transcript and TSS entries plus two sequences.

    Only the first three entries of each ``*_dict`` / ``attributes_*`` pair
    are turned into ``Create_generator`` objects (``gffs``, ``tas``,
    ``tsss``); ``gff_dict`` and ``attributes_gff`` hold five entries.
    """

    gff_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 3,
         "end": 30, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 14,
         "end": 35, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 37,
         "end": 55, "phase": ".", "strand": "-", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 40,
         "end": 66, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "bbb", "source": "Refseq", "feature": "CDS", "start": 4,
         "end": 25, "phase": ".", "strand": "-", "score": "."},
    ]
    attributes_gff = [
        {"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
        {"ID": "cds1", "Name": "CDS_1", "locus_tag": "AAA_00002",
         "protein_id": "YP_500332.1"},
        {"ID": "cds2", "Name": "CDS_2"},
        {"ID": "cds3", "Name": "CDS_3", "locus_tag": "AAA_00003"},
        {"ID": "cds4", "Name": "CDS_4", "locus_tag": "BBB_00001"},
    ]

    ta_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "Transcript",
         "start": 1, "end": 367, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "Transcript",
         "start": 230, "end": 240, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "bbb", "source": "Refseq", "feature": "Transcript",
         "start": 430, "end": 5167, "phase": ".", "strand": "-",
         "score": "."},
    ]
    attributes_tas = [
        {"ID": "tran0", "Name": "Transcript_0", "locus_tag": "AAA_00001"},
        {"ID": "tran1", "Name": "Transcript_1", "locus_tag": "AAA_00002"},
        {"ID": "tran2", "Name": "Transcript_2", "locus_tag": "BBB_00001"},
    ]

    tss_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 2,
         "end": 2, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 230, "end": 230, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "bbb", "source": "Refseq", "feature": "TSS",
         "start": 5166, "end": 5166, "phase": ".", "strand": "-",
         "score": "."},
    ]
    attributes_tss = [
        {"ID": "tss0", "Name": "TSS_0", "type": "Primary",
         "associated_gene": "AAA_00001"},
        {"ID": "tss1", "Name": "TSS_1", "type": "Internal",
         "associated_gene": "AAA_00002"},
        {"ID": "tss2", "Name": "TSS_2", "type": "Orphan",
         "associated_gene": "orphan"},
    ]

    gffs = []
    tas = []
    tsss = []
    # Index loop on purpose: class-body comprehensions cannot reach the
    # class-level lists above.
    for index in range(3):
        gffs.append(
            Create_generator(gff_dict[index], attributes_gff[index], "gff"))
        tas.append(
            Create_generator(ta_dict[index], attributes_tas[index], "gff"))
        tsss.append(
            Create_generator(tss_dict[index], attributes_tss[index], "gff"))

    # Sequences keyed by the seq_id values used in the entries above.
    seq = {
        "aaa": "AAAATTATAGGCGTAGTAACCTCTTGATAGCGATGGATATAGACCCTTATAAGGCCTCTGATTAGAAAATAGGTAGGCCCCCGGGGGTGTGTAATAGATAGAT",
        "bbb": "ATATGTACCCCGCGCCGTATAGCTATAAATTCGCTGCTTATTTTATA",
    }
class Example(object):
    """Fixture data: three target TSSs and three reference TSSs.

    Each generated entry in ``tars`` and ``refs`` additionally gets
    ``attributes["print"] = False`` right after it is created.
    """

    tar_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 3,
         "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 24,
         "end": 24, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 1243, "end": 1243, "phase": ".", "strand": "+",
         "score": "."},
    ]
    attributes_tar = [
        {"coverage": "3", "ID": "tss1", "Name": "TSS:3_+"},
        {"coverage": "340", "ID": "tss2", "Name": "TSS:24_+"},
        {"coverage": "4440", "ID": "tss3", "Name": "TSS:1243_+"},
    ]

    ref_dict = [
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS", "start": 3,
         "end": 3, "phase": ".", "strand": "+", "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 333, "end": 333, "phase": ".", "strand": "+",
         "score": "."},
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 1242, "end": 1242, "phase": ".", "strand": "+",
         "score": "."},
    ]
    attributes_ref = [
        {"coverage": "3", "ID": "tss1", "Name": "TSS:3_+"},
        {"coverage": "330", "ID": "tss2", "Name": "TSS:333_+"},
        {"coverage": "1230", "ID": "tss3", "Name": "TSS:1242_+"},
    ]

    tars = []
    refs = []
    # The freshly appended entry is addressed as [-1] so that no extra
    # name ends up in the class namespace.
    for index in range(3):
        tars.append(
            Create_generator(tar_dict[index], attributes_tar[index], "gff"))
        tars[-1].attributes["print"] = False
        refs.append(
            Create_generator(ref_dict[index], attributes_ref[index], "gff"))
        refs[-1].attributes["print"] = False
class Example(object): srnas = {"RNAplex": {"srna0": [{"target": "AAA_00001", "energy": -6.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}, {"target": "AAA_00002|dnaA", "energy": -3.5, "rank": 2, "srna_pos": "2,7", "tar_pos": "3,15"}], "srna1": [{"target": "AAA_00003", "energy": -10.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}], "srna2": [{"target": "AAA_00001", "energy": -23.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}, {"target": "AAA_00002|dnaA", "energy": -3.43, "rank": 3, "srna_pos": "2,10", "tar_pos": "10,15"}, {"target": "AAA_00003", "energy": -6.5, "rank": 2, "srna_pos": "2,10", "tar_pos": "10,15"}]}, "RNAup": {"srna0": [{"target": "AAA_00001", "energy": -6.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}, {"target": "AAA_00002|dnaA", "energy": -3.5, "rank": 2, "srna_pos": "2,10", "tar_pos": "10,15"}], "srna1": [{"target": "AAA_00003", "energy": -10.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}], "srna2": [{"target": "AAA_00001", "energy": -23.5, "rank": 1, "srna_pos": "2,10", "tar_pos": "10,15"}]}} srna_dict = [{"start": 6, "end": 15, "phase": ".", "strand": "+", "seq_id": "aaa", "score": ".", "source": "Refseq", "feature": "sRNA"}, {"start": 1258, "end": 2234, "phase": ".", "strand": "+", "seq_id": "aaa", "score": ".", "source": "Refseq", "feature": "sRNA"}, {"start": 3544, "end": 6517, "phase": ".", "strand": "-", "seq_id": "aaa", "score": ".", "source": "Refseq", "feature": "sRNA"}] attributes_srna = [{"ID": "srna0", "Name": "sRNA_0"}, {"ID": "srna1", "Name": "sRNA_1"}, {"ID": "srna2", "Name": "sRNA_2"}] gff_dict = [{"start": 100, "end": 150, "phase": ".", "strand": "+", "seq_id": "aaa", "score": ".", "source": "Refseq", "feature": "CDS"}, {"start": 2348, "end": 2934, "phase": ".", "strand": "+", "seq_id": "aaa", "score": ".", "source": "Refseq", "feature": "CDS"}, {"start": 5544, "end": 5597, "phase": ".", "strand": "-", "seq_id": "aaa", "score": ".", "source": "Refseq", "feature": "CDS"}] attributes_gff = [{"ID": 
"cds0", "Name": "CDS_0", "locus_tag": "AAA_00001"}, {"ID": "cds0", "Name": "CDS_1", "locus_tag": "AAA_00002"}, {"ID": "cds1", "Name": "CDS_2", "locus_tag": "AAA_00003"}] srna_gffs = [] gffs = [] for index in range(0, 3): srna_gffs.append(Create_generator(srna_dict[index], attributes_srna[index], "gff")) gffs.append(Create_generator(gff_dict[index], attributes_gff[index], "gff")) out_rna_txt = """>SAOUHSC_00001|dnaA >srna1023 ((((((&)))))) 571,576 : 20,25 (-5.30 = -7.89 + 0.18 + 2.41)""" out_print = """sRNA strain sRNA_position sRNA_interacted_position_RNAplex sRNA_strand target target_position target_interacted_position_RNAplex target_strand energy_RNAplex rank_RNAplex sRNA_1 aaa 1258-2234 1259-1267 + AAA_00003 5544-5597 5550-5545 - -10.5 1 sRNA_2 aaa 3544-6517 6508-6516 - AAA_00001 100-150 89-94 + -23.5 1 sRNA_2 aaa 3544-6517 6508-6516 - AAA_00003 5544-5597 5550-5545 - -6.5 2 sRNA_0 aaa 6-15 7-15 + AAA_00001 100-150 89-94 + -6.5 1 sRNA_0 aaa 6-15 7-12 + AAA_00002|dnaA 2348-2934 2330-2342 + -3.5 2 """ rnaup = """>srna1023 >SAOUHSC_00001|dnaA .(((((&))))). 571,576 : 20,25 (-4.87 = -8.00 + 0.31 + 2.81) AACCUC&GGGGUU >SAOUHSC_00002 (((..((((((((((((&)))))))))))).))) 14,30 : 11,26 (-5.91 = -13.15 + 4.20 + 3.05) GAAGAUCCUAUUUUUAA&UUAAAAAUGGGGGUUC """ rnaplex = """>SAOUHSC_00001|dnaA
def read_file(self, gff_file, input_file, hypo):
    """Return fixture-based circ and gff entries together with 50.

    The three parameters are accepted but not used; the data comes from
    ``self.example``. The built lists are also stored on ``self.circs``
    and ``self.gffs``.
    """
    fixture = self.example
    self.circs = [
        Create_generator(fixture.circ_dict[pos],
                         fixture.attributes_circ[pos], "circ")
        for pos in range(5)
    ]
    self.gffs = [
        Create_generator(fixture.gffs_dict[pos],
                         fixture.attributes_gffs[pos], "gff")
        for pos in range(3)
    ]
    return self.circs, self.gffs, 50
class Example(object): seq_file = """>aaa AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC""" gff_file = """aaa\tRefseq\tgene\t1\t12\t.\t+\t.\tID=gene_0;Name=GENE_0;locus_tag=AAA_00001 aaa\tRefseq\tCDS\t1\t12\t.\t+\t.\tID=cds_0;Name=CDS_0;locus_tag=AAA_00001;protein_id="YP.00001 aaa\tRefseq\tgene\t14\t34\t.\t-\t.\tID=gene_1;Name=gene_1;locus_tag=AAA_00002 aaa\tRefseq\tCDS\t14\t34\t.\t-\t.\tID=cds_1;Name=CDS_1;locus_tag=AAA_00002;protein_id="YP.00002""" gene_dict = [{"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 1, "end": 10, "phase": ".", "strand": "+", "score": "."}, {"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 12, "end": 23, "phase": ".", "strand": "+", "score": "."}, {"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 25, "end": 30, "phase": ".", "strand": "-", "score": "."}, {"seq_id": "aaa", "source": "Refseq", "feature": "gene", "start": 33, "end": 43, "phase": ".", "strand": "-", "score": "."}] cdsf_dict = [{"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 1, "end": 10, "phase": ".", "strand": "+", "score": "."}, {"seq_id": "aaa", "source": "Refseq", "feature": "rRNA", "start": 12, "end": 23, "phase": ".", "strand": "+", "score": "."}] cdsr_dict = [{"seq_id": "aaa", "source": "Refseq", "feature": "CDS", "start": 25, "end": 30, "phase": ".", "strand": "+", "score": "."}, {"seq_id": "aaa", "source": "Refseq", "feature": "rRNA", "start": 33, "end": 43, "phase": ".", "strand": "+", "score": "."}] attributes_gene = [{"ID": "gene0", "Name": "danA", "locus_tag": "AAA_00001"}, {"ID": "gene1", "Name": "AAA_00002", "locus_tag": "AAA_00002"}, {"ID": "gene2", "Name": "AAA_00003", "locus_tag": "AAA_00003"}, {"ID": "gene3", "Name": "hrcA", "locus_tag": "AAA_00004"}] attributes_cdsf = [{"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001", "protein_id": "YP_000001", "Parent": "gene0"}, {"ID": "cds1", "Name": "CDS_1", "locus_tag": "AAA_00002"}] attributes_cdsr = [{"ID": "cds2", "Name": "CDS_2", 
"locus_tag": "AAA_00003", "protein_id": "YP_000004", "Parent": "gene2"}, {"ID": "cds3", "Name": "CDS_3", "locus_tag": "AAA_00004"}] fasta = "AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC" genes = [] cdss_f = [] cdss_r = [] for index in range(0, 2): cdss_f.append(Create_generator(cdsf_dict[index], attributes_cdsf[index], "gff")) cdss_r.append(Create_generator(cdsr_dict[index], attributes_cdsr[index], "gff")) for index in range(0, 4): genes.append(Create_generator(gene_dict[index], attributes_gene[index], "gff")) cdsf_result = """>AAA_00001|danA AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC >AAA_00002|CDS_1 AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC""" cdsr_result = """>AAA_00003 AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC >AAA_00004|CDS_3 AGGATAGTCCGATACGTATACTGATAAAGACCGAAAATATTAGCGCGTAGC""" all_result = """>AAA_00001|CDS_0
def test_detect_overlap(self):
    """Check ``ms.detect_overlap`` for an identical and a distant sRNA.

    An entry at 3-33 is expected to overlap the previous entry at 3-33;
    an entry at 53-233 is expected not to.
    """

    def build_srna(begin, finish, srna_type):
        fields = {"seq_id": "aaa", "source": "Refseq", "feature": "sRNA",
                  "start": begin, "end": finish, "phase": ".",
                  "strand": "+", "score": "."}
        attrs = {"ID": "sRNA0", "Name": "srna_0", "sRNA_type": srna_type}
        return Create_generator(fields, attrs, "gff")

    previous = build_srna(3, 33, "5utr")
    same_span = build_srna(3, 33, "3utr")
    far_away = build_srna(53, 233, "5utr")

    # Each call starts from a False flag, as in two independent checks.
    self.assertTrue(ms.detect_overlap(same_span, previous, "UTR", False))
    self.assertFalse(ms.detect_overlap(far_away, previous, "UTR", False))
def test_get_feature(self):
    """Check which identifier ``circ.get_feature`` returns.

    Expected results: the locus_tag when present, otherwise the
    protein_id, otherwise the string "cds0:122-267_f".
    """
    cases = (
        ({"ID": "cds0", "Name": "CDS_0", "locus_tag": "AAA_00001",
          "protein_id": "YP_918384.3"}, "AAA_00001"),
        ({"ID": "cds0", "Name": "CDS_0", "protein_id": "YP_918384.3"},
         "YP_918384.3"),
        ({"ID": "cds0", "Name": "CDS_0"}, "cds0:122-267_f"),
    )
    for cds_attrs, expected in cases:
        entry = Create_generator(self.example.cds_dict, cds_attrs, "gff")
        self.assertEqual(circ.get_feature(entry), expected)
def test_gen_batch(self):
    """Check the text ``gs.gen_batch`` writes for one "+" strand CDS.

    ``gs.import_wig`` is replaced by a mock before the call and the output
    is compared with the fixture's ``out_print_wig``.
    """
    # NOTE(review): this replacement of gs.import_wig is not undone in this
    # test -- confirm whether later tests rely on or are affected by it.
    gs.import_wig = Mock_func().mock_import_wig
    out = StringIO()
    tex_libs = "wig1 wig2"
    notex_libs = "wig3 wig4"
    frag_libs = "wig5"
    cds = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 3, "end": 6, "phase": ".", "strand": "+", "score": "."},
        {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
        "gff")
    seq = {"aaa": "ATATGGCCGACGAGTTCGACGATACAACCCGTGGGG"}

    gs.gen_batch(tex_libs, notex_libs, frag_libs, "+", [cds], out, seq)

    self.assertEqual(out.getvalue(), self.example.out_print_wig)
def test_set_cutoff(self):
    """Check the ``(diff, cutoff_percent)`` pairs from ``gea.set_cutoff``.

    Three argument combinations are tried on one CDS (3-102) with
    ``detects["express"]`` set to 100.
    """
    detects = {"express": 100}
    cds = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 3, "end": 102, "phase": ".", "strand": "+",
         "score": "."},
        {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
        "gff")

    # (first three arguments) -> (expected diff, expected cutoff_percent)
    cases = (
        (("tex", "all", "all"), (100, 0)),
        (("frag", "all", "n_50"), (100, 50)),
        (("tex", "p_0.5", "n_50"), (1.0, 0.5)),
    )
    for leading_args, (want_diff, want_percent) in cases:
        diff, cutoff_percent = gea.set_cutoff(*leading_args, detects, cds)
        self.assertEqual(diff, want_diff)
        self.assertEqual(cutoff_percent, want_percent)
def test_detect_inter_type(self):
    """Check the per-type table returned by ``sd.detect_inter_type``.

    ``sd.get_coverage`` is replaced by a mock for the duration of the call.
    The restore now sits in a ``finally`` clause so that a failing call or
    assertion can no longer leave the mock installed for later tests.
    """
    inter_dict = [{
        "seq_id": "aaa",
        "source": "UTR_derived",
        "feature": "Transcript",
        "start": 1,
        "end": 23,
        "phase": ".",
        "strand": "+",
        "score": "."
    }]
    attributes_inter = [{
        "ID": "tran0",
        "Name": "Transcript_0",
        "UTR_type": "3utr"
    }]
    inters = [Create_generator(inter_dict[0], attributes_inter[0], "gff")]
    wigs = {"forward": "wigs_f", "reverse": "wigs_r"}

    sd.get_coverage = self.mock.mock_get_coverage
    try:
        data = sd.detect_inter_type(inters, wigs, "test")
        self.assertDictEqual(
            data,
            {'aaa': {'interCDS': [], '5utr': [], '3utr': ['2']}})
    finally:
        # Same restore expression as before; only its placement changed.
        sd.get_coverage = copy.deepcopy(get_coverage)
def test_detect_express(self):
    """Check how ``gea.detect_express`` updates the ``detects`` counters.

    After one call on the fixture's "track_1" fragment data, ``detects``
    must have ``track`` 1 and ``express`` 2, with the rest unchanged.
    """
    cds = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 3, "end": 5, "phase": ".", "strand": "+", "score": "."},
        {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
        "gff")
    texs = {"tex1_tex2": 0}
    plots = {"frag": {}}
    detects = {"cond": 0, "track": 0, "import": False, "express": 0}
    track_data = self.example.wig_frags["aaa"]["frag"]["track_1"]

    gea.detect_express(track_data, cds, 5, detects, "all", "all", texs,
                       "frag", 2, "track_1", plots, "high", "frag")

    expected = {'track': 1, 'import': False, 'cond': 0, 'express': 2}
    self.assertDictEqual(expected, detects)
def test_fix_primary_type(self):
    """Check the ``UTR_length`` values after ``co.fix_primary_type``.

    Three "+" strand TSSs at positions 2, 3 and 4 are passed in with the
    fixture's forward and reverse wiggle data; the set of resulting
    ``UTR_length`` attributes is compared with the expected one.
    """
    positions = (2, 3, 4)
    attributes = [{
        "type": "Primary",
        "ID": "tss0",
        "Name": "TSS:2_+",
        "UTR_length": "Primary_10",
        "associated_gene": "AAA_00001"
    }, {
        "type": "Primary&Internal",
        "ID": "tss1",
        "Name": "TSS:3_+",
        "UTR_length": "Primary_20&Internal_NA",
        "associated_gene": "AAA_00001&AAA_00005"
    }, {
        "type": "Primary&Primary",
        "ID": "tss2",
        "Name": "TSS:4_+",
        "UTR_length": "Primary_40&Primary_60",
        "associated_gene": "AAA_00001&AAA_00004"
    }]
    tsss = []
    for position, attrs in zip(positions, attributes):
        fields = {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                  "start": position, "end": position, "phase": ".",
                  "strand": "+", "score": "."}
        tsss.append(Create_generator(fields, attrs, "gff"))

    new_tsss = co.fix_primary_type(tsss, self.example.wigs_f,
                                   self.example.wigs_r)

    found = {tss.attributes["UTR_length"] for tss in new_tsss}
    self.assertEqual(
        found, {"Internal_NA&Secondary_20", "Primary_60", "Primary_10"})
def mock_read_gff(self, srna_file, data_type):
    """Return the first two fixture sRNA entries as generator objects.

    Both parameters are accepted but not used.
    """
    fixture = self.example
    return [
        Create_generator(fixture.srna_dict[pos],
                         fixture.attributes_srna[pos], "gff")
        for pos in range(2)
    ]
class Example(object):
    """Fixture data: one coverage track and one transcript (4-20, "+")."""

    # 24 coverage values: the same six-value pattern repeated four times.
    wigs = {
        "aaa": {
            "frag_1": {
                "track_1|+|frag": [100, 30, 23, 21, 21, 2] * 4
            }
        }
    }

    ta_dict = [
        {"seq_id": "aaa", "source": "ANNOgesic", "feature": "Transcript",
         "start": 4, "end": 20, "phase": ".", "strand": "+", "score": "."},
    ]
    attributes_tas = [
        {"ID": "tran0", "Name": "Transcript_0",
         "detect_lib": "fragmented&tex_notex"},
    ]

    tas = []
    # Index loop on purpose: class-body comprehensions cannot reach the
    # class-level lists above.
    for index in range(1):
        tas.append(
            Create_generator(ta_dict[index], attributes_tas[index], "gff"))
def test_compare_tran(self):
    """Check what ``c_gff.compare_tran`` writes for a 100-500 transcript.

    The written text is parsed with ``extract_info``; the positional part
    and the collected "Parent" attributes are compared as sets.
    """
    out = StringIO()
    gffs = read_dict(3, self.example.gff_dict, self.example.attributes_gff)
    tran = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "Transcript",
         "start": 100, "end": 500, "phase": ".", "strand": "+",
         "score": "."},
        {"ID": "tran0", "Name": "Tran_0"},
        "gff")

    c_gff.compare_tran(gffs, tran, out)

    datas, attributes = extract_info(out.getvalue(), "string")
    parents = [
        element
        for attribute in attributes
        for element in attribute
        if "Parent" in element
    ]
    self.assertEqual(set(datas), {"aaa\tRefseq\tCDS\t160\t300\t.\t+\t."})
    self.assertEqual(set(parents), {"Parent=tran0"})
    out.close()
def test_fix_primary_type(self):
    """Check the ``type`` attributes after ``mm.fix_primary_type``.

    Two TSSs (positions 3 and 5) are modified in place; afterwards their
    types must be "Primary" and "Antisense,Secondary".
    """
    coverages = (200, 300, 400, 600, 650, 655)
    wigs = {"aaa": {"track_1": [
        {"pos": pos, "coverage": cover}
        for pos, cover in enumerate(coverages, start=1)
    ]}}
    attributes_tss = [
        {"ID": "CDS0", "Name": "CDS_0", "type": "Primary,Primary",
         "associated_gene": "AAA_00001,AAA_00002",
         "utr_length": "Primary_25,Primary_200"},
        {"ID": "CDS1", "Name": "CDS_1", "type": "Primary,Antisense",
         "associated_gene": "AAA_00001,AAA_00004",
         "utr_length": "Primary_27,Antisense_NA"},
    ]
    tsss = []
    for position, attrs in zip((3, 5), attributes_tss):
        fields = {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                  "start": position, "end": position, "phase": ".",
                  "strand": "+", "score": "."}
        tsss.append(Create_generator(fields, attrs, "gff"))

    mm.fix_primary_type(tsss, wigs, "test")

    first, second = tsss
    self.assertEqual(first.attributes["type"], "Primary")
    self.assertEqual(second.attributes["type"], "Antisense,Secondary")
def entries(self, fh):
    """Yield fixture entries chosen by a keyword found in each line.

    For every line of ``fh`` the first matching keyword (checked in the
    order listed below) selects a ``<keyword>_dict`` / ``attributes_<keyword>``
    pair on ``self.example`` and the number of entries to yield.

    A line matching no keyword keeps the selection of the previous line;
    if that happens on the very first line the selection is still unbound
    and an ``UnboundLocalError`` is raised.
    """
    # (keyword, number of entries); order matters -- first match wins.
    selectors = (
        ("gff", 3),
        ("tran", 3),
        ("term", 3),
        ("tss", 3),
        ("utr5", 2),
        ("utr3", 2),
    )
    for line in fh:
        for keyword, count in selectors:
            if keyword in line:
                lists = getattr(self.example, keyword + "_dict")
                attributes = getattr(self.example, "attributes_" + keyword)
                num = count
                break
        for index in range(num):
            yield Create_generator(lists[index], attributes[index], "gff")
def test_compare_term(self):
    """Check that ``du.compare_term`` returns the entry starting at 530.

    The query entry spans 138-540 on the "+" strand and is compared with
    the fixture's ``terms`` using a final argument of 5.
    """
    query = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
         "start": 138, "end": 540, "phase": ".", "strand": "+",
         "score": "."},
        {"ID": "tran0", "Name": "Transcript_0"},
        "gff")

    matched = du.compare_term(query, self.example.terms, 5)

    self.assertEqual(matched.start, 530)
def test_compare_wigs(self):
    """Check how ``gea.compare_wigs`` updates the ``stats`` counters.

    One CDS (3-5) is evaluated against the fixture's ``wig_texs``; the
    "total" and "aaa" tables are each expected to gain ``tex``, ``all``
    and ``least_one`` counts of 1.
    """
    cds = Create_generator(
        {"seq_id": "aaa", "source": "Refseq", "feature": "CDS",
         "start": 3, "end": 5, "phase": ".", "strand": "+", "score": "."},
        {"ID": "CDS0", "Name": "CDS_0", "locus_tag": "AAA_00001"},
        "gff")
    texs = {"tex1_tex2": 0}
    replicates = {"tex": 1, "frag": 1}
    stats = {
        "CDS": {
            "total": {"total": 0, "least_one": 0, "all": 0, "none": 0},
            "aaa": {"total": 0, "least_one": 0, "all": 0, "none": 0},
        }
    }
    outs = {"CDS": {"least_one": [], "all": [], "none": []}}
    plots = {}

    gea.compare_wigs(self.example.wig_texs, cds, 2, texs, replicates,
                     stats["CDS"], outs["CDS"], plots, "high", 5, "all",
                     "all")

    expected = {
        'CDS': {
            'total': {'tex': 1, 'none': 0, 'total': 0, 'all': 1,
                      'least_one': 1},
            'aaa': {'tex': 1, 'none': 0, 'total': 0, 'all': 1,
                    'least_one': 1},
        }
    }
    self.assertDictEqual(stats, expected)
def test_del_repeat(self):
    """Check the ``UTR_length`` values after ``co.del_repeat``.

    Three TSSs are modified in place; the set of their resulting
    ``UTR_length`` attributes is compared with the expected one.
    """
    # (position, strand) of each TSS, paired by index with `attributes`.
    sites = ((2, "+"), (22, "+"), (122, "-"))
    attributes = [{
        "type": "Primary",
        "ID": "tss0",
        "Name": "TSS:2_+",
        "UTR_length": "Primary_100",
        "associated_gene": "AAA_00001"
    }, {
        "type": "Primary&Primary",
        "ID": "tss1",
        "Name": "TSS:22_+",
        "UTR_length": "Primary_20&Primary_50",
        "associated_gene": "AAA_00004&AAA_00005"
    }, {
        "type": "Secondary&Internal",
        "ID": "tss2",
        "Name": "TSS:122_-",
        "UTR_length": "Secondary_220&Internal_NA",
        "associated_gene": "AAA_00008&AAA_00009"
    }]
    tsss = []
    for (position, strand), attrs in zip(sites, attributes):
        fields = {"seq_id": "aaa", "source": "Refseq", "feature": "TSS",
                  "start": position, "end": position, "phase": ".",
                  "strand": strand, "score": "."}
        tsss.append(Create_generator(fields, attrs, "gff"))

    co.del_repeat(tsss)

    found = {tss.attributes["UTR_length"] for tss in tsss}
    self.assertEqual(
        found, {"Primary_100", "Primary_20", "Internal_NA&Secondary_220"})