def test_create_starts_and_stops(self):
    """Check that create_starts_and_stops is passed on to the child mRNA.

    The gene is given a single mock mRNA; after calling
    ``gene.create_starts_and_stops('ATGC')`` the mock's method of the same
    name must have been called with the same sequence.
    """
    gene = Gene()
    mrna = Mock()
    gene.children = {'mrna': [mrna]}
    gene.create_starts_and_stops('ATGC')
    # ``assertCalledWith`` is not a Mock assertion: it either silently creates
    # a child mock (old mock releases) or raises AttributeError (modern
    # unittest.mock). ``assert_called_with`` is the real check.
    mrna.create_starts_and_stops.assert_called_with('ATGC')
def test_get_cds_length(self):
    """A gene whose only mRNA has a CDS of length 42 reports length 42."""
    cds = Mock()
    cds.length = Mock(return_value=42)
    mrna = Mock()
    mrna.get_cds = Mock(return_value=cds)
    gene = Gene()
    gene.children = {'mrna': [mrna]}
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(gene.get_cds_length(), 42)
def test_match_cds_and_exon_end(self):
    """Check that match_cds_and_exon_end is invoked on the child mRNA."""
    gene = Gene()
    mrna = Mock()
    gene.children = {'mrna': [mrna]}
    gene.match_cds_and_exon_end()
    # ``assertCalled()`` is not a Mock assertion and never verified anything;
    # inspecting ``called`` works on every mock version.
    self.assertTrue(mrna.match_cds_and_exon_end.called)
def process_gene_line(self, line, gene_type):
    """Build a Gene from a line and register it under its identifier.

    The arguments come from ``self.extract_gene_args(line)``. When that
    returns something falsy the line is ignored. A ``gene_type`` of
    ``'pseudogene'`` sets ``pseudo = True`` on the new gene before it is
    stored in ``self.genes``.
    """
    gene_args = self.extract_gene_args(line)
    if gene_args:
        identifier = gene_args['identifier']
        new_gene = Gene(**gene_args)
        if gene_type == 'pseudogene':
            new_gene.pseudo = True
        self.genes[identifier] = new_gene
def test_to_tbl_negative(self):
    """to_tbl on a minus-strand gene lists the indices as 50 then 1."""
    gene = Gene("seq1", "maker", [1, 50], "-", "foo_gene_1")
    mrna1 = Mock()
    mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
    mrna2 = Mock()
    mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
    gene.mrnas.append(mrna1)
    gene.mrnas.append(mrna2)
    expected = ("50\t1\tgene\n"
                "\t\t\tlocus_tag\tfoo_gene_1\n"
                "mrna1_to_tbl...\n"
                "mrna2_to_tbl...\n")
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(gene.to_tbl(), expected)
def test_max_intron_length_filter(self):
    """Apply MaxIntronLengthFilter(30) to three genes with mock mRNAs.

    The mRNAs report intron lengths of 20, 30 and 40; one further mRNA has
    no exon at all. After filtering, all three genes must still be present
    and the mRNA lists of the first two genes must be unchanged.
    """
    def fake_mrna(identifier, intron_length=None):
        # Mock mRNA that is not death-flagged; without an intron length it
        # has ``exon = None`` and no intron getters configured.
        mrna = Mock()
        mrna.identifier = identifier
        mrna.death_flagged = False
        if intron_length is None:
            mrna.exon = None
        else:
            mrna.exon = Mock()
            mrna.get_shortest_intron = Mock(return_value=intron_length)
            mrna.get_longest_intron = Mock(return_value=intron_length)
        return mrna

    intron_filter = MaxIntronLengthFilter(30)
    seq = Sequence()
    seq.genes = [
        Gene('foo_seq', 'geib_labs', [1, 2], '+', gene_id)
        for gene_id in ('foo1', 'foo2', 'foo3')
    ]

    below_limit = fake_mrna('foo1-RA', 20)
    at_limit = fake_mrna('foo2-RA', 30)
    without_exon = fake_mrna('foo2-RB')
    above_limit = fake_mrna('foo3-RA', 40)

    mrnas_per_gene = ([below_limit], [at_limit, without_exon], [above_limit])
    for gene, mrnas in zip(seq.genes, mrnas_per_gene):
        gene.mrnas = mrnas
        gene.death_flagged = False

    intron_filter.apply(seq)

    self.assertEqual(len(seq.genes), 3)
    self.assertEqual(seq.genes[0].mrnas, [below_limit])
    self.assertEqual(seq.genes[1].mrnas, [at_limit, without_exon])
def test_to_tbl_positive_with_name(self):
    """to_tbl on a named plus-strand gene emits a ``gene`` line with the name.

    The 'foo' annotation added here does not appear in the expected output.
    """
    gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                strand="+", identifier="foo_gene_1", name="wtfg")
    self.assertFalse(gene.annotations)
    gene.add_annotation('foo', 'dog')
    mrna1 = Mock()
    mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
    mrna2 = Mock()
    mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
    gene.mrnas.append(mrna1)
    gene.mrnas.append(mrna2)
    expected = ("1\t50\tgene\n"
                "\t\t\tgene\twtfg\n"
                "\t\t\tlocus_tag\tfoo_gene_1\n"
                "mrna1_to_tbl...\n"
                "mrna2_to_tbl...\n")
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(gene.to_tbl(), expected)
def test_max_cds_length_filter(self):
    """Apply MaxCDSLengthFilter(100) to three genes with mock mRNAs.

    The mRNAs report CDS lengths of 90, 100 and 110; one further mRNA has
    no CDS at all. After filtering, all three genes must still be present
    and the mRNA lists of the first two genes must be unchanged.
    """
    def fake_mrna(identifier, cds_length=None):
        # Mock mRNA that is not death-flagged; without a CDS length it has
        # ``cds = None``.
        mrna = Mock()
        mrna.identifier = identifier
        mrna.death_flagged = False
        if cds_length is None:
            mrna.cds = None
        else:
            mrna.cds = Mock()
            mrna.cds.length = Mock(return_value=cds_length)
        return mrna

    cds_filter = MaxCDSLengthFilter(100)
    seq = Sequence()
    seq.genes = [
        Gene('foo_seq', 'geib_labs', [1, 2], '+', gene_id)
        for gene_id in ('foo1', 'foo2', 'foo3')
    ]

    below_limit = fake_mrna('foo1-RA', 90)
    without_cds = fake_mrna('foo2-RA')
    at_limit = fake_mrna('foo2-RB', 100)
    above_limit = fake_mrna('foo3-RA', 110)

    mrnas_per_gene = ([below_limit], [without_cds, at_limit], [above_limit])
    for gene, mrnas in zip(seq.genes, mrnas_per_gene):
        gene.mrnas = mrnas
        gene.death_flagged = False

    cds_filter.apply(seq)

    self.assertEqual(len(seq.genes), 3)
    self.assertEqual(seq.genes[0].mrnas, [below_limit])
    self.assertEqual(seq.genes[1].mrnas, [without_cds, at_limit])
def setUp(self):
    """Create two identical genes; the second one carries two mock mRNAs."""
    def new_gene():
        # Each gene gets its own indices list so the two never share state.
        return Gene(seq_name="sctg_0080_0020", source="maker",
                    indices=[3734, 7436], strand='+', identifier=1)

    def new_mrna(identifier):
        mrna = Mock()
        mrna.identifier = identifier
        mrna.death_flagged = False
        return mrna

    self.test_gene0 = new_gene()
    self.test_gene1 = new_gene()
    self.fake_mrna1 = new_mrna("fake_mrna1")
    self.fake_mrna2 = new_mrna("fake_mrna2")
    for mrna in (self.fake_mrna1, self.fake_mrna2):
        self.test_gene1.mrnas.append(mrna)
def test_make_positive(self):
    """make_positive flips a minus-strand gene onto the plus strand.

    For a sequence of length 8, a gene at 1..7 on '-' must end up at 2..8
    on '+', and the child mRNA must be asked to do the same conversion.
    """
    seq_len = 8
    gene = Gene(start=1, end=7, strand='-')
    mrna = Mock()
    mrna.type = 'mrna'
    mrna.make_positive = Mock()
    gene.add_child(mrna)
    gene.make_positive(seq_len)
    self.assertEqual(gene.start, 2)
    self.assertEqual(gene.end, 8)
    self.assertEqual(gene.strand, '+')
    # ``assertCalledWith`` is not a Mock assertion and never verified
    # anything; ``assert_called_with`` is the real check.
    mrna.make_positive.assert_called_with(seq_len)
def build_transcript_dictionary(seqs, genes):
    """Group genes into Transcript objects keyed by sequence id.

    Each item of ``genes`` is converted with ``Gene.from_gff_feature`` and
    each of its ``'mrna'`` entries with ``Mrna.from_gff_feature``. Items
    whose conversion yields ``None`` are reported on stdout and skipped.

    Args:
        seqs: mapping from sequence id to the sequence passed to Transcript.
        genes: iterable of GFF gene features.

    Returns:
        dict mapping sequence id to a Transcript holding the genes found on
        that sequence. Genes whose ``seqid`` is not in ``seqs`` are reported
        on stdout and left out.
    """
    transcripts = {}
    for gff_gene in genes:
        gene = Gene.from_gff_feature(gff_gene)
        # Identity test: ``== None`` would dispatch to a user-defined __eq__.
        if gene is None:
            print("Could not convert GFFFeature to Gene, skipping")
            continue

        new_mrnas = []
        for gff_mrna in gene['mrna']:
            mrna = Mrna.from_gff_feature(gff_mrna)
            if mrna is None:
                print("Could not convert GFFFeature to Mrna, skipping")
                continue
            new_mrnas.append(mrna)
        gene.children['mrna'] = new_mrnas

        if gene.seqid in transcripts:
            transcripts[gene.seqid].genes.append(gene)
        elif gene.seqid in seqs:
            transcripts[gene.seqid] = Transcript([gene], seqs[gene.seqid])
        else:
            print("Gene "+gene.attributes["ID"]+" is on sequence "+gene.seqid+" which does not exist. Skipping...")
    return transcripts
def test_to_tbl_positive(self):
    """to_tbl on a plus-strand gene lists the indices as 1 then 50.

    The 'foo' annotation added here does not appear in the expected output.
    """
    gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                strand="+", identifier="foo_gene_1")
    self.assertFalse(gene.annotations)
    gene.add_annotation('foo', 'dog')
    mrna1 = Mock()
    mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
    mrna2 = Mock()
    mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
    gene.mrnas.append(mrna1)
    gene.mrnas.append(mrna2)
    expected = ("1\t50\tgene\n"
                "\t\t\tlocus_tag\tfoo_gene_1\n"
                "mrna1_to_tbl...\n"
                "mrna2_to_tbl...\n")
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(gene.to_tbl(), expected)
def process_gene_line(self, line):
    """Build a Gene from a line and register it under its identifier.

    The arguments come from ``self.extract_gene_args(line)``. When that
    returns something falsy the line is ignored and nothing is stored.
    """
    gene_args = self.extract_gene_args(line)
    if gene_args:
        identifier = gene_args['identifier']
        self.genes[identifier] = Gene(**gene_args)
def test_gene_initialized_without_annotations(self):
    """A Gene built without annotations has an empty annotations mapping."""
    newgene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                   strand="+", identifier="foo_gene_1")
    self.assertFalse(newgene.annotations)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(0, len(newgene.annotations.keys()))
def __init__(self, population_size, filename_of_genes, max_weight, logger):
    """Store the run parameters and build the Gene source.

    ``self.genes`` is constructed from ``filename_of_genes`` and ``logger``.
    ``max_score_for_run`` starts as None and ``best_chromosome`` as the
    array ``[0, 0]``.
    """
    self.population_size = population_size
    self.genes = Gene(filename_of_genes, logger)
    self.max_weight = max_weight
    self.logger = logger
    self.max_score_for_run = None
    self.best_chromosome = np.array([0, 0])
def test_gene_initialized_with_annotations(self):
    """A Gene built with one annotation key exposes exactly that one key."""
    newgene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                   strand="+", identifier="foo_gene_1",
                   annotations={"bar": ["cat"]})
    self.assertTrue(newgene.annotations)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(1, len(newgene.annotations.keys()))
def setUp(self):
    """Build a gene whose mRNA, CDS and exon are mocks spanning 1..100.

    ``gene1.get_mrna()`` returns the mock mRNA, whose ``get_cds()`` and
    ``get_exon()`` return the mock CDS and exon. The CDS has phase 0.
    """
    gene = Gene()
    mrna = Mock()
    cds = Mock()
    exon = Mock()

    gene.get_mrna = Mock(return_value=mrna)
    mrna.get_cds = Mock(return_value=cds)
    mrna.get_exon = Mock(return_value=exon)

    # Every feature covers the same 1..100 range.
    for feature in (gene, mrna, cds, exon):
        feature.start = 1
        feature.end = 100

    gene.attributes = {'ID':'foo_gene'}
    mrna.attributes = {'ID':'m.foo'}
    cds.phase = 0

    self.gene1 = gene
    self.mrna1 = mrna
    self.cds1 = cds
    self.exon1 = exon
class TestGene(unittest.TestCase):
    """Unit tests for Gene, using Mock objects in place of real mRNAs.

    In this variant gene annotations are lists of ``[key, value]`` pairs.
    All equality checks use ``assertEqual``; the ``assertEquals`` alias was
    removed in Python 3.12.
    """

    def setUp(self):
        # test_gene0 has no mRNAs; test_gene1 carries two mock mRNAs.
        self.test_gene0 = Gene(seq_name="sctg_0080_0020", source="maker",
                               indices=[3734, 7436], strand='+', identifier=1)
        self.test_gene1 = Gene(seq_name="sctg_0080_0020", source="maker",
                               indices=[3734, 7436], strand='+', identifier=1)
        self.fake_mrna1 = Mock()
        self.fake_mrna1.identifier = "fake_mrna1"
        self.fake_mrna1.death_flagged = False
        self.fake_mrna2 = Mock()
        self.fake_mrna2.identifier = "fake_mrna2"
        self.fake_mrna2.death_flagged = False
        self.test_gene1.mrnas.append(self.fake_mrna1)
        self.test_gene1.mrnas.append(self.fake_mrna2)

    def test_constructor(self):
        self.assertEqual('Gene', self.test_gene0.__class__.__name__)

    def test_length(self):
        self.assertEqual(3703, self.test_gene0.length())

    def test_gagflagged(self):
        self.assertFalse(self.test_gene0.gagflagged())
        self.test_gene0.annotations = [["gag_flag", "nice gene"]]
        self.assertTrue(self.test_gene0.gagflagged())

    def test_number_of_gagflags(self):
        # 2 + 1 flags from the mRNAs plus 1 on the gene itself.
        self.fake_mrna1.number_of_gagflags.return_value = 2
        self.fake_mrna2.number_of_gagflags.return_value = 1
        self.test_gene1.annotations = [["gag_flag", "nice gene"]]
        self.assertEqual(4, self.test_gene1.number_of_gagflags())

    def test_get_mrna_ids(self):
        expected = ["fake_mrna1", "fake_mrna2"]
        self.assertEqual(self.test_gene1.get_mrna_ids(), expected)

    def test_remove_mrna(self):
        self.assertEqual(self.test_gene1.mrnas,
                         [self.fake_mrna1, self.fake_mrna2])
        self.assertEqual(len(self.test_gene1.removed_mrnas), 0)
        self.test_gene1.remove_mrna('fake_mrna1')
        self.assertEqual(self.test_gene1.mrnas, [self.fake_mrna2])
        self.assertEqual(self.test_gene1.removed_mrnas, [self.fake_mrna1])

    def test_get_longest_exon(self):
        self.fake_mrna1.get_longest_exon.return_value = 10
        self.fake_mrna2.get_longest_exon.return_value = 20
        self.assertEqual(20, self.test_gene1.get_longest_exon())

    def test_get_shortest_exon(self):
        self.fake_mrna1.get_shortest_exon.return_value = 5
        self.fake_mrna2.get_shortest_exon.return_value = 8
        self.assertEqual(5, self.test_gene1.get_shortest_exon())

    def test_get_total_exon_length(self):
        self.fake_mrna1.get_total_exon_length.return_value = 15
        self.fake_mrna2.get_total_exon_length.return_value = 25
        self.assertEqual(40, self.test_gene1.get_total_exon_length())

    def test_get_num_exons(self):
        self.fake_mrna1.get_num_exons.return_value = 5
        self.fake_mrna2.get_num_exons.return_value = 4
        self.assertEqual(9, self.test_gene1.get_num_exons())

    def test_get_longest_intron(self):
        self.fake_mrna1.get_longest_intron.return_value = 8
        self.fake_mrna2.get_longest_intron.return_value = 10
        self.assertEqual(10, self.test_gene1.get_longest_intron())

    def test_get_shortest_intron(self):
        self.fake_mrna1.get_shortest_intron.return_value = 5
        self.fake_mrna2.get_shortest_intron.return_value = 8
        self.assertEqual(5, self.test_gene1.get_shortest_intron())

    def test_get_total_intron_length(self):
        self.fake_mrna1.get_total_intron_length.return_value = 15
        self.fake_mrna2.get_total_intron_length.return_value = 25
        self.assertEqual(40, self.test_gene1.get_total_intron_length())

    def test_get_num_introns(self):
        self.fake_mrna1.get_num_introns.return_value = 3
        self.fake_mrna2.get_num_introns.return_value = 2
        self.assertEqual(5, self.test_gene1.get_num_introns())

    def test_get_partial_info(self):
        # Only fake_mrna1 has both a start and a stop.
        self.fake_mrna1.has_stop.return_value = True
        self.fake_mrna1.has_start.return_value = True
        self.fake_mrna2.has_stop.return_value = False
        self.fake_mrna2.has_start.return_value = True
        results = self.test_gene1.get_partial_info()
        self.assertEqual(1, results["complete"])

    def test_adjust_indices(self):
        self.test_gene1.adjust_indices(16)
        self.fake_mrna1.adjust_indices.assert_called_with(16, 1)
        self.assertEqual(3750, self.test_gene1.indices[0])
        # adjust them back
        self.test_gene1.adjust_indices(-16)
        self.fake_mrna1.adjust_indices.assert_called_with(-16, 1)
        self.assertEqual(3734, self.test_gene1.indices[0])

    def test_remove_mrnas_with_internal_stops(self):
        helper = Mock()
        helper.mrna_contains_internal_stop.return_value = True
        self.assertEqual(2, len(self.test_gene1.mrnas))
        self.test_gene1.remove_mrnas_with_internal_stops(helper)
        self.assertEqual(0, len(self.test_gene1.mrnas))

    def test_contains_mrna(self):
        self.fake_mrna1.identifier = "foo_mrna"
        self.fake_mrna2.identifier = "bar_mrna"
        self.assertTrue(self.test_gene1.contains_mrna("foo_mrna"))
        self.assertFalse(self.test_gene1.contains_mrna("zub_mrna"))

    def test_cds_to_gff(self):
        self.fake_mrna1.identifier = "foo_mrna"
        self.test_gene1.cds_to_gff("foo_seq", "foo_mrna")
        self.fake_mrna1.cds_to_gff.assert_called_with("foo_seq", "maker")

    def test_cds_to_gff_no_such_mrna(self):
        self.fake_mrna1.identifier = "foo_mrna"
        foo = self.test_gene1.cds_to_gff("foo_seq", "bar_mrna")
        self.assertFalse(foo)

    def test_cds_to_tbl(self):
        self.fake_mrna1.identifier = "foo_mrna"
        self.test_gene1.cds_to_tbl("foo_mrna")
        self.fake_mrna1.cds_to_tbl.assert_called_with()

    def test_to_mrna_fasta(self):
        helper = Mock()
        helper.mrna_to_fasta.return_value = "mrna_to_fasta\n"
        expected = "mrna_to_fasta\nmrna_to_fasta\n"
        self.assertEqual(expected, self.test_gene1.to_mrna_fasta(helper))

    def test_to_cds_fasta(self):
        helper = Mock()
        helper.mrna_to_cds_fasta.return_value = "mrna_to_CDS_fasta\n"
        expected = "mrna_to_CDS_fasta\nmrna_to_CDS_fasta\n"
        self.assertEqual(expected, self.test_gene1.to_cds_fasta(helper))

    def test_to_protein_fasta(self):
        helper = Mock()
        helper.mrna_to_protein_fasta.return_value = "mrna_to_protein_fasta\n"
        expected = "mrna_to_protein_fasta\nmrna_to_protein_fasta\n"
        self.assertEqual(expected, self.test_gene1.to_protein_fasta(helper))

    def test_to_gff(self):
        self.fake_mrna1.to_gff.return_value = "fake mrna1 to gff here:)\n"
        self.fake_mrna2.to_gff.return_value = "fake mrna2 to gff here:)\n"
        expected = "sctg_0080_0020\tmaker\tgene\t3734\t7436\t.\t+\t."
        expected += "\tID=1;foo=dog\n"
        expected += "fake mrna1 to gff here:)\n"
        expected += "fake mrna2 to gff here:)\n"
        self.test_gene1.add_annotation('foo', 'dog')
        self.assertEqual(expected, self.test_gene1.to_gff())

    def test_to_gff_with_name(self):
        self.fake_mrna1.to_gff.return_value = "fake mrna1 to gff here:)\n"
        self.fake_mrna2.to_gff.return_value = "fake mrna2 to gff here:)\n"
        expected = "sctg_0080_0020\tmaker\tgene\t3734\t7436\t.\t+\t."
        expected += "\tID=1;Name=foo_gene;foo=dog\n"
        expected += "fake mrna1 to gff here:)\n"
        expected += "fake mrna2 to gff here:)\n"
        self.test_gene1.add_annotation('foo', 'dog')
        self.test_gene1.name = "foo_gene"
        self.assertEqual(expected, self.test_gene1.to_gff())

    def test_str(self):
        expected = "Gene (ID=1, seq_name=sctg_0080_0020) containing 2 mrnas"
        self.assertEqual(expected, str(self.test_gene1))

    def test_create_starts_and_stops(self):
        mrna1 = Mock()
        mrna2 = Mock()
        self.test_gene0.mrnas = [mrna1, mrna2]
        seq_object = Mock()
        self.test_gene0.create_starts_and_stops(seq_object)
        mrna1.create_start_and_stop_if_necessary.assert_called_with(
            seq_object, '+')
        mrna2.create_start_and_stop_if_necessary.assert_called_with(
            seq_object, '+')

    def test_to_tbl_positive(self):
        gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                    strand="+", identifier="foo_gene_1")
        self.assertFalse(gene.annotations)
        gene.add_annotation('foo', 'dog')
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("1\t50\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_to_tbl_positive_with_name(self):
        gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                    strand="+", identifier="foo_gene_1", name="wtfg")
        self.assertFalse(gene.annotations)
        gene.add_annotation('foo', 'dog')
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("1\t50\tgene\n"
                    "\t\t\tgene\twtfg\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_gene_initialized_without_annotations(self):
        newgene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                       strand="+", identifier="foo_gene_1")
        self.assertFalse(newgene.annotations)
        self.assertEqual(0, len(newgene.annotations))

    def test_gene_initialized_with_annotations(self):
        newgene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                       strand="+", identifier="foo_gene_1",
                       annotations=[["bar", "cat"]])
        self.assertTrue(newgene.annotations)
        self.assertEqual(1, len(newgene.annotations))

    def test_to_tbl_negative(self):
        gene = Gene("seq1", "maker", [1, 50], "-", "foo_gene_1")
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("50\t1\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)
class TestGene(unittest.TestCase):
    """Unit tests for the GFF-feature based Gene, using Mock children.

    The ``no_start_no_stop`` / ``start_no_stop`` / ``no_start_stop`` /
    ``start_stop`` callables installed as ``__contains__`` on the mock mRNA
    are defined elsewhere in this module.
    All equality checks use ``assertEqual``; the ``assertEquals`` alias was
    removed in Python 3.12.
    """

    def setUp(self):
        # gene1 -> mrna1 -> (cds1, exon1), every feature spanning 1..100.
        self.gene1 = Gene()
        self.mrna1 = Mock()
        self.cds1 = Mock()
        self.exon1 = Mock()
        self.gene1.get_mrna = Mock(return_value=self.mrna1)
        self.mrna1.get_cds = Mock(return_value=self.cds1)
        self.mrna1.get_exon = Mock(return_value=self.exon1)
        self.gene1.start = 1
        self.gene1.end = 100
        self.gene1.attributes = {'ID': 'foo_gene'}
        self.mrna1.start = 1
        self.mrna1.end = 100
        self.mrna1.attributes = {'ID': 'm.foo'}
        self.cds1.start = 1
        self.cds1.end = 100
        self.cds1.phase = 0
        self.exon1.start = 1
        self.exon1.end = 100

    def test_from_gff_feature_success(self):
        gff_gene = Mock()
        gff_gene.type = "gene"
        tran_gene = Gene.from_gff_feature(gff_gene)
        self.assertTrue(tran_gene)

    def test_from_gff_features_fails(self):
        gff_gene = Mock()
        gff_gene.type = "asdf"
        tran_gene = Gene.from_gff_feature(gff_gene)
        self.assertFalse(tran_gene)

    def test_get_cds_length(self):
        gene = Gene()
        mrna = Mock()
        cds = Mock()
        cds.length = Mock(return_value=42)
        mrna.get_cds = Mock(return_value=cds)
        gene.children = {'mrna': [mrna]}
        self.assertEqual(gene.get_cds_length(), 42)

    def test_remove_contig_from_gene_id(self):
        expected = 'g.123'
        self.gene1.attributes['ID'] = 'contig123|g.123'
        self.gene1.remove_contig_from_gene_id()
        self.assertEqual(expected, self.gene1.attributes['ID'])

    def test_gene_to_tbl_nostart_nostop(self):
        expected = ("<1\t>100\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene\n"
                    "<1\t>100\tCDS\n"
                    "\t\t\tprotein_id\tm.foo\n"
                    "\t\t\tproduct\thypothetical protein\n")
        self.mrna1.__contains__ = no_start_no_stop
        tbl = self.gene1.to_tbl()
        self.assertEqual(tbl, expected)

    def test_gene_to_tbl_start_nostop(self):
        expected = ("1\t>100\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene\n"
                    "1\t>100\tCDS\n"
                    "\t\t\tprotein_id\tm.foo\n"
                    "\t\t\tproduct\thypothetical protein\n")
        self.mrna1.__contains__ = start_no_stop
        tbl = self.gene1.to_tbl()
        self.assertEqual(tbl, expected)

    def test_gene_to_tbl_nostart_stop(self):
        expected = ("<1\t100\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene\n"
                    "<1\t100\tCDS\n"
                    "\t\t\tprotein_id\tm.foo\n"
                    "\t\t\tproduct\thypothetical protein\n")
        self.mrna1.__contains__ = no_start_stop
        tbl = self.gene1.to_tbl()
        self.assertEqual(tbl, expected)

    def test_gene_to_tbl_start_stop(self):
        expected = ("1\t100\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene\n"
                    "1\t100\tCDS\n"
                    "\t\t\tprotein_id\tm.foo\n"
                    "\t\t\tproduct\thypothetical protein\n")
        self.mrna1.__contains__ = start_stop
        tbl = self.gene1.to_tbl()
        self.assertEqual(tbl, expected)

    def test_gene_to_tbl_genename(self):
        expected = ("<1\t>100\tgene\n"
                    "\t\t\tgene\tf00x4\n"
                    "\t\t\tlocus_tag\tfoo_gene\n"
                    "<1\t>100\tCDS\n"
                    "\t\t\tprotein_id\tm.foo\n"
                    "\t\t\tproduct\thypothetical protein\n")
        self.gene1.attributes["Name"] = "f00x4"
        self.mrna1.__contains__ = no_start_no_stop
        tbl = self.gene1.to_tbl()
        self.assertEqual(tbl, expected)

    def test_gene_to_tbl_dbxref(self):
        # Parenthesised literal: the original ended with a dangling backslash
        # that only parsed because a blank line happened to follow it.
        expected = ("<1\t>100\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene\n"
                    "<1\t>100\tCDS\n"
                    "\t\t\tprotein_id\tm.foo\n"
                    "\t\t\tdb_xref\tPfam:foo\n"
                    "\t\t\tdb_xref\tPfam:dog\n"
                    "\t\t\tdb_xref\tPfam:baz\n"
                    "\t\t\tproduct\thypothetical protein\n")
        self.gene1.get_mrna().attributes["Dbxref"] = "Pfam:foo,Pfam:dog,Pfam:baz"
        self.mrna1.__contains__ = no_start_no_stop
        tbl = self.gene1.to_tbl()
        self.assertEqual(tbl, expected)

    ### FIX PHASE TESTS ###

    def test_fix_phase(self):
        self.gene1.start = 2
        self.mrna1.start = 2
        self.mrna1.__contains__ = no_start_stop
        self.cds1.start = 2
        self.cds1.phase = 0
        self.assertEqual(self.cds1.phase, 0)
        self.gene1.fix_phase("ATGC")
        self.assertEqual(self.cds1.phase, 1)

    def test_fix_phase_to_two(self):
        self.gene1.start = 3
        self.mrna1.start = 3
        self.mrna1.__contains__ = no_start_stop
        self.cds1.start = 3
        self.cds1.phase = 0
        self.assertEqual(self.cds1.phase, 0)
        self.gene1.fix_phase("ATGC")
        self.assertEqual(self.cds1.phase, 2)

    def test_fix_phase_does_nothing_when_indices_too_large(self):
        self.gene1.start = 4
        self.mrna1.start = 4
        self.mrna1.__contains__ = start_stop
        self.cds1.start = 4
        self.cds1.phase = 0
        self.assertEqual(self.cds1.phase, 0)
        self.gene1.fix_phase("ATGC")
        self.assertEqual(self.cds1.phase, 0)

    def test_fix_phase_works_on_cds_only(self):
        self.gene1.start = 1
        self.mrna1.start = 1
        self.mrna1.__contains__ = no_start_stop
        self.cds1.start = 3
        self.cds1.phase = 0
        self.assertEqual(self.cds1.phase, 0)
        self.assertEqual(self.cds1.start, 3)
        self.gene1.fix_phase("ATGC")
        self.assertEqual(self.cds1.phase, 2)
        self.assertEqual(self.cds1.start, 1)

    def test_fix_phase_does_nothing_when_not_partial(self):
        self.gene1.start = 2
        self.mrna1.start = 2
        self.mrna1.__contains__ = start_stop
        self.cds1.start = 2
        self.cds1.phase = 0
        self.assertEqual(self.cds1.phase, 0)
        self.gene1.fix_phase("ATGC")
        self.assertEqual(self.cds1.phase, 0)

    def test_fix_phase_adjusts_end_on_3prime_partial(self):
        self.gene1.start = 2
        self.mrna1.start = 2
        self.mrna1.__contains__ = start_no_stop
        self.cds1.start = 2
        self.gene1.end = 2
        self.mrna1.end = 2
        self.cds1.end = 2
        self.assertEqual(self.cds1.end, 2)
        self.gene1.fix_phase("ATGC")
        self.assertEqual(self.cds1.end, 4)

    #### MAKE POSITIVE TESTS ####

    def test_make_positive(self):
        seq_len = 8
        gene = Gene(start=1, end=7, strand='-')
        mrna = Mock()
        mrna.type = 'mrna'
        mrna.make_positive = Mock()
        gene.add_child(mrna)
        gene.make_positive(seq_len)
        self.assertEqual(gene.start, 2)
        self.assertEqual(gene.end, 8)
        self.assertEqual(gene.strand, '+')
        # ``assertCalledWith`` is not a Mock assertion and verified nothing.
        mrna.make_positive.assert_called_with(seq_len)

    #### MATCH CDS AND EXON END TESTS ####

    def test_match_cds_and_exon_end(self):
        gene = Gene()
        mrna = Mock()
        gene.children = {'mrna': [mrna]}
        gene.match_cds_and_exon_end()
        # ``assertCalled()`` is not a Mock assertion; check ``called`` instead.
        self.assertTrue(mrna.match_cds_and_exon_end.called)

    #### STARTS AND STOPS TESTS ####

    def test_create_starts_and_stops(self):
        gene = Gene()
        mrna = Mock()
        gene.children = {'mrna': [mrna]}
        gene.create_starts_and_stops('ATGC')
        # ``assertCalledWith`` is not a Mock assertion and verified nothing.
        mrna.create_starts_and_stops.assert_called_with('ATGC')
class TestGene(unittest.TestCase):
    """Unit tests for Gene, using Mock objects in place of real mRNAs.

    In this variant gene annotations are a dict mapping a key to a list of
    values.
    All equality checks use ``assertEqual``; the ``assertEquals`` alias was
    removed in Python 3.12.
    """

    def setUp(self):
        # test_gene0 has no mRNAs; test_gene1 carries two mock mRNAs.
        self.test_gene0 = Gene(seq_name="sctg_0080_0020", source="maker",
                               indices=[3734, 7436], strand='+', identifier=1)
        self.test_gene1 = Gene(seq_name="sctg_0080_0020", source="maker",
                               indices=[3734, 7436], strand='+', identifier=1)
        self.fake_mrna1 = Mock()
        self.fake_mrna1.identifier = "fake_mrna1"
        self.fake_mrna1.death_flagged = False
        self.fake_mrna2 = Mock()
        self.fake_mrna2.identifier = "fake_mrna2"
        self.fake_mrna2.death_flagged = False
        self.test_gene1.mrnas.append(self.fake_mrna1)
        self.test_gene1.mrnas.append(self.fake_mrna2)

    def test_constructor(self):
        self.assertEqual('Gene', self.test_gene0.__class__.__name__)

    def test_length(self):
        self.assertEqual(3703, self.test_gene0.length())

    def test_gagflagged(self):
        self.assertFalse(self.test_gene0.gagflagged())
        self.test_gene0.annotations = {"gag_flag": ["nice gene"]}
        self.assertTrue(self.test_gene0.gagflagged())

    def test_number_of_gagflags(self):
        # 2 + 1 flags from the mRNAs plus 1 on the gene itself.
        self.fake_mrna1.number_of_gagflags.return_value = 2
        self.fake_mrna2.number_of_gagflags.return_value = 1
        self.test_gene1.annotations = {"gag_flag": ["nice gene"]}
        self.assertEqual(4, self.test_gene1.number_of_gagflags())

    def test_get_mrna_ids(self):
        expected = ["fake_mrna1", "fake_mrna2"]
        self.assertEqual(self.test_gene1.get_mrna_ids(), expected)

    def test_remove_mrna(self):
        self.assertEqual(self.test_gene1.mrnas,
                         [self.fake_mrna1, self.fake_mrna2])
        self.assertEqual(len(self.test_gene1.removed_mrnas), 0)
        self.test_gene1.remove_mrna('fake_mrna1')
        self.assertEqual(self.test_gene1.mrnas, [self.fake_mrna2])
        self.assertEqual(self.test_gene1.removed_mrnas, [self.fake_mrna1])

    def test_remove_mrnas_from_list(self):
        self.fake_mrna3 = Mock()
        self.fake_mrna3.identifier = "fake_mrna3"
        self.fake_mrna3.death_flagged = False
        self.test_gene1.mrnas.append(self.fake_mrna3)
        bad_mrnas = ["fake_mrna3", "fake_mrna1"]
        self.assertEqual(3, len(self.test_gene1.mrnas))
        self.assertEqual(0, len(self.test_gene1.removed_mrnas))
        removed_mrnas = self.test_gene1.remove_mrnas_from_list(bad_mrnas)
        self.assertEqual(2, len(removed_mrnas))
        self.assertEqual(1, len(self.test_gene1.mrnas))
        self.assertEqual(2, len(self.test_gene1.removed_mrnas))

    def test_remove_empty_mrnas(self):
        # fake_mrna1 lacks an exon, fake_mrna2 lacks a CDS; only fake_mrna3
        # has both.
        self.fake_mrna1.rna_type = "mRNA"
        self.fake_mrna1.cds = Mock()
        self.fake_mrna1.exon = None
        self.fake_mrna2.rna_type = "mRNA"
        self.fake_mrna2.cds = None
        self.fake_mrna2.exon = Mock()
        self.fake_mrna3 = Mock()
        self.fake_mrna3.rna_type = "mRNA"
        self.fake_mrna3.identifier = "fake_mrna3"
        self.fake_mrna3.death_flagged = False
        self.fake_mrna3.cds = Mock()
        self.fake_mrna3.exon = Mock()
        self.test_gene1.mrnas.append(self.fake_mrna3)
        self.assertEqual(3, len(self.test_gene1.mrnas))
        self.assertEqual(0, len(self.test_gene1.removed_mrnas))
        removed_mrnas = self.test_gene1.remove_empty_mrnas()
        self.assertEqual(2, len(removed_mrnas))
        self.assertEqual(1, len(self.test_gene1.mrnas))
        self.assertEqual(2, len(self.test_gene1.removed_mrnas))

    def test_get_longest_exon(self):
        self.fake_mrna1.get_longest_exon.return_value = 10
        self.fake_mrna2.get_longest_exon.return_value = 20
        self.assertEqual(20, self.test_gene1.get_longest_exon())

    def test_get_shortest_exon(self):
        self.fake_mrna1.get_shortest_exon.return_value = 5
        self.fake_mrna2.get_shortest_exon.return_value = 8
        self.assertEqual(5, self.test_gene1.get_shortest_exon())

    def test_get_total_exon_length(self):
        self.fake_mrna1.get_total_exon_length.return_value = 15
        self.fake_mrna2.get_total_exon_length.return_value = 25
        self.assertEqual(40, self.test_gene1.get_total_exon_length())

    def test_get_num_exons(self):
        self.fake_mrna1.get_num_exons.return_value = 5
        self.fake_mrna2.get_num_exons.return_value = 4
        self.assertEqual(9, self.test_gene1.get_num_exons())

    def test_get_longest_intron(self):
        self.fake_mrna1.get_longest_intron.return_value = 8
        self.fake_mrna2.get_longest_intron.return_value = 10
        self.assertEqual(10, self.test_gene1.get_longest_intron())

    def test_get_shortest_intron(self):
        self.fake_mrna1.get_shortest_intron.return_value = 5
        self.fake_mrna2.get_shortest_intron.return_value = 8
        self.assertEqual(5, self.test_gene1.get_shortest_intron())

    def test_get_total_intron_length(self):
        self.fake_mrna1.get_total_intron_length.return_value = 15
        self.fake_mrna2.get_total_intron_length.return_value = 25
        self.assertEqual(40, self.test_gene1.get_total_intron_length())

    def test_get_num_introns(self):
        self.fake_mrna1.get_num_introns.return_value = 3
        self.fake_mrna2.get_num_introns.return_value = 2
        self.assertEqual(5, self.test_gene1.get_num_introns())

    def test_get_partial_info(self):
        # Only fake_mrna1 has both a start and a stop.
        self.fake_mrna1.has_stop.return_value = True
        self.fake_mrna1.has_start.return_value = True
        self.fake_mrna2.has_stop.return_value = False
        self.fake_mrna2.has_start.return_value = True
        results = self.test_gene1.get_partial_info()
        self.assertEqual(1, results["complete"])

    def test_adjust_indices(self):
        self.test_gene1.adjust_indices(16)
        self.fake_mrna1.adjust_indices.assert_called_with(16, 1)
        self.assertEqual(3750, self.test_gene1.indices[0])
        # adjust them back
        self.test_gene1.adjust_indices(-16)
        self.fake_mrna1.adjust_indices.assert_called_with(-16, 1)
        self.assertEqual(3734, self.test_gene1.indices[0])

    def test_remove_mrnas_with_internal_stops(self):
        helper = Mock()
        helper.mrna_contains_internal_stop.return_value = True
        self.assertEqual(2, len(self.test_gene1.mrnas))
        self.test_gene1.remove_mrnas_with_internal_stops(helper)
        self.assertEqual(0, len(self.test_gene1.mrnas))

    def test_contains_mrna(self):
        self.fake_mrna1.identifier = "foo_mrna"
        self.fake_mrna2.identifier = "bar_mrna"
        self.assertTrue(self.test_gene1.contains_mrna("foo_mrna"))
        self.assertFalse(self.test_gene1.contains_mrna("zub_mrna"))

    def test_cds_to_gff(self):
        self.fake_mrna1.identifier = "foo_mrna"
        self.test_gene1.cds_to_gff("foo_seq", "foo_mrna")
        self.fake_mrna1.cds_to_gff.assert_called_with("foo_seq", "maker")

    def test_cds_to_gff_no_such_mrna(self):
        self.fake_mrna1.identifier = "foo_mrna"
        foo = self.test_gene1.cds_to_gff("foo_seq", "bar_mrna")
        self.assertFalse(foo)

    def test_cds_to_tbl(self):
        self.fake_mrna1.identifier = "foo_mrna"
        self.test_gene1.cds_to_tbl("foo_mrna")
        self.fake_mrna1.cds_to_tbl.assert_called_with()

    def test_to_mrna_fasta(self):
        helper = Mock()
        helper.mrna_to_fasta.return_value = "mrna_to_fasta\n"
        expected = "mrna_to_fasta\nmrna_to_fasta\n"
        self.assertEqual(expected, self.test_gene1.to_mrna_fasta(helper))

    def test_to_cds_fasta(self):
        helper = Mock()
        helper.mrna_to_cds_fasta.return_value = "mrna_to_CDS_fasta\n"
        expected = "mrna_to_CDS_fasta\nmrna_to_CDS_fasta\n"
        self.assertEqual(expected, self.test_gene1.to_cds_fasta(helper))

    def test_to_protein_fasta(self):
        helper = Mock()
        helper.mrna_to_protein_fasta.return_value = "mrna_to_protein_fasta\n"
        expected = "mrna_to_protein_fasta\nmrna_to_protein_fasta\n"
        self.assertEqual(expected, self.test_gene1.to_protein_fasta(helper))

    def test_to_gff(self):
        self.fake_mrna1.to_gff.return_value = "fake mrna1 to gff here:)\n"
        self.fake_mrna2.to_gff.return_value = "fake mrna2 to gff here:)\n"
        expected = "sctg_0080_0020\tmaker\tgene\t3734\t7436\t.\t+\t."
        expected += "\tID=1;foo=dog\n"
        expected += "fake mrna1 to gff here:)\n"
        expected += "fake mrna2 to gff here:)\n"
        self.test_gene1.add_annotation('foo', 'dog')
        self.assertEqual(expected, self.test_gene1.to_gff())

    def test_to_gff_with_name(self):
        self.fake_mrna1.to_gff.return_value = "fake mrna1 to gff here:)\n"
        self.fake_mrna2.to_gff.return_value = "fake mrna2 to gff here:)\n"
        expected = "sctg_0080_0020\tmaker\tgene\t3734\t7436\t.\t+\t."
        expected += "\tID=1;Name=foo_gene;foo=dog\n"
        expected += "fake mrna1 to gff here:)\n"
        expected += "fake mrna2 to gff here:)\n"
        self.test_gene1.add_annotation('foo', 'dog')
        self.test_gene1.name = "foo_gene"
        self.assertEqual(expected, self.test_gene1.to_gff())

    def test_str(self):
        expected = "Gene (ID=1, seq_name=sctg_0080_0020) containing 2 mrnas"
        self.assertEqual(expected, str(self.test_gene1))

    def test_create_starts_and_stops(self):
        mrna1 = Mock()
        mrna2 = Mock()
        self.test_gene0.mrnas = [mrna1, mrna2]
        seq_object = Mock()
        self.test_gene0.create_starts_and_stops(seq_object)
        mrna1.create_start_and_stop_if_necessary.assert_called_with(
            seq_object, '+')
        mrna2.create_start_and_stop_if_necessary.assert_called_with(
            seq_object, '+')

    def test_add_mrna_annotation(self):
        mrna = Mock()
        mrna.identifier = "foo_mrna"
        self.test_gene0.mrnas = [mrna]
        self.test_gene0.add_mrna_annotation("foo_mrna", "gag_flag",
                                            "awesome_anno")
        mrna.add_annotation.assert_called_with("gag_flag", "awesome_anno")

    def test_to_tbl_positive(self):
        gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                    strand="+", identifier="foo_gene_1")
        self.assertFalse(gene.annotations)
        gene.add_annotation('foo', 'dog')
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("1\t50\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_to_tbl_positive_start_nostop(self):
        gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                    strand="+", identifier="foo_gene_1")
        self.assertFalse(gene.annotations)
        gene.add_annotation('foo', 'dog')
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        mrna2.has_start.return_value = True
        mrna2.has_stop.return_value = False
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("1\t>50\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_to_tbl_positive_nostart_stop(self):
        gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                    strand="+", identifier="foo_gene_1")
        self.assertFalse(gene.annotations)
        gene.add_annotation('foo', 'dog')
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        mrna2.has_start.return_value = False
        mrna2.has_stop.return_value = True
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("<1\t50\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_to_tbl_positive_nostart_nostop(self):
        gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                    strand="+", identifier="foo_gene_1")
        self.assertFalse(gene.annotations)
        gene.add_annotation('foo', 'dog')
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        mrna2.has_start.return_value = False
        mrna2.has_stop.return_value = False
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("<1\t>50\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_to_tbl_positive_with_name(self):
        gene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                    strand="+", identifier="foo_gene_1", name="wtfg")
        self.assertFalse(gene.annotations)
        gene.add_annotation('foo', 'dog')
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("1\t50\tgene\n"
                    "\t\t\tgene\twtfg\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_to_tbl_negative(self):
        gene = Gene("seq1", "maker", [1, 50], "-", "foo_gene_1")
        mrna1 = Mock()
        mrna1.to_tbl.return_value = "mrna1_to_tbl...\n"
        mrna2 = Mock()
        mrna2.to_tbl.return_value = "mrna2_to_tbl...\n"
        gene.mrnas.append(mrna1)
        gene.mrnas.append(mrna2)
        expected = ("50\t1\tgene\n"
                    "\t\t\tlocus_tag\tfoo_gene_1\n"
                    "mrna1_to_tbl...\n"
                    "mrna2_to_tbl...\n")
        self.assertEqual(gene.to_tbl(), expected)

    def test_gene_initialized_without_annotations(self):
        newgene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                       strand="+", identifier="foo_gene_1")
        self.assertFalse(newgene.annotations)
        self.assertEqual(0, len(newgene.annotations.keys()))

    def test_gene_initialized_with_annotations(self):
        newgene = Gene(seq_name="seq1", source="maker", indices=[1, 50],
                       strand="+", identifier="foo_gene_1",
                       annotations={"bar": ["cat"]})
        self.assertTrue(newgene.annotations)
        self.assertEqual(1, len(newgene.annotations.keys()))
def test_from_gff_feature_success(self):
    """A feature whose type is "gene" converts to a truthy result."""
    feature = Mock()
    feature.type = "gene"
    converted = Gene.from_gff_feature(feature)
    self.assertTrue(converted)
def test_from_gff_features_fails(self):
    """A feature whose type is not "gene" converts to a falsy result."""
    feature = Mock()
    feature.type = "asdf"
    converted = Gene.from_gff_feature(feature)
    self.assertFalse(converted)