def get_genome(self, fasta_path='', predict_noncoding_cds=False):
    """Return the Genome object built from the Ensembl GTF file at ``self.gtf_path``.

    Each line is parsed with ``self._parse_line`` and passed to
    ``self._get_gene``. Lines that are empty after parsing, that have no
    ``transcript_id``, or whose ``gene_biotype`` is rejected by the optional
    ``self.gene_biotype`` filter are skipped. The remaining lines are applied
    to their transcript according to their feature ``type``
    (``start_codon``, ``stop_codon``, ``exon`` or ``CDS``); any other type
    only triggers the transcript lookup.

    Args:
        fasta_path: value stored on ``self.genome.fasta_path`` before parsing.
        predict_noncoding_cds: when true, ``self.set_noncoding_cds()`` is
            called once after the whole file has been processed.

    Returns:
        ``self.genome``.

    Raises:
        OSError: if ``self.gtf_path`` cannot be opened.
    """
    self.genome.fasta_path = fasta_path
    with open(self.gtf_path) as gtf:
        # Iterate the file object directly instead of calling readlines():
        # same lines, same order, but the file is never held in memory whole.
        for raw_line in gtf:
            line_dic = self._parse_line(raw_line)
            # NOTE(review): the gene lookup runs before the filters below, so
            # it sees every parsed line, including ones that are then skipped.
            # Kept in this order because _get_gene may rely on it — confirm.
            gene = self._get_gene(line_dic)
            if not line_dic or 'transcript_id' not in line_dic:
                continue
            # The biotype filter only applies when self.gene_biotype is truthy.
            if self.gene_biotype and line_dic['gene_biotype'] not in self.gene_biotype:
                continue
            trans = self._get_transcript(line_dic, gene)
            feature = line_dic['type']
            if feature == 'start_codon':
                # Only the first element returned by fix_order is used
                # (presumably the strand-aware first position — confirm
                # against ft.fix_order).
                trans.add_cds(ft.fix_order(line_dic['start'], line_dic['stop'], trans.strand)[0])
            elif feature == 'stop_codon':
                trans.cds_stop = ft.fix_order(line_dic['start'], line_dic['stop'], trans.strand)[0]
            elif feature == 'exon':
                self._set_exon(line_dic['start'], line_dic['stop'], trans, gene,
                               int(line_dic['exon_number']), line_dic['exon_id'])
            elif feature == 'CDS':
                trans.add_cds(line_dic['start'], line_dic['stop'])
    if predict_noncoding_cds:
        self.set_noncoding_cds()
    return self.genome
def test_fix_order(self):
    """fix_order(10, 20, '+') must return its first site below its second."""
    ordered = ft.fix_order(10, 20, '+')
    first_site = ordered[0]
    second_site = ordered[1]
    self.assertLess(first_site, second_site)
def test_fix_order_rev_opp(self):
    """fix_order(20, 10, '-') must return its first site above its second."""
    ordered = ft.fix_order(20, 10, '-')
    first_site = ordered[0]
    second_site = ordered[1]
    self.assertGreater(first_site, second_site)