Ejemplo n.º 1
0
 def parse_ref_exons(self):
     """Build an exon mask for the locus and select the exons in the histogram range.

     Queries Ensembl release 75 for the exons overlapping
     ``self.chrom:self.start-self.stop`` and marks the positions they cover
     in an array of length ``self.stop - self.start`` (index 0 corresponds to
     ``self.start``).

     Returns:
         tuple: ``(exon_array, exon_numbers_subset)`` where ``exon_array`` is
         a float NumPy array holding 1 at exonic positions and 0 elsewhere,
         and ``exon_numbers_subset`` is the result of
         ``self.get_exon_numbers`` restricted to entries whose ``'start'``
         lies strictly between ``self.hist[0][0]`` and ``self.hist[0][-1]``.

     Raises:
         ValueError: propagated unchanged from ``exons_at_locus``
             (presumably when the pyensembl database is not installed or the
             locus is invalid -- TODO confirm and load the db here).
         IndexError: if no exon overlaps the requested locus.
     """
     ens_db = EnsemblRelease(75)
     exons = ens_db.exons_at_locus(self.chrom, self.start, self.stop)
     if not exons:
         # Same exception type the bare ``exons[0]`` lookup used to raise,
         # but with a message that says what actually went wrong.
         raise IndexError(
             "no exons found at %s:%s-%s" % (self.chrom, self.start, self.stop)
         )

     window_len = self.stop - self.start
     exon_array = np.zeros(window_len)
     exon_numbers = self.get_exon_numbers(ens_db, exons[0].gene_name)
     for exobj in exons:
         # Exons may extend beyond the window on either side. Clamp to the
         # window so a negative offset does not wrap around to the end of the
         # array and an offset past the end does not raise.
         # NOTE(review): the fill is half-open ([start, end)); if pyensembl's
         # ``end`` is inclusive the exon's last base is left unmarked --
         # confirm whether that is intended.
         lo = max(exobj.start - self.start, 0)
         hi = min(exobj.end - self.start, window_len)
         if lo < hi:
             exon_array[lo:hi] = 1
     # 2:29,448,326-29,448,432 exon 19
     # exon 22 start: 29445210
     # exon 18 end: 29449940
     # intron 19: 29446395-29448326
     # ATI initiation 29446768-29448326
     hist_lo = self.hist[0][0]
     hist_hi = self.hist[0][-1]
     in_hist_range = ((exon_numbers['start'] > hist_lo) &
                      (exon_numbers['start'] < hist_hi))
     return exon_array, exon_numbers[in_hist_range]