예제 #1
0
def remove_UTR_ensembl_exons (protein_id, species, exons):
    '''
    Build fresh EnsemblExon copies of the given exons.
    This is for the purpose of statistics generating.

    Each copy is constructed from the source exon's ref_protein_id,
    species, exon_id, start, stop, strand and sequence, and is given the
    same ordinal as the source exon.

    NOTE(review): despite the function name, no coordinate or sequence
    trimming of untranslated regions is performed here -- the copies carry
    the original start/stop/sequence unchanged. Confirm whether the
    trimming step is still to be implemented.

    @param protein_id: reference protein id, first half of the data map key
    @param species:    species name, second half of the data map key
    @param exons:      iterable of exon objects to copy
    @return: list of new EnsemblExon objects in the order of `exons`,
             or None when the data map lookup for (protein_id, species)
             raises KeyError
    '''
    dmc = DataMapContainer.Instance()
    try:
        # The data map itself is not used; the lookup only verifies that an
        # entry exists for this (protein, species) pair.
        dmc.get((protein_id, species))
    except KeyError:
        return None

    new_exons = []
    for exon in exons:
        new_exon = EnsemblExon((exon.ref_protein_id, exon.species),
                               exon.exon_id, exon.start, exon.stop,
                               exon.strand, exon.sequence)
        new_exon.set_exon_ordinal(exon.ordinal)
        # Collect the copy; without this the function always returned [].
        new_exons.append(new_exon)

    return new_exons
        
 def load_exons (self):
     '''
     Load the exons from the fasta file and create
     the dictionary mapping them by their Ensembl id.
     Exons are given appropriate ordinals.

     Each record id in the fasta file is split on '|' into exactly five
     fields: start, stop, transcript_id, exon_id, strand. A record with a
     different number of fields raises ValueError from the unpacking.

     Side effects:
       - self.strand is first set from the data map, then overwritten by
         every record read (1 when the record strand is 1, otherwise -1),
         so it ends up reflecting the last record in the file.
       - self.exons becomes a dict {exon_id: EnsemblExon}; a later record
         with a repeated exon_id replaces the earlier one in the dict.
       - Exons held in self.exons get ordinals 1..n, ordered by ascending
         start when self.strand == 1 and by descending start otherwise.

     @return: list of all EnsemblExon objects in file order, or None when
              the fasta file cannot be opened (the failure is logged to
              the 'containers' logger)
     '''
     data_map_container = DataMapContainer.Instance()
     containers_logger = Logger.Instance().get_logger('containers')

     data_map = data_map_container.get((self.ref_protein_id, self.species))
     self.strand = data_map.strand

     fasta_path = self.get_exon_file_path()
     # Probe the file up front so an unreadable path is logged and reported
     # as None; the handle is released immediately by the context manager.
     try:
         with open(fasta_path, 'r'):
             pass
     except IOError:
         containers_logger.error("%s,%s,%s" % (self.ref_protein_id, self.species, "Loading ensembl exons failed."))
         return None

     exon_list = []
     for seq_record in read_seq_records_from_file(fasta_path, IUPAC.ambiguous_dna):
         (start, stop, transcript_id, exon_id, strand) = seq_record.id.split('|')
         if int(strand) == 1:
             self.strand = 1
         else:
             self.strand = -1
             # For the non-forward strand the two coordinates are handed to
             # EnsemblExon in swapped order.
             start, stop = stop, start
         exon_list.append(EnsemblExon((self.ref_protein_id, self.species),
                                      exon_id, start, stop, strand, seq_record.seq))

     self.exons = {exon.exon_id: exon for exon in exon_list}

     # Assign ordinals to exons: ascending start on the forward strand,
     # descending start otherwise.
     ordered_exons = sorted(self.exons.values(),
                            key=lambda exon: exon.start,
                            reverse=(self.strand != 1))
     for ordinal, exon in enumerate(ordered_exons, 1):
         exon.set_exon_ordinal(ordinal)

     return exon_list