コード例 #1
0
def translate_alignment_exons_for_protein(protein_id, exon_number):
    '''
    For every species returned by get_species_list for the given protein,
    turn the exons aligned with the "sw_gene" algorithm into Exon_translation
    objects, trim them with chop_off_start_utr / chop_off_end_utr, then
    assemble and store the resulting protein and create its alignment.

    protein_id  -- ID of the reference protein; used for the protein lookup,
                   the exon key and the output path
    exon_number -- forwarded unchanged to chop_off_start_utr and
                   chop_off_end_utr

    Species for which the exon container has no entry are logged to the
    "translator" logger and skipped. The list of those species is written
    out through write_failed_species_to_status and also returned.
    '''
    alignment_algorithm = "sw_gene"

    # utilities
    log_source          = Logger.Instance()
    crawler             = DirectoryCrawler()
    translator_log      = log_source.get_logger("translator")

    # data containers
    reference_exons     = EnsemblExonContainer.Instance()
    aligned_exon_store  = ExonContainer.Instance()
    proteins            = ProteinContainer.Instance()

    unprocessed_species = []
    protein_dir         = crawler.get_assembled_protein_path(protein_id)

    for species in get_species_list(protein_id, protein_dir):

        # per-species inputs: output fasta path and the reference protein sequence
        fasta_path    = "%s/%s.fa" % (crawler.get_assembled_protein_path(protein_id), species)
        reference_seq = proteins.get(protein_id).get_sequence_record().seq

        try:
            aligned_exons = aligned_exon_store.get((protein_id, species, alignment_algorithm))
        except KeyError:
            # nothing was aligned for this species - report it and move on
            translator_log.error("%s,%s,%s" % (protein_id, species, "No exons available"))
            unprocessed_species.append(species)
            continue

        # THIS PART WILL NOT EXIST IN THE NEAR FUTURE
        translated_exons = []
        utr_end_reached  = False
        for aligned in aligned_exons.get_ordered_exons():

            candidate = Exon_translation(reference_exons.get(aligned.ref_exon_id), aligned)

            # once the flag returned by chop_off_start_utr has been set,
            # every following exon is marked as not viable
            if utr_end_reached:
                candidate.viability = False

            if candidate.viability:
                candidate, utr_end_reached = chop_off_start_utr(aligned.ref_exon_id,
                                                                candidate,
                                                                reference_seq,
                                                                exon_number)
                candidate = chop_off_end_utr(aligned.ref_exon_id,
                                             candidate,
                                             reference_seq,
                                             exon_number,
                                             protein_id)

            # non-viable exons are kept in the list as well
            translated_exons.append(candidate)
        # up to here - this will get trashed

        assemble_and_store_protein(protein_id, species, translated_exons, reference_seq, fasta_path)
        create_protein_alignment(protein_id, species)

    write_failed_species_to_status(unprocessed_species, protein_dir)
    return unprocessed_species
コード例 #2
0
        
# Ad-hoc manual check (Python 2): rebuilds Exon_translation objects for one
# hard-coded protein/species pair and prints what transcribe_exons returns.
if __name__ == '__main__':   
    # Sample DNA sequences; in the visible part of this script they are only
    # referenced by the commented-out analyse_SW_alignment call below.
    query_dna = "GTCCTCCGCAAAGGCCTCAAGGCCACATCGGGGCGCAGCTCCCA---GGACCACAGAGCCCCTCTGG------------TCGGGTGGCAC---------CAAGCCCCCCACT--GAGGGCTCCTCCCGAGGGCACGAGGACAGGAGGGACAAGCAGGAGTCCTCA---GAGAGCGACCCCGAGGGGCCCATTGCCGCCCAGATGCTGTCCTTTGTCATGGACGACCCTGACTTTGAGAGCGAC---TCAGATACTCAGCGGACAGCG"
    target_dna = "GTCCTCCATACCAGCTTCGAAGCCACGGAGGGGGACAGCTCCCACGAGGACCGCAGCACCCCCCTGGCCAGGCGGTGTCTCTGTTCGCACAGGTCCGGAGAAGCGCAGCAGCACCAGGCCCCCTGCTGAG--ATGGAGCCGGGGAAGGGTGAGCAGGCCTCCTCGTCGGAGAGTGACCCCGAGGGACCCATTGCTGCACAAATGCTGTCCTTCGTCATGGATGACCCCGACTTTGAGAGCGAGGGATCAGACACACAGCGCAGGGCG"
    # Protein sequence handed to transcribe_exons at the end of this block.
    target_prot = "MFSALKKLVGSDQAPGRDKNIPAGLQSMNQALQRRFAKGVQYNMKIVIRGDRNTGKTALWHRLQGRPFVEEYIPTQEIQVTSIHWSYKTTDDIVKVEVWDVVDKGKCKKRGDGLKMENDPQEAESEMALDAEFLDVYKNCNGVVMMFDITKQWTFNYILRELPKVPTHVPVCVLGNYRDMGEHRVILPDDVRDFIDNLDRPPGSSYFRYAESSMKNSFGLKYLHKFFNIPFLQLQRETLLRQLETNQLDMDATLEELSVQQETEDQNYGIFLEMMEARSRGHASPLAANGQSPSPGSQSPVVPAGAVSTGSSSPGTPQPAPQLPLNAAPPSSVPPVPPSEALPPPACPSAPAPRRSIISRLFGTSPATEAAPPPPEPVPAAEGPATVQSVEDFVPDDRLDRSFLEDTTPARDEKKVGAKAAQQDSDSDGEALGGNPMVAGFQDDVDLEDQPRGSPPLPAGPVPSQDITLSSEEEAEVAAPTKGPAPAPQQCSEPETKWSSIPASKPRRGTAPTRTAAPPWPGGVSVRTGPEKRSSTRPPAEMEPGKGEQASSSESDPEGPIAAQMLSFVMDDPDFESEGSDTQRRADDFPVRDDPSDVTDEDEGPAEPPPPPKLPLPAFRLKNDSDLFGLGLEEAGPKESSEEGKEGKTPSKEKKKKKKKGKEEEEKAAKKKSKHKKSKDKEEGKEERRRRQQRPPRSRERTAADELEAFLGGGAPGGRHPGGGDYEEL"
    #print analyse_SW_alignment(query_dna, target_dna, target_prot)
    ec = ExonContainer.Instance()
    eec = EnsemblExonContainer.Instance()
    # Hard-coded (protein ID, species, algorithm) key; a missing entry is not
    # handled here, so any exception raised by ec.get propagates.
    exon_key = ("ENSP00000340983", "Ailuropoda_melanoleuca", "sw_gene")
    exons = ec.get(exon_key)
    exons_for_transcription = []
    for al_exon in exons.get_ordered_exons():
        # Only the first element of each yielded item is used.
        # NOTE(review): presumably each item is a sequence of alternative
        # alignments for the same exon - confirm against get_ordered_exons.
        al_exon = al_exon[0]
        ref_exon = eec.get(al_exon.ref_exon_id)
        # NOTE(review): confirm this four-argument call matches the current
        # Exon_translation constructor.
        trans_exon = Exon_translation(al_exon.ordinal, 
                                      ref_exon.length, 
                                      al_exon.alignment_info["query_seq"], 
                                      al_exon.alignment_info["sbjct_seq"])
        # Copy the alignment coordinates and identity statistics over from
        # the aligned exon's alignment_info mapping.
        trans_exon.set_intervals(al_exon.alignment_info["query_start"], 
                                 al_exon.alignment_info["query_end"], 
                                 al_exon.alignment_info["sbjct_start"], 
                                 al_exon.alignment_info["sbjct_end"])
        trans_exon.set_identity(al_exon.alignment_info["identities"], al_exon.alignment_info["length"])
        # NOTE(review): the method name is spelled "set_viablity" - verify it
        # matches the name defined on Exon_translation.
        trans_exon.set_viablity(al_exon.viability)
        exons_for_transcription.append(trans_exon)
        
    # Python 2 print statement: shows whatever transcribe_exons returns.
    print transcribe_exons(exons_for_transcription, target_prot)
    '''
    #print analyse_SW_alignment(query_dna, target_dna, target_prot)
    exons_SW = parse_SW_output("/home/intern/Documents/sw_input/sw_output/ex1.swout")
    for ex in exons_SW:
        print ex.match, ex.length, ex.score