def translate_alignment_exons_for_protein(protein_id, exon_number):
    '''
    Translates all the proteins for which there is SW to gene alignment.

    For every species returned by get_species_list(), the aligned exons
    stored under the key (protein_id, species, "sw_gene") are wrapped in
    Exon_translation objects, passed through chop_off_start_utr /
    chop_off_end_utr, handed to assemble_and_store_protein and then to
    create_protein_alignment. Species for which the ExonContainer raises
    KeyError are logged to the "translator" logger, collected and skipped.

    Parameters:
        protein_id  -- ID of the reference protein being processed
        exon_number -- forwarded unchanged to the UTR chopping helpers

    Returns:
        List of species for which no aligned exons were available. The
        same list is also passed to write_failed_species_to_status().
    '''
    algorithm = "sw_gene"

    # instantiate all the utilities
    logger = Logger.Instance()
    dc = DirectoryCrawler()
    translation_logger = logger.get_logger("translator")

    # instantiate all the containers
    eec = EnsemblExonContainer.Instance()
    ec = ExonContainer.Instance()
    pc = ProteinContainer.Instance()

    failed_species = []

    # resolved once and reused for every species below
    assembled_protein_path = dc.get_assembled_protein_path(protein_id)

    # for all the species for which it is required to generate translated protein
    for species in get_species_list(protein_id, assembled_protein_path):

        # get all you need for the processing
        assembled_protein_fasta = "%s/%s.fa" % (assembled_protein_path, species)
        exon_key = (protein_id, species, algorithm)
        # kept inside the loop so the protein is only fetched when there is
        # at least one species to process
        target_prot = pc.get(protein_id)
        target_prot_seq = target_prot.get_sequence_record().seq

        try:
            exons = ec.get(exon_key)
        except KeyError:
            translation_logger.error("%s,%s,%s" % (protein_id, species, "No exons available"))
            failed_species.append(species)
            continue

        exons_for_transcription = []

        # THIS PART WILL NOT EXIST IN THE NEAR FUTURE
        last_translated_exon = False
        for al_exon in exons.get_ordered_exons():
            ref_exon = eec.get(al_exon.ref_exon_id)
            trans_exon = Exon_translation(ref_exon, al_exon)

            # if we've already bumped into exon with UTR on its end,
            # all the other exons are not viable
            if last_translated_exon:
                trans_exon.viability = False

            if trans_exon.viability:
                (trans_exon, last_translated_exon) = chop_off_start_utr(
                    al_exon.ref_exon_id, trans_exon, target_prot_seq, exon_number)
                trans_exon = chop_off_end_utr(
                    al_exon.ref_exon_id, trans_exon, target_prot_seq, exon_number, protein_id)

            # non-viable exons are still appended, just flagged as such
            exons_for_transcription.append(trans_exon)
        # up to here - this will get trashed

        assemble_and_store_protein(protein_id, species, exons_for_transcription,
                                   target_prot_seq, assembled_protein_fasta)
        create_protein_alignment(protein_id, species)

    write_failed_species_to_status(failed_species, assembled_protein_path)
    return failed_species
if __name__ == '__main__': query_dna = "GTCCTCCGCAAAGGCCTCAAGGCCACATCGGGGCGCAGCTCCCA---GGACCACAGAGCCCCTCTGG------------TCGGGTGGCAC---------CAAGCCCCCCACT--GAGGGCTCCTCCCGAGGGCACGAGGACAGGAGGGACAAGCAGGAGTCCTCA---GAGAGCGACCCCGAGGGGCCCATTGCCGCCCAGATGCTGTCCTTTGTCATGGACGACCCTGACTTTGAGAGCGAC---TCAGATACTCAGCGGACAGCG" target_dna = "GTCCTCCATACCAGCTTCGAAGCCACGGAGGGGGACAGCTCCCACGAGGACCGCAGCACCCCCCTGGCCAGGCGGTGTCTCTGTTCGCACAGGTCCGGAGAAGCGCAGCAGCACCAGGCCCCCTGCTGAG--ATGGAGCCGGGGAAGGGTGAGCAGGCCTCCTCGTCGGAGAGTGACCCCGAGGGACCCATTGCTGCACAAATGCTGTCCTTCGTCATGGATGACCCCGACTTTGAGAGCGAGGGATCAGACACACAGCGCAGGGCG" target_prot = "MFSALKKLVGSDQAPGRDKNIPAGLQSMNQALQRRFAKGVQYNMKIVIRGDRNTGKTALWHRLQGRPFVEEYIPTQEIQVTSIHWSYKTTDDIVKVEVWDVVDKGKCKKRGDGLKMENDPQEAESEMALDAEFLDVYKNCNGVVMMFDITKQWTFNYILRELPKVPTHVPVCVLGNYRDMGEHRVILPDDVRDFIDNLDRPPGSSYFRYAESSMKNSFGLKYLHKFFNIPFLQLQRETLLRQLETNQLDMDATLEELSVQQETEDQNYGIFLEMMEARSRGHASPLAANGQSPSPGSQSPVVPAGAVSTGSSSPGTPQPAPQLPLNAAPPSSVPPVPPSEALPPPACPSAPAPRRSIISRLFGTSPATEAAPPPPEPVPAAEGPATVQSVEDFVPDDRLDRSFLEDTTPARDEKKVGAKAAQQDSDSDGEALGGNPMVAGFQDDVDLEDQPRGSPPLPAGPVPSQDITLSSEEEAEVAAPTKGPAPAPQQCSEPETKWSSIPASKPRRGTAPTRTAAPPWPGGVSVRTGPEKRSSTRPPAEMEPGKGEQASSSESDPEGPIAAQMLSFVMDDPDFESEGSDTQRRADDFPVRDDPSDVTDEDEGPAEPPPPPKLPLPAFRLKNDSDLFGLGLEEAGPKESSEEGKEGKTPSKEKKKKKKKGKEEEEKAAKKKSKHKKSKDKEEGKEERRRRQQRPPRSRERTAADELEAFLGGGAPGGRHPGGGDYEEL" #print analyse_SW_alignment(query_dna, target_dna, target_prot) ec = ExonContainer.Instance() eec = EnsemblExonContainer.Instance() exon_key = ("ENSP00000340983", "Ailuropoda_melanoleuca", "sw_gene") exons = ec.get(exon_key) exons_for_transcription = [] for al_exon in exons.get_ordered_exons(): al_exon = al_exon[0] ref_exon = eec.get(al_exon.ref_exon_id) trans_exon = Exon_translation(al_exon.ordinal, ref_exon.length, al_exon.alignment_info["query_seq"], al_exon.alignment_info["sbjct_seq"]) trans_exon.set_intervals(al_exon.alignment_info["query_start"], al_exon.alignment_info["query_end"], al_exon.alignment_info["sbjct_start"], 
al_exon.alignment_info["sbjct_end"]) trans_exon.set_identity(al_exon.alignment_info["identities"], al_exon.alignment_info["length"]) trans_exon.set_viablity(al_exon.viability) exons_for_transcription.append(trans_exon) print transcribe_exons(exons_for_transcription, target_prot) ''' #print analyse_SW_alignment(query_dna, target_dna, target_prot) exons_SW = parse_SW_output("/home/intern/Documents/sw_input/sw_output/ex1.swout") for ex in exons_SW: print ex.match, ex.length, ex.score