def test_2ypd_A():
    """Align a long target against template 2YPD chain A.

    Checks that the range of the first returned alignment is not longer
    than the target sequence itself.
    """
    seq = (
        "MADAAASPVGKRLLLLFADTAASASASAPAAAAASGDPGPALRTRAWRAGTVRAMSGAVPQDLA"
        "IFVEFDGCNWKQHSWVKVHAEEVIVLLLEGSLVWAPREDPVLLQGIRVSIAQWPALTFTPLVDK"
        "LGLGSVVPVEYLLDRELRFLSDANGLHLFQMGTDSQNQILLEHAALRETVNALISDQKLQEIFS"
        "RGPYSVQGHRVKIYQPEGEEGWLYGVVSHQDSITRLMEVSVTESGEIKSVDPRLIHVMLMDNST"
        "PQSEGGTLKAVKSSKGKKKRESIEGKDGRRRKSASDSGCDPASKKLKGDRGEVDSNGSDGGEAS"
        "RGPWKGGNASGEPGLDQRAKQPPSTFVPQINRNIRFATYTKENGRTLVVQDEPVGGDTPASFTP"
        "YSTATGQTPLAPEVGGAENKEAGKTLEQVGQGIVASAAVVTTASSTPNTVRISDTGLAAGTVPE"
        "KQKGSRSQASGENSRNSILASSGFGAPLPSSSQPLTFGSGRSQSNGVLATENKPLGFSFGCSSA"
        "QEAQKDTDLSKNLFFQCMSQTLPTSNYFTTVSESLADDSSSRDSFKQSLESLSSGLCKGRSVLG"
        "TDTKPGSKAGSSVDRKVPAESMPTLTPAFPRSLLNARTPENHENLFLQPPKLSREEPSNPFLAF"
        "VEKVEHSPFSSFASQASGSSSSATTVTSKVAPSWPESHSSADSASLAKKKPLFITTDSSKLVSG"
        "VLGSALTSGGPSLSAMGNGRSSSPTSSLTQPIEMPTLSSSPTEERPTVGPGQQDNPLLKTFSNV"
        "FGRHSGGFLSSPADFSQENKAPFEAVKRFSLDERSLACRQDSDSSTNSDLSDLSDSEEQLQAKT"
        "GLKGIPEHLMGKLGPNGERSAELLLGKSKGKQAPKGRPRTAPLKVGQSVLKDVSKVKKLKQSGE"
        "PFLQDGSCINVAPHLHKCRECRLERYRKFKEQEQDDSTVACRFFHFRRLIFTRKGVLRVEGFLS"
        "PQQSDPDAMNLWIPSSSLAEGIDLETSKYILANVGDQFCQLVMSEKEAMMMVEPHQKVAWKRAV"
        "RGVREMCDVCETTLFNIHWVCRKCGFGVCLDCYRLRKSRPRSETEEMGDEEVFSWLKCAKGQSH"
        "EPENLMPTQIIPGTALYNIGDMVHAARGKWGIKANCPCISRQNKSVLRPAVTNGMSQLPSINPS"
        "ASSGNETTFSGGGGPAPVTTPEPDHVPKADSTDIRSEEPLKTDSSASNSNSELKAIRPPCPDTA"
        "PPSSALHWLADLATQKAKEETKEAGSLRSVLNKESHSPFGLDSFNSTAKVSPLTPKLFNSLLLG"
        "PTASNNKTEGSSLRDLLHSGPGKLPQTPLDTGIPFPPVFSTSSAGVKSKASLPNFLDHIIASVV"
        "ENKKTSDASKRACNLTDTQKEVKEMVMGLNVLDPHTSHSWLCDGRLLCLHDPSNKNNWKIFREC"
        "WKQGQPVLVSGVHKKLKSELWKPEAFSQEFGDQDVDLVNCRNCAIISDVKVRDFWDGFEIICKR"
        "LRSEDGQPMVLKLKDWPPGEDFRDMMPTRFEDLMENLPLPEYTKRDGRLNLASRLPSYFVRPDL"
        "GPKMYNAYGLITAEDRRVGTTNLHLDVSDAVNVMVYVGIPIGEGAHDEEVLKTIDEGDADEVTK"
        "QRIHDGKEKPGALWHIYAAKDAEKIRELLRKVGEEQGQENPPDHDPIHDQSWYLDQTLRKRLYE"
        "EYGVQGWAIVQFLGDAVFIPAGAPHQVHNLYSCIKVAEDFVSPEHVKHCFRLTQEFRHLSNTHT"
        "NHEDKLQVKNIIYHAVKDAVGTLKAHESKLARS"
    )

    wanted_template = TemplateID('2YPD', 'A')
    found = domain_aligner.get_domain_alignments(
        seq, template_id=wanted_template)

    first_range_length = found[0].range.get_length()
    ok_(first_range_length <= len(seq))
def test_coagulation_factor_v():
    """Request domain alignments for a long sequence on template 2R7E chain A.

    The result is not inspected; the test only exercises the call with
    residue 534 required.
    """
    seq = (
        "MFPGCPRLWVLVVLGTSWVGWGSQGTEAAQLRQFYVAAQGISWSYRPEPTNSSLNLSVTSFKKIVYREYEPYFKKEKPQSTISGLLGPTLYAEVGDIIKVHFKNKADKPLSIHPQGIRYSKLSEGASYLDHTFPAEKMDDAVAPGREYTYEWSISEDSGPTHDDPPCLTHIYYSHENLIEDFNSGLIGPLLICKKGTLTEGGTQKTFDKQIVLLFAVFDESKSWSQSSSLMYTVNGYVNGTMPDITVCAHDHISWHLLGMSSGPELFSIHFNGQVLEQNHHKVSAITLVSATSTTANMTVGPEGKWIISSLTPKHLQAGMQAYIDIKNCPKKTRNLKKITREQRRHMKRWEYFIAAEEVIWDYAPVIPANMDKKYRSQHLDNFSNQIGKHYKKVMYTQYEDESFTKHTVNPNMKEDGILGPIIRAQVRDTLKIVFKNMASRPYSIYPHGVTFSPYEDEVNSSFTSGRNNTMIRAVQPGETYTYKWNILEFDEPTENDAQCLTRPYYSDVDIMRDIASGLIGLLLICKSRSLDRRGIQRAADIEQQAVFAVFDENKSWYLEDNINKFCENPDEVKRDDPKFYESNIMSTINGYVPESITTLGFCFDDTVQWHFCSVGTQNEILTIHFTGHSFIYGKRHEDTLTLFPMRGESVTVTMDNVGTWMLTSMNSSPRSKKLRLKFRDVKCIPDDDEDSYEIFEPPESTVMATRKMHDRLEPEDEESDADYDYQNRLAAALGIRSFRNSSLNQEEEEFNLTALALENGTEFVSSNTDIIVGSNYSSPSNISKFTVNNLAEPQKAPSHQQATTAGSPLRHLIGKNSVLNSSTAEHSSPYSEDPIEDPLQPDVTGIRLLSLGAGEFKSQEHAKHKGPKVERDQAAKHRFSWMKLLAHKVGRHLSQDTGSPSGMRPWEDLPSQDTGSPSRMRPWKDPPSDLLLLKQSNSSKILVGRWHLASEKGSYEIIQDTDEDTAVNNWLISPQNASRAWGESTPLANKPGKQSGHPKFPRVRHKSLQVRQDGGKSRLKKSQFLIKTRKKKKEKHTHHAPLSPRTFHPLRSEAYNTFSERRLKHSLVLHKSNETSLPTDLNQTLPSMDFGWIASLPDHNQNSSNDTGQASCPPGLYQTVPPEEHYQTFPIQDPDQMHSTSDPSHRSSSPELSEMLEYDRSHKSFPTDISQMSPSSEHEVWQTVISPDLSQVTLSPELSQTNLSPDLSHTTLSPELIQRNLSPALGQMPISPDLSHTTLSPDLSHTTLSLDLSQTNLSPELSQTNLSPALGQMPLSPDLSHTTLSLDFSQTNLSPELSHMTLSPELSQTNLSPALGQMPISPDLSHTTLSLDFSQTNLSPELSQTNLSPALGQMPLSPDPSHTTLSLDLSQTNLSPELSQTNLSPDLSEMPLFADLSQIPLTPDLDQMTLSPDLGETDLSPNFGQMSLSPDLSQVTLSPDISDTTLLPDLSQISPPPDLDQIFYPSESSQSLLLQEFNESFPYPDLGQMPSPSSPTLNDTFLSKEFNPLVIVGLSKDGTDYIEIIPKEEVQSSEDDYAEIDYVPYDDPYKTDVRTNINSSRDPDNIAAWYLRSNNGNRRNYYIAAEEISWDYSEFVQRETDIEDSDDIPEDTTYKKVVFRKYLDSTFTKRDPRGEYEEHLGILGPIIRAEVDDVIQVRFKNLASRPYSLHAHGLSYEKSSEGKTYEDDSPEWFKEDNAVQPNSSYTYVWHATERSGPESPGSACRAWAYYSAVNPEKDIHSGLIGPLLICQKGILHKDSNMPMDMREFVLLFMTFDEKKSWYYEKKSRSSWRLTSSEMKKSHEFHAINGMIYSLPGLKMYEQEWVRLHLLNIGGSQDIHVVHFHGQTLLENGNKQHQLGVWPLLPGSFKTLEMKASKPGWWLLNTEVGENQRAGMQTPFLIMDRDCRMPMGLSTGIISDSQIKASEFLGYWEPRLARLNNGGSYNAWSVEKLAAEFASK"
        "PWIQVDMQKEVIITGIQTQGAKHYLKSCYTTEFYVAYSSNQINWQIFKGNSTRNVMYFNGNSDASTIKENQFDPPIVARYIRISPTRAYNRPTLRLELQGCEVNGCSTPLGMENGKIENKQITASSFKKSWWGDYWEPFRARLNAQGRVNAWQAKANNNKQWLEIDLLKIKKITAIITQGCKSLSSEMYVKSYTIHYSEQGVEWKPYRLKSSMVDKIFEGNTNTKGHVKNFFNPPIISRFIRVIPKTWNQSIALRLELFGCDIY"
    )

    wanted_template = TemplateID('2R7E', 'A')
    alignments = domain_aligner.get_domain_alignments(
        seq, require_resnum=534, template_id=wanted_template)
def create_model(target_sequence, target_species_id,
                 require_resnum=None, chosen_template_id=None):
    """Return a model for the target, building one when none is stored.

    The species id is upper-cased before use. The whole lookup/build step
    runs under a file lock whose name is derived from the sequence id,
    species, required residue number and chosen template.

    :param target_sequence: sequence to model
    :param target_species_id: species identifier (case-insensitive)
    :param require_resnum: passed through to storage lookup, domain
        alignment, model selection and model building
    :param chosen_template_id: passed through to storage lookup and
        domain alignment
    :return: the result of ``select_best_model`` when stored models exist,
        otherwise the result of ``modeler.build_model``; ``None`` when no
        domain alignments are found
    :raises InitError: when ``model_storage.model_dir`` is not set
    """
    target_species_id = target_species_id.upper()

    sequence_id = model_storage.get_sequence_id(target_sequence)
    lock_name = "lock_search_%s_%s_%s_%s" % (sequence_id, target_species_id,
                                             str(require_resnum),
                                             str(chosen_template_id))

    if model_storage.model_dir is None:
        raise InitError("model directory is not set")

    lock_path = os.path.join(model_storage.model_dir, lock_name)
    with FileLock(lock_path):
        model_paths = model_storage.list_models(target_sequence,
                                                target_species_id,
                                                require_resnum,
                                                chosen_template_id)
        if len(model_paths) > 0:
            # Reuse what is already stored instead of building again.
            return select_best_model(model_paths, target_sequence,
                                     require_resnum)

        ModelLogger.get_current().clear()

        domain_alignments = domain_aligner.get_domain_alignments(
            target_sequence, require_resnum, chosen_template_id)
        if len(domain_alignments) <= 0:
            # Logger.warn is a deprecated alias; warning() is the
            # supported spelling.
            _log.warning("no domain alignments for target={} resnum={} template={}"
                         .format(target_sequence, require_resnum,
                                 chosen_template_id))
            return None

        domain_alignment = select_best_domain_alignment(domain_alignments)
        return modeler.build_model(target_sequence, target_species_id,
                                   domain_alignment, require_resnum)
def create_model(target_sequence, target_species_id,
                 require_resnum=None, chosen_template_id=None):
    """Return a model for the target, building one when none is stored.

    The species id is upper-cased before use. The lookup/build step runs
    under a file lock named after the sequence id, species, required
    residue number and chosen template.

    :param target_sequence: sequence to model
    :param target_species_id: species identifier (case-insensitive)
    :param require_resnum: passed through to storage lookup, domain
        alignment and model building
    :param chosen_template_id: passed through to storage lookup and
        domain alignment
    :return: the result of ``select_best_model`` when stored models exist,
        otherwise the result of ``modeler.build_model``; ``None`` when no
        domain alignments are found
    :raises InitError: when ``model_storage.model_dir`` is not set
    """
    target_species_id = target_species_id.upper()

    sequence_id = model_storage.get_sequence_id(target_sequence)
    lock_name = "lock_search_%s_%s_%s_%s" % (sequence_id, target_species_id,
                                             str(require_resnum),
                                             str(chosen_template_id))

    if model_storage.model_dir is None:
        raise InitError("model directory is not set")

    lock_path = os.path.join(model_storage.model_dir, lock_name)
    with FileLock(lock_path):
        model_paths = model_storage.list_models(target_sequence,
                                                target_species_id,
                                                require_resnum,
                                                chosen_template_id)
        if len(model_paths) > 0:
            # Reuse what is already stored instead of building again.
            return select_best_model(model_paths)

        ModelLogger.get_current().clear()

        domain_alignments = domain_aligner.get_domain_alignments(
            target_sequence, require_resnum, chosen_template_id)
        if len(domain_alignments) <= 0:
            # Logger.warn is a deprecated alias; warning() is the
            # supported spelling.
            _log.warning("no domain alignments for target={} resnum={} template={}"
                         .format(target_sequence, require_resnum,
                                 chosen_template_id))
            return None

        domain_alignment = select_best_domain_alignment(domain_alignments)
        return modeler.build_model(target_sequence, target_species_id,
                                   domain_alignment, require_resnum)
def test_align_with_repeats():
    """Residue 545 must be covered when aligning against 2w86 chain A.

    The target literal is laid out in chunks for readability. All
    whitespace (not only newlines) is stripped so that no space, indent
    or carriage return leaks into the sequence handed to the aligner.
    """
    sequence = "".join("""
MRRGRLLEIALGFTVLLASYTSHGADANLEAGNVKETRASRAKRRGGGGHD
ALKGPNVCGSRYNAYCCPGWKTLPGGNQCIVPICRHSCGDGFCSRPNMCTCPSGQIAPSCGSRSIQHCN
IRCMNGGSCSDDHCLCQKGYIGTHCGQPVCESGCLNGGRCVAPNRCACTYGFTGPQCERDYRTGPCFTV
ISNQMCQGQLSGIVCTKTLCCATVGRAWGHPCEMCPAQPHPCRRGFIPNIRTGACQDVDECQAIPGLCQ
GGNCINTVGSFECKCPAGHKLNEVSQKCEDIDECSTIPGICEGGECTNTVSSYFCKCPPGFYTSPDGTR
CIDVRPGYCYTALTNGRCSNQLPQSITKMQCCCDAGRCWSPGVTVAPEMCPIRATEDFNKLCSVPMVIP
GRPEYPPPPLGPIPPVLPVPPGFPPGPQIPVPRPPVEYLYPSREPPRVLPVNVTDYCQLVRYLCQNGRC
IPTPGSYRCECNKGFQLDLRGECIDVDECEKNPCAGGECINNQGSYTCQCRAGYQSTLTRTECRDIDEC
LQNGRICNNGRCINTDGSFHCVCNAGFHVTRDGKNCEDMDECSIRNMCLNGMCINEDGSFKCICKPGFQ
LASDGRYCKDINECETPGICMNGRCVNTDGSYRCECFPGLAVGLDGRVCVDTHMRSTCYGGYKRGQCIK
PLFGAVTKSECCCASTEYAFGEPCQPCPAQNSAEYQALCSSGPGMTSAGSDINECALDPDICPNGICEN
LRGTYKCICNSGYEVDSTGKNCVDINECVLNSLLCDNGQCRNTPGSFVCTCPKGFIYKPDLKTCEDIDE
CESSPCINGVCKNSPGSFICECSSESTLDPTKTICIETIKGTCWQTVIDGRCEININGATLKSQCCSSL
GAAWGSPCTLCQVDPICGKGYSRIKGTQCEDIDECEVFPGVCKNGLCVNTRGSFKCQCPSGMTLDATGR
ICLDIRLETCFLRYEDEECTLPIAGRHRMDACCCSVGAAWGTEECEECPMRNTPEYEELCPRGPGFATK
EITNGKPFFKDINECKMIPSLCTHGKCRNTIGSFKCRCDSGFALDSEERNCTDIDECRISPDLCGRGQC
VNTPGDFECKCDEGYESGFMMMKNCMDIDECQRDPLLCRGGVCHNTEGSYRCECPPGHQLSPNISACID
INECELSAHLCPNGRCVNLIGKYQCACNPGYHSTPDRLFCVDIDECSIMNGGCETFCTNSEGSYECSCQ
PGFALMPDQRSCTDIDECEDNPNICDGGQCTNIPGEYRCLCYDGFMASEDMKTCVDVNECDLNPNICLS
GTCENTKGSFICHCDMGYSGKKGKTGCTDINECEIGAHNCGKHAVCTNTAGSFKCSCSPGWIGDGIKCT
DLDECSNGTHMCSQHADCKNTMGSYRCLCKEGYTGDGFTCTDLDECSENLNLCGNGQCLNAPGGYRCEC
DMGFVPSADGKACEDIDECSLPNICVFGTCHNLPGLFRCECEIGYELDRSGGNCTDVNECLDPTTCISG
NCVNTPGSYICDCPPDFELNPTRVGCVDTRSGNCYLDIRPRGDNGDTACSNEIGVGVSKASCCCSLGKA
WGTPCEMCPAVNTSEYKILCPGGEGFRPNPITVILEDIDECQELPGLCQGGKCINTFGSFQCRCPTGYY
LNEDTRVCDDVNECETPGICGPGTCYNTVGNYTCICPPDYMQVNGGNNCMDMRRSLCYRNYYADNQTCD
GELLFNMTKKMCCCSYNIGRAWNKPCEQCPIPSTDEFATLCGSQRPGFVIDIYTGLPVDIDECREIPGV
CENGVCINMVGSFRCECPVGFFYNDKLLVCEDIDECQNGPVCQRNAECINTAGSYRCDCKPGYRFTSTG
QCNDRNECQEIPNICSHGQCIDTVGSFYCLCHTGFKTNDDQTMCLDINECERDACGNGTCRNTIGSFNC
RCNHGFILSHNNDCIDVDECASGNGNLCRNGQCINTVGSFQCQCNEGYEVAPDGRTCVDINECLLEPRK
CAPGTCQNLDGSYRCICPPGYSLQNEKCEDIDECVEEPEICALGTCSNTEGSFKCLCPEGFSLSSSGRR
CQDLRMSYCYAKFEGGKCSSPKSRNHSKQECCCALKGEGWGDPCELCPTEPDEAFRQICPYGSGIIVGP
DDSAVDMDECKEPDVCKHGQCINTDGSYRCECPFGYILAGNECVDTDECSVGNPCGNGTCKNVIGGFEC
TCEEGFEPGPMMTCEDINECAQNPLLCAFRCVNTYGSYECKCPVGYVLREDRRMCKDEDECEEGKHDCT
EKQMECKNLIGTYMCICGPGYQRRPDGEGCVDENECQTKPGICENGRCLNTRGSYTCECNDGFTASPNQ
DECLDNREGYCFTEVLQNMCQIGSSNRNPVTKSECCCDGGRGWGPHCEICPFQGTVAFKKLCPHGRGFM
TNGADIDECKVIHDVCRNGECVNDRGSYHCICKTGYTPDITGTSCVDLNECNQAPKPCNFICKNTEGSY
QCSCPKGYILQEDGRSCKDLDECATKQHNCQFLCVNTIGGFTCKCPPGFTQHHTSCIDNNECTSDINLC
GSKGICQNTPGSFTCECQRGFSLDQTGSSCEDVDECEGNHRCQHGCQNIIGGYRCSCPQGYLQHYQWNQ
CVDENECLSAHICGGASCHNTLGSYKCMCPAGFQYEQFSGGCQDINECGSAQAPCSYGCSNTEGGYLCG
CPPGYFRIGQGHCVSGMGMGRGNPEPPVSGEMDDNSLSPEACYECKINGYPKRGRKRRSTNETDASNIE
DQSETEANVSLASWDVEKTAIFAFNISHVSNKVRILELLPALTTLTNHNRYLIESGNEDGFFKINQKEG
ISYLHFTKKKPVAGTYSLQISSTPLYKKKELNQLEDKYDKDYLSGELGDNLKMKIQVLLH
""".split())

    template_id = TemplateID('2w86', 'A')
    residue_number = 545

    domain_alignments = domain_aligner.get_domain_alignments(
        sequence, residue_number, template_id)
    # Fail with a clear message here rather than inside the selection
    # helper when nothing was found.
    ok_(len(domain_alignments) > 0)
    domain_alignment = select_best_domain_alignment(domain_alignments)

    context = modeler._prepare_context(template_id.pdbid)
    context.set_main_target(sequence, 'HUMAN', template_id.chain_id)

    alignments = modeler._make_alignments(sequence, 'HUMAN', domain_alignment,
                                          context, residue_number)
    alignment = alignments[template_id.chain_id]

    ok_(alignment.is_target_residue_covered(residue_number))
def test_align_filamin():
    """Residue 429 must be covered when aligning against 4M9P chain A.

    The target literal is laid out in chunks for readability. All
    whitespace (not only newlines) is stripped so that no space, indent
    or carriage return leaks into the sequence handed to the aligner.
    """
    sequence = "".join("""
MSSSHSRAGQSAAGAAPGGGVDTRDAEMPATEKDLAEDAPWKKIQQNTFTRWCNEHLKCVSKRIANLQTDLS
DGLRLIALLEVLSQKKMHRKHNQRPTFRQMQLENVSVALEFLDRESIKLVSIDSKAIVDGNLKLILGLIWTL
ILHYSISMPMWDEEEDEEAKKQTPKQRLLGWIQNKLPQLPITNFSRDWQSGRALGALVDSCAPGLCPDWDSW
DASKPVTNAREAMQQADDWLGIPQVITPEEIVDPNVDEHSVMTYLSQFPKAKLKPGAPLRPKLNPKKARAYG
PGIEPTGNMVKKRAEFTVETRSAGQGEVLVYVEDPAGHQEEAKVTANNDKNRTFSVWYVPEVTGTHKVTVLF
AGQHIAKSPFEVYVDKSQGDASKVTAQGPGLEPSGNIANKTTYFEIFTAGAGTGEVEVVIQDPMGQKGTVEP
QLEARGDSTYRCSYQPTMEGVHTVHVTFAGVPIPRSPYTVTVGQACNPSACRAVGRGLQPKGVRVKETADFK
VYTKGAGSGELKVTVKGPKGEERVKQKDLGDGVYGFEYYPMVPGTYIVTITWGGQNIGRSPFEVKVGTECGN
QKVRAWGPGLEGGVVGKSADFVVEAIGDDVGTLGFSVEGPSQAKIECDDKGDGSCDVRYWPQEAGEYAVHVL
CNSEDIRLSPFMADIRDAPQDFHPDRVKARGPGLEKTGVAVNKPAEFTVDAKHGGKAPLRVQVQDNEGCPVE
ALVKDNGNGTYSCSYVPRKPVKHTAMVSWGGVSIPNSPFRVNVGAGSHPNKVKVYGPGVAKTGLKAHEPTYF
TVDCAEAGQGDVSIGIKCAPGVVGPAEADIDFDIIRNDNDTFTVKYTPRGAGSYTIMVLFADQATPTSPIRV
KVEPSHDASKVKAEGPGLSRTGVELGKPTHFTVNAKAAGKGKLDVQFSGLTKGDAVRDVDIIDHHDNTYTVK
YTPVQQGPVGVNVTYGGDPIPKSPFSVAVSPSLDLSKIKVSGLGEKVDVGKDQEFTVKSKGAGGQGKVASKI
VGPSGAAVPCKVEPGLGADNSVVRFLPREEGPYEVEVTYDGVPVPGSPFPLEAVAPTKPSKVKAFGPGLQGG
SAGSPARFTIDTKGAGTGGLGLTVEGPCEAQLECLDNGDGTCSVSYVPTEPGDYNINILFADTHIPGSPFKA
HVVPCFDASKVKCSGPGLERATAGEVGQFQVDCSSAGSAELTIEICSEAGLPAEVYIQDHGDGTHTITYIPL
CPGAYTVTIKYGGQPVPNFPSKLQVEPAVDTSGVQCYGPGIEGQGVFREATTEFSVDARALTQTGGPHVKAR
VANPSGNLTETYVQDRGDGMYKVEYTPYEEGLHSVDVTYDGSPVPSSPFQVPVTEGCDPSRVRVHGPGIQSG
TTNKPNKFTVETRGAGTGGLGLAVEGPSEAKMSCMDNKDGSCSVEYIPYEAGTYSLNVTYGGHQVPGSPFKV
PVHDVTDASKVKCSGPGLSPGMVRANLPQSFQVDTSKAGVAPLQVKVQGPKGLVEPVDVVDNADGTQTVNYV
PSREGPYSISVLYGDEEVPRSPFKVKVLPTHDASKVKASGPGLNTTGVPASLPVEFTIDAKDAGEGLLAVQI
TDPEGKPKKTHIQDNHDGTYTVAYVPDVTGRYTILIKYGGDEIPFSPYRVRAVPTGDASKCTVTVSIGGHGL
GAGIGPTIQIGEETVITVDTKAAGKGKVTCTVCTPDGSEVDVDVVENEDGTFDIFYTAPQPGKYVICVRFGG
EHVPNSPFQVTALAGDQPSVQPPLRSQQLAPQYTYAQGGQQTWAPERPLVGVNGLDVTSLRPFDLVIPFTIK
KGEITGEVRMPSGKVAQPTITDNKDGTVTVRYAPSEAGLHEMDIRYDNMHIPGSPLQFYVDYVNCGHVTAYG
PGLTHGVVNKPATFTVNTKDAGEGGLSLAIEGPSKAEISCTDNQDGTCSVSYLPVLPGDYSILVKYNEQHVP
GSPFTARVTGDDSMRMSHLKVGSAADIPINISETDLSLLTATVVPPSGREEPCLLKRLRNGHVGISFVPKET
GEHLVHVKKNGQHVASSPIPVVISQSEIGDASRVRVSGQGLHEGHTFEPAEFIIDTRDAGYGGLSLSIEGPS
KVDINTEDLEDGTCRVTYCPTEPGNYIINIKFADQHVPGSPFSVKVTGEGRVKESITRRRRAPSVANVGSHC
DLSLKIPEISIQDMTAQVTSPSGKTHEAEIVEGENHTYCIRFVPAEMGTHTVSVKYKGQHVPGSPFQFTVGP
LGEGGAHKVRAGGPGLERAEAGVPAEFSIWTREAGAGGLAIAVEGPSKAEISFEDRKDGSCGVAYVVQEPGD
YEVSVKFNEEHIPDSPFVVPVASPSGDARRLTVSSLQESGLKVNQPASFAVSLNGAKGAIDAKVHSPSGALE
ECYVTEIDQDKYAVRFIPRENGVYLIDVKFNGTHIPGSPFKIRVGEPGHGGDPGLVSAYGAGLEGGVTGNPA
EFVVNTSNAGAGALSVTIDGPSKVKMDCQECPEGYRVTYTPMAPGSYLISIKYGGPYHIGGSPFKAKVTGPR
LVSNHSLHETSSVFVDSLTKATCAPQHGAPGPGPADASKVVAKGLGLSKAYVGQKSSFTVDCSKAGNNMLLV
GVHGPRTPCEEILVKHVGSRLYSVSYLLKDKGEYTLVVKWGDEHIPGSPYRVVVP
""".split())

    template_id = TemplateID('4M9P', 'A')
    residue_number = 429

    domain_alignments = domain_aligner.get_domain_alignments(
        sequence, residue_number, template_id)
    ok_(len(domain_alignments) > 0)
    domain_alignment = select_best_domain_alignment(domain_alignments)

    context = modeler._prepare_context(template_id.pdbid)
    context.set_main_target(sequence, 'HUMAN', template_id.chain_id)

    alignments = modeler._make_alignments(sequence, 'HUMAN', domain_alignment,
                                          context, residue_number)
    alignment = alignments[template_id.chain_id]
    _log.debug("alignment is {}".format(alignment))

    ok_(alignment.is_target_residue_covered(residue_number))
def _choose_best_target_alignment(self, context, interacting_chain_alignments,
                                  potential_target_sequences, chain_id):
    """Pick the candidate target whose alignment has the highest identity.

    Every candidate is aligned to the template chain with kmad. When that
    alignment covers less than 90 percent, domain alignments are requested
    instead and joined into one; the joined target sequence is then
    re-aligned. Candidates without any domain alignment are skipped.

    :return: the alignment with the highest percentage identity, with its
        ``target_id`` set, or ``None`` when no candidate yields one
    """
    best_alignment = None

    for target_id in potential_target_sequences:
        template_chain_sequence = context.get_sequence(chain_id)
        template_chain_secstr = context.get_secondary_structure(chain_id)
        target_sequence = potential_target_sequences[target_id]

        alignment = kmad_aligner.align(template_chain_sequence,
                                       template_chain_secstr,
                                       target_sequence)
        coverage = alignment.get_percentage_coverage()
        _log.debug("alignment {} has coverage {} %".format(alignment, coverage))

        if alignment.get_percentage_coverage() < 90.0:
            # If the coverage is too low, we need to bother interpro.
            domain_alignments = domain_aligner.get_domain_alignments(
                target_sequence, None,
                TemplateID(context.template_pdbid, chain_id))

            interacting_alignments = [
                ali for ali in domain_alignments
                if self._preserves_interactions(
                    context, ali.target_alignment.replace('-', ''),
                    chain_id, interacting_chain_alignments)
            ]
            _log.debug(
                "preserve interactions with chains {}: filtered {} alignments out of {}"
                .format(interacting_chain_alignments.keys(),
                        len(interacting_alignments),
                        len(domain_alignments)))

            # Prefer alignments that keep the interactions; fall back to
            # all domain alignments; give up on this target otherwise.
            if len(interacting_alignments) > 0:
                candidates = interacting_alignments
            elif len(domain_alignments) > 0:
                candidates = domain_alignments
            else:
                continue

            domain_alignment = \
                self._join_alignments_to_best_template_coverage(candidates)
            alignment = kmad_aligner.align(
                template_chain_sequence, template_chain_secstr,
                domain_alignment.get_target_sequence())

        alignment.target_id = target_id

        if best_alignment is None or \
                best_alignment.get_percentage_identity() < alignment.get_percentage_identity():
            best_alignment = alignment

    return best_alignment
def test_find_template(mock_get_domain_ranges):
    """At least one alignment must have more than 300 aligned residues.

    The mocked domain-range lookup returns a single range spanning the
    whole target sequence.
    """
    seq = (
        "MGKLVALVLLGVGLSLVGEMFLAFRERVNASREVEPVEPENCHLIEELESGSEDIDILPSGLAFISSGLKYP"
        "GMPNFAPDEPGKIFLMDLNEQNPRAQALEISGGFDKELFNPHGISIFIDKDNTVYLYVVNHPHMKSTVEIFK"
        "FEEQQRSLVYLKTIKHELLKSVNDIVVLGPEQFYATRDHYFTNSLLSFFEMILDLRWTYVLFYSPREVKVVA"
        "KGFCSANGITVSADQKYVYVADVAAKNIHIMEKHDNWDLTQLKVIQLGTLVDNLTVDPATGDILAGCHPNPM"
        "KLLNYNPEDPPGSEVLRIQNVLSEKPRVSTVYANNGSVLQGTSVASVYHGKILIGTVFHKTLYCEL"
    )

    whole_sequence_range = SequenceRange(0, len(seq), seq)
    mock_get_domain_ranges.return_value = [whole_sequence_range]

    alignments = domain_aligner.get_domain_alignments(seq, require_resnum=190)

    aligned_counts = [ali.count_aligned_residues() for ali in alignments]
    ok_(any(count > 300 for count in aligned_counts))
def test_any_template():
    """Without a template or residue constraint, alignments must be found."""
    seq = (
        "GYVPAVVIGTGYGAAVSALRLGEAGVQTLMLEMGQLWNQPGPDGNIFCGMLNPDKRSS"
        "WFKNRTEAPLGSFLWLDVVNRNIDPYAGVLDRVNYDQMSVYVGRGVGGGSLVNGGMAV"
        "EPKRSYFEEILPRVDSSEMYDRYFPRANSMLRVNHIDTKWFEDTEWYKFARVSREQAG"
        "KAGLGTVFVPNVYDFGYMQREAAGEVPKSALATEVIYGNNHGKQSLDKTYLAAALGTG"
        "KVTIQTLHQVKTIRQTKDGGYALTVEQKDTDGKLLATKEISCRYLFLGAGSLGSTELL"
        "VRARDTGTLPNLNSEVGAGWGPNGNIMTARANHMWNPTGAHQSSIPALGIDAWDNSDS"
        "SVFAEIAPMPAGLETWVSLYLAITKNPQRGTFVYDAATDRAKLNWTRDQNAPAVNAAK"
        "ALFDRINKANGTIYRYDLFGTQLKAFADDFCYNPLGGCVLGKATDDYGRVAGYKNLYV"
        "TDGSLIPGSVGVNPFVTITALAERNVERIIKQDV"
    )

    found = domain_aligner.get_domain_alignments(seq)
    number_found = len(found)
    ok_(number_found > 0)
def _choose_best_target_alignment(self, context, interacting_chain_alignments,
                                  potential_target_sequences, chain_id):
    """Return the best-identity alignment among the candidate targets.

    Each candidate is aligned to the template chain with kmad. If that
    alignment covers less than 90 percent, domain alignments are fetched,
    joined into one, and the joined target sequence is aligned again.
    A candidate for which no domain alignment exists is skipped.

    :return: the alignment with the highest percentage identity (its
        ``target_id`` is set), or ``None`` when there is none
    """
    best_alignment = None

    for target_id in potential_target_sequences:
        template_chain_sequence = context.get_sequence(chain_id)
        template_chain_secstr = context.get_secondary_structure(chain_id)
        candidate_sequence = potential_target_sequences[target_id]

        alignment = kmad_aligner.align(template_chain_sequence,
                                       template_chain_secstr,
                                       candidate_sequence)
        _log.debug("alignment {} has coverage {} %"
                   .format(alignment, alignment.get_percentage_coverage()))

        if alignment.get_percentage_coverage() < 90.0:
            # If the coverage is too low, we need to bother interpro.
            template_id = TemplateID(context.template_pdbid, chain_id)
            domain_alignments = domain_aligner.get_domain_alignments(
                candidate_sequence, None, template_id)

            interacting_alignments = []
            for ali in domain_alignments:
                ungapped_target = ali.target_alignment.replace('-', '')
                if self._preserves_interactions(context, ungapped_target,
                                                chain_id,
                                                interacting_chain_alignments):
                    interacting_alignments.append(ali)

            _log.debug("preserve interactions with chains {}: filtered {} alignments out of {}"
                       .format(interacting_chain_alignments.keys(),
                               len(interacting_alignments),
                               len(domain_alignments)))

            if len(interacting_alignments) > 0:
                to_join = interacting_alignments
            elif len(domain_alignments) > 0:
                to_join = domain_alignments
            else:
                # Nothing usable for this target; try the next one.
                continue

            domain_alignment = \
                self._join_alignments_to_best_template_coverage(to_join)
            alignment = kmad_aligner.align(
                template_chain_sequence, template_chain_secstr,
                domain_alignment.get_target_sequence())

        alignment.target_id = target_id

        if best_alignment is None or \
                best_alignment.get_percentage_identity() < alignment.get_percentage_identity():
            best_alignment = alignment

    return best_alignment
def test_secretase():
    """Alignments for 5A63 chain C must exist and all use that template."""
    seq = (
        "MTAAVFFGCAFIAFGPALALYVFTIATEPLRIIFLIAGAFFWLVSLLISSLVWFMARVII"
        "DNKDGPTQKYLLIFGAFVSVYIQEMFRFAYYKLLKKASEGLKSINPGETAPSMRLLAYVS"
        "GLGFGIMSGVFSFVNTLSDSLGPGTVGIHGDSPQFFLYSAFMTLVIILLHVFWGIVFFDG"
        "CEKKKWGILLIVLLTHLLVSAQTFISSYYGINLASAFIILVLMGTWAFLAAGGSCRSLKL"
        "CLLCQDKNFLLYNQRSR"
    )

    wanted_template = TemplateID('5A63', 'C')
    found = domain_aligner.get_domain_alignments(
        seq, require_resnum=72, template_id=wanted_template)

    ok_(len(found) > 0)
    for ali in found:
        eq_(ali.template_id, wanted_template)
def test_no_alignment_flip(mock_get_domain_ranges):
    """Each ungapped target alignment must be a substring of the target.

    The mocked domain-range lookup returns a single range spanning the
    whole target sequence.
    """
    seq = (
        "MGKLVALVLLGVGLSLVGEMFLAFRERVNASREVEPVEPENCHLIEELESGSEDIDILPSGLAFISSGLKYP"
        "GMPNFAPDEPGKIFLMDLNEQNPRAQALEISGGFDKELFNPHGISIFIDKDNTVYLYVVNHPHMKSTVEIFK"
        "FEEQQRSLVYLKTIKHELLKSVNDIVVLGPEQFYATRDHYFTNSLLSFFEMILDLRWTYVLFYSPREVKVVA"
        "KGFCSANGITVSADQKYVYVADVAAKNIHIMEKHDNWDLTQLKVIQLGTLVDNLTVDPATGDILAGCHPNPM"
        "KLLNYNPEDPPGSEVLRIQNVLSEKPRVSTVYANNGSVLQGTSVASVYHGKILIGTVFHKTLYCEL"
    )

    whole_sequence_range = SequenceRange(0, len(seq), seq)
    mock_get_domain_ranges.return_value = [whole_sequence_range]

    for ali in domain_aligner.get_domain_alignments(seq):
        _log.debug("got alignment {}".format(ali))
        ungapped_target = ali.target_alignment.replace('-', '')
        ok_(ungapped_target in seq)
def test_no_alignment_flip(mock_get_domain_ranges):
    """The target side of every alignment, gaps removed, must occur in seq.

    The domain-range lookup is mocked to return one range that spans the
    entire target sequence.
    """
    seq = (
        "MGKLVALVLLGVGLSLVGEMFLAFRERVNASREVEPVEPENCHLIEELESGSEDIDILPSGLAFISSGLKYP"
        "GMPNFAPDEPGKIFLMDLNEQNPRAQALEISGGFDKELFNPHGISIFIDKDNTVYLYVVNHPHMKSTVEIFK"
        "FEEQQRSLVYLKTIKHELLKSVNDIVVLGPEQFYATRDHYFTNSLLSFFEMILDLRWTYVLFYSPREVKVVA"
        "KGFCSANGITVSADQKYVYVADVAAKNIHIMEKHDNWDLTQLKVIQLGTLVDNLTVDPATGDILAGCHPNPM"
        "KLLNYNPEDPPGSEVLRIQNVLSEKPRVSTVYANNGSVLQGTSVASVYHGKILIGTVFHKTLYCEL"
    )

    full_range = SequenceRange(0, len(seq), seq)
    mock_get_domain_ranges.return_value = [full_range]

    found = domain_aligner.get_domain_alignments(seq)
    for candidate in found:
        _log.debug("got alignment {}".format(candidate))
        without_gaps = candidate.target_alignment.replace('-', '')
        ok_(without_gaps in seq)
def test_3ly6_A():
    """Align a target against template 3LY6 chain A.

    Checks that the range of the first returned alignment is not longer
    than the target sequence itself.
    """
    seq = (
        "MGQGEPSQRSTGLAGLYAAPAASPVFIKGSGMDALGIKSCDFQAARNNEEHHTKALSSRRLFVR"
        "RGQPFTIILYFRAPVRAFLPALKKVALTAQTGEQPSKINRTQATFPISSLGDRKWWSAVVEERD"
        "AQSWTISVTTPADAVIGHYSLLLQVSGRKQLLLGQFTLLFNPWNREDAVFLKNEAQRMEYLLNQ"
        "NGLIYLGTADCIQAESWDFGQFEGDVIDLSLRLLSKDKQVEKWSQPVHVARVLGALLHFLKEQR"
        "VLPTPQTQATQEGALLNKRRGSVPILRQWLTGRGRPVYDGQAWVLAAVACTVLRCLGIPARVVT"
        "TFASAQGTGGRLLIDEYYNEEGLQNGEGQRGRIWIFQTSTECWMTRPALPQGYDGWQILHPSAP"
        "NGGGVLGSCDLVVRAVKEGTLGLTPAVSDLFAAINASCVVWKCCEDGTLELTDSNTKYVGNNIS"
        "TKGVGSDRCEDITQNYKYPEGSLQEKEVLERVEKEKMEREKDNGIRPPSLETASPLYLLLKAPS"
        "SLPLRGDAQISVTLVNHSEQEKAVQLAIGVQAVHYNGVLAAKLWRKKLHLTLSANLEKIITIGL"
        "FFSNFERNPPENTFLRLTAMATHSESNLSCFAQEDIAICRPHLAIKMPEKAEQYQPLTASVSLQ"
        "NSLDAPMEDCVISILGRGLIHRERSYRFRSVWPENTMCAKFQFTPTHVGLQRLTVEVDCNMFQN"
        "LTNYKSVTVVAPELSA"
    )

    wanted_template = TemplateID('3LY6', 'A')
    found = domain_aligner.get_domain_alignments(
        seq, template_id=wanted_template)

    first_range_length = found[0].range.get_length()
    ok_(first_range_length <= len(seq))
def test_align_rab3d():
    """Residue 70 must be covered when aligning against 2GF9 chain A.

    When no domain alignments are returned the test ends without
    asserting anything.
    """
    sequence = (
        "MDEDVLTTLKILIIGESGVGKSSLLLRFTDDTFDPELAATIGVDFKVKTISVDGN"
        "KAKLAIWVTLHQQTANFFLKSQIGNSPILKWAMWQYDTAGQERFRTLTPSYYRGAQGVILVYDVTRRDTF"
        "VKLDNWLNELETYCTRNDIVNMLVGNKIDKENREVDRNEGLKFARKHSMLFIEASAKTCDGVQCAFEELV"
        "EKIIQTPGLWESENQNKGVKLSHREEGQGGGACGGYCSVL"
    )
    template_id = TemplateID('2GF9', 'A')
    residue_number = 70

    domain_alignments = domain_aligner.get_domain_alignments(
        sequence, residue_number, template_id)
    if len(domain_alignments) == 0:
        return

    best_domain_alignment = select_best_domain_alignment(domain_alignments)

    context = modeler._prepare_context(template_id.pdbid)
    context.set_main_target(sequence, 'HUMAN', template_id.chain_id)

    per_chain = modeler._make_alignments(sequence, 'HUMAN',
                                         best_domain_alignment,
                                         context, residue_number)
    chain_alignment = per_chain[template_id.chain_id]

    ok_(chain_alignment.is_target_residue_covered(residue_number))
def test_myosin():
    """Alignments for 2AKA chain A must exist and all use that template.

    The target literal is laid out in chunks for readability. All
    whitespace (not only newlines) is stripped so that no space, indent
    or carriage return leaks into the sequence handed to the aligner.
    """
    seq = "".join("""
MAQKGQLSDDEKFLFVDKNFINSPVAQADWAAKRLVWVPSEKQGFEAASIKEEKGDEVVVELVENGKKVTVG
KDDIQKMNPPKFSKVEDMAELTCLNEASVLHNLRERYFSGLIYTYSGLFCVVVNPYKHLPIYSEKIVDMYKG
KKRHEMPPHIYAIADTAYRSMLQDREDQSILCTGESGAGKTENTKKVIQYLAVVASSHKGKKDTSITQGPSF
AYGELEKQLLQANPILEAFGNAKTVKNDNSSRFGKFIRINFDVTGYIVGANIETYLLEKSRAIRQARDERTF
HIFYYMIAGAKEKMRSDLLLEGFNNYTFLSNGFVPIPAAQDDEMFQETVEAMAIMGFSEEEQLSILKVVSSV
LQLGNIVFKKERNTDQASMPDNTAAQKVCHLMGINVTDFTRSILTPRIKVGRDVVQKAQTKEQADFAVEALA
KATYERLFRWILTRVNKALDKTHRQGASFLGILDIAGFEIFEVNSFEQLCINYTNEKLQQLFNHTMFILEQE
EYQREGIEWNFIDFGLDLQPCIELIERPNNPPGVLALLDEECWFPKATDKSFVEKLCTEQGSHPKFQKPKQL
KDKTEFSIIHYAGKVDYNASAWLTKNMDPLNDNVTSLLNASSDKFVADLWKDVDRIVGLDQMAKMTESSLPS
ASKTKKGMFRTVGQLYKEQLGKLMTTLRNTTPNFVRCIIPNHEKRSGKLDAFLVLEQLRCNGVLEGIRICRQ
GFPNRIVFQEFRQRYEILAANAIPKGFMDGKQACILMIKALELDPNLYRIGQSKIFFRTGVLAHLEEERDLK
ITDVIMAFQAMCRGYLARKAFAKRQQQLTAMKVIQRNCAAYLKLRNWQWWRLFTKVKPLLQVTRQEEEMQAK
EDELQKTKERQQKAENELKELEQKHSQLTEEKNLLQEQLQAETELYAEAEEMRVRLAAKKQELEEILHEMEA
RLEEEEDRGQQLQAERKKMAQQMLDLEEQLEEEEAARQKLQLEKVTAEAKIKKLEDEILVMDDQNNKLSKER
KLLEERISDLTTNLAEEEEKAKNLTKLKNKHESMISELEVRLKKEEKSRQELEKLKRKLEGDASDFHEQIAD
LQAQIAELKMQLAKKEEELQAALARLDDEIAQKNNALKKIRELEGHISDLQEDLDSERAARNKAEKQKRDLG
EELEALKTELEDTLDSTATQQELRAKREQEVTVLKKALDEETRSHEAQVQEMRQKHAQAVEELTEQLEQFKR
AKANLDKNKQTLEKENADLAGELRVLGQAKQEVEHKKKKLEAQVQELQSKCSDGERARAELNDKVHKLQNEV
ESVTGMLNEAEGKAIKLAKDVASLSSQLQDTQELLQEETRQKLNVSTKLRQLEEERNSLQDQLDEEMEAKQN
LERHISTLNIQLSDSKKKLQDFASTVEALEEGKKRFQKEIENLTQQYEEKAAAYDKLEKTKNRLQQELDDLV
VDLDNQRQLVSNLEKKQRKFDQLLAEEKNISSKYADERDRAEAEAREKETKALSLARALEEALEAKEELERT
NKMLKAEMEDLVSSKDDVGKNVHELEKSKRALETQMEEMKTQLEELEDELQATEDAKLRLEVNMQALKGQFE
RDLQARDEQNEEKRRQLQRQLHEYETELEDERKQRALAAAAKKKLEGDLKDLELQADSAIKGREEAIKQLRK
LQAQMKDFQRELEDARASRDEIFATAKENEKKAKSLEADLMQLQEDLAAAERARKQADLEKEELAEELASSL
SGRNALQDEKRRLEARIAQLEEELEEEQGNMEAMSDRVRKATQQAEQLSNELATERSTAQKNESARQQLERQ
NKELRSKLHEMEGAVKSKFKSTIAALEAKIAQLEEQVEQEAREKQAATKSLKQKDKKLKEILLQVEDERKMA
EQYKEQAEKGNARVKQLKRQLEEAEEESQRINANRRKLQRELDEATESNEAMGREVNALKSKLRGPPPQETS
Q
""".split())

    template_id = TemplateID('2AKA', 'A')
    alignments = domain_aligner.get_domain_alignments(
        seq, require_resnum=676, template_id=template_id)

    ok_(len(alignments) > 0)
    for alignment in alignments:
        eq_(alignment.template_id, template_id)
def test_myosin():
    """Alignments for 2AKA chain A must exist and all use that template.

    The target literal is laid out in chunks for readability. All
    whitespace (not only newlines) is stripped so that no space, indent
    or carriage return leaks into the sequence handed to the aligner.
    """
    seq = "".join("""
MAQKGQLSDDEKFLFVDKNFINSPVAQADWAAKRLVWVPSEKQGFEAASIKEEKGDEVVVELVENGKKVTVG
KDDIQKMNPPKFSKVEDMAELTCLNEASVLHNLRERYFSGLIYTYSGLFCVVVNPYKHLPIYSEKIVDMYKG
KKRHEMPPHIYAIADTAYRSMLQDREDQSILCTGESGAGKTENTKKVIQYLAVVASSHKGKKDTSITQGPSF
AYGELEKQLLQANPILEAFGNAKTVKNDNSSRFGKFIRINFDVTGYIVGANIETYLLEKSRAIRQARDERTF
HIFYYMIAGAKEKMRSDLLLEGFNNYTFLSNGFVPIPAAQDDEMFQETVEAMAIMGFSEEEQLSILKVVSSV
LQLGNIVFKKERNTDQASMPDNTAAQKVCHLMGINVTDFTRSILTPRIKVGRDVVQKAQTKEQADFAVEALA
KATYERLFRWILTRVNKALDKTHRQGASFLGILDIAGFEIFEVNSFEQLCINYTNEKLQQLFNHTMFILEQE
EYQREGIEWNFIDFGLDLQPCIELIERPNNPPGVLALLDEECWFPKATDKSFVEKLCTEQGSHPKFQKPKQL
KDKTEFSIIHYAGKVDYNASAWLTKNMDPLNDNVTSLLNASSDKFVADLWKDVDRIVGLDQMAKMTESSLPS
ASKTKKGMFRTVGQLYKEQLGKLMTTLRNTTPNFVRCIIPNHEKRSGKLDAFLVLEQLRCNGVLEGIRICRQ
GFPNRIVFQEFRQRYEILAANAIPKGFMDGKQACILMIKALELDPNLYRIGQSKIFFRTGVLAHLEEERDLK
ITDVIMAFQAMCRGYLARKAFAKRQQQLTAMKVIQRNCAAYLKLRNWQWWRLFTKVKPLLQVTRQEEEMQAK
EDELQKTKERQQKAENELKELEQKHSQLTEEKNLLQEQLQAETELYAEAEEMRVRLAAKKQELEEILHEMEA
RLEEEEDRGQQLQAERKKMAQQMLDLEEQLEEEEAARQKLQLEKVTAEAKIKKLEDEILVMDDQNNKLSKER
KLLEERISDLTTNLAEEEEKAKNLTKLKNKHESMISELEVRLKKEEKSRQELEKLKRKLEGDASDFHEQIAD
LQAQIAELKMQLAKKEEELQAALARLDDEIAQKNNALKKIRELEGHISDLQEDLDSERAARNKAEKQKRDLG
EELEALKTELEDTLDSTATQQELRAKREQEVTVLKKALDEETRSHEAQVQEMRQKHAQAVEELTEQLEQFKR
AKANLDKNKQTLEKENADLAGELRVLGQAKQEVEHKKKKLEAQVQELQSKCSDGERARAELNDKVHKLQNEV
ESVTGMLNEAEGKAIKLAKDVASLSSQLQDTQELLQEETRQKLNVSTKLRQLEEERNSLQDQLDEEMEAKQN
LERHISTLNIQLSDSKKKLQDFASTVEALEEGKKRFQKEIENLTQQYEEKAAAYDKLEKTKNRLQQELDDLV
VDLDNQRQLVSNLEKKQRKFDQLLAEEKNISSKYADERDRAEAEAREKETKALSLARALEEALEAKEELERT
NKMLKAEMEDLVSSKDDVGKNVHELEKSKRALETQMEEMKTQLEELEDELQATEDAKLRLEVNMQALKGQFE
RDLQARDEQNEEKRRQLQRQLHEYETELEDERKQRALAAAAKKKLEGDLKDLELQADSAIKGREEAIKQLRK
LQAQMKDFQRELEDARASRDEIFATAKENEKKAKSLEADLMQLQEDLAAAERARKQADLEKEELAEELASSL
SGRNALQDEKRRLEARIAQLEEELEEEQGNMEAMSDRVRKATQQAEQLSNELATERSTAQKNESARQQLERQ
NKELRSKLHEMEGAVKSKFKSTIAALEAKIAQLEEQVEQEAREKQAATKSLKQKDKKLKEILLQVEDERKMA
EQYKEQAEKGNARVKQLKRQLEEAEEESQRINANRRKLQRELDEATESNEAMGREVNALKSKLRGPPPQETS
Q
""".split())

    template_id = TemplateID('2AKA', 'A')
    alignments = domain_aligner.get_domain_alignments(
        seq, require_resnum=676, template_id=template_id)

    ok_(len(alignments) > 0)
    for alignment in alignments:
        eq_(alignment.template_id, template_id)
def test_cadherin():
    """With residue 540 required, the best identity must exceed 70 percent."""
    seq = (
        "MTIHQFLLLFLFWVCLPHFCSPEIMFRRTPVPQQRILSSRVPRSDGKILHRQKRGWMWNQ"
        "FFLLEEYTGSDYQYVGKLHSDQDKGDGSLKYILSGDGAGTLFIIDEKTGDIHATRRIDRE"
        "EKAFYTLRAQAINRRTLRPVEPESEFVIKIHDINDNEPTFPEEIYTASVPEMSVVGTSVV"
        "QVTATDADDPSYGNSARVIYSILQGQPYFSVEPETGIIRTALPNMNRENREQYQVVIQAK"
        "DMGGQMGGLSGTTTVNITLTDVNDNPPRFPQNTIHLRVLESSPVGTAIGSVKATDADTGK"
        "NAEVEYRIIDGDGTDMFDIVTEKDTQEGIITVKKPLDYESRRLYTLKVEAENTHVDPRFY"
        "YLGPFKDTTIVKISIEDVDEPPVFSRSSYLFEVHEDIEVGTIIGTVMARDPDSISSPIRF"
        "SLDRHTDLDRIFNIHSGNGSLYTSKPLDRELSQWHNLTVIAAEINNPKETTRVAVFVRIL"
        "DVNDNAPQFAVFYDTFVCENARPGQLIQTISAVDKDDPLGGQKFFFSLAAVNPNFTVQDN"
        "EDNTARILTRKNGFNRHEISTYLLPVVISDNDYPIQSSTGTLTIRVCACDSQGNMQSCSA"
        "EALLLPAGLSTGALIAILLCIIILLVIVVLFAALKRQRKKEPLILSKEDIRDNIVSYNDE"
        "GGGEEDTQAFDIGTLRNPAAIEEKKLRRDIIPETLFIPRRTPTAPDNTDVRDFINERLKE"
        "HDLDPTAPPYDSLATYAYEGNDSIAESLSSLESGTTEGDQNYDYLREWGPRFNKLAEMYG"
        "GGESDKDS"
    )

    found = domain_aligner.get_domain_alignments(seq, require_resnum=540)
    ok_(len(found) > 0)

    # Seed with 0.0 so the maximum never drops below zero.
    identities = [0.0] + [ali.get_percentage_identity() for ali in found]
    best_identity = max(identities)
    ok_(best_identity > 70.0)
# Models are stored in the temporary directory for the duration of the run.
model_storage.model_dir = tmp_dir

# The output directory from the arguments overrides the configured default.
default_output_dir = settings.MODEL_DIR
final_output_dir = args.output_dir if args.output_dir else default_output_dir

if not os.path.isdir(final_output_dir):
    raise ValueError("Not a directory: {}".format(final_output_dir))

try:
    # Only the first sequence in the fasta input is used.
    sequence = list(parse_fasta(args.fasta).values())[0]
    species_id = args.species.upper()

    # A template argument has the form '<pdbid>_<chain id>'.
    template_id = None
    if args.template:
        pdbid, chain_id = args.template.split('_')
        template_id = TemplateID(pdbid, chain_id)

    domain_alignments = domain_aligner.get_domain_alignments(
        sequence, args.position, template_id)
    _log.info("{} domain alignments".format(len(domain_alignments)))

    # One thread per domain alignment: start them all, then wait for all.
    ts = []
    for ali in domain_alignments:
        ts.append(ModelThread(sequence, species_id, ali, final_output_dir))
    for t in ts:
        t.start()
    for t in ts:
        t.join()
finally:
    # The temporary directory is removed whether or not modelling succeeded.
    shutil.rmtree(tmp_dir)
# Models are stored in the temporary directory for the duration of the run.
model_storage.model_dir = tmp_dir

# The output directory from the arguments overrides the configured default.
final_output_dir = settings.MODEL_DIR
if args.output_dir:
    final_output_dir = args.output_dir

if not os.path.isdir(final_output_dir):
    raise ValueError("Not a directory: {}".format(final_output_dir))

try:
    # Only the first sequence in the fasta input is used. The values view
    # is turned into a list first: on Python 3 a dict view cannot be
    # indexed.
    sequence = list(parse_fasta(args.fasta).values())[0]
    species_id = args.species.upper()

    # A template argument has the form '<pdbid>_<chain id>'.
    if args.template:
        pdbid, chain_id = args.template.split('_')
        template_id = TemplateID(pdbid, chain_id)
    else:
        template_id = None

    domain_alignments = domain_aligner.get_domain_alignments(
        sequence, args.position, template_id)
    _log.info("{} domain alignments".format(len(domain_alignments)))

    # One thread per domain alignment: start them all, then wait for all.
    ts = [ModelThread(sequence, species_id, ali, final_output_dir)
          for ali in domain_alignments]
    for t in ts:
        t.start()
    for t in ts:
        t.join()
finally:
    # The temporary directory is removed whether or not modelling succeeded.
    shutil.rmtree(tmp_dir)
blaster.blastp_exe = BLASTP_EXE


def pick_random_sequences(n):
    """Return a dict with n randomly chosen entries parsed from SPROT_FASTA.

    :param n: number of entries to pick
    :return: dict mapping each picked key to its sequence
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)

    # random.sample needs a sequence; a dict keys view is rejected on
    # recent Python versions, so it is copied into a list first.
    keys = random.sample(list(sprot_sequences.keys()), n)

    return {key: sprot_sequences[key] for key in keys}


sequences = pick_random_sequences(10)
for key in sequences:
    # Keep retrying the lookup for as long as it fails with an HTTP error.
    while True:
        try:
            domain_alignments = domain_aligner.get_domain_alignments(
                sequences[key])
            break
        except HTTPError:
            continue

    # Print the domain alignment identity next to the identity of a kmad
    # alignment between the template and the full sequence.
    for domain_alignment in domain_alignments:
        template_seq = dssp.get_sequence(domain_alignment.template_id)
        template_secstr = dssp.get_secondary_structure(
            domain_alignment.template_id)

        full_alignment = kmad_aligner.align(template_seq, template_secstr,
                                            sequences[key])

        print(key, domain_alignment.template_id,
              domain_alignment.get_percentage_identity(),
              full_alignment.get_percentage_identity())
def test_4rh7_A():
    """At least one domain alignment is found against template 4RH7, chain A.

    The query is one long sequence, written as adjacent string literals
    that the compiler joins into a single string.
    """
    query = (
        "MANGTADVRKLFIFTTTQNYFGLMSELWDQPLLCNCLEINNFLDDGNQMLLRVQRSDAGISFSN"
        "TIEFGDTKDKVLVFFKLRPEVITDENLHDNILVSSMLESPISSLYQAVRQVFAPMLLKDQEWSR"
        "NFDPKLQNLLSELEAGLGIVLRRSDTNLTKLKFKEDDTRGILTPSDEFQFWIEQAHRGNKQISK"
        "ERANYFKELFETIAREFYNLDSLSLLEVVDLVETTQDVVDDVWRQTEHDHYPESRMLHLLDIIG"
        "GSFGRFVQKKLGTLNLWEDPYYLVKESLKAGISICEQWVIVCNHLTGQVWQRYVPHPWKNEKYF"
        "PETLDKLGKRLEEVLAIRTIHEKFLYFLPASEEKIICLTRVFEPFTGLNPVQYNPYTEPLWKAA"
        "VSQYEKIIAPAEQKIAGKLKNYISEIQDSPQQLLQAFLKYKELVKRPTISKELMLERETLLARL"
        "VDSIKDFRLDFENRCRGIPGDASGPLSGKNLSEVVNSIVWVRQLELKVDDTIKIAEALLSDLPG"
        "FRCFHQSAKDLLDQLKLYEQEQFDDWSRDIQSGLSDSRSGLCIEASSRIMELDSNDGLLKVHYS"
        "DRLVILLREVRQLSALGFVIPAKIQQVANIAQKFCKQAIILKQVAHFYNSIDQQMIQSQRPMML"
        "QSALAFEQIIKNSKAGSGGKSQITWDNPKELEGYIQKLQNAAERLATENRKLRKWHTTFCEKVV"
        "VLMNIDLLRQQQRWKDGLQELRTGLATVEAQGFQASDMHAWKQHWNHQLYKALEHQYQMGLEAL"
        "NENLPEINIDLTYKQGRLQFRPPFEEIRAKYYREMKRFIGIPNQFKGVGEAGDESIFSIMIDRN"
        "ASGFLTIFSKAEDLFRRLSAVLHQHKEWIVIGQVDMEALVEKHLFTVHDWEKNFKALKIKGKEV"
        "ERLPSAVKVDCLNINCNPVKTVIDDLIQKLFDLLVLSLKKSIQAHLHEIDTFVTEAMEVLTIMP"
        "QSVEEIGDANLQYSKLQERKPEILPLFQEAEDKNRLLRTVAGGGLETISNLKAKWDKFELMMES"
        "HQLMIKDQIEVMKGNVKSRLQIYYQELEKFKARWDQLKPGDDVIETGQHNTLDKSAKLIKEKKI"
        "EFDDLEVTRKKLVDDCHHFRLEEPNFSLASSISKDIESCAQIWAFYEEFQQGFQEMANEDWITF"
        "RTKTYLFEEFLMNWHDRLRKVEEHSVMTVKLQSEVDKYKIVIPILKYVRGEHLSPDHWLDLFRL"
        "LGLPRGTSLEKLLFGDLLRVADTIVAKAADLKDLNSRAQGEVTIREALRELDLWGVGAVFTLID"
        "YEDSQSRTMKLIKDWKDIVNQVGDNRCLLQSLKDSPYYKGFEDKVSIWERKLAELDEYLQNLNH"
        "IQRKWVYLEPIFGRGALPKEQTRFNRVDEDFRSIMTDIKKDNRVTTLTTHAGIRNSLLTILDQL"
        "QRCQKSLNEFLEEKRSAFPRFYFIGDDDLLEILGQSTNPSVIQSHLKKLFAGINSVCFDEKSKH"
        "ITAMKSLEGEVVPFKNKVPLSNNVETWLNDLALEMKKTLEQLLKECVTTGRSSQGAVDPSLFPS"
        "QILCLAEQIKFTEDVENAIKDHSLHQIETQLVNKLEQYTNIDTSSEDPGNTESGILELKLKALI"
        "LDIIHNIDVVKQLNQIQVHTTEDWAWKKQLRFYMKSDHTCCVQMVDSEFQYTYEYQGNASKLVY"
        "TPLTDKCYLTLTQAMKMGLGGNPYGPAGTGKTESVKALGGLLGRQVLVFNCDEGIDVKSMGRIF"
        "VGLVKCGAWGCFDEFNRLEESVLSAVSMQIQTIQDALKNHRTVCELLGKEVEVNSNSGIFITMN"
        "PAGKGYGGRQKLPDNLKQLFRPVAMSHPDNELIAEVILYSEGFKDAKVLSRKLVAIFNLSRELL"
        "TPQQHYDWGLRALKTVLRGSGNLLRQLNKSGTTQNANESHIVVQALRLNTMSKFTFTDCTRFDA"
        "LIKDVFPGIELKEVEYDELSAALKQVFEEANYEIIPNQIKKALELYEQLCQRMGVVIVGPSGAG"
        "KSTLWRMLRAALCKTGKVVKQYTMNPKAMPRYQLLGHIDMDTREWSDGVLTNSARQVVREPQDV"
        "SSWIICDGDIDPEWIESLNSVLDDNRLLTMPSGERIQFGPNVNFVFETHDLSCASPATISRMGM"
        "IFLSDEETDLNSLIKSWLRNQPAEYRNNLENWIGDYFEKALQWVLKQNDYVVETSLVGTVMNGL"
        "SHLHGCRDHDEFIINLIRGLGGNLNMKSRLEFTKEVFHWARESPPDFHKPMDTYYDSTRGRLAT"
        "YVLKKPEDLTADDFSNGLTLPVIQTPDMQRGLDYFKPWLSSDTKQPFILVGPEGCGKGMLLRYA"
        "FSQLRSTQIATVHCSAQTTSRHLLQKLSQTCMVISTNTGRVYRPKDCERLVLYLKDINLPKLDK"
        "WGTSTLVAFLQQVLTYQGFYDENLEWVGLENIQIVASMSAGGRLGRHKLTTRFTSIVRLCSIDY"
        "PEREQLQTIYGAYLEPVLHKNLKNHSIWGSSSKIYLLAGSMVQVYEQVRAKFTVDDYSHYFFTP"
        "CILTQWVLGLFRYDLEGGSSNHPLDYVLEIVAYEARRLFRDKIVGAKELHLFDIILTSVFQGDW"
        "GSDILDNMSDSFYVTWGARHNSGARAAPGQPLPPHGKPLGKLNSTDLKDVIKKGLIHYGRDNQN"
        "LDILLFHEVLEYMSRIDRVLSFPGGSLLLAGRSGVGRRTITSLVSHMHGAVLFSPKISRGYELK"
        "QFKNDLKHVLQLAGIEAQQVVLLLEDYQFVHPTFLEMINSLLSSGEVPGLYTLEELEPLLLPLK"
        "DQASQDGFFGPVFNYFTYRIQQNLHIVLIMDSANSNFMINCESNPALHKKCQVLWMEGWSNSSM"
        "KKIPEMLFSETGGGEKYNDKKRKEEKKKNSVDPDFLKSFLLIHESCKAYGATPSRYMTFLHVYS"
        "AISSSKKKELLKRQSHLQAGVSKLNEAKALVDELNRKAGEQSVLLKTKQDEADAALQMITVSMQ"
        "DASEQKTELERLKHRIAEEVVKIEERKNKIDDELKEVQPLVNEAKLAVGNIKPESLSEIRSLRM"
        "PPDVIRDILEGVLRLMGIFDTSWVSMKSFLAKRGVREDIATFDARNISKEIRESVEELLFKNKG"
        "SFDPKNAKRASTAAAPLAAWVKANIQYSHVLERIHPLETEQAGLESNLKKTEDRKRKLEELLNS"
        "VGQKVSELKEKFQSRTSEAAKLEAEVSKAQETIKAAEVLINQLDREHKRWNAQVVEITEELATL"
        "PKRAQLAAAFITYLSAAPESLRKTCLEEWTKSAGLEKFDLRRFLCTESEQLIWKSEGLPSDDLS"
        "IENALVILQSRVCPFLIDPSSQATEWLKTHLKDSRLEVINQQDSNFITALELAVRFGKTLIIQE"
        "MDGVEPVLYPLLRRDLVAQGPRYVVQIGDKIIDYNEEFRLFLSTRNPNPFIPPDAASIVTEVNF"
        "TTTRSGLRGQLLALTIQHEKPDLEEQKTKLLQQEEDKKIQLAKLEESLLETLATSQGNILENKD"
        "LIESLNQTKASSALIQESLKESYKLQISLDQERDAYLPLAESASKMYFIISDLSKINNMYRFSL"
        "AAFLRLFQRALQNKQDSENTEQRIQSLISSLQHMVYEYICRCLFKADQLMFALHFVRGMHPELF"
        "QENEWDTFTGVVVGDMLRKADSQQKIRDQLPSWIDQERSWAVATLKIALPSLYQTLCFEDAALW"
        "RTYYNNSMCEQEFPSILAKKVSLFQQILVVQALRPDRLQSAMALFACKTLGLKEVSPLPLNLKR"
        "LYKETLEIEPILIIISPGADPSQELQELANAERSGECYHQVAMGQGQADLAIQMLKECARNGDW"
        "LCLKNLHLVVSWLPVLEKELNTLQPKDTFRLWLTAEVHPNFTPILLQSSLKITYESPPGLKKNL"
        "MRTYESWTPEQISKKDNTHRAHALFSLAWFHAACQERRNYIPQGWTKFYEFSLSDLRAGYNIID"
        "RLFDGAKDVQWEFVHGLLENAIYGGRIDNYFDLRVLQSYLKQFFNSSVIDVFNQRNKKSIFPYS"
        "VSLPQSCSILDYRAVIEKIPEDDKPSFFGLPANIARSSQRMISSQVISQLRILGRSITAGSKFD"
        "REIWSNELSPVLNLWKKLNQNSNLIHQKVPPPNDRQGSPILSFIILEQFNAIRLVQSVHQSLAA"
        "LSKVIRGTTLLSSEVQKLASALLNQKCPLAWQSKWEGPEDPLQYLRGLVARALAIQNWVDKAEK"
        "QALLSETLDLSELFHPDTFLNALRQETARAVGRSVDSLKFVASWKGRLQEAKLQIKISGLLLEG"
        "CSFDGNQLSENQLDSPSVSSVLPCFMGWIPQDACGPYSPDECISLPVYTSAERDRVVTNIDVPC"
        "GGNQDQWIQCGAALFLKNQ"
    )

    # Restrict the search to the single template under test.
    found = domain_aligner.get_domain_alignments(
        query, template_id=TemplateID('4RH7', 'A'))

    ok_(len(found) > 0)
# Configure the aligners and external executables from the settings constants.
domain_aligner.similar_ranges_min_overlap_percentage = \
    SIMILAR_RANGES_MIN_OVERLAP_PERCENTAGE
domain_aligner.similar_ranges_max_length_difference_percentage = \
    SIMILAR_RANGES_MAX_LENGTH_DIFFERENCE_PERCENTAGE
kmad_aligner.kmad_exe = KMAD_EXE
blaster.blastp_exe = BLASTP_EXE


def pick_random_sequences(n):
    """Return n randomly chosen entries parsed from SPROT_FASTA.

    The result is a dict mapping each sampled key to its sequence.
    random.sample raises ValueError when n exceeds the number of entries.
    """
    sprot_sequences = parse_fasta(SPROT_FASTA)
    # random.sample requires a real sequence; passing a dict key view is
    # rejected on Python 3.11+, so the keys are materialized first.
    keys = random.sample(list(sprot_sequences.keys()), n)
    return {key: sprot_sequences[key] for key in keys}


# For each sampled sequence, print the identity of every domain alignment
# next to the identity of a kmad alignment against the same template.
sequences = pick_random_sequences(10)
for key, sequence in sequences.items():
    # Retry until the request succeeds.
    # NOTE(review): this never gives up on a persistent HTTPError.
    while True:
        try:
            domain_alignments = domain_aligner.get_domain_alignments(
                sequence)
            break
        except HTTPError:
            continue

    for domain_alignment in domain_alignments:
        template_seq = dssp.get_sequence(domain_alignment.template_id)
        template_secstr = dssp.get_secondary_structure(
            domain_alignment.template_id)

        full_alignment = kmad_aligner.align(template_seq, template_secstr,
                                            sequence)

        print(key, domain_alignment.template_id,
              domain_alignment.get_percentage_identity(),
              full_alignment.get_percentage_identity())