def _create_mapping(r):
    """
    Build the mapping from seqres positions to query positions for one hit.

    This function relies on names that are not defined inside it and must
    be supplied by the surrounding scope: ``ali`` (the alignment),
    ``parse_header``, ``map_indices`` and ``pd`` (pandas).

    :param r: record supporting key lookup for "alignment_id",
        "alignment_start", "alignment_end", "uniprot_start", "uniprot_end",
        "resseq_start" and "resseq_end"
    :return: tuple ``(map_, mn)`` where ``mn`` is the merged table with
        (at least) the columns i, j and k, and ``map_`` is a dict from
        seqres index (k) to query index (i)
    """
    # the first entry of the alignment is the query; its header carries
    # the query start/end positions
    _, query_start, query_end = parse_header(ali.ids[0])

    # create mapping from query into PDB Uniprot sequence
    # (column i is used below as the query index, column j as the
    # Uniprot index)
    m = map_indices(
        ali[0], query_start, query_end,
        ali[r["alignment_id"]], r["alignment_start"], r["alignment_end"]
    )

    # create mapping from PDB Uniprot into seqres numbering
    # j will be Uniprot sequence index, k seqres index;
    # both ranges are inclusive of their end position
    n = pd.DataFrame({
        "j": list(range(r["uniprot_start"], r["uniprot_end"] + 1)),
        "k": list(range(r["resseq_start"], r["resseq_end"] + 1)),
    })

    # need to convert to strings since other mapping has indices as strings.
    # Cast the frame explicitly instead of assigning strings into the
    # integer columns through .loc[:, col]: that form relies on silent
    # upcasting, which newer pandas releases warn about and will reject.
    n = n.astype({"j": str, "k": str})

    # join over Uniprot indices (i.e. j);
    # get rid of any position that is not aligned
    mn = m.merge(n, on="j", how="inner").dropna()

    # extract final mapping from seqres (k) to query (i)
    map_ = dict(zip(mn.k, mn.i))

    return map_, mn
def align_dca2pdb(msa_name, pdbseq, msaseq):
    """
    Align PDB sequences to MSA (DCA) sequences and map DCA to PDB indices.

    Some code taken from
    https://github.com/bsir/dca-frustratometer/blob/master/dca_frustratometer.py

    Two pairwise global alignments are computed (element 0 of ``pdbseq``
    against element 0 of ``msaseq``, and element 1 against element 1).
    Indices of the second pair are offset by the length of the first
    sequence so that both maps share one continuous numbering. The
    combined table is written to
    ``results\\reference_maps\\ref_map_<name>.txt`` (including unaligned
    rows) and progress is printed to stdout.

    :param msa_name: name of the MSA file; a trailing ".fas" is removed to
        build the output file name
    :param pdbseq: indexable holding two PDB sequences (index 0 and 1)
    :param msaseq: indexable holding two MSA sequences (index 0 and 1)
    :return: dict mapping DCA index (dca_i) to PDB index (pdb_i) for all
        rows of the combined table that contain no missing values
    """
    import numpy as np
    import pandas as pd
    from Bio import pairwise2
    from evcouplings.compare import mapping as evmp

    print("(map_dca2pdb)\taligning dca sequence to pdb coord sequence...")

    # need to penalize for opening and adding gaps otherwise mapping is off
    # (s param {-.5,-.1})
    alignments_1 = pairwise2.align.globalxs(pdbseq[0], msaseq[0], -.5, -.1)
    alignments_2 = pairwise2.align.globalxs(pdbseq[1], msaseq[1], -.5, -.1)

    # only the first reported alignment of each pair is used from here on
    print(pairwise2.format_alignment(*alignments_1[0], full_sequences=True))
    print(pairwise2.format_alignment(*alignments_2[0], full_sequences=True))

    map_1 = evmp.map_indices(
        alignments_1[0][0], 1, 0, alignments_1[0][1], 1, 0
    )
    # second pair continues the numbering right after the first sequences
    map_2 = evmp.map_indices(
        alignments_2[0][0], 1 + len(pdbseq[0]), 0,
        alignments_2[0][1], 1 + len(msaseq[0]), 0
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat with default
    # arguments stacks the two tables the same way (row labels are kept)
    map_pdb_dca = pd.concat([map_1, map_2])
    map_pdb_dca = map_pdb_dca.rename(columns={
        "i": "pdb_i", "A_i": "pdb_res", "j": "dca_i", "A_j": "dca_res"
    })

    # remove only a trailing ".fas" extension; str.strip(".fas") would
    # instead eat any of the characters '.', 'f', 'a', 's' from both ends
    # of the name (e.g. "sample.fas" -> "mple")
    suffix = ".fas"
    if msa_name.endswith(suffix):
        base_name = msa_name[:-len(suffix)]
    else:
        base_name = msa_name

    # NOTE(review): the path uses Windows separators and the directory is
    # assumed to exist already - confirm before running elsewhere
    outfile = "results\\reference_maps\\ref_map_{}.txt".format(base_name)
    np.savetxt(
        outfile, map_pdb_dca,
        header="pdb_i\tpdb_res\tdca_i\tdca_res",
        fmt="%s\t%s\t%s\t%s", comments=''
    )
    print("(map_dca2pdb)\tWrote {}".format(outfile))

    # the file above keeps unaligned rows; the returned dict does not
    map_pdb_dca = map_pdb_dca.dropna()
    map_dca2pdb_dict = dict(zip(map_pdb_dca["dca_i"], map_pdb_dca["pdb_i"]))

    return map_dca2pdb_dict