Esempio n. 1
0
def showPDB_interactions(pdbPath, complexName, l_r_pairsres , showPairs=True):
  """
  Load both partners of a complex into PyMOL and display residue pairs.

  The files <complexName>_l_u.pdb and <complexName>_r_u.pdb are read from
  pdbPath, drawn as cartoons and coloured grey ("l" partner) and palegreen
  ("r" partner).

  :param pdbPath: str. Directory containing the two pdb files
  :param complexName: str. Prefix shared by both pdb files
  :param l_r_pairsres: iterable of ((chainL, resIdL, resNameL), (chainR, resIdR, resNameR)),
                       where the residue names are one-letter codes
  :param showPairs: bool. If True, draw a CA-CA distance object for each pair and
                    print the pair (with three-letter names). Otherwise hand the
                    (chain, resId) part of every pair to showPDB_patches_all
  """
  name_l = complexName + "_l_u"
  name_r = complexName + "_r_u"
  file_l = os.path.join(pdbPath, name_l + ".pdb")
  file_r = os.path.join(pdbPath, name_r + ".pdb")

  # Load structures: "l" partner first, then "r" partner
  pymol.finish_launching()
  for pdb_file, object_name in ((file_l, name_l), (file_r, name_r)):
    pymol.cmd.load(pdb_file, object_name)
    pymol.cmd.show_as("cartoon", object_name)

  # One named selection and one colour per partner
  for selection, object_name, colour in (("ml", name_l, "grey"),
                                         ("mr", name_r, "palegreen")):
    pymol.cmd.select(selection, " model " + object_name)
    pymol.cmd.color(colour, selection)

  if not showPairs:
    side_l, side_r = zip(*l_r_pairsres)
    # Only (chain, resId) is forwarded; the residue name is dropped
    chain_resid_l = [entry[:2] for entry in side_l]
    chain_resid_r = [entry[:2] for entry in side_r]
    showPDB_patches_all(pdbPath, complexName, chain_resid_l, chain_resid_r,
                        res_true_l={}, res_true_r={})
    return

  ca_selection = " model %s and chain %s and resi %s and resn %s and name CA"
  for pair_idx, (residue_l, residue_r) in enumerate(l_r_pairsres):
    chainL, resL, letterL = residue_l
    chainR, resR, letterR = residue_r
    resNameL = one_to_three(letterL)
    resNameR = one_to_three(letterR)
    pymol.cmd.distance("dist%d" % pair_idx,
                       ca_selection % (name_l, chainL, resL, resNameL),
                       ca_selection % (name_r, chainR, resR, resNameR))
    print( (chainL, resL, resNameL), (chainR, resR, resNameR))
Esempio n. 2
0
def get_mutation_dict(mutation):
    """Split a point-mutation string into wild type, residue number and mutant.

    Two notations are accepted, told apart by whether the second character
    (after stripping whitespace) is a digit:

    * one-letter, e.g. ``A123G`` -- both residues are upper-cased and
      converted to three-letter codes with ``one_to_three``;
    * three-letter, e.g. ``Ala123Gly`` -- residues are returned as written.

    Returns a dict with keys ``'wt'``, ``'resnum'`` (digits as a string) and
    ``'mt'``.
    """
    token = mutation.strip()

    if not token[1].isdigit():
        # Three-letter notation: nothing to convert
        three_letter = re.compile(r"(?P<wt>[a-zA-Z]{3})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{3})")
        return three_letter.match(token).groupdict()

    one_letter = re.compile(r"(?P<wt>[a-zA-Z]{1})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{1})")
    fields = one_letter.match(token).groupdict()
    for key in ('wt', 'mt'):
        fields[key] = one_to_three(fields[key].upper())
    return fields
Esempio n. 3
0
    def getStructFromFasta(self, fname, chainType):
        '''
        Creates a Bio.PDB.Structure object from the sequence stored in a fasta file.
        No atoms are added, so no coordinates are available; only the
        Structure -> Model -> Chain -> Residue hierarchy is built.

        :param fname: str. Path to the fasta file
        :param chainType: str. "l" or "r". Its upper-cased value is used as chain id
        :return: the Structure, holding model 0 with a single chain. Residues are
                 numbered from 0 in sequence order; letters one_to_three does not
                 know are stored as "UNK"
        '''
        # inputNumber is used to report which partner fails if error
        if chainType == "l":
            partnerNumber = "1"
        else:
            partnerNumber = "2"
        sequence = self.parseFasta(fname, inputNumber=partnerNumber)
        prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]
        chainId = chainType.upper()

        structure = Structure(prefix)
        model = Model(0)
        structure.add(model)
        chain = Chain(chainId)
        model.add(chain)

        for position, letter in enumerate(sequence):
            try:
                threeLetter = one_to_three(letter)
            except KeyError:
                threeLetter = "UNK"
            # prefix is passed as the third Residue argument (presumably the segment id)
            chain.add(Residue((' ', position, ' '), threeLetter, prefix))
        return structure
Esempio n. 4
0
def BSParser(infile):
    """Parse a tab-separated binding-site file into a list of BindingSite objects.

    Every non-blank line is split on tabs; the position of each field is taken
    from ``BSLineOrder`` (fields "PDBID", "ChainID", "BSID", "LigName",
    "LigChain" and "BSRes"). The BSRes field is a whitespace-separated list of
    tokens, each a one-letter residue code followed by the residue number.

    Tokens are skipped (best effort, the rest of the line is still processed)
    when the residue letter cannot be converted, when the number is not an
    integer, or when ``appendResidue`` rejects the residue. A message is
    printed when ``PDBFileExist`` reports that the structure file is missing;
    the binding site is still returned.

    :param infile: path of the binding-site file
    :return: list of BindingSite objects, one per non-blank line
    """
    # Column positions do not change from line to line: resolve them once.
    columns = dict((field, BSLineOrder.index(field))
                   for field in ("PDBID", "ChainID", "BSID",
                                 "LigName", "LigChain", "BSRes"))
    bslist = []
    # The context manager guarantees the file is closed, even on error.
    with open(infile) as handle:
        for line in handle:
            if not line.strip():
                # A blank (e.g. trailing) line carries no record.
                continue
            content = line.strip().split('\t')
            pdbid = content[columns["PDBID"]].lower()
            chainid = content[columns["ChainID"]]
            bscode = content[columns["BSID"]]
            ligname = content[columns["LigName"]]
            ligchain = content[columns["LigChain"]]
            bsres = content[columns["BSRes"]]
            newbs = BindingSite(pdbid, chainid, bscode, ligchain, ligname)
            for eachres in bsres.split():
                try:
                    resname = one_to_three(eachres[0])
                except Exception:
                    # Unknown residue letter: report and skip this token only.
                    print("wrong bindingsite res: " + eachres)
                    continue
                try:
                    resnum = int(eachres[1:])
                except ValueError:
                    # Non-numeric residue number: silently skipped, as before.
                    continue
                residue = Residue(resnum, resname, chainid)
                try:
                    newbs.appendResidue(residue)
                except Exception as e:
                    print(e)
                    continue
            bslist.append(newbs)

            if not PDBFileExist(pdbid):
                print("Cannot find file " + pdbid)
    return bslist
Esempio n. 5
0
    def _standard_resname(self, res):
        """Return the upper-case three-letter name for a residue.

        ``res`` may be a one-letter or a three-letter code in any case.
        One-letter codes are expanded with ``one_to_three``; three-letter
        codes are returned upper-cased.

        Raises ValueError when the upper-cased code is in neither ``aa3``
        nor ``aa1``.
        """
        code = res.upper()
        recognised = code in list(aa3) or code in list(aa1)
        if not recognised:
            # TODO: mutation to selenocysteine (U;SEC) is not working
            raise ValueError("Unrecognised residue {}".format(res))

        return one_to_three(code) if len(code) == 1 else code
def accessibility_class(residue, accessibility):
    """Return the relative solvent accessibility of a residue and its class.

    The relative accessibility is ``accessibility`` divided by the maximum
    accessibility of the residue in the 'Miller' table of ``residue_max_acc``.

    :param residue: one-letter residue code (converted with ``one_to_three``)
    :param accessibility: absolute accessibility; anything ``float()`` accepts
    :return: ``(rel_acc, cls)`` where cls is 2 for rel_acc >= 0.95, 1 for
             0.95 > rel_acc >= 0.05 and 0 for everything else; or
             ``("NA", "NA")`` when the value cannot be computed (unknown
             residue, non-numeric accessibility, ...)
    """
    Type = 'Miller'  # Miller or Wilke type available
    resmax = residue_max_acc[Type]
    try:
        rel_acc = float(accessibility) / float(resmax[one_to_three(residue)])
    except Exception:
        # Best effort: any ordinary failure yields the "NA" marker. A bare
        # ``except`` would also have swallowed KeyboardInterrupt/SystemExit.
        return ("NA", "NA")
    if rel_acc >= 0.95:
        return (rel_acc, 2)
    elif rel_acc >= 0.05:
        return (rel_acc, 1)
    else:
        return (rel_acc, 0)
Esempio n. 7
0
 def pdb_atom(self, ind, a, aa, chain, pos, xyz):
     """
     Format one PDB-like ATOM line.

     Input:
         ind  : int, zero-based atom index (written as ind + 1)
         a    : str, atom name ('N', 'CA', 'C' or 'CB')
         aa   : char, one letter aminoacid name (written as three-letter code)
         chain: char, chain id character
         pos  : int, zero-based aminoacid position (written as pos + 1)
         xyz  : sequence, the first three items are the x, y, z coordinates

     Output:
         str, the ATOM line. The two columns after the coordinates are both
         written as 1.0, and the final character is 'C' when the atom name
         contains a 'C', otherwise 'N'.
     """
     header = 'ATOM {:>6}  {:3} {:3} {:1} {:>4}   '.format(
         ind + 1, a, one_to_three(aa), chain, pos + 1)
     element = 'C' if 'C' in a else 'N'
     x, y, z = xyz[0], xyz[1], xyz[2]
     tail = '{:7.3f} {:7.3f} {:7.3f} {:6.3f} {:6.3f}           {}'.format(
         x, y, z, 1.0, 1.0, element)
     return header + tail
Esempio n. 8
0
def side_chain_term(oa, k=1*kilocalorie_per_mole, gmmFileFolder="/Users/weilu/opt/parameters/side_chain", forceGroup=25):
    """Build a side-chain force from per-residue 3-component Gaussian mixtures.

    For each of the 20 residue types (except GLY) four text files are read
    from ``gmmFileFolder``: ``<RES>_means.txt``, ``<RES>_precisions_chol.txt``
    (reshaped to 3x3x3), ``<RES>_log_det.txt`` and ``<RES>_weights.txt``.
    GLY gets uniform weights and all-zero tables.

    The energy of one bond is ``-k_side_chain * log(sum_j exp(g_j))`` with
    ``g_j = log_gaussian_prob_j + log_weights(res, j)``, evaluated with the
    usual log-sum-exp shift ``c = max_j g_j``. Each ``log_gaussian_prob_j`` is
    ``-0.5 * (3*log(2*pi) + |r . P_j - m_j . P_j|^2) + log_det(res, j)`` where
    ``r = (r1, r2, r3)`` are 10x the distances from particles 1-3 to
    particle 4, ``P_j`` is the j-th 3x3 block of ``precisions_chol`` and
    ``m_j`` the j-th mean.

    Parameters:
        oa: object providing ``k_awsem``, ``nres``, ``chain_starts``,
            ``chain_ends``, ``res_type``, ``seq`` and the per-residue particle
            indices ``n``, ``ca``, ``c``, ``cb`` (all read below).
        k: energy scale carrying units; converted to kJ/mol and multiplied by
            ``oa.k_awsem``.
        gmmFileFolder: directory with the parameter files listed above.
            NOTE(review): the default is an absolute path on one user's
            machine; callers elsewhere presumably pass their own -- confirm.
        forceGroup: force group assigned to the returned force.

    Returns:
        The configured ``CustomCompoundBondForce`` with one 4-particle bond
        (N, CA, C, CB) per residue that is neither a chain start, a chain end
        nor of type "IGL".
    """
    # NOTE(review): the four comment lines below mention chi forces and a
    # 251.04 constant that do not appear in this function; they look carried
    # over from another term -- confirm before relying on them.
    # add chi forces
    # The sign of the equilibrium value is opposite and magnitude differs slightly
    # 251.04 = 60 * 4.184 kJ, converted from default value in LAMMPS AWSEM
    # multiply interaction strength by overall scaling
    k = k.value_in_unit(kilojoule_per_mole)
    k_side_chain = k * oa.k_awsem
    n_components = 3

    # Per-residue-type tables, first axis indexed like res_type_map_letters.
    means_all_res = np.zeros((20, 3, 3))
    precisions_chol_all_res = np.zeros((20, 3, 3, 3))
    log_det_all_res = np.zeros((20, 3))
    weights_all_res = np.zeros((20, 3))
    mean_dot_precisions_chol_all_res = np.zeros((20, 3, 3))

    # The list order and the dict values below describe the same indexing
    # (A=0 ... V=19); the list drives table filling, the dict drives addBond.
    res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
                            'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
    gamma_se_map_1_letter = {   'A': 0,  'R': 1,  'N': 2,  'D': 3,  'C': 4,
                                'Q': 5,  'E': 6,  'G': 7,  'H': 8,  'I': 9,
                                'L': 10, 'K': 11, 'M': 12, 'F': 13, 'P': 14,
                                'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19}
    for i, res_type_one_letter in enumerate(res_type_map_letters):
        res_type = one_to_three(res_type_one_letter)
        if res_type == "GLY":
            # No parameter files are read for GLY: uniform weights, all other
            # tables stay at zero.
            weights_all_res[i] = np.array([1/3, 1/3, 1/3])
            continue

        means = np.loadtxt(f"{gmmFileFolder}/{res_type}_means.txt")
        precisions_chol = np.loadtxt(f"{gmmFileFolder}/{res_type}_precisions_chol.txt").reshape(3,3,3)
        log_det = np.loadtxt(f"{gmmFileFolder}/{res_type}_log_det.txt")
        weights = np.loadtxt(f"{gmmFileFolder}/{res_type}_weights.txt")
        means_all_res[i] = means

        precisions_chol_all_res[i] = precisions_chol
        log_det_all_res[i] = log_det
        weights_all_res[i] = weights


        # Precompute m_j . P_j so the energy expression only subtracts it.
        for j in range(n_components):
            mean_dot_precisions_chol_all_res[i][j] = np.dot(means[j], precisions_chol[j])

    # Flatten the trailing axes so each table is 2-D: (residue type, flat index).
    # means_all_res is reshaped here but not passed to the force afterwards.
    means_all_res = means_all_res.reshape(20, 9)
    precisions_chol_all_res = precisions_chol_all_res.reshape(20, 27)
    mean_dot_precisions_chol_all_res = mean_dot_precisions_chol_all_res.reshape(20, 9)

    log_weights = np.log(weights_all_res)
    sumexp_line = "+".join([f"exp(log_gaussian_and_weights_{i}-c)" for i in range(n_components)])
    const = 3 * np.log(2 * np.pi)
    # The whitespace inside the continued string below is part of the
    # expression text handed to the force; do not reformat it.
    side_chain = CustomCompoundBondForce(4, f"-{k_side_chain}*(log({sumexp_line})+c);\
                                        c=max(log_gaussian_and_weights_0,max(log_gaussian_and_weights_1,log_gaussian_and_weights_2));\
                                        log_gaussian_and_weights_0=log_gaussian_prob_0+log_weights(res,0);\
                                        log_gaussian_and_weights_1=log_gaussian_prob_1+log_weights(res,1);\
                                        log_gaussian_and_weights_2=log_gaussian_prob_2+log_weights(res,2);\
                                        log_gaussian_prob_0=-.5*({const}+log_prob_0)+log_det(res,0);\
                                        log_gaussian_prob_1=-.5*({const}+log_prob_1)+log_det(res,1);\
                                        log_gaussian_prob_2=-.5*({const}+log_prob_2)+log_det(res,2);\
                                        log_prob_0=((r1*pc(res,0)+r2*pc(res,3)+r3*pc(res,6)-mdpc(res,0))^2+\
                                        (r1*pc(res,1)+r2*pc(res,4)+r3*pc(res,7)-mdpc(res,1))^2+\
                                        (r1*pc(res,2)+r2*pc(res,5)+r3*pc(res,8)-mdpc(res,2))^2);\
                                        log_prob_1=((r1*pc(res,9)+r2*pc(res,12)+r3*pc(res,15)-mdpc(res,3))^2+\
                                        (r1*pc(res,10)+r2*pc(res,13)+r3*pc(res,16)-mdpc(res,4))^2+\
                                        (r1*pc(res,11)+r2*pc(res,14)+r3*pc(res,17)-mdpc(res,5))^2);\
                                        log_prob_2=((r1*pc(res,18)+r2*pc(res,21)+r3*pc(res,24)-mdpc(res,6))^2+\
                                        (r1*pc(res,19)+r2*pc(res,22)+r3*pc(res,25)-mdpc(res,7))^2+\
                                        (r1*pc(res,20)+r2*pc(res,23)+r3*pc(res,26)-mdpc(res,8))^2);\
                                        r1=10*distance(p1,p4);\
                                        r2=10*distance(p2,p4);\
                                        r3=10*distance(p3,p4)")

    # "res" is the residue-type index used as first argument of every table.
    side_chain.addPerBondParameter("res")
    # Tables are transposed before flattening, so the residue index varies
    # fastest -- presumably the layout Discrete2DFunction expects; confirm
    # against the OpenMM documentation.
    side_chain.addTabulatedFunction("pc", Discrete2DFunction(20, 27, precisions_chol_all_res.T.flatten()))
    side_chain.addTabulatedFunction("log_weights", Discrete2DFunction(20, 3, log_weights.T.flatten()))
    side_chain.addTabulatedFunction("log_det", Discrete2DFunction(20, 3, log_det_all_res.T.flatten()))
    side_chain.addTabulatedFunction("mdpc", Discrete2DFunction(20, 9, mean_dot_precisions_chol_all_res.T.flatten()))
    for i in range(oa.nres):
        # Chain termini and "IGL" residues get no side-chain bond.
        if i not in oa.chain_starts and i not in oa.chain_ends and not oa.res_type[i] == "IGL":
            # print(i)
            # if i != 1:
            #     continue
            # print(oa.n[i], oa.ca[i], oa.c[i], oa.cb[i])
            # print(i, oa.seq[i], gamma_se_map_1_letter[oa.seq[i]], precisions_chol_all_res[gamma_se_map_1_letter[oa.seq[i]]])

            # Particles p1..p4 are N, CA, C, CB; the bond parameter is the
            # residue-type index looked up from the one-letter sequence.
            side_chain.addBond([oa.n[i], oa.ca[i], oa.c[i], oa.cb[i]], [gamma_se_map_1_letter[oa.seq[i]]])
    side_chain.setForceGroup(forceGroup)
    return side_chain
Esempio n. 9
0
# Silence pandas' SettingWithCopyWarning, which is triggered when dtypes are
# converted in the get_deletions/mutations methods
pd.options.mode.chained_assignment = None

from Bio.PDB.Polypeptide import one_to_three

# Amino-acid property classes; members are one-letter residue codes.
_aa_property_dict_one = {
    'Tiny': list('ACGST'),
    'Small': list('ACDGNPSTV'),
    'Aliphatic': list('AILV'),
    'Aromatic': list('FHWY'),
    'Non-polar': list('ACFGILMPVWY'),
    'Polar': list('DEHKNQRST'),
    'Charged': list('DEHKR'),
    'Basic': list('HKR'),
    'Acidic': list('DE'),
}

# The same classes with every member converted to its three-letter code.
_aa_property_dict_three = {
    prop: list(map(one_to_three, members))
    for prop, members in _aa_property_dict_one.items()
}


def pairwise_sequence_alignment(a_seq, b_seq, engine, a_seq_id=None, b_seq_id=None,
                                gapopen=10, gapextend=0.5,
                                outfile=None, outdir=None, force_rerun=False):
    """Run a global pairwise sequence alignment between two sequence strings.

    Args:
        a_seq (str, Seq, SeqRecord, SeqProp): Reference sequence
        b_seq (str, Seq, SeqRecord, SeqProp): Sequence to be aligned to reference
        engine (str): `biopython` or `needle` - which pairwise alignment program to use
        a_seq_id (str): Reference sequence ID. If not set, is "a_seq"
        b_seq_id (str): Sequence to be aligned ID. If not set, is "b_seq"
        gapopen (int): Only for `needle` - Gap open penalty is the score taken away when a gap is created
        gapextend (float): Only for `needle` - Gap extension penalty is added to the standard gap penalty for each 
Esempio n. 10
0
log = logging.getLogger(__name__)

# Amino-acid property classes; members are one-letter residue codes.
_aa_property_dict_one = {
    'Aliphatic': {'A', 'I', 'L', 'V'},
    'Aromatic': {'F', 'H', 'W', 'Y'},
    'Non-polar': {'A', 'C', 'F', 'G', 'I', 'L', 'M', 'P', 'V', 'W', 'Y'},
    'Polar': {'D', 'E', 'H', 'K', 'N', 'Q', 'R', 'S', 'T'},
    'Charged': {'D', 'E', 'H', 'K', 'R'},
    'Basic': {'H', 'K', 'R'},
    'Acidic': {'D', 'E'},
    'Tiny': {'A', 'C', 'G', 'S', 'T'},
    'Small': {'A', 'C', 'D', 'G', 'N', 'P', 'S', 'T', 'V'}
}

# The same classes as lists of three-letter codes. The one-letter members are
# sorted first: set iteration order depends on string hashing and can differ
# between interpreter runs, which made the list order irreproducible.
_aa_property_dict_three = {
    k: [one_to_three(x) for x in sorted(v)]
    for k, v in _aa_property_dict_one.items()
}

_aa_flexibility_dict_one = {
    'A': -0.605,
    'C': -0.692,
    'D': -0.279,
    'E': -0.16,
    'F': -0.719,
    'G': -0.537,
    'H': -0.662,
    'I': -0.682,
    'K': -0.043,
    'L': -0.631,
    'M': -0.626,
Esempio n. 11
0
log = logging.getLogger(__name__)


# Amino-acid property classes; members are one-letter residue codes.
# The 'Tiny' (A, C, G, S, T) and 'Small' (A, C, D, G, N, P, S, T, V) classes
# are deliberately left out of this table.
_aa_property_dict_one = {
    'Aliphatic': list('AILV'),
    'Aromatic': list('FHWY'),
    'Non-polar': list('ACFGILMPVWY'),
    'Polar': list('DEHKNQRST'),
    'Charged': list('DEHKR'),
    'Basic': list('HKR'),
    'Acidic': list('DE'),
}

# The same classes with every member converted to its three-letter code.
_aa_property_dict_three = {
    name: list(map(one_to_three, codes))
    for name, codes in _aa_property_dict_one.items()
}


_aa_flexibility_dict_one = {'A': -0.605,
                            'C': -0.692,
                            'D': -0.279,
                            'E': -0.16,
                            'F': -0.719,
                            'G': -0.537,
                            'H': -0.662,
                            'I': -0.682,
                            'K': -0.043,
                            'L': -0.631,
                            'M': -0.626,
                            'N': -0.381,
                            'P': -0.271,
Esempio n. 12
0
from scipy import spatial
import matplotlib.pyplot as plt
import cPickle
from Bio.PDB import *
from Bio import SeqIO
import tempfile
from scipy.sparse import lil_matrix
from Bio.SubsMat import MatrixInfo
from Bio.PDB.Polypeptide import one_to_three
import urllib, os, traceback, pdb
# Alphabet of one-letter residue codes; '-' is the last symbol.
AA = 'ACDEFGHIKLMNPQRSTVWY-'
# One-letter symbol -> position in AA.
aaidx = dict(zip(AA, range(len(AA))))
# Three-letter code -> position in AA, for every symbol one_to_three knows.
aa3idx = {}
for __i__, __a__ in enumerate(AA):
    try:
        aa3idx[one_to_three(__a__)] = __i__
    except KeyError:
        # Symbols without a three-letter code (such as '-') are left out.
        # Only the lookup failure is caught; a bare ``except`` would also
        # have hidden unrelated errors and KeyboardInterrupt.
        continue


def module_exists(module_name):
    """Return True if ``module_name`` can be imported, False on ImportError.

    The module is actually imported as a side effect of the check.
    """
    try:
        __import__(module_name)
        return True
    except ImportError:
        return False


def score_match(pair, matrix=MatrixInfo.blosum62):
    """
Esempio n. 13
0
def mutate_whole_sequence(res_list, new_sequence):
    """Rename every residue in ``res_list`` after ``new_sequence``.

    The residue at position i gets the three-letter name of the one-letter
    code ``new_sequence[i]``. Residues are modified in place and the same
    list is returned.
    """
    for position, residue in enumerate(res_list):
        residue.resname = one_to_three(new_sequence[position])
    return res_list
Esempio n. 14
0
from scipy import spatial
import matplotlib.pyplot as plt
import cPickle
from Bio.PDB import *
from Bio import SeqIO
import tempfile
from scipy.sparse import lil_matrix
from Bio.SubsMat import MatrixInfo
from Bio.PDB.Polypeptide import one_to_three
import urllib,os,traceback,pdb
# Alphabet of one-letter residue codes; '-' is the last symbol.
AA = 'ACDEFGHIKLMNPQRSTVWY-'
# One-letter symbol -> position in AA.
aaidx = dict(zip(AA, range(len(AA))))
# Three-letter code -> position in AA, for every symbol one_to_three knows.
aa3idx = {}
for __i__, __a__ in enumerate(AA):
    try:
        aa3idx[one_to_three(__a__)] = __i__
    except KeyError:
        # Symbols without a three-letter code (such as '-') are left out.
        # Catching only KeyError keeps unrelated failures and
        # KeyboardInterrupt visible, unlike the former bare ``except``.
        continue


def module_exists(module_name):
    """Tell whether ``module_name`` is importable.

    Returns False when importing it raises ImportError, True otherwise.
    The module is imported as a side effect.
    """
    importable = True
    try:
        __import__(module_name)
    except ImportError:
        importable = False
    return importable
def score_match(pair,matrix=MatrixInfo.blosum62):
    """
    Given a tuple pair of amino acids, it returns the substitution matrix
    score
Esempio n. 15
0
Wrapper for stride (predictor of RASA)
@author: Afsar with modifications from Basir
"""
import numpy as np
import tempfile
import os

from Bio.PDB.Polypeptide import one_to_three

from codebase.constants import amino_acids

# Three-letter code -> one-letter symbol, and three-letter code -> position
# of the symbol in ``amino_acids``.
to_one_letter_code = {}
aa3idx = {}
for index, amino_acid in enumerate(amino_acids):
    try:
        _three_letter = one_to_three(amino_acid)
    except KeyError:
        # Symbols one_to_three does not know are left out of both tables.
        # The previous ``except ():`` named an empty tuple and therefore
        # caught nothing, so such a symbol aborted the module import.
        continue
    aa3idx[_three_letter] = index
    to_one_letter_code[_three_letter] = amino_acid


def get_max_asa(s=None):
    """
    This function returns a dictionary containing the maximum ASA for 
    different residues. when s=single, single letter codes of aa are also
    added to the dictionary
    """
    max_acc = {
        "ALA": 106.0,
        "CYS": 135.0,
        "ASP": 163.0,
Esempio n. 16
0
from Bio.PDB.Polypeptide import one_to_three

# Amino-acid property classes; members are one-letter residue codes.
_aa_property_dict_one = dict([
    ('Tiny', list('ACGST')),
    ('Small', list('ACDGNPSTV')),
    ('Aliphatic', list('AILV')),
    ('Aromatic', list('FHWY')),
    ('Non-polar', list('ACFGILMPVWY')),
    ('Polar', list('DEHKNQRST')),
    ('Charged', list('DEHKR')),
    ('Basic', list('HKR')),
    ('Acidic', list('DE')),
])

# The same classes with every member converted to its three-letter code.
_aa_property_dict_three = {
    label: list(map(one_to_three, letters))
    for label, letters in _aa_property_dict_one.items()
}


def pairwise_sequence_alignment(a_seq,
                                b_seq,
                                engine,
                                a_seq_id=None,
                                b_seq_id=None,
                                gapopen=10,
                                gapextend=0.5,
                                outfile=None,
                                outdir=None,
                                force_rerun=False):
    """Run a global pairwise sequence alignment between two sequence strings.