예제 #1
0
def calculate_residue_depth(aromatic_residues, model):
    """Select the residues whose depth marks them as surface exposed.

    A residue counts as surface exposed when its residue depth is at most
    3.03. Any failure while building the surface or measuring a depth is
    turned into a ``RuntimeWarning`` and an empty result.

    Parameters
    ----------
    aromatic_residues: list of :class:`Bio.PDB.Residue.Residue`
        residues included in the analysis
    model: :class:`Bio.PDB.Model.Model`
        BioPython object containing a model of a PDB or MMCIF file

    Returns
    -------
    surface_exposed_res: list
        ``node_label`` of every residue at or above the surface cutoff;
        empty if the depth calculation failed

    """
    depth_cutoff = 3.03
    try:
        molecular_surface = get_surface(model)
        return [
            res.node_label
            for res in aromatic_residues
            if residue_depth(res, molecular_surface) <= depth_cutoff
        ]
    except Exception:
        # Best effort: report the failure and let the caller continue
        # with no exposed residues.
        warnings.warn(
            "Unable to calculate residue depth. "
            "Check that MSMS is installed. "
            "Please note that MSMS is not compatible with MacOS Catalina.",
            RuntimeWarning,
            stacklevel=2)
        return []
예제 #2
0
def GetResidueDepPDB(pdb, pdbfile):
    """Append residue depth and C-alpha depth of every amino acid to OUT.

    One tab-separated line is produced per amino-acid residue of the first
    model: ``pdb``, chain id, residue number, residue name, residue depth,
    C-alpha depth. Residues whose depth cannot be computed are skipped.
    The lines are appended to the file named by ``OUT`` while holding
    ``mutex_writefile``.

    Parameters
    ----------
    pdb
        Identifier passed to ``GetStructure`` and written as the first column.
    pdbfile
        Path handed to ``get_surface`` to build the molecular surface.

    Returns
    -------
    None
        Nothing is written when the surface cannot be built; a message is
        printed instead.
    """
    s = GetStructure(pdb)
    model = s[0]
    residuelist = Selection.unfold_entities(model, 'R')
    try:
        surface = get_surface(pdbfile, PDBTOXYZ, MSMS)
    except Exception:
        # Parenthesised single-argument form prints identically on Python 2 and 3.
        print("cannot get surface for " + pdbfile)
        return

    lines = []
    for residue in residuelist:
        if not is_aa(residue):
            continue
        resid = residue.get_id()
        resname = residue.get_resname()
        chainid = residue.get_parent().get_id()
        try:
            rd = residue_depth(residue, surface)
        except Exception:
            # Skip residues for which no depth can be computed.
            continue
        ca_rd = ca_depth(residue, surface)
        # NOTE(review): the previous code looped over these fields with an
        # `if not each: continue` that had no effect; rows with empty fields
        # were (and still are) written as-is.
        info = [pdb, chainid, resid[1], resname, rd, ca_rd]
        lines.append("\t".join(map(str, info)) + "\n")
    content = "".join(lines)

    mutex_writefile.acquire()
    try:
        # `with` closes the file and `finally` frees the lock even if the
        # write fails, so other writers are never blocked forever.
        with open(OUT, "a") as outobj:
            outobj.write(content)
    finally:
        mutex_writefile.release()
예제 #3
0
def get_residue_depths(pdb_file):
    """
    Compute three depth measures for every residue of a PDB file.

    Parameters
    ----------
    pdb_file
        Passed to ``read_pdb`` to obtain the structure and its residues.

    Returns
    -------
    dict
        ``"depth_mean"``: ``residue_depth`` of each residue,
        ``"depth_cb"``: ``min_dist`` between the residue's beta coordinates
        and the surface,
        ``"depth_ca"``: ``ca_depth`` of each residue.
        Each value is a numpy array in residue order.
    """
    # read_pdb returns five values; only the first two are used here.
    structure, residues, _third, _fourth, _fifth = read_pdb(pdb_file)
    surface = get_surface(structure)

    mean_depths = np.array([residue_depth(res, surface) for res in residues])
    cb_depths = np.array(
        [min_dist(get_beta_coordinates(res), surface) for res in residues])
    ca_depths = np.array([ca_depth(res, surface) for res in residues])

    return {
        "depth_mean": mean_depths,
        "depth_cb": cb_depths,
        "depth_ca": ca_depths,
    }
예제 #4
0
def get_structure_stat(structure, surface):
    """Print a per-model, per-chain residue summary of a structure.

    For every chain the number of residues, the number of water residues
    (hetero flag ``'W'``) and the number of other hetero residues (hetero
    flag neither ``'W'`` nor ``' '``) are printed. Each non-water hetero
    residue additionally gets one detail line with its residue depth,
    occupancy, B-factor, residue name and a guessed classification.

    Parameters
    ----------
    structure
        Object exposing ``id`` and ``get_list()`` (models); models expose
        ``get_list()`` (chains); chains expose ``id``, ``child_list`` and
        iterate over residues.
    surface
        Surface passed through to ``residue_depth``.

    Returns
    -------
    None
    """
    # Parenthesised single-argument prints behave the same on Python 2 and 3.
    print("* " + structure.id)
    print("  number of models:" + str(len(structure.get_list())))
    for modelnum, model in enumerate(structure.get_list(), 1):
        print("  Model " + str(modelnum))
        print("    number of chains:" + str(len(model.get_list())))
        for chain in model.get_list():
            print("    chain " + chain.id)
            print("      number of residues                   : " + str(
                len(chain.child_list)))
            water_count = 0
            hetero_lines = []
            for res in chain:
                res_id = res.id
                het_flag = res_id[0]
                if het_flag == 'W':
                    water_count += 1
                elif het_flag != ' ':
                    resname = res.resname
                    # The checks are independent on purpose: a later match
                    # overrides an earlier one.
                    kind = '?'
                    if Is_carbohydrate_residue(resname):
                        kind = ' carbohydrate '
                    if Is_cofactor_residue(resname):
                        kind = ' cofactor '
                    if Is_protein_residue(resname):
                        kind = ' protein '
                    if Is_ion_residue(resname):
                        kind = ' ion '
                    if Is_not_lig_residue(res):
                        kind = ' notLig '
                    if Has_backbone(res):
                        kind = kind + ' has backbone: amino_acid??'
                    if Is_lig_residue(res):
                        kind = kind + ' maybe Lig? '

                    rd = residue_depth(res, surface)
                    o = calc_occupancy(res)
                    b = calc_bfactor(res)
                    hetero_lines.append(
                        "        " + het_flag + " " + str(res_id[1])
                        + " (barial = " + str(rd) + ";"
                        + " occupancy = " + str(o) + ";"
                        + " bfactor = " + str(b) + ";" + resname + "==" + kind
                        + ")" + '\n')
            print("      number that are water residues       : " + str(
                water_count))
            print("      number that are other hetro-residues : " + str(
                len(hetero_lines)))
            # Each detail line already ends with a newline; print adds one
            # more, leaving a blank line after every chain.
            print("".join(hetero_lines))
예제 #5
0
def get_depth_contact_res(model, contact_res):
    """Map each contact residue key to its residue depth.

    Args:
        model: structure model, indexed first by ``key[0]`` (chain) and then
            by ``key[1]`` (residue).
        contact_res: iterable of hashable keys whose first two items select
            the chain and the residue.

    Returns:
        dict: ``{key: residue depth}`` for every key in ``contact_res``.
    """
    surface = get_surface(model)
    return {
        key: residue_depth(model[key[0]][key[1]], surface)
        for key in contact_res
    }
예제 #6
0
def get_depth_contact_res(model, contact_res):
    """Compute the residue depth of every contact residue.

    Args:
        model (bio model): model of the structure; indexed by chain, then
            by residue
        contact_res (list): contact residue keys; item 0 selects the chain
            and item 1 the residue within it

    Returns:
        dict: residue depth keyed by the entries of ``contact_res``
    """
    surface = get_surface(model)

    def _depth_of(key):
        # Resolve the residue object for this key and measure its depth.
        residue = model[key[0]][key[1]]
        return residue_depth(residue, surface)

    return dict((key, _depth_of(key)) for key in contact_res)
예제 #7
0
def get_structure_eval(structure, surface):
    """Score a structure from its residue depths and its size.

    Returns a 4-tuple ``(rd, o, b, score)``: the summed residue depth,
    occupancy and B-factor of the residues of the last chain visited, and
    ``score = rd + size_score`` where ``size_score`` comes from
    ``eval_size(structure)``. All three sums are 0 when the structure has
    no chains.
    """
    size_score = eval_size(structure)
    depth_sum = occupancy_sum = bfactor_sum = 0
    for model in structure.get_list():
        for chain in model.get_list():
            # NOTE(review): the totals restart for every chain, so only the
            # final chain contributes to the result - confirm this is
            # intended rather than a sum over the whole structure.
            depth_sum, occupancy_sum, bfactor_sum = 0, 0, 0
            for res in chain:
                depth_sum += residue_depth(res, surface)
                occupancy_sum += calc_occupancy(res)
                bfactor_sum += calc_bfactor(res)

    # We should also consider penalizing the ligand if not all atoms are
    # represented in the PDB. An earlier formula also weighed occupancy and
    # B-factor: rd + o - 0.5 * b + size_score.
    score = depth_sum + size_score
    return depth_sum, occupancy_sum, bfactor_sum, score
예제 #8
0
def readPDBFile(structName, fileName):
    """Build one feature list per polypeptide residue of a PDB file.

    For every residue that is part of a polypeptide the feature list is::

        [residue code / 20, phi / 360, psi / 360, residue depth,
         number of residues, charged residues and polar residues within
         5 of the C-alpha atom, number of remaining nearby residues,
         secondary-structure index / 7] + dssp[key][6:]

    Residues are keyed by their sequence number; the first occurrence wins
    when the same number appears again (other chain or model). A residue is
    left out when phi or psi is undefined (chain ends) or when it has no
    DSSP entry.

    NOTE(review): ``result`` and ``dssp`` are not defined in this function;
    they must be provided by the enclosing module. ``dssp`` is looked up
    with ``(chain, residue.id)`` keys - confirm that matches how it is built.

    :param structName: name given to the parsed structure
    :param fileName: path of the PDB file to parse
    :returns: ``result``, mapping residue number -> feature list
    """
    charged = {"ARG", "HIS", "LYS", "ASP", "GLU"}
    polar = {"SER", "THR", "TYR", "ASN", "GLN"}
    # Radius around the residue's alpha carbon used for the neighbour counts.
    searchRadius = 5

    # Based on https://warwick.ac.uk/fac/sci/moac/people/students/peter_cock/python/ramachandran/calculate/#BioPython
    structure = Bio.PDB.PDBParser().get_structure(structName, fileName)
    for model in structure:
        # numpy array of all surface vertices of the folded protein
        surface = get_surface(model)

        for chain in model:
            polypeptides = Bio.PDB.PPBuilder().build_peptides(chain)

            atoms = Bio.PDB.Selection.unfold_entities(chain, "A")
            ns = Bio.PDB.NeighborSearch(atoms)

            for poly in polypeptides:
                for residue, (phi, psi) in zip(poly,
                                               poly.get_phi_psi_list()):
                    resNumber = residue.id[1]
                    # Take the first data point for overlapping chains.
                    if resNumber in result:
                        continue

                    # Residue type scaled by the 20 standard amino acids.
                    resCode = Bio.PDB.Polypeptide.three_to_index(
                        residue.resname) / 20

                    # phi/psi cannot be calculated at the chain edges.
                    # NOTE(review): Biopython documents these angles in
                    # radians; dividing by 360 looks like a degree
                    # normalisation - confirm.
                    if phi is not None:
                        phi /= 360
                    if psi is not None:
                        psi /= 360

                    # Average depth of all atoms in the residue from the surface.
                    depth = residue_depth(residue, surface)

                    # Group the residues owning an atom within searchRadius
                    # of this residue's alpha carbon.
                    closeAtoms = ns.search(residue["CA"].coord, searchRadius)
                    residues, curCharged, curPolar, curNonPolar = set(
                    ), set(), set(), set()
                    for atom in closeAtoms:
                        currentRes = atom.get_parent()
                        curName = currentRes.resname
                        residues.add(currentRes)
                        if curName in charged:
                            curCharged.add(currentRes)
                        elif curName in polar:
                            curPolar.add(currentRes)
                        else:
                            # Everything that is neither charged nor polar.
                            curNonPolar.add(currentRes)

                    dsspKey = (chain, residue.id)
                    if dsspKey not in dssp:
                        # Without a DSSP entry there is no secondary
                        # structure or energy data; previously the values of
                        # the preceding residue were silently reused (or a
                        # NameError was raised for the first residue).
                        continue
                    dsspEntry = dssp[dsspKey]
                    secondary = Bio.PDB.DSSP.ss_to_index(dsspEntry[2]) / 7
                    energyList = list(dsspEntry[6:])

                    featuresList = [
                        resCode, phi, psi, depth,
                        len(residues),
                        len(curCharged),
                        len(curPolar),
                        len(curNonPolar)
                    ] + [secondary] + energyList

                    # Drop residues for which phi/psi cannot be calculated.
                    if None not in featuresList:
                        result[resNumber] = featuresList
    return result
예제 #9
0
def get_residue_depth(pdb_fh, msms_fh):
    """
    Extracts residue depth for chain A of a PDB structure.

    The molecular surface is computed with MSMS on first use and cached as
    ``<dir of msms_fh>/<basename of pdb_fh>.msms.vert``; later calls read
    that file instead of running MSMS again.

    :param pdb_fh: path to PDB structure file
    :param msms_fh: path to the MSMS executable; ``pdb_to_xyzr`` is expected
        in the same directory, which is also where the ``.xyzr``, ``.vert``
        and log files are written
    :returns data_depth: pandas table indexed by residue number (``aasi``)
        with columns "Residue depth" and "Residue (C-alpha) depth"; empty
        when the first model has no amino-acid residue in chain A
    """
    from Bio.PDB import Selection, PDBParser
    from Bio.PDB.Polypeptide import is_aa
    from Bio.PDB.ResidueDepth import _read_vertex_array, residue_depth, ca_depth

    depth_columns = ["Residue depth", "Residue (C-alpha) depth"]
    msms_dir = dirname(msms_fh)
    surface_fh = "%s/%s.msms.vert" % (msms_dir, basename(pdb_fh))
    if not exists(surface_fh):
        surface_base = splitext(surface_fh)[0]
        pdb_to_xyzr_fh = "%s/pdb_to_xyzr" % msms_dir
        xyzr_fh = "%s/%s.xyzr" % (msms_dir, basename(pdb_fh))
        pdb_to_xyzr_com = "%s %s > %s" % (pdb_to_xyzr_fh, pdb_fh, xyzr_fh)
        msms_com = "%s -probe_radius 1.5 -if %s -of %s > %s.log" % (
            msms_fh, xyzr_fh, surface_base, surface_base)
        # NOTE(review): the commands go through the shell with unquoted
        # paths, so paths containing spaces or shell metacharacters break.
        with open("%s.log" % msms_fh, 'a') as log_f:
            log_f.write("%s;\n%s\n" % (pdb_to_xyzr_com, msms_com))
            # Flush so the command echo precedes the tools' own output.
            log_f.flush()
            subprocess.call("%s;%s" % (pdb_to_xyzr_com, msms_com),
                            shell=True,
                            stdout=log_f,
                            stderr=subprocess.STDOUT)

    surface = _read_vertex_array(surface_fh)

    pdb_parser = PDBParser()
    pdb_data = pdb_parser.get_structure("pdb_name", pdb_fh)
    model = pdb_data[0]
    residue_list = Selection.unfold_entities(model, 'R')

    depth_dict = {}
    for residue in residue_list:
        if not is_aa(residue):
            continue
        chain_id = residue.get_parent().get_id()
        if chain_id != "A":
            # Only chain A is reported. Skipping (rather than stopping)
            # keeps the result correct when chain A is not the first chain.
            continue
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        res_id = residue.get_id()
        depth_dict[(chain_id, res_id)] = (rd, ca_rd)
        # Update xtra information
        residue.xtra['EXP_RD'] = rd
        residue.xtra['EXP_RD_CA'] = ca_rd

    if not depth_dict:
        # Nothing to tabulate; the reshaping below needs at least one row.
        empty_df = pd.DataFrame(columns=depth_columns)
        empty_df.index.name = "aasi"
        return empty_df

    # Rows are keyed by (chain id, residue id); after the transpose the two
    # key parts become the columns level_0 and level_1.
    depth_df = pd.DataFrame(depth_dict).T.reset_index()
    depth_df = depth_df.drop("level_0", axis=1)
    # Keep only the first of consecutive rows sharing a residue number.
    # None as the start value ensures a first residue numbered 0 is kept.
    aasi_prev = None
    for i in range(len(depth_df)):
        aasi = depth_df.loc[i, "level_1"][1]
        if aasi != aasi_prev:
            depth_df.loc[i, "aasi"] = aasi
            aasi_prev = aasi

    depth_df = depth_df.drop("level_1", axis=1)
    depth_df = depth_df.loc[~pd.isnull(depth_df.loc[:, "aasi"]), :]
    depth_df = depth_df.set_index("aasi", drop=True)
    depth_df.columns = depth_columns
    return depth_df
예제 #10
0
def get_residue_depth(pdb_fh, msms_fh):
    """
    Extracts residue depth for chain A of a PDB structure.

    The molecular surface is computed with MSMS on first use and cached as
    ``<dir of msms_fh>/<basename of pdb_fh>.msms.vert``; later calls read
    that file instead of running MSMS again.

    :param pdb_fh: path to PDB structure file
    :param msms_fh: path to the MSMS executable; ``pdb_to_xyzr`` is expected
        in the same directory, which is also where the ``.xyzr``, ``.vert``
        and log files are written
    :returns data_depth: pandas table indexed by residue number (``aasi``)
        with columns "Residue depth" and "Residue (C-alpha) depth"; empty
        when the first model has no amino-acid residue in chain A
    """
    from Bio.PDB import Selection, PDBParser
    from Bio.PDB.Polypeptide import is_aa
    from Bio.PDB.ResidueDepth import _read_vertex_array, residue_depth, ca_depth

    depth_columns = ["Residue depth", "Residue (C-alpha) depth"]
    msms_dir = dirname(msms_fh)
    surface_fh = "%s/%s.msms.vert" % (msms_dir, basename(pdb_fh))
    if not exists(surface_fh):
        surface_base = splitext(surface_fh)[0]
        pdb_to_xyzr_fh = "%s/pdb_to_xyzr" % msms_dir
        xyzr_fh = "%s/%s.xyzr" % (msms_dir, basename(pdb_fh))
        pdb_to_xyzr_com = "%s %s > %s" % (pdb_to_xyzr_fh, pdb_fh, xyzr_fh)
        msms_com = "%s -probe_radius 1.5 -if %s -of %s > %s.log" % (
            msms_fh, xyzr_fh, surface_base, surface_base)
        # NOTE(review): the commands go through the shell with unquoted
        # paths, so paths containing spaces or shell metacharacters break.
        with open("%s.log" % msms_fh, 'a') as log_f:
            log_f.write("%s;\n%s\n" % (pdb_to_xyzr_com, msms_com))
            # Flush so the command echo precedes the tools' own output.
            log_f.flush()
            subprocess.call("%s;%s" % (pdb_to_xyzr_com, msms_com),
                            shell=True,
                            stdout=log_f,
                            stderr=subprocess.STDOUT)

    surface = _read_vertex_array(surface_fh)

    pdb_parser = PDBParser()
    pdb_data = pdb_parser.get_structure("pdb_name", pdb_fh)
    model = pdb_data[0]
    residue_list = Selection.unfold_entities(model, 'R')

    depth_dict = {}
    for residue in residue_list:
        if not is_aa(residue):
            continue
        chain_id = residue.get_parent().get_id()
        if chain_id != "A":
            # Only chain A is reported. Skipping (rather than stopping)
            # keeps the result correct when chain A is not the first chain.
            continue
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        res_id = residue.get_id()
        depth_dict[(chain_id, res_id)] = (rd, ca_rd)
        # Update xtra information
        residue.xtra['EXP_RD'] = rd
        residue.xtra['EXP_RD_CA'] = ca_rd

    if not depth_dict:
        # Nothing to tabulate; the reshaping below needs at least one row.
        empty_df = pd.DataFrame(columns=depth_columns)
        empty_df.index.name = "aasi"
        return empty_df

    # Rows are keyed by (chain id, residue id); after the transpose the two
    # key parts become the columns level_0 and level_1.
    depth_df = pd.DataFrame(depth_dict).T.reset_index()
    depth_df = depth_df.drop("level_0", axis=1)
    # Keep only the first of consecutive rows sharing a residue number.
    # None as the start value ensures a first residue numbered 0 is kept.
    aasi_prev = None
    for i in range(len(depth_df)):
        aasi = depth_df.loc[i, "level_1"][1]
        if aasi != aasi_prev:
            depth_df.loc[i, "aasi"] = aasi
            aasi_prev = aasi

    depth_df = depth_df.drop("level_1", axis=1)
    depth_df = depth_df.loc[~pd.isnull(depth_df.loc[:, "aasi"]), :]
    depth_df = depth_df.set_index("aasi", drop=True)
    depth_df.columns = depth_columns
    return depth_df