Esempio n. 1
0
    def GetUniqueChains(self, pdir, pdbID, chains_to_check):
        """Group the requested chains by their C-alpha derived sequence.

        The structure file ``pdir + 'pdb' + self.pdbID + '.ent'`` is parsed and
        only its first model is inspected.  For every chain id in
        ``chains_to_check`` a one-letter string is assembled from the residues
        that own an atom named ``CA``; codes that are not members of
        ``amino_dict.amino`` are dropped.  Chains that end up with the same
        string are placed in the same group, so the comparison is driven by
        the atoms present in the structure rather than by a sequence record.

        Args:
            pdir: Prefix that is concatenated directly in front of the file
                name (no path separator is added here).
            pdbID: Accepted for interface compatibility; this method does not
                read it and uses ``self.pdbID`` instead.
            chains_to_check: Iterable of chain ids to look up in the first
                model.

        Returns:
            A 2-tuple ``(representatives, groups)``.  ``groups`` is the sorted
            list of label lists, one list per distinct string, and
            ``representatives`` is the sorted list holding the first label of
            every group.  Labels are built as ``<id>_<chain>`` where ``<id>``
            is characters 3..6 of the entry file name.
        """
        ent_name = 'pdb' + self.pdbID + '.ent'
        parser = PDBParser(PERMISSIVE=True, QUIET=True)
        first_model = parser.get_structure(self.pdbID, pdir + ent_name)[0]

        # label -> C-alpha string; a chain id listed twice keeps its first entry.
        sequence_of = {}
        for chain_id in chains_to_check:
            label = ent_name[3:7] + '_' + chain_id
            one_letter_codes = [
                amino_dict.replace_all(residue.resname, amino_dict.one_letter)
                for residue in first_model[chain_id]
                for atom in residue
                if atom.name == 'CA'
            ]
            ca_string = "".join(
                code for code in one_letter_codes if code in amino_dict.amino)
            sequence_of.setdefault(label, ca_string)

        # Invert the mapping: identical strings collect their chain labels.
        labels_by_sequence = {}
        for label, ca_string in sequence_of.items():
            labels_by_sequence.setdefault(ca_string, []).append(label)

        list_of_matches = list(labels_by_sequence.values())
        req_matches = [members[0] for members in list_of_matches]
        return sorted(req_matches), sorted(list_of_matches)
def AEI(AAAB14):
    """Add reduced amino-acid columns next to the 'QuadB' column of a table.

    ``AAAB14`` is a list of rows whose first row is the header.  The column
    index of ``'QuadB'`` is obtained from ``GetInd``; two columns are then
    inserted directly after it in every row:

    * ``AEI_Group`` - the first three characters of the row's QuadB value
      passed through ``amino_dict.replace_all`` with ``amino_dict.Red_Wang``
      and upper-cased (presumably a reduced amino-acid alphabet for the
      Ion Environments project - confirm against ``amino_dict``).
    * ``Sort_AEI`` - the same string with its characters sorted.

    The header row and every data row of ``AAAB14`` are modified in place;
    the returned list is a new outer list that holds those same row objects.
    """
    header, rows = AAAB14[0], AAAB14[1:]
    quad_col = GetInd(header, 'QuadB')
    group_col = quad_col + 1
    sorted_col = quad_col + 2

    header.insert(group_col, 'AEI_Group')
    header.insert(sorted_col, 'Sort_AEI')

    table = [header]
    for row in rows:
        # Only the first three characters of the QuadB entry are translated.
        reduced = amino_dict.replace_all(row[quad_col][0:3],
                                         amino_dict.Red_Wang).upper()
        reduced_sorted = ''.join(sorted(reduced))
        row.insert(group_col, reduced)
        row.insert(sorted_col, reduced_sorted)
        table.append(row)
    return table
def ProteinDelaunay(pdbid, chain):
    """Tessellate a chain's C-alpha point cloud, tabulate it and plot it.

    The point cloud, temperature factors and residue names are fetched with
    ``PointCloudData(pdbid, chain)``.  A convex hull and a Delaunay
    tessellation are computed over the points, one table row is produced for
    every Delaunay simplex (tetrahedron), and the tessellation edges are then
    drawn with Mayavi, saved to ``<pdbid>_MayaviViz.x3d`` and displayed via
    ``mlab.show()``.

    Args:
        pdbid: PDB identifier; stored in every row and used as the prefix of
            the exported ``.x3d`` file name.
        chain: Chain identifier forwarded to ``PointCloudData``.

    Returns:
        A list of rows.  The first row is the header; each following row holds
        for one simplex: the PDB id, the four-letter one-letter code and its
        sorted form, the reduced (``amino_dict.FLP``) code and its sorted
        form, the four vertex indices, the six edge lengths, their rounded
        sum / mean / deviation, the ``DevTetra`` value, the tetrahedron
        volume, the four temperature factors with their sum and mean, and the
        convex hull area and volume of the whole point cloud.
    """
    Head = ['PDBID', 'Quad', 'SortedQuad', 'RedAlpha', 'SortRedAlpha',
            'V1', 'V2', 'V3', 'V4', 'L1', 'L2', 'L3', 'L4', 'L5', 'L6',
            'SumL', 'AvgL', 'DevL', 'DevTetra', 'Vol',
            'TF1', 'TF2', 'TF3', 'TF4', 'SumTF', 'AvgTF',
            'hullArea', 'hullVolume']
    Data = [Head]

    # The chain requested by the caller is the one that must be loaded.
    pointcloud, bf, resname = PointCloudData(pdbid, chain)

    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print("Given PDB ID:  %s" % (pdbid,))
    print("Given Chain ID: %s" % (chain,))
    print("Number of C-alpha points:  %s" % (len(pointcloud),))

    # Convex hull of the whole point cloud (same two values go in every row).
    ConvxHull = ConvexHull(pointcloud)
    hullArea = round(ConvxHull.area, 4)
    hullVolume = round(ConvxHull.volume, 4)

    # Delaunay tessellation.  ``simplices`` is the supported attribute name;
    # ``vertices`` was only a deprecated alias for the same array.
    delaunay_hull = Delaunay(pointcloud,
                             furthest_site=False,
                             incremental=False,
                             qhull_options='Qc')
    delaunay_points = delaunay_hull.points
    delaunay_simplices = delaunay_hull.simplices
    print("Number of Delaunay Simplices:  %s" % (len(delaunay_simplices),))

    for simplex in delaunay_simplices:
        # The (2, 1, 3, 0) vertex order is kept unchanged because it decides
        # which value lands in the V1..V4, L1..L6 and TF1..TF4 columns.
        one, two, three, four = simplex[2], simplex[1], simplex[3], simplex[0]
        corners = (one, two, three, four)

        cordA, cordB, cordC, cordD = [pointcloud[v] for v in corners]
        names = [resname[v] for v in corners]
        a_tf, b_tf, c_tf, d_tf = [bf[v] for v in corners]

        # Three-letter names -> one-letter codes (plain and sorted string).
        one_letters = [amino_dict.replace_all(name, amino_dict.one_letter)
                       for name in names]
        oneLet = ''.join(one_letters)
        sortOneLet = ''.join(sorted(one_letters))

        # One-letter codes -> reduced alphabet (plain and sorted, upper-case).
        reduced = [amino_dict.replace_all(code, amino_dict.FLP)
                   for code in one_letters]
        flp = ''.join(reduced).upper()
        sortflp = ''.join(sorted(reduced)).upper()

        # Six edge lengths of the tetrahedron.
        AB = np.linalg.norm(cordA - cordB)
        AC = np.linalg.norm(cordA - cordC)
        AD = np.linalg.norm(cordA - cordD)
        BC = np.linalg.norm(cordB - cordC)
        BD = np.linalg.norm(cordB - cordD)
        CD = np.linalg.norm(cordC - cordD)
        lenArr = [AB, AC, AD, BC, BD, CD]

        # Volume = |det of the three edge vectors meeting at D| / 6.
        primes = np.asarray([cordA - cordD, cordB - cordD, cordC - cordD])
        Vol = round(abs(np.linalg.det(primes)) / 6, 4)

        # Edge statistics; the mean and deviation are derived from the
        # already-rounded values, exactly as before, so numbers do not shift.
        SumL = round(AB + AC + AD + BC + BD + CD, 4)
        AvgL = round(SumL / 6, 4)
        DevL = round(
            math.sqrt(sum((edge - AvgL) ** 2 for edge in lenArr) / 6.0), 4)

        # Deviation from tetrahedrality (computed by the project helper).
        DevT = DevTetra(lenArr)

        # Temperature factor sum and mean.
        SumTF = round(a_tf + b_tf + c_tf + d_tf, 4)
        AvgTF = round(SumTF / 4, 4)

        Data.append([pdbid, oneLet, sortOneLet, flp, sortflp,
                     one, two, three, four,
                     AB, AC, AD, BC, BD, CD,
                     SumL, AvgL, DevL, DevT, Vol,
                     a_tf, b_tf, c_tf, d_tf, SumTF, AvgTF,
                     hullArea, hullVolume])

    # ---- Mayavi plot -------------------------------------------------------
    xs = delaunay_points[:, 0]
    ys = delaunay_points[:, 1]
    zs = delaunay_points[:, 2]
    # One scalar per point: its index in the point array.
    delaunay_indices = np.arange(len(delaunay_points))

    fig = mlab.figure(1, bgcolor=(0, 0, 0))
    # NOTE(review): rendering is switched off here and never switched back on
    # before saving/showing; left as in the original - confirm it is intended.
    fig.scene.disable_render = True

    # Points as red glyphs.
    mlab.points3d(xs, ys, zs, scale_factor=0.40, color=(0.99, 0.00, 0.00))
    # The drawn edges come from Mayavi's own delaunay3d filter applied to the
    # scatter source, not from the SciPy tessellation tabulated above.
    ion_c_alpha_scatter = mlab.pipeline.scalar_scatter(xs, ys, zs,
                                                       delaunay_indices)
    ion_c_alpha_delaunay = mlab.pipeline.delaunay3d(ion_c_alpha_scatter)
    ion_c_alpha_edges = mlab.pipeline.extract_edges(ion_c_alpha_delaunay)
    mlab.pipeline.surface(ion_c_alpha_edges, colormap='winter', opacity=0.4)
    mlab.savefig(pdbid + '_MayaviViz.x3d')
    mlab.show()
    return Data
def ProteinDelaunay(pdbid, chain):
    """Tessellate a chain's C-alpha point cloud and tabulate every tetrahedron.

    The point cloud, temperature factors and residue names are fetched with
    ``PointCloudData(pdbid, chain)``.  A Delaunay tessellation is computed
    over the points and one table row is produced for every simplex.

    Args:
        pdbid: PDB identifier; stored in the first column of every row.
        chain: Chain identifier forwarded to ``PointCloudData``.

    Returns:
        A list of rows.  The first row is the header; each following row holds
        for one simplex: the PDB id, the four-letter one-letter code and its
        sorted form, the reduced (``amino_dict.FLP``) code and its sorted
        form, the four vertex indices, the six edge lengths, their rounded
        sum / mean / deviation, the ``DevTetra`` value, the tetrahedron
        volume, and the four temperature factors with their sum and mean.
    """
    Head = [
        'PDBID', 'Quad', 'SortedQuad', 'RedAlpha', 'SortRedAlpha', 'V1', 'V2',
        'V3', 'V4', 'L1', 'L2', 'L3', 'L4', 'L5', 'L6', 'SumL', 'AvgL', 'DevL',
        'DevTetra', 'Vol', 'TF1', 'TF2', 'TF3', 'TF4', 'SumTF', 'AvgTF'
    ]
    Data = [Head]

    # The chain requested by the caller is the one that must be loaded.
    pointcloud, bf, resname = PointCloudData(pdbid, chain)

    # Delaunay tessellation.  ``simplices`` is the supported attribute name;
    # ``vertices`` was only a deprecated alias for the same array.
    # No convex hull is computed here: this variant emits no hull columns.
    delaunay_hull = Delaunay(pointcloud,
                             furthest_site=False,
                             incremental=False,
                             qhull_options='Qc')

    for simplex in delaunay_hull.simplices:
        # The (2, 1, 3, 0) vertex order is kept unchanged because it decides
        # which value lands in the V1..V4, L1..L6 and TF1..TF4 columns.
        one, two, three, four = simplex[2], simplex[1], simplex[3], simplex[0]
        corners = (one, two, three, four)

        cordA, cordB, cordC, cordD = [pointcloud[v] for v in corners]
        names = [resname[v] for v in corners]
        a_tf, b_tf, c_tf, d_tf = [bf[v] for v in corners]

        # Three-letter names -> one-letter codes (plain and sorted string).
        one_letters = [amino_dict.replace_all(name, amino_dict.one_letter)
                       for name in names]
        oneLet = ''.join(one_letters)
        sortOneLet = ''.join(sorted(one_letters))

        # One-letter codes -> reduced alphabet (plain and sorted, upper-case).
        reduced = [amino_dict.replace_all(code, amino_dict.FLP)
                   for code in one_letters]
        flp = ''.join(reduced).upper()
        sortflp = ''.join(sorted(reduced)).upper()

        # Six edge lengths of the tetrahedron.
        AB = np.linalg.norm(cordA - cordB)
        AC = np.linalg.norm(cordA - cordC)
        AD = np.linalg.norm(cordA - cordD)
        BC = np.linalg.norm(cordB - cordC)
        BD = np.linalg.norm(cordB - cordD)
        CD = np.linalg.norm(cordC - cordD)
        lenArr = [AB, AC, AD, BC, BD, CD]

        # Volume = |det of the three edge vectors meeting at D| / 6.
        primes = np.asarray([cordA - cordD, cordB - cordD, cordC - cordD])
        Vol = round(abs(np.linalg.det(primes)) / 6, 4)

        # Edge statistics; the mean and deviation are derived from the
        # already-rounded values, exactly as before, so numbers do not shift.
        SumL = round(AB + AC + AD + BC + BD + CD, 4)
        AvgL = round(SumL / 6, 4)
        DevL = round(
            math.sqrt(sum((edge - AvgL) ** 2 for edge in lenArr) / 6.0), 4)

        # Deviation from tetrahedrality (computed by the project helper).
        DevT = DevTetra(lenArr)

        # Temperature factor sum and mean.
        SumTF = round(a_tf + b_tf + c_tf + d_tf, 4)
        AvgTF = round(SumTF / 4, 4)

        Data.append([
            pdbid, oneLet, sortOneLet, flp, sortflp, one, two, three, four,
            AB, AC, AD, BC, BD, CD, SumL, AvgL, DevL, DevT, Vol,
            a_tf, b_tf, c_tf, d_tf, SumTF, AvgTF
        ])
    return Data