Example #1
def calc_dihedral(chain, child_res_id):
    """
    calculates the dihedral angles (phi, psi) for residue of index
    child_res_id in chain
    
    returns a tuple of form (phi, psi), if it exists
    
    """
    from math import pi
    from Bio import PDB
    
    try:
        CP = chain.child_list[(child_res_id-1)]['C'].get_vector()
        N = chain.child_list[child_res_id]['N'].get_vector()
        CA = chain.child_list[child_res_id]['CA'].get_vector()
        C = chain.child_list[child_res_id]['C'].get_vector()
        NA = chain.child_list[(child_res_id+1)]['N'].get_vector()
    except KeyError:
        return () # no dihedral angles for terminal residues or non-amino-acid 'residues'
    else:
        try:
            phi = PDB.calc_dihedral(CP, N, CA, C)*-180/pi
            psi = PDB.calc_dihedral(N, CA, C, NA)*-180/pi
            return (phi, psi)
        except ZeroDivisionError:
            return ()
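A minimal usage sketch for the helper above, assuming a hypothetical file example.pdb whose chain 'A' contains standard amino acids; it parses the structure with Bio.PDB and prints the backbone angles of one interior residue.

from Bio import PDB

# Hypothetical input file and chain ID.
structure = PDB.PDBParser(QUIET=True).get_structure("example", "example.pdb")
chain = structure[0]["A"]

# Index 10 into chain.child_list; terminal or incomplete residues yield ().
angles = calc_dihedral(chain, 10)
if angles:
    phi, psi = angles
    print("phi=%.1f psi=%.1f" % (phi, psi))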
Example #2
def calc_all_dihedrals(chain, child_res_id):
    """
    calculates the dihedral angles (phi, psi, chia, chib) for residue of
    index child_res_id in chain
    
    returns a tuple of form (phi, psi, chia, chib), if it exists
    
    where ambiguity in chi angle definition exists:
    chia is in reference to the longer side chain or the heavier atom
    chib to the shorter
    
    if no ambiguity, chia=chib
    
    if the residue is a GLY or ALA, chia and chib are returned as 0
    """

    from math import pi
    from Bio import PDB
    try:
        residue = chain.child_list[child_res_id]
        name = residue.get_resname()
        if name == 'GLY' or name == 'ALA':
            dih = calc_dihedral(chain, child_res_id)
            phi = dih[0]
            psi = dih[1]
            return (phi, psi, 0, 0)          
        
    except KeyError:
        print("KeyError")
        return () # no dihedral angles for terminal residues or non-amino-acid 'residues'
    except IndexError:
        print("IndexError, probable cause: irregular PDB file")
        return ()
    try:
        CP = chain.child_list[(child_res_id-1)]['C'].get_vector()
        N = chain.child_list[child_res_id]['N'].get_vector()
        CA = chain.child_list[child_res_id]['CA'].get_vector()
        C = chain.child_list[child_res_id]['C'].get_vector()
        NA = chain.child_list[(child_res_id+1)]['N'].get_vector()
        CB = chain.child_list[child_res_id]['CB'].get_vector()
        fourth_chi_atom = chain.child_list[child_res_id].child_list[5].get_vector()
        if name == 'VAL' or name == 'ILE' or name == 'THR':
            alt_fourth_chi_atom = chain.child_list[child_res_id].child_list[6].get_vector()
        else:
            alt_fourth_chi_atom = fourth_chi_atom
    except KeyError:
        print('KeyError')
        return () # no dihedral angles for terminal residues or non-amino-acid 'residues'
    except IndexError:
        print('IndexError, probable cause: irregular PDB file')
        return ()
    else:
        try:
            phi = PDB.calc_dihedral(CP, N, CA, C)*-180/pi
            psi = PDB.calc_dihedral(N, CA, C, NA)*-180/pi
            chia= PDB.calc_dihedral(C, CA, CB, fourth_chi_atom)*-180/pi
            chib= PDB.calc_dihedral(C, CA, CB, alt_fourth_chi_atom)*-180/pi
            return (phi, psi, chia, chib)
        except ZeroDivisionError:
            return ()
Example #3
def calc_phi_psi(structure):
    '''Builds three lists of the protein's C-alpha, C and N atom vectors and
    uses them to calculate the backbone dihedral angles of the protein.'''
    from Bio import PDB

    atom_vector_list_Ca = []
    atom_vector_list_N = []
    atom_vector_list_C = []

    # Collect atom vectors, but only for residues that have a C-alpha atom.
    for chain in structure.get_chains():
        for res in chain:
            if res.has_id('CA'):
                for atom in res:
                    if atom.get_name() == 'N':
                        atom_vector_list_N.append(atom.get_vector())
                    elif atom.get_name() == 'CA':
                        atom_vector_list_Ca.append(atom.get_vector())
                    elif atom.get_name() == 'C':
                        atom_vector_list_C.append(atom.get_vector())

    len_vec = 0

    # If the C or N vector list is shorter than the C-alpha list (possibly due
    # to an error in the PDB structure), trim the length used for the dihedral
    # calculation so that all three lists are indexed consistently.
    if len(atom_vector_list_Ca) > len(atom_vector_list_C) or len(atom_vector_list_Ca) > len(
            atom_vector_list_N):
        c_ca = len(atom_vector_list_Ca) - len(atom_vector_list_C)
        n_ca = len(atom_vector_list_Ca) - len(atom_vector_list_N)
        if c_ca == n_ca:
            len_vec = len(atom_vector_list_Ca) - c_ca
    else:
        len_vec = len(atom_vector_list_Ca)

    dihedral_phi = []
    dihedral_psi = []

    # Exclude the first residue (no phi angle) and the last residue (no psi angle).
    cut_off = range(1, len_vec - 1)

    # Calculation of phi angles
    for i in cut_off:
        dihedral_phi.append(PDB.calc_dihedral(atom_vector_list_C[i - 1],
                                              atom_vector_list_N[i],
                                              atom_vector_list_Ca[i],
                                              atom_vector_list_C[i]))

    # Calculation of psi angles
    for i in cut_off:
        dihedral_psi.append(PDB.calc_dihedral(atom_vector_list_N[i],
                                              atom_vector_list_Ca[i],
                                              atom_vector_list_C[i],
                                              atom_vector_list_N[i + 1]))
    return (dihedral_phi, dihedral_psi)
Example #4
File: pdb.py Project: alisterburt/ABTT
def read(pdb_file):
    """
    reads a pdb file into a structure object
    :param pdb_file: pdb format file
    :return: structure
    """
    logging.info(f'reading pdb file: {pdb_file}')
    if not pdb_file.lower().endswith('.cif'):
        structure = PDB.PDBParser().get_structure(pdb_file, pdb_file)
    else:
        logging.info(f'switched to CIF mode for file: {pdb_file}')
        structure = PDB.MMCIFParser().get_structure(pdb_file, pdb_file)

    return structure
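A brief usage sketch for read(), with hypothetical file names; the extension check selects PDB.PDBParser for .pdb files and PDB.MMCIFParser for .cif files.

structure = read("model.pdb")      # hypothetical path, parsed with PDB.PDBParser
structure_cif = read("model.cif")  # hypothetical path, parsed with PDB.MMCIFParser
print(structure.get_id(), len(list(structure.get_chains())))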
Example #5
def calc_ramachandran(file_name_list):
    """
    Main calculation and plotting definition
    :param file_name_list: List of PDB files to plot
    :return: Dictionaries of normal and outlier torsion angles, keyed by residue class
    """
    global RAMA_PREF_VALUES

    if RAMA_PREF_VALUES is None:
        RAMA_PREF_VALUES = _cache_RAMA_PREF_VALUES()

    # Read in the expected torsion angles
    normals = {}
    outliers = {}
    for key, val in RAMA_PREFERENCES.items():
        normals[key] = {"x": [], "y": []}
        outliers[key] = {"x": [], "y": []}

    # Calculate the torsion angle of the inputs
    for inp in file_name_list:
        if not os.path.isfile(inp):
            continue
        structure = PDB.PDBParser().get_structure('input_structure', inp)
        for model in structure:
            for chain in model:
                polypeptides = PDB.PPBuilder().build_peptides(chain)
                for poly_index, poly in enumerate(polypeptides):
                    phi_psi = poly.get_phi_psi_list()
                    for res_index, residue in enumerate(poly):
                        res_name = "{}".format(residue.resname)
                        res_num = residue.id[1]
                        phi, psi = phi_psi[res_index]
                        if phi and psi:
                            if str(poly[res_index + 1].resname) == "PRO":
                                aa_type = "PRE-PRO"
                            elif res_name == "PRO":
                                aa_type = "PRO"
                            elif res_name == "GLY":
                                aa_type = "GLY"
                            else:
                                aa_type = "General"
                            if RAMA_PREF_VALUES[aa_type][int(math.degrees(psi)) + 180][int(math.degrees(phi)) + 180] < \
                                    RAMA_PREFERENCES[aa_type]["bounds"][1]:
                                outliers[aa_type]["x"].append(math.degrees(phi))
                                outliers[aa_type]["y"].append(math.degrees(psi))
                            else:
                                normals[aa_type]["x"].append(math.degrees(phi))
                                normals[aa_type]["y"].append(math.degrees(psi))
    return normals, outliers
Example #6
def get_dihedral( residue_list ):

	'''
	returns phi and psi angles of a residue and the amino acid sidechain present

	residue_list - []Bio.PDB.Residue - list of 3 *hopefully* continuous residues

	'''

	for one, two in zip( residue_list[:-1], residue_list[1:] ):

		if ( two.get_id()[1] - one.get_id()[1] ) != 1:

			raise BackboneError( "Discontinuous residues", two.get_id()[1] )

	atoms = (
		{"C": False},
		{"N": False,
		"CA": False,
		"C": False},
		{"N": False}
	)

	for i, residue in enumerate( residue_list ):

		if i == 1:

			res_name = SeqUtils.seq1( residue.get_resname() )

			if not is_aa( residue ):

				raise BackboneError( "Not a valid amino acid", residue.get_id()[1] )

		for atom in residue.get_unpacked_list():

			if atom.name in atoms[i].keys():
				
				atoms[i][ atom.name ] = atom.get_vector()

	if False in map( check_dict, atoms ):

		raise BackboneError( "Missing backbone atoms", residue.get_id()[1] )

	dihedrals = [
		PDB.calc_dihedral( atoms[0]["C"], atoms[1]["N"], atoms[1]["CA"], atoms[1]["C"] ), #phi
		PDB.calc_dihedral( atoms[1]["N"], atoms[1]["CA"], atoms[1]["C"], atoms[2]["N"] ) #psi
	]

	return ( dihedrals, res_name )
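A sliding-window usage sketch for get_dihedral(), assuming the project's BackboneError and check_dict helpers are importable and example.pdb is a hypothetical single-chain protein file; each call receives three consecutive residues.

from Bio import PDB

structure = PDB.PDBParser(QUIET=True).get_structure("example", "example.pdb")
residues = [res for res in structure[0]["A"] if PDB.is_aa(res)]

for i in range(1, len(residues) - 1):
    try:
        (phi, psi), aa = get_dihedral(residues[i - 1:i + 2])
    except BackboneError:
        continue  # discontinuous numbering, missing atoms, or non-standard residue
    print(aa, phi, psi)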
Example #7
def Separate_Chains(pdb_file):
    """Separate the two chains and return their name in a list
    Input: 
    -pdb file = target file 
    Output:
    -interaction = list with chain information
    """
    folder = "pdb_chains"

    if not Check_folder(folder):

        return False

    pdb_parser = pdb.PDBParser(PERMISSIVE=True, QUIET=True)

    pdb_structure = pdb_parser.get_structure("pdb_file", pdb_file)

    interaction = list(
        pdb_file[:-4].split("_")[-1]
    )  # Obtain a 2-element list with the chain names from the file name; the letter order must match the chain order in the pdb file (format: something_chains.pdb)

    if len(interaction) != 2:  # if the length is not 2, something went wrong

        print(settings.IncorrectName(interaction))

    for model in pdb_structure:
        for chain in model:

            chain_id = chain.get_id()

            class ChainSelect(pdb.Select):
                def accept_chain(self, chain):
                    if chain.get_id() == chain_id:
                        return True
                    else:
                        return False

            io = pdb.PDBIO()

            io.set_structure(pdb_structure)

            name = "%s_chain_%s.pdb" % (interaction[0] + interaction[1],
                                        chain_id)

            file_name = os.path.join(folder, name)

            io.save(file_name, ChainSelect())

    return interaction
Example #8
def _read_structure(path, pdb_id='pdb', cif_id='cif'):
    file_name = os.path.basename(path).split('.')[0]
    file_sufix = os.path.basename(path).split('.')[1]
    dir_path = os.path.dirname(path)
    if file_sufix == 'pdb':
        parser = struct.PDBParser(QUIET=True)
        structure = parser.get_structure(pdb_id, path)
    elif file_sufix == 'cif':
        parser = struct.MMCIFParser()
        structure = parser.get_structure(cif_id, path)
    else:
        print("ERROR: Unreognized file type " + file_sufix + " in " +
              file_name)
        sys.exit(1)
    return structure, dir_path, file_name
Example #9
def compute_chi3(structure_, model_, chain_, curr_residue_):
    chi3 = 999.00
    if curr_residue_.has_id('CB') and curr_residue_.has_id(
            'CG') and curr_residue_.has_id('CD'):
        curr_cb = structure_[model_.id][chain_.id][
            curr_residue_.id]['CB'].get_vector()
        curr_cg = structure_[model_.id][chain_.id][
            curr_residue_.id]['CG'].get_vector()
        curr_cd = structure_[model_.id][chain_.id][
            curr_residue_.id]['CD'].get_vector()

        if curr_residue_.has_id('NE') and curr_residue_.resname == 'ARG':
            curr_ne = structure_[model_.id][chain_.id][
                curr_residue_.id]['NE'].get_vector()
            chi3 = round(
                math.degrees(
                    PDB.calc_dihedral(curr_cb, curr_cg, curr_cd, curr_ne)), 2)

        if curr_residue_.has_id('OE1') and (curr_residue_.resname == 'GLN'
                                            or curr_residue_.resname == 'GLU'):
            curr_oe1 = structure_[model_.id][chain_.id][
                curr_residue_.id]['OE1'].get_vector()
            chi3 = round(
                math.degrees(
                    PDB.calc_dihedral(curr_cb, curr_cg, curr_cd, curr_oe1)), 2)

        if curr_residue_.has_id('CE') and curr_residue_.resname == 'LYS':
            curr_ce = structure_[model_.id][chain_.id][
                curr_residue_.id]['CE'].get_vector()
            chi3 = round(
                math.degrees(
                    PDB.calc_dihedral(curr_cb, curr_cg, curr_cd, curr_ce)), 2)

    if curr_residue_.has_id('CB') and curr_residue_.has_id(
            'CG') and curr_residue_.has_id('SD') and curr_residue_.has_id(
                'CE') and curr_residue_.resname == 'MET':
        curr_cb = structure_[model_.id][chain_.id][
            curr_residue_.id]['CB'].get_vector()
        curr_cg = structure_[model_.id][chain_.id][
            curr_residue_.id]['CG'].get_vector()
        curr_sd = structure_[model_.id][chain_.id][
            curr_residue_.id]['SD'].get_vector()
        curr_ce = structure_[model_.id][chain_.id][
            curr_residue_.id]['CE'].get_vector()
        chi3 = round(
            math.degrees(PDB.calc_dihedral(curr_cb, curr_cg, curr_sd,
                                           curr_ce)), 2)
    return chi3
Example #10
File: tools.py Project: mcbeaker/pro-min
def calc_vecsum(metVal, ox):
    # print(valenceDictionary.keys())
    # The borderline and outlier thresholds are >0.10 and >0.23, respectively, for nVECSUM,
    # >10% and >25%, respectively, for the vacancy parameter, which is the percentage of all expected coordination sites left vacant (Supplementary Fig. 2 and Supplementary Table 2). For example, ions with all coordination sites occupied by ligands (vacancy = 0) are classi- fied as acceptable. For geometry with an expected coordination number greater than four, metals with one vacant coordina- tion site (vacancy ≤ 25%) are borderline, and metals with two or more vacant coordination sites (vacancy > 25%)
    vecsum = 0
    fij = PDB.Vector(x=0, y=0, z=0)
    bonds = [
        key for key in metVal[ox].keys() if key not in ['coordNum', 'valence']
    ]
    for bond in bonds:
        distance = metVal[ox][bond]['dist']
        metVec = metVal[ox][bond]['metVec']
        ligVec = metVal[ox][bond]['ligVec']
        # print('metVec',metVec)
        # print('ligVec',ligVec)
        vec = (ligVec - metVec)
        rij = vec.__truediv__(distance)
        ligOcc = metVal[ox][bond]['ligOcc']
        bondValence = metVal[ox][bond]['bond_val']
        # print('blha: ' + str(bondValence))
        sij = float(ligOcc) * bondValence
        # print('sij',sij)
        # raise TypeError('somethingHappend ' + str(ij))
        fij = fij.__add__(np.multiply(rij.get_array(), sij))
        # print('fij: ',fij)
    vecsum = math.sqrt(fij.__mul__(fij)) / metVal[ox]['valence']
    # print('vecsum: ',vecsum)
    return vecsum
Example #11
 def PDBToNPY(self, fpathin):
     parser = PDB.PDBParser()
     io = PDB.PDBIO()
     struct = parser.get_structure('1ABZ', fpathin)
     allcoords1 = []
     for model in struct:
         for chain in model:
             for residue in chain:
                 for atom in residue:
                     x, y, z = atom.get_coord()
                     cSet = []
                     cSet.append(x)
                     cSet.append(y)
                     cSet.append(z)
                     allcoords1.append(cSet)
     return allcoords1
Example #12
def AngleFinder(Atom1,Atom2,Atom3):
	vector1 = Atom1.get_vector()
	vector2 = Atom2.get_vector()
	vector3 = Atom3.get_vector()
	angle = bp.calc_angle(vector1,vector2,vector3)

	return math.degrees(angle)
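A small usage sketch, assuming the snippet's bp alias refers to Bio.PDB and that math is imported at module level; it reports the N-CA-C bond angle of a hypothetical structure's first residue, in degrees.

from Bio import PDB as bp

structure = bp.PDBParser(QUIET=True).get_structure("example", "example.pdb")
first_res = next(structure[0]["A"].get_residues())

# N-CA-C bond angle of the first residue, in degrees.
print(AngleFinder(first_res["N"], first_res["CA"], first_res["C"]))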
Example #13
def assign_sensitivity(structure, md_df, chain, pdb_path, go):
    """
    Changed:
    lookup the sensitivities directly in the df, no dict.
    :param structure:
    :param md_df:
    :param chain:
    :param pdb_path:
    :return:
    """
    seq_pdb = []
    residues = structure[0][chain]
    for res in residues:  # move along the protein chain
        if not pdb.is_aa(res):
            continue
        aa = three2single[res.get_resname()]
        seq_pdb.append(aa)
    # get the sequence:
    aas = ''.join(md_df['AA'].values[1:].tolist())

    # align

    seq_md = ''.join(md_df['AA'][1:])
    aligned_md, aligned_pdb, identity = water(seq_md, seq_pdb)

    gos = [c for c in md_df.columns if c.startswith('GO:')]

    for aa_md, aa_pdb, res, pos in zip(aligned_md, aligned_pdb, residues,
                                       range(len(aligned_md))):
        if aa_md == '-' or aa_pdb == '-':
            continue
        res.sensitivity = {go: md_df.loc[pos, go] for go in gos}
    return structure
Example #14
    def test_is_protein(self):
        struct = bpdb.PDBParser().get_structure(
            "temp", 'test/forgi/threedee/data/1MFQ.pdb')
        chains = struct.get_chains()

        for c in chains:
            ftup.is_protein(c)
Example #15
    def generate_hit_distance_matrix(self, type='CA'):
        hit_distance_matrix = np.zeros((self.hit_span, self.hit_span))

        r1_type = 'CA'
        r2_type = 'CA'
        if type == 'NO':
            r1_type = 'N'
            r2_type = 'O'

        parser = PDB.PDBParser()
        chains = parser.get_structure(id='temp', file=self.pdb_path)[0]
        chain = chains[
            self.chain_id] if self.chain_id in chains else chains['A']

        for residue1 in chain.get_residues():
            r1 = residue1.id[1]
            if self.hit_range[0] < r1 < self.hit_range[1]:
                for residue2 in chain.get_residues():
                    r2 = residue2.id[1]
                    if self.hit_range[0] < r2 < self.hit_range[
                            1] and r1_type in residue1 and r2_type in residue2:
                        distance = residue1[r1_type] - residue2[r2_type]
                        hit_distance_matrix[r1 - self.hit_range[0]][
                            r2 - self.hit_range[0]] = distance

        return hit_distance_matrix
Example #16
def get_phi_psi(structure):
    """
    Calculate phi,psi dihedral angles and return lists.
    Uses the polypeptide class."""

    # Create a list of  polypeptide objects
    ppb = PDB.PPBuilder()
    pp_list = ppb.build_peptides(structure)

    # Get phi and psi angles
    phi_angles_list = []
    psi_angles_list = []

    # Iterate over polypeptide molecules
    for pp in pp_list:

        # Calculate phi and psi angles and unpack list and tuple
        Agg_phi = []
        Agg_psi = []

        for phi,psi in pp.get_phi_psi_list():

            # put them in the lists
            Agg_phi.append(phi)
            Agg_psi.append(psi)

        phi_angles_list.append(Agg_phi)
        psi_angles_list.append(Agg_psi)

    return phi_angles_list, psi_angles_list
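A usage sketch, assuming a hypothetical example.pdb; PPBuilder reports angles in radians with None at chain ends, so the sketch filters those out and converts to degrees.

import math
from Bio import PDB

structure = PDB.PDBParser(QUIET=True).get_structure("example", "example.pdb")
phi_lists, psi_lists = get_phi_psi(structure)

for phis, psis in zip(phi_lists, psi_lists):
    for phi, psi in zip(phis, psis):
        if phi is not None and psi is not None:
            print(round(math.degrees(phi), 1), round(math.degrees(psi), 1))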
Example #17
def test_parse_chain(pdbfile):
	"""Test parsing residues from a PDB file vs the BioPython implementation."""

	# Parse using our code
	with open(pdbfile) as fobj:
		residues1 = list(parse_pdb_chain(fobj))

	# Parse using BioPython
	parser = PDB.PDBParser()
	structure = parser.get_structure('test', pdbfile)
	residues2 = list(structure.get_residues())

	assert len(residues1) == len(residues2)

	# Compare residues
	for res1, res2 in zip(residues1, residues2):

		# Residue attributes
		assert res1.name == res2.resname
		assert res1.seq == res2.id[1]

		# Compare atoms
		assert len(res1.atoms) == len(res2)

		# Both should be in the same order they appeared in the file...
		for a1, a2 in zip(res1.atoms, res2):
			assert a1.name == a2.name
			assert np.allclose(a1.coord, a2.coord)
			assert a1.serial == a2.serial_number
Example #18
File: working.py Project: eckhaus/Thesis
class PASS(PredictionAlgorithm):
    pdbParser = PDB.PDBParser(PERMISSIVE=1)

    def __init__(self, pdbLoader, outputFolder):
        self.executionString = "./algo/pass %s"
        PredictionAlgorithm.__init__(self, pdbLoader, outputFolder)

    def run_one(self, structure):
        PredictionAlgorithm.run_one(self, structure)
        print(structure.pdbID + "_asps.pdb")
        # cleanup
        # TODO: test whether the files is present...
        try:
            copyfile(structure.fileName,
                     self.outputFolder + "/" + structure.pdbID + ".pdb")
        except:
            pass
        try:
            tryMove(structure.pdbID + "_asps.pdb",
                    self.outputFolder + "/" + structure.pdbID + "_asps.pdb")
            tryMove(structure.pdbID + "_lig1.pdb",
                    self.outputFolder + "/" + structure.pdbID + "_lig1.pdb")
            tryMove(structure.pdbID + "_lig2.pdb",
                    self.outputFolder + "/" + structure.pdbID + "_lig2.pdb")
            tryMove(structure.pdbID + "_lig3.pdb",
                    self.outputFolder + "/" + structure.pdbID + "_lig3.pdb")
            tryMove(structure.pdbID + "_probes.pdb",
                    self.outputFolder + "/" + structure.pdbID + "_probes.pdb")
        except:
            pass
Example #19
 def __init__(self, xtal=False, num_range=False, verbose=False):
     if xtal:
         self.verbose = verbose
         try:
             xtal.pdb_code
             self.structure = xtal
         except:
             self.structure = Structure.objects.get(
                 pdb_code__index=xtal.upper())
         self.parent_prot_conf = ProteinConformation.objects.get(
             protein=self.structure.protein_conformation.protein.parent)
         io = StringIO(self.structure.pdb_data.pdb)
         self.pdb_struct = PDB.PDBParser(QUIET=True).get_structure(
             self.structure.pdb_code.index, io)[0]
         self.range = []
         if num_range:
             self.range = [[int(i) for i in num_range.split('-')]]
         else:
             for t in ProteinSegment.objects.filter(proteinfamily='GPCR',
                                                    category='helix'):
                 resis = Residue.objects.filter(
                     protein_conformation__protein=self.structure.
                     protein_conformation.protein.parent,
                     protein_segment=t)
                 if len(resis) == 0:
                     continue
                 self.range.append([
                     resis[0].sequence_number,
                     resis.reverse()[0].sequence_number
                 ])
Example #20
def _extract_seq_from_pdb(pdb_filepath, AA3_to_AA1=generic.AA3_to_AA1):
    parser = PDB.PDBParser(QUIET=True)
    with open(pdb_filepath, 'r') as file:
        struct = parser.get_structure('placeholder', file)
    cid_seq_map = dict()
    for model in struct:
        for chain in model:
            seq = []
            for residue in chain:
                atom_type, res_id = residue.get_id()[:2]
                # res_id should start from 1
                if res_id < len(seq) + 1:
                    continue
                while res_id > len(seq) + 1:
                    seq.append("X")
                if atom_type == " ":
                    res_3 = residue.resname
                    try:
                        res_1 = AA3_to_AA1[res_3]
                    except KeyError:
                        continue
                    seq.append(res_1)
            cid_seq_map[chain.id] = "".join(seq)
        break
    return cid_seq_map

#
# if __name__ == "__main__":
#     path = list(os.listdir(paths.PDB_FOLDER))[0]
#     path = os.path.join(paths.PDB_FOLDER, path)
#     print(path)
#     print(_extract_seq_from_pdb(path))
Example #21
def pdb2xyz(inputfile, outputPrefix, keepIntermediate=False):
    """pdb2xyz: Transform a pdb file to a goccs compatible xyz file with number of atoms, elements and coordinates into an ouputfile, prefixed with outputPrefix.xyz. If you set keepIntermediate to true then the pdb file written by PDBFixer will be kept in the output folder. """

    pdbfixedfilename = outputPrefix + "_fixed.pdb"
    xyzoutfilename = outputPrefix + ".xyz"
    fixer = pdbfixer.PDBFixer(inputfile)
    fixer.removeHeterogens(False)
    PDBFile.writeFile(fixer.topology, fixer.positions,
                      open(pdbfixedfilename, 'w'))

    parser = PDB.PDBParser()
    #parser = PDB.MMCIFParser() #in case it's a cif file

    structure = parser.get_structure("input", pdbfixedfilename)

    #print(dir(structure))

    natoms = sum(1 for _ in structure.get_atoms())

    #print("Writing output")
    outputhandle = open(xyzoutfilename, "w")
    outputhandle.write("""%d
    empty line\n""" % (natoms))

    for atom in structure.get_atoms():
        element = atom.element
        coords = atom.get_coord()
        outputhandle.write("%s     %.3f     %.3f     %.3f\n" %
                           (element, coords[0], coords[1], coords[2]))
    outputhandle.close()
    if not keepIntermediate:
        os.remove(pdbfixedfilename)
Example #22
def save_results(out_models, output, directory, verbose):
    """Saves the resulting models into PDB files. Creates a specific directory for the model if it does not exist.
    Additionally, each chain receives a new ID in order to distinguish those chains that were equivalent.

    Keyword arguments:
    out_models -- list of the resulting model objects created by the program
    output -- name of the output model/file given by the user
    directory -- path of the directory where the PDB files are saved
    verbose -- boolean, prints to stderr the progress of the program"""
    u = 1
    if verbose:
        sys.stderr.write("Saving models...\n")
    io = PDB.PDBIO()
    if not os.path.exists(directory):
        os.makedirs(directory)
    for i in range(len(out_models)):
        id_list = []
        final_model = UpdModel(str(i))
        old_model = out_models[i]
        for chain in old_model.get_chains():
            new_chain = chain.copy()
            new_chain.id = new_id(id_list)
            id_list.append(new_chain.id)
            final_model.add(new_chain)
        io.set_structure(final_model)
        io.save(directory + "/" + output + "_" + str(u) + ".pdb")
        if verbose:
            sys.stderr.write("  " + output + "_" + str(u) + ".pdb saved\n")
        u += 1
Example #23
def ligandfilter(pdb):
    """
    Remove water and other ligands from pdb.
    :param pdb: PDB.Structure.Structure
    :return: None
    """
    # Remove non amino acid residues
    # To upkeep the integrity due to detaching, iterate over child_list copy!
    for model in pdb.child_list[:]:
        for chain in model.child_list[:]:
            for res in chain.child_list[:]:
                if not PDB.is_aa(res):
                    chain.detach_child(res.id)
            if len(chain) == 0:
                model.detach_child(chain.id)
        if len(model) == 0:
            pdb.detach_child(model)
    # if the pdb still has more than one model, it's probably an NMR structure
    # simply keep the first model
    if len(pdb) > 1:
        for model in pdb.child_list[1:]:
            pdb.detach_child(model.id)
    if len(pdb.child_list[0]) > 1:
        model = pdb.child_list[0]
        for chain in model.child_list[1:]:
            model.detach_child(chain.id)
    # There is only one model left
    assert len(pdb) == 1
    # This model has only one chain
    assert len(pdb.child_list[0]) == 1
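A usage sketch, assuming a hypothetical multi-chain example.pdb; after the call the structure retains only amino-acid residues of the first chain of the first model, as the closing asserts require.

from Bio import PDB

structure = PDB.PDBParser(QUIET=True).get_structure("example", "example.pdb")
ligandfilter(structure)

remaining_chain = next(structure.get_chains())
print(remaining_chain.id, len([res for res in remaining_chain if PDB.is_aa(res)]))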
Example #24
 def calc_dihedral(self):
     cb = self.cov_receptor.parent['CB']
     ca = self.cov_receptor.parent['CA']
     lig_cov_neighbors = self.get_atom_neighbors(self.cov_ligand, list(self.ligand.get_atoms()))
     self.angles = list()
     ang1 = math.degrees( bp.calc_dihedral(ca.get_vector(), 
                                           cb.get_vector(), 
                                           self.cov_receptor.get_vector(),
                                           self.cov_ligand.get_vector()))
     self.angles.append(ang1)
     for i in lig_cov_neighbors:
         ang = math.degrees( bp.calc_dihedral(cb.get_vector(),
                                              self.cov_receptor.get_vector(),
                                              self.cov_ligand.get_vector(),
                                              i.get_vector()))
         self.angles.append(ang)
Example #25
def parse_pdb(pdb_file):
    #pdb_file = 'pdb5l6t.ent' #np.random.choice(pdb_list)
    p = bio.PDBParser()
    s = p.get_structure('X', pdb_file)

    gen = s.get_models()
    l = list(gen)
    mod = l[np.random.randint(
        0, len(l))]  #choose random model when more than 1 exists

    seq_strs = []
    seq_locs = []
    for chain in mod:
        seq_str = ''
        seq_loc = []
        for residue in chain:
            if residue.get_id()[0] == ' ':
                letter_code = residue_letter_codes[residue.get_resname()]
                seq_str += letter_code
                for atom in residue:
                    seq_loc.append(atom.get_full_id()[3][1])
        seq_strs.append(seq_str)
        seq_locs.append(np.unique(seq_loc))

    return seq_strs, seq_locs
Example #26
def annotate_fallback(chain_list):
    """
    If neither DSSR nor MC-Annotate are available, we use an ad-hoc implementation of canonical
    basepair detection as fallback.
    This does not work well for missing atoms or modified residues.
    """
    kdtree = bpdb.NeighborSearch(
        [atom for chain in chain_list for atom in chain.get_atoms()])
    pairs = kdtree.search_all(10, "R")
    basepairs = {}
    # Sorted, so conflicting basepairs are deterministically solved
    for res1, res2 in sorted(pairs):
        if res1.resname.strip() not in RNA_RESIDUES or res1.id[0].startswith(
                "H_"):
            continue
        if res2.resname.strip() not in RNA_RESIDUES or res2.id[0].startswith(
                "H_"):
            continue
        labels = {res1.resname.strip(), res2.resname.strip()}
        try:
            is_bp = is_basepair_pair(res1, res2)
            if is_bp:
                res1_id = fgr.resid_from_biopython(res1)
                res2_id = fgr.resid_from_biopython(res2)
                if res1_id in basepairs:
                    warnings.warn("More than one basepair detected for {}."
                                  " Ignoring {}-{} because {}-{} is already"
                                  " part of the structure".format(
                                      res1_id, res1_id, res2_id, res1_id,
                                      basepairs[res1_id]))
                    continue
                if res2_id in basepairs:
                    warnings.warn("More than one basepair detected for {}."
                                  " Ignoring {}-{} because {}-{} is already"
                                  " part of the structure".format(
                                      res2_id, res2_id, res1_id, res2_id,
                                      basepairs[res2_id]))
                    continue
                basepairs[res1_id] = res2_id
                basepairs[res2_id] = res1_id
        except KeyError as e:
            log.debug("Missing atom %s. %s has atoms %s, %s has atoms %s", e,
                      res1, res1.child_dict, res2, res2.child_dict)
            pass

    seq_ids = []
    for chain in sorted(chain_list, key=lambda x: x.id):
        for residue in chain:
            seq_ids.append(fgr.resid_from_biopython(residue))
    bpseq = ""
    chain_dict = {c.id: c for c in chain_list}
    for i, seqid in enumerate(seq_ids):
        if seqid in basepairs:
            bp = seq_ids.index(basepairs[seqid]) + 1
        else:
            bp = 0

        bpseq += "{} {} {}\n".format(
            i + 1, chain_dict[seqid.chain][seqid.resid].resname.strip(), bp)
    return bpseq, seq_ids
Example #27
 def __init__(self, out_dir=None):
     """ Create parsing and writing objects, specify output directory. """
     self.parser = PDBParser(QUIET=True)
     self.writer = PDB.PDBIO()
     if out_dir is None:
         out_dir = os.path.join(os.getcwd(), "chain_PDBs")
     self.out_dir = out_dir
Example #28
def load_structures(files_to_load, quiet=False):
    """Load PDB files from a list and return a list of the structures"""
    parser = PDB.PDBParser(QUIET=True, PERMISSIVE=True)
    structures = []
    longest_line_len = 0
    for file in files_to_load:
        name = os.path.splitext(file)[0]
        if not quiet:
            print_line = "Loading " + name + "..."
            print(print_line, end="\r")
            longest_line_len = max(longest_line_len, len(print_line))
        new_structure = parser.get_structure(name, file)
        # Remove residue 0 to dedicate it to the donor fluorophore
        for new_model in new_structure:
            for new_chain in new_model:
                for residue in new_chain:
                    if residue.id[1] == 0:
                        new_chain.detach_child(residue.id)
        structures.append(new_structure)
        # save_structure(new_structure, name + ".no0.pdb")
    if not quiet:
        final_print_str = "Loaded " + str(len(files_to_load)) + " structures."
        num_spaces = max(0, longest_line_len - len(final_print_str))
        print(final_print_str + " " * num_spaces)
    return structures
Example #29
def adjacencyMat(prot,
                 chainID,
                 seqIDs,
                 normalise=True,
                 mode='bool',
                 thresh=6.0):
    size = len(seqIDs)
    mat = np.zeros(shape=(size, size))

    prefix = "./PPI4DOCK/PPI4DOCK_docking_set/"
    chainFile = f"{prefix}/{prot}/{chainID}_model_st.pdb"
    parser = PDB.PDBParser()
    structure = parser.get_structure(chainID, chainFile)
    chain = structure[0][chainID]

    for i, resA in enumerate(seqIDs):
        for j, resB in enumerate(seqIDs):
            if resA != resB:
                distance = centralCarbon(chain[resA]) - centralCarbon(
                    chain[resB])
            else:  #same residue,self loop
                distance = 0
            mat[i][j] = distance

    if mode == 'distance':
        mat = np.where(mat < thresh, thresh - mat, 0)
        if normalise:
            mat = mat / thresh

    elif mode == 'bool':
        mat = np.where(mat < thresh, 1, 0)

    return mat
Example #30
def get_dssp_info(PDB_file, model, dir):
    """Runs DSSP on protein input"""

    #TODO : you can run DSSP through biopython. The output contains a lot of useful information.
    #Tip : make sure your secondary structure indexing matches the sequence order in the PDB file!

    return PDB.DSSP(model, dir + '/' + PDB_file, dssp='mkdssp')
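A hedged usage sketch, assuming mkdssp is installed and a hypothetical example.pdb sits in the current directory; Bio.PDB DSSP results are keyed by (chain id, residue id), and each entry includes the one-letter code, the secondary-structure symbol and the relative accessibility.

from Bio import PDB

pdb_file = "example.pdb"  # hypothetical file name
structure = PDB.PDBParser(QUIET=True).get_structure("example", "./" + pdb_file)
dssp = get_dssp_info(pdb_file, structure[0], ".")

for key in list(dssp.keys())[:5]:
    aa, ss, rel_acc = dssp[key][1], dssp[key][2], dssp[key][3]
    print(key, aa, ss, rel_acc)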
Example #31
def main():
    parser = optparse.OptionParser()
    parser.add_option("-p",
                      "--pdb",
                      dest="pdb",
                      help="path to PDB file",
                      metavar="STRING")
    parser.add_option("-f",
                      "--pdb_fasta",
                      dest="pdb_fasta",
                      help="path to PDB fasta file (out)",
                      metavar="STRING")

    (options, args) = parser.parse_args()
    pdb_fasta = options.pdb_fasta
    pdb_file = options.pdb

    pdb_name = os.path.basename(pdb_file).split(".")[0]

    parser = BP.PDBParser()
    ppb = PPBuilder(radius=1000)  # retrieve all amino acids
    pdbseq = ""
    structure = parser.get_structure(pdb_name, pdb_file)
    model = structure[0]
    for chain in model:
        for pp in ppb.build_peptides(model[chain.id], aa_only=False):
            pdbseq += (pp.get_sequence())

    print ">", pdb_name, len(pdbseq)
    print pdbseq

    with open(pdb_fasta, "w") as o:
        o.write(">%s %i\n%s\n" % (pdb_name, len(pdbseq), pdbseq))
Example #32
def pdb2cd(name):
    f = name + ".pdb"
    dssp_tuple = dssp_dict_from_pdb_file(f)
    dssp_dict = dssp_tuple[0]
    p = PDBParser(QUIET=True).get_structure("file", f)

    # Initiates and fills array ("cc") with chains.
    cc = [chain.get_id() for model in p for chain in model]

    # Determines length of sequence, initiates an array ("ss") of same length.
    howLong = ss_out = 0
    for c in cc:
        howLong += len([_ for _ in p[0][c].get_residues() if PDB.is_aa(_)])
    if not howLong == len(dssp_tuple[1]): howLong = len(dssp_tuple[1])
    ss = np.arange(1, howLong + 1)

    # Fills the array ("ss") with secondary structures.
    for i in ss:
        ss_lib = dssp_dict[dssp_tuple[1][
            i -
            3]]  # ss_lib = dssp_dict[(dssp_tuple[1][0][0], (' ', i-1, ' '))]
        dict_ss = ss_lib[1]
        if dict_ss == 'H':
            ss_out = 0
        if dict_ss == 'E':
            ss_out = 1
        if dict_ss == '-':  # else:# dict_ss == '-':
            ss_out = 2
        ss[i - 1] = ss_out
    # Returns the fractional composition of alpha helix, beta sheet or random coil.
    alpha = (ss == 0).sum() / ss.__len__()
    beta = (ss == 1).sum() / ss.__len__()
    coil = (ss == 2).sum() / ss.__len__()
    abc = [alpha, beta, coil]
    return abc
Example #33
 def create_g_alpha_pdb_array(signprot_complex):
     segments = ProteinSegment.objects.filter(proteinfamily='Alpha')
     residues = Residue.objects.filter(
         protein_conformation__protein__entry_name=signprot_complex.
         structure.pdb_code.index.lower() + '_a')
     pdb_array = OrderedDict()
     parse = GPCRDBParsingPDB()
     for s in segments:
         if s.slug not in pdb_array:
             pdb_array[s.slug] = OrderedDict()
         for r in residues.filter(protein_segment=s):
             try:
                 rotamers = Rotamer.objects.filter(
                     structure=signprot_complex.structure,
                     residue__display_generic_number__label=r.
                     display_generic_number.label)
                 if len(rotamers) == 0:
                     raise Exception()
                 rotamer = parse.right_rotamer_select(rotamers)
                 p = PDB.PDBParser(QUIET=True).get_structure(
                     'structure', StringIO(rotamer.pdbdata.pdb))[0]
                 atoms = []
                 for chain in p:
                     for res in chain:
                         for atom in res:
                             atoms.append(atom)
             except:
                 atoms = 'x'
             pdb_array[r.protein_segment.slug][
                 r.display_generic_number.label] = atoms
     return pdb_array
Example #34
def calc_vecsum(structure,metalName,valenceDictionary):
	# print(valenceDictionary.keys())
	metals = ["FE", "CO", "MN", "CU", "NI", "MO","W", "V"]
	atoms = list(structure.get_atoms())
	metalRow = get_metalRow(list(structure.get_atoms()),metalName)
	metalAtom = atoms[metalRow]
	numAtoms = len(atoms)

	vecsum = 0
	fij = PDB.Vector(x=0,y=0,z=0)
	for idx in range(0,numAtoms):
		if idx != metalRow:
			# print('blah')
			atomNames = metalName+"_"+atoms[idx].get_name().upper()
			ligandAtom = atoms[idx]
			distance = abs(ligandAtom - metalAtom)
			vec = (ligandAtom.get_vector() - metalAtom.get_vector())
			rij = vec.__truediv__(distance)
			ligOcc = ligandAtom.get_occupancy()
			# print('ligOCC: ',ligOcc)
			# print('valence: ',valenceDictionary[atomNames]['Valence'])
			oxInd = valenceDictionary[atomNames]['Ox'].index(valenceDictionary['oxNum'])
			bondValence = float(valenceDictionary[atomNames]['Valence'][oxInd])
			# print('blha: ' + str(bondValence))
			sij = float(ligOcc) * bondValence
			# print('sij',sij)
			# raise TypeError('somethingHappend ' + str(ij))
			fij = fij.__add__(np.multiply(rij.get_array(),sij))
			# print('fij: ',fij)
	vecsum =  math.sqrt(fij.__mul__(fij)) / float(valenceDictionary['Valency'])
	# print('vecsum: ',vecsum)
	return vecsum
Example #35
 def load_pdb_fobject(self, fobject):
     parser = PDB.PDBParser(QUIET = True)
     res = parser.get_structure("c",fobject)
     for a in res.get_atoms():
         if re.match(r'^[A-Z]{1,2}[0-9]?\*$',a.id):
             a.id = a.id.replace("*","'")
     return res
Example #36
def from_structure(structure):
    """Return contact data from a 3D structure (in pdb format).
    """

    try:
        from Bio import PDB
        if isinstance(structure, str):
            p = PDB.PDBParser()
            structure = p.get_structure('S', structure)
        if isinstance(structure, PDB.Structure.Structure):
            structure = [
                np.array(atom.get_coord())
                for atom in structure.get_atoms()
            ]
    except ImportError:
        print("Biopython not found.")
        raise

    atoms = np.array(structure)
    try:
        import scipy
        D = scipy.spatial.distance.pdist(atoms, 'euclidean')
        D = scipy.spatial.distance.squareform(D)
    except ImportError:
        print("Scipy not found.")
        raise
    m = np.max(1 / D[D != 0])
    M = np.zeros(D.shape)
    M[D != 0] = 1 / D[D != 0]
    M[D == 0] = m
    return M
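A usage sketch for from_structure(), with a hypothetical path; the function appears to accept a PDB file path, a Bio.PDB structure object, or a raw coordinate array, and returns a dense inverse-distance matrix.

import numpy as np

M = from_structure("example.pdb")  # hypothetical path
print(M.shape, M.max())

# The same call also works on raw coordinates.
coords = np.random.rand(10, 3)
print(from_structure(coords).shape)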
Example #37
def read_dssp(dssp_file):

    try:
        dssp, keys = BP.make_dssp_dict(dssp_file)
    except PDBException:
        print("SKIPPING THIS protein: pdb exception occurred for  %s" % dssp_file)
        return

    return dssp, keys
Example #38
    def _get_system_vectors(rotation_axis, m_point, target_vector):
        r = pdb.vector_to_axis(rotation_axis, m_point)  # the perpendicular projection m_point to rotation_axis
        o = m_point - r                                 # corresponded rotation axis vector

        if r.norm() < 1e-9:  # m_point on rotation axis
            return
        r_normd = r.normalized()
        f = target_vector - o
        theta_norm = rotation_axis.normalized()
        s_normd = r_normd ** theta_norm
        return r, f, r_normd, s_normd
Example #39
def compute_torsion_angles(previous_residue, residue, next_residue):
    """
    Little helper function, calculates the backbone phi and psi torsion
    angles from the given residues and returns them
    :param residue: The amino acid residue the torsion angles shall be computed
    :return: Phi and psi backbone torsion angles
    """
    # print previous_residue.get_id()[1], residue.get_id()[1], next_residue.get_id()[1]
    # extract the atoms for the torsion calculation
    # 1.) for the phi
    atom_CO_0 = previous_residue['C'].get_vector()
    atom_N_1 = residue['N'].get_vector()
    atom_CA_1 = residue['CA'].get_vector()
    atom_CO_1 = residue['C'].get_vector()
    atom_N_2 = next_residue['N'].get_vector()

    phi_angle = PDB.calc_dihedral(atom_CO_0, atom_N_1, atom_CA_1, atom_CO_1)
    psi_angle = PDB.calc_dihedral(atom_N_1, atom_CA_1, atom_CO_1, atom_N_2)

    # convert into degrees
    return math.degrees(phi_angle), math.degrees(psi_angle)
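A sliding-window usage sketch over one chain of a hypothetical example.pdb; consecutive residue triples are fed to the helper, and residues with incomplete backbones are skipped.

from Bio import PDB

structure = PDB.PDBParser(QUIET=True).get_structure("example", "example.pdb")
residues = [res for res in structure[0]["A"] if PDB.is_aa(res)]

for prev_res, res, next_res in zip(residues, residues[1:], residues[2:]):
    try:
        phi, psi = compute_torsion_angles(prev_res, res, next_res)
    except KeyError:
        continue  # missing backbone atoms
    print(res.get_resname(), round(phi, 1), round(psi, 1))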
Example #40
 def analyze_dihedral(self):  
     """
     Deprecated. Please use class Dihedral_Analisys
     """
     angles = list()
     cov_atm_lig = self.ligand.child_dict[self.ligand_dict['cov_atm']]
     ##dihedral between CA < CB < SG < ligand
     angle_1 = math.degrees( bp.calc_dihedral(self.covalent_res.child_dict['CA'].get_vector(),
                                              self.covalent_res.child_dict['CB'].get_vector(),
                                              self.covalent_atm_res.get_vector(),
                                              cov_atm_lig.get_vector()))
     angles.append(angle_1)
     ##all dihedral of CB < SG < ligand-covalent-atom < other ligand atoms
     ns  = bp.NeighborSearch(list(self.ligand.get_atom()))        
     neigh = ns.search(cov_atm_lig.get_coord(), 2) 
     neigh = filter(lambda x: x.name != self.ligand_dict['cov_atm'], neigh)# removes the atom itself
     for i in neigh:
         ang = math.degrees( bp.calc_dihedral(self.covalent_res.child_dict['CB'].get_vector(),
                                              self.covalent_atm_res.get_vector(),
                                              cov_atm_lig.get_vector(),
                                              i.get_vector()))
         angles.append(ang)
     open('/'.join([self.path, DIHEDRAL_OUTPUT]), 'w').write(reduce(lambda x, ang: ' '.join([x, str(ang)]), angles, ''))
Example #41
File: __init__.py Project: gieses/CLQC
    def __get_residues__(self, structure):
        """
        Gets all amino acids residues from a given structure and stores them
        in an array.

        parameters:
        ----------------
        structure: PDB structure obj,
                   opened PDB structure file object

        Returns:
        ---------------------------------------
        array: np-arr,
               residue objects from Bio.PDB
        """
        residues_arr = []
        for res_i in structure.get_residues():
            if PDB.is_aa(res_i):
                residues_arr.append(res_i)
        return(np.array(residues_arr))
Example #42
 def calculate_torsion_psi(current_residue, next_residue):
     atom1 = current_residue['N'].get_vector()
     atom2 = current_residue['CA'].get_vector()
     atom3 = current_residue['C'].get_vector()
     atom4 = next_residue['N'].get_vector()
     return PDB.calc_dihedral(atom1, atom2, atom3, atom4)
Example #43
 def calculate_torsion_phi(previous_residue, current_residue):
     atom1 = previous_residue['C'].get_vector()
     atom2 = current_residue['N'].get_vector()
     atom3 = current_residue['CA'].get_vector()
     atom4 = current_residue['C'].get_vector()
     return PDB.calc_dihedral(atom1, atom2, atom3, atom4)
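A short sketch combining the two (apparently static) helpers above, treated here as plain functions on a hypothetical structure; both return radians, so math.degrees converts them.

import math
from Bio import PDB

structure = PDB.PDBParser(QUIET=True).get_structure("example", "example.pdb")
residues = [res for res in structure[0]["A"] if PDB.is_aa(res)]

prev_res, res, next_res = residues[0], residues[1], residues[2]
phi = math.degrees(calculate_torsion_phi(prev_res, res))
psi = math.degrees(calculate_torsion_psi(res, next_res))
print(round(phi, 1), round(psi, 1))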
Example #44
def get_pose_constraints(Pose, MaxDist, MinPositionSeperation, SasaRadius, SasaScale, UpstreamGrep, DownstreamGrep, NeedHydrogen=True):
    '''  '''
    # AlexsSasaCalculator is from Alex's interface_fragment_matching 
    # thanks Alex!
    #
    # This is used to give buried polar contacts more weight. Thanks Alex Ford!
    try:
      from interface_fragment_matching.utility.analysis import AtomicSasaCalculator
      # make instace of Alex's sasa calculator
      AlexsSasaCalculator = AtomicSasaCalculator(probe_radius=SasaRadius)
      ResidueAtomSasa = AlexsSasaCalculator.calculate_per_atom_sasa(Pose)    
    except ImportError:
      print(' Error: SASA weighting of contacts requires interface_fragment_matching from Alex Ford ')

    # for making full atom kd tree
    ResAtmCoordLists = []
    # for translating from kd tree index to ( residue, atom ) coord
    ResAtmRecordLists = []

    # loop through all residue numbers
    for Res in range(1, Pose.n_residue() + 1):
      # remade for each residue
      AtmRecordList = []
      AtmCoordList = []
      # loop through residue's atom numbers
      for Atm in range(1, Pose.residue(Res).natoms() + 1):
        # add (residue, atom) coord to residue's list
        AtmRecordList.append((Res, Atm))
        # add atom xyz coord to residue's list
        AtmCoordList.append( np.array(list(Pose.residue(Res).atom(Atm).xyz())) )
      
      # add residue's lists to respective global lists
      ResAtmCoordLists.extend(AtmCoordList)
      ResAtmRecordLists.extend(AtmRecordList)

    ResidueAtomArray = np.array( ResAtmCoordLists )
    ResidueAtomKDTree = spatial.KDTree( ResidueAtomArray )

    ResidueAtomNeighbors = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, MaxDist )
    # ResidueAtomNearNeighbors = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, 2.0 )
    ResidueAtomHydrogens = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, 1.1 )

    # holds constraints before printing
    AllConstraints = [] 
    # holds sorted cst
    AllBackboneBackboneCst = []
    AllBackboneSidechainCst = []
    AllSidechainSidechainCst = []

    # All contacts are from upstream to downstream residues to avoid double counting
    Upstream = []
    for UpIndex, UpXyzCoords in enumerate(ResAtmCoordLists):
      UpRes, UpAtm = ResAtmRecordLists[UpIndex]

      # # loop through residues storing info on oxygens
      # for UpRes in range( 1, Pose.n_residue() + 1 ):
      #   # loop through atoms
      #   for UpAtm in range( 1, Pose.residue(UpRes).natoms() + 1 ):
      UpName = Pose.residue(UpRes).atom_name(UpAtm).replace(' ', '')

      # skip virtual residues
      if Pose.residue(UpRes).is_virtual(UpAtm):
        continue

      #                                this guy 
      #                                 /
      # checks upstream name           V
      if re.match(UpstreamGrep, UpName ): 
        # print '\n'*2
        # print 'UpRes, UpName', UpRes, UpName

        # get neighbors of upstream residues
        NeighborsOfUpstream = ResidueAtomNeighbors[UpIndex]
        
        # prep for loop
        Downstreams = []

        Constraints = []
        BackboneBackboneCst = []
        BackboneSidechainCst = []
        SidechainSidechainCst = []

        # ArbitrayOrderOfAtomNames = {}
        for DownIndex in NeighborsOfUpstream:
          # name presumes downstream, checks with the if immediately below
          DownRes, DownAtm = ResAtmRecordLists[DownIndex]

          # checks that downstream residue is downstream of upstream and passes min primary sequence spacing
          if DownRes - UpRes >= MinPositionSeperation:
            DownName = Pose.residue(DownRes).atom_name(DownAtm).replace(' ', '')
            
            # skip if same atom
            if UpRes == DownRes:
              if UpName == DownName:
                continue

            # skip virtual residues
            if Pose.residue(DownRes).is_virtual(DownAtm):
              continue

            # checks downstream name
            if re.match( DownstreamGrep, DownName ):
              # print 'DownRes, DownName', DownRes, DownName

              PotentialUpstreamHydrogens = ResidueAtomHydrogens[UpIndex]
              UpstreamHydrogens = []
              # print 'PotentialUpstreamHydrogens', PotentialUpstreamHydrogens
              for UpH_I in PotentialUpstreamHydrogens:
                UpH_Res, UpH_Atm = ResAtmRecordLists[UpH_I]
                UpH_Name  = Pose.residue(UpH_Res).atom_name(UpH_Atm).replace(' ', '')
                # print 'UpH_Name', UpH_Name
                if 'H' in UpH_Name:
                  UpstreamHydrogens.append((UpH_Res, UpH_Atm, UpH_Name))
                # print 'UpstreamHydrogens', UpstreamHydrogens

              PotentialDownstreamHydrogens = ResidueAtomHydrogens[DownIndex]
              DownstreamHydrogens = []
              # print 'PotentialDownstreamHydrogens', PotentialDownstreamHydrogens
              for DownH_I in PotentialDownstreamHydrogens:
                DownH_Res, DownH_Atm = ResAtmRecordLists[DownH_I]
                DownH_Name = Pose.residue(DownH_Res).atom_name(DownH_Atm).replace(' ', '')
                # print 'DownH_Name', DownH_Name
                if 'H' in DownH_Name:
                  DownstreamHydrogens.append((DownH_Res, DownH_Atm, DownH_Name))
                # print 'DownstreamHydrogens', DownstreamHydrogens

              # check there is at least one hydrogen in the system before adding constraint
              if len(UpstreamHydrogens) or len(DownstreamHydrogens) or NeedHydrogen == False:

                # these trys / excepts separate
                # backbone-backbone from
                # backbone-sidechain from
                # sidechain-sidechain interactions
                #
                # in future maybe sort into separate lists, shouldn't rely on ResidueAtomSasa to know what is in backbone
                try:
                  UpstreamSasa = ResidueAtomSasa[UpRes][UpName]
                  DownstreamSasa = ResidueAtomSasa[DownRes][DownName]
                  AverageSasa = np.mean([UpstreamSasa, DownstreamSasa])        
                  BBBB = 1
                  BBSC = SCSC = 0
                except KeyError:                
                  # These lines handle backbone to sidechain interactions
                  # set weight equal to the most buried 
                  try:
                    UpstreamSasa = ResidueAtomSasa[UpRes][UpName]
                    AverageSasa = SasaScale.FloorSasa
                    BBSC = 1
                    BBBB = SCSC = 0
                  except KeyError:
                    try:
                      DownstreamSasa = ResidueAtomSasa[DownRes][DownName]
                      AverageSasa = SasaScale.FloorSasa 
                      BBSC = 1
                      BBBB = SCSC = 0            
                    
                    # set weight of side chain side chain equal to the most buried             
                    except KeyError:
                      AverageSasa = SasaScale.CeilingSasa 
                      SCSC = 1
                      BBSC = BBBB = 0

                # use instance of sasa_scale to calculate weight based on avg sasa of N and O
                SasaBasedWeight = SasaScale.weigh(AverageSasa)
                # print 
                # print 'AverageSasa', AverageSasa
                # print 'SasaBasedWeight', SasaBasedWeight

                # print 'found downstream neighbor %s'%DownName
                DownXyzCoords = np.array( list(Pose.residue(DownRes).atom(DownAtm).xyz()) )
                # print 'DownRes, DownName', DownRes, DownName
                # print 'DownXyzCoords', DownXyzCoords

                # ## Get neighbors for angles and torsions to use with AtomPairs

                SelectUpNeighbors = []
                # iterates through upstream atom neighbors for references for angle
                for UpNeighborIndex in NeighborsOfUpstream:
                  UpNeighborRes, UpNeighborAtm = ResAtmRecordLists[UpNeighborIndex]
                  UpNeighborName = Pose.residue(UpNeighborRes).atom_name(UpNeighborAtm).replace(' ', '')

                  # keep looking if neighbor is hydrogen
                  if 'H' in UpNeighborName:
                    continue                

                  # skip virtual residues
                  if Pose.residue(UpNeighborRes).is_virtual(UpNeighborAtm):
                    continue

                  # keep looking if neighbor is self
                  if UpNeighborName == UpName and UpNeighborRes == UpRes:
                    continue
                  # keep looking if neighbor is downstream residue again
                  if UpNeighborName == DownName and UpNeighborRes == DownRes:
                    continue
                  UpNeighborCoords = ResAtmCoordLists[UpNeighborIndex]
                  DistanceToNeighbor = solenoid_tools.vector_magnitude( UpXyzCoords - UpNeighborCoords )
                  SelectUpNeighbors.append( (DistanceToNeighbor, UpNeighborName, UpNeighborRes, UpNeighborCoords) )

                # sort by distance to atom, nearest first
                SelectUpNeighbors.sort()                
                UpNeighbor1Tuple = SelectUpNeighbors[0]
                UpNeighbor2Tuple = SelectUpNeighbors[1]
                # print '\n'*2
                # print 'UpRes, UpName', UpRes, UpName
                # print 'UpstreamHydrogens', UpstreamHydrogens
                # print 'SelectUpNeighbors', SelectUpNeighbors

                # get neighbors of the downstream residue
                NeighborsOfDownstream = ResidueAtomNeighbors[DownIndex]
                SelectDownNeighbors = []
                # iterates through downstream atom neighbors for references for angle
                for DownNeighborIndex in NeighborsOfDownstream:
                  DownNeighborRes, DownNeighborAtm = ResAtmRecordLists[DownNeighborIndex]
                  DownNeighborName = Pose.residue(DownNeighborRes).atom_name(DownNeighborAtm).replace(' ', '')

                  # keep looking if neighbor is hydrogen
                  if 'H' in DownNeighborName:
                    continue                

                  # skip virtual residues
                  if Pose.residue(DownNeighborRes).is_virtual(DownNeighborAtm):
                    continue

                  # keep looking if neighbor is self
                  if DownNeighborName == DownName and DownNeighborRes == DownRes:
                    continue
                  # keep looking if neighbor is the upstream partner atom
                  if DownNeighborName == UpName and DownNeighborRes == UpRes:
                    continue

                  DownNeighborCoords = ResAtmCoordLists[DownNeighborIndex]
                  DistanceToNeighbor = solenoid_tools.vector_magnitude( DownXyzCoords - DownNeighborCoords )
                  SelectDownNeighbors.append( (DistanceToNeighbor, DownNeighborName, DownNeighborRes, DownNeighborCoords) )

                # sort by distance to atom, nearest first
                SelectDownNeighbors.sort()
                DownNeighbor1Tuple = SelectDownNeighbors[0]
                DownNeighbor2Tuple = SelectDownNeighbors[1]
                # print 'DownRes, DownName', DownRes, DownName
                # print 'DownstreamHydrogens', DownstreamHydrogens
                # print 'SelectDownNeighbors', SelectDownNeighbors

                Distance = solenoid_tools.vector_magnitude(DownXyzCoords - UpXyzCoords)
                
                DistanceCst = 'AtomPair %s %d %s %d SCALARWEIGHTEDFUNC %f HARMONIC %.2f 1.0' %( UpName, UpRes, DownName, DownRes, SasaBasedWeight, Distance )
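                # Rosetta-style AtomPair constraint line: upstream atom name/residue, downstream atom
                # name/residue, then the scoring function. HARMONIC is centered on the measured
                # distance (sd 1.0), and wrapping it in SCALARWEIGHTEDFUNC scales its penalty by the
                # SASA-derived weight computed above.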

                # Use Biopython for angle and dihedral calculations
                # here 'Vec' means PDB.Vector of atom's xyz coord
                UpstreamVec = PDB.Vector(UpXyzCoords)
                DownstreamVec = PDB.Vector(DownXyzCoords)
                
                UpNeighbor1Vec = PDB.Vector(UpNeighbor1Tuple[3])
                UpNeighbor2Vec = PDB.Vector(UpNeighbor2Tuple[3])
                DownNeighbor1Vec = PDB.Vector(DownNeighbor1Tuple[3])
                DownNeighbor2Vec = PDB.Vector(DownNeighbor2Tuple[3])

                Angle1 = PDB.calc_angle(UpNeighbor1Vec, UpstreamVec, DownstreamVec)
                AngleCst1 = 'Angle %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, SasaBasedWeight, Angle1 )
                Angle2 = PDB.calc_angle(UpstreamVec, DownstreamVec, DownNeighbor1Vec)
                AngleCst2 = 'Angle %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], SasaBasedWeight, Angle2 )

                Torsion1 = PDB.calc_dihedral(UpNeighbor2Vec, UpNeighbor1Vec, UpstreamVec, DownstreamVec)
                TorsionCst1 = 'Dihedral %s %d %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpNeighbor2Tuple[1], UpNeighbor2Tuple[2], UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, SasaBasedWeight, Torsion1 )
                Torsion2 = PDB.calc_dihedral(UpNeighbor1Vec, UpstreamVec, DownstreamVec, DownNeighbor1Vec)
                TorsionCst2 = 'Dihedral %s %d %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], SasaBasedWeight, Torsion2 )
                Torsion3 = PDB.calc_dihedral(UpstreamVec, DownstreamVec, DownNeighbor1Vec, DownNeighbor2Vec)
                TorsionCst3 = 'Dihedral %s %d %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], DownNeighbor2Tuple[1], DownNeighbor2Tuple[2], SasaBasedWeight, Torsion3 )
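                # Together, the one distance, two angle, and three dihedral constraints pin the
                # relative geometry of the upstream/downstream atom pair against the nearest
                # heavy-atom neighbors on each side. Biopython's calc_angle and calc_dihedral
                # return radians, and those radian values are passed directly as the
                # CIRCULARHARMONIC x0 (with sd 0.5).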

                # add the constraints to the running lists
                Constraints.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )
                if BBBB: BackboneBackboneCst.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )
                if BBSC: BackboneSidechainCst.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )
                if SCSC: SidechainSidechainCst.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )

              # else:
              #   print 'No hydrogen!'
              #   sys.exit()

        AllConstraints.extend(Constraints)
        AllBackboneBackboneCst.extend(BackboneBackboneCst)
        AllBackboneSidechainCst.extend(BackboneSidechainCst)
        AllSidechainSidechainCst.extend(SidechainSidechainCst)

    SortedConstraints = (AllBackboneBackboneCst, AllBackboneSidechainCst, AllSidechainSidechainCst)

    return AllConstraints, SortedConstraints
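A minimal sketch (not part of the original code) of how the returned constraint lists might be written to Rosetta-style .cst files, one constraint string per line; the helper name and output paths are illustrative:

def write_constraint_files(AllConstraints, SortedConstraints, Prefix='model'):
    """ Hypothetical helper: dump the constraint strings returned above to .cst files """
    BackboneBackboneCst, BackboneSidechainCst, SidechainSidechainCst = SortedConstraints
    for Tag, CstLines in [('all', AllConstraints),
                          ('bbbb', BackboneBackboneCst),
                          ('bbsc', BackboneSidechainCst),
                          ('scsc', SidechainSidechainCst)]:
        # one Rosetta constraint per line
        with open('%s_%s.cst' % (Prefix, Tag), 'w') as CstFile:
            CstFile.write('\n'.join(CstLines) + '\n')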
def get_nr_pdb_list(TMPDIR):
    """
    The main program logic to get the nonredundant list of pdb identifiers,
    selecting the highest resolution as representative.
    See module docstring at top of file for description

    Parameters:
       TMPDIR - name of temp directory to use
    Return value:
       None.

       Output is to stdout:
       list of lists of pdb ids; each entry in the list (one line) is a list of
       pdb ids representing a cluster, and the first id in the inner (cluster)
       list is the chosen representative.
    """
    pdb_dict = {} # dict of {pdbid : MethodResolution} to cache info from PDB
    
    for cluster in yield_cluster_from_file(sys.stdin):
        if not cluster[0].is_protein(): # these are sequence clusters, so if one member is not a protein, none are
            continue # discard non-protein sequences
        orig_seqlist = list(cluster.seqlist) # keep copy before deleting some
        cluster.discard_short_seqs()
        if len(cluster) > 1:
            # now we need to look in PDB files to find highest res X-ray struct
            for seq in cluster.seqlist:
                pdbid = seq.descr[:4].lower()
                if pdbid in pdb_dict:
                    methres = pdb_dict[pdbid]
                    seq.is_xray = methres.is_xray
                    seq.resolution = methres.resolution
                else:
                    name = "pdb" + pdbid
                    pdbfile = os.path.join(PDBDIV_ROOT,
                                           os.path.join(pdbid[1:3], name + ".ent.gz"))
                    tmp_pdbfilename = os.path.join(TMPDIR, name)
                    os.system("gzip " + pdbfile + " -d -c > " + tmp_pdbfilename)
                    pdbheader = PDB.parse_pdb_header(tmp_pdbfilename)
                    if 'x-ray' in pdbheader['structure_method'].lower():
                        seq.is_xray = True
                        seq.resolution = float(pdbheader['resolution'])
                    methres = MethodResolution()
                    methres.is_xray = seq.is_xray
                    methres.resolution = seq.resolution
                    pdb_dict[pdbid] = methres
                    os.unlink(tmp_pdbfilename)
            cluster.discard_non_xray()
        if len(cluster) > 1:
            cluster.discard_lower_resolution()
        if len(cluster) > 1:
            cluster.discard_lower_similarity()
        if len(cluster) > 1:
            cluster.seqlist = [cluster.seqlist[0]] # arbitrary: use first seq

        repr_id =  cluster.seqlist[0].descr[:6].lower()
        sys.stdout.write(repr_id + ": ")
        for seq in orig_seqlist:
            other_id = seq.descr[:6].lower()
            if other_id != repr_id:
                sys.stdout.write(other_id + " ")
        sys.stdout.write("\n")
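For reference, each line written above pairs a representative id with the other members of its cluster, space separated; an illustrative line (ids invented, and the exact 6-character form depends on the input descriptions) would look like:

    1abc_a: 2xyz_b 3def_a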
Example #46
0
def is_no_aa_chain(chain):
    """
    Test if a chain contains no amino acid residues.
    """
    from Bio import PDB
    return all(not PDB.is_aa(r) for r in chain)
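A short usage sketch, assuming a locally available PDB file named example.pdb (an illustrative path, not part of the original example):

from Bio import PDB

parser = PDB.PDBParser(QUIET=True)
structure = parser.get_structure('example', 'example.pdb')
for chain in structure[0]:  # first model
    if is_no_aa_chain(chain):
        print 'chain %s contains no amino acids' % chain.id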