def get_side_chain_vector(residue):
    """
    Average the unit vectors pointing from the C-alpha atom to each
    side-chain atom of ``residue``.

    Atoms after the first four entries of ``residue.get_unpacked_list()``
    are treated as the side chain. When there are none (e.g. glycine), the
    C and N atoms are used instead and the directions are negated.

    Returns a tuple ``(C-alpha coordinate Vector, mean direction Vector)``,
    or None when the residue is not an amino acid, has no CA atom, or has
    neither side-chain atoms nor both N and C.
    """
    if not (is_aa(residue) and residue.has_id('CA')):
        return None

    ca_coord = residue['CA'].get_coord()
    targets = np.array(
        [atom.get_coord() for atom in residue.get_unpacked_list()[4:]])

    use_backbone = len(targets) < 1
    if use_backbone:
        if not (residue.has_id('N') and residue.has_id('C')):
            return None
        targets = np.array(
            [residue['C'].get_coord(), residue['N'].get_coord()])

    offsets = targets - ca_coord
    if use_backbone:
        # Point away from the backbone neighbours rather than towards them.
        offsets = -offsets

    lengths = np.sum(abs(offsets)**2, axis=-1)**(1. / 2)
    directions = offsets / lengths[:, np.newaxis]
    return (Vector(ca_coord), Vector(directions.mean(axis=0)))
def fromDihedral(A, B, C, bond_BC, bond_CD, angle_BCD, torsion_BC):
    """
    Place a new point D given three points A, B, C, the B-C and C-D bond
    lengths, the bond angle at C (angle_BCD) and the dihedral about B-C
    (torsion_BC).

    D is first expressed in a local frame centred on C using spherical
    coordinates, then rotated into the global frame and translated by C.

    See Parsons et al. for details.
    """
    # Work with the supplement of the bond angle.
    supplement = pi - angle_BCD
    sin_supplement = sin(supplement)

    # D in the local frame, scaled to the C-D bond length.
    local_D = Vector(
        cos(supplement),
        cos(torsion_BC) * sin_supplement,
        sin(torsion_BC) * sin_supplement,
    )**bond_CD

    # Orthonormal basis: unit B->C, the plane normal, and their cross product.
    unit_bc = (C - B) / float(bond_BC)
    normal = (B - A)**unit_bc
    normal.normalize()
    in_plane = normal**unit_bc

    rotation = (array([
        unit_bc.get_array(),
        in_plane.get_array(),
        normal.get_array(),
    ])).T

    # Rotate into the global frame and move the origin to C.
    return local_D.left_multiply(rotation) + C
Example #3
0
def COM(object):
	"""Return the mass-weighted centre of all atoms in ``object``.

	``object`` must provide ``get_atoms()``; every atom must provide
	``get_vector()`` and a ``mass`` attribute.

	The result is a ``Vector``. The division is done on the underlying
	coordinate array instead of calling ``Vector.__div__`` directly, because
	that dunder only exists on Biopython releases that still supported
	Python 2.
	"""
	weighted_sum = np.zeros(3)
	total_mass = 0.0
	for atom in object.get_atoms():
		weighted_sum += atom.get_vector().get_array() * atom.mass
		total_mass += atom.mass
	return Vector(weighted_sum / total_mass)
Example #4
0
 def test_transform(self):
     """Each entity level ends up where rotation + translation predicts."""
     entities = (self.s, self.m, self.c, self.r, self.a)
     for entity in entities:
         rot = rotmat(Vector(1, 3, 5), Vector(1, 0, 0))
         shift = numpy.array((2.4, 0, 1), 'f')

         before = self.get_pos(entity)
         entity.transform(rot, shift)
         after = self.get_pos(entity)

         expected = numpy.dot(before, rot) + shift
         for axis in range(3):
             self.assertAlmostEqual(after[axis], expected[axis])
Example #5
0
def calculateCoordinates(refA, refB, refC, L, ang, di):
    """Compute the coordinates of a new point D from three reference atoms.

    A closed-form solution yields a point at distance-related parameter ``L``
    from C whose dot product with the C->B vector matches ``ang``; that point
    is then rotated about the B->C axis until the A-B-C-D dihedral equals
    ``di``.

    Parameters:
        refA, refB, refC: objects exposing ``get_vector()``
            (presumably Bio.PDB atoms -- confirm against callers).
        L: length term used in the closed-form solution
            (presumably the C-D bond length -- TODO confirm).
        ang: angle in degrees (converted to radians below).
        di: target dihedral A-B-C-D in degrees.

    Returns:
        The array obtained from ``D.get_array()`` for the final point D.
    """
    AV=refA.get_vector()
    BV=refB.get_vector()
    CV=refC.get_vector()
    
    # Express A and B relative to C so that C is the local origin.
    CA=AV-CV
    CB=BV-CV

    ##CA vector
    AX=CA[0]
    AY=CA[1]
    AZ=CA[2]

    ##CB vector
    BX=CB[0]
    BY=CB[1]
    BZ=CB[2]

    ##Plane Parameters
    # (A, B, G) are the components of the cross product CA x CB.
    A=(AY*BZ)-(AZ*BY)
    B=(AZ*BX)-(AX*BZ)
    G=(AX*BY)-(AY*BX)

    ##Dot Product Constant
    # |CB| * L * cos(ang), with ang converted from degrees to radians.
    F= math.sqrt(BX*BX + BY*BY + BZ*BZ) * L * math.cos(ang*(math.pi/180.0))

    ##Constants
    const=math.sqrt( math.pow((B*BZ-BY*G),2) *(-(F*F)*(A*A+B*B+G*G)+(B*B*(BX*BX+BZ*BZ) + A*A*(BY*BY+BZ*BZ)- (2*A*BX*BZ*G) + (BX*BX+ BY*BY)*G*G - (2*B*BY)*(A*BX+BZ*G))*L*L))
    denom= (B*B)*(BX*BX+BZ*BZ)+ (A*A)*(BY*BY+BZ*BZ) - (2*A*BX*BZ*G) + (BX*BX+BY*BY)*(G*G) - (2*B*BY)*(A*BX+BZ*G)

    X= ((B*B*BX*F)-(A*B*BY*F)+(F*G)*(-A*BZ+BX*G)+const)/denom

    # When this condition holds, B*BZ - BY*G is zero, which is the divisor
    # of the general formulas in the else-branch, so a separate form is used.
    if((B==0 or BZ==0) and (BY==0 or G==0)):
        const1=math.sqrt( G*G*(-A*A*X*X+(B*B+G*G)*(L-X)*(L+X)))
        Y= ((-A*B*X)+const1)/(B*B+G*G)
        Z= -(A*G*G*X+B*const1)/(G*(B*B+G*G))
    else:
        Y= ((A*A*BY*F)*(B*BZ-BY*G)+ G*( -F*math.pow(B*BZ-BY*G,2) + BX*const) - A*( B*B*BX*BZ*F- B*BX*BY*F*G + BZ*const)) / ((B*BZ-BY*G)*denom)
        Z= ((A*A*BZ*F)*(B*BZ-BY*G) + (B*F)*math.pow(B*BZ-BY*G,2) + (A*BX*F*G)*(-B*BZ+BY*G) - B*BX*const + A*BY*const) / ((B*BZ-BY*G)*denom)

    
    # Translate the local solution back by adding C's position.
    D=Vector(X, Y, Z) + CV
    with warnings.catch_warnings():
        # ignore inconsequential warning
        warnings.simplefilter("ignore")
        # Dihedral of the provisional point, converted to degrees.
        temp=calc_dihedral(AV, BV, CV, D)*(180.0/math.pi)
    
  
    # Rotate D about the B->C axis by the difference between the requested
    # and the current dihedral (degrees converted to radians).
    di=di-temp
    rot= rotaxis(math.pi*(di/180.0), CV-BV)
    D=(D-BV).left_multiply(rot)+BV
    
    return D.get_array()
Example #6
0
def calculateCoordinates(refA, refB, refC, L, ang, di):
	"""Compute a new point D from three reference atoms.

	A closed-form solution yields a point whose dot product with the C->B
	vector matches ``ang`` (degrees) for length term ``L``; that point is
	then rotated about the B->C axis until the A-B-C-D dihedral equals
	``di`` (degrees).

	refA, refB, refC must expose ``get_vector()`` (presumably Bio.PDB
	atoms -- confirm against callers). ``L`` is presumably the C-D bond
	length -- TODO confirm.

	Returns the final point D as a Vector (no conversion to an array).
	"""
	AV=refA.get_vector(); BV=refB.get_vector(); CV=refC.get_vector()
	# Express A and B relative to C so that C is the local origin.
	CA=AV-CV; CB=BV-CV
	##CA vector
	AX=CA[0]; AY=CA[1]; AZ=CA[2]
	##CB vector
	BX=CB[0]; BY=CB[1]; BZ=CB[2]
	##Plane Parameters
	# (A, B, G) are the components of the cross product CA x CB.
	A=(AY*BZ)-(AZ*BY); B=(AZ*BX)-(AX*BZ); G=(AX*BY)-(AY*BX)
	##Dot Product Constant
	# |CB| * L * cos(ang), with ang converted from degrees to radians.
	F= math.sqrt(BX*BX + BY*BY + BZ*BZ) * L * math.cos(ang*(math.pi/180.0))
	##Constants
	const=math.sqrt( math.pow((B*BZ-BY*G),2) *(-(F*F)*(A*A+B*B+G*G)+(B*B*(BX*BX+BZ*BZ) + A*A*(BY*BY+BZ*BZ)- (2*A*BX*BZ*G) + (BX*BX+ BY*BY)*G*G - (2*B*BY)*(A*BX+BZ*G))*L*L))
	denom= (B*B)*(BX*BX+BZ*BZ)+ (A*A)*(BY*BY+BZ*BZ) - (2*A*BX*BZ*G) + (BX*BX+BY*BY)*(G*G) - (2*B*BY)*(A*BX+BZ*G)
	X= ((B*B*BX*F)-(A*B*BY*F)+(F*G)*(-A*BZ+BX*G)+const)/denom
	# When this condition holds, B*BZ - BY*G is zero, which is the divisor
	# of the general formulas in the else-branch, so a separate form is used.
	if((B==0 or BZ==0) and (BY==0 or G==0)):
		const1=math.sqrt( G*G*(-A*A*X*X+(B*B+G*G)*(L-X)*(L+X)))
		Y= ((-A*B*X)+const1)/(B*B+G*G)
		Z= -(A*G*G*X+B*const1)/(G*(B*B+G*G))
	else:
		Y= ((A*A*BY*F)*(B*BZ-BY*G)+ G*( -F*math.pow(B*BZ-BY*G,2) + BX*const) - A*( B*B*BX*BZ*F- B*BX*BY*F*G + BZ*const)) / ((B*BZ-BY*G)*denom)
		Z= ((A*A*BZ*F)*(B*BZ-BY*G) + (B*F)*math.pow(B*BZ-BY*G,2) + (A*BX*F*G)*(-B*BZ+BY*G) - B*BX*const + A*BY*const) / ((B*BZ-BY*G)*denom)
	# Translate the local solution back by adding C's position.
	D=Vector(X, Y, Z) + CV
	with warnings.catch_warnings():
		# ignore inconsequential warning
		warnings.simplefilter("ignore")
		# Dihedral of the provisional point, converted to degrees.
		temp=calc_dihedral(AV, BV, CV, D)*(180.0/math.pi)
	# Rotate D about the B->C axis by the difference between the requested
	# and the current dihedral (degrees converted to radians).
	di=di-temp
	rot= rotaxis(math.pi*(di/180.0), CV-BV)
	D=(D-BV).left_multiply(rot)+BV
	return D
Example #7
0
    def dihedral_calcul(self, others):
        """Score the per-residue spread of the C-CA-CB-CG dihedral across models.

        For every residue index of ``self.res`` the dihedral (in degrees) is
        computed in each model of ``others`` plus ``self`` and reduced with
        ``stat_ecart_type`` (presumably a standard deviation -- confirm).
        Residues for which the computation fails (for example a missing CB
        or CG atom) get the placeholder value 0.01.

        Side effects:
            * stores the raw scores in ``self.angle_dihedres``;
            * saves a plot of the scores to "angle_dihedres.png";
            * calls ``set_angles_dihedres`` and ``create_bfactor_file`` on
              every model, ``self`` included, with the scores rescaled
              to the range 0-100.

        ``others`` is not modified.
        """
        # Work on a copy: appending ``self`` to the caller's list would grow
        # it by one entry on every call.
        models = list(others) + [self]

        self.angle_dihedres = []
        for n in range(len(self.res)):
            try:
                ecart_type = stat_ecart_type([
                    calc_dihedral(
                        Vector(model.get_res()[n]['C'].get_coord()),
                        Vector(model.get_res()[n]['CA'].get_coord()),
                        Vector(model.get_res()[n]['CB'].get_coord()),
                        Vector(model.get_res()[n]['CG'].get_coord())) /
                    math.pi * 180 for model in models
                ])
            except Exception:
                # Best effort: residues without the four atoms keep a small
                # placeholder score. KeyboardInterrupt/SystemExit propagate.
                ecart_type = 0.01
            self.angle_dihedres.append(ecart_type)

        plot(list(range(1, len(self.angle_dihedres) + 1)),
             self.angle_dihedres)
        savefig("angle_dihedres.png")
        clf()

        mini = min(self.angle_dihedres)
        maxi = max(self.angle_dihedres)
        span = maxi - mini
        if span:
            bfactor_angle = [(var - mini) * 100 / span
                             for var in self.angle_dihedres]
        else:
            # All residues share the same score; avoid dividing by zero.
            bfactor_angle = [0.0 for _ in self.angle_dihedres]
        assert max(
            bfactor_angle
        ) <= 100, "maximum de bfactor trop haut apres normalisation: " + str(
            max(bfactor_angle))
        assert min(
            bfactor_angle
        ) >= 0, "minimum de bfactor trop bas apres normalisation: " + str(
            min(bfactor_angle))

        for model in models:
            model.set_angles_dihedres(self.angle_dihedres)
            model.create_bfactor_file(bfactor_angle, "_dihedre")
Example #8
0
def add_frame(img):
    """Read the next frame from the global ``infile`` and add its
    inter-chain distances to ``img``.

    Lines are skipped until one starting with 'ITEM: ATOMS'. The following
    lines are consumed until a line starting with 'ITEM:' or a bare newline.
    Only rows whose second column is '1' are used; rows whose first column
    is <= the global ``chain_boundary`` go to the first chain, the rest to
    the second. The last three columns are the coordinates, each rescaled
    as ``value * 400 - 200``.

    ``img[i][j]`` is incremented by the distance between atom i of the first
    chain and atom j of the second. The global ``end`` is set to True when
    the file is exhausted while reading atom rows.
    """
    global infile
    global chain_boundary
    global end
    first_chain = []
    second_chain = []

    # Advance to the atom table header, then step onto its first row.
    record = []
    while record[:11] != 'ITEM: ATOMS':
        record = next(infile)
    record = next(infile)

    while record[:5] not in ['ITEM:', '\n']:
        fields = record.split()
        if fields[1] == '1':
            if int(fields[0]) <= chain_boundary:
                destination = first_chain
            else:
                destination = second_chain
            point = Vector(fields[-3:])
            point = Vector((point._ar * np.array(400.0)) + np.array(-200.0))
            destination.append(point)

        try:
            record = next(infile)
        except StopIteration:
            end = True
            break
    #if line == '\n':
    #	end = True
    for i, first_atom in enumerate(first_chain):
        for j, second_atom in enumerate(second_chain):
            img[i][j] += (first_atom - second_atom).norm()
Example #9
0
    def center(self):
        """
        Centroid of the pocket, computed from its C-alpha atoms.

        Entries of the "ca.atom" column that are None are ignored.

        Returns
        -------
        Bio.PDB.vectors.Vector
            Coordinates for the pocket centroid.
        """

        usable_vectors = [
            entry for entry in self.ca_atoms["ca.atom"].to_list()
            if entry is not None
        ]
        return Vector(self.center_of_mass(usable_vectors, geometric=False))
Example #10
0
 def __init__(self, symbol, name, atomid, coords, bfactor, load_json=False):
     """Create an atom record.

     With ``load_json`` set, every attribute is initialised to None and the
     other arguments are ignored. Otherwise the attributes are filled from
     the arguments: the element symbol is capitalised, ``mc_sc`` is True for
     the atom names CA, C, N and O, and the atomic mass is taken from the
     ``element_mass`` cache (filled via ``element(...)`` on a miss).
     """
     if load_json:
         for attribute in ("symbol", "name", "atomid", "coords", "mc_sc",
                           "atomic_mass", "vector", "bfactor"):
             setattr(self, attribute, None)
         return

     self.symbol = symbol.capitalize()
     self.name = name
     self.atomid = atomid
     self.coords = coords  # (), for consistency save everything as np.array()
     self.mc_sc = self.name in ("CA", "C", "N", "O")

     # Look the mass up once per element and remember it in the cache.
     if element_mass.get(self.symbol) is None:
         element_mass[self.symbol] = element(self.symbol).atomic_weight
     self.atomic_mass = element_mass[self.symbol]

     x_coord, y_coord, z_coord = (self.coords[0], self.coords[1],
                                  self.coords[2])
     self.vector = Vector(x=x_coord, y=y_coord, z=z_coord)
     self.bfactor = bfactor
Example #11
0
def create_random_pdb(separation_distance, move_chain_id, fix_chain_id, input_file_name, output_pdb_name, model_number = 0):
	"""Write a copy of a two-chain complex with randomised relative orientation.

	Both chains are shifted so that the fixed chain's centre of mass is the
	origin. The moved chain is then rotated by a random matrix about its own
	centre and placed at ``separation_distance`` from the origin along the
	original centre-to-centre direction; the fixed chain is rotated by a
	second random matrix about the origin. The structure is saved to
	``output_pdb_name``.

	Parameters:
		separation_distance: distance between the two centres after the move.
		move_chain_id, fix_chain_id: chain identifiers inside the model.
		input_file_name: path of the PDB file to read.
		output_pdb_name: path of the PDB file to write.
		model_number: index of the model to use (default 0).

	Returns:
		dict with the input/output separations, the centres of mass before
		and after the transformation (as Vectors) and "Max_distance", the
		largest atom distance from the origin after the transformation.
	"""
	results = {}
	structure = PDBParser(PERMISSIVE=1).get_structure('whatever', input_file_name)
	chain_moved = structure[model_number][move_chain_id]
	chain_fixed = structure[model_number][fix_chain_id]

	old_fixed_centre = COM(chain_fixed)
	old_moved_centre = COM(chain_moved)

	# Move the origin to the centre of mass of the fixed chain. Coordinates
	# are stored as arrays (not Vector objects) so that the atoms keep the
	# coordinate type the rest of Bio.PDB works with.
	for chain in (chain_moved, chain_fixed):
		for atom in chain.get_atoms():
			atom.set_coord((atom.get_vector() - old_fixed_centre).get_array())

	moved_centre = old_moved_centre - old_fixed_centre

	d = (old_fixed_centre - old_moved_centre).norm()
	results["1_Input_Separation"] = d
	results["1_Old_fixed_chain_com"]=old_fixed_centre
	results["1_Old_moved_chain_com"]= old_moved_centre
	results["0_Intended_Output_Separation"] = separation_distance

	R1 = generate_3d()
	R2 = generate_3d()

	max_distance = 0.0

	# New centre of the moved chain: same direction as before, rescaled to
	# the requested separation. It does not depend on the atom, so compute
	# it once.
	target_centre = Vector(moved_centre.normalized().get_array() * separation_distance)

	# Rotate the moved chain about its own centre and put it at the target.
	weighted_sum = np.zeros(3)
	total_mass = 0.0
	for atom in chain_moved.get_atoms():
		new_position = (atom.get_vector() - moved_centre).left_multiply(R2) + target_centre
		atom.set_coord(new_position.get_array())
		max_distance = max(max_distance, new_position.norm())
		weighted_sum += new_position.get_array() * atom.mass
		total_mass += atom.mass
	# Divide the underlying array: calling Vector.__div__ directly only
	# works on Biopython releases that still supported Python 2.
	final_moved_centre = Vector(weighted_sum / total_mass)

	# Rotate the fixed chain about the origin (its centre of mass).
	weighted_sum = np.zeros(3)
	total_mass = 0.0
	for atom in chain_fixed.get_atoms():
		new_position = atom.get_vector().left_multiply(R1)
		atom.set_coord(new_position.get_array())
		max_distance = max(max_distance, new_position.norm())
		weighted_sum += new_position.get_array() * atom.mass
		total_mass += atom.mass
	final_fixed_centre = Vector(weighted_sum / total_mass)

	d = (final_fixed_centre - final_moved_centre).norm()

	w = PDBIO()
	w.set_structure(structure)
	w.save(output_pdb_name)
	results["2_Output_Separation"]=d
	results["2_fixed_chain_com"]=final_fixed_centre
	results["2_moved_chain_com"]=final_moved_centre
	results["Max_distance"]=max_distance
	return results
Example #12
0
def add_water(refinement_input, ligand_chain, n_waters=2, test=False):
    """Insert randomly placed water molecules into each refinement input PDB.

    For every file in ``refinement_input`` the function generates
    ``n_waters`` water molecules at random integer coordinates, writes them
    between the protein and ligand records, moves them in steps of 5 along
    -x until no atom lies within 5.0 of any water atom, and rewrites the
    file in place with TER records added.

    Parameters:
        refinement_input: list of PDB file paths; each file is overwritten.
        ligand_chain: value compared against the residue-name columns
            (17:20) to tell ligand lines from protein lines.
        n_waters: number of water molecules per input file.
        test: when true, seeds NumPy's RNG with 42 for reproducibility.

    Returns:
        None when ``n_waters < 1``; otherwise the list of rewritten paths.
    """
    if test:
        np.random.seed(42)

    if n_waters < 1:
        return

    else:
        output = []
        n_inputs = len(refinement_input)
        water_coords = []
        resnums = []
        atomnums = []
        chains = []
        resnames = []

        # get maximum residue and atom numbers
        # Only the first input file is inspected for numbering.
        with open(refinement_input[0], "r") as file:
            protein = file.readlines()

            for line in protein:
                if line.startswith("ATOM") or line.startswith(
                        "HETATM") or line.startswith("TER"):
                    try:
                        resnums.append(line[23:27].strip())
                        atomnums.append(line[7:11].strip())
                        chains.append(line[21])
                        resnames.append(line[17:20])
                    except:
                        # NOTE(review): the IndexError is constructed but
                        # never raised, so short lines (e.g. a bare "TER")
                        # are silently ignored after the two appends above
                        # have already stored empty strings.
                        IndexError("Line '{}' is too short".format(line))
        # Number of lines in the first file whose residue name equals
        # ligand_chain (compared without stripping).
        lig_length = resnames.count(ligand_chain)
        resnums = [int(num) for num in resnums if num]
        max_resnum = max(resnums)
        water_resnums = []
        water_chain = chains[0]  # water chain = 1st protein chain
        # First free atom number, leaving room for the ligand atoms.
        atomnum = max([int(num) for num in atomnums if num]) + 1 + lig_length

        # presumably cs.water is a list of three PDB line templates
        # (O, H1, H2), repeated once per water per input -- TODO confirm.
        water = cs.water * n_waters * n_inputs

        for inp in range(n_inputs):
            for n in range(n_waters):
                # Oxygen at random integer coordinates in [0, 100); the two
                # hydrogens are placed at fixed offsets from it.
                O_coords = Vector(
                    [np.random.randint(0, 100) for i in range(3)])
                H1_coords = O_coords + Vector(0.757, 0.586, 0.0)
                H2_coords = O_coords + Vector(-0.757, 0.586, 0.0)
                water_coords = water_coords + [list(O_coords)] + [
                    list(H1_coords)
                ] + [list(H2_coords)]

                max_resnum += 1  # each water must have a different residue number
                water_resnums = water_resnums + [max_resnum] * 3
            # One extra residue number is skipped between inputs.
            max_resnum += 1

        water_atomnums = [atomnum + j for j in range(n_waters * 3 * n_inputs)]

        # PDB lines - water
        water_output = []

        for atom, num, resnum, coord in zip(water, water_atomnums,
                                            water_resnums, water_coords):
            coord = ["{:7.4f}".format(c) for c in coord]
            coord = " ".join(coord)
            water_output.append(atom.format(num, water_chain, resnum, coord))

        # Split the flat list of water lines into one chunk per input file.
        sliced_water_output = []
        for i in range(0, len(water_output), n_waters * 3):
            sliced_water_output.append(water_output[i:i + n_waters * 3])

        # loop over minimisation inputs
        for inp, w in zip(refinement_input, sliced_water_output):
            new_protein_file = inp
            protein = []
            ligand = []

            # read in protein and ligand lines
            # Note: ``inp`` is rebound from the path to the file object here;
            # ``inp.name`` is used further down to build the temp file name.
            with open(inp, "r") as inp:
                lines = inp.readlines()

                for line in lines:
                    if line.startswith("ATOM") or line.startswith("HETATM"):
                        if line[17:20].strip() == ligand_chain:
                            ligand.append(line)
                        else:
                            protein.append(line)

            # add water to PDB
            with open(new_protein_file, "w+") as file:
                for line in protein:
                    file.write(line)
                file.write("\n")
                for line in w:
                    file.write(line)
                file.write("\n")
                for line in ligand:
                    file.write(line)

            # load again with Biopython
            parser = PDBParser()
            structure = parser.get_structure("complex", new_protein_file)
            water_list = []
            # All atoms of the structure, waters included.
            protein_list = Selection.unfold_entities(structure, "A")
            temp_protein_file = os.path.join(
                os.path.dirname(inp.name),
                os.path.basename(inp.name).replace(".pdb", "_temp.pdb"))

            for res in structure.get_residues():
                if res.resname == 'HOH':
                    water_list = water_list + Selection.unfold_entities(
                        res, "A")

            # check for water contacts
            # Note: ``w`` is rebound here from the chunk of water lines to
            # individual water atoms.
            contacts5 = []
            for w in water_list:
                contacts5 = contacts5 + NeighborSearch(protein_list).search(
                    w.coord, 5.0, "A")
            contacts5 = [c for c in contacts5
                         if c not in water_list]  # exclude "self" contacts

            # translate water, if needed
            # Every water atom is shifted by -5 along x per pass until no
            # non-water atom is found within 5.0 of any of them.
            while contacts5:
                contacts5 = []
                for w in water_list:
                    x, y, z = w.coord
                    w.set_coord([x - 5, y, z])
                    contacts5 = contacts5 + NeighborSearch(
                        protein_list).search(w.coord, 5.0, "A")
                    contacts5 = [c for c in contacts5 if c not in water_list]

            # save final output
            io = PDBIO()
            io.set_structure(structure)
            io.save(temp_protein_file)
            output.append(new_protein_file)

            # Collect the relocated water lines from the temp file, shifting
            # their atom numbers past the ligand.
            new_water_lines = []
            with open(temp_protein_file, "r") as temp:
                temp_lines = temp.readlines()
                for line in temp_lines:
                    if line[17:20].strip() == "HOH":
                        # NOTE(review): str.replace substitutes every
                        # occurrence of this 4-character substring in the
                        # line, not only columns 7:11 -- confirm this cannot
                        # hit other fields.
                        line = line.replace(line[7:11],
                                            str(int(line[7:11]) + lig_length))
                        if line[12:15] == "2HW":
                            line = line + "\nTER\n"
                        new_water_lines.append(line)

            # Removes a TER marker from the second-to-last water line, if it
            # has one. NOTE(review): raises IndexError when fewer than two
            # water lines were collected.
            new_water_lines[-2] = new_water_lines[-2].replace("\nTER\n", "")

            with open(new_protein_file, "w+") as file:
                for line in protein:
                    file.write(line)
                file.write("\nTER\n")
                for line in new_water_lines:
                    file.write(line)
                file.write("\n")
                for line in ligand:
                    file.write(line)
                file.write("TER")

            os.remove(temp_protein_file)

        return output
						Atom1.append(atom)
					elif atom.name == 'SG':
						Atom2.append(atom)

# Pair the i-th entry of Atom1 with the i-th entry of Atom2 (both lists are
# filled above this fragment; Atom2 holds atoms named 'SG').
# Note: the ``i = 0`` / ``i += 1`` statements have no effect on the for loop.
i = 0						
for i in range(len(Atom1)):
	resid.append([Atom1[i], Atom2[i]])
	i += 1
# Map each key in ``list`` to its atom pair. ``list`` and ``dict`` are
# variables defined outside this fragment that shadow the builtins.
j = 0
for j in range(len(list)):
	dict[list[j]] = resid[j]
	j += 1

#print(atom1, atom1)
					
# Compare every atom pair with every pair (including itself) and report
# those whose geometry falls inside the thresholds below.
for resi in resid:
	for res in resid:
		atom1 = resi[0]
		atom2 = resi[1]
		atom3 = res[1]
		atom4 = res[0]
		# presumably Bio.PDB atoms, where subtraction yields the distance
		# between the two atoms -- confirm.
		distance = atom3 - atom2
		v1 = atom1.get_vector()
		v2 = atom2.get_vector()
		v3 = atom3.get_vector()
		v4 = atom4.get_vector()
		# NOTE(review): if this resolves to Bio.PDB's calc_dihedral, the
		# result is in radians, so the 85..95 test below could never pass
		# -- confirm what ``Vector`` refers to here and the intended unit.
		vector = Vector.calc_dihedral(v1, v2, v3, v4)
		if 85 < vector < 95 :
			if 1.9 < distance < 2.1:
				print('S-S:')
Example #14
0
def cluster(parametersobject):
	"""Cluster per-frame position/orientation data with MeanShift.

	Reads 'analysis/coord_matrix_NNN.txt' for NNN = 001..Number_of_orientations,
	skipping the first ``Skip_initial_frames`` lines of each file and using
	columns 1-9 of every remaining line as one sample. Writes:
	  * 'analysis/frames_read.txt'         - line count of each input file;
	  * 'analysis/clusters.txt'            - one row per cluster centre;
	  * 'analysis/frame_cluster_types.txt' - cluster label of every frame,
	    one output line per input file.

	``parametersobject`` must expose a ``parameterdic`` mapping containing
	the keys 'Number_of_orientations' and 'Skip_initial_frames'.
	"""
	number_of_orientations = parametersobject.parameterdic['Number_of_orientations']
	skip = parametersobject.parameterdic['Skip_initial_frames']
	data = []
	
	f_framelist = open('analysis/frames_read.txt', 'w+')
	framelist = []
	
	for i in range(1, 1+number_of_orientations):
		with open('analysis/coord_matrix_'+str(i).zfill(3)+'.txt', 'r') as f:
			l = f.readlines()
			# Column 0 is dropped; columns 1..9 form the sample.
			data.extend([[float(p) for p in line.strip().split()[1:10]] for line in l[skip:]])
			# Note: the file records the total line count, while framelist
			# keeps the number of lines actually used (total minus skip).
			f_framelist.write(str(len(l))+'\n')
			framelist.append(len(l)-skip)
	f_framelist.close()
	
	
	ms = MeanShift(n_jobs = -2, cluster_all = True)
	ms.fit(data)
	labels = ms.labels_
	names, numbers = np.unique(labels, return_counts = True)
	cluster_centers = ms.cluster_centers_
	fout = open('analysis/clusters.txt', 'w+')
	fout.write('label\tcount\ttheta\tphi\ttheta_x\ttheta_y\ttheta_z\tx\ty\tz\tR[0][0]\tR[0][1]\tR[0][2]\tR[1][0]\tR[1][1]\tR[1][2]\tR[2][0]\tR[2][1]\tR[2][2]\n')
	phi = 0
	for i, line in enumerate(cluster_centers):
		# Builds a list of nine empty lists; only R[0..2] are used below.
		R = [_[:] for _ in [[]]*9]
		x, y, z = line[0], line[1], line[2]
		# Two rows come from the cluster centre; the third is their cross
		# product.
		R[0] = line[3:6]
		R[1] = line[6:9]
		R[2] = np.cross(R[0], R[1])
		V = Vector(x, y, z)
		if V.norm() > 1e-6:
			# Angle between the position vector and the z axis.
			theta = V.angle(Vector(0,0,1))
			norm = np.sqrt(x*x + y*y)
			if norm > 1e-6:
				phi = np.arctan2(y,x)
				#otherwise phi isn't updated and the previous value is copied. Keeps it from jumping near the poles.
		else:
			theta = 0.0
		
		# Angles derived from entries of R via arctan2.
		theta_x = np.arctan2(R[2][1], R[2][2])
		theta_y = np.arctan2(-R[2][0], np.sqrt(R[2][1]*R[2][1]+R[2][2]*R[2][2]))
		theta_z = np.arctan2(R[1][0], R[0][0])
		
		fout.write(str(names[i])+'\t'+str(numbers[i])+'\t')
		fout.write(str(theta)+'\t'+str(phi)+'\t')
		fout.write(str(theta_x)+'\t'+str(theta_y)+'\t'+str(theta_z)+'\t')
		
		
		# The nine centre values (x, y, z, R[0], R[1]) followed by R[2].
		for value in line:
			fout.write(str(value)+'\t')
		fout.write(str(R[2][0])+'\t'+str(R[2][1])+'\t'+str(R[2][2])+'\t')
		fout.write('\n')
	fout.close()
	n_clusters_ = len(np.unique(labels))
	print("Number of estimated clusters:", n_clusters_)
	
	
	# Write the labels back out grouped by input file: a new line is started
	# whenever the current file's frame count has been reached.
	# NOTE(review): ``classification`` is not closed in the visible part of
	# this function -- confirm it is closed further down.
	classification = open('analysis/frame_cluster_types.txt', 'w+')
	frame_counter = 0
	framelist_counter = 0
	for label in labels:
		if frame_counter<framelist[framelist_counter]:
			classification.write(str(label)+'\t')
			frame_counter+=1
		else:
			framelist_counter+=1
			classification.write('\n'+str(label)+'\t')
			frame_counter=1
	
	'''
Example #15
0
def cross(v1, v2):
    """Return the cross product v1 x v2 of two 3-component sequences as a Vector."""
    ax, ay, az = v1[0], v1[1], v1[2]
    bx, by, bz = v2[0], v2[1], v2[2]
    return Vector(
        ay * bz - az * by,
        az * bx - ax * bz,
        ax * by - ay * bx,
    )
Example #16
0
def find_geometry(metals,
                  structure,
                  permissive=False,
                  all_metals=False,
                  external=None):
    """Build distance-constraint strings for the atoms coordinating each metal.

    For every metal that is not already covered by ``external`` the atoms
    within a metal-specific cutoff (3.5 for YB, 3.3 for K, 2.9 otherwise)
    are collected, other metals, carbons and hydrogens are dropped, and the
    atom-metal-atom angles of all pairs are passed to
    ``angle_classification`` to recognise a coordination geometry.

    Parameters:
        metals: iterable of ``[atom, residue, chain]`` entries, one per metal.
        structure: structure providing ``get_chains()`` and the atoms to
            search.
        permissive: retry the classification in permissive mode when the
            strict attempt finds no geometry.
        all_metals: when no geometry is found, constrain every atom within
            the cutoff instead of raising.
        external: optional iterable of strings; a metal whose
            "chain:resnum:atomname" identifier occurs in any of them is
            skipped. ``None`` means no external constraints.

    Returns:
        list of strings of the form
        "<spring>-<distance>-<chain>:<resnum>:<atom>-<chain>:<resnum>:<atom>".

    Raises:
        ce.NoGeometryAroundMetal: no geometry could be determined and
            ``all_metals`` is false.
    """
    # The default is None, which cannot be iterated.
    if external is None:
        external = []

    # check metal contacts
    output = []
    checked_metals = []
    structure_list = Selection.unfold_entities(structure, "A")

    # The spatial index and the exclusion list do not depend on the metal or
    # the residue, so build them once (the index only when first needed).
    neighbor_search = None
    # exclude self-contacts, carbons and hydrogens
    excluded_contacts = cs.metals + ['C', 'H']

    for metal in metals:

        # search distance based on metal type
        if metal[0].element == "YB":
            dist = 3.5
        elif metal[0].element == "K":
            dist = 3.3
        else:
            dist = 2.9

        metal_str = "{}:{}:{}".format(metal[2].id, metal[1].get_id()[1],
                                      metal[0].name)
        in_ext = any(metal_str in i for i in external)

        if not in_ext and list(metal[0].coord) not in checked_metals:
            coords = metal[0].coord

            if neighbor_search is None:
                neighbor_search = NeighborSearch(structure_list)
            contacts_atoms = [
                c for c in neighbor_search.search(coords, dist, "A")
                if c.element not in excluded_contacts
            ]

            # Attach the owning residue and chain to every contact atom,
            # keeping chain -> residue -> atom order.
            contacts = []
            for chain in structure.get_chains():
                for residue in chain.get_residues():
                    for atom in contacts_atoms:
                        if atom in residue.get_atoms():
                            contacts.append([atom, residue, chain])

            combinations = list(itertools.combinations(contacts, 2))
            combinations = [list(c) for c in combinations]

            # get all atom - metal - atom angles
            for c in combinations:
                vi = Vector(c[0][0].coord)
                vj = Vector(c[1][0].coord)
                angle = vectors.calc_angle(vi, coords, vj) * 180 / np.pi
                c.append(angle)

            geo, coordinated_atoms = angle_classification(combinations, False)

            if geo is None and permissive:
                geo, coordinated_atoms = angle_classification(
                    combinations, True)

                if geo is None and all_metals and combinations:

                    geo, coordinated_atoms = angle_classification(
                        combinations, True)
                    if geo:
                        print(
                            "Found {} geometry around {} (residue {}). Adding constraints."
                            .format(geo, metal[0].name, metal[1].get_id()[1]))
                        checked_metals.append(list(metal[0].coord))
                    else:
                        coordinated_atoms = combinations
                        checked_metals.append(list(metal[0].coord))
                        geo = "no"
                        print(
                            "Found {} geometry around {} (residue {}). Adding constraints to all atoms within {}A of the metal."
                            .format(geo, metal[0].name, metal[1].get_id()[1],
                                    dist))

                elif geo is None and not all_metals:
                    raise ce.NoGeometryAroundMetal(
                        "Failed to determine geometry around {} (residue {}). Add constraints manually or set 'constrain_all_metals: true' to constrain all atoms within {}A of the metal."
                        .format(metal[0].name, metal[1].get_id()[1], dist))

                elif geo is None and all_metals and not combinations:
                    print("No atoms coordinated to {} (residue {}).".format(
                        metal[0].name, metal[1].get_id()[1]))

                elif geo:
                    checked_metals.append(list(metal[0].coord))
                    print(
                        "Found {} geometry around {} (residue {}). Adding constraints."
                        .format(geo, metal[0].name, metal[1].get_id()[1]))

            elif geo is None and all_metals and combinations:
                geo, coordinated_atoms = angle_classification(
                    combinations, True)

                if geo is None:
                    geo = "no"
                    coordinated_atoms = combinations
                    checked_metals.append(list(metal[0].coord))
                    print(
                        "Found {} geometry around {} (residue {}). Adding constraints to all atoms within {}A of the metal."
                        .format(geo, metal[0].name, metal[1].get_id()[1],
                                dist))

                else:
                    print(
                        "Found {} geometry around {} (residue {}). Adding constraints."
                        .format(geo, metal[0].name, metal[1].get_id()[1]))

            elif geo is None and all_metals and not combinations:
                print("No atoms coordinated to {} (residue {}).".format(
                    metal[0].name, metal[1].get_id()[1]))

            elif geo is None and not all_metals and not permissive:
                raise ce.NoGeometryAroundMetal(
                    "Failed to determine geometry around {} (residue {}). Add constraints manually or set 'constrain_all_metals: true' to constrain all atoms within {}A of the metal."
                    .format(metal[0].name, metal[1].get_id()[1], dist))

            else:
                checked_metals.append(list(metal[0].coord))
                print(
                    "Found {} geometry around {} (residue {}). Adding constraints."
                    .format(geo, metal[0].name, metal[1].get_id()[1]))

            # format string
            yaml_string = "{}-{}-{}:{}:{}-{}:{}:{}"
            spring_const = 50

            # Unique atoms taking part in any coordinated pair. On the
            # "no atoms coordinated" paths the classifier's result is used
            # as returned, so tolerate a None value there.
            string_atoms = []
            for c in coordinated_atoms or []:
                atom1, atom2, angle = c

                if atom1 not in string_atoms:
                    string_atoms.append(atom1)
                if atom2 not in string_atoms:
                    string_atoms.append(atom2)

            for atom in string_atoms:
                atomname1 = atom[0].name
                resnum1 = atom[1].get_id()[1]
                chain1 = atom[2].get_id()

                atomname2 = metal[0].name
                resnum2 = metal[1].get_id()[1]
                chain2 = metal[2].get_id()

                atom_dist = atom[0] - metal[0]
                out = yaml_string.format(spring_const, atom_dist, chain1,
                                         resnum1, atomname1, chain2, resnum2,
                                         atomname2)

                output.append(out)

            # Drop duplicate constraint strings.
            output = list(set(output))

            if output:
                output = ['{}'.format(o) for o in output]

    return output
def _dssp_features_for(res, dssp_map):
    """Return the DSSP record for ``res`` or a neutral placeholder record.

    The lookup is attempted, in order, with:

    1. the residue's own ``(chain_id, residue_id)`` key,
    2. the same key with blank hetero flag and insertion code,
    3. the first DSSP key sharing the chain id and residue number.

    When all three fail, a 14-item placeholder (empty amino acid, ``'-'``
    secondary structure, twelve zeros) is returned.
    """
    key = res.full_id[2:]
    if key not in dssp_map:
        key = (key[0], (' ', key[1][1], ' '))
    if key not in dssp_map:
        for dssp_key in dssp_map:
            if dssp_key[0] == key[0] and dssp_key[1][1] == key[1][1]:
                key = dssp_key
                break
    if key in dssp_map:
        return dssp_map[key]
    return ('', '-') + (0.0,) * 12


def generate_node_features(protein_chains,
                           surface,
                           ns: NeighborSearch,
                           only_ca=Constants.GET_ONLY_CA_ATOMS):
    """Attach per-atom node features to the atoms of ``protein_chains``.

    Features are stored in place with ``setattr`` under the attribute names
    given by ``Constants.NODE_APPENDED_FEATURES`` (plus
    ``Constants.DSSP_FEATURES_NAME`` for the DSSP record); nothing is
    returned.

    Args:
        protein_chains: non-empty sequence of chains; the last four
            characters of the parent structure id of the first chain select
            the ``<id>.dssp`` file under ``Constants.DSSP_PATH``.
        surface: indexable collection of surface points, passed to
            ``min_dist`` / ``residue_depth`` and indexed with the indices
            those helpers return.
        ns: neighbour search object queried with ``ns.search(coord, radius)``.
        only_ca: when true, only the CA atom of every residue is annotated.

    Raises:
        KeyError: if a residue has no ``'CA'`` atom.

    Chains without residues are skipped (previously they raised a bare
    ``StopIteration``).
    """
    pdb_id = protein_chains[0].get_parent().full_id[0][-4:]
    dssp_map = make_dssp_dict(
        os.path.join(Constants.DSSP_PATH, pdb_id + '.dssp'))[0]

    appended = Constants.NODE_APPENDED_FEATURES
    # Upper-case every element symbol once instead of once per neighbour.
    wanted_elements = [(element, element.upper())
                       for element in Constants.NEIGHBOUR_SUM_RADIUS_ATOMS]

    for chain in protein_chains:
        residues = list(chain.get_residues())
        last_pos = len(residues) - 1

        for pos, res in enumerate(residues):
            # Names of the sequence neighbours; chain termini get the
            # "empty" marker.
            prev_res_name = Constants.EMPTY_STR_FEATURE
            if pos > 0:
                prev_res_name = residues[pos - 1].resname
            next_res_name = Constants.EMPTY_STR_FEATURE
            if pos < last_pos:
                next_res_name = residues[pos + 1].resname

            dssp_features = _dssp_features_for(res, dssp_map)

            # CA/CB geometry relative to the closest surface points.
            # Both angles stay 0 for residues without a CB atom.
            cb_ca_surf_angle = 0
            ca_cb_surf_angle = 0

            ca_atom = res['CA']
            ca_d, ca_surf_idx = min_dist(ca_atom.get_coord(), surface)
            ca_vec = ca_atom.get_vector()
            if 'CB' in res:
                cb_atom = res['CB']
                cb_vec = cb_atom.get_vector()
                _, cb_surf_idx = min_dist(cb_atom.get_coord(), surface)
                cb_ca_surf_angle = calc_angle(cb_vec, ca_vec,
                                              Vector(surface[ca_surf_idx]))
                ca_cb_surf_angle = calc_angle(ca_vec, cb_vec,
                                              Vector(surface[cb_surf_idx]))

            res_d, dist_list = residue_depth(res, surface)
            # Missing depths fall back to 5.0.
            if res_d is None:
                res_d = 5.0
                print("Nan values!!!")

            if ca_d is None:
                ca_d = 5.0
                print("Nan values!!!")

            for idx, atom in enumerate(res.get_atoms()):
                if only_ca:
                    atom = ca_atom

                # NOTE(review): with only_ca the depth entry used here is
                # dist_list[0] (the first atom yielded by get_atoms()),
                # while the coordinates are CA's - confirm this is intended.
                atom_d, s_idx = dist_list[idx]
                atom_coord = atom.get_coord()
                ca_atom_coord = ca_atom.get_coord()

                d = atom_coord - ca_atom_coord
                ca_atom_dist = np.sqrt(np.sum(d * d))
                atom_ca_surf_angle = 0
                ca_atom_surf_angle = 0
                if not np.array_equal(atom_coord, ca_atom_coord):
                    atom_vec = atom.get_vector()
                    surface_vec = Vector(surface[s_idx])
                    atom_ca_surf_angle = calc_angle(atom_vec, ca_vec,
                                                    surface_vec)
                    ca_atom_surf_angle = calc_angle(ca_vec, atom_vec,
                                                    surface_vec)

                if atom_d is None:
                    atom_d = 5.0
                    print(f"Nan valuess!! {atom_d}, {atom}")

                scalar_features = (
                    ('prev_res_name', prev_res_name),
                    ('next_res_name', next_res_name),
                    ('residue_depth', res_d),
                    ('atom_depth', atom_d),
                    ('ca_depth', ca_d),
                    ('ca_atom_dist', ca_atom_dist),
                    ('cb_ca_surf_angle', cb_ca_surf_angle),
                    ('ca_cb_surf_angle', ca_cb_surf_angle),
                    ('atom_ca_surf_angle', atom_ca_surf_angle),
                    ('ca_atom_surf_angle', ca_atom_surf_angle),
                )
                for feature, value in scalar_features:
                    setattr(atom, appended[feature], value)
                setattr(atom, Constants.DSSP_FEATURES_NAME, dssp_features)

                # Neighbour counts per radius, overall and per element, each
                # also restricted to the atoms "above" the plane through the
                # atom whose normal points to its closest surface point.
                # NOTE(review): the running totals add the full count found
                # at every radius, so from the third radius on the stored
                # value is not the plain difference to the previous radius.
                # Kept as is because consumers may rely on these numbers.
                cumsum_main = 0
                cumsum_plane = 0
                cumsum_atom_main = [0] * len(wanted_elements)
                cumsum_atom_plane = [0] * len(wanted_elements)

                for num, radius in enumerate(Constants.NEIGHBOUR_SUM_RADIUS):
                    atoms = ns.search(atom_coord, radius)
                    setattr(
                        atom,
                        appended[Constants.neighbour_sum_radius_name(num)],
                        len(atoms) - cumsum_main)

                    num_above_plane = num_of_atoms_above_plane(
                        surface[s_idx] - atom_coord, atom_coord, atoms)
                    setattr(
                        atom,
                        appended[
                            Constants.neighbour_sum_above_plane_radius_name(
                                num)],
                        num_above_plane - cumsum_plane)
                    cumsum_main += len(atoms)
                    cumsum_plane += num_above_plane

                    for i, (atom_element, wanted) in enumerate(
                            wanted_elements):
                        atoms_one_element = [
                            a for a in atoms if a.element.upper() == wanted
                        ]
                        setattr(
                            atom,
                            appended[Constants.neighbour_sum_radius_name(
                                num, atom_element)],
                            len(atoms_one_element) - cumsum_atom_main[i])

                        num_above_plane = num_of_atoms_above_plane(
                            surface[s_idx] - atom_coord, atom_coord,
                            atoms_one_element)
                        setattr(
                            atom,
                            appended[
                                Constants.
                                neighbour_sum_above_plane_radius_name(
                                    num, atom_element)],
                            num_above_plane - cumsum_atom_plane[i])
                        cumsum_atom_main[i] += len(atoms_one_element)
                        cumsum_atom_plane[i] += num_above_plane

                if only_ca:
                    # Only the CA atom is annotated; one pass is enough.
                    break
# Example #18
# 0
def to_vector(t):
    """Build a Vector from the first three items of the indexable ``t``."""
    x, y, z = t[0], t[1], t[2]
    return Vector(x, y, z)
def from4atoms(OA, OB, OC, OD, u, v, l, m, accept_no_solution=True):
    """
    Place a point E from four known points, two angles and two lengths.

    With a = OA - OC, b = OB - OD, c = E - OA and d = E - OB, the code looks
    for E such that

        a . c = |a| * l * cos(pi - u)
        b . d = |b| * m * cos(pi - v)
        |c|   = l

    The two linear conditions leave a line of candidate c-vectors; imposing
    |c| = l on that line gives a quadratic with (up to) two solutions. The
    candidate whose distance to OB is closest to m is returned.

    Arguments:
        OA, OB, OC, OD: Vector positions of the four known points.
        u, v: angles in radians (they are subtracted from pi and passed to
            cos).
        l: required length of E - OA.
        m: target length of E - OB (used in the second condition and to
            choose between the two candidates).
        accept_no_solution: when the quadratic has no real root, clamp the
            discriminant to zero (True, default) or raise ValueError (False).

    Returns the chosen position E as a Vector.

    Vector operators used: ``*`` between Vectors is the dot product and
    ``**`` with a number scales the Vector.
    """
    u = pi - u
    v = pi - v

    a = OA - OC
    b = OB - OD

    # Algebraically equal to OA - OB; moves the second condition from d to c:
    # b . c = b . d - b . (OA - OB)
    ddiff = OC - OD + a - b
    x = a.norm() * l * cos(u)
    y = b.norm() * m * cos(v)

    # Augmented 2x3 linear system  [r1 | rx], [r2 | ry]  in the unknown c.
    rx = x
    ry = y - b * ddiff

    r1 = a
    r2 = b

    if abs(r1[0]) < 0.000001:
        if abs(r2[0]) < 0.000001:
            print("WARNING: r1 = r2 = 0.0")
        # Swap the rows so the pivot is non-zero. The right-hand sides must
        # travel with their rows (the earlier code left rx untouched and
        # overwrote ry with it).
        r1, r2 = r2, r1
        rx, ry = ry, rx

    # Reduce the matrix to  [1 0 alpha | gamma], [0 1 beta | delta]
    factor = r1[0]
    r1 = r1 / factor
    rx = rx / factor
    factor = r2[0]
    r2 = r2 - (r1 ** factor)
    ry = ry - rx * factor
    factor = r2[1]
    r2 = r2 / factor
    ry = ry / factor
    factor = r1[1]
    r1 = r1 - r2 ** factor
    rx = rx - ry * factor

    # Solution line of the linear system: c(t) = direction * t + offset
    alpha = r1[2]
    beta = r2[2]
    gamma = rx
    delta = ry
    direction = Vector(-alpha, -beta, 1.0)
    offset = Vector(gamma, delta, 0)

    # Solve |c(t)|^2 = l^2 for t
    acoef = (direction.norm())**2
    bcoef = 2 * (direction * offset)
    ccoef = (offset.norm())**2 - l**2
    disc = bcoef**2 - 4 * acoef * ccoef

    if disc < 0.0:
        # No point on the line has length l
        if not accept_no_solution:
            raise ValueError(
                "from4atoms: no solution found (disc=%f)" % disc)
        # Fall back to the point of the line closest to length l
        disc = 0

    x1 = (-bcoef - sqrt(disc)) / (2 * acoef)
    x2 = (-bcoef + sqrt(disc)) / (2 * acoef)

    # The two candidate c-vectors and the E-points they lead to
    c1 = direction ** x1 + offset
    c2 = direction ** x2 + offset
    E1 = OA + c1
    E2 = OA + c2

    # Keep the candidate whose distance to OB is closest to m
    diff1 = abs((E1 - OB).norm() - m)
    diff2 = abs((E2 - OB).norm() - m)

    if diff1 < diff2:
        return E1
    return E2
# Example #20
# 0
def _centre_of_mass(chain):
    """Return the mass-weighted mean atom position of ``chain`` as a Vector."""
    weighted_sum = np.zeros(3)
    total_mass = 0.0
    for atom in chain.get_atoms():
        weighted_sum += atom.get_vector().get_array() * atom.mass
        total_mass += atom.mass
    # Divide the raw array: works whichever division operator the installed
    # Vector class implements.
    return Vector(weighted_sum / total_mass)


def _fit_rotran(reference_chain, chain):
    """Return (rotation, translation) fitting ``chain`` onto ``reference_chain``.

    Atoms are paired in iteration order, so both chains must yield the same
    number of atoms.
    """
    reference_set = np.asarray(
        [atom.get_coord() for atom in reference_chain.get_atoms()])
    coordinate_set = np.asarray(
        [atom.get_coord() for atom in chain.get_atoms()])
    sup = SVDSuperimposer()
    sup.set(reference_set, coordinate_set)
    sup.run()
    return sup.get_rotran()


def analyse(input_file_name,
            refer_file_name,
            moved_chain_id,
            fixed_chain_id,
            r_moved_chain_id,
            r_fixed_chain_id,
            output_file1,
            output_file2,
            r_model_number=0):
    """Describe the placement of one chain relative to another in every model.

    For each model of ``input_file_name`` the whole model is superimposed so
    that its fixed chain fits the reference fixed chain. The centre of mass
    of the moved chain is then recorded, and the moved chain is fitted onto
    the reference moved chain to obtain a rotation matrix R.

    Arguments:
        input_file_name: PDB file with the models to analyse.
        refer_file_name: PDB file with the reference structure.
        moved_chain_id, fixed_chain_id: chain ids in the analysed structure.
        r_moved_chain_id, r_fixed_chain_id: chain ids in the reference.
        output_file1: receives one tab-separated row per model:
            frame, distance between the two centres of mass, theta (angle
            between the moved centre and the z axis), phi (arctan2(y, x) of
            the moved centre), theta_x, theta_y, theta_z (arctan2
            combinations of entries of R, see the code).
        output_file2: receives one row per model: frame, x, y, z of the
            moved centre and the nine entries of R written column by
            column; every field is followed by a tab.
        r_model_number: id of the reference model to use.

    theta and phi are written as 0.0 when they are undefined (centre at the
    origin, or on the z axis for phi), so every model yields a full row.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('to_analyse',
                                                      input_file_name)
    reference = PDBParser(PERMISSIVE=1).get_structure('reference',
                                                      refer_file_name)

    r_chain_moved = reference[r_model_number][r_moved_chain_id]
    r_chain_fixed = reference[r_model_number][r_fixed_chain_id]

    rows1 = []
    rows2 = []

    for model_number, model in enumerate(structure):
        chain_moved = model[moved_chain_id]
        chain_fixed = model[fixed_chain_id]

        # Bring the whole model into the frame of the reference fixed chain.
        R, V = _fit_rotran(r_chain_fixed, chain_fixed)
        for atom in model.get_atoms():
            atom.transform(R, V)

        moved_centre = _centre_of_mass(chain_moved)
        fixed_centre = _centre_of_mass(chain_fixed)
        if fixed_centre.norm() > 0.5:
            print("Fixed chain norm is " + str(fixed_centre.norm()) +
                  " in model " + str(model_number) +
                  ". Should have been at the origin. Check code...")

        x, y, z = moved_centre.get_array()
        distance = (moved_centre - fixed_centre).norm()

        # Spherical angles of the moved centre. Degenerate cases get 0.0 so
        # that every per-model list keeps the same length (phi used to be
        # skipped, which misaligned or broke the output loop).
        theta = 0.0
        phi = 0.0
        if moved_centre.norm() > 1e-6:
            theta = moved_centre.angle(Vector(0, 0, 1))
            if np.sqrt(x * x + y * y) > 1e-6:
                phi = np.arctan2(y, x)

        # Orientation of the moved chain relative to the reference one.
        R, V = _fit_rotran(r_chain_moved, chain_moved)
        theta_x = np.arctan2(R[2][1], R[2][2])
        theta_y = np.arctan2(-R[2][0],
                             np.sqrt(R[2][1] * R[2][1] + R[2][2] * R[2][2]))
        theta_z = np.arctan2(R[1][0], R[0][0])

        rows1.append(
            (model_number, distance, theta, phi, theta_x, theta_y, theta_z))
        rows2.append(
            [model_number, x, y, z] +
            [R[row][col] for col in range(3) for row in range(3)])

    with open(output_file1, "w+") as f_results1:
        for row in rows1:
            f_results1.write('\t'.join(str(value) for value in row) + '\n')

    with open(output_file2, "w+") as f_results2:
        for row in rows2:
            # Every field, including the last one, is followed by a tab.
            f_results2.write(
                ''.join(str(value) + '\t' for value in row) + '\n')
    def add_water(self):
        """
        Adds water to the PDB file in a random position, then translates them until there are no clashes.

        Steps performed by the visible code:

        1. Read ``self.input_pdbs[0]`` and collect residue numbers, atom
           numbers, chain ids and residue names of its ATOM/HETATM/TER lines.
           Existing ``HOH`` residues that are not listed in
           ``self.user_waters`` are added to ``self.water_to_exclude`` as
           ``"chain:resnum"``.
        2. Return ``None`` if ``self.n_waters < 1``.
        3. Otherwise generate ``self.n_waters`` waters per input PDB with
           random integer oxygen coordinates in [0, 100), write them into each
           file of ``self.input_pdbs``, and shift all added water atoms by
           -5 along x until no other atom lies within 5.0 A of them.
        4. Rewrite each input PDB in place with the original lines, the
           relocated waters and the original CONECT records, and append the
           water lines to ``added_waters.txt`` in the working directory.

        When ``self.test`` is true the numpy random generator is seeded with
        42, which makes the placement reproducible.
        """
        if self.test:
            np.random.seed(42)

        # NOTE(review): `output` collects the rewritten file names but is not
        # read anywhere in the lines visible here - confirm how it is used.
        output = []
        n_inputs = len(self.input_pdbs)
        water_coords = []
        resnums = []
        atomnums = []
        chains = []
        resnames = []

        # Open the original PDB file
        with open(self.input_pdbs[0], "r") as file:
            # Figure out which lines refer to the actual structure and CONECTs, drop everything else
            lines = file.readlines()
            conect = [line for line in lines if "CONECT" in line]
            pdb_lines = [
                line for line in lines
                if "END" not in line and "CONECT" not in line
            ]

            for line in pdb_lines:
                if (line.startswith("ATOM") or line.startswith("HETATM")
                        or line.startswith("TER")):
                    try:
                        # Extract atom information
                        resnum = line[22:27].strip()
                        atomnum = line[7:11].strip()
                        chain = line[21]
                        resname = line[17:20]
                        resnums.append(resnum)
                        atomnums.append(atomnum)
                        chains.append(chain)
                        resnames.append(resname)

                        # If there are already waters in the system but were not selected to be perturbed, we exclude
                        # them
                        if resname == "HOH":
                            water = f"{chain}:{resnum}"
                            if (water not in self.user_waters
                                    and water not in self.water_to_exclude):
                                self.water_to_exclude.append(water)
                    # Line too short - Remarks pdb
                    # (only the single-character lookup line[21] can raise
                    # here; slices of a short line just come back shorter)
                    except IndexError:
                        pass

        # Return if no waters are supposed to be added
        if self.n_waters < 1:
            return

        else:
            # Check the maximum existing residue name, so we know where to introduce the waters
            # lig_length counts the collected lines whose residue name equals
            # self.ligand_residue; it is used below as an atom-number offset.
            lig_length = resnames.count(self.ligand_residue)
            resnums = [int(num) for num in resnums if num]
            max_resnum = max(resnums)
            water_resnums = []

            # Figure out the chain ID and atom numbers to introduce the waters
            water_chain = chains[0]  # water chain = 1st protein chain
            atomnum = max([int(num)
                           for num in atomnums if num]) + 1 + lig_length

            # Enumerate enough water templates to add n_waters to each input
            # (presumably cs.water is a list of three format templates, one
            # per water atom - TODO confirm against the constants module)
            water = cs.water * self.n_waters * n_inputs
            for input_pdb in range(n_inputs):
                for water_string in range(self.n_waters):
                    # Randomize oxygen coordinates - create an [x, y, z] vector
                    O_coords = Vector(
                        [np.random.randint(0, 100) for _ in range(3)])
                    # Add hydrogens to the oxygen
                    H1_coords = O_coords + Vector(0.757, 0.586, 0.0)
                    H2_coords = O_coords + Vector(-0.757, 0.586, 0.0)
                    water_coords = (water_coords + [list(O_coords)] +
                                    [list(H1_coords)] + [list(H2_coords)])
                    # Increment residue number, so each added water has a different one
                    max_resnum += 1
                    water_resnums = water_resnums + [max_resnum] * 3
                # One extra residue number is skipped between the waters of
                # consecutive inputs.
                max_resnum += 1

            # Calculate atom numbers of all waters
            water_atomnums = [
                atomnum + j for j in range(self.n_waters * 3 * n_inputs)
            ]

            # Create water PDB lines based on calculated atom numbers, residues, etc.
            water_output = []
            for atom, num, resnum, coord in zip(water, water_atomnums,
                                                water_resnums, water_coords):
                # Format coordinates, so they fit into the PDB format
                coord = ["{:7.4f}".format(c) for c in coord]
                coord = " ".join(coord)
                water_output.append(
                    atom.format(num, water_chain, resnum, coord))

            # Slice created water PDB lines and split between different input PDBs
            sliced_water_output = []
            for i in range(0, len(water_output), self.n_waters * 3):
                sliced_water_output.append(water_output[i:i +
                                                        self.n_waters * 3])

            # Loop over PDB inputs and their share of the water lines.
            # Every input file is rewritten from the lines read from
            # self.input_pdbs[0] above.
            for input_pdb, water_output in zip(self.input_pdbs,
                                               sliced_water_output):
                new_protein_file = input_pdb
                # Write PDB lines followed by created water lines
                with open(input_pdb, "w+") as file:
                    for line in pdb_lines:
                        file.write(line)
                    file.write("\n")
                    for line in water_output:
                        file.write(line)
                    file.write("END")

                # Load the input PDB file again with Biopython to check for contacts
                parser = PDBParser()
                structure = parser.get_structure("complex", new_protein_file)
                water_list = []

                # Get all protein atoms to check for clashes
                # (this unfolds every atom of the structure, the added waters
                # included; those are filtered out of the contacts below)
                protein_list = Selection.unfold_entities(structure, "A")

                # Get all relevant water atoms to check for clashes
                # (HOH residues whose number was not present in the original
                # file, i.e. the waters added above)
                for res in structure.get_residues():
                    resnum = res._id[1]
                    if res.resname == "HOH":
                        if resnum not in resnums:
                            water_list = water_list + Selection.unfold_entities(
                                res, "A")

                # Check contacts between added waters and the protein at 5.0 angstrom
                contacts5 = []
                for water_output in water_list:
                    contacts5 = contacts5 + NeighborSearch(
                        protein_list).search(water_output.coord, 5.0, "A")
                contacts5 = [c for c in contacts5
                             if c not in water_list]  # exclude "self" contacts

                # Keep on translating the water molecules as long as there are clashes at 5.0 A
                # (each pass shifts every added water atom by -5 along x)
                while contacts5:
                    contacts5 = []
                    for w_ in water_list:
                        x, y, z = w_.coord
                        # Set new coordinates and check contacts again
                        w_.set_coord([x - 5, y, z])
                        contacts5 = contacts5 + NeighborSearch(
                            protein_list).search(w_.coord, 5.0, "A")
                        contacts5 = [
                            c for c in contacts5 if c not in water_list
                        ]

                # Save final output with translated water as a temporary file
                temp_protein_file = os.path.join(
                    os.path.dirname(input_pdb),
                    os.path.basename(input_pdb).replace(".pdb", "_temp.pdb"),
                )

                io = PDBIO()
                io.set_structure(structure)
                io.save(temp_protein_file)
                output.append(new_protein_file)

                # Open the temporary file created with biopython
                new_water_lines = []
                with open(temp_protein_file, "r") as temp:
                    temp_lines = temp.readlines()

                    # Iterate over lines created with biopython
                    for line in temp_lines:
                        if (line[17:20].strip() == "HOH"
                                and int(line[22:27].strip()) not in resnums):
                            # Shift the atom number by the ligand atom count.
                            # NOTE(review): str.replace substitutes every
                            # occurrence of these four characters in the line
                            # and does not pad the new number - verify the
                            # columns stay aligned.
                            line = line.replace(
                                line[7:11], str(int(line[7:11]) + lig_length))
                            # Close each water molecule with a TER record
                            # after its "2HW" atom.
                            if line[12:15] == "2HW":
                                line = line.strip("\n") + "\nTER\n"
                            # If it's one of the added waters, keep the adjusted line
                            new_water_lines.append(line)

                del new_water_lines[
                    -1]  # Last biopython line is a not needed TER

                # Save new water lines, so they are not duplicated in the next run
                with open("added_waters.txt", "a+") as water_file:
                    for line in new_water_lines:
                        water_file.write(line)

                # Overwrite the original input PDB, save original PDB lines, added water lines and the original CONECTs.
                with open(new_protein_file, "w+") as file:
                    for line in pdb_lines:
                        file.write(line)
                    # `line` still holds the last original PDB line here: add
                    # a TER only if the structure did not already end with one.
                    if not line.startswith("TER"):
                        file.write("TER\n")
                    for line in new_water_lines:
                        file.write(line)
                    for line in conect:
                        file.write(line)
                    file.write("\n")
                    file.write("END")

                # Remove temporary biopython file
                os.remove(temp_protein_file)