def test_stereo_sets_correct_transform_matrix(self):
    """Check the transform matrix stored on the shell by stereo assignment.

    Three reference atoms (center, one along y, one along z) are moved by
    a random transform composed with a random translation. After calling
    `stereo_indicators_from_shell` on the moved coordinates, the shell's
    `transform_matrix` must equal the inverse of the transform that was
    applied.
    """
    from e3fp.fingerprint.fprinter import stereo_indicators_from_shell
    from e3fp.fingerprint.array_ops import (
        project_to_plane,
        make_transform_matrix,
        transform_array,
    )
    from e3fp.fingerprint.structs import Shell

    shell = Shell(0, {1, 2})
    # `np.float` was removed from NumPy (1.24); `np.float64` is the same
    # dtype the old alias resolved to.
    atom_coords = np.asarray([[0, 0, 0.],
                              [0, 2., 0.],  # -> y
                              [0, 0, 3.]],  # -> z
                             dtype=np.float64)
    atom_tuples = [(1, 1, Shell(1)),  # -> y
                   (5, 5, Shell(2))]  # -> z
    for _ in range(20):
        rand_trans = np.random.uniform(size=3) * 100
        rand_y = np.random.uniform(size=3) * 10
        rand_v = np.random.uniform(size=3) * 20
        rand_z = project_to_plane(rand_v, rand_y) * 30
        rand_transform_mat = make_transform_matrix(np.zeros(3), rand_y,
                                                   rand_z)

        # Compose the random translation on top of the random transform.
        trans_mat = np.identity(4, dtype=np.float64)
        trans_mat[:3, 3] = rand_trans
        rand_transform_mat = np.dot(trans_mat, rand_transform_mat)

        new_coords = transform_array(rand_transform_mat, atom_coords)
        reverse_trans_mat = np.linalg.inv(rand_transform_mat)
        # Sanity check: the inverse maps the moved coordinates back.
        np.testing.assert_almost_equal(
            atom_coords, transform_array(reverse_trans_mat, new_coords))

        atom_coords_dict = dict(enumerate(new_coords))
        stereo_indicators_from_shell(shell, atom_tuples, atom_coords_dict)
        np.testing.assert_almost_equal(shell.transform_matrix,
                                       reverse_trans_mat)
def test_stereo_indicators_for_frame(self):
    """Check stereo indicators are unchanged by a random change of frame.

    Four reference atoms are moved by a random transform followed by a
    random translation; `stereo_indicators_from_shell` must still return
    the expected indicators `[1, -5, 2]`.
    """
    from e3fp.fingerprint.fprinter import stereo_indicators_from_shell
    from e3fp.fingerprint.array_ops import (
        project_to_plane,
        make_transform_matrix,
        transform_array,
    )
    from e3fp.fingerprint.structs import Shell

    shell = Shell(0, {1, 2, 3})
    # `np.float` was removed from NumPy (1.24); `np.float64` is the same
    # dtype the old alias resolved to.
    atom_coords = np.asarray([[0, 0, 0.],
                              [1, -0.5, 0.],
                              [0, 2., 0.],  # -> y
                              [0, 0, 3.]],  # -> z
                             dtype=np.float64)
    atom_tuples = [(1, 1, Shell(2)),  # -> y
                   (2, 1, Shell(1)),
                   (5, 5, Shell(3))]  # -> z
    for _ in range(20):
        rand_trans = np.random.uniform(size=3) * 100
        rand_y = np.random.uniform(size=3) * 10
        rand_v = np.random.uniform(size=3) * 20
        rand_z = project_to_plane(rand_v, rand_y) * 30
        rand_transform_mat = make_transform_matrix(np.zeros(3), rand_y,
                                                   rand_z)

        new_coords = transform_array(rand_transform_mat, atom_coords)
        # Sanity check: the inverse maps the moved coordinates back.
        np.testing.assert_almost_equal(
            atom_coords,
            transform_array(np.linalg.inv(rand_transform_mat), new_coords))

        # Translation is applied after the round-trip check above.
        new_coords += rand_trans
        atom_coords_dict = dict(enumerate(new_coords))
        stereo_ind = stereo_indicators_from_shell(shell, atom_tuples,
                                                  atom_coords_dict)
        # 2 is chosen for y, 3 for z
        expect_stereo_ind = [1, -5, 2]
        self.assertEqual(stereo_ind, expect_stereo_ind)
def save_aligned_conf_to_pdb(pdb_file, mol, conf_id=0):
    """Save a mol conformer to a PDB file after aligning it to its PCA axes.

    The conformer's coordinates are fit with a 2-component PCA. The second
    component and the cross product of the first and second components are
    passed as the `y` and `z` arguments of `make_transform_matrix`, with
    the mean coordinate as its first argument. The transformed coordinates
    are written back to the conformer (the conformer is modified in
    place), the molecule is written to `pdb_file`, and the atom names on
    the ``HETATM`` lines are then rewritten as element symbol + 1-based
    atom index.

    Parameters
    ----------
    pdb_file : str
        Path of the PDB file to write.
    mol : RDKit Mol
        Molecule holding the conformer.
    conf_id : int, optional
        ID of the conformer to align and save.

    Returns
    -------
    ndarray of int
        Indices that sort the atoms by the first coordinate of their
        transformed positions.
    """
    conf = mol.GetConformer(conf_id)
    num_atoms = conf.GetNumAtoms()
    coords = np.array([conf.GetAtomPosition(i) for i in range(num_atoms)])

    pca = sklearn.decomposition.PCA(2)
    pca.fit(coords)
    mean_coord = np.mean(coords, axis=0)
    x = as_unit(pca.components_[0, :])
    y = as_unit(pca.components_[1, :])
    z = np.cross(x, y)
    trans_mat = make_transform_matrix(mean_coord, y=y, z=z)
    trans_coords = transform_array(trans_mat, coords)
    for i in range(num_atoms):
        conf.SetAtomPosition(i, trans_coords[i, :])

    writer = rdkit.Chem.rdmolfiles.PDBWriter(pdb_file)
    try:
        writer.write(mol, conf_id)
    finally:
        # Release the file handle even if writing fails.
        writer.close()

    # Renumber atoms by atom_idx. The "U" mode flag was removed in
    # Python 3.11; universal newlines are already the default in text mode.
    with open(pdb_file, "r") as f:
        lines = f.readlines()
    with open(pdb_file, "w") as f:
        atom_idx = 0
        for line in lines:
            if line.startswith("HETATM"):
                elem = mol.GetAtomWithIdx(atom_idx).GetSymbol()
                # PDB is a fixed-column format: the replacement must be
                # exactly 4 characters wide or later columns would shift.
                atom_name = "{}{:d}".format(elem, atom_idx + 1).ljust(4)[:4]
                line = line[:13] + atom_name + line[17:]
                atom_idx += 1
            f.write(line)
    return np.argsort(trans_coords[:, 0])
def test_two_axis_transform_correct2(self):
    """Check a transform built from a center and two points of an array.

    For random arrays of 3 to 7 points, the first point is used as the
    center and the offsets of the second and third points as the `y` and
    `z` arguments of `make_transform_matrix`. After transforming the
    array, the first point must sit at the origin, the direction of the
    second point must equal `Y_AXIS`, and the direction of the third point
    must have a zero first component.
    """
    from e3fp.fingerprint.array_ops import (
        make_transform_matrix,
        as_unit,
        transform_array,
        Y_AXIS,
    )

    for num_points in range(3, 8):
        points = np.random.uniform(size=(num_points, 3))
        origin = points[0, :]
        y_offset = points[1, :] - origin
        z_offset = points[2, :] - origin

        matrix = make_transform_matrix(origin, y_offset, z_offset)
        moved = transform_array(matrix, points)

        moved_origin = moved[0, :]
        moved_y_dir = as_unit(moved[1, :])
        moved_z_dir = as_unit(moved[2, :])

        np.testing.assert_array_almost_equal(moved_origin.flatten(),
                                             np.zeros(3))
        np.testing.assert_array_almost_equal(moved_y_dir.flatten(),
                                             Y_AXIS.flatten())
        self.assertAlmostEqual(moved_z_dir[0], 0.)
def test_fingerprint_is_transform_invariant(self):
    """Check the fingerprint does not change when the conformer is moved.

    The conformer's atoms are repeatedly moved by a random transform
    (a matrix built from a random `y` vector, composed with a matrix
    built from a random translation vector). The fingerprint computed
    after the first move is the reference; every later fingerprint must
    equal it.
    """
    from e3fp.fingerprint import fprinter
    from e3fp.fingerprint.array_ops import (
        make_transform_matrix,
        transform_array,
    )
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    level = 5
    conf = mol.GetConformers()[0]
    ref_fp = None
    atom_ids = [x.GetIdx() for x in mol.GetAtoms()]
    # `np.float` was removed from NumPy (1.24); `np.float64` is the same
    # dtype the old alias resolved to.
    coords = np.array([conf.GetAtomPosition(x) for x in atom_ids],
                      dtype=np.float64)
    for _ in range(5):
        rand_y = np.random.uniform(size=3)
        rand_trans = np.random.uniform(size=3) * 100
        trans_mat = make_transform_matrix(rand_trans)
        rot_mat = make_transform_matrix(np.zeros(3), rand_y)
        transform_mat = np.dot(trans_mat, rot_mat)
        new_coords = transform_array(transform_mat, coords)

        # The transform must actually have moved the atoms.
        with self.assertRaises(AssertionError):
            np.testing.assert_almost_equal(new_coords, coords)

        for atom_id, new_coord in zip(atom_ids, new_coords):
            conf.SetAtomPosition(atom_id, new_coord)
        # Confirm the conformer now holds the transformed coordinates.
        test_coords = np.array([conf.GetAtomPosition(x) for x in atom_ids],
                               dtype=np.float64)
        np.testing.assert_almost_equal(test_coords, new_coords)

        fpr = fprinter.Fingerprinter(level=level, stereo=True,
                                     radius_multiplier=1.718)
        fpr.run(conf, mol)
        fp = fpr.get_fingerprint_at_level(level)
        if ref_fp is None:
            ref_fp = fp
        else:
            self.assertEqual(fp, ref_fp)
def shell_to_pdb(mol, shell, atom_coords, bound_atoms_dict, out_file=None,
                 reorient=True):
    """Write the substructure of a shell as one PDB model.

    The model consists of two ``REMARK 400`` header lines (the second
    carrying the mol's ``_Name`` property), a ``MODEL`` line, one
    `PDB_LINE` record per atom of ``shell.substruct.atoms`` in sorted id
    order, and an ``ENDMDL`` line. The temperature field marks the role of
    each atom: 1.0 for the shell's center atom, 0.5 for other atoms in
    ``shell.atoms`` and 0.0 for the rest. Occupancy is always 0.0.

    Parameters
    ----------
    mol : RDKit Mol
        Input mol
    shell : Shell
        A shell
    atom_coords : dict
        Dict matching atom id to coordinates.
    bound_atoms_dict : dict
        Dict matching atom id to id of bound atoms. Currently unused:
        bond (CONECT) records are not written.
    out_file : str or None, optional
        File to which the lines are appended. If None, the lines are
        returned instead.
    reorient : bool, optional
        Transform the coordinates with the shell's `transform_matrix`.
        If that raises an `AttributeError`, the coordinates are instead
        shifted so the center atom sits at the origin.

    Returns
    -------
    list of str or None
        The PDB lines when `out_file` is None; otherwise None.
    """
    remark_tag = "REMARK 400"
    pdb_lines = [
        remark_tag + " COMPOUND",
        remark_tag + " " + mol.GetProp("_Name"),
        "MODEL",
    ]

    atom_ids = sorted(shell.substruct.atoms)
    atoms = [mol.GetAtomWithIdx(atom_id) for atom_id in atom_ids]
    coords = np.asarray([atom_coords.get(atom_id) for atom_id in atom_ids],
                        dtype=np.float64)

    if reorient:
        try:
            coords = array_ops.transform_array(shell.transform_matrix,
                                               coords)
        except AttributeError:
            # Fall back to centering on the shell's center atom.
            coords -= atom_coords[shell.center_atom]

    for row, (atom_id, atom) in enumerate(zip(atom_ids, atoms)):
        elem = atom.GetSymbol()

        formal_charge = atom.GetFormalCharge()
        if formal_charge > 0:
            charge_label = "{:d}+".format(formal_charge)
        elif formal_charge < 0:
            charge_label = "{:d}-".format(abs(formal_charge))
        else:
            charge_label = ""

        if atom_id == shell.center_atom:
            temp = 1.0
        elif atom_id in shell.atoms:
            temp = 0.5
        else:
            temp = 0.0

        pdb_lines.append(PDB_LINE.format(
            atom_id=atom_id,
            name="{}{:d}".format(elem, atom_id + 1),
            coord=coords[row, :].flatten(),
            occupancy=0.0,
            temp=temp,
            elem=elem,
            charge=charge_label,
        ))

    # NOTE: writing bonds (CONECT records built from `bound_atoms_dict`) is
    # a placeholder that has not been implemented.
    pdb_lines.append("ENDMDL")

    if out_file is None:
        return pdb_lines

    with smart_open(out_file, "a") as f:
        for pdb_line in pdb_lines:
            f.write(pdb_line + "\n")