def test_example_merge():
    """Merge the coordinate sets stored in an Example and verify the result.

    Two coordinate sets are built from the same molecule (one typed with
    ElementIndexTyper, one with the default typer) and appended to a single
    Example. The merged set is checked for row count and type indices with
    and without unique types, and for a merge that passes 1 as first argument.
    """
    mol = pybel.readstring('smi', 'c1ccccc1CO')
    mol.addh()
    mol.make3D()

    by_element = molgrid.CoordinateSet(mol, molgrid.ElementIndexTyper())
    by_default = molgrid.CoordinateSet(mol)
    by_default.make_vector_types()  # this should not screw up index types

    example = molgrid.Example()
    for cset in (by_element, by_default):
        example.coord_sets.append(cset)

    assert example.type_size() == (by_element.max_type + by_default.max_type)
    assert example.coordinate_size() == (
        by_element.coord.dimension(0) + by_default.type_index.size())

    # default merge: the second set's indices are shifted by the first's max_type
    merged_unique = example.merge_coordinates()
    assert merged_unique.coord.tonumpy().shape == (24, 3)
    expected_idx = np.concatenate([
        by_element.type_index.tonumpy(),
        by_default.type_index.tonumpy() + by_element.max_type,
    ])
    assert np.array_equal(expected_idx, merged_unique.type_index.tonumpy())

    # test merging without unique types, which makes no sense
    merged_shared = example.merge_coordinates(0, False)
    assert merged_shared.coord.tonumpy().shape == (24, 3)
    expected_idx = np.concatenate([
        by_element.type_index.tonumpy(),
        by_default.type_index.tonumpy(),
    ])
    assert np.array_equal(expected_idx, merged_shared.type_index.tonumpy())

    # test sliced merging
    merged_slice = example.merge_coordinates(1, False)
    assert merged_slice.coord.tonumpy().shape == (8, 3)  # no hydrogens in this slice
def fitmol(fname, niters=10):
    """Grid the first molecule of an SDF file and fit atoms back to that grid.

    Relies on module-level `typer`, `gmaker`, `device` and `channels`, plus
    the helpers `simple_atom_fit` and `get_min_rmsd`.

    Args:
        fname: path of the SDF file; only its first molecule is read.
        niters: forwarded unchanged to `simple_atom_fit`.

    Returns:
        Tuple `(struct, fittime, loss, fixes, rmsd)` where `struct` is the
        fitted structure taken from the grid's `info['src_struct']`, the next
        three values come from `struct.info` (`'time'`, `'loss'`, `'n_steps'`),
        and `rmsd` is `np.inf` if the RMSD computation raised.
    """
    print('Reading {}'.format(fname))
    m = next(pybel.readfile('sdf', fname))
    m.OBMol.Center()  # put in center of box!
    m.addh()

    print('Typing input molecule')
    cset = molgrid.CoordinateSet(m, typer)

    print('Creating empty grid')
    mgrid_values = torch.zeros(
        gmaker.grid_dimensions(cset.num_types()),
        dtype=torch.float32,
        device=device,
    )

    print('Calling gmaker forward')
    gmaker.forward((0, 0, 0), cset, mgrid_values)

    mgrid = generate.MolGrid(mgrid_values, channels, np.zeros(3), 0.5)
    types = generate.count_types(
        cset.type_index.tonumpy().astype(int),
        cset.num_types(),
        dtype=np.int16,
    )

    grid = simple_atom_fit(mgrid, types, niters)
    struct = grid.info['src_struct']
    loss = struct.info['loss']
    fittime = struct.info['time']
    fixes = struct.info['n_steps']

    # Best effort: a failed RMSD computation is reported as infinity.
    # Catch Exception rather than everything so that KeyboardInterrupt and
    # SystemExit still propagate instead of being silently turned into inf.
    try:
        rmsd = get_min_rmsd(cset.coords, cset.type_index.tonumpy(),
                            struct.xyz, struct.c)
    except Exception:
        rmsd = np.inf

    return struct, fittime, loss, fixes, rmsd
def test_backward_vec():
    """Backward pass with vector types: check signs on CPU, then CPU vs GPU.

    A two-atom coordinate set with three-component type vectors receives a
    gradient of +1 in channel 0 and -1 in channel 1 at the grid centre.
    """
    gmaker = molgrid.GridMaker(resolution=.1, dimension=6.0)
    xyz = np.array([[1.0, 0, 0], [-1, -1, 0]], np.float32)
    type_vecs = np.array([[0, 1.0, 0], [1.0, 0, 0]], np.float32)
    radii = np.array([2.0, 2.0], np.float32)
    coords = molgrid.CoordinateSet(xyz, type_vecs, radii)
    shape = gmaker.grid_dimensions(3)

    # make diff with gradient in center
    diff = molgrid.MGrid4f(*shape)
    diff[0, 30, 30, 30] = 1.0
    diff[1, 30, 30, 30] = -1.0

    cpuatoms = molgrid.MGrid2f(2, 3)
    cputypes = molgrid.MGrid2f(2, 3)
    gpuatoms = molgrid.MGrid2f(2, 3)
    gputypes = molgrid.MGrid2f(2, 3)

    origin = (0, 0, 0)
    gmaker.backward(origin, coords, diff.cpu(), cpuatoms.cpu(), cputypes.cpu())

    first_atom_type_grad = cputypes[0]
    assert first_atom_type_grad[0] > 0
    assert first_atom_type_grad[1] < 0
    assert first_atom_type_grad[2] == 0

    gmaker.backward(origin, coords, diff.gpu(), gpuatoms.gpu(), gputypes.gpu())

    # GPU results must agree with the CPU ones
    np.testing.assert_allclose(gpuatoms.tonumpy(), cpuatoms.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(gputypes.tonumpy(), cputypes.tonumpy(), atol=1e-5)
def test_radius_multiples():
    """Check the density profile of one atom for two GridMaker settings.

    With the default settings a line through the atom is compared against a
    Gaussian near the centre, a quadratic further out and zero beyond that.
    With radius_scale=0.5 and a radius multiple of 3.0 the whole line is
    compared against a Gaussian. CPU and GPU grids must agree in both cases.
    """
    origin = (0, 0, 0)
    xyz = np.array([[0, 0, 0]], np.float32)
    type_idx = np.array([0], np.float32)
    radii = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid1f(type_idx), molgrid.Grid1f(radii), 1)

    default_maker = molgrid.GridMaker(resolution=.1, dimension=6.0)
    shape = default_maker.grid_dimensions(1)

    cpugrid = molgrid.MGrid4f(*shape)
    cpugrid2 = molgrid.MGrid4f(*shape)
    gpugrid = molgrid.MGrid4f(*shape)

    default_maker.forward(origin, coords, cpugrid.cpu())
    default_maker.forward(origin, coords, gpugrid.gpu())
    # same atom passed as raw arrays instead of a CoordinateSet
    default_maker.forward(origin, xyz, type_idx, radii, cpugrid2.cpu())

    np.testing.assert_allclose(cpugrid.tonumpy(), gpugrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(cpugrid.tonumpy(), cpugrid2.tonumpy(), atol=1e-6)

    density = cpugrid.tonumpy()
    assert density[0, 30, 30, 30] == approx(1)

    # cut a line across
    line = density[0, 30, 30, :]
    xvals = np.abs(np.arange(-3, 3.1, .1))
    gauss = np.exp(-2 * xvals**2)

    for i in range(20, 41):
        assert line[i] == approx(gauss[i])

    for i in [*range(0, 15), *range(45, 61)]:
        assert line[i] == approx(0)

    e2 = np.exp(-2)
    quad = 4 * e2 * xvals**2 - 12 * e2 * xvals + 9 * e2
    for i in [*range(15, 20), *range(41, 45)]:
        assert line[i] == approx(quad[i], abs=1e-5)

    # funkier grid
    # NOTE(review): keyword spelling 'gassian_radius_multiple' is kept exactly
    # as found; confirm it matches the binding before "correcting" it.
    scaled_maker = molgrid.GridMaker(resolution=.1, dimension=6.0,
                                     radius_scale=0.5,
                                     gassian_radius_multiple=3.0)
    scaled_cpu = molgrid.MGrid4f(*shape)
    scaled_gpu = molgrid.MGrid4f(*shape)
    scaled_maker.forward(origin, coords, scaled_cpu.cpu())
    scaled_maker.forward(origin, coords, scaled_gpu.gpu())

    np.testing.assert_allclose(scaled_cpu.tonumpy(), scaled_gpu.tonumpy(), atol=1e-5)

    density = scaled_cpu.tonumpy()
    assert density[0, 30, 30, 30] == approx(1)

    # cut a line across
    line = density[0, 30, :, 30]
    xvals = np.abs(np.arange(-3, 3.1, .1)) * 2.0
    gauss = np.exp(-2 * xvals**2)

    # should be gaussian the whole way, although quickly hits numerical zero
    for i in range(61):
        assert line[i] == approx(gauss[i], abs=1e-5)
def test_coordset_merge():
    """Copy a coordinate set into numpy buffers, padded and truncated.

    NOTE(review): despite its name this test exercises copyTo, and another
    definition named test_coordset_merge appears elsewhere in this source.
    If both live in the same module the later one shadows this one; confirm
    and consider renaming.
    """
    mol = pybel.readstring('smi', 'c1ccccc1CO')
    mol.addh()
    mol.make3D()

    cset = molgrid.CoordinateSet(mol)
    cset.make_vector_types()

    # buffers larger than the coordinate set
    big_coords = np.zeros([10, 3], np.float32)
    big_types = np.zeros([10, 15], np.float32)
    big_radii = np.zeros(10, np.float32)

    copied = cset.copyTo(big_coords, big_types, big_radii)
    assert copied == 8
    assert np.sum(big_coords) != 0

    # types should be padded out
    assert big_types[:, 11].sum() == 0
    # coords too
    assert big_coords[8:].sum() == 0
    assert big_radii[8:].sum() == 0

    # check truncation
    small_coords = np.zeros([5, 3], np.float32)
    small_types = np.zeros([5, 8], np.float32)
    small_radii = np.zeros(5, np.float32)

    copied = cset.copyTo(small_coords, small_types, small_radii)
    assert copied == 5
    assert np.all(small_coords == big_coords[:5])
    assert np.all(small_types == big_types[:5, :8])
    assert np.all(small_radii == big_radii[:5])
def test_backwards():
    """Backward pass for one atom under a random rotation, CPU vs GPU.

    The atom is rotated before the backward pass and the resulting atom
    gradients are passed through the transform's backward before comparison
    with the expected value.
    """
    origin = (0, 0, 0)
    gmaker = molgrid.GridMaker(resolution=.1, dimension=6.0)

    xyz = np.array([[1.0, 0, 0]], np.float32)
    type_idx = np.array([0], np.float32)
    radii = np.array([2.0], np.float32)
    coords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid1f(type_idx), molgrid.Grid1f(radii), 1)
    shape = gmaker.grid_dimensions(1)

    # make diff with gradient in center
    diff = molgrid.MGrid4f(*shape)
    diff[0, 30, 30, 30] = 1.0

    cpuatoms = molgrid.MGrid2f(1, 3)
    gpuatoms = molgrid.MGrid2f(1, 3)

    # apply random rotation
    rotation = molgrid.Transform(origin, 0, True)
    rotation.forward(coords, coords)

    gmaker.backward(origin, coords, diff.cpu(), cpuatoms.cpu())
    gmaker.backward(origin, coords, diff.gpu(), gpuatoms.gpu())

    rotation.backward(cpuatoms.cpu(), cpuatoms.cpu(), False)
    rotation.backward(gpuatoms.gpu(), gpuatoms.gpu(), False)

    print(cpuatoms.tonumpy(), gpuatoms.tonumpy())

    # results should be ~ -.6, 0, 0
    np.testing.assert_allclose(cpuatoms.tonumpy(), gpuatoms.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(
        cpuatoms.tonumpy().flatten(), [-0.60653067, 0, 0], atol=1e-5)
def test_vector_types():
    """Compare index-typed and vector-typed gridding of a single atom.

    A one-hot type vector must reproduce the index-typed grid; a (0.5, 0.5)
    type vector must put half of the channel-0 reference density into each of
    the two channels. Checked on CPU and GPU.

    NOTE(review): another definition named test_vector_types appears elsewhere
    in this source; if both share a module the later one shadows this one.
    """
    origin = (0, 0, 0)
    gmaker = molgrid.GridMaker(resolution=.25, dimension=6.0)

    xyz = np.array([[0, 0, 0]], np.float32)
    type_idx = np.array([0], np.float32)
    one_hot = np.array([[1.0, 0]], np.float32)
    half_half = np.array([[0.5, 0.5]], np.float32)
    radii = np.array([1.0], np.float32)

    coords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid1f(type_idx), molgrid.Grid1f(radii), 2)
    vcoords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid2f(one_hot), molgrid.Grid1f(radii))
    v2coords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid2f(half_half), molgrid.Grid1f(radii))

    shape = gmaker.grid_dimensions(2)
    reference = molgrid.MGrid4f(*shape)
    vgrid = molgrid.MGrid4f(*shape)
    v2grid = molgrid.MGrid4f(*shape)
    v3grid = molgrid.MGrid4f(*shape)

    gmaker.forward(origin, coords, reference.cpu())
    gmaker.forward(origin, vcoords, vgrid.cpu())
    gmaker.forward(origin, v2coords, v2grid.cpu())
    # raw-array overload with vector types
    gmaker.forward(origin, xyz, one_hot, radii, v3grid.cpu())

    np.testing.assert_allclose(reference.tonumpy(), vgrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vgrid.tonumpy(), v3grid.tonumpy(), atol=1e-6)

    halved = v2grid.tonumpy()
    ref = reference.tonumpy()
    for channel in (0, 1):
        np.testing.assert_allclose(ref[0, :], halved[channel, :] * 2.0, atol=1e-5)

    # repeat the vector-typed passes on the GPU
    vgridgpu = molgrid.MGrid4f(*shape)
    v2gridgpu = molgrid.MGrid4f(*shape)
    gmaker.forward(origin, vcoords, vgridgpu.gpu())
    gmaker.forward(origin, v2coords, v2gridgpu.gpu())

    np.testing.assert_allclose(reference.tonumpy(), vgridgpu.tonumpy(), atol=1e-5)
    halved_gpu = v2gridgpu.tonumpy()
    for channel in (0, 1):
        np.testing.assert_allclose(ref[0, :], halved_gpu[channel, :] * 2.0, atol=1e-5)
def test_type_radii():
    """Forward and backward passes with radius_type_indexed=True.

    The coordinate set is converted to vector types with per-type radii
    [3.0, 1.0]; the forward sum is checked against a lower bound and the
    backward gradients are checked for sign on CPU and for CPU/GPU agreement.
    """
    origin = (0, 0, 0)
    gmaker = molgrid.GridMaker(resolution=.25, dimension=6.0,
                               radius_type_indexed=True)

    xyz = np.array([[0, 0, 0]], np.float32)
    type_idx = np.array([0], np.float32)
    radii = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid1f(type_idx), molgrid.Grid1f(radii), 2)
    coords.make_vector_types(True, [3.0, 1.0])

    shape = gmaker.grid_dimensions(3)  # includes dummy type
    reference = molgrid.MGrid4f(*shape)
    gpudata = molgrid.MGrid4f(*shape)

    assert gmaker.get_radii_type_indexed()

    gmaker.forward(origin, coords, reference.cpu())
    gmaker.forward(origin, coords, gpudata.gpu())

    np.testing.assert_allclose(reference.tonumpy(), gpudata.tonumpy(), atol=1e-5)

    assert reference.tonumpy().sum() > 2980  # radius of 1 would be 116

    # reuse the reference grid as the incoming gradient
    reference.fill_zero()
    reference[0][20][12][12] = -1
    reference[1][20][12][12] = 1
    reference[2][20][12][12] = 2

    cpuatoms = molgrid.MGrid2f(1, 3)
    cputypes = molgrid.MGrid2f(1, 3)
    gpuatoms = molgrid.MGrid2f(1, 3)
    gputypes = molgrid.MGrid2f(1, 3)

    gmaker.backward(origin, coords, reference.cpu(), cpuatoms.cpu(), cputypes.cpu())

    assert cpuatoms[0][0] < 0
    assert cpuatoms[0][1] == 0
    assert cpuatoms[0][2] == 0

    assert cputypes[0][0] < 0
    assert cputypes[0][1] == 0
    assert cputypes[0][2] == 0

    gmaker.backward(origin, coords, reference.gpu(), gpuatoms.gpu(), gputypes.gpu())

    np.testing.assert_allclose(gpuatoms.tonumpy(), cpuatoms.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(gputypes.tonumpy(), cputypes.tonumpy(), atol=1e-5)
def test_examplevec():
    """Build an ExampleVec from two single-set, single-label Examples.

    The test has no assertions; it passes if construction does not raise.
    """
    mol = pybel.readstring('smi', 'c1ccccc1CO')
    mol.addh()
    mol.make3D()

    by_element = molgrid.CoordinateSet(mol, molgrid.ElementIndexTyper())
    by_default = molgrid.CoordinateSet(mol)
    by_default.make_vector_types()  # this should not screw up index types

    first = molgrid.Example()
    first.coord_sets.append(by_element)
    first.labels.append(0)

    second = molgrid.Example()
    second.coord_sets.append(by_default)
    second.labels.append(1)

    evec = molgrid.ExampleVec([first, second])
def test_coordset_from_mol_vec():
    """Convert a default-typed coordinate set to vector types with type radii.

    After conversion with the radii from defaultGninaLigandTyper, the type
    vector's second dimension and the radii length are both expected to be 15.
    """
    mol = pybel.readstring('smi', 'c1ccccc1CO')
    mol.addh()
    mol.make3D()

    cset = molgrid.CoordinateSet(mol)  # default gnina ligand types
    type_radii = molgrid.defaultGninaLigandTyper.get_type_radii()
    cset.make_vector_types(True, type_radii)

    assert cset.type_vector.dimension(1) == 15
    assert cset.radii.dimension(0) == 15
    assert cset.has_vector_types()
def load_examples(T):
    """Turn an iterable of 4-tuples into a list of molgrid Examples.

    Each item of T is unpacked as (coord, types, energy, diff). Radii are
    looked up per type index in the module-level `typeradii`; `diff` becomes
    the example's only label and `energy` is not used.

    Returns:
        list of molgrid.Example, one per item of T, in order.
    """
    def build_example(coord, types, label):
        # one coordinate set per example, radii taken from the type indices
        radii = np.array([typeradii[int(idx)] for idx in types],
                         dtype=np.float32)
        example = molgrid.Example()
        example.coord_sets.append(molgrid.CoordinateSet(coord, types, radii, 4))
        example.labels.append(label)
        return example

    return [build_example(coord, types, diff)
            for coord, types, _energy, diff in T]
def test_coordset_from_mol():
    """Translate a molecule-derived coordinate set and check the total shift.

    The summed coordinate difference after the translation by (1, 1, 1) is
    expected to be approximately 48.
    """
    mol = pybel.readstring('smi', 'c1ccccc1CO')
    mol.addh()
    mol.make3D()

    cset = molgrid.CoordinateSet(mol, molgrid.ElementIndexTyper())
    before = cset.coords.tonumpy()

    # simple translate
    shift = molgrid.Transform(molgrid.Quaternion(), (0, 0, 0), (1, 1, 1))
    shift.forward(cset, cset)

    after = cset.coords.tonumpy()
    assert np.sum(after - before) == approx(48)
def test_typer_coord_set(self, typer, mol):
    """Check that a struct rebuilt from a CoordinateSet matches the typer's own.

    One struct comes directly from `typer.make_struct`; the other is built
    via a molgrid.CoordinateSet and `AtomStruct.from_coord_set`. Coordinates,
    types, typer, atom types and atomic radii must all agree.
    """
    direct = typer.make_struct(mol, dtype=torch.float32)

    cset = molgrid.CoordinateSet(mol, typer)
    rebuilt = AtomStruct.from_coord_set(cset, typer, dtype=torch.float32)

    assert (direct.coords == rebuilt.coords).all(), 'different coords'
    assert (direct.types == rebuilt.types).all(), 'different types'
    assert direct.typer == rebuilt.typer, 'different typers'
    assert direct.atom_types == rebuilt.atom_types, 'different atom types'
    assert (direct.atomic_radii == rebuilt.atomic_radii).all(), (
        'different atomic radii')
def test_coordset_merge():
    """Merge two coordinate sets through the CoordinateSet constructor.

    The two-argument form is expected to give max_type equal to the sum of
    the inputs' max_type; passing False as third argument is expected to give
    the maximum of the two instead, with type indices left unshifted.
    """
    mol = pybel.readstring('smi', 'c1ccccc1CO')
    mol.addh()
    mol.make3D()

    by_element = molgrid.CoordinateSet(mol, molgrid.ElementIndexTyper())
    by_default = molgrid.CoordinateSet(mol)

    merged_unique = molgrid.CoordinateSet(by_element, by_default)
    merged_shared = molgrid.CoordinateSet(by_element, by_default, False)

    assert merged_unique.max_type == (by_element.max_type + by_default.max_type)
    assert merged_unique.coords.dimension(0) == (
        by_element.coords.dimension(0) + by_default.type_index.size())

    assert merged_shared.max_type == max(by_element.max_type, by_default.max_type)
    assert merged_shared.coords.dimension(0) == (
        by_element.coords.dimension(0) + by_default.type_index.size())

    # unique types: second set's indices are shifted by the first's max_type
    expected_idx = np.concatenate([
        by_element.type_index.tonumpy(),
        by_default.type_index.tonumpy() + by_element.max_type,
    ])
    assert np.array_equal(expected_idx, merged_unique.type_index.tonumpy())

    # test merging without unique types, which makes no sense
    assert merged_shared.coords.tonumpy().shape == (24, 3)
    expected_idx = np.concatenate([
        by_element.type_index.tonumpy(),
        by_default.type_index.tonumpy(),
    ])
    assert np.array_equal(expected_idx, merged_shared.type_index.tonumpy())
def test_coordset_from_array():
    """Build a coordinate set from numpy arrays, translate it, then clone it.

    Writing to the clone's coordinates must leave the original untouched.
    """
    xyz = np.array([[1, 0, -1], [1, 3, -1], [1, 0, -1]], np.float32)
    type_idx = np.array([3, 2, 1], np.float32)
    radii = np.array([1.5, 1.5, 1.0], np.float32)

    cset = molgrid.CoordinateSet(xyz, type_idx, radii, 4)
    before = cset.coords.tonumpy()  # snapshot taken but not asserted on

    # simple translate
    shift = molgrid.Transform(molgrid.Quaternion(), (0, 0, 0), (-1, 0, 1))
    shift.forward(cset, cset)
    after = cset.coords.tonumpy()

    assert cset.coords[1, 1] == 3.0
    assert np.sum(after) == approx(3.0)

    # the clone must own its own coordinate storage
    duplicate = cset.clone()
    duplicate.coords[1, 1] = 0
    assert cset.coords[1, 1] == 3.0
for data in pya.anidataloader(hd5file): #calculate some statistics mcnt += 1 ccnt += len(data['energies']) elements.update(data['species']) #molecule types and radii types = np.array([typemap[elem] for elem in data['species']], dtype=np.float32) radii = np.array([typeradii[int(index)] for index in types], dtype=np.float32) sz = len(radii) if sz not in examplesbysize: examplesbysize[sz] = [] #create an example for every conformer for coord, energy in zip(data['coordinates'],data['energies']): c = molgrid.CoordinateSet(coord.astype(np.float32), types, radii,4) ex = molgrid.Example() ex.coord_sets.append(c) energy *= 627.5096 #convert to kcal/mol if args.normalize: energy /= sz ex.labels.append(energy) examples.append(ex) examplesbysize[sz].append(ex) wandb.watch(model) for sz in range(2,27): #construct strata of molecules with <= sz atoms
def test_vector_types():
    """Compare index-typed and vector-typed gridding of two atoms.

    One-hot type vectors must reproduce the index-typed grid, and type
    vectors scaled by 0.5 must give half the reference density per channel
    (CPU and GPU). A backward pass against a target grid built from a single
    atom at (1, 0, 0) must give opposite x-gradients for the two atoms.
    """
    origin = (0, 0, 0)
    gmaker = molgrid.GridMaker(resolution=.25, dimension=6.0)

    xyz = np.array([[0, 0, 0], [2, 0, 0]], np.float32)
    type_idx = np.array([0, 1], np.float32)
    one_hot = np.array([[1.0, 0], [0, 1.0]], np.float32)
    half_hot = np.array([[0.5, 0.0], [0.0, 0.5]], np.float32)
    radii = np.array([1.0, 1.0], np.float32)

    coords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid1f(type_idx), molgrid.Grid1f(radii), 2)
    vcoords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid2f(one_hot), molgrid.Grid1f(radii))
    v2coords = molgrid.CoordinateSet(
        molgrid.Grid2f(xyz), molgrid.Grid2f(half_hot), molgrid.Grid1f(radii))

    shape = gmaker.grid_dimensions(2)
    reference = molgrid.MGrid4f(*shape)
    vgrid = molgrid.MGrid4f(*shape)
    v2grid = molgrid.MGrid4f(*shape)
    v3grid = molgrid.MGrid4f(*shape)

    gmaker.forward(origin, coords, reference.cpu())
    gmaker.forward(origin, vcoords, vgrid.cpu())
    gmaker.forward(origin, v2coords, v2grid.cpu())
    # raw-array overload with vector types
    gmaker.forward(origin, xyz, one_hot, radii, v3grid.cpu())

    np.testing.assert_allclose(reference.tonumpy(), vgrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vgrid.tonumpy(), v3grid.tonumpy(), atol=1e-6)

    halved = v2grid.tonumpy()
    ref = reference.tonumpy()
    for channel in (0, 1):
        np.testing.assert_allclose(
            ref[channel, :], halved[channel, :] * 2.0, atol=1e-5)

    # repeat the vector-typed passes on the GPU
    vgridgpu = molgrid.MGrid4f(*shape)
    v2gridgpu = molgrid.MGrid4f(*shape)
    gmaker.forward(origin, vcoords, vgridgpu.gpu())
    gmaker.forward(origin, v2coords, v2gridgpu.gpu())

    np.testing.assert_allclose(reference.tonumpy(), vgridgpu.tonumpy(), atol=1e-5)
    halved_gpu = v2gridgpu.tonumpy()
    for channel in (0, 1):
        np.testing.assert_allclose(
            ref[channel, :], halved_gpu[channel, :] * 2.0, atol=1e-5)

    # create target grid with equal type density at 1,0,0
    target_xyz = molgrid.Grid2f(np.array([[1, 0, 0]], np.float32))
    target_types = molgrid.Grid2f(np.array([[0.5, 0.5]], np.float32))
    target_radii = molgrid.Grid1f(np.array([1.0], np.float32))
    targetc = molgrid.CoordinateSet(target_xyz, target_types, target_radii)
    tgrid = molgrid.MGrid4f(*shape)
    gmaker.forward(origin, targetc, tgrid.cpu())

    gradc = molgrid.MGrid2f(2, 3)
    gradt = molgrid.MGrid2f(2, 2)

    gmaker.backward(origin, vcoords, tgrid.cpu(), gradc.cpu(), gradt.cpu())
    assert gradc[0, 0] == approx(-gradc[1, 0], abs=1e-4)
    assert gradc[0, 0] > 0

    # clear the gradients and repeat on the GPU
    gradc.fill_zero()
    gradt.fill_zero()
    gmaker.backward(origin, vcoords, tgrid.gpu(), gradc.gpu(), gradt.gpu())
    assert gradc[0, 0] == approx(-gradc[1, 0], abs=1e-4)
    assert gradc[0, 0] > 0
t = molgrid.FileMappedGninaTyper(args.ligmap) # Grid dimensions (including types) gdims = gm.grid_dimensions(t.num_types()) # Pre-allocate grid # Only one example (batch size is 1) grid = torch.zeros(1, *gdims, dtype=torch.float32, device="cuda:0") obmol = next(pybel.readfile("sdf", args.sdf)) obmol.addh() print(obmol, end="") # Use OpenBabel molecule object (obmol.OBmol) instead of PyBel molecule (obmol) cs = molgrid.CoordinateSet(obmol.OBMol, t) ex = molgrid.Example() ex.coord_sets.append(cs) c = ex.coord_sets[0].center() # Only one coordinate set print("center:", tuple(c)) # https://gnina.github.io/libmolgrid/python/index.html#the-transform-class transform = molgrid.Transform( c, random_translate=0.0, random_rotation=False, # float # bool ) transform.forward(ex, ex)
r = typeradii[i] return (i, r) typer = molgrid.PythonCallbackIndexTyper(mytyper, 4, ['H', 'C', 'N', 'O']) tensor_shape = (1, ) + dims input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device=device) predictions = [] with torch.no_grad(): labelvec = torch.zeros(1, dtype=torch.float32, device=device) c = molgrid.CoordinateSet(mol, typer) ex = molgrid.Example() ex.coord_sets.append(c) batch = molgrid.ExampleVec([ex]) types = c.type_index.tonumpy() tcnts = np.array([np.count_nonzero(types == i) for i in range(4)]) base = linmodel.predict([tcnts]) start = time.time() for _ in range(args.n): gmaker.forward( batch, input_tensor, random_translation=2, random_rotation=True ) #create grid; randomly translate/rotate molecule