def test_radius_multiples():
    g1 = molgrid.GridMaker(resolution=.1, dimension=6.0)
    c = np.array([[0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    r = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 1)
    shape = g1.grid_dimensions(1)
    cpugrid = molgrid.MGrid4f(*shape)
    cpugrid2 = molgrid.MGrid4f(*shape)
    gpugrid = molgrid.MGrid4f(*shape)

    g1.forward((0, 0, 0), coords, cpugrid.cpu())
    g1.forward((0, 0, 0), coords, gpugrid.gpu())
    g1.forward((0, 0, 0), c, t, r, cpugrid2.cpu())

    np.testing.assert_allclose(cpugrid.tonumpy(), gpugrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(cpugrid.tonumpy(), cpugrid2.tonumpy(), atol=1e-6)
    g = cpugrid.tonumpy()

    assert g[0, 30, 30, 30] == approx(1)

    #cut a line across
    line = g[0, 30, 30, :]
    xvals = np.abs(np.arange(-3, 3.1, .1))
    gauss = np.exp(-2 * xvals**2)
    for i in range(20, 41):
        assert line[i] == approx(gauss[i])

    for i in list(range(0, 15)) + list(range(45, 61)):
        assert line[i] == approx(0)

    quad = 4 * np.exp(-2) * xvals**2 - 12 * np.exp(-2) * xvals + 9 * np.exp(-2)
    for i in list(range(15, 20)) + list(range(41, 45)):
        assert line[i] == approx(quad[i], abs=1e-5)

    #funkier grid
    g2 = molgrid.GridMaker(resolution=.1, dimension=6.0, radius_scale=0.5,
                           gaussian_radius_multiple=3.0)
    cpugrid = molgrid.MGrid4f(*shape)
    gpugrid = molgrid.MGrid4f(*shape)
    g2.forward((0, 0, 0), coords, cpugrid.cpu())
    g2.forward((0, 0, 0), coords, gpugrid.gpu())
    np.testing.assert_allclose(cpugrid.tonumpy(), gpugrid.tonumpy(), atol=1e-5)
    g = cpugrid.tonumpy()

    assert g[0, 30, 30, 30] == approx(1)
    #cut a line across
    line = g[0, 30, :, 30]
    xvals = np.abs(np.arange(-3, 3.1, .1)) * 2.0
    gauss = np.exp(-2 * xvals**2)
    #should be gaussian the whole way, although it quickly hits numerical zero
    for i in range(0, 61):
        assert line[i] == approx(gauss[i], abs=1e-5)
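# Why the quadratic above works out: by default the density falls off as a
# Gaussian exp(-2 d^2 / r^2) out to d = r (here r = 1) and is then continued
# by a quadratic that matches the Gaussian's value and slope at d = r and
# reaches zero at d = 1.5 r.  The coefficients used in the test factor as
#   quad(d) = exp(-2) * (2 d - 3)^2,
# so quad(1) = exp(-2) = gauss(1), quad'(1) = -4 exp(-2) = gauss'(1), and
# quad(1.5) = 0 -- hence the Gaussian checks on indices 20-40 (|x| <= 1), the
# quadratic checks on 15-19 and 41-44 (1 < |x| < 1.5), and exact zeros beyond.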
def test_vector_types_mol():
    '''Test vector types with a real molecule'''
    fname = datadir + "/small.types"

    e = molgrid.ExampleProvider(data_root=datadir + "/structs")
    e.populate(fname)
    ex = e.next()

    ev = molgrid.ExampleProvider(data_root=datadir + "/structs",
                                 make_vector_types=True)
    ev.populate(fname)
    exv = ev.next()

    assert exv.has_vector_types()
    assert not ex.has_vector_types()

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(ex.num_types())  # this should be grid_dims or get_grid_dims
    mgridout = molgrid.MGrid4f(*dims)
    mgridgpu = molgrid.MGrid4f(*dims)

    mgridoutv = molgrid.MGrid4f(*dims)
    mgridgpuv = molgrid.MGrid4f(*dims)

    d = np.ones(dims, np.float32)
    diff = molgrid.MGrid4f(*dims)
    diff.copyFrom(d)

    gmaker.forward(ex, mgridout.cpu())
    gmaker.forward(ex, mgridgpu.gpu())

    center = ex.coord_sets[-1].center()
    c = ex.merge_coordinates()
    backcoordscpu = molgrid.MGrid2f(c.size(), 3)
    backcoordsgpu = molgrid.MGrid2f(c.size(), 3)

    gmaker.backward(center, c, diff.cpu(), backcoordscpu.cpu())
    gmaker.backward(center, c, diff.gpu(), backcoordsgpu.gpu())

    #vector types
    gmaker.set_radii_type_indexed(True)

    gmaker.forward(exv, mgridoutv.cpu())
    gmaker.forward(exv, mgridgpuv.gpu())

    cv = exv.merge_coordinates()
    vbackcoordscpu = molgrid.MGrid2f(cv.size(), 3)
    vbackcoordsgpu = molgrid.MGrid2f(cv.size(), 3)
    vbacktypescpu = molgrid.MGrid2f(cv.size(), cv.num_types())
    vbacktypesgpu = molgrid.MGrid2f(cv.size(), cv.num_types())

    gmaker.backward(center, cv, diff.cpu(), vbackcoordscpu.cpu(),
                    vbacktypescpu.cpu())
    gmaker.backward(center, cv, diff.gpu(), vbackcoordsgpu.gpu(),
                    vbacktypesgpu.gpu())

    np.testing.assert_allclose(mgridout.tonumpy(), mgridoutv.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(mgridgpu.tonumpy(), mgridgpuv.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(mgridoutv.tonumpy(), mgridgpuv.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vbackcoordscpu.tonumpy(), backcoordscpu.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vbackcoordsgpu.tonumpy(), backcoordsgpu.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vbackcoordscpu.tonumpy(), vbackcoordsgpu.tonumpy(), atol=1e-4)
    np.testing.assert_allclose(vbacktypescpu.tonumpy(), vbacktypesgpu.tonumpy(), atol=1e-4)
def test_batched_function():
    for dev in ('cuda', 'cpu'):
        gmaker = molgrid.GridMaker(resolution=.1, dimension=6.0)
        c = torch.tensor([[[1.0, 0, 0], [1, 0, 0]], [[0, 1, 0], [0, 1, 0]]],
                         device=dev, dtype=torch.float32, requires_grad=True)
        vt = torch.tensor([[[0, 1.0, 0], [1.0, 0, 0]], [[0, 1.0, 0], [1.0, 0, 0]]],
                          device=dev, dtype=torch.float32, requires_grad=True)
        r = torch.tensor([[2.0, 2.0], [2.0, 2.0]], device=dev, dtype=torch.float32)
        grid = BatchedCoords2GridFunction.apply(gmaker, (0, 0, 0), c, vt, r)
        shape = gmaker.grid_dimensions(3)

        #make diff with gradient in center
        diff = torch.zeros(2, *shape, dtype=torch.float32, device=dev)
        diff[0, 0, 30, 30, 30] = 1.0
        diff[0, 1, 30, 30, 30] = -1.0
        diff[1, 0, 30, 30, 30] = 1.0
        diff[1, 1, 30, 30, 30] = -1.0
        grid.backward(diff)

        assert c.grad[0][0].cpu().numpy() == approx([0.60653, 0, 0], abs=1e-4)
        assert c.grad[0][1].cpu().numpy() == approx([-0.60653, 0, 0], abs=1e-4)
        assert vt.grad[0][0].cpu().numpy() == approx([0.60653, -0.60653, 0], abs=1e-4)
        assert vt.grad[0][1].cpu().numpy() == approx([0.60653, -0.60653, 0], abs=1e-4)
        assert c.grad[1][0].cpu().numpy() == approx([0, 0.60653, 0], abs=1e-4)
        assert c.grad[1][1].cpu().numpy() == approx([0, -0.60653, 0], abs=1e-4)
        assert vt.grad[1][0].cpu().numpy() == approx([0.60653, -0.60653, 0], abs=1e-4)
        assert vt.grad[1][1].cpu().numpy() == approx([0.60653, -0.60653, 0], abs=1e-4)
def test_backward_vec():
    g1 = molgrid.GridMaker(resolution=.1, dimension=6.0)
    c = np.array([[1.0, 0, 0], [-1, -1, 0]], np.float32)
    t = np.array([[0, 1.0, 0], [1.0, 0, 0]], np.float32)
    r = np.array([2.0, 2.0], np.float32)
    coords = molgrid.CoordinateSet(c, t, r)
    shape = g1.grid_dimensions(3)

    #make diff with gradient in center
    diff = molgrid.MGrid4f(*shape)
    diff[0, 30, 30, 30] = 1.0
    diff[1, 30, 30, 30] = -1.0

    cpuatoms = molgrid.MGrid2f(2, 3)
    cputypes = molgrid.MGrid2f(2, 3)
    gpuatoms = molgrid.MGrid2f(2, 3)
    gputypes = molgrid.MGrid2f(2, 3)

    g1.backward((0, 0, 0), coords, diff.cpu(), cpuatoms.cpu(), cputypes.cpu())

    assert cputypes[0][0] > 0
    assert cputypes[0][1] < 0
    assert cputypes[0][2] == 0

    g1.backward((0, 0, 0), coords, diff.gpu(), gpuatoms.gpu(), gputypes.gpu())

    np.testing.assert_allclose(gpuatoms.tonumpy(), cpuatoms.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(gputypes.tonumpy(), cputypes.tonumpy(), atol=1e-5)
def test_make_vector_types_ex_provider(capsys):
    fname = datadir + "/ligonly.types"
    e = molgrid.ExampleProvider(data_root=datadir + "/structs",
                                make_vector_types=True)
    e.populate(fname)
    batch_size = 10
    b = e.next_batch(batch_size)

    gmaker = molgrid.GridMaker(dimension=23.5, radius_type_indexed=True)
    shape = gmaker.grid_dimensions(molgrid.defaultGninaLigandTyper.num_types() + 1)
    mgrid = molgrid.MGrid5f(batch_size, *shape)

    c = b[0].merge_coordinates()
    tv = c.type_vector.tonumpy()
    assert tv.shape == (10, 15)
    assert tv[0].sum() == 1.0
    assert tv[0][8] == 1.0
    gmaker.forward(b, mgrid)

    assert b[0].coord_sets[0].has_vector_types()
    assert b[0].coord_sets[1].has_vector_types()
    assert b[0].type_size() == 15
def test_backwards():
    g1 = molgrid.GridMaker(resolution=.1, dimension=6.0)
    c = np.array([[1.0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    r = np.array([2.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 1)
    shape = g1.grid_dimensions(1)

    #make diff with gradient in center
    diff = molgrid.MGrid4f(*shape)
    diff[0, 30, 30, 30] = 1.0

    cpuatoms = molgrid.MGrid2f(1, 3)
    gpuatoms = molgrid.MGrid2f(1, 3)

    #apply random rotation
    T = molgrid.Transform((0, 0, 0), 0, True)
    T.forward(coords, coords)

    g1.backward((0, 0, 0), coords, diff.cpu(), cpuatoms.cpu())
    g1.backward((0, 0, 0), coords, diff.gpu(), gpuatoms.gpu())

    T.backward(cpuatoms.cpu(), cpuatoms.cpu(), False)
    T.backward(gpuatoms.gpu(), gpuatoms.gpu(), False)

    print(cpuatoms.tonumpy(), gpuatoms.tonumpy())
    # results should be ~ -.6, 0, 0
    np.testing.assert_allclose(cpuatoms.tonumpy(), gpuatoms.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(cpuatoms.tonumpy().flatten(),
                               [-0.60653067, 0, 0], atol=1e-5)
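# Where -0.60653 comes from: the atom sits at distance d = 1 from the probed
# grid point with radius r = 2, so the density there is exp(-2 d^2 / r^2) =
# exp(-0.5) ~= 0.60653, and its derivative with respect to the atom's x
# coordinate is -x * exp(-x^2 / 2) = -exp(-0.5) at x = 1.  The random
# rotation applied by T is undone by T.backward, so the gradient comes back
# in the original frame.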
def test_dx():
    fname = datadir + "/small.types"
    e = molgrid.ExampleProvider(data_root=datadir + "/structs")
    e.populate(fname)
    ex = e.next()
    c = ex.coord_sets[1]

    assert np.min(c.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(c.max_type)  # this should be grid_dims or get_grid_dims
    center = c.coord.tonumpy().mean(axis=0)
    center = tuple(center.astype(float))

    mgridout = molgrid.MGrid4f(*dims)
    gmaker.forward(center, c, mgridout.cpu())

    molgrid.write_dx("tmp.dx", mgridout[0].cpu(), center, 0.5)

    mgridin = molgrid.read_dx("tmp.dx")
    os.remove("tmp.dx")

    g = mgridin.grid().tonumpy()
    go = mgridout[0].tonumpy()
    np.testing.assert_array_almost_equal(g, go, decimal=5)

    assert center == approx(list(mgridin.center()))
    assert mgridin.resolution() == 0.5
def test_backward_gradients():
    #test that we have the right value along a single dimension
    gmaker = molgrid.GridMaker(resolution=0.5, dimension=6.0,
                               gaussian_radius_multiple=-2.0)  #use full truncated gradient
    xvals = np.arange(-0.9, 3, .1)

    for device in ('cuda', 'cpu'):
        types = torch.ones(1, 1, dtype=torch.float32, device=device)
        radii = torch.ones(1, dtype=torch.float32, device=device)
        for i in range(3):  #test along each axis
            for x in xvals:
                coords = torch.zeros(1, 3, dtype=torch.float32, device=device)
                coords[0][i] = x
                coords.requires_grad = True
                outgrid = molgrid.Coords2GridFunction.apply(
                    gmaker, (0, 0, 0), coords, types, radii)
                if i == 0:
                    gp = outgrid[0][8][6][6]
                elif i == 1:
                    gp = outgrid[0][6][8][6]
                else:
                    gp = outgrid[0][6][6][8]
                Lg = torch.autograd.grad(gp, coords, create_graph=True)[0]
                fancyL = torch.sum(Lg**2)
                val = float(torch.autograd.grad(fancyL, coords)[0][0][i])

                d = x - 1
                correct = -128 * d**3 * np.exp(-4 * d**2) + 32 * d * np.exp(
                    -4 * d**2)  #formulate based on distance
                assert val == approx(correct, abs=1e-4)

    #check that diagonal is symmetric and decreases at this range
    for device in ('cuda', 'cpu'):
        types = torch.ones(1, 1, dtype=torch.float32, device=device)
        radii = torch.ones(1, dtype=torch.float32, device=device)
        coords = torch.zeros(1, 3, dtype=torch.float32, requires_grad=True,
                             device=device)
        outgrid = molgrid.Coords2GridFunction.apply(gmaker, (0, 0, 0), coords,
                                                    types, radii)

        gp = outgrid[0][7][7][7]
        Lg = torch.autograd.grad(gp, coords, create_graph=True)[0]
        fancyL = torch.sum(Lg**2)
        fL1 = torch.autograd.grad(fancyL, coords)[0][0]

        gp2 = outgrid[0][8][8][8]
        Lg = torch.autograd.grad(gp2, coords, create_graph=True)[0]
        fancyL = torch.sum(Lg**2)
        fL2 = torch.autograd.grad(fancyL, coords)[0][0]

        assert fL1[0] == fL1[1]
        assert fL1[2] == fL1[1]
        assert fL2[0] == fL2[1]
        assert fL2[2] == fL2[1]
        assert fL2[0] < fL1[0]
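# Why `correct` takes that closed form: grid point [8][6][6] sits at x = 1 on
# the first axis (resolution 0.5 over dimension 6.0 gives 13 points with
# index 6 at the origin), so with r = 1 the density seen there is
# g(d) = exp(-2 d^2) for d = x - 1.  Then
#   Lg = dg/dd = -4 d exp(-2 d^2)
#   fancyL = Lg^2 = 16 d^2 exp(-4 d^2)
#   d(fancyL)/dd = 32 d exp(-4 d^2) - 128 d^3 exp(-4 d^2),
# which is exactly the expression asserted against `val`.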
def test_a_grid():
    fname = datadir + "/small.types"
    e = molgrid.ExampleProvider(data_root=datadir + "/structs")
    e.populate(fname)
    ex = e.next()
    c = ex.coord_sets[1]

    assert np.min(c.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(c.max_type)  # this should be grid_dims or get_grid_dims
    center = c.center()
    center = tuple(center)

    mgridout = molgrid.MGrid4f(*dims)
    mgridgpu = molgrid.MGrid4f(*dims)
    npout = np.zeros(dims, dtype=np.float32)
    torchout = torch.zeros(dims, dtype=torch.float32)
    cudaout = torch.zeros(dims, dtype=torch.float32, device='cuda')

    gmaker.forward(center, c, mgridout.cpu())
    gmaker.forward(center, c, mgridgpu.gpu())
    gmaker.forward(center, c, npout)
    gmaker.forward(center, c, torchout)
    gmaker.forward(center, c, cudaout)

    newt = gmaker.make_tensor(center, c)
    newa = gmaker.make_ndarray(center, c)

    assert 1.438691 == approx(mgridout.tonumpy().max())
    assert 1.438691 == approx(mgridgpu.tonumpy().max())
    assert 1.438691 == approx(npout.max())
    assert 1.438691 == approx(torchout.numpy().max())
    assert 1.438691 == approx(cudaout.cpu().numpy().max())
    assert 1.438691 == approx(newt.cpu().numpy().max())
    assert 1.438691 == approx(newa.max())

    #should overwrite by default, yes?
    gmaker.forward(center, c, mgridout.cpu())
    gmaker.forward(center, c, mgridgpu.gpu())
    assert 1.438691 == approx(mgridout.tonumpy().max())
    assert 1.438691 == approx(mgridgpu.tonumpy().max())

    dims = gmaker.grid_dimensions(e.num_types())
    mgridout = molgrid.MGrid4f(*dims)
    mgridgpu = molgrid.MGrid4f(*dims)
    gmaker.forward(ex, mgridout.cpu())
    gmaker.forward(ex, mgridgpu.gpu())

    gmaker.forward(ex, mgridout.cpu())
    gmaker.forward(ex, mgridgpu.gpu())

    assert 2.094017 == approx(mgridout.tonumpy().max())
    assert 2.094017 == approx(mgridgpu.tonumpy().max())
def test_type_radii():
    g1 = molgrid.GridMaker(resolution=.25, dimension=6.0,
                           radius_type_indexed=True)
    c = np.array([[0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    r = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 2)
    coords.make_vector_types(True, [3.0, 1.0])

    shape = g1.grid_dimensions(3)  #includes dummy type
    reference = molgrid.MGrid4f(*shape)
    gpudata = molgrid.MGrid4f(*shape)

    assert g1.get_radii_type_indexed()

    g1.forward((0, 0, 0), coords, reference.cpu())
    g1.forward((0, 0, 0), coords, gpudata.gpu())

    np.testing.assert_allclose(reference.tonumpy(), gpudata.tonumpy(), atol=1e-5)

    assert reference.tonumpy().sum() > 2980  #radius of 1 would be 116

    reference.fill_zero()
    reference[0][20][12][12] = -1
    reference[1][20][12][12] = 1
    reference[2][20][12][12] = 2

    cpuatoms = molgrid.MGrid2f(1, 3)
    cputypes = molgrid.MGrid2f(1, 3)
    gpuatoms = molgrid.MGrid2f(1, 3)
    gputypes = molgrid.MGrid2f(1, 3)

    g1.backward((0, 0, 0), coords, reference.cpu(), cpuatoms.cpu(), cputypes.cpu())

    assert cpuatoms[0][0] < 0
    assert cpuatoms[0][1] == 0
    assert cpuatoms[0][2] == 0

    assert cputypes[0][0] < 0
    assert cputypes[0][1] == 0
    assert cputypes[0][2] == 0

    g1.backward((0, 0, 0), coords, reference.gpu(), gpuatoms.gpu(), gputypes.gpu())

    np.testing.assert_allclose(gpuatoms.tonumpy(), cpuatoms.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(gputypes.tonumpy(), cputypes.tonumpy(), atol=1e-5)
def test_vector_types():
    g1 = molgrid.GridMaker(resolution=.25, dimension=6.0)
    c = np.array([[0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    vt = np.array([[1.0, 0]], np.float32)
    vt2 = np.array([[0.5, 0.5]], np.float32)
    r = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 2)
    vcoords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid2f(vt),
                                    molgrid.Grid1f(r))
    v2coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid2f(vt2),
                                     molgrid.Grid1f(r))

    shape = g1.grid_dimensions(2)
    reference = molgrid.MGrid4f(*shape)
    vgrid = molgrid.MGrid4f(*shape)
    v2grid = molgrid.MGrid4f(*shape)
    v3grid = molgrid.MGrid4f(*shape)

    g1.forward((0, 0, 0), coords, reference.cpu())
    g1.forward((0, 0, 0), vcoords, vgrid.cpu())
    g1.forward((0, 0, 0), v2coords, v2grid.cpu())
    g1.forward((0, 0, 0), c, vt, r, v3grid.cpu())
    np.testing.assert_allclose(reference.tonumpy(), vgrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vgrid.tonumpy(), v3grid.tonumpy(), atol=1e-6)

    v2g = v2grid.tonumpy()
    g = reference.tonumpy()

    np.testing.assert_allclose(g[0, :], v2g[0, :] * 2.0, atol=1e-5)
    np.testing.assert_allclose(g[0, :], v2g[1, :] * 2.0, atol=1e-5)

    vgridgpu = molgrid.MGrid4f(*shape)
    v2gridgpu = molgrid.MGrid4f(*shape)
    g1.forward((0, 0, 0), vcoords, vgridgpu.gpu())
    g1.forward((0, 0, 0), v2coords, v2gridgpu.gpu())

    np.testing.assert_allclose(reference.tonumpy(), vgridgpu.tonumpy(), atol=1e-5)
    v2gpu = v2gridgpu.tonumpy()

    np.testing.assert_allclose(g[0, :], v2gpu[0, :] * 2.0, atol=1e-5)
    np.testing.assert_allclose(g[0, :], v2gpu[1, :] * 2.0, atol=1e-5)
def get_model_gmaker_eproviders(args):
    #train example provider
    eptrain = molgrid.ExampleProvider(
        shuffle=True, stratify_receptor=True, labelpos=0, stratify_pos=0,
        stratify_min=0, stratify_max=12, stratify_step=2,
        recmolcache=args.recmolcache, ligmolcache=args.ligmolcache,
        data_root='/net/pulsar/home/koes/rishal/rmsd_paper/pdbbind/general_minus_refined')
    eptrain.populate(args.train_types)

    #test example provider
    eptest = molgrid.ExampleProvider(
        shuffle=True, stratify_receptor=True, labelpos=0, stratify_pos=0,
        stratify_min=0, stratify_max=12, stratify_step=2,
        recmolcache=args.recmolcache, ligmolcache=args.ligmolcache,
        data_root='/net/pulsar/home/koes/rishal/rmsd_paper/pdbbind/general_minus_refined')
    eptest.populate(args.test_types)

    #gridmaker with defaults
    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(eptrain.num_types())

    model_file = imp.load_source("model", args.model)
    #load model with seed
    torch.manual_seed(args.seed)
    model = model_file.Model(dims)

    return model, gmaker, eptrain, eptest
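# `imp.load_source` is deprecated and was removed in Python 3.12.  A minimal
# importlib-based equivalent, should this need to run on newer interpreters
# (the helper name is ours, not part of the original script):
import importlib.util

def load_model_module(path, name="model"):
    # load a module object from an arbitrary file path
    spec = importlib.util.spec_from_file_location(name, path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module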
def test_coords2grid():
    gmaker = molgrid.GridMaker(resolution=0.5, dimension=23.5, radius_scale=1,
                               radius_type_indexed=True)
    n_types = molgrid.defaultGninaLigandTyper.num_types()
    radii = np.array(list(molgrid.defaultGninaLigandTyper.get_type_radii()),
                     np.float32)
    dims = gmaker.grid_dimensions(n_types)
    grid_size = dims[0] * dims[1] * dims[2] * dims[3]

    c2grid = molgrid.Coords2Grid(gmaker, center=(0, 0, 0))
    n_atoms = 2
    batch_size = 1
    coords = nn.Parameter(torch.randn(n_atoms, 3, device='cuda'))
    types = nn.Parameter(torch.randn(n_atoms, n_types + 1, device='cuda'))
    coords.data[0, :] = torch.tensor([1, 0, 0])
    coords.data[1, :] = torch.tensor([-1, 0, 0])
    types.data[...] = 0
    types.data[:, 10] = 1
    batch_radii = torch.tensor(np.tile(radii, (batch_size, 1)),
                               dtype=torch.float32, device='cuda')

    grid_gen = c2grid(coords.unsqueeze(0),
                      types.unsqueeze(0)[:, :, :-1], batch_radii)
    assert float(grid_gen[0][10].sum()) == approx(float(grid_gen.sum()))
    assert grid_gen.sum() > 0

    target = torch.zeros_like(grid_gen)
    target[0, :, 24, 24, 24] = 1000.0

    grad_coords = molgrid.MGrid2f(n_atoms, 3)
    grad_types = molgrid.MGrid2f(n_atoms, n_types)
    r = molgrid.MGrid1f(len(radii))
    r.copyFrom(radii)

    grid_loss = F.mse_loss(target, grid_gen)
    grid_loss.backward()
    print(grid_loss)
    print(coords.grad.detach().cpu().numpy())
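# The test stops after a single backward pass.  A minimal sketch of carrying
# the same pieces through an actual optimization of the coordinates against
# the target grid (the optimizer choice, learning rate, and step count are
# illustrative assumptions, not part of the test):
def fit_coords_to_grid(c2grid, coords, types, batch_radii, target,
                       steps=100, lr=0.05):
    opt = torch.optim.Adam([coords], lr=lr)
    for _ in range(steps):
        opt.zero_grad()
        # regenerate the grid from the current coordinates each step
        gen = c2grid(coords.unsqueeze(0), types.unsqueeze(0)[:, :, :-1],
                     batch_radii)
        F.mse_loss(gen, target).backward()
        opt.step()
    return coords.detach()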
def setup_gmaker_eprov(resolution: float, radius: float, data_file: Path):
    """Set up the molgrid GridMaker and ExampleProvider for the data specified
    in data_file.

    Args:
        resolution (float): Resolution of the grid
        radius (float): Radius of the grid in Angstrom
        data_file (Path): File specifying the types file pairings making up the data set

    Returns:
        tuple: GridMaker, ExampleProvider
    """
    # dim is 1 voxel length less than 2x radius to ensure that the center lies
    # on a node between 8 voxels
    gmaker = molgrid.GridMaker(resolution=resolution,
                               dimension=2 * radius - resolution)
    e_provider_test = molgrid.ExampleProvider(data_root="", balanced=False,
                                              shuffle=False)
    e_provider_test.populate(str(data_file))

    return gmaker, e_provider_test
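# Example usage (the types file path is hypothetical): a 12 A radius at 0.5 A
# resolution gives dimension 2 * 12 - 0.5 = 23.5, i.e. a 48-point grid per axis.
# gmaker, e_provider = setup_gmaker_eprov(0.5, 12.0, Path("data/test.types"))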
def test_vector_types_duplicate():
    fname = datadir + "/smalldup.types"

    teste = molgrid.ExampleProvider(molgrid.GninaVectorTyper(), shuffle=False,
                                    duplicate_first=True,
                                    data_root=datadir + "/structs")
    teste.populate(fname)
    batch_size = 1
    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(molgrid.GninaVectorTyper().num_types() * 4)
    tensor_shape = (batch_size,) + dims

    input_tensor_1 = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
    batch_1 = teste.next_batch(batch_size)
    gmaker.forward(batch_1, input_tensor_1, random_translation=0.0,
                   random_rotation=False)

    input_tensor_2 = torch.zeros(tensor_shape, dtype=torch.float32, device='cpu')
    gmaker.forward(batch_1, input_tensor_2, random_translation=0.0,
                   random_rotation=False)

    np.testing.assert_allclose(input_tensor_1.cpu().detach().numpy(),
                               input_tensor_2.detach().numpy(), atol=1e-4)
    assert input_tensor_1.cpu().detach().numpy().max() < 75
def test_dx():
    fname = datadir + "/small.types"
    e = molgrid.ExampleProvider(data_root=datadir + "/structs")
    e.populate(fname)
    ex = e.next()
    c = ex.coord_sets[1]

    assert np.min(c.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.type_size())  # this should be grid_dims or get_grid_dims
    center = tuple(c.center())

    mgridout = molgrid.MGrid4f(*dims)
    gmaker.forward(center, c, mgridout.cpu())

    molgrid.write_dx("tmp.dx", mgridout[0].cpu(), center, 0.5)

    mgridin = molgrid.read_dx("tmp.dx")
    os.remove("tmp.dx")

    g = mgridin.grid().tonumpy()
    go = mgridout[0].tonumpy()
    np.testing.assert_array_almost_equal(g, go, decimal=5)

    assert center == approx(list(mgridin.center()))
    assert mgridin.resolution() == 0.5

    #dump everything
    molgrid.write_dx_grids("/tmp/tmp", e.get_type_names(), mgridout.cpu(),
                           center, gmaker.get_resolution(), 0.5)
    checkgrid = molgrid.MGrid4f(*dims)
    molgrid.read_dx_grids("/tmp/tmp", e.get_type_names(), checkgrid.cpu())
    np.testing.assert_array_almost_equal(mgridout.tonumpy(),
                                         2.0 * checkgrid.tonumpy(), decimal=5)
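# Note on the 2.0 factor in the final check: the last argument to
# write_dx_grids (0.5 here) is a multiplicative scale applied to the values
# on output, so the files hold half the original densities and the re-read
# grid must be doubled to compare:
#   written = scale * grid  =>  grid = written / scale = 2.0 * checkgrid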
def __init__(
    self,
    beam_size=1,
    multi_atom=False,
    n_atoms_detect=1,
    apply_conv=False,
    threshold=0.1,
    peak_value=1.5,
    min_dist=0.0,
    apply_prop_conv=False,
    constrain_types=False,
    constrain_frags=False,
    estimate_types=False,
    fit_L1_loss=False,
    interm_gd_iters=10,
    final_gd_iters=100,
    gd_kwargs=dict(
        lr=0.1,
        betas=(0.9, 0.999),
        weight_decay=0.0,
    ),
    dkoes_make_mol=True,
    use_openbabel=False,
    output_kernel=False,
    device='cuda',
    verbose=0,
    debug=False,
):
    # number of best structures to store and expand during search
    self.beam_size = beam_size

    # maximum number of atoms to detect in remaining density
    self.n_atoms_detect = n_atoms_detect

    # try placing all detected atoms at once, then try individually
    self.multi_atom = multi_atom

    # settings for detecting atoms in element channels
    self.apply_conv = apply_conv
    self.threshold = threshold
    self.peak_value = peak_value
    self.min_dist = min_dist

    # setting for detecting properties in property channels
    self.apply_prop_conv = apply_prop_conv

    # can constrain to find exact atom type counts or single fragment
    self.constrain_types = constrain_types
    self.constrain_frags = constrain_frags
    self.estimate_types = estimate_types

    # can perform gradient descent at each step and/or at final step
    self.fit_L1_loss = fit_L1_loss
    self.interm_gd_iters = interm_gd_iters
    self.final_gd_iters = final_gd_iters
    self.gd_kwargs = gd_kwargs

    self.output_kernel = output_kernel
    self.device = device
    self.verbose = verbose
    self.debug = debug

    self.grid_maker = molgrid.GridMaker(gaussian_radius_multiple=-1.5)
    self.c2grid = molgrid.Coords2Grid(self.grid_maker)

    # lazily initialize atom density kernel
    self.kernel = None
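# A hedged construction sketch.  The enclosing class name `AtomFitter` is an
# assumption from context (the signature above is its __init__); the argument
# values shown are illustrative:
# fitter = AtomFitter(beam_size=2, n_atoms_detect=5, multi_atom=True,
#                     device='cuda')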
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from rdkit.Geometry.rdGeometry import Point3D
from skimage.segmentation import flood_fill
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

import molgrid

import generate
import atom_types

idx = [2, 3, 4, 5, 19, 18, 17, 6, 9, 7, 8, 10, 13, 12, 16, 14, 15, 20, 27]  #generate.py defaults
channels = atom_types.get_channels_by_index(idx)
typer = molgrid.SubsettedGninaTyper(idx, False)  #equivalent in molgrid
gmaker = molgrid.GridMaker(gaussian_radius_multiple=-1.5, dimension=36)
device = 'cuda'


def grid_to_xyz(gcoords, mgrid):
    return mgrid.center + (np.array(gcoords) -
                           ((mgrid.size - 1) / 2)) * mgrid.resolution


def get_per_atom_volume(radius):
    return radius**3 * ((2 * np.pi)**1.5)


def select_atom_starts(mgrid, G, radius):
    '''Given a single channel grid and the atomic radius for that type,
def main(args):
    # Fix seeds
    molgrid.set_random_seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Set CuDNN options for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set up libmolgrid
    e = molgrid.ExampleProvider(data_root=args.data_root, balanced=False,
                                shuffle=False)
    e.populate(args.test_file)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (args.batch_size, ) + dims

    # Load test file examples (NOTE: not possible to do directly via molgrid)
    with open(args.test_file, 'r') as f:
        lines = f.readlines()

    # Construct input tensors
    input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
    float_labels = torch.zeros(args.batch_size, dtype=torch.float32)

    # Initialise network - Two models currently available (see models.py for details)
    if args.model == 'Ragoza':
        model = Basic_CNN(dims).to('cuda')
    elif args.model == 'Imrie':
        model = DenseNet(dims, block_config=(4, 4, 4)).to('cuda')
    else:
        print("Please specify a valid architecture")
        exit()

    # Load weights for network
    model.load_state_dict(torch.load(args.weights))
    print("Loaded model parameters")

    # Print number of parameters in model
    print("Number of model params: %dK" %
          (sum([x.nelement() for x in model.parameters()]) / 1000, ))

    # Test network
    # Ensure model in eval mode
    model.eval()

    # Test loop
    predictions = []
    labels = []
    num_samples = e.size()
    num_batches = -(-num_samples // args.batch_size)
    print("Number of examples: %d" % num_samples)
    for it in range(num_batches):
        # Load data
        batch = e.next_batch(args.batch_size)
        gmaker.forward(batch, input_tensor, random_rotation=args.rotate,
                       random_translation=args.translate)
        batch.extract_label(0, float_labels)
        labels.extend(list(float_labels.detach().cpu().numpy()))
        batch_predictions = []
        for _ in range(args.num_rotate):
            gmaker.forward(batch, input_tensor, random_rotation=args.rotate,
                           random_translation=args.translate)
            # Predict
            output = F.softmax(model(input_tensor), dim=1)
            batch_predictions.append(list(output.detach().cpu().numpy()[:, 1]))
        predictions.extend(list(np.mean(batch_predictions, axis=0)))
        # Progress
        if it % args.display_iter == 0:
            print("Processed: %d / %d examples" % (it * args.batch_size, num_samples))

    # Print performance
    labels = labels[:num_samples]
    predictions = predictions[:num_samples]
    print("Test AUC: %.2f" % (roc_auc_score(labels, predictions)), flush=True)

    # Save predictions
    output_lines = []
    for line, pred in zip(lines, predictions):
        output_lines.append(str(pred) + ' ' + line)
    with open(args.output_path, 'w') as f:
        for line in output_lines:
            f.write(line)
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass
    else:
        tgs = ['BarlowTwins'] + args.tags
        wandb.init(entity='andmcnutt', project='DDG_model_Regression',
                   config=args, tags=tgs)

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.local_rank == -1:
            args.local_rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            # args.rank = args.rank * ngpus_per_node + gpu
            print(f"rank:{args.local_rank}")
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.local_rank)

    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnet'):
        resnet_num = int(args.arch.split('t')[-1])
        model = moco.resnet.generate_model(resnet_num)
        model.fc = nn.Identity()
    elif args.arch == 'default2018':
        model = Default2018((28, 48, 48, 48), args.rep_size)
    elif args.arch == 'dense':
        model = Dense((28, 48, 48, 48))
    projector = Projector(args.rep_size, args.proj_size)
    predictor = None
    if args.semi_super:
        predictor = Predictor(args.rep_size)
    print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            projector.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            if args.arch.startswith('resnet'):
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], output_device=args.gpu)
            projector = nn.SyncBatchNorm.convert_sync_batchnorm(projector)
            projector = torch.nn.parallel.DistributedDataParallel(
                projector, device_ids=[args.gpu], output_device=args.gpu)
            if args.semi_super:
                predictor.cuda(args.gpu)
                predictor = torch.nn.parallel.DistributedDataParallel(
                    predictor, device_ids=[args.gpu], output_device=args.gpu)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    if args.cc_lambda is None:
        args.cc_lambda = 1.0 / args.proj_size
        print(f'updated cc_lambda: {args.cc_lambda}')
    criterion = CrossCorrLoss(args.proj_size, args.cc_lambda, args.batch_size,
                              device=args.gpu).cuda(args.gpu)

    parameters = [p for p in model.parameters()] + [p for p in projector.parameters()]
    if args.semi_super:
        parameters += [p for p in predictor.parameters()]
    optimizer = LARS(parameters, lr=0, momentum=args.momentum,
                     weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset = molgrid.MolDataset(args.data, data_root=args.dataroot,
                                       ligmolcache=args.ligmolcache,
                                       recmolcache=args.recmolcache)
    #Need to use random trans/rot when actually running
    gmaker = molgrid.GridMaker()
    shape = gmaker.grid_dimensions(28)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset, shuffle=True)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None), num_workers=args.workers,
        pin_memory=True, sampler=train_sampler, drop_last=True,
        collate_fn=moco.loader.collateMolDataset)

    for epoch in range(args.start_epoch, args.epochs):
        if train_sampler is not None:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        loss, lr = train(train_loader, model, projector, predictor, criterion,
                         optimizer, gmaker, shape, epoch, args)
        print(f'Epoch: {epoch}, Loss:{loss}')
        if args.local_rank == 0:
            if args.semi_super:
                wandb.log({'Loss': loss[0], 'Supervised Loss': loss[1],
                           'Representation Loss': loss[2], "Learning Rate": lr})
            else:
                wandb.log({'Loss': loss})

        if (not args.multiprocessing_distributed or
                (args.multiprocessing_distributed and
                 args.local_rank % ngpus_per_node == 0)) and (epoch % 50 == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'projector': projector.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best=False, filename='checkpoint_{:04d}.pth.tar'.format(epoch))
def simple_atom_fit(mgrid, types, iters=10, tol=0.01):
    '''Fit atoms to MolGrid.  types are ignored as the number of atoms of each
    type is always inferred from the density.  Returns the MolGrid of the
    placed atoms and the MolStruct'''
    t_start = time.time()

    # mtr22 - match the input API of generate.AtomFitter.fit
    mgrid = generate.MolGrid(
        values=torch.as_tensor(mgrid.values, device=device),
        channels=mgrid.channels,
        center=mgrid.center,
        resolution=mgrid.resolution,
    )

    #for every channel, select some coordinates and setup the type/radius vectors
    initcoords = []
    typevecs = []
    radii = []
    typeindices = []
    numatoms = 0
    tcnts = {}
    types_est = []  # mtr22

    for (t, G) in enumerate(mgrid.values):
        ch = mgrid.channels[t]
        #print(ch)
        coords = select_atom_starts(mgrid, G, ch.atomic_radius)
        if coords:
            tvec = np.zeros(len(mgrid.channels))
            tvec[t] = 1.0
            tcnt = len(coords)
            numatoms += tcnt
            types_est.append(tcnt)  #mtr22

            r = mgrid.channels[t].atomic_radius
            initcoords += coords
            typevecs += [tvec] * tcnt
            typeindices += [t] * tcnt
            radii += [r] * tcnt
            tcnts[t] = tcnt
        else:
            types_est.append(0)  #mtr22

    typevecs = np.array(typevecs)
    initcoords = np.array(initcoords)
    typeindices = np.array(typeindices)

    # mtr22 - for computing type_diff metrics in returned molstruct
    types_true = torch.tensor(types, dtype=torch.float32, device=device)
    types_est = torch.tensor(types_est, dtype=torch.float32, device=device)
    #print(types_est)

    #setup gridder
    gridder = molgrid.Coords2Grid(molgrid.GridMaker(
        dimension=mgrid.dimension,
        resolution=mgrid.resolution,
        gaussian_radius_multiple=-1.5),
        center=tuple(mgrid.center.astype(float)))
    mgrid.values = mgrid.values.to(device)

    #having setup input coordinates, optimize with BFGS
    coords = torch.tensor(initcoords, dtype=torch.float32, requires_grad=True,
                          device=device)
    types = torch.tensor(typevecs, dtype=torch.float32, device=device)
    radii = torch.tensor(radii, dtype=torch.float32, device=device)

    best_loss = np.inf
    best_coords = None
    best_typeindices = typeindices  #save in case number of atoms changes
    goodcoords = False

    for inum in range(iters):
        optimizer = torch.optim.LBFGS([coords], max_iter=20000,
                                      tolerance_grad=1e-9,
                                      line_search_fn='strong_wolfe')

        def closure():
            optimizer.zero_grad()
            agrid = gridder.forward(coords, types, radii)
            loss = torch.square(agrid - mgrid.values).sum() / numatoms
            loss.backward()
            return loss

        optimizer.step(closure)
        final_loss = optimizer.state_dict()['state'][0]['prev_loss']  #todo - check for convergence?
        print('iter {} (loss={}, n_atoms={})'.format(inum, final_loss,
                                                     len(best_typeindices)))
        if final_loss < best_loss:
            best_loss = final_loss
            best_coords = coords.detach()

        if inum == iters - 1:  #stick with these coordinates
            break

        #otherwise, try different starting coordinates for only those
        #atom types that have errors
        goodcoords = True
        with torch.no_grad():
            offset = 0
            agrid = gridder.forward(coords, types, radii)
            t = 0
            while offset < len(typeindices):
                t = typeindices[offset]
                #eval max error - mse will downplay a single atom of many being off
                maxerr = float(torch.square(agrid[t] - mgrid.values[t]).max())
                if maxerr > tol:
                    goodcoords = False
                    ch = mgrid.channels[t]
                    newcoords = select_atom_starts(mgrid, mgrid.values[t],
                                                   ch.atomic_radius)
                    for (i, coord) in enumerate(newcoords):
                        coords[i + offset] = torch.tensor(coord, dtype=torch.float)
                offset += tcnts[t]
        if goodcoords:
            break

    numfixes = 0
    fix_iter = 0
    if not goodcoords:
        #try to fix up an atom at a time
        offset = 0
        #reset coords to best found so far
        with torch.no_grad():
            coords[:] = best_coords
            agrid = gridder.forward(coords, types, radii)
        t = 0
        while offset < len(typeindices):
            t = typeindices[offset]
            maxerr = float(torch.square(agrid[t] - mgrid.values[t]).max())
            per_atom_volume = float(radii[offset])**3 * ((2 * np.pi)**1.5)
            while maxerr > tol:
                #identify the atom of this type closest to the place with too much density
                #and move it to the location with too little density
                tcoords = coords[offset:offset + tcnts[t]].detach().cpu().numpy()  #coordinates for this type
                diff = agrid[t] - mgrid.values[t]
                possum = float(diff[diff > 0].sum())
                negsum = float(diff[diff < 0].sum())
                maxdiff = float(diff.max())
                mindiff = float(diff.min())
                missing_density = -(negsum + possum)
                if missing_density > .75 * per_atom_volume:  #add atom
                    print("Missing density - not enough atoms?")
                    numfixes += 1
                    minpos = int((agrid[t] - mgrid.values[t]).argmin())
                    minpos = grid_to_xyz(np.unravel_index(minpos, agrid[t].shape), mgrid)
                    #add atom: change coords, types, radii, typeindices and tcnts, numatoms
                    numatoms += 1
                    typeindices = np.insert(typeindices, offset, t)
                    tcnts[t] += 1
                    with torch.no_grad():
                        newcoord = torch.tensor([minpos], device=coords.device,
                                                dtype=coords.dtype,
                                                requires_grad=True)
                        coords = torch.cat((coords[:offset], newcoord, coords[offset:]))
                        radii = torch.cat((radii[:offset], radii[offset:offset + 1], radii[offset:]))
                        types = torch.cat((types[:offset], types[offset:offset + 1], types[offset:]))
                    coords.requires_grad_(True)
                    radii.requires_grad_(True)
                    types.requires_grad_(True)
                elif mindiff**2 < tol:
                    print("No significant density underage - too many atoms?")
                    break  #todo, remove atom
                else:  #move an atom
                    numfixes += 1
                    maxpos = int((agrid[t] - mgrid.values[t]).argmax())
                    minpos = int((agrid[t] - mgrid.values[t]).argmin())
                    maxpos = grid_to_xyz(np.unravel_index(maxpos, agrid[t].shape), mgrid)
                    minpos = grid_to_xyz(np.unravel_index(minpos, agrid[t].shape), mgrid)
                    dists = np.square(tcoords - maxpos).sum(axis=1)
                    closesti = np.argmin(dists)
                    with torch.no_grad():
                        coords[offset + closesti] = torch.tensor(minpos)

                #reoptimize
                optimizer = torch.optim.LBFGS([coords], max_iter=20000,
                                              tolerance_grad=1e-9,
                                              line_search_fn='strong_wolfe')
                #TODO: only optimize this grid
                optimizer.step(closure)
                final_loss = optimizer.state_dict()['state'][0]['prev_loss']  #todo - check for convergence?
                agrid = gridder.forward(coords, types, radii)  #recompute grid
                #if maxerr hasn't improved, give up
                newerr = float(torch.square(agrid[t] - mgrid.values[t]).max())
                fix_iter += 1
                print('fix_iter {} (loss={}, n_atoms={}, newerr={}, numfixes={})'
                      .format(fix_iter, final_loss, len(typeindices), newerr, numfixes))
                if newerr >= maxerr:
                    break
                else:
                    maxerr = newerr
                    best_loss = final_loss
                    best_coords = coords.detach()
                    best_typeindices = typeindices.copy()
            #otherwise update coordinates and repeat
            offset += tcnts[t]

    # mtr22 - match the output API of generate.AtomFitter.fit
    n_atoms = len(best_typeindices)
    n_channels = len(mgrid.channels)
    best_types = torch.zeros((n_atoms, n_channels), dtype=torch.float32,
                             device=device)
    best_radii = torch.zeros((n_atoms, ), dtype=torch.float32, device=device)
    for i, t in enumerate(best_typeindices):
        ch = mgrid.channels[t]
        best_types[i, t] = 1.0
        best_radii[i] = ch.atomic_radius

    #create struct and grid from coordinates
    struct_best = generate.MolStruct(
        xyz=best_coords.cpu().numpy(),
        c=best_typeindices,
        channels=mgrid.channels,
        loss=float(best_loss),
        type_diff=(types_est - best_types.sum(dim=0)).abs().sum().item(),
        est_type_diff=(types_true - types_est).abs().sum().item(),
        time=time.time() - t_start,
        n_steps=numfixes,
    )
    grid_pred = generate.MolGrid(
        values=gridder.forward(best_coords, best_types, best_radii).cpu().detach().numpy(),
        channels=mgrid.channels,
        center=mgrid.center,
        resolution=mgrid.resolution,
        visited_structs=[],
        src_struct=struct_best,
    )
    return grid_pred
    balanced=True,
    shuffle=True,
    duplicate_first=True,
    default_batch_size=batch_size,
    iteration_scheme=molgrid.IterationScheme.SmallEpoch)
traine.populate(args.trainfile)

teste = molgrid.ExampleProvider(
    ligmolcache=args.ligte,
    recmolcache=args.recte,
    shuffle=True,
    duplicate_first=True,
    default_batch_size=batch_size,
    iteration_scheme=molgrid.IterationScheme.SmallEpoch)
teste.populate(args.testfile)

gmaker = molgrid.GridMaker(binary=args.binary_rep)
dims = gmaker.grid_dimensions(14 * 4)  # only one rec+one lig per example
tensor_shape = (batch_size, ) + dims

actual_dims = (dims[0] // 2, *dims[1:])
siam_arm = default2018(actual_dims, args.rep_size)

if args.use_weights is not None:
    if os.path.isfile(args.use_weights):
        print("=> loading checkpoint '{}'".format(args.use_weights))
        checkpoint = torch.load(args.use_weights, map_location="cpu")

        # rename moco pre-trained keys
        state_dict = checkpoint['state_dict']
        for k in list(state_dict.keys()):
            del_end = None
            # retain only encoder_q up to before the embedding layer
def simple_atom_fit(mgrid, types, iters=10, tol=0.01, device='cuda', grm=-1.5):
    '''Fit atoms to AtomGrid.  types are ignored as the number of atoms of each
    type is always inferred from the density.  Returns the AtomGrid of the
    placed atoms and the AtomStruct'''
    t_start = time.time()

    #for every channel, select some coordinates and setup the type/radius vectors
    initcoords = []
    typevecs = []
    radii = []
    typeindices = []
    numatoms = 0
    tcnts = {}
    values = torch.tensor(mgrid.values, device=device)

    for (t, G) in enumerate(values):
        ch = mgrid.channels[t]
        coords = select_atom_starts(mgrid, G, ch.atomic_radius)
        if coords:
            tvec = np.zeros(len(mgrid.channels))
            tvec[t] = 1.0
            tcnt = len(coords)
            numatoms += tcnt

            r = mgrid.channels[t].atomic_radius
            initcoords += coords
            typevecs += [tvec] * tcnt
            typeindices += [t] * tcnt
            radii += [r] * tcnt
            tcnts[t] = tcnt

    typevecs = np.array(typevecs)
    initcoords = np.array(initcoords)
    typeindices = np.array(typeindices)
    #print('typeindices',typeindices)

    #setup gridder
    center = tuple([float(c) for c in mgrid.center])
    gridder = molgrid.Coords2Grid(molgrid.GridMaker(
        dimension=mgrid.dimension,
        resolution=mgrid.resolution,
        gaussian_radius_multiple=grm),
        center=center)

    #having setup input coordinates, optimize with BFGS
    coords = torch.tensor(initcoords, dtype=torch.float32, requires_grad=True,
                          device=device)
    types = torch.tensor(typevecs, dtype=torch.float32, device=device)
    radii = torch.tensor(radii, dtype=torch.float32, device=device)

    best_loss = np.inf
    best_coords = None
    best_typeindices = typeindices  #save in case number of atoms changes
    goodcoords = False
    bestagrid = torch.zeros(values.shape, dtype=torch.float32, device=device)

    if len(initcoords) == 0:  #no atoms
        mol = AtomStruct(np.zeros((0, 3)), np.zeros(0), mgrid.channels,
                         L2_loss=values.square().sum() / values.numel(),
                         time=time.time() - t_start,
                         iterations=0,
                         numfixes=0,
                         type_diff=0,
                         est_type_diff=0,
                         visited_structs=[])
        return mol, bestagrid

    for inum in range(iters):
        optimizer = torch.optim.LBFGS([coords], max_iter=20000,
                                      tolerance_grad=1e-9,
                                      line_search_fn='strong_wolfe')

        def closure():
            optimizer.zero_grad()
            agrid = gridder.forward(coords, types, radii)
            loss = torch.square(agrid - values).sum() / numatoms
            loss.backward()
            return loss

        optimizer.step(closure)
        final_loss = optimizer.state_dict()['state'][0]['prev_loss']  #todo - check for convergence?
        if final_loss < best_loss:
            best_loss = final_loss
            best_coords = coords.detach().cpu()

        if inum == iters - 1:  #stick with these coordinates
            break

        #otherwise, try different starting coordinates for only those
        #atom types that have errors
        goodcoords = True
        with torch.no_grad():
            offset = 0
            agrid = gridder.forward(coords, types, radii)
            t = 0
            while offset < len(typeindices):
                t = typeindices[offset]
                #eval max error - mse will downplay a single atom of many being off
                maxerr = float(torch.square(agrid[t] - values[t]).max())
                if maxerr > tol:
                    goodcoords = False
                    ch = mgrid.channels[t]
                    newcoords = select_atom_starts(mgrid, values[t],
                                                   ch.atomic_radius)
                    for (i, coord) in enumerate(newcoords):
                        coords[i + offset] = torch.tensor(coord, dtype=torch.float)
                offset += tcnts[t]
        if goodcoords:
            break

    bestagrid = agrid.clone()
    numfixes = 0
    if not goodcoords:
        #try to fix up an atom at a time
        offset = 0
        #reset coords to best found so far
        with torch.no_grad():
            coords[:] = best_coords
            agrid = gridder.forward(coords, types, radii)
        t = 0
        while offset < len(typeindices):
            t = typeindices[offset]
            maxerr = float(torch.square(agrid[t] - values[t]).max())
            #print('maxerr',maxerr)
            per_atom_volume = float(radii[offset])**3 * ((2 * np.pi)**1.5)
            while maxerr > tol:
                #identify the atom of this type closest to the place with too much density
                #and move it to the location with too little density
                tcoords = coords[offset:offset + tcnts[t]].detach().cpu().numpy()  #coordinates for this type
                diff = agrid[t] - values[t]
                possum = float(diff[diff > 0].sum())
                negsum = float(diff[diff < 0].sum())
                maxdiff = float(diff.max())
                mindiff = float(diff.min())
                missing_density = -(negsum + possum)
                #print('Type %d numcoords %d maxdiff %.5f mindiff %.5f missing %.5f'%(t,len(tcoords),maxdiff,mindiff,missing_density))
                if missing_density > .25 * per_atom_volume:  #add atom MAGIC NUMBER ALERT
                    #needs to be enough total missing density to be close to a whole atom,
                    #but the missing density also needs to be somewhat concentrated
                    #print("Missing density - not enough atoms?")
                    numfixes += 1
                    minpos = int((agrid[t] - values[t]).argmin())
                    minpos = grid_to_xyz(np.unravel_index(minpos, agrid[t].shape), mgrid)
                    #add atom: change coords, types, radii, typeindices and tcnts, numatoms
                    numatoms += 1
                    typeindices = np.insert(typeindices, offset, t)
                    tcnts[t] += 1
                    with torch.no_grad():
                        newcoord = torch.tensor([minpos], device=coords.device,
                                                dtype=coords.dtype,
                                                requires_grad=True)
                        coords = torch.cat((coords[:offset], newcoord, coords[offset:]))
                        radii = torch.cat((radii[:offset], radii[offset:offset + 1], radii[offset:]))
                        types = torch.cat((types[:offset], types[offset:offset + 1], types[offset:]))
                    coords.requires_grad_(True)
                    radii.requires_grad_(True)
                    types.requires_grad_(True)
                elif missing_density < -.75 * per_atom_volume:
                    print("Too many atoms?")
                    break  #todo, remove atom
                else:  #move an atom
                    numfixes += 1
                    maxpos = int((agrid[t] - values[t]).argmax())
                    minpos = int((agrid[t] - values[t]).argmin())
                    maxpos = grid_to_xyz(np.unravel_index(maxpos, agrid[t].shape), mgrid)
                    minpos = grid_to_xyz(np.unravel_index(minpos, agrid[t].shape), mgrid)
                    dists = np.square(tcoords - maxpos).sum(axis=1)
                    closesti = np.argmin(dists)
                    with torch.no_grad():
                        coords[offset + closesti] = torch.tensor(minpos)

                #reoptimize
                optimizer = torch.optim.LBFGS([coords], max_iter=20000,
                                              tolerance_grad=1e-9,
                                              line_search_fn='strong_wolfe')
                #TODO: only optimize this grid
                optimizer.step(closure)
                final_loss = optimizer.state_dict()['state'][0]['prev_loss']  #todo - check for convergence?
                agrid = gridder.forward(coords, types, radii)  #recompute grid
                #if maxerr hasn't improved, give up
                newerr = float(torch.square(agrid[t] - values[t]).max())
                #print(t,'newerr',newerr,'maxerr',maxerr,'maxdiff',maxdiff,'mindiff',mindiff,'missing',missing_density)
                if newerr >= maxerr:
                    #don't give up if there's still a lot left to fit
                    #and the missing density isn't all (very) shallow
                    if missing_density < per_atom_volume or mindiff > -0.1:  #magic number!
                        break
                else:
                    maxerr = newerr
                    best_loss = final_loss
                    best_coords = coords.detach().cpu()
                    best_typeindices = typeindices.copy()
                    bestagrid = agrid.clone()
            #otherwise update coordinates and repeat
            offset += tcnts[t]

    #create struct from coordinates
    mol = AtomStruct(best_coords.numpy(), best_typeindices, mgrid.channels,
                     L2_loss=float(best_loss),
                     time=time.time() - t_start,
                     iterations=inum,
                     numfixes=numfixes,
                     type_diff=0,
                     est_type_diff=0,
                     visited_structs=[])
    # print('losses',final_loss,best_loss,len(best_coords))
    return mol, bestagrid
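# A hedged usage sketch (names are illustrative; `mgrid` is an AtomGrid
# supplied by the caller, and `types` is ignored by the fitter as documented):
# struct, fit_grid = simple_atom_fit(mgrid, types=None, iters=25, tol=0.01,
#                                    device='cuda', grm=-1.5)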
def test_vector_types():
    g1 = molgrid.GridMaker(resolution=.25, dimension=6.0)
    c = np.array([[0, 0, 0], [2, 0, 0]], np.float32)
    t = np.array([0, 1], np.float32)
    vt = np.array([[1.0, 0], [0, 1.0]], np.float32)
    vt2 = np.array([[0.5, 0.0], [0.0, 0.5]], np.float32)
    r = np.array([1.0, 1.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 2)
    vcoords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid2f(vt),
                                    molgrid.Grid1f(r))
    v2coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid2f(vt2),
                                     molgrid.Grid1f(r))

    shape = g1.grid_dimensions(2)
    reference = molgrid.MGrid4f(*shape)
    vgrid = molgrid.MGrid4f(*shape)
    v2grid = molgrid.MGrid4f(*shape)
    v3grid = molgrid.MGrid4f(*shape)

    g1.forward((0, 0, 0), coords, reference.cpu())
    g1.forward((0, 0, 0), vcoords, vgrid.cpu())
    g1.forward((0, 0, 0), v2coords, v2grid.cpu())
    g1.forward((0, 0, 0), c, vt, r, v3grid.cpu())
    np.testing.assert_allclose(reference.tonumpy(), vgrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vgrid.tonumpy(), v3grid.tonumpy(), atol=1e-6)

    v2g = v2grid.tonumpy()
    g = reference.tonumpy()

    np.testing.assert_allclose(g[0, :], v2g[0, :] * 2.0, atol=1e-5)
    np.testing.assert_allclose(g[1, :], v2g[1, :] * 2.0, atol=1e-5)

    vgridgpu = molgrid.MGrid4f(*shape)
    v2gridgpu = molgrid.MGrid4f(*shape)
    g1.forward((0, 0, 0), vcoords, vgridgpu.gpu())
    g1.forward((0, 0, 0), v2coords, v2gridgpu.gpu())

    np.testing.assert_allclose(reference.tonumpy(), vgridgpu.tonumpy(), atol=1e-5)
    v2gpu = v2gridgpu.tonumpy()

    np.testing.assert_allclose(g[0, :], v2gpu[0, :] * 2.0, atol=1e-5)
    np.testing.assert_allclose(g[1, :], v2gpu[1, :] * 2.0, atol=1e-5)

    #create target grid with equal type density at 1,0,0
    tc = molgrid.Grid2f(np.array([[1, 0, 0]], np.float32))
    tv = molgrid.Grid2f(np.array([[0.5, 0.5]], np.float32))
    tr = molgrid.Grid1f(np.array([1.0], np.float32))
    targetc = molgrid.CoordinateSet(tc, tv, tr)
    tgrid = molgrid.MGrid4f(*shape)
    g1.forward((0, 0, 0), targetc, tgrid.cpu())

    gradc = molgrid.MGrid2f(2, 3)
    gradt = molgrid.MGrid2f(2, 2)
    g1.backward((0, 0, 0), vcoords, tgrid.cpu(), gradc.cpu(), gradt.cpu())
    assert gradc[0, 0] == approx(-gradc[1, 0], abs=1e-4)
    assert gradc[0, 0] > 0

    gradc.fill_zero()
    gradt.fill_zero()
    g1.backward((0, 0, 0), vcoords, tgrid.gpu(), gradc.gpu(), gradt.gpu())
    assert gradc[0, 0] == approx(-gradc[1, 0], abs=1e-4)
    assert gradc[0, 0] > 0
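# A hedged finite-difference cross-check of the analytic coordinate gradient
# above.  Since `backward` treats the passed grid as dLoss/dgrid, the implied
# scalar loss is sum(forward(coords) * tgrid); `eps` is an arbitrary sketch
# choice and the helper name is ours:
def fd_grad(g1, shape, c, vt, r, tgrid, i, j, eps=1e-3):
    def loss_at(cc):
        out = molgrid.MGrid4f(*shape)
        g1.forward((0, 0, 0), cc, vt, r, out.cpu())
        return float((out.tonumpy() * tgrid.tonumpy()).sum())
    # central difference on coordinate j of atom i
    cp = c.copy(); cp[i, j] += eps
    cm = c.copy(); cm[i, j] -= eps
    return (loss_at(cp) - loss_at(cm)) / (2 * eps)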
def main(args):
    # Fix seeds
    molgrid.set_random_seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Set CuDNN options for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set up libmolgrid
    e = molgrid.ExampleProvider(data_root=args.data_root, balanced=True,
                                shuffle=True)
    e.populate(args.train_file)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (args.batch_size, ) + dims

    # Construct input tensors
    input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
    float_labels = torch.zeros(args.batch_size, dtype=torch.float32)

    # Initialise network - Two models currently available (see models.py for details)
    if args.model == 'Ragoza':
        model = Basic_CNN(dims).to('cuda')
    elif args.model == 'Imrie':
        model = DenseNet(dims, block_config=(4, 4, 4)).to('cuda')
    else:
        print("Please specify a valid architecture")
        exit()

    # Set weights for network
    if args.weights:
        model.load_state_dict(torch.load(args.weights))
        print("Loaded model parameters")
    else:
        model.apply(weights_init)
        print("Randomly initialised model parameters")

    # Print number of parameters in model
    print("Number of model params: %dK" %
          (sum([x.nelement() for x in model.parameters()]) / 1000))

    # Train network
    # Construct optimizer
    optimizer = optim.SGD(model.parameters(), lr=args.base_lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = lr_scheduler.ExponentialLR(optimizer, args.anneal_rate)
    print("Initial learning rate: %.6f" % scheduler.get_lr()[0])

    # Train loop
    losses = []
    for it in range(1, args.iterations + 1):
        # Load data
        batch = e.next_batch(args.batch_size)
        gmaker.forward(batch, input_tensor, random_rotation=args.rotate,
                       random_translation=args.translate)
        batch.extract_label(0, float_labels)
        labels = float_labels.long().to('cuda')

        # Train
        optimizer.zero_grad()
        output = model(input_tensor)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip_gradients)
        optimizer.step()
        losses.append(float(loss))

        # Anneal learning rate
        if it % args.anneal_iter == 0:
            scheduler.step()
            print("Current iteration: %d, Annealing learning rate: %.6f" %
                  (it, scheduler.get_lr()[0]))

        # Progress
        if it % args.display_iter == 0:
            print("Current iteration: %d, Loss: %.3f" %
                  (it, float(np.mean(losses[-args.display_iter:]))))

        # Save model
        if it % args.save_iter == 0:
            print("Saving model after %d iterations."
                  % it)
            torch.save(model.state_dict(),
                       args.save_dir + "/" + args.save_prefix + ".iter-" + str(it))

        # Test model
        if args.test_file != '' and it % args.test_iter == 0:
            # Set to test mode
            model.eval()
            predictions = []
            labs = []
            e_test = molgrid.ExampleProvider(data_root=args.data_root,
                                             balanced=False, shuffle=False)
            e_test.populate(args.test_file)
            num_samples = e_test.size()
            num_batches = -(-num_samples // args.batch_size)
            for _ in range(num_batches):
                # Load data
                batch = e_test.next_batch(args.batch_size)
                batch_predictions = []
                batch.extract_label(0, float_labels)
                labs.extend(list(float_labels.detach().cpu().numpy()))
                for _ in range(args.num_rotate):
                    gmaker.forward(batch, input_tensor,
                                   random_rotation=args.rotate,
                                   random_translation=0.0)
                    # Predict
                    output = F.softmax(model(input_tensor), dim=1)
                    batch_predictions.append(
                        list(output.detach().cpu().numpy()[:, 0]))
                predictions.extend(list(np.mean(batch_predictions, axis=0)))
            # Print performance
            labs = labs[:num_samples]
            predictions = predictions[:num_samples]
            print("Current iter: %d, AUC: %.2f" %
                  (it, roc_auc_score(labs, predictions)), flush=True)
            # Set to train mode
            model.train()
def test_train_torch_cnn():
    batch_size = 50
    datadir = os.path.dirname(__file__) + '/data'
    fname = datadir + "/small.types"

    molgrid.set_random_seed(0)
    torch.manual_seed(0)
    np.random.seed(0)

    class Net(nn.Module):
        def __init__(self, dims):
            super(Net, self).__init__()
            self.pool0 = nn.MaxPool3d(2)
            self.conv1 = nn.Conv3d(dims[0], 32, kernel_size=3, padding=1)
            self.pool1 = nn.MaxPool3d(2)
            self.conv2 = nn.Conv3d(32, 64, kernel_size=3, padding=1)
            self.pool2 = nn.MaxPool3d(2)
            self.conv3 = nn.Conv3d(64, 128, kernel_size=3, padding=1)

            self.last_layer_size = dims[1] // 8 * dims[2] // 8 * dims[3] // 8 * 128
            self.fc1 = nn.Linear(self.last_layer_size, 2)

        def forward(self, x):
            x = self.pool0(x)
            x = F.relu(self.conv1(x))
            x = self.pool1(x)
            x = F.relu(self.conv2(x))
            x = self.pool2(x)
            x = F.relu(self.conv3(x))
            x = x.view(-1, self.last_layer_size)
            x = self.fc1(x)
            return x

    def weights_init(m):
        if isinstance(m, nn.Conv3d) or isinstance(m, nn.Linear):
            init.xavier_uniform_(m.weight.data)

    e = molgrid.ExampleProvider(data_root=datadir + "/structs", balanced=True,
                                shuffle=True)
    e.populate(fname)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (batch_size, ) + dims

    model = Net(dims).to('cuda')
    model.apply(weights_init)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
    float_labels = torch.zeros(batch_size, dtype=torch.float32)

    losses = []
    for iteration in range(100):
        #load data
        batch = e.next_batch(batch_size)
        gmaker.forward(batch, input_tensor, 0,
                       random_rotation=False)  #not rotating since convergence is faster this way
        batch.extract_label(0, float_labels)
        labels = float_labels.long().to('cuda')

        optimizer.zero_grad()
        output = model(input_tensor)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        optimizer.step()
        losses.append(float(loss))

    avefinalloss = np.array(losses[-5:]).mean()
    assert avefinalloss < .4
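# A hedged follow-up sketch: measuring batch accuracy of the trained model on
# a fresh batch (not part of the test; the helper name and settings are
# illustrative):
def eval_batch(model, provider, gmaker, tensor_shape, batch_size=50):
    input_tensor = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
    labels = torch.zeros(batch_size, dtype=torch.float32)
    with torch.no_grad():
        batch = provider.next_batch(batch_size)
        gmaker.forward(batch, input_tensor, 0, random_rotation=False)
        batch.extract_label(0, labels)
        # fraction of examples whose argmax class matches the label
        preds = model(input_tensor).argmax(dim=1).cpu()
        return (preds == labels.long()).float().mean().item()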
default="files/ligmap", help="Ligand types file") p.add_argument("-o", "--output", type=str, default=None, help="Output file") p.add_argument("--dx", action="store_true", help="Output grids as DX files") args = p.parse_args() system = os.path.splitext(os.path.basename(args.sdf))[0] if args.output is None: args.output = f"{system}.pcd" resolution = args.resolution dimension = args.dimension gm = molgrid.GridMaker(resolution=resolution, dimension=dimension) t = molgrid.FileMappedGninaTyper(args.ligmap) # Grid dimensions (including types) gdims = gm.grid_dimensions(t.num_types()) # Pre-allocate grid # Only one example (batch size is 1) grid = torch.zeros(1, *gdims, dtype=torch.float32, device="cuda:0") obmol = next(pybel.readfile("sdf", args.sdf)) obmol.addh() print(obmol, end="") # Use OpenBabel molecule object (obmol.OBmol) instead of PyBel molecule (obmol)
def __init__(self, resolution=0.5, dimension=23.5): gmaker = molgrid.GridMaker(resolution, dimension, gaussian_radius_multiple=-1.5) super().__init__(gmaker)
def main():
    args = parser.parse_args()

    tgs = ['MoCo_SingleGPU']
    wandb.init(entity='andmcnutt', project='DDG_model_Regression',
               config=args, tags=tgs)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args.gpu = device
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder_single.MoCo(
        args.arch, args.moco_dim, args.moco_k, args.moco_m, args.moco_t,
        args.mlp, semi_supervised=(True if args.semi_super else False))
    print(model)

    torch.cuda.set_device(device)
    model = model.to(device)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset = molgrid.torch_bindings.MolDataset(
        args.data, ligmolcache=args.ligmolcache,
        recmolcache=args.recmolcache, data_root=args.dataroot)

    gmaker = molgrid.GridMaker()
    shape = gmaker.grid_dimensions(28)

    train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None), num_workers=args.workers,
        sampler=train_sampler, drop_last=True,
        collate_fn=moco.loader.collateMolDataset)

    wandb.watch(model, log='all')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, gmaker, shape,
              epoch, args)

        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, is_best=False, filename='checkpoint_{:04d}.pth.tar'.format(epoch))
def __init__(
    self,
    data_file,
    data_root,
    batch_size,
    rec_typer,
    lig_typer,
    use_rec_elems=True,
    resolution=0.5,
    dimension=None,
    grid_size=None,
    shuffle=False,
    random_rotation=False,
    random_translation=0.0,
    diff_cond_transform=False,
    diff_cond_structs=False,
    n_samples=1,
    rec_molcache=None,
    lig_molcache=None,
    cache_structs=True,
    device='cuda',
    debug=False,
):
    super().__init__()
    assert (dimension or grid_size) and not (dimension and grid_size), \
        'must specify one of either dimension or grid_size'
    if grid_size:
        dimension = atom_grids.size_to_dimension(grid_size, resolution)

    # create receptor and ligand atom typers
    self.lig_typer = AtomTyper.get_typer(*lig_typer.split('-'), rec=False)
    self.rec_typer = \
        AtomTyper.get_typer(*rec_typer.split('-'), rec=use_rec_elems)

    atom_typers = [self.rec_typer, self.lig_typer]
    if diff_cond_structs:  # duplicate atom typers
        atom_typers *= 2

    # create example provider
    self.ex_provider = molgrid.ExampleProvider(
        *atom_typers,
        data_root=data_root,
        recmolcache=rec_molcache or '',
        ligmolcache=lig_molcache or '',
        cache_structs=cache_structs,
        shuffle=shuffle,
        num_copies=n_samples,
    )

    # create molgrid maker
    self.grid_maker = molgrid.GridMaker(
        resolution=resolution,
        dimension=dimension,
        gaussian_radius_multiple=-1.5,
    )
    self.batch_size = batch_size

    # transformation settings
    self.random_rotation = random_rotation
    self.random_translation = random_translation
    self.diff_cond_transform = diff_cond_transform
    self.diff_cond_structs = diff_cond_structs
    self.debug = debug
    self.device = device

    # transform interpolation state
    self.cond_interp = TransformInterpolation(n_samples=n_samples)

    # load data from file
    self.ex_provider.populate(data_file)
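# A hedged construction sketch.  The enclosing class name (`AtomGridData`),
# the typer spec strings, and the file paths are all assumptions for
# illustration, not confirmed by this excerpt:
# data = AtomGridData(
#     data_file='examples.types',   # hypothetical types file
#     data_root='data/structs',     # hypothetical structure root
#     batch_size=8,
#     rec_typer='oadc-1.0',         # hypothetical typer spec
#     lig_typer='oadc-1.0',
#     resolution=0.5,
#     grid_size=48,                 # converted to dimension internally
# )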