Example #1
def test_radius_multiples():
    g1 = molgrid.GridMaker(resolution=.1, dimension=6.0)
    c = np.array([[0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    r = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 1)
    shape = g1.grid_dimensions(1)
    cpugrid = molgrid.MGrid4f(*shape)
    cpugrid2 = molgrid.MGrid4f(*shape)
    gpugrid = molgrid.MGrid4f(*shape)

    g1.forward((0, 0, 0), coords, cpugrid.cpu())
    g1.forward((0, 0, 0), coords, gpugrid.gpu())
    g1.forward((0, 0, 0), c, t, r, cpugrid2.cpu())

    np.testing.assert_allclose(cpugrid.tonumpy(), gpugrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(cpugrid.tonumpy(),
                               cpugrid2.tonumpy(),
                               atol=1e-6)
    g = cpugrid.tonumpy()

    assert g[0, 30, 30, 30] == approx(1)

    #cut a line across
    line = g[0, 30, 30, :]
    xvals = np.abs(np.arange(-3, 3.1, .1))
    gauss = np.exp(-2 * xvals**2)
    for i in range(20, 41):
        assert line[i] == approx(gauss[i])

    for i in list(range(0, 15)) + list(range(45, 61)):
        assert line[i] == approx(0)

    quad = 4 * np.exp(-2) * xvals**2 - 12 * np.exp(-2) * xvals + 9 * np.exp(-2)
    for i in list(range(15, 20)) + list(range(41, 45)):
        assert line[i] == approx(quad[i], abs=1e-5)

    #funkier grid
    g2 = molgrid.GridMaker(resolution=.1,
                           dimension=6.0,
                           radius_scale=0.5,
                           gaussian_radius_multiple=3.0)
    cpugrid = molgrid.MGrid4f(*shape)
    gpugrid = molgrid.MGrid4f(*shape)
    g2.forward((0, 0, 0), coords, cpugrid.cpu())
    g2.forward((0, 0, 0), coords, gpugrid.gpu())

    np.testing.assert_allclose(cpugrid.tonumpy(), gpugrid.tonumpy(), atol=1e-5)
    g = cpugrid.tonumpy()

    assert g[0, 30, 30, 30] == approx(1)

    #cut a line across
    line = g[0, 30, :, 30]
    xvals = np.abs(np.arange(-3, 3.1, .1)) * 2.0
    gauss = np.exp(-2 * xvals**2)
    #should be gaussian the whole way, although it quickly hits numerical zero
    for i in range(0, 61):
        assert line[i] == approx(gauss[i], abs=1e-5)
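The assertions above pin down the implied density profile: a Gaussian exp(-2 x^2) out to one atomic radius, the matching quadratic out to 1.5 radii, and zero beyond. A minimal NumPy sketch of that profile follows; the function name and the r parameter are illustrative, derived from the test's own constants rather than from the molgrid API.

import numpy as np

def density_profile(d, r=1.0):
    #Gaussian to r, matching quadratic to 1.5*r, zero beyond (continuous at both joins)
    x = np.abs(d) / r
    gauss = np.exp(-2 * x**2)
    quad = np.exp(-2) * (4 * x**2 - 12 * x + 9)
    return np.where(x <= 1.0, gauss, np.where(x < 1.5, quad, 0.0))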
Example #2
def test_vector_types_mol():
    '''Test vector types with a real molecule'''
    fname = datadir+"/small.types"
    e = molgrid.ExampleProvider(data_root=datadir+"/structs")    
    e.populate(fname)
    ex = e.next()
        
    ev = molgrid.ExampleProvider(data_root=datadir+"/structs",make_vector_types=True)
    ev.populate(fname)
    exv = ev.next()
    
    assert exv.has_vector_types()
    assert not ex.has_vector_types()

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(ex.num_types()) # this should be grid_dims or get_grid_dims    
    
    mgridout = molgrid.MGrid4f(*dims)    
    mgridgpu = molgrid.MGrid4f(*dims)
        
    mgridoutv = molgrid.MGrid4f(*dims)    
    mgridgpuv = molgrid.MGrid4f(*dims)
    
    d = np.ones(dims,np.float32)
    diff = molgrid.MGrid4f(*dims)
    diff.copyFrom(d)       
    
    gmaker.forward(ex, mgridout.cpu())
    gmaker.forward(ex, mgridgpu.gpu())
    center = ex.coord_sets[-1].center()
    c = ex.merge_coordinates()
    backcoordscpu = molgrid.MGrid2f(c.size(),3)
    backcoordsgpu = molgrid.MGrid2f(c.size(),3)
    
    gmaker.backward(center, c, diff.cpu(), backcoordscpu.cpu())
    gmaker.backward(center, c, diff.gpu(), backcoordsgpu.gpu())

    #vector types
    gmaker.set_radii_type_indexed(True)
    
    gmaker.forward(exv, mgridoutv.cpu())
    gmaker.forward(exv, mgridgpuv.gpu())
    
    cv = exv.merge_coordinates()
    vbackcoordscpu = molgrid.MGrid2f(cv.size(),3)
    vbackcoordsgpu = molgrid.MGrid2f(cv.size(),3)
    vbacktypescpu = molgrid.MGrid2f(cv.size(),cv.num_types())
    vbacktypesgpu = molgrid.MGrid2f(cv.size(),cv.num_types())
        
    gmaker.backward(center, cv, diff.cpu(), vbackcoordscpu.cpu(),vbacktypescpu.cpu())
    gmaker.backward(center, cv, diff.gpu(), vbackcoordsgpu.gpu(),vbacktypesgpu.gpu())
    
    np.testing.assert_allclose(mgridout.tonumpy(),mgridoutv.tonumpy(),atol=1e-5)
    np.testing.assert_allclose(mgridgpu.tonumpy(),mgridgpuv.tonumpy(),atol=1e-5)
    np.testing.assert_allclose(mgridoutv.tonumpy(),mgridgpuv.tonumpy(),atol=1e-5)

    np.testing.assert_allclose(vbackcoordscpu.tonumpy(),backcoordscpu.tonumpy(),atol=1e-5)
    np.testing.assert_allclose(vbackcoordsgpu.tonumpy(),backcoordsgpu.tonumpy(),atol=1e-5)
    np.testing.assert_allclose(vbackcoordscpu.tonumpy(),vbackcoordsgpu.tonumpy(),atol=1e-4)
    np.testing.assert_allclose(vbacktypescpu.tonumpy(),vbacktypesgpu.tonumpy(),atol=1e-4)
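The index-typed and vector-typed grids agree because a one-hot type vector distributes an atom's density exactly like an index type. A toy illustration of that equivalence (all constants illustrative):

import numpy as np

type_index = 1                             #index typing: all density goes to channel 1
type_vector = np.array([0.0, 1.0, 0.0])    #vector typing: one-hot over 3 channels
density = 0.75                             #density value at some voxel
channels = type_vector * density
assert channels[type_index] == density and channels.sum() == density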
Example #3
def test_batched_function():
    for dev in ('cuda','cpu'):
        gmaker = molgrid.GridMaker(resolution=.1,dimension=6.0)
        c = torch.tensor([[[1.0,0,0],[1,0,0]],[[0,1,0],[0,1,0]]],device=dev,dtype=torch.float32,requires_grad=True)
        vt = torch.tensor([[[0,1.0,0],[1.0,0,0]],[[0,1.0,0],[1.0,0,0]]],device=dev,dtype=torch.float32,requires_grad=True)
        r = torch.tensor([[2.0,2.0],[2.0,2.0]],device=dev,dtype=torch.float32)
        
        grid = BatchedCoords2GridFunction.apply(gmaker, (0,0,0), c, vt, r)
    
        shape = gmaker.grid_dimensions(3)    
        #make diff with gradient in center
        diff = torch.zeros(2,*shape,dtype=torch.float32,device=dev)
        diff[0,0,30,30,30] = 1.0  
        diff[0,1,30,30,30] = -1.0  
        diff[1,0,30,30,30] = 1.0  
        diff[1,1,30,30,30] = -1.0              
        grid.backward(diff)
        assert c.grad[0][0].cpu().numpy() == approx([0.60653,0,0],abs=1e-4)
        assert c.grad[0][1].cpu().numpy() == approx([-0.60653,0,0],abs=1e-4)
        
        assert vt.grad[0][0].cpu().numpy() == approx([0.60653,-0.60653,0],abs=1e-4)
        assert vt.grad[0][1].cpu().numpy() == approx([0.60653,-0.60653,0],abs=1e-4)    
        
        assert c.grad[1][0].cpu().numpy() == approx([0,0.60653,0],abs=1e-4)
        assert c.grad[1][1].cpu().numpy() == approx([0,-0.60653,0],abs=1e-4)
        
        assert vt.grad[1][0].cpu().numpy() == approx([0.60653,-0.60653,0],abs=1e-4)
        assert vt.grad[1][1].cpu().numpy() == approx([0.60653,-0.60653,0],abs=1e-4)    
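The expected 0.60653 is consistent with a Gaussian density exp(-2 d^2/r^2): each atom sits at distance d = 1 from the probed grid point and has radius r = 2, giving exp(-0.5). A quick check under that assumption:

import numpy as np
assert np.isclose(np.exp(-2 * 1.0**2 / 2.0**2), 0.60653, atol=1e-5)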
Example #4
def test_backward_vec():
    g1 = molgrid.GridMaker(resolution=.1, dimension=6.0)
    c = np.array([[1.0, 0, 0], [-1, -1, 0]], np.float32)
    t = np.array([[0, 1.0, 0], [1.0, 0, 0]], np.float32)
    r = np.array([2.0, 2.0], np.float32)
    coords = molgrid.CoordinateSet(c, t, r)
    shape = g1.grid_dimensions(3)

    #make diff with gradient in center
    diff = molgrid.MGrid4f(*shape)
    diff[0, 30, 30, 30] = 1.0
    diff[1, 30, 30, 30] = -1.0

    cpuatoms = molgrid.MGrid2f(2, 3)
    cputypes = molgrid.MGrid2f(2, 3)
    gpuatoms = molgrid.MGrid2f(2, 3)
    gputypes = molgrid.MGrid2f(2, 3)

    g1.backward((0, 0, 0), coords, diff.cpu(), cpuatoms.cpu(), cputypes.cpu())

    assert cputypes[0][0] > 0
    assert cputypes[0][1] < 0
    assert cputypes[0][2] == 0

    g1.backward((0, 0, 0), coords, diff.gpu(), gpuatoms.gpu(), gputypes.gpu())

    np.testing.assert_allclose(gpuatoms.tonumpy(),
                               cpuatoms.tonumpy(),
                               atol=1e-5)
    np.testing.assert_allclose(gputypes.tonumpy(),
                               cputypes.tonumpy(),
                               atol=1e-5)
Example #5
def test_make_vector_types_ex_provider(capsys):
    fname = datadir + "/ligonly.types"
    e = molgrid.ExampleProvider(data_root=datadir + "/structs",
                                make_vector_types=True)
    e.populate(fname)
    batch_size = 10
    b = e.next_batch(batch_size)

    gmaker = molgrid.GridMaker(dimension=23.5, radius_type_indexed=True)
    shape = gmaker.grid_dimensions(
        molgrid.defaultGninaLigandTyper.num_types() + 1)
    mgrid = molgrid.MGrid5f(batch_size, *shape)

    c = b[0].merge_coordinates()
    tv = c.type_vector.tonumpy()
    assert tv.shape == (10, 15)
    assert tv[0].sum() == 1.0
    assert tv[0][8] == 1.0

    gmaker.forward(b, mgrid)

    assert b[0].coord_sets[0].has_vector_types()
    assert b[0].coord_sets[1].has_vector_types()

    assert b[0].type_size() == 15
Example #6
def test_backwards():
    g1 = molgrid.GridMaker(resolution=.1, dimension=6.0)
    c = np.array([[1.0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    r = np.array([2.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 1)
    shape = g1.grid_dimensions(1)

    #make diff with gradient in center
    diff = molgrid.MGrid4f(*shape)
    diff[0, 30, 30, 30] = 1.0

    cpuatoms = molgrid.MGrid2f(1, 3)
    gpuatoms = molgrid.MGrid2f(1, 3)

    #apply random rotation
    T = molgrid.Transform((0, 0, 0), 0, True)
    T.forward(coords, coords)

    g1.backward((0, 0, 0), coords, diff.cpu(), cpuatoms.cpu())
    g1.backward((0, 0, 0), coords, diff.gpu(), gpuatoms.gpu())

    T.backward(cpuatoms.cpu(), cpuatoms.cpu(), False)
    T.backward(gpuatoms.gpu(), gpuatoms.gpu(), False)

    print(cpuatoms.tonumpy(), gpuatoms.tonumpy())
    # results should be ~ -.6, 0, 0
    np.testing.assert_allclose(cpuatoms.tonumpy(),
                               gpuatoms.tonumpy(),
                               atol=1e-5)
    np.testing.assert_allclose(cpuatoms.tonumpy().flatten(),
                               [-0.60653067, 0, 0],
                               atol=1e-5)
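The expected [-0.60653, 0, 0] is the analytic derivative of that same Gaussian density with respect to the atom position: d/dx exp(-2 x^2/r^2) at x = 1 with r = 2 is -exp(-0.5). A quick check under that assumption:

import numpy as np
x, r = 1.0, 2.0
grad = np.exp(-2 * x**2 / r**2) * (-4 * x / r**2)
assert np.isclose(grad, -0.60653, atol=1e-5)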
Example #7
def test_dx():
    fname = datadir + "/small.types"
    e = molgrid.ExampleProvider(data_root=datadir + "/structs")
    e.populate(fname)
    ex = e.next()
    c = ex.coord_sets[1]

    assert np.min(c.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(
        c.max_type)  # this should be grid_dims or get_grid_dims
    center = c.coord.tonumpy().mean(axis=0)
    center = tuple(center.astype(float))

    mgridout = molgrid.MGrid4f(*dims)
    gmaker.forward(center, c, mgridout.cpu())

    molgrid.write_dx("tmp.dx", mgridout[0].cpu(), center, 0.5)

    mgridin = molgrid.read_dx("tmp.dx")
    os.remove("tmp.dx")

    g = mgridin.grid().tonumpy()
    go = mgridout[0].tonumpy()
    np.testing.assert_array_almost_equal(g, go, decimal=5)

    assert center == approx(list(mgridin.center()))
    assert mgridin.resolution() == 0.5
Example #8
def test_backward_gradients():
    #test that we have the right value along a single dimension
    gmaker = molgrid.GridMaker(
        resolution=0.5, dimension=6.0,
        gaussian_radius_multiple=-2.0)  #use full truncated gradient
    xvals = np.arange(-0.9, 3, .1)

    for device in ('cuda', 'cpu'):
        types = torch.ones(1, 1, dtype=torch.float32, device=device)
        radii = torch.ones(1, dtype=torch.float32, device=device)
        for i in range(3):  #test along each axis
            for x in xvals:
                coords = torch.zeros(1, 3, dtype=torch.float32, device=device)
                coords[0][i] = x
                coords.requires_grad = True
                outgrid = molgrid.Coords2GridFunction.apply(
                    gmaker, (0, 0, 0), coords, types, radii)
                if i == 0:
                    gp = outgrid[0][8][6][6]
                elif i == 1:
                    gp = outgrid[0][6][8][6]
                else:
                    gp = outgrid[0][6][6][8]
                Lg = torch.autograd.grad(gp, coords, create_graph=True)[0]
                fancyL = torch.sum(Lg**2)
                val = float(torch.autograd.grad(fancyL, coords)[0][0][i])
                d = x - 1
                correct = -128 * d**3 * np.exp(-4 * d**2) + 32 * d * np.exp(
                    -4 * d**2)  #formulate based on distance
                assert val == approx(correct, abs=1e-4)

    #check that diagonal is symmetric and decreases at this range
    for device in ('cuda', 'cpu'):
        types = torch.ones(1, 1, dtype=torch.float32, device=device)
        radii = torch.ones(1, dtype=torch.float32, device=device)
        coords = torch.zeros(1,
                             3,
                             dtype=torch.float32,
                             requires_grad=True,
                             device=device)

        outgrid = molgrid.Coords2GridFunction.apply(gmaker, (0, 0, 0), coords,
                                                    types, radii)
        gp = outgrid[0][7][7][7]
        Lg = torch.autograd.grad(gp, coords, create_graph=True)[0]
        fancyL = torch.sum(Lg**2)
        fL1 = torch.autograd.grad(fancyL, coords)[0][0]

        gp2 = outgrid[0][8][8][8]
        Lg = torch.autograd.grad(gp2, coords, create_graph=True)[0]
        fancyL = torch.sum(Lg**2)
        fL2 = torch.autograd.grad(fancyL, coords)[0][0]

        assert fL1[0] == fL1[1]
        assert fL1[2] == fL1[1]
        assert fL2[0] == fL2[1]
        assert fL2[2] == fL2[1]
        assert fL2[0] < fL1[0]
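The closed form asserted above can be verified symbolically, assuming the grid value reduces to g(d) = exp(-2 d^2) for a unit-radius atom (consistent with the test's constants). A sympy sketch:

import sympy as sp

d = sp.symbols('d')
g = sp.exp(-2 * d**2)        #Gaussian density at distance d, radius 1
fancyL = sp.diff(g, d)**2    #squared gradient magnitude along one axis, as in the test
expected = -128 * d**3 * sp.exp(-4 * d**2) + 32 * d * sp.exp(-4 * d**2)
assert sp.simplify(sp.diff(fancyL, d) - expected) == 0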
Example #9
def test_a_grid():
    fname = datadir+"/small.types"
    e = molgrid.ExampleProvider(data_root=datadir+"/structs")
    e.populate(fname)
    ex = e.next()
    c = ex.coord_sets[1]
    
    assert np.min(c.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(c.max_type) # this should be grid_dims or get_grid_dims
    center = c.center()
    center = tuple(center)


    mgridout = molgrid.MGrid4f(*dims)    
    mgridgpu = molgrid.MGrid4f(*dims)    
    npout = np.zeros(dims, dtype=np.float32)
    torchout = torch.zeros(dims, dtype=torch.float32)
    cudaout = torch.zeros(dims, dtype=torch.float32, device='cuda')
    
    gmaker.forward(center, c, mgridout.cpu())
    gmaker.forward(center, c, mgridgpu.gpu())

    gmaker.forward(center, c, npout)
    gmaker.forward(center, c, torchout)
    gmaker.forward(center, c, cudaout)
    
    
    newt = gmaker.make_tensor(center, c)
    newa = gmaker.make_ndarray(center, c)
    
    assert 1.438691 == approx(mgridout.tonumpy().max())
    assert 1.438691 == approx(mgridgpu.tonumpy().max())
    assert 1.438691 == approx(npout.max())
    assert 1.438691 == approx(torchout.numpy().max())
    assert 1.438691 == approx(cudaout.cpu().numpy().max())
    assert 1.438691 == approx(newt.cpu().numpy().max())
    assert 1.438691 == approx(newa.max())

    #should overwrite by default, yes?
    gmaker.forward(center, c, mgridout.cpu())
    gmaker.forward(center, c, mgridgpu.gpu())
    assert 1.438691 == approx(mgridout.tonumpy().max())
    assert 1.438691 == approx(mgridgpu.tonumpy().max())
    
    
    dims = gmaker.grid_dimensions(e.num_types())
    mgridout = molgrid.MGrid4f(*dims)    
    mgridgpu = molgrid.MGrid4f(*dims)   
    gmaker.forward(ex, mgridout.cpu())
    gmaker.forward(ex, mgridgpu.gpu())
    
    gmaker.forward(ex, mgridout.cpu())
    gmaker.forward(ex, mgridgpu.gpu())    
    
    assert 2.094017 == approx(mgridout.tonumpy().max())
    assert 2.094017 == approx(mgridgpu.tonumpy().max())
Example #10
def test_type_radii():
    g1 = molgrid.GridMaker(resolution=.25,
                           dimension=6.0,
                           radius_type_indexed=True)
    c = np.array([[0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    r = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 2)
    coords.make_vector_types(True, [3.0, 1.0])

    shape = g1.grid_dimensions(3)  #includes dummy type
    reference = molgrid.MGrid4f(*shape)
    gpudata = molgrid.MGrid4f(*shape)

    assert g1.get_radii_type_indexed()

    g1.forward((0, 0, 0), coords, reference.cpu())
    g1.forward((0, 0, 0), coords, gpudata.gpu())

    np.testing.assert_allclose(reference.tonumpy(),
                               gpudata.tonumpy(),
                               atol=1e-5)

    assert reference.tonumpy().sum() > 2980  #radius of 1 would be 116; volume scales ~r^3, and 116 * 3**3 = 3132

    reference.fill_zero()
    reference[0][20][12][12] = -1
    reference[1][20][12][12] = 1
    reference[2][20][12][12] = 2

    cpuatoms = molgrid.MGrid2f(1, 3)
    cputypes = molgrid.MGrid2f(1, 3)
    gpuatoms = molgrid.MGrid2f(1, 3)
    gputypes = molgrid.MGrid2f(1, 3)

    g1.backward((0, 0, 0), coords, reference.cpu(), cpuatoms.cpu(),
                cputypes.cpu())

    assert cpuatoms[0][0] < 0
    assert cpuatoms[0][1] == 0
    assert cpuatoms[0][2] == 0

    assert cputypes[0][0] < 0
    assert cputypes[0][1] == 0
    assert cputypes[0][2] == 0

    g1.backward((0, 0, 0), coords, reference.gpu(), gpuatoms.gpu(),
                gputypes.gpu())

    np.testing.assert_allclose(gpuatoms.tonumpy(),
                               cpuatoms.tonumpy(),
                               atol=1e-5)
    np.testing.assert_allclose(gputypes.tonumpy(),
                               cputypes.tonumpy(),
                               atol=1e-5)
Example #11
def test_vector_types():
    g1 = molgrid.GridMaker(resolution=.25, dimension=6.0)
    c = np.array([[0, 0, 0]], np.float32)
    t = np.array([0], np.float32)
    vt = np.array([[1.0, 0]], np.float32)
    vt2 = np.array([[0.5, 0.5]], np.float32)
    r = np.array([1.0], np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid1f(t),
                                   molgrid.Grid1f(r), 2)
    vcoords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid2f(vt),
                                    molgrid.Grid1f(r))
    v2coords = molgrid.CoordinateSet(molgrid.Grid2f(c), molgrid.Grid2f(vt2),
                                     molgrid.Grid1f(r))

    shape = g1.grid_dimensions(2)
    reference = molgrid.MGrid4f(*shape)
    vgrid = molgrid.MGrid4f(*shape)
    v2grid = molgrid.MGrid4f(*shape)
    v3grid = molgrid.MGrid4f(*shape)

    g1.forward((0, 0, 0), coords, reference.cpu())
    g1.forward((0, 0, 0), vcoords, vgrid.cpu())
    g1.forward((0, 0, 0), v2coords, v2grid.cpu())
    g1.forward((0, 0, 0), c, vt, r, v3grid.cpu())
    np.testing.assert_allclose(reference.tonumpy(), vgrid.tonumpy(), atol=1e-5)
    np.testing.assert_allclose(vgrid.tonumpy(), v3grid.tonumpy(), atol=1e-6)

    v2g = v2grid.tonumpy()
    g = reference.tonumpy()

    np.testing.assert_allclose(g[0, :], v2g[0, :] * 2.0, atol=1e-5)
    np.testing.assert_allclose(g[0, :], v2g[1, :] * 2.0, atol=1e-5)

    vgridgpu = molgrid.MGrid4f(*shape)
    v2gridgpu = molgrid.MGrid4f(*shape)
    g1.forward((0, 0, 0), vcoords, vgridgpu.gpu())
    g1.forward((0, 0, 0), v2coords, v2gridgpu.gpu())

    np.testing.assert_allclose(reference.tonumpy(),
                               vgridgpu.tonumpy(),
                               atol=1e-5)
    v2gpu = v2gridgpu.tonumpy()

    np.testing.assert_allclose(g[0, :], v2gpu[0, :] * 2.0, atol=1e-5)
    np.testing.assert_allclose(g[0, :], v2gpu[1, :] * 2.0, atol=1e-5)
Example #12
def get_model_gmaker_eproviders(args):
    #train example provider
    eptrain = molgrid.ExampleProvider(shuffle=True, stratify_receptor=True, labelpos=0, stratify_pos=0, stratify_min=0,
                                      stratify_max=12, stratify_step=2, recmolcache=args.recmolcache,
                                      ligmolcache=args.ligmolcache, data_root='/net/pulsar/home/koes/rishal/rmsd_paper/pdbbind/general_minus_refined')
    eptrain.populate(args.train_types)
    #test example provider
    eptest = molgrid.ExampleProvider(shuffle=True, stratify_receptor=True, labelpos=0, stratify_pos=0, stratify_min=0,
                                      stratify_max=12, stratify_step=2, recmolcache=args.recmolcache,
                                      ligmolcache=args.ligmolcache,data_root='/net/pulsar/home/koes/rishal/rmsd_paper/pdbbind/general_minus_refined')
    eptest.populate(args.test_types)
    #gridmaker with defaults
    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(eptrain.num_types())
    model_file = imp.load_source("model", args.model)
    #load model with seed
    torch.manual_seed(args.seed)
    model=model_file.Model(dims)

    return model, gmaker, eptrain, eptest
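Note that imp.load_source is deprecated and the imp module was removed in Python 3.12; an equivalent sketch using importlib:

import importlib.util

spec = importlib.util.spec_from_file_location("model", args.model)
model_file = importlib.util.module_from_spec(spec)
spec.loader.exec_module(model_file)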
Example #13
def test_coords2grid():
    gmaker = molgrid.GridMaker(resolution=0.5,
                               dimension=23.5,
                               radius_scale=1,
                               radius_type_indexed=True)
    n_types = molgrid.defaultGninaLigandTyper.num_types()
    radii = np.array(list(molgrid.defaultGninaLigandTyper.get_type_radii()),
                     np.float32)
    dims = gmaker.grid_dimensions(n_types)
    grid_size = dims[0] * dims[1] * dims[2] * dims[3]

    c2grid = molgrid.Coords2Grid(gmaker, center=(0, 0, 0))
    n_atoms = 2
    batch_size = 1
    coords = nn.Parameter(torch.randn(n_atoms, 3, device='cuda'))
    types = nn.Parameter(torch.randn(n_atoms, n_types + 1, device='cuda'))

    coords.data[0, :] = torch.tensor([1, 0, 0])
    coords.data[1, :] = torch.tensor([-1, 0, 0])
    types.data[...] = 0
    types.data[:, 10] = 1

    batch_radii = torch.tensor(np.tile(radii, (batch_size, 1)),
                               dtype=torch.float32,
                               device='cuda')

    grid_gen = c2grid(coords.unsqueeze(0),
                      types.unsqueeze(0)[:, :, :-1], batch_radii)

    assert float(grid_gen[0][10].sum()) == approx(float(grid_gen.sum()))
    assert grid_gen.sum() > 0

    target = torch.zeros_like(grid_gen)
    target[0, :, 24, 24, 24] = 1000.0

    grad_coords = molgrid.MGrid2f(n_atoms, 3)
    grad_types = molgrid.MGrid2f(n_atoms, n_types)
    r = molgrid.MGrid1f(len(radii))
    r.copyFrom(radii)

    grid_loss = F.mse_loss(target, grid_gen)
    grid_loss.backward()
    print(grid_loss)
    print(coords.grad.detach().cpu().numpy())
Example #14
def setup_gmaker_eprov(resolution: float, radius: float, data_file: Path):
    """Setup the molgrid GridMaker and ExampleProvider for the data specified in data_file.

    Args:
        resolution (float): Resolution of the grid
        radius (float): Radius of the grid in Angstrom
        data_file (Path): Types file specifying the pairings making up the data set

    Returns:
        tuple: GridMaker, ExampleProvider
    """
    # dim is one voxel length less than 2 x radius to ensure the center sits on a node between 8 voxels
    gmaker = molgrid.GridMaker(resolution=resolution,
                               dimension=2 * radius - resolution)
    e_provider_test = molgrid.ExampleProvider(data_root="",
                                              balanced=False,
                                              shuffle=False)
    e_provider_test.populate(str(data_file))

    return gmaker, e_provider_test
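A hypothetical usage sketch (the resolution, radius, and file name below are illustrative, not from the original):

from pathlib import Path

gmaker, provider = setup_gmaker_eprov(resolution=0.5, radius=12.0, data_file=Path("test.types"))
dims = gmaker.grid_dimensions(provider.num_types())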
Example #15
def test_vector_types_duplicate():
    fname = datadir+"/smalldup.types"

    teste = molgrid.ExampleProvider(molgrid.GninaVectorTyper(),shuffle=False, duplicate_first=True,data_root=datadir+"/structs")
    teste.populate(fname)
    batch_size = 1
    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(molgrid.GninaVectorTyper().num_types()*4)
    
    tensor_shape = (batch_size,)+dims
    input_tensor_1 = torch.zeros(tensor_shape, dtype=torch.float32, device='cuda')
    
    batch_1 = teste.next_batch(batch_size)
    gmaker.forward(batch_1, input_tensor_1,random_translation=0.0, random_rotation=False)
    
    input_tensor_2 = torch.zeros(tensor_shape, dtype=torch.float32, device='cpu')
    
    gmaker.forward(batch_1, input_tensor_2,random_translation=0.0, random_rotation=False)   
    
    np.testing.assert_allclose(input_tensor_1.cpu().detach().numpy(),input_tensor_2.detach().numpy(),atol=1e-4)
    assert input_tensor_1.cpu().detach().numpy().max() < 75
Example #16
def test_dx():
    fname = datadir + "/small.types"
    e = molgrid.ExampleProvider(data_root=datadir + "/structs")
    e.populate(fname)
    ex = e.next()
    c = ex.coord_sets[1]

    assert np.min(c.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(
        e.type_size())  # this should be grid_dims or get_grid_dims
    center = tuple(c.center())

    mgridout = molgrid.MGrid4f(*dims)
    gmaker.forward(center, c, mgridout.cpu())

    molgrid.write_dx("tmp.dx", mgridout[0].cpu(), center, 0.5)

    mgridin = molgrid.read_dx("tmp.dx")
    os.remove("tmp.dx")

    g = mgridin.grid().tonumpy()
    go = mgridout[0].tonumpy()
    np.testing.assert_array_almost_equal(g, go, decimal=5)

    assert center == approx(list(mgridin.center()))
    assert mgridin.resolution() == 0.5

    #dump everything (written with a 0.5 scale factor, hence the 2.0x in the comparison below)
    molgrid.write_dx_grids("/tmp/tmp", e.get_type_names(), mgridout.cpu(),
                           center, gmaker.get_resolution(), 0.5)
    checkgrid = molgrid.MGrid4f(*dims)
    molgrid.read_dx_grids("/tmp/tmp", e.get_type_names(), checkgrid.cpu())

    np.testing.assert_array_almost_equal(mgridout.tonumpy(),
                                         2.0 * checkgrid.tonumpy(),
                                         decimal=5)
Example #17
    def __init__(
        self,
        beam_size=1,
        multi_atom=False,
        n_atoms_detect=1,
        apply_conv=False,
        threshold=0.1,
        peak_value=1.5,
        min_dist=0.0,
        apply_prop_conv=False,
        constrain_types=False,
        constrain_frags=False,
        estimate_types=False,
        fit_L1_loss=False,
        interm_gd_iters=10,
        final_gd_iters=100,
        gd_kwargs=dict(
            lr=0.1,
            betas=(0.9, 0.999),
            weight_decay=0.0,
        ),
        dkoes_make_mol=True,
        use_openbabel=False,
        output_kernel=False,
        device='cuda',
        verbose=0,
        debug=False,
    ):
        # number of best structures to store and expand during search
        self.beam_size = beam_size

        # maximum number of atoms to detect in remaining density
        self.n_atoms_detect = n_atoms_detect

        # try placing all detected atoms at once, then try individually
        self.multi_atom = multi_atom

        # settings for detecting atoms in element channels
        self.apply_conv = apply_conv
        self.threshold = threshold
        self.peak_value = peak_value
        self.min_dist = min_dist

        # setting for detecting properties in property channels
        self.apply_prop_conv = apply_prop_conv

        # can constrain to find exact atom type counts or single fragment
        self.constrain_types = constrain_types
        self.constrain_frags = constrain_frags
        self.estimate_types = estimate_types

        # can perform gradient descent at each step and/or at final step
        self.fit_L1_loss = fit_L1_loss
        self.interm_gd_iters = interm_gd_iters
        self.final_gd_iters = final_gd_iters
        self.gd_kwargs = gd_kwargs

        self.output_kernel = output_kernel
        self.device = device
        self.verbose = verbose
        self.debug = debug

        self.grid_maker = molgrid.GridMaker(gaussian_radius_multiple=-1.5)
        self.c2grid = molgrid.Coords2Grid(self.grid_maker)

        # lazily initialize atom density kernel
        self.kernel = None
Example #18
import torch
import numpy as np
import molgrid  # used throughout this snippet but missing from the original imports
import matplotlib.pyplot as plt
import seaborn as sns
from rdkit.Geometry.rdGeometry import Point3D
from skimage.segmentation import flood_fill
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

import generate
import atom_types

idx = [2, 3, 4, 5, 19, 18, 17, 6, 9, 7, 8, 10, 13, 12, 16, 14, 15, 20, 27]
channels = atom_types.get_channels_by_index(idx)  #generate.py defaults
typer = molgrid.SubsettedGninaTyper(idx, False)  #equivalent in molgrid
gmaker = molgrid.GridMaker(gaussian_radius_multiple=-1.5, dimension=36)

device = 'cuda'


def grid_to_xyz(gcoords, mgrid):
    return mgrid.center + (np.array(gcoords) -
                           ((mgrid.size - 1) / 2)) * mgrid.resolution


def get_per_atom_volume(radius):
    return radius**3 * ((2 * np.pi)**1.5)


def select_atom_starts(mgrid, G, radius):
    '''Given a single channel grid and the atomic radius for that type,
Example #19
def main(args):
    # Fix seeds
    molgrid.set_random_seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Set CuDNN options for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set up libmolgrid
    e = molgrid.ExampleProvider(data_root=args.data_root,
                                balanced=False,
                                shuffle=False)
    e.populate(args.test_file)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (args.batch_size, ) + dims

    # Load test file examples (NOTE: not possible to do directly via molgrid)
    with open(args.test_file, 'r') as f:
        lines = f.readlines()

    # Construct input tensors
    input_tensor = torch.zeros(tensor_shape,
                               dtype=torch.float32,
                               device='cuda')
    float_labels = torch.zeros(args.batch_size, dtype=torch.float32)

    # Initialise network - Two models currently available (see models.py for details)
    if args.model == 'Ragoza':
        model = Basic_CNN(dims).to('cuda')
    elif args.model == 'Imrie':
        model = DenseNet(dims, block_config=(4, 4, 4)).to('cuda')
    else:
        print("Please specify a valid architecture")
        exit()
    # Load weights for network
    model.load_state_dict(torch.load(args.weights))
    print("Loaded model parameters")

    # Print number of parameters in model
    print("Number of model params: %dK" %
          (sum([x.nelement() for x in model.parameters()]) / 1000, ))

    # Test network

    # Ensure model in eval mode
    model.eval()

    # Test loop
    predictions = []
    labels = []
    num_samples = e.size()
    num_batches = -(-num_samples // args.batch_size)
    print("Number of examples: %d" % num_samples)
    for it in range(num_batches):
        # Load data
        batch = e.next_batch(args.batch_size)
        gmaker.forward(batch,
                       input_tensor,
                       random_rotation=args.rotate,
                       random_translation=args.translate)
        batch.extract_label(0, float_labels)
        labels.extend(list(float_labels.detach().cpu().numpy()))
        batch_predictions = []
        for _ in range(args.num_rotate):
            gmaker.forward(batch,
                           input_tensor,
                           random_rotation=args.rotate,
                           random_translation=args.translate)
            # Predict
            output = F.softmax(model(input_tensor), dim=1)
            batch_predictions.append(list(output.detach().cpu().numpy()[:, 1]))
        predictions.extend(list(np.mean(batch_predictions, axis=0)))

        # Progress
        if it % args.display_iter == 0:
            print("Processed: %d / %d examples" %
                  (it * args.batch_size, num_samples))

    # Print performance
    labels = labels[:num_samples]
    predictions = predictions[:num_samples]
    print("Test AUC: %.2f" % (roc_auc_score(labels, predictions)), flush=True)

    # Save predictions
    output_lines = []
    for line, pred in zip(lines, predictions):
        output_lines.append(str(pred) + ' ' + line)

    with open(args.output_path, 'w') as f:
        for line in output_lines:
            f.write(line)
Example #20
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass
    else:
        tgs = ['BarlowTwins'] + args.tags
        wandb.init(entity='andmcnutt', project='DDG_model_Regression',config=args, tags=tgs)

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.local_rank == -1:
            args.local_rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            # args.rank = args.rank * ngpus_per_node + gpu
            print(f"rank:{args.local_rank}")
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.local_rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnet'):
        resnet_num = int(args.arch.split('t')[-1])
        model = moco.resnet.generate_model(resnet_num)
        model.fc = nn.Identity()
    elif args.arch == 'default2018':
        model = Default2018((28,48,48,48), args.rep_size)
    elif args.arch == 'dense':
        model = Dense((28,48,48,48))
    projector = Projector(args.rep_size,args.proj_size)
    predictor = None
    if args.semi_super:
        predictor = Predictor(args.rep_size)
    print(model)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            projector.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            if args.arch.startswith('resnet'):
                model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], output_device=args.gpu)
            projector = nn.SyncBatchNorm.convert_sync_batchnorm(projector)
            projector = torch.nn.parallel.DistributedDataParallel(projector, device_ids=[args.gpu], output_device=args.gpu)
            if args.semi_super:
                predictor.cuda(args.gpu)
                predictor = torch.nn.parallel.DistributedDataParallel(predictor, device_ids=[args.gpu], output_device=args.gpu)
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    if args.cc_lambda is None:
        args.cc_lambda = 1.0/args.proj_size
        print(f'updated cc_lambda: {args.cc_lambda}')
    criterion = CrossCorrLoss(args.proj_size,args.cc_lambda,args.batch_size,device=args.gpu).cuda(args.gpu)

    parameters = [p for p in model.parameters()] + [p for p in projector.parameters()]
    if args.semi_super:
        parameters += [p for p in predictor.parameters()]
    optimizer = LARS(parameters, lr=0,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset = molgrid.MolDataset(
        args.data, data_root=args.dataroot,
        ligmolcache=args.ligmolcache, recmolcache=args.recmolcache)
    #Need to use random trans/rot when actually running
    gmaker = molgrid.GridMaker()
    shape = gmaker.grid_dimensions(28)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, shuffle=True)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler, drop_last=True, collate_fn=moco.loader.collateMolDataset)

    for epoch in range(args.start_epoch, args.epochs):
        if train_sampler is not None:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        loss, lr = train(train_loader, model, projector, predictor, criterion, optimizer, gmaker, shape, epoch, args)
        print(f'Epoch: {epoch}, Loss:{loss}')
        if args.local_rank == 0:
            if args.semi_super:
                wandb.log({'Loss':loss[0],'Supervised Loss':loss[1], 'Representation Loss':loss[2], "Learning Rate": lr})
            else: 
                wandb.log({'Loss':loss})

        if (not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.local_rank % ngpus_per_node == 0)) and (epoch % 50 == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'projector': projector.state_dict(),
                'optimizer' : optimizer.state_dict(),
            }, is_best=False, filename='checkpoint_{:04d}.pth.tar'.format(epoch))
Example #21
def simple_atom_fit(mgrid, types, iters=10, tol=0.01):
    '''Fit atoms to MolGrid.  types are ignored as the number of 
    atoms of each type is always inferred from the density.
    Returns the MolGrid of the placed atoms and the MolStruct'''
    t_start = time.time()

    # mtr22 - match the input API of generate.AtomFitter.fit
    mgrid = generate.MolGrid(
        values=torch.as_tensor(mgrid.values, device=device),
        channels=mgrid.channels,
        center=mgrid.center,
        resolution=mgrid.resolution,
    )

    #for every channel, select some coordinates and setup the type/radius vectors
    initcoords = []
    typevecs = []
    radii = []
    typeindices = []
    numatoms = 0
    tcnts = {}
    types_est = []  # mtr22
    for (t, G) in enumerate(mgrid.values):
        ch = mgrid.channels[t]
        #print(ch)
        coords = select_atom_starts(mgrid, G, ch.atomic_radius)
        if coords:
            tvec = np.zeros(len(mgrid.channels))
            tvec[t] = 1.0
            tcnt = len(coords)
            numatoms += tcnt
            types_est.append(tcnt)  #mtr22

            r = mgrid.channels[t].atomic_radius
            initcoords += coords
            typevecs += [tvec] * tcnt
            typeindices += [t] * tcnt
            radii += [r] * tcnt
            tcnts[t] = tcnt
        else:
            types_est.append(0)  #mtr22

    typevecs = np.array(typevecs)
    initcoords = np.array(initcoords)
    typeindices = np.array(typeindices)

    # mtr22 - for computing type_diff metrics in returned molstruct
    types_true = torch.tensor(types, dtype=torch.float32, device=device)
    types_est = torch.tensor(types_est, dtype=torch.float32, device=device)

    #print(types_est)

    #setup gridder
    gridder = molgrid.Coords2Grid(molgrid.GridMaker(
        dimension=mgrid.dimension,
        resolution=mgrid.resolution,
        gaussian_radius_multiple=-1.5),
                                  center=tuple(mgrid.center.astype(float)))
    mgrid.values = mgrid.values.to(device)

    #having setup input coordinates, optimize with BFGS
    coords = torch.tensor(initcoords,
                          dtype=torch.float32,
                          requires_grad=True,
                          device=device)
    types = torch.tensor(typevecs, dtype=torch.float32, device=device)
    radii = torch.tensor(radii, dtype=torch.float32, device=device)
    best_loss = np.inf
    best_coords = None
    best_typeindices = typeindices  #save in case number of atoms changes
    goodcoords = False

    for inum in range(iters):
        optimizer = torch.optim.LBFGS([coords],
                                      max_iter=20000,
                                      tolerance_grad=1e-9,
                                      line_search_fn='strong_wolfe')

        def closure():
            optimizer.zero_grad()
            agrid = gridder.forward(coords, types, radii)
            loss = torch.square(agrid - mgrid.values).sum() / numatoms
            loss.backward()
            return loss

        optimizer.step(closure)
        final_loss = optimizer.state_dict()['state'][0][
            'prev_loss']  #todo - check for convergence?

        print('iter {} (loss={}, n_atoms={})'.format(inum, final_loss,
                                                     len(best_typeindices)))

        if final_loss < best_loss:
            best_loss = final_loss
            best_coords = coords.detach()

        if inum == iters - 1:  #stick with these coordinates
            break
        #otherwise, try different starting coordinates for only those
        #atom types that have errors
        goodcoords = True
        with torch.no_grad():
            offset = 0
            agrid = gridder.forward(coords, types, radii)
            t = 0
            while offset < len(typeindices):
                t = typeindices[offset]
                #eval max error - mse will downplay a single atom of many being off
                maxerr = float(torch.square(agrid[t] - mgrid.values[t]).max())
                if maxerr > tol:
                    goodcoords = False
                    ch = mgrid.channels[t]
                    newcoords = select_atom_starts(mgrid, mgrid.values[t],
                                                   ch.atomic_radius)
                    for (i, coord) in enumerate(newcoords):
                        coords[i + offset] = torch.tensor(coord,
                                                          dtype=torch.float)
                offset += tcnts[t]
        if goodcoords:
            break

    numfixes = 0
    fix_iter = 0
    if not goodcoords:
        #try to fix up an atom at a time
        offset = 0
        #reset coords to best found so far
        with torch.no_grad():
            coords[:] = best_coords
            agrid = gridder.forward(coords, types, radii)
        t = 0
        while offset < len(typeindices):
            t = typeindices[offset]
            maxerr = float(torch.square(agrid[t] - mgrid.values[t]).max())
            per_atom_volume = float(radii[offset])**3 * ((2 * np.pi)**1.5)
            while maxerr > tol:
                #identify the atom of this type closest to the place with too much density
                #and move it to the location with too little density
                tcoords = coords[offset:offset + tcnts[t]].detach().cpu(
                ).numpy()  #coordinates for this type

                diff = agrid[t] - mgrid.values[t]
                possum = float(diff[diff > 0].sum())
                negsum = float(diff[diff < 0].sum())
                maxdiff = float(diff.max())
                mindiff = float(diff.min())
                missing_density = -(negsum + possum)
                if missing_density > .75 * per_atom_volume:  #add atom
                    print("Missing density - not enough atoms?")
                    numfixes += 1
                    minpos = int((agrid[t] - mgrid.values[t]).argmin())
                    minpos = grid_to_xyz(
                        np.unravel_index(minpos, agrid[t].shape), mgrid)
                    #add atom: change coords, types, radii, typeindices and tcnts, numatoms
                    numatoms += 1
                    typeindices = np.insert(typeindices, offset, t)
                    tcnts[t] += 1
                    with torch.no_grad():
                        newcoord = torch.tensor([minpos],
                                                device=coords.device,
                                                dtype=coords.dtype,
                                                requires_grad=True)
                        coords = torch.cat(
                            (coords[:offset], newcoord, coords[offset:]))
                        radii = torch.cat(
                            (radii[:offset], radii[offset:offset + 1],
                             radii[offset:]))
                        types = torch.cat(
                            (types[:offset], types[offset:offset + 1],
                             types[offset:]))

                        coords.requires_grad_(True)
                        radii.requires_grad_(True)
                        types.requires_grad_(True)

                elif mindiff**2 < tol:
                    print("No significant density underage - too many atoms?")
                    break
                    #todo, remove atom
                else:  #move an atom
                    numfixes += 1
                    maxpos = int((agrid[t] - mgrid.values[t]).argmax())
                    minpos = int((agrid[t] - mgrid.values[t]).argmin())
                    maxpos = grid_to_xyz(
                        np.unravel_index(maxpos, agrid[t].shape), mgrid)
                    minpos = grid_to_xyz(
                        np.unravel_index(minpos, agrid[t].shape), mgrid)

                    dists = np.square(tcoords - maxpos).sum(axis=1)
                    closesti = np.argmin(dists)
                    with torch.no_grad():
                        coords[offset + closesti] = torch.tensor(minpos)

                #reoptimize
                optimizer = torch.optim.LBFGS([coords],
                                              max_iter=20000,
                                              tolerance_grad=1e-9,
                                              line_search_fn='strong_wolfe')
                #TODO: only optimize this grid
                optimizer.step(closure)
                final_loss = optimizer.state_dict()['state'][0][
                    'prev_loss']  #todo - check for convergence?
                agrid = gridder.forward(coords, types, radii)  #recompute grid

                #if maxerr hasn't improved, give up
                newerr = float(torch.square(agrid[t] - mgrid.values[t]).max())
                fix_iter += 1
                print(
                    'fix_iter {} (loss={}, n_atoms={}, newerr={}, numfixes={})'
                    .format(fix_iter, final_loss, len(typeindices), newerr,
                            numfixes))

                if newerr >= maxerr:
                    break
                else:
                    maxerr = newerr
                    best_loss = final_loss
                    best_coords = coords.detach()
                    best_typeindices = typeindices.copy()

                #otherwise update coordinates and repeat

            offset += tcnts[t]

    # mtr22 - match the output API of generate.AtomFitter.fit
    n_atoms = len(best_typeindices)
    n_channels = len(mgrid.channels)
    best_types = torch.zeros((n_atoms, n_channels),
                             dtype=torch.float32,
                             device=device)
    best_radii = torch.zeros((n_atoms, ), dtype=torch.float32, device=device)
    for i, t in enumerate(best_typeindices):
        ch = mgrid.channels[t]
        best_types[i, t] = 1.0
        best_radii[i] = ch.atomic_radius

    #create struct and grid from coordinates
    struct_best = generate.MolStruct(
        xyz=best_coords.cpu().numpy(),
        c=best_typeindices,
        channels=mgrid.channels,
        loss=float(best_loss),
        type_diff=(types_est - best_types.sum(dim=0)).abs().sum().item(),
        est_type_diff=(types_true - types_est).abs().sum().item(),
        time=time.time() - t_start,
        n_steps=numfixes,
    )

    grid_pred = generate.MolGrid(
        values=gridder.forward(best_coords, best_types,
                               best_radii).cpu().detach().numpy(),
        channels=mgrid.channels,
        center=mgrid.center,
        resolution=mgrid.resolution,
        visited_structs=[],
        src_struct=struct_best,
    )

    return grid_pred
Example #22
    balanced=True,
    shuffle=True,
    duplicate_first=True,
    default_batch_size=batch_size,
    iteration_scheme=molgrid.IterationScheme.SmallEpoch)
traine.populate(args.trainfile)
teste = molgrid.ExampleProvider(
    ligmolcache=args.ligte,
    recmolcache=args.recte,
    shuffle=True,
    duplicate_first=True,
    default_batch_size=batch_size,
    iteration_scheme=molgrid.IterationScheme.SmallEpoch)
teste.populate(args.testfile)

gmaker = molgrid.GridMaker(binary=args.binary_rep)
dims = gmaker.grid_dimensions(14 * 4)  # only one rec + one lig per example
tensor_shape = (batch_size, ) + dims

actual_dims = (dims[0] // 2, *dims[1:])
siam_arm = default2018(actual_dims, args.rep_size)
if args.use_weights is not None:
    if os.path.isfile(args.use_weights):
        print("=> loading checkpoint '{}'".format(args.use_weights))
        checkpoint = torch.load(args.use_weights, map_location="cpu")

        # rename moco pre-trained keys
        state_dict = checkpoint['state_dict']
        for k in list(state_dict.keys()):
            del_end = None
            # retain only encoder_q up to before the embedding layer
Example #23
def simple_atom_fit(mgrid, types, iters=10, tol=0.01, device='cuda', grm=-1.5):
    '''Fit atoms to AtomGrid.  types are ignored as the number of 
    atoms of each type is always inferred from the density.
    Returns the AtomGrid of the placed atoms and the AtomStruct'''

    t_start = time.time()
    #for every channel, select some coordinates and setup the type/radius vectors
    initcoords = []
    typevecs = []
    radii = []
    typeindices = []
    numatoms = 0
    tcnts = {}
    values = torch.tensor(mgrid.values, device=device)

    for (t, G) in enumerate(values):
        ch = mgrid.channels[t]
        coords = select_atom_starts(mgrid, G, ch.atomic_radius)
        if coords:
            tvec = np.zeros(len(mgrid.channels))
            tvec[t] = 1.0
            tcnt = len(coords)
            numatoms += tcnt

            r = mgrid.channels[t].atomic_radius
            initcoords += coords
            typevecs += [tvec] * tcnt
            typeindices += [t] * tcnt
            radii += [r] * tcnt
            tcnts[t] = tcnt

    typevecs = np.array(typevecs)
    initcoords = np.array(initcoords)
    typeindices = np.array(typeindices)
    #print('typeindices',typeindices)
    #setup gridder
    center = tuple([float(c) for c in mgrid.center])
    gridder = molgrid.Coords2Grid(molgrid.GridMaker(
        dimension=mgrid.dimension,
        resolution=mgrid.resolution,
        gaussian_radius_multiple=grm),
                                  center=center)

    #having setup input coordinates, optimize with BFGS
    coords = torch.tensor(initcoords,
                          dtype=torch.float32,
                          requires_grad=True,
                          device=device)
    types = torch.tensor(typevecs, dtype=torch.float32, device=device)
    radii = torch.tensor(radii, dtype=torch.float32, device=device)
    best_loss = np.inf
    best_coords = None
    best_typeindices = typeindices  #save in case number of atoms changes
    goodcoords = False
    bestagrid = torch.zeros(values.shape, dtype=torch.float32, device=device)

    if len(initcoords) == 0:  #no atoms
        mol = AtomStruct(np.zeros((0, 3)),
                         np.zeros(0),
                         mgrid.channels,
                         L2_loss=values.square().sum() / values.numel(),
                         time=time.time() - t_start,
                         iterations=0,
                         numfixes=0,
                         type_diff=0,
                         est_type_diff=0,
                         visited_structs=[])
        return mol, bestagrid

    for inum in range(iters):
        optimizer = torch.optim.LBFGS([coords],
                                      max_iter=20000,
                                      tolerance_grad=1e-9,
                                      line_search_fn='strong_wolfe')

        def closure():
            optimizer.zero_grad()
            agrid = gridder.forward(coords, types, radii)
            loss = torch.square(agrid - values).sum() / numatoms
            loss.backward()
            return loss

        optimizer.step(closure)
        final_loss = optimizer.state_dict()['state'][0][
            'prev_loss']  #todo - check for convergence?

        if final_loss < best_loss:
            best_loss = final_loss
            best_coords = coords.detach().cpu()

        if inum == iters - 1:  #stick with these coordinates
            break
        #otherwise, try different starting coordinates for only those
        #atom types that have errors
        goodcoords = True
        with torch.no_grad():
            offset = 0
            agrid = gridder.forward(coords, types, radii)
            t = 0
            while offset < len(typeindices):
                t = typeindices[offset]
                #eval max error - mse will downplay a single atom of many being off
                maxerr = float(torch.square(agrid[t] - values[t]).max())
                if maxerr > tol:
                    goodcoords = False
                    ch = mgrid.channels[t]
                    newcoords = select_atom_starts(mgrid, values[t],
                                                   ch.atomic_radius)
                    for (i, coord) in enumerate(newcoords):
                        coords[i + offset] = torch.tensor(coord,
                                                          dtype=torch.float)
                offset += tcnts[t]
        if goodcoords:
            break
    bestagrid = agrid.clone()
    numfixes = 0
    if not goodcoords:
        #try to fix up an atom at a time
        offset = 0
        #reset coords to best found so far
        with torch.no_grad():
            coords[:] = best_coords
            agrid = gridder.forward(coords, types, radii)
        t = 0
        while offset < len(typeindices):
            t = typeindices[offset]
            maxerr = float(torch.square(agrid[t] - values[t]).max())
            #print('maxerr',maxerr)
            per_atom_volume = float(radii[offset])**3 * ((2 * np.pi)**1.5)
            while maxerr > tol:
                #identify the atom of this type closest to the place with too much density
                #and move it to the location with too little density
                tcoords = coords[offset:offset + tcnts[t]].detach().cpu(
                ).numpy()  #coordinates for this type

                diff = agrid[t] - values[t]
                possum = float(diff[diff > 0].sum())
                negsum = float(diff[diff < 0].sum())
                maxdiff = float(diff.max())
                mindiff = float(diff.min())
                missing_density = -(negsum + possum)
                #print('Type %d numcoords %d maxdiff %.5f mindiff %.5f missing %.5f'%(t,len(tcoords),maxdiff,mindiff,missing_density))
                if missing_density > .25 * per_atom_volume:  #add atom  MAGIC NUMBER ALERT
                    #needs to be enough total missing density to be close to a whole atom,
                    #but the missing density also needs to be somewhat concentrated
                    #print("Missing density - not enough atoms?")
                    numfixes += 1
                    minpos = int((agrid[t] - values[t]).argmin())
                    minpos = grid_to_xyz(
                        np.unravel_index(minpos, agrid[t].shape), mgrid)
                    #add atom: change coords, types, radii, typeindices and tcnts, numatoms
                    numatoms += 1
                    typeindices = np.insert(typeindices, offset, t)
                    tcnts[t] += 1
                    with torch.no_grad():
                        newcoord = torch.tensor([minpos],
                                                device=coords.device,
                                                dtype=coords.dtype,
                                                requires_grad=True)
                        coords = torch.cat(
                            (coords[:offset], newcoord, coords[offset:]))
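                        # the new atom reuses the radius and type of the atom
                        # already at this offset, so duplicate that slot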
                        radii = torch.cat(
                            (radii[:offset], radii[offset:offset + 1],
                             radii[offset:]))
                        types = torch.cat(
                            (types[:offset], types[offset:offset + 1],
                             types[offset:]))

                        coords.requires_grad_(True)
                        radii.requires_grad_(True)
                        types.requires_grad_(True)

                elif missing_density < -.75 * per_atom_volume:
                    #todo: remove an atom
                    print("Too many atoms?")
                    break
                else:  #move an atom
                    numfixes += 1
                    maxpos = int((agrid[t] - values[t]).argmax())
                    minpos = int((agrid[t] - values[t]).argmin())
                    maxpos = grid_to_xyz(
                        np.unravel_index(maxpos, agrid[t].shape), mgrid)
                    minpos = grid_to_xyz(
                        np.unravel_index(minpos, agrid[t].shape), mgrid)

                    dists = np.square(tcoords - maxpos).sum(axis=1)
                    closesti = np.argmin(dists)
                    with torch.no_grad():
                        coords[offset + closesti] = torch.tensor(minpos)

                #reoptimize
                optimizer = torch.optim.LBFGS([coords],
                                              max_iter=20000,
                                              tolerance_grad=1e-9,
                                              line_search_fn='strong_wolfe')
                #TODO: only optimize this grid
                optimizer.step(closure)
                #todo - check for convergence?
                final_loss = optimizer.state_dict()['state'][0]['prev_loss']
                agrid = gridder.forward(coords, types, radii)  #recompute grid

                #if maxerr hasn't improved, give up
                newerr = float(torch.square(agrid[t] - values[t]).max())
                #print(t,'newerr',newerr,'maxerr',maxerr,'maxdiff',maxdiff,'mindiff',mindiff,'missing',missing_density)
                if newerr >= maxerr:
                    #don't give up if there's still a lot left to fit
                    #and the missing density isn't all (very) shallow
                    if missing_density < per_atom_volume or mindiff > -0.1:  #magic number!
                        break
                else:
                    maxerr = newerr
                    best_loss = final_loss
                    best_coords = coords.detach().cpu()
                    best_typeindices = typeindices.copy()
                    bestagrid = agrid.clone()

                #otherwise update coordinates and repeat

            offset += tcnts[t]

    #create struct from coordinates
    mol = AtomStruct(best_coords.numpy(),
                     best_typeindices,
                     mgrid.channels,
                     L2_loss=float(best_loss),
                     time=time.time() - t_start,
                     iterations=inum,
                     numfixes=numfixes,
                     type_diff=0,
                     est_type_diff=0,
                     visited_structs=[])
    # print('losses',final_loss,best_loss,len(best_coords))
    return mol, bestagrid
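
# A minimal sanity check (an addition, not part of the original code) for the
# per_atom_volume formula used above: assuming an atom's density is the
# unnormalized Gaussian exp(-d^2 / (2 r^2)), its integral over all space is
# r^3 * (2*pi)**1.5. The radius value below is a hypothetical placeholder.
import numpy as np

r = 1.6  # hypothetical atomic radius (Angstroms)
res = 0.01
ax = np.arange(-10 * r, 10 * r + res, res)
# the Gaussian is separable, so the 3D integral is the cube of the 1D integral
one_d = np.exp(-ax**2 / (2 * r**2)).sum() * res  # 1D Riemann sum
assert abs(one_d**3 - r**3 * (2 * np.pi)**1.5) < 1e-3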
Exemple #24
0
def test_vector_types():
    g1 = molgrid.GridMaker(resolution=.25,dimension=6.0)
    c = np.array([[0,0,0],[2,0,0]],np.float32)
    t = np.array([0,1],np.float32)
    vt = np.array([[1.0,0],[0,1.0]],np.float32)
    vt2 = np.array([[0.5,0.0],[0.0,0.5]],np.float32)
    r = np.array([1.0,1.0],np.float32)
    coords = molgrid.CoordinateSet(molgrid.Grid2f(c),molgrid.Grid1f(t),molgrid.Grid1f(r),2)
    vcoords = molgrid.CoordinateSet(molgrid.Grid2f(c),molgrid.Grid2f(vt),molgrid.Grid1f(r))
    v2coords = molgrid.CoordinateSet(molgrid.Grid2f(c),molgrid.Grid2f(vt2),molgrid.Grid1f(r))

    shape = g1.grid_dimensions(2)
    reference = molgrid.MGrid4f(*shape)
    vgrid = molgrid.MGrid4f(*shape)
    v2grid = molgrid.MGrid4f(*shape)
    v3grid = molgrid.MGrid4f(*shape)
    
    g1.forward((0,0,0),coords, reference.cpu())
    g1.forward((0,0,0),vcoords, vgrid.cpu())
    g1.forward((0,0,0),v2coords, v2grid.cpu())
    g1.forward((0,0,0),c,vt,r, v3grid.cpu())        
    np.testing.assert_allclose(reference.tonumpy(),vgrid.tonumpy(),atol=1e-5)
    np.testing.assert_allclose(vgrid.tonumpy(),v3grid.tonumpy(),atol=1e-6)
    
    v2g = v2grid.tonumpy()
    g = reference.tonumpy()

    np.testing.assert_allclose(g[0,:],v2g[0,:]*2.0,atol=1e-5)
    np.testing.assert_allclose(g[1,:],v2g[1,:]*2.0,atol=1e-5)
    
    vgridgpu = molgrid.MGrid4f(*shape)
    v2gridgpu = molgrid.MGrid4f(*shape)
    g1.forward((0,0,0),vcoords, vgridgpu.gpu())
    g1.forward((0,0,0),v2coords, v2gridgpu.gpu())
    
    np.testing.assert_allclose(reference.tonumpy(),vgridgpu.tonumpy(),atol=1e-5)
    v2gpu = v2gridgpu.tonumpy()
    
    np.testing.assert_allclose(g[0,:],v2gpu[0,:]*2.0,atol=1e-5)
    np.testing.assert_allclose(g[1,:],v2gpu[1,:]*2.0,atol=1e-5)    
    
    #create target grid with equal type density at 1,0,0
    tc = molgrid.Grid2f(np.array([[1,0,0]],np.float32))
    tv = molgrid.Grid2f(np.array([[0.5,0.5]],np.float32))
    tr = molgrid.Grid1f(np.array([1.0],np.float32))
    targetc = molgrid.CoordinateSet(tc,tv,tr)
    tgrid = molgrid.MGrid4f(*shape)
    g1.forward((0,0,0),targetc,tgrid.cpu())
    
    gradc = molgrid.MGrid2f(2,3)
    gradt = molgrid.MGrid2f(2,2)
    g1.backward((0,0,0),vcoords,tgrid.cpu(),gradc.cpu(),gradt.cpu())
    assert gradc[0,0] == approx(-gradc[1,0],abs=1e-4)
    assert gradc[0,0] > 0
    
    gradc.fill_zero()
    gradt.fill_zero()
    g1.backward((0,0,0),vcoords,tgrid.gpu(),gradc.gpu(),gradt.gpu())

    assert gradc[0,0] == approx(-gradc[1,0],abs=1e-4)
    assert gradc[0,0] > 0
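    # Follow-on check (an addition to the original test): grid density is
    # linear in the type-vector weights, so scaling the one-hot rows by any
    # alpha should scale every channel by alpha.
    alpha = 0.25
    vts = (alpha*np.eye(2)).astype(np.float32)
    scoords = molgrid.CoordinateSet(molgrid.Grid2f(c),molgrid.Grid2f(vts),molgrid.Grid1f(r))
    sgrid = molgrid.MGrid4f(*shape)
    g1.forward((0,0,0),scoords,sgrid.cpu())
    np.testing.assert_allclose(g*alpha,sgrid.tonumpy(),atol=1e-5)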
Exemple #25
0
def main(args):
    # Fix seeds
    molgrid.set_random_seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Set CuDNN options for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set up libmolgrid
    e = molgrid.ExampleProvider(data_root=args.data_root,
                                balanced=True,
                                shuffle=True)
    e.populate(args.train_file)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (args.batch_size, ) + dims

    # Construct input tensors
    input_tensor = torch.zeros(tensor_shape,
                               dtype=torch.float32,
                               device='cuda')
    float_labels = torch.zeros(args.batch_size, dtype=torch.float32)

    # Initialise network - Two models currently available (see models.py for details)
    if args.model == 'Ragoza':
        model = Basic_CNN(dims).to('cuda')
    elif args.model == 'Imrie':
        model = DenseNet(dims, block_config=(4, 4, 4)).to('cuda')
    else:
        print("Please specify a valid architecture")
        exit()

    # Set weights for network
    if args.weights:
        model.load_state_dict(torch.load(args.weights))
        print("Loaded model parameters")
    else:
        model.apply(weights_init)
        print("Randomly initialised model parameters")

    # Print number of parameters in model
    print("Number of model params: %dK" %
          (sum([x.nelement() for x in model.parameters()]) / 1000))

    # Train network

    # Construct optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=args.base_lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = lr_scheduler.ExponentialLR(optimizer, args.anneal_rate)
    print("Initial learning rate: %.6f" % scheduler.get_lr()[0])

    # Train loop
    losses = []
    for it in range(1, args.iterations + 1):
        # Load data
        batch = e.next_batch(args.batch_size)
        gmaker.forward(batch,
                       input_tensor,
                       random_rotation=args.rotate,
                       random_translation=args.translate)
        batch.extract_label(0, float_labels)
        labels = float_labels.long().to('cuda')

        # Train
        optimizer.zero_grad()
        output = model(input_tensor)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip_gradients)
        optimizer.step()
        losses.append(float(loss))

        # Anneal learning rate
        if it % args.anneal_iter == 0:
            scheduler.step()
            print("Current iteration: %d, Annealing learning rate: %.6f" %
                  (it, scheduler.get_lr()[0]))

        # Progress
        if it % args.display_iter == 0:
            print("Current iteration: %d, Loss: %.3f" %
                  (it, float(np.mean(losses[-args.display_iter:]))))

        # Save model
        if it % args.save_iter == 0:
            print("Saving model after %d iterations." % it)
            torch.save(
                model.state_dict(),
                args.save_dir + "/" + args.save_prefix + ".iter-" + str(it))

        # Test model
        if args.test_file != '' and it % args.test_iter == 0:
            # Set to test mode
            model.eval()
            predictions = []
            labs = []
            e_test = molgrid.ExampleProvider(data_root=args.data_root,
                                             balanced=False,
                                             shuffle=False)
            e_test.populate(args.test_file)
            num_samples = e_test.size()
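            # -(-a // b) is ceiling division: number of batches rounded up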
            num_batches = -(-num_samples // args.batch_size)
            for _ in range(num_batches):
                # Load data
                batch = e_test.next_batch(args.batch_size)
                batch_predictions = []
                batch.extract_label(0, float_labels)
                labs.extend(list(float_labels.detach().cpu().numpy()))
                for _ in range(args.num_rotate):
                    gmaker.forward(batch,
                                   input_tensor,
                                   random_rotation=args.rotate,
                                   random_translation=0.0)
                    # Predict
                    output = F.softmax(model(input_tensor), dim=1)
                    batch_predictions.append(
                        list(output.detach().cpu().numpy()[:, 0]))
                predictions.extend(list(np.mean(batch_predictions, axis=0)))
            # Print performance
            labs = labs[:num_samples]
            predictions = predictions[:num_samples]
            print("Current iter: %d, AUC: %.2f" %
                  (it, roc_auc_score(labs, predictions)),
                  flush=True)
            # Set to train mode
            model.train()
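
# Hedged usage sketch (not from the original script): main() is driven by an
# argparse namespace; the field names below are exactly those read in the body
# above, but every value here is an invented placeholder.
from types import SimpleNamespace

example_args = SimpleNamespace(
    seed=42, data_root='data/structs', train_file='data/small.types',
    batch_size=16, model='Ragoza', weights='', base_lr=0.01, momentum=0.9,
    weight_decay=1e-4, anneal_rate=0.9, anneal_iter=1000, iterations=10000,
    rotate=True, translate=2.0, clip_gradients=10.0, display_iter=50,
    save_iter=1000, save_dir='.', save_prefix='model', test_file='',
    test_iter=1000, num_rotate=1)
# main(example_args)  # requires CUDA, the data files and models.py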
Exemple #26
0
def test_train_torch_cnn():
    batch_size = 50
    datadir = os.path.dirname(__file__) + '/data'
    fname = datadir + "/small.types"

    molgrid.set_random_seed(0)
    torch.manual_seed(0)
    np.random.seed(0)

    class Net(nn.Module):
        def __init__(self, dims):
            super(Net, self).__init__()
            self.pool0 = nn.MaxPool3d(2)
            self.conv1 = nn.Conv3d(dims[0], 32, kernel_size=3, padding=1)
            self.pool1 = nn.MaxPool3d(2)
            self.conv2 = nn.Conv3d(32, 64, kernel_size=3, padding=1)
            self.pool2 = nn.MaxPool3d(2)
            self.conv3 = nn.Conv3d(64, 128, kernel_size=3, padding=1)

            # three 2x pools shrink each spatial dim by a factor of 8
            self.last_layer_size = (dims[1] // 8) * (dims[2] // 8) * \
                (dims[3] // 8) * 128
            self.fc1 = nn.Linear(self.last_layer_size, 2)

        def forward(self, x):
            x = self.pool0(x)
            x = F.relu(self.conv1(x))
            x = self.pool1(x)
            x = F.relu(self.conv2(x))
            x = self.pool2(x)
            x = F.relu(self.conv3(x))
            x = x.view(-1, self.last_layer_size)
            x = self.fc1(x)
            return x

    def weights_init(m):
        if isinstance(m, nn.Conv3d) or isinstance(m, nn.Linear):
            init.xavier_uniform_(m.weight.data)

    e = molgrid.ExampleProvider(data_root=datadir + "/structs",
                                balanced=True,
                                shuffle=True)
    e.populate(fname)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (batch_size, ) + dims

    model = Net(dims).to('cuda')
    model.apply(weights_init)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    input_tensor = torch.zeros(tensor_shape,
                               dtype=torch.float32,
                               device='cuda')
    float_labels = torch.zeros(batch_size, dtype=torch.float32)

    losses = []
    for iteration in range(100):
        #load data
        batch = e.next_batch(batch_size)
        #not rotating since convergence is faster this way
        gmaker.forward(batch, input_tensor, 0, random_rotation=False)
        batch.extract_label(0, float_labels)
        labels = float_labels.long().to('cuda')

        optimizer.zero_grad()
        output = model(input_tensor)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        optimizer.step()
        losses.append(float(loss))

    avefinalloss = np.array(losses[-5:]).mean()
    assert avefinalloss < .4
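
# Small sanity sketch (an addition): three MaxPool3d(2) layers shrink each
# spatial dimension by a factor of 8, so Net.last_layer_size is
# (x//8) * (y//8) * (z//8) * 128. For molgrid's default 48**3 grid:
dims = (28, 48, 48, 48)  # hypothetical (channels, x, y, z) shape
assert (dims[1] // 8) * (dims[2] // 8) * (dims[3] // 8) * 128 == 6 * 6 * 6 * 128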
Exemple #27
0
               default="files/ligmap",
               help="Ligand types file")
p.add_argument("-o", "--output", type=str, default=None, help="Output file")
p.add_argument("--dx", action="store_true", help="Output grids as DX files")

args = p.parse_args()

system = os.path.splitext(os.path.basename(args.sdf))[0]

if args.output is None:
    args.output = f"{system}.pcd"

resolution = args.resolution
dimension = args.dimension

gm = molgrid.GridMaker(resolution=resolution, dimension=dimension)

t = molgrid.FileMappedGninaTyper(args.ligmap)

# Grid dimensions (including types)
gdims = gm.grid_dimensions(t.num_types())

# Pre-allocate grid
# Only one example (batch size is 1)
grid = torch.zeros(1, *gdims, dtype=torch.float32, device="cuda:0")

obmol = next(pybel.readfile("sdf", args.sdf))
obmol.addh()
print(obmol, end="")

# Use the OpenBabel molecule object (obmol.OBMol) instead of the Pybel molecule (obmol)
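
# Hedged continuation sketch (the original snippet ends above): typing the
# molecule and voxelizing it. Constructing a CoordinateSet directly from an
# OpenBabel OBMol plus a typer is an assumption about the molgrid API here.
cs = molgrid.CoordinateSet(obmol.OBMol, t)
gm.forward(cs.center(), cs, grid[0])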
Exemple #28
0
    def __init__(self, resolution=0.5, dimension=23.5):
        gmaker = molgrid.GridMaker(resolution,
                                   dimension,
                                   gaussian_radius_multiple=-1.5)
        super().__init__(gmaker)
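
# Added note (hedged reading of the libmolgrid docs, not verified here): a
# negative gaussian_radius_multiple appears to mean the density is a pure
# Gaussian truncated at |value| * radius, with no quadratic falloff past it.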
Exemple #29
0
def main():
    args = parser.parse_args()
    tgs = ['MoCo_SingleGPU']
    wandb.init(entity='andmcnutt',
               project='DDG_model_Regression',
               config=args,
               tags=tgs)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    args.gpu = device

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder_single.MoCo(
        args.arch,
        args.moco_dim,
        args.moco_k,
        args.moco_m,
        args.moco_t,
        args.mlp,
        semi_supervised=bool(args.semi_super))
    print(model)

    torch.cuda.set_device(device)
    model = model.to(device)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to the specified single device
                # (args.gpu is already a torch.device, so pass it directly;
                # formatting it into 'cuda:{}' would yield 'cuda:cuda:0')
                checkpoint = torch.load(args.resume, map_location=args.gpu)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset = molgrid.torch_bindings.MolDataset(
        args.data,
        ligmolcache=args.ligmolcache,
        recmolcache=args.recmolcache,
        data_root=args.dataroot)
    gmaker = molgrid.GridMaker()
    shape = gmaker.grid_dimensions(28)

    train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        sampler=train_sampler,
        drop_last=True,
        collate_fn=moco.loader.collateMolDataset)

    wandb.watch(model, log='all')
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, gmaker, shape, epoch,
              args)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            is_best=False,
            filename='checkpoint_{:04d}.pth.tar'.format(epoch))
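
# Hedged sketch (not from the original) of the adjust_learning_rate helper
# referenced above; the MoCo reference code steps or cosine-anneals the
# learning rate per epoch. The schedule below is an invented stand-in.
def adjust_learning_rate(optimizer, epoch, args):
    lr = args.lr * (0.1 ** (epoch // 30))  # hypothetical step decay
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr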
Exemple #30
0
    def __init__(
        self,
        data_file,
        data_root,
        batch_size,
        rec_typer,
        lig_typer,
        use_rec_elems=True,
        resolution=0.5,
        dimension=None,
        grid_size=None,
        shuffle=False,
        random_rotation=False,
        random_translation=0.0,
        diff_cond_transform=False,
        diff_cond_structs=False,
        n_samples=1,
        rec_molcache=None,
        lig_molcache=None,
        cache_structs=True,
        device='cuda',
        debug=False,
    ):
        super().__init__()

        assert (dimension or grid_size) and not (dimension and grid_size), \
            'must specify one of either dimension or grid_size'
        if grid_size:
            dimension = atom_grids.size_to_dimension(grid_size, resolution)

        # create receptor and ligand atom typers
        self.lig_typer = AtomTyper.get_typer(*lig_typer.split('-'), rec=False)
        self.rec_typer = \
            AtomTyper.get_typer(*rec_typer.split('-'), rec=use_rec_elems)

        atom_typers = [self.rec_typer, self.lig_typer]
        if diff_cond_structs:  # duplicate atom typers
            atom_typers *= 2

        # create example provider
        self.ex_provider = molgrid.ExampleProvider(
            *atom_typers,
            data_root=data_root,
            recmolcache=rec_molcache or '',
            ligmolcache=lig_molcache or '',
            cache_structs=cache_structs,
            shuffle=shuffle,
            num_copies=n_samples,
        )

        # create molgrid maker
        self.grid_maker = molgrid.GridMaker(
            resolution=resolution,
            dimension=dimension,
            gaussian_radius_multiple=-1.5,
        )
        self.batch_size = batch_size

        # transformation settings
        self.random_rotation = random_rotation
        self.random_translation = random_translation
        self.diff_cond_transform = diff_cond_transform
        self.diff_cond_structs = diff_cond_structs
        self.debug = debug
        self.device = device

        # transform interpolation state
        self.cond_interp = TransformInterpolation(n_samples=n_samples)

        # load data from file
        self.ex_provider.populate(data_file)
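
# Added note: atom_grids.size_to_dimension presumably inverts molgrid's
# points-per-side relation points = dimension / resolution + 1, i.e.
# dimension = (grid_size - 1) * resolution; e.g. 48 points at 0.5 A -> 23.5 A.
assert (48 - 1) * 0.5 == 23.5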