コード例 #1
0
def test_custom_typer_example_provider():
    """A typer passed explicitly to the provider determines max_type of its coordinate sets."""
    typer = molgrid.ElementIndexTyper(80)
    provider = molgrid.ExampleProvider(typer, data_root=datadir + "/structs")
    provider.populate(datadir + "/small.types")

    batch = provider.next_batch(10)
    first_set = batch[0].coord_sets[0]
    assert first_set.max_type == 80
コード例 #2
0
def test_type_sizing():
    """The provider and the examples it yields must report the same number of types."""
    provider = molgrid.ExampleProvider(
        data_root=datadir + "/structs", make_vector_types=True)
    provider.populate(datadir + "/ligonly.types")

    n_examples = 10
    batch = provider.next_batch(n_examples)

    # The two counts have to agree even if one coordinate set is empty.
    provider_types = provider.num_types()
    assert provider_types == batch[0].num_types()
コード例 #3
0
def test_a_grid():
    """Grid one coordinate set, then a whole example, into every supported output type.

    CPU and GPU MGrid views, a numpy array, CPU and CUDA torch tensors, and the
    make_tensor / make_ndarray helpers are all expected to reach the same peak value.
    """
    provider = molgrid.ExampleProvider(data_root=datadir + "/structs")
    provider.populate(datadir + "/small.types")
    example = provider.next()
    coord_set = example.coord_sets[1]

    assert np.min(coord_set.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    # this should be grid_dims or get_grid_dims
    dims = gmaker.grid_dimensions(coord_set.max_type)
    center = tuple(coord_set.center())

    # One output buffer per supported container type.
    cpu_grid = molgrid.MGrid4f(*dims)
    gpu_grid = molgrid.MGrid4f(*dims)
    numpy_out = np.zeros(dims, dtype=np.float32)
    torch_out = torch.zeros(dims, dtype=torch.float32)
    cuda_out = torch.zeros(dims, dtype=torch.float32, device='cuda')

    gmaker.forward(center, coord_set, cpu_grid.cpu())
    gmaker.forward(center, coord_set, gpu_grid.gpu())
    for target in (numpy_out, torch_out, cuda_out):
        gmaker.forward(center, coord_set, target)

    made_tensor = gmaker.make_tensor(center, coord_set)
    made_array = gmaker.make_ndarray(center, coord_set)

    assert 1.438691 == approx(cpu_grid.tonumpy().max())
    assert 1.438691 == approx(gpu_grid.tonumpy().max())
    assert 1.438691 == approx(numpy_out.max())
    assert 1.438691 == approx(torch_out.numpy().max())
    assert 1.438691 == approx(cuda_out.cpu().numpy().max())
    assert 1.438691 == approx(made_tensor.cpu().numpy().max())
    assert 1.438691 == approx(made_array.max())

    # Gridding into already-filled buffers is expected to overwrite them,
    # so the peak must not change.
    gmaker.forward(center, coord_set, cpu_grid.cpu())
    gmaker.forward(center, coord_set, gpu_grid.gpu())
    assert 1.438691 == approx(cpu_grid.tonumpy().max())
    assert 1.438691 == approx(gpu_grid.tonumpy().max())

    # Now grid the whole example (twice) into freshly sized buffers.
    dims = gmaker.grid_dimensions(provider.num_types())
    cpu_grid = molgrid.MGrid4f(*dims)
    gpu_grid = molgrid.MGrid4f(*dims)
    for _ in range(2):
        gmaker.forward(example, cpu_grid.cpu())
        gmaker.forward(example, gpu_grid.gpu())

    assert 2.094017 == approx(cpu_grid.tonumpy().max())
    assert 2.094017 == approx(gpu_grid.tonumpy().max())
コード例 #4
0
def get_model_gmaker_eproviders(args):
    """Build the model, the grid maker and the train/test example providers.

    Args:
        args: Options object. Read here: ``recmolcache``, ``ligmolcache``,
            ``train_types``, ``test_types``, ``model`` (path of a Python source
            file that defines ``Model``) and ``seed``.

    Returns:
        tuple: ``(model, gmaker, eptrain, eptest)``.
    """
    # Function-scope imports: the ``imp`` module used previously is deprecated and
    # was removed in Python 3.12, so the model file is loaded through importlib.
    import importlib.machinery
    import importlib.util
    import sys

    # Train and test providers share identical settings; only the types file differs.
    provider_settings = dict(
        shuffle=True,
        stratify_receptor=True,
        labelpos=0,
        stratify_pos=0,
        stratify_min=0,
        stratify_max=12,
        stratify_step=2,
        recmolcache=args.recmolcache,
        ligmolcache=args.ligmolcache,
        data_root='/net/pulsar/home/koes/rishal/rmsd_paper/pdbbind/general_minus_refined',
    )

    # train example provider
    eptrain = molgrid.ExampleProvider(**provider_settings)
    eptrain.populate(args.train_types)
    # test example provider
    eptest = molgrid.ExampleProvider(**provider_settings)
    eptest.populate(args.test_types)

    # gridmaker with defaults
    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(eptrain.num_types())

    # Equivalent of imp.load_source("model", args.model): an explicit source loader
    # is used so the file is accepted whatever its extension.
    loader = importlib.machinery.SourceFileLoader("model", args.model)
    spec = importlib.util.spec_from_file_location("model", args.model, loader=loader)
    model_file = importlib.util.module_from_spec(spec)
    sys.modules["model"] = model_file  # imp.load_source registered the module as well
    loader.exec_module(model_file)

    # Seed right before construction so the model's initial weights are reproducible.
    torch.manual_seed(args.seed)
    model = model_file.Model(dims)

    return model, gmaker, eptrain, eptest
コード例 #5
0
def test_cached_with_typer_example_provider():
    """An explicit typer is also honoured when ligands are read from a molcache file."""
    typer = molgrid.ElementIndexTyper(80)
    provider = molgrid.ExampleProvider(
        typer, ligmolcache=datadir + '/lig.molcache2')
    provider.populate(datadir + "/ligonly.types")

    batch = provider.next_batch(10)
    second_set = batch[0].coord_sets[1]
    assert second_set.max_type == 80
    assert second_set.type_index[0] == 7
コード例 #6
0
def test_example_provider_iterator_interface():
    """Iterating a provider must yield the same labels as explicit next_batch() calls."""
    BSIZE = 25
    types_path = datadir + "/small.types"

    def make_provider():
        # Both providers are configured identically and read the same file.
        provider = molgrid.ExampleProvider(
            data_root=datadir + "/structs", default_batch_size=BSIZE)
        provider.populate(types_path)
        return provider

    iterated = make_provider()
    stepped = make_provider()

    nlabels = iterated.num_labels()
    labels = molgrid.MGrid2f(BSIZE, nlabels)
    labels2 = molgrid.MGrid2f(BSIZE, nlabels)

    for index, batch in enumerate(iterated):
        other_batch = stepped.next_batch()
        batch.extract_labels(labels.cpu())
        other_batch.extract_labels(labels2.cpu())
        np.testing.assert_allclose(labels, labels2)
        # Twelve batches are enough; stop instead of exhausting the iterator.
        if index >= 11:
            break
コード例 #7
0
def test_gnina_example_provider():
    """Check labels, coordinates, radii and type indices produced with default typers."""
    provider = molgrid.ExampleProvider(data_root=datadir + "/structs")
    provider.populate(datadir + "/small.types")

    batch_size = 100
    batch = provider.next_batch(batch_size)

    # Extract all labels at once, through both the CPU and the GPU view.
    nlabels = provider.num_labels()
    assert nlabels == 3
    labels = molgrid.MGrid2f(batch_size, nlabels)
    gpulabels = molgrid.MGrid2f(batch_size, nlabels)
    batch.extract_labels(labels.cpu())
    batch.extract_labels(gpulabels.gpu())
    assert np.array_equal(labels.tonumpy(), gpulabels.tonumpy())

    # Extract the labels one column at a time; the last goes through the GPU view.
    label0, label1, label2 = (molgrid.MGrid1f(batch_size) for _ in range(3))
    batch.extract_label(0, label0.cpu())
    batch.extract_label(1, label1.cpu())
    batch.extract_label(2, label2.gpu())

    assert label0[0] == 1
    assert label1[0] == approx(6.05)
    assert label2[0] == approx(0.162643)
    assert labels[0, 0] == 1
    assert labels[0][1] == approx(6.05)
    assert labels[0][2] == approx(0.162643)

    # Column-wise and bulk extraction must agree.
    # NOTE: only the first `nlabels` rows of the batch are compared here.
    for row in range(nlabels):
        for column, single in enumerate((label0, label1, label2)):
            assert single[row] == labels[row][column]

    example = batch[0]

    first_set = example.coord_sets[0]
    assert first_set.size() == 1781
    assert list(first_set.coords[0]) == approx([45.042, 12.872, 13.001])
    assert first_set.radii[0] == approx(1.8)
    expected_head = [6.0, 1.0, 1.0, 7.0, 0.0, 6.0, 1.0, 1.0, 7.0, 1.0]
    assert list(first_set.type_index)[:10] == expected_head

    second_set = example.coord_sets[1]
    assert second_set.size() == 10
    assert list(second_set.coords[9]) == approx([27.0536, 3.2453, 32.4511])
    expected_types = [8.0, 1.0, 1.0, 9.0, 10.0, 0.0, 0.0, 1.0, 9.0, 8.0]
    assert list(second_set.type_index) == expected_types

    # Extracting a label into a plain numpy array must be accepted too.
    batch = provider.next_batch(1)
    single_label = np.array([0], dtype=np.float32)
    batch.extract_label(1, single_label)
コード例 #8
0
def test_pytorch_dataset():
    """MolDataset items must match what ExampleProvider yields, alone and when collated."""
    types_path = datadir + "/small.types"
    struct_root = datadir + "/structs"

    provider = molgrid.ExampleProvider(data_root=struct_root)
    provider.populate(types_path)
    dataset = molgrid.MolDataset(types_path, data_root=struct_root)

    assert len(dataset) == 1000

    example = provider.next()
    merged = example.merge_coordinates()

    # The first dataset item against the first example of the provider.
    first_center, first_coords, first_types, first_radii, first_labels = dataset[0]

    assert list(first_center.shape) == [3]
    np.testing.assert_allclose(first_coords, merged.coords.tonumpy())
    np.testing.assert_allclose(first_types, merged.type_index.tonumpy())
    np.testing.assert_allclose(first_radii, merged.radii.tonumpy())

    assert len(first_labels) == 3
    assert first_labels[0] == 1
    np.testing.assert_allclose(first_labels[1], 6.05)
    np.testing.assert_allclose(first_labels[-1], 0.162643)

    # Negative indexing reaches the last item.
    _, _, _, _, last_labels = dataset[-1]
    assert last_labels[0] == 0
    np.testing.assert_allclose(last_labels[1], -10.3)

    # Testing out the collate_fn when used with torch.utils.data.DataLoader
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=8, collate_fn=molgrid.MolDataset.collateMolDataset)
    batches = iter(loader)
    next(batches)  # skip the first batch; the second one is inspected below
    lengths, center, coords, types, radii, labels = next(batches)
    assert len(lengths) == 8
    for collated in (center, coords, types, radii, labels):
        assert collated.shape[0] == 8

    # Entry 2 of the second batch of eight is dataset item 10.
    item_center, item_coords, item_types, item_radii, item_labels = dataset[10]
    n_atoms = lengths[2]
    np.testing.assert_allclose(center[2], item_center)
    np.testing.assert_allclose(coords[2][:n_atoms], item_coords)
    np.testing.assert_allclose(types[2][:n_atoms], item_types)
    np.testing.assert_allclose(radii[2][:n_atoms], item_radii.unsqueeze(1))
    assert len(labels[2]) == len(item_labels)
    assert labels[2][0] == item_labels[0]
    assert labels[2][1] == item_labels[1]
コード例 #9
0
def test_duplicated_examples():
    '''This is for files with multiple ligands.

    Without duplicate_first every example holds one receptor and two ligands;
    with duplicate_first=True the receptor is repeated in front of each ligand.
    '''
    fname = datadir+"/multilig.types"
    e = molgrid.ExampleProvider(data_root=datadir+"/structs")
    e.populate(fname)
    batch_size = 10
    b = e.next_batch(batch_size)
    for i in range(1,batch_size):
        assert len(b[i].coord_sets) == 3 #one rec and two ligands
        #ligands should be different
        sqsum = np.square(b[i].coord_sets[1].coords.tonumpy() - b[i].coord_sets[2].coords.tonumpy()).sum()
        assert sqsum > 0

    e = molgrid.ExampleProvider(data_root=datadir+"/structs",duplicate_first=True)
    e.populate(fname)
    batch_size = 10
    b = e.next_batch(batch_size)
    for i in range(1,batch_size):
        assert len(b[i].coord_sets) == 4 #rec lig rec lig
        #ligands should be different
        sqsum = np.square(b[i].coord_sets[1].coords.tonumpy() - b[i].coord_sets[3].coords.tonumpy()).sum()
        assert sqsum > 0
        #receptors should be the same
        sqsum = np.square(b[i].coord_sets[0].coords.tonumpy() - b[i].coord_sets[2].coords.tonumpy()).sum()
        # This difference was previously computed but never checked.
        assert sqsum == 0
コード例 #10
0
def test_vector_sum_types():
    """sum_types must give the same per-type totals for MGrid, numpy and CUDA outputs."""
    types_path = datadir + "/ligonly.types"
    batch_size = 10
    # Expected totals of the first example for the default ligand typer.
    ligand_counts = [2., 3., 0., 0., 0., 0., 0., 0., 2., 2., 1., 0., 0., 0.]

    provider = molgrid.ExampleProvider(
        data_root=datadir + "/structs", make_vector_types=True)
    provider.populate(types_path)
    batch = provider.next_batch(batch_size)

    grid_totals = molgrid.MGrid2f(batch_size, provider.num_types())
    batch.sum_types(grid_totals)
    numpy_totals = np.zeros(grid_totals.shape, np.float32)
    batch.sum_types(numpy_totals)
    cuda_totals = torch.empty(
        grid_totals.shape, dtype=torch.float32, device='cuda')
    batch.sum_types(cuda_totals)

    np.testing.assert_allclose(
        grid_totals.tonumpy(), cuda_totals.detach().cpu().numpy(), atol=1e-5)
    np.testing.assert_allclose(grid_totals.tonumpy(), numpy_totals, atol=1e-5)
    # With the default typers the first 14 entries are all zero and the
    # ligand totals fill the last 14.
    np.testing.assert_allclose(
        grid_totals[0].tonumpy(), [0.] * 14 + ligand_counts, atol=1e-5)

    # With a NullIndexTyper in the first slot only the ligand totals remain.
    provider = molgrid.ExampleProvider(
        molgrid.NullIndexTyper(), molgrid.defaultGninaLigandTyper,
        data_root=datadir + "/structs", make_vector_types=True)
    provider.populate(types_path)
    batch = provider.next_batch(batch_size)
    grid_totals = molgrid.MGrid2f(batch_size, provider.num_types())
    batch.sum_types(grid_totals)
    np.testing.assert_allclose(
        grid_totals[0].tonumpy(), ligand_counts, atol=1e-5)
コード例 #11
0
def setup_gmaker_eprov(resolution: float, radius: float, data_file: Path):
    """Create the molgrid GridMaker and ExampleProvider for the data in ``data_file``.

    Args:
        resolution (float): Resolution of the grid.
        radius (float): Radius of the grid in Angstrom.
        data_file (Path): File specifying the types file pairings making up the data set.

    Returns:
        tuple: GridMaker, ExampleProvider
    """
    # The grid edge is one voxel shorter than 2 x radius, which puts the center
    # on a node shared by 8 voxels.
    grid_edge = 2 * radius - resolution
    gmaker = molgrid.GridMaker(resolution=resolution, dimension=grid_edge)

    # Unbalanced and unshuffled.
    provider = molgrid.ExampleProvider(
        data_root="", balanced=False, shuffle=False)
    provider.populate(str(data_file))

    return gmaker, provider
コード例 #12
0
def test_vector_types_duplicate():
    """CUDA and CPU gridding must agree for vector types with duplicate_first enabled."""
    provider = molgrid.ExampleProvider(
        molgrid.GninaVectorTyper(), shuffle=False, duplicate_first=True,
        data_root=datadir + "/structs")
    provider.populate(datadir + "/smalldup.types")

    batch_size = 1
    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(molgrid.GninaVectorTyper().num_types() * 4)
    tensor_shape = (batch_size,) + dims

    batch = provider.next_batch(batch_size)

    # Grid the very same batch on each device, with every random transform disabled.
    grids = {}
    for device in ('cuda', 'cpu'):
        grids[device] = torch.zeros(
            tensor_shape, dtype=torch.float32, device=device)
        gmaker.forward(batch, grids[device],
                       random_translation=0.0, random_rotation=False)

    gpu_values = grids['cuda'].cpu().detach().numpy()
    cpu_values = grids['cpu'].detach().numpy()
    np.testing.assert_allclose(gpu_values, cpu_values, atol=1e-4)
    assert grids['cuda'].cpu().detach().numpy().max() < 75
コード例 #13
0
def test_dx():
    """Grids written as .dx files must read back with identical values and metadata."""
    provider = molgrid.ExampleProvider(data_root=datadir + "/structs")
    provider.populate(datadir + "/small.types")
    example = provider.next()
    coord_set = example.coord_sets[1]

    assert np.min(coord_set.type_index.tonumpy()) >= 0

    gmaker = molgrid.GridMaker()
    # this should be grid_dims or get_grid_dims
    dims = gmaker.grid_dimensions(provider.type_size())
    # Center the grid on the mean position of the coordinate set.
    center = tuple(coord_set.coord.tonumpy().mean(axis=0).astype(float))

    written = molgrid.MGrid4f(*dims)
    gmaker.forward(center, coord_set, written.cpu())

    # Round-trip a single channel through one file, removing it right after reading.
    molgrid.write_dx("tmp.dx", written[0].cpu(), center, 0.5)
    loaded = molgrid.read_dx("tmp.dx")
    os.remove("tmp.dx")

    np.testing.assert_array_almost_equal(
        loaded.grid().tonumpy(), written[0].tonumpy(), decimal=5)

    assert center == approx(list(loaded.center()))
    assert loaded.resolution() == 0.5

    # Dump every channel, one file per type name.
    type_names = provider.get_type_names()
    molgrid.write_dx_grids("/tmp/tmp", type_names, written.cpu(),
                           center, gmaker.get_resolution(), 0.5)
    reloaded = molgrid.MGrid4f(*dims)
    molgrid.read_dx_grids("/tmp/tmp", provider.get_type_names(), reloaded.cpu())

    # The trailing 0.5 passed to write_dx_grids looks like a scale factor:
    # the values read back are expected to be half of the originals.
    np.testing.assert_array_almost_equal(
        written.tonumpy(), 2.0 * reloaded.tonumpy(), decimal=5)
コード例 #14
0
def test_mol_example_provider(capsys):
    """With default settings examples come out in file order, wrapping around at the end."""
    provider = molgrid.ExampleProvider(data_root=datadir + "/structs")
    provider.populate(datadir + "/smallmol.types")
    with capsys.disabled():  # bunch openbabel garbage
        first = provider.next()
        batch = provider.next_batch(10)  # should wrap around

    # with defaults, file should be read in order
    assert first.labels[0] == 1
    assert first.labels[1] == approx(3.3747)
    assert first.coord_sets[0].size() == 1289
    assert first.coord_sets[1].size() == 8

    second_set_coords = first.coord_sets[1].coord.tonumpy()
    assert tuple(second_set_coords[0]) == approx((26.6450, 6.1410, 4.6680))
    assert len(first.coord_sets) == 2

    label0_values = tuple(item.labels[0] for item in batch)
    label1_values = tuple(item.labels[1] for item in batch)

    # labels should be in order
    assert (1, 1, 0, 0, 0, 1, 1, 1, 0, 0) == label0_values

    expected_label1 = (6.0000, 3.8697, -6.6990, -4.3010, -9.0000,
                       3.3747, 6.0000, 3.8697, -6.6990, -4.3010)
    assert expected_label1 == approx(label1_values)
コード例 #15
0
# Experiment tracking: run tags are the generated ones plus any supplied in args.tags.
tgs = make_tags(args) + args.tags
wandb.init(entity='andmcnutt',
           project='DDG_model_Regression',
           config=args,
           tags=tgs)

# Parameters that are not important for the hyperparameter sweep
batch_size = args.batch_size
epochs = args.epoch

# print('ligtr={}, rectr={}'.format(args.ligtr,args.rectr))

# Training examples: read from the ligand/receptor molcaches, balanced and shuffled,
# iterated with the SmallEpoch scheme.
traine = molgrid.ExampleProvider(
    ligmolcache=args.ligtr,
    recmolcache=args.rectr,
    balanced=True,
    shuffle=True,
    duplicate_first=True,
    default_batch_size=batch_size,
    iteration_scheme=molgrid.IterationScheme.SmallEpoch)
traine.populate(args.trainfile)
# Test examples: same settings as for training except that `balanced` is not set.
teste = molgrid.ExampleProvider(
    ligmolcache=args.ligte,
    recmolcache=args.recte,
    shuffle=True,
    duplicate_first=True,
    default_batch_size=batch_size,
    iteration_scheme=molgrid.IterationScheme.SmallEpoch)
teste.populate(args.testfile)

gmaker = molgrid.GridMaker(binary=args.binary_rep)
# NOTE(review): 14 * 4 presumably means 14 types for each of the four coordinate
# sets (rec, lig, rec, lig) produced by duplicate_first - confirm against the typers.
dims = gmaker.grid_dimensions(14 * 4)  # only one rec+onelig per example
コード例 #16
0
def test_train_torch_cnn():
    """Train a small 3D CNN on gridded examples and require the loss to come down."""
    batch_size = 50
    datadir = os.path.dirname(__file__) + '/data'
    fname = datadir + "/small.types"

    # Seed every random number source involved.
    molgrid.set_random_seed(0)
    torch.manual_seed(0)
    np.random.seed(0)

    class Net(nn.Module):
        """Three pool/conv stages followed by a two-way linear classifier."""

        def __init__(self, dims):
            super().__init__()
            self.pool0 = nn.MaxPool3d(2)
            self.conv1 = nn.Conv3d(dims[0], 32, kernel_size=3, padding=1)
            self.pool1 = nn.MaxPool3d(2)
            self.conv2 = nn.Conv3d(32, 64, kernel_size=3, padding=1)
            self.pool2 = nn.MaxPool3d(2)
            self.conv3 = nn.Conv3d(64, 128, kernel_size=3, padding=1)

            # Flattened size of the conv3 output that is fed to the linear layer.
            self.last_layer_size = dims[1] // 8 * dims[2] // 8 * dims[3] // 8 * 128
            self.fc1 = nn.Linear(self.last_layer_size, 2)

        def forward(self, x):
            stages = (
                (self.pool0, self.conv1),
                (self.pool1, self.conv2),
                (self.pool2, self.conv3),
            )
            for pool, conv in stages:
                x = F.relu(conv(pool(x)))
            return self.fc1(x.view(-1, self.last_layer_size))

    def weights_init(m):
        # Only convolution and linear layers carry weights to initialise.
        if isinstance(m, (nn.Conv3d, nn.Linear)):
            init.xavier_uniform_(m.weight.data)

    provider = molgrid.ExampleProvider(
        data_root=datadir + "/structs", balanced=True, shuffle=True)
    provider.populate(fname)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(provider.num_types())
    tensor_shape = (batch_size, ) + dims

    model = Net(dims).to('cuda')
    model.apply(weights_init)

    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    # Buffers reused for every batch: the grids live on the GPU, the raw labels do not.
    input_tensor = torch.zeros(
        tensor_shape, dtype=torch.float32, device='cuda')
    float_labels = torch.zeros(batch_size, dtype=torch.float32)

    losses = []
    for _ in range(100):
        # load data
        batch = provider.next_batch(batch_size)
        # not rotating since convergence is faster this way
        gmaker.forward(batch, input_tensor, 0, random_rotation=False)
        batch.extract_label(0, float_labels)
        targets = float_labels.long().to('cuda')

        optimizer.zero_grad()
        loss = F.cross_entropy(model(input_tensor), targets)
        loss.backward()
        optimizer.step()
        losses.append(float(loss))

    # Average of the last five iterations.
    avefinalloss = np.mean(losses[-5:])
    assert avefinalloss < .4
コード例 #17
0
ファイル: CNN_train.py プロジェクト: oxpig/DenseFS
def main(args):
    """Train a CNN classifier on molgrid examples, periodically saving and testing it.

    Args:
        args: Parsed options. Read here: seed, data_root, train_file, test_file,
            batch_size, model, weights, base_lr, momentum, weight_decay,
            anneal_rate, anneal_iter, iterations, rotate, translate,
            clip_gradients, display_iter, save_iter, save_dir, save_prefix,
            test_iter and num_rotate.

    Raises:
        SystemExit: If ``args.model`` is neither 'Ragoza' nor 'Imrie'.
    """
    # Fix seeds
    molgrid.set_random_seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Set CuDNN options for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set up libmolgrid
    e = molgrid.ExampleProvider(data_root=args.data_root,
                                balanced=True,
                                shuffle=True)
    e.populate(args.train_file)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (args.batch_size, ) + dims

    # Construct input tensors (reused for every batch, in training and in testing)
    input_tensor = torch.zeros(tensor_shape,
                               dtype=torch.float32,
                               device='cuda')
    float_labels = torch.zeros(args.batch_size, dtype=torch.float32)

    # Initialise network - Two models currently available (see models.py for details)
    if args.model == 'Ragoza':
        model = Basic_CNN(dims).to('cuda')
    elif args.model == 'Imrie':
        model = DenseNet(dims, block_config=(4, 4, 4)).to('cuda')
    else:
        print("Please specify a valid architecture")
        # The builtin exit() is only installed by the site module and would report
        # success; signal the failure with a non-zero status instead.
        raise SystemExit(1)

    # Set weights for network
    if args.weights:
        model.load_state_dict(torch.load(args.weights))
        print("Loaded model parameters")
    else:
        model.apply(weights_init)
        print("Randomly initialised model parameters")

    # Print number of parameters in model
    print("Number of model params: %dK" %
          (sum([x.nelement() for x in model.parameters()]) / 1000))

    # Train network

    # Construct optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=args.base_lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = lr_scheduler.ExponentialLR(optimizer, args.anneal_rate)
    # get_lr() is intended for the scheduler's own use and is not a reliable way to
    # query the current rate; get_last_lr() reports the rate in effect. Older
    # PyTorch versions without get_last_lr() fall back to get_lr().
    current_lr = getattr(scheduler, "get_last_lr", scheduler.get_lr)
    print("Initial learning rate: %.6f" % current_lr()[0])

    # Train loop
    losses = []
    for it in range(1, args.iterations + 1):
        # Load data
        batch = e.next_batch(args.batch_size)
        gmaker.forward(batch,
                       input_tensor,
                       random_rotation=args.rotate,
                       random_translation=args.translate)
        batch.extract_label(0, float_labels)
        labels = float_labels.long().to('cuda')

        # Train
        optimizer.zero_grad()
        output = model(input_tensor)
        loss = F.cross_entropy(output, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.clip_gradients)
        optimizer.step()
        losses.append(float(loss))

        # Anneal learning rate
        if it % args.anneal_iter == 0:
            scheduler.step()
            print("Current iteration: %d, Annealing learning rate: %.6f" %
                  (it, current_lr()[0]))

        # Progress
        if it % args.display_iter == 0:
            print("Current iteration: %d, Loss: %.3f" %
                  (it, float(np.mean(losses[-args.display_iter:]))))

        # Save model
        if it % args.save_iter == 0:
            print("Saving model after %d iterations." % it)
            torch.save(
                model.state_dict(),
                args.save_dir + "/" + args.save_prefix + ".iter-" + str(it))

        # Test model
        if args.test_file != '' and it % args.test_iter == 0:
            # Set to test mode
            model.eval()
            predictions = []
            labs = []
            # A fresh, unshuffled provider so every evaluation starts at the first example.
            e_test = molgrid.ExampleProvider(data_root=args.data_root,
                                             balanced=False,
                                             shuffle=False)
            e_test.populate(args.test_file)
            num_samples = e_test.size()
            # Ceiling division: the last batch may be only partly made of new examples.
            num_batches = -(-num_samples // args.batch_size)
            for _ in range(num_batches):
                # Load data
                batch = e_test.next_batch(args.batch_size)
                batch_predictions = []
                batch.extract_label(0, float_labels)
                labs.extend(list(float_labels.detach().cpu().numpy()))
                for _ in range(args.num_rotate):
                    gmaker.forward(batch,
                                   input_tensor,
                                   random_rotation=args.rotate,
                                   random_translation=0.0)
                    # Predict; no autograd graph is needed for evaluation.
                    with torch.no_grad():
                        output = F.softmax(model(input_tensor), dim=1)
                    # Score = probability of class 1, the positive label for the
                    # AUC. Column 0 would yield the inverted AUC.
                    batch_predictions.append(
                        list(output.detach().cpu().numpy()[:, 1]))
                # Average the predictions over the rotations of this batch.
                predictions.extend(list(np.mean(batch_predictions, axis=0)))
            # Print performance (drop the surplus entries of the final batch)
            labs = labs[:num_samples]
            predictions = predictions[:num_samples]
            print("Current iter: %d, AUC: %.2f" %
                  (it, roc_auc_score(labs, predictions)),
                  flush=True)
            # Set to train mode
            model.train()
コード例 #18
0
def main(args):
    """Score every example of a test file with a trained CNN and save the predictions.

    Args:
        args: Parsed options. Read here: seed, data_root, test_file, batch_size,
            model, weights, rotate, translate, num_rotate, display_iter and
            output_path.

    Raises:
        SystemExit: If ``args.model`` is neither 'Ragoza' nor 'Imrie'.
    """
    # Fix seeds
    molgrid.set_random_seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Set CuDNN options for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set up libmolgrid
    e = molgrid.ExampleProvider(data_root=args.data_root,
                                balanced=False,
                                shuffle=False)
    e.populate(args.test_file)

    gmaker = molgrid.GridMaker()
    dims = gmaker.grid_dimensions(e.num_types())
    tensor_shape = (args.batch_size, ) + dims

    # Load test file examples (NOTE: not possible to do directly via molgrid)
    with open(args.test_file, 'r') as f:
        lines = f.readlines()

    # Construct input tensors
    input_tensor = torch.zeros(tensor_shape,
                               dtype=torch.float32,
                               device='cuda')
    float_labels = torch.zeros(args.batch_size, dtype=torch.float32)

    # Initialise network - Two models currently available (see models.py for details)
    if args.model == 'Ragoza':
        model = Basic_CNN(dims).to('cuda')
    elif args.model == 'Imrie':
        model = DenseNet(dims, block_config=(4, 4, 4)).to('cuda')
    else:
        print("Please specify a valid architecture")
        # The builtin exit() is only installed by the site module and would report
        # success; signal the failure with a non-zero status instead.
        raise SystemExit(1)
    # Load weights for network
    model.load_state_dict(torch.load(args.weights))
    print("Loaded model parameters")

    # Print number of parameters in model
    print("Number of model params: %dK" %
          (sum([x.nelement() for x in model.parameters()]) / 1000, ))

    # Test network

    # Ensure model in eval mode
    model.eval()

    # Test loop
    predictions = []
    labels = []
    num_samples = e.size()
    # Ceiling division: the last batch may be only partly made of new examples.
    num_batches = -(-num_samples // args.batch_size)
    print("Number of examples: %d" % num_samples)
    for it in range(num_batches):
        # Load data
        batch = e.next_batch(args.batch_size)
        # NOTE(review): this grid is overwritten by the first pass of the rotation
        # loop below before it is ever used. The call is kept because dropping it
        # would change how many random transforms are drawn, and with that the
        # exact predictions obtained for a given seed.
        gmaker.forward(batch,
                       input_tensor,
                       random_rotation=args.rotate,
                       random_translation=args.translate)
        batch.extract_label(0, float_labels)
        labels.extend(list(float_labels.detach().cpu().numpy()))
        batch_predictions = []
        for _ in range(args.num_rotate):
            gmaker.forward(batch,
                           input_tensor,
                           random_rotation=args.rotate,
                           random_translation=args.translate)
            # Predict; no autograd graph is needed for inference.
            with torch.no_grad():
                output = F.softmax(model(input_tensor), dim=1)
            # Column 1 is the probability of class 1.
            batch_predictions.append(list(output.detach().cpu().numpy()[:, 1]))
        # Average the predictions over the rotations of this batch.
        predictions.extend(list(np.mean(batch_predictions, axis=0)))

        # Progress
        if it % args.display_iter == 0:
            print("Processed: %d / %d examples" %
                  (it * args.batch_size, num_samples))

    # Print performance (drop the surplus entries of the final batch)
    labels = labels[:num_samples]
    predictions = predictions[:num_samples]
    print("Test AUC: %.2f" % (roc_auc_score(labels, predictions)), flush=True)

    # Save predictions: each input line is written back prefixed with its prediction.
    with open(args.output_path, 'w') as f:
        f.writelines(
            str(pred) + ' ' + line for line, pred in zip(lines, predictions))
コード例 #19
0
import sys, molgrid
import numpy as np
sys.path.insert(0, '.')
import liGAN

# Receptor and ligand atom typers, each built from its own type-map file.
rec_typer = molgrid.FileMappedGninaTyper('data/my_rec_map')
lig_typer = molgrid.FileMappedGninaTyper('data/my_lig_map')
lig_channels = liGAN.atom_types.get_channels_from_map(lig_typer)

print('loading data')
# NOTE: `'<path>' and ''` always evaluates to '' (a non-empty string is truthy, so
# `and` yields its right operand). Both molcache arguments are therefore passed as
# empty strings; deleting the `and ''` suffix passes the cache path instead.
ex_provider = molgrid.ExampleProvider(
    rec_typer,
    lig_typer,
    data_root='data/molport',
    recmolcache='data/molportFULL_rec.molcache2' and '',
    ligmolcache='data/molportFULL_lig.molcache2' and '',
    shuffle=True)
ex_provider.populate('data/molportFULL_rand_test0_1000.types')

# Only whole batches are processed; a remainder smaller than batch_size is skipped.
batch_size = 1000
n_examples = ex_provider.size()
n_batches = n_examples // batch_size

# Running total with one slot per ligand type.
type_counts = np.zeros(lig_typer.num_types())
# NOTE(review): mol_count is initialised here but not updated in the visible code.
mol_count = 0

for i in range(n_batches):
    for ex in ex_provider.next_batch(batch_size):
        # coord_sets[1] is taken as the ligand (built with the ligand channels).
        struct = liGAN.atom_structs.AtomStruct.from_coord_set(
            ex.coord_sets[1], lig_channels)
        type_counts += struct.type_counts
コード例 #20
0
    def __init__(
        self,
        data_file,
        data_root,
        batch_size,
        rec_typer,
        lig_typer,
        use_rec_elems=True,
        resolution=0.5,
        dimension=None,
        grid_size=None,
        shuffle=False,
        random_rotation=False,
        random_translation=0.0,
        diff_cond_transform=False,
        diff_cond_structs=False,
        n_samples=1,
        rec_molcache=None,
        lig_molcache=None,
        cache_structs=True,
        device='cuda',
        debug=False,
    ):
        """Set up atom typers, example provider and grid maker, then load ``data_file``.

        Exactly one of ``dimension`` and ``grid_size`` has to be given; a
        ``grid_size`` is converted to a dimension using ``resolution``.
        ``rec_typer`` and ``lig_typer`` are '-'-separated typer specifications.
        """
        super().__init__()

        only_one_given = bool(dimension) != bool(grid_size)
        assert only_one_given, \
            'must specify one of either dimension or grid_size'
        if grid_size:
            dimension = atom_grids.size_to_dimension(grid_size, resolution)

        # Ligand typer first, then the receptor typer.
        lig_spec = lig_typer.split('-')
        self.lig_typer = AtomTyper.get_typer(*lig_spec, rec=False)
        rec_spec = rec_typer.split('-')
        self.rec_typer = AtomTyper.get_typer(*rec_spec, rec=use_rec_elems)

        # With diff_cond_structs the (receptor, ligand) typer pair is passed twice.
        n_typer_pairs = 2 if diff_cond_structs else 1
        atom_typers = [self.rec_typer, self.lig_typer] * n_typer_pairs

        # Example provider; a missing molcache is passed on as an empty string.
        provider_options = dict(
            data_root=data_root,
            recmolcache=rec_molcache or '',
            ligmolcache=lig_molcache or '',
            cache_structs=cache_structs,
            shuffle=shuffle,
            num_copies=n_samples,
        )
        self.ex_provider = molgrid.ExampleProvider(
            *atom_typers, **provider_options)

        # Grid maker
        self.grid_maker = molgrid.GridMaker(
            resolution=resolution,
            dimension=dimension,
            gaussian_radius_multiple=-1.5,
        )
        self.batch_size = batch_size

        # Transformation settings
        self.random_rotation = random_rotation
        self.random_translation = random_translation
        self.diff_cond_transform = diff_cond_transform
        self.diff_cond_structs = diff_cond_structs
        self.debug = debug
        self.device = device

        # Transform interpolation state
        self.cond_interp = TransformInterpolation(n_samples=n_samples)

        # Finally read the examples listed in the data file.
        self.ex_provider.populate(data_file)
コード例 #21
0
def test_example_provider_epoch_iteration():
    """Check epoch sizes and batch counts for several provider configurations."""
    types_path = datadir + "/small.types"
    struct_root = datadir + "/structs"

    def advance(previous, reported):
        # An epoch counter may only stay put or grow by exactly one between batches.
        assert reported >= previous
        if reported > previous:
            assert reported == previous + 1
        return reported

    # Plain provider, large epochs.
    provider = molgrid.ExampleProvider(
        data_root=struct_root,
        default_batch_size=10,
        iteration_scheme=molgrid.IterationScheme.LargeEpoch)
    provider.populate(types_path)

    assert provider.small_epoch_size() == 1000
    assert provider.large_epoch_size() == 1000
    assert sum(1 for _ in provider) == 100

    # Balanced provider, large epochs.
    provider = molgrid.ExampleProvider(
        data_root=struct_root,
        default_batch_size=10,
        balanced=True,
        iteration_scheme=molgrid.IterationScheme.LargeEpoch)
    provider.populate(types_path)

    assert provider.small_epoch_size() == 326
    assert provider.large_epoch_size() == 1674
    assert sum(1 for _ in provider) == 168

    # Receptor-stratified provider, small epochs.
    provider = molgrid.ExampleProvider(
        data_root=struct_root,
        default_batch_size=10,
        balanced=False,
        stratify_receptor=True,
        iteration_scheme=molgrid.IterationScheme.SmallEpoch)
    provider.populate(types_path)

    assert provider.small_epoch_size() == 120
    assert provider.large_epoch_size() == 1260
    assert sum(1 for _ in provider) == 12

    # Balanced and receptor-stratified, small epochs.
    seen = set()
    provider = molgrid.ExampleProvider(
        data_root=struct_root,
        default_batch_size=8,
        balanced=True,
        stratify_receptor=True,
        iteration_scheme=molgrid.IterationScheme.SmallEpoch)
    provider.populate(types_path)

    assert provider.small_epoch_size() == 112
    assert provider.large_epoch_size() == 2240

    n_batches = 0
    small_epoch = 0
    large_epoch = 0
    for batch in provider:
        for example in batch:
            key = example.coord_sets[0].src + ":" + example.coord_sets[1].src
            # small epoch should see an example at _most_ once
            assert key not in seen
            seen.add(key)
        n_batches += 1
        small_epoch = advance(small_epoch, provider.get_small_epoch_num())
        large_epoch = advance(large_epoch, provider.get_large_epoch_num())
    assert n_batches == 14

    # Balanced and receptor-stratified, large epochs.
    provider = molgrid.ExampleProvider(
        data_root=struct_root,
        default_batch_size=10,
        balanced=True,
        stratify_receptor=True,
        iteration_scheme=molgrid.IterationScheme.LargeEpoch)
    provider.populate(types_path)

    assert provider.small_epoch_size() == 112
    assert provider.large_epoch_size() == 2240

    seen = set()
    n_batches = 0
    small_epoch = 0
    large_epoch = 0
    for batch in provider:
        for example in batch:
            seen.add(example.coord_sets[0].src + ":" + example.coord_sets[1].src)
        n_batches += 1
        small_epoch = advance(small_epoch, provider.get_small_epoch_num())
        large_epoch = advance(large_epoch, provider.get_large_epoch_num())
    assert n_batches == 224
    # large epoch should see everything at least once
    assert len(seen) == provider.size()
コード例 #22
0
def test_vector_types_mol():
    '''Compare index-typed and vector-typed gridding of the same molecule.

    The first example of small.types is loaded twice, once from a default
    provider and once from a provider built with make_vector_types=True.
    Forward grids and backward outputs are produced through both the cpu()
    and gpu() views and must match each other within the tolerances below.
    '''
    types_file = datadir + "/small.types"
    structs_dir = datadir + "/structs"

    # Default provider: the example is asserted below to lack vector types.
    plain_provider = molgrid.ExampleProvider(data_root=structs_dir)
    plain_provider.populate(types_file)
    plain_ex = plain_provider.next()

    # Same input file, but the provider is asked for vector types.
    vector_provider = molgrid.ExampleProvider(data_root=structs_dir,
                                              make_vector_types=True)
    vector_provider.populate(types_file)
    vector_ex = vector_provider.next()

    assert vector_ex.has_vector_types()
    assert not plain_ex.has_vector_types()

    gmaker = molgrid.GridMaker()
    # NOTE: original author felt this should be named grid_dims or
    # get_grid_dims.
    dims = gmaker.grid_dimensions(plain_ex.num_types())

    # Four identically shaped output grids, created in this order:
    # plain (cpu view, gpu view) then vector-typed (cpu view, gpu view).
    grid_cpu, grid_gpu, vgrid_cpu, vgrid_gpu = (
        molgrid.MGrid4f(*dims) for _ in range(4))

    # All-ones grid handed to every backward() call below
    # (presumably the upstream gradient -- confirm against GridMaker docs).
    ones = np.ones(dims, np.float32)
    ones_grid = molgrid.MGrid4f(*dims)
    ones_grid.copyFrom(ones)

    # --- index types ---
    gmaker.forward(plain_ex, grid_cpu.cpu())
    gmaker.forward(plain_ex, grid_gpu.gpu())

    # The center of the last coordinate set is reused for every backward().
    center = plain_ex.coord_sets[-1].center()
    merged = plain_ex.merge_coordinates()
    n_coords = merged.size()
    back_coords_cpu = molgrid.MGrid2f(n_coords, 3)
    back_coords_gpu = molgrid.MGrid2f(n_coords, 3)

    gmaker.backward(center, merged, ones_grid.cpu(), back_coords_cpu.cpu())
    gmaker.backward(center, merged, ones_grid.gpu(), back_coords_gpu.gpu())

    # --- vector types ---
    # Must be switched on before the vector-typed forward/backward calls.
    gmaker.set_radii_type_indexed(True)

    gmaker.forward(vector_ex, vgrid_cpu.cpu())
    gmaker.forward(vector_ex, vgrid_gpu.gpu())

    vmerged = vector_ex.merge_coordinates()
    n_vcoords = vmerged.size()
    n_vtypes = vmerged.num_types()
    vback_coords_cpu = molgrid.MGrid2f(n_vcoords, 3)
    vback_coords_gpu = molgrid.MGrid2f(n_vcoords, 3)
    vback_types_cpu = molgrid.MGrid2f(n_vcoords, n_vtypes)
    vback_types_gpu = molgrid.MGrid2f(n_vcoords, n_vtypes)

    gmaker.backward(center, vmerged, ones_grid.cpu(),
                    vback_coords_cpu.cpu(), vback_types_cpu.cpu())
    gmaker.backward(center, vmerged, ones_grid.gpu(),
                    vback_coords_gpu.gpu(), vback_types_gpu.gpu())

    # (actual, desired, atol) triples, checked in the order listed.
    expectations = (
        # forward grids: plain vs vector, then vector cpu vs vector gpu
        (grid_cpu, vgrid_cpu, 1e-5),
        (grid_gpu, vgrid_gpu, 1e-5),
        (vgrid_cpu, vgrid_gpu, 1e-5),
        # backward coordinate outputs: vector vs plain
        (vback_coords_cpu, back_coords_cpu, 1e-5),
        (vback_coords_gpu, back_coords_gpu, 1e-5),
        # cpu vs gpu comparisons use a looser tolerance
        (vback_coords_cpu, vback_coords_gpu, 1e-4),
        (vback_types_cpu, vback_types_gpu, 1e-4),
    )
    for actual, desired, tol in expectations:
        np.testing.assert_allclose(actual.tonumpy(),
                                   desired.tonumpy(),
                                   atol=tol)