Ejemplo n.º 1
0
def test_combiner_priors():
    # Verifies that a CGnet whose feature is a FeatureCombiner wrapping
    # [geometry_feature, zscore_layer] produces exactly the same energies
    # and forces as a CGnet that receives geometry_feature directly via the
    # feature __init__ kwarg and carries zscore_layer at the head of its
    # architecture. Both models use the bond_potential prior.

    # Model A: geometry feature and z-score layer live inside the combiner
    combiner = FeatureCombiner([geometry_feature, zscore_layer])
    n_inputs = len(geom_stats.master_description_tuples)
    hidden_arch = _get_random_architecture(n_inputs)
    combiner_model = CGnet(hidden_arch, ForceLoss(),
                           feature=combiner,
                           priors=[bond_potential])

    # Forward the random protein coordinates and check the output shapes
    combiner_energy, combiner_forces = combiner_model.forward(coords_torch)
    np.testing.assert_array_equal(combiner_energy.size(), (n_frames, 1))
    np.testing.assert_array_equal(combiner_forces.size(),
                                  (n_frames, n_beads, 3))

    # Model B: the traditional feature=GeometryFeature construction, with
    # the z-score layer prepended to the very same hidden layers
    reference_arch = [zscore_layer] + hidden_arch
    reference_model = CGnet(reference_arch, ForceLoss(),
                            feature=geometry_feature,
                            priors=[bond_potential])
    reference_energy, reference_forces = reference_model.forward(coords_torch)

    # Both constructions must agree exactly
    np.testing.assert_array_equal(combiner_energy.detach().numpy(),
                                  reference_energy.detach().numpy())
    np.testing.assert_array_equal(combiner_forces.detach().numpy(),
                                  reference_forces.detach().numpy())
Ejemplo n.º 2
0
def test_combiner_schnet_in_cgnet():
    # Verifies that a FeatureCombiner holding only a SchnetFeature yields
    # the same CGnet output as handing that SchnetFeature directly to CGnet
    # through the feature __init__ kwarg.

    # The SchnetFeature is requested with calculate_geometry=True so that
    # it is capable of calculating pairwise distances on its own
    (schnet_feature,
     embedding_property,
     feature_size) = _get_random_schnet_feature(calculate_geometry=True)
    combiner = FeatureCombiner([schnet_feature])

    # A single random hidden architecture is shared by both models
    hidden_arch = _get_random_architecture(feature_size)

    # Model A: SchnetFeature wrapped in the combiner
    combiner_model = CGnet(hidden_arch, ForceLoss(), feature=combiner)
    combiner_energy, combiner_forces = combiner_model.forward(
        coords_torch, embedding_property=embedding_property)

    # Model B: same architecture, SchnetFeature embedded directly
    direct_model = CGnet(hidden_arch, ForceLoss(), feature=schnet_feature)
    direct_energy, direct_forces = direct_model.forward(
        coords_torch, embedding_property=embedding_property)

    # Both constructions must agree exactly
    np.testing.assert_array_equal(combiner_energy.detach().numpy(),
                                  direct_energy.detach().numpy())
    np.testing.assert_array_equal(combiner_forces.detach().numpy(),
                                  direct_forces.detach().numpy())
Ejemplo n.º 3
0
def test_combiner_full():
    # Exercises GeometryFeature, SchnetFeature, and priors together inside
    # a single CGnet and checks the shapes of the resulting output.
    (schnet_feature,
     embedding_property,
     feature_size) = _get_random_schnet_feature(calculate_geometry=False)

    # The combiner is given the indices of the distance features
    distance_indices = geom_stats.return_indices('Distances')
    combiner = FeatureCombiner(
        [geometry_feature, zscore_layer, schnet_feature],
        distance_indices=distance_indices)

    # Terminal fully-connected network: three Tanh layers of a shared
    # random width followed by a linear layer with scalar output
    hidden_width = np.random.randint(5, high=10)
    terminal_arch = LinearLayer(feature_size, hidden_width,
                                activation=nn.Tanh())
    for _ in range(2):
        terminal_arch += LinearLayer(hidden_width, hidden_width,
                                     activation=nn.Tanh())
    terminal_arch += LinearLayer(hidden_width, 1, activation=None)

    # Assemble the CGnet with the bond_potential prior and the combiner
    model = CGnet(terminal_arch, ForceLoss(),
                  feature=combiner,
                  priors=[bond_potential])

    # Forward the random protein data through the model
    energy, forces = model.forward(coords_torch,
                                   embedding_property=embedding_property)

    # The output must have one energy per frame and one 3-vector per bead
    np.testing.assert_array_equal(energy.size(), (n_frames, 1))
    np.testing.assert_array_equal(forces.size(), (n_frames, n_beads, 3))
Ejemplo n.º 4
0
def test_bead_energy_masking():
    # Tests to make sure that masked energies and forces are properly zeroed
    # by the bead mask used with variable sized input

    # We create a simple random embedding layer and some
    # mock, padded embeddings that originally have varying length
    num_feats = np.random.randint(10, 50)
    n_embeddings = np.random.randint(beads, 2 * beads)
    embedding_layer = CGBeadEmbedding(n_embeddings=n_embeddings,
                                      embedding_dim=num_feats)
    variable_beads = np.random.randint(3, beads,
                                       size=frames)  # random protein sizes
    variable_embeddings = [
        np.random.randint(1, high=beads, size=bead) for bead in variable_beads
    ]

    # Each embedding is right-padded with zeros up to `beads` entries.
    # np.pad keeps the integer dtype (np.hstack with np.zeros would upcast
    # the ids to float64), and converting one contiguous ndarray avoids the
    # slow list-of-ndarrays path of torch.tensor.
    padded_embedding_list = [
        np.pad(embedding, (0, beads - embedding.shape[0]), mode='constant')
        for embedding in variable_embeddings
    ]
    embedding_property = torch.tensor(np.array(padded_embedding_list)).long()

    # we create a simple 2 layer random width terminal network
    rand = np.random.randint(1, 10)
    arch = (LinearLayer(num_feats, rand, bias=True, activation=nn.Tanh()) +
            LinearLayer(rand, 1, bias=True, activation=nn.Tanh()))

    # Next we create a basic SchnetFeature
    rbf_layer = GaussianRBF()
    feature = SchnetFeature(num_feats,
                            embedding_layer=embedding_layer,
                            rbf_layer=rbf_layer,
                            n_interaction_blocks=np.random.randint(2, 5),
                            calculate_geometry=True,
                            n_beads=beads)

    # Next, we instance a CGSchNet model using the above objects
    # with force matching as a loss criterion. We forward the coords
    # and the embedding property through as well
    model = CGnet(arch, ForceLoss(), feature=feature)
    _, force = model.forward(coords,
                             embedding_property=embedding_property)

    # the force components for masked beads should all be zero if the padding
    # due to variable length input is masked properly
    # We check each frame of the above output individually: the beads from
    # index variable_beads[i] onwards are the padded ones
    for i in range(frames):
        masked_forces = force[i][variable_beads[i]:]
        zero_forces = np.zeros((beads - variable_beads[i], 3))
        np.testing.assert_array_equal(masked_forces.detach().numpy(),
                                      zero_forces)
0
def test_cgnet():
    # Checks that CGnet stores its criterion, architecture, and priors as
    # given, and that its energy/force output has the expected sizes when a
    # harmonic bond prior is attached alongside a GeometryFeature layer.

    # Harmonic bond prior built from the bond statistics
    bond_indices = geom_stats.return_indices('Bonds')
    bond_statistics, _ = geom_stats.get_prior_statistics(features='Bonds',
                                                         as_list=True)
    bond_prior = HarmonicLayer(bond_indices, bond_statistics)

    # Geometry feature layer; its output width feeds the first linear layer
    geometry_layer = GeometryFeature(feature_tuples='all_backbone',
                                     n_beads=beads)
    n_features = geometry_layer(coords).size()[1]

    # Hidden architecture: four Tanh layers sharing one random width,
    # followed by a linear layer with scalar output
    width = np.random.randint(1, 10)
    hidden_arch = LinearLayer(n_features, width, bias=True,
                              activation=nn.Tanh())
    for _ in range(3):
        hidden_arch = hidden_arch + LinearLayer(width, width, bias=True,
                                                activation=nn.Tanh())
    hidden_arch = hidden_arch + LinearLayer(width, 1, bias=True,
                                            activation=None)

    # CGnet with force matching as the loss criterion
    model = CGnet(hidden_arch,
                  ForceLoss(),
                  feature=geometry_layer,
                  priors=[bond_prior])

    # The prior must be embedded
    assert model.priors is not None

    # The stored architecture must have the same length as the one supplied
    assert len(hidden_arch) == len(model.arch)

    # The criterion must be embedded with the right type
    assert isinstance(model.criterion, ForceLoss)

    # Forward the test protein data: one energy per frame, and forces with
    # the same dimensions as the input coordinates
    energy, force = model.forward(coords)
    expected_energy_size = (coords.size()[0], 1)
    np.testing.assert_array_equal(energy.size(), expected_energy_size)
    np.testing.assert_array_equal(force.size(), coords.size())
Ejemplo n.º 6
0
def test_combiner_shape_with_geometry_propagation():
    # Checks output shapes for a network with schnet features in which the
    # geometry features are propagated through the neural network as well.

    # Geometry feature over all pairwise distances plus backbone angles
    # and dihedrals
    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)

    (schnet_feature,
     embedding_property,
     feature_size) = _get_random_schnet_feature(calculate_geometry=False)

    # Combiner with propagate_geometry switched on; it is also given the
    # indices of the distance features
    distance_indices = geom_stats.return_indices('Distances')
    combiner = FeatureCombiner([full_geometry_feature, schnet_feature],
                               distance_indices=distance_indices,
                               propagate_geometry=True)

    # Width of the geometry output: one entry per feature tuple, plus one
    # extra per four-body dihedral, because each dihedral contributes both
    # a cosine and a sine
    feature_tuples = full_geometry_feature.feature_tuples
    n_dihedrals = sum(1 for f in feature_tuples if len(f) == 4)
    geom_feature_length = len(feature_tuples) + n_dihedrals

    # Input width of the first linear layer: the flattened schnet features
    # concatenated with the geometry features
    total_size = feature_size * n_beads + geom_feature_length

    # Terminal network: three Tanh layers of a shared random width followed
    # by a linear layer with scalar output
    hidden_width = np.random.randint(5, high=10)
    terminal_arch = LinearLayer(total_size, hidden_width,
                                activation=nn.Tanh())
    for _ in range(2):
        terminal_arch += LinearLayer(hidden_width, hidden_width,
                                     activation=nn.Tanh())
    terminal_arch += LinearLayer(hidden_width, 1, activation=None)

    model = CGnet(terminal_arch, ForceLoss(),
                  feature=combiner,
                  priors=[bond_potential])

    # Forward the random protein data through the model
    energy, forces = model.forward(coords_torch,
                                   embedding_property=embedding_property)

    # The output must have one energy per frame and one 3-vector per bead
    np.testing.assert_array_equal(energy.size(), (n_frames, 1))
    np.testing.assert_array_equal(forces.size(), (n_frames, n_beads, 3))
Ejemplo n.º 7
0
def test_linear_regression():
    # Compares a briefly trained CGnet against sklearn linear regression on
    # the linear-force test data.

    # Notes
    # -----
    # The comparison is deliberately forgiving: the two MSE values only have
    # to agree to one decimal place. The dataset and the short training
    # routine are stochastic, so a tighter tolerance could make the test
    # fail occasionally. This is also why np.testing.assert_almost_equal is
    # used here rather than np.testing.assert_allclose.

    # CGnet with three Softplus LinearLayers: 1 -> 15 -> 15 -> 1
    hidden_width = 15
    network = LinearLayer(1, hidden_width,
                          activation=nn.Softplus(), bias=True)
    network += LinearLayer(hidden_width, hidden_width,
                           activation=nn.Softplus(), bias=True)
    network += LinearLayer(hidden_width, 1,
                           activation=nn.Softplus(), bias=True)
    model = CGnet(network, ForceLoss())

    # Train with Adam for 35 epochs on the linear regression data; the loss
    # kept afterwards is the one evaluated in the final epoch
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    n_epochs = 35
    for _ in range(n_epochs):
        optimizer.zero_grad()
        _, predicted_force = model.forward(x0)
        training_loss = model.criterion(predicted_force, y0)
        training_loss.backward()
        optimizer.step()
    cgnet_mse = training_loss.detach().numpy()

    # numpy versions of the training data
    inputs = x0.detach().numpy()
    targets = y0.numpy()

    # sklearn linear regression fitted on the same data for comparison
    sklearn_prediction = LinearRegression().fit(inputs,
                                                targets).predict(inputs)

    # The two MSE losses must be close up to the tolerance
    np.testing.assert_almost_equal(mse(targets, sklearn_prediction),
                                   cgnet_mse, decimal=1)
Ejemplo n.º 8
0
def test_cgnet_simulation():
    # Runs a Simulation from a CGnet built with a GeometryFeature and checks
    # the shapes of the saved coordinates, forces, and potentials.

    # Harmonic bond prior built from the bond statistics
    bond_indices = geom_stats.return_indices('Bonds')
    bond_statistics, _ = geom_stats.get_prior_statistics(features='Bonds',
                                                         as_list=True)
    bond_prior = HarmonicLayer(bond_indices, bond_statistics)

    # Geometry feature layer; its output width feeds the first linear layer
    geometry_layer = GeometryFeature(feature_tuples='all_backbone',
                                     n_beads=beads)
    n_features = geometry_layer(coords).size()[1]

    # Hidden architecture: four Tanh layers sharing one random width,
    # followed by a linear layer with scalar output
    width = np.random.randint(1, 10)
    hidden_arch = LinearLayer(n_features, width, bias=True,
                              activation=nn.Tanh())
    for _ in range(3):
        hidden_arch = hidden_arch + LinearLayer(width, width, bias=True,
                                                activation=nn.Tanh())
    hidden_arch = hidden_arch + LinearLayer(width, 1, bias=True,
                                            activation=None)

    # CGnet with force matching as the loss criterion, switched to eval mode
    model = CGnet(hidden_arch,
                  ForceLoss(),
                  feature=geometry_layer,
                  priors=[bond_prior])
    model.eval()

    # Mock target forces for the single training step below
    target_forces = torch.randn((frames, beads, 3), requires_grad=False)

    # One optimisation step with Adam
    optimizer = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    optimizer.zero_grad()
    _, pred_forces = model.forward(coords)
    step_loss = model.criterion(pred_forces, target_forces)
    step_loss.backward()
    optimizer.step()

    # Random simulation length (4 or 8) and random save interval (2 or 4)
    length = np.random.choice([2, 4]) * 2
    save = np.random.choice([2, 4])

    # Build the simulation and produce a CG trajectory, saving forces and
    # potentials along the way
    my_sim = Simulation(model,
                        coords,
                        beta=geom_stats.beta,
                        length=length,
                        save_interval=save,
                        save_forces=True,
                        save_potential=True)
    traj = my_sim.simulate()

    # Every saved quantity must hold length // save entries per frame
    n_saved = length // save
    expected_coordinate_shape = (frames, n_saved, beads, dims)
    assert traj.shape == expected_coordinate_shape
    assert my_sim.simulated_forces.shape == expected_coordinate_shape
    assert my_sim.simulated_potential.shape == (frames, n_saved, 1)
Ejemplo n.º 9
0
def test_dataset_loss_with_optimizer_and_regularization():
    # Trains two identical models side by side, one with a hand-written
    # batch loop and one through dataset_loss, both with an optimizer and
    # regularization, and checks that the per-epoch training losses
    # approximately match.

    num_epochs = 5

    # Per-epoch losses collected from each approach
    manual_epoch_losses = []
    dataset_epoch_losses = []

    # Two separate models and optimizers keep the runs independent. The
    # architecture MUST be deep copied, otherwise the models are tethered
    # to each other.
    model_manual = CGnet(copy.deepcopy(arch), ForceLoss()).float()
    model_dataset = CGnet(copy.deepcopy(arch), ForceLoss()).float()
    optimizer_manual = torch.optim.Adam(model_manual.parameters(), lr=1e-5)
    optimizer_dataset = torch.optim.Adam(model_dataset.parameters(), lr=1e-5)

    # A nonrandom loader so both approaches see the batches in one order
    nonrandom_loader = DataLoader(dataset, batch_size=batch_size)

    for _ in range(num_epochs):
        # --- manual training pass ---
        weighted_loss_sum = 0.0
        total_weight = 0

        for batch_index, batch in enumerate(nonrandom_loader):
            optimizer_manual.zero_grad()
            coord, force, embedding_property = batch

            # Batches are weighted by their size relative to the first one
            if batch_index == 0:
                ref_batch_size = coord.numel()
            weight = coord.numel() / ref_batch_size

            _, pred_force = model_manual.forward(coord, embedding_property)
            batch_loss = model_manual.criterion(pred_force, force)
            batch_loss.backward()
            optimizer_manual.step()

            # Regularize after the optimizer step
            lipschitz_projection(model_manual, strength=lipschitz_strength)

            weighted_loss_sum += batch_loss.detach().cpu() * weight
            total_weight += weight

        manual_epoch_loss = weighted_loss_sum / total_weight
        manual_epoch_losses.append(manual_epoch_loss.numpy())

        # --- dataset_loss training pass ---
        dataset_epoch_losses.append(
            dataset_loss(model_dataset, nonrandom_loader,
                         optimizer_dataset,
                         _regularization_function))

    np.testing.assert_allclose(manual_epoch_losses,
                               dataset_epoch_losses,
                               rtol=1e-4)
Ejemplo n.º 10
0
def test_combiner_output_with_geometry_propagation():
    # Checks that CGnet with a geometry-propagating FeatureCombiner gives
    # the same energies and forces as a manual feature concatenation.

    # Geometry feature over all pairwise distances plus backbone angles
    # and dihedrals
    full_geometry_feature = GeometryFeature(feature_tuples='all_backbone',
                                            n_beads=n_beads)
    # Random schnet feature that does not calculate geometry itself
    (schnet_feature,
     embedding_property,
     feature_size) = _get_random_schnet_feature(calculate_geometry=False)
    distance_indices = geom_stats.return_indices('Distances')

    # Terminal fully-connected network shared by the manual and CGnet paths.
    # The geometry output has one entry per feature tuple, plus one extra
    # per four-body dihedral because each contributes a cosine and a sine.
    feature_tuples = full_geometry_feature.feature_tuples
    n_dihedrals = sum(1 for f in feature_tuples if len(f) == 4)
    geom_feature_length = len(feature_tuples) + n_dihedrals
    total_size = feature_size * n_beads + geom_feature_length
    hidden_width = np.random.randint(5, high=10)
    terminal_arch = LinearLayer(total_size, hidden_width,
                                activation=nn.Tanh())
    for _ in range(2):
        terminal_arch += LinearLayer(hidden_width, hidden_width,
                                     activation=nn.Tanh())
    terminal_arch += LinearLayer(hidden_width, 1, activation=None)

    # --- manual calculation ---
    # The distances handed to the schnet feature are reindexed through the
    # redundant distance mapping
    geometry_output = full_geometry_feature(coords_torch)
    redundant_distances = geometry_output[
        :, geom_stats.redundant_distance_mapping]
    schnet_output = schnet_feature(redundant_distances, embedding_property)

    # Flatten the schnet output per frame and concatenate it in front of
    # the geometry output
    frame_count = coords_torch.shape[0]
    flat_schnet_output = schnet_output.reshape(frame_count, -1)
    concatenated_features = torch.cat((flat_schnet_output, geometry_output),
                                      dim=1)

    # Terminal network energy plus the bond potential contribution; the
    # forces are the negative gradient of the summed energy
    terminal_network = nn.Sequential(*terminal_arch)
    manual_energy = terminal_network(concatenated_features)
    bond_features = geometry_output[:, bond_potential.callback_indices]
    manual_energy += bond_potential(bond_features)
    manual_forces = torch.autograd.grad(-torch.sum(manual_energy),
                                        coords_torch)[0]

    # --- CGnet calculation ---
    # The same layers and prior, with the concatenation done internally
    # through a FeatureCombiner that propagates the geometry
    combiner = FeatureCombiner([full_geometry_feature, schnet_feature],
                               distance_indices=distance_indices,
                               propagate_geometry=True)
    model = CGnet(terminal_arch, ForceLoss(),
                  feature=combiner,
                  priors=[bond_potential])
    energy, forces = model.forward(coords_torch,
                                   embedding_property=embedding_property)

    # Manual and CGnet results must match numerically
    np.testing.assert_array_equal(energy.detach().numpy(),
                                  manual_energy.detach().numpy())
    np.testing.assert_array_equal(forces.detach().numpy(),
                                  manual_forces.detach().numpy())