Example #1
 def test_gradient_penalty_non_zero(self):
     # Test to verify that a non-zero gradient penalty is computed from the first training step
     with TemporaryDirectory() as tmpdirname:
         latent = np.random.rand(64, 1, 512)
         os.makedirs(os.path.dirname(tmpdirname + '/encoded_smiles.latent'),
                     exist_ok=True)
         with open(tmpdirname + '/encoded_smiles.latent', 'w') as f:
             json.dump(latent.tolist(), f)
         C = CreateModelRunner(input_data_path=tmpdirname +
                               '/encoded_smiles.latent',
                               output_model_folder=tmpdirname)
         C.run()
         D = Discriminator.load(tmpdirname + '/discriminator.txt')
         G = Generator.load(tmpdirname + '/generator.txt')
         with open(tmpdirname + '/encoded_smiles.latent', "r") as json_smiles:
             latent_space_mols = np.array(json.load(json_smiles))
         testSampler = Sampler(G)
         latent_space_mols = latent_space_mols.reshape(
             latent_space_mols.shape[0], 512)
         T = torch.cuda.FloatTensor
         G.cuda()
         D.cuda()
         dataloader = torch.utils.data.DataLoader(
             LatentMolsDataset(latent_space_mols),
             shuffle=True,
             batch_size=64,
             drop_last=True)
         for _, real_mols in enumerate(dataloader):
             real_mols = real_mols.type(T)
             fake_mols = testSampler.sample(real_mols.shape[0])
             alpha = T(np.random.random((real_mols.size(0), 1)))
             interpolates = (alpha * real_mols +
                             ((1 - alpha) * fake_mols)).requires_grad_(True)
             d_interpolates = D(interpolates)
             fake = T(real_mols.shape[0], 1).fill_(1.0)
             gradients = autograd.grad(
                 outputs=d_interpolates,
                 inputs=interpolates,
                 grad_outputs=fake,
                 create_graph=True,
                 retain_graph=True,
                 only_inputs=True,
             )[0]
             gradients = gradients.view(gradients.size(0), -1)
             gradient_penalty = ((gradients.norm(2, dim=1) - 1)**2).mean()
             self.assertTrue(gradient_penalty.data != 0)
             break
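
The loop body above is the standard WGAN-GP gradient penalty. Below is a minimal sketch of the same computation factored into a standalone helper; the name compute_gradient_penalty and the use of torch.rand / torch.ones_like are illustrative choices, not part of the repository:

import torch
from torch import autograd

def compute_gradient_penalty(D, real_mols, fake_mols):
    # Interpolate between real and generated samples, then measure how far
    # the critic's gradient norm at those points deviates from 1 (WGAN-GP).
    alpha = torch.rand(real_mols.size(0), 1, device=real_mols.device)
    interpolates = (alpha * real_mols + (1 - alpha) * fake_mols).requires_grad_(True)
    d_interpolates = D(interpolates)
    gradients = autograd.grad(outputs=d_interpolates,
                              inputs=interpolates,
                              grad_outputs=torch.ones_like(d_interpolates),
                              create_graph=True,
                              retain_graph=True,
                              only_inputs=True)[0]
    gradients = gradients.view(gradients.size(0), -1)
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()

In a full WGAN-GP critic loss this penalty is typically scaled by a lambda factor (commonly 10) before being added; the test only needs it to be non-zero.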
Example #2
 def test_discriminator_shape(self):
     # Test to verify that a network of the same dimensions is created regardless of the SMILES input file size
     with TemporaryDirectory() as tmpdirname:
         for j in [1, 64, 256, 1024]:
             latent = np.random.rand(j, 1, 512)
             os.makedirs(os.path.dirname(tmpdirname +
                                         '/encoded_smiles.latent'),
                         exist_ok=True)
             with open(tmpdirname + '/encoded_smiles.latent', 'w') as f:
                 json.dump(latent.tolist(), f)
             C = CreateModelRunner(input_data_path=tmpdirname +
                                   '/encoded_smiles.latent',
                                   output_model_folder=tmpdirname)
             C.run()
             D = Discriminator.load(tmpdirname + '/discriminator.txt')
             D_params = []
             for param in D.parameters():
                 D_params.append(param.view(-1))
             D_params = torch.cat(D_params)
             reference = 394241
             self.assertEqual(D_params.shape[0], reference,
                              "Network does not match expected size")
Example #3
    def test_separate_optimizers(self):
        # Verify that two separate optimizer instances are created during the TrainModelRunner.py initialization
        # This ensures the two components train separately
        with TemporaryDirectory() as tmpdirname:

            latent = np.random.rand(64, 1, 512)
            os.makedirs(os.path.dirname(tmpdirname + '/encoded_smiles.latent'),
                        exist_ok=True)
            with open(tmpdirname + '/encoded_smiles.latent', 'w') as f:
                json.dump(latent.tolist(), f)

            C = CreateModelRunner(input_data_path=tmpdirname +
                                  '/encoded_smiles.latent',
                                  output_model_folder=tmpdirname)
            C.run()
            D = Discriminator.load(tmpdirname + '/discriminator.txt')
            G = Generator.load(tmpdirname + '/generator.txt')
            optimizer_G = torch.optim.Adam(G.parameters())
            optimizer_D = torch.optim.Adam(D.parameters())
            self.assertTrue(type(optimizer_G) == type(
                optimizer_D))  # must return the same type of object
            self.assertTrue(
                optimizer_G
                is not optimizer_D)  # object identity MUST be different
Example #4
    def __init__(self,
                 input_data_path,
                 output_model_folder,
                 decode_mols_save_path='',
                 n_epochs=200,
                 starting_epoch=1,
                 batch_size=64,
                 lr=0.0002,
                 b1=0.5,
                 b2=0.999,
                 n_critic=5,
                 sample_interval=10,
                 save_interval=100,
                 sample_after_training=100,
                 message=""):
        self.message = message

        # init params
        self.input_data_path = input_data_path
        self.output_model_folder = output_model_folder
        self.n_epochs = n_epochs
        self.starting_epoch = starting_epoch
        self.batch_size = batch_size
        self.lr = lr
        self.b1 = b1
        self.b2 = b2
        self.n_critic = n_critic
        self.sample_interval = sample_interval
        self.save_interval = save_interval
        self.sample_after_training = sample_after_training
        self.decode_mols_save_path = decode_mols_save_path

        # initialize dataloader
        with open(self.input_data_path, "r") as json_smiles:
            latent_space_mols = np.array(json.load(json_smiles))
        latent_space_mols = latent_space_mols.reshape(
            latent_space_mols.shape[0], 512)

        self.dataloader = torch.utils.data.DataLoader(
            LatentMolsDataset(latent_space_mols),
            shuffle=True,
            batch_size=self.batch_size)

        # load discriminator
        discriminator_name = 'discriminator.txt' if self.starting_epoch == 1 else str(
            self.starting_epoch - 1) + '_discriminator.txt'
        discriminator_path = os.path.join(output_model_folder,
                                          discriminator_name)
        self.D = Discriminator.load(discriminator_path)

        # load generator
        generator_name = 'generator.txt' if self.starting_epoch == 1 else str(
            self.starting_epoch - 1) + '_generator.txt'
        generator_path = os.path.join(output_model_folder, generator_name)
        self.G = Generator.load(generator_path)

        # initialize sampler
        self.Sampler = Sampler(self.G)

        # initialize optimizer
        self.optimizer_G = torch.optim.Adam(self.G.parameters(),
                                            lr=self.lr,
                                            betas=(self.b1, self.b2))
        self.optimizer_D = torch.optim.Adam(self.D.parameters(),
                                            lr=self.lr,
                                            betas=(self.b1, self.b2))

        # Tensor
        cuda = torch.cuda.is_available()
        if cuda:
            self.G.cuda()
            self.D.cuda()
        self.Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
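
Assuming this __init__ belongs to the TrainModelRunner class referenced in Example #3, a hedged usage sketch would look like the following; the paths are hypothetical and the training entry point is assumed rather than shown in this excerpt:

runner = TrainModelRunner(input_data_path='output/encoded_smiles.latent',
                          output_model_folder='output',
                          n_epochs=200,
                          batch_size=64,
                          n_critic=5)
runner.run()  # assumed entry point for the training loop, not shown above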
Example #5
    def test_model_trains(self):
        # Performs one step of training and verifies that the weights are updated, implying some training occurs.
        with TemporaryDirectory() as tmpdirname:
            T = torch.cuda.FloatTensor
            latent = np.random.rand(64, 1, 512)
            os.makedirs(os.path.dirname(tmpdirname + '/encoded_smiles.latent'),
                        exist_ok=True)
            with open(tmpdirname + '/encoded_smiles.latent', 'w') as f:
                json.dump(latent.tolist(), f)

            C = CreateModelRunner(input_data_path=tmpdirname +
                                  '/encoded_smiles.latent',
                                  output_model_folder=tmpdirname)
            C.run()
            D = Discriminator.load(tmpdirname + '/discriminator.txt')
            G = Generator.load(tmpdirname + '/generator.txt')
            G.cuda()
            D.cuda()
            optimizer_G = torch.optim.Adam(G.parameters())
            optimizer_D = torch.optim.Adam(D.parameters())
            with open(tmpdirname + '/encoded_smiles.latent', "r") as json_smiles:
                latent_space_mols = np.array(json.load(json_smiles))
            testSampler = Sampler(G)
            latent_space_mols = latent_space_mols.reshape(
                latent_space_mols.shape[0], 512)
            dataloader = torch.utils.data.DataLoader(
                LatentMolsDataset(latent_space_mols),
                shuffle=True,
                batch_size=64,
                drop_last=True)
            for _, real_mols in enumerate(dataloader):
                real_mols = real_mols.type(T)
                before_G_params = []
                before_D_params = []
                for param in G.parameters():
                    before_G_params.append(param.view(-1))
                before_G_params = torch.cat(before_G_params)
                for param in D.parameters():
                    before_D_params.append(param.view(-1))
                before_D_params = torch.cat(before_D_params)

                optimizer_D.zero_grad()
                fake_mols = testSampler.sample(real_mols.shape[0])
                real_validity = D(real_mols)
                fake_validity = D(fake_mols)
                # Computing the gradient penalty is not relevant here; the test only
                # checks whether the weights change (i.e. some training occurs), not
                # whether the training step is done properly.
                d_loss = -torch.mean(real_validity) + torch.mean(fake_validity)
                d_loss.backward()
                optimizer_D.step()
                optimizer_G.zero_grad()
                fake_mols = testSampler.sample(real_mols.shape[0])
                fake_validity = D(fake_mols)
                g_loss = -torch.mean(fake_validity)
                g_loss.backward()
                optimizer_G.step()
                after_G_params = []
                after_D_params = []
                for param in G.parameters():
                    after_G_params.append(param.view(-1))
                after_G_params = torch.cat(after_G_params)
                for param in D.parameters():
                    after_D_params.append(param.view(-1))
                after_D_params = torch.cat(after_D_params)
                self.assertTrue(
                    torch.any(torch.ne(after_G_params, before_G_params)))
                self.assertTrue(
                    torch.any(torch.ne(after_D_params, before_D_params)))

                break
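
The before/after snapshots can be taken more compactly with torch.nn.utils.parameters_to_vector; a sketch of the same check (not how the test is written) is:

from torch.nn.utils import parameters_to_vector

before_G = parameters_to_vector(G.parameters()).detach().clone()
# ... one discriminator step and one generator step, as above ...
after_G = parameters_to_vector(G.parameters())
self.assertFalse(torch.equal(before_G, after_G))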