def dataframe_to_gcn_input(self, input_data):
    # Tensorise the 'rdkit' SMILES column of the input dataframe into the
    # atom, bond and edge tensors expected by the graph-convolutional encoder.
    x_atoms_cold, x_bonds_cold, x_edges_cold = tensorise_smiles(
        input_data['rdkit'],
        max_degree=self.encoder_params['max_degree'],
        max_atoms=self.encoder_params['max_atoms']
    )
    return [x_atoms_cold, x_bonds_cold, x_edges_cold]
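A minimal usage sketch for this helper, assuming tensorise_smiles comes from the NGF preprocessing utilities and that the method belongs to a class whose encoder_params dict holds max_degree and max_atoms; the class name, import path and SMILES values below are illustrative only.

# Usage sketch (assumptions: NGF import path, class wrapper and values
# are hypothetical and only mirror how the method above is used).
import pandas as pd
from NGF.preprocessing import tensorise_smiles  # assumed import path

class GCNInputBuilder:
    def __init__(self, encoder_params):
        self.encoder_params = encoder_params

    def dataframe_to_gcn_input(self, input_data):
        x_atoms, x_bonds, x_edges = tensorise_smiles(
            input_data['rdkit'],
            max_degree=self.encoder_params['max_degree'],
            max_atoms=self.encoder_params['max_atoms'])
        return [x_atoms, x_bonds, x_edges]

builder = GCNInputBuilder({'max_degree': 5, 'max_atoms': 60})
df = pd.DataFrame({'rdkit': ['CCO', 'c1ccccc1', 'CC(=O)O']})
x_atoms, x_bonds, x_edges = builder.dataframe_to_gcn_input(df)
print(x_atoms.shape, x_bonds.shape, x_edges.shape)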
Example #2
def multistage_autoenc(smiles_x, num_layers, params, train_params):
    # Layer-wise training of a multistage graph-convolutional autoencoder.
    # Assumes tensorise_smiles (NGF preprocessing), Adam (keras.optimizers)
    # and the project-local helpers vni_vxi, stage_creator and add_new_layer
    # are imported in the surrounding module.
    # Create empty lists for outputs
    #val_losses = []
    # X_atom, X_bond, X_edge = tensorise_smiles(smiles_x[:2], max_degree=5, max_atoms=60)
    print('Processing SMILES...')
    #X, val = train_test_split(smiles_x, test_size=train_params["validation_split"], shuffle=True,
    #random_state = np.random.randint(1, 10000))
    X_atoms, X_bonds, X_edges = tensorise_smiles(smiles_x,
                                                 max_degree=5,
                                                 max_atoms=params['max_atoms'])
    #X_atoms_val, X_bonds_val, X_edges_val = tensorise_smiles(val, max_degree=5, max_atoms=params['max_atoms'])

    # Build the Stage I reconstruction targets from the tensorised graphs
    vni, vxi = vni_vxi(X_atoms, X_bonds, X_edges)
    #vni_val, vxi_val = vni_vxi(X_atoms_val, X_bonds_val, X_edges_val)
    # Iterate for every layer
    for layer in range(1, num_layers + 1):
        opt = Adam(lr=params["learning_rates"][layer - 1],
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-8,
                   decay=params['adam_decay'],
                   amsgrad=False)
        #########################################################################
        ######################### STAGE I #######################################
        #########################################################################

        #gen = GraphDataGen(X, train_params['batch_size'], params, shuffle=False)
        #valid = GraphDataGen(val, train_params['batch_size'], params, shuffle=False)

        if layer == 1:
            # First layer: build a fresh graph-convolutional encoder/decoder pair
            stage_I_enc, _, stage_I_dec = stage_creator(params,
                                                        layer,
                                                        conv=True)
            stage_I_dec.compile(optimizer=opt,
                                loss=params['losses_conv'],
                                metrics=['mse'])
            stage_I_dec.fit(x=[X_atoms, X_bonds, X_edges],
                            y=[vni, vxi],
                            epochs=train_params['epochs'],
                            validation_split=0.1,
                            callbacks=params['callbacks'],
                            batch_size=train_params['batch_size'])
            # Extract the trained encoder sub-model from the autoencoder and
            # checkpoint its weights
            stage_I_enc = stage_I_dec.layers[3]
            stage_I_enc.save_weights(
                'layer_{}_stage_I_enc_weights.h5'.format(layer))

            #val_losses.append(stage_I_dec.evaluate(x=[X_atoms_val, X_bonds_val, X_edges_val], y=[vni_val, vxi_val])[0])
        else:
            # Deeper layers: extend the trained encoder with a new
            # graph-convolutional layer and retrain
            stage_I_dec, stage_I_enc = add_new_layer(
                stage_I_enc,
                params,
                train_params,
                layer,
                X=[X_atoms, X_bonds, X_edges])

        #########################################################################
        ######################### STAGE II ######################################
        #########################################################################
        # Encode the graphs with the trained Stage I encoder and rebuild the
        # neighbourhood targets from those encodings
        stage_I_encodings = stage_I_enc.predict([X_atoms, X_bonds, X_edges])
        _, vxi_II = vni_vxi(stage_I_encodings, X_bonds, X_edges)

        stage_II_dec, stage_II_enc = stage_creator(params, layer, conv=False)
        opt = Adam(lr=params["learning_rates_fp"][layer - 1],
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-8,
                   decay=params['adam_decay'],
                   amsgrad=False)

        stage_II_dec.compile(optimizer=opt,
                             loss=params['loss_fp'],
                             metrics=['mse'])
        stage_II_dec.fit([stage_I_encodings, X_bonds, X_edges],
                         y=[vxi_II],
                         epochs=train_params['epochs'],
                         validation_split=train_params['validation_split'],
                         callbacks=params['callbacks'],
                         batch_size=train_params['batch_size'],
                         verbose=1)
        stage_II_enc.save_weights(f'layer_{layer}_stage_II_enc_weights.h5')
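For orientation, a hypothetical configuration sketch listing the dictionary keys that multistage_autoenc reads above; every value is a placeholder rather than a setting from the original project, and the helpers it calls (tensorise_smiles, vni_vxi, stage_creator, add_new_layer) are assumed to be importable from the surrounding module.

# Hypothetical configuration sketch: key names are taken from the code
# above, values are illustrative placeholders only.
from keras.callbacks import EarlyStopping

num_layers = 2

params = {
    'max_atoms': 60,                      # padding size for tensorise_smiles
    'learning_rates': [1e-3, 5e-4],       # Stage I, one rate per layer
    'learning_rates_fp': [1e-3, 5e-4],    # Stage II, one rate per layer
    'adam_decay': 1e-6,
    'losses_conv': 'mse',                 # Stage I loss(es); a list per output also works
    'loss_fp': 'mse',                     # Stage II reconstruction loss
    'callbacks': [EarlyStopping(monitor='val_loss', patience=5)],
}

train_params = {
    'epochs': 50,
    'batch_size': 64,
    'validation_split': 0.1,
}

# smiles_list = df['rdkit'].tolist()
# multistage_autoenc(smiles_list, num_layers, params, train_params)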