import os

import numpy as np


def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data_train, data_test, charset = load_dataset(args.data)

    # Resume from an existing weights file if present, otherwise build afresh.
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    # Keep only the best weights (lowest validation loss) and decay the
    # learning rate when validation loss plateaus.
    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # The autoencoder is trained to reconstruct its own input.
    model.autoencoder.fit(data_train, data_train,
                          shuffle=True,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=[checkpointer, reduce_lr],
                          validation_data=(data_test, data_test))
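# Each variant above and below assumes a get_arguments() helper. A minimal
# argparse sketch, with option names inferred from the args.* attributes used
# in this file (defaults are illustrative, not the repository's exact ones):
import argparse

def get_arguments():
    parser = argparse.ArgumentParser(description='Train a MoleculeVAE')
    parser.add_argument('data', type=str, help='HDF5 file of preprocessed SMILES data')
    parser.add_argument('model', type=str, help='where to save/load model weights')
    parser.add_argument('--epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--latent_dim', type=int, default=292)
    parser.add_argument('--random_seed', type=int, default=123)
    # only used by the generator-based variant further down
    parser.add_argument('--test_split', type=float, default=0.2)
    parser.add_argument('--epoch_size', type=int, default=12800)
    return parser.parse_args()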
import pickle

import numpy as np

from molecules.model import MoleculeVAE
from molecules.utils import load_dataset
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

NUM_EPOCHS = 100
RANDOM_SEED = 123


def main():
    np.random.seed(RANDOM_SEED)

    data_train, data_test, charset = load_dataset('data/processed.h5')
    print("Charset", charset)

    model = MoleculeVAE()
    model.create(charset, latent_rep_size=292)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                                  patience=3, min_lr=0.0001)
    checkpointer = ModelCheckpoint(filepath='model.h5', verbose=1,
                                   save_best_only=True)

    # Quick run on the first 1000 samples only.
    history = model.autoencoder.fit(data_train[:1000], data_train[:1000],
                                    shuffle=True,
                                    epochs=NUM_EPOCHS,
                                    batch_size=100,
                                    callbacks=[checkpointer, reduce_lr],
                                    validation_data=(data_test[:1000],
                                                     data_test[:1000]))

    # Persist the per-epoch metrics for later inspection.
    with open('trainHistoryDict', 'wb') as file_pi:
        pickle.dump(history.history, file_pi)
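# The pickled history can be reloaded later to plot the loss curves
# (matplotlib assumed available; 'trainHistoryDict' is the file written above):
import pickle

import matplotlib.pyplot as plt

with open('trainHistoryDict', 'rb') as file_pi:
    history = pickle.load(file_pi)

plt.plot(history['loss'], label='train')
plt.plot(history['val_loss'], label='validation')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.savefig('loss_curves.png')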
import os

import numpy as np
import pandas as pd

MAX_LEN = 120  # SMILES strings are padded/truncated to this length


def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.vectorizer import SmilesDataGenerator
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data = pd.read_hdf(args.data, 'table')
    structures = data['structure']

    # import gzip
    # filepath = args.data
    # structures = [line.split()[0].strip() for line in gzip.open(filepath) if line]

    # can also use CanonicalSmilesDataGenerator
    datobj = SmilesDataGenerator(structures, MAX_LEN,
                                 test_split=args.test_split,
                                 random_seed=args.random_seed)
    test_divisor = int((1 - datobj.test_split) / (datobj.test_split))
    train_gen = datobj.train_generator(args.batch_size)
    test_gen = datobj.test_generator(args.batch_size)

    # reformulate generators to not use weights
    train_gen = ((tens, tens) for (tens, _, weights) in train_gen)
    test_gen = ((tens, tens) for (tens, _, weights) in test_gen)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(datobj.chars, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(datobj.chars, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # Keras 2 generator API: steps count batches, and the Keras 1 arguments
    # nb_val_samples/pickle_safe became validation_steps/use_multiprocessing.
    model.autoencoder.fit_generator(
        train_gen,
        steps_per_epoch=args.epoch_size,
        epochs=args.epochs,
        callbacks=[checkpointer, reduce_lr],
        validation_data=test_gen,
        validation_steps=args.epoch_size // test_divisor,
        use_multiprocessing=True
    )
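# The (tens, tens) reformulation above simply re-pairs each yielded batch as
# (input, target) for autoencoder training, discarding the per-sample weights
# the generator also produces. A self-contained toy illustration (the batch
# and charset sizes here are made up):
import numpy as np

def toy_generator():
    while True:
        tens = np.zeros((10, 120, 35))  # (batch, MAX_LEN, charset size)
        weights = np.ones(10)
        yield tens, tens, weights

gen = ((tens, tens) for (tens, _, weights) in toy_generator())
x, y = next(gen)
assert x is y  # input and reconstruction target are the same tensor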
import numpy as np

from molecules.model import MoleculeVAE
from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
    decode_smiles_from_indexes, load_dataset
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

NUM_EPOCHS = 100
BATCH_SIZE = 10
LATENT_DIM = 128
RANDOM_SEED = 123

np.random.seed(RANDOM_SEED)  # args.random_seed

data_train, data_test, charset = load_dataset('./data/processed.h5')

model = MoleculeVAE()
# model.load(charset, args.model, latent_rep_size=args.latent_dim)
model.create(charset, latent_rep_size=LATENT_DIM)

checkpointer = ModelCheckpoint(
    filepath='./test_models/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    verbose=1,
    save_best_only=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=3, min_lr=0.0001)

# Overfit a single training sample as a quick sanity check.
data_train = data_train[:1]

model.autoencoder.fit(data_train, data_train,
                      shuffle=True,
                      epochs=NUM_EPOCHS,
                      batch_size=BATCH_SIZE,
                      callbacks=[checkpointer, reduce_lr],
                      validation_data=(data_test, data_test))
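# Appended to the script above: push the overfit sample back through the
# autoencoder and decode both sides to SMILES with the utils imported at the
# top. Usage of decode_smiles_from_indexes/from_one_hot_array follows the
# repository's sampling script; treat this as a sketch.
reconstructed = model.autoencoder.predict(data_train[:1]).argmax(axis=2)[0]
print('input: ', decode_smiles_from_indexes(
    map(from_one_hot_array, data_train[0]), charset))
print('output:', decode_smiles_from_indexes(reconstructed, charset))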