Example #1
import os

from molecules.model import MoleculeVAE
from molecules.utils import load_dataset
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau


def main():
    args = get_arguments()
    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    model.autoencoder.fit(data_train,
                          data_train,
                          shuffle=True,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=[checkpointer, reduce_lr],
                          validation_data=(data_test, data_test))
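Every example calls get_arguments() without showing it. A minimal sketch of what such a parser might look like, inferred only from the attribute accesses in the training snippets (args.data, args.model, args.epochs, args.batch_size, args.latent_dim, args.random_seed); the flag names and defaults below are assumptions, not the original source:

import argparse

def get_arguments():
    # Hypothetical reconstruction of the CLI used by the training examples.
    parser = argparse.ArgumentParser(description='Train a MoleculeVAE on SMILES data')
    parser.add_argument('data', help='HDF5 file with the preprocessed dataset')
    parser.add_argument('model', help='where to load/save model weights')
    parser.add_argument('--epochs', type=int, default=20)
    parser.add_argument('--batch_size', type=int, default=250)
    parser.add_argument('--latent_dim', type=int, default=292)
    parser.add_argument('--random_seed', type=int, default=1337)
    return parser.parse_args()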
Example #2
import os

import numpy as np


def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    # Model/Keras imports are deferred until after the seed is set.
    from molecules.model import MoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    model.autoencoder.fit(data_train,
                          data_train,
                          shuffle=True,
                          epochs=args.epochs,
                          batch_size=args.batch_size,
                          callbacks=[checkpointer, reduce_lr],
                          validation_data=(data_test, data_test))
Example #3
import os

import numpy as np


def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from molecules.utils import one_hot_array, one_hot_index, from_one_hot_array, \
        decode_smiles_from_indexes, load_dataset
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
    
    data_train, data_test, charset = load_dataset(args.data)
    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(charset, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    model.autoencoder.fit(
        data_train,
        data_train,
        shuffle=True,
        epochs=args.epochs,  # `nb_epoch` was the Keras 1 name for this argument
        batch_size=args.batch_size,
        callbacks=[checkpointer, reduce_lr],
        validation_data=(data_test, data_test)
    )
Example #4
import os

from molecules.model import MoleculeVAE
from molecules.utils import load_dataset
# Keras 1 visualization helper; under Keras 2 the equivalent is
# `from keras.utils import plot_model` (see the sketch after this example).
from keras.utils.visualize_util import plot


def visualize_model(args):
    model = MoleculeVAE()

    data, charset = load_dataset(args.data, split=False)

    if os.path.isfile(args.model):
        model.load(charset, args.model)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    plot(model.autoencoder, to_file=args.outfile)
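The `plot` helper used here is the Keras 1 visualization utility. Under Keras 2 the same picture can be produced with `plot_model`; a minimal equivalent, assuming `pydot` and `graphviz` are installed and reusing `model` and `args` from the example above:

from keras.utils import plot_model

# Renders the autoencoder graph to an image file (requires pydot + graphviz).
plot_model(model.autoencoder, to_file=args.outfile, show_shapes=True)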
Example #5
import os

from molecules.model import MoleculeVAE
from molecules.utils import load_dataset
from keras.utils.visualize_util import plot  # Keras 1 helper, as in Example #4


def visualize_model(args):
    model = MoleculeVAE()

    data, charset = load_dataset(args.data, split=False)

    if os.path.isfile(args.model):
        model.load(charset, args.model)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    plot(model.autoencoder, to_file=args.outfile)
Example #6
import os

import numpy as np
import pandas as pd

# `SmilesDataGenerator` and `MAX_LEN` live elsewhere in the original
# project; the import path and the value of 120 below are assumptions.
from molecules.vectorizer import SmilesDataGenerator

MAX_LEN = 120


def main():
    args = get_arguments()
    np.random.seed(args.random_seed)

    from molecules.model import MoleculeVAE
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

    data = pd.read_hdf(args.data, 'table')
    structures = data['structure']

    # import gzip
    # filepath = args.data
    # structures = [line.split()[0].strip() for line in gzip.open(filepath) if line]

    # can also use CanonicalSmilesDataGenerator
    datobj = SmilesDataGenerator(structures, MAX_LEN,
                                 test_split=args.test_split,
                                 random_seed=args.random_seed)
    test_divisor = int((1 - datobj.test_split) / (datobj.test_split))
    train_gen = datobj.train_generator(args.batch_size)
    test_gen = datobj.test_generator(args.batch_size)

    # reformulate generators to not use weights
    train_gen = ((tens, tens) for (tens, _, weights) in train_gen)
    test_gen = ((tens, tens) for (tens, _, weights) in test_gen)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(datobj.chars, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(datobj.chars, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # The original call mixed Keras 1 names (`nb_val_samples`, `pickle_safe`)
    # with Keras 2 ones; this uses the Keras 2 spelling throughout.
    model.autoencoder.fit_generator(
        train_gen,
        steps_per_epoch=args.epoch_size,
        epochs=args.epochs,
        callbacks=[checkpointer, reduce_lr],
        validation_data=test_gen,
        validation_steps=args.epoch_size // test_divisor,
        use_multiprocessing=True
    )
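The generator rewrapping in the example above turns the (inputs, targets, sample_weights) triples yielded by SmilesDataGenerator into (inputs, inputs) pairs: an autoencoder reconstructs its own input, and the weights are unused. A tiny self-contained illustration of the same pattern as a named helper:

def as_autoencoder_batches(gen):
    # Keep the input tensor and reuse it as the target; the targets and
    # sample weights yielded by the source generator are discarded.
    for tens, _targets, _weights in gen:
        yield (tens, tens)

# e.g. train_gen = as_autoencoder_batches(datobj.train_generator(args.batch_size))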
Example #7
import os

import numpy as np
import pandas as pd

# As in Example #6, `SmilesDataGenerator` and `MAX_LEN` are assumed to
# come from the project's vectorizer module.
from molecules.vectorizer import SmilesDataGenerator

MAX_LEN = 120


def main():
    args = get_arguments()
    np.random.seed(args.random_seed)
    
    from molecules.model import MoleculeVAE
    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
    
    data = pd.read_hdf(args.data, 'table')
    structures = data['structure']

    # import gzip
    # filepath = args.data
    # structures = [line.split()[0].strip() for line in gzip.open(filepath) if line]

    # can also use CanonicalSmilesDataGenerator
    datobj = SmilesDataGenerator(structures, MAX_LEN,
                                 test_split=args.test_split,
                                 random_seed=args.random_seed)
    test_divisor = int((1 - datobj.test_split) / (datobj.test_split))
    train_gen = datobj.train_generator(args.batch_size)
    test_gen = datobj.test_generator(args.batch_size)

    # reformulate generators to not use weights
    train_gen = ((tens, tens) for (tens, _, weights) in train_gen)
    test_gen = ((tens, tens) for (tens, _, weights) in test_gen)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(datobj.chars, args.model, latent_rep_size=args.latent_dim)
    else:
        model.create(datobj.chars, latent_rep_size=args.latent_dim)

    checkpointer = ModelCheckpoint(filepath=args.model,
                                   verbose=1,
                                   save_best_only=True)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.2,
                                  patience=3,
                                  min_lr=0.0001)

    # Keras 2 spelling throughout (the original mixed `nb_epoch`,
    # `nb_val_samples`, and `pickle_safe` from Keras 1 into the call).
    model.autoencoder.fit_generator(
        train_gen,
        steps_per_epoch=args.epoch_size,
        epochs=args.epochs,
        callbacks=[checkpointer, reduce_lr],
        validation_data=test_gen,
        validation_steps=args.epoch_size // test_divisor,
        use_multiprocessing=True
    )
Example #8
import os

import h5py

from molecules.model import MoleculeVAE
# `interpolate` is defined elsewhere in the original script
# (see the sketch after this example for the general idea).


def main():
    args = get_arguments()

    if os.path.isfile(args.data):
        h5f = h5py.File(args.data, 'r')
        charset = list(h5f['charset'][:])
        h5f.close()
    else:
        raise ValueError("Data file %s doesn't exist" % args.data)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    results = interpolate(args.source, args.dest, args.steps, charset, model,
                          args.latent_dim, args.width)
    for result in results:
        print(result[0], result[2])
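The `interpolate` function itself is not shown in the snippet. Conceptually it encodes the source and destination SMILES, walks a straight line between the two latent vectors, and decodes each step. A rough, hypothetical sketch of just the latent-space walk, under that assumption:

import numpy as np

def interpolate_latent(source_vec, dest_vec, steps):
    # Hypothetical helper: linear interpolation between two latent points,
    # endpoints included. Decoding each step is left to the model's decoder.
    ratios = np.linspace(0.0, 1.0, steps)
    return [(1.0 - r) * source_vec + r * dest_vec for r in ratios]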
Example #9
import os

import h5py

from molecules.model import MoleculeVAE


def main():
    args = get_arguments()

    if os.path.isfile(args.data):
        h5f = h5py.File(args.data, 'r')
        charset = list(h5f['charset'][:])
        h5f.close()
    else:
        raise ValueError("Data file %s doesn't exist" % args.data)

    model = MoleculeVAE()
    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    results = interpolate(args.source, args.dest, args.steps, charset, model,
                          args.latent_dim, args.width)
    for result in results:
        print(result[0], result[2])
Example #10
import os
import sys

import h5py
import numpy as np

from molecules.model import MoleculeVAE
from molecules.utils import load_dataset
# `visualize_latent_rep` is defined elsewhere in the original script.


def main():
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter='\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data=charset)
            h5f.create_dataset('latent_vectors', data=x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)
Example #11
import os
import sys

import h5py
import numpy as np

from molecules.model import MoleculeVAE
from molecules.utils import load_dataset
# `visualize_latent_rep` is defined elsewhere in the original script.


def main():
    args = get_arguments()
    model = MoleculeVAE()

    data, data_test, charset = load_dataset(args.data)

    if os.path.isfile(args.model):
        model.load(charset, args.model, latent_rep_size=args.latent_dim)
    else:
        raise ValueError("Model file %s doesn't exist" % args.model)

    x_latent = model.encoder.predict(data)
    if not args.visualize:
        if not args.save_h5:
            np.savetxt(sys.stdout, x_latent, delimiter='\t')
        else:
            h5f = h5py.File(args.save_h5, 'w')
            h5f.create_dataset('charset', data=charset)
            h5f.create_dataset('latent_vectors', data=x_latent)
            h5f.close()
    else:
        visualize_latent_rep(args, model, x_latent)
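When --save_h5 is given, the script writes the charset and the encoded latent vectors to an HDF5 file. Reading them back is symmetric; a short sketch using only the dataset names created above (the file path is illustrative, standing in for whatever was passed as --save_h5):

import h5py

with h5py.File('latent.h5', 'r') as h5f:
    charset = list(h5f['charset'][:])
    x_latent = h5f['latent_vectors'][:]
print(x_latent.shape)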