Example #1
def test_preprocess_and_loader(tmpdir, dataset):
    smiles, multis, scalars = dataset

    # Convert to needed formats
    nxs = [convert_string_to_nx(s) for s in smiles]

    # Save data to a temporary directory
    data_path = tmpdir.join('temp.proto')
    with tf.io.TFRecordWriter(str(data_path)) as writer:
        for n, m, s in zip(nxs, multis, scalars):
            record = convert_nx_to_dict(n)
            record['multi'] = m
            record['scalar'] = s
            writer.write(make_tfrecord(record))
    assert data_path.isfile()

    # Make a data loader with the multi-property output
    loader = make_data_loader(str(data_path),
                              batch_size=1,
                              output_property='multi',
                              output_shape=(2, ))
    ins, outs = next(iter(loader))
    assert ins['atom'].shape == (5, )
    assert outs.shape == (1, 2)

    # Make the data loader with the scalar output
    loader = make_data_loader(str(data_path),
                              batch_size=1,
                              output_property='scalar')
    ins, outs = next(iter(loader))
    assert ins['atom'].shape == (5, )
    assert outs.shape == (1, )
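The test assumes a dataset fixture supplying SMILES strings plus multi-valued and scalar labels. A minimal sketch of what such a fixture could look like (the molecule and label values are hypothetical; the only real constraints from the assertions above are that the first molecule has 5 atoms, which assumes convert_string_to_nx adds explicit hydrogens, and that the multi-valued label has length 2):

import pytest

@pytest.fixture
def dataset():
    # Hypothetical data: methane has 5 atoms once hydrogens are added,
    # matching the assertion ins['atom'].shape == (5, )
    smiles = ['C']
    multis = [[1.0, 2.0]]  # Length-2 labels, matching output_shape=(2, )
    scalars = [0.5]
    return smiles, multis, scalars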
Example #2
    params_hash = hashlib.sha256(
        json.dumps(run_params).encode()).hexdigest()[:6]

    # Determine the output directory
    test_dir = os.path.join(
        'networks',
        f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}'
    )
    os.makedirs(test_dir, exist_ok=args.overwrite)
    with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
        json.dump(run_params, fp)

    # Making the data loaders
    train_loader = make_data_loader(str(_data_dir / 'train_data.proto'),
                                    shuffle_buffer=32768,
                                    cache=True,
                                    batch_size=args.batch_size,
                                    output_property=output)
    test_loader = make_data_loader(str(_data_dir / 'test_data.proto'),
                                   batch_size=args.batch_size,
                                   output_property=output)
    val_loader = make_data_loader(str(_data_dir / 'valid_data.proto'),
                                  batch_size=args.batch_size,
                                  output_property=output,
                                  cache=True)

    # Make the model
    model = build_fn(atom_features=args.atom_features,
                     message_steps=args.num_messages,
                     output_layers=args.output_layers,
                     reduce_fn=args.readout_fn)
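The example ends at model construction; a minimal sketch of how such a script typically continues, assuming a standard Keras workflow (the optimizer and loss here are illustrative, not taken from the source):

    # Hypothetical continuation: compile the model and train it with the loaders
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss='mean_squared_error')
    model.fit(train_loader, validation_data=val_loader, epochs=args.num_epochs)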
Example #3
    # Read every serialized record from the training set
    train_data = list(
        iter(
            tf.data.TFRecordDataset(str(
                data_dir.joinpath('train_data.proto')))))
    # Bootstrap: resample the records with replacement
    rng = np.random.RandomState(args.random_seed)
    train_data = rng.choice(train_data, size=(len(train_data), ), replace=True)

    # Save it back to disk for later use
    train_path = os.path.join(test_dir, 'train_data.proto')
    with tf.io.TFRecordWriter(train_path) as writer:
        for d in train_data:
            writer.write(d.numpy())

    # Making the data loaders for use during training
    train_loader = make_data_loader(train_path,
                                    shuffle_buffer=len(train_data),
                                    cache=True,
                                    output_property='output',
                                    batch_size=args.batch_size)
    test_loader = make_data_loader(os.path.join(data_dir, 'test_data.proto'),
                                   batch_size=args.batch_size,
                                   output_property='output')
    val_loader = make_data_loader(os.path.join(data_dir, 'valid_data.proto'),
                                  batch_size=args.batch_size,
                                  output_property='output')

    # Make the model: load the saved best model, then rebuild it from its config
    model = load_model(os.path.join(model_dir, 'best_model.h5'),
                       custom_objects=custom_objects)
    model_config = model.get_config()
    model = tf.keras.models.Model.from_config(model_config,
                                              custom_objects=custom_objects)
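Note that tf.keras.models.Model.from_config builds a new model with freshly initialized weights; only the architecture carries over from the saved best_model.h5. That is what makes the rebuilt model suitable for retraining from scratch on the bootstrap-resampled data saved above.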
Example #4
    # Determine the output directory
    test_dir = os.path.join(
        'networks',
        f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}'
    )
    os.makedirs(test_dir, exist_ok=args.overwrite)
    with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
        json.dump(run_params, fp)

    # Load in the dielectric constants
    with open('dielectric_constants.json') as fp:
        solvent_data = json.load(fp)
        n_solvents = len(solvent_data['e'])

    # Making the data loaders
    train_loader = make_data_loader('train_data.proto',
                                    shuffle_buffer=32768,
                                    batch_size=args.batch_size,
                                    output_property='solv_energies',
                                    output_shape=(n_solvents, ))
    test_loader = make_data_loader('test_data.proto',
                                   batch_size=args.batch_size,
                                   output_property='solv_energies',
                                   output_shape=(n_solvents, ))
    val_loader = make_data_loader('valid_data.proto',
                                  batch_size=args.batch_size,
                                  output_property='solv_energies',
                                  output_shape=(n_solvents, ))

    # Make a function to inject dielectric constants into input
    def add_constants(inputs, outputs):
        inputs['dielectric_constants'] = solvent_data['e']
        return inputs, outputs
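The snippet ends before add_constants is applied. Assuming make_data_loader returns tf.data.Dataset objects, the mapping would typically be attached to each loader like so (hypothetical continuation):

    # Inject the dielectric constants into every batch (hypothetical usage)
    train_loader = train_loader.map(add_constants)
    test_loader = test_loader.map(add_constants)
    val_loader = val_loader.map(add_constants)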
Example #5
    arg_parser.add_argument('--batch-size', help='Number of molecules per batch', type=int, default=32)
    arg_parser.add_argument('--num-epochs', help='Number of epochs to run', type=int, default=64)

    # Parse the arguments
    args = arg_parser.parse_args()
    run_params = args.__dict__
    params_hash = hashlib.sha256(json.dumps(run_params).encode()).hexdigest()[:6]

    # Determine the output directory
    test_dir = os.path.join('networks', f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}')
    os.makedirs(test_dir)
    with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
        json.dump(run_params, fp)

    # Making the data loaders
    train_loader = make_data_loader('train_data.proto', shuffle_buffer=32768,
                                    batch_size=args.batch_size, output_property='u0_atom')
    test_loader = make_data_loader('test_data.proto', batch_size=args.batch_size, output_property='u0_atom')
    val_loader = make_data_loader('valid_data.proto', batch_size=args.batch_size, output_property='u0_atom')

    # Make the model
    model = build_fn(atom_features=args.atom_features, message_steps=args.num_messages,
                     output_layers=args.output_layers)

    # Set the scale for the output parameter
    outputs = np.concatenate([x[1].numpy() for x in iter(train_loader)], axis=0)
    model.get_layer('scale').set_weights([np.array([[outputs.std()]]), np.array([outputs.mean()])])

    # Train the model
    final_learn_rate = 1e-6
    init_learn_rate = 1e-3
    decay_rate = (final_learn_rate / init_learn_rate) ** (1. / (args.num_epochs - 1))
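decay_rate is the per-epoch multiplier that takes the learning rate from init_learn_rate down to final_learn_rate over the run. One way to wire it into training is a Keras LearningRateScheduler callback; a minimal sketch, assuming this callback-based setup (not shown in the source):

    # Hypothetical usage: decay the learning rate by decay_rate each epoch
    def lr_schedule(epoch, lr):
        return init_learn_rate * decay_rate ** epoch

    callbacks = [tf.keras.callbacks.LearningRateScheduler(lr_schedule)]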
Example #6
    args = arg_parser.parse_args()
    run_params = args.__dict__
    params_hash = hashlib.sha256(json.dumps(run_params).encode()).hexdigest()[:6]

    # Set the random seed for TF
    tf.random.set_seed(args.random_seed)

    # Determine the output directory
    test_dir = os.path.join('networks', f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}')
    os.makedirs(test_dir)
    with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
        json.dump(run_params, fp)

    # Making the data loaders
    train_loader = make_data_loader('../datasets/train_data.proto', shuffle_buffer=32768,
                                    batch_size=args.batch_size, output_property='output',
                                    random_seed=args.random_seed)
    test_loader = make_data_loader('../datasets/test_data.proto', batch_size=args.batch_size, output_property='output')
    val_loader = make_data_loader('../datasets/valid_data.proto', batch_size=args.batch_size, output_property='output')

    # Load in the bond and atom type information
    with open('../atom_types.json') as fp:
        atom_type_count = len(json.load(fp))
    with open('../bond_types.json') as fp:
        bond_type_count = len(json.load(fp))

    # Make the model
    model = build_fn(atom_features=args.atom_features, message_steps=args.num_messages,
                     output_layers=args.output_layers, reduce_fn=args.readout_fn)

    # Set the scale for the output parameter
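The snippet is truncated here. Judging from the parallel script in Example #5, this comment is presumably followed by fitting the 'scale' layer to the statistics of the training outputs, along these lines (hypothetical reconstruction):

    # Hypothetical continuation, mirroring Example #5
    outputs = np.concatenate([x[1].numpy() for x in iter(train_loader)], axis=0)
    model.get_layer('scale').set_weights([np.array([[outputs.std()]]),
                                          np.array([outputs.mean()])])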