def test_predict_mol(box_size, o):
    """Check input validation and output of ``UNet.pocket_density_from_mol``.

    A model built without a featurizer, or without a scale, is expected to
    fail with ``ValueError``; passing something that is not a molecule object
    is expected to fail with ``TypeError``. A fully configured model must
    return a ``(density, origin, step)`` triple whose density has at least
    one positive value.
    """
    protein = next(pybel.readfile('mol2', protein_file))
    shape_kwargs = {'box_size': box_size, 'output_channels': o}

    # No featurizer given.
    with pytest.raises(ValueError, match='featurizer must be set'):
        net = UNet(scale=0.5, input_channels=num_features, **shape_kwargs)
        net.pocket_density_from_mol(protein)

    # Featurizer given, but no scale.
    with pytest.raises(ValueError, match='scale must be set'):
        net = UNet(featurizer=featurizer, input_channels=num_features,
                   **shape_kwargs)
        net.pocket_density_from_mol(protein)

    # Fully configured model.
    net = UNet(featurizer=featurizer, scale=0.5, **shape_kwargs)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')

    # A file path is not an acceptable input.
    with pytest.raises(TypeError, match='pybel.Molecule'):
        net.pocket_density_from_mol(protein_file)

    density, _origin, _step = net.pocket_density_from_mol(protein)
    assert (density > 0).any()
def test_predict_pocket_atoms(box_size, o):
    """Compare atom-level and residue-level output of ``predict_pocket_atoms``.

    Both modes must return the same, non-zero number of pockets. For each
    pair of pockets, both are ``pybel.Molecule`` objects, the residue-expanded
    pocket has at least as many atoms, and both cover the same residue
    indices.
    """
    np.random.seed(42)
    protein = next(pybel.readfile('mol2', protein_file))

    net = UNet(featurizer=featurizer, box_size=box_size, scale=0.5,
               output_channels=o)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')

    # Arguments shared by both prediction calls.
    predict_kwargs = {'dist_cutoff': 3, 'threshold': 0.55, 'min_size': 5}

    by_atom = net.predict_pocket_atoms(protein, expand_residue=False,
                                       **predict_kwargs)
    by_residue = net.predict_pocket_atoms(protein, expand_residue=True,
                                          **predict_kwargs)

    assert len(by_atom) == len(by_residue)
    assert len(by_atom) > 0

    for atom_pocket, residue_pocket in zip(by_atom, by_residue):
        assert isinstance(atom_pocket, pybel.Molecule)
        assert isinstance(residue_pocket, pybel.Molecule)
        assert len(atom_pocket.atoms) <= len(residue_pocket.atoms)

        atom_residues = {res.idx for res in atom_pocket.residues}
        expanded_residues = {res.idx for res in residue_pocket.residues}
        assert atom_residues == expanded_residues
def test_get_pockets_segmentation(data):
    """Check ``pocket_density_from_grid`` and ``get_pockets_segmentation``.

    Covers the ``ValueError`` expected without a data handle, the
    ``ValueError`` expected once ``scale`` is reset to ``None``, the
    ``ValueError`` expected for a stacked pair of densities, and the shape
    and label values of a successful segmentation.
    """
    # Constructor arguments shared by every model built in this test.
    net_kwargs = {
        'box_size': data.box_size,
        'input_channels': data.x_channels,
        'output_channels': data.y_channels,
        'l2_lambda': 1e-7,
    }

    with pytest.raises(ValueError, match='data_handle must be set'):
        net = UNet(**net_kwargs)
        net.pocket_density_from_grid('10gs')

    with pytest.raises(ValueError, match='scale must be set'):
        net = UNet(data_handle=data, **net_kwargs)
        net.scale = None
        net.pocket_density_from_grid('10gs')

    np.random.seed(42)
    net = UNet(data_handle=data, **net_kwargs)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')

    density, *_rest = net.pocket_density_from_grid('10gs')

    # Two densities stacked into one array must be rejected.
    stacked = np.array([density, density])
    with pytest.raises(ValueError, match='not supported'):
        net.get_pockets_segmentation(stacked, 0.6)

    labels = net.get_pockets_segmentation(density, 0.6)
    expected_shape = (data.box_size,) * 3
    assert labels.shape == expected_shape
    assert labels.max() > 0
    # One of the unique values is subtracted before comparing the count of
    # distinct labels with the largest label.
    assert len(np.unique(labels)) - 1 <= labels.max()
def test_training(data, loss):
    """Train a single-convolution model briefly and sanity-check the results.

    The model is compiled with ``loss`` and the metrics
    ``[dice, dice_loss, ovl, ovl_loss]``, fitted for two epochs, and then
    evaluated on a training-style and a test-subset generator. The test
    asserts that each metric equals the negative of its ``*_loss``
    counterpart, that one loss value was recorded per epoch, and that the
    loss of the first epoch differs from every later one.
    """
    train_gen = data.batch_generator(batch_size=5)
    eval_gen = data.batch_generator(batch_size=5)
    test_gen = data.batch_generator(batch_size=2, subset='test')

    num_epochs = 2

    box_size = data.box_size
    input_channels = data.x_channels
    output_channels = data.y_channels

    # Smallest possible network: a single 1x1x1 convolution.
    inputs = Input((box_size, box_size, box_size, input_channels))
    outputs = Convolution3D(filters=output_channels, kernel_size=1,
                            activation='sigmoid')(inputs)

    model = UNet(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(lr=1e-6), loss=loss,
                  metrics=[dice, dice_loss, ovl, ovl_loss])
    model.fit_generator(train_gen, steps_per_epoch=2,
                        epochs=num_epochs, verbose=0)

    for scores in (model.evaluate_generator(eval_gen, steps=2),
                   model.evaluate_generator(test_gen, steps=1)):
        # Indices 1..4 follow the order of the ``metrics`` list above
        # (presumably index 0 is the loss itself).
        assert np.allclose(scores[1], -scores[2])
        assert np.allclose(scores[3], -scores[4])

    # The history stores a plain list. Convert it so the element-wise
    # comparison below works whether the entries are NumPy scalars or
    # built-in floats (with floats, ``float != list`` is a bare ``bool``
    # that has no ``.all()``).
    loss_change = np.asarray(model.history.history['loss'])
    assert len(loss_change) == num_epochs
    assert (loss_change[0] != loss_change[1:]).all()
def test_load_wrong_args(data, kwargs, err, compiled):
    """Check that ``UNet.load_model`` rejects the given keyword arguments.

    A model is saved to a temporary file (optionally after compiling it) and
    reloaded with ``kwargs``. The load must raise ``err`` with a message
    matching the first key of ``kwargs``.
    """
    saved = UNet(box_size=data.box_size,
                 input_channels=data.x_channels,
                 output_channels=data.y_channels,
                 scale=data.scale,
                 data_handle=data)

    if compiled:
        saved.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy',
                      metrics=[dice, dice_loss, ovl, ovl_loss])

    with tempfile.NamedTemporaryFile(suffix='.hdf') as tmp:
        path = tmp.name
        saved.save(path)

        # The error message is expected to mention the offending argument.
        expected_pattern = list(kwargs)[0]
        with pytest.raises(err, match=expected_pattern):
            UNet.load_model(path, data_handle=data, **kwargs)
def test_save_pockets_cube(data):
    """Check ``UNet.save_density_as_cube`` error handling and file content.

    Saving two concatenated predictions must raise ``ValueError`` and saving
    the unsliced density must raise ``NotImplementedError``. After slicing
    the density down to its first channel, the values written after the
    seven header lines must match it.
    """
    net = UNet(data_handle=data, l2_lambda=1e-7)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')
    density, origin, step = net.pocket_density_from_grid('10gs')

    doubled = np.concatenate([density, density])
    with pytest.raises(ValueError, match='saving more than one prediction'):
        net.save_density_as_cube(doubled, origin, step)

    with pytest.raises(NotImplementedError, match='saving multichannel'):
        net.save_density_as_cube(density, origin, step)

    # Keep only the first channel, preserving the trailing axis.
    density = density[..., [0]]

    with tempfile.NamedTemporaryFile(suffix='.cube') as cube_file:
        cube_path = cube_file.name
        net.save_density_as_cube(density, origin, step, fname=cube_path)

        with open(cube_path, 'r') as handle:
            # skip header
            for _ in range(7):
                handle.readline()
            tokens = handle.read().split()

        values = np.array(tokens).reshape(density.shape)
        assert np.allclose(density, values.astype(float))
def test_save_pockets_cmap(data):
    """Check ``UNet.save_density_as_cmap`` error handling and file layout.

    Saving two concatenated predictions must raise ``ValueError``. A valid
    save must produce an HDF5 file with a ``Chimera`` group holding one
    ``image<N>`` entry per output channel. Each entry's ``data_zyx`` dataset
    must equal the corresponding density channel with its axes reversed.
    """
    net = UNet(data_handle=data, l2_lambda=1e-7)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')
    density, origin, step = net.pocket_density_from_grid('10gs')

    doubled = np.concatenate([density, density])
    with pytest.raises(ValueError, match='saving more than one prediction'):
        net.save_density_as_cmap(doubled, origin, step)

    n_channels = data.y_channels

    with tempfile.NamedTemporaryFile(suffix='.cmap') as cmap_file:
        cmap_path = cmap_file.name
        net.save_density_as_cmap(density, origin, step, fname=cmap_path)

        with h5py.File(cmap_path, 'r') as h5:
            assert 'Chimera' in h5
            chimera = h5['Chimera']
            assert len(chimera.keys()) == n_channels

            for channel in range(n_channels):
                # Image entries are numbered starting from 1.
                image_name = 'image%s' % (channel + 1)
                assert image_name in chimera
                assert 'data_zyx' in chimera[image_name]

                stored = chimera[image_name]['data_zyx'][:]
                expected = density[0, ..., channel].transpose([2, 1, 0])
                assert np.allclose(expected, stored)
def test_save_load(data, kwargs, compiled):
    """Round-trip a model through both save formats and compare weights.

    The model is first saved with ``UNet.save`` and restored with
    ``UNet.load_model`` (forwarding ``kwargs``); the restored model must have
    the same JSON description and the same weights. It is then saved with
    ``save_keras`` and restored with Keras' own ``load_model``; the weights
    must again be the same.
    """
    from keras.models import load_model as keras_load

    box_size = data.box_size
    i = data.x_channels
    o = data.y_channels

    model1 = UNet(box_size=box_size, input_channels=i,
                  output_channels=o, scale=data.scale,
                  data_handle=data)
    if compiled:
        model1.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy',
                       metrics=[dice, dice_loss, ovl, ovl_loss])
    weights1 = model1.get_weights()

    with tempfile.NamedTemporaryFile(suffix='.hdf') as f:
        model1.save(f.name)
        model2 = UNet.load_model(f.name, data_handle=data, **kwargs)
    weights2 = model2.get_weights()

    assert model1.to_json() == model2.to_json()
    # zip() stops at the shorter sequence, so a model restored with missing
    # weights would otherwise pass the loop below unnoticed.
    assert len(weights1) == len(weights2)
    for w1, w2 in zip(weights1, weights2):
        assert np.allclose(w1, w2)

    with tempfile.NamedTemporaryFile(suffix='.hdf') as f:
        model1.save_keras(f.name)
        model2 = keras_load(f.name)
    weights2 = model2.get_weights()

    # No JSON comparison is made for this round trip, so the length check is
    # the only guard against a truncated weight list.
    assert len(weights1) == len(weights2)
    for w1, w2 in zip(weights1, weights2):
        assert np.allclose(w1, w2)
def _read_ids(path):
    """Return the IDs listed one per line in the text file at *path*.

    Surrounding whitespace is stripped from every line, and lines that are
    empty after stripping are skipped.
    """
    with open(path) as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]


def main():
    """Train a ``UNet`` model as configured by the command-line arguments.

    Results are written to ``args.output`` (``output_<YYYY-MM-DD>`` when it
    is not given): ``checkpoint.hdf`` from the unconditional checkpoint
    callback, ``best_weights.hdf`` when test IDs are provided,
    ``history.csv`` with the training history and ``model.hdf`` with the
    final model.

    Raises:
        IOError: if the output directory is not writable.
    """
    args = parse_args()

    if args.output is None:
        args.output = 'output_' + time.strftime('%Y-%m-%d')
    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(args.output, exist_ok=True)
    if not os.access(args.output, os.W_OK):
        raise IOError(
            'Cannot create files inside %s (check your permissions).'
            % args.output)

    train_ids = _read_ids(args.train_ids) if args.train_ids else None
    test_ids = _read_ids(args.test_ids) if args.test_ids else None

    # Without train IDs no explicit ID list is passed to the data wrapper.
    if train_ids:
        if test_ids:
            all_ids = sorted(set(train_ids) | set(test_ids))
        else:
            all_ids = train_ids
    else:
        all_ids = None

    data = DataWrapper(args.input, test_set=test_ids, pdbids=all_ids,
                       load_data=args.load)

    # Continue from a saved model when one is given, else start fresh.
    if args.model:
        model = UNet.load_model(args.model, data_handle=data)
    else:
        model = UNet(data_handle=data)
        model.compile(optimizer=Adam(lr=1e-6), loss=dice_loss,
                      metrics=[dice, ovl, 'binary_crossentropy'])

    train_batch_generator = data.batch_generator(batch_size=args.batch_size)

    callbacks = [
        ModelCheckpoint(os.path.join(args.output, 'checkpoint.hdf'),
                        save_best_only=False),
    ]

    if test_ids:
        val_batch_generator = data.batch_generator(
            batch_size=args.batch_size, subset='test')
        # Use a fifth of the training steps for validation, at least one.
        num_val_steps = max(args.steps_per_epoch // 5, 1)
        callbacks.append(
            ModelCheckpoint(os.path.join(args.output, 'best_weights.hdf'),
                            save_best_only=True))
    else:
        val_batch_generator = None
        num_val_steps = None

    model.fit_generator(train_batch_generator,
                        steps_per_epoch=args.steps_per_epoch,
                        epochs=args.epochs, verbose=args.verbose,
                        callbacks=callbacks,
                        validation_data=val_batch_generator,
                        validation_steps=num_val_steps)

    history = pd.DataFrame(model.history.history)
    history.to_csv(os.path.join(args.output, 'history.csv'))
    model.save(os.path.join(args.output, 'model.hdf'))