Beispiel #1
0
def test_unet_from_layers(box_size, i, o):
    """Build a UNet from explicit Keras tensors and check tensor validation.

    ``i`` and ``o`` are used as the number of input and output channels.
    A model wrapping a small two-convolution graph must expose a
    ``data_handle`` attribute that is ``None``; malformed input/output
    tensors must be rejected with ``ValueError``.
    """
    grid_shape = [box_size] * 3 + [i]
    inputs = Input(grid_shape)
    hidden = Convolution3D(
        filters=3, kernel_size=1, activation='elu', padding='same',
    )(inputs)
    outputs = Convolution3D(
        filters=o, kernel_size=1, activation='sigmoid', padding='same',
    )(hidden)

    net = UNet(
        inputs=inputs,
        outputs=outputs,
        box_size=box_size,
        input_channels=i,
        output_channels=o,
    )
    assert hasattr(net, 'data_handle')
    assert net.data_handle is None

    # Indexing a tensor drops one axis, so it is no longer 5D.
    with pytest.raises(ValueError, match='input should be 5D'):
        UNet(inputs=inputs[0], outputs=inputs)

    with pytest.raises(ValueError, match='output should be 5D'):
        UNet(inputs=inputs, outputs=outputs[1])

    # Outputs concatenated along axis 1 no longer agree with the inputs.
    with pytest.raises(ValueError,
                       match='input and output shapes do not match'):
        doubled = concatenate([outputs, outputs], 1)
        UNet(inputs=inputs, outputs=doubled)
Beispiel #2
0
def test_unet_from_data_handle(data):
    """Check UNet construction driven solely by a data handle.

    Constructing without any arguments, or with a ``data_handle`` of the
    wrong type, must fail. A model built from ``data`` must mirror its
    ``scale``, ``max_dist`` and channel counts.
    """
    with pytest.raises(ValueError, match='you must either provide'):
        UNet()

    with pytest.raises(TypeError, match='data_handle should be a DataWrapper'):
        UNet(data_handle='10gs')

    net = UNet(data_handle=data)
    assert net.data_handle == data
    for attr in ('scale', 'max_dist'):
        assert getattr(net, attr) == getattr(data, attr)

    # Exactly one input tensor whose last axis holds the input channels.
    assert len(net.inputs) == 1
    in_tensor = net.inputs[0]
    assert in_tensor.shape[-1] == data.x_channels

    # Exactly one output tensor whose last axis holds the output channels.
    assert len(net.outputs) == 1
    out_tensor = net.outputs[0]
    assert out_tensor.shape[-1] == data.y_channels
Beispiel #3
0
def test_training(data, loss):
    """Train a minimal model for two epochs and sanity-check the metrics.

    The model is a single 1x1x1 convolution from the input grid to the
    output channels. After training, it is evaluated on two generators and
    each ``*_loss`` metric must equal the negation of its score metric.
    The recorded training loss must have one entry per epoch and must change
    after the first epoch.
    """
    train_gen = data.batch_generator(batch_size=5)
    eval_gen = data.batch_generator(batch_size=5)
    test_gen = data.batch_generator(batch_size=2, subset='test')
    num_epochs = 2

    box_size = data.box_size
    input_channels = data.x_channels
    output_channels = data.y_channels

    inputs = Input((box_size, box_size, box_size, input_channels))
    outputs = Convolution3D(filters=output_channels,
                            kernel_size=1,
                            activation='sigmoid')(inputs)

    model = UNet(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(lr=1e-6),
                  loss=loss,
                  metrics=[dice, dice_loss, ovl, ovl_loss])
    model.fit_generator(train_gen,
                        steps_per_epoch=2,
                        epochs=num_epochs,
                        verbose=0)

    # scores = [loss, dice, dice_loss, ovl, ovl_loss] (loss first, then the
    # metrics in the order passed to compile).
    for scores in (model.evaluate_generator(eval_gen, steps=2),
                   model.evaluate_generator(test_gen, steps=1)):
        assert np.allclose(scores[1], -scores[2])
        assert np.allclose(scores[3], -scores[4])

    loss_change = model.history.history['loss']
    assert len(loss_change) == num_epochs
    # Convert to an array first: comparing a plain Python float with a list
    # yields a single bool (which has no ``.all()``), so the elementwise
    # comparison must not rely on the history holding NumPy scalars.
    loss_change = np.asarray(loss_change)
    assert (loss_change[0] != loss_change[1:]).all()
Beispiel #4
0
def test_unet_with_featurizer(box_size, o):
    """Check that UNet accepts a Featurizer and rejects anything else.

    The number of input channels is taken from the featurizer's
    ``FEATURE_NAMES``. A non-Featurizer value must raise ``TypeError``;
    a valid one must be stored on the model, with ``data_handle`` left as
    ``None``.
    """
    feat = Featurizer()
    common = dict(
        box_size=box_size,
        input_channels=len(feat.FEATURE_NAMES),
        output_channels=o,
        scale=0.5,
    )

    with pytest.raises(TypeError, match='should be a tfbio.data.Featurize'):
        UNet(featurizer=1, **common)

    net = UNet(featurizer=feat, **common)
    assert hasattr(net, 'data_handle')
    assert net.data_handle is None
    assert hasattr(net, 'featurizer')
    assert isinstance(net.featurizer, Featurizer)
Beispiel #5
0
def test_incompatible_with_layers(kwargs):
    """Extra keyword arguments that conflict with explicit layers must fail.

    The expected ``ValueError`` message is matched against the name of the
    first keyword in ``kwargs``.
    """
    inputs = Input([10] * 3 + [3])
    hidden = Convolution3D(
        filters=3, kernel_size=1, activation='elu', padding='same',
    )(inputs)
    outputs = Convolution3D(
        filters=5, kernel_size=1, activation='sigmoid', padding='same',
    )(hidden)

    offending_name = list(kwargs)[0]
    with pytest.raises(ValueError, match=offending_name):
        UNet(inputs=inputs, outputs=outputs, **kwargs)
Beispiel #6
0
def test_predict_pocket_atoms(box_size, o):
    """Compare atom-level and residue-expanded pocket predictions.

    Both calls must return the same, non-zero number of pockets. For each
    pair, the atom-level pocket may not contain more atoms than the
    residue-expanded one, and both must cover the same residue indices.
    """
    np.random.seed(42)
    mol = next(pybel.readfile('mol2', protein_file))

    net = UNet(featurizer=featurizer,
               box_size=box_size,
               scale=0.5,
               output_channels=o)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')

    # Options shared by both prediction calls.
    shared = {'dist_cutoff': 3, 'threshold': 0.55, 'min_size': 5}

    by_atoms = net.predict_pocket_atoms(mol, expand_residue=False, **shared)
    by_residues = net.predict_pocket_atoms(mol, expand_residue=True, **shared)

    assert len(by_atoms) == len(by_residues)
    assert len(by_atoms) > 0
    for atom_pocket, residue_pocket in zip(by_atoms, by_residues):
        assert isinstance(atom_pocket, pybel.Molecule)
        assert isinstance(residue_pocket, pybel.Molecule)
        assert len(atom_pocket.atoms) <= len(residue_pocket.atoms)
        atom_residues = {res.idx for res in atom_pocket.residues}
        expanded_residues = {res.idx for res in residue_pocket.residues}
        assert atom_residues == expanded_residues
0
def test_incompatible_layers_shapes(input_shape, strides, message):
    """UNet must reject layer graphs whose tensors have unusable shapes.

    When the expected message concerns the input rank, the input tensor is
    reused as the output; otherwise a strided convolution produces the output.
    """
    inputs = Input(input_shape)
    if message != 'input should be 5D':
        conv = Convolution3D(filters=3,
                             kernel_size=1,
                             activation='sigmoid',
                             padding='same',
                             strides=strides)
        outputs = conv(inputs)
    else:
        outputs = inputs

    with pytest.raises(ValueError, match=message):
        UNet(inputs=inputs, outputs=outputs, box_size=20)
Beispiel #8
0
def test_save_pockets_cmap(data):
    """Save a predicted density as a .cmap file and verify its contents.

    Saving a batch with more than one prediction must fail. A single
    prediction must produce an HDF5 file with a ``Chimera`` group holding one
    ``imageN`` entry per output channel, each storing the channel's density
    with its axes reversed.
    """
    net = UNet(data_handle=data, l2_lambda=1e-7)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')
    density, origin, step = net.pocket_density_from_grid('10gs')

    with pytest.raises(ValueError, match='saving more than one prediction'):
        stacked = np.concatenate((density, density))
        net.save_density_as_cmap(stacked, origin, step)

    with tempfile.NamedTemporaryFile(suffix='.cmap') as tmp:
        path = tmp.name
        net.save_density_as_cmap(density, origin, step, fname=path)
        with h5py.File(path, 'r') as h5:
            assert 'Chimera' in h5
            chimera = h5['Chimera']
            assert len(chimera.keys()) == data.y_channels
            for channel in range(data.y_channels):
                # Entries are numbered from 1.
                name = 'image%s' % (channel + 1)
                assert name in chimera
                assert 'data_zyx' in chimera[name]
                stored = chimera[name]['data_zyx'][:]
                expected = density[0, ..., channel].transpose([2, 1, 0])
                assert np.allclose(expected, stored[:])
0
def test_load_wrong_args(data, kwargs, err, compiled):
    """Loading a saved model with conflicting arguments must raise ``err``.

    A model (optionally compiled) is saved to a temporary file, then
    reloaded with ``kwargs``; the error message is matched against the name
    of the first keyword in ``kwargs``.
    """
    saved = UNet(box_size=data.box_size,
                 input_channels=data.x_channels,
                 output_channels=data.y_channels,
                 scale=data.scale,
                 data_handle=data)
    if compiled:
        saved.compile(optimizer=Adam(lr=1e-6),
                      loss='binary_crossentropy',
                      metrics=[dice, dice_loss, ovl, ovl_loss])

    with tempfile.NamedTemporaryFile(suffix='.hdf') as tmp:
        saved.save(tmp.name)

        offending_name = list(kwargs)[0]
        with pytest.raises(err, match=offending_name):
            UNet.load_model(tmp.name, data_handle=data, **kwargs)
Beispiel #10
0
def main():
    """Predict pockets for every input structure and write them to disk.

    For each path in ``args.input`` a sub-directory (named after the first
    group captured by ``args.dirname_pattern``) is created inside the output
    directory. Each predicted pocket is written there as ``pocket<i>.mol2``
    and the pocket probability density as ``pockets.cmap``.

    Raises:
        IOError: if the output directory is not writable (``os.makedirs`` may
            also raise ``OSError`` if it cannot be created).
        ValueError: if a path does not match ``args.dirname_pattern``.
    """
    args = parse_args()

    if args.output is None:
        args.output = 'pockets_' + time.strftime('%Y-%m-%d')
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    # and fails early if the path exists but is not a directory.
    os.makedirs(args.output, exist_ok=True)
    if not os.access(args.output, os.W_OK):
        raise IOError('Cannot create files inside %s (check your permissions).' % args.output)

    # load trained model
    model = UNet.load_model(args.model, scale=args.scale, max_dist=args.max_dist,
                            featurizer=Featurizer(save_molecule_codes=False))

    # Without --verbose, iterate silently instead of showing a progress bar.
    progress_bar = tqdm if args.verbose else iter

    for path in progress_bar(args.input):
        match = re.match(args.dirname_pattern, path)
        if not match:
            # NOTE(review): the message names --namedir_pattern while the value
            # is read from args.dirname_pattern; confirm the flag name in
            # parse_args.
            raise ValueError('Cannot extract name from %s. '
                             'Please specify correct --namedir_pattern' % path)
        dirname = os.path.join(args.output, match.groups()[0])
        os.makedirs(dirname, exist_ok=True)

        # Only the first molecule in the file is used.
        mol = next(readfile(args.format, path))

        # predict pockets and save them as separate mol2 files
        pockets = model.predict_pocket_atoms(mol)
        for i, pocket in enumerate(pockets):
            pocket.write('mol2', os.path.join(dirname, 'pocket%i.mol2' % i))

        # save pocket probability as density map (UCSF Chimera format)
        density, origin, step = model.pocket_density_from_mol(mol)
        model.save_density_as_cmap(density, origin, step, fname=os.path.join(dirname, 'pockets.cmap'))
0
def test_multiple_inputs_outputs(box_size, i_channels, o_channels):
    """UNet must keep every input and output of a multi-branch graph.

    One input tensor is created per entry of ``i_channels``; each goes
    through its own convolution, the results are concatenated along the last
    axis, and one output head is created per entry of ``o_channels``.
    """
    inputs = []
    for n_in in i_channels:
        inputs.append(Input([box_size] * 3 + [n_in]))

    branches = []
    for tensor in inputs:
        branch = Convolution3D(filters=3,
                               kernel_size=1,
                               activation='elu',
                               padding='same')(tensor)
        branches.append(branch)
    merged = concatenate(branches, axis=-1)

    outputs = []
    for n_out in o_channels:
        head = Convolution3D(filters=n_out,
                             kernel_size=1,
                             activation='sigmoid',
                             padding='same')(merged)
        outputs.append(head)

    net = UNet(inputs=inputs,
               outputs=outputs,
               box_size=box_size,
               input_channels=sum(i_channels),
               output_channels=sum(o_channels))
    assert len(net.inputs) == len(i_channels)
    assert len(net.outputs) == len(o_channels)
Beispiel #12
0
def test_incompatible_with_data_handle(data, kwargs):
    """Keyword arguments that conflict with a data handle must be rejected.

    The ``ValueError`` message is matched against the name of the first
    keyword in ``kwargs``.
    """
    offending_name = list(kwargs)[0]
    with pytest.raises(ValueError, match=offending_name):
        UNet(data_handle=data, **kwargs)
Beispiel #13
0
def test_predict_mol(box_size, o):
    """Check density prediction for a molecule and its preconditions.

    Predicting without a featurizer or without a scale must raise
    ``ValueError``; passing something other than a molecule must raise
    ``TypeError``. A properly configured model must yield a density with at
    least one positive value.
    """
    mol = next(pybel.readfile('mol2', protein_file))

    with pytest.raises(ValueError, match='featurizer must be set'):
        no_featurizer = UNet(box_size=box_size,
                             scale=0.5,
                             input_channels=num_features,
                             output_channels=o)
        no_featurizer.pocket_density_from_mol(mol)

    with pytest.raises(ValueError, match='scale must be set'):
        no_scale = UNet(featurizer=featurizer,
                        box_size=box_size,
                        input_channels=num_features,
                        output_channels=o)
        no_scale.pocket_density_from_mol(mol)

    net = UNet(featurizer=featurizer,
               box_size=box_size,
               scale=0.5,
               output_channels=o)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')

    # A file path is not an acceptable substitute for a parsed molecule.
    with pytest.raises(TypeError, match='pybel.Molecule'):
        net.pocket_density_from_mol(protein_file)

    density, origin, step = net.pocket_density_from_mol(mol)
    assert (density > 0).any()
Beispiel #14
0
def test_save_pockets_cube(data):
    """Save a predicted density as a .cube file and verify its contents.

    Saving more than one prediction, or a multichannel density, must fail.
    For a single channel, the values following the 7-line header must match
    the density when reshaped to its shape.
    """
    net = UNet(data_handle=data, l2_lambda=1e-7)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')
    density, origin, step = net.pocket_density_from_grid('10gs')

    with pytest.raises(ValueError, match='saving more than one prediction'):
        stacked = np.concatenate((density, density))
        net.save_density_as_cube(stacked, origin, step)

    with pytest.raises(NotImplementedError, match='saving multichannel'):
        net.save_density_as_cube(density, origin, step)

    # Keep only the first channel (the list index preserves the last axis).
    density = density[..., [0]]
    with tempfile.NamedTemporaryFile(suffix='.cube') as tmp:
        path = tmp.name
        net.save_density_as_cube(density, origin, step, fname=path)
        with open(path, 'r') as cube:
            lines = cube.readlines()
        # Everything after the 7 header lines is whitespace-separated values.
        tokens = ''.join(lines[7:]).split()
        values = np.array(tokens).reshape(density.shape)
        assert np.allclose(density, values.astype(float))
Beispiel #15
0
def test_save_load(data, kwargs, compiled):
    """Round-trip a model through both save formats and compare weights.

    A model saved with ``save`` and restored with ``UNet.load_model`` must
    have the same JSON architecture and weights. A model saved with
    ``save_keras`` must be loadable by Keras' own ``load_model`` with the
    same weights.
    """
    from keras.models import load_model as keras_load

    original = UNet(box_size=data.box_size,
                    input_channels=data.x_channels,
                    output_channels=data.y_channels,
                    scale=data.scale,
                    data_handle=data)
    if compiled:
        original.compile(optimizer=Adam(lr=1e-6),
                         loss='binary_crossentropy',
                         metrics=[dice, dice_loss, ovl, ovl_loss])
    expected_weights = original.get_weights()

    with tempfile.NamedTemporaryFile(suffix='.hdf') as tmp:
        original.save(tmp.name)

        restored = UNet.load_model(tmp.name, data_handle=data, **kwargs)
        restored_weights = restored.get_weights()

        assert original.to_json() == restored.to_json()
        for expected, actual in zip(expected_weights, restored_weights):
            assert np.allclose(expected, actual)

    with tempfile.NamedTemporaryFile(suffix='.hdf') as tmp:
        original.save_keras(tmp.name)

        plain_keras = keras_load(tmp.name)
        restored_weights = plain_keras.get_weights()

        for expected, actual in zip(expected_weights, restored_weights):
            assert np.allclose(expected, actual)
Beispiel #16
0
def test_get_pockets_segmentation(data):
    """Check pocket segmentation and the preconditions of grid prediction.

    Predicting from a grid without a data handle or without a scale must
    raise ``ValueError``, as must segmenting a stack of two densities.
    A valid segmentation is a cube of side ``data.box_size`` containing at
    least one labelled voxel.
    """
    base_kwargs = dict(box_size=data.box_size,
                       input_channels=data.x_channels,
                       output_channels=data.y_channels,
                       l2_lambda=1e-7)

    with pytest.raises(ValueError, match='data_handle must be set'):
        no_handle = UNet(**base_kwargs)
        no_handle.pocket_density_from_grid('10gs')

    with pytest.raises(ValueError, match='scale must be set'):
        no_scale = UNet(data_handle=data, **base_kwargs)
        # Clear the scale after construction to trigger the check.
        no_scale.scale = None
        no_scale.pocket_density_from_grid('10gs')

    np.random.seed(42)
    net = UNet(data_handle=data, **base_kwargs)
    net.compile(optimizer=Adam(lr=1e-6), loss='binary_crossentropy')
    density, *_ = net.pocket_density_from_grid('10gs')

    with pytest.raises(ValueError, match='not supported'):
        net.get_pockets_segmentation(np.array([density] * 2), 0.6)

    labels = net.get_pockets_segmentation(density, 0.6)
    assert labels.shape == (data.box_size, ) * 3
    assert labels.max() > 0
    # Distinct values, minus one, cannot exceed the largest label.
    assert len(np.unique(labels)) - 1 <= labels.max()
Beispiel #17
0
def _read_ids(path):
    """Return the non-empty, whitespace-stripped lines of *path*.

    Returns ``None`` when *path* is falsy (no file was given). Stripping
    keeps IDs intact when the file uses CRLF line endings or has trailing
    whitespace.
    """
    if not path:
        return None
    with open(path) as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]


def main():
    """Train (or continue training) a UNet and save the results.

    Reads optional train/test ID lists, builds a ``DataWrapper`` over
    ``args.input``, loads the model from ``args.model`` or creates and
    compiles a new one, and trains it with ``fit_generator``. A checkpoint is
    written after every epoch; when test IDs are given, a validation
    generator is used and the best weights are saved as well. The training
    history (``history.csv``) and the final model (``model.hdf``) are written
    to the output directory.

    Raises:
        IOError: if the output directory is not writable (``os.makedirs`` may
            also raise ``OSError`` if it cannot be created).
    """
    args = parse_args()

    if args.output is None:
        args.output = 'output_' + time.strftime('%Y-%m-%d')
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    # and fails early if the path exists but is not a directory.
    os.makedirs(args.output, exist_ok=True)
    if not os.access(args.output, os.W_OK):
        raise IOError(
            'Cannot create files inside %s (check your permissions).' %
            args.output)

    train_ids = _read_ids(args.train_ids)
    test_ids = _read_ids(args.test_ids)

    # Restrict the dataset only when training IDs are given; test IDs are
    # then merged in so both subsets are part of the loaded data.
    if train_ids:
        if test_ids:
            all_ids = sorted(set(train_ids) | set(test_ids))
        else:
            all_ids = train_ids
    else:
        all_ids = None

    data = DataWrapper(args.input,
                       test_set=test_ids,
                       pdbids=all_ids,
                       load_data=args.load)

    if args.model:
        model = UNet.load_model(args.model, data_handle=data)
    else:
        model = UNet(data_handle=data)
        model.compile(optimizer=Adam(lr=1e-6),
                      loss=dice_loss,
                      metrics=[dice, ovl, 'binary_crossentropy'])

    train_batch_generator = data.batch_generator(batch_size=args.batch_size)

    # Always keep the latest state, regardless of its quality.
    callbacks = [
        ModelCheckpoint(os.path.join(args.output, 'checkpoint.hdf'),
                        save_best_only=False)
    ]

    if test_ids:
        val_batch_generator = data.batch_generator(batch_size=args.batch_size,
                                                   subset='test')
        # Validate on a fifth of the training steps, but at least one.
        num_val_steps = max(args.steps_per_epoch // 5, 1)
        callbacks.append(
            ModelCheckpoint(os.path.join(args.output, 'best_weights.hdf'),
                            save_best_only=True))
    else:
        val_batch_generator = None
        num_val_steps = None

    model.fit_generator(train_batch_generator,
                        steps_per_epoch=args.steps_per_epoch,
                        epochs=args.epochs,
                        verbose=args.verbose,
                        callbacks=callbacks,
                        validation_data=val_batch_generator,
                        validation_steps=num_val_steps)

    history = pd.DataFrame(model.history.history)
    history.to_csv(os.path.join(args.output, 'history.csv'))
    model.save(os.path.join(args.output, 'model.hdf'))