Esempio n. 1
0
def _getPropertiesRDkit(smallmol):
    """
    Build the boolean per-atom property matrix of a small molecule from the
    chemical features RDKit's feature factory reports for it.

    Returns an ndarray of shape (n_atoms, 8), dtype bool, whose columns are:
        0. Hydrophobic ("Hydrophobe" and "LumpedHydrophobe" families)
        1. Aromatic
        2. Acceptor
        3. Donor
        4. PosIonizable
        5. NegIonizable
        6. Metal (never set by this function, always False)
        7. Occupancy (True where the atom's element is not "H")
    """
    from moleculekit.smallmol.util import factory

    atom_count = smallmol.numAtoms

    # Feature family name -> column of the output matrix. Both hydrophobe
    # families share column 0; families not listed here are ignored.
    family_to_column = {
        "Hydrophobe": 0,
        "LumpedHydrophobe": 0,
        "Aromatic": 1,
        "Acceptor": 2,
        "Donor": 3,
        "PosIonizable": 4,
        "NegIonizable": 5,
    }

    mol_features = factory.GetFeaturesForMol(smallmol._mol)
    channels = np.zeros((atom_count, 8), dtype=bool)

    for feature in mol_features:
        column = family_to_column.get(feature.GetFamily())
        if column is None:
            # Family is not one of the tracked properties.
            continue
        # A feature may cover several atoms; flag all of them at once.
        channels[feature.GetAtomIds(), column] = True

    # Last column: occupancy, i.e. every atom that is not a hydrogen.
    channels[:, 7] = smallmol.get("element") != "H"
    return channels
Esempio n. 2
0
def _getPropertiesRDkit(smallmol):
    """
    Build the per-atom channel matrix of a ligand by combining RDKit
    features, Arpeggio atom types and per-element features.

    The returned array is the column-wise concatenation of, in this order:
        * 3 boolean RDKit columns: "Hydrophobe", "LumpedHydrophobe",
          "ZnBinder"
        * one boolean column per entry of ``arpeggio_atomtypes_unique``
        * whatever ``get_atomic_feature`` returns for each atom's element
        * a single all-False "protein" flag column (0 marks a ligand atom)

    Parameters
    ----------
    smallmol
        Molecule object providing ``numAtoms``, ``_mol``, ``write(path)``
        and ``get('element')``.

    Returns
    -------
    numpy.ndarray
        Array with one row per atom.

    Notes
    -----
    The molecule is written to a temporary PDB file for
    ``process_arpeggio``; the file is removed again even if writing or
    processing fails. The final concatenation requires ``process_arpeggio``
    to yield exactly one entry per atom of ``smallmol`` -- presumably in the
    same atom order; verify against ``process_arpeggio``.
    """
    import logging
    from tempfile import TemporaryDirectory

    logger = logging.getLogger(__name__)

    n_atoms = smallmol.numAtoms

    # A private temporary directory avoids the name-reuse race of
    # ``NamedTemporaryFile(...).name`` and guarantees cleanup on errors.
    with TemporaryDirectory() as tmp_dir:
        tmp_pdb = os.path.join(tmp_dir, 'ligand.pdb')
        smallmol.write(tmp_pdb)
        s_atoms = process_arpeggio(tmp_pdb)

    # One boolean row per Arpeggio atom; the column index of every atom type
    # is looked up in arpeggio_atomtypes_unique.
    arpeggio_features = []
    for atom in s_atoms:
        raw_features = sorted(atom.atom_types)
        feature_vector = np.zeros(len(arpeggio_atomtypes_unique), dtype=bool)
        for rf in raw_features:
            # Atom types without a channel are silently skipped.
            if rf in arpeggio_atomtypes_unique:
                feature_vector[arpeggio_atomtypes_unique[rf]] = True

        arpeggio_features.append(feature_vector)

        # Diagnostic only: report atoms that set the last Arpeggio channel.
        if feature_vector[-1]:
            logger.debug("%s %s", raw_features, feature_vector)

    arpeggio_features = np.stack(arpeggio_features, axis=0)

    # Only these RDKit families are taken from RDKit; the other features
    # come from Arpeggio.
    atom_mapping1 = {
        "Hydrophobe": 0,
        "LumpedHydrophobe": 1,
        "ZnBinder": 2,
    }

    feats = factory.GetFeaturesForMol(smallmol._mol)
    properties = np.zeros((n_atoms, 3), dtype=bool)

    # Single column that stays False for every row: 0 marks a ligand atom.
    protein_feature = np.zeros(shape=(len(properties), 1), dtype=bool)

    for feat in feats:
        fam = feat.GetFamily()
        if fam not in atom_mapping1:  # Non relevant property
            continue
        properties[feat.GetAtomIds(), atom_mapping1[fam]] = True

    # Per-element feature vectors, stacked into one row per atom.
    els = np.stack([get_atomic_feature(e) for e in smallmol.get('element')],
                   axis=0)

    blocks = [properties, arpeggio_features, els, protein_feature]
    logger.debug("channel block shapes: %s", [c.shape for c in blocks])

    return np.concatenate(blocks, axis=-1)