Example #1
0
def _CDPLgenerateConformation(cdpl_mol):
    '''
    PRIVATE METHOD
    Generates one random 3D conformation for a CDPL Molecule. \n
    Input: \n
    cdpl_mol (CDPL BasicMolecule): the molecule to generate a conformation
    for; it is configured and modified in place \n
    Return: \n
    (CDPL BasicMolecule): the input molecule with the generated 3D
    coordinates set, or None if the conformer generation failed
    '''
    # TODO What exactly should be in the config for the cmp generation?
    _CDPLconfigForConformation(cdpl_mol)

    cg = ConfGen.RandomConformerGenerator()
    cg.strictMMFF94AtomTyping = False

    ConfGen.prepareForConformerGeneration(cdpl_mol)

    # Size the coordinate buffer only after the preparation step, so that it
    # matches the atom count the generator will actually see.
    coords = Math.Vector3DArray()
    coords.resize(cdpl_mol.numAtoms, Math.Vector3D())

    cg.setup(cdpl_mol)

    if cg.generate(coords) != ConfGen.RandomConformerGenerator.SUCCESS:
        log.error('! Conformer generation failed !')
        return None

    Chem.set3DCoordinates(cdpl_mol, coords)

    return cdpl_mol
Example #2
0
def getMoleculeFromAtom(atom: Chem.BasicAtom, protein: Chem.BasicMolecule) -> (Chem.BasicMolecule, list):
    """
    Collect the bonded component that contains ``atom`` into a new molecule.

    Starting at ``atom``, the bond graph is walked until no unvisited atom is
    left. Every visited atom and every bond seen on the way is added to a
    fragment, which is finally copied into a fresh ``Chem.BasicMolecule``.

    :param atom: Atom to start the traversal from.
    :param protein: Parent molecule; only used to look up the index of each
        visited atom via ``getAtomIndex``.
    :return: Tuple of the new molecule and the list of indices (in
        ``protein``) of all visited atoms, in visiting order.
    """
    ligand = Chem.Fragment()
    pending = {atom}  # atoms discovered but not yet processed
    visited = set()  # atoms already processed, never queued again
    parentIndices = []

    while pending:
        current = pending.pop()
        visited.add(current)
        ligand.addAtom(current)
        parentIndices.append(protein.getAtomIndex(current))

        # inspect every bond partner of the current atom
        for bond in current.bonds:
            for other in bond.atoms:
                if other != current:
                    if not (other in pending or other in visited):
                        pending.add(other)  # first time this atom is seen

                    ligand.addBond(bond)  # ignored if already exists

    Chem.perceiveComponents(ligand, True)
    result = Chem.BasicMolecule()
    result.assign(ligand)
    return result, parentIndices
def getSurfaceAtoms(mol):
    """
    Extract the surface atoms of ``mol`` into a new molecule.

    Runs a ``Chem.SurfaceAtomExtractor`` with ``mol`` passed both as first and
    second argument, and copies the resulting fragment into a new
    ``Chem.BasicMolecule``.

    :param mol: Molecule to extract the surface atoms from.
    :return: A new ``Chem.BasicMolecule`` assigned from the extracted fragment.
    """
    extracted = Chem.Fragment()
    Chem.SurfaceAtomExtractor().extract(mol, mol, extracted)

    result = Chem.BasicMolecule()
    result.assign(extracted)
    return result
    def __iter__(self):
        """
        Iterate over the molecules delivered by the reader ``self.r``.

        If ``self.properties`` is None, each item is the sanitized molecule.
        Otherwise each item is a tuple of the sanitized molecule and the
        dictionary returned by ``_extract_properties_from_mol``.

        An ``IOError`` raised while reading or processing a record results in
        a bare ``None`` being yielded for that record (also in the tuple
        mode). Iteration ends when the reader reports no further molecule or
        once ``self.nr_mols`` records have been handled.
        """
        # The mode is decided once, before the first record is read.
        with_properties = self.properties is not None
        handled = 0

        while True:
            mol = Chem.BasicMolecule()
            try:
                if not self.r.read(mol):
                    break
                if with_properties:
                    read_properties = self._extract_properties_from_mol(mol)
                    yield sanitize_mol(mol), read_properties
                else:
                    yield sanitize_mol(mol)
            except IOError:
                yield None

            handled += 1
            if handled == self.nr_mols:
                break
    def prepare(self, removeLigands=True):
        """
        Sanitize this structure and generate 3D coordinates for its hydrogens.

        :param removeLigands: If true, ``self.removeLigands()`` is called after
            the sanitization and hydrogen coordinate generation.
        """
        from MoleculeTools import sanitize_mol

        sanitize_mol(self, makeHydrogenComplete=True)
        Chem.generateHydrogen3DCoordinates(self, True)

        if not removeLigands:
            return
        self.removeLigands()
def translate_mol_to_coords(mol, coords):
    """
    Shift all 3D coordinates of ``mol`` by subtracting ``coords``.

    :param mol: Molecule whose 3D coordinates are read, shifted and written
        back in place.
    :param coords: Indexable offset; ``coords[j]`` is subtracted from the
        j-th component of every atom position.
    :return: The same ``mol`` object, now carrying the shifted coordinates.
    """
    shifted = Math.Vector3DArray()
    Chem.get3DCoordinates(mol, shifted)

    for position in shifted:
        for axis, value in enumerate(position):
            position[axis] = value - coords[axis]  # shift to desired coordinates

    Chem.set3DCoordinates(mol, shifted)
    return mol
def mol_to_sdf(molecules, path, multiconf=True):
    """
    Write one or several molecules to an SDF file.

    :param molecules: A single molecule or an iterable of molecules. Anything
        that is not an ``Iterable`` is treated as a single molecule.
    :param path: Path of the SDF file to write.
    :param multiconf: Value passed to ``Chem.setMultiConfExportParameter`` for
        the writer.
    """
    if not isinstance(molecules, Iterable):
        molecules = [molecules]

    w = Chem.FileSDFMolecularGraphWriter(path)
    try:
        Chem.setMultiConfExportParameter(w, multiconf)
        for mol in molecules:
            Chem.calcImplicitHydrogenCounts(mol, False)
            w.write(mol)
    finally:
        # Close the writer even when a write fails, so the file handle is
        # not left open.
        w.close()
    def makeRandomRotation(self, inplace: bool = True) -> Math.Vector3DArray:
        """
        Rotate the coordinates of this object by a random rotation.

        The rotation matrix is taken from ``Rotation.random().as_matrix()``
        and applied to ``self.getCoordinates()`` via ``rotate3DObject``.

        :param inplace: If true, the rotated coordinates are also written back
            to this object with ``Chem.set3DCoordinates``.
        :return: The rotated coordinates.
        """
        # TODO: maybe add boundaries for randomness
        randomRotation = Math.Matrix3D()
        randomRotation.assign(Rotation.random().as_matrix())
        newCoords = rotate3DObject(self.getCoordinates(), randomRotation)

        if not inplace:
            return newCoords

        Chem.set3DCoordinates(self, newCoords)
        return newCoords
def prepareProtein(protein, removeLigands=True, removeWater=True):
    """
    Sanitize a protein and generate 3D coordinates for its hydrogens.

    :param protein: Protein to prepare; it is modified in place. It must
        provide a ``removeLigands(removeWater=...)`` method when
        ``removeLigands`` is true.
    :param removeLigands: If true, ``protein.removeLigands`` is called after
        the sanitization.
    :param removeWater: Forwarded to ``protein.removeLigands``.
    :return: The same ``protein`` object.
    """
    from MoleculeTools import sanitize_mol

    # This is a free function: operate on the ``protein`` argument (the
    # previous body referenced an undefined ``self``).
    sanitize_mol(protein, makeHydrogenComplete=True)
    Chem.generateHydrogen3DCoordinates(protein, True)

    if removeLigands:
        protein.removeLigands(removeWater=removeWater)

    return protein
Example #10
0
def calcAtomSetCentroid(atoms, conf_idx):
    """
    Compute the centroid of a set of atoms for one conformer.

    :param atoms: Sequence of atoms (must support ``len`` and indexing).
    :param conf_idx: Conformer index passed to
        ``Chem.getConformer3DCoordinates``.
    :return: For a single atom, the value returned by
        ``Chem.getConformer3DCoordinates`` for that atom; otherwise a new
        ``Math.Vector3D`` holding the mean of all atom positions.
    """
    atom_count = len(atoms)

    # A single atom is its own centroid; no averaging needed.
    if atom_count == 1:
        return Chem.getConformer3DCoordinates(atoms[0], conf_idx)

    centroid = Math.Vector3D()
    for member in atoms:
        centroid += Chem.getConformer3DCoordinates(member, conf_idx)
    centroid /= atom_count

    return centroid
    def __init__(self, lig_feature, env_feature):
        """
        Describe the interaction between a ligand and an environment feature.

        Sets the following attributes:

        * ``interaction_type``: ``'<lig type>-<env type>'`` using the short
          feature type names (e.g. ``'HBA-HBD'``).
        * ``lig_residue`` / ``env_residue``: ``'<code>_<number>_<chain>'``
          taken from the first atom of the feature's substructure.
        * ``lig_atom`` / ``env_atom``: list of ``'<symbol>:<serial>'`` strings
          for all atoms of the feature's substructure, sorted by serial
          number.

        :param lig_feature: Pharmacophore feature on the ligand side.
        :param env_feature: Pharmacophore feature on the environment side.
        :raises KeyError: If a feature has a type without a short name.
        """
        ftype_names = {
            Pharm.FeatureType.H_BOND_ACCEPTOR: 'HBA',
            Pharm.FeatureType.H_BOND_DONOR: 'HBD',
            Pharm.FeatureType.POS_IONIZABLE: 'PI',
            Pharm.FeatureType.NEG_IONIZABLE: 'NI',
            Pharm.FeatureType.AROMATIC: 'AR',
            Pharm.FeatureType.HYDROPHOBIC: 'H',
            Pharm.FeatureType.X_VOLUME: 'XV'
        }

        def residue_label(feature):
            # The residue is identified via the first substructure atom.
            anchor = Pharm.getSubstructure(feature).atoms[0]
            return '{}_{}_{}'.format(Biomol.getResidueCode(anchor),
                                     Biomol.getResidueSequenceNumber(anchor),
                                     Biomol.getChainID(anchor))

        def atom_labels(feature):
            # 'symbol:serial' per atom, ordered by the numeric serial number.
            labels = [
                '{}:{}'.format(Chem.getSymbol(atom),
                               Biomol.getSerialNumber(atom))
                for atom in Pharm.getSubstructure(feature).atoms
            ]
            return sorted(labels, key=lambda k: int(k.split(':')[1]))

        self.interaction_type = '{}-{}'.format(
            ftype_names[Pharm.getType(lig_feature)],
            ftype_names[Pharm.getType(env_feature)])
        self.lig_residue = residue_label(lig_feature)
        self.env_residue = residue_label(env_feature)
        self.lig_atom = atom_labels(lig_feature)
        self.env_atom = atom_labels(env_feature)
def calculateECFP(mol, nIter=4, nBits=1021):
    """
    Calculate a circular (ECFP) fingerprint for a given molecule.

    ``Chem.makeHydrogenComplete`` is called on ``mol`` first, so the molecule
    passed in is modified.

    :param mol: Molecule to fingerprint.
    :param nIter: Number of iterations set on the fingerprint generator.
    :param nBits: Number of bits set on the fingerprint generator.
    :return: ``Util.BitSet`` filled by the generator.
    """
    Chem.makeHydrogenComplete(mol)

    fingerprint = Util.BitSet()

    generator = Chem.CircularFingerprintGenerator()
    generator.setNumIterations(nIter)
    generator.setNumBits(nBits)
    generator.generate(mol, fingerprint)

    return fingerprint
def center_mol(mol):
    """
    Move ``mol`` so that its centroid lies at the origin.

    The centroid is obtained from ``get_centroid`` and subtracted from every
    atom position; the result is written back to ``mol``.

    :param mol: Molecule whose 3D coordinates are centered in place.
    :return: The same ``mol`` object.
    """
    positions = Math.Vector3DArray()
    Chem.get3DCoordinates(mol, positions)

    as_array = np.array(positions)
    shifted = as_array - get_centroid(as_array)

    # copy the shifted values back into the CDPL coordinate container
    for atom_idx, position in enumerate(positions):
        for axis, _ in enumerate(position):
            position[axis] = shifted[atom_idx, axis]

    # write the centered coordinates to the molecule
    Chem.set3DCoordinates(mol, positions)
    return mol
def process():
    """
    Copy an SDF file while dropping duplicate molecules.

    Reads the input path from ``sys.argv[1]`` and the output path from
    ``sys.argv[2]``. Each molecule is passed through ``setupMolecule`` and
    identified by ``Chem.calcHashCode``; only the first molecule with a given
    hash code is written. Progress, dropped duplicates and a final summary
    are printed to stderr. Exits with status 2 if fewer than two arguments
    were given.
    """
    def report(*parts):
        # all diagnostics go to stderr
        print(*parts, file=sys.stderr)

    if len(sys.argv) < 3:
        report('Usage:', sys.argv[0], '[input.sdf] [output.sdf]')
        sys.exit(2)

    ifs = Base.FileIOStream(sys.argv[1], 'r')
    ofs = Base.FileIOStream(sys.argv[2], 'w')

    reader = Chem.SDFMoleculeReader(ifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfExportParameter(writer, False)

    stats = Stats()
    stats.read = 0
    stats.dropped = 0

    seen_hashes = set()
    mol = Chem.BasicMolecule()

    while reader.read(mol):
        setupMolecule(mol)

        hashcode = Chem.calcHashCode(mol)

        if hashcode not in seen_hashes:
            seen_hashes.add(hashcode)
            writer.write(mol)
        else:
            stats.dropped += 1
            # stats.read still holds the zero-based index of this molecule
            report('Removed Duplicate Molecule ' + str(stats.read) + ': ' +
                   Chem.generateSMILES(mol) + ' ' + Chem.getName(mol))

        stats.read += 1

        if stats.read % 10000 == 0:
            report('Processed ' + str(stats.read) + ' Molecules...')

    report('')
    report('-- Summary --')
    report('Molecules processed: ' + str(stats.read))
    report('Molecules dropped: ' + str(stats.dropped))
Example #15
0
def _CDPLextractProteinFragments(pdb_mol, lig_three_letter_code, radius=6.0):
    '''
    PRIVATE METHOD
    Extracts a ligand and its surrounding residues from a PDB molecule. \n
    Input: \n
    pdb_mol (CDPL BasicMolecule): the molecule read from the PDB file; protein
    properties are calculated on it via _CDPLcalcProteinProperties \n
    lig_three_letter_code (string): residue code identifying the ligand \n
    radius (float): value passed to Biomol.extractEnvironmentResidues as the
    environment distance \n
    Return: \n
    (CDPL Fragment, CDPL Fragment): the environment and the ligand fragment.
    If no atom carries the given residue code, an error is logged and the
    ligand fragment is empty.
    '''
    lig = Chem.Fragment()
    _CDPLcalcProteinProperties(pdb_mol)

    # extract ligand
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == lig_three_letter_code:
            Biomol.extractResidueSubstructure(atom, pdb_mol, lig, False)
    if lig.numAtoms == 0:
        # A %s placeholder is required: passing the code as a bare extra
        # argument makes the log record fail to format.
        log.error("The defined three letter code is not existing: %s",
                  lig_three_letter_code)
    # extract environment
    env = Chem.Fragment()
    Biomol.extractEnvironmentResidues(lig, pdb_mol, env, float(radius))

    return env, lig
Example #16
0
def encodePhaInfo2(surface, pha, invert=False):
    """
    One-hot encode, per surface point, the nearest pharmacophore feature type.

    Feature types are mapped to one of four columns through a lookup table
    (types mapped to -1 are ignored). For each surface point the column with
    the smallest distance to any feature of that column is set to 1, all
    others to 0. If no feature contributes, column 0 is set.

    The total number of features in ``pha`` is printed to stdout.

    :param surface: Sequence of surface points; each point must support
        subtraction with a NumPy coordinate array.
    :param pha: Iterable of pharmacophore features.
    :param invert: If true, the lookup table with pairwise swapped columns
        (0 <-> 1, 2 <-> 3) is used.
    :return: NumPy array of shape ``(len(surface), 4)`` containing 0/1 values.
    """
    typeCount = 4
    if invert:
        columnOf = [-1, -1, -1, 1, 0, 3, 2, -1, -1, -1, -1, -1]
    else:
        columnOf = [-1, -1, -1, 0, 1, 2, 3, -1, -1, -1, -1, -1]

    encoding = np.full((len(surface), typeCount), np.inf)

    count = 0
    for count, feature in enumerate(pha, start=1):
        index = columnOf[Pharm.getType(feature)]
        if index < 0:
            continue
        featureCoords = np.array(Chem.get3DCoordinates(feature))
        for i, pt in enumerate(surface):
            dist = np.linalg.norm(pt - featureCoords)
            # keep the smallest distance seen so far for this column
            if dist < encoding[i][index]:
                encoding[i][index] = dist
    print(count)

    for enc in encoding:
        # first column holding the minimal distance wins
        nearest = min(range(typeCount), key=lambda k: enc[k])
        enc[:] = 0
        enc[nearest] = 1
    return encoding
 def _extract_properties_from_mol(self, mol):
     """
     Collect the requested structure data entries of a molecule.

     Every entry of ``Chem.getStructureData(mol)`` whose header is contained
     in ``self.properties`` is stored under the header with its first two
     and its last character stripped.

     :param mol: Molecule to read the structure data from.
     :return: Dictionary mapping the stripped headers to the entry data.
     """
     return {
         entry.header[2:-1]: entry.data
         for entry in Chem.getStructureData(mol)
         if entry.header in self.properties
     }
def is_inorganic(mol: Chem.BasicMolecule) -> bool:
    """
    Indicate if the molecule contains no atom of type 6.

    :param mol: Molecule to inspect.
    :return: True if no atom has ``Chem.getType(atom) == 6`` (presumably
        carbon), which also holds for a molecule without atoms; False as soon
        as one such atom is found.
    """
    return all(Chem.getType(atom) != 6 for atom in mol.atoms)
Example #19
0
def saveCDFMolecule(fname, mol):
    """
    Write a molecule to a CDF file.

    :param fname: Path of the CDF file to write.
    :param mol: Molecule to write.
    :return: ``mol`` if the writer reports success, otherwise None.
    """
    writer = Chem.FileCDFMolecularGraphWriter(fname)
    return mol if writer.write(mol) else None
    def makeRandomTranslation(self,
                              inplace: bool = True,
                              scalingFactor: float = 10) -> Math.Vector3DArray:
        """
        Translate the coordinates of this object by a random vector.

        The translation vector is ``np.random.rand(3) * scalingFactor`` and is
        applied to ``self.getCoordinates()`` via ``translate3DObject``.

        :param inplace: If true, the translated coordinates are also written
            back to this object with ``Chem.set3DCoordinates``.
        :param scalingFactor: Scales the randomly retrieved direction by this factor
        :return: The translated coordinates.
        """
        shift = Math.Vector3D()
        shift.assign(np.random.rand(3) * scalingFactor)
        newCoords = translate3DObject(self.getCoordinates(), shift)

        if not inplace:
            return newCoords

        Chem.set3DCoordinates(self, newCoords)
        return newCoords
Example #21
0
def generateSMILES(mol):
    """
    Return the SMILES string of ``mol`` for diagnostic output.

    :param mol: Molecule to generate the SMILES string for.
    :return: The SMILES string, or an empty string if ``PRINT_SMILES`` is
        falsy or the SMILES generation raised an error.
    """
    if not PRINT_SMILES:
        return ''

    try:
        return Chem.generateSMILES(mol)
    except Exception:
        # Best effort only; unlike a bare ``except`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        return ''
Example #22
0
def _CDPLgeneratePha(mol, pha_type):
    '''
    PRIVATE METHOD
    generates the pharmacophore for the molecule and is used by the CDPLphaGenerator.
    Input: \n
    mol (CDPL BasicMolecule): the molecule the pharmacophore needs to be generated for \n
    pha_type (string): if equal to 'lig_only', no hydrogen coordinates are
    calculated before the pharmacophore generation  \n
    Return: \n
    (CDPL BasicPharmacophore): the corresponding pharmacophore
     '''
    # TODO What exactly should be in the config for the pha generation?
    # Compare by value: an identity check against a string literal depends on
    # interning and is not reliable.
    if pha_type != 'lig_only':
        Chem.generateHydrogen3DCoordinates(mol, True)
    pharm = Pharm.BasicPharmacophore()
    pharm_generator = Pharm.DefaultPharmacophoreGenerator(True)
    pharm_generator.generate(mol, pharm)
    return pharm
Example #23
0
def getAllHeavyAtoms(res):
    """
    Return all non-hydrogen atoms of ``res``.

    :param res: Object exposing an iterable ``atoms`` attribute.
    :return: List of the atoms whose type is not ``Chem.AtomType.H``, in
        their original order.
    """
    return [
        candidate for candidate in res.atoms
        if Chem.getType(candidate) != Chem.AtomType.H
    ]
def readPDBFromStream(stream: Base.IOStream):
    """
    Read one molecule in PDB format from a stream and wrap it as a Protein.

    The molecule is sanitized with ``makeHydrogenComplete=True`` before it is
    handed to the ``Protein`` constructor. The result of the read call is not
    checked.

    :param stream: Stream the PDB data is read from.
    :return: ``Protein`` built from the sanitized molecule.
    """
    from Protein import Protein
    from MoleculeTools import sanitize_mol

    structure = Chem.BasicMolecule()
    Biomol.PDBMoleculeReader(stream).read(structure)
    sanitize_mol(structure, makeHydrogenComplete=True)

    return Protein(structure)
Example #25
0
 def _generateNodes(self, pha):
     ''' 
     PRIVATE METHOD
     generates the nodes of the graph and appends them to self.nodes \n
     Input \n
     pha (CDPL BasicPharmacophore): pha the graph is based on
     '''
     for index, feature in enumerate(pha):
         node = PhaNode()
         node.feature_type = self._getAllowedSet(Pharm.getType(feature),
                                                 ELEM_LIST)
         # Look the position up once per feature instead of once per axis.
         position = Chem.get3DCoordinates(feature)
         for axis in range(3):
             node.coords[axis] = round(position[axis], 6)
         node.index = index
         self.nodes.append(node)
def is_metal(mol: Chem.BasicMolecule) -> bool:
    """
    Indicate if the compound contains an atom outside ``ALLOWED_ATOMS``.

    :param mol: Molecule to inspect.
    :return: True as soon as one atom type is not contained in
        ``ALLOWED_ATOMS`` (treated as a metal), False otherwise.
    """
    return any(
        Chem.getType(atom) not in ALLOWED_ATOMS for atom in mol.atoms)
Example #27
0
def CDPLmolFromSdf(sdf_path, conformation):
    '''
    generates a single CDPL Molecule from an sdf-file. If conformation is true, then
    one random conformation will be generated. \n
    Input: \n
    sdf_path (string): path to the sdf file \n
    conformation (boolean): generates one 3d conformation via
    _CDPLgenerateConformation \n
    Return: \n
    (CDPL BasicMolecule): the corresponding CDPL BasicMolecule; False if the
    file could not be read; the result of _CDPLgenerateConformation if
    conformation is true
    '''
    mol = Chem.BasicMolecule()
    ifs = Base.FileIOStream(sdf_path, 'r')
    sdf_reader = Chem.SDFMoleculeReader(ifs)

    if not sdf_reader.read(mol):
        # A %s placeholder is required: passing the path as a bare extra
        # argument makes the log record fail to format.
        log.error("COULD NOT READ SDF %s", sdf_path)
        return False
    if conformation:
        return _CDPLgenerateConformation(mol)
    return mol
def is_macrocyle(mol, ring_size=7):
    """
    Check if the given molecule contains a ring larger than the given size.

    The rings are taken from ``Chem.perceiveSSSR(mol)``.

    :param mol: Molecule to inspect.
    :param ring_size: Largest number of ring atoms that is still not regarded
        as a macrocycle.
    :return: True if at least one perceived ring has more than ``ring_size``
        atoms, False otherwise (including molecules without rings).
    """
    rings = Chem.perceiveSSSR(mol)
    if not rings.getSize() > 0:
        return False

    largest = max(ring.getNumAtoms() for ring in rings)
    return largest > ring_size
def remove_metal_salts(mol: Chem.BasicMolecule) -> Chem.BasicMolecule:
    """
    Remove every atom whose type is contained in ``SALT_METALS``.

    :param mol: Molecule to clean; it is modified in place.
    :return: The same ``mol`` object.
    """
    salt_indices = [
        mol.getAtomIndex(atom) for atom in mol.atoms
        if Chem.getType(atom) in SALT_METALS
    ]

    # Remove from the highest index downwards so the remaining indices stay
    # valid while atoms are being deleted.
    for index in sorted(salt_indices, reverse=True):
        mol.removeAtom(index)
    return mol
 def read_all(self):
     """
     Read all items this object iterates over into a dictionary.

     Each item is keyed by ``Chem.getName`` of the item; if that name is
     empty, the running index (as string) is used instead. Items sharing a
     key overwrite each other, the last one wins.

     :return: Dictionary mapping names to the iterated items.
     """
     collected = {}
     for position, entry in enumerate(self):
         label = Chem.getName(entry)
         # fall back to the position when no name is set
         collected[label if len(label) != 0 else str(position)] = entry
     return collected