Exemple #1
0
  def testIncludeOnly(self):
    """Check that ``includeOnly`` restricts feature lookups to a single family."""
    cfac = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolChemicalFeatures', 'test_data',
                   'featDef.txt'))
    self.assertEqual(cfac.GetNumFeatureDefs(), 2)

    mol = Chem.MolFromSmiles("COCN")
    rdDistGeom.EmbedMolecule(mol)

    # Per-family feature counts; a family name that is not defined yields zero.
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondAcceptor"), 2)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="HBondDonor"), 1)
    self.assertEqual(cfac.GetNumMolFeatures(mol, includeOnly="Bogus"), 0)

    # The index is relative to the filtered family, so one past its count raises.
    self.assertRaises(IndexError, lambda: cfac.GetMolFeature(mol, 1, includeOnly="HBondDonor"))
    self.assertRaises(IndexError,
                      lambda: cfac.GetMolFeature(mol, 2, includeOnly="HBondAcceptor"))
    f = cfac.GetMolFeature(mol, 0, includeOnly="HBondDonor")
    self.assertEqual(f.GetFamily(), 'HBondDonor')

    # The bulk accessor must agree with the per-family counts above.
    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondAcceptor")
    self.assertEqual(len(feats), 2)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="HBondDonor")
    self.assertEqual(len(feats), 1)
    feats = cfac.GetFeaturesForMol(mol, includeOnly="Bogus")
    self.assertEqual(len(feats), 0)
 def setUp(self):
     """Create ``self.factory``: a signature factory for 2- and 3-point pharmacophores."""
     fdef_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'Pharm2D', 'test_data',
                              'BaseFeatures.fdef')
     self.factory = SigFactory.SigFactory(
         ChemicalFeatures.BuildFeatureFactory(fdef_path),
         minPointCount=2,
         maxPointCount=3,
     )
     # Three distance bins are configured before the factory is initialised.
     distance_bins = [(0, 2), (2, 5), (5, 8)]
     self.factory.SetBins(distance_bins)
     self.factory.Init()
Exemple #3
0
def _getFeatureFamily(mol):
    """Return, for every atom, the feature families that atom takes part in.

    Hydrogens are added to a copy of ``mol``, the copy is embedded (with a
    random-coordinate retry when the first attempt reports a negative status)
    and, when embedding reports status 0, a UFF optimisation is attempted.
    Features are then perceived with the ``BaseFeatures.fdef`` definitions.

    The result is a list with one entry per atom of the hydrogen-added
    molecule. Each entry starts with ``len(feats)`` empty strings and is
    followed by the distinct family names found for that atom.
    ``mol`` must carry a ``_Name`` property, which is read for logging.
    """
    fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
    mol_with_hs = rdkit.Chem.AddHs(mol)
    # NOTE(review): the molecule is embedded here and again on the next line;
    # the first result looks unused - confirm before removing.
    AllChem.EmbedMolecule(mol_with_hs, useRandomCoords=True)
    status = rdkit.Chem.AllChem.EmbedMolecule(mol_with_hs)
    logging.debug("Getting features for mol " + mol.GetProp("_Name"))
    if status < 0:
        # Retry from random starting coordinates.
        status = rdkit.Chem.AllChem.EmbedMolecule(mol_with_hs,
                                                  useRandomCoords=True)
    if status == 0:
        try:
            first_pass = rdkit.Chem.AllChem.UFFOptimizeMolecule(mol_with_hs)
            if first_pass != 0:
                # Non-zero result: run again with a larger iteration budget.
                rdkit.Chem.AllChem.UFFOptimizeMolecule(mol_with_hs,
                                                       maxIters=1000)
        except ValueError:
            logging.error("Problem with 3D version of molecule " +
                          mol_with_hs.GetProp("_Name"))
    found = factory.GetFeaturesForMol(mol_with_hs)
    per_atom = [[""] * len(found) for _ in range(mol_with_hs.GetNumAtoms())]
    for feat in found:
        family = feat.GetFamily()
        for atom_idx in feat.GetAtomIds():
            families = per_atom[atom_idx]
            if family not in families:
                families.append(family)
    return per_atom
Exemple #4
0
def processArgs(args, parser):
    """Print the chemical features of every SMILES in ``args.smilesFilename``.

    The feature factory is built from ``args.fdefFilename``; a failure is
    reported through ``parser.error``. At most ``args.maxLines`` lines of
    the SMILES file are processed. The first field of each line is taken as
    the SMILES; lines that cannot be parsed are logged and skipped.

    With ``args.reverseIt`` set, one line is printed per feature, listing the
    atom ids it covers. Otherwise one line is printed per atom with the
    entries returned by ``GetAtomFeatInfo`` for that atom.
    """
    try:
        factory = ChemicalFeatures.BuildFeatureFactory(args.fdefFilename)
    except Exception:
        parser.error(
            "Could not parse Fdef file {0.fdefFilename}.".format(args))

    with open(args.smilesFilename) as smiles_file:
        for line_number, raw_line in enumerate(smiles_file, 1):
            # Stop once the requested number of lines has been consumed.
            if line_number == args.maxLines + 1:
                break
            fields = splitExpr.split(raw_line.strip())
            smi = fields[0].strip()
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                logger.warning("Could not process smiles '%s' on line %d." %
                               (smi, line_number))
                continue

            print('Mol-%d\t%s' % (line_number, smi))
            if args.reverseIt:
                # Feature-centric listing: "<family>-<type>: <atom ids>".
                for feat in factory.GetFeaturesForMol(mol):
                    print('\t%s-%s: ' % (feat.GetFamily(), feat.GetType()),
                          end='')
                    print(', '.join(map(str, feat.GetAtomIds())))
                continue

            # Atom-centric listing: symbol, 1-based index, then its entries.
            for atom_idx, entries in enumerate(GetAtomFeatInfo(factory, mol)):
                symbol = mol.GetAtomWithIdx(atom_idx).GetSymbol()
                print('\t% 2s(%d)' % (symbol, atom_idx + 1), end='')
                if not entries:
                    print()
                    continue
                print('\t', ', '.join(entries))
Exemple #5
0
    def _align_molecules(self, molecules: List[Chem.Mol]) -> None:
        """ Align a list of molecules to a given pharmacophore.

        Every molecule is matched against the pharmacophore. Molecules that
        cannot be matched, or whose embeddings yield no SSD values, are
        stored with a score of 0.0 together with the input molecule.
        Otherwise the embedding with the smallest SSD is stored with score
        ``1 / SSD``. Results are appended to ``self.molecules`` as
        ``MolScore(score, id, mol)`` tuples, where ``id`` is the molecule's
        ``_Name`` property. A molecule whose pharmacophore embedding raises
        is skipped and gets no entry. ``self.n_molecules`` is increased by
        the number of input molecules in all cases.

        Parameters
        ----------
        molecules : list of rdkit.Chem.Mol
            List of molecules to align.

        """
        self.n_molecules += len(molecules)

        rdkit_pharmacophore, radii = self.pharmacophore.to_rdkit()
        apply_radii_to_bounds(radii, rdkit_pharmacophore)

        fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)

        MolScore = namedtuple("MolScore", ["score", "id", "mol"])

        for mol in tqdm(molecules):

            bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(
                mol, featFactory, rdkit_pharmacophore)

            # A molecule that cannot match at all is treated like a failed match.
            failed = True
            if can_match:
                failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
                    all_matches,
                    bounds_matrix,
                    rdkit_pharmacophore,
                    useDownsampling=True)
            if failed:
                self.molecules.append(
                    MolScore(0.0, mol.GetProp("_Name"), mol))
                continue

            atom_match = [list(x.GetAtomIds()) for x in matched_mols]

            try:
                mol_H = Chem.AddHs(mol)
                _, embeddings, _ = EmbedLib.EmbedPharmacophore(
                    mol_H, atom_match, rdkit_pharmacophore, count=10)
            except Exception:
                # Best effort: embedding failures drop the molecule, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                continue

            SSDs = transform_embeddings(rdkit_pharmacophore, embeddings,
                                        atom_match)
            if len(SSDs) == 0:
                self.molecules.append(
                    MolScore(0.0, mol.GetProp("_Name"), mol))
                continue

            # Smaller SSD means a better fit, so the score is its reciprocal.
            best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]
            score = 1 / SSDs[best_fit_index]
            self.molecules.append(
                MolScore(score, mol.GetProp("_Name"),
                         embeddings[best_fit_index]))
 def testGithub2603(self):
   """Features must stay usable after the local reference to their factory is dropped."""
   fdef_path = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
   factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
   mol = Chem.MolFromSmiles('OCc1ccccc1CN')
   features = factory.GetFeaturesForMol(mol)
   self.assertEqual(features[0].GetFamily(), 'Donor')
   # Release the factory reference, then query the same feature again.
   del factory
   self.assertEqual(features[0].GetFamily(), 'Donor')
Exemple #7
0
def alchemy_nodes(mol):
    """Build per-atom features for a molecule, keeping the atom order.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule object. It must hold exactly one conformer.

    Returns
    -------
    atom_feats_dict : dict
        ``'n_feat'`` holds the stacked per-atom feature vectors and
        ``'node_type'`` holds the atomic numbers as an int64 tensor.
    """
    atom_feats_dict = defaultdict(list)
    donor_flags = defaultdict(int)
    acceptor_flags = defaultdict(int)

    fdef_path = osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
    features = factory.GetFeaturesForMol(mol)
    conformers = mol.GetConformers()
    assert len(conformers) == 1

    # Mark every atom that belongs to a Donor or an Acceptor feature.
    for feat in features:
        family = feat.GetFamily()
        if family == 'Donor':
            flags = donor_flags
        elif family == 'Acceptor':
            flags = acceptor_flags
        else:
            continue
        for atom_idx in feat.GetAtomIds():
            flags[atom_idx] = 1

    hybridizations = [Chem.rdchem.HybridizationType.SP,
                      Chem.rdchem.HybridizationType.SP2,
                      Chem.rdchem.HybridizationType.SP3]
    for atom_idx in range(mol.GetNumAtoms()):
        atom = mol.GetAtomWithIdx(atom_idx)
        atomic_num = atom.GetAtomicNum()
        hydrogen_count = atom.GetTotalNumHs()
        atom_feats_dict['node_type'].append(atomic_num)

        # Layout: element one-hot, atomic number, acceptor, donor,
        # aromaticity, hybridization one-hot, hydrogen count.
        row = []
        row.extend(atom_type_one_hot(atom, ['H', 'C', 'N', 'O', 'F', 'S', 'Cl']))
        row.extend([atomic_num, acceptor_flags[atom_idx], donor_flags[atom_idx]])
        row.extend(atom_is_aromatic(atom))
        row.extend(atom_hybridization_one_hot(atom, hybridizations))
        row.append(hydrogen_count)
        atom_feats_dict['n_feat'].append(
            F.tensor(np.asarray(row, dtype=np.float32)))

    atom_feats_dict['n_feat'] = F.stack(atom_feats_dict['n_feat'], dim=0)
    node_types = np.asarray(atom_feats_dict['node_type'], dtype=np.int64)
    atom_feats_dict['node_type'] = F.tensor(node_types)

    return atom_feats_dict
Exemple #8
0
  def test4Search(self):
    """Screen the pickled molecules in cdk2-syn-clip100.pkl.gz against a
    three-point pharmacophore and check the read/match/hit counts."""
    featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(self.dataDir,
                                                        'BaseFeatures.fdef'))

    activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor',
                                            Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Donor',
                                            Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Aromatic',
                                            Geometry.Point3D(0.0, 0.0, 0.0))]
    pcophore= Pharmacophore.Pharmacophore(activeFeats)
    # Distance bounds between each pair of pharmacophore points.
    pcophore.setLowerBound(0,1,2.251)
    pcophore.setUpperBound(0,1,2.451)
    pcophore.setUpperBound2D(0,1,3)

    pcophore.setLowerBound(0,2,4.970)
    pcophore.setUpperBound(0,2,5.170)
    pcophore.setUpperBound2D(0,2,6)

    pcophore.setLowerBound(1,2,2.681)
    pcophore.setUpperBound(1,2,2.881)
    pcophore.setUpperBound2D(1,2,6)

    nDone = 0
    nMatches = 0
    nHits = 0

    # The context manager closes the archive even when the test fails part-way.
    with gzip.open(os.path.join(self.dataDir,'cdk2-syn-clip100.pkl.gz'),'rb') as inF:
      while True:
        try:
          name,molPkl,boundsMat = cPickle.load(inF, encoding='latin1')
        except EOFError:
          # End of the pickle stream; any other error now surfaces in the test.
          break
        if PY3:
          molPkl = bytes(molPkl, encoding='latin1')

        nDone += 1

        mol = Chem.Mol(molPkl)
        # The stored bounds matrix is discarded and recomputed from the molecule.
        boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        DG.DoTriangleSmoothing(boundsMat)

        canMatch,matches = EmbedLib.MatchPharmacophoreToMol(mol,featFactory,
                                                            pcophore)
        if canMatch:
          nMatches+=1
          r = EmbedLib.MatchPharmacophore(matches,boundsMat,pcophore,
                                          useDownsampling=True,use2DLimits=True,
                                          mol=mol)
          failed,bm,match,details = r
          if not failed:
            nHits+=1

    self.assertEqual(nDone,100)
    self.assertEqual(nMatches,93)
    self.assertEqual(nHits,67)
Exemple #9
0
    def __config_feature_factory(self):
        """
        Build the RDKit feature factory from the ``BaseFeatures.fdef``
            definitions and store it on the instance.
        """
        self.__feat_factory = ChemicalFeatures.BuildFeatureFactory(
            os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
Exemple #10
0
def str2molgraph(rawstr, length):
    """Build a networkx graph whose nodes are the positions of ``rawstr``.

    ``rawstr`` is a tuple of tokens, e.g.
    ('<RX_6>', 'N', 'c', '1', 'n', 'c', '2', '[', 'n', 'H', ']', 'c', '(',
    'C', 'C', 'C', 'c', '3', 'c', 's', 'c', '(', 'C', '(', '=', 'O', ')',
    'O', ')', 'c', '3', ')', 'c', 'c', '2', 'c', '(', '=', 'O', ')', '[',
    'n', 'H', ']', '1'). The first ``length`` tokens are joined into the
    SMILES that is parsed.

    Tokens for which ``need_emb(token, EMB_ATOMS)`` is true are paired, in
    order, with the atoms of the parsed molecule and receive atom attributes.
    All other tokens become bare nodes. Donor/acceptor flags and bonds are
    then mapped from atom indices back to token positions.
    """
    smiles = ''.join(rawstr[:length])

    mol = Chem.MolFromSmiles(smiles)

    graph = nx.Graph()
    fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    features = ChemicalFeatures.BuildFeatureFactory(fdef_path).GetFeaturesForMol(mol)

    # atom index in the molecule -> position of its token in rawstr
    token_pos_of_atom = {}
    next_atom = 0
    # Nodes
    for pos, token in enumerate(rawstr):
        if not need_emb(token, EMB_ATOMS):
            graph.add_node(pos)
            continue

        token_pos_of_atom[next_atom] = pos
        atom = mol.GetAtomWithIdx(next_atom)
        next_atom += 1
        graph.add_node(pos,
                       a_type=atom.GetSymbol(),
                       a_num=atom.GetAtomicNum(),
                       acceptor=0,
                       donor=0,
                       aromatic=atom.GetIsAromatic(),
                       hybridization=atom.GetHybridization(),
                       num_h=atom.GetTotalNumHs())

    # Donor and Acceptor properties
    for feat in features:
        family = feat.GetFamily()
        if family == 'Donor':
            flag = 'donor'
        elif family == 'Acceptor':
            flag = 'acceptor'
        else:
            continue
        for atom_idx in feat.GetAtomIds():
            if atom_idx in token_pos_of_atom:
                graph.nodes[token_pos_of_atom[atom_idx]][flag] = 1

    # Edges
    atom_count = mol.GetNumAtoms()
    for first in range(atom_count):
        for second in range(atom_count):
            bond = mol.GetBondBetweenAtoms(first, second)
            if bond is None:
                continue
            if first not in token_pos_of_atom or second not in token_pos_of_atom:
                continue
            graph.add_edge(token_pos_of_atom[first],
                           token_pos_of_atom[second],
                           b_type=bond.GetBondType())

    return graph
Exemple #11
0
 def get_factory(self):
     """
     Return the Ph4 feature factory, building and caching it on first use.
     The definitions are read from ``data/RDKitPh4.fdef`` next to this module.
     :return: the feature factory stored in ``self.factory``
     """
     if self.factory is not None:
         return self.factory
     module_dir = os.path.dirname(__file__)
     fdef_path = os.path.join(module_dir, "data", "RDKitPh4.fdef")
     self.factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
     return self.factory
  def test4Github252(self):
    """Building a Pharmacophore from features raises RuntimeError before
    Compute2DCoords is called on the molecule and succeeds afterwards."""
    factory = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))

    toluene = Chem.MolFromSmiles('Cc1ccccc1')
    features = factory.GetFeaturesForMol(toluene)
    with self.assertRaises(RuntimeError):
      Pharmacophore.Pharmacophore(features)

    AllChem.Compute2DCoords(toluene)
    Pharmacophore.Pharmacophore(features)
Exemple #13
0
def numpy_pp_fps(mols):
    """ Compute Gobbi and Poppinger pharmacophore fingerprints as numpy.ndarrays

    Falsy entries of ``mols`` are skipped, so the result may have fewer rows
    than ``mols`` has items.

    :param mols: {list} list of molecules (RDKit mols)
    :return: numpy array containing row-wise fingerprints for every molecule
    """
    # NOTE(review): BuildFeatureFactory is called here without a
    # feature-definition file - confirm the zero-argument call is valid.
    feature_factory = ChemicalFeatures.BuildFeatureFactory()
    signature_factory = SigFactory(feature_factory,
                                   useCounts=False,
                                   minPointCount=2,
                                   maxPointCount=3)
    distance_bins = [(0, 2), (2, 4), (4, 6), (6, 8), (8, 100)]
    signature_factory.SetBins(distance_bins)
    signature_factory.Init()

    fingerprints = []
    for mol in mols:
        if mol:
            fingerprints.append(Generate.Gen2DFingerprint(mol, signature_factory))
    return _rdk2numpy(fingerprints)
  def get_instance(cls):
    """Return the feature factory cached on ``cls._instance``, creating it
    from ``BaseFeatures.fdef`` when the cache is empty.

    Raises ValueError when RDKit cannot be imported.
    """
    try:
      from rdkit import RDConfig
      from rdkit.Chem import ChemicalFeatures
    except ModuleNotFoundError:
      raise ValueError("This class requires RDKit to be installed.")

    if cls._instance:
      return cls._instance
    cls._instance = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    return cls._instance
Exemple #15
0
def rdkit_featuredefinition() -> ChemicalFeatures.MolChemicalFeatureFactory:
    """ Build a feature factory from RDKit's ``BaseFeatures.fdef`` file.

        Returns
        -------
        rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory
            The feature factory.

    """
    data_dir = RDConfig.RDDataDir
    return ChemicalFeatures.BuildFeatureFactory(
        os.path.join(data_dir, 'BaseFeatures.fdef'))
    def __init__(self, atms: typing.List[str]):
        """Set up the lookup tables and the feature factory.

        ``atms`` lists the atom labels; each label is mapped to its position
        in the list. The hybridization map covers SP, SP2 and SP3 only.
        """
        self.atms_to_idx = {label: position for position, label in enumerate(atms)}
        self.number_atom_options = len(self.atms_to_idx)

        hybridizations = (Chem.rdchem.HybridizationType.SP,
                          Chem.rdchem.HybridizationType.SP2,
                          Chem.rdchem.HybridizationType.SP3)
        self.hyb_mapping = {hyb: code for code, hyb in enumerate(hybridizations)}
        self.number_hyb_options = len(self.hyb_mapping)

        self.fdef_name = os.path.join(RDDataDir, 'BaseFeatures.fdef')
        self.feats_factory = ChemicalFeatures.BuildFeatureFactory(self.fdef_name)
Exemple #17
0
    def __call__(self, mol):
        """Featurizes the input molecule.

        Each atom's feature row is the output of ``self._featurizer`` followed
        by a donor flag, an acceptor flag and six ring-membership counts (one
        per ring size from 3 to 8).

        Parameters
        ----------
        mol : rdkit.Chem.rdchem.Mol
            RDKit molecule instance.

        Returns
        -------
        dict
            Mapping ``self._atom_data_field`` to the atom features, which is a
            float32 tensor of shape (N, M), N is the number of atoms and M is
            the feature size.
        """
        AllChem.ComputeGasteigerCharges(mol)
        num_atoms = mol.GetNumAtoms()

        # Donor / acceptor membership from the BaseFeatures definitions
        fdef_path = osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
        is_donor, is_acceptor = self.get_donor_acceptor_info(
            factory.GetFeaturesForMol(mol))

        # Get a symmetrized smallest set of smallest rings
        # Following the practice from Chainer Chemistry (https://github.com/chainer/
        # chainer-chemistry/blob/da2507b38f903a8ee333e487d422ba6dcec49b05/chainer_chemistry/
        # dataset/preprocessors/weavenet_preprocessor.py)
        rings = Chem.GetSymmSSSR(mol)

        rows = []
        for atom_idx in range(num_atoms):
            # Start from the features computed directly on the atom (a list)
            row = self._featurizer(mol.GetAtomWithIdx(atom_idx))
            row.append(float(is_donor[atom_idx]))
            row.append(float(is_acceptor[atom_idx]))
            # Slot k counts the rings of size k + 3 that contain this atom
            ring_counts = [0] * 6
            for ring in rings:
                size = len(ring)
                if 3 <= size <= 8 and atom_idx in ring:
                    ring_counts[size - 3] += 1
            row.extend(ring_counts)
            rows.append(row)
        stacked = np.stack(rows)

        return {
            self._atom_data_field:
            F.zerocopy_from_numpy(stacked.astype(np.float32))
        }
def construct_hydrogen_bonding(mol, num_max_atoms=WEAVE_DEFAULT_NUM_MAX_ATOMS):
    """Return a ``(num_max_atoms, 2)`` float32 array of donor/acceptor flags.

    Column 0 is set to 1.0 for the first atom of every 'Donor' feature and
    column 1 for the first atom of every 'Acceptor' feature. All other
    entries stay 0.
    """
    fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    features = ChemicalFeatures.BuildFeatureFactory(fdef_path).GetFeaturesForMol(mol)
    column_of_family = {'Donor': 0, 'Acceptor': 1}
    hydrogen_bonding_vec = numpy.zeros((num_max_atoms, 2), dtype=numpy.float32)
    for feature in features:
        column = column_of_family.get(feature.GetFamily())
        if column is None:
            continue
        # Only the first atom id of the feature is flagged.
        hydrogen_bonding_vec[feature.GetAtomIds()[0], column] = 1.0
    return hydrogen_bonding_vec
Exemple #19
0
def load_factory(filename=None):
    """
    Builds an RDKit feature factory from an fdef file.

    :param filename: file name of fdef format file. If None, the
                     ``smarts_features.fdef`` file located next to this
                     module is used. Default: None.
    :type filename: str
    :return: object of MolChemicalFeatureFactory class

    """
    if filename is not None:
        return ChemicalFeatures.BuildFeatureFactory(filename)
    module_dir = path.abspath(path.dirname(__file__))
    return ChemicalFeatures.BuildFeatureFactory(
        path.join(module_dir, 'smarts_features.fdef'))
    def __new__(cls):
        """Return the single shared instance, creating it on the first call.

        On creation the instance gets a ``feature_factory`` attribute built
        from ``BaseFeatures.fdef``; RDKit is imported only at that point.
        """
        if cls._instance is not None:
            return cls._instance

        # Register the instance before the imports, as the factory is attached afterwards.
        instance = cls._instance = super(
            _ChemicalFeatureGenerator, cls).__new__(cls)

        from rdkit import RDConfig
        from rdkit.Chem import ChemicalFeatures

        instance.feature_factory = ChemicalFeatures.BuildFeatureFactory(
            path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))

        return cls._instance
Exemple #21
0
def extract_features(mol):
    """Return one ``"id,family,x,y,z"`` string per feature of ``mol``.

    Features are perceived with ``./LigityFeatures.fdef`` (resolved against
    the current working directory). Only features whose family is in
    ``pharmacophores`` are reported.
    """
    factory = ChemicalFeatures.BuildFeatureFactory('./LigityFeatures.fdef')
    records = []
    for feat in factory.GetFeaturesForMol(mol):
        family = feat.GetFamily()
        if family not in pharmacophores:
            continue
        feat_id = feat.GetId()
        x, y, z = list(feat.GetPos())
        records.append(','.join([str(feat_id), family, str(x), str(y), str(z)]))
    return records
Exemple #22
0
def BuildSigFactory(options=None, fdefFile=None,
                    bins=[(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)],
                    skipFeats=('LumpedHydrophobe', 'ZnBinder')):
    """Build a Pharm2D signature factory from a feature-definition file.

    When ``options`` is truthy its ``fdefFile`` attribute replaces the
    ``fdefFile`` argument. A ValueError is raised when no file name results.
    The factory is returned with ``bins`` set; ``Init()`` is not called here.
    """
    fdef_path = options.fdefFile if options else fdefFile
    if not fdef_path:
        raise ValueError('bad fdef file')
    # RDKit is imported only once a usable file name is known.
    from rdkit.Chem import ChemicalFeatures
    from rdkit.Chem.Pharm2D import SigFactory
    feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
    signature_factory = SigFactory.SigFactory(feature_factory,
                                              skipFeats=skipFeats,
                                              trianglePruneBins=False)
    signature_factory.SetBins(bins)
    return signature_factory
 def get_node_features(self, mol):
     """Build the per-atom feature matrix for ``mol``.

     Columns, in order: atomic number, total valence, aromaticity, total
     hydrogen count (neighbours included), donor flag, acceptor flag, SP /
     SP2 / SP3 indicators, then one indicator column per entry of
     ``self.atomicNbrs``.

     Returns a tuple ``(x, x.sum(axis=0), len(x))`` where ``x`` has one row
     per atom.
     """
     fdef_path = osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
     factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)

     atomic_nums, valences, aromatic, h_counts = [], [], [], []
     sp, sp2, sp3 = [], [], []
     donor, acceptor = [], []
     for atom in mol.GetAtoms():
         donor.append(0)
         acceptor.append(0)
         atomic_nums.append(atom.GetAtomicNum())
         valences.append(atom.GetTotalValence())
         aromatic.append(atom.GetIsAromatic())
         h_counts.append(atom.GetTotalNumHs(includeNeighbors=True))
         hyb = atom.GetHybridization()
         sp.append(int(hyb == HybridizationType.SP))
         sp2.append(int(hyb == HybridizationType.SP2))
         sp3.append(int(hyb == HybridizationType.SP3))

     # Flag the atoms covered by Donor / Acceptor features
     for feat in factory.GetFeaturesForMol(mol):
         family = feat.GetFamily()
         if family == 'Donor':
             flags = donor
         elif family == 'Acceptor':
             flags = acceptor
         else:
             continue
         for atom_idx in feat.GetAtomIds():
             flags[atom_idx] = 1

     # One indicator row per known atomic number
     element_rows = [[1 if num == known else 0 for num in atomic_nums]
                     for known in self.atomicNbrs]

     rows = [atomic_nums, valences, aromatic, h_counts,
             donor, acceptor, sp, sp2, sp3] + element_rows
     x = np.array(rows).T
     return (x, x.sum(axis=0), len(x))
Exemple #24
0
def smile_to_graph(smile):
    """Parse a SMILES string into a ``Molecule`` graph with explicit hydrogens.

    One ``Atom`` is added per atom of the hydrogen-added molecule. Atoms
    covered by a 'Donor' feature get ``don = 1`` and atoms covered by an
    'Acceptor' feature get ``acc = 1``. ``add_bond`` is called for every
    ordered pair of bonded atoms, so each bond is added in both directions.
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smile))
    fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    features = ChemicalFeatures.BuildFeatureFactory(fdef_path).GetFeaturesForMol(mol)

    # Construction of the graph
    graph = Molecule()
    graph.atoms = []
    graph.bonds = []

    for atom_idx in range(mol.GetNumAtoms()):
        atom = mol.GetAtomWithIdx(atom_idx)
        graph.add_atom(Atom(atom_idx,
                            atom.GetSymbol(),
                            atom.GetAtomicNum(),
                            aromatic=atom.GetIsAromatic(),
                            hybrid=atom.GetHybridization(),
                            nbH=atom.GetTotalNumHs()))

    for feat in features:
        family = feat.GetFamily()
        if family == 'Donor':
            for atom_idx in feat.GetAtomIds():
                graph.atoms[atom_idx].don = 1
        elif family == 'Acceptor':
            for atom_idx in feat.GetAtomIds():
                graph.atoms[atom_idx].acc = 1

    for first in range(mol.GetNumAtoms()):
        for second in range(mol.GetNumAtoms()):
            bond = mol.GetBondBetweenAtoms(first, second)
            if bond is None:
                continue
            graph.add_bond(first, second, bond.GetBondTypeAsDouble())

    return graph
    def alchemy_nodes(self, mol):
        """Build per-atom features and positions for ``mol``.

        Returns a dict with three entries: ``'node_type'`` (atomic numbers as
        a LongTensor), ``N_FEAT`` (stacked per-atom feature vectors) and
        ``POSITION`` (stacked positions taken from the first conformer).
        ``mol`` therefore needs at least one conformer.
        """
        atom_feats_dict = defaultdict(list)
        is_donor = defaultdict(int)
        is_acceptor = defaultdict(int)
        mol_featurizer = cf.BuildFeatureFactory(osp.join(rdf.RDDataDir, BASE_FT))
        mol_conformers = mol.GetConformers()
        mol_feats = mol_featurizer.GetFeaturesForMol(mol)
        geom = mol_conformers[0].GetPositions()

        # Flag atoms covered by Acceptor / Donor features; other families are ignored.
        for feat in mol_feats:
            family = feat.GetFamily()
            if family == 'Acceptor':
                for u in feat.GetAtomIds():
                    is_acceptor[u] = 1
            elif family == 'Donor':
                for u in feat.GetAtomIds():
                    is_donor[u] = 1

        hybridization_types = (Chem.rdchem.HybridizationType.SP,
                               Chem.rdchem.HybridizationType.SP2,
                               Chem.rdchem.HybridizationType.SP3)
        for u in range(mol.GetNumAtoms()):
            atom = mol.GetAtomWithIdx(u)
            symbol = atom.GetSymbol()
            atom_type = atom.GetAtomicNum()
            aromatic = atom.GetIsAromatic()
            atom_feats_dict['node_type'].append(atom_type)
            hybridization = atom.GetHybridization()
            num_h = atom.GetTotalNumHs()
            atom_feats_dict[POSITION].append(torch.FloatTensor(geom[u]))

            # Layout: element one-hot, atomic number, aromaticity, donor,
            # acceptor, hybridization one-hot, hydrogen count.
            h_u = [int(symbol == x) for x in ATOMS]
            h_u.append(atom_type)
            h_u.append(int(aromatic))
            h_u.append(is_donor[u])
            h_u.append(is_acceptor[u])
            h_u += [int(hybridization == x) for x in hybridization_types]
            h_u.append(num_h)
            atom_feats_dict[N_FEAT].append(torch.FloatTensor(h_u))

        atom_feats_dict['node_type'] = torch.LongTensor(
            atom_feats_dict['node_type'])
        atom_feats_dict[N_FEAT] = torch.stack(atom_feats_dict[N_FEAT], dim=0)
        # Read back with the same key the positions were appended under; the
        # original stacked the literal 'pos', which only works if POSITION == 'pos'.
        atom_feats_dict[POSITION] = torch.stack(atom_feats_dict[POSITION], dim=0)
        return atom_feats_dict
Exemple #26
0
def rawsmiles2graph(smiles):
    """Convert a SMILES string into a networkx graph over its atoms.

    Every atom becomes a node (keyed by atom index) carrying symbol, atomic
    number, donor/acceptor flags, aromaticity, hybridization and hydrogen
    count. ``add_edge`` is called for every ordered pair of atom indices,
    including an atom paired with itself: ``b_type`` is the RDKit bond type
    when the two atoms are bonded and ``None`` otherwise.
    """
    mol = Chem.MolFromSmiles(smiles)
    graph = nx.Graph()

    fdef_name = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_name)
    feats = factory.GetFeaturesForMol(mol)

    num_atoms = mol.GetNumAtoms()

    # Nodes
    for atom_idx in range(num_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)
        graph.add_node(atom_idx,
                       a_type=atom.GetSymbol(),
                       a_num=atom.GetAtomicNum(),
                       acceptor=0,
                       donor=0,
                       aromatic=atom.GetIsAromatic(),
                       hybridization=atom.GetHybridization(),
                       num_h=atom.GetTotalNumHs())

    # Donor and Acceptor properties
    for feat in feats:
        family = feat.GetFamily()
        if family == 'Donor':
            flag = 'donor'
        elif family == 'Acceptor':
            flag = 'acceptor'
        else:
            continue
        for atom_idx in feat.GetAtomIds():
            # Graph.node was removed in networkx 2.4; Graph.nodes is the
            # supported accessor.
            graph.nodes[atom_idx][flag] = 1

    # Edges
    for first in range(num_atoms):
        for second in range(num_atoms):
            bond = mol.GetBondBetweenAtoms(first, second)
            # Unbonded pairs still get an edge, with b_type=None.
            bond_type = None if bond is None else bond.GetBondType()
            graph.add_edge(first, second, b_type=bond_type)

    return graph
Exemple #27
0
def get_atom_features(mol, dist_matrix):
    """
    Build the ``(n_atoms, C.N_ATOM_FEATURES)`` feature matrix for 'mol'.

    Each row starts with, in order:
        - atom symbol, one-hot over C.SYMBOLS
        - degree, one-hot over C.DEGREES
        - hybridization, one-hot over C.HYBRIDIZATIONS
        - is aromatic
        - formal charge
        - atomic number
        - mean of 'dist_matrix' over the atom's bonded neighbours (0 if none)
        - mean atomic number of the neighbours (0 if none)
    and ends with:
        - donor flag (second to last column)
        - acceptor flag (last column)

    A RuntimeError raised while perceiving donors/acceptors is printed and
    the matrix is returned with whatever flags were set up to that point.
    """
    n_atoms = mol.GetNumAtoms()
    features = np.zeros((n_atoms, C.N_ATOM_FEATURES))
    adjacency = rdmolops.GetAdjacencyMatrix(mol)
    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        bonded = adjacency[idx]
        if sum(bonded) > 0:
            mean_bond_length = np.mean(dist_matrix[idx][bonded == 1])
            neighbor_nums = [nbr.GetAtomicNum() for nbr in atom.GetNeighbors()]
            mean_neighbor_num = np.mean(neighbor_nums)
        else:
            mean_bond_length, mean_neighbor_num = 0.0, 0.0

        symbol_part = one_hot_encoding(atom.GetSymbol(), C.SYMBOLS)
        degree_part = one_hot_encoding(atom.GetDegree(), C.DEGREES)
        hybrid_part = one_hot_encoding(atom.GetHybridization(), C.HYBRIDIZATIONS)
        scalar_part = [atom.GetIsAromatic(), atom.GetFormalCharge(),
                       atom.GetAtomicNum(), mean_bond_length, mean_neighbor_num]
        row = symbol_part + degree_part + hybrid_part + scalar_part
        features[idx, :len(row)] = np.array(row)

    feat_factory = ChemicalFeatures.BuildFeatureFactory(C.FDEF)
    try:
        for chem_feat in feat_factory.GetFeaturesForMol(mol):
            family = chem_feat.GetFamily()
            if family == 'Donor':
                column = -2
            elif family == 'Acceptor':
                column = -1
            else:
                continue
            for atom_idx in chem_feat.GetAtomIds():
                features[atom_idx, column] = 1
    except RuntimeError as e:
        print(e)

    return features
    def testBasic(self):
        """Exercise the basic factory API: definitions, counts, indexed and bulk access."""
        cfac = ChemicalFeatures.BuildFeatureFactory(
            os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                         'MolChemicalFeatures', 'test_data', 'featDef.txt'))
        self.assertEqual(cfac.GetNumFeatureDefs(), 2)

        fNames = cfac.GetFeatureFamilies()
        self.assertEqual(len(fNames), 2)
        self.assertEqual(fNames[0], 'HBondDonor')
        self.assertEqual(fNames[1], 'HBondAcceptor')

        mol = Chem.MolFromSmiles("COCN")
        rdDistGeom.EmbedMolecule(mol,
                                 30,
                                 100,
                                 useExpTorsionAnglePrefs=False,
                                 useBasicKnowledge=False)

        self.assertEqual(cfac.GetNumMolFeatures(mol), 3)
        for i in range(cfac.GetNumMolFeatures(mol)):
            self.assertTrue(cfac.GetMolFeature(mol, i))
        # check that the recompute argument works:
        self.assertTrue(cfac.GetMolFeature(mol, 0))
        for i in range(cfac.GetNumMolFeatures(mol)):
            self.assertTrue(cfac.GetMolFeature(mol, i, "", False))
        self.assertRaises(IndexError, lambda: cfac.GetMolFeature(mol, 3))

        feats = cfac.GetFeaturesForMol(mol)
        self.assertEqual(len(feats), 3)
        fTypes = ['HBondDonor', 'HBondAcceptor', 'HBondAcceptor']

        # Expected position and atom ids for each feature, in the order returned.
        positions = [[1.3041, -0.6079, 0.0924], [-0.7066, 0.5994, 0.1824],
                     [1.3041, -0.6079, 0.0924]]
        targetAids = [[3], [1], [3]]
        for i, feat in enumerate(feats):
            self.assertEqual(feat.GetFamily(), fTypes[i])
            pos = list(feat.GetPos())
            aids = list(feat.GetAtomIds())
            self.assertEqual(aids, targetAids[i])
            self.assertTrue(lstFeq(pos, positions[i]))
            # Each feature must still reach its molecule and its factory.
            nmol = feat.GetMol()
            self.assertEqual(Chem.MolToSmiles(nmol), "COCN")
            ncfac = feat.GetFactory()
            self.assertEqual(ncfac.GetNumFeatureDefs(), 2)
            self.assertEqual(feat.GetActiveConformer(), -1)
    def get_node_features(self, mol):
        """Build two per-atom arrays for ``mol``, each with one row per atom.

        ``x_one_hot`` holds atomic number and hybridization. ``x_normal``
        holds, in order: total valence, implicit H count, formal charge,
        radical electrons, implicit valence, explicit H count, aromaticity,
        isotope, chiral tag, donor flag and acceptor flag.

        Returns the tuple ``(x_one_hot, x_normal)``.
        """
        fdef_path = osp.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
        factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)

        atomic_nums = []
        hybridizations = []
        for atom in mol.GetAtoms():
            atomic_nums.append(atom.GetAtomicNum())
            hybridizations.append(atom.GetHybridization())

        donor = []
        acceptor = []
        # Eleven rows; the last two are replaced by the donor/acceptor flags below.
        x_normal = [[] for _ in range(11)]
        for atom in mol.GetAtoms():
            donor.append(0)
            acceptor.append(0)
            values = (
                atom.GetTotalValence(),
                atom.GetNumImplicitHs(),
                atom.GetFormalCharge(),
                atom.GetNumRadicalElectrons(),
                atom.GetImplicitValence(),
                atom.GetNumExplicitHs(),
                atom.GetIsAromatic(),
                atom.GetIsotope(),
                atom.GetChiralTag(),
            )
            for row, value in zip(x_normal, values):
                row.append(value)

        for feat in factory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family == 'Donor':
                flags = donor
            elif family == 'Acceptor':
                flags = acceptor
            else:
                continue
            for atom_idx in feat.GetAtomIds():
                flags[atom_idx] = 1

        x_normal[9] = donor
        x_normal[10] = acceptor
        x_one_hot = np.array([atomic_nums, hybridizations]).T
        x_normal = np.array(x_normal).T
        return (x_one_hot, x_normal)
Exemple #30
0
def DefaultSigFactory(fdefFile=None,
                      minPointCount=2,maxPointCount=3,
                      bins=[(2,3),(3,4),(4,5),(5,6),(6,7),(7,8),(8,100)]):
    """Build and initialise a Pharm2D signature factory.

    ``fdefFile`` defaults to RDKit's ``BaseFeatures.fdef``. The 'ZnBinder'
    and 'LumpedHydrophobe' features are skipped. ``bins`` is passed to
    ``SetBins`` as a tuple and ``Init()`` is called before the factory is
    returned.
    """
    # Absolute import: the bare ``import SigFactory`` relied on Python 2
    # implicit relative imports and fails under Python 3.
    from rdkit.Chem.Pharm2D import SigFactory
    from rdkit.Chem import ChemicalFeatures
    if fdefFile is None:
        from rdkit import RDConfig
        import os.path
        fdefFile = os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef')
    featFactory = ChemicalFeatures.BuildFeatureFactory(fdefFile)
    factory = SigFactory.SigFactory(featFactory,
                                    skipFeats=('ZnBinder','LumpedHydrophobe'),
                                    minPointCount=minPointCount,
                                    maxPointCount=maxPointCount,
                                    trianglePruneBins=False)
    factory.SetBins(tuple(bins))
    factory.Init()
    return factory