Example #1
0
def main():
    """Print the Morgan (radius 1) identifiers of every molecule in the SDF dump.

    Walks ``sdf_root_path`` recursively, opens each file as a gzip stream and
    feeds it to ``Chem.ForwardSDMolSupplier``. For every truthy molecule the
    keys of the ``bitInfo`` mapping filled in by ``GetMorganFingerprint`` are
    printed on one line, each key followed by a single space.

    Molecules whose fingerprint computation raises are reported on stderr and
    skipped; they never abort the run.
    """
    import sys

    sdf_root_path = "/media/data/pubchem/SDF"

    for path, _dirs, filenames in os.walk(sdf_root_path):
        for filename in filenames:
            # This SDF file fails to parse with RDKit on Ubuntu 16.04
            if "Compound_102125001_102150000" in filename:
                continue

            # Join with the directory currently being walked, not the root:
            # a file found in a sub-directory would otherwise resolve to a
            # path that does not exist.
            filepath = os.path.join(path, filename)

            with gzip.open(filepath, 'rb') as myfile:
                suppl = Chem.ForwardSDMolSupplier(myfile)

                for mol in suppl:
                    # Skip falsy entries (presumably records RDKit could
                    # not parse -- confirm against the supplier docs).
                    if not mol:
                        continue

                    info = {}
                    try:
                        rdMolDescriptors.GetMorganFingerprint(mol,
                                                              1,
                                                              bitInfo=info)
                    except Exception as error:
                        # Best effort: one bad molecule must not stop the
                        # whole dump, but the failure should be visible.
                        print("Skipping molecule in %s: %s" % (filepath, error),
                              file=sys.stderr)
                        continue

                    # Same layout as before: "key key key " + newline.
                    print("".join("%s " % key for key in info))
Example #2
0
def GenerateMorganFeaturesFingerprints(Mols):
    """Compute feature-based Morgan fingerprints for each molecule in Mols.

    Radius and chirality handling are read from
    OptionsInfo["FingerprintsParams"]["MorganFeatures"]. When
    OptionsInfo["GenerateBitVectFingerints"] is truthy the fingerprints are
    requested as 2048-bit vectors through GetMorganFingerprintAsBitVect;
    otherwise GetMorganFingerprint is called.

    Returns a list holding one fingerprint per entry of Mols, in order.
    """

    MiscUtil.PrintInfo("\nGenerating  MorganFeatures fingerprints...")

    # Setup fingerprints parameters...
    MorganFeaturesParams = OptionsInfo["FingerprintsParams"]["MorganFeatures"]
    Radius = MorganFeaturesParams["Radius"]
    UseChirality = MorganFeaturesParams["UseChirality"]

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate UIntSparseIntVect fingerprints...
        SparseFingerprints = []
        for Mol in Mols:
            SparseFingerprints.append(
                rdMolDescriptors.GetMorganFingerprint(
                    Mol, Radius, useFeatures=True, useChirality=UseChirality))
        return SparseFingerprints

    # Generate ExplicitBitVect fingerprints...
    BitVectFingerprints = []
    for Mol in Mols:
        BitVectFingerprints.append(
            rdMolDescriptors.GetMorganFingerprintAsBitVect(
                Mol,
                Radius,
                useFeatures=True,
                useChirality=UseChirality,
                nBits=2048))
    return BitVectFingerprints
Example #3
0
def _compute_sas(mol: Mol, sa_model: Dict[int, float]) -> float:
    """Compute a synthetic-accessibility style score clamped to [1, 10].

    The score combines three terms:

    * the count-weighted mean of ``sa_model`` contributions over the
      radius-2 Morgan fragments of ``mol`` (unknown fragments count as -4),
    * penalties for size, stereo centres, spiro atoms, bridgeheads and the
      presence of any ring with more than 8 atoms,
    * a bonus when the molecule has more atoms than distinct fragments.

    The raw sum is then rescaled and the upper end is smoothed.

    Raises ZeroDivisionError when the fingerprint has no non-zero elements.
    """
    fragment_counts = rdMolDescriptors.GetMorganFingerprint(
        mol, 2).GetNonzeroElements()

    # fragment score: count-weighted average contribution
    fragment_total = 0
    weighted_sum = 0.
    for fragment_id, occurrences in fragment_counts.items():
        fragment_total += occurrences
        weighted_sum += sa_model.get(fragment_id, -4) * occurrences
    fragment_score = weighted_sum / fragment_total

    # features score
    atom_count = mol.GetNumAtoms()
    chiral_count = len(FindMolChiralCenters(mol, includeUnassigned=True))
    ring_info = mol.GetRingInfo()
    spiro_count = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    bridgehead_count = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    has_macrocycle = any(len(ring) > 8 for ring in ring_info.AtomRings())

    size_penalty = atom_count**1.005 - atom_count
    stereo_penalty = math.log10(chiral_count + 1)
    spiro_penalty = math.log10(spiro_count + 1)
    bridge_penalty = math.log10(bridgehead_count + 1)
    # This differs from the paper, which defines the penalty as
    # math.log10(nMacrocycles + 1). A flat penalty generates better results
    # when 2 or more macrocycles are present.
    macrocycle_penalty = math.log10(2) if has_macrocycle else 0.

    complexity_score = (0. - size_penalty - stereo_penalty - spiro_penalty -
                        bridge_penalty - macrocycle_penalty)

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    density_bonus = 0.
    if atom_count > len(fragment_counts):
        density_bonus = math.log(
            float(atom_count) / len(fragment_counts)) * .5

    raw_score = fragment_score + complexity_score + density_bonus

    # need to transform "raw" value into scale between 1 and 10
    lower_bound = -4.0
    upper_bound = 2.5
    scaled = 11. - (raw_score - lower_bound + 1) / (upper_bound -
                                                    lower_bound) * 9.
    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled + 1. - 9.)

    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
Example #4
0
def scoreMolWConfidence(mol, fscore):
  """Next to the NP Likeness Score, this function outputs a confidence value
  between 0..1 that describes how many fragments of the tested molecule
  were found in the model data set (1: all fragments were found).

  Parameters:
    mol -- the molecule to score; None raises ValueError
    fscore -- mapping from Morgan (radius 2) fragment id to its contribution

  The summed contributions are divided by the atom count and values outside
  [-4, 4] are compressed logarithmically.

  Raises ZeroDivisionError when the molecule has no atoms or the fingerprint
  has no non-zero elements.

  Returns namedtuple NPLikeness(nplikeness, confidence)"""

  if mol is None:
    raise ValueError('invalid molecule')
  fp = rdMolDescriptors.GetMorganFingerprint(mol, 2)
  bits = fp.GetNonzeroElements()

  # calculating the score
  score = 0.0
  bits_found = 0
  for bit in bits:
    if bit in fscore:
      bits_found += 1
      score += fscore[bit]

  score /= float(mol.GetNumAtoms())
  # Convert to float *before* dividing: float(a / b) truncates to 0 or 1
  # wherever "/" is integer division (Python 2 without true division).
  confidence = bits_found / float(len(bits))

  # preventing score explosion for exotic molecules
  if score > 4:
    score = 4. + math.log10(score - 4. + 1.)
  elif score < -4:
    score = -4. - math.log10(-4. - score + 1.)
  NPLikeness = namedtuple("NPLikeness", "nplikeness,confidence")
  return NPLikeness(score, confidence)
Example #5
0
  def _featurize(self, mol):
    """
    Compute the circular (Morgan) fingerprint of one molecule.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule.

    Returns
    -------
    When ``self.sparse`` is falsy, the result of
    ``GetMorganFingerprintAsBitVect`` with ``self.size`` bits. Otherwise a
    dict keyed by fragment id: the values are the fragment counts, or, when
    ``self.smiles`` is truthy, ``{'smiles': ..., 'count': ...}`` dicts.
    """
    if not self.sparse:
      return rdMolDescriptors.GetMorganFingerprintAsBitVect(
          mol, self.radius, nBits=self.size, useChirality=self.chiral,
          useBondTypes=self.bonds, useFeatures=self.features)

    bit_info = {}
    sparse_fp = rdMolDescriptors.GetMorganFingerprint(
        mol, self.radius, useChirality=self.chiral,
        useBondTypes=self.bonds, useFeatures=self.features,
        bitInfo=bit_info)
    counts = sparse_fp.GetNonzeroElements()  # dict: fragment id -> count
    if not self.smiles:
      return counts

    # Describe every fragment by the SMILES of its first recorded
    # (centre atom, radius) environment.
    annotated = {}
    for fragment_id, count in counts.items():
      centre, reach = bit_info[fragment_id][0]
      bond_path = Chem.FindAtomEnvironmentOfRadiusN(mol, reach, centre)
      fragment = Chem.PathToSubmol(mol, bond_path)
      annotated[fragment_id] = {
          'smiles': Chem.MolToSmiles(fragment),
          'count': count,
      }
    return annotated
Example #6
0
def GenerateMorganFeaturesFingerprints(Mols):
    """Compute feature-based Morgan fingerprints for each molecule in Mols.

    Radius, chirality handling and bit-vector size are read from
    OptionsInfo["FingerprintsParams"]["MorganFeatures"]. Bit vectors are
    generated when OptionsInfo["SpecifiedFingerprintsType"] matches
    "BitVect" (case-insensitively); any other value yields the output of
    GetMorganFingerprint.

    Returns a list holding one fingerprint per entry of Mols, in order.
    """

    MiscUtil.PrintInfo("\nGenerating MorganFeatures %s fingerprints..." %
                       OptionsInfo["SpecifiedFingerprintsType"])

    # Setup fingerprints parameters...
    MorganFeaturesParams = OptionsInfo["FingerprintsParams"]["MorganFeatures"]
    Radius = MorganFeaturesParams["Radius"]
    UseChirality = MorganFeaturesParams["UseChirality"]
    FPSize = MorganFeaturesParams["FPSize"]

    WantBitVect = re.match("^BitVect$",
                           OptionsInfo["SpecifiedFingerprintsType"], re.I)
    if not WantBitVect:
        # Generate UIntSparseIntVect fingerprints...
        SparseFingerprints = []
        for Mol in Mols:
            SparseFingerprints.append(
                rdMolDescriptors.GetMorganFingerprint(
                    Mol, Radius, useFeatures=True, useChirality=UseChirality))
        return SparseFingerprints

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s" % (FPSize))
    BitVectFingerprints = []
    for Mol in Mols:
        BitVectFingerprints.append(
            rdMolDescriptors.GetMorganFingerprintAsBitVect(
                Mol,
                Radius,
                useFeatures=True,
                useChirality=UseChirality,
                nBits=FPSize))
    return BitVectFingerprints
Example #7
0
 def testGithub1761(self):
     """Invalid radius / size arguments must raise instead of being accepted.

     A negative radius (Morgan) and a negative third positional argument
     (hashed Morgan) are expected to raise OverflowError; a third positional
     argument of 0 is expected to raise ValueError.
     """
     mol = Chem.MolFromSmiles('CC(F)(Cl)C(F)(Cl)C')

     with self.assertRaises(OverflowError):
         rdMD.GetMorganFingerprint(mol, -1)

     with self.assertRaises(OverflowError):
         rdMD.GetHashedMorganFingerprint(mol, 0, -1)

     with self.assertRaises(ValueError):
         rdMD.GetHashedMorganFingerprint(mol, 0, 0)
Example #8
0
def calculateScore(m):
    """Compute a synthetic-accessibility style score for m, clamped to [1, 10].

    Calls readFragmentScores() first when the module-level _fscores is still
    None. The score is the sum of a count-weighted mean fragment
    contribution (radius-2 Morgan fragments, unknown fragments count as -4),
    a set of structural penalties and a fingerprint-density bonus, rescaled
    and smoothed at the upper end.

    Raises ZeroDivisionError when the fingerprint has no non-zero elements.
    """
    if _fscores is None:
        readFragmentScores()

    # fragment score -- 2 is the *radius* of the circular fingerprint
    fragment_counts = rdMolDescriptors.GetMorganFingerprint(
        m, 2).GetNonzeroElements()
    fragment_total = 0
    weighted_sum = 0.
    for fragment_id, occurrences in iteritems(fragment_counts):
        fragment_total += occurrences
        weighted_sum += _fscores.get(fragment_id, -4) * occurrences
    fragment_score = weighted_sum / fragment_total

    # features score
    atom_count = m.GetNumAtoms()
    chiral_count = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ring_info = m.GetRingInfo()
    bridgehead_count, spiro_count = numBridgeheadsAndSpiro(m, ring_info)
    has_macrocycle = any(len(ring) > 8 for ring in ring_info.AtomRings())

    size_penalty = atom_count**1.005 - atom_count
    stereo_penalty = math.log10(chiral_count + 1)
    spiro_penalty = math.log10(spiro_count + 1)
    bridge_penalty = math.log10(bridgehead_count + 1)
    # This differs from the paper, which defines the penalty as
    # math.log10(nMacrocycles + 1). A flat penalty generates better results
    # when 2 or more macrocycles are present.
    macrocycle_penalty = math.log10(2) if has_macrocycle else 0.

    complexity_score = (0. - size_penalty - stereo_penalty - spiro_penalty -
                        bridge_penalty - macrocycle_penalty)

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    density_bonus = 0.
    if atom_count > len(fragment_counts):
        density_bonus = math.log(
            float(atom_count) / len(fragment_counts)) * .5

    raw_score = fragment_score + complexity_score + density_bonus

    # need to transform "raw" value into scale between 1 and 10
    lower_bound = -4.0
    upper_bound = 2.5
    scaled = 11. - (raw_score - lower_bound + 1) / (upper_bound -
                                                    lower_bound) * 9.
    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled + 1. - 9.)

    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
Example #9
0
  def _featurize(self, datapoint: RDKitMol, **kwargs) -> np.ndarray:
    """Calculate circular fingerprint.

    Parameters
    ----------
    datapoint: rdkit.Chem.rdchem.Mol
      RDKit Mol object
    **kwargs
      Only the deprecated ``mol`` keyword is inspected; when given it
      replaces ``datapoint`` and a DeprecationWarning is issued.

    Returns
    -------
    np.ndarray
      A numpy array of circular fingerprint. NOTE: when ``self.sparse`` is
      truthy a dict keyed by fragment id is returned instead (counts, or
      ``{'smiles', 'count'}`` dicts when ``self.smiles`` is truthy).

    Raises
    ------
    ImportError
      If RDKit cannot be imported.
    """
    import warnings

    try:
      from rdkit import Chem
      from rdkit.Chem import rdMolDescriptors
    except ModuleNotFoundError:
      raise ImportError("This class requires RDKit to be installed.")
    if 'mol' in kwargs:
      datapoint = kwargs.get("mol")
      # Warn instead of raising: raising made the assignment above dead
      # code and turned a "phasing out" notice into a hard failure.
      warnings.warn(
          'Mol is being phased out as a parameter, please pass "datapoint" instead.',
          DeprecationWarning,
          stacklevel=2)
    if self.sparse:
      info: Dict = {}
      fp = rdMolDescriptors.GetMorganFingerprint(
          datapoint,
          self.radius,
          useChirality=self.chiral,
          useBondTypes=self.bonds,
          useFeatures=self.features,
          bitInfo=info)
      fp = fp.GetNonzeroElements()  # convert to a dict

      # generate SMILES for fragments
      if self.smiles:
        fp_smiles = {}
        for fragment_id, count in fp.items():
          # first recorded (centre atom, radius) environment of the fragment
          root, radius = info[fragment_id][0]
          env = Chem.FindAtomEnvironmentOfRadiusN(datapoint, radius, root)
          frag = Chem.PathToSubmol(datapoint, env)
          smiles = Chem.MolToSmiles(frag)
          fp_smiles[fragment_id] = {'smiles': smiles, 'count': count}
        fp = fp_smiles
    else:
      fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
          datapoint,
          self.radius,
          nBits=self.size,
          useChirality=self.chiral,
          useBondTypes=self.bonds,
          useFeatures=self.features)
      fp = np.asarray(fp, dtype=float)
    return fp
Example #10
0
    def compute(self, mol):
        """Score mol from the per-fragment contributions in self.NA_model.

        Contributions of the distinct radius-2 Morgan fragments are summed
        (fragments missing from the model add 0) and divided by the atom
        count. Values outside [-4, 4] are compressed logarithmically.
        """
        fingerprint = rdMolDescriptors.GetMorganFingerprint(mol, 2)
        contributions = (self.NA_model.get(fragment_id, 0)
                         for fragment_id in fingerprint.GetNonzeroElements())
        normalized = sum(contributions) / float(mol.GetNumAtoms())

        # preventing score explosion for exotic molecules
        if normalized > 4:
            return 4. + math.log10(normalized - 4. + 1.)
        if normalized < -4:
            return -4. - math.log10(-4. - normalized + 1.)
        return normalized
Example #11
0
def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False):
  """
  Calculates the Morgan fingerprint with the counts of atomId removed.

  Parameters:
    mol -- the molecule of interest
    radius -- the maximum radius
    fpType -- the type of Morgan fingerprint: 'count' or 'bv'
    atomId -- the atom to remove the counts for (if -1, no count is removed)
    nBits -- the size of the bit vector (only for fpType = 'bv')
    useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan

  The full fingerprint and a per-atom bit map are computed once and stored
  on the molecule as ``mol._fpInfo``. Later calls reuse that cache whatever
  radius / fpType / nBits / useFeatures they pass.

  Raises ValueError for an unknown fpType, for an atomId that is not smaller
  than the number of atoms, or when the cached ``_fpInfo`` is not a pair.
  """
  if fpType not in ['bv', 'count']:
    raise ValueError("Unknown Morgan fingerprint type")
  if not hasattr(mol, '_fpInfo'):
    info = {}
    # get the fingerprint and an empty per-atom bit map of the matching kind
    if fpType == 'bv':
      molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits, useFeatures=useFeatures, bitInfo=info)
      bitmap = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())]
    else:
      molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=info)
      bitmap = [[] for _ in range(mol.GetNumAtoms())]
    # construct the bit map
    # dict.items() works on Python 2 and 3; iteritems() is Python 2 only.
    for bit, es in info.items():
      for at1, rad in es:
        if rad == 0:  # for radius 0 only the centre atom contributes
          atoms = [at1]
        else:  # for radii > 0 every atom of the environment contributes
          env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, at1)
          amap = {}
          # called only to fill amap; the submol itself is not needed
          Chem.PathToSubmol(mol, env, atomMap=amap)
          atoms = amap.keys()
        for at2 in atoms:
          if fpType == 'bv':
            bitmap[at2][bit] = 1
          else:
            bitmap[at2].append(bit)
    mol._fpInfo = (molFp, bitmap)

  if atomId < 0:
    return mol._fpInfo[0]

  # remove the bits of atomId
  if atomId >= mol.GetNumAtoms():
    raise ValueError("atom index greater than number of atoms")
  if len(mol._fpInfo) != 2:
    raise ValueError("_fpInfo not set")
  if fpType == 'bv':
    molFp = mol._fpInfo[0] ^ mol._fpInfo[1][atomId]  # xor
  else:  # count
    # work on a copy so the cached fingerprint stays intact
    molFp = copy.deepcopy(mol._fpInfo[0])
    # delete the bits with atomId
    for bit in mol._fpInfo[1][atomId]:
      molFp[bit] -= 1
  return molFp
Example #12
0
  def _featurize(self, mol: RDKitMol) -> np.ndarray:
    """Calculate circular fingerprint.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
      RDKit Mol object

    Returns
    -------
    np.ndarray
      A numpy array of circular fingerprint. NOTE: when ``self.sparse`` is
      truthy a dict keyed by fragment id is returned instead (counts, or
      ``{'smiles', 'count'}`` dicts when ``self.smiles`` is truthy).
    """
    from rdkit import Chem
    from rdkit.Chem import rdMolDescriptors

    if self.sparse:
      info: Dict = {}
      fp = rdMolDescriptors.GetMorganFingerprint(
          mol,
          self.radius,
          useChirality=self.chiral,
          useBondTypes=self.bonds,
          useFeatures=self.features,
          bitInfo=info)
      fp = fp.GetNonzeroElements()  # convert to a dict

      # generate SMILES for fragments
      if self.smiles:
        fp_smiles = {}
        for fragment_id, count in fp.items():
          # first recorded (centre atom, radius) environment of the fragment
          root, radius = info[fragment_id][0]
          env = Chem.FindAtomEnvironmentOfRadiusN(mol, radius, root)
          frag = Chem.PathToSubmol(mol, env)
          smiles = Chem.MolToSmiles(frag)
          fp_smiles[fragment_id] = {'smiles': smiles, 'count': count}
        fp = fp_smiles
    else:
      fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
          mol,
          self.radius,
          nBits=self.size,
          useChirality=self.chiral,
          useBondTypes=self.bonds,
          useFeatures=self.features)
      # The np.float alias was deprecated in NumPy 1.20 and removed in 1.24;
      # the builtin float selects the same float64 dtype.
      fp = np.asarray(fp, dtype=float)
    return fp
Example #13
0
def _getAtomInvariantsWithRadius(mol, radius):
  """ Helper function collecting, atom by atom, the Morgan identifiers
      generated at exactly the requested radius

      A fingerprint restricted to one atom (fromAtoms=[atom]) is computed
      for every atom; an identifier is kept when the first environment
      recorded for it in bitInfo has the requested radius.

      Arguments:
      - mol:    the molecule of interest
      - radius: the radius for the Morgan fingerprint

      Return: list of atom invariants
  """
  invariants = []
  for atom_idx in range(mol.GetNumAtoms()):
    bit_info = {}
    # called only to fill bit_info; the fingerprint itself is not needed
    rdMolDescriptors.GetMorganFingerprint(mol, radius, fromAtoms=[atom_idx], bitInfo=bit_info)
    invariants.extend(identifier
                      for identifier, environments in bit_info.items()
                      if environments[0][1] == radius)
  return invariants
Example #14
0
def scoreMol(mol, fscore):
    """Score mol from the per-fragment contributions in fscore.

    Contributions of the distinct radius-2 Morgan fragments are summed
    (fragments missing from fscore add 0) and divided by the atom count.
    Values outside [-4, 4] are compressed logarithmically.

    Raises ValueError when mol is None.
    """
    if mol is None:
        raise ValueError("invalid molecule")

    fragment_ids = rdMolDescriptors.GetMorganFingerprint(
        mol, 2).GetNonzeroElements()

    # calculating the score
    total = 0.0
    for fragment_id in fragment_ids:
        total += fscore.get(fragment_id, 0)
    normalized = total / float(mol.GetNumAtoms())

    # preventing score explosion for exotic molecules
    if normalized > 4:
        return 4.0 + math.log10(normalized - 4.0 + 1.0)
    if normalized < -4:
        return -4.0 - math.log10(-4.0 - normalized + 1.0)
    return normalized
Example #15
0
def NP_score(mol, fscore=None):
    """Score mol from per-fragment contributions.

    When fscore is None the model is obtained from readNPModel().
    Contributions of the distinct radius-2 Morgan fragments are summed
    (fragments missing from the model add 0) and divided by the atom count.
    Values outside [-4, 4] are compressed logarithmically.

    Raises ValueError when mol is None.
    """
    model = readNPModel() if fscore is None else fscore
    if mol is None:
        raise ValueError('invalid molecule')

    fragment_ids = rdMolDescriptors.GetMorganFingerprint(
        mol, 2).GetNonzeroElements()

    # calculating the score
    total = 0.
    for fragment_id in fragment_ids:
        total += model.get(fragment_id, 0)
    normalized = total / float(mol.GetNumAtoms())

    # preventing score explosion for exotic molecules
    if normalized > 4:
        return 4. + math.log10(normalized - 4. + 1.)
    if normalized < -4:
        return -4. - math.log10(-4. - normalized + 1.)
    return normalized
def main():
    """Print the summed word vectors of glycine's radius-0 Morgan identifiers.

    Loads the word2vec-format vectors from "vec.txt", computes the radius-0
    Morgan identifiers of glycine and adds up the vector stored under each
    identifier (looked up by its string form). The sum is printed; if any
    step fails the error is printed instead.
    """
    model = models.KeyedVectors.load_word2vec_format("vec.txt")

    # Using canonical smiles for glycine, as in original research paper
    mol = Chem.MolFromSmiles("C(C(=O)O)N")
    try:
        info = {}
        rdMolDescriptors.GetMorganFingerprint(mol, 0, bitInfo=info)
        # Size the accumulator from the loaded model rather than assuming
        # 200-dimensional vectors.
        totalvec = np.zeros(model.vector_size)
        for k in info:
            # Index the KeyedVectors directly: the ``.wv`` alias on
            # KeyedVectors is gone in gensim 4.
            totalvec = np.add(totalvec, model[str(k)])
    except Exception as e:
        # Report and stop; there is no embedding to print in this case.
        print(e)
        return

    print(totalvec)
Example #17
0
    def testMorganFingerprints(self):
        """Check element counts of Morgan fingerprints and invariant handling.

        Covers plain and hashed fingerprints at radii 0-2, the useCounts,
        useChirality and fromAtoms options, and fingerprints built from
        explicitly supplied connectivity / feature invariants.
        """
        halogenated = 'CC(F)(Cl)C(F)(Cl)C'

        mol = Chem.MolFromSmiles(halogenated)
        fp = rdMD.GetMorganFingerprint(mol, 0)
        self.assertEqual(len(fp.GetNonzeroElements()), 4)

        # counts on / off for two equivalent atoms
        mol = Chem.MolFromSmiles('CC')
        fp = rdMD.GetMorganFingerprint(mol, 0)
        elements = fp.GetNonzeroElements()
        self.assertEqual(len(elements), 1)
        self.assertEqual(list(fp.GetNonzeroElements().values())[0], 2)
        fp = rdMD.GetMorganFingerprint(mol, 0, useCounts=False)
        elements = fp.GetNonzeroElements()
        self.assertEqual(len(elements), 1)
        self.assertEqual(list(fp.GetNonzeroElements().values())[0], 1)

        # hashed vs. unhashed at growing radius
        mol = Chem.MolFromSmiles(halogenated)
        fp = rdMD.GetHashedMorganFingerprint(mol, 0)
        self.assertEqual(len(fp.GetNonzeroElements()), 4)
        fp = rdMD.GetMorganFingerprint(mol, 1)
        self.assertEqual(len(fp.GetNonzeroElements()), 8)
        fp = rdMD.GetHashedMorganFingerprint(mol, 1)
        self.assertEqual(len(fp.GetNonzeroElements()), 8)
        fp = rdMD.GetMorganFingerprint(mol, 2)
        self.assertEqual(len(fp.GetNonzeroElements()), 9)

        # a stereo centre only matters when useChirality is switched on
        mol = Chem.MolFromSmiles('CC(F)(Cl)[C@](F)(Cl)C')
        for radius, expected in ((0, 4), (1, 8), (2, 9)):
            fp = rdMD.GetMorganFingerprint(mol, radius)
            self.assertEqual(len(fp.GetNonzeroElements()), expected)
        for radius, expected in ((0, 4), (1, 9), (2, 10)):
            fp = rdMD.GetMorganFingerprint(mol, radius, useChirality=True)
            self.assertEqual(len(fp.GetNonzeroElements()), expected)

        mol = Chem.MolFromSmiles('CCCCC')
        fp = rdMD.GetMorganFingerprint(mol, 0, fromAtoms=(0, ))
        self.assertEqual(len(fp.GetNonzeroElements()), 1)

        # connectivity invariants
        mol = Chem.MolFromSmiles('CC1CC1')
        atom_count = mol.GetNumAtoms()
        vs1 = rdMD.GetConnectivityInvariants(mol)
        self.assertEqual(len(vs1), atom_count)
        fp1 = rdMD.GetMorganFingerprint(mol, 2, invariants=vs1)
        fp2 = rdMD.GetMorganFingerprint(mol, 2)
        self.assertEqual(fp1, fp2)

        vs2 = rdMD.GetConnectivityInvariants(mol, False)
        self.assertEqual(len(vs2), atom_count)
        self.assertNotEqual(vs1, vs2)
        fp1 = rdMD.GetMorganFingerprint(mol, 2, invariants=vs2)
        self.assertNotEqual(fp1, fp2)

        # feature invariants
        mol = Chem.MolFromSmiles('Cc1ccccc1')
        vs1 = rdMD.GetFeatureInvariants(mol)
        self.assertEqual(len(vs1), mol.GetNumAtoms())
        self.assertEqual(vs1[0], 0)
        self.assertNotEqual(vs1[1], 0)
        for idx in (2, 3, 4):
            self.assertEqual(vs1[1], vs1[idx])

        mol = Chem.MolFromSmiles('FCCCl')
        vs1 = rdMD.GetFeatureInvariants(mol)
        self.assertEqual(len(vs1), mol.GetNumAtoms())
        self.assertEqual(vs1[1], 0)
        self.assertEqual(vs1[2], 0)
        self.assertNotEqual(vs1[0], 0)
        self.assertEqual(vs1[0], vs1[3])

        fp1 = rdMD.GetMorganFingerprint(mol, 0, invariants=vs1)
        fp2 = rdMD.GetMorganFingerprint(mol, 0, useFeatures=True)
        self.assertEqual(fp1, fp2)
def calculate_similarity_vector(smile_pair):
    """
    Build a flat similarity vector for a pair of SMILES strings.

    Both molecules are fingerprinted with several RDKit fingerprinters. Each
    fingerprint pair is handed to get_similarity_all or get_similarity_subset
    and the values that come back are appended, in order, to one list.

    smile_pair -- an iterable unpacking to exactly (smile1, smile2)
    Returns the accumulated list of similarity values.
    """
    first_smile, second_smile = smile_pair
    mols = (Chem.MolFromSmiles(first_smile), Chem.MolFromSmiles(second_smile))

    similarities = []

    # RDK topological fingerprint for a molecule
    # (LayeredFingerprint and PatternFingerprint, both SMARTS-pattern based,
    # are not part of the vector.)
    fp_a, fp_b = (Chem.RDKFingerprint(mol) for mol in mols)
    similarities.extend(get_similarity_all(fp_a, fp_b))

    # Topological Fingerprints
    # Uses Chem.RDKFingerprint internally, but with different parameters, I guess...
    # http://www.rdkit.org/docs/GettingStartedInPython.html#topological-fingerprints
    from rdkit.Chem.Fingerprints import FingerprintMols
    fp_a, fp_b = (FingerprintMols.FingerprintMol(mol) for mol in mols)
    similarities.extend(get_similarity_all(fp_a, fp_b))

    # MACCS Keys
    # There is a SMARTS-based implementation of the 166 public MACCS keys.
    # http://www.rdkit.org/docs/GettingStartedInPython.html#maccs-keys
    from rdkit.Chem import MACCSkeys
    fp_a, fp_b = (MACCSkeys.GenMACCSKeys(mol) for mol in mols)
    similarities.extend(get_similarity_all(fp_a, fp_b))

    # Atom Pairs and Topological Torsions
    # Atom-pair descriptors are available in several different forms; the
    # bit-vector form is used here, the torsions keep their counts.
    # http://www.rdkit.org/docs/GettingStartedInPython.html#atom-pairs-and-topological-torsions
    from rdkit.Chem.AtomPairs import Pairs
    fp_a, fp_b = (Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in mols)
    similarities.extend(get_similarity_all(fp_a, fp_b))

    from rdkit.Chem.AtomPairs import Torsions
    fp_a, fp_b = (Torsions.GetTopologicalTorsionFingerprint(mol)
                  for mol in mols)
    similarities.extend(get_similarity_subset(fp_a, fp_b))

    # Morgan Fingerprints (Circular Fingerprints)
    # This family of fingerprints, better known as circular fingerprints,
    # is built by applying the Morgan algorithm to a set of atom invariants;
    # the radius of the fingerprint must be provided (2 here).
    # http://www.rdkit.org/docs/GettingStartedInPython.html#morgan-fingerprints-circular-fingerprints
    from rdkit.Chem import rdMolDescriptors
    fp_a, fp_b = (rdMolDescriptors.GetMorganFingerprint(mol, 2)
                  for mol in mols)
    similarities.extend(get_similarity_subset(fp_a, fp_b))

    # same again, based on feature invariants
    fp_a, fp_b = (rdMolDescriptors.GetMorganFingerprint(mol, 2,
                                                        useFeatures=True)
                  for mol in mols)
    similarities.extend(get_similarity_subset(fp_a, fp_b))

    return similarities
 def calculateMol(self, m, smiles, internalParsing=False):
     """Return the Morgan fingerprint of m as a list of bits.

     The fingerprint uses radius self.radius, is folded to self.nbits bits
     and takes chirality into account. ``smiles`` and ``internalParsing``
     are accepted but not used here.
     """
     # nBits is a parameter of the bit-vector variant only; the plain
     # GetMorganFingerprint does not accept it and rejects the call.
     return list(rd.GetMorganFingerprintAsBitVect(
         m, radius=self.radius, nBits=self.nbits, useChirality=True))
Example #20
0
def synthetic_accessibility(mol, _fscores=None):
    '''
    Synthetic accessibility score, following:

    'Estimation of Synthetic Accessibility Score of Drug-like Molecules
    based on Molecular Complexity and Fragment Contributions'
    Peter Ertl and Ansgar Schuffenhauer
    Journal of Cheminformatics 1:8 (2009)
    http://www.jcheminf.com/content/1/1/8

    Several small modifications to the original paper are included,
    particularly a slightly different formula for the macrocyclic penalty
    and taking into account also molecule symmetry (fingerprint density).

    For a set of 10k diverse molecules the agreement between the original
    method as implemented in PipelinePilot and this implementation is
    r2 = 0.97.

    peter ertl & greg landrum, september 2013

    Parameters
    ----------
    mol : Mol
    _fscores : optional
        Raw fragment scores: an iterable of sequences whose first item is the
        score and whose remaining items are the fragment ids sharing it.
        When None, they are unpickled from 'fpscores.pkl.gz' located next to
        this module.

    Returns
    -------
    float : synthetic accessibility score
    '''
    if _fscores is None:
        scores_path = os.path.join(os.path.dirname(__file__), 'fpscores.pkl.gz')
        with gzip.open(scores_path, 'rb') as handle:
            _fscores = pickle.load(handle)

    # flatten the raw rows into a fragment id -> score lookup
    score_by_fragment = {}
    for row in _fscores:
        for fragment_id in row[1:]:
            score_by_fragment[fragment_id] = float(row[0])
    _fscores = score_by_fragment

    # fragment score
    # 2 is the *radius* of the circular fingerprint
    fragment_counts = rdMolDescriptors.GetMorganFingerprint(
        mol, 2).GetNonzeroElements()
    weighted_sum = 0.
    fragment_total = 0
    for fragment_id, occurrences in iteritems(fragment_counts):
        fragment_total += occurrences
        weighted_sum += _fscores.get(fragment_id, -4) * occurrences
    fragment_score = weighted_sum / fragment_total

    # features score
    atom_count = mol.GetNumAtoms()
    chiral_count = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    rings = mol.GetRingInfo()
    spiro_count = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    bridgehead_count = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    has_macrocycle = any(len(ring) > 8 for ring in rings.AtomRings())

    size_term = atom_count ** 1.005 - atom_count
    stereo_term = math.log10(chiral_count + 1)
    spiro_term = math.log10(spiro_count + 1)
    bridge_term = math.log10(bridgehead_count + 1)
    # This differs from the paper, which defines the penalty as
    # math.log10(num_macrocycles + 1). A flat penalty generates better
    # results when 2 or more macrocycles are present.
    macrocycle_term = math.log10(2) if has_macrocycle else 0.

    complexity_score = (0. - size_term - stereo_term - spiro_term -
                        bridge_term - macrocycle_term)

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    density_bonus = 0.
    if atom_count > len(fragment_counts):
        density_bonus = math.log(
            float(atom_count) / len(fragment_counts)) * .5

    raw_score = fragment_score + complexity_score + density_bonus

    # need to transform "raw" value into scale between 1 and 10
    lower_bound = -4.0
    upper_bound = 2.5
    scaled = 11. - (raw_score - lower_bound + 1) / (upper_bound -
                                                    lower_bound) * 9.
    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled+1.-9.)

    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
def sa_score(smiles):
    """
    Return the SA Score for the given smiles representation.

    The score is the sum of a count-weighted mean fragment contribution
    (radius-2 Morgan fragments looked up in MOLDB, unknown fragments count
    as -4), a set of structural penalties and a fingerprint-density bonus,
    rescaled and clamped to the range 1..10.

    Returns 0 when no score can be computed: either the fingerprint call
    raised (the error is passed to debug) or the fingerprint is empty.
    """

    molecule = Chem.MolFromSmiles(smiles)

    #
    # fragment score
    #

    # use a radius of 2 for circular fingerprint
    try:
        fingerprint = rdMolDescriptors.GetMorganFingerprint(molecule, 2)
        fingerprint = fingerprint.GetNonzeroElements()
    except Exception as error:
        # Will throw a boost error for N+ so we just give a 0 for score
        debug(error)
        return 0

    fragment_score = 0.0
    fragment_count = 0

    # Count frequencies of fragments
    for bit_id, count in fingerprint.items():
        fragment_count += count
        fragment_score += MOLDB.get(bit_id, -4) * count

    # A molecule without any fragment (e.g. an empty SMILES) cannot be
    # averaged; report it like every other unscorable input instead of
    # failing with ZeroDivisionError.
    if fragment_count == 0:
        return 0

    fragment_score /= fragment_count

    #
    # features score
    #

    num_atoms = molecule.GetNumAtoms()
    num_chiral_centers = len(
        Chem.FindMolChiralCenters(molecule, includeUnassigned=True))
    num_bridgeheads, num_spiro, num_macrocycles = ring_analysis(molecule)

    size_penalty = (num_atoms**1.005) - num_atoms
    stereo_penalty = math.log10(num_chiral_centers + 1)
    spiro_penalty = math.log10(num_spiro + 1)
    bridge_penalty = math.log10(num_bridgeheads + 1)

    macrocycle_penalty = 0.0
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocycle_penalty = math.log10(num_macrocycles + 1)
    # This form generates better results when 2 or more macrocycles are present
    if num_macrocycles > 0:
        macrocycle_penalty = math.log10(2)

    feature_penalty = (0.0 - size_penalty - stereo_penalty - spiro_penalty -
                       bridge_penalty - macrocycle_penalty)

    #
    # Correction for the fingerprint density.
    # Not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise.
    #
    if num_atoms > len(fingerprint):
        fingerprint_density = math.log(
            float(num_atoms) / len(fingerprint)) * 0.5
    else:
        fingerprint_density = 0.0

    #
    # Total score
    #
    total_score = fragment_score + feature_penalty + fingerprint_density

    # Transform "raw" value into scale between 1 and 10.
    sa_min = -4.0
    sa_max = 2.5
    total_score = 11.0 - (total_score - sa_min + 1) / (sa_max - sa_min) * 9.0
    # smooth the 10-end
    if total_score > 8.0:
        total_score = 8.0 + math.log(total_score + 1.0 - 9.0)

    if total_score > 10.0:
        total_score = 10.0
    elif total_score < 1.0:
        total_score = 1.0

    return total_score
Example #22
0
def BuildMorganFP(mol):
    """Build a Morgan fingerprint for *mol* and cache its total on the object.

    The fingerprint is generated with ``GetMorganFingerprint(mol, 2)``. The
    value of ``GetTotalVal()`` is stored on the returned fingerprint as the
    ``_sumCache`` attribute, presumably so callers can reuse it without
    recomputing it -- verify against the code that reads ``_sumCache``.

    Args:
        mol: molecule object accepted by
            ``rdMolDescriptors.GetMorganFingerprint``.

    Returns:
        The fingerprint object, with ``_sumCache`` set.
    """
    # Imported lazily so that RDKit is only required when this is called.
    from rdkit.Chem import rdMolDescriptors

    morgan_fp = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    morgan_fp._sumCache = morgan_fp.GetTotalVal()
    return morgan_fp
Example #23
0
    def __call__(self, smile):
        """Return a synthetic-accessibility based reward for a SMILES string.

        The raw score is the sum of three terms computed from the parsed
        molecule:

        * the count-weighted mean of the per-fragment scores looked up in
          the module-level ``_fscores`` table for a radius-2 Morgan
          fingerprint (fragments missing from the table contribute ``-4``);
        * a penalty built from the atom count, the number of chiral centres,
          spiro atoms and bridgehead atoms, and the presence of at least one
          ring with more than 8 atoms;
        * a fingerprint-density correction, applied only when the molecule
          has more atoms than distinct fingerprint elements.

        The raw score is rescaled, smoothed above 8, clamped to ``[1, 10]``
        and finally converted with ``exp(1 - sascore)``, so the returned
        value lies in ``[exp(-9), 1]`` and is largest when the clamped score
        is smallest.

        Args:
            smile: SMILES string handed to ``Chem.MolFromSmiles``.

        Returns:
            The transformed score as a float, or ``0.0`` when the SMILES
            cannot be parsed or the computation raises an ``Exception``.
        """
        if _fscores is None:
            # NOTE(review): readFragmentScores is expected to populate the
            # module-level _fscores table -- confirm against its definition.
            self.readFragmentScores()

        m = Chem.MolFromSmiles(smile)
        if not m:
            return 0.0

        try:
            # fragment score
            fp = rdMolDescriptors.GetMorganFingerprint(
                m, 2)  # <- 2 is the *radius* of the circular fingerprint
            fps = fp.GetNonzeroElements()
            score1 = 0.0
            nf = 0
            for bitId, v in iteritems(fps):
                nf += v
                score1 += _fscores.get(bitId, -4) * v
            # An empty fingerprint leaves nf == 0; the resulting
            # ZeroDivisionError is turned into 0.0 by the handler below.
            score1 /= nf

            # features score
            nAtoms = m.GetNumAtoms()
            nChiralCenters = len(
                Chem.FindMolChiralCenters(m, includeUnassigned=True))
            nBridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(m)
            nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(m)
            # Only the presence of a macrocycle matters below, not the count.
            hasMacrocycle = any(
                len(ring) > 8 for ring in m.GetRingInfo().AtomRings())

            sizePenalty = nAtoms**1.005 - nAtoms
            stereoPenalty = math.log10(nChiralCenters + 1)
            spiroPenalty = math.log10(nSpiro + 1)
            bridgePenalty = math.log10(nBridgeheads + 1)
            # ---------------------------------------
            # This differs from the paper, which defines:
            #  macrocyclePenalty = math.log10(nMacrocycles+1)
            # This form generates better results when 2 or more macrocycles
            # are present
            macrocyclePenalty = math.log10(2) if hasMacrocycle else 0.0
            score2 = (0.0 - sizePenalty - stereoPenalty - spiroPenalty -
                      bridgePenalty - macrocyclePenalty)

            # correction for the fingerprint density
            # not in the original publication, added in version 1.1
            # to make highly symmetrical molecules easier to synthesise
            score3 = 0.0
            if nAtoms > len(fps):
                score3 = math.log(float(nAtoms) / len(fps)) * 0.5
            sascore = score1 + score2 + score3

            # need to transform "raw" value into scale between 1 and 10
            min_score = -4.0
            max_score = 2.5
            sascore = (11.0 - (sascore - min_score + 1) /
                       (max_score - min_score) * 9.0)
            # smooth the 10-end
            if sascore > 8.0:
                sascore = 8.0 + math.log(sascore + 1.0 - 9.0)
            # clamp to [1, 10]
            sascore = min(10.0, max(1.0, sascore))

            # minimize the sascore: lower sascore -> larger returned value
            return math.exp(1 - sascore)
        except Exception:
            # Best-effort scoring: any failure while computing the score
            # yields the same 0.0 as an unparsable SMILES. Unlike a bare
            # ``except:``, this lets KeyboardInterrupt and SystemExit
            # propagate so a long scoring run can still be interrupted.
            return 0.0
Example #24
0
def highlight_np_scores(mol,
                        col_map="rwg",
                        output="svg",
                        png_fn="contribs.png",
                        width=400,
                        height=200):
    """Draw *mol* with atoms and bonds coloured by their fragment scores.

    Fragment highlighting for Peter Ertl's Natural Product Likeness Score
    (J Chem Inf Model. 2008 Jan;48(1):68-74; DOI: 10.1021/ci700286x),
    as implemented in the RDKit.

    For every radius-2 Morgan bit of the molecule that is present in the
    module-level ``fscore`` table, the bit's score is added to each bond of
    the matching atom environment and to both end atoms of that bond. The
    accumulated scores are scaled with ``NormalizeAroundZero`` and turned
    into highlight colours through a colour map built from
    ``CDICT[col_map]``.

    Args:
        mol: RDKit molecule to draw.
        col_map: colour map name; also the key looked up in ``CDICT``.
        output: selects the result (case-insensitive):
            `svg` (SVG image; default); `raw` (raw SVG string);
            `png` (PNG image, written to `png_fn`; requires cairoSVG);
            `png_tag` (HTML img tag containing the encoded PNG image);
            `debug` (text output of parameters).
        png_fn: file name the PNG is written to when ``output`` is `png`.
        width: width passed to the SVG drawer.
        height: height passed to the SVG drawer.

    Returns:
        The SVG string for `raw`, ``SVG(svg)`` for `svg`, the result of
        ``mol_img_tag`` for `png_tag`, and ``None`` in every other case
        (`png`, `debug`, missing cairoSVG, or an unknown option, for which
        a message is printed).

    Helpful RDKit links (used for creating the code):
        http://www.rdkit.org/docs/GettingStartedInPython.html#explaining-bits-from-morgan-fingerprints
        http://rdkit.blogspot.de/2015/02/new-drawing-code.html
    """
    output = output.lower()
    cmap = mpl_col.LinearSegmentedColormap(col_map, CDICT[col_map], 50)

    def _scale(scores, post=None):
        """Scale *scores* with NormalizeAroundZero; optionally apply *post*."""
        vals = scores.values()
        norm = NormalizeAroundZero(vmin=min(vals), vmax=max(vals))
        if post is None:
            return {idx: norm(val) for idx, val in scores.items()}
        return {idx: post(norm(val)) for idx, val in scores.items()}

    # bitInfo is filled in place: bit id -> entries indexed below as
    # (frag[0], frag[1]).
    bit_info = {}
    rdMolDescriptors.GetMorganFingerprint(mol, 2, bitInfo=bit_info)

    num_atoms = 1  # float(mol.GetNumAtoms())
    atom_scores = Counter()
    bond_scores = Counter()
    num_bits = 0  # tallied for scored bits; not read again in this function
    for bit, fragments in bit_info.items():
        if bit not in fscore:
            continue
        num_bits += 1
        share = fscore[bit] / num_atoms
        for frag in fragments:
            start_atom, radius = frag[0], frag[1]
            bond_ids = Chem.FindAtomEnvironmentOfRadiusN(
                mol, radius, start_atom)
            for b_idx in bond_ids:
                bond_scores[b_idx] += share
                bond = mol.GetBondWithIdx(b_idx)
                atom_scores[bond.GetBeginAtomIdx()] += share
                atom_scores[bond.GetEndAtomIdx()] += share

    if output == "debug":
        # Debug mode reports the scaled values themselves, not colours.
        atom_cols = _scale(atom_scores)
        bond_cols = _scale(bond_scores)
        report = ("*** Scores ***:", atom_scores, bond_scores,
                  "*** Norm Scores ***:", atom_cols, bond_cols)
        for item in report:
            print(item)
        return None

    atom_cols = _scale(atom_scores, cmap)
    bond_cols = _scale(bond_scores, cmap)

    check_2d_coords(mol)
    drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    drawer.DrawMolecule(mol,
                        highlightAtoms=atom_cols.keys(),
                        highlightAtomColors=atom_cols,
                        highlightBonds=bond_cols.keys(),
                        highlightBondColors=bond_cols)
    drawer.FinishDrawing()

    # Strip the namespace prefix, the trailing newline and the XML header
    # from the drawer's output.
    svg = (drawer.GetDrawingText()
           .replace('svg:', '')
           .replace("</svg>\n", "</svg>")
           .replace("<?xml version='1.0' encoding='iso-8859-1'?>\n", ""))

    if output == "raw":
        return svg

    if "png" not in output:
        if output == "svg":
            return SVG(svg)
        print("Unknown output option:", output)
        return None

    # Everything below is a PNG-flavoured option and needs cairoSVG.
    if not PNG:
        print(
            "Converting to PNG requires cairoSVG, which could not be found.\n"
            "Try `pip install cairoSVG` to resolve this.")
        return None
    if output == "png":
        svg2png(bytestring=svg, write_to=png_fn)
        return None
    if output == "png_tag":  # return a HTML <img> tag containing the PNG img
        png_bytes = svg2png(bytestring=svg)
        return mol_img_tag(png_bytes)
    print("Unknown output option.")
    return None