Exemple #1
0
    def ClassifyExample(self, example, appendExamples=0):
        """ Classify an example using the stored class and conditional probabilities

    For every class the value from `_classProbs` is multiplied by one entry of
    `_condProbs` per attribute in `_attrs`; the class with the largest product wins.
    That winning product is also stored in `self.mprob`.

    Arguments:
      - example: the example to classify. It is indexed by attribute id, or, when
        `_useSigs` is set, `example[1]` must provide `GetBit()`
      - appendExamples: if true, the example is appended to `self._examples`

    Returns the key of the most probable class, or -1 when no class has a
    probability larger than -1.0 (e.g. when there are no classes at all)
    """
        if appendExamples:
            self._examples.append(example)

        # Signature mode is optional: instances without the attribute behave
        # as if it were switched off.
        useSigs = bool(getattr(self, '_useSigs', False))

        scores = {}
        for label, prior in iteritems(self._classProbs):
            condProbs = self._condProbs[label]
            likelihood = prior
            for attr in self._attrs:
                if useSigs:
                    # signatures only distinguish bit set / bit not set
                    binId = 1 if example[1].GetBit(attr) else 0
                else:
                    binId = example[attr]
                    # attributes with quantization bounds are mapped to a bin first
                    if self._qBounds[attr] > 0:
                        binId = _getBinId(binId, self._QBoundVals[attr])
                likelihood *= condProbs[attr][binId]
            scores[label] = likelihood

        best = -1
        self.mprob = -1.0
        for label, score in iteritems(scores):
            if score > self.mprob:
                best, self.mprob = label, score

        return best
Exemple #2
0
  def ClassifyExample(self, example, appendExamples=0):
    """ Classify an example using the stored class and conditional probabilities

    For every class the value from `_classProbs` is multiplied by one entry of
    `_condProbs` per attribute in `_attrs`; the class with the largest product wins.
    That winning product is also stored in `self.mprob`.

    Arguments:
      - example: the example to classify. It is indexed by attribute id, or, when
        `_useSigs` is set, `example[1]` must provide `GetBit()`
      - appendExamples: if true, the example is appended to `self._examples`

    Returns the key of the most probable class, or -1 when no class has a
    probability larger than -1.0 (e.g. when there are no classes at all)
    """
    if appendExamples:
      self._examples.append(example)

    # Signature mode is optional: instances without the attribute behave
    # as if it were switched off.
    useSigs = bool(getattr(self, '_useSigs', False))

    scores = {}
    for label, prior in iteritems(self._classProbs):
      condProbs = self._condProbs[label]
      likelihood = prior
      for attr in self._attrs:
        if useSigs:
          # signatures only distinguish bit set / bit not set
          binId = 1 if example[1].GetBit(attr) else 0
        else:
          binId = example[attr]
          # attributes with quantization bounds are mapped to a bin first
          if self._qBounds[attr] > 0:
            binId = _getBinId(binId, self._QBoundVals[attr])
        likelihood *= condProbs[attr][binId]
      scores[label] = likelihood

    best = -1
    self.mprob = -1.0
    for label, score in iteritems(scores):
      if score > self.mprob:
        best, self.mprob = label, score

    return best
Exemple #3
0
def MolToMPL(mol,size=(300,300),kekulize=True, wedgeBonds=True,
             imageType=None, fitImage=False, options=None, **kwargs):
  """ Generates a drawing of a molecule on a matplotlib canvas

  Arguments:
    - mol: the molecule to draw; a false value raises ValueError
    - size: (width, height) handed to the Canvas. The figure size in inches is
      set to size/100 in each direction
    - kekulize: if true, the drawing is made from a kekulized copy so that the
      molecule passed in is not kekulized itself
    - wedgeBonds: stored on the drawing options as `wedgeDashedBonds`
    - imageType: accepted but not used by this function
    - fitImage: if true, `dotsPerAngstrom` of the drawing options is set to
      int(min(size) / 10)
    - options: the drawing options to use. When None, a fresh DrawingOptions
      with `bgColor` None is created. Note that options passed in by the
      caller are modified (`wedgeDashedBonds`, and `dotsPerAngstrom` when
      fitImage is set)
    - any additional keyword arguments are passed to MolDrawing.AddMol()

  Returns the `_figure` of the canvas. As a side effect the rescaled atom
  positions are stored on the molecule passed in as `_atomPs`.
  """
  if not mol:
    raise ValueError('Null molecule provided')
  from rdkit.Chem.Draw.mplCanvas import Canvas
  canvas = Canvas(size)
  if options is None:
    options = DrawingOptions()
    options.bgColor=None
  if fitImage:
    # This has to go on the options object that is handed to MolDrawing
    # below; the name `drawingOptions` is not defined in this function.
    options.dotsPerAngstrom = int(min(size) / 10)
  options.wedgeDashedBonds=wedgeBonds
  drawer = MolDrawing(canvas=canvas, drawingOptions=options)
  # keep a handle on the caller's molecule: the atom positions are attached
  # to it, not to the kekulized working copy
  omol=mol
  if kekulize:
    from rdkit import Chem
    mol = Chem.Mol(mol.ToBinary())
    Chem.Kekulize(mol)

  if not mol.GetNumConformers():
    from rdkit.Chem import AllChem
    AllChem.Compute2DCoords(mol)

  drawer.AddMol(mol,**kwargs)
  omol._atomPs=drawer.atomPs[mol]
  for k,v in iteritems(omol._atomPs):
    omol._atomPs[k]=canvas.rescalePt(v)
  canvas._figure.set_size_inches(float(size[0])/100,float(size[1])/100)
  return canvas._figure
Exemple #4
0
def MolToMPL(mol,size=(300,300),kekulize=True, wedgeBonds=True,
             imageType=None, fitImage=False, options=None, **kwargs):
  """ Generates a drawing of a molecule on a matplotlib canvas

  Arguments:
    - mol: the molecule to draw; a false value raises ValueError
    - size: (width, height) handed to the Canvas. The figure size in inches is
      set to size/100 in each direction
    - kekulize: if true, the drawing is made from a kekulized copy so that the
      molecule passed in is not kekulized itself
    - wedgeBonds: stored on the drawing options as `wedgeDashedBonds`
    - imageType: accepted but not used by this function
    - fitImage: if true, `dotsPerAngstrom` of the drawing options is set to
      int(min(size) / 10)
    - options: the drawing options to use. When None, a fresh DrawingOptions
      with `bgColor` None is created. Note that options passed in by the
      caller are modified (`wedgeDashedBonds`, and `dotsPerAngstrom` when
      fitImage is set)
    - any additional keyword arguments are passed to MolDrawing.AddMol()

  Returns the `_figure` of the canvas. As a side effect the rescaled atom
  positions are stored on the molecule passed in as `_atomPs`.
  """
  if not mol:
    raise ValueError('Null molecule provided')
  from rdkit.Chem.Draw.mplCanvas import Canvas
  canvas = Canvas(size)
  if options is None:
    options = DrawingOptions()
    options.bgColor=None
  if fitImage:
    # This has to go on the options object that is handed to MolDrawing
    # below; the name `drawingOptions` is not defined in this function.
    options.dotsPerAngstrom = int(min(size) / 10)
  options.wedgeDashedBonds=wedgeBonds
  drawer = MolDrawing(canvas=canvas, drawingOptions=options)
  # keep a handle on the caller's molecule: the atom positions are attached
  # to it, not to the kekulized working copy
  omol=mol
  if kekulize:
    from rdkit import Chem
    mol = Chem.Mol(mol.ToBinary())
    Chem.Kekulize(mol)

  if not mol.GetNumConformers():
    from rdkit.Chem import AllChem
    AllChem.Compute2DCoords(mol)

  drawer.AddMol(mol,**kwargs)
  omol._atomPs=drawer.atomPs[mol]
  for k,v in iteritems(omol._atomPs):
    omol._atomPs[k]=canvas.rescalePt(v)
  canvas._figure.set_size_inches(float(size[0])/100,float(size[1])/100)
  return canvas._figure
Exemple #5
0
 def GetAllChildren(self):
     """ returns a dictionary, keyed by SMILES, of all descendants of this node

     Direct children are added first, then `_gacRecurse()` adds everything
     below each of them. A SMILES that occurs more than once keeps the node
     that was stored last.
     """
     descendants = {}
     for smiles, node in iteritems(self.children):
         descendants[smiles] = node
         node._gacRecurse(descendants, terminalOnly=False)
     return descendants
Exemple #6
0
 def GetAllChildren(self):
   """ returns a dictionary, keyed by SMILES, of all descendants of this node

   Direct children are added first, then `_gacRecurse()` adds everything
   below each of them. A SMILES that occurs more than once keeps the node
   that was stored last.
   """
   descendants = {}
   for smiles, node in iteritems(self.children):
     descendants[smiles] = node
     node._gacRecurse(descendants, terminalOnly=False)
   return descendants
def calculateSAScore(m, fscores):
    """Return a synthetic accessibility score between 1 and 10 for molecule `m`.

    The raw score is the sum of a fragment term (count-weighted mean of the
    per-fragment scores), a complexity term (negated size, stereo, spiro,
    bridgehead and macrocycle penalties) and a fingerprint-density correction.
    It is then rescaled, smoothed above 8 and clamped to [1, 10].

    Arguments:
      m -- the molecule to score
      fscores -- mapping from Morgan fingerprint element id to fragment score;
                 ids that are missing contribute -4

    Raises ZeroDivisionError if the fingerprint has no nonzero elements.
    """
    # fragment score (2 is the radius handed to the Morgan fingerprint)
    fragmentCounts = rdMolDescriptors.GetMorganFingerprint(m, 2).GetNonzeroElements()
    fragmentScore = 0.
    totalCount = 0
    for fragId, count in iteritems(fragmentCounts):
        totalCount += count
        fragmentScore += fscores.get(fragId, -4) * count
    fragmentScore /= totalCount

    # features score
    nAtoms = m.GetNumAtoms()
    nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ringInfo = m.GetRingInfo()
    nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ringInfo)
    # rings with more than 8 atoms count as macrocycles
    hasMacrocycle = any(len(ring) > 8 for ring in ringInfo.AtomRings())

    complexityScore = 0.
    complexityScore -= nAtoms**1.005 - nAtoms
    complexityScore -= math.log10(nChiralCenters + 1)
    complexityScore -= math.log10(nSpiro + 1)
    complexityScore -= math.log10(nBridgeheads + 1)
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    complexityScore -= math.log10(2) if hasMacrocycle else 0.

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    nDistinct = len(fragmentCounts)
    if nAtoms > nDistinct:
        densityCorrection = math.log(float(nAtoms) / nDistinct) * .5
    else:
        densityCorrection = 0.

    rawScore = fragmentScore + complexityScore + densityCorrection

    # need to transform "raw" value into scale between 1 and 10
    lowest, highest = -4.0, 2.5
    scaled = 11. - (rawScore - lowest + 1) / (highest - lowest) * 9.
    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled + 1. - 9.)
    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
Exemple #8
0
 def GetLeaves(self):
   """ returns a dictionary, keyed by SMILES, of leaf (terminal) nodes

   A node is a leaf when its own `children` is empty. Children that are not
   leaves are searched with `_gacRecurse(..., terminalOnly=True)`.
   """
   leaves = {}
   for smiles, node in iteritems(self.children):
     if len(node.children):
       # not a leaf itself: collect the leaves below it
       node._gacRecurse(leaves, terminalOnly=True)
       continue
     leaves[smiles] = node
   return leaves
Exemple #9
0
 def GetLeaves(self):
     """ returns a dictionary, keyed by SMILES, of leaf (terminal) nodes

     A node is a leaf when its own `children` is empty. Children that are not
     leaves are searched with `_gacRecurse(..., terminalOnly=True)`.
     """
     leaves = {}
     for smiles, node in iteritems(self.children):
         if len(node.children):
             # not a leaf itself: collect the leaves below it
             node._gacRecurse(leaves, terminalOnly=True)
             continue
         leaves[smiles] = node
     return leaves
Exemple #10
0
def calculateScore(m):
  """Return a synthetic accessibility score between 1 and 10 for molecule `m`.

  The fragment scores come from the module-level `_fscores`; when that is
  still None, `readFragmentScores()` is called first.

  The raw score is the sum of a fragment term (count-weighted mean of the
  per-fragment scores, -4 for unknown fragments), a complexity term (negated
  size, stereo, spiro, bridgehead and macrocycle penalties) and a
  fingerprint-density correction. It is then rescaled, smoothed above 8 and
  clamped to [1, 10].

  Raises ZeroDivisionError if the fingerprint has no nonzero elements.
  """
  if _fscores is None:
    readFragmentScores()

  # fragment score (2 is the *radius* of the circular fingerprint)
  fragmentCounts = rdMolDescriptors.GetMorganFingerprint(m, 2).GetNonzeroElements()
  fragmentScore = 0.
  totalCount = 0
  for fragId, count in iteritems(fragmentCounts):
    totalCount += count
    fragmentScore += _fscores.get(fragId, -4) * count
  fragmentScore /= totalCount

  # features score
  nAtoms = m.GetNumAtoms()
  nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
  ringInfo = m.GetRingInfo()
  nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ringInfo)
  # rings with more than 8 atoms count as macrocycles
  hasMacrocycle = any(len(ring) > 8 for ring in ringInfo.AtomRings())

  complexityScore = 0.
  complexityScore -= nAtoms**1.005 - nAtoms
  complexityScore -= math.log10(nChiralCenters + 1)
  complexityScore -= math.log10(nSpiro + 1)
  complexityScore -= math.log10(nBridgeheads + 1)
  # ---------------------------------------
  # This differs from the paper, which defines:
  #  macrocyclePenalty = math.log10(nMacrocycles+1)
  # This form generates better results when 2 or more macrocycles are present
  complexityScore -= math.log10(2) if hasMacrocycle else 0.

  # correction for the fingerprint density
  # not in the original publication, added in version 1.1
  # to make highly symmetrical molecules easier to synthetise
  nDistinct = len(fragmentCounts)
  if nAtoms > nDistinct:
    densityCorrection = math.log(float(nAtoms) / nDistinct) * .5
  else:
    densityCorrection = 0.

  rawScore = fragmentScore + complexityScore + densityCorrection

  # need to transform "raw" value into scale between 1 and 10
  lowest, highest = -4.0, 2.5
  scaled = 11. - (rawScore - lowest + 1) / (highest - lowest) * 9.
  # smooth the 10-end
  if scaled > 8.:
    scaled = 8. + math.log(scaled + 1. - 9.)
  if scaled > 10.:
    return 10.0
  if scaled < 1.:
    return 1.0
  return scaled
Exemple #11
0
def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False, **kwargs):
  """
  Calculates the Morgan fingerprint with the environments of atomId removed.

  The full fingerprint and a per-atom map of the bits each atom takes part in
  are computed on the first call and stored on the molecule as `_fpInfo`.
  Later calls reuse that stored data, whatever radius, fpType, nBits or
  useFeatures they are given.

  Parameters:
    mol -- the molecule of interest
    radius -- the maximum radius
    fpType -- the type of Morgan fingerprint: 'count' or 'bv'
    atomId -- the atom to remove the environments for (if -1, no environments is removed)
    nBits -- the size of the bit vector (only for fpType = 'bv')
    useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan

  any additional keyword arguments will be passed to the fingerprinting function.

  Raises ValueError for an unknown fpType, for an atomId that is not smaller
  than the number of atoms, or when `_fpInfo` does not have two entries.
  """
  asBitVect = fpType == 'bv'
  if not asBitVect and fpType != 'count':
    raise ValueError("Unknown Morgan fingerprint type")

  if not hasattr(mol, '_fpInfo'):
    bitInfo = {}
    # get the fingerprint and set up the per-atom container that matches it
    if asBitVect:
      fullFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits,
                                                  useFeatures=useFeatures, bitInfo=bitInfo,
                                                  **kwargs)
      perAtom = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())]

      def flag(atomIdx, bitIdx):
        perAtom[atomIdx][bitIdx] = 1
    else:
      fullFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=bitInfo,
                                         **kwargs)
      perAtom = [[] for _ in range(mol.GetNumAtoms())]

      def flag(atomIdx, bitIdx):
        perAtom[atomIdx].append(bitIdx)

    # construct the bit map
    for bitIdx, environments in iteritems(bitInfo):
      for center, rad in environments:
        if rad == 0:
          # radius 0: only the central atom is involved
          flag(center, bitIdx)
          continue
        # radii > 0: every atom of the environment is involved
        bondPath = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, center)
        atomMap = {}
        # called only to fill atomMap; the submol itself is not needed
        Chem.PathToSubmol(mol, bondPath, atomMap=atomMap)
        for member in atomMap:
          flag(member, bitIdx)
    mol._fpInfo = (fullFp, perAtom)

  if atomId < 0:
    return mol._fpInfo[0]

  # remove the bits of atomId
  if atomId >= mol.GetNumAtoms():
    raise ValueError("atom index greater than number of atoms")
  if len(mol._fpInfo) != 2:
    raise ValueError("_fpInfo not set")
  wholeFp, atomBits = mol._fpInfo
  if asBitVect:
    return wholeFp ^ atomBits[atomId]  # xor
  # count: work on a copy so that the stored fingerprint stays intact
  reduced = copy.deepcopy(wholeFp)
  for bitIdx in atomBits[atomId]:
    reduced[bitIdx] -= 1
  return reduced
Exemple #12
0
def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False,
                         **kwargs):
  """
  Calculates the Morgan fingerprint with the environments of atomId removed.

  The full fingerprint and a per-atom map of the bits each atom takes part in
  are computed on the first call and stored on the molecule as `_fpInfo`.
  Later calls reuse that stored data, whatever radius, fpType, nBits or
  useFeatures they are given.

  Parameters:
    mol -- the molecule of interest
    radius -- the maximum radius
    fpType -- the type of Morgan fingerprint: 'count' or 'bv'
    atomId -- the atom to remove the environments for (if -1, no environments is removed)
    nBits -- the size of the bit vector (only for fpType = 'bv')
    useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan

  any additional keyword arguments will be passed to the fingerprinting function.

  Raises ValueError for an unknown fpType, for an atomId that is not smaller
  than the number of atoms, or when `_fpInfo` does not have two entries.
  """
  asBitVect = fpType == 'bv'
  if not asBitVect and fpType != 'count':
    raise ValueError("Unknown Morgan fingerprint type")

  if not hasattr(mol, '_fpInfo'):
    bitInfo = {}
    # get the fingerprint and set up the per-atom container that matches it
    if asBitVect:
      fullFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits,
                                                  useFeatures=useFeatures, bitInfo=bitInfo,
                                                  **kwargs)
      perAtom = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())]

      def flag(atomIdx, bitIdx):
        perAtom[atomIdx][bitIdx] = 1
    else:
      fullFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures, bitInfo=bitInfo,
                                         **kwargs)
      perAtom = [[] for _ in range(mol.GetNumAtoms())]

      def flag(atomIdx, bitIdx):
        perAtom[atomIdx].append(bitIdx)

    # construct the bit map
    for bitIdx, environments in iteritems(bitInfo):
      for center, rad in environments:
        if rad == 0:
          # radius 0: only the central atom is involved
          flag(center, bitIdx)
          continue
        # radii > 0: every atom of the environment is involved
        bondPath = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, center)
        atomMap = {}
        # called only to fill atomMap; the submol itself is not needed
        Chem.PathToSubmol(mol, bondPath, atomMap=atomMap)
        for member in atomMap:
          flag(member, bitIdx)
    mol._fpInfo = (fullFp, perAtom)

  if atomId < 0:
    return mol._fpInfo[0]

  # remove the bits of atomId
  if atomId >= mol.GetNumAtoms():
    raise ValueError("atom index greater than number of atoms")
  if len(mol._fpInfo) != 2:
    raise ValueError("_fpInfo not set")
  wholeFp, atomBits = mol._fpInfo
  if asBitVect:
    return wholeFp ^ atomBits[atomId]  # xor
  # count: work on a copy so that the stored fingerprint stays intact
  reduced = copy.deepcopy(wholeFp)
  for bitIdx in atomBits[atomId]:
    reduced[bitIdx] -= 1
  return reduced
Exemple #13
0
        g2 = re.sub('[a-z,A-Z]', '', g2)
        sma = '[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]' % (r1, bnd,
                                                                    r2, g1, g2)
        gp[j] = sma

# Sanity check at import time: every reaction SMARTS has to parse and
# initialize. The offending definition is printed before the error propagates.
for gp in smartsGps:
    for defn in gp:
        try:
            t = Reactions.ReactionFromSmarts(defn)
            t.Initialize()
        except Exception:
            print(defn)
            raise

# One compiled query molecule per environment, under the same key as in `environs`.
environMatchers = {}
for env, sma in iteritems(environs):
    environMatchers[env] = Chem.MolFromSmarts(sma)

# For each entry of `reactionDefs`: a list of (i1, i2, bType, pattern) tuples,
# where the pattern matches environment L<i1> joined to environment L<i2> by
# a non-ring (`!@`) bond of type bType.
bondMatchers = []
for i, compats in enumerate(reactionDefs):
    tmp = []
    for i1, i2, bType in compats:
        e1 = environs['L%s' % i1]
        e2 = environs['L%s' % i2]
        patt = '[$(%s)]%s;!@[$(%s)]' % (e1, bType, e2)
        patt = Chem.MolFromSmarts(patt)
        tmp.append((i1, i2, bType, patt))
    bondMatchers.append(tmp)

# The reaction objects themselves, grouped like `smartsGps`.
reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x]
                   for x in smartsGps])
Exemple #14
0
def FindBRICSBonds(mol, randomizeOrder=False, silent=True):
    """ returns the bonds in a molecule that BRICS would cleave

  Arguments:
    - mol: the molecule to search
    - randomizeOrder: if true, both the order of the groups in bondMatchers
      and the order of the definitions within each group are shuffled
    - silent: accepted but not used by this function

  Each result is a 2-tuple: the two atom indices of the bond and the labels
  of the two environments (with any letters removed). A bond is reported
  only once, whichever direction it was matched in.

  >>> from rdkit import Chem
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(FindBRICSBonds(m))
  >>> res
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]

  a more complicated case:
  >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
  >>> res = list(FindBRICSBonds(m))
  >>> res
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

  we can also randomize the order of the results:
  >>> random.seed(23)
  >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
  >>> sorted(res)
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

  Note that this is a generator function :
  >>> res = FindBRICSBonds(m)
  >>> res
  <generator object ...>
  >>> next(res)
  ((3, 2), ('3', '4'))

  >>> m = Chem.MolFromSmiles('CC=CC')
  >>> res = list(FindBRICSBonds(m))
  >>> sorted(res)
  [((1, 2), ('7', '7'))]

  make sure we don't match ring bonds:
  >>> m = Chem.MolFromSmiles('O=C1NCCC1')
  >>> list(FindBRICSBonds(m))
  []

  another nice one, make sure environment 8 doesn't match something connected
  to a ring atom:
  >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
  >>> list(FindBRICSBonds(m))
  []

  """
    letter = re.compile('[a-z,A-Z]')
    indices = list(range(len(bondMatchers)))
    bondsDone = set()
    if randomizeOrder:
        # The optional `random` argument of shuffle() was removed in
        # Python 3.11; the default already draws from the `random` module.
        random.shuffle(indices)

    # Find out once which environments occur at all, so that the more
    # expensive bond patterns can be skipped below.
    envMatches = {}
    for env, patt in iteritems(environMatchers):
        envMatches[env] = mol.HasSubstructMatch(patt)
    for gpIdx in indices:
        if randomizeOrder:
            # shuffle a copy so that the module-level list keeps its order
            compats = bondMatchers[gpIdx][:]
            random.shuffle(compats)
        else:
            compats = bondMatchers[gpIdx]
        for i1, i2, bType, patt in compats:
            if not envMatches['L' + i1] or not envMatches['L' + i2]:
                continue
            matches = mol.GetSubstructMatches(patt)
            i1 = letter.sub('', i1)
            i2 = letter.sub('', i2)
            for match in matches:
                # skip bonds that were already reported in either direction
                if match not in bondsDone and (match[1],
                                               match[0]) not in bondsDone:
                    bondsDone.add(match)
                    yield (((match[0], match[1]), (i1, i2)))
Exemple #15
0
def calculate_score(m):
    """Return a synthetic accessibility score between 1 and 10 for molecule `m`.

    The fragment scores come from the module-level `_fscores`; when that is
    still None, `read_fragment_scores()` is called first.

    The raw score is the sum of a fragment term (count-weighted mean of the
    per-fragment scores, -4 for unknown fragments), a complexity term (negated
    size, stereo, spiro, bridgehead and macrocycle penalties) and a
    fingerprint-density correction. It is then rescaled, smoothed above 8 and
    clamped to [1, 10].

    Raises ZeroDivisionError if the fingerprint has no nonzero elements.
    """
    if _fscores is None:
        read_fragment_scores()

    # fragment score (2 is the *radius* of the circular fingerprint)
    fragment_counts = rdMolDescriptors.GetMorganFingerprint(m, 2).GetNonzeroElements()
    fragment_score = 0.
    total_count = 0
    for fragment_id, count in iteritems(fragment_counts):
        total_count += count
        fragment_score += _fscores.get(fragment_id, -4) * count
    fragment_score /= total_count

    # features score
    atom_count = m.GetNumAtoms()
    chiral_count = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ring_info = m.GetRingInfo()
    bridgehead_count, spiro_count = num_bridgeheads_and_spiro(m)
    # rings with more than 8 atoms count as macrocycles
    has_macrocycle = any(len(ring) > 8 for ring in ring_info.AtomRings())

    complexity_score = 0.
    complexity_score -= atom_count ** 1.005 - atom_count
    complexity_score -= math.log10(chiral_count + 1)
    complexity_score -= math.log10(spiro_count + 1)
    complexity_score -= math.log10(bridgehead_count + 1)
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocycle_penalty = math.log10(n_macrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    complexity_score -= math.log10(2) if has_macrocycle else 0.

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    distinct_count = len(fragment_counts)
    if atom_count > distinct_count:
        density_correction = math.log(float(atom_count) / distinct_count) * .5
    else:
        density_correction = 0.

    raw_score = fragment_score + complexity_score + density_correction

    # need to transform "raw" value into scale between 1 and 10
    lowest, highest = -4.0, 2.5
    scaled = 11. - (raw_score - lowest + 1) / (highest - lowest) * 9.
    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled + 1. - 9.)
    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
Exemple #16
0
def FindBRICSBonds(mol,randomizeOrder=False,silent=True):
  """ returns the bonds in a molecule that BRICS would cleave

  Arguments:
    - mol: the molecule to search
    - randomizeOrder: if true, both the order of the groups in bondMatchers
      and the order of the definitions within each group are shuffled
    - silent: accepted but not used by this function

  Each result is a 2-tuple: the two atom indices of the bond and the labels
  of the two environments (with any letters removed). A bond is reported
  only once, whichever direction it was matched in.

  >>> from rdkit import Chem
  >>> m = Chem.MolFromSmiles('CCCOCC')
  >>> res = list(FindBRICSBonds(m))
  >>> res
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]

  a more complicated case:
  >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
  >>> res = list(FindBRICSBonds(m))
  >>> res
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

  we can also randomize the order of the results:
  >>> random.seed(23)
  >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
  >>> sorted(res)
  [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

  Note that this is a generator function :
  >>> res = FindBRICSBonds(m)
  >>> res
  <generator object ...>
  >>> next(res)
  ((3, 2), ('3', '4'))

  >>> m = Chem.MolFromSmiles('CC=CC')
  >>> res = list(FindBRICSBonds(m))
  >>> sorted(res)
  [((1, 2), ('7', '7'))]

  make sure we don't match ring bonds:
  >>> m = Chem.MolFromSmiles('O=C1NCCC1')
  >>> list(FindBRICSBonds(m))
  []

  another nice one, make sure environment 8 doesn't match something connected
  to a ring atom:
  >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
  >>> list(FindBRICSBonds(m))
  []

  """
  letter = re.compile('[a-z,A-Z]')
  indices = list(range(len(bondMatchers)))
  bondsDone=set()
  if randomizeOrder:
    # The optional `random` argument of shuffle() was removed in
    # Python 3.11; the default already draws from the `random` module.
    random.shuffle(indices)

  # Find out once which environments occur at all, so that the more
  # expensive bond patterns can be skipped below.
  envMatches={}
  for env,patt in iteritems(environMatchers):
    envMatches[env]=mol.HasSubstructMatch(patt)
  for gpIdx in indices:
    if randomizeOrder:
      # shuffle a copy so that the module-level list keeps its order
      compats =bondMatchers[gpIdx][:]
      random.shuffle(compats)
    else:
      compats = bondMatchers[gpIdx]
    for i1,i2,bType,patt in compats:
      if not envMatches['L'+i1] or not envMatches['L'+i2]:
        continue
      matches = mol.GetSubstructMatches(patt)
      i1 = letter.sub('',i1)
      i2 = letter.sub('',i2)
      for match in matches:
        # skip bonds that were already reported in either direction
        if match not in bondsDone and (match[1],match[0]) not in bondsDone:
          bondsDone.add(match)
          yield(((match[0],match[1]),(i1,i2)))
Exemple #17
0
    def __call__(self, smile):
        """Score a SMILES string from its synthetic accessibility.

        The synthetic accessibility score (between 1 and 10) is computed from
        the module-level `_fscores` (loaded through `readFragmentScores()`
        when still None) and then mapped to `exp(1 - sascore)`, so that lower
        accessibility scores give larger return values.

        Arguments:
          smile -- the SMILES string to score

        Returns the mapped score, or 0.0 when the SMILES cannot be turned
        into a molecule or when the scoring itself fails.
        """
        if _fscores is None:
            self.readFragmentScores()
        m = Chem.MolFromSmiles(smile)
        if not m:
            return 0.0
        try:
            # fragment score
            fp = rdMolDescriptors.GetMorganFingerprint(
                m, 2)  #<- 2 is the *radius* of the circular fingerprint
            fps = fp.GetNonzeroElements()
            score1 = 0.
            nf = 0
            for bitId, v in iteritems(fps):
                nf += v
                score1 += _fscores.get(bitId, -4) * v
            score1 /= nf

            # features score
            nAtoms = m.GetNumAtoms()
            nChiralCenters = len(
                Chem.FindMolChiralCenters(m, includeUnassigned=True))
            ri = m.GetRingInfo()
            nBridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(m)
            nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(m)
            nMacrocycles = 0
            for x in ri.AtomRings():
                if len(x) > 8:
                    nMacrocycles += 1

            sizePenalty = nAtoms**1.005 - nAtoms
            stereoPenalty = math.log10(nChiralCenters + 1)
            spiroPenalty = math.log10(nSpiro + 1)
            bridgePenalty = math.log10(nBridgeheads + 1)
            macrocyclePenalty = 0.
            # ---------------------------------------
            # This differs from the paper, which defines:
            #  macrocyclePenalty = math.log10(nMacrocycles+1)
            # This form generates better results when 2 or more macrocycles are present
            if nMacrocycles > 0:
                macrocyclePenalty = math.log10(2)
            score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty
            # correction for the fingerprint density
            # not in the original publication, added in version 1.1
            # to make highly symmetrical molecules easier to synthetise
            score3 = 0.
            if nAtoms > len(fps):
                score3 = math.log(float(nAtoms) / len(fps)) * .5
            sascore = score1 + score2 + score3

            # need to transform "raw" value into scale between 1 and 10
            min_score = -4.0
            max_score = 2.5
            sascore = 11. - (sascore - min_score + 1) / (max_score -
                                                         min_score) * 9.
            # smooth the 10-end
            if sascore > 8.:
                sascore = 8. + math.log(sascore + 1. - 9.)
            if sascore > 10.:
                sascore = 10.0
            elif sascore < 1.:
                sascore = 1.0
            return math.exp(1 - sascore)  # minimize the sascore
        except Exception:
            # Best effort: a molecule that cannot be scored counts as 0.0.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            return 0.0
Exemple #18
0
 def _gacRecurse(self, res, terminalOnly=False):
     """ adds the descendants of this node to the dictionary `res`, keyed by SMILES

     With terminalOnly set, only nodes whose own `children` is empty are
     added; the recursion visits every child either way.
     """
     for smiles, node in iteritems(self.children):
         # skip inner nodes only when the caller asked for leaves
         if not (terminalOnly and len(node.children)):
             res[smiles] = node
         node._gacRecurse(res, terminalOnly=terminalOnly)
Exemple #19
0
def CalcSAScore(rmol):
    """Return a synthetic accessibility score between 1 and 10 for `rmol`.

    All calculations are done on a deep copy of `rmol`. The fragment scores
    come from the module-level `_fscores`; when that is still None,
    `ReadFragScores()` is called first.

    The raw score is the sum of a fragment term (count-weighted mean of the
    per-fragment scores, -4 for unknown fragments), a complexity term (negated
    size, stereo, spiro, bridgehead and macrocycle penalties) and a
    fingerprint-density correction. It is then rescaled, smoothed above 8 and
    clamped to [1, 10].

    Raises ZeroDivisionError if the fingerprint has no nonzero elements.
    """
    if _fscores is None:
        ReadFragScores()
    work = copy.deepcopy(rmol)
    #Chem.SanitizeMol(mol) # gives crashes!

    # fragment score (2 is the *radius* of the circular fingerprint)
    fragmentCounts = AllChem.GetMorganFingerprint(work, 2).GetNonzeroElements()
    fragmentScore = 0.0
    totalCount = 0
    for fragId, count in iteritems(fragmentCounts):
        totalCount += count
        fragmentScore += _fscores.get(fragId, -4) * count
    fragmentScore /= totalCount

    # features score
    nAtoms = work.GetNumAtoms()
    nChiralCenters = len(Chem.FindMolChiralCenters(work, includeUnassigned=True))
    ringInfo = work.GetRingInfo()
    nBridgehead, nSpiro = NumBridgeheadsAndSpiro(work, ringInfo)
    # rings with more than 8 atoms count as macrocycles
    hasMacrocycle = any(len(ring) > 8 for ring in ringInfo.AtomRings())

    complexityScore = 0.0
    complexityScore -= nAtoms**1.005 - nAtoms
    complexityScore -= math.log10(nChiralCenters + 1)
    complexityScore -= math.log10(nSpiro + 1)
    complexityScore -= math.log10(nBridgehead + 1)
    # -----------------------------
    # This differs from the paper, which defines:
    #   macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    complexityScore -= math.log10(2) if hasMacrocycle else 0.0

    # correction for the fingerprint density
    # not in the original publication
    # to make highly symmetrical molecules easier to synthesize
    nDistinct = len(fragmentCounts)
    if nAtoms > nDistinct:
        densityCorrection = math.log(float(nAtoms) / nDistinct) * 0.5
    else:
        densityCorrection = 0.0

    rawScore = fragmentScore + complexityScore + densityCorrection

    # need to transform "raw" value into scale between 1 and 10
    lowest, highest = -4.0, 2.5
    scaled = 11.0 - (rawScore - lowest + 1) / (highest - lowest) * 9.0
    # smooth the 10-end
    if scaled > 8.0:
        scaled = 8.0 + math.log(scaled - 8.0)
    if scaled > 10.0:
        return 10.0
    if scaled < 1.0:
        return 1.0
    return scaled
Exemple #20
0
 def _gacRecurse(self, res, terminalOnly=False):
   """ adds the descendants of this node to the dictionary `res`, keyed by SMILES

   With terminalOnly set, only nodes whose own `children` is empty are
   added; the recursion visits every child either way.
   """
   for smiles, node in iteritems(self.children):
     # skip inner nodes only when the caller asked for leaves
     if not (terminalOnly and len(node.children)):
       res[smiles] = node
     node._gacRecurse(res, terminalOnly=terminalOnly)
Exemple #21
0
    g1 = re.sub('[a-z,A-Z]','',g1)
    g2 = re.sub('[a-z,A-Z]','',g2)
    sma='[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]'%(r1,bnd,r2,g1,g2)
    gp[j] =sma

# Sanity check at import time: every reaction SMARTS has to parse and
# initialize. The offending definition is printed before the error propagates.
for gp in smartsGps:
  for defn in gp:
    try:
      t=Reactions.ReactionFromSmarts(defn)
      t.Initialize()
    except:
      print(defn)
      raise

# One compiled query molecule per environment, under the same key as in `environs`.
environMatchers={}
for env,sma in iteritems(environs):
  environMatchers[env]=Chem.MolFromSmarts(sma)
  
# For each entry of `reactionDefs`: a list of (i1, i2, bType, pattern) tuples,
# where the pattern matches environment L<i1> joined to environment L<i2> by
# a non-ring (`!@`) bond of type bType.
bondMatchers=[]
for i,compats in enumerate(reactionDefs):
  tmp=[]
  for i1,i2,bType in compats:
      e1 = environs['L%s'%i1]
      e2 = environs['L%s'%i2]
      patt = '[$(%s)]%s;!@[$(%s)]'%(e1,bType,e2)
      patt = Chem.MolFromSmarts(patt)
      tmp.append((i1,i2,bType,patt))
  bondMatchers.append(tmp)
    
# The reaction objects themselves, grouped like `smartsGps`.
reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps])
# NOTE(review): presumably filled in further down the file - not visible here.
reverseReactions = []
Exemple #22
0
def synthetic_accessibility(mol, _fscores=None):
    '''
    calculation of synthetic accessibility score as described in:

    'Estimation of Synthetic Accessibility Score of Drug-like Molecules 
    based on Molecular Complexity and Fragment Contributions'
    Peter Ertl and Ansgar Schuffenhauer
    Journal of Cheminformatics 1:8 (2009)
    http://www.jcheminf.com/content/1/1/8

    several small modifications to the original paper are included
    particularly slightly different formula for macrocyclic penalty
    and taking into account also molecule symmetry (fingerprint density)

    for a set of 10k diverse molecules the agreement between the original method
    as implemented in PipelinePilot and this implementation is r2 = 0.97

    peter ertl & greg landrum, september 2013

    Parameters
    ----------
    mol : Mol
    _fscores : sequence of sequences, optional
        raw fragment scores: the first item of every entry is the score, the
        remaining items are the fragment ids it applies to. When None, the
        scores are read from 'fpscores.pkl.gz' next to this module; that
        table is built once and reused by later calls.

    Returns
    -------
    float : synthetic accessibility score

    Raises
    ------
    ZeroDivisionError : if the fingerprint of `mol` has no nonzero elements
    '''
    if _fscores is None:
        # Un-gzipping, unpickling and converting the default table is by far
        # the most expensive step, so do it only once per process.
        score_table = getattr(synthetic_accessibility, '_default_score_table', None)
        raw_scores = None
        if score_table is None:
            # NOTE: pickle is only acceptable here because the file ships
            # with the package; never point this at untrusted data.
            with gzip.open(os.path.join(os.path.dirname(__file__), 'fpscores.pkl.gz'), 'rb') as f:
                raw_scores = pickle.load(f)
    else:
        score_table = None
        raw_scores = _fscores

    if score_table is None:
        # flatten [score, id1, id2, ...] entries into an id -> score lookup
        score_table = {}
        for entry in raw_scores:
            for fragment_id in entry[1:]:
                score_table[fragment_id] = float(entry[0])
        if _fscores is None:
            synthetic_accessibility._default_score_table = score_table

    # fragment score
    # 2 is the *radius* of the circular fingerprint
    fingerprint = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    fingerprints = fingerprint.GetNonzeroElements()
    score1 = 0.
    nf = 0
    for bit_id, value in iteritems(fingerprints):
        nf += value
        score1 += score_table.get(bit_id, -4) * value
    score1 /= nf

    # features score
    num_atoms = mol.GetNumAtoms()
    num_chiral_centers = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    ring_info = mol.GetRingInfo()
    num_spiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    num_bridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    num_macrocycles = 0
    for each_ring in ring_info.AtomRings():
        if len(each_ring) > 8:
            num_macrocycles += 1

    size_penalty = num_atoms ** 1.005 - num_atoms
    stereo_penalty = math.log10(num_chiral_centers + 1)
    spiro_penalty = math.log10(num_spiro + 1)
    bridge_penalty = math.log10(num_bridgeheads + 1)
    macrocycle_penalty = 0.
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocycle_penalty = math.log10(num_macrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    if num_macrocycles > 0:
        macrocycle_penalty = math.log10(2)

    score2 = 0. - size_penalty - stereo_penalty - spiro_penalty - bridge_penalty - macrocycle_penalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    score3 = 0.
    if num_atoms > len(fingerprints):
        score3 = math.log(float(num_atoms) / len(fingerprints)) * .5

    sascore = score1 + score2 + score3

    # need to transform "raw" value into scale between 1 and 10
    min_score = -4.0
    max_score = 2.5
    sascore = 11. - (sascore - min_score + 1) / (max_score - min_score) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)
    if sascore > 10.:
        sascore = 10.0
    elif sascore < 1.:
        sascore = 1.0

    return sascore