Example #1
0
    def initFromLines(self, lines):
        """Initialize the features and their radii from a sequence of text lines.

        Anything following a ``#`` on a line is ignored, as are lines that are
        empty once that part has been removed. Every remaining line must hold
        at least five fields separated by spaces and/or tabs::

            <feature name> <x> <y> <z> <radius>

        If all lines parse, ``self._initializeFeats(feats, rads)`` is called
        with the features and radii collected. If any line is malformed an
        error is logged and the method returns without calling
        ``_initializeFeats``.

        **Arguments**

          - lines: an iterable of strings, one per input line

        **Notes**

          - the line numbers in the error messages are 0-based (they come
            straight from ``enumerate``)

        """
        import re

        # Raw string: the previous literal "[\ \t]+" contained the invalid
        # escape sequence "\ ", which newer Python versions warn about. The
        # pattern still matches runs of spaces and tabs.
        spaces = re.compile(r"[ \t]+")

        feats = []
        rads = []
        for lineNum, line in enumerate(lines):
            # drop the comment part, then surrounding whitespace
            txt = line.split("#")[0].strip()
            if txt:
                splitL = spaces.split(txt)
                if len(splitL) < 5:
                    logger.error(
                        "Input line %d only contains %d fields, 5 are required. Read failed." % (lineNum, len(splitL))
                    )
                    return
                fName = splitL[0]
                try:
                    xP = float(splitL[1])
                    yP = float(splitL[2])
                    zP = float(splitL[3])
                    rad = float(splitL[4])
                except ValueError:
                    logger.error("Error parsing a number of line %d. Read failed." % (lineNum))
                    return
                # the feature name is passed as the first two constructor arguments
                feats.append(ChemicalFeatures.FreeChemicalFeature(fName, fName, Geometry.Point3D(xP, yP, zP)))
                rads.append(rad)
        self._initializeFeats(feats, rads)
    def initFromLines(self, lines):
        """ Reads feature definitions from text lines and initializes this object

        Each line is cut at the first ``#``; lines that are blank after that
        are skipped. The remaining lines are split on runs of spaces/tabs and
        need at least five fields: feature name, x, y, z and radius.

        On success ``self._initializeFeats(feats, rads)`` is called. On the
        first malformed line an error is logged and the method returns without
        calling ``_initializeFeats``.

        **Arguments**

          - lines: an iterable of strings

        **Notes**

          - line numbers reported in error messages start at 0

        """
        import re
        # a raw string avoids the invalid "\ " escape of the former
        # '[\ \t]+' literal; the set of matched characters is unchanged
        spaces = re.compile(r'[ \t]+')

        feats = []
        rads = []
        for lineNum, line in enumerate(lines):
            txt = line.split('#')[0].strip()
            if not txt:
                # comment-only or empty line
                continue
            splitL = spaces.split(txt)
            if len(splitL) < 5:
                logger.error(
                    'Input line %d only contains %d fields, 5 are required. Read failed.'
                    % (lineNum, len(splitL)))
                return
            fName = splitL[0]
            try:
                xP = float(splitL[1])
                yP = float(splitL[2])
                zP = float(splitL[3])
                rad = float(splitL[4])
            except ValueError:
                logger.error(
                    'Error parsing a number of line %d. Read failed.' %
                    (lineNum))
                return
            feats.append(
                ChemicalFeatures.FreeChemicalFeature(
                    fName, fName, Geometry.Point3D(xP, yP, zP)))
            rads.append(rad)
        self._initializeFeats(feats, rads)
def checkConstraints(mol,recConf,feat,filt):
  """ Checks whether a matched ligand feature satisfies all constraints of a filter

  **Arguments**

    - mol: the ligand molecule; atom positions are taken from
      mol.GetConformer()

    - recConf: the receptor conformer the positions of the receptor atoms
      are taken from

    - feat: the matched ligand feature; only its first atom id is used

    - filt: the filter definition. filt[1][0] is the index of the receptor
      atom, filt[1][1] is a sequence with the indices of its neighbour
      atoms. Starting at index 2 the entries are read in groups of three:
      constraint type ("Distance" or "Angle"), lower bound, upper bound.

  **Returns**

    True if every constraint is satisfied, False as soon as one of them is
    not (or if an unknown constraint type is found, which is also logged).

  """
  mC = mol.GetConformer()
  matchId = feat.GetAtomIds()[0]
  # all values in the filters are "anded" so we need to loop over them
  for i in range(2,len(filt),3):
    if filt[i]=="Distance":
      # distance between the matched ligand atom and the receptor atom
      dist = mC.GetAtomPosition(matchId).Distance(recConf.GetAtomPosition(filt[1][0]))
      # Accepted range is: lower < dist <= upper.
      # NOTE(review): flagged as a potential error by SamudBe1 (04AUG2014),
      # who suggested that the accepted range should be
      # lower <= dist < upper. Left unchanged - confirm the intended
      # handling of the boundaries before modifying it.
      if (dist<=filt[i+1]) or (dist>filt[i+2]):
        return False
    elif filt[i]=="Angle":
      # The receptor atom can have multiple neighbour atoms (filt[1][1]) and
      # so can the matched ligand atom. The constraint is satisfied if at
      # least one receptor-side angle AND at least one ligand-side angle lie
      # strictly between the two bounds.
      recPos = recConf.GetAtomPosition(filt[1][0])
      ligPos = mC.GetAtomPosition(matchId)

      # first the receptor angles - there are usually fewer neighbours for
      # receptor atoms. The angle is measured at the receptor atom between
      # the ligand atom and the receptor neighbour.
      toLigand = ligPos-recPos
      fitsRecAngle = False
      for neighIdx in filt[1][1]:
        toNeighbour = recConf.GetAtomPosition(neighIdx)-recPos
        angle = math.degrees(toLigand.AngleTo(toNeighbour))
        if (angle>filt[i+1]) and (angle<filt[i+2]):
          # one fitting angle is enough
          fitsRecAngle = True
          break
      if not fitsRecAngle:
        return False

      # now the ligand angles, measured at the matched ligand atom between
      # each of its neighbours and the receptor atom
      toReceptor = recPos-ligPos
      fitsLigAngle = False
      for nbr in mol.GetAtomWithIdx(matchId).GetNeighbors():
        toNeighbour = mC.GetAtomPosition(nbr.GetIdx())-ligPos
        angle = math.degrees(toNeighbour.AngleTo(toReceptor))
        if (angle>filt[i+1]) and (angle<filt[i+2]):
          fitsLigAngle = True
          break
      if not fitsLigAngle:
        return False

    else:
      # report the constraint that is actually being processed (this used to
      # always print filt[2], i.e. the type of the first constraint)
      logger.error("Requesting a constraint that is not defined %s" % filt[i])
      return False
  # we only reach this position if the ligand matches all queries
  return True
    def SaveState(self, fileName):
        """ Writes this calculator off to a file so that it can be easily loaded later

        The object itself is pickled into the file. If the file cannot be
        opened, an error is logged and nothing is written.

        **Arguments**

          - fileName: the name of the file to be written

        """
        try:
            f = open(fileName, 'wb+')
        except Exception:
            logger.error('cannot open output file %s for writing' % (fileName))
            return
        # the context manager closes the file even if pickling raises
        with f:
            pickle.dump(self, f)
Example #5
0
  def SaveState(self, fileName):
    """ Writes this calculator off to a file so that it can be easily loaded later

     The object itself is pickled (using cPickle) into the file. If the
     file cannot be opened, an error is logged and nothing is written.

     **Arguments**

       - fileName: the name of the file to be written

    """
    try:
      f = open(fileName, 'wb+')
    except Exception:
      logger.error('cannot open output file %s for writing' % (fileName))
      return
    # the context manager closes the file even if pickling raises
    with f:
      cPickle.dump(self, f)
  if options.outF!='-':
    options.outF = file(options.outF,'w+')
  else:
    options.outF = sys.stdout

  # get the ph4 Scoring parameters

  try:
    options.ph4DescOutFile = open(options.ph4DescOutFile,'w')
  except:
    parser.error('Error opening You need to have a ph4DescOutFile file')

  try:
    f = open(options.ph4desc,'r')
  except IOError, err:
    logger.error(err)


  featureOrder=[]  
  splitL = []
  Ph4_Init_value={}
  Ph4_Descriptors={}
  Ph4_location={}
  Ph4_Type={}
  Ph4_radius={}
  Ph4_FeatureType={}

  for line in f:
    line=line.rstrip("\n")
    #(Name,Init_value,Type,radius,x,y,z,Feature_Type)=line.split("\t")
    splitL = line.split(" ")
Example #7
0
    morganRows = []
    fpConn.Commit()
    
  if not options.silent:
    logger.info('Finished.')

if __name__=='__main__':
  options,args = parser.parse_args()
  if options.loadMols:
    if len(args)!=1:
      parser.error('please provide a filename argument')
    dataFilename = args[0]
    try:
      dataFile = open(dataFilename,'r')
    except IOError:
      logger.error('input file %s does not exist'%(dataFilename))
      sys.exit(0)
    dataFile=None

  if not options.outDir:
    prefix = os.path.splitext(dataFilename)[0]
    options.outDir=prefix

  if not os.path.exists(options.outDir):
    try:
      os.mkdir(options.outDir)
    except: 
      logger.error('could not create output directory %s'%options.outDir)
      sys.exit(1)

  if 1:
Example #8
0
def RunSearch(options, queryFilename):
    """Run the search described by ``options`` and write out the results.

    Up to three kinds of query are combined:

      - a substructure query, if ``options.smilesQuery`` or
        ``options.smartsQuery`` is set (or ``options.queryMol`` is provided)
      - a property query (``options.propQuery``), a piece of SQL that is used
        as the ``where`` clause on the molecules table
      - a similarity search for every molecule read from ``queryFilename``;
        this is restricted to the hits of the queries above if there are any

    **Arguments**

      - options: the option object. ``similarityType`` selects fingerprint
        builder, default metric and fingerprint table; ``similarityMetric``
        ('tanimoto', 'dice', 'tversky') optionally overrides the metric.

      - queryFilename: name of the file with the query molecules (read
        according to ``options.molFormat``: 'smiles' or 'sdf'). If it is
        false, no similarity search is done.

    **Output**

      - ``options.outF``: neighbor lists (or the names of the matching
        molecules if there is no similarity search); '-' means stdout, ''
        disables this output
      - ``options.sdfOut`` / ``options.smilesOut``: the hit molecules as SD
        file or SMILES; '-' means stdout

    **Notes**

      - ``sys.exit()`` is called if the query molecule cannot be built or
        the query file cannot be opened.
      - NOTE(review): the SQL statements are assembled by string
        interpolation (mostly via ``locals()``), so the local variable names
        referenced in the format strings must not be renamed, and
        ``options.propQuery`` ends up in the statement verbatim.

    """
    global sigFactory
    # pick fingerprint builder, default metric and fingerprint table
    if options.similarityType == 'AtomPairs':
        fpBuilder = FingerprintUtils.BuildAtomPairFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.pairDbName)
        fpTableName = options.pairTableName
        fpColName = options.pairColName
    elif options.similarityType == 'TopologicalTorsions':
        fpBuilder = FingerprintUtils.BuildTorsionsFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.torsionsDbName)
        fpTableName = options.torsionsTableName
        fpColName = options.torsionsColName
    elif options.similarityType == 'RDK':
        fpBuilder = FingerprintUtils.BuildRDKitFP
        simMetric = DataStructs.FingerprintSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.fpTableName
        if not options.fpColName:
            options.fpColName = 'rdkfp'
        fpColName = options.fpColName
    elif options.similarityType == 'Pharm2D':
        fpBuilder = FingerprintUtils.BuildPharm2DFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.pharm2DTableName
        if not options.fpColName:
            options.fpColName = 'pharm2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = BuildSigFactory(options)
    elif options.similarityType == 'Gobbi2D':
        from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
        fpBuilder = FingerprintUtils.BuildPharm2DFP
        simMetric = DataStructs.TanimotoSimilarity
        dbName = os.path.join(options.dbDir, options.fpDbName)
        fpTableName = options.gobbi2DTableName
        if not options.fpColName:
            options.fpColName = 'gobbi2dfp'
        fpColName = options.fpColName
        FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
    elif options.similarityType == 'Morgan':
        fpBuilder = FingerprintUtils.BuildMorganFP
        simMetric = DataStructs.DiceSimilarity
        dbName = os.path.join(options.dbDir, options.morganFpDbName)
        fpTableName = options.morganFpTableName
        fpColName = options.morganFpColName

    # an explicitly requested metric overrides the default picked above
    extraArgs = {}
    if options.similarityMetric == 'tanimoto':
        simMetric = DataStructs.TanimotoSimilarity
    elif options.similarityMetric == 'dice':
        simMetric = DataStructs.DiceSimilarity
    elif options.similarityMetric == 'tversky':
        simMetric = DataStructs.TverskySimilarity
        extraArgs['tverskyA'] = options.tverskyA
        extraArgs['tverskyB'] = options.tverskyB

    # build the substructure query molecule, if one was requested
    if options.smilesQuery:
        mol = Chem.MolFromSmiles(options.smilesQuery)
        if not mol:
            logger.error('could not build query molecule from smiles "%s"' %
                         options.smilesQuery)
            sys.exit(-1)
        options.queryMol = mol
    elif options.smartsQuery:
        mol = Chem.MolFromSmarts(options.smartsQuery)
        if not mol:
            logger.error('could not build query molecule from smarts "%s"' %
                         options.smartsQuery)
            sys.exit(-1)
        options.queryMol = mol

    # set up the output destinations
    if options.outF == '-':
        outF = sys.stdout
    elif options.outF == '':
        outF = None
    else:
        outF = open(options.outF, 'w+')

    molsOut = False
    if options.sdfOut:
        molsOut = True
        if options.sdfOut == '-':
            sdfOut = sys.stdout
        else:
            sdfOut = open(options.sdfOut, 'w+')
    else:
        sdfOut = None
    if options.smilesOut:
        molsOut = True
        if options.smilesOut == '-':
            smilesOut = sys.stdout
        else:
            smilesOut = open(options.smilesOut, 'w+')
    else:
        smilesOut = None

    # read the query molecules and generate their fingerprints
    if queryFilename:
        try:
            # only checks that the file can be opened; the handle is not
            # needed afterwards, so close it right away
            open(queryFilename, 'r').close()
        except IOError:
            logger.error('could not open query file %s' % queryFilename)
            sys.exit(1)

        if options.molFormat == 'smiles':
            func = GetMolsFromSmilesFile
        elif options.molFormat == 'sdf':
            func = GetMolsFromSDFile

        if not options.silent:
            msg = 'Reading query molecules'
            if fpBuilder: msg += ' and generating fingerprints'
            logger.info(msg)
        probes = []
        i = 0
        nms = []
        for nm, smi, mol in func(queryFilename, None, options.nameProp):
            i += 1
            nms.append(nm)
            if not mol:
                # keep a placeholder so that probes and nms stay aligned
                logger.error('query molecule %d could not be built' % (i))
                probes.append((None, None))
                continue
            if fpBuilder:
                probes.append((mol, fpBuilder(mol)))
            else:
                probes.append((mol, None))
            if not options.silent and not i % 1000:
                logger.info("  done %d" % i)
    else:
        probes = None

    conn = None
    ids = None
    molDbName = os.path.join(options.dbDir, options.molDbName)
    molIdName = options.molIdName
    mConn = DbConnect(molDbName)
    cns = [(x.lower(), y)
           for x, y in mConn.GetColumnNamesAndTypes('molecules')]
    # the first column of the molecules table is used as the id column
    idCol, idTyp = cns[0]
    if options.propQuery or options.queryMol:
        conn = DbConnect(molDbName)
        curs = conn.GetCursor()
        if options.queryMol:
            if not options.silent: logger.info('Doing substructure query')
            if options.propQuery:
                where = 'where %s' % options.propQuery
            else:
                where = ''
            if not options.silent:
                curs.execute('select count(*) from molecules %(where)s' %
                             locals())
                nToDo = curs.fetchone()[0]

            # if a layered-fingerprint table is available, use it to screen
            # out molecules before doing the actual substructure match
            join = ''
            doSubstructFPs = False
            fpDbName = os.path.join(options.dbDir, options.fpDbName)
            if os.path.exists(fpDbName) and not options.negateQuery:
                curs.execute("attach database '%s' as fpdb" % (fpDbName))
                try:
                    curs.execute('select * from fpdb.%s limit 1' %
                                 options.layeredTableName)
                except:
                    # the table cannot be queried: go on without screening
                    pass
                else:
                    doSubstructFPs = True
                    join = 'join fpdb.%s using (%s)' % (
                        options.layeredTableName, idCol)
                    query = LayeredOptions.GetQueryText(options.queryMol)
                    if query:
                        if not where:
                            where = 'where'
                        else:
                            where += ' and'
                        where += ' ' + query

            cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s' % locals()
            curs.execute(cmd)
            row = curs.fetchone()
            nDone = 0
            ids = []
            while row:
                id, molpkl = row
                if not options.zipMols:
                    m = _molFromPkl(molpkl)
                else:
                    m = Chem.Mol(zlib.decompress(molpkl))
                matched = m.HasSubstructMatch(options.queryMol)
                if options.negateQuery:
                    matched = not matched
                if matched:
                    ids.append(id)
                nDone += 1
                if not options.silent and not nDone % 500:
                    if not doSubstructFPs:
                        logger.info(
                            '  searched %d (of %d) molecules; %d hits so far' %
                            (nDone, nToDo, len(ids)))
                    else:
                        logger.info(
                            '  searched through %d molecules; %d hits so far' %
                            (nDone, len(ids)))
                row = curs.fetchone()
            if not options.silent and doSubstructFPs and nToDo:
                nFiltered = nToDo - nDone
                logger.info(
                    '   Fingerprint screenout rate: %d of %d (%%%.2f)' %
                    (nFiltered, nToDo, 100. * nFiltered / nToDo))

        elif options.propQuery:
            if not options.silent: logger.info('Doing property query')
            # only the text before the first ';' is used
            propQuery = options.propQuery.split(';')[0]
            curs.execute(
                'select %(idCol)s from molecules where %(propQuery)s' %
                locals())
            ids = [x[0] for x in curs.fetchall()]
        if not options.silent:
            logger.info('Found %d molecules matching the query' % (len(ids)))

    t1 = time.time()
    if probes:
        if not options.silent: logger.info('Finding Neighbors')
        conn = DbConnect(dbName)
        cns = conn.GetColumnNames(fpTableName)
        curs = conn.GetCursor()

        if ids:
            # restrict the fingerprint pool to the hits of the queries above
            ids = [(x, ) for x in ids]
            curs.execute(
                'create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' %
                locals())
            curs.executemany('insert into _tmpTbl values (?)', ids)
            join = 'join  _tmpTbl using (%(idCol)s)' % locals()
        else:
            join = ''

        if cns[0].lower() != idCol.lower():
            # backwards compatibility to the days when mol tables had a guid and
            # the fps tables did not:
            curs.execute("attach database '%(molDbName)s' as mols" % locals())
            curs.execute("""
  select %(idCol)s,%(fpColName)s from %(fpTableName)s join
      (select %(idCol)s,%(molIdName)s from mols.molecules %(join)s)
    using (%(molIdName)s)
""" % (locals()))
        else:
            curs.execute(
                'select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s'
                % locals())

        def poolFromCurs(curs, similarityMethod):
            # lazily yields (id, fingerprint) for the rows of the cursor
            row = curs.fetchone()
            while row:
                id, pkl = row
                fp = DepickleFP(pkl, similarityMethod)
                yield (id, fp)
                row = curs.fetchone()

        topNLists = GetNeighborLists(probes,
                                     options.topN,
                                     poolFromCurs(curs,
                                                  options.similarityType),
                                     simMetric=simMetric,
                                     simThresh=options.simThresh,
                                     **extraArgs)
        uniqIds = set()
        nbrLists = {}
        for i, nm in enumerate(nms):
            topNLists[i].reverse()
            scores = topNLists[i].GetPts()
            nbrNames = topNLists[i].GetExtras()
            nbrs = []
            for j, nbrGuid in enumerate(nbrNames):
                # stop at the first entry without a neighbor
                if nbrGuid is None:
                    break
                else:
                    uniqIds.add(nbrGuid)
                    nbrs.append((nbrGuid, scores[j]))
            nbrLists[(i, nm)] = nbrs
        t2 = time.time()
        if not options.silent:
            logger.info('The search took %.1f seconds' % (t2 - t1))

        if not options.silent: logger.info('Creating output')

        # map the ids of all neighbors found to molecule names
        curs = mConn.GetCursor()
        ids = list(uniqIds)

        ids = [(x, ) for x in ids]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' %
                     locals())
        curs.executemany('insert into _tmpTbl values (?)', ids)
        curs.execute(
            'select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'
            % locals())
        nmDict = {}
        for guid, id in curs.fetchall():
            nmDict[guid] = str(id)

        ks = list(nbrLists.keys())
        ks.sort()
        if not options.transpose:
            # one line per query: name followed by (neighbor, score) pairs
            for i, nm in ks:
                nbrs = nbrLists[(i, nm)]
                nbrTxt = options.outputDelim.join([nm] + [
                    '%s%s%.3f' % (nmDict[id], options.outputDelim, score)
                    for id, score in nbrs
                ])
                if outF: print(nbrTxt, file=outF)
        else:
            # one column pair per query, one row per neighbor rank
            labels = [
                '%s%sSimilarity' % (x[1], options.outputDelim) for x in ks
            ]
            if outF: print(options.outputDelim.join(labels), file=outF)
            for i in range(options.topN):
                outL = []
                for idx, nm in ks:
                    nbr = nbrLists[(idx, nm)][i]
                    outL.append(nmDict[nbr[0]])
                    outL.append('%.3f' % nbr[1])
                if outF: print(options.outputDelim.join(outL), file=outF)
    else:
        # no similarity search: just report the molecules matching the queries
        if not options.silent: logger.info('Creating output')
        curs = mConn.GetCursor()
        ids = [(x, ) for x in set(ids)]
        curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)' %
                     locals())
        curs.executemany('insert into _tmpTbl values (?)', ids)
        molIdName = options.molIdName
        curs.execute(
            'select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'
            % locals())
        nmDict = {}
        for guid, id in curs.fetchall():
            nmDict[guid] = str(id)
        if outF: print('\n'.join(nmDict.values()), file=outF)
    if molsOut and ids:
        molDbName = os.path.join(options.dbDir, options.molDbName)
        cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
        # make sure the pickled molecule is the last column selected
        if cns[-1] != 'molpkl':
            cns.remove('molpkl')
            cns.append('molpkl')

        # _tmpTbl was filled with the ids of the hits above (on mConn)
        curs = mConn.GetCursor()
        cnText = ','.join(cns)
        curs.execute(
            'select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)' %
            locals())

        row = curs.fetchone()
        while row:
            row = list(row)
            m = _molFromPkl(row[-1])
            guid = row[0]
            nm = nmDict[guid]
            if sdfOut:
                m.SetProp('_Name', nm)
                print(Chem.MolToMolBlock(m), file=sdfOut)
                # every column between the id and the pickle becomes an SD
                # data field
                for i in range(1, len(cns) - 1):
                    pn = cns[i]
                    pv = str(row[i])
                    # this used to be the Python 2 statement
                    # "print >> sdfOut, ...", which raises a TypeError on
                    # Python 3
                    print('> <%s>\n%s\n' % (pn, pv), file=sdfOut)
                print('$$$$', file=sdfOut)
            if smilesOut:
                smi = Chem.MolToSmiles(m, options.chiralSmiles)
                print('%s %s' % (smi, str(row[1])), file=smilesOut)
            row = curs.fetchone()
    if not options.silent: logger.info('Done!')
Example #9
0
def RunSearch(options,queryFilename):
  """ Runs the search described by the options and writes out the results

  Up to three kinds of query are combined:

    - a substructure query, if options.smilesQuery or options.smartsQuery
      is set (or options.queryMol is provided)
    - a property query (options.propQuery), a piece of SQL that is used as
      the where clause on the molecules table
    - a similarity search for every molecule read from queryFilename; this
      is restricted to the hits of the queries above if there are any

  **Arguments**

    - options: the option object. similarityType selects fingerprint
      builder, default metric and fingerprint table; similarityMetric
      ('tanimoto', 'dice', 'tversky') optionally overrides the metric.

    - queryFilename: name of the file with the query molecules (read
      according to options.molFormat: 'smiles' or 'sdf'). If it is false,
      no similarity search is done.

  **Notes**

    - this is Python 2 code (print statement, file(), list-returning
      dict.keys())
    - sys.exit() is called if the query molecule cannot be built or the
      query file cannot be opened
    - the SQL statements are assembled with "%locals()", so the local
      variable names used in the format strings must not be renamed

  """
  global sigFactory
  # pick fingerprint builder, default metric and fingerprint table
  if options.similarityType=='AtomPairs':
    fpBuilder=FingerprintUtils.BuildAtomPairFP
    simMetric=DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir,options.pairDbName)
    fpTableName = options.pairTableName
    fpColName = options.pairColName
  elif options.similarityType=='TopologicalTorsions':
    fpBuilder=FingerprintUtils.BuildTorsionsFP
    simMetric=DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir,options.torsionsDbName)
    fpTableName = options.torsionsTableName
    fpColName = options.torsionsColName
  elif options.similarityType=='RDK':
    fpBuilder=FingerprintUtils.BuildRDKitFP
    simMetric=DataStructs.FingerprintSimilarity
    dbName = os.path.join(options.dbDir,options.fpDbName)
    fpTableName = options.fpTableName
    if not options.fpColName:
      options.fpColName='rdkfp'
    fpColName = options.fpColName
  elif options.similarityType=='Pharm2D':
    fpBuilder=FingerprintUtils.BuildPharm2DFP
    simMetric=DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir,options.fpDbName)
    fpTableName = options.pharm2DTableName
    if not options.fpColName:
      options.fpColName='pharm2dfp'
    fpColName = options.fpColName
    FingerprintUtils.sigFactory = BuildSigFactory(options)
  elif options.similarityType=='Gobbi2D':
    from rdkit.Chem.Pharm2D import Gobbi_Pharm2D
    fpBuilder=FingerprintUtils.BuildPharm2DFP
    simMetric=DataStructs.TanimotoSimilarity
    dbName = os.path.join(options.dbDir,options.fpDbName)
    fpTableName = options.gobbi2DTableName
    if not options.fpColName:
      options.fpColName='gobbi2dfp'
    fpColName = options.fpColName
    FingerprintUtils.sigFactory = Gobbi_Pharm2D.factory
  elif options.similarityType=='Morgan':
    fpBuilder=FingerprintUtils.BuildMorganFP
    simMetric=DataStructs.DiceSimilarity
    dbName = os.path.join(options.dbDir,options.morganFpDbName)
    fpTableName = options.morganFpTableName
    fpColName = options.morganFpColName


  # an explicitly requested metric overrides the default picked above
  extraArgs={}
  if options.similarityMetric=='tanimoto':
    simMetric = DataStructs.TanimotoSimilarity
  elif options.similarityMetric=='dice':
    simMetric = DataStructs.DiceSimilarity
  elif options.similarityMetric=='tversky':
    simMetric = DataStructs.TverskySimilarity
    extraArgs['tverskyA']=options.tverskyA
    extraArgs['tverskyB']=options.tverskyB

  # build the substructure query molecule, if one was requested
  if options.smilesQuery:
    mol=Chem.MolFromSmiles(options.smilesQuery)
    if not mol:
      logger.error('could not build query molecule from smiles "%s"'%options.smilesQuery)
      sys.exit(-1)
    options.queryMol = mol
  elif options.smartsQuery:
    mol=Chem.MolFromSmarts(options.smartsQuery)
    if not mol:
      logger.error('could not build query molecule from smarts "%s"'%options.smartsQuery)
      sys.exit(-1)
    options.queryMol = mol

  # set up the output destinations: '-' means stdout, an empty outF
  # disables the text output
  if options.outF=='-':
    outF=sys.stdout
  elif options.outF=='':
    outF=None
  else:
    outF = file(options.outF,'w+')
  
  molsOut=False
  if options.sdfOut:
    molsOut=True
    if options.sdfOut=='-':
      sdfOut=sys.stdout
    else:
      sdfOut = file(options.sdfOut,'w+')
  else:
    sdfOut=None
  if options.smilesOut:
    molsOut=True
    if options.smilesOut=='-':
      smilesOut=sys.stdout
    else:
      smilesOut = file(options.smilesOut,'w+')
  else:
    smilesOut=None

  # read the query molecules and generate their fingerprints
  if queryFilename:
    # tmpF is not used afterwards: opening the file only checks that it is
    # readable
    try:
      tmpF = file(queryFilename,'r')
    except IOError:
      logger.error('could not open query file %s'%queryFilename)
      sys.exit(1)

    if options.molFormat=='smiles':
      func=GetMolsFromSmilesFile
    elif options.molFormat=='sdf':
      func=GetMolsFromSDFile

    if not options.silent:
      msg='Reading query molecules'
      if fpBuilder: msg+=' and generating fingerprints'
      logger.info(msg)
    probes=[]
    i=0
    nms=[]
    for nm,smi,mol in func(queryFilename,None,options.nameProp):
      i+=1
      nms.append(nm)
      if not mol:
        # keep a placeholder so that probes and nms stay aligned
        logger.error('query molecule %d could not be built'%(i))
        probes.append((None,None))
        continue
      if fpBuilder:
        probes.append((mol,fpBuilder(mol)))
      else:
        probes.append((mol,None))
      if not options.silent and not i%1000:
        logger.info("  done %d"%i)
  else:
    probes=None

  conn=None
  idName = options.molIdName
  ids=None
  names=None
  molDbName = os.path.join(options.dbDir,options.molDbName)
  molIdName = options.molIdName
  mConn = DbConnect(molDbName)
  cns = [(x.lower(),y) for x,y in mConn.GetColumnNamesAndTypes('molecules')]
  # the first column of the molecules table is used as the id column
  idCol,idTyp=cns[0]
  if options.propQuery or options.queryMol:
    conn = DbConnect(molDbName)
    curs = conn.GetCursor()
    if options.queryMol:
      if not options.silent: logger.info('Doing substructure query')
      if options.propQuery:
        where='where %s'%options.propQuery
      else:
        where=''
      if not options.silent:
        curs.execute('select count(*) from molecules %(where)s'%locals())
        nToDo = curs.fetchone()[0]

      # if a layered-fingerprint table is available, use it to screen out
      # molecules before doing the actual substructure match
      join=''        
      doSubstructFPs=False
      fpDbName = os.path.join(options.dbDir,options.fpDbName)
      if os.path.exists(fpDbName) and not options.negateQuery :
        curs.execute("attach database '%s' as fpdb"%(fpDbName))
        try:
          curs.execute('select * from fpdb.%s limit 1'%options.layeredTableName)
        except:
          # the table cannot be queried: go on without screening
          pass
        else:
          doSubstructFPs=True
          join = 'join fpdb.%s using (%s)'%(options.layeredTableName,idCol)
          query = LayeredOptions.GetQueryText(options.queryMol)
          if query:
            if not where:
              where='where'
            else:
              where += ' and'
            where += ' '+query

      cmd = 'select %(idCol)s,molpkl from molecules %(join)s %(where)s'%locals()
      curs.execute(cmd)
      row=curs.fetchone()
      nDone=0
      ids=[]
      while row:
        id,molpkl = row
        if not options.zipMols:
          m = Chem.Mol(str(molpkl))
        else:
          m = Chem.Mol(zlib.decompress(str(molpkl)))
        matched=m.HasSubstructMatch(options.queryMol)
        if options.negateQuery:
          matched = not matched
        if matched:
          ids.append(id)
        nDone+=1
        if not options.silent and not nDone%500:
          if not doSubstructFPs:
            logger.info('  searched %d (of %d) molecules; %d hits so far'%(nDone,nToDo,len(ids)))
          else:
            logger.info('  searched through %d molecules; %d hits so far'%(nDone,len(ids)))
        row=curs.fetchone()
      if not options.silent and doSubstructFPs and nToDo:
        nFiltered = nToDo-nDone
        logger.info('   Fingerprint screenout rate: %d of %d (%%%.2f)'%(nFiltered,nToDo,100.*nFiltered/nToDo))

    elif options.propQuery:
      if not options.silent: logger.info('Doing property query')
      # only the text before the first ';' is used
      propQuery=options.propQuery.split(';')[0]
      curs.execute('select %(idCol)s from molecules where %(propQuery)s'%locals())
      ids = [x[0] for x in curs.fetchall()]
    if not options.silent:
      logger.info('Found %d molecules matching the query'%(len(ids)))

  t1=time.time()
  if probes:
    if not options.silent: logger.info('Finding Neighbors')
    conn = DbConnect(dbName)
    cns = conn.GetColumnNames(fpTableName)
    curs = conn.GetCursor()

    if ids:
      # restrict the fingerprint pool to the hits of the queries above
      ids = [(x,) for x in ids]
      curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
      curs.executemany('insert into _tmpTbl values (?)',ids)
      join='join  _tmpTbl using (%(idCol)s)'%locals()
    else:
      join=''

    if cns[0].lower() != idCol.lower():
      # backwards compatibility to the days when mol tables had a guid and
      # the fps tables did not:
      curs.execute("attach database '%(molDbName)s' as mols"%locals())
      curs.execute("""
  select %(idCol)s,%(fpColName)s from %(fpTableName)s join
      (select %(idCol)s,%(molIdName)s from mols.molecules %(join)s)
    using (%(molIdName)s)
"""%(locals()))
    else:
      curs.execute('select %(idCol)s,%(fpColName)s from %(fpTableName)s %(join)s'%locals())
    # lazily yields (id, fingerprint) for the rows of the cursor
    def poolFromCurs(curs,similarityMethod):
      row = curs.fetchone()
      while row:
        id,pkl = row
        fp = DepickleFP(str(pkl),similarityMethod)
        yield (id,fp)
        row = curs.fetchone()
    topNLists = GetNeighborLists(probes,options.topN,poolFromCurs(curs,options.similarityType),
                                 simMetric=simMetric,simThresh=options.simThresh,**extraArgs)
    uniqIds=set()
    nbrLists = {}
    for i,nm in enumerate(nms):
      topNLists[i].reverse()
      scores=topNLists[i].GetPts()
      nbrNames = topNLists[i].GetExtras()
      nbrs = []
      for j,nbrGuid in enumerate(nbrNames):
        # stop at the first entry without a neighbor
        if nbrGuid is None:
          break
        else:
          uniqIds.add(nbrGuid)
          nbrs.append((nbrGuid,scores[j]))
      nbrLists[(i,nm)] = nbrs
    t2=time.time()
    if not options.silent: logger.info('The search took %.1f seconds'%(t2-t1))
    
    if not options.silent: logger.info('Creating output')

    
    # map the ids of all neighbors found to molecule names
    curs = mConn.GetCursor()
    ids = list(uniqIds)

    ids = [(x,) for x in ids]
    curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
    curs.executemany('insert into _tmpTbl values (?)',ids)
    curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'%locals())
    nmDict={}
    for guid,id in curs.fetchall():
      nmDict[guid]=str(id)
    
    ks = nbrLists.keys()
    ks.sort()
    if not options.transpose:
      # one line per query: name followed by (neighbor, score) pairs
      for i,nm in ks:
        nbrs= nbrLists[(i,nm)]
        nbrTxt=options.outputDelim.join([nm]+['%s%s%.3f'%(nmDict[id],options.outputDelim,score) for id,score in nbrs])
        if outF: print >>outF,nbrTxt
    else:
      # one column pair per query, one row per neighbor rank
      labels = ['%s%sSimilarity'%(x[1],options.outputDelim) for x in ks]
      if outF: print >>outF,options.outputDelim.join(labels)
      for i in range(options.topN):
        outL = []
        for idx,nm in ks:
          nbr = nbrLists[(idx,nm)][i]
          outL.append(nmDict[nbr[0]])
          outL.append('%.3f'%nbr[1])
        if outF: print >>outF,options.outputDelim.join(outL)
  else:
    # no similarity search: just report the molecules matching the queries
    if not options.silent: logger.info('Creating output')
    curs = mConn.GetCursor()
    ids = [(x,) for x in set(ids)]
    curs.execute('create temporary table _tmpTbl (%(idCol)s %(idTyp)s)'%locals())
    curs.executemany('insert into _tmpTbl values (?)',ids)
    molIdName = options.molIdName
    curs.execute('select %(idCol)s,%(molIdName)s from molecules join _tmpTbl using (%(idCol)s)'%locals())
    nmDict={}
    for guid,id in curs.fetchall():
      nmDict[guid]=str(id)
    if outF: print >>outF,'\n'.join(nmDict.values())
  if molsOut and ids:
    molDbName = os.path.join(options.dbDir,options.molDbName)
    cns = [x.lower() for x in mConn.GetColumnNames('molecules')]
    # make sure the pickled molecule is the last column selected
    if cns[-1]!='molpkl':
      cns.remove('molpkl')
      cns.append('molpkl')

    # _tmpTbl was filled with the ids of the hits above (on mConn)
    curs = mConn.GetCursor()
    #curs.execute('create temporary table _tmpTbl (guid integer)'%locals())
    #curs.executemany('insert into _tmpTbl values (?)',ids)
    cnText=','.join(cns)
    curs.execute('select %(cnText)s from molecules join _tmpTbl using (%(idCol)s)'%locals())

    row=curs.fetchone()
    molD = {}
    while row:
      row = list(row)
      pkl = row[-1]
      m = Chem.Mol(str(pkl))
      guid = row[0]
      nm = nmDict[guid]
      if sdfOut:
        m.SetProp('_Name',nm)
        print >>sdfOut,Chem.MolToMolBlock(m)
        # every column between the id and the pickle becomes an SD data field
        for i in range(1,len(cns)-1):
          pn = cns[i]
          pv = str(row[i])
          print >>sdfOut,'> <%s>\n%s\n'%(pn,pv)
        print >>sdfOut,'$$$$'
      if smilesOut:
        smi=Chem.MolToSmiles(m,options.chiralSmiles)        
      if smilesOut:
        print >>smilesOut,'%s %s'%(smi,str(row[1]))
      row=curs.fetchone()
  if not options.silent: logger.info('Done!')
  # get the input data
  # the receptor
  try:
    rec = Chem.MolFromMol2File(options.recF,removeHs=False)
    # setup the conformer for the receptor
    recConf = rec.GetConformer()
  except:
    print ("Problem reading receptor from %s" % options.recF)
    print "Error:", sys.exc_info()[0]
    quit()

  try:
    f = open(options.ph4F,'r')
  except IOError, err:
    logger.error(err)
  ph4Filters=[]
  for line in f:
    line=line.rstrip("\n")
    splitL=line.split(" ")
    # modify the types from str to the relevant types
    splitL[1]=int(splitL[1])
    # I am not going to worry about "Representation Errors"!
    for i in range(2,len(splitL),3):
      splitL[i+1]=float(splitL[i+1])
      splitL[i+2]=float(splitL[i+2])
    ph4Filters.append(splitL) 
      
  if options.scoreFilter:
    try:
      f = open(options.scoreF,'r')
Example #11
0
    logger.info(f'testing {nm} queries')
    t1 = time.time()
    nPossible = 0
    nTested = 0
    nFound = 0
    nErrors = 0
    for i, fragfp in enumerate(qfps):
        for j, mfp in enumerate(mfps):
            nPossible += 1
            if args.validateResults:
                matched = mols[j].HasSubstructMatch(qs[i])
                fpMatch = DataStructs.AllProbeBitsMatch(fragfp, mfp)
                if fpMatch:
                    nTested += 1
                if matched:
                    nFound += 1
                    if not fpMatch:
                        nErrors += 1
                        logger.error(f"ERROR: mol {j} query {i}")
            else:
                if DataStructs.AllProbeBitsMatch(fragfp, mfp):
                    nTested += 1
                    if mols[j].HasSubstructMatch(qs[i]):
                        nFound += 1
    t2 = time.time()
    ts.append(t2 - t1)
    logger.info(
        f'Results{len(ts)}: {t2-t1 : .2f} seconds. {nTested} tested ({nTested/nPossible :.4f} of total), {nFound} found, {nFound/nTested : .2f} accuracy. {nErrors} errors.'
    )

print(f"| {rdkit.__version__} | {' | '.join(['%.1f' % x for x in ts])} |")