Ejemplo n.º 1
0
    def RunMPI(self, distData):
        """Run the distributed calculation for this MPI rank.

        Every rank first takes part in a broadcast from rank 0; the time
        spent in that call is added to ``self.IdleTime``.  Rank 0 then hands
        ``distData`` to ``self.ScatterDriver`` and returns its result.  All
        other ranks loop: receive a slice from rank 0, apply ``self.DistFunc``
        to each item, send ``signaldone`` followed by the results back to
        rank 0, and stop once a message tagged ``QUIT`` arrives.

        Returns:
            The value of ``self.ScatterDriver(distData)`` on rank 0; ``None``
            (implicitly) on every other rank.
        """
        starttime = time.time()
        # Collective call with root 0; the payload is a one-element list
        # holding len(distData).
        # NOTE(review): len(distData) is evaluated on every rank, so distData
        # must support len() on the workers as well -- confirm with callers.
        nData = self.comm.bcast([len(distData)], root=0)
        self.IdleTime += time.time() - starttime
        if self.rank == 0:
            return self.ScatterDriver(distData)

        # Status object filled in by recv() so the message tag can be read.
        stats = MPI.Status()

        # NOTE(review): `self.verbose` is called here but tested as a plain
        # attribute inside the loop below -- one of the two usages is
        # presumably wrong; confirm how `verbose` is defined on the class.
        if self.verbose():
            # NOTE(review): nData is whatever bcast returned (presumably the
            # one-element list), so this likely prints e.g. "[42]" -- confirm.
            print 'Starting calculation (' + str(nData) + ' total inputs)'

        while True:
            output.StartTimer('WAITING')
            if self.verbose:
                print 'Waiting for data ...'
            sys.stdout.flush()
            # Receive the next message from rank 0, whatever its tag.
            SLICE = self.comm.recv(source=0, tag=MPI.ANY_TAG, status=stats)
            output.EndTimer('WAITING')

            # A message tagged QUIT ends the worker loop.
            if stats.Get_tag() == QUIT:
                break
            if self.verbose:
                print 'Working on ' + str(len(SLICE)) + ' data slices'
                sys.stdout.flush()
            results = [self.DistFunc(item) for item in SLICE]

            # Send `signaldone` to rank 0 first (timed as waiting), then the
            # results themselves.
            output.StartTimer('WAITING')
            self.comm.Send(signaldone, dest=0)
            output.EndTimer('WAITING')

            self.comm.send(results, dest=0)

        # NOTE(review): everywhere else this method flushes `sys.stdout`;
        # confirm the instance really has a `stdout` attribute.
        self.stdout.flush()
Ejemplo n.º 2
0
def ScatterCoords(mols):

    if not pl.mpi:
        output.StartTimer('COORDS')
        for mol in mols:
            if not mol.HasProp('coords'):
                SetCoords(mol)
        output.EndTimer('COORDS')

        return None

    needCalc = [mol for mol in mols if not mol.HasProp('coords')]
    if len(needCalc) == 0:
        return None

    # Check if values are already computed
    if mprms.UseMongo:
        import mongoserver as mongo

        needSMI = [Chem.MolToSmiles(mol) for mol in needCalc]
        myMongo = mongo.LookupDB(metric, needSMI)
        for v, m in zip(myMongo, needCalc):
            if v is not None:
                SetListProp(m, 'coords', v)

        oldn = len(needCalc)
        needCalc = [mol for mol in mols if not mol.HasProp('coords')]
        print 'Used %d memorized coordinate values' % (oldn - len(needCalc))

        if len(needCalc) == 0:
            return None

    output.StartTimer('COORDS')
    print 'Scattering chemical space coordinate calculation ...', len(needCalc)
    pl.MyTask.SetFunction(MPICoordCalc)
    coords = pl.MyTask.RunMPI(needCalc)

    for coord, mol in zip(coords, needCalc):
        SetListProp(mol, 'coords', coord)

    # Update values in mongo database
    if mprms.UseMongo:
        needSMI = [Chem.MolToSmiles(mol) for mol in needCalc]
        mongo.UpdateDB(
            metric,
            {s: GetListProp(m, 'coords')
             for s, m in zip(needSMI, needCalc)})
        print '%d new memorized coordinate values' % len(needCalc)

    output.EndTimer('COORDS')
Ejemplo n.º 3
0
def ScatterAssign(mols):
    toassign = [m for m in mols if not m.HasProp('gridcoord')]
    if len(toassign) == 0: return

    oput.StartTimer('GRID ASSIGN')
    if len(toassign) < ChunkSize or not pl.mpi:
        assignments = grid(toassign)
        for assign, mol in zip(assignments, toassign):
            print "type(assign):", type(assign)
            print "assign:", assign
            print[type(item) for item in assign]
            newassign = tuple(map(float, assign))
            SetListProp(mol, 'gridcoord', newassign)

    else:
        print 'Scattering grid assignments ...'
        scatter = [(grid,
                    [m.GetProp('coords') for m in toassign[i:i + ChunkSize]])
                   for i in xrange(0, len(toassign), ChunkSize)]
        if pl.verbose:
            print len(scatter)

        pl.MyTask.SetFunction(MPIGridAssign)
        assigns = pl.MyTask.RunMPI(scatter)
        for mol, gridcoord in zip(toassign, (a for x in assigns for a in x)):
            mol.SetProp('gridcoord', gridcoord)

    oput.EndTimer('GRID ASSIGN')
Ejemplo n.º 4
0
def WriteWatchFolder(mylib, wrotepool, itnum):
    """Dump the screened molecules to a file and move it into the watch folder.

    The file is named ``WatchPrefix + str(itnum) + '.oeb.gz'``, written in the
    current directory by ``DumpMols`` and then moved to
    ``WatchFolder + filename``.  The destination path is added to
    ``NoReadFiles``.

    Args:
        mylib: passed to ``LMMScreener`` together with ``wrotepool``.
        wrotepool: passed to ``LMMScreener``.
        itnum: iteration number used in the file name.

    Raises:
        Whatever ``shutil.move`` raises when the move fails.  The previous
        ``os.system('mv ...')`` call ignored failures silently.
    """
    # Local import so this function does not depend on the file's import block.
    import shutil

    oput.StartTimer('WRITE')
    filename = WatchPrefix + str(itnum) + '.oeb.gz'
    # Plain concatenation is kept from the original.
    # NOTE(review): this presumably relies on WatchFolder ending in a path
    # separator -- confirm.
    destination = WatchFolder + filename
    writemols = list(LMMScreener(mylib, wrotepool))
    DumpMols(writemols, filename)
    for mol in writemols:
        StripData(mol)
    # shutil.move instead of a shell string: no quoting problems with spaces
    # or shell metacharacters in the paths, and failures are not ignored.
    shutil.move(filename, destination)
    NoReadFiles.add(destination)
    oput.EndTimer('WRITE')
Ejemplo n.º 5
0
def ComputeObjectives(mols_tocalc, gen=0):
    if CINDES_interface:
        print 'calculating via CINDES program'
        output.StartTimer('CINDES')
        qc.calculate(mols_tocalc, gen=gen)
        output.EndTimer('CINDES')
    elif callable(fitnessfunction):
        for mol in mols_tocalc:
            value = fitnessfunction(mol)
            mol.SetDoubleProp('Objective', float(value))
    else:  #Serial
        raise NotImplementedError(
            'only CINDES objectives are supported currently')
Ejemplo n.º 6
0
    def select(self, pool):
        nSwap = 0

        # 2. we assign molecular coordinates to the molecules in pool
        coords = self.GetCoords(pool)

        # 3. Do some initializations:
        scores = np.array([mol.GetDoubleProp('Objective') for mol in pool])

        # 4 Calculate the diversity measure for the pure diversity subset
        # 4.1 First Select a pure diversity based sample subset
        picks = self.GetPureDiversityPicks(coords)
        templib = [pool[i] for i in picks]
        # 4.2
        coords = self.NormCoords(coords, templib)
        # 4.3 calculate the average distance
        AveDistSqr = self.GetAveDistSqr(templib)
        # 4.4 calculate the average objective value
        aveobj = sum(m.GetDoubleProp('Objective')
                     for m in templib) / (float(len(templib)))
        print 'Average objective value of pure diversity subset:', aveobj

        output.StartTimer("OBJECTIVE MXMN")
        print 'Optimizing library ...',

        distances = self.GetDistances(coords)

        # scale scores and diversity-values:
        from sklearn import preprocessing
        distances = preprocessing.scale(distances)
        scores = preprocessing.scale(scores)

        fitness = self.cdiv * distances + self.pdiv * scores * minsign
        fittests = np.argsort(fitness)

        newlib = []
        for i in fittests[-self.subsetSize:]:
            newlib.append(pool[i])
        newlib.sort(key=lambda x: x.GetDoubleProp('Objective'),
                    reverse=not minimize)
        print 'Average objective value after optimization: ', sum(
            np.array([m.GetDoubleProp('Objective')
                      for m in newlib]) / float(len(newlib)))

        output.EndTimer('OBJECTIVE MXMN')
        return newlib
Ejemplo n.º 7
0
def GridDiversity(oldmols, newmols, pcabasis=None, molgrid=None):
    global grid, nCellDims, nBins

    if DE:
        print "nCellDims:", nCellDims
        print "nBins:", nBins

    if molgrid is None:
        molgrid = dict()
        mols = oldmols + newmols
    else:
        mols = newmols

    if grid is None:
        print 'Creating new grid'
        grid = PCAGrid(mols,
                       nCellDims,
                       nBins,
                       pcabasis=pcabasis,
                       scaleBins=BinsByVariance)

    ScatterCoords([m for m in mols if not m.HasProp('gridcoord')])
    ScatterAssign(mols)
    ScatterDecider(mols)

    nNew = 0

    oput.StartTimer('GRID PICKS')
    for mol in mols:
        index = mol.GetProp('gridcoord')
        if molgrid.has_key(index):
            if molgrid[index].GetDoubleProp('decider') < mol.GetDoubleProp(
                    'decider'):
                molgrid[index] = mol
                nNew += 1
        else:
            molgrid[index] = mol
            nNew += 1

    print len(molgrid),'/',np.product(grid.nbins),'occupied cells (',\
          nNew,'new)'
    oput.EndTimer('GRID PICKS')

    return len(molgrid), molgrid.values(), molgrid
Ejemplo n.º 8
0
def ScatterDecider(mols):
    toCompute = [m for m in mols if not m.HasProp('decider')]
    if len(toCompute) == 0: return
    else:
        pass
        #print "len(toCompute):", len(toCompute)
        #print 'C1=CC(=O)C(=O)C=C1CC=O' in [ oe.OECreateCanSmiString(m) for m in mols ]

    oput.StartTimer('BIAS FUNCTION')
    if not pl.mpi or fastDecider:
        for m in toCompute:
            print m.SetDoubleProp('decider', decider(m))
    else:
        print 'Scattering bias function ...'
        sendmols = [pl.SendMol(m) for m in toCompute]
        pl.MyTask.SetFunction(MPIDecider)
        vals = pl.MyTask.RunMPI(sendmols)
        for v, m in zip(vals, toCompute):
            m.SetDoubleProp('decider', v)
    oput.EndTimer('BIAS FUNCTION')
Ejemplo n.º 9
0
def ReadWatchFolder(mylib, wrotepool):
    global mymaxit

    oput.StartTimer('READ')
    if pl.mpi:
        answer = ScatterReadWatchFolder(mylib, wrotepool)
        oput.EndTimer('READ')
        return answer

    mysmi = set(m.GetProp('isosmi') for m in mylib)

    print "Reading additional molecules: ",
    for file in glob(WatchFolder + '*.oeb.gz'):

        if basename(file)[:len(WatchPrefix)] == WatchPrefix:
            mymaxit = extractnum(basename(file))

        if basename(file, False) in NoReadFiles: continue

        nNew = 0
        for nmol, newmol in enumerate(GetLowMemMols(file)):
            smi = newmol.GetProp('isosmi')
            if not smi in mysmi:
                nNew += 1
                mysmi.add(smi)
                wrotepool.add(smi)
                StripData(newmol)
                mylib.append(newmol)

        NoReadFiles.add(basename(file, False))
        print basename(file,
                       True) + ' (' + str(nNew) + '/' + str(nmol + 1) + ')',

    print 'done.'
    oput.EndTimer('READ')
    return mymaxit
Ejemplo n.º 10
0
 def GetAveDistSqr(templib):
     output.StartTimer('NN DIST CALC')
     AveDistSqr = (1.0 - similarity.NNSimilarity(templib, average=True))**2
     output.EndTimer('NN DIST CALC')
     print 'average diversity value of pure diversity subset:', AveDistSqr
     return AveDistSqr
Ejemplo n.º 11
0
    def select(self, pool):
        nSwap = 0
 
        # 2. we assign molecular coordinates to the molecules in pool
        coords = self.GetCoords(pool)
 
        # 3. Do some initializations:
        scores = np.ma.array([mol.GetDoubleProp('Objective') for mol in pool])
 
        # 4 Calculate the diversity measure for the pure diversity subset
        # 4.1 First Select a pure diversity based sample subset
        picks = self.GetPureDiversityPicks(coords)
        templib = [pool[i] for i in picks]
        # 4.2
        coords = self.NormCoords(coords, templib)
        # 4.3 calculate the average distance
        AveDistSqr = self.GetAveDistSqr(templib)
        # 4.4 calculate the average objective value
        aveobj = sum( m.GetDoubleProp('Objective') for m in templib) / (float(len(templib))) 
        print 'Average objective value of pure diversity subset:', aveobj
 
        ############################ NEIGHBORHOOD MAXIMIN ##################
        #Discard the original subset; instead, pick the BEST SCORING COMPOUND
        #within the neighborhood of each compound
 
        # make a masked array so we don't pick already picked ones 
        pickmask = np.zeros(len(pool), dtype=np.bool)
        for i in picks:
            pickmask[i] = True
        # mask every value larger than TargetScore.
        targetmask = np.ma.getmask(
            np.ma.masked_greater(scores * minsign, TargetScore * minsign))
        print "targetmask:", targetmask
        print "scores*minsign:", scores * minsign
        print "TargetScore * minsign:", TargetScore * minsign
 
        output.StartTimer("OBJECTIVE MXMN")
        print 'Optimizing library ...',
        newlib = []
 
        ######### MAIN LOOP #########
        for ipick in picks:

            myscore = pool[ipick].GetDoubleProp('Objective')
            if not self.selectfittest:
                #Skip compounds already at target
                if myscore * minsign <= TargetScore * minsign:
                    newlib.append(pool[ipick])
                    continue
 
            #Mask compounds outside of current neighborhood
            #or that have already been picked
            distsqr = self.GetDistSqr(coords, ipick)
            neighbor_pick_mask = self.GetNeighborPickMask(distsqr, AveDistSqr, pickmask)

            mask = _array_or(targetmask, neighbor_pick_mask)
            if self.selectfittest:
                distsqr = np.ma.masked_array(scores * -1.0 * minsign, mask=mask)
            else:
                 #If any compounds in the neighborhood hit the target, pick
                 #the closest one
                 # get dist's of mols not already picked, in neighborhood
                 # and objective value above cutoff value
                 distsqr = np.ma.masked_array(distsqr, mask=mask)
            if distsqr.count() > 0:
                print "I'm here!"
                # change pick for best pick:
                inewpick = np.argmin(distsqr)
                newlib.append(pool[inewpick])
                # adjust pickmask
                pickmask[inewpick] = True
                pickmask[ipick] = False
                nSwap += 1
                continue
 
            #If there is no compound hitting the target, pick the
            #best one in the neigbhorhood
            # get only scores in neighborhood not already picked
            scores.mask = neighbor_pick_mask
            # and get the best value even if not fullfilling cutoff
            inewpick = np.argmin(minsign * scores)
            # if inewpick is different from the current ipick:
            if scores[inewpick] * minsign < myscore * minsign:
                newlib.append(pool[inewpick])
                pickmask[inewpick] = True
                pickmask[ipick] = False
                nSwap += 1
            else:
                newlib.append(pool[ipick])
 
        #####
        #Done with optimizing maximin
        newlib.sort(
            key=lambda x: x.GetDoubleProp('Objective'), reverse=not minimize)
        print 'swapped', nSwap, '/', len(newlib), 'compounds'
        print 'Average objective value after optimization: ', sum(
                np.array([m.GetDoubleProp('Objective') for m in newlib])/float(len(newlib)))
 
        output.EndTimer('OBJECTIVE MXMN')
        output.obstats['nSwap']=nSwap
        return newlib
Ejemplo n.º 12
0
 def GetAveDistSqr(templib):
     output.StartTimer('NN DIST CALC')
     AveDistSqr = distance.AveNNDistance(templib)
     output.EndTimer('NN DIST CALC')
     print 'average diversity value of pure diversity subset:', AveDistSqr
     return AveDistSqr
Ejemplo n.º 13
0
def GridDiversity_JITFilter(oldmols,
                            newmols,
                            Filter=True,
                            Geom=False,
                            molgrid=None):

    if DE:
        print "oldmols:", oldmols

    if not (Filter or Geom):
        return GridDiversity(oldmols, newmols)

    if Filter: newmols = dr.DriveFilters(newmols, Filter, False)
    ScatterCoords([m for m in newmols if not m.HasProp('gridcoord')])
    ScatterAssign(newmols)
    ScatterDecider(newmols)

    #Get old assignments if not passed
    oput.StartTimer('GRID PICKS')
    if not molgrid:
        molgrid = {}
        for mol in oldmols:
            index = mol.GetProp('gridcoord')
            if molgrid.has_key(index):
                if molgrid[index].GetDoubleProp('decider') < mol.GetDoubleProp(
                        'decider'):
                    molgrid[index] = mol
            else:
                molgrid[index] = mol

    nOld = len(molgrid)

    #Screen new molecules for novelty
    toscreen = []
    for mol in newmols:
        index = mol.GetProp('gridcoord')
        try:
            if (not molgrid.has_key(index)) or \
                molgrid[index].GetDoubleProp('decider')<mol.GetDoubleProp('decider'):
                toscreen.append(mol)
        except ValueError:
            print "No Decider keyword for:", Chem.MolToSmiles(molgrid[index])
            #print molgrid[index].GetDoubleProp('decider')
            pass

    oput.EndTimer('GRID PICKS')

    print 'Novel mutants:', len(toscreen), '/', len(newmols)

    #Run filters on novel molecules only
    if Geom:
        goodmols = dr.DriveFilters(toscreen, Filter, Geom)
        print 'Molecules passing filters:', len(goodmols)
    else:
        goodmols = toscreen

    #Check for some problems
    badgoodmols = [m for m in goodmols if not m.HasProp('gridcoord')]
    if len(badgoodmols) > 0:
        DumpLowMemMols(badgoodmols, 'failchange.pjar.gz', True)
        print 'Molecule was changed after filtering ...'
        ScatterCoords(badgoodmols)
        ScatterAssign(badgoodmols)
        ScatterDecider(badgoodmols)

    #Assign filtered novel molecules
    oput.StartTimer('GRID PICKS')
    nReplace = 0
    for mol in goodmols:
        index = mol.GetProp('gridcoord')
        if molgrid.has_key(index):
            if molgrid[index].GetDoubleProp('decider') < mol.GetDoubleProp(
                    'decider'):
                molgrid[index] = mol
                nReplace += 1
        else:
            molgrid[index] = mol
    oput.EndTimer('GRID PICKS')

    print len(molgrid),'/',np.product(grid.nbins),'occupied cells (',\
          len(molgrid)-nOld,'new, '+str(nReplace)+' replaced.)'

    return len(molgrid), molgrid.values(), molgrid