def ReadSymbols(self, tree):
     """Turn the symbol token ``tree[0]`` into an RDKit atom or query atom.

     The token is checked, in this order, against:

     * ``'any atom'`` / ``'$'``   -- query: atomic number greater than 0
     * ``'heteroatom'`` / ``'&'`` -- query: atomic number 7, 8, 15 or 16
     * ``'heavy atom'`` / ``'X'`` -- query: atomic number greater than 1
     * a token starting with a lower-case letter -- a plain ``Chem.Atom`` of
       the capitalised symbol, flagged as aromatic
     * ``'M'``                    -- query: atomic number greater than 19
     * anything else              -- query: atomic number of that element

     Returns the atom that was built.
     Raises RINGReaderError when ``Chem.Atom`` rejects the symbol with a
     ``RuntimeError``.
     """
     token = tree[0]

     if token in ('any atom', '$'):
         return rdqueries.AtomNumGreaterQueryAtom(0)

     if token in ('heteroatom', '&'):
         # N, O, P, S: start from nitrogen and OR in the remaining elements
         hetero = rdqueries.AtomNumEqualsQueryAtom(7)
         for atomic_num in (8, 15, 16):
             hetero.ExpandQuery(
                 rdqueries.AtomNumEqualsQueryAtom(atomic_num),
                 how=Chem.rdchem.CompositeQueryType.COMPOSITE_OR)
         return hetero

     if token in ('heavy atom', 'X'):
         # heavier than H
         return rdqueries.AtomNumGreaterQueryAtom(1)

     if token[0].islower():
         # lower-case symbol: aromatic atom of the capitalised element
         symbol = token[0].upper() + token[1:]
         try:
             aromatic = Chem.Atom(symbol)
             aromatic.SetIsAromatic(True)
         except RuntimeError:
             raise RINGReaderError('Element aromatic ' + symbol + ' not found')
         return aromatic

     if token == 'M':
         # labelled "metal" in the grammar; implemented as atomic number > 19
         return rdqueries.AtomNumGreaterQueryAtom(19)

     try:
         element = Chem.Atom(token)
         exact = rdqueries.AtomNumEqualsQueryAtom(element.GetAtomicNum())
     except RuntimeError:
         raise RINGReaderError('Element ' + token + ' not found')
     return exact
 def ReadAtomSuffix(self, tree, atom):
     """Apply the atom suffix token ``tree[0]`` to ``atom``.

     Charge-bearing suffixes ('+', '-', '+.', '-.') add a formal-charge query
     to ``atom`` in place through ``ExpandQuery``.  Radical-bearing suffixes
     ('.', ':', ':.', '+.', '-.') produce an ``AtomRadical`` constraint.  The
     suffix '?' is accepted and changes nothing.

     Returns the ``AtomRadical`` constraint, or ``None`` when the suffix
     carries no radical information.
     Raises NotImplementedError for any suffix not listed above or '*'.
     """
     suffix = tree[0]

     # radical-count expression implied by the suffix, if any
     if suffix in ('+.', '-.', '.'):
         radical_spec = '=1'
     elif suffix == ':':
         radical_spec = '=2'
     elif suffix == ':.':
         radical_spec = '=3'
     else:
         radical_spec = None

     # formal charge implied by the suffix, if any
     if suffix in ('+.', '+'):
         charge = 1
     elif suffix in ('-.', '-'):
         charge = -1
     else:
         charge = None

     constraint = None
     if radical_spec is not None:
         constraint = AtomRadical(False, ConstraintNumber(radical_spec))
     if charge is not None:
         atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(charge))

     if radical_spec is None and charge is None:
         if suffix == '*':
             from rdkit.Chem import GetPeriodicTable
             # Onium: same element, one more than the default valence, charge +1.
             # NOTE(review): the query atom built here is only bound to a local
             # name and is not returned, so the caller's ``atom`` is left as it
             # was -- confirm whether the caller was meant to receive it.
             atomicnum = atom.GetAtomicNum()
             onium = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
             valence = GetPeriodicTable().GetDefaultValence(atomicnum)
             onium.ExpandQuery(
                 rdqueries.TotalValenceEqualsQueryAtom(valence + 1))
             onium.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(1))
         elif suffix != '?':
             raise NotImplementedError(
                 "Unsupported atom suffic: '" + suffix + "'")
     return constraint
Esempio n. 3
0
def get_simple(ms):
    """Collect a simple descriptor vector for each molecule, grouped by name.

    For every molecule in ``ms`` the vector holds, in order: the heavy-atom
    count, the number of atoms with atomic number 5, 35, 6, 17, 9, 53, 7, 8,
    15 and 16, then ``NumHAcceptors``, ``NumHDonors``, ``MolLogP`` and
    ``RingCount`` from ``Descriptors``.

    Returns a dict mapping the molecule's ``_Name`` property to the list of
    vectors computed for molecules carrying that name.  A molecule whose
    processing raises ``ValueError`` is counted as an error and skipped; if
    its name has no vectors yet, the name is removed again.  The error count
    is printed before returning.
    """
    # atomic numbers counted per molecule, in output order
    counted_elements = (5, 35, 6, 17, 9, 53, 7, 8, 15, 16)

    descr = {}
    errors = 0
    for m in ms:
        try:
            name = m.GetProp('_Name')
            descr.setdefault(name, [])

            # descriptor vector: size, element counts, then RDKit descriptors
            dv = [m.GetNumHeavyAtoms()]
            dv.extend(
                len(m.GetAtomsMatchingQuery(
                    rdqueries.AtomNumEqualsQueryAtom(n)))
                for n in counted_elements)
            dv.extend([
                Descriptors.NumHAcceptors(m),
                Descriptors.NumHDonors(m),
                Descriptors.MolLogP(m),
                Descriptors.RingCount(m),
            ])

            descr[name].append(dv)
        except ValueError:
            errors += 1
            # do not keep a name that ended up without any vector
            if not descr[name]:
                del descr[name]
    print(str(errors) + ' ValueError(s) has(have) occured')
    return descr
Esempio n. 4
0
def _findMissingReactiveReactants(rfps, pfps, currentReactants, unmappedPAtoms, output=False):
    """Extend each reactant combination with reactants covering unmapped product atoms.

    ``currentReactants`` and ``unmappedPAtoms`` are walked in lockstep.  For an
    entry whose ``umPA[1]`` is positive, the atomic numbers behind the
    unmapped product bits (``umPA[-1]``, pairs of bit and count) are looked up
    in ``pfps`` and counted.  The reactants not yet used are ranked by
    ``reactivity`` divided by their atom count, and those that satisfy the
    largest number of per-element count queries are added.

    Returns ``currentReactants`` unchanged when ``unmappedPAtoms`` is empty,
    otherwise a new outer list.  Note that the inner lists are the same
    objects as in ``currentReactants`` and are extended in place.
    Debug information is printed when ``output`` is set.
    """
    if output:
        print("--- _findMissingReactiveReactants ---")
    if len(unmappedPAtoms) == 0:
        # nothing is unmapped: the current assignment is already final
        return currentReactants

    def _scaledReactivity(idx):
        # reactivity of reactant ``idx`` relative to its number of atoms
        return rfps[idx].reactivity / float(rfps[idx].molecule.GetNumAtoms())

    finalReactants = []
    reactantCount = len(rfps)
    # investigate all possible solutions of the scoring before
    for reacts, umPA in zip(currentReactants, unmappedPAtoms):
        finalReactants.append(reacts)
        if not umPA[1] > 0:
            continue

        # reactants not part of this solution yet, most reactive first
        unused = set(range(reactantCount)).difference(set(reacts))
        candidates = sorted(unused, key=_scaledReactivity, reverse=True)

        # atomic numbers (with multiplicity) of the unmapped product atoms
        atomicNums = []
        for bit, count in umPA[-1]:
            for pfp in pfps:
                if bit in pfp.bitInfoScaffoldFP:
                    firstEnv = pfp.bitInfoScaffoldFP[bit][0]
                    centre = pfp.molecule.GetAtomWithIdx(firstEnv[0])
                    atomicNums.extend([centre.GetAtomicNum()] * count)
        missingPAtoms = Counter(atomicNums)
        if output > 0:
            print(missingPAtoms)

        # one query per missing element
        queries = [(rdqueries.AtomNumEqualsQueryAtom(num), num)
                   for num in missingPAtoms]
        bestCount = 0
        bestReactivity = -1
        addReactants = []
        # search for the most reactive reactants capturing all/most of the
        # unmapped product atoms
        for r in candidates:
            if output > 0:
                print(" >> Reactant", r, _scaledReactivity(r))
            satisfied = sum(
                1 for q, num in queries
                if len(rfps[r].molecule.GetAtomsMatchingQuery(q)) >= missingPAtoms[num])
            if output > 0:
                print(" Max reactivity", bestReactivity)
                print(" Max fullfilled queries", bestCount)
            if satisfied > bestCount:
                # strictly better coverage: start a new selection
                bestCount = satisfied
                bestReactivity = _scaledReactivity(r)
                addReactants = [r]
            elif bestCount and satisfied == bestCount and \
                    _scaledReactivity(r) >= bestReactivity:
                # same coverage and at least as reactive: keep it as well
                addReactants.append(r)
            if output > 0:
                print(" Added reactants", addReactants)
        finalReactants[-1].extend(addReactants)
    if output > 0:
        print(" >> Final reactants", finalReactants)
    return finalReactants
Esempio n. 5
0
def _getAtomWeights(mol, molID, topicID, topicModel):
    """Compute a per-atom weight of ``mol`` for one topic of ``topicModel``.

    Every fragment bit stored for ``molID`` in ``topicModel.moldata`` is looked
    up in the topic/fragment probability matrix.  Bits whose probability for
    ``topicID`` is above ``1 / nFrags`` contribute that probability to the
    atoms they cover; where fragments overlap, the highest value wins.

    * For the 'Morgan' and 'RDK' fragment methods the covered atoms are the
      begin/end atoms of the bonds listed for the bit by ``_generateFPs``.
    * For fragment methods starting with 'Brics' the covered atoms come from
      substructure matches of the fragment SMARTS, skipping dummy atoms.

    Returns a tuple ``(atomWeights, maxWeightTopic)``: a numpy array with one
    weight per atom, and the largest probability of any fragment for the
    topic (the original comments describe it as the normalisation constant).
    """
    weights = [0] * mol.GetNumAtoms()
    # ignore "wildcard atoms" in BRICS fragments
    q = rdqueries.AtomNumEqualsQueryAtom(0)
    # get all fragments of a certain molecule
    _, aBits = chemTopicModel._generateFPs(mol, topicModel.fragmentMethod)
    fp = topicModel.moldata.loc[molID, 'fps']
    probs = topicModel.getTopicFragmentProbabilities()
    _nTopics, nFrags = probs.shape
    # use the max probability of a fragment associated with a certain topic
    # to normalize the fragment weights
    maxWeightTopic = max(probs[topicID])
    # Morgan/RDK bits must be translated to a fragment index and carry paths
    usesBitPaths = topicModel.fragmentMethod in ['Morgan', 'RDK']
    # calculate the weight of an atom concerning a certain topic
    for bit in fp.keys():
        try:
            idxBit = topicModel.fragIdx[bit] if usesBitPaths else bit
            r = probs[topicID, idxBit]
        except Exception:
            # Best effort: a bit that is unknown to the model or cannot be
            # used as an index is skipped.  ``Exception`` (rather than a bare
            # ``except``) keeps KeyboardInterrupt/SystemExit propagating.
            continue
        if r <= 1. / nFrags:
            continue
        if usesBitPaths and bit in aBits:
            # Morgan/RDK fingerprints
            for p in aBits[bit]:
                for b in p:
                    bond = mol.GetBondWithIdx(b)
                    # for overlapping fragments take the highest weight for the atom
                    for atomIdx in (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()):
                        weights[atomIdx] = max(r, weights[atomIdx])
        elif topicModel.fragmentMethod.startswith('Brics'):
            # BRICS fragments
            submol = Chem.MolFromSmarts(topicModel.vocabulary[idxBit])
            # positions of dummy atoms inside the fragment pattern
            ignoreWildcards = {
                a.GetIdx() for a in submol.GetAtomsMatchingQuery(q)
            }
            for m in mol.GetSubstructMatches(submol):
                for n, atomidx in enumerate(m):
                    if n in ignoreWildcards:
                        continue
                    # for overlapping fragments take the highest weight for
                    # the atom (not expected to happen for BRICS)
                    weights[atomidx] = max(r, weights[atomidx])
    atomWeights = np.array(weights)
    return atomWeights, maxWeightTopic
 def __call__(self,comb_mol,mapped_index):
     """Apply this object's valence, radical and charge settings to one atom.

     The target atom of ``comb_mol`` is the one at ``mapped_index[self.idx]``.
     When ``self.valence`` is non-zero the atom is replaced by a query atom
     for the same element that requires a total valence of the element's
     default valence plus ``self.valence`` and a formal charge of
     ``self.charge``.  In every case the atom stored in ``comb_mol`` then gets
     ``self.radical`` radical electrons and formal charge ``self.charge``.

     Returns None; ``comb_mol`` is modified in place.
     """
     # resolve the index once so the atom is read and replaced at the same position
     mol_idx = mapped_index[self.idx]
     atom = comb_mol.GetAtomWithIdx(mol_idx)
     if self.valence != 0:
         from rdkit.Chem import GetPeriodicTable
         atomicnum = atom.GetAtomicNum()
         query = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
         valence = GetPeriodicTable().GetDefaultValence(atomicnum)
         query.ExpandQuery(rdqueries.TotalValenceEqualsQueryAtom(valence+self.valence))
         query.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(self.charge))
         comb_mol.ReplaceAtom(mol_idx,query)
         # ReplaceAtom stores its own copy of the atom, so fetch the atom that
         # now lives in the molecule before setting radical/charge on it
         atom = comb_mol.GetAtomWithIdx(mol_idx)

     atom.SetNumRadicalElectrons(self.radical)
     atom.SetFormalCharge(self.charge)
Esempio n. 7
0
# Fragmentation-type label (presumably one of several FTYPE_* labels; any
# others are defined outside the part of the file visible here)
FTYPE_CYCLIC_ACYCLIC = 'cyclic_and_acyclic'

# Global SMARTS used by the program

# acyclic bond smarts: two atoms joined by a bond that is not a ring bond (!@),
# not a double bond (!=) and not a triple bond (!#)
ACYC_SMARTS = Chem.MolFromSmarts("[*]!@!=!#[*]")
# exocyclic/fused exocyclic bond smarts: a ring bond (@) between an atom in one
# or two rings ([R1,R2]) and a ring atom that is not in exactly one ring ([r;!R1])
CYC_SMARTS = Chem.MolFromSmarts("[R1,R2]@[r;!R1]")

# SMARTS used to find fragments in which dummy atoms ([#0]) are bonded to ring
# atoms. A single pattern would be: [$([#0][r].[r][#0]),$([#0][r][#0])]
# but RDKit doesn't support component SMARTS inside a recursive SMARTS -
# $([#0][r].[r][#0]) - hence the pattern is split into two
cSma1 = Chem.MolFromSmarts("[#0][r].[r][#0]")
cSma2 = Chem.MolFromSmarts("[#0][r][#0]")
# query atom matching atomic number 0, i.e. dummy atoms
dummyAtomQuery = rdqueries.AtomNumEqualsQueryAtom(0)


def delete_bonds(mol, bonds, ftype, hac):
    """ Fragment molecule on bonds and reduce to fraggle fragmentation SMILES.
  If none exists, returns None """

    # Replace the given bonds with attachment points (B1-B2 -> B1-[*].[*]-B2)
    bondIdx = [mol.GetBondBetweenAtoms(*bond).GetIdx() for bond in bonds]
    modifiedMol = Chem.FragmentOnBonds(mol,
                                       bondIdx,
                                       dummyLabels=[(0, 0)] * len(bondIdx))

    # should be able to get away without sanitising mol as the valencies should be okay
    # do not do a full sanitization, but do find rings and calculate valences:
    Chem.SanitizeMol(
Esempio n. 8
0
def main():
    """
    Main function

    Reads the command line parameters, prepares the output directory and the
    isotope dictionary, reads one InChI per line from the input file and
    grows a fragmentation tree for every molecule with more than one carbon
    atom, using a pool of 10 worker processes.

    InChIs that cannot be turned into a molecule, or that fail while their
    carbon atoms are counted, are printed and skipped.
    """
    sys.path.append(os.path.dirname(__file__))

    # Parse the command line arguments
    cl_params = command_line_params()

    # Create the file parameter for the FragTreeLibrary
    file_params = {
        'input_inchi_file': cl_params['inchi_file'],
        'output_directory': cl_params['output_dir'],
        'output_hdf5_file_base': cl_params['output_base_name'],
        'output_error_log': cl_params['error_log']
    }

    isotope_dict = get_isotope_dict(isostope_file=cl_params['isotope_file'])

    # Make output directory if it does not exist
    output_dir = file_params['output_directory']
    if not os.path.isdir(output_dir):
        try:
            os.mkdir(output_dir)
        except OSError:
            # When executed in parallel it is possible that another rank
            # already created the dir in the meantime. That case is safe to
            # ignore; anything else is a real error.
            if not os.path.isdir(output_dir):
                raise

    # Get isotope dictionary (if none was provided)
    if isotope_dict is None:
        isotope_dict = get_isotope_dict()

    # make list of inchis
    with open(file_params['input_inchi_file'], 'r') as inchi_file:
        inchi_list = [line.strip() for line in inchi_file]

    # create the error log, or empty it if it already exists
    with open(file_params['output_error_log'], 'w'):
        pass

    max_depth = cl_params['max_depth']
    # the same carbon query is reused for every molecule
    carbon_query = rdqueries.AtomNumEqualsQueryAtom(6)
    mp_params = []
    for inchi in inchi_list:
        mol = Chem.MolFromInchi(inchi)
        if mol is None:
            # the InChI could not be parsed: report it and move on
            print(inchi)
            continue
        try:
            carbon_count = len(mol.GetAtomsMatchingQuery(carbon_query))
        except Exception:
            # best effort, as before: report the offending InChI and skip it
            print(inchi)
            continue
        # Only molecules with more than one carbon atom are queued; the
        # original note says tree growth needs at least one bond to break
        # or it will crash.
        if carbon_count > 1:
            mp_params.append((inchi, max_depth, isotope_dict, file_params))

    pool = mp.Pool(processes=10)
    try:
        pool.map(grow_tree_mp, mp_params)
    finally:
        # release the worker processes even if a task raised
        pool.close()
        pool.join()