Beispiel #1
0
def get_filter_values(mol):
    """
    Collect every property the filters inspect for a single molecule.

    The input must be an RDKit ``Chem.Mol`` (enforced with an assertion).
    The result is a dictionary keyed by property name; it also carries an
    ``"is_good"`` flag preset to True and an empty ``"rejections"`` list.
    """

    assert isinstance(mol, Chem.Mol)

    rotatable = lip.NumRotatableBonds(mol)
    values = {
        "MW": desc.CalcExactMolWt(mol),
        "logP": crip.MolLogP(mol),
        "HBA": lip.NumHAcceptors(mol),
        "HBD": lip.NumHDonors(mol),
        "tPSA": desc.CalcTPSA(mol),
        "rot_bonds": rotatable,
        # every bond that is not rotatable is counted as rigid
        "rigid_bonds": mol.GetNumBonds() - rotatable,
        "num_rings": lip.RingCount(mol),
        "num_hetero_atoms": lip.NumHeteroatoms(mol),
        "charge": rdmolops.GetFormalCharge(mol),
    }

    carbons, charges, largest_ring = get_atom_props(mol)
    values["num_carbons"] = carbons
    values["num_charges"] = charges
    values["max_ring_size"] = largest_ring

    try:
        values["hc_ratio"] = float(values["num_hetero_atoms"]) / float(carbons)
    except ZeroDivisionError:
        # no carbon atoms at all: use a very large sentinel ratio
        values["hc_ratio"] = 100000000

    # number of BRICS bonds found in the molecule
    values["fc"] = len(list(Brics.FindBRICSBonds(mol)))
    # starts out True; nothing has been checked yet at this point
    values["is_good"] = True
    # distinct element symbols present in the molecule
    values["atoms"] = list({atom.GetSymbol() for atom in mol.GetAtoms()})
    values["num_chiral_centers"] = len(
        Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    # filled in elsewhere with the reasons for rejection
    values["rejections"] = []

    return values
Beispiel #2
0
def _generateFPs(mol, fragmentMethod='Morgan'):
    aBits = {}
    fp = None
    # circular Morgan fingerprint fragmentation, we use a simple invariant than ususal here
    if fragmentMethod == 'Morgan':
        tmp = {}
        fp = AllChem.GetMorganFingerprint(
            mol,
            radius=2,
            invariants=utilsFP.generateAtomInvariant(mol),
            bitInfo=tmp)
        aBits = utilsFP.getMorganEnvironment(mol, tmp, fp=fp, minRad=2)
        fp = fp.GetNonzeroElements()
    # path-based RDKit fingerprint fragmentation
    elif fragmentMethod == 'RDK':
        fp = AllChem.UnfoldedRDKFingerprintCountBased(mol,
                                                      maxPath=5,
                                                      minPath=3,
                                                      bitInfo=aBits)
        fp = fp.GetNonzeroElements()
    # get the final BRICS fragmentation (= smallest possible BRICS fragments of a molecule)
    elif fragmentMethod == 'Brics':
        fragMol = BRICS.BreakBRICSBonds(mol)
        propSmi = _prepBRICSSmiles(fragMol)
        fp = Counter(propSmi.split('.'))
    else:
        print("Unknown fragment method")
    return fp, aBits
Beispiel #3
0
def get_ddi_mask(atc42SMLES, med_voc):
    """Build a binary medication-by-fragment matrix from BRICS fragments.

    Args:
        atc42SMLES: mapping from a medication word to an iterable of SMILES
            strings.
        med_voc: object whose ``idx2word`` dict maps row index to
            medication word; rows follow the dict's iteration order.

    Returns:
        A float ``numpy`` array of shape ``(len(idx2word), n_fragments)``
        where entry ``[i, j]`` is 1 when fragment ``j`` occurs in any SMILES
        of medication ``i``. Columns follow the sorted order of the fragment
        SMILES, so the layout is reproducible between runs (the previous
        set-iteration order depended on string hashing).
    """
    fragments_per_med = []
    for med in med_voc.idx2word.values():
        med_frags = set()
        for smiles in atc42SMLES[med]:
            try:
                med_frags.update(
                    BRICS.BRICSDecompose(Chem.MolFromSmiles(smiles)))
            except Exception:
                # Best effort: a SMILES that cannot be parsed or decomposed
                # contributes no fragments. KeyboardInterrupt/SystemExit are
                # no longer swallowed.
                continue
        fragments_per_med.append(med_frags)

    all_frags = sorted(set().union(*fragments_per_med))
    # O(1) column lookup instead of list.index() for every fragment
    column_of = {frag: col for col, frag in enumerate(all_frags)}

    ddi_matrix = np.zeros((len(med_voc.idx2word), len(all_frags)))
    for row, med_frags in enumerate(fragments_per_med):
        for frag in med_frags:
            ddi_matrix[row, column_of[frag]] = 1
    return ddi_matrix
Beispiel #4
0
def pair_frags(fname, out, method='Recap', is_mf=True):
    """Write (fragment combination, molecule) pairs to a tab-separated file.

    Args:
        fname: path of a table readable by ``pd.read_table`` with a
            ``Smiles`` column.
        out: path of the TSV file to write (columns ``Frags``, ``Smiles``).
        method: ``'recap'`` (any capitalisation) selects RECAP
            decomposition; any other value selects BRICS. The comparison
            used to be case-sensitive, so the default ``'Recap'`` silently
            fell through to BRICS.
        is_mf: when true, every combination size of the kept fragments is
            enumerated; otherwise only single fragments.
    """
    use_recap = str(method).lower() == 'recap'
    # query / replacement used to cap attachment points with hydrogens;
    # built once instead of once per molecule
    du, hy = Chem.MolFromSmiles('*'), Chem.MolFromSmiles('[H]')

    smiles = pd.read_table(fname).Smiles.dropna()
    pairs = []
    for smile in tqdm(smiles):
        smile = utils.clean_mol(smile)
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            # unparsable SMILES: nothing to fragment
            continue
        if use_recap:
            frags = np.array(sorted(Recap.RecapDecompose(mol).GetLeaves().keys()))
        else:
            frags = BRICS.BRICSDecompose(mol)
            # drop the BRICS isotope labels from the dummy atoms
            frags = np.array(sorted({re.sub(r'\[\d+\*\]', '*', f) for f in frags}))
        if len(frags) == 1:
            continue
        subs = np.array([Chem.MolFromSmiles(f) for f in frags])
        subs = np.array([Chem.RemoveHs(Chem.ReplaceSubstructs(f, du, hy, replaceAll=True)[0]) for f in subs])
        subs = np.array([m for m in subs if m.GetNumAtoms() > 1])
        # keep only fragments that match no fragment other than themselves
        match = np.array([[m.HasSubstructMatch(f) for f in subs] for m in subs])
        frags = subs[match.sum(axis=0) == 1]
        frags = sorted(frags, key=lambda x: -x.GetNumAtoms())[:voc.n_frags]
        frags = [Chem.MolToSmiles(Chem.RemoveHs(f)) for f in frags]

        max_comb = len(frags) if is_mf else 1
        for ix in range(1, max_comb + 1):
            for comb in combinations(frags, ix):
                frag_smiles = '.'.join(comb)
                if len(frag_smiles) > len(smile):
                    continue
                if mol.HasSubstructMatch(Chem.MolFromSmarts(frag_smiles)):
                    pairs.append([frag_smiles, smile])
    df = pd.DataFrame(pairs, columns=['Frags', 'Smiles'])
    df.to_csv(out, sep='\t', index=False)
def get_sgs(record_dict, n_min, n_max, method="exhaustive"):
    """Return subgraphs of ``record_dict["mol"]`` for the chosen method.

    ``"exhaustive"`` returns RDKit's enumeration of all subgraphs with
    ``n_min`` to ``n_max`` bonds. ``"RECAP"`` and ``"BRICS"`` decompose the
    molecule, strip the dummy atoms from each piece, map each piece back to
    bond indices of the parent and pass the collected list through
    ``subset_sgs_sizes``. Any other method value returns ``None``.
    """
    if method == "exhaustive":
        return Chem.rdmolops.FindAllSubgraphsOfLengthMToN(
            record_dict["mol"], n_min, n_max)

    if method == "RECAP":
        parent = record_dict["mol"]
        pieces = (node.mol for node in
                  Recap.RecapDecompose(parent).GetAllChildren().values())
    elif method == "BRICS":
        parent = record_dict["mol"]
        pieces = (Chem.MolFromSmiles(smi)
                  for smi in BRICS.BRICSDecompose(parent))
    else:
        return None

    sgs = []
    for piece in pieces:
        # remove the attachment-point dummy atoms before matching
        stripped = Chem.DeleteSubstructs(piece, Chem.MolFromSmarts('[#0]'))
        edge_idxs = get_substructure_bond_idx(stripped, parent)
        if edge_idxs is not None:
            sgs.append(edge_idxs)
    return subset_sgs_sizes([sgs], n_min, n_max)
Beispiel #6
0
def brics(
    mol: Chem.Mol,
    singlepass: bool = True,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Decompose a molecule with BRICS and optionally clean up the fragments.

    Args:
        mol: a molecule.
        singlepass: forwarded to `BRICSDecompose` as `singlePass`.
        remove_parent: drop the first fragment of the list.
        sanitize: whether to run `dm.sanitize_mol` on each fragment.
        fix: whether to run `dm.fix_mol` on each fragment.

    Returns:
        The list of fragments, with `None` entries removed.
    """
    fragments = list(
        BRICS.BRICSDecompose(mol, returnMols=True, singlePass=singlepass))

    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))
    if remove_parent:
        fragments.pop(0)

    return [fragment for fragment in fragments if fragment is not None]
def generate_chemicals_from_fragments(smiles_list, n=10):
    """
    Reconstruct chemicals from fragments with BRICS.

    Parameters
    -----------------
    smiles_list: list of string
        list of smiles of fragments
    n: int
        maximum number of chemicals to be generated

    Returns
    ---------------
    smiles_list: list of string
        list of newly generated smiles. It holds fewer than ``n`` entries
        when the builder runs out of products (this used to raise
        ``StopIteration``).
    """
    from itertools import islice

    # convert smiles to mol objects
    all_components = [Chem.MolFromSmiles(f) for f in smiles_list]
    builder = BRICS.BRICSBuild(all_components)

    generated = []
    # islice stops cleanly at n products or when the builder is exhausted
    for m in islice(builder, max(n, 0)):
        m.UpdatePropertyCache(strict=True)
        generated.append(m)

    return [Chem.MolToSmiles(m) for m in generated]
def fragmentate_chemicals(SMILES_list, return_only_fragments=True):
    """
    Fragment chemicals with the BRICS algorithm.

    Parameters
    ---------------------
    SMILES_list: list of string
       list of smiles
    return_only_fragments: bool
       if true, keep only SMILES that contain a dummy atom ("*")

    Returns
    ----------------------
    fragmentated_smiles: list of string
        unique fragment SMILES over all input chemicals (order is not
        defined). SMILES that cannot be parsed are skipped.
    """

    mols = [Chem.MolFromSmiles(SMILES) for SMILES in SMILES_list]

    unique_fragments = set()
    for mol in tqdm(mols):
        if mol is None:
            # unparsable SMILES: nothing to decompose
            continue
        unique_fragments.update(BRICS.BRICSDecompose(mol))

    fragmentated_smiles = list(unique_fragments)

    if return_only_fragments:
        # membership test; the former `find("*") > 0` would have dropped a
        # SMILES whose first character is "*"
        fragmentated_smiles = [
            smi for smi in fragmentated_smiles if "*" in smi
        ]

    return fragmentated_smiles
Beispiel #9
0
def fragment_iterative(mol, min_length=3):
    """Cut ``mol`` along its BRICS bonds, one bond at a time.

    Bonds are visited in increasing bond-index order. Each cut splits the
    current remainder into a head and a tail; the head is kept and the tail
    becomes the new remainder. A cut is skipped when either side is smaller
    than ``min_length`` (as measured by ``get_size``). When a cut yields a
    single piece, the remainder itself is kept and the walk stops. Returns
    the list of kept pieces, or ``[]`` when no BRICS bonds can be unpacked.
    """
    bond_data = list(BRICS.FindBRICSBonds(mol))

    try:
        atom_pairs, _labels = zip(*bond_data)
    except Exception:
        return []

    bond_ids = sorted(
        mol.GetBondBetweenAtoms(begin, end).GetIdx()
        for begin, end in atom_pairs)

    pieces_kept = []
    remainder = deepcopy(mol)
    for bond_id in bond_ids:
        result = break_on_bond(remainder, bond_id)

        if len(result) == 1:
            pieces_kept.append(remainder)
            break

        head, tail = result
        if any(get_size(part) < min_length for part in (head, tail)):
            continue

        pieces_kept.append(head)
        remainder = deepcopy(tail)

    return pieces_kept
Beispiel #10
0
def fragment_recursive(mol, frags):
    """Recursively peel fragments off ``mol`` along its BRICS bonds.

    At each step the BRICS bond with the lowest bond index is broken, the
    first resulting piece is appended to ``frags`` and the second piece is
    processed recursively. A molecule without BRICS bonds is appended as is.

    Args:
        mol: the molecule to fragment.
        frags: list that receives the fragments (mutated in place).

    Returns:
        ``frags`` in every case. Previously the list was only returned from
        the base case, so top-level callers usually got ``None``.

    Any exception raised while fragmenting is swallowed (best effort);
    whatever was collected up to that point stays in ``frags``.
    """
    try:
        bonds = list(BRICS.FindBRICSBonds(mol))

        if not bonds:
            frags.append(mol)
            return frags

        # lowest bond index among the BRICS bonds
        first_bond = min(
            mol.GetBondBetweenAtoms(a1, a2).GetIdx() for (a1, a2), _ in bonds)

        broken = Chem.FragmentOnBonds(mol,
                                      bondIndices=[first_bond],
                                      dummyLabels=[(0, 0)])
        head, tail = Chem.GetMolFrags(broken, asMols=True)
        print(mol_to_smiles(head), mol_to_smiles(tail))
        frags.append(head)

        fragment_recursive(tail, frags)
    except Exception:
        # deliberate best effort: keep what has been collected so far
        pass
    return frags
Beispiel #11
0
def fragmenter(thefile):
    """Decompose and rebuild every molecule listed in a SMILES file.

    Each line of ``thefile`` (stripped) is parsed as SMILES, decomposed with
    BRICS, and all molecules that ``BRICS.BRICSBuild`` produces from those
    fragments are collected as SMILES.

    Args:
        thefile: path of a text file with one SMILES per line.

    Returns:
        The list of generated SMILES. The same list is also written, one
        SMILES per line, to ``output.txt`` in the current directory; an
        existing ``output.txt`` is replaced.
    """
    # Start from a clean output file; a missing file is not an error.
    try:
        os.remove('output.txt')
    except FileNotFoundError:
        pass

    with open(thefile) as handle:
        smiles_lines = [line.strip() for line in handle]

    mylist = []
    for smiles in smiles_lines:
        base = Chem.MolFromSmiles(smiles)
        catalog = BRICS.BRICSDecompose(base)
        mcat = [Chem.MolFromSmiles(x) for x in catalog]
        mylist.extend(Chem.MolToSmiles(m) for m in BRICS.BRICSBuild(mcat))

    with open('output.txt', 'w+') as out:
        for generated in mylist:
            print(generated, file=out)

    return mylist
Beispiel #12
0
def break_on_rotatable_bonds_to_mol(inmol):
    """Break a mol on its BRICS bonds and return the result as a single mol.

    The BRICS bonds are used as a stand-in for rotatable bonds. No dummy
    atoms are added at the cut points. When no BRICS bond is found, the
    input mol itself is returned unchanged.
    """
    bond_indices = []
    for entry in BRICS.FindBRICSBonds(inmol):
        if not entry:
            continue
        # entry[0] holds the two atom indices of the bond
        at1, at2 = entry[0]
        bond_indices.append(inmol.GetBondBetweenAtoms(at1, at2).GetIdx())

    # Nothing to cut: hand the molecule back as it was given
    if not bond_indices:
        return inmol

    return Chem.rdmolops.FragmentOnBonds(inmol, bond_indices, addDummies=False)
Beispiel #13
0
def identify_rotatable_bond_atom_pairs(mol):
    """Return one atom quadruplet per BRICS bond of a molecule.

    Each quadruplet is ``(n1, a1, a2, n2)``: ``a1``/``a2`` are the atoms of
    the bond, ``n1`` is the first neighbour of ``a1`` other than ``a2`` and
    ``n2`` the first neighbour of ``a2`` other than ``a1`` - four atoms
    from which a dihedral angle can be calculated.
    """

    def first_other_neighbour(atom_idx, excluded_idx):
        # neighbours of atom_idx with the bond partner taken out
        neighbours = [
            n.GetIdx() for n in mol.GetAtomWithIdx(atom_idx).GetNeighbors()
        ]
        neighbours.remove(excluded_idx)
        return neighbours[0]

    atom_sets = []
    for entry in BRICS.FindBRICSBonds(mol):
        if not entry:
            continue
        a1, a2 = entry[0]
        atom_sets.append((first_other_neighbour(a1, a2), a1, a2,
                          first_other_neighbour(a2, a1)))
    return atom_sets
 def splitMol(self, mol, bondsToKeep):
     ''' Fragment a molecule on the BRICS bonds whose label is in bondsToKeep.

     Returns the result of Chem.FragmentOnSomeBonds, each entry partially
     sanitized (property cache updated non-strictly, rings perceived), or an
     empty list when no BRICS bond of the molecule has a kept label.
     '''
     bond_ids, dummy_labels = [], []
     for (begin, end), label in BRICS.FindBRICSBonds(mol):
         if label not in bondsToKeep:
             continue
         bond_ids.append(mol.GetBondBetweenAtoms(begin, end).GetIdx())
         dummy_labels.append([int(part) for part in label])

     if not bond_ids:
         return []

     fragmented = Chem.FragmentOnSomeBonds(mol, bond_ids,
                                           dummyLabels=dummy_labels)
     # At least a partial sanitization is needed for later processing:
     for piece in fragmented:
         piece.UpdatePropertyCache(False)
         Chem.FastFindRings(piece)
     return fragmented
Beispiel #15
0
    def combine_frag(self):
        """Build one compound per fragment template and record new ones.

        Calls ``self.generate_frag_templates()`` first. For every entry of
        ``self.potential_cpd_templates`` the fragment SMILES are sorted,
        parsed and handed to ``BRICS.BRICSBuild``; only the first product is
        used. A product SMILES not yet in ``self.templates`` is printed,
        appended to ``self.templates`` and written to ``self.chembank``.

        Templates for which BRICSBuild yields no product are skipped
        (previously the resulting ``StopIteration`` aborted the whole run).
        """
        self.generate_frag_templates()
        print('Merging fragments together to generate compounds...')
        for current_template in self.potential_cpd_templates:
            fragms = [Chem.MolFromSmiles(x) for x in sorted(current_template)]
            # BRICSBuild is lazy; take the first product, if there is one
            product = next(BRICS.BRICSBuild(fragms), None)
            if product is None:
                continue

            sampler = Chem.MolToSmiles(product, True)
            if sampler not in self.templates:
                print(sampler)
                self.templates.append(sampler)
                self.chembank.write(sampler + '\n')
Beispiel #16
0
    def fragment_database(self):
        """Build the fragment catalog and the BRICS fragment database.

        Sets ``self.fcat`` to a ``FragCatalog`` created from the functional
        group definitions file, then reads the SMILES database CSV under
        ``self.directory`` (second comma-separated column, header row
        skipped), BRICS-decomposes every molecule and stores the unique
        fragment SMILES in ``self.synthetic_frag_database``.
        """
        # NOTE(review): machine-specific absolute path to the RDKit data file
        fName = 'C:/RDKit_2017_03_2/Data/FunctionalGroups.txt'
        fparams = FragmentCatalog.FragCatParams(1, 6, fName)
        self.fcat = FragmentCatalog.FragCatalog(fparams)

        zinc_file = 'data/smiles_database.csv'
        # context manager so the file handle is closed after reading
        with open(self.directory + zinc_file, 'r') as handle:
            rows = handle.read().splitlines()
        # drop the header before splitting so it does not need a comma
        zinc_suppl = [row.split(',')[1] for row in rows[1:]]

        # skip SMILES that RDKit cannot parse (MolFromSmiles returns None)
        zinc_ms = [
            mol for mol in (Chem.MolFromSmiles(smi) for smi in zinc_suppl)
            if mol is not None
        ]

        pre_synthetic_frag_database = [
            BRICS.BRICSDecompose(mol) for mol in zinc_ms
        ]
        self.synthetic_frag_database = list(
            set(chain.from_iterable(pre_synthetic_frag_database)))
Beispiel #17
0
def getBits(mol):
    '''
    Split a molecule into pieces at its rotatable bonds.

    Parameters
    ----------
    mol : rdkit mol object to be broken up into fragments by breaking
    rotatable bonds

    Returns
    -------
    mols : A list of rdkit mol objects, one per disconnected piece

    '''
    # atom-index pairs matched by the rotatable bond pattern
    rotatable_pairs = mol.GetSubstructMatches(RotatableBondSmarts)

    # BreakBRICSBonds expects ((atom1, atom2), (label1, label2)) entries;
    # both dummy labels are set to 0
    bond_specs = [((begin, end), (0, 0)) for begin, end in rotatable_pairs]
    broken = BRICS.BreakBRICSBonds(mol, bonds=bond_specs)

    return list(Chem.GetMolFrags(broken, asMols=True))
    def __init__(self, radius, fpSize, IC50function, molFile):
        """Set up the fingerprint generator, molecules and BRICS bond types.

        Stores a Morgan fingerprint generator (``self.fpgen``), the IC50
        callback (``self.getIC50``) and the file path (``self.molFile``).
        The first 1000 newline-separated entries of ``molFile`` are parsed
        as SMILES and reduced to their fragment parent (``self.ms``).
        ``self.bondsToKeep`` lists the BRICS bond labels seen in those
        molecules, most frequent first.
        """
        self.fpgen = rdFingerprintGenerator.GetMorganGenerator(
            radius=radius, fpSize=fpSize)
        self.getIC50 = IC50function
        self.molFile = molFile

        # Read the SMILES file and convert each entry to an rdkit molecule
        with open(self.molFile) as f:
            raw_data = f.read().split("\n")

        parsed = [Chem.MolFromSmiles(entry) for entry in raw_data[:1000]]
        self.ms = [rdMolStandardize.FragmentParent(mol) for mol in parsed]

        # Count how often each BRICS bond label occurs across the molecules
        label_counts = Counter()
        for mol in self.ms:
            for _atoms, label in BRICS.FindBRICSBonds(mol):
                label_counts[label] += 1
        by_frequency = sorted(
            ((count, label) for label, count in label_counts.items()),
            reverse=True)

        # NOTE(review): an earlier comment said "top 10", but no truncation
        # is applied - every observed label is kept, in frequency order.
        self.bondsToKeep = [label for _count, label in by_frequency]
Beispiel #19
0
    def get_scaffolds(self,
                      scaffolding_method=ScaffoldingMethod.MurckoScaffold):
        """Compute deemed scaffolds for a given compound.

        For the Murcko methods the single scaffold is mapped back onto
        ``self.mol`` through the atoms' ``name`` property and stored in
        ``self._scaffolds`` under the method name. For BRICS every fragment
        gets its own ``self._scaffolds`` entry keyed ``<method>_<smiles>``
        holding its substructure matches in ``self.mol_no_h``.

        Args:
            scaffolding_method (ScaffoldingMethod, optional):
                Defaults to MurckoScaffold. Scaffolding method to use

        Returns:
            list[rdkit.Chem.rdchem.Mol]: Scaffolds found in the component.

        Raises:
            CCDUtilsError: if a RuntimeError or ValueError occurs while the
                scaffolds are computed (the original error is chained).
        """
        try:
            scaffolds = []

            if scaffolding_method == ScaffoldingMethod.MurckoScaffold:
                scaffolds = [(MurckoScaffold.GetScaffoldForMol(self.mol_no_h))]

            elif scaffolding_method == ScaffoldingMethod.MurckoGeneric:
                scaffolds = [
                    (MurckoScaffold.MakeScaffoldGeneric(self.mol_no_h))
                ]

            elif scaffolding_method == ScaffoldingMethod.Brics:
                scaffolds = BRICS.BRICSDecompose(self.mol_no_h)
                brics_smiles = [
                    re.sub(r"(\[[0-9]*\*\])", "[H]", i) for i in scaffolds
                ]  # replace dummy atoms with H's to get matches https://sourceforge.net/p/rdkit/mailman/message/35261974/
                # RemoveHs returns a new molecule; keep its result (the old
                # loop called it and threw the result away)
                brics_mols = [
                    rdkit.Chem.RemoveHs(rdkit.Chem.MolFromSmiles(x))
                    for x in brics_smiles
                ]

                brics_hits = [
                    self.mol_no_h.GetSubstructMatches(i) for i in brics_mols
                ]

                for index, brics_hit in enumerate(brics_hits):
                    smiles = rdkit.Chem.MolToSmiles(brics_mols[index])
                    name = scaffolding_method.name
                    source = 'RDKit scaffolds'
                    key = f'{name}_{smiles}'
                    brics_hit = conversions.listit(brics_hit)

                    if not smiles:
                        continue

                    if key not in self._scaffolds:
                        self._scaffolds[key] = SubstructureMapping(
                            name, smiles, source, brics_hit)

                return brics_mols

            for s in scaffolds:
                scaffold_atom_names = [
                    atom.GetProp('name') for atom in s.GetAtoms()
                ]
                # translate scaffold atoms into atom indices of self.mol
                mapping = []
                for at_name in scaffold_atom_names:
                    idx = [
                        atom.GetIdx() for atom in self.mol.GetAtoms()
                        if atom.GetProp('name') == at_name
                    ][0]
                    mapping.append(idx)

                smiles = rdkit.Chem.MolToSmiles(s)
                name = scaffolding_method.name
                source = 'RDKit scaffolds'

                if not smiles:
                    continue

                if name in self._scaffolds:
                    self._scaffolds[name].mappings.append(mapping)
                else:
                    self._scaffolds[name] = SubstructureMapping(
                        name, smiles, source, [mapping])

            return scaffolds

        except (RuntimeError, ValueError) as err:
            raise CCDUtilsError(
                f'Computing scaffolds using method {scaffolding_method.name} failed.'
            ) from err
Beispiel #20
0
print 'Argument List:', str(sys.argv)

if len(sys.argv ) == 3 :
    inp_sdf_file = sys.argv[1]
    out_sdf_file = sys.argv[2]
else:
   sys.exit ("Usage: fragmenter.py infile outfile")
   
   
try:
    suppl = Chem.SDMolSupplier(inp_sdf_file)
    catalog=set()
    for mol in suppl:
        if mol is None: continue
        print mol.GetNumAtoms() 
        #AllChem.Compute2DCoords(mol)
        pieces = BRICS.BRICSDecompose(mol)                   
        catalog.update(pieces)

    print('Generated: ', len(catalog), ' fragments.')
    ofile = Chem.SDWriter(out_sdf_file)    
    for frg in catalog: 
        cmol = Chem.MolFromSmiles(frg)
        AllChem.Compute2DCoords(cmol)
        ofile.write(cmol)        

            
except IOError:
   print >> sys.stderr, "Input file could not be opened"
   sys.exit(1)    
Beispiel #21
0
# Functional group table, one group per line with tab-separated columns:
# label, SMARTS pattern, human-readable name.
fgData = """AcidChloride	C(=O)Cl	Acid Chloride
CarboxylicAcid	C(=O)[O;H,-]	Carboxylic acid
SulfonylChloride	[$(S-!@[#6])](=O)(=O)(Cl)	Sulfonyl Chloride
Amine				[N;!H0;$(N-[#6]);!$(N-[!#6]);!$(N-C=[O,N,S])]	Amine
BoronicAcid			[$(B-!@[#6])](O)(O)		Boronic Acid
Isocyanate			[$(N-!@[#6])](=!@C=!@O)	Isocyanate
Alcohol				[O;H1;$(O-!@[#6;!$(C=!@[O,N,S])])]	Alcohol
Aldehyde			[CH;D2;!$(C-[!#6])]=O	Aldehyde
Halogen				[$([Cl,Br,I]-!@[#6]);!$([Cl,Br,I]-!@C-!@[F,Cl,Br,I]);!$([Cl,Br,I]-[C,S](=[O,S,N]))]	Halogen"""
# Split each line on runs of tabs, so the column padding above is irrelevant.
fglines = [re.split(r'\t+', x.strip()) for x in fgData.split('\n')]
# Column 0: labels used as extra header fields.
hLabels = [x[0] for x in fglines]
# Column 1: SMARTS compiled into query molecules, in the same order as hLabels.
patts = [Chem.MolFromSmarts(x[1]) for x in fglines]

# Header: the input header, one column per functional group, and a final
# column saying whether BRICS finds more than one fragment.
labels = inLs[0].strip().split(delim) + hLabels + ['HasBRICSBond?']
print(delim.join(labels))
for line in inLs[1:]:
  splitL = line.strip().split(delim)
  # the SMILES is taken from the second column of the row
  mol = Chem.MolFromSmiles(splitL[1])
  # one 'True'/'False' flag per functional group pattern
  splitL.extend(
      'True' if mol.HasSubstructMatch(fg) else 'False' for fg in patts)

  # more than one BRICS fragment means at least one BRICS bond was found
  bricsRes = BRICS.BRICSDecompose(mol)
  splitL.append('True' if len(bricsRes) > 1 else 'False')
  print(delim.join(splitL))
Beispiel #22
0
# Script: decompose the molecules of an SD file into BRICS fragments, rebuild
# new structures from the pooled fragments and write them to an SD file.
# Seed Python's RNG - presumably to make the generation reproducible.
random.seed(1)
# NOTE(review): this limit is never consulted in the code visible here; the
# loop at the bottom writes every structure the builder yields.
max_number_of_generated_structures = 100

# Load the molecules, skipping records the supplier could not parse (None).
molecules = [
    molecule for molecule in Chem.SDMolSupplier('logSdataset1290_2d.sdf')
    if molecule is not None
]
# molecules = [molecule for molecule in Chem.SmilesMolSupplier('logSdataset1290_2d.smi',
#                                                              delimiter='\t', titleLine=False)
#              if molecule is not None]

print(len(molecules))
# Pool the unique BRICS fragment SMILES of all molecules.
fragments = set()
for molecule in molecules:
    fragment = BRICS.BRICSDecompose(molecule, minFragmentSize=2)
    #    print(fragment)
    #    print(list(BRICS.FindBRICSBonds(molecule)))
    fragments.update(fragment)
print(len(fragments))
# print (fragments)

# Recombine the fragments into new structures.
generated_structures = BRICS.BRICSBuild(
    [Chem.MolFromSmiles(smiles) for smiles in fragments])
writer = Chem.SDWriter('generated_structures.sdf')
# writer = Chem.SmilesWriter('generated_structures.smi')
# NOTE(review): this counter is initialised but never incremented here, and
# the writer is not closed in the visible code - confirm against the full
# script.
number_of_generated_structures = 0
for generated_structure in generated_structures:
    # update the property cache (strict) before computing coordinates
    generated_structure.UpdatePropertyCache(True)
    AllChem.Compute2DCoords(generated_structure)
    writer.write(generated_structure)
Beispiel #23
0
    def get_fc0_frags(self):
        """
        Break the molecule at every BRICS bond and enumerate the resulting FC0 fragments. This is a needed first
        step, since these are used to build up all the more complex fragments.

        For each BRICS bond found in ``self.init_mol`` a pair of pseudo-atom ids is drawn from a counter, the BRICS
        types of both ends are stored in ``self.brics_legend`` and ``self.breakAndReplace`` is called for the bond.
        The connected components of ``self.atoms`` are then collected into ``RecomposerMol`` objects.

        Side effects: sets ``self.edges``, ``self.fc0_index`` and ``self.fragments[0]``, updates
        ``self.brics_legend`` and appends every new fragment to ``self.frag_cache``.
        :return: None
        """
        fc0_frags = []
        brics = BRICS.FindBRICSBonds(self.init_mol)
        brics_counter = Counter(
            counter=100
        )  # counter starts at 100 since it's not going to be in use as a p.atom
        edges = []
        for bond, brics_type in brics:
            # bond: the two atom indices; brics_type: the BRICS labels of the two ends
            i, j = bond
            x, y = brics_type
            pseudo_i = brics_counter.increment()
            pseudo_j = brics_counter.increment(
            )  # this is always done in pairs so that eg. 100 and 101 go together
            self.brics_legend[pseudo_i] = int(
                x)  # remember to store the BRICS typing, we need it later
            self.brics_legend[pseudo_j] = int(y)
            edges.append((pseudo_i, pseudo_j))  # keep a record of this bond
            self.breakAndReplace(i, j, pseudo_i, pseudo_j)
        self.edges = edges

        # this while loop finds parts of a graph that are no longer connected, meaning FC0 fragments in this case
        untouched = set(self.atoms.keys()
                        )  # keep a list of atoms we haven't investigated yet
        while untouched:  # if there are still atoms we haven't processed
            # seed a new component with an arbitrary unvisited atom
            currSet = set()
            currSet.add(untouched.pop())
            newMol = RecomposerMol()
            newAtoms = {}
            newPseudoAtoms = {}
            # every fragment receives its own copy of the complete legend
            new_brics_legend = {}
            new_brics_legend.update(self.brics_legend)
            while currSet:
                # perform graph search
                currAtom = self.atoms[currSet.pop()]
                for bond in currAtom.bonds:
                    # idx is the atom at the other end of this bond
                    idx = None
                    if currAtom.index == bond.endAtomIdx:
                        idx = bond.beginAtomIdx
                    elif currAtom.index == bond.beginAtomIdx:
                        idx = bond.endAtomIdx
                    # add connected atoms to curr mol atom set
                    if idx in untouched:
                        currSet.add(idx)
                        untouched.remove(idx)
                    else:
                        continue
                newAtoms[currAtom.index] = self._copyAtom(
                    currAtom)  # watch out for reference errors!
                # atoms with atomic number 0 are the pseudo atoms; index them by isotope
                if currAtom.atom_num == 0:
                    newPseudoAtoms[currAtom.isotope] = currAtom.index
            newMol.atoms = newAtoms
            newMol.pseudoIndex = newPseudoAtoms
            newMol.brics_legend = new_brics_legend
            fc0_frags.append(newMol)
            self.frag_cache.append(newMol)
        # now need to index the fragments to make it easier to bond them
        fc0_index = {}
        for i, frag in enumerate(fc0_frags):
            for pseudo_atom in frag.pseudoIndex.keys():
                fc0_index[
                    pseudo_atom] = i  # each FC0 fragment should be accessible by pseudoatom
        self.fc0_index = fc0_index
        self.fragments[0] = fc0_frags
Beispiel #24
0
 def fragment_target(self):
     """Store the BRICS fragments of ``self.target`` (a SMILES string) as a
     list in ``self.target_fragments``."""
     target_mol = Chem.MolFromSmiles(self.target)
     self.target_fragments = list(BRICS.BRICSDecompose(target_mol))
Beispiel #25
0
        if (len(Chem.MolToSmiles(rm3).split(".")) > 1):
            count1 = count1 + 1
            [functional_list.append(i) for i in Chem.MolToSmiles(rm3).split(".")]
        else: 
            if(len(Chem.MolToSmiles(rm2).split(".")) > 1):
                count2 = count2 + 1
                [functional_list.append(i) for i in Chem.MolToSmiles(rm2).split(".")]

            else:
                if(len(Chem.MolToSmiles(rm).split(".")) > 1):
                    count3 = count3 + 1
                    [functional_list.append(i) for i in Chem.MolToSmiles(rm).split(".")]
                    
                else:
                    pieces_smi = Chem.BRICS.BRICSDecompose(temp)
                    pieces = [Chem.MolFromSmiles(x) for x in BRICS.BRICSDecompose(temp)]
                    count_fail_no_match += 1
                    print(can_smi)

                    
    except:
        count_fail += 1
# Final report: the distinct functional groups that were found, how many there
# are, and the per-category counters.
unique_functional_groups = list(set(functional_list))
print(unique_functional_groups)
print(len(unique_functional_groups))
print(count1, count2, count3)
print("total processed: "+ str(count1+count2+count3))
print("no substructured: "+ str(count_fail_no_match))
print("fail processed: "+ str(count_fail))


Beispiel #26
0
def update_atom_position(mol1, mol2):
    """Move the atoms that ``mol2`` shares with ``mol1`` onto ``mol1``'s coordinates.

    The maximum common substructure (MCS) of both molecules is determined
    and, in a copy of ``mol2``, every MCS atom is placed at the position of
    its counterpart in ``mol1``. If the summed displacement exceeds 0.1, the
    copy is minimised with MMFF, with the MCS atoms fixed and position
    constraints on all other atoms.

    Args:
        mol1: template molecule with a conformer.
        mol2: molecule to be adjusted; it is not modified.

    Returns:
        The adjusted copy of ``mol2``.

    Raises:
        IndexError: if the MCS query matches neither molecule.
    """
    mol_copy = Chem.Mol(mol2)

    # This is a work-around to get a seedSmarts for the FMCS algorithm
    # and prevent the occasional hanging of FMCS
    # Might be unnecessary with future versions of rdkit
    core_frags = BRICS.BRICSDecompose(Chem.RemoveHs(mol1))
    frag_smarts = []
    for frag in core_frags:
        # Strip the BRICS isotope label from the dummy atoms, "[14*]" -> "[*]".
        # Raw string with escaped brackets: the former pattern was a
        # malformed character class that only matched by accident.
        smi_str = re.sub(r'\[[1-9][0-9]{0,2}\*\]', '[*]', frag)
        # Relax all bond types to "any bond" and turn dummies into wildcards
        frag_smarts.append(
            Chem.MolToSmarts(Chem.MolFromSmiles(smi_str)).replace(
                ':', '~').replace('-', '~').replace('=',
                                                    '~').replace('#0', '*'))

    # The empty string means "no seed", so FindMCS is never handed None
    seed = ''
    for query in frag_smarts:
        if mol_copy.HasSubstructMatch(Chem.MolFromSmarts(query)):
            seed = query
            break

    # Now get MCSS
    res = rdFMCS.FindMCS([mol1, mol_copy], seedSmarts=seed)
    mcs_q = Chem.MolFromSmarts(res.smartsString)

    # Get atom IDs (first match in each molecule)
    template = mol1.GetSubstructMatches(mcs_q)[0]
    hit_atom = mol_copy.GetSubstructMatches(mcs_q)[0]

    # Update XYZ coords of MCSS
    running_distance = 0
    for template_idx, hit_idx in zip(template, hit_atom):
        origin = mol1.GetConformer().GetAtomPosition(template_idx)
        pos = mol_copy.GetConformer().GetAtomPosition(hit_idx)

        p1 = np.array([origin.x, origin.y, origin.z])
        p2 = np.array([pos.x, pos.y, pos.z])

        sq_dist = np.sum((p1 - p2)**2, axis=0)
        dist = np.sqrt(sq_dist)

        running_distance += dist

        mol_copy.GetConformer().SetAtomPosition(hit_idx,
                                                (origin.x, origin.y, origin.z))

    if running_distance > 0.1:
        # relax atoms outside MCSS
        matched = set(hit_atom)
        res_atom = [
            atom.GetIdx() for atom in mol_copy.GetAtoms()
            if atom.GetIdx() not in matched
        ]

        # do minimization
        mp = ChemicalForceFields.MMFFGetMoleculeProperties(mol_copy)
        ff = ChemicalForceFields.MMFFGetMoleculeForceField(mol_copy, mp)

        for val in hit_atom:
            ff.AddFixedPoint(val)
        for val in res_atom:
            ff.MMFFAddPositionConstraint(val, 1, 5)

        ff.Minimize()

    return mol_copy
Beispiel #27
0
# Script: collect BRICS fragments from a SMILES file and flag those with the
# requested number of free bonds ('*' characters in the fragment SMILES).
number_of_free_bonds = 0  # The number of free bond(s) restricted. If 0, all fragments are saved

# load molecules, skipping entries the supplier could not parse (None)
molecules = [
    molecule for molecule in Chem.SmilesMolSupplier(
        'logS_molecules_1290.smi', delimiter='\t', titleLine=False)
    if molecule is not None
]
# molecules = [molecule for molecule in Chem.SDMolSupplier('logSdataset1290_2d.sdf') if molecule is not None]
print('number of molecules :', len(molecules))

# generate fragments: pool the unique BRICS fragment SMILES of all molecules
fragments = set()
for molecule in molecules:
    fragment = BRICS.BRICSDecompose(molecule, minFragmentSize=2)
    fragments.update(fragment)

# select and arrange fragments
# NOTE(review): new_fragments and number_of_generated_structures are not used
# in the code visible here, and `flag` is computed but never consumed - the
# snippet appears to be cut off.
new_fragments = []
number_of_generated_structures = 0
for fragment in fragments:
    free_bond = []
    # character positions of every '*' in the fragment SMILES
    free_bond = [index for index, atom in enumerate(fragment) if atom == '*']
    flag = False
    if number_of_free_bonds == 0:
        # no restriction: any fragment with at least one free bond qualifies
        if len(free_bond):
            flag = True
    else:
        # otherwise the number of free bonds must match exactly
        if len(free_bond) == number_of_free_bonds:
            flag = True
Beispiel #28
0
# In[ ]:




# In[8]:

from rdkit.Chem import AllChem
from rdkit.Chem import BRICS
from rdkit.Chem import rdMolDescriptors


# In[21]:

# SMILES of the molecule and its BRICS fragments; `mol` comes from an
# earlier notebook cell that is not shown here.
smiles = Chem.MolToSmiles(mol)
frag = BRICS.BRICSDecompose(mol)
print(smiles)


# In[20]:

# number of BRICS fragments
print(len(frag))


# In[11]:

# the fragments themselves
print(frag)


# In[12]:
Beispiel #29
0
# Timing benchmarks: each enabled entry of `tests` runs one operation over all
# `mols`, logs the elapsed wall-clock time and appends it to `ts`.
# Note that the logged "ResultsN" label is one higher than the tests[] index.
if tests[9]:
    logger.info('Writing: Mol blocks')
    t1 = time.time()
    for mol in mols:
        # the result is discarded; only the run time matters
        mb = Chem.MolToMolBlock(mol)
    t2 = time.time()
    logger.info('Results10: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

if tests[10]:
    from rdkit.Chem import BRICS
    logger.info('BRICS decomposition')
    t1 = time.time()
    for mol in mols:
        # the result is discarded; only the run time matters
        d = BRICS.BreakBRICSBonds(mol)
    t2 = time.time()
    logger.info('Results11: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

if tests[11]:
    logger.info('Generate 2D coords')
    t1 = time.time()
    for mol in mols:
        AllChem.Compute2DCoords(mol)
    t2 = time.time()
    logger.info('Results12: %.2f seconds' % (t2 - t1))
    ts.append(t2 - t1)

# NOTE(review): only the log line of this test is visible; the timed body is
# cut off in this snippet.
if tests[12]:
    logger.info('Generate topological fingerprints')
# ['*C(=O)CC', '*CCOC(=O)CC', '*CCOc1ccccc1', '*OCCOc1ccccc1', '*c1ccccc1']
# # 3.2 The BRICS method
# RDKit also provides another way to cut a molecule into fragments: the BRICS method. BRICS breaks the molecule mainly at synthetically accessible bonds, so the returned data structure holds the distinct fragments derived from the molecule; the dummy atoms (*) show where the cuts were made.

# BRICS-decompose the molecule shown in the image written below
smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N'
m = Chem.MolFromSmiles(smi)
Draw.MolToImageFile(
    m,
    "/drug_development/studyRdkit/st_rdcit/img/mol34.jpg",
    size=(600, 400),
    legend=
    'zanubrutinib(C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N)'
)

# fragment SMILES produced by the decomposition
frags = (BRICS.BRICSDecompose(m))
print(frags)

# parse every fragment SMILES back into a molecule
mols = []
for fsmi in frags:
    mols.append(Chem.MolFromSmiles(fsmi))

# draw the fragments on a grid (empty legends) and save the image
img = Draw.MolsToGridImage(mols,
                           molsPerRow=3,
                           subImgSize=(200, 200),
                           legends=['' for x in mols])
img.save('/drug_development/studyRdkit/st_rdcit/img/mol35.jpg')
# 4. Combining molecular fragments -- the BRICS method
# Build new molecules by BRICS recombination of the fragments above
newms = BRICS.BRICSBuild(mols)
# materialise all generated molecules in a list
newms = list(newms)