def count_struct_isomers(smiles_list):
    """
    Counts the number of molecules with the same molecular formula.

    Three tallies are built while walking the input (formula -> count,
    formula -> smiles list, exact weight -> count); only the weight tally
    is returned.

    Keyword arguments:
    smiles_list -- a list of smiles strings of the set/subset of molecules
                   to look at

    Returns:
    dict mapping the exact molecular weight computed by ExactMolWt to the
    number of input molecules that produced exactly that weight value.
    """
    # formula : isomer count
    dict_isomers = {}
    # formula : smiles list
    dict_smiles = {}
    # weight : isomer count
    dict_exactwt = {}
    for mol_smiles in smiles_list:
        mol = MolFromSmiles(mol_smiles)
        formula = CalcMolFormula(mol)
        weight = ExactMolWt(mol)

        # increase the isomer count by 1
        dict_isomers[formula] = dict_isomers.get(formula, 0) + 1
        # These are MOD's smiles, not RDKit's
        dict_smiles.setdefault(formula, []).append(mol_smiles)
        # Weight calculated by RDKit, not MOD's in-built.
        # The weight tally is keyed independently of the formula tally: the
        # weight is a float, so two molecules with the same formula are not
        # guaranteed to yield a bit-identical key. Looking the key up with a
        # default avoids a KeyError in that case and never resets a count
        # that already exists.
        dict_exactwt[weight] = dict_exactwt.get(weight, 0) + 1
    return dict_exactwt  # modify this as per your needs
def ghose_filter(self, filepath, exclude_salt=False):
    r"""
    Filter the molecules of the given file by atom count and exact weight.

    A block is kept when its molecule parses, has between 20 and 70 atoms
    (inclusive) and an exact molecular weight between 180 and 480
    (inclusive). If exclude_salt is true, a molecule containing any atom
    whose symbol is not in self.no_salt_atoms is filtered out as well.

    filepath (str): path to the .mol2 file or .gz file.
    exclude_salt (bool): whether to filter out molecules containing atoms
        outside self.no_salt_atoms.
    =======================================================================
    return (str): the kept blocks joined by blank lines, in Mol2 file format.
    """
    kept_blocks = []
    for block in tqdm(Mol2Reader(filepath).get_blocks()):
        mol = Chem.rdmolfiles.MolFromMol2Block(block, sanitize=False)
        if mol is None:
            # Unparseable block: drop it silently.
            continue

        if not 20 <= mol.GetNumAtoms() <= 70:
            continue

        if not 180 <= ExactMolWt(mol) <= 480:
            continue

        # The atom scan only happens when salt exclusion was requested.
        if exclude_salt and any(
                atom.GetSymbol() not in self.no_salt_atoms
                for atom in mol.GetAtoms()):
            continue

        kept_blocks.append(block)
    return "\n\n".join(kept_blocks)
def calc_properties(smi):
    """Compute four molecular descriptors for one SMILES input.

    ``smi`` must provide a ``.numpy()`` method whose result is handed to
    ``Chem.MolFromSmiles``.

    Returns a 4-tuple ``(logP, TPSA, MW, MR)``; each element is the raw
    descriptor value wrapped with ``np.asarray``.
    """
    mol = Chem.MolFromSmiles(smi.numpy())
    # Order matters: it defines the positions in the returned tuple.
    descriptor_fns = (MolLogP, CalcTPSA, ExactMolWt, MolMR)
    return tuple(np.asarray(fn(mol)) for fn in descriptor_fns)
def fill_calculated_fields(comp: metob.Compound, mol: Chem.rdchem.Mol) -> None:
    """Populate the empty calculated fields of ``comp`` from ``mol``.

    Each field listed below keeps its current value when that value is
    truthy; otherwise it is replaced by the computed fallback. Afterwards the
    neutralized fields are filled by ``fill_neutralized_fields``.
    """
    assert mol is not None

    # (attribute name, lazy fallback) pairs, processed in this order. The
    # fallback is only evaluated when the current attribute value is falsy.
    fallbacks = (
        ("inchi_key", lambda: Chem.inchi.InchiToInchiKey(comp.inchi)),
        ("formula", lambda: Chem.rdMolDescriptors.CalcMolFormula(mol)),
        ("mono_isotopic_molecular_weight", lambda: ExactMolWt(mol)),
        ("permanent_charge", lambda: Chem.GetFormalCharge(mol)),
        ("number_components", lambda: 1),
        ("num_free_radicals",
         lambda: Chem.Descriptors.NumRadicalElectrons(mol)),
    )
    for field_name, compute in fallbacks:
        setattr(comp, field_name, getattr(comp, field_name) or compute())

    fill_neutralized_fields(comp, mol)
def _printinfo(mol):
    """Write an embedded, hydrogen-added copy of ``mol`` and print its stats.

    The copy is written as a mol block to ``output.mol`` inside
    ``output_dir``. The output path, exact molecular weight, atom count and
    bond count are then printed, each prefixed by its key constant.
    """
    hydrogenated = Chem.AddHs(mol)
    AllChem.EmbedMolecule(hydrogenated)

    output_path = os.path.join(output_dir, "output.mol")
    with open(output_path, "w") as fp:
        # A trailing newline follows the mol block.
        fp.write(Chem.MolToMolBlock(hydrogenated) + "\n")

    print(CONFORMATION_KEY, output_path)
    print(MOLW_KEY, ExactMolWt(hydrogenated))
    print(ATOMCOUNT_KEY, hydrogenated.GetNumAtoms())
    print(BONDCOUNT_KEY, hydrogenated.GetNumBonds())
def sdf_text_worker(merged_results, vendors, num_mols, start_time, mol_counter,
                    fragment_counter, drug_like_counter, big_counter,
                    parent_fragment_collector, parent_drug_like_collector,
                    parent_big_collector, failures, addhs, embed, verbose):
    """Convert rows of ``merged_results`` to SDF text, binned by weight.

    For every row a molecule is built from ``row['smiles']`` (optionally with
    hydrogens added and embedded), named after the non-empty vendor
    identifiers (truncated to 20 characters), and sorted by exact molecular
    weight into one of three bins:

    * below 300        -> fragment
    * 300 up to 700    -> drug-like
    * 700 up to 1200   -> big

    Molecules of weight 1200 or more are not collected. Rows that fail are
    recorded in ``failures`` as ``'write_error <smiles>'`` and are not
    collected either. Every row, successful or not, advances ``mol_counter``
    and triggers a progress update.

    The counter arguments must provide ``get_lock()`` and ``.value``
    (presumably ``multiprocessing.Value`` objects -- confirm against caller).
    The ``parent_*_collector`` arguments are extended once, after all rows
    have been processed.

    Returns None.
    """
    if not verbose:
        RDLogger.DisableLog('rdApp.*')
    fragment_collector, drug_like_collector, big_collector = [], [], []
    for _, row in merged_results.iterrows():
        try:
            mol = Chem.MolFromSmiles(row['smiles'])
            if addhs:
                mol = Chem.AddHs(mol)
            if embed:
                AllChem.EmbedMolecule(mol)
            properties = {vendor: row[vendor] for vendor in vendors}
            mol_name = ','.join([
                identifier for identifier in properties.values()
                if len(identifier) > 0
            ])
            if len(mol_name) > 20:
                mol_name = mol_name[:17] + '...'
            mol.SetProp('_Name', mol_name)
            properties['smiles'] = row['smiles']
            molecular_weight = ExactMolWt(mol)
        except Exception:
            # Only ordinary errors count as a failed row; KeyboardInterrupt
            # and SystemExit must still be able to stop the worker.
            failures.append(' '.join(['write_error', row['smiles']]))
            molecular_weight = None

        # None marks a failed row; it is skipped like an oversized molecule.
        if molecular_weight is not None and molecular_weight < 1200:
            if molecular_weight < 300:
                with fragment_counter.get_lock():
                    fragment_counter.value += 1
                fragment_collector.append(sdf_text(mol, properties))
            elif molecular_weight < 700:
                with drug_like_counter.get_lock():
                    drug_like_counter.value += 1
                drug_like_collector.append(sdf_text(mol, properties))
            else:
                with big_counter.get_lock():
                    big_counter.value += 1
                big_collector.append(sdf_text(mol, properties))

        with mol_counter.get_lock():
            mol_counter.value += 1
            # Snapshot under the lock so the progress figures below are all
            # computed from the same count.
            processed = mol_counter.value
        update_progress(processed / num_mols, 'Progress of writing',
                        ((time.time() - start_time) / processed) *
                        (num_mols - processed))

    parent_fragment_collector.extend(fragment_collector)
    parent_drug_like_collector.extend(drug_like_collector)
    parent_big_collector.extend(big_collector)
def mol_remover(smile, mol):
    """Decide whether a molecule should be dropped.

    Returns a ``(remove, reason)`` pair. ``reason`` is a message string when
    ``remove`` is True and ``0`` when the molecule is kept. A molecule is
    removed when its exact molecular weight exceeds 700 or when ``smile`` is
    empty; if both apply, the weight message is the one reported.

    A salt check based on SaltRemover used to sit here but is disabled.
    """
    # Both conditions are always evaluated, in this order.
    no_smile = len(smile) == 0
    too_heavy = ExactMolWt(mol) > 700

    if too_heavy:
        return True, "Molecule too heavy, removed"
    if no_smile:
        return True, "No smile this line, removed"
    return False, 0
def calc_properties(smi):
    """
    Compute standardized molecular descriptors for one SMILES input.

    logP is standardized directly with LOGP_MEAN / LOGP_STD. TPSA, MW and MR
    are first transformed with log10(x + 1) and then standardized with their
    respective *_MEAN / *_STD constants.

    :param smi: object whose ``.numpy()`` result is passed to
        ``Chem.MolFromSmiles``
    :return: logP, TPSA, MR, MW (note that MR comes before MW)
    """
    mol = Chem.MolFromSmiles(smi.numpy())

    def _log_standardize(raw, mean, std):
        # log10(x + 1) transform followed by z-scoring.
        return (np.log10(np.asarray(raw) + 1) - mean) / std

    logP = (np.asarray(MolLogP(mol)) - LOGP_MEAN) / LOGP_STD
    tpsa = _log_standardize(CalcTPSA(mol), TPSA_MEAN, TPSA_STD)
    mw = _log_standardize(ExactMolWt(mol), MW_MEAN, MW_STD)
    mr = _log_standardize(MolMR(mol), MR_MEAN, MR_STD)
    return logP, tpsa, mr, mw