Beispiel #1
0
def count_struct_isomers(smiles_list):
    """
    Count how many of the given molecules share each exact molecular weight.

    Every SMILES string is parsed with ``MolFromSmiles``; its molecular
    formula (``CalcMolFormula``) and exact weight (``ExactMolWt``) are then
    computed. Per-formula isomer counts and per-formula SMILES lists are
    tallied as well, but only the per-weight counts are returned.

    Keyword arguments:
    smiles_list -- a list of smiles strings of the set/subset of molecules to look at
    Returns: a dict mapping each exact molecular weight to the number of
    input molecules that have that weight
    """
    # formula: isomer count
    dict_isomers = {}
    # formula : smiles list
    dict_smiles = {}
    # weight : isomer count
    dict_exactwt = {}

    for mol_smiles in smiles_list:
        mol = MolFromSmiles(mol_smiles)
        formula = CalcMolFormula(mol)
        weight = ExactMolWt(mol)

        dict_isomers[formula] = dict_isomers.get(formula, 0) + 1
        # These are MOD's smiles, not RDKit's
        dict_smiles.setdefault(formula, []).append(mol_smiles)
        # Weight calculated by RDKit, not MOD's in-built.
        # The weight tally is kept independent of the formula tally: keying
        # the update on "formula already seen" raises KeyError when a known
        # formula arrives with a weight that has no entry yet, and resets the
        # count when a new formula arrives with an already-counted weight.
        dict_exactwt[weight] = dict_exactwt.get(weight, 0) + 1
    return dict_exactwt  # modify this as per your needs
Beispiel #2
0
    def ghose_filter(self, filepath, exclude_salt=False):
        r""" Keep only the Mol2 blocks that pass the size/weight screen.

        The blocks are obtained from ``Mol2Reader(filepath).get_blocks()`` and
        each one is parsed (without sanitization) into a molecule. A block is
        dropped when it cannot be parsed, when its atom count is outside
        20..70, or when its exact molecular weight is outside 180..480. With
        ``exclude_salt`` enabled, a block is also dropped if any of its atoms
        has a symbol that is not listed in ``self.no_salt_atoms``.

        filepath (str): path handed to ``Mol2Reader``.
        exclude_salt (bool): whether to drop molecules containing atoms that
            are not in ``self.no_salt_atoms``.
        =======================================================================
        return (str): the surviving blocks joined by a blank line.
        """
        kept = []
        for block in tqdm(Mol2Reader(filepath).get_blocks()):
            mol = Chem.rdmolfiles.MolFromMol2Block(block, sanitize=False)
            if mol is None:
                continue

            # Atom-count window.
            if not 20 <= mol.GetNumAtoms() <= 70:
                continue

            # Molecular-weight window.
            weight = ExactMolWt(mol)
            if weight < 180 or weight > 480:
                continue

            # Optional screen for atoms outside the allowed symbol list.
            if exclude_salt and any(
                    atom.GetSymbol() not in self.no_salt_atoms
                    for atom in mol.GetAtoms()):
                continue

            kept.append(block)

        return "\n\n".join(kept)
Beispiel #3
0
def calc_properties(smi):
    """Compute four descriptors for a single SMILES input.

    :param smi: object whose ``numpy()`` result is passed to
        ``Chem.MolFromSmiles`` (presumably a tensor wrapping a SMILES
        string -- confirm against the caller)
    :return: tuple ``(logP, TPSA, MW, MR)``, each wrapped with ``np.asarray``
    """
    mol = Chem.MolFromSmiles(smi.numpy())
    # The SAS score (calculateScore) is not computed here.
    descriptor_fns = (MolLogP, CalcTPSA, ExactMolWt, MolMR)
    log_p, tpsa, mol_wt, mol_refr = (
        np.asarray(fn(mol)) for fn in descriptor_fns)
    return log_p, tpsa, mol_wt, mol_refr
Beispiel #4
0
def fill_calculated_fields(comp: metob.Compound, mol: Chem.rdchem.Mol) -> None:
    """Populate the calculated fields of ``comp`` that are still falsy.

    Each field keeps its current value when that value is truthy; otherwise
    it is replaced by a value derived from ``mol`` (or from ``comp.inchi``
    for the InChIKey, or the constant 1 for the component count). Afterwards
    ``fill_neutralized_fields`` is called with the same arguments.

    :param comp: compound record updated in place
    :param mol: molecule used to derive the missing values; must not be None
    """
    assert mol is not None

    # (field name, fallback factory) pairs, processed in this order. The
    # factories are only invoked when the current field value is falsy.
    fallbacks = (
        ("inchi_key", lambda: Chem.inchi.InchiToInchiKey(comp.inchi)),
        ("formula", lambda: Chem.rdMolDescriptors.CalcMolFormula(mol)),
        ("mono_isotopic_molecular_weight", lambda: ExactMolWt(mol)),
        ("permanent_charge", lambda: Chem.GetFormalCharge(mol)),
        ("number_components", lambda: 1),
        ("num_free_radicals",
         lambda: Chem.Descriptors.NumRadicalElectrons(mol)),
    )
    for field_name, make_default in fallbacks:
        setattr(comp, field_name, getattr(comp, field_name) or make_default())

    fill_neutralized_fields(comp, mol)
Beispiel #5
0
 def _printinfo(mol):
     """Write an embedded, hydrogen-added copy of ``mol`` and report on it.

     Hydrogens are added to ``mol`` and ``AllChem.EmbedMolecule`` is run on
     the result. Its mol block is written to ``output.mol`` inside
     ``output_dir``. The output path, exact molecular weight, atom count and
     bond count are then printed, each prefixed by its key constant.
     """
     with_hs = Chem.AddHs(mol)
     AllChem.EmbedMolecule(with_hs)

     output_path = os.path.join(output_dir, "output.mol")
     with open(output_path, "w") as fp:
         # Mol block followed by a single newline.
         fp.write(Chem.MolToMolBlock(with_hs) + "\n")

     print(CONFORMATION_KEY, output_path)
     print(MOLW_KEY, ExactMolWt(with_hs))
     print(ATOMCOUNT_KEY, with_hs.GetNumAtoms())
     print(BONDCOUNT_KEY, with_hs.GetNumBonds())
Beispiel #6
0
def sdf_text_worker(merged_results, vendors, num_mols, start_time, mol_counter,
                    fragment_counter, drug_like_counter, big_counter,
                    parent_fragment_collector, parent_drug_like_collector,
                    parent_big_collector, failures, addhs, embed, verbose):
    """Convert rows of ``merged_results`` to SDF text, binned by weight.

    For every row a molecule is built from ``row['smiles']``, optionally
    hydrogen-added (``addhs``) and embedded (``embed``). Its ``_Name``
    property is the comma-joined non-empty vendor identifiers, shortened to
    17 characters plus ``'...'`` when longer than 20 characters. The
    molecule is then sorted by exact molecular weight into one of three
    bins:

    * below 300          -> fragment
    * 300 up to (not including) 700  -> drug-like
    * 700 up to (not including) 1200 -> big

    Molecules weighing 1200 or more are not written to any bin. Rows that
    raise during processing are recorded in ``failures`` as
    ``'write_error <smiles>'`` and are not binned either.

    :param merged_results: table-like object providing ``iterrows()``; each
        row is indexed by ``'smiles'`` and by every name in ``vendors``
    :param vendors: names of the identifier columns to read from each row
    :param num_mols: total number of molecules, used for progress reporting
    :param start_time: ``time.time()`` reference used to estimate the
        remaining time
    :param mol_counter: shared counter of processed rows (needs
        ``get_lock()`` and ``value``; presumably a ``multiprocessing.Value``
        -- confirm against the caller)
    :param fragment_counter: shared counter for the fragment bin
    :param drug_like_counter: shared counter for the drug-like bin
    :param big_counter: shared counter for the big bin
    :param parent_fragment_collector: list-like extended with the fragment
        SDF texts once all rows are processed
    :param parent_drug_like_collector: same, for the drug-like bin
    :param parent_big_collector: same, for the big bin
    :param failures: list-like receiving one message per failed row
    :param addhs: add explicit hydrogens before writing
    :param embed: run ``AllChem.EmbedMolecule`` before writing
    :param verbose: when false, RDKit logging is disabled
    """
    if not verbose:
        RDLogger.DisableLog('rdApp.*')
    fragment_collector, drug_like_collector, big_collector = [], [], []
    for _, row in merged_results.iterrows():
        try:
            mol = Chem.MolFromSmiles(row['smiles'])
            if addhs:
                mol = Chem.AddHs(mol)
            if embed:
                AllChem.EmbedMolecule(mol)
            properties = {vendor: row[vendor] for vendor in vendors}
            mol_name = ','.join([
                identifier for identifier in properties.values()
                if len(identifier) > 0
            ])
            if len(mol_name) > 20:
                mol_name = mol_name[:17] + '...'
            mol.SetProp('_Name', mol_name)
            properties['smiles'] = row['smiles']
            molecular_weight = ExactMolWt(mol)
        except Exception:
            # Only ordinary errors are swallowed; KeyboardInterrupt and
            # SystemExit must still be able to stop the worker. format() is
            # used so that a non-string SMILES cell cannot make the failure
            # report itself raise.
            failures.append('write_error {}'.format(row['smiles']))
            # Sentinel above every bin limit: the row is counted as
            # processed but not written to any bin.
            molecular_weight = 10000

        # Select the bin for this weight; None means "do not write".
        if molecular_weight < 300:
            counter, collector = fragment_counter, fragment_collector
        elif molecular_weight < 700:
            counter, collector = drug_like_counter, drug_like_collector
        elif molecular_weight < 1200:
            counter, collector = big_counter, big_collector
        else:
            counter, collector = None, None

        if counter is not None:
            with counter.get_lock():
                counter.value += 1
            collector.append(sdf_text(mol, properties))

        with mol_counter.get_lock():
            mol_counter.value += 1
            # Remaining time = average seconds per processed row so far
            # multiplied by the number of rows still to do.
            update_progress(mol_counter.value / num_mols,
                            'Progress of writing',
                            ((time.time() - start_time) / mol_counter.value) *
                            (num_mols - mol_counter.value))
    parent_fragment_collector.extend(fragment_collector)
    parent_drug_like_collector.extend(drug_like_collector)
    parent_big_collector.extend(big_collector)
Beispiel #7
0
def mol_remover(smile, mol):
    """
    Decide whether a molecule should be discarded.

    :param smile: SMILES text of the entry; an empty one marks it for removal
    :param mol: molecule whose exact weight is checked against 700
    :return: ``(remove, reason)`` -- ``remove`` is a bool and ``reason`` is
        the message of the last check that fired, or ``0`` when none did
    """
    verdict = (False, 0)
    if len(smile) == 0:
        verdict = (True, "No smile this line, removed")
    # The weight check always runs and, when it fires, overrides the reason
    # set by the empty-SMILES check.
    if ExactMolWt(mol) > 700:
        verdict = (True, "Molecule too heavy, removed")
    # Salt screening (SaltRemover with defnData "[Cl]") is currently disabled.
    return verdict
def calc_properties(smi):
    """
    Compute standardized descriptors for a single SMILES input.

    logP is standardized directly with ``LOGP_MEAN`` / ``LOGP_STD``. TPSA,
    MW and MR are first transformed with ``log10(x + 1)`` and then
    standardized with their own ``*_MEAN`` / ``*_STD`` constants.

    :param smi: object whose ``numpy()`` result is passed to
        ``Chem.MolFromSmiles`` (presumably a tensor wrapping a SMILES
        string -- confirm against the caller)
    :return: logP, TPSA, MR, MW (in that order), all standardized
    """
    mol = Chem.MolFromSmiles(smi.numpy())

    log_p_z = (np.asarray(MolLogP(mol)) - LOGP_MEAN) / LOGP_STD
    tpsa_z = (np.log10(np.asarray(CalcTPSA(mol)) + 1) - TPSA_MEAN) / TPSA_STD
    # The SAS score (calculateScore) is not computed here.
    mw_z = (np.log10(np.asarray(ExactMolWt(mol)) + 1) - MW_MEAN) / MW_STD
    mr_z = (np.log10(np.asarray(MolMR(mol)) + 1) - MR_MEAN) / MR_STD

    return log_p_z, tpsa_z, mr_z, mw_z