def read_standardise(smi) -> Chem.rdchem.Mol:
    """Parse a SMILES string and return its standardised parent molecule.

    The parsed molecule is passed through ``standardizer.standardize_mol``
    and then ``standardizer.get_parent_mol``; the first element of the
    latter's result is returned.

    :param smi: SMILES string to parse.
    :return: the standardised parent molecule, or the molecule built from an
        empty SMILES string when ``smi`` cannot be parsed (a warning is
        emitted in that case).
    """
    try:
        m = Chem.MolFromSmiles(smi)
    except Exception:
        # A parser that raises is handled exactly like one that returns None.
        m = None

    # Explicit check instead of `assert`: assertions are stripped when Python
    # runs with -O, which would let a None molecule reach the standardizer.
    if m is None:
        warnings.warn('SMILES: {} is invalid'.format(smi))
        return Chem.MolFromSmiles('')

    m = standardizer.standardize_mol(m)
    m = standardizer.get_parent_mol(m)[0]
    return m
Exemple #2
0
def standardize_smiles(smiles):
    """Return a standardized, stereo-free canonical SMILES, or ``np.nan``.

    Steps, in order: parse the SMILES, take the neutralized parent molecule
    (``standardizer.get_parent_mol``), standardize it
    (``standardizer.standardize_mol``), write it out without stereochemistry
    and round-trip that string through RDKit once more.

    :param smiles: input SMILES string.
    :return: the resulting SMILES string, or ``np.nan`` if any step fails.
    """
    try:
        # Convert to RDKit mol
        m = Chem.MolFromSmiles(smiles)
        if m is None:
            # RDKit reports an unparseable SMILES by returning None.
            return np.nan
        # Neutralize and separate from counterion
        m = standardizer.get_parent_mol(m, neutralize=True, check_exclusion=True, verbose=False)[0]
        # Standardize representation
        m = standardizer.standardize_mol(m, check_exclusion=True)
        # Remove stereochemistry information to avoid some activity cliffs
        flat_smiles = Chem.MolToSmiles(m, isomericSmiles=False)
        # Return RDKit canonical SMILES
        return Chem.MolToSmiles(Chem.MolFromSmiles(flat_smiles))
    except Exception:
        # Deliberate best effort: any failure maps to NaN, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return np.nan
Exemple #3
0
    def check_errors(self, smi: str) -> str:
        """
        Standardize the input, convert it to SMILES and remove salts.

        ``standardizer.standardize_mol`` is tried first; if it raises, the
        input is used unchanged. The result is then converted with
        ``Chem.MolToSmiles`` and passed through ``self.salt_remover``.

        NOTE(review): ``smi`` is annotated as ``str`` but is handed directly
        to ``standardize_mol`` and ``Chem.MolToSmiles`` -- presumably callers
        pass a molecule object; confirm against the callers.

        :param smi: compound to process (annotated as a SMILES string)

        :return final_smi: the value returned by ``self.salt_remover``
        """
        try:
            standardized = standardizer.standardize_mol(smi)
        except Exception:
            # Best effort: fall back to the unstandardized input, without
            # swallowing KeyboardInterrupt / SystemExit as a bare except did.
            standardized = smi

        final_smi = Chem.MolToSmiles(standardized)
        final_smi = self.salt_remover(final_smi)

        return final_smi
Exemple #4
0
    def __call__(self, mol):
        """Standardize ``mol`` and return its parent molecule.

        The molecule is first checked with
        ``chembl_standardizer.exclude_flag``; if the flag is set, a
        ``StandardizationError`` is raised. Otherwise the molecule is
        standardized and its neutralized parent is extracted.

        :param mol: molecule to process.
        :return: the first element returned by
            ``chembl_standardizer.get_parent_mol``.
        :raises StandardizationError: if the exclusion flag is set, or if
            SMILES generation, standardization or parent extraction raises.
            In the latter cases the original exception is passed as the
            first argument and attached as ``__cause__``.
        """
        if chembl_standardizer.exclude_flag(mol,
                                            includeRDKitSanitization=False):
            raise StandardizationError(
                None,
                f'ChEMBL standardizer set the exclusion flag for molecule: {Chem.MolToSmiles(mol)}'
            )

        # The SMILES is only used to build the error messages below.
        try:
            smiles = Chem.MolToSmiles(mol)
        except Exception as exp:
            raise StandardizationError(
                exp,
                f'An exception occurred while getting the SMILES for molecule: {mol}'
            ) from exp

        # The exclusion check was already done above, so it is skipped here.
        try:
            mol = chembl_standardizer.standardize_mol(mol,
                                                      check_exclusion=False)
        except Exception as exp:
            raise StandardizationError(
                exp,
                f'An exception occurred while standardizing molecule: {smiles}'
            ) from exp

        try:
            # The second element of the returned pair is not needed.
            mol, _ = chembl_standardizer.get_parent_mol(mol,
                                                        check_exclusion=False,
                                                        verbose=True,
                                                        neutralize=True)
        except Exception as exp:
            raise StandardizationError(
                exp,
                f'An exception occurred while getting the parent molecule of: {smiles}'
            ) from exp

        return mol
# Get all CSVs from the folders below dir_path, skipping any path that
# contains "all_received_mols.csv" (presumably this script's own aggregate
# output -- confirm).
received_csv_files = [
    f for f in dir_path.glob("**/*.csv")
    if "all_received_mols.csv" not in str(f)
]


def _parent_canonical_smiles(smi):
    """Return the canonical SMILES of the standardized parent of ``smi``."""
    parent = standardizer.get_parent_mol(Chem.MolFromSmiles(smi))[0]
    standardized = standardizer.standardize_mol(parent)
    # Round-trip through SMILES before producing the final string.
    return Chem.MolToSmiles(Chem.MolFromSmiles(Chem.MolToSmiles(standardized)))


# Maps each standardized SMILES to the file name of the CSV it was read from;
# when a SMILES occurs in several files, the last file processed wins.
smiles_dict = {}
for csv_file in received_csv_files:
    try:
        received_df = pd.read_csv(csv_file)
        received_df["SMILES"] = received_df["SMILES"].apply(
            _parent_canonical_smiles)

        # `.name` yields the file name regardless of the platform's path
        # separator (splitting on "/" returned the whole path on Windows).
        # Assumes dir_path.glob yields pathlib-style paths -- confirm.
        shipment_name = csv_file.name

        received_smi = list(received_df["SMILES"])
        for smi in received_smi:
            smiles_dict[smi] = shipment_name

    except Exception as e:
        # Best effort: a file that cannot be read or standardized is reported
        # and skipped, so the remaining files are still processed.
        print(f"FAILED ON {csv_file}")
        print(e)

# write out final csv
all_smiles = list(smiles_dict.keys())
# Dicts preserve insertion order, so this list lines up with all_smiles.
all_shipments = list(smiles_dict.values())
Exemple #6
0
def strip_and_standardize_smi(smi):
    """Return the SMILES of the standardized parent molecule of ``smi``.

    The input is parsed, reduced to its parent with
    ``standardizer.get_parent_mol``, standardized with
    ``standardizer.standardize_mol`` and then written out, re-parsed and
    written out again as SMILES.

    :param smi: input SMILES string.
    :return: SMILES string produced by the final ``Chem.MolToSmiles`` call.
    """
    parsed = Chem.MolFromSmiles(smi)
    parent = standardizer.get_parent_mol(parsed)[0]
    standardized = standardizer.standardize_mol(parent)

    # Round-trip through SMILES once before producing the returned string.
    reparsed = Chem.MolFromSmiles(Chem.MolToSmiles(standardized))
    return Chem.MolToSmiles(reparsed)