def generate_rdDescriptors(mol, Normalized=True):
    """
    Computes descriptastorus RDKit 2D descriptors for a single molecule.

    :param mol: A molecule, given either as a SMILES string or as an object
                that ``Chem.MolToSmiles`` can convert to SMILES.
    :param Normalized: If truthy, the ``RDKit2DNormalized`` generator is used;
                       otherwise the plain ``RDKit2D`` generator is used.
    :return: The output of ``generator.process(smiles)`` with its first
             element removed.
    """
    # isinstance (not an exact type comparison) so that str subclasses such
    # as numpy.str_ are treated as SMILES instead of being handed to RDKit.
    if isinstance(mol, str):
        smiles = mol
    else:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

    # Imported lazily so descriptastorus is only required when this is called.
    from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors

    if Normalized:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
    else:
        generator = rdDescriptors.RDKit2D()

    # Element 0 is skipped; presumably it is a "processed OK" flag rather
    # than a descriptor -- TODO confirm against the descriptastorus docs.
    return generator.process(smiles)[1:]
def rdkit_2d_features_generator(mol: Molecule) -> np.ndarray:
    """
    Generates RDKit 2D features for a molecule.

    :param mol: A molecule (i.e., either a SMILES string or an RDKit molecule).
    :return: The RDKit 2D features, i.e. the output of ``RDKit2D().process``
             with its first element removed. The value is returned as
             produced by the generator; no explicit conversion to a NumPy
             array is performed here.
    """
    # isinstance (not an exact type comparison) so that str subclasses such
    # as numpy.str_ are treated as SMILES instead of being handed to RDKit.
    if isinstance(mol, str):
        smiles = mol
    else:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

    generator = rdDescriptors.RDKit2D()
    # Element 0 is skipped; presumably it is a "processed OK" flag rather
    # than a descriptor -- TODO confirm against the descriptastorus docs.
    return generator.process(smiles)[1:]
def generate_rdDescriptorsSets(mols, Normalized=True):
    """
    Computes descriptastorus RDKit 2D descriptors for a collection of molecules.

    A single generator instance is created and reused for every molecule.

    :param mols: An iterable of molecules; each item is either a SMILES string
                 or an object that ``Chem.MolToSmiles`` can convert to SMILES.
    :param Normalized: If truthy, the ``RDKit2DNormalized`` generator is used;
                       otherwise the plain ``RDKit2D`` generator is used.
    :return: ``np.asarray`` of the per-molecule generator outputs, each with
             its first element removed, in the same order as ``mols``.
    """
    # Imported lazily so descriptastorus is only required when this is called.
    from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors

    if Normalized:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
    else:
        generator = rdDescriptors.RDKit2D()

    def _to_smiles(mol):
        # isinstance (not an exact type comparison) so that str subclasses
        # such as numpy.str_ are treated as SMILES, not RDKit molecules.
        if isinstance(mol, str):
            return mol
        return Chem.MolToSmiles(mol, isomericSmiles=True)

    # Element 0 of each result is skipped; presumably it is a "processed OK"
    # flag rather than a descriptor -- TODO confirm against descriptastorus.
    return np.asarray(
        [generator.process(_to_smiles(mol))[1:] for mol in mols]
    )
def rdkit_functional_group_label_features_generator(
        mol: Molecule) -> np.ndarray:
    """
    Generates functional group labels for a molecule using RDKit.

    The descriptors listed in ``RDKIT_PROPS`` are computed and then binarized:
    every nonzero entry is replaced by 1, zero entries are left unchanged.

    :param mol: A molecule (i.e., either a SMILES string or an RDKit molecule).
    :return: A numpy array built from the generator output (first element
             removed) in which every nonzero value has been set to 1.
    """
    # isinstance (not an exact type comparison) so that str subclasses such
    # as numpy.str_ are treated as SMILES instead of being handed to RDKit.
    if isinstance(mol, str):
        smiles = mol
    else:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

    generator = rdDescriptors.RDKit2D(RDKIT_PROPS)
    # Element 0 is skipped; presumably it is a "processed OK" flag rather
    # than a descriptor -- TODO confirm against the descriptastorus docs.
    features = np.array(generator.process(smiles)[1:])

    # Collapse the values to presence/absence indicators.
    features[features != 0] = 1
    return features
calc = Calculator(descriptors, ignore_3D=True) descriptors = calc.pandas(mols) descriptors = descriptors.astype(str) masks = descriptors.apply(lambda d: d.str.contains('[a-zA-Z]', na=False)) descriptors = descriptors[~masks] descriptors = descriptors.astype(float) y = pd.DataFrame(y, index=smiles, columns=[y_name]) if dataset_type == 3: from descriptastorus.descriptors.DescriptorGenerator import MakeGenerator from descriptastorus.descriptors import rdDescriptors from descriptastorus.descriptors import rdNormalizedDescriptors gen1 = MakeGenerator(('rdkit2d', 'Morgan3counts')) gen2 = rdDescriptors.RDKit2D() gen3 = rdNormalizedDescriptors.RDKit2DNormalized() data1 = gen1.process(smiles) data2 = gen2.process(smiles) data3 = gen3.process(smiles) for col in gen1.GetColumns(): y_name.append(col) y = pd.DataFrame(y, index=smiles, columns=[y_name]) if dataset_type == 4: #3D Descriptors from e3fp.fingerprint.generate import fp, fprints_dict_from_mol from e3fp.conformer.generate import generate_conformers mols = [Chem.MolFromSmiles(smile) for smile in smiles] optimize_mols = [] for mol in mols: