def rdkit_2d_normalized_features_generator(mol):
    """Compute RDKit 2D normalized descriptors for a single molecule.

    :param mol: Either a SMILES string, or an object accepted by
        ``Chem.MolToSmiles`` (presumably an RDKit molecule).
    :return: The output of ``RDKit2DNormalized().process`` for the molecule
        with its first element removed.
    """
    # isinstance (instead of an exact ``type(...) != str`` comparison) keeps
    # ``str`` subclasses such as ``numpy.str_`` on the SMILES path rather than
    # handing them to ``Chem.MolToSmiles``.
    if isinstance(mol, str):
        smiles = mol
    else:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

    generator = rdNormalizedDescriptors.RDKit2DNormalized()
    # The leading element is dropped. NOTE(review): in descriptastorus this is
    # presumably the "computed successfully" flag - confirm.
    return generator.process(smiles)[1:]
def smiles2rdkit2d(s):
    """Return the RDKit 2D normalized descriptors for ``s`` as a numpy array.

    If descriptor generation fails for any reason, a message is printed and
    an all-ones vector of length 200 is returned instead.

    :param s: SMILES string to featurize.
    :return: ``np.ndarray`` built from the descriptors (first element of the
        ``process`` output dropped), or ``np.ones((200,))`` on failure.
    """
    try:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
        features = generator.process(s)[1:]
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate. ``format`` (instead of ``+``) keeps the
        # fallback working when ``s`` is not a string (e.g. None / NaN).
        print('descriptastorus not found this smiles: {} convert to all 1 features'.format(s))
        features = np.ones((200, ))
    return np.array(features)
def smiles2rdkit2d(s):
    """Return NaN-free RDKit 2D normalized descriptors for ``s``.

    NaN entries in the descriptor vector are replaced by 0. If descriptor
    generation fails for any reason, a message is printed and an all-zeros
    vector of length 200 is returned instead.

    :param s: SMILES string to featurize.
    :return: ``np.ndarray`` of descriptors (first element of the ``process``
        output dropped), or ``np.zeros((200,))`` on failure.
    """
    try:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
        features = np.array(generator.process(s)[1:])
        features[np.isnan(features)] = 0
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
        # SystemExit still propagate. ``format`` (instead of ``+``) keeps the
        # fallback working when ``s`` is not a string (e.g. None / NaN).
        print('descriptastorus not found this smiles: {} convert to all 0 features'.format(s))
        features = np.zeros((200, ))
    return np.array(features)
def generate_rdDescriptors(mol, Normalized=True):
    """Compute RDKit 2D descriptors for one molecule via descriptastorus.

    :param mol: Either a SMILES string, or an object accepted by
        ``Chem.MolToSmiles`` (presumably an RDKit molecule).
    :param Normalized: When truthy use ``RDKit2DNormalized``; otherwise use
        the plain ``RDKit2D`` generator.
    :return: The generator's ``process`` output with its first element
        removed.
    """
    # isinstance keeps ``str`` subclasses (e.g. ``numpy.str_``) on the SMILES
    # path; an exact type comparison would pass them to ``Chem.MolToSmiles``.
    if isinstance(mol, str):
        smiles = mol
    else:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

    # Function-scope import: descriptastorus is imported when the function is
    # called rather than when the module is loaded.
    from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors

    if Normalized:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
    else:
        generator = rdDescriptors.RDKit2D()
    return generator.process(smiles)[1:]
def rdkit_2d_features_normalized_generator(mol: Molecule) -> np.ndarray:
    """
    Generates RDKit 2D normalized features for a molecule.

    :param mol: A molecule (i.e. either a SMILES string or an RDKit molecule).
    :return: The RDKit 2D normalized features, i.e. the output of
             ``RDKit2DNormalized().process`` without its first element.
             NOTE(review): the value is returned as sliced from ``process``
             with no explicit conversion to ``np.ndarray`` - confirm callers
             do not rely on array-only behavior.
    """
    # isinstance keeps ``str`` subclasses (e.g. ``numpy.str_``) on the SMILES
    # path; an exact type comparison would pass them to ``Chem.MolToSmiles``.
    if isinstance(mol, str):
        smiles = mol
    else:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

    generator = rdNormalizedDescriptors.RDKit2DNormalized()
    features = generator.process(smiles)[1:]

    return features
def generate_rdDescriptorsSets(mols, Normalized=True):
    """Compute RDKit 2D descriptors for a collection of molecules.

    :param mols: Iterable whose items are SMILES strings or objects accepted
        by ``Chem.MolToSmiles`` (presumably RDKit molecules).
    :param Normalized: When truthy use ``RDKit2DNormalized``; otherwise use
        the plain ``RDKit2D`` generator.
    :return: ``np.asarray`` over the per-molecule ``process`` outputs, each
        with its first element removed.
    """
    # Function-scope import: descriptastorus is imported when the function is
    # called rather than when the module is loaded.
    from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors

    # One generator instance is shared by every molecule in ``mols``.
    if Normalized:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
    else:
        generator = rdDescriptors.RDKit2D()

    # isinstance keeps ``str`` subclasses (e.g. ``numpy.str_``) on the SMILES
    # path; an exact type comparison would pass them to ``Chem.MolToSmiles``.
    tors = [
        generator.process(
            mol if isinstance(mol, str)
            else Chem.MolToSmiles(mol, isomericSmiles=True)
        )[1:]
        for mol in mols
    ]
    return np.asarray(tors)
import logging

from rdkit import Chem

# descriptastorus is optional: when it cannot be imported a warning is logged
# and ``rdkit_2d_normalized_features`` is set to None instead of a function.
try:
    from descriptastorus.descriptors import rdNormalizedDescriptors

    # Created once at import time and reused by every call below.
    generator = rdNormalizedDescriptors.RDKit2DNormalized()

    def rdkit_2d_normalized_features(smiles: str):
        """Return the RDKit 2D normalized descriptors for a molecule.

        :param smiles: A SMILES string; any non-string argument is first
            converted with ``Chem.MolToSmiles(..., isomericSmiles=True)``.
        :return: ``generator.process`` output without its first element.
        """
        # the first element is true/false if the mol was properly computed
        # isinstance keeps ``str`` subclasses (e.g. ``numpy.str_``) on the
        # SMILES path instead of passing them to ``Chem.MolToSmiles``.
        if isinstance(smiles, str):
            return generator.process(smiles)[1:]
        # this is a bit of a waste, but the descriptastorus API is smiles
        # based for normalization purposes
        return generator.process(
            Chem.MolToSmiles(smiles, isomericSmiles=True))[1:]

except ImportError:
    logging.getLogger(__name__).warning(
        "descriptastorus is not available, normalized descriptors are not available"
    )
    rdkit_2d_normalized_features = None
# NOTE(review): truncated excerpt of a larger routine. The enclosing definition
# and the names `calc`, `mols`, `smiles`, `y`, `y_name` and `dataset_type` are
# defined outside this view, and the original indentation has been lost, so
# the nesting of the statements below cannot be confirmed from here.
# Visible steps: build a descriptor table with `calc.pandas(mols)`, mask out
# cells whose string form contains ASCII letters, cast the table to float, and
# wrap `y` in a DataFrame indexed by `smiles`.
# `dataset_type == 3`: builds three descriptastorus generators
# (MakeGenerator(('rdkit2d', 'Morgan3counts')), RDKit2D, RDKit2DNormalized),
# calls `process(smiles)` on each, and appends gen1's column names to `y_name`.
# `data1`..`data3` are not used in the visible code - TODO confirm intent.
# `dataset_type == 4`: starts preparing molecules for e3fp 3D fingerprints; the
# excerpt ends inside the `for mol in mols` loop.
descriptors = calc.pandas(mols) descriptors = descriptors.astype(str) masks = descriptors.apply(lambda d: d.str.contains('[a-zA-Z]', na=False)) descriptors = descriptors[~masks] descriptors = descriptors.astype(float) y = pd.DataFrame(y, index=smiles, columns=[y_name]) if dataset_type == 3: from descriptastorus.descriptors.DescriptorGenerator import MakeGenerator from descriptastorus.descriptors import rdDescriptors from descriptastorus.descriptors import rdNormalizedDescriptors gen1 = MakeGenerator(('rdkit2d', 'Morgan3counts')) gen2 = rdDescriptors.RDKit2D() gen3 = rdNormalizedDescriptors.RDKit2DNormalized() data1 = gen1.process(smiles) data2 = gen2.process(smiles) data3 = gen3.process(smiles) for col in gen1.GetColumns(): y_name.append(col) y = pd.DataFrame(y, index=smiles, columns=[y_name]) if dataset_type == 4: #3D Descriptors from e3fp.fingerprint.generate import fp, fprints_dict_from_mol from e3fp.conformer.generate import generate_conformers mols = [Chem.MolFromSmiles(smile) for smile in smiles] optimize_mols = [] for mol in mols: mh = Chem.AddHs(mol)
def get_rdnorm_fp(smiles):
    """Return the RDKit 2D normalized descriptors of ``smiles`` as an array.

    :param smiles: SMILES string handed to ``RDKit2DNormalized().process``.
    :return: ``np.ndarray`` built from the ``process`` output without its
        first element.
    """
    descriptor_gen = rdNormalizedDescriptors.RDKit2DNormalized()
    # Skip the leading element of the processed output before converting.
    return np.array(descriptor_gen.process(smiles)[1:])
def preprocess(data):
    """Wrap each entry of ``data`` in a one-element list of MoleculeDatapoint.

    A single ``RDKit2DNormalized`` generator is created and passed to every
    ``MoleculeDatapoint`` that is constructed.

    :param data: Iterable of entries, each passed as the first argument to
        ``MoleculeDatapoint``.
    :return: List with one ``[MoleculeDatapoint]`` list per entry, in order.
    """
    shared_generator = rdNormalizedDescriptors.RDKit2DNormalized()
    wrapped = []
    for entry in data:
        wrapped.append([MoleculeDatapoint(entry, shared_generator)])
    return wrapped
def generate_descriptors(smi):
    """Return a leading subset of the RDKit 2D normalized descriptors.

    :param smi: SMILES string handed to ``RDKit2DNormalized().process``.
    :return: Up to 114 values: the ``process`` output with its first element
        skipped, truncated to the next 114 entries.
    """
    descriptor_gen = rdNormalizedDescriptors.RDKit2DNormalized()
    processed = descriptor_gen.process(smi)
    # Indices 1..114 inclusive: drop element 0, then keep at most 114 values.
    return processed[1:115]