コード例 #1
0
ファイル: tran_data.py プロジェクト: vargeus/MoleculeKit
def rdkit_2d_normalized_features_generator(mol):
    """Compute normalized RDKit 2D descriptor features for one molecule.

    :param mol: A SMILES string, or an object accepted by
        ``Chem.MolToSmiles`` (it is converted to isomeric SMILES first).
    :return: The result of ``RDKit2DNormalized().process(smiles)`` with its
        first element dropped.
    """
    # isinstance (not an exact type comparison) so that str subclasses such
    # as numpy.str_ are used as SMILES directly instead of being handed to
    # Chem.MolToSmiles.
    if isinstance(mol, str):
        smiles = mol
    else:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

    generator = rdNormalizedDescriptors.RDKit2DNormalized()
    return generator.process(smiles)[1:]
コード例 #2
0
def smiles2rdkit2d(s):
    """Return normalized RDKit 2D descriptor features for ``s`` as an array.

    If descriptor generation raises for any reason, a message is printed and
    a vector of 200 ones is returned instead.

    :param s: The SMILES input passed to ``RDKit2DNormalized().process``.
    :return: ``np.ndarray`` built from the ``process`` result without its
        first element, or ``np.ones((200,))`` on failure.
    """
    try:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
        features = generator.process(s)[1:]
    except Exception:
        # `except Exception` keeps the best-effort fallback but no longer
        # swallows KeyboardInterrupt/SystemExit. The f-string keeps the
        # message identical for str input while not raising TypeError when
        # `s` is not a str (e.g. None), which used to crash the handler.
        print(f'descriptastorus not found this smiles: {s}'
              ' convert to all 1 features')
        features = np.ones((200, ))
    return np.array(features)
コード例 #3
0
def smiles2rdkit2d(s):
    """Return normalized RDKit 2D descriptor features for ``s`` as an array.

    NaN entries in the computed features are replaced with 0. If descriptor
    generation raises for any reason, a message is printed and a vector of
    200 zeros is returned instead.

    :param s: The SMILES input passed to ``RDKit2DNormalized().process``.
    :return: ``np.ndarray`` of features with NaNs zeroed, or
        ``np.zeros((200,))`` on failure.
    """
    try:
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
        features = np.array(generator.process(s)[1:])
        features[np.isnan(features)] = 0
    except Exception:
        # `except Exception` keeps the best-effort fallback but no longer
        # swallows KeyboardInterrupt/SystemExit. The f-string keeps the
        # message identical for str input while not raising TypeError when
        # `s` is not a str (e.g. None), which used to crash the handler.
        print(f'descriptastorus not found this smiles: {s} convert to all 0 features')
        features = np.zeros((200, ))
    return np.array(features)
コード例 #4
0
 def generate_rdDescriptors(mol, Normalized=True):
     """Compute RDKit 2D descriptors for a single molecule.

     :param mol: A SMILES string, or an object accepted by
         ``Chem.MolToSmiles`` (it is converted to isomeric SMILES first).
     :param Normalized: When truthy, use ``RDKit2DNormalized``; otherwise
         use the plain ``RDKit2D`` generator.
     :return: The generator's ``process(smiles)`` result with its first
         element dropped.
     """
     # isinstance (not an exact type comparison) so that str subclasses such
     # as numpy.str_ are used as SMILES directly instead of being handed to
     # Chem.MolToSmiles.
     if isinstance(mol, str):
         smiles = mol
     else:
         smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
     from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors
     if Normalized:
         generator = rdNormalizedDescriptors.RDKit2DNormalized()
     else:
         generator = rdDescriptors.RDKit2D()
     # Both generator kinds are driven the same way, so process once here.
     return generator.process(smiles)[1:]
コード例 #5
0
    def rdkit_2d_features_normalized_generator(mol: Molecule) -> np.ndarray:
        """
        Generates RDKit 2D normalized features for a molecule.

        :param mol: A molecule (i.e. either a SMILES string or an RDKit molecule).
        :return: The RDKit 2D normalized features, i.e. the result of
                 ``RDKit2DNormalized().process(smiles)`` without its first element.
                 NOTE(review): the value is returned as produced by ``process``;
                 confirm it matches the ``np.ndarray`` annotation.
        """
        # isinstance (not an exact type comparison) so that str subclasses
        # such as numpy.str_ are used as SMILES directly instead of being
        # handed to Chem.MolToSmiles.
        if isinstance(mol, str):
            smiles = mol
        else:
            smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
        generator = rdNormalizedDescriptors.RDKit2DNormalized()
        return generator.process(smiles)[1:]
コード例 #6
0
    def generate_rdDescriptorsSets(mols, Normalized=True):
        """Compute RDKit 2D descriptors for every molecule in ``mols``.

        :param mols: Iterable of molecules; each item is a SMILES string or
            an object accepted by ``Chem.MolToSmiles``.
        :param Normalized: When truthy, use ``RDKit2DNormalized``; otherwise
            use the plain ``RDKit2D`` generator.
        :return: ``np.asarray`` of the per-molecule ``process`` results, each
            with its first element dropped.
        """
        from descriptastorus.descriptors import rdDescriptors, rdNormalizedDescriptors
        if Normalized:
            generator = rdNormalizedDescriptors.RDKit2DNormalized()
        else:
            generator = rdDescriptors.RDKit2D()

        tors = []
        for mol in mols:
            # isinstance (not an exact type comparison) so that str
            # subclasses such as numpy.str_ are used as SMILES directly
            # instead of being handed to Chem.MolToSmiles.
            if isinstance(mol, str):
                smiles = mol
            else:
                smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
            tors.append(generator.process(smiles)[1:])

        return np.asarray(tors)
コード例 #7
0
import logging
from rdkit import Chem

# Define rdkit_2d_normalized_features only when descriptastorus can be
# imported; otherwise log a warning and bind the name to None.
try:
    from descriptastorus.descriptors import rdNormalizedDescriptors
    generator = rdNormalizedDescriptors.RDKit2DNormalized()

    def rdkit_2d_normalized_features(smiles: str):
        """Return normalized RDKit 2D descriptors for a SMILES string or molecule.

        A non-string argument is converted to isomeric SMILES with
        ``Chem.MolToSmiles`` before processing.
        """
        # isinstance (not an exact type comparison) so that str subclasses
        # such as numpy.str_ are used as SMILES directly.
        if not isinstance(smiles, str):
            # this is a bit of a waste, but the descriptastorus API is smiles
            #  based for normalization purposes
            smiles = Chem.MolToSmiles(smiles, isomericSmiles=True)

        # the first element is true/false if the mol was properly computed
        return generator.process(smiles)[1:]

except ImportError:
    logging.getLogger(__name__).warning(
        "descriptastorus is not available, normalized descriptors are not available"
    )
    rdkit_2d_normalized_features = None
コード例 #8
0
    descriptors = calc.pandas(mols)

    descriptors = descriptors.astype(str)
    masks = descriptors.apply(lambda d: d.str.contains('[a-zA-Z]', na=False))
    descriptors = descriptors[~masks]
    descriptors = descriptors.astype(float)

    y = pd.DataFrame(y, index=smiles, columns=[y_name])

# Dataset type 3: build descriptastorus generators, run them on `smiles`, and
# rebuild `y` as a DataFrame after extending `y_name` with gen1's columns.
if dataset_type == 3:
    from descriptastorus.descriptors.DescriptorGenerator import MakeGenerator
    from descriptastorus.descriptors import rdDescriptors
    from descriptastorus.descriptors import rdNormalizedDescriptors
    # Three generators: a combined 'rdkit2d' + 'Morgan3counts' generator,
    # the plain RDKit2D generator, and the normalized RDKit2D generator.
    gen1 = MakeGenerator(('rdkit2d', 'Morgan3counts'))
    gen2 = rdDescriptors.RDKit2D()
    gen3 = rdNormalizedDescriptors.RDKit2DNormalized()

    # NOTE(review): data1/data2/data3 are not read again in the visible code.
    # `smiles` is also used as a DataFrame index below, so it looks like a
    # collection — confirm that process() accepts it in that form.
    data1 = gen1.process(smiles)
    data2 = gen2.process(smiles)
    data3 = gen3.process(smiles)
    # Appends each entry of gen1.GetColumns() to y_name, mutating it in place.
    # NOTE(review): confirm what GetColumns() yields (bare names vs. tuples).
    for col in gen1.GetColumns():
        y_name.append(col)
    # NOTE(review): y_name is a list here (see append above), so columns
    # receives a nested list — confirm this is the intended column spec and
    # that `y` has a matching number of columns.
    y = pd.DataFrame(y, index=smiles, columns=[y_name])

if dataset_type == 4:  #3D Descriptors
    from e3fp.fingerprint.generate import fp, fprints_dict_from_mol
    from e3fp.conformer.generate import generate_conformers
    mols = [Chem.MolFromSmiles(smile) for smile in smiles]
    optimize_mols = []
    for mol in mols:
        mh = Chem.AddHs(mol)
コード例 #9
0
ファイル: fp.py プロジェクト: huluxiaohuowa/jupyfuncs
def get_rdnorm_fp(smiles):
    """Return the normalized RDKit 2D descriptors of ``smiles`` as a NumPy array.

    The first element of the ``process`` result is dropped before conversion.
    """
    processed = rdNormalizedDescriptors.RDKit2DNormalized().process(smiles)
    return np.array(processed[1:])
コード例 #10
0
ファイル: preprocess.py プロジェクト: hacors/Drug
def preprocess(data):
    """Wrap each entry of ``data`` in a one-element list holding a ``MoleculeDatapoint``.

    A single ``RDKit2DNormalized`` generator is created up front and passed
    to every datapoint.
    """
    shared_generator = rdNormalizedDescriptors.RDKit2DNormalized()
    wrapped = []
    for line in data:
        datapoint = MoleculeDatapoint(line, shared_generator)
        wrapped.append([datapoint])
    return wrapped
コード例 #11
0
def generate_descriptors(smi):
    """Return a fixed-size leading slice of the normalized RDKit 2D descriptors of ``smi``."""
    processed = rdNormalizedDescriptors.RDKit2DNormalized().process(smi)
    # Index 0 is skipped; of the remaining entries only the first 114 are kept.
    without_first = processed[1:]
    return without_first[:114]