Python GetAvalonFP 예제들, rdkit.Avalon.pyAvalonTools.GetAvalonFP Python 예제들

예제 #1

0

파일 보기

파일: adam_simple2.py 프로젝트: scidatasoft/ml-services

def similarity(individual):
    """Score an individual by its dissimilarity to a fixed reference molecule.

    The values of ``individual`` are written, in order, into the positions of
    a 256-element latent vector that appear in ``non_zero_index``; every other
    position stays 0.0. The vector is decoded to a SMILES string with
    ``latent_to_smiles`` and compared to the reference SMILES using 512-bit
    Avalon fingerprints.

    Parameters
    ----------
    individual : iterable
        Latent values, one per index contained in ``non_zero_index``.

    Returns
    -------
    tuple
        One-element tuple holding ``1 - TanimotoSimilarity``. ``(9999,)`` is
        returned when the decoded SMILES cannot be parsed, is shorter than
        two characters, or fingerprinting fails.
    """
    final_vector = [0.0] * 256
    latent_values = list(individual)
    counter = 0
    for i in range(256):
        if i in non_zero_index:
            final_vector[i] = latent_values[counter]
            counter += 1

    final_vector = np.reshape(final_vector, (1, 256))
    smiles = latent_to_smiles(charset,
                              smiles_len,
                              char_to_int,
                              int_to_char,
                              latent_to_states_model,
                              sample_model,
                              final_vector,
                              type='2_layers')
    molecule = Chem.MolFromSmiles(smiles)

    # Compare by length, not by identity: ``smiles is not ''`` only worked by
    # accident of string interning. Empty and one-character SMILES are
    # rejected, exactly as the original condition intended.
    if not molecule or len(smiles) < 2:
        return 9999,

    try:
        mol_fp = GetAvalonFP(molecule, 512)
        ref = GetAvalonFP(
            Chem.MolFromSmiles('CC(C)Cc1ccc(cc1)[C@@H](C)C(=O)O'), 512)
        dissimilarity_to_ref = (1 - TanimotoSimilarity(mol_fp, ref))
        print(Chem.MolToSmiles(molecule))
        print(dissimilarity_to_ref)
        return dissimilarity_to_ref,
    except Exception:
        # Any fingerprinting failure gets the penalty score; unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        return 9999,

예제 #2

0

파일 보기

파일: Fingerprint_test.py 프로젝트: Snigdha-Agarwal/LabWork

def make_fingerprints(data, length=512, verbose=False):
    """Create every fingerprint calculator and apply each one to ``data``.

    Parameters
    ----------
    data :
        Passed unchanged to ``apply_fp`` of every ``fingerprint`` object.
    length : int
        Bit length handed to the fingerprint functions that accept a size.
    verbose : bool
        When true, print the name of each fingerprint before applying it.

    Returns
    -------
    list
        The ``fingerprint`` objects, in the order they were applied.
    """
    # (function, label) pairs; the order here is the order of the result.
    # The 'FP4' variant of get_fingerprint is intentionally left out.
    specs = [
        (Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
         "Torsion "),
        (lambda mol: GetMorganFingerprintAsBitVect(mol, 2, nBits=length),
         "Morgan"),
        (FingerprintMol, "Estate (1995)"),
        (lambda mol: GetAvalonFP(mol, nBits=length),
         "Avalon bit based (2006)"),
        (lambda mol: np.append(GetAvalonFP(mol, nBits=length),
                               Descriptors.MolWt(mol)),
         "Avalon+mol. weight"),
        (lambda mol: GetErGFingerprint(mol), "ErG fingerprint (2006)"),
        (lambda mol: RDKFingerprint(mol, fpSize=length),
         "RDKit fingerprint"),
        (lambda mol: MACCSkeys.GenMACCSKeys(mol), "MACCS fingerprint"),
        (lambda mol: get_fingerprint(mol, fp_type='pubchem'), "PubChem"),
        (lambda mol: Generate.Gen2DFingerprint(
            mol, Gobbi_Pharm2D.factory,
            dMat=Chem.Get3DDistanceMatrix(mol)),
         "3D pharmacophore"),
    ]
    fp_list = [fingerprint(func, label) for func, label in specs]

    for calculator in fp_list:
        if verbose:
            print("doing", calculator.name)
        calculator.apply_fp(data)

    return fp_list

예제 #3

0

파일 보기

파일: chemstruct.py 프로젝트: vladchimescu/chemgen

def get_fingerprints(smiles_df, r=2, length=512,
                     type_='morgan'):
    """Compute one fingerprint per molecule and return them via ``fp_to_pandas``.

    Parameters
    ----------
    smiles_df :
        Table-like object indexed by column name; the ``'mol'`` column holds
        the molecules and the ``'drug'`` column the matching names.
    r : int
        Radius used by the two Morgan-based types (``'morgan'``, ``'fcpf'``).
    length : int
        Fingerprint size (``nBits``, or ``fpSize`` for ``'rdkit'``).
    type_ : str
        One of ``'morgan'``, ``'fcpf'``, ``'atom pair'``, ``'avalon'``,
        ``'torsion'`` or ``'rdkit'``.

    Returns
    -------
    Whatever ``fp_to_pandas(fp=..., drug_names=...)`` returns.

    Raises
    ------
    ValueError
        If ``type_`` is not one of the supported names.
    """
    # The lambdas resolve the RDKit names only when called, so an invalid
    # ``type_`` is rejected before any molecule is touched.
    calculators = {
        'morgan': lambda m: AllChem.GetMorganFingerprintAsBitVect(
            m, r, nBits=length),
        'fcpf': lambda m: AllChem.GetMorganFingerprintAsBitVect(
            m, r, useFeatures=True, nBits=length),
        'atom pair': lambda m: GetHashedAtomPairFingerprintAsBitVect(
            m, nBits=length),
        'avalon': lambda m: GetAvalonFP(m, nBits=length),
        'torsion': lambda m: GetHashedTopologicalTorsionFingerprintAsBitVect(
            m, nBits=length),
        'rdkit': lambda m: RDKFingerprint(m, fpSize=length),
    }
    try:
        compute = calculators.get(type_)
    except TypeError:
        # Unhashable ``type_`` (e.g. a list) is simply an unsupported value.
        compute = None
    if compute is None:
        raise ValueError("Possible values: morgan, fcpf, atom pair, avalon, torsion and rdkit")

    fp = [compute(m) for m in smiles_df['mol']]

    drug_names = smiles_df['drug'].values
    return fp_to_pandas(fp=fp, drug_names=drug_names)

예제 #4

0

파일 보기

def make_fingerprints(mols, length=1024, verbose=False):
    """Create the fingerprint calculators and apply each of them to ``mols``.

    Parameters
    ----------
    mols :
        Passed unchanged to ``apply_fp`` of every ``fingerprint`` object.
    length : int
        Size handed to the fingerprint functions that accept one.
    verbose : bool
        When true, print the name of each fingerprint before applying it.

    Returns
    -------
    list
        The ``fingerprint`` objects, in the order they were applied.
    """
    # (function, label) pairs in result order. The BP ("physiochemical"),
    # E-state + MW, E-state index-sum and Avalon + MW variants are
    # deliberately not part of this set.
    specs = (
        (lambda mol: GetHashedAtomPairFingerprintAsBitVect(mol, nBits=length),
         "Atom pair (1985)"),
        (lambda mol: GetHashedTopologicalTorsionFingerprintAsBitVect(
            mol, nBits=length),
         "Topological Torsion (1987)"),
        (lambda mol: GetMorganFingerprintAsBitVect(mol, 2, nBits=length),
         "ECFPs/Morgan Circular (2010) "),
        (fp_Estate_ints, "E-state (fixed length) (1995)"),
        (lambda mol: GetAvalonFP(mol, nBits=length), "Avalon (2006)"),
        (lambda mol: GetErGFingerprint(mol), "ErG (2006)"),
        (lambda mol: RDKFingerprint(mol, fpSize=length),
         "RDKit topological (2006)"),
    )
    fp_list = [fingerprint(func, label) for func, label in specs]

    for calculator in fp_list:
        if verbose:
            print("doing", calculator.name)
        calculator.apply_fp(mols)

    return fp_list

예제 #5

0

파일 보기

파일: sb_models.py 프로젝트: SWFarag/NRP-structure-classifier

def make_fingerprints(length, verbose, mols, chosen=None):
    """Build the single fingerprint selected by ``chosen`` and apply it to ``mols``.

    Parameters
    ----------
    length : int
        Size handed to the fingerprint functions that accept one.
    verbose : bool
        When true, print the fingerprint name before applying it.
    mols :
        Passed unchanged to ``apply_fp`` of the ``fingerprint`` object.
    chosen : int, optional
        1 atom pair, 2 topological torsion, 3 Morgan (radius 3), 4 E-state,
        5 Avalon, 6 Avalon + molecular weight, 7 ErG, 8 RDKit,
        9 ``FingerprintMols.FingerprintMol``. Any other value, including the
        default ``None``, selects MACCS keys.

    Returns
    -------
    list
        One-element list holding the applied ``fingerprint`` object.
    """
    if chosen == 1:
        # The label previously carried a garbled HTML entity ("&qfuot;").
        fp_list = [
            fingerprint(lambda x: GetHashedAtomPairFingerprintAsBitVect(x, nBits=length),
                        "Atom pair (1985)")]
    elif chosen == 2:
        fp_list = [
            fingerprint(lambda x: GetHashedTopologicalTorsionFingerprintAsBitVect(x, nBits=length),
                        "Topological torsion (1987)")]
    elif chosen == 3:
        fp_list = [
            fingerprint(lambda x: GetMorganFingerprintAsBitVect(x, 3, nBits=length),
                        "Morgan circular ")]
    elif chosen == 4:
        fp_list = [
            fingerprint(FingerprintMol, "Estate (1995)")]
    elif chosen == 5:
        fp_list = [
            fingerprint(lambda x: GetAvalonFP(x, nBits=length),
                        "Avalon bit based (2006)")]
    elif chosen == 6:
        fp_list = [
            fingerprint(lambda x: np.append(GetAvalonFP(x, nBits=length), Descriptors.MolWt(x)),
                        "Avalon+mol. weight")]
    elif chosen == 7:
        fp_list = [
            fingerprint(lambda x: GetErGFingerprint(x), "ErG fingerprint (2006)")]
    elif chosen == 8:
        fp_list = [
            fingerprint(lambda x: RDKFingerprint(x, fpSize=length),
                        "RDKit fingerprint")]
    elif chosen == 9:
        fp_list = [
            fingerprint(lambda x: FingerprintMols.FingerprintMol(x),
                        "RDKit fingerprint2")]
    else:
        fp_list = [fingerprint(lambda x: MACCSkeys.GenMACCSKeys(x), "RDKit MACCSkeys")]

    for fp in fp_list:
        if verbose:
            print("doing", fp.name)
        fp.apply_fp(mols)

    return fp_list

예제 #6

0

파일 보기

파일: chemical_models.py 프로젝트: Zerwer/Chemistry

    def predict(self, mol, selected_descriptors):
        """Run the models needed for the requested properties of ``mol``.

        Recognised names in ``selected_descriptors`` are ``'logP'``,
        ``'sol'``, ``'mp'``, ``'pka'`` and ``'mol_wt'``; anything else is
        ignored. Requesting ``'sol'`` also computes (and returns) ``'logP'``,
        and requesting ``'mp'`` also computes ``'logP'`` and ``'sol'``,
        because those results feed the later models.

        Returns
        -------
        dict
            Maps each computed property name to its value.
        """
        requested = list(selected_descriptors)

        # Later models consume the output of earlier ones, so a request
        # switches on everything it depends on.
        need_logp = any(name in ('logP', 'sol', 'mp') for name in requested)
        need_sol = any(name in ('sol', 'mp') for name in requested)
        need_mp = 'mp' in requested
        need_pka = 'pka' in requested
        need_wt = 'mol_wt' in requested

        results = {}

        # The atom-pair fingerprint is always computed, even if unused.
        fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

        if need_logp:
            logP = self.logP_model.run(fp)
            results['logP'] = logP

        if need_sol:
            logP_sol = self.logP_solubility_model.run(logP)
            atom_pair_sol = self.atom_pair_sol_model.run(fp)
            combined_sol = self.combined_model.run(mol, logP,
                                                   logP_sol, atom_pair_sol)
            results['sol'] = logs_to_mg_ml(combined_sol, mol)

        if need_mp:
            results['mp'] = self.melting_point_model.run(combined_sol, logP)

        if need_pka:
            avalon_bits = GetAvalonFP(mol)
            maccs_bits = MACCSkeys.GenMACCSKeys(mol)
            results['pka'] = self.pKa_model.run(avalon_bits + maccs_bits + fp)

        if need_wt:
            results['mol_wt'] = rdMolDescriptors.CalcExactMolWt(mol)

        return results

예제 #7

0

파일 보기

파일: featurisers.py 프로젝트: Bundaberg-Joey/ChemScI

def avalon_fp(mol):
    """Return the Avalon fingerprint of ``mol`` as a numpy array.

    The fingerprint is computed with
    `rdkit.Avalon.pyAvalonTools.GetAvalonFP` using its default settings and
    converted with `_rdkit_fp_to_np_arr`.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to fingerprint.

    Returns
    -------
    fp_arr : np.ndarray, shape(512,)
        The fingerprint as a numpy row vector.
    """
    return _rdkit_fp_to_np_arr(GetAvalonFP(mol))

예제 #8

0

파일 보기

def avalon(m: list) -> list:
    """Return a 1024-bit Avalon fingerprint for each molecule in ``m``."""
    fingerprints = []
    for mol in m:
        fingerprints.append(GetAvalonFP(mol, nBits=1024))
    return fingerprints

예제 #9

0

파일 보기

def avalon(mol, **kwargs):
    """Return the on-bit indices of the Avalon fingerprint of ``mol`` as a list.

    Keyword arguments are forwarded unchanged to ``GetAvalonFP``.
    """
    fingerprint = GetAvalonFP(mol, **kwargs)
    on_bits = fingerprint.GetOnBits()
    return list(on_bits)

예제 #10

0

파일 보기

class FingerprintsTransformer(MoleculeTransformer):
    r"""
    Fingerprint molecule transformer.
    This transformer is able to compute various fingerprints regularly used in QSAR modeling.

    Arguments
    ----------
        kind: str, optional
            Name of the fingerprinting method used. Should be one of
            {'atom_pair', 'topological_torsion', 'ecfp2', 'ecfp4', 'ecfp6',
            'estate', 'avalon_bit', 'avalon_count', 'erg', 'rdkit', 'maccs'}
            (Default value = 'ecfp2')
        length: int, optional
            Length of the fingerprint to use
            (Default value = 4096)

    Attributes
    ----------
        kind: str
            Name of the fingerprinting technique used
        length: int
            Length of the fingerprint to use
        fpfun: function
            function to call to compute the fingerprint
    """
    # Every entry takes (mol, params); entries whose fingerprint function is
    # called without a size simply ignore ``params``.
    MAPPING = OrderedDict(
        atom_pair=lambda x, params: GetHashedAtomPairFingerprintAsBitVect(
            x, **params),
        topological_torsion=lambda x, params:
        GetHashedTopologicalTorsionFingerprintAsBitVect(x, **params),
        ecfp2=lambda x, params: GetMorganFingerprintAsBitVect(x, 1, **params),
        ecfp4=lambda x, params: GetMorganFingerprintAsBitVect(x, 2, **params),
        ecfp6=lambda x, params: GetMorganFingerprintAsBitVect(x, 3, **params),
        estate=lambda x, params: FingerprintMol(x)[0],
        avalon_bit=lambda x, params: GetAvalonFP(x, **params),
        avalon_count=lambda x, params: GetAvalonCountFP(x, **params),
        erg=lambda x, params: GetErGFingerprint(x),
        rdkit=lambda x, params: RDKFingerprint(x, **params),
        maccs=lambda x, params: GetMACCSKeysFingerprint(x))

    def __init__(self, kind='ecfp2', length=4096):
        r"""
        Validate ``kind`` and store the fingerprint function and its size
        keyword (``fpSize`` for 'rdkit', ``nBits`` for everything else).

        :raises ValueError: when ``kind`` is not a key of ``MAPPING``
        """
        super(FingerprintsTransformer, self).__init__()
        if not (isinstance(kind, str) and
                (kind in FingerprintsTransformer.MAPPING)):
            raise ValueError("Argument kind must be in: " +
                             ', '.join(FingerprintsTransformer.MAPPING.keys()))
        self.kind = kind
        self.length = length
        # Looked up on ``self`` so a subclass may override MAPPING; a kind
        # missing there is reported separately.
        self.fpfun = self.MAPPING.get(kind, None)
        if not self.fpfun:
            raise ValueError("Fingerprint {} is not offered".format(kind))
        self._params = {
            ('fpSize' if kind == 'rdkit' else 'nBits'): length
        }

    def _transform(self, mol):
        r"""
        Transforms a molecule into a fingerprint vector
        :raises ValueError: when the input molecule is None

        Arguments
        ----------
            mol: rdkit.Chem.Mol
                Molecule of interest

        Returns
        -------
            fp:
                The computed fingerprint: the result of
                ``explicit_bit_vect_to_array`` for an ``ExplicitBitVect``,
                otherwise a plain list of the fingerprint's items

        """

        if mol is None:
            raise ValueError("Expecting a Chem.Mol object, got None")
        # expect cryptic rdkit errors here if this fails, #rdkitdev
        fp = self.fpfun(mol, self._params)
        if isinstance(fp, ExplicitBitVect):
            fp = explicit_bit_vect_to_array(fp)
        else:
            fp = list(fp)
        return fp

    def transform(self, mols, **kwargs):
        r"""
        Transforms a batch of molecules into fingerprint vectors.

        .. note::
            The recommended way is to use the object as a callable.

        Arguments
        ----------
            mols: (str or rdkit.Chem.Mol) iterable
                List of SMILES or molecules
            kwargs: accepted for interface compatibility; not used here

        Returns
        -------
            fp: array
                computed fingerprints of size NxD, where D is the
                requested length of features and N is the number of input
                molecules that have been successfully featurized.

        See Also
        --------
            :func:`~ivbase.transformers.features.MoleculeTransformer.transform`

        """
        mol_list = [self.to_mol(mol, addHs=False) for mol in mols]
        # Drop molecules that could not be converted. An explicit identity
        # test replaces ``filter(None.__ne__, ...)``, which depended on the
        # truthiness of ``NotImplemented`` (deprecated since Python 3.9).
        mol_list = [mol for mol in mol_list if mol is not None]
        features = np.array([self._transform(mol)
                             for mol in mol_list]).astype(np.float32)
        features = totensor(features, gpu=False)

        return features

    def __call__(self, mols, dtype=torch.long, cuda=False, **kwargs):
        r"""
        Transforms a batch of molecules into fingerprint vectors,
        and return the transformation in the desired data type format as well as
        the set of valid indexes.

        Arguments
        ----------
            mols: (str or rdkit.Chem.Mol) iterable
                The list of input smiles or molecules
            dtype: torch.dtype or numpy.dtype, optional
                Datatype of the transformed variable.
                A torch dtype yields a tensor, a numpy dtype yields a numpy
                array; any other value raises ``TypeError``.
                (Default value = torch.long)
            cuda: bool, optional
                Whether to transfer tensor on the GPU (if output is a tensor)
            kwargs: named parameters forwarded to the parent ``__call__``

        Returns
        -------
            fp: array
                computed fingerprints (in `dtype` datatype) of size NxD,
                where D is the requested length of features and N is the number
                of input molecules that have been successfully featurized.
            ids: array
                all valid molecule positions that did not fail during featurization

        See Also
        --------
            :func:`~ivbase.transformers.features.FingerprintsTransformer.transform`

        """
        fp, ids = super(FingerprintsTransformer, self).__call__(mols, **kwargs)
        if is_dtype_numpy_array(dtype):
            fp = np.array(fp, dtype=dtype)
        elif is_dtype_torch_tensor(dtype):
            fp = totensor(fp, gpu=cuda, dtype=dtype)
        else:
            raise TypeError('The type {} is not supported'.format(dtype))
        return fp, ids

예제 #11

0

파일 보기

"""
Predict the pKa of an acid from SMILES string
Returns both the fingerprint model prediction and similarity model predictions
"""
from chemical_models import AcidSimilarity, AcidpKa
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors, MACCSkeys
from rdkit.Avalon.pyAvalonTools import GetAvalonFP
import sys

# Load models
sim_model = AcidSimilarity('acid_sim')
fp_model = AcidpKa('pKa_acid')

# RDKit returns None instead of raising when a SMILES string cannot be
# parsed; stop with a clear message rather than failing inside the models.
mol = Chem.MolFromSmiles(sys.argv[1])
if mol is None:
    sys.exit("Could not parse SMILES: " + sys.argv[1])

# Set of acids required for similarity model; each entry is
# [SMILES, pKa, atom-pair fingerprint].
acids = []

# Read acids from file; the ``with`` block guarantees the handle is closed.
with open('data/pKa/formatted_acidic.txt', 'r') as acid_data:
    for line in acid_data:
        if not line.strip():
            # Tolerate blank lines (e.g. at the end of the file).
            continue
        split = line.split(' ')
        # strip() drops the newline without eating a digit when the last
        # line of the file has no trailing newline.
        acids.append([split[0], float(split[1].strip()),
                      rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(Chem.MolFromSmiles(split[0]))])

# Run the models and print results
print("Similarity based model: " + str(sim_model.run(sys.argv[1], acids)))
print("Fingerprint based model: " + str(fp_model.run(GetAvalonFP(mol) + MACCSkeys.GenMACCSKeys(mol) +
                                                     rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol))))

예제 #12

0

파일 보기

파일: transformers.py 프로젝트: prtos/few_shot_regression

class FingerprintsTransformer(MoleculeTransformer):
    """Turn molecules into molecular fingerprints.

    Parameters
    ----------
    kind : {'atom_pair', 'topological_torsion', 'morgan_circular', 'estate',
        'avalon_bit', 'avalon_count', 'erg', 'rdkit', 'maccs'}, optional, default='morgan_circular'
        Name of the fingerprinting technique used
    length : int, optional, default=2000
        Length of the fingerprint to use

    Attributes
    ----------
    kind : str
        Name of the fingerprinting technique used
    length : int
        Length of the fingerprint to use
    fpfun : function
        function to call to compute the fingerprint
    """
    # Each entry is called as fpfun(mol, params); entries that take no size
    # argument ignore ``params``.
    mapping = OrderedDict([
        ('atom_pair',
         lambda x, params: GetHashedAtomPairFingerprintAsBitVect(x, **params)),
        ('topological_torsion',
         lambda x, params: GetHashedTopologicalTorsionFingerprintAsBitVect(
             x, **params)),
        ('morgan_circular',
         lambda x, params: GetMorganFingerprintAsBitVect(x, 2, **params)),
        ('estate', lambda x, params: FingerprintMol(x)[0]),
        ('avalon_bit', lambda x, params: GetAvalonFP(x, **params)),
        ('avalon_count', lambda x, params: GetAvalonCountFP(x, **params)),
        ('erg', lambda x, params: GetErGFingerprint(x)),
        ('rdkit', lambda x, params: RDKFingerprint(x, **params)),
        ('maccs', lambda x, params: GetMACCSKeysFingerprint(x)),
    ])

    def __init__(self, kind='morgan_circular', length=2000):
        """Validate ``kind`` and remember the fingerprint function and size.

        Raises
        ------
        ValueError
            If ``kind`` is not a string key of ``mapping``.
        """
        super(FingerprintsTransformer, self).__init__()
        known_kinds = FingerprintsTransformer.mapping
        if not isinstance(kind, str) or kind not in known_kinds:
            raise ValueError("Argument kind must be in: " +
                             ', '.join(known_kinds.keys()))
        self.kind = kind
        self.length = length
        self.fpfun = self.mapping.get(kind, None)
        if not self.fpfun:
            raise ValueError("Fingerprint {} is not offered".format(kind))
        # RDKFingerprint names its size argument ``fpSize``; the rest use
        # ``nBits``.
        size_keyword = 'fpSize' if kind == 'rdkit' else 'nBits'
        self._params = {size_keyword: length}

    def _transform(self, mol):
        """Compute the fingerprint of a single molecule.

        Parameters
        ----------
        mol : rdkit.Chem.Mol or None
            Molecule of interest. ``None`` triggers a warning and produces a
            zero vector of ``self.length`` entries.

        Returns
        -------
        fp : np.ndarray
            The computed fingerprint
        """
        if mol is None:
            warnings.warn("None value received for argument mol")
            fp = np.zeros(self.length)
        else:
            fp = self.fpfun(mol, self._params)

        if isinstance(fp, ExplicitBitVect):
            return explicit_bit_vect_to_array(fp)
        return np.array(list(fp))

    def transform(self, mols):
        """Compute fingerprints for a batch of molecules.

        Parameters
        ----------
        mols : (str or rdkit.Chem.Mol) list
            The list of smiles or molecules

        Returns
        -------
        fp : np.ndarray
            ``np.array`` of the parent class' ``transform`` result, which is
            requested with ``as_numpy=True``.
        """
        batch = super(FingerprintsTransformer, self).transform(
            mols, as_numpy=True)
        return np.array(batch)

예제 #13

0

파일 보기

def avalon(n_bits=2048):
    """Return a one-argument callable computing an ``n_bits`` Avalon fingerprint."""
    def fingerprinter(x):
        return GetAvalonFP(x, nBits=n_bits)

    return fingerprinter

예제 #14

0

파일 보기

파일: sample_program_6_5_4.py 프로젝트: naototachibana/python_data_analysis_ohmsha

from rdkit.Avalon.pyAvalonTools import GetAvalonFP
from rdkit.Chem import AllChem

y_name = 'boiling_point'
fingerprint_type = 0  # 0: MACCS key, 1: RDKit, 2: Morgan (≒ECFP4), 3: Avalon

# Fail early on an unsupported setting; otherwise no fingerprints would be
# collected and the mismatch would only surface when the table is built.
if fingerprint_type not in (0, 1, 2, 3):
    raise ValueError('fingerprint_type must be 0, 1, 2 or 3')

sdf = Chem.SDMolSupplier('boiling_point.sdf')  # load the sdf file

# Compute the fingerprints.
# For each molecule, append the property value to y, the computed
# fingerprint to fingerprints, and the SMILES string to smiles.
fingerprints, y, smiles = [], [], []
number_of_molecules = len(sdf)
print('分子の数 :', number_of_molecules)
for index, molecule in enumerate(sdf):
    print(index + 1, '/', number_of_molecules)
    if molecule is None:
        # SDMolSupplier yields None for records it cannot parse; skip them
        # so the three lists stay aligned.
        print('skipped: record could not be parsed')
        continue
    y.append(float(molecule.GetProp(y_name)))
    smiles.append(Chem.MolToSmiles(molecule))
    if fingerprint_type == 0:
        fingerprints.append(AllChem.GetMACCSKeysFingerprint(molecule))
    elif fingerprint_type == 1:
        fingerprints.append(Chem.RDKFingerprint(molecule))
    elif fingerprint_type == 2:
        fingerprints.append(AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=2048))
    elif fingerprint_type == 3:
        fingerprints.append(GetAvalonFP(molecule))
fingerprints = pd.DataFrame(np.array(fingerprints, int), index=smiles)
y = pd.DataFrame(y, index=smiles, columns=[y_name])

# Save
fingerprints_with_y = pd.concat([y, fingerprints], axis=1)  # join y and the descriptors
# Write to csv. Note that an existing file with the same name is overwritten.
fingerprints_with_y.to_csv('fingerprints_with_y.csv')

예제 #15

0

파일 보기

                  rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)])

acid_model = AcidpKa('pKa_acid')
sim_model = AcidSimilarity('acid_sim')

X = []
Y = []

# For x combine predictions and descriptors, for y append actual pKa
for line in data.readlines():
    if not line.strip():
        # Tolerate blank lines (e.g. at the end of the file).
        continue
    split = line.split(' ')

    mol = Chem.MolFromSmiles(split[0])
    fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    # The fingerprint model takes the Avalon, MACCS and atom-pair bit
    # vectors combined with ``+``.
    pKa = acid_model.run(GetAvalonFP(mol) +
                         MACCSkeys.GenMACCSKeys(mol) +
                         fingerprint)
    sim_pKa = sim_model.run(split[0], acids)

    X.append([pKa,
              sim_pKa,
              Lipinski.NumHDonors(mol),
              Lipinski.NumHAcceptors(mol),
              Lipinski.NHOHCount(mol)])

    # strip() removes the newline without cutting off a digit when the last
    # line has no trailing newline (``[:-1]`` did).
    Y.append(float(split[1].strip()))

scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(np.asarray(X))
Y = np.asarray(Y)

예제 #16

0

파일 보기

# Predicts pKa of any molecule
from chemical_models import GeneralPKa
from rdkit import Chem
from rdkit.Chem import rdMolDescriptors, MACCSkeys
from rdkit.Avalon.pyAvalonTools import GetAvalonFP
import sys

# Load models
model = GeneralPKa('pKa')

# RDKit returns None instead of raising when a SMILES string cannot be
# parsed; stop with a clear message rather than failing inside the
# fingerprint functions.
mol = Chem.MolFromSmiles(sys.argv[1])
if mol is None:
    sys.exit("Could not parse SMILES: " + sys.argv[1])

# Run the models and print results; the model input is the Avalon, MACCS and
# atom-pair bit vectors combined with ``+``.
print("Predicted pKa: " + str(
    model.run(
        GetAvalonFP(mol) + MACCSkeys.GenMACCSKeys(mol) +
        rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol))))

예제 #17

0

파일 보기

def convert_to_avalon(SMILES):
    """Return the Avalon fingerprint of ``SMILES`` as a list of float bits.

    The molecule is built with ``MS``; the fingerprint's bit string is split
    into characters and converted to floats through a numpy array.
    """
    bit_string = GetAvalonFP(MS(SMILES)).ToBitString()
    bits = np.asarray(list(bit_string), dtype=float)
    return list(bits)

예제 #18

0

파일 보기

파일: AutoDescriptor.py 프로젝트: KanHatakeyama/polySMILES

def default_FP_Func(smiles):
    """Return the Avalon fingerprint of ``smiles`` as a list of ints.

    The molecule is built with ``mol_from_smiles`` and fingerprinted with
    ``GetAvalonFP`` using its default settings.
    """
    molecule = mol_from_smiles(smiles)
    return list(map(int, GetAvalonFP(molecule)))

예제 #19

0

파일 보기

파일: gen_fp.py 프로젝트: ntcockroft/STarFish

def main():
    """Command-line entry point: write fingerprints of SMILES strings to csv.

    Reads the ``smiles`` column of the input csv and, for every requested
    fingerprint type, writes one row per unique SMILES: the SMILES followed
    by one column per fingerprint bit. Rows that cannot be converted are
    written as ``(smiles, "NA")`` and reported on stdout.

    NOTE: the output path does not depend on the fingerprint type and is
    opened in write mode for each type, so when several types are given each
    one overwrites the file written for the previous one.
    """
    parser = argparse.ArgumentParser(
        description='Generate chemical fingerprints from smiles strings')
    parser.add_argument('-S',
                        '--smiles',
                        action='store',
                        nargs=1,
                        dest='smiles',
                        help='List of smiles strings to convert to '
                             'chemical fingerprint - should be in a column '
                             'named "smiles" (.csv format)')
    parser.add_argument('-f',
                        '--fingerprint',
                        action='store',
                        nargs='*',
                        dest='fingerprints',
                        help='Desired fingerprint type '
                             '(avalon, ecfp, fcfp, or maccs)')
    parser.add_argument('-n',
                        '--name',
                        action='store',
                        nargs=1,
                        dest='name',
                        help='Name of fingerprint csv file to write')
    parser.add_argument('-i',
                        '--input_directory',
                        action='store',
                        nargs=1,
                        dest='input',
                        default=['./'],
                        help='Directory where input files are stored')
    parser.add_argument('-o',
                        '--output_directory',
                        action='store',
                        nargs=1,
                        dest='output',
                        default=['./'],
                        help='Directory where output files should be written')
    args = vars(parser.parse_args())

    for fptype in args['fingerprints']:
        # Directory and file name are joined by plain concatenation, so the
        # directory arguments must end with a path separator.
        data = pd.read_csv(args['input'][0] + args['smiles'][0],
                           usecols=['smiles'])
        ofile = args['output'][0] + args['name'][0]
        time_start = time.time()
        with open(ofile, 'w') as csv_file:
            writer = csv.writer(csv_file, delimiter=',', lineterminator='\n')
            for smiles in data.smiles.unique():
                try:
                    # Parsing happens inside the try so an unparsable value
                    # becomes an "NA" row instead of aborting the run.
                    mol = MolFromSmiles(smiles)
                    if fptype == 'avalon':
                        fp = GetAvalonFP(mol, nBits=2048)
                    elif fptype == 'ecfp':
                        fp = GetMorganFingerprintAsBitVect(mol, radius=2)
                    elif fptype == 'fcfp':
                        fp = GetMorganFingerprintAsBitVect(mol,
                                                           radius=2,
                                                           useFeatures=True)
                    elif fptype == 'maccs':
                        fp = MACCSkeys.GenMACCSKeys(mol)
                    else:
                        # Without this branch ``fp`` would be unbound, or
                        # worse, still hold a fingerprint left over from a
                        # previously processed type.
                        raise ValueError(
                            'Unknown fingerprint type: ' + str(fptype))

                    fp_bitstr = list(fp.ToBitString())
                    fp_bitstr.insert(0, smiles)
                    writer.writerow(fp_bitstr)
                except Exception:
                    writer.writerow((smiles, "NA"))
                    print('Issue with conversion to ' + fptype +
                          ' fingerprint: ' + str(smiles))
        print('Done writing {} fingerprints! Time elapsed: {} seconds'.format(
            fptype, time.time() - time_start))
예제 #20

0

파일 보기

def default_FP_Func(m):
    """Return the Avalon fingerprint of ``m`` using GetAvalonFP's defaults."""
    fingerprint = GetAvalonFP(m)
    return fingerprint