コード例 #1
0
ファイル: fingerprinting.py プロジェクト: xiongzhp/ASKCOS
def create_rxn_Morgan2FP_separately(rsmi, psmi, rxnfpsize=gc.fingerprint_bits, pfpsize=gc.fingerprint_bits, useFeatures=False, calculate_rfp=True, useChirality=False):
    """Build radius-2 Morgan bit fingerprints for reactants and products.

    The reactant and product SMILES are passed separately and their
    fingerprints are returned separately.

    Args:
        rsmi: Reactant SMILES string.
        psmi: Product SMILES string.
        rxnfpsize: Number of bits of the reactant fingerprint.
        pfpsize: Number of bits of the product fingerprint.
        useFeatures: Forwarded to ``GetMorganFingerprintAsBitVect``.
        calculate_rfp: Accepted for backward compatibility; this function
            does not read it.
        useChirality: Forwarded to ``GetMorganFingerprintAsBitVect``.

    Returns:
        ``[pfp, rfp]`` as float32 numpy arrays, or ``None`` when a molecule
        or a fingerprint cannot be built (the reason is printed).
    """
    rsmi = rsmi.encode('utf-8')
    psmi = psmi.encode('utf-8')

    # Reactant side.
    try:
        rmol = Chem.MolFromSmiles(rsmi)
    except Exception as e:
        print(e)
        return
    try:
        rbits = AllChem.GetMorganFingerprintAsBitVect(
            mol=rmol, radius=2, nBits=rxnfpsize, useFeatures=useFeatures, useChirality=useChirality)
        rfp = np.empty(rxnfpsize, dtype='float32')
        DataStructs.ConvertToNumpyArray(rbits, rfp)
    except Exception as e:
        print("Cannot build reactant fp due to {}".format(e))
        return

    # Product side.
    try:
        pmol = Chem.MolFromSmiles(psmi)
    except Exception as e:
        # Previously this failure was swallowed without any message.
        print("Cannot build product mol due to {}".format(e))
        return
    try:
        pbits = AllChem.GetMorganFingerprintAsBitVect(
            mol=pmol, radius=2, nBits=pfpsize, useFeatures=useFeatures, useChirality=useChirality)
        pfp = np.empty(pfpsize, dtype='float32')
        DataStructs.ConvertToNumpyArray(pbits, pfp)
    except Exception as e:
        print("Cannot build product fp due to {}".format(e))
        return

    return [pfp, rfp]
コード例 #2
0
ファイル: molz.py プロジェクト: LiamWilbraham/molz
    def _compute_fps(self) -> None:
        """Fill ``self.fps`` with one fingerprint row per molecule.

        Each molecule in ``self.data.mol`` is fingerprinted according to
        ``self.fp_type`` ('morgan' or 'rdkit') using ``self.fp_rad`` and
        ``self.fp_bits``; the rows are then copied into a 2D array of shape
        ``(number of molecules, self.fp_bits)``.
        """
        rows = []
        progress = tqdm.tqdm(self.data.mol,
                             desc='Computing fingerprints',
                             disable=self.prog)
        for mol in progress:
            if self.fp_type == 'morgan':
                fp_vect = rdMolDescriptors.GetMorganFingerprintAsBitVect(
                    mol, self.fp_rad, self.fp_bits)
            elif self.fp_type == 'rdkit':
                # The same value is used for the minimum and maximum path.
                fp_vect = Chem.RDKFingerprint(
                    mol,
                    minPath=self.fp_rad,
                    maxPath=self.fp_rad,
                    fpSize=self.fp_bits,
                )

            row = np.zeros((0, ), dtype=np.int8)
            DataStructs.ConvertToNumpyArray(fp_vect, row)
            rows.append(row)

        self.fps = np.zeros((len(rows), self.fp_bits))
        for index, row in enumerate(rows):
            self.fps[index] = row
コード例 #3
0
 def fit_model(self, toxicity_data):
     """Fit the toxicity model from a tab-separated data file.

     Each row of the file must provide a ``toxicity`` column (a number whose
     natural logarithm becomes the target) and an ``InChI`` column (turned
     into a fingerprint by ``self.calculate_ECFP``).

     Side effects: sets ``self.log_loading``, ``self.log_score`` and
     ``self.model``.

     :param toxicity_data: path of the tab-separated file to read.
     """
     y = []
     rows = []
     # Loading data
     with open(toxicity_data, "r") as file_hdl:
         reader = csv.DictReader(file_hdl, delimiter='\t')
         for row in reader:
             y.append(math.log(float(row["toxicity"])))
             arr = np.zeros((1, ))
             fp = self.calculate_ECFP(row["InChI"])
             DataStructs.ConvertToNumpyArray(fp, arr)
             rows.append(np.reshape(arr, (1, 1024)))
     # Concatenate once instead of re-copying the growing matrix per row.
     # X stays None when the file holds no data rows, as before.
     X = np.concatenate(rows, axis=0) if rows else None
     self.log_loading = "Loaded {} compounds from {}".format(
         len(y), toxicity_data)
     y = np.array(y)
     # Fitting model: the score returned by the selection step is not used;
     # the reported score is recomputed on the training data below.
     best_model, _ = self.select_current_best_model(X,
                                                    y,
                                                    models_number=10)
     y_pred = best_model.predict(X)
     score = sklearn.metrics.r2_score(y, y_pred)
     self.log_score = "The toxicity model has a R2 score of {} on itself".format(
         round(score, 2))
     self.model = best_model
コード例 #4
0
 def compound_scoring(compound):
     """Return the prediction of ``self.model`` for a single compound.

     The compound's ECFP (from ``compound._get_ECFP()``) is converted to a
     numpy array and reshaped to one row of 1024 values before prediction.
     NOTE(review): ``self`` is not a parameter here; presumably it is bound
     in an enclosing scope -- confirm against the surrounding code.
     """
     features = np.zeros((1, ))
     DataStructs.ConvertToNumpyArray(compound._get_ECFP(), features)
     return self.model.predict(np.reshape(features, (1, 1024)))
コード例 #5
0
ファイル: cluster_img.py プロジェクト: zeromtmu/phd-scripts
def chemical_space(fname):
    """
    Build a chemical-space embedding from a text file with SMILES data.

    Each line of the file is comma-separated; the third field is the SMILES
    string and the first two fields are joined with "/" to name the molecule.
    A radius-2 Morgan bit fingerprint is computed per molecule and the
    fingerprints are embedded with UMAP using ``tanimoto_dist`` as metric.

    :param fname: path of the comma-separated input file
    :return: the result of ``umap.UMAP(...).fit_transform`` on the fingerprints
    """
    ligands = []
    with open(fname, "r") as f:
        for entry in f.read().splitlines():
            fields = entry.split(",")
            mol = Chem.MolFromSmiles(fields[2])
            mol.SetProp("_Name", str(fields[0] + "/" + fields[1]))
            ligands.append(mol)

    X = []
    for ligand in ligands:
        AllChem.Compute2DCoords(ligand)
        arr = np.zeros((0,))
        # Fingerprint the current ligand; the previous code reused the last
        # molecule parsed above, giving every row the same fingerprint.
        fp = AllChem.GetMorganFingerprintAsBitVect(ligand, 2)
        DataStructs.ConvertToNumpyArray(fp, arr)
        X.append(arr)

    # Alternative embedding:
    # TSNE(n_components=3, metric=tanimoto_dist).fit_transform(X)
    return umap.UMAP(n_neighbors=5, min_dist=0.2, metric=tanimoto_dist).fit_transform(X)
コード例 #6
0
 def search_by_mols(self, mols, topk=10):
     '''
     Search the index for the nearest neighbours of each query molecule.

     Every molecule is encoded as MACCS keys, packed with ``self.vec2bytes``
     and looked up in ``self.index``; each hit is then re-scored with
     ``self.calc_similarity`` and the hits of one query are sorted by
     descending score.

     :param mols: a list of molecules
     :param topk: number of neighbours requested from the index per query
     :return: [[{"id": xx, "smiles": xx, "score": xx}, {}, ...], []],
              one inner list per query molecule
     '''
     mols_vec = []
     for mol in mols:
         tmp_arr = np.array([])
         DataStructs.ConvertToNumpyArray(
             rdMolDescriptors.GetMACCSKeysFingerprint(mol), tmp_arr)
         mols_vec.append(self.vec2bytes(tmp_arr))
     ret_dists, ret_ids = self.index.search(
         np.array(mols_vec).astype("uint8"), topk)
     rets = []
     for mol, _dists, ids in zip(mols, ret_dists, ret_ids):
         hits = []
         for row_idx in ids:
             record = self.df_zinc.iloc[row_idx]
             hits.append({
                 "id": record["zinc_id"],
                 "smiles": record["smiles"],
                 "score": self.calc_similarity(
                     mol, Chem.MolFromSmiles(record["smiles"])),
             })
         # Append inside the loop so every query gets its own result list
         # (previously only the last query's hits were returned).
         rets.append(
             sorted(hits, key=lambda item: item["score"], reverse=True))
     return rets
コード例 #7
0
ファイル: rdkitfp.py プロジェクト: shenwanxiang/bidd-molmap
def GetRDkitFPs(mol, nBits=2048, return_bitInfo=False):
    """
    #################################################################
    Calculate the RDKit (Daylight-like, topological) fingerprint.

    Usage:

        arr = GetRDkitFPs(mol)
        arr, bitInfo = GetRDkitFPs(mol, return_bitInfo=True)

        Input: mol is a molecule object; nBits is passed to RDKFingerprint
        as fpSize (default 2048).

        Output: a boolean numpy array filled from the fingerprint. When
        return_bitInfo is true, a tuple (array, bitInfo) is returned, where
        bitInfo is the dict handed to RDKFingerprint via its bitInfo
        argument.
    #################################################################
    """

    bitInfo = {}
    fp = RDKFingerprint(mol, fpSize=nBits, bitInfo=bitInfo)
    # Builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    arr = np.zeros((0, ), dtype=bool)
    DataStructs.ConvertToNumpyArray(fp, arr)
    if return_bitInfo:
        # Return the collected dict, not the boolean flag.
        return arr, bitInfo
    return arr
コード例 #8
0
def calc_fp_arr( mols ):
    """Return an array with one radius-2 Morgan bit fingerprint per molecule."""
    def _to_array( mol ):
        # Per-molecule buffer filled in place by ConvertToNumpyArray.
        vec = np.zeros( (1,) )
        bits = AllChem.GetMorganFingerprintAsBitVect( mol, 2 )
        DataStructs.ConvertToNumpyArray( bits, vec )
        return vec

    return np.asarray( [ _to_array( mol ) for mol in mols ] )
コード例 #9
0
def convert_fps(fp):
    """Convert an RDKit fingerprint to a one-element list holding its digit string.

    The fingerprint is copied into a numpy array and every entry is written
    as an integer digit; the digits are joined into a single string.
    """
    bits = numpy.zeros((1, ))
    DataStructs.ConvertToNumpyArray(fp, bits)
    return [''.join(str(int(bit)) for bit in bits)]
コード例 #10
0
def convert_reaction_to_fp(rsmi, psmi, fpsize=2048):
    """Return the product fingerprint and the product-minus-reactant difference.

    Both SMILES strings are turned into radius-2, chirality-aware Morgan bit
    fingerprints of ``fpsize`` bits stored as int8 arrays.

    Returns:
        ``(pfp, rxnfp)`` where ``rxnfp = pfp - rfp``; ``None`` when any
        molecule or fingerprint cannot be built (the reason is printed).
    """
    # Reactant side.
    rsmi = rsmi.encode('utf-8')
    try:
        reactant = Chem.MolFromSmiles(rsmi)
    except Exception as err:
        print("Cannot build reactant mol due to {}".format(err))
        return None
    try:
        reactant_bits = AllChem.GetMorganFingerprintAsBitVect(
            reactant, radius=2, nBits=fpsize,
            useFeatures=False, useChirality=True)
        rfp = np.empty(fpsize, dtype='int8')
        DataStructs.ConvertToNumpyArray(reactant_bits, rfp)
    except Exception as err:
        print("Cannot build reactant fp due to {}".format(err))
        print(rsmi)
        return None

    # Product side.
    psmi = psmi.encode('utf-8')
    try:
        product = Chem.MolFromSmiles(psmi)
    except Exception as err:
        print("Cannot build product mol due to {}".format(err))
        return None
    try:
        product_bits = AllChem.GetMorganFingerprintAsBitVect(
            product, radius=2, nBits=fpsize,
            useFeatures=False, useChirality=True)
        pfp = np.empty(fpsize, dtype='int8')
        DataStructs.ConvertToNumpyArray(product_bits, pfp)
    except Exception as err:
        print("Cannot build product fp due to {}".format(err))
        return None

    difference = pfp - rfp
    return np.asarray(pfp), np.asarray(difference)
コード例 #11
0
def getFpArr( mols, nBits = 1024 ):
    """Return an array of radius-2 Morgan bit fingerprints, one row per molecule."""
    # First compute every fingerprint, then convert them one by one.
    fingerprints = []
    for mol in mols:
        fingerprints.append( AllChem.GetMorganFingerprintAsBitVect( mol, 2, nBits=nBits ) )

    def _as_array( fp ):
        vec = np.zeros( (1,) )
        DataStructs.ConvertToNumpyArray( fp, vec )
        return vec

    return np.array( [ _as_array( fp ) for fp in fingerprints ] )
コード例 #12
0
ファイル: agg_fea_hpc.py プロジェクト: adpartin/mol-features
def fps_to_nparr(x):
    """Decode a base64 fingerprint string and return it as a numpy array."""
    from base64 import b64decode
    from rdkit.Chem import DataStructs
    bit_vect = DataStructs.ExplicitBitVect(b64decode(x))
    out = np.zeros((1, ))
    DataStructs.ConvertToNumpyArray(bit_vect, out)
    return out
コード例 #13
0
def getFpArr( fps ):
    """Return a list of numpy arrays decoded from base64 fingerprint strings."""
    def _decode( encoded ):
        # A 4096-bit vector is created and then loaded from the base64 text.
        bit_vect = DataStructs.ExplicitBitVect(4096)
        bit_vect.FromBase64(encoded)
        vec = np.zeros( (1,) )
        DataStructs.ConvertToNumpyArray( bit_vect, vec )
        return vec

    return [ _decode( item ) for item in fps ]
コード例 #14
0
ファイル: Agents.py プロジェクト: shuan4638/ChemSearchDQN
 def smiles2fps(self, smiles):
     """Return the radius-3 Morgan bit fingerprint of a SMILES as a one-row array.

     Hydrogens are added to the parsed molecule before fingerprinting; the
     fingerprint has ``self.state_size`` bits.
     """
     mol = AllChem.AddHs(Chem.MolFromSmiles(smiles))
     bits = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=self.state_size)
     state = np.zeros((1, ))
     DataStructs.ConvertToNumpyArray(bits, state)
     return np.array([state])
コード例 #15
0
ファイル: maccskeys.py プロジェクト: shenwanxiang/bidd-molmap
def GetMACCSFPs(mol):
    '''
    MACCS keys fingerprint (166 keys) of a molecule as a boolean numpy array.
    '''

    fp = AllChem.GetMACCSKeysFingerprint(mol)

    # Builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    arr = np.zeros((0, ), dtype=bool)
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr
コード例 #16
0
def GetAvalonFPs(mol, nBits=2048):

    '''
    Avalon_fingerprints: https://pubs.acs.org/doi/pdf/10.1021/ci050413p

    Returns the fingerprint produced by GAFP(mol, nBits=nBits) as a boolean
    numpy array.
    '''

    fp = GAFP(mol, nBits = nBits)
    # Builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    arr = np.zeros((0,),  dtype=bool)
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr
コード例 #17
0
def build_mol_features(in_file, out_file):
    """Compute MACCS keys for every SMILES of a zipped CSV and save them.

    Reads the ``smiles`` column of ``in_file`` (zip-compressed CSV), converts
    each entry to a MACCS keys array and writes the stacked arrays to
    ``out_file`` with ``np.save``.
    """
    df_zinc = pd.read_csv(in_file, compression="zip")
    progress = tqdm.tqdm(df_zinc["smiles"], total=len(df_zinc))
    fp_list = []
    for smi in progress:
        keys = rdMolDescriptors.GetMACCSKeysFingerprint(Chem.MolFromSmiles(smi))
        vec = np.array([])
        DataStructs.ConvertToNumpyArray(keys, vec)
        fp_list.append(vec)
    np.save(out_file, np.array(fp_list))
コード例 #18
0
def GetTorsionFPs(mol, nBits = 2048, binary = True):
    '''
    Hashed topological torsion fingerprint of a molecule as a numpy array.

    With binary=True the output array has a boolean dtype, otherwise int8.
    '''
    fp = Torsions.GetHashedTopologicalTorsionFingerprint(mol, nBits = nBits)
    # Builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    dtype = bool if binary else np.int8
    arr = np.zeros((0,),  dtype=dtype)
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr
コード例 #19
0
def getFpArrSmiles( smiles, radius=2, nBits=1024 ):
    """Return a list of Morgan bit fingerprint arrays, one per usable SMILES.

    Entries for which parsing or fingerprinting raises
    ``Boost.Python.ArgumentError`` are skipped.

    :param smiles: iterable of SMILES strings
    :param radius: Morgan fingerprint radius
    :param nBits: number of fingerprint bits
    """
    X = []
    for line in smiles:
        try:
            m = Chem.MolFromSmiles(line)
            # Honour the radius argument (it used to be hard-coded to 2).
            fp = AllChem.GetMorganFingerprintAsBitVect( m, radius, nBits=nBits )
        except Boost.Python.ArgumentError:
            continue # mis-formed
        arr = np.zeros( (1,) )
        DataStructs.ConvertToNumpyArray( fp, arr )
        X.append( arr )
    return X
コード例 #20
0
ファイル: fingerprints.py プロジェクト: anasf97/drug_learning
 def transform(self):
     """Compute RDKit fingerprints for every molecule in ``self.structures``.

     Sets ``self.features`` (one int8 row per molecule), ``self.mol_names``
     (each molecule's ``_Name`` property) and ``self.columns`` (column
     indices as strings), and returns ``self.features``.
     """
     super().transform()
     fts = []
     self.mol_names = []
     for mol in self.structures:
         fp = RDKFingerprint(mol)
         arr = np.zeros((0, ), dtype=np.int8)
         DataStructs.ConvertToNumpyArray(fp, arr)
         fts.append(arr)
         self.mol_names.append(mol.GetProp("_Name"))
     # Build the matrix once, after the loop, instead of on every iteration.
     self.features = np.array(fts)
     # With no structures the array is one-dimensional and has no columns.
     n_columns = self.features.shape[1] if self.features.ndim > 1 else 0
     self.columns = [str(i) for i in range(n_columns)]
     return self.features
コード例 #21
0
ファイル: get_ecfp.py プロジェクト: rfhari/neuraldecipher
def get_ecfp_count_vector(smiles: str, radius: int, nbits: int) -> np.ndarray:
    """
    Build the hashed (count) Morgan fingerprint of a SMILES as a numpy array.
    :param smiles: SMILES string of the molecule
    :param radius: Radius passed to the Morgan algorithm
    :param nbits: Length passed to the hashed fingerprint
    :return: int8 numpy array filled from the fingerprint
    """
    mol = Chem.MolFromSmiles(smiles)
    hashed_fp = AllChem.GetHashedMorganFingerprint(mol, radius, nbits)
    counts = np.zeros((0, ), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(hashed_fp, counts)
    return counts
コード例 #22
0
def maccs_molstring(molecule, fptype):
    """
    Build the MACCS keys fingerprint of a molecule as an integer array

    :param molecule: molecule object
    :param fptype: fingerprint settings; not read by this function
    :type fptype: dict
    :return: integer numpy array filled from the MACCS keys
    """
    keys = MACCSkeys.GenMACCSKeys(molecule)
    out = np.zeros((1, ), dtype=int)
    DataStructs.ConvertToNumpyArray(keys, out)

    return out
コード例 #23
0
def fingerprint_features(smile_string, radius=2, size=256):
    """Return the Morgan bit fingerprint of a SMILES string as an int8 array.

    The molecule is renumbered with the output of ``CanonicalRankAtoms``
    before a chirality- and bond-type-aware fingerprint of ``size`` bits is
    computed.
    """
    parsed = MolFromSmiles(smile_string)
    ranks = rdmolfiles.CanonicalRankAtoms(parsed)
    renumbered = rdmolops.RenumberAtoms(parsed, ranks)
    bit_vect = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        renumbered, radius,
        nBits=size,
        useChirality=True,
        useBondTypes=True,
        useFeatures=False)
    features = np.zeros((0,), dtype=np.int8)
    DataStructs.ConvertToNumpyArray(bit_vect, features)
    return features
コード例 #24
0
ファイル: handler.py プロジェクト: stjordanis/of_conformal
def calc_descriptors(rdmol):
    """Return physico-chemical descriptors plus a Morgan fingerprint array.

    :param rdmol: RDKit molecule
    :return: ``[logp, mwt, rtb, hbd, hba, tpsa, np_fp]`` where ``np_fp`` is
        the radius-2 Morgan bit fingerprint with ``N_BITS`` bits as a numpy
        array
    """
    fp = Chem.GetMorganFingerprintAsBitVect(rdmol,
                                            radius=2,
                                            nBits=N_BITS,
                                            useFeatures=False)
    np_fp = np.zeros(N_BITS)
    # np_fp is filled in place; the call's return value was only ever stored
    # in an unused local, so it is no longer captured.
    DataStructs.ConvertToNumpyArray(fp, np_fp)
    logp = Descriptors.MolLogP(rdmol)
    mwt = Descriptors.MolWt(rdmol)
    rtb = Descriptors.NumRotatableBonds(rdmol)
    hbd = Descriptors.NumHDonors(rdmol)
    hba = Descriptors.NumHAcceptors(rdmol)
    tpsa = Descriptors.TPSA(rdmol)
    return [logp, mwt, rtb, hbd, hba, tpsa, np_fp]
コード例 #25
0
def rdk_molstring(molecule, fptype):
    """
    Build the RDKit fingerprint of a molecule as an integer array

    :param molecule: molecule object
    :param fptype: fingerprint settings; ``fptype['Size']`` is the bit count
    :type fptype: dict
    :return: integer numpy array filled from the RDKit fingerprint
    """
    fingerprint = RDKFingerprint(molecule, fpSize=fptype['Size'])
    out = np.zeros((1, ), dtype=int)
    DataStructs.ConvertToNumpyArray(fingerprint, out)

    return out
コード例 #26
0
def get_smiles2mol(smiles):
    """Parse SMILES, compute 2D coordinates and radius-2 Morgan fingerprints.

    Returns a tuple ``(X, smiles, smiles2mol)``: the list of fingerprint
    arrays, the input ``smiles`` unchanged, and a dict mapping each SMILES
    to its molecule.
    """
    mols = []
    for smi in smiles:
        mols.append(Chem.MolFromSmiles(smi))
    for mol in mols:
        AllChem.Compute2DCoords(mol)
    smiles2mol = {smi: mol for smi, mol in zip(smiles, mols)}

    def _morgan_array(mol):
        vec = np.zeros((0, ))
        bits = AllChem.GetMorganFingerprintAsBitVect(mol, 2)
        DataStructs.ConvertToNumpyArray(bits, vec)
        return vec

    X = [_morgan_array(mol) for mol in mols]
    print('{} mols loaded'.format(len(X)))
    return X, smiles, smiles2mol
コード例 #27
0
ファイル: morganfp.py プロジェクト: shenwanxiang/bidd-molmap
def GetMorganFPs(mol, nBits=2048, radius = 2, return_bitInfo = False):

    """
    Morgan bit fingerprint of a molecule as a boolean numpy array.

    ECFP4: radius=2

    When return_bitInfo is true, a tuple (array, bitInfo) is returned, where
    bitInfo is the dict handed to GetMorganFingerprintAsBitVect.
    """
    bitInfo={}
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius,
                                               bitInfo=bitInfo, nBits = nBits)
    # Builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
    arr = np.zeros((0,),  dtype=bool)
    DataStructs.ConvertToNumpyArray(fp, arr)

    if return_bitInfo:
        return arr, bitInfo
    return arr
コード例 #28
0
def avalon_molstring(molecule, fptype):
    """
    Build the Avalon count fingerprint of a molecule as an integer array

    :param molecule: molecule object
    :param fptype: fingerprint settings; ``fptype['Size']`` is the bit count
    :type fptype: dict
    :return: integer numpy array filled from the Avalon count fingerprint
    """
    fingerprint = GetAvalonCountFP(molecule, nBits=fptype['Size'])
    out = np.zeros((1, ), dtype=int)
    DataStructs.ConvertToNumpyArray(fingerprint, out)

    return out
コード例 #29
0
def pharma_molstring(molecule, fptype):
    """
    Build the 2D pharmacophore fingerprint of a molecule as an integer array

    The fingerprint is generated with the Gobbi_Pharm2D factory and passed
    through ConvertToExplicit before being copied into the array.

    :param molecule: molecule object
    :param fptype: fingerprint settings; not read by this function
    :type fptype: dict
    :return: integer numpy array filled from the pharmacophore fingerprint
    """
    pharm_fp = Generate.Gen2DFingerprint(molecule, Gobbi_Pharm2D.factory)
    explicit_fp = ConvertToExplicit(pharm_fp)
    out = np.zeros((1, ), dtype=int)
    DataStructs.ConvertToNumpyArray(explicit_fp, out)

    return out
コード例 #30
0
 def mapperfunc( mol ):
     """Draw a similarity map for ``mol`` and save it under ``res/``.

     The molecule's radius-2 Morgan bit fingerprint is classified with
     ``cls.predict``; the figure file name starts with ``act_`` when the
     first prediction equals 1 and with ``nonact_`` otherwise.
     NOTE(review): ``cls`` and ``getProba`` come from the enclosing scope;
     the array is passed to ``cls.predict`` as-is -- confirm the model
     accepts that shape.
     """
     fig, _weight = SimilarityMaps.GetSimilarityMapForModel(
         mol,
         SimilarityMaps.GetMorganFingerprint,
         lambda x: getProba( x, cls.predict_proba ),
         colorMap=cm.bwr )
     fp = AllChem.GetMorganFingerprintAsBitVect( mol, 2 )
     print(fp)
     features = np.zeros((1,))
     DataStructs.ConvertToNumpyArray( fp, features )
     print(features)
     res = cls.predict( features )
     smi = Chem.MolToSmiles( mol )
     print(smi)

     prefix = "res/act_" if res[0] == 1 else "res/nonact_"
     fig.savefig( prefix + smi + "_.png", bbox_inches = "tight" )