Exemple #1
0
def filter_actions(smiles, valid_actions, target_fps, target_atoms,
                   target_bonds, target_C_envs, radius):
    """Keep only the candidate actions that move closer to the target.

    The current molecule (``smiles``) is fingerprinted as a 1024-bit Morgan
    bit vector and compared with ``target_fps``.  Each candidate in
    ``valid_actions`` is described by ``get_mol_infos`` and kept when its
    similarity to the target is strictly higher than the current one and
    ``mol_violation`` does not reject it.

    Returns:
        ``(actions, reach)``.  If a candidate has similarity exactly 1 the
        scan stops and ``([candidate], True)`` is returned; otherwise the
        kept candidates and ``False``.
    """
    current_mol = Chem.MolFromSmiles(smiles)
    current_fp = AllChem.GetMorganFingerprintAsBitVect(current_mol,
                                                       radius=radius,
                                                       nBits=1024)
    base_similarity = DataStructs.FingerprintSimilarity(current_fp, target_fps)

    kept = []
    for candidate in valid_actions:
        cand_fp, cand_atoms, cand_bonds, cand_C_envs = get_mol_infos(
            candidate, radius)
        cand_similarity = DataStructs.FingerprintSimilarity(
            cand_fp, target_fps)

        # The improvement test runs before the exact-match test, so
        # mol_violation is still consulted for a candidate that matches.
        improves = cand_similarity > base_similarity
        if improves and not mol_violation(cand_atoms, cand_bonds, cand_C_envs,
                                          target_atoms, target_bonds,
                                          target_C_envs):
            kept.append(candidate)

        if cand_similarity == 1:
            # Target reached: this candidate becomes the only action.
            return [candidate], True
    return kept, False
Exemple #2
0
def create_rxn_Morgan2FP_separately(rsmi, psmi, rxnfpsize=gc.fingerprint_bits, pfpsize=gc.fingerprint_bits, useFeatures=False, calculate_rfp=True, useChirality=False):
    """Build radius-2 Morgan bit fingerprints for a reactant and a product.

    The two SMILES are handled separately and the two fingerprints are
    returned separately.

    Args:
        rsmi: Reactant SMILES string.
        psmi: Product SMILES string.
        rxnfpsize: Number of bits of the reactant fingerprint.
        pfpsize: Number of bits of the product fingerprint.
        useFeatures: Forwarded to ``GetMorganFingerprintAsBitVect``.
        calculate_rfp: Accepted for interface compatibility; not used here.
        useChirality: Forwarded to ``GetMorganFingerprintAsBitVect``.

    Returns:
        ``[pfp, rfp]`` as float32 NumPy arrays, or ``None`` when any step
        fails (a message is printed in that case).
    """

    def _morgan_array(mol, size, role):
        # Fingerprint `mol` into a float32 array; None (and a message) on failure.
        try:
            fp_bit = AllChem.GetMorganFingerprintAsBitVect(
                mol=mol, radius=2, nBits=size, useFeatures=useFeatures, useChirality=useChirality)
            fp = np.empty(size, dtype='float32')
            DataStructs.ConvertToNumpyArray(fp_bit, fp)
        except Exception as e:
            print("Cannot build {} fp due to {}".format(role, e))
            return None
        return fp

    # NOTE(review): SMILES are handed to RDKit as UTF-8 bytes, which looks
    # like a Python 2 holdover -- confirm the RDKit build in use accepts bytes.
    rsmi = rsmi.encode('utf-8')
    psmi = psmi.encode('utf-8')

    try:
        reactant = Chem.MolFromSmiles(rsmi)
    except Exception as e:
        print(e)
        return
    rfp = _morgan_array(reactant, rxnfpsize, "reactant")
    if rfp is None:
        return

    try:
        product = Chem.MolFromSmiles(psmi)
    except Exception as e:
        # Previously swallowed silently; report it like the other failures.
        print("Cannot build product mol due to {}".format(e))
        return
    pfp = _morgan_array(product, pfpsize, "product")
    if pfp is None:
        return

    return [pfp, rfp]
Exemple #3
0
def fps_to_nparr(x):
    """Decode a base64 fingerprint string into a NumPy array of bits.

    The decoded bytes are used to construct an ``ExplicitBitVect``
    (presumably RDKit's binary serialisation -- verify against the
    producer), which is then copied into a NumPy array.
    """
    from rdkit.Chem import DataStructs
    import base64

    raw_bytes = base64.b64decode(x)
    bitvect = DataStructs.ExplicitBitVect(raw_bytes)
    # Seed array only; RDKit resizes it to the fingerprint length.
    bits = np.zeros((1, ))
    DataStructs.ConvertToNumpyArray(bitvect, bits)
    return bits
Exemple #4
0
def getFpArr( fps ):
    """Convert base64-encoded fingerprints into a list of NumPy arrays.

    Each item of ``fps`` is loaded into a 4096-bit ``ExplicitBitVect`` via
    ``FromBase64`` and copied into its own array.
    """

    def _decode(encoded):
        bitvect = DataStructs.ExplicitBitVect(4096)
        DataStructs.ExplicitBitVect.FromBase64(bitvect, encoded)
        # Seed array only; RDKit resizes it to the fingerprint length.
        bits = np.zeros( (1,) )
        DataStructs.ConvertToNumpyArray( bitvect, bits )
        return bits

    return [_decode(item) for item in fps]
Exemple #5
0
def rd_kit(dir_sdf = "../data/sdf/"):
    """Plot histograms of fingerprint similarity to a baseline molecule.

    The directory is listed with ``ls``; the first molecule of the first
    listed file is the baseline.  For the first molecule of every listed
    file, Morgan, RDKit, Avalon and layered fingerprints are compared with
    the baseline using Tanimoto similarity.  Only the Morgan and RDKit
    histograms are drawn; the mean RDKit similarity and the number of files
    are printed.

    NOTE(review): the baseline uses Morgan radius 2 / RDK maxPath 2 while the
    per-file fingerprints use radius 3 / maxPath 3 -- confirm this mismatch
    is intended.
    """
    listing = os.popen("ls " + dir_sdf).read()
    sdf_names = str(listing).split()
    bit_length = 1024
    tanimoto = DataStructs.TanimotoSimilarity

    # Reference fingerprints from the first molecule of the first file.
    baseline_mol = SDMolSupplier(dir_sdf + sdf_names[0])[0]
    baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(baseline_mol, 2, nBits=bit_length)
    baseline_rdk = AllChem.RDKFingerprint(baseline_mol, maxPath=2)
    baseline_aval = pyAvalonTools.GetAvalonFP(baseline_mol, 128)
    baseline_layer = AllChem.LayeredFingerprint(baseline_mol)

    morgan_sims = []
    rdk_sims = []
    aval_sims = []
    layer_sims = []
    for sdf_name in sdf_names:
        first_mol = SDMolSupplier(dir_sdf + sdf_name)[0]

        # Count-based Morgan fingerprint: computed as before, result unused.
        AllChem.GetMorganFingerprint(first_mol, 2)

        fp_bit = AllChem.GetMorganFingerprintAsBitVect(first_mol, 3, nBits=bit_length)
        fp_rdk = AllChem.RDKFingerprint(first_mol, maxPath=3)
        fp_aval = pyAvalonTools.GetAvalonFP(first_mol, 128)
        fp_layer = AllChem.LayeredFingerprint(first_mol)

        morgan_sims.append(
            DataStructs.FingerprintSimilarity(baseline_morgan, fp_bit, metric=tanimoto))
        rdk_sims.append(
            DataStructs.FingerprintSimilarity(baseline_rdk, fp_rdk, metric=tanimoto))
        aval_sims.append(
            DataStructs.FingerprintSimilarity(baseline_aval, fp_aval, metric=tanimoto))
        layer_sims.append(
            DataStructs.FingerprintSimilarity(baseline_layer, fp_layer, metric=tanimoto))

    sim_matrix_morgan = np.array(morgan_sims)
    sim_matrix_rdk = np.array(rdk_sims)
    # Avalon and layered similarities are computed but not plotted.
    sim_matrix_aval = np.array(aval_sims)
    sim_matrix_layer = np.array(layer_sims)

    plt.hist(sim_matrix_morgan, label = "morgan" + str(bit_length))
    plt.hist(sim_matrix_rdk, label = "rdk2")
    print(np.mean(sim_matrix_rdk))
    print(len(sdf_names))
    plt.xlabel("Similarity to Baseline")
    plt.ylabel("Counts")
    plt.title("Different Fingerprinting Methods, Similarity to Baseline")
    plt.legend()
    plt.show()
Exemple #6
0
def chemical_random_episode(env, search_dict, target_fps, target_atoms,
                            target_bonds, target_C_envs, radius):
    """Run one random episode, pruning actions with ``filter_actions``.

    Starting from ``env.reset()``, a random action is drawn at each step from
    the filtered action list of the current state.  ``search_dict`` maps a
    state to its remaining candidate actions and is mutated in place so that
    dead ends and already-reached targets are not revisited.  States are
    passed to ``Chem.MolFromSmiles``, i.e. they are treated as SMILES.

    Returns:
        ``(episode, reach, search_dict)`` where ``episode`` is the list of
        visited states, ``reach`` is the flag last returned by
        ``filter_actions`` and ``search_dict`` is either the updated mapping
        or the string ``'terminate'`` when only the initial state with an
        empty action list remains.
    """
    initial_state = env.reset()
    state = initial_state
    pre_state = initial_state
    episode = [state]
    reach = False
    while True:
        if state not in search_dict:
            valid_actions = env._get_valid_actions()
            valid_actions, reach = filter_actions(state, valid_actions,
                                                  target_fps, target_atoms,
                                                  target_bonds, target_C_envs,
                                                  radius)  # keep only improving actions
            search_dict[
                state] = valid_actions  # first visit: record the filtered actions
        elif search_dict == {initial_state: []}:
            # Only the initial state is left and it has no actions: signal
            # the caller by replacing the mapping with a sentinel string.
            search_dict = 'terminate'
            break
        else:
            valid_actions = search_dict[state]  # reuse the recorded actions
            # The re-filtered list is used for this step only; it is not
            # written back to search_dict.
            valid_actions, reach = filter_actions(state, valid_actions,
                                                  target_fps, target_atoms,
                                                  target_bonds, target_C_envs,
                                                  radius)  # filter again


        nA = len(valid_actions)
        if nA == 0:  # dead end: forget this state and never add it back
            search_dict.pop(
                state)  # drop the state that has no action left
            # NOTE(review): on the very first iteration pre_state == state, so
            # the entry popped above is looked up again here and raises
            # KeyError -- confirm whether that path can occur.
            search_dict[pre_state].remove(state)
            mol1 = Chem.MolFromSmiles(state)
            # NOTE(review): nBits is not passed here, whereas filter_actions
            # in this file uses nBits=1024 -- confirm the sizes are meant to differ.
            fps1 = AllChem.GetMorganFingerprintAsBitVect(mol1, radius=radius)
            print('No action space, Last action: %s, similarity: %.3f' %
                  (state, DataStructs.FingerprintSimilarity(fps1, target_fps)))
            break
        action = np.random.randint(nA)
        next_state, reward, done = env.step(valid_actions, action)
        episode.append(next_state)

        if reach == True:
            # Target reached: remove the winning action so that it is not
            # chosen again from this state.
            search_dict[state].remove(next_state)
            mol2 = Chem.MolFromSmiles(next_state)
            fps2 = AllChem.GetMorganFingerprintAsBitVect(mol2, radius=radius)
            print('Reach, last action: %s, similarity: %.3f' %
                  (next_state,
                   DataStructs.FingerprintSimilarity(fps2, target_fps)))
            break
        pre_state = state
        state = next_state

    return episode, reach, search_dict
 def fit_model(self, toxicity_data):
     """Fit the toxicity model from a tab-delimited training file.

     Each row must provide a ``toxicity`` value and an ``InChI`` string.  The
     target is the natural logarithm of ``toxicity``; the features are the
     fingerprint returned by ``self.calculate_ECFP``, reshaped to 1024
     columns.

     Side effects:
         Sets ``self.log_loading``, ``self.log_score`` and ``self.model``.

     Args:
         toxicity_data: Path of the tab-delimited file to read.
     """
     y = []
     feature_rows = []
     # Loading data
     with open(toxicity_data, "r") as file_hdl:
         reader = csv.DictReader(file_hdl, delimiter='\t')
         for row in reader:
             y.append(math.log(float(row["toxicity"])))
             # Seed array only; RDKit resizes it to the fingerprint length.
             arr = np.zeros((1, ))
             fp = self.calculate_ECFP(row["InChI"])
             DataStructs.ConvertToNumpyArray(fp, arr)
             feature_rows.append(np.reshape(arr, (1, 1024)))
     # Concatenate once instead of growing X row by row, which re-copied
     # the whole matrix on every iteration.  X stays None for an empty file.
     X = np.concatenate(feature_rows, axis=0) if feature_rows else None
     self.log_loading = "Loaded {} compounds from {}".format(
         len(y), toxicity_data)
     y = np.array(y)
     # Fitting model; the score returned by the selection step is not used,
     # R2 is recomputed on the training data below.
     best_model, _ = self.select_current_best_model(X,
                                                    y,
                                                    models_number=10)
     y_pred = best_model.predict(X)
     score = sklearn.metrics.r2_score(y, y_pred)
     self.log_score = "The toxicity model has a R2 score of {} on itself".format(
         round(score, 2))
     self.model = best_model
Exemple #8
0
def GetRDkitFPs(mol, nBits=2048, return_bitInfo=False):
    """Compute the RDKit topological (Daylight-like) fingerprint of a molecule.

    Args:
        mol: RDKit molecule object.
        nBits: Fingerprint size, passed to ``RDKFingerprint`` as ``fpSize``.
        return_bitInfo: When true, also return the ``bitInfo`` dictionary
            filled in by ``RDKFingerprint``.

    Returns:
        A boolean NumPy array with the fingerprint bits, or the tuple
        ``(array, bitInfo)`` when ``return_bitInfo`` is true.
    """

    bitInfo = {}
    fp = RDKFingerprint(mol, fpSize=nBits, bitInfo=bitInfo)
    # Builtin ``bool`` rather than the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    arr = np.zeros((0, ), dtype=bool)
    DataStructs.ConvertToNumpyArray(fp, arr)
    if return_bitInfo:
        # Return the collected dictionary, not the boolean flag.
        return arr, bitInfo
    return arr
    def measure_similarity(self, db_fps, sim_metric=DataStructs.TanimotoSimilarity, th=0.8):
        """Compare one database fingerprint with every user fingerprint.

        Matches with similarity of at least ``th`` are recorded in the global
        ``fps_matches`` under the current global ``db_cntr`` as
        ``(user_index, similarity)`` tuples.  ``db_cntr`` is advanced only
        when ``db_fps`` is not ``None``.
        """
        global user_ip_fps
        global db_cntr
        global fps_matches

        if db_cntr % 10000 == 0:
            self.jlogger.info("Completed checking similarity with {} compound of db".format(db_cntr))

        if db_fps is None:
            self.jlogger.debug("DB Finger print is unavailable, skipping this compound {}".format(db_cntr))
            return

        for u_fps_cntr, u_fps in enumerate(user_ip_fps):
            try:
                if u_fps is None:
                    self.jlogger.debug(
                        "User Finger print is unavailable, skipping this compound {}".format(u_fps_cntr))
                    continue

                sim = DataStructs.FingerprintSimilarity(u_fps, db_fps, metric=sim_metric)
                if sim >= th:
                    hit = (u_fps_cntr, sim)
                    if db_cntr not in fps_matches:
                        fps_matches[db_cntr] = [hit]
                    else:
                        fps_matches[db_cntr].append(hit)
            except Exception:
                # Same message goes to the module logger (with traceback)
                # and to the job logger.
                failure = "Error measuring similarity of compound db_cntr {} and u_fps_cntr {}".format(
                    db_cntr, u_fps_cntr)
                logger.exception(failure)
                self.jlogger.debug(failure)

        db_cntr += 1
    def get_vars_for_sim_calc(self, fp1, fp2):
        """Return the on-bit counts needed by similarity formulas.

        If the fingerprints differ in length, the longer one is folded by
        the integer ratio of the two lengths before counting.

        Returns:
            ``(a, b, c)``: on bits in ``fp1``, on bits in ``fp2`` and the
            number of on bits they have in common.
        """
        # ref: https://github.com/rdkit/rdkit-orig/blob/master/rdkit/DataStructs/__init__.py
        size1, size2 = fp1.GetNumBits(), fp2.GetNumBits()

        if size2 > size1:
            fp2 = DataStructs.FoldFingerprint(fp2, size2 // size1)
        elif size1 > size2:
            fp1 = DataStructs.FoldFingerprint(fp1, size1 // size2)

        return (fp1.GetNumOnBits(),
                fp2.GetNumOnBits(),
                len(DataStructs.OnBitsInCommon(fp1, fp2)))
Exemple #11
0
def dg_score(active_mols, decoy_mols):
    """For each active, find the most similar decoy (similar to DEKOIS).

    Lower is better (decoys less like actives), higher is worse.  Molecules
    are compared with radius-3 feature Morgan bit vectors (roughly FCFP_6)
    and Tanimoto similarity.  A decoy that is ``None`` scores 0.

    Returns:
        Two NumPy arrays with one entry per active: the highest similarity
        to any decoy and the index of that decoy.
    """

    def _fcfp6(mol):
        return AllChem.GetMorganFingerprintAsBitVect(mol, 3, useFeatures=True)

    active_fps = [_fcfp6(mol) for mol in active_mols]
    decoys_fps = [None if mol is None else _fcfp6(mol) for mol in decoy_mols]

    closest_sims = []
    closest_sims_id = []
    for active_fp in active_fps:
        sims_to_decoys = [
            0 if decoy_fp is None
            else DataStructs.TanimotoSimilarity(active_fp, decoy_fp)
            for decoy_fp in decoys_fps
        ]
        closest_sims.append(max(sims_to_decoys))
        closest_sims_id.append(np.argmax(sims_to_decoys))

    return np.array(closest_sims), np.array(closest_sims_id)
 def compound_scoring(compound):
     """Predict a score for ``compound`` from its ECFP fingerprint.

     The fingerprint from ``compound._get_ECFP()`` is converted to a
     ``(1, 1024)`` array and passed to ``self.model.predict``.

     NOTE(review): ``self`` is not a parameter of this function; presumably
     it is captured from an enclosing method -- verify against the caller.
     """
     # Seed array only; RDKit resizes it to the fingerprint length.
     features = np.zeros((1, ))
     DataStructs.ConvertToNumpyArray(compound._get_ECFP(), features)
     prediction = self.model.predict(np.reshape(features, (1, 1024)))
     return prediction
Exemple #13
0
    def __call__(self, smiles: List[str]) -> dict:
        """Score SMILES by their best Tanimoto similarity to the references.

        Each parsable SMILES gets a radius-3 count-based Morgan fingerprint;
        its highest similarity ``t`` to ``self.ref_fps`` is mapped to
        ``max(1 - 2 * |0.5 - t|, 0)``.  Unparsable SMILES score 0.

        Returns:
            ``{"total_score": scores}`` with one float32 entry per input.
        """
        parsed = [Chem.MolFromSmiles(smile) for smile in smiles]
        valid_idxs = [idx for idx, mol in enumerate(parsed) if mol is not None]

        fps = [
            AllChem.GetMorganFingerprint(parsed[idx],
                                         3,
                                         useCounts=True,
                                         useFeatures=False)
            for idx in valid_idxs
        ]

        best_sims = np.array([
            np.max(DataStructs.BulkTanimotoSimilarity(fp, self.ref_fps))
            for fp in fps
        ])
        # Peaks at similarity 0.5 and falls off linearly on both sides.
        shaped = np.maximum((1 - 2 * np.absolute(0.5 - best_sims)), 0)

        score = np.zeros(len(smiles), dtype=np.float32)
        for idx, value in zip(valid_idxs, shaped):
            score[idx] = value
        return {"total_score": np.array(score, dtype=np.float32)}
Exemple #14
0
    def _compute_fps(self) -> None:
        """Compute a NumPy matrix of fingerprint vectors into ``self.fps``.

        One row is produced per molecule in ``self.data.mol``.  With
        ``self.fp_type == 'morgan'`` a Morgan bit vector of radius
        ``self.fp_rad`` is used; with ``'rdkit'`` an RDKit fingerprint whose
        minimum and maximum path length are both ``self.fp_rad``.  Both have
        ``self.fp_bits`` bits.

        Raises:
            ValueError: If ``self.fp_type`` is neither ``'morgan'`` nor
                ``'rdkit'`` (previously this surfaced as an unbound local).
        """
        fp_vects = []
        for mol in tqdm.tqdm(self.data.mol,
                             desc='Computing fingerprints',
                             disable=self.prog):

            if self.fp_type == 'morgan':
                fp_vect = rdMolDescriptors.GetMorganFingerprintAsBitVect(
                    mol, self.fp_rad, self.fp_bits)
            elif self.fp_type == 'rdkit':
                fp_vect = Chem.RDKFingerprint(
                    mol,
                    minPath=self.fp_rad,
                    maxPath=self.fp_rad,
                    fpSize=self.fp_bits,
                )
            else:
                raise ValueError(
                    "Unsupported fp_type: {!r}".format(self.fp_type))

            # Seed array only; RDKit resizes it to the fingerprint length.
            array = np.zeros((0, ), dtype=np.int8)
            DataStructs.ConvertToNumpyArray(fp_vect, array)
            fp_vects.append(array)

        # Pre-sized so the shape is (n, fp_bits) even when there are no molecules.
        self.fps = np.zeros((len(fp_vects), self.fp_bits))
        for i, fp_vect in enumerate(fp_vects):
            self.fps[i, :] = fp_vect
Exemple #15
0
def dg_score_rev(actives, decoys):
    """For each decoy SMILES, find the most similar active (similar to DEKOIS).

    Lower is better (decoys less like actives), higher is worse.  Molecules
    are compared with radius-3 feature Morgan bit vectors (roughly FCFP_6)
    and Tanimoto similarity.

    Returns:
        Two lists with one entry per decoy: the highest similarity to any
        active and the index of that active.
    """

    def _fcfp6(smi):
        return AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(smi),
                                                     3,
                                                     useFeatures=True)

    active_fps = [_fcfp6(smi) for smi in actives]
    decoys_fps = [_fcfp6(smi) for smi in decoys]

    closest_sims, closest_sims_id = [], []
    for decoy_fp in decoys_fps:
        sims_to_actives = [
            DataStructs.TanimotoSimilarity(active_fp, decoy_fp)
            for active_fp in active_fps
        ]
        closest_sims.append(max(sims_to_actives))
        closest_sims_id.append(np.argmax(sims_to_actives))

    return closest_sims, closest_sims_id
Exemple #16
0
def chemical_space(fname):
    """
    Build a UMAP embedding of the molecules listed in a text file.

    Each line is split on commas: the third field is the SMILES and the
    first two, joined by "/", become the molecule's ``_Name`` property.
    Every molecule is fingerprinted with a radius-2 Morgan bit vector.

    :param fname: path of the comma-separated text file
    :return: the result of ``umap.UMAP(...).fit_transform`` on the list of
        fingerprint arrays, using ``tanimoto_dist`` as metric
    """
    ligands = []
    with open(fname, "r") as f:
        for entry in f.read().splitlines():
            fields = entry.split(",")
            mol = Chem.MolFromSmiles(fields[2])
            mol.SetProp("_Name", str(fields[0] + "/" + fields[1]))
            ligands.append(mol)

    X = []
    for ligand in ligands:
        AllChem.Compute2DCoords(ligand)
        # Seed array only; RDKit resizes it to the fingerprint length.
        arr = np.zeros((0,))
        # Fingerprint the ligand of this iteration.  The previous code used
        # `mol`, i.e. the last molecule parsed, for every row.
        fp = AllChem.GetMorganFingerprintAsBitVect(ligand, 2)
        DataStructs.ConvertToNumpyArray(fp, arr)
        X.append(arr)

    return umap.UMAP(n_neighbors=5, min_dist=0.2, metric=tanimoto_dist).fit_transform(X)
Exemple #17
0
 def search_by_mols(self, mols, topk=10):
     '''
     Search the index for the nearest neighbours of each query molecule.

     Every molecule is encoded as a MACCS keys fingerprint, converted with
     ``self.vec2bytes`` and looked up in ``self.index``.  Each hit is then
     re-scored with ``self.calc_similarity`` and the hits of one query are
     sorted by that score, best first.

     :param mols: a list of molecules
     :param topk: number of neighbours requested from the index per query
     :return: [[{"id": xx, "smiles": xx, "score": xx}, {}, ...], []],
         one inner list per query molecule
     '''
     mols_vec = []
     for mol in mols:
         tmp_arr = np.array([])
         DataStructs.ConvertToNumpyArray(
             rdMolDescriptors.GetMACCSKeysFingerprint(mol), tmp_arr)
         mols_vec.append(self.vec2bytes(tmp_arr))
     ret_dists, ret_ids = self.index.search(
         np.array(mols_vec).astype("uint8"), topk)
     rets = []
     for mol, _dists, ids in zip(mols, ret_dists, ret_ids):
         ret = []
         for hit_id in ids:
             row = self.df_zinc.iloc[hit_id]
             ret.append({
                 "id": row["zinc_id"],
                 "smiles": row["smiles"],
                 "score": self.calc_similarity(
                     mol, Chem.MolFromSmiles(row["smiles"]))
             })
         # Append inside the loop so that every query keeps its own hits;
         # previously only the last query's hits were returned.
         rets.append(sorted(ret, key=lambda item: item["score"], reverse=True))
     return rets
Exemple #18
0
def convert_fps(fp):
    """Render an RDKit fingerprint as a bit string.

    Returns a one-element list whose item is the fingerprint written as a
    string of integer digits, one per array element.
    """
    # Seed array only; RDKit resizes it to the fingerprint length.
    bits = numpy.zeros((1, ))
    DataStructs.ConvertToNumpyArray(fp, bits)
    bit_string = ''.join(str(int(bit)) for bit in bits)
    return [bit_string]
Exemple #19
0
def getFpArr( mols, nBits = 1024 ):
    """Return radius-2 Morgan bit fingerprints of ``mols`` as a NumPy array.

    One row per molecule; ``nBits`` is the fingerprint length.
    """

    def _as_array( mol ):
        fp = AllChem.GetMorganFingerprintAsBitVect( mol, 2, nBits=nBits )
        # Seed array only; RDKit resizes it to the fingerprint length.
        bits = np.zeros( (1,) )
        DataStructs.ConvertToNumpyArray( fp, bits )
        return bits

    return np.array( [ _as_array( mol ) for mol in mols ] )
def calc_fp_arr( mols ):
    """Return radius-2 Morgan bit fingerprints of ``mols`` as a NumPy array.

    The fingerprint length is left at the RDKit default.
    """

    def _as_array( mol ):
        # Seed array only; RDKit resizes it to the fingerprint length.
        bits = np.zeros( (1,) )
        DataStructs.ConvertToNumpyArray(
            AllChem.GetMorganFingerprintAsBitVect( mol, 2 ), bits )
        return bits

    return np.asarray( [ _as_array( mol ) for mol in mols ] )
Exemple #21
0
def convert_reaction_to_fp(rsmi, psmi, fpsize=2048):
    """Return the product fingerprint and the reaction difference fingerprint.

    Both SMILES are fingerprinted as radius-2 chiral Morgan bit vectors of
    ``fpsize`` bits stored as int8.  The reaction fingerprint is the product
    fingerprint minus the reactant fingerprint.

    Returns:
        ``(pfp, rxnfp)`` as NumPy arrays, or ``None`` when any step fails
        (a message is printed in that case).
    """

    def _morgan_int8(mol):
        bitvect = AllChem.GetMorganFingerprintAsBitVect(mol,
                                                        radius=2,
                                                        nBits=fpsize,
                                                        useFeatures=False,
                                                        useChirality=True)
        out = np.empty(fpsize, dtype='int8')
        DataStructs.ConvertToNumpyArray(bitvect, out)
        return out

    # --- reactant ---
    rsmi = rsmi.encode('utf-8')
    try:
        reactant = Chem.MolFromSmiles(rsmi)
    except Exception as e:
        print("Cannot build reactant mol due to {}".format(e))
        return
    try:
        rfp = _morgan_int8(reactant)
    except Exception as e:
        print("Cannot build reactant fp due to {}".format(e))
        print(rsmi)
        return

    # --- product ---
    psmi = psmi.encode('utf-8')
    try:
        product = Chem.MolFromSmiles(psmi)
    except Exception as e:
        print("Cannot build product mol due to {}".format(e))
        return
    try:
        pfp = _morgan_int8(product)
    except Exception as e:
        print("Cannot build product fp due to {}".format(e))
        return

    return np.asarray(pfp), np.asarray(pfp - rfp)
Exemple #22
0
 def smiles2fps(self, smiles):
     """Encode a SMILES string as a one-row fingerprint array.

     Hydrogens are added to the parsed molecule, then a radius-3 Morgan bit
     vector of ``self.state_size`` bits is computed and wrapped in an
     outer array.
     """
     mol_with_hs = AllChem.AddHs(Chem.MolFromSmiles(smiles))
     fp = AllChem.GetMorganFingerprintAsBitVect(mol_with_hs,
                                                3,
                                                nBits=self.state_size)
     # Seed array only; RDKit resizes it to the fingerprint length.
     bits = np.zeros((1, ))
     DataStructs.ConvertToNumpyArray(fp, bits)
     return np.array([bits])
Exemple #23
0
def pka_similarities(smile, mol_set, n):
    """Return a flat array of (similarity, value) pairs against ``mol_set``.

    ``smile`` is fingerprinted with a hashed atom-pair bit vector and
    compared by Dice similarity with the fingerprint at index 2 of each
    entry; the value at index 1 of the entry is paired with the score.  The
    pairs are sorted ascending and the first ``n`` are flattened.

    NOTE(review): the ascending sort keeps the *lowest* similarities --
    confirm that this, rather than the most similar entries, is intended.
    """
    query_fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        Chem.MolFromSmiles(smile))
    scored = [[DataStructs.DiceSimilarity(query_fp, entry[2]), entry[1]]
              for entry in mol_set]
    scored.sort()
    return np.asarray(scored[:n]).flatten()
Exemple #24
0
def TakeInput(filepath, hmdb_filepath, OR_name):
    """Find HMDB metabolites similar to known ligands and save the matches.

    Every unique (``Smiles``, ``Ligand``) pair from ``filepath`` is compared
    with every (``SMILES``, ``NAME``) row of ``hmdb_filepath`` using
    ``FingerprintMols.FingerprintMol`` fingerprints and
    ``DataStructs.FingerprintSimilarity``.  Pairs scoring at least 0.85 are
    written to ``Final_test_set_<OR_name>.csv``; a shortlist file
    ``Shortlisted_Metabolites<OR_name>.csv`` is written afterwards.

    Args:
        filepath: CSV with ``Smiles`` and ``Ligand`` columns.
        hmdb_filepath: CSV with ``SMILES`` and ``NAME`` columns.
        OR_name: Suffix used in the two output file names.
    """
    # The result is overwritten by the read_csv below, as before; the call
    # is kept in case extractPositiveOnes has side effects.
    positive_Cancer = extractPositiveOnes(filepath)
    data_hmdb = pd.read_csv(hmdb_filepath, encoding="ISO-8859-1")
    positive_Cancer = pd.read_csv(filepath, encoding="ISO-8859-1")
    hmdb_names = data_hmdb['NAME']
    hmdb_SMILES = data_hmdb['SMILES']
    positive_Cancer_SMILES = positive_Cancer['Smiles']
    positive_Cancer_Names = positive_Cancer["Ligand"]
    hmdb_data = pd.concat([hmdb_SMILES, hmdb_names], axis=1)
    dataframe = pd.concat([positive_Cancer_SMILES, positive_Cancer_Names],
                          axis=1)
    Cancer_clean_data = dataframe.drop_duplicates()
    Cancer_clean_data = Cancer_clean_data.reset_index(drop=True)
    df1 = pd.DataFrame({
        "Cancer_Molecule": [],
        "Cancer_SMILES": [],
        "HMDB_Molecule": [],
        "HMDB_SMILES": [],
        "TANIMOTO_Similarity_Value": []
    })
    hmdb_data = hmdb_data.reset_index(drop=True)
    k = 0
    for i in range(len(Cancer_clean_data)):
        y = Chem.MolFromSmiles(Cancer_clean_data['Smiles'][i])
        fps1 = FingerprintMols.FingerprintMol(y)
        for j in range(len(hmdb_data)):
            try:
                x = Chem.MolFromSmiles(hmdb_data['SMILES'][j])
                fps2 = FingerprintMols.FingerprintMol(x)
                sim_val = DataStructs.FingerprintSimilarity(fps1, fps2)
                if sim_val >= 0.85:  # threshold for similarity value
                    df1.loc[k] = [
                        Cancer_clean_data['Ligand'][i],
                        Cancer_clean_data['Smiles'][i], hmdb_data['NAME'][j],
                        hmdb_data['SMILES'][j], sim_val
                    ]
                    k = k + 1
            except Exception:
                # Best effort: skip HMDB rows that cannot be processed, but
                # no longer swallow KeyboardInterrupt / SystemExit.
                print("WARNING")
        print("Comparison Done for Ligand :" + str(i))
    df1.to_csv("Final_test_set_" + OR_name + ".csv")
    # Use the column names df1 actually has; the previous
    # "Cancer_clean_data_*" keys raised KeyError.
    Ligand = df1["Cancer_Molecule"]
    Smiles = df1["Cancer_SMILES"]
    # NOTE(review): Activation_Status is empty, so zip() yields no rows and
    # the shortlist is always written empty -- confirm what should fill it.
    Activation_Status = []
    Shortlisted_Metabolites = pd.DataFrame(
        list(zip(Smiles, Ligand, Activation_Status)),
        columns=['Smiles', 'Ligand', 'Activation Status'])
    Shortlisted_Metabolites = Shortlisted_Metabolites.drop_duplicates(
        subset='Ligand', keep='first')
    # `OR_Name` was an undefined name (NameError); the parameter is `OR_name`.
    Shortlisted_Metabolites.to_csv("Shortlisted_Metabolites" + OR_name +
                                   ".csv")
    print("Shortlisted_Metabolites" + OR_name + ".csv" + " has been saved")
    print("Congrats! Final_test_set_" + OR_name +
          ".csv has been successfully saved!")
Exemple #25
0
def GetAvalonFPs(mol, nBits=2048):

    '''
    Return the Avalon fingerprint of ``mol`` as a boolean NumPy array.

    Avalon_fingerprints: https://pubs.acs.org/doi/pdf/10.1021/ci050413p

    ``nBits`` is the fingerprint length passed to ``GAFP``.
    '''

    fp = GAFP(mol, nBits = nBits)
    # Builtin ``bool`` rather than the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    arr = np.zeros((0,),  dtype=bool)
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr
Exemple #26
0
def GetMACCSFPs(mol):
    '''
    Return the MACCS keys fingerprint of ``mol`` as a boolean NumPy array.

    166 bits
    '''

    fp = AllChem.GetMACCSKeysFingerprint(mol)

    # Builtin ``bool`` rather than the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    arr = np.zeros((0, ), dtype=bool)
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr
    def tanimoto(self, mol):
        """Return the Tanimoto similarity of ``mol`` to ``self.query_fp``.

        The 2D fingerprint is generated with ``self.sigFactory`` under a
        one-second ``Timeout``.  If that raises ``TimeoutError`` the
        molecule's SMILES is logged at debug level and 0 is returned.
        """
        try:
            with Timeout(seconds=1):
                fp = Generate.Gen2DFingerprint(mol, self.sigFactory)
            return DataStructs.TanimotoSimilarity(fp, self.query_fp)

        except TimeoutError:
            # The message needs a %s placeholder for the SMILES argument;
            # without it logging fails to format the record.
            logging.debug("SMILES Pharmacophore timeout: %s",
                          Chem.MolToSmiles(mol, isomericSmiles=False))
            return 0
Exemple #28
0
def GetTorsionFPs(mol, nBits = 2048, binary = True):
    '''
    Return the hashed topological torsion fingerprint of ``mol`` as an array.

    ``nBits`` is the fingerprint length.  The array has ``bool`` dtype when
    ``binary`` is true and ``int8`` dtype otherwise.
    '''
    fp = Torsions.GetHashedTopologicalTorsionFingerprint(mol, nBits = nBits)
    # Builtin ``bool`` rather than the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24.
    dtype = bool if binary else np.int8
    arr = np.zeros((0,),  dtype=dtype)
    DataStructs.ConvertToNumpyArray(fp, arr)
    return arr
Exemple #29
0
def build_mol_features(in_file, out_file):
    """Compute MACCS keys fingerprints for a zipped CSV and save them.

    The ``smiles`` column of ``in_file`` is read, every SMILES is converted
    to a MACCS keys fingerprint array, and the stacked arrays are written to
    ``out_file`` with ``np.save``.
    """
    df_zinc = pd.read_csv(in_file, compression="zip")

    def _maccs_array(smi):
        bits = np.array([])
        fp = rdMolDescriptors.GetMACCSKeysFingerprint(Chem.MolFromSmiles(smi))
        DataStructs.ConvertToNumpyArray(fp, bits)
        return bits

    progress = tqdm.tqdm(df_zinc["smiles"], total=len(df_zinc))
    fp_arr = np.array([_maccs_array(smi) for smi in progress])
    np.save(out_file, fp_arr)
    def get_on_bits(self, mol):
        """Return the indices of the scaffolds contained in ``mol``.

        ``mol`` may be a SMILES string or a molecule object.  A scaffold is
        first screened by checking that all bits of its fingerprint are set
        in the molecule's RDKit fingerprint; only then is the substructure
        match run.
        """
        if isinstance(mol, str):
            mol = Chem.MolFromSmiles(mol)
        mol_fp = Chem.RDKFingerprint(mol)

        return [
            idx for idx, scaffold_fp in enumerate(self.scaffold_fps)
            if DataStructs.AllProbeBitsMatch(scaffold_fp, mol_fp)
            and mol.HasSubstructMatch(self.scaffolds[idx])
        ]