def _searchSubStructure (self, numsel, metric):
    """Find reference compounds that contain each query compound.

    Every row of ``self.X`` is converted to a bit vector and used as a probe
    against the fingerprints in ``self.Xref``. Reference compounds passing
    that screen are confirmed with an RDKit substructure match between the
    query SMILES (``self.objinfo['SMILES']``) and the reference SMILES
    (``self.objinforef['SMILES']``).

    Args:
        numsel: number of hits after which the scan for a query stops.
        metric: only 'Substructural' (or None) applies here; any other
            value is reported with a warning and otherwise ignored.

    Returns:
        A tuple ``(True, results)``. ``results`` holds one dictionary per
        query, in the order of ``self.X``. Each dictionary has a
        'distances' list (1.00 for every hit) and, for every key of
        ``self.objinforef``, the list of that key's values for the hits.
    """
    # the metric is fixed for this kind of space; only tell the caller
    # when a different one was requested
    if metric is not None and metric != 'Substructural':
        LOG.warning (f'Metric {metric} is not compatible with the descriptors present in this space. Using Substructural')

    results = []
    t1 = time.time()

    # for each compound in the search set
    for i, ivector in enumerate(self.X):
        bitestring = "".join(ivector.astype(str))
        ifp = DataStructs.cDataStructs.CreateFromBitString(bitestring)

        # the query molecule does not depend on the reference compound,
        # so it is parsed once here instead of once per screened candidate
        query_smiles = self.objinfo['SMILES'][i]
        mi = Chem.MolFromSmiles(query_smiles)

        selected_i = []
        if mi is None:
            # keep an (empty) entry so results stay aligned with self.X
            LOG.warning (f'Unable to parse query SMILES {query_smiles}; no compounds selected')
        else:
            # for each compound in the space
            for j, jfp in enumerate(self.Xref):
                # cheap fingerprint screen before the costly graph match
                if not DataStructs.AllProbeBitsMatch(ifp, jfp):
                    continue

                mj = Chem.MolFromSmiles(self.objinforef['SMILES'][j])
                if mj is None:
                    # an unparsable reference structure cannot match
                    continue

                if mj.HasSubstructMatch(mi):
                    selected_i.append(j)
                    if len(selected_i) >= numsel:
                        break

        # results for molecule i are stored in a dictionary; distances are
        # always present and substructure hits all get the same value
        results_info = {'distances': [1.00] * len(selected_i)}

        # all the objects information (name, smiles, ID, activity, etc.)
        for oi in self.objinforef:
            results_info[oi] = [self.objinforef[oi][si] for si in selected_i]

        results.append(results_info)

    LOG.info (f'search completed in time: {time.time()-t1:.4f} secs')

    return True, results
def get_on_bits(self, mol):
    """Return the indices of the stored scaffolds found in a molecule.

    Args:
        mol: an RDKit molecule, or a SMILES string that is parsed with
            ``Chem.MolFromSmiles`` before use.

    Returns:
        A list with the positions ``i`` for which the fingerprint
        ``self.scaffold_fps[i]`` passes ``AllProbeBitsMatch`` against the
        molecule's RDKit fingerprint and ``self.scaffolds[i]`` is a
        substructure of the molecule.
    """
    molecule = Chem.MolFromSmiles(mol) if isinstance(mol, str) else mol
    fingerprint = Chem.RDKFingerprint(molecule)

    # the fingerprint test runs first; the substructure match is only
    # evaluated for scaffolds that pass it
    return [
        idx
        for idx, scaffold_fp in enumerate(self.scaffold_fps)
        if DataStructs.AllProbeBitsMatch(scaffold_fp, fingerprint)
        and molecule.HasSubstructMatch(self.scaffolds[idx])
    ]
def _searchSubStructure (self, numsel, metric):
    """Find reference compounds that contain the query structure(s).

    Two modes are supported:

    * ``self.isSMARTS`` is true: the SMARTS pattern stored in the conveyor
      under 'SMARTS' is matched against every reference compound and a
      single result dictionary is returned.
    * otherwise: every row of ``self.X`` is converted to a bit vector and
      used as a probe against the fingerprints in ``self.Xref``; compounds
      passing that screen are confirmed with a substructure match on the
      SMILES, and one result dictionary is returned per query.

    Args:
        numsel: number of hits after which the scan for a query stops.
        metric: kept for interface compatibility; not used by this method.

    Returns:
        A tuple ``(True, results)``. Each dictionary in ``results`` has a
        'distances' list (1.00 for every hit) and, for every key of
        ``self.objinforef``, the list of that key's values for the hits.
    """
    LOG.info ('searching for similar compounds using Substructure similarity')

    def collect_hits(query, candidates):
        # Indices from `candidates` whose reference molecule contains
        # `query`, stopping once `numsel` hits have been gathered.
        hits = []
        if query is None:
            return hits
        for j in candidates:
            mj = Chem.MolFromSmiles(self.objinforef['SMILES'][j])
            if mj is None:
                # an unparsable reference structure cannot match
                continue
            if mj.HasSubstructMatch(query):
                hits.append(j)
                if len(hits) >= numsel:
                    break
        return hits

    def describe_hits(hits):
        # Result dictionary for one query: distances are always present
        # and substructure hits all get the same value.
        info = {'distances': [1.00] * len(hits)}
        # all the objects information (name, smiles, ID, activity, etc.)
        for oi in self.objinforef:
            info[oi] = [self.objinforef[oi][si] for si in hits]
        return info

    results = []
    t1 = time.time()

    if self.isSMARTS:
        # the pattern is the same for every reference compound, so it is
        # fetched and compiled once instead of once per compound
        smarts = self.conveyor.getVal('SMARTS')
        pattern = Chem.MolFromSmarts(smarts)
        if pattern is None:
            LOG.warning (f'Unable to parse SMARTS pattern {smarts}; no compounds selected')

        # no fingerprint screen in this mode: every compound is a candidate
        every_compound = (j for j, _ in enumerate(self.Xref))
        results.append(describe_hits(collect_hits(pattern, every_compound)))

        LOG.info (f'search completed in time: {time.time()-t1:.4f} secs')
        return True, results

    # for each compound in the search set
    for i, ivector in enumerate(self.X):
        bitestring = "".join(ivector.astype(str))
        ifp = DataStructs.cDataStructs.CreateFromBitString(bitestring)

        # the query molecule does not depend on the reference compound,
        # so it is parsed once here instead of once per screened candidate
        query_smiles = self.objinfo['SMILES'][i]
        mi = Chem.MolFromSmiles(query_smiles)
        if mi is None:
            # an (empty) entry is still stored so results stay aligned
            # with self.X
            LOG.warning (f'Unable to parse query SMILES {query_smiles}; no compounds selected')

        # lazy fingerprint screen: evaluated only as far as the scan goes
        screened = (
            j for j, jfp in enumerate(self.Xref)
            if DataStructs.AllProbeBitsMatch(ifp, jfp)
        )
        results.append(describe_hits(collect_hits(mi, screened)))

    LOG.info (f'search completed in time: {time.time()-t1:.4f} secs')

    return True, results