def rescale(self, f=1.4):
    """Rescale every molecule in the structure column by the factor ``f``.

    Each value of the structure column is passed to the ``rescale`` helper
    resolved from module scope (inside a method body the bare name does not
    refer to this method). A progress bar is shown for more than 1000 rows.

    NOTE(review): a second ``rescale`` definition with default ``f=1.5``
    appears later in this file; if both live in the same class, the later
    one replaces this one. Confirm which is intended.

    Parameters:
        f: scaling factor forwarded to the module-level ``rescale`` helper.

    Returns:
        None when ``self.inplace`` is set, otherwise the object returned by
        ``self.copy()`` (returned unchanged when ``self.has_mols`` is false).
    """
    self.find_mol_col()
    if self.inplace:
        target = self
        result = None
    else:
        result = self.copy()
        result.use_col = self.use_col
        result.mol_method = self.mol_method
        target = result

    # Nothing to transform without Mol objects; bail out before a progress
    # bar is created so that none is left dangling.
    if not self.has_mols:
        return result

    show_prog = len(self.data) > 1000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    def _transform(m):
        if show_prog:
            pb.inc()
        rescale(m, f)

    target.data[self.use_col].apply(_transform)
    # Finish the progress bar in BOTH modes (the in-place path used to skip it).
    if show_prog:
        pb.done()
    return result
def add_coords(self, force=False):
    """Generate 2D coordinates for the molecules where necessary.

    Requires the Mol object to be present (use add_mols() ). Every value of
    the structure column is converted with ``self.mol_method``; when that
    yields a molecule it is handed to the ``add_coords`` helper resolved
    from module scope. A progress bar is shown for more than 1000 rows.

    Parameters:
        force: forwarded unchanged to the module-level ``add_coords`` helper.

    Returns:
        None when ``self.inplace`` is set, otherwise the object returned by
        ``self.copy()``.
    """
    self.find_mol_col()
    show_prog = len(self.data) > 1000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    def _coords_for(value):
        if show_prog:
            pb.inc()
        mol = self.mol_method(value)
        if mol:
            add_coords(mol, force=force)

    if self.inplace:
        result = None
        frame = self.data
    else:
        result = self.copy()
        result.use_col = self.use_col
        result.mol_method = self.mol_method
        frame = result.data

    frame[self.use_col].apply(_coords_for)
    if show_prog:
        pb.done()
    return result
def apply_to_mol(self, new_col_name, lambda_func):
    """Apply a function to the Mol object of every row.

    The Mol object is obtained through ``self.mol_method`` (generated
    on-the-fly, if necessary). Rows for which no molecule is obtained get
    NaN. Displays a progress bar for longer operations (more than 1000 rows).

    Parameters:
        new_col_name: name of the column that receives the results.
        lambda_func: callable invoked with the Mol object of each row.

    Returns:
        None when ``self.inplace`` is set (``self.data`` gains the column),
        otherwise a new instance from ``self.new()`` holding a copy of the
        data plus the new column.
    """
    self.find_mol_col()
    show_prog = len(self.data) > 1000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    def _apply(x):
        if show_prog:
            pb.inc()
        mol = self.mol_method(x)
        if not mol:
            # ``pd.np`` was removed in pandas 2.0; a plain float NaN is the
            # same value and works with every pandas version.
            return float("nan")
        return lambda_func(mol)

    if self.inplace:
        self.data[new_col_name] = self.data[self.use_col].apply(_apply)
        if show_prog:
            pb.done()
        return None

    result = self.new()
    result.data = self.data.copy()
    result.data[new_col_name] = result.data[self.use_col].apply(_apply)
    if show_prog:
        pb.done()
    return result
def apply_to_col(self, col_name, new_col_name, lambda_func):
    """Apply a function to a column of the MolFrame.

    A wrapper around pd.apply to enable progress bars (shown for more than
    5000 rows).

    Parameters:
        col_name: name of the source column.
        new_col_name: name of the column that receives the results.
        lambda_func: callable invoked with each value of ``col_name``.

    Returns:
        None when ``self.inplace`` is set (``self.data`` gains the column),
        otherwise a new instance from ``self.new()`` holding a copy of the
        data plus the new column; ``self.data`` is left untouched.
    """
    show_prog = len(self.data) > 5000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    def _apply(x):
        if show_prog:
            pb.inc()
        return lambda_func(x)

    if self.inplace:
        self.data[new_col_name] = self.data[col_name].apply(_apply)
        if show_prog:
            pb.done()
        return None

    result = self.new()
    # Copy first: sharing the DataFrame would also add the new column to
    # ``self.data``, defeating the non-inplace contract.
    result.data = self.data.copy()
    result.data[new_col_name] = result.data[col_name].apply(_apply)
    if show_prog:
        pb.done()
    return result
def add_inchikeys(self):
    """Add an "InchiKey" column computed from the structure column.

    Every value is converted with ``self.mol_method``. Rows without a
    molecule get "NO_MOL."; rows for which ``Chem.inchi.MolToInchiKey``
    raises ValueError get "FAILED.". A progress bar is shown for more than
    5000 rows.

    Returns:
        None when ``self.inplace`` is set, otherwise the object returned by
        ``self.copy()`` with the new column added.
    """
    self.find_mol_col()
    show_prog = len(self.data) > 5000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    def _inchikey_for(value):
        if show_prog:
            pb.inc()
        mol = self.mol_method(value)
        if not mol:
            return "NO_MOL."
        try:
            return Chem.inchi.MolToInchiKey(mol)
        except ValueError:
            return "FAILED."

    if self.inplace:
        result = None
        owner = self
    else:
        result = self.copy()
        owner = result

    owner.data["InchiKey"] = owner.data[self.use_col].apply(_inchikey_for)
    if show_prog:
        pb.done()
    return result
def rescale(self, f=1.5):
    """Scale the coordinates of every molecule by the factor ``f``.

    A 4x4 matrix with ``f`` on the first three diagonal entries and 1 on
    the last is passed, together with each value of the structure column,
    to ``Chem.TransformMol``. A progress bar is shown for more than 1000
    rows.

    NOTE(review): an earlier ``rescale`` definition with default ``f=1.4``
    appears in this file; if both live in the same class, this later one
    replaces it. Confirm which is intended.

    Parameters:
        f: scaling factor placed on the x, y and z diagonal entries.

    Returns:
        None when ``self.inplace`` is set, otherwise the object returned by
        ``self.copy()`` (returned unchanged when ``self.has_mols`` is false).
    """
    self.find_mol_col()
    if self.inplace:
        target = self
        result = None
    else:
        result = self.copy()
        result.use_col = self.use_col
        result.mol_method = self.mol_method
        target = result

    # Nothing to transform without Mol objects; bail out before a progress
    # bar is created so that none is left dangling.
    if not self.has_mols:
        return result

    # The matrix depends only on ``f``: build it once, not once per molecule.
    tm = np.zeros((4, 4), np.double)
    for i in range(3):
        tm[i, i] = f
    tm[3, 3] = 1.0

    show_prog = len(self.data) > 1000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    def _transform(m):
        if show_prog:
            pb.inc()
        Chem.TransformMol(m, tm)

    target.data[self.use_col].apply(_transform)
    # Finish the progress bar in BOTH modes (the in-place path used to skip it).
    if show_prog:
        pb.done()
    return result
def mol_filter(self, query, add_h=False):
    """Substructure filter. Returns a new MolFrame instance.

    ``query`` has to be a Smiles string. Rows for which ``self.mol_method``
    yields no molecule are skipped. A progress bar is shown for more than
    5000 rows.

    Parameters:
        query: Smiles string of the substructure to search for.
        add_h: when true, hydrogens are added to each molecule before
            matching. Switched on automatically (with a printed notice)
            when ``query`` contains "[H]" or "#1".

    Returns:
        A new instance from ``self.new()`` whose ``data`` holds the
        matching rows.

    Raises:
        ValueError: if no query molecule could be generated from ``query``.
    """
    show_prog = len(self.data) > 5000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    query_mol = Chem.MolFromSmiles(query)
    if not query_mol:
        raise ValueError("Could not generate query mol.")
    if "[H]" in query or "#1" in query:
        add_h = True
        print("> explicit hydrogens turned on (add_h = True)")

    self.find_mol_col()
    hits = []
    for _, row in self.data.iterrows():
        if show_prog:
            pb.inc()
        mol = self.mol_method(row[self.use_col])
        if not mol:
            continue
        # Match against the hydrogen-expanded molecule only when requested.
        candidate = Chem.AddHs(mol) if add_h else mol
        if candidate.HasSubstructMatch(query_mol):
            hits.append(row)

    result = self.new()
    result.data = pd.DataFrame(hits)
    if show_prog:
        pb.done()
    print_log(result.data, "mol_filter")
    return result
def sim_filter(self, query, cutoff=0.75):
    """Similarity filter. Returns a new MolFrame instance.

    Add a suitable fingerprint once with add_fp(), then give a reference
    molecule or a SMILES string as query. Rows whose Tanimoto similarity to
    the query is at least ``cutoff`` are kept and receive a "Sim" entry.
    A progress bar is shown for more than 5000 rows.

    Parameters:
        query: SMILES string, or a molecule object (which is deep-copied).
        cutoff: minimum Tanimoto similarity for a row to be kept.

    Returns:
        A new instance from ``self.new()`` whose ``data`` holds the
        matching rows.

    Raises:
        KeyError: if ``self.fp_name`` is empty or the fingerprint column is
            missing from the data.
        ValueError: if no query molecule could be generated.
    """
    no_fp_name = len(self.fp_name) == 0
    if no_fp_name or self.fp_col not in self.data.columns:
        raise KeyError(
            "No fingerprints found. Please generate them first with add_fp()."
        )

    show_prog = len(self.data) > 5000
    if show_prog:
        pb = nbt.Progressbar(end=len(self.data))

    query_mol = (
        Chem.MolFromSmiles(query) if isinstance(query, str) else deepcopy(query)
    )
    if not query_mol:
        raise ValueError("Could not generate query mol.")
    query_fp = FPDICT[self.fp_name](query_mol)

    hits = []
    for _, row in self.data.iterrows():
        if show_prog:
            pb.inc()
        # NOTE(review): pickle.loads executes whatever the stored blob
        # encodes; only use on fingerprint columns from a trusted source.
        row_fp = pickle.loads(b64.b64decode(row[self.fp_col]))
        similarity = DataStructs.TanimotoSimilarity(query_fp, row_fp)
        if similarity >= cutoff:
            row["Sim"] = similarity
            hits.append(row)

    result = self.new()
    result.data = pd.DataFrame(hits)
    print_log(result.data, "sim_filter")
    if show_prog:
        pb.done()
    return result