Example #1
0
    def rescale(self, f=1.4):
        """Rescale every molecule in the Mol column by the factor ``f``.

        Each entry of ``self.data[self.use_col]`` is handed to the
        ``rescale`` function that is in scope at module level (inside the
        nested helper the bare name does not resolve to this method).
        A progress bar is shown for more than 1000 rows.

        Parameters:
            f: factor that is passed through to the ``rescale`` helper.

        Returns:
            None when ``self.inplace`` is set, otherwise the copy of this
            instance whose molecules were processed. When ``self.has_mols``
            is false, nothing is rescaled (the copy is still returned in
            non-inplace mode).
        """
        self.find_mol_col()
        inplace = self.inplace
        if inplace:
            target = self
        else:
            target = self.copy()
            target.use_col = self.use_col
            target.mol_method = self.mol_method

        if self.has_mols:
            # The progress bar is only created when there is work to do, so
            # no bar is left unfinished on the early-exit path.
            n_rows = len(self.data)
            pb = nbt.Progressbar(end=n_rows) if n_rows > 1000 else None

            def _transform(m):
                if pb is not None:
                    pb.inc()
                rescale(m, f)

            target.data[self.use_col].apply(_transform)
            # Finish the bar in both modes; the in-place path used to skip it.
            if pb is not None:
                pb.done()

        return None if inplace else target
Example #2
0
    def add_coords(self, force=False):
        """Generate 2D coordinates for the molecules where necessary.

        The Mol objects have to be present already (use add_mols() ).
        ``force`` is forwarded unchanged to the ``add_coords`` function that
        is in scope at module level. Entries for which ``self.mol_method``
        yields nothing are skipped. A progress bar is shown for more than
        1000 rows.

        Returns None when ``self.inplace`` is set, otherwise the processed
        copy of this instance.
        """
        self.find_mol_col()
        use_bar = len(self.data) > 1000
        if use_bar:
            bar = nbt.Progressbar(end=len(self.data))

        def _coords_for(entry):
            if use_bar:
                bar.inc()
            mol = self.mol_method(entry)
            if not mol:
                return
            add_coords(mol, force=force)

        inplace = self.inplace
        if inplace:
            target = self
        else:
            target = self.copy()
            target.use_col = self.use_col
            target.mol_method = self.mol_method

        # The return value of apply is irrelevant; only the side effect counts.
        target.data[self.use_col].apply(_coords_for)
        if use_bar:
            bar.done()
        if not inplace:
            return target
Example #3
0
    def apply_to_mol(self, new_col_name, lambda_func):
        """Apply a function to the Mol object of every row.

        The Mol object is obtained through ``self.mol_method`` (i.e. it may
        be generated on-the-fly). Rows for which no molecule is obtained get
        NaN in the new column. A progress bar is displayed for more than
        1000 rows.

        Parameters:
            new_col_name: name of the column that receives the results.
            lambda_func: callable taking the Mol object and returning the
                value to store.

        Returns:
            None when ``self.inplace`` is set (``self.data`` is modified),
            otherwise a new instance holding a copy of the data plus the
            new column.
        """
        self.find_mol_col()
        n_rows = len(self.data)
        pb = nbt.Progressbar(end=n_rows) if n_rows > 1000 else None

        def _apply(x):
            if pb is not None:
                pb.inc()
            mol = self.mol_method(x)
            if not mol:
                # float("nan") instead of pd.np.nan: the pandas.np alias was
                # deprecated in pandas 1.0 and no longer exists in 2.x.
                return float("nan")
            return lambda_func(mol)

        inplace = self.inplace
        if inplace:
            target = self
        else:
            target = self.new()
            target.data = self.data.copy()

        target.data[new_col_name] = target.data[self.use_col].apply(_apply)
        if pb is not None:
            pb.done()
        if not inplace:
            return target
Example #4
0
    def apply_to_col(self, col_name, new_col_name, lambda_func):
        """Apply a function to a column in the MolFrame.

        A wrapper around the pandas ``apply`` of a column that adds a
        progress bar for more than 5000 rows.

        Parameters:
            col_name: name of the column whose values are passed to
                ``lambda_func``.
            new_col_name: name of the column that receives the results.
            lambda_func: callable applied to every value of ``col_name``.

        Returns:
            None when ``self.inplace`` is set (``self.data`` is modified),
            otherwise a new instance holding a copy of the data plus the
            new column.
        """
        n_rows = len(self.data)
        pb = nbt.Progressbar(end=n_rows) if n_rows > 5000 else None

        def _apply(x):
            if pb is not None:
                pb.inc()
            return lambda_func(x)

        inplace = self.inplace
        if inplace:
            target = self
        else:
            target = self.new()
            # Work on a copy: sharing the frame would silently add the new
            # column to self.data as well, defeating non-inplace mode.
            target.data = self.data.copy()

        target.data[new_col_name] = target.data[col_name].apply(_apply)
        if pb is not None:
            pb.done()
        if not inplace:
            return target
Example #5
0
    def add_inchikeys(self):
        """Add a column "InchiKey" holding the InChI key of each molecule.

        Rows without a molecule get the marker "NO_MOL."; rows for which the
        key generation raises a ValueError get "FAILED.". A progress bar is
        shown for more than 5000 rows.

        Returns None when ``self.inplace`` is set, otherwise a copy of this
        instance that carries the new column.
        """
        self.find_mol_col()
        use_bar = len(self.data) > 5000
        if use_bar:
            bar = nbt.Progressbar(end=len(self.data))

        def _inchikey(entry):
            if use_bar:
                bar.inc()
            mol = self.mol_method(entry)
            if mol:
                try:
                    return Chem.inchi.MolToInchiKey(mol)
                except ValueError:
                    return "FAILED."
            return "NO_MOL."

        inplace = self.inplace
        target = self if inplace else self.copy()
        keys = target.data[self.use_col].apply(_inchikey)
        target.data["InchiKey"] = keys
        if use_bar:
            bar.done()
        if not inplace:
            return target
Example #6
0
    def rescale(self, f=1.5):
        """Apply a uniform scaling transform to every molecule in the Mol column.

        A 4x4 matrix with ``f`` on the first three diagonal entries and 1 on
        the last is passed together with each entry of
        ``self.data[self.use_col]`` to ``Chem.TransformMol``. A progress bar
        is shown for more than 1000 rows.

        Parameters:
            f: scaling factor placed on the matrix diagonal.

        Returns:
            None when ``self.inplace`` is set, otherwise the copy of this
            instance whose molecules were processed. When ``self.has_mols``
            is false, nothing is transformed (the copy is still returned in
            non-inplace mode).
        """
        self.find_mol_col()
        inplace = self.inplace
        if inplace:
            target = self
        else:
            target = self.copy()
            target.use_col = self.use_col
            target.mol_method = self.mol_method

        if self.has_mols:
            # The matrix does not depend on the molecule, so build it once
            # instead of once per row.
            tm = np.diag([f, f, f, 1.0]).astype(np.double)

            # The progress bar is only created when there is work to do.
            n_rows = len(self.data)
            pb = nbt.Progressbar(end=n_rows) if n_rows > 1000 else None

            def _transform(m):
                if pb is not None:
                    pb.inc()
                Chem.TransformMol(m, tm)

            target.data[self.use_col].apply(_transform)
            # Finish the bar in both modes; the in-place path used to skip it.
            if pb is not None:
                pb.done()

        return None if inplace else target
Example #7
0
 def mol_filter(self, query, add_h=False):
     """Substructure filter. Returns a new MolFrame instance.

     Parameters:
         query: the substructure query; has to be a Smiles string.
         add_h: when true, hydrogens are added to each molecule before
             matching. Switched on automatically (with a printed notice)
             when the query contains "[H]" or "#1".

     Returns:
         A new instance (``self.new()``) whose data holds the rows that
         contain the query substructure. Rows for which no molecule is
         obtained are skipped. If nothing matches, the data is an empty
         frame that still carries the original column labels.

     Raises:
         ValueError: if no query molecule could be generated from ``query``.
     """
     # Validate the query first so that a bad query cannot leave an
     # unfinished progress bar behind.
     query_mol = Chem.MolFromSmiles(query)
     if not query_mol:
         raise ValueError("Could not generate query mol.")
     if "[H]" in query or "#1" in query:
         add_h = True
         print("> explicit hydrogens turned on (add_h = True)")

     self.find_mol_col()
     n_rows = len(self.data)
     pb = nbt.Progressbar(end=n_rows) if n_rows > 5000 else None

     res_l = []
     for _, rec in self.data.iterrows():
         if pb is not None:
             pb.inc()
         mol = self.mol_method(rec[self.use_col])
         if not mol:
             continue
         candidate = Chem.AddHs(mol) if add_h else mol
         if candidate.HasSubstructMatch(query_mol):
             res_l.append(rec)

     result = self.new()
     if res_l:
         result.data = pd.DataFrame(res_l)
     else:
         # pd.DataFrame([]) would drop every column label; keep them so
         # that code working on the result does not fail on missing columns.
         result.data = pd.DataFrame(columns=self.data.columns)
     if pb is not None:
         pb.done()
     print_log(result.data, "mol_filter")
     return result
Example #8
0
 def sim_filter(self, query, cutoff=0.75):
     """Similarity filter. Returns a new MolFrame instance.

     Fingerprints have to be present in the column ``self.fp_col`` (the
     error message refers to add_fp() for generating them).

     Parameters:
         query: a reference molecule or a SMILES string. A string is parsed
             with ``Chem.MolFromSmiles``; anything else is deep-copied and
             used as the query molecule.
         cutoff: minimum Tanimoto similarity (inclusive) for a row to be
             kept.

     Returns:
         A new instance (``self.new()``) whose data holds the rows with a
         similarity >= ``cutoff``, with the similarity stored in the column
         "Sim". If nothing matches, the data is an empty frame that still
         carries the original column labels plus "Sim".

     Raises:
         KeyError: if no fingerprint name is set or the fingerprint column
             is missing.
         ValueError: if no query molecule could be generated.
     """
     if len(self.fp_name) == 0 or self.fp_col not in self.data.columns:
         raise KeyError(
             "No fingerprints found. Please generate them first with add_fp()."
         )
     # Prepare and validate the query before creating the progress bar so
     # that a bad query cannot leave an unfinished bar behind.
     if isinstance(query, str):
         query_mol = Chem.MolFromSmiles(query)
     else:
         query_mol = deepcopy(query)
     if not query_mol:
         raise ValueError("Could not generate query mol.")
     fp_method = FPDICT[self.fp_name]
     query_fp = fp_method(query_mol)

     n_rows = len(self.data)
     pb = nbt.Progressbar(end=n_rows) if n_rows > 5000 else None

     res_l = []
     for _, rec in self.data.iterrows():
         if pb is not None:
             pb.inc()
         # NOTE(security): pickle.loads can execute arbitrary code. This is
         # only safe when the fingerprint column was produced by trusted
         # code; do not run this on data from untrusted sources.
         mol_fp = pickle.loads(b64.b64decode(rec[self.fp_col]))
         sim = DataStructs.TanimotoSimilarity(query_fp, mol_fp)
         if sim >= cutoff:
             rec["Sim"] = sim
             res_l.append(rec)

     result = self.new()
     if res_l:
         result.data = pd.DataFrame(res_l)
     else:
         # pd.DataFrame([]) would drop every column label; keep them (and
         # the "Sim" column) so the empty result has the usual layout.
         columns = list(self.data.columns)
         if "Sim" not in columns:
             columns.append("Sim")
         result.data = pd.DataFrame(columns=columns)
     print_log(result.data, "sim_filter")
     if pb is not None:
         pb.done()
     return result