Ejemplo n.º 1
0
    def apply_to_mol(self, new_col_name, lambda_func):
        """Apply a function to the Mol object of every row and store the results.

        ``self.find_mol_col()`` is called first; every value of the column
        ``self.use_col`` is then passed through ``self.mol_method`` to obtain
        the Mol object. Rows for which no (truthy) Mol object is obtained
        get NaN instead of the function result.
        A progress counter is printed when ``self.which_progress`` returns a
        non-None first value.

        Parameters:
            new_col_name: name of the column that receives the results.
            lambda_func: callable taking a Mol object and returning the value
                to store for that row.

        Returns:
            None when ``self.inplace`` is truthy (``self.data`` gets the new
            column). Otherwise a new instance from ``self.new()`` whose
            ``data`` is a copy of ``self.data`` plus the new column;
            ``self.data`` itself is left unchanged."""
        show_prog, _ = self.which_progress(min_len=2000)
        self.find_mol_col()
        if show_prog is not None:
            ctr = nbt.ProgCtr()

        def _apply(x):
            if show_prog is not None:
                ctr.inc()
                if ctr() % 500 == 0:
                    print("  - processed: {:8d}\r".format(ctr()), end="")
                    sys.stdout.flush()
            mol = self.mol_method(x)
            if not mol:
                # Plain float NaN: the `pd.np` alias was deprecated in
                # pandas 1.0 and removed in pandas 2.0.
                return float("nan")
            return lambda_func(mol)

        new_values = self.data[self.use_col].apply(_apply)
        if show_prog is not None:
            print("  - processed: {:8d}  done.".format(ctr()))

        if self.inplace:
            self.data[new_col_name] = new_values
            return None

        result = self.new()
        # Copy first, so that adding the column does not leak into self.data.
        result.data = self.data.copy()
        result.data[new_col_name] = new_values
        return result
Ejemplo n.º 2
0
    def check_2d_coords(self, force=False):
        """Run the 2D coordinate check on the molecule of every row.

        After ``self.find_mol_col()``, each value of the column
        ``self.use_col`` is converted with ``self.mol_method``; for every
        truthy Mol object the module-level ``check_2d_coords(mol, force=force)``
        is called. ``self.data`` gets no new column.
        A progress counter is printed when ``self.which_progress`` returns a
        non-None first value.

        Returns None when ``self.inplace`` is truthy. Otherwise the Series
        produced by ``apply`` is returned; its entries are all None because
        the per-row function returns nothing."""
        progress, _ = self.which_progress(min_len=1000)
        with_progress = progress is not None
        if with_progress:
            counter = nbt.ProgCtr()
        self.find_mol_col()

        def _ensure_coords(entry):
            if with_progress:
                counter.inc()
                if counter() % 100 == 0:
                    print("  - processed: {:8d}\r".format(counter()), end="")
                    sys.stdout.flush()
            mol = self.mol_method(entry)
            if mol:
                check_2d_coords(mol, force=force)

        # Both modes run the same pass; they only differ in the return value.
        applied = self.data[self.use_col].apply(_ensure_coords)
        if with_progress:
            print("  - processed: {:8d}  done.".format(counter()))
        if not self.inplace:
            return applied
Ejemplo n.º 3
0
    def apply_to_col(self, col_name, new_col_name, lambda_func):
        """Apply a function to one column and store the results in another.

        A wrapper around the pandas Series ``apply`` that additionally prints
        a progress counter when ``self.which_progress`` returns a non-None
        first value.

        Parameters:
            col_name: name of the column in ``self.data`` whose values are
                passed to ``lambda_func``.
            new_col_name: name of the column that receives the results.
            lambda_func: callable applied to every value of ``col_name``.

        Returns:
            None when ``self.inplace`` is truthy (``self.data`` gets the new
            column). Otherwise a new instance from ``self.new()`` whose
            ``data`` is a copy of ``self.data`` plus the new column;
            ``self.data`` itself is left unchanged."""
        show_prog, _ = self.which_progress(min_len=5000)
        if show_prog is not None:
            ctr = nbt.ProgCtr()

        def _apply(x):
            if show_prog is not None:
                ctr.inc()
                if ctr() % 500 == 0:
                    print("  - processed: {:8d}\r".format(ctr()), end="")
                    sys.stdout.flush()
            return lambda_func(x)

        new_values = self.data[col_name].apply(_apply)
        if show_prog is not None:
            print("  - processed: {:8d}  done.".format(ctr()))

        if self.inplace:
            self.data[new_col_name] = new_values
            return None

        result = self.new()
        # Copy first, so that adding the column does not leak into self.data.
        result.data = self.data.copy()
        result.data[new_col_name] = new_values
        return result
Ejemplo n.º 4
0
 def mol_filter(self, query, add_h=False):
     """Substructure filter. Returns a new instance created by ``self.new()``.

     Every row whose molecule (obtained with ``self.mol_method`` from the
     column ``self.use_col``) contains ``query`` as a substructure is kept.
     Rows without a (truthy) Mol object are skipped.

     Parameters:
         query: the substructure, has to be a Smiles string.
         add_h: when True, hydrogens are added to each molecule with
             ``Chem.AddHs`` before matching. Switched on automatically
             (with a printed notice) when the query contains "[H]" or "#1".

     Returns:
         The new instance; its ``data`` holds the matching rows. When nothing
         matches, ``data`` is an empty frame that still has the columns
         of ``self.data``.

     Raises:
         ValueError: if no query molecule could be generated from ``query``."""
     show_prog, _ = self.which_progress(min_len=5000)
     if show_prog is not None:
         ctr = nbt.ProgCtr()
     query_mol = Chem.MolFromSmiles(query)
     if not query_mol:
         raise ValueError("Could not generate query mol.")
     if "[H]" in query or "#1" in query:
         add_h = True
         print("> explicit hydrogens turned on (add_h = True)")
     res_l = []
     self.find_mol_col()
     for _, rec in self.data.iterrows():
         if show_prog is not None:
             ctr.inc()
             if ctr() % 1000 == 0:
                 print("  - processed: {:8d}\r".format(ctr()), end="")
                 sys.stdout.flush()
         mol = self.mol_method(rec[self.use_col])
         if not mol:
             continue
         target = Chem.AddHs(mol) if add_h else mol
         if target.HasSubstructMatch(query_mol):
             res_l.append(rec)
     result = self.new()
     if res_l:
         result.data = pd.DataFrame(res_l)
     else:
         # pd.DataFrame([]) would have no columns at all; an empty slice
         # keeps the schema, so follow-up calls on the result still find
         # the columns they look for.
         result.data = self.data.iloc[0:0].copy()
     if show_prog is not None:
         print("  - processed: {:8d}  done.".format(ctr()))
     print_log(result.data, "mol_filter")
     return result
Ejemplo n.º 5
0
 def sim_filter(self, query, cutoff=0.75):
     """Similarity filter. Returns a new instance created by ``self.new()``.

     Fingerprints have to be present in the column ``self.fp_col``
     (generate them once with add_fp()). The query fingerprint is calculated
     with the method registered in ``FPDICT`` under ``self.fp_name`` and
     compared to each stored fingerprint by Tanimoto similarity.

     Parameters:
         query: a reference molecule or a Smiles string. A molecule is
             deep-copied before use.
         cutoff: minimum similarity (inclusive) for a row to be kept.

     Returns:
         The new instance; its ``data`` holds the rows with a similarity of
         at least ``cutoff`` plus a "Sim" column with that similarity. When
         nothing matches, ``data`` is an empty frame that still has the
         columns of ``self.data`` and an empty "Sim" column.

     Raises:
         KeyError: if no fingerprint name is set or the fingerprint column
             is missing.
         ValueError: if no query molecule could be generated."""
     # Truthiness instead of len() also covers an fp_name of None.
     if not self.fp_name or self.fp_col not in self.data.columns:
         raise KeyError(
             "No fingerprints found. Please generate them first with add_fp()."
         )
     show_prog, _ = self.which_progress(min_len=5000)
     if show_prog is not None:
         ctr = nbt.ProgCtr()
     if isinstance(query, str):
         query_mol = Chem.MolFromSmiles(query)
     else:
         query_mol = deepcopy(query)
     if not query_mol:
         raise ValueError("Could not generate query mol.")
     fp_method = FPDICT[self.fp_name]
     query_fp = fp_method(query_mol)
     res_l = []
     for _, rec in self.data.iterrows():
         if show_prog is not None:
             ctr.inc()
             if ctr() % 1000 == 0:
                 print("  - processed: {:8d}\r".format(ctr()), end="")
                 sys.stdout.flush()
         # NOTE(review): pickle.loads executes arbitrary code when fed
         # crafted input; only use this on fingerprint columns from a
         # trusted source.
         mol_fp = pickle.loads(b64.b64decode(rec[self.fp_col]))
         sim = DataStructs.TanimotoSimilarity(query_fp, mol_fp)
         if sim >= cutoff:
             rec["Sim"] = sim
             res_l.append(rec)
     result = self.new()
     if res_l:
         result.data = pd.DataFrame(res_l)
     else:
         # pd.DataFrame([]) would have no columns at all; keep the schema
         # (including "Sim") so follow-up calls still find their columns.
         result.data = self.data.iloc[0:0].copy()
         result.data["Sim"] = pd.Series(dtype="float64")
     # Finish the "\r" progress line before anything else is printed.
     if show_prog is not None:
         print("  - processed: {:8d}  done.".format(ctr()))
     print_log(result.data, "sim_filter")
     return result