def stop_df_from_stream(stream, summary=None, comp_id="stop_df_from_stream"):
    """Collect the data stream into a pandas DataFrame.

    The stream is first gathered by `stop_dict_from_stream` (which receives
    `summary` and `comp_id` unchanged) and the resulting dict is turned into
    a DataFrame with `pandas.DataFrame.from_dict`.

    The molecules need to be present in the stream, e.g. generated by
    `pipe_mol_from_smiles`.

    Raises:
        ImportError: when the module-level `PANDAS` flag is falsy.
    """
    # Bail out early when pandas could not be made available.
    if not PANDAS:
        raise ImportError("pandas is not available.")

    # Global switch: affects how every DataFrame is rendered from now on
    # (PT is presumably rdkit's PandasTools -- confirm against the imports).
    PT.RenderImagesInAllDataFrames(images=True)

    collected = stop_dict_from_stream(stream, summary=summary, comp_id=comp_id)
    return pandas.DataFrame.from_dict(collected)
# Common setup for the IPython Notebooks from __future__ import print_function, division, absolute_import import six import warnings import sys import re import random from ipywidgets import HTML import pandas as pd from rdkit import Chem from rdkit.Chem import Draw, PandasTools, AllChem with warnings.catch_warnings(): warnings.simplefilter("ignore") from rdkit.Chem.Draw import IPythonConsole IPythonConsole.molSize = (450, 200) PandasTools.RenderImagesInAllDataFrames()
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Created by Nadine Schneider, June 2016 import numpy as np import pandas as pd import copy import re from rdkit.Chem import PandasTools from IPython.display import SVG # generate an HTML table of the svg images to visulize them nicely in the Jupyter notebook PandasTools.RenderImagesInAllDataFrames(images=True) def drawSVGsToHTMLGrid(svgs, cssTableName='default', tableHeader='', namesSVGs=[], size=(150, 150), numColumns=4, numRowsShown=2, noHeader=False): rows = [] names = copy.deepcopy(namesSVGs) rows = [SVG(i).data if i.startswith('<?xml version') else i for i in svgs] d = int(len(rows) / numColumns) x = len(rows) % numColumns
def to_df(
    mols: List[Chem.rdchem.Mol],
    smiles_column: Optional[str] = "smiles",
    mol_column: Optional[str] = None,
    include_private: bool = False,
    include_computed: bool = False,
    render_df_mol: bool = True,
    render_all_df_mol: bool = False,
) -> Optional[pd.DataFrame]:
    """Convert a list of mols to a dataframe using each mol properties as a column.

    Args:
        mols: a list of molecules. A `None` entry contributes a row without
            any property values instead of raising.
        smiles_column: name of the SMILES column. If None, no SMILES column
            is added.
        mol_column: name of the molecule column. If not None, the molecules
            themselves are stored in the dataframe under this name.
        include_private: Include private properties in the columns.
        include_computed: Include computed properties in the columns.
        render_df_mol: whether to render the molecule in the dataframe to images.
            Only takes effect when `mol_column` is set; it is applied to the
            newly created dataframe.
        render_all_df_mol: Whether to render all pandas dataframe mol column as
            images (calls `PandasTools.RenderImagesInAllDataFrames`).

    Returns:
        A dataframe with one row per entry of `mols`: the optional SMILES and
        molecule columns first, followed by one column per molecule property.
    """

    # Init a dataframe
    df = pd.DataFrame()

    # Feed it with smiles
    # NOTE(review): `dm.to_smiles` is still called on every entry, including
    # `None` ones -- confirm it tolerates `None` when `smiles_column` is set.
    if smiles_column is not None:
        df[smiles_column] = [dm.to_smiles(mol) for mol in mols]

    # Add a mol column
    if mol_column is not None:
        df[mol_column] = mols

    # Add any other properties present in the molecule. A missing molecule
    # has no properties: use an empty dict so its row is kept (filled with
    # NaN) rather than failing on `None.GetPropsAsDict`.
    props = [
        {}
        if mol is None
        else mol.GetPropsAsDict(
            includePrivate=include_private,
            includeComputed=include_computed,
        )
        for mol in mols
    ]
    props_df = pd.DataFrame(props)

    if smiles_column is not None and smiles_column in props_df.columns:
        logger.warning(
            f"The SMILES column name provided ('{smiles_column}') is already present in the properties"
            " of the molecules. The returned dataframe will have two columns with the same name."
        )

    # Concat the df with the properties df
    df = pd.concat([df, props_df], axis=1)

    # Render mol column to images
    if render_df_mol is True and mol_column is not None:
        # NOTE(hadim): replace by `PandaTools.ChangeMoleculeRendering` once
        # https://github.com/rdkit/rdkit/issues/3563 is fixed.
        _ChangeMoleculeRendering(df)

    if render_all_df_mol:
        PandasTools.RenderImagesInAllDataFrames()

    return df