def stop_df_from_stream(stream, summary=None, comp_id="stop_df_from_stream"):
    """Collect the data stream into a Pandas DataFrame.

    The records must already carry their molecules, e.g. as produced by
    `pipe_mol_from_smiles`.

    `stream`, `summary` and `comp_id` are passed unchanged to
    `stop_dict_from_stream`, whose result is turned into the DataFrame.

    Raises:
        ImportError: if the module-level `PANDAS` flag is falsy.
    """
    if PANDAS:
        # Switch on image rendering before the stream is consumed.
        PT.RenderImagesInAllDataFrames(images=True)
        collected = stop_dict_from_stream(stream, summary=summary, comp_id=comp_id)
        return pandas.DataFrame.from_dict(collected)

    raise ImportError("pandas is not available.")
Beispiel #2
0
# Common setup for the IPython Notebooks

from __future__ import print_function, division, absolute_import
import six

import warnings

import sys
import re
import random

from ipywidgets import HTML

import pandas as pd

from rdkit import Chem
from rdkit.Chem import Draw, PandasTools, AllChem
# Suppress every warning raised while importing IPythonConsole; the filter
# is restored as soon as the `with` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    from rdkit.Chem.Draw import IPythonConsole

# Session-wide rendering configuration for the notebook.
# NOTE(review): (450, 200) is presumably (width, height) in pixels — confirm
# against the RDKit IPythonConsole documentation.
IPythonConsole.molSize = (450, 200)
PandasTools.RenderImagesInAllDataFrames()
Beispiel #3
0
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Created by Nadine Schneider, June 2016

import numpy as np
import pandas as pd
import copy
import re
from rdkit.Chem import PandasTools
from IPython.display import SVG

# generate an HTML table of the SVG images to visualize them nicely in the Jupyter notebook
PandasTools.RenderImagesInAllDataFrames(images=True)


def drawSVGsToHTMLGrid(svgs,
                       cssTableName='default',
                       tableHeader='',
                       namesSVGs=[],
                       size=(150, 150),
                       numColumns=4,
                       numRowsShown=2,
                       noHeader=False):
    rows = []
    names = copy.deepcopy(namesSVGs)
    rows = [SVG(i).data if i.startswith('<?xml version') else i for i in svgs]
    d = int(len(rows) / numColumns)
    x = len(rows) % numColumns
Beispiel #4
0
def to_df(
    mols: List[Chem.rdchem.Mol],
    smiles_column: Optional[str] = "smiles",
    mol_column: Optional[str] = None,
    include_private: bool = False,
    include_computed: bool = False,
    render_df_mol: bool = True,
    render_all_df_mol: bool = False,
) -> Optional[pd.DataFrame]:
    """Convert a list of mols to a dataframe using each mol's properties
    as columns.

    Args:
        mols: the molecules to convert. Any iterable is accepted; it is
            materialized into a list once because it is traversed several times.
        smiles_column: name of the SMILES column. If None, no SMILES column
            is added.
        mol_column: name of the molecule column. If None, no molecule column
            is added.
        include_private: include private properties in the columns.
        include_computed: include computed properties in the columns.
        render_df_mol: whether to render the molecule column of the returned
            dataframe as images. Only takes effect when `mol_column` is not None.
        render_all_df_mol: whether to also enable image rendering for all
            pandas dataframes. Only considered when `render_df_mol` is True
            and `mol_column` is not None.

    Returns:
        A dataframe with one row per molecule: the optional SMILES and
        molecule columns, followed by one column per molecule property.
    """

    # Materialize once: `mols` is iterated for the SMILES, stored as a column
    # and iterated again for the properties. A one-shot iterable (e.g. a
    # generator) would otherwise be exhausted after the first pass.
    mols = list(mols)

    # Init a dataframe
    df = pd.DataFrame()

    # Feed it with smiles
    if smiles_column is not None:
        df[smiles_column] = [dm.to_smiles(mol) for mol in mols]

    # Add a mol column
    if mol_column is not None:
        df[mol_column] = mols

    # Add any other properties present in the molecule
    props = [
        mol.GetPropsAsDict(
            includePrivate=include_private,
            includeComputed=include_computed,
        )
        for mol in mols
    ]
    props_df = pd.DataFrame(props)

    # A property with the same name as the SMILES column is not dropped; the
    # caller is only warned that the result will carry a duplicated column name.
    if smiles_column is not None and smiles_column in props_df.columns:
        logger.warning(
            f"The SMILES column name provided ('{smiles_column}') is already present in the properties"
            " of the molecules. The returned dataframe will have two columns with the same name."
        )

    # Concat the df with the properties df
    df = pd.concat([df, props_df], axis=1)

    # Render mol column to images
    if render_df_mol is True and mol_column is not None:
        # NOTE(hadim): replace by `PandaTools.ChangeMoleculeRendering` once
        # https://github.com/rdkit/rdkit/issues/3563 is fixed.
        _ChangeMoleculeRendering(df)

        if render_all_df_mol:
            PandasTools.RenderImagesInAllDataFrames()

    return df