Ejemplo n.º 1
0
def test_from_df_pop_mol_column():
    """Check the props of molecules rebuilt by `dm.from_df` from a dataframe with a mol column.

    The first rebuilt molecule must carry exactly the `smiles` and `dummy`
    properties, both when the mol column is named explicitly and when
    `dm.from_df` is called without it.
    """
    expected_props = {"smiles", "dummy"}

    # Build molecules from the first ten SMILES of the FreeSolv data.
    subset = dm.data.freesolv().iloc[:10]  # type: ignore
    parsed = list(map(dm.to_mol, subset["smiles"]))

    # Round-trip to a dataframe holding the molecules in a "mol" column,
    # then add an extra column that should come back as a property.
    frame: pd.DataFrame = dm.to_df(parsed, mol_column="mol")  # type: ignore
    frame["dummy"] = "hello"

    # First pass: provided mol column. Second pass: automatic mol column detection.
    for extra_kwargs in ({"mol_column": "mol"}, {}):
        restored = dm.from_df(frame.copy(), **extra_kwargs)
        assert set(restored[0].GetPropsAsDict().keys()) == expected_props
Ejemplo n.º 2
0
def test_to_image():
    """Render molecules with `dm.viz.to_image` and check the resulting pixel arrays."""
    cell_size = (200, 200)

    # Keep the first eight molecules built from the FreeSolv data.
    molecules = dm.from_df(dm.data.freesolv())[:8]  # type: ignore

    # Several molecules at once: one SMILES legend each, four per row.
    smiles_legends = list(map(dm.to_smiles, molecules))
    grid = np.array(
        dm.viz.to_image(molecules, legends=smiles_legends, n_cols=4, mol_size=cell_size)
    )

    assert grid.dtype == np.uint8
    assert grid.shape == (400, 800, 3)
    assert grid.shape[1] == 200 * 4

    # A single molecule with a single string legend.
    first = molecules[0]
    single = np.array(
        dm.viz.to_image(first, legends=dm.to_smiles(first), mol_size=cell_size)
    )

    assert single.dtype == np.uint8
    assert single.shape == (200, 200, 3)

    # Integer `mol_size` together with `indices=True`; only checks the call succeeds.
    dm.viz.to_image(first, indices=True, mol_size=400)
Ejemplo n.º 3
0
def test_to_df_smiles_warning(datadir, caplog):
    """Check the duplicate `smiles` column and the logged warning after a round-trip.

    Molecules are built with `conserve_smiles=True` and converted back with
    `dm.to_df`; the resulting frame must have two `smiles` columns and the
    captured log must contain the expected warning text.
    """
    expected_message = (
        "The SMILES column name provided ('smiles') is already present in the properties of the molecules"
    )

    source = dm.read_csv(datadir / "freesolv.csv")
    round_tripped = dm.to_df(dm.from_df(source, conserve_smiles=True))

    # Element-wise comparison of the column labels; two of them must match.
    assert sum(round_tripped.columns == "smiles") == 2

    logged = caplog.text
    assert "WARNING" in logged
    assert expected_message in logged
Ejemplo n.º 4
0
def test_from_df(datadir):
    """Check `dm.from_df` on a dataframe read from an SDF file, and on an empty dataframe."""
    expected_props = {
        "zinc_id",
        "ortholog_name",
        "gene_name",
        "affinity",
        "chembldocid",
        "title",
        "reference.pubmed_id",
        "reference.doi",
        "reference.chembl_id",
        "reference.journal",
        "reference.year",
    }

    frame = dm.read_sdf(datadir / "TUBB3-observations.sdf", as_df=True)
    molecules = dm.from_df(frame)

    assert len(molecules) == 10

    first = molecules[0]
    assert isinstance(first, Chem.rdchem.Mol)
    assert set(first.GetPropsAsDict().keys()) == expected_props

    # An empty dataframe yields an empty list.
    assert dm.from_df(pd.DataFrame()) == []
Ejemplo n.º 5
0
def to_sdf(
    mols: Union[Chem.rdchem.Mol, Sequence[Chem.rdchem.Mol], pd.DataFrame],
    urlpath: Union[str, os.PathLike, TextIO],
    smiles_column: Optional[str] = "smiles",
    mol_column: Optional[str] = None,
) -> None:
    """Write molecules to a file using RDKit's `SDWriter`.

    Args:
        mols: a dataframe, a molecule or a list of molecules. `None` entries
            are skipped.
        urlpath: Path to a file or a file-like object. Path can be remote or local.
        smiles_column: Column name to extract the molecule. Only used when
            `mols` is a dataframe.
        mol_column: Column name to extract the molecule. It takes
            precedence over `smiles_column`. Only used when `mols` is a
            dataframe.
    """

    # Normalize the input to a list of molecules.
    if isinstance(mols, pd.DataFrame):
        mols = dm.from_df(mols, smiles_column=smiles_column, mol_column=mol_column)
    elif isinstance(mols, Chem.rdchem.Mol):
        mols = [mols]

    # Filter out None values
    mols = [mol for mol in mols if mol is not None]

    def _write_all(handle) -> None:
        """Write every molecule to `handle`, closing the writer even on failure."""
        writer = Chem.SDWriter(handle)
        try:
            for mol in mols:
                writer.write(mol)
        finally:
            # Close in all cases so the writer is not left open when a
            # write raises part-way through.
            writer.close()

    # File-like object: write directly; the caller keeps ownership of the handle.
    if isinstance(urlpath, io.IOBase):
        _write_all(urlpath)

    # Regular local or remote paths: fsspec opens (and closes) the file.
    else:
        with fsspec.open(urlpath, mode="w") as f:
            _write_all(f)
Ejemplo n.º 6
0
def test_from_df_conserve_smiles(datadir):
    """Check that `conserve_smiles=True` keeps `smiles` among the first molecule's props."""
    frame = dm.read_csv(datadir / "freesolv.csv")
    molecules = dm.from_df(frame, conserve_smiles=True)

    prop_names = molecules[0].GetPropsAsDict().keys()
    assert "smiles" in prop_names