Beispiel #1
0
def filter_cli(
    input_path: str,
    output_path: str,
    n_processes: int,
    strip_ions: bool,
):
    """Filter the molecules in ``input_path`` and write the kept ones.

    Each molecule streamed from ``input_path`` is passed to ``apply_filter`` on
    a pool of ``n_processes`` worker processes, with ``strip_ions`` forwarded
    as its ``retain_largest`` argument. Every result is unpacked as a
    ``(molecule, should_include)`` pair, and the molecule is written to
    ``output_path`` only when ``should_include`` is truthy.
    """
    print(" - Filtering molecules")

    filter_function = functools.partial(apply_filter, retain_largest=strip_ions)

    with capture_toolkit_warnings(), stream_to_file(output_path) as writer:
        with Pool(processes=n_processes) as pool:
            # imap yields results in input order while the workers run.
            filter_results = pool.imap(
                filter_function, stream_from_file(input_path)
            )

            for molecule, should_include in tqdm(filter_results):
                if should_include:
                    writer(molecule)
Beispiel #2
0
def test_filter_cli(openff_methane: Molecule, runner):
    """Run ``filter_cli`` with ``--strip-ions`` on a two-molecule SDF file.

    Exactly one molecule is expected in the output: the first input with its
    ``[Na+]`` counter-ion removed.
    """
    input_smiles = (
        "C1(=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl)[O-].[Na+]",
        "CCC(C)(C)C(F)(F)CCCCC(F)(F)C(C)(C)CC",
    )

    # Build the SDF file that the command will filter.
    with stream_to_file("molecules.sdf") as writer:
        for smiles in input_smiles:
            writer(Molecule.from_smiles(smiles))

    result = runner.invoke(
        filter_cli,
        ["--input", "molecules.sdf", "--output", "filtered.sdf", "--strip-ions"],
    )

    # Surface the underlying exception rather than just a non-zero exit code.
    if result.exit_code != 0:
        raise result.exception

    assert os.path.isfile("filtered.sdf")

    filtered_molecules = list(stream_from_file("filtered.sdf"))
    assert len(filtered_molecules) == 1

    expected_smiles = "[O-][c]1[c]([Cl])[c]([Cl])[c]([Cl])[c]([Cl])[c]1[Cl]"
    actual_smiles = filtered_molecules[0].to_smiles(
        toolkit_registry=RDKitToolkitWrapper()
    )

    assert actual_smiles == expected_smiles
Beispiel #3
0
def test_enumerate_cli(openff_methane: Molecule, runner):
    """Run ``enumerate_cli`` with ``--tautomers`` on a duplicated molecule.

    The same molecule is written twice to the input file; the output is
    expected to hold four molecules with four distinct SMILES.
    """
    buteneol = Molecule.from_smiles(r"C/C=C(/C)\O")

    # Build the SDF file to enumerate, containing the molecule twice.
    with stream_to_file("molecules.sdf") as writer:
        for _ in range(2):
            writer(buteneol)

    result = runner.invoke(
        enumerate_cli,
        ["--input", "molecules.sdf", "--output", "tautomers.sdf", "--tautomers"],
    )

    # Surface the underlying exception rather than just a non-zero exit code.
    if result.exit_code != 0:
        raise result.exception

    assert os.path.isfile("tautomers.sdf")

    tautomers = list(stream_from_file("tautomers.sdf"))
    assert len(tautomers) == 4

    found_smiles = set()

    for tautomer in tautomers:
        found_smiles.add(
            tautomer.to_smiles(
                explicit_hydrogens=False, toolkit_registry=RDKitToolkitWrapper()
            )
        )

    assert found_smiles == {"C/C=C(/C)O", "C=C(O)CC", "CCC(C)=O", "CC=C(C)O"}
Beispiel #4
0
def enumerate_cli(
    input_path: str,
    output_path: str,
    enumerate_tautomers: bool,
    max_tautomers: int,
    enumerate_protomers: bool,
    max_protomers: int,
    n_processes: int,
):
    """Enumerate forms of each input molecule and write the unique ones.

    Every entry streamed from ``input_path`` (as SMILES) is handed to
    ``_enumerate_tautomers`` on a pool of worker processes. Each SMILES pattern
    it returns is converted to a molecule and written to ``output_path``,
    unless a molecule with the same fixed-hydrogen InChI key has already been
    written during this call.

    Args:
        input_path: File the input molecules are streamed from.
        output_path: File the unique enumerated molecules are written to.
        enumerate_tautomers: Forwarded to ``_enumerate_tautomers``; also
            controls whether "tautomers" appears in the progress message.
        max_tautomers: Forwarded to ``_enumerate_tautomers``.
        enumerate_protomers: Forwarded to ``_enumerate_tautomers``; also
            controls whether "protomers" appears in the progress message.
        max_protomers: Forwarded to ``_enumerate_tautomers``.
        n_processes: Number of worker processes in the pool.
    """
    # Name only the enabled modes, joined by "/" without a stray space, e.g.
    # " - Enumerating tautomers/protomers".
    enabled_modes = [
        label
        for label, enabled in (
            ("tautomers", enumerate_tautomers),
            ("protomers", enumerate_protomers),
        )
        if enabled
    ]

    message = " - Enumerating"

    if enabled_modes:
        message += " " + "/".join(enabled_modes)

    print(message)

    # InChI keys of the molecules written so far, used to skip duplicates.
    seen_inchi_keys = set()

    enumerate_function = functools.partial(
        _enumerate_tautomers,
        enumerate_tautomers=enumerate_tautomers,
        max_tautomers=max_tautomers,
        enumerate_protomers=enumerate_protomers,
        max_protomers=max_protomers,
    )

    with capture_toolkit_warnings():
        # Import lazily, once per call instead of once per pattern. It stays
        # inside the capture_toolkit_warnings() block so the import still
        # executes under that context.
        from openff.toolkit.topology import Molecule

        with stream_to_file(output_path) as writer:
            with Pool(processes=n_processes) as pool:
                enumerated = pool.imap(
                    enumerate_function,
                    stream_from_file(input_path, as_smiles=True),
                )

                # Each result is an iterable of SMILES patterns for one input.
                for smiles in tqdm(enumerated):
                    for pattern in smiles:
                        molecule = Molecule.from_smiles(
                            pattern, allow_undefined_stereo=True
                        )
                        inchi_key = molecule.to_inchikey(fixed_hydrogens=True)

                        if inchi_key in seen_inchi_keys:
                            continue

                        writer(molecule)
                        seen_inchi_keys.add(inchi_key)
Beispiel #5
0
def test_read_write_streams():
    """Write two molecules with ``stream_to_file`` and read them back.

    The molecules loaded by ``stream_from_file`` must match the written ones
    in both count and set of SMILES.
    """

    def smiles_of(items):
        # Collapse a collection of molecules into its set of SMILES strings.
        return {item.to_smiles() for item in items}

    molecules = [Molecule.from_smiles(smiles) for smiles in ("C", "CO")]

    with temporary_cd():
        with stream_to_file("molecules.sdf") as writer:
            for molecule in molecules:
                writer(molecule)

        # Read back while still inside the temporary directory.
        loaded_molecules = list(stream_from_file("molecules.sdf"))

    assert len(molecules) == len(loaded_molecules)
    assert smiles_of(molecules) == smiles_of(loaded_molecules)