Esempio n. 1
0
    def test_multi_file_multi_mol(self, tmpdir):
        """Validate two multi-molecule input files in a single CLI call.

        Checks the exact set of SDF files written to the output directory and
        that no SDF file is written to the ``error_mols`` subdirectory.
        """
        with tmpdir.as_cwd():
            out_dir = '1-validate_and_assign'
            sdf_inputs = [
                get_data_file_path(name)
                for name in ('input_five_confs_flexible.sdf',
                             'input_eight_stereoisomers.sdf')
            ]
            cli_args = ["preprocess", "validate", "-g", "BBB", "-o", out_dir]
            cli_args.extend(sdf_inputs)
            runner.invoke(cli, cli_args, catch_exceptions=False)

            written = sorted(
                os.path.basename(path)
                for path in glob.glob(os.path.join(out_dir, '*.sdf')))
            # The first input has 5 confs of the same molecule
            five_confs = [
                'BBB-00000-00.sdf', 'BBB-00000-01.sdf', 'BBB-00000-02.sdf',
                'BBB-00000-03.sdf', 'BBB-00000-04.sdf',
            ]
            # Then there are 8 different stereoisomers with 1 conf each
            stereoisomers = [
                'BBB-00001-00.sdf', 'BBB-00002-00.sdf', 'BBB-00003-00.sdf',
                'BBB-00004-00.sdf', 'BBB-00005-00.sdf', 'BBB-00006-00.sdf',
                'BBB-00007-00.sdf', 'BBB-00008-00.sdf',
            ]
            assert written == five_confs + stereoisomers

            error_pattern = os.path.join(out_dir, 'error_mols', '*.sdf')
            rejected = [
                os.path.basename(path) for path in glob.glob(error_pattern)
            ]
            assert rejected == []
Esempio n. 2
0
 def test_multi_file_multi_mol_duplicates(self, tmpdir):
     """Validate two input files and check the output and error file counts.

     Eight output SDF files and exactly one SDF file in ``error_mols`` are
     expected.
     """
     with tmpdir.as_cwd():
         out_dir = '1-validate_and_assign'
         sdf_inputs = [
             get_data_file_path(name)
             for name in ('input_one_stereoisomer.sdf',
                          'input_eight_stereoisomers.sdf')
         ]
         cli_args = ["preprocess", "validate", "-g", "BBB", "-o", out_dir]
         cli_args.extend(sdf_inputs)
         runner.invoke(cli, cli_args, catch_exceptions=False)

         written = sorted(
             os.path.basename(path)
             for path in glob.glob(os.path.join(out_dir, '*.sdf')))
         assert written == [
             'BBB-00000-00.sdf', 'BBB-00001-00.sdf', 'BBB-00002-00.sdf',
             'BBB-00003-00.sdf', 'BBB-00004-00.sdf', 'BBB-00005-00.sdf',
             'BBB-00006-00.sdf', 'BBB-00007-00.sdf',
         ]

         error_pattern = os.path.join(out_dir, 'error_mols', '*.sdf')
         assert len(glob.glob(error_pattern)) == 1
Esempio n. 3
0
def test_multiple_files(n_procs):
    """Run the coverage report on two molecules loaded from separate SDF files."""
    sdf_names = (
        '1-validate_and_assign_graphs_and_confs/BBB-00000-00.sdf',
        '1-validate_and_assign_graphs_and_confs/BBB-00001-00.sdf',
    )
    molecules = [
        Molecule.from_file(get_data_file_path(name), "sdf")
        for name in sdf_names
    ]
    report, passed, failed = generate_coverage_report(
        input_molecules=molecules,
        forcefield_name='openff_unconstrained-1.3.0.offxml',
        processors=n_procs,
    )
    # Both molecules must pass and both must be counted as unique.
    assert len(passed) == 2
    assert len(failed) == 0
    assert report["passed_unique_molecules"] == 2
    assert report["total_unique_molecules"] == 2
Esempio n. 4
0
def test_cli_add_molecules_error(tmpdir):
    """Check how ``coverage-report --add`` handles a newly added failing molecule.

    The report is generated once, a molecule loaded from
    ``missing_valence_params.sdf`` is written into the input directory, and
    the command is re-run with ``--add``. The number of entries in the output
    directory and the parameter counts must be unchanged, one SDF must be in
    ``error_mols``, and ``total_unique_molecules`` must have grown.
    """
    with tmpdir.as_cwd():
        test_dir = '3-coverage_report'
        input_dir = "1-validate_and_assign_graphs_and_confs"
        report_path = os.path.join(test_dir, "coverage_report.json")

        # Work on a local copy of the data folder.
        shutil.copytree(get_data_file_path(input_dir), input_dir)

        # First pass: produce the baseline coverage report.
        runner.invoke(
            cli,
            ["preprocess", "coverage-report", "-p", 1, "-o", test_dir,
             input_dir],
            catch_exceptions=False)

        # Baseline: entry count of the output directory and the report itself.
        baseline_count = len(os.listdir(test_dir))
        with open(report_path) as handle:
            baseline = json.load(handle)

        # Drop one extra molecule into the input directory.
        extra = Molecule.from_file(
            get_data_file_path("missing_valence_params.sdf"))
        extra.properties["group_name"] = "BBB"
        extra.properties["molecule_index"] = "99999"
        extra.to_file(os.path.join(input_dir, "BBB-99999-00.sdf"), "sdf")

        # Second pass, this time with --add.
        runner.invoke(
            cli,
            ["preprocess", "coverage-report", "-p", 1, "--add", "-o",
             test_dir, input_dir],
            catch_exceptions=False)

        # The output directory must not have gained any entries.
        assert len(os.listdir(test_dir)) == baseline_count

        # The added molecule must be in the error folder.
        rejected = glob.glob(os.path.join(test_dir, "error_mols", "*.sdf"))
        assert len(rejected) == 1

        # Reload the report and compare it against the baseline.
        with open(report_path) as handle:
            updated = json.load(handle)

        new_total = updated.pop("total_unique_molecules")
        old_total = baseline.pop("total_unique_molecules")
        assert new_total > old_total

        new_passed = updated.pop("passed_unique_molecules")
        old_passed = baseline.pop("passed_unique_molecules")
        assert new_passed == old_passed

        new_forcefield = updated.pop("forcefield_name")
        old_forcefield = baseline.pop("forcefield_name")
        assert new_forcefield == old_forcefield

        # Only parameter counts are left now; they must be unchanged.
        assert updated == baseline
Esempio n. 5
0
def test_cli_add_no_molecules(tmpdir):
    """Check the CLI message when ``--add`` is used but no new files exist.

    The second invocation must print the "No new files found" message and
    leave the number of entries in the output directory unchanged.
    """
    with tmpdir.as_cwd():
        test_dir = '3-coverage_report'
        input_folder = get_data_file_path('1-validate_and_assign_graphs_and_confs')

        # First pass creates the coverage report.
        runner.invoke(
            cli,
            ["preprocess", "coverage-report", "-p", 1, "-o", test_dir,
             input_folder],
            catch_exceptions=False)
        entries_before = len(os.listdir(test_dir))

        # Second pass over the same folder, now with --add.
        second_run = runner.invoke(
            cli,
            ["preprocess", "coverage-report", "-p", 1, "-o", test_dir,
             "--add", input_folder],
            catch_exceptions=False)

        assert second_run.output == f"No new files found in {input_folder}, the coverage report was not changed.\n"

        # The output directory must contain the same number of entries.
        entries_after = len(os.listdir(test_dir))
        assert entries_before == entries_after
Esempio n. 6
0
def test_cli_move_all_confs(tmpdir):
    """
    Check that every input SDF file ends up in the coverage-report output.

    The output must hold as many SDF files as the input folder, with none in
    ``error_mols``, and the report must count 5 unique molecules.
    """

    def count_sdf(*parts):
        """Return how many ``*.sdf`` files sit under the joined path."""
        return len(glob.glob(os.path.join(*parts, "*.sdf")))

    with tmpdir.as_cwd():
        test_dir = '3-coverage_report'
        input_folder = get_data_file_path('1-validate_and_assign_graphs_and_confs')
        # Number of input molecule/conformer files.
        expected_count = count_sdf(input_folder)

        runner.invoke(
            cli,
            ["preprocess", "coverage-report", "-p", 1, "-o", test_dir,
             input_folder],
            catch_exceptions=False)

        # Assuming no molecules fail, everything is carried over.
        assert expected_count == count_sdf(test_dir)
        assert count_sdf(test_dir, "error_mols") == 0

        # The report must show the right number of unique molecules.
        with open(os.path.join(test_dir, "coverage_report.json"), "r") as handle:
            summary = json.load(handle)

        assert summary["passed_unique_molecules"] == 5
        assert summary["total_unique_molecules"] == 5
Esempio n. 7
0
def test_generate_conformers(tmpdir):
    """Run ``generate-conformers`` and check per-molecule conformer file counts."""

    def n_conformer_files(folder, pattern):
        """Return the number of files in ``folder`` matching ``pattern``."""
        return len(glob.glob(os.path.join(folder, pattern)))

    with tmpdir.as_cwd():
        source_dir = get_data_file_path(
            '1-validate_and_assign_graphs_and_confs')
        output_dir = '2-generate_conformers'

        runner.invoke(
            cli,
            ["preprocess", "generate-conformers", "-o", output_dir,
             source_dir],
            catch_exceptions=False)

        # BBB-00000 starts with two conformers, so many more conformers
        # should be created.
        assert n_conformer_files(output_dir, 'BBB-00000-*.sdf') > 3

        # BBB-00001 starts with one conformer, so many more conformers
        # should be created.
        assert n_conformer_files(output_dir, 'BBB-00001-*.sdf') > 2

        # BBB-00002 starts with one conformer. It is rigid, so only one
        # conformer should be created.
        assert n_conformer_files(output_dir, 'BBB-00002-*.sdf') == 1

        # BBB-00003 starts with 12 conformers. We should see 12 output confs
        # here, since we NEVER delete user confs.
        assert n_conformer_files(output_dir, 'BBB-00003-*.sdf') == 12
Esempio n. 8
0
    def test_single_file_single_mol(self, tmpdir):
        """Validate one single-molecule file and inspect the written SDF tags.

        Exactly one output file, ``BBB-00000-00.sdf``, is expected. Its text
        must contain the ``group_name``, ``molecule_index`` and
        ``conformer_index`` data items, and no SDF file may be written to
        ``error_mols``.
        """
        with tmpdir.as_cwd():
            test_dir = '1-validate_and_assign'

            input_mols = [get_data_file_path('input_single_mol_rigid.sdf')]
            input_mols = [
                os.path.abspath(input_mol) for input_mol in input_mols
            ]
            runner.invoke(cli, [
                "preprocess", "validate", "-g", "BBB", "-o", test_dir,
                *input_mols
            ],
                          catch_exceptions=False)

            output_files = glob.glob(os.path.join(test_dir, '*.sdf'))
            output_files = [os.path.basename(fname) for fname in output_files]
            assert 'BBB-00000-00.sdf' in output_files
            assert len(output_files) == 1

            # Read through a context manager so the handle is closed even if
            # an assertion below fails.
            with open(os.path.join(test_dir, 'BBB-00000-00.sdf')) as sdf_file:
                file_text = sdf_file.read()

            # Each data item header ends in a trailing space before the
            # newline; escapes keep that whitespace visible and safe from
            # editors that strip line endings.
            expected_tags = (
                "\n>  <group_name>  (1) \nBBB",
                "\n>  <molecule_index>  (1) \n0",
                "\n>  <conformer_index>  (1) \n0",
            )
            for tag in expected_tags:
                assert tag in file_text

            error_files = glob.glob(
                os.path.join(test_dir, 'error_mols', '*.sdf'))
            error_files = [os.path.basename(fname) for fname in error_files]
            assert error_files == []
Esempio n. 9
0
def test_generate_conformers_add(tmpdir):
    """Check that ``generate-conformers --add`` processes a newly added molecule.

    After adding one molecule to the input directory and re-running with
    ``--add``, the output must contain ten more ``BBB-*.sdf`` files than
    before, including conformers ``00`` and ``09`` of the new molecule.
    """

    def conformer_names(folder):
        """Return the basenames of all ``BBB-*.sdf`` files in ``folder``."""
        return [
            os.path.basename(path)
            for path in glob.glob(os.path.join(folder, 'BBB-*.sdf'))
        ]

    with tmpdir.as_cwd():
        input_dir = '1-validate_and_assign_graphs_and_confs'
        output_dir = '2-generate_conformers'
        # Make a copy of this directory, otherwise we'll contaminate the
        # original when we add a new mol for the test.
        shutil.copytree(get_data_file_path(input_dir), input_dir)

        runner.invoke(
            cli,
            ["preprocess", "generate-conformers", "-o", output_dir,
             input_dir],
            catch_exceptions=False)
        before = conformer_names(output_dir)

        # Now add a new, ridiculously flexible molecule to the directory.
        flexible = Molecule.from_smiles('CCCCC[C@H](COCOC)COCCOCCCCCCC')
        flexible.generate_conformers()
        flexible.properties["group_name"] = "BBB"
        flexible.properties["molecule_index"] = "99999"
        flexible.to_file(os.path.join(input_dir, "BBB-99999-00.sdf"), "sdf")

        runner.invoke(
            cli,
            ["preprocess", "generate-conformers", "-o", output_dir, "--add",
             input_dir],
            catch_exceptions=False)

        after = conformer_names(output_dir)
        assert 'BBB-99999-00.sdf' in after
        assert 'BBB-99999-09.sdf' in after
        assert len(after) == len(before) + 10
Esempio n. 10
0
def test_single_file(n_procs):
    """Run the coverage report on the molecule(s) loaded from one SDF file.

    Exactly one molecule is expected to pass and none to fail.
    """
    molecules = Molecule.from_file(
        get_data_file_path(
            '1-validate_and_assign_graphs_and_confs/BBB-00000-00.sdf'),
        "sdf")
    coverage, success_mols, error_mols = generate_coverage_report(
        input_molecules=molecules,
        forcefield_name='openff_unconstrained-1.3.0.offxml',
        processors=n_procs)
    # On failure, report the first recorded error. Guard the lookup: indexing
    # an empty error list would raise IndexError and mask the real failure.
    assert len(success_mols) == 1, (
        error_mols[0][1] if error_mols else "no error molecules were reported")
    assert len(error_mols) == 0
Esempio n. 11
0
    def test_single_file_multi_mol(self, tmpdir):
        """Validate one input file and check the six expected output SDF names.

        No SDF file may be written to the ``error_mols`` subdirectory.
        """
        with tmpdir.as_cwd():
            out_dir = '1-validate_and_assign'
            sdf_input = get_data_file_path(
                'input_one_stereoisomer_and_multi_conf_flexible.sdf')
            runner.invoke(
                cli,
                ["preprocess", "validate", "-g", "BBB", "-o", out_dir,
                 sdf_input],
                catch_exceptions=False)

            written = sorted(
                os.path.basename(path)
                for path in glob.glob(os.path.join(out_dir, '*.sdf')))
            assert written == [
                'BBB-00000-00.sdf',
                'BBB-00001-00.sdf',
                'BBB-00001-01.sdf',
                'BBB-00001-02.sdf',
                'BBB-00001-03.sdf',
                'BBB-00001-04.sdf',
            ]

            error_pattern = os.path.join(out_dir, 'error_mols', '*.sdf')
            rejected = [
                os.path.basename(path) for path in glob.glob(error_pattern)
            ]
            assert rejected == []
Esempio n. 12
0
def test_cli_error_mol(tmpdir):
    """Check that a failing molecule is written to the ``error_mols`` folder.

    A single molecule loaded from ``missing_valence_params.sdf`` is the only
    input; it must produce no passing SDF file, one error SDF file, and a
    report with 0 passed out of 1 total unique molecules.
    """

    def count_sdf(*parts):
        """Return how many ``*.sdf`` files sit under the joined path."""
        return len(glob.glob(os.path.join(*parts, "*.sdf")))

    with tmpdir.as_cwd():
        test_dir = '3-coverage_report'
        input_folder = "2-generate_conformers"
        os.mkdir(input_folder)

        failing = Molecule.from_file(
            get_data_file_path('missing_valence_params.sdf'),
            "sdf",
            allow_undefined_stereo=True)
        failing.properties["group_name"] = "OFF"
        failing.properties["molecule_index"] = "00001"
        failing.to_file(os.path.join(input_folder, "OFF-00001-00.sdf"), "sdf")

        runner.invoke(
            cli,
            ["preprocess", "coverage-report", "-p", 1, "-o", test_dir,
             input_folder],
            catch_exceptions=False)

        assert count_sdf(test_dir) == 0
        assert count_sdf(test_dir, "error_mols") == 1

        # The report must show the right number of unique molecules.
        with open(os.path.join(test_dir, "coverage_report.json"), "r") as handle:
            summary = json.load(handle)

        assert summary["passed_unique_molecules"] == 0
        assert summary["total_unique_molecules"] == 1
Esempio n. 13
0
def test_error_uncovered_antechamber_param():
    """Check that ``sodium_carbide.sdf`` is reported as an antechamber error."""
    sdf_path = get_data_file_path('sodium_carbide.sdf')
    molecule = Molecule.from_file(sdf_path, "sdf")
    report, passed, failed = generate_coverage_report(
        input_molecules=molecule,
        forcefield_name='openff_unconstrained-1.3.0.offxml',
    )
    assert len(passed) == 0
    assert len(failed) == 1
    assert report["passed_unique_molecules"] == 0
    assert report["total_unique_molecules"] == 1
    # The recorded error must mention the antechamber command.
    first_error = str(failed[0][1])
    assert "Command '['antechamber'" in first_error
Esempio n. 14
0
def test_error_missing_valence_param(n_procs):
    """Check that ``missing_valence_params.sdf`` fails with a BondHandler error."""
    sdf_path = get_data_file_path('missing_valence_params.sdf')
    molecules = Molecule.from_file(sdf_path, "sdf")
    report, passed, failed = generate_coverage_report(
        input_molecules=molecules,
        forcefield_name='openff_unconstrained-1.3.0.offxml',
        processors=n_procs,
    )
    assert len(passed) == 0
    assert len(failed) == 1
    assert report["passed_unique_molecules"] == 0
    assert report["total_unique_molecules"] == 1
    # The recorded error must mention the missing bond parameters.
    first_error = str(failed[0][1])
    assert "BondHandler was not able to find parameters" in first_error
Esempio n. 15
0
def test_add_and_delete_existing_error(tmpdir):
    """Check that passing both ``--add`` and ``--delete-existing`` raises."""
    with tmpdir.as_cwd():
        out_dir = '1-validate_and_assign'
        sdf_input = get_data_file_path('input_one_stereoisomer.sdf')
        cli_args = [
            "preprocess", "validate",
            "--add", "--delete-existing",
            "-g", "BBB",
            "-o", out_dir,
            sdf_input,
        ]
        # catch_exceptions=False lets the CLI error propagate to pytest.raises.
        with pytest.raises(Exception, match='Can not specify BOTH'):
            runner.invoke(cli, cli_args, catch_exceptions=False)
Esempio n. 16
0
def test_do_overwrite_output_directory(tmpdir):
    """Run ``validate`` twice on one output directory, the second time with ``--delete-existing``.

    The test has no assertions; with ``catch_exceptions=False`` it fails only
    if an invocation raises.
    """
    with tmpdir.as_cwd():
        out_dir = '1-validate_and_assign'
        sdf_input = get_data_file_path('input_single_mol_rigid.sdf')
        common_args = ["preprocess", "validate", "-g", "BBB", "-o", out_dir]

        # First run populates the output directory.
        runner.invoke(cli, common_args + [sdf_input], catch_exceptions=False)

        # Second run targets the same directory with --delete-existing.
        runner.invoke(cli,
                      common_args + ["--delete-existing", sdf_input],
                      catch_exceptions=False)
Esempio n. 17
0
def test_double_smirks():
    """Test filtering based on 2 different smirks patterns."""
    molecules = [
        Molecule.from_file(
            get_data_file_path(
                f'1-validate_and_assign_graphs_and_confs/BBB-0000{i}-00.sdf'
            ), "sdf")
        for i in (0, 1, 2, 3, 5)
    ]
    # Filtering on P should catch exactly one molecule, and so should F.
    outcome = smirks_filter(
        input_molecules=molecules,
        filtered_smirks=["[P:1]", "[F:1]"],
        processors=1,
    )
    assert outcome.n_filtered == 2
    assert outcome.n_molecules == 3
Esempio n. 18
0
 def test_multi_file_single_mol_redundant_conf(self, tmpdir):
     """Validate a molecule together with a translated copy of it.

     Only ``BBB-00000-00.sdf`` may be written to the output directory. One
     SDF file, ``error_mol_0.sdf``, is expected in ``error_mols``, and the
     first error text file found must mention the duplicate conformer.
     """
     with tmpdir.as_cwd():
         test_dir = '1-validate_and_assign'
         input_mols = [
             get_data_file_path('input_single_mol_rigid.sdf'),
             get_data_file_path('input_single_mol_rigid_translated.sdf')
         ]
         runner.invoke(cli, [
             "preprocess", "validate", "-g", "BBB", "-o", test_dir,
             *input_mols
         ],
                       catch_exceptions=False)
         output_files = glob.glob(os.path.join(test_dir, '*.sdf'))
         output_files = [os.path.basename(fname) for fname in output_files]
         assert sorted(output_files) == ['BBB-00000-00.sdf']
         error_files = glob.glob(
             os.path.join(test_dir, 'error_mols', '*.sdf'))
         error_files = [os.path.basename(fname) for fname in error_files]
         assert error_files == ['error_mol_0.sdf']

         error_txts = []
         for fname in glob.glob(
                 os.path.join(test_dir, 'error_mols', '*.txt')):
             # Close each report file deterministically instead of leaking
             # the handle until garbage collection.
             with open(fname) as error_file:
                 error_txts.append(error_file.read())
         assert 'Duplicate molecule conformer input detected' in error_txts[
             0]
Esempio n. 19
0
def test_cli_move_molecules(tmpdir):
    """Check that ``filter smirks`` splits files between the output and ``error_mols``."""

    def count_sdf(*parts):
        """Return how many ``*.sdf`` files sit under the joined path."""
        return len(glob.glob(os.path.join(*parts, "*.sdf")))

    with tmpdir.as_cwd():
        input_folder = get_data_file_path(
            '1-validate_and_assign_graphs_and_confs')
        n_inputs = count_sdf(input_folder)
        test_dir = '5-smirks_filter'

        runner.invoke(
            cli,
            ["filter", "smirks", input_folder, test_dir, "-p", 1, "-s",
             "[P:1]"],
            catch_exceptions=False)

        # This should only remove 1 molecule with 2 conformers.
        assert count_sdf(test_dir) == n_inputs - 2
        assert count_sdf(test_dir, "error_mols") == 2
Esempio n. 20
0
def test_dont_overwrite_output_directory(tmpdir):
    """Check that re-running into an existing output directory raises.

    The second ``generate-conformers`` call targets the same output directory
    without ``--delete-existing`` and must raise an exception whose message
    points the user at that flag.
    """
    with tmpdir.as_cwd():
        # Name of this test function, used as the parent of the output folder.
        this_test = inspect.stack()[0].function
        source_dir = get_data_file_path(
            '1-validate_and_assign_graphs_and_confs')
        target_dir = os.path.join(this_test, '2-generate_conformers')
        cli_args = [
            "preprocess", "generate-conformers", "-o", target_dir, source_dir
        ]

        # First run creates the output directory.
        runner.invoke(cli, cli_args, catch_exceptions=False)

        # An identical second run must raise.
        with pytest.raises(Exception,
                           match='Specify `--delete-existing` to remove'):
            runner.invoke(cli, list(cli_args), catch_exceptions=False)
Esempio n. 21
0
def test_bad_macrocycle(tmpdir):
    """Run ``generate-conformers`` on the bad-macrocycle data and expect one JAN-00203 file."""
    with tmpdir.as_cwd():
        source_dir = get_data_file_path(
            '1-validate_and_assign_graphs_and_confs_bad_macrocycle')
        target_dir = '2-generate_conformers'

        cli_args = [
            "preprocess", "generate-conformers", "-o", target_dir, source_dir
        ]
        runner.invoke(cli, cli_args, catch_exceptions=False)

        # JAN_00203 has a macrocycle that RDKit generates bad conformers for.
        # These conformers have twisted double bonds and can't be parsed by
        # subsequent processing steps.
        pattern = os.path.join(target_dir, 'JAN-00203-*.sdf')
        assert len(glob.glob(pattern)) == 1
Esempio n. 22
0
    def test_add_doesnt_overwrite_error_mols(self, tmpdir):
        """
        Run add multiple times, such that error mols are generated by two separate invocations. Then, make sure
        that the error outputs don't overwrite each other
        """
        with tmpdir.as_cwd():
            test_dir = '1-validate_and_assign'
            input_mols = [get_data_file_path('input_single_mol_rigid.sdf')]

            # Initial run creates the output directory.
            runner.invoke(cli, [
                "preprocess", "validate", "-g", "BBB", "-o", test_dir,
                *input_mols
            ],
                          catch_exceptions=False)

            # Two --add runs with the same input, each expected to produce
            # its own error molecule.
            for _ in range(2):
                runner.invoke(cli, [
                    "preprocess", "validate", '--add', "-g", "BBB", "-o",
                    test_dir, *input_mols
                ],
                              catch_exceptions=False)

            output_files = glob.glob(os.path.join(test_dir, '*.sdf'))
            output_files = [os.path.basename(fname) for fname in output_files]
            assert sorted(output_files) == ['BBB-00000-00.sdf']
            error_files = glob.glob(
                os.path.join(test_dir, 'error_mols', '*.sdf'))
            error_files = [os.path.basename(fname) for fname in error_files]
            assert sorted(error_files) == [
                'error_mol_0.sdf', 'error_mol_1.sdf'
            ]

            error_txts = []
            for fname in sorted(
                    glob.glob(os.path.join(test_dir, 'error_mols',
                                           '*.txt'))):
                # Close each report file deterministically instead of
                # leaking the handle until garbage collection.
                with open(fname) as error_file:
                    error_txts.append(error_file.read())
            assert "Input molecule graph is already present in output" in error_txts[
                0]
            assert "Input molecule graph is already present in output" in error_txts[
                1]
Esempio n. 23
0
    def test_add(self, tmpdir):
        """Check ``validate --add`` with a partially overlapping second input.

        The first run validates one stereoisomer. The second run adds a file
        of eight stereoisomers with ``--add``: eight output files, one error
        molecule and an extended ``name_assignments.csv`` are expected.
        """

        def read_name_assignments(csv_path):
            """Return the CSV rows with every cell reduced to its basename."""
            # newline='' is how the csv module documentation says files
            # passed to csv.reader should be opened.
            with open(csv_path, newline='') as csv_file:
                return [[os.path.basename(cell) for cell in row]
                        for row in csv.reader(csv_file)]

        def sdf_basenames(*parts):
            """Return the basenames of the ``*.sdf`` files under the joined path."""
            return [
                os.path.basename(fname)
                for fname in glob.glob(os.path.join(*parts, '*.sdf'))
            ]

        header = ['orig_name', 'orig_file', 'orig_file_index', 'out_file_name']
        first_row = [
            'PDB_DB00136_99', 'input_one_stereoisomer.sdf', '0', 'BBB-00000-00'
        ]

        with tmpdir.as_cwd():
            test_dir = '1-validate_and_assign'
            assignments_csv = os.path.join(test_dir, 'name_assignments.csv')
            input_mols = [get_data_file_path('input_one_stereoisomer.sdf')]
            runner.invoke(cli, [
                "preprocess", "validate", "-g", "BBB", "-o", test_dir,
                *input_mols
            ],
                          catch_exceptions=False)

            assert sorted(sdf_basenames(test_dir)) == ['BBB-00000-00.sdf']
            assert sdf_basenames(test_dir, 'error_mols') == []

            # Test that output names were correctly assigned
            assert read_name_assignments(assignments_csv) == [
                header, first_row
            ]

            # Run again, with a partially overlapping set of molecules
            input_mols2 = [get_data_file_path('input_eight_stereoisomers.sdf')]
            runner.invoke(cli, [
                "preprocess", "validate", "--add", "-g", "BBB", "-o", test_dir,
                *input_mols2
            ],
                          catch_exceptions=False)

            assert sorted(sdf_basenames(test_dir)) == [
                'BBB-00000-00.sdf',
                'BBB-00001-00.sdf',
                'BBB-00002-00.sdf',
                'BBB-00003-00.sdf',
                'BBB-00004-00.sdf',
                'BBB-00005-00.sdf',
                'BBB-00006-00.sdf',
                'BBB-00007-00.sdf',
            ]

            assert sdf_basenames(test_dir, 'error_mols') == ['error_mol_0.sdf']

            error_txts = []
            for fname in glob.glob(
                    os.path.join(test_dir, 'error_mols', '*.txt')):
                # Close each report file deterministically instead of
                # leaking the handle until garbage collection.
                with open(fname) as error_file:
                    error_txts.append(error_file.read())
            assert "Input molecule graph is already present in output" in error_txts[
                0]

            # Test that output names were correctly assigned. Entries 1-7 of
            # the second file follow one naming pattern, so build those rows
            # from the index.
            added_rows = [[
                f'PDB_DB00136_{idx:02d}', 'input_eight_stereoisomers.sdf',
                str(idx), f'BBB-{idx:05d}-00'
            ] for idx in range(1, 8)]
            assert read_name_assignments(assignments_csv) == [
                header, first_row, *added_rows
            ]