Beispiel #1
0
    def test_get_clustered_files(self, tmpdir):
        """
        Asserts that the `get_files` method returns only **clustered** files
        when its `include_clustered_files` flag is set (i.e., those with the
        cluster postfix).

        For instance, from a directory as the following, it should read
        only `file_1_clustered.csv`:

        ├─── file_1.csv
        ├─── file_2.csv
        ├─── file_1_clustered.csv
        └─── file_3.txt

        :type  tmpdir:  py.path.local
        :param tmpdir:  The `tmpdir` fixture is a py.path.local object
                        which will provide a temporary directory unique
                        to the test invocation.
        """
        # Arrange
        content = "content"
        for i in range(CSV_FILES_COUNT):
            clustered_name = f"file_{i}{CLUSTERED_FILENAME_POSFIX}.csv"
            tmpdir.join(clustered_name).write(content)
            # Pair every clustered file with a non-clustered sibling so the
            # count assertion below fails if the postfix filter is ignored.
            tmpdir.join(f"file_{i}.csv").write(content)
        # A non-CSV decoy that must never be returned.
        tmpdir.join("file_n.txt").write(content)

        # Act
        files = Base.get_files(tmpdir, include_clustered_files=True)

        # Assert
        assert len(files) == CSV_FILES_COUNT
        # Checks that files are returned with their absolute path.
        for path in files:
            assert os.path.isabs(path), f"expected an absolute path: {path}"
Beispiel #2
0
    def test_cluster_numbers(self, tmp_clustered_files):
        """
        Asserts that, after running `Cluster` on the fixture directory, every
        publication whose "Tools" value equals that of a test publication
        carries the same cluster name as that test publication, and that
        each returned file contains at least one such match.

        :param tmp_clustered_files:  Indexable fixture value; only its first
                                     item is used here, as the directory
                                     passed to `Cluster().run` and
                                     `Base.get_files`.
        """
        # Arrange
        tmpdir = tmp_clustered_files[0]
        test_pubs = BaseTestCase.get_test_publications()

        # Act
        Cluster().run(tmpdir)
        files = Base.get_files(tmpdir, include_clustered_files=True)

        # Assert
        # Without this guard the loop below would be skipped entirely and
        # the test would pass without checking anything.
        assert len(files) > 0, "no clustered files were returned to check"

        # Flatten the reference publications once into
        # (tools, cluster name) pairs, instead of re-walking every
        # reference frame for each row of each file.
        expected = [
            (ref_row.get("Tools"), ref_row.get(CLUSTER_NAME_COLUMN_LABEL))
            for test_pub in test_pubs
            for _, ref_row in test_pub[0].iterrows()
        ]

        # TODO: This assertion is anti-pattern; must be re-implemented in a much better way.
        for file in files:
            publications = Base.get_publications(file)

            checked = False
            for _, row in publications.iterrows():
                tools = row.get("Tools")
                cluster_name = row.get(CLUSTER_NAME_COLUMN_LABEL)
                for expected_tools, expected_cluster in expected:
                    if tools == expected_tools:
                        assert cluster_name == expected_cluster, (
                            f"cluster mismatch for tool {tools!r} in {file}"
                        )
                        checked = True

            # Each file must contain at least one publication that was
            # compared against the reference data.
            assert checked, f"no publication in {file} matched a test publication"