Ejemplo n.º 1
0
    def test_csv_encoding(self):
        """
        The character encoding can be given as a parameter in the engine_url.

        Only the connector's `encoding` attribute is checked. This test doesn't ensure the
        data conforms to that encoding.
        """
        base_url = "csv://" + EXAMPLE_CSV_PATH

        with_default = CsvConnector(engine_url=base_url)
        self.assertEqual("utf-8-sig", with_default.encoding, "Unexpected default encoding")

        with_override = CsvConnector(engine_url=base_url + ";encoding=latin-1")
        self.assertEqual("latin-1", with_override.encoding, "Can't override default encoding")
Ejemplo n.º 2
0
    def test_expected_fields(self):
        """
        `expected_fields` matching the file's header yields records; a mismatch raises
        ValueError when the first record is read.
        """
        url = "csv://" + EXAMPLE_CSV_PATH

        matching = CsvConnector(engine_url=url, expected_fields=['common_name', 'native_to'])
        first_record = next(iter(matching))
        # no exception: the fields are exactly as given in the file's header
        self.assertIsInstance(first_record, ayeaye.Pinnate)

        incomplete = CsvConnector(engine_url=url, expected_fields=['common_name'])
        # one of the file's fields is missing from expected_fields
        with self.assertRaises(ValueError):
            next(iter(incomplete))
Ejemplo n.º 3
0
 def test_alias_fields_dictionary(self):
     """With a dict of alias_fields, the value is read through the alias attribute name."""
     aliases = {'common_name': 'animal_name'}
     connector = CsvConnector(engine_url="csv://" + EXAMPLE_CSV_PATH, alias_fields=aliases)
     first_record = next(iter(connector))
     self.assertEqual("Crown of thorns starfish", first_record.animal_name)
Ejemplo n.º 4
0
    def test_required_fields(self):
        """
        `required_fields` that are present are accepted; a missing required field raises
        ValueError when the first record is read.
        """
        url = "csv://" + EXAMPLE_CSV_PATH

        satisfied = CsvConnector(engine_url=url, required_fields=['common_name'])
        first_record = next(iter(satisfied))
        # no exception: the given required field is present
        self.assertIsInstance(first_record, ayeaye.Pinnate)

        unsatisfied = CsvConnector(
            engine_url=url,
            required_fields=['common_name', 'native_to', 'unknown_field'],
        )
        # a required field is missing
        with self.assertRaises(ValueError):
            next(iter(unsatisfied))
Ejemplo n.º 5
0
    def test_resolve_engine_url(self):
        """
        A placeholder in the engine_url is substituted by a resolver, on demand.

        The resolver is registered only for the duration of `connector_resolver.context` and
        must not be invoked until `engine_url` is actually read.
        """
        self.assertEqual(
            0,
            len(connector_resolver.unnamed_callables),
            "There are existing resolver callables before the test has started",
        )

        class MockFakeEngineResolver:
            "Record when it's used and just substitute {data_version} with '1234'"

            def __init__(self):
                self.has_been_called = False

            def __call__(self, unresolved_engine_url):
                self.has_been_called = True
                return unresolved_engine_url.format(data_version='1234')

        connector = CsvConnector(engine_url="csv://my_path/data_{data_version}.csv")
        resolver = MockFakeEngineResolver()

        with connector_resolver.context(resolver):
            self.assertFalse(resolver.has_been_called, "Should only be called on demand")
            self.assertEqual(
                1,
                len(connector_resolver.unnamed_callables),
                "One resolver exists during the .context",
            )

            # reading engine_url is what triggers the resolver
            self.assertEqual('csv://my_path/data_1234.csv', connector.engine_url)

            self.assertTrue(
                resolver.has_been_called,
                "Should have been called after engine_url is available",
            )

        self.assertEqual(
            0,
            len(connector_resolver.unnamed_callables),
            "At end of with .context the MockFakeEngineResolver should have been removed",
        )
Ejemplo n.º 6
0
    def test_optional_args_with_write(self):
        """Read only connector arguments raise exception when called in write mode.

        Each of 'required_fields', 'expected_fields' and 'alias_fields' is passed in turn to a
        connector built with `AccessMode.WRITE`; calling `add` must raise ValueError.
        """
        import shutil  # local import: only needed to clean up the temporary directory

        data_dir = tempfile.mkdtemp()
        # mkdtemp never removes the directory itself. Errors are ignored so a failed clean up
        # can't change the outcome of the test.
        self.addCleanup(shutil.rmtree, data_dir, ignore_errors=True)
        csv_file = os.path.join(data_dir, "garden_insects.csv")

        for optional_field in ('required_fields', 'expected_fields', 'alias_fields'):
            # subTest names the failing argument and still exercises the remaining ones
            with self.subTest(optional_field=optional_field):
                c = CsvConnector(
                    engine_url="csv://" + csv_file,
                    access=ayeaye.AccessMode.WRITE,
                    **{optional_field: 'xyz'},
                )
                with self.assertRaises(ValueError):
                    c.add({"common_name": "Grasshopper"})
Ejemplo n.º 7
0
 def test_csv_missing_values(self):
     """
     `progress` must strictly increase with every record read from EXAMPLE_CSV_BROKEN_PATH.

     Covers: approx position in file not working when None values are in the CSV.
     """
     c = CsvConnector(engine_url="csv://" + EXAMPLE_CSV_BROKEN_PATH)
     previous_progress = 0
     for _ in c:
         # assertGreater reports both values on failure, unlike assertTrue(a > b)
         self.assertGreater(c.progress, previous_progress)
         previous_progress = c.progress
Ejemplo n.º 8
0
    def test_incompatible_optional_args(self):
        """Giving both `field_names` and `alias_fields` raises ValueError on the first read."""
        conflicting_args = {
            "field_names": ["common_name", "scientific_name", "geo_distribution"],
            "alias_fields": ["a", "b", "c"],
        }
        connector = CsvConnector(engine_url="csv://" + EXAMPLE_CSV_MICE, **conflicting_args)

        # Can't have both field_names and alias_fields, just use alias field!
        with self.assertRaises(ValueError):
            next(iter(connector))
Ejemplo n.º 9
0
    def test_csv_basics(self):
        """
        Iterate all the data items, reading `common_name` as an attribute of each yielded
        record, and check the names come back in file order.
        """
        c = CsvConnector(engine_url="csv://" + EXAMPLE_CSV_PATH)

        animals_names = ", ".join(deadly_animal.common_name for deadly_animal in c)
        expected = "Crown of thorns starfish, Golden dart frog"
        # assertEqual rather than a bare assert: it isn't stripped by `python -O`, reports a
        # diff on failure and matches the other tests.
        self.assertEqual(expected, animals_names)
Ejemplo n.º 10
0
 def test_alias_fields_complete_replace(self):
     """With a list of alias_fields, the record's keys are exactly the given aliases."""
     connector = CsvConnector(
         engine_url="csv://" + EXAMPLE_CSV_PATH,
         alias_fields=["animal_name", "lives"],
     )
     first_record = next(iter(connector)).as_dict()
     self.assertEqual(
         {"animal_name": "Crown of thorns starfish", "lives": "Indo-Pacific"},
         first_record,
     )
Ejemplo n.º 11
0
    def test_csv_without_fieldname_header_write(self):
        """
        Specify fields. Without this fields are taken from first record to be added.

        The first record added lacks 'main_colours'. The written file must still have both
        columns in the header, with an empty value for that record.
        """
        import shutil  # local import: only needed to clean up the temporary directory

        data_dir = tempfile.mkdtemp()
        # mkdtemp never removes the directory itself. Errors are ignored so a failed clean up
        # can't change the outcome of the test.
        self.addCleanup(shutil.rmtree, data_dir, ignore_errors=True)
        csv_file = os.path.join(data_dir, "lemurs.csv")

        c = CsvConnector(
            engine_url="csv://" + csv_file,
            access=ayeaye.AccessMode.WRITE,
            field_names=["common_name", "main_colours"],
        )
        lemurs = [
            {"common_name": "Indri"},
            {"common_name": "Ring tailed", "main_colours": "grey, black, white"},
        ]
        for lemur in lemurs:
            c.add(lemur)

        # close before reading the file back
        c.close_connection()

        with open(csv_file, "r", encoding=c.encoding) as f:
            csv_content = f.read()

        expected_content = ("common_name,main_colours\n"
                            "Indri,\n"
                            'Ring tailed,"grey, black, white"\n')

        self.assertEqual(expected_content, csv_content)
Ejemplo n.º 12
0
    def test_csv_engine_decode(self):
        """
        The engine_url is decoded into a file path plus optional parameters.

        For a plain url the public `engine_params` gives the file path. For the url with
        encoding, start and end parameters, `engine_params` raises NotImplementedError while
        the private `_engine_params` still holds the decoded values.
        """
        # The url always uses "/" but file_path is compared using the platform's separator.
        # Where os.path.sep is already "/" the replace is a no-op, so no condition is needed.
        expected_path = "/data/abc.csv".replace("/", os.path.sep)

        c = CsvConnector(engine_url="csv:///data/abc.csv")
        a = c.engine_params
        self.assertEqual(expected_path, a.file_path)

        c = CsvConnector(
            "csv:///data/abc.csv;encoding=latin-1;start=3;end=100")
        with self.assertRaises(NotImplementedError):
            c.engine_params

        a = c._engine_params
        self.assertEqual(expected_path, a.file_path)
        self.assertEqual("latin-1", a.encoding)
        self.assertEqual(3, a.start)
        self.assertEqual(100, a.end)
Ejemplo n.º 13
0
    def test_csv_without_fieldname_header(self):
        """
        With `field_names` supplied, the first line of the file is yielded as data under
        those names.
        """
        connector = CsvConnector(
            engine_url="csv://" + EXAMPLE_CSV_MICE,
            field_names=["common_name", "scientific_name", "geo_distribution"],
        )
        rows = [record.as_dict() for record in connector]

        self.assertEqual(3, len(rows))

        # just checking first line is data with correct field names
        first_row = {
            "common_name": "Yellow-necked mouse",
            "scientific_name": "Apodemus flavicollis",
            "geo_distribution": "Europe",
        }
        self.assertEqual(first_row, rows[0])
Ejemplo n.º 14
0
    def test_csv_write(self):
        """
        Write to a CSV without using a schema.

        Both an `ayeaye.Pinnate` and a plain dict are added; the file read back must contain
        the header followed by one line per record.
        """
        import shutil  # local import: only needed to clean up the temporary directory

        data_dir = tempfile.mkdtemp()
        # mkdtemp never removes the directory itself. Errors are ignored so a failed clean up
        # can't change the outcome of the test.
        self.addCleanup(shutil.rmtree, data_dir, ignore_errors=True)
        csv_file = os.path.join(data_dir, "fish.csv")
        c = CsvConnector(engine_url="csv://" + csv_file,
                         access=ayeaye.AccessMode.WRITE)

        # two data types that can be added
        p = ayeaye.Pinnate({"common_name": "Angel fish"})
        c.add(p)

        d = {"common_name": "Grey reef shark"}
        c.add(d)
        c.close_connection()  # flush to disk

        with open(csv_file, "r", encoding=c.encoding) as f:
            csv_content = f.read()

        expected_content = "common_name\n" "Angel fish\n" "Grey reef shark\n"

        self.assertEqual(expected_content, csv_content)
Ejemplo n.º 15
0
    def test_duplicate_fieldnames(self):
        """Simple way to handle CSV file with non-unique field names.

        `expected_fields` lists the header as it is in the file, including the repeated
        "Description"; `alias_fields` gives a unique name for each column.
        """
        header_fields = ["Species", "Habitat", "Description", "Oviparity-viviparity", "Description"]
        unique_aliases = [
            "species",
            "habitat_type",
            "habitat_description",
            "ovi-vivi",
            "ov_description",
        ]
        connector = CsvConnector(
            engine_url="csv://" + EXAMPLE_CSV_DUPLICATE_FIELDNAMES,
            expected_fields=header_fields,
            alias_fields=unique_aliases,
        )
        first_record = next(iter(connector)).as_dict()

        habitat_description = (
            "Populations are found in many tropical and temperate"
            " waters, especially around central Pacific islands."
        )
        ov_description = (
            "Eggs hatch internally and the young are born live when "
            "fully developed"
        )
        self.assertEqual(
            {
                "species": "Tiger shark",
                "habitat_type": "Tropical",
                "habitat_description": habitat_description,
                "ovi-vivi": "Ovoviviparous",
                "ov_description": ov_description,
            },
            first_record,
        )