コード例 #1
0
ファイル: test_manifest.py プロジェクト: Aye-Aye-Dev/AyeAye
        class InsectSurvey(Model):
            """
            Test model whose dataset engine_urls are supplied by ``EngineFromManifest``
            objects built around the ``manifest`` connection.
            """

            # "{build_id}" lives in a plain (non-f) string so it stays a literal placeholder
            # in the engine_url.
            manifest = Connect(
                engine_url=(f"json://{TEST_DATA}/manifest_" + "{build_id}.json"),
            )
            ants = Connect(
                engine_url=EngineFromManifest(manifest, "source_files", "csv"),
            )
            invertebrates = Connect(
                engine_url=EngineFromManifest(manifest, "single_file", "json"),
            )

            def build(self):
                """Intentionally does nothing."""
                return None
コード例 #2
0
 def test_connect_spare_kwargs(self):
     """
     Subclasses of :class:`ayeaye.connectors.base.DataConnector` may accept their own custom
     kwargs. Any kwarg that nothing claims should cause an exception, so that typos and
     references to arguments that never come into play are caught early.
     """
     connect_with_unknown_kwarg = Connect(engine_url="fake://foo", doesntexist="oh dear")

     # kwargs are only consumed once an engine_url is needed, so the error is expected
     # here rather than at construction time
     with self.assertRaises(ValueError):
         connect_with_unknown_kwarg._prepare_connection()
コード例 #3
0
    def test_construction_args(self):
        """
        Invalid combinations of ``ref``, ``engine_url`` and ``models`` passed to ``Connect``
        must raise ``ValueError``.

        Note: the ``msg`` given to ``assertRaises`` is only the text reported when *no*
        exception is raised; it is not compared with the raised exception's message.
        """
        tsv_engine_url = "tsv://" + EXAMPLE_TSV_PATH
        model_data_msg = "Ref + engine_url are for DataConnectors and models is for ayeaye.models"

        # (failure message, constructor kwargs) - checked in this order
        invalid_combinations = (
            (
                "Ref and engine_url are mutually exclusive",
                dict(ref="x", engine_url=tsv_engine_url),
            ),
            (model_data_msg, dict(ref="x", models=AbstractFakeModel)),
            (model_data_msg, dict(engine_url=tsv_engine_url, models=AbstractFakeModel)),
        )

        for failure_msg, connect_kwargs in invalid_combinations:
            with self.assertRaises(ValueError, msg=failure_msg):
                Connect(**connect_kwargs)
コード例 #4
0
ファイル: test_manifest.py プロジェクト: Aye-Aye-Dev/AyeAye
        class SeabedSurvey(Model):
            """
            Test model whose manifest location is only known at construction time.

            ``manifest`` starts as an empty ``Connect`` and is pointed at a JSON file in
            ``__init__``; ``x_files`` takes its engine_url from ``mapper.x``.
            """

            manifest = Connect()
            mapper = SeabedMapper(
                manifest_dataset=manifest,
                field_name="more_files",
            )
            x_files = Connect(engine_url=mapper.x)

            def __init__(self, manifest_file, **kwargs):
                """
                :param manifest_file: path used to build the manifest's json engine_url
                :param kwargs: passed through unchanged to the parent constructor
                """
                super().__init__(**kwargs)
                manifest_engine_url = f"json://{manifest_file};encoding=utf-8-sig"
                self.manifest.update(engine_url=manifest_engine_url)

            def build(self):
                """Intentionally does nothing."""
                return None
コード例 #5
0
    def test_compile_time_multiple_engine_urls(self):
        """
        ``engine_url`` may be a list of engine_urls; iterating the ``Connect`` then yields
        one data connector per url. A dictionary version might be added in the future.
        """
        engine_urls = ["tsv://" + EXAMPLE_TSV_PATH, "csv://" + EXAMPLE_CSV_PATH]
        multi_connect = Connect(engine_url=engine_urls)

        # connector class expected at each position
        expected_connector_types = (TsvConnector, CsvConnector)

        all_the_animals = []
        for position, data_connector in enumerate(multi_connect):
            if position >= len(expected_connector_types):
                raise ValueError(
                    "Connect has more than expected data connectors")

            self.assertIsInstance(data_connector, expected_connector_types[position])
            all_the_animals.extend([animal.common_name for animal in data_connector])

        expected = [
            "Goeldi's marmoset",
            "Common squirrel monkey",
            "Crab-eating macaque",
            "Crown of thorns starfish",
            "Golden dart frog",
        ]
        self.assertEqual(expected, all_the_animals)
コード例 #6
0
 def test_standalone_as_proxy(self):
     """
     An attribute that belongs to the concrete connector subclass, not to the DataConnector
     abstract class, is reachable through the ``Connect`` instance.
     """
     engine_url = "csv://" + EXAMPLE_CSV_PATH + ";encoding=magic_encoding"
     animals = Connect(engine_url=engine_url)

     self.assertEqual("magic_encoding", animals.encoding)
コード例 #7
0
    def test_connect_callable_kwargs(self):
        """
        :class:`ayeaye.connectors.fake.FakeDataConnector` has an optional kwarg,
        'quantum_accelerator_module'. It can be set with either a literal or a callable.
        """

        def simple_callable():
            """Simple here means it takes no arguments."""
            return "entanglement_v2"

        # literal value, used standalone
        literal_connect = Connect(
            engine_url="fake://MyDataset",
            quantum_accelerator_module="entanglement_v1",
        )
        self.assertEqual({"fake": "data"}, literal_connect.data[0])
        self.assertEqual("entanglement_v1", literal_connect.quantum_accelerator_module)

        # TODO - standalone is only calling the callable after _prepare_connection
        # this isn't right
        # c = Connect(engine_url="fake://MyDataset", quantum_accelerator_module=simple_callable)
        # self.assertEqual({"fake": "data"}, c.data[0])

        # callable value, used as part of a model
        class QuatumSort(AbstractFakeModel):
            source = Connect(
                engine_url="fake://MyDataset",
                quantum_accelerator_module=simple_callable,
            )

        sorter = QuatumSort()
        self.assertEqual("entanglement_v2", sorter.source.quantum_accelerator_module)
コード例 #8
0
 def test_connect_standalone(self):
     """
     :class:`ayeaye.Connect` also works outside of the ETL, so data discovery can share its
     way of working with full :class:`ayeaye.Model`s.
     """
     # happy path: Connect is not an attribute of any ayeaye.Model here
     standalone = Connect(engine_url="fake://MyDataset")
     first_record = standalone.data[0]
     self.assertEqual({"fake": "data"}, first_record)
コード例 #9
0
ファイル: test_manifest.py プロジェクト: Aye-Aye-Dev/AyeAye
        class LandAnimalsSurvey(Model):
            """
            Test model exposing a manifest value as a class attribute.

            ``bad_weather`` is taken from a ``ManifestProperty`` built on the ``manifest``
            connection; the manifest's engine_url is only set in ``__init__``.
            """

            manifest = Connect()
            build_attributes = ManifestProperty(manifest_dataset=manifest)
            bad_weather = build_attributes.bad_weather

            def __init__(self, manifest_file, **kwargs):
                """
                :param manifest_file: path used to build the manifest's json engine_url
                :param kwargs: passed through unchanged to the parent constructor
                """
                super().__init__(**kwargs)
                manifest_engine_url = f"json://{manifest_file};encoding=utf-8-sig"
                self.manifest.update(engine_url=manifest_engine_url)
コード例 #10
0
 def test_custom_kwargs_are_passed(self):
     """
     ayeaye.Connect should relay kwargs to subclasses of DataConnector.
     """
     # bigquery is used because it has a custom 'credentials' kwarg
     bigquery_connect = Connect(
         engine_url="bigquery://projectId=my_project;datasetId=nice_food;tableId=cakes;",
         credentials="hello_world",
     )

     # touching .data makes the connection on demand
     self.assertIsNotNone(bigquery_connect.data)

     standalone_connection = bigquery_connect._standalone_connection
     self.assertEqual("hello_world", standalone_connection.credentials)
コード例 #11
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
        class FishStocksCollator(FakeModel):
            """Test model that starts with two pond datasets and adds a third in ``build``."""

            fish = Connect(
                engine_url=[
                    'csv://{file_location}/pond_1.csv',
                    'csv://{file_location}/pond_2.csv',
                ],
            )

            def build(self):
                """Add a dataset at runtime and check the connector that comes back."""
                pond_3 = self.fish.add_engine_url('csv://{file_location}/pond_3.csv')
                assert isinstance(pond_3, CsvConnector)
                # the {file_location} placeholder is expected to have been filled in
                assert pond_3.engine_url == 'csv:///data/pond_3.csv'
コード例 #12
0
ファイル: test_manifest.py プロジェクト: Aye-Aye-Dev/AyeAye
    def test_manifest_mapper_find_mapper_methods(self):
        """
        ``methods_mapper`` of a mapper subclass has a key for each ``map_*`` method, named
        without the ``map_`` prefix.
        """

        class SuperMapper(AbstractManifestMapper):
            def map_xyz(self):
                return None

            def map_abc(self):
                return None

        manifest_dataset = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        super_mapper = SuperMapper(manifest_dataset=manifest_dataset, field_name="more_files")

        discovered_names = set(super_mapper.methods_mapper.keys())
        self.assertEqual({"xyz", "abc"}, discovered_names)
コード例 #13
0
ファイル: test_manifest.py プロジェクト: Aye-Aye-Dev/AyeAye
    def test_manifest_iterate(self):
        """
        Iterating a mapper yields items with ``manifest_item`` and ``fanout`` attributes;
        only the fanout for "z.ndjson" is checked.
        """
        manifest_dataset = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        mapper = MagicMapper(manifest_dataset=manifest_dataset, field_name="more_files")

        for engine_set in mapper:
            # only one mapping (fanout) for one manifest listed file is tested
            if engine_set.manifest_item != "z.ndjson":
                continue

            expected = ["csv://z.ndjson.csv", "ndjson://z.ndjson.ndjson"]
            self.assertEqual(engine_set.fanout, expected)
            break
        else:
            # loop finished without reaching the item under test
            raise ValueError("test item not found")
コード例 #14
0
    def test_replace_existing_connect(self):
        """
        A model's ``Connect`` attribute can be replaced by another ``Connect`` instance;
        assigning anything else raises ``ValueError``.
        """

        class FakeModel(AbstractFakeModel):
            insects = Connect(engine_url="fake://bugsDB")

        model = FakeModel()

        # a non-Connect value is rejected
        with self.assertRaises(ValueError) as raised:
            model.insects = "this is a string, not an instance of Connect"
        self.assertEqual("Only Connect instances can be set", str(raised.exception))

        # order matters: _connections is inspected before .insects is first read
        self.assertEqual({}, model._connections, "Connections not initialised prior to access")
        self.assertEqual("fake://bugsDB", model.insects.engine_url, "Original connection")

        replacement = Connect(engine_url="fake://creepyCrawliesDB")
        model.insects = replacement
        self.assertEqual("fake://creepyCrawliesDB", model.insects.engine_url, "New connection")
0
ファイル: test_manifest.py プロジェクト: Aye-Aye-Dev/AyeAye
    def test_manifest_callable(self):
        """
        A map_xxx() method becomes an .xxx attribute which can be called later.
        """
        manifest_dataset = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        mapper = MagicMapper(manifest_dataset=manifest_dataset, field_name="more_files")

        # taken now, called at the end of the test
        deferred_bijection = mapper.bijection

        # note - self.map_bijection() returns [(manifest_file, engine_url)..] whereas
        # .bijection returns just the engine_urls
        expected_engine_urls = [
            "json://x.ndjson",
            "json://y.ndjson",
            "json://z.ndjson",
        ]

        actual_engine_urls = deferred_bijection()
        self.assertEqual(expected_engine_urls, actual_engine_urls)
コード例 #16
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
    def test_multi_connector_resolve(self):
        """
        MultiConnector + ConnectorResolver.
        There are other tests for this in :class:`TestConnectors`.
        """

        def simple_resolver(unresolved_engine_url):
            """Fill the template engine_url with a fixed data_version."""
            substitutions = {'data_version': '1234'}
            return unresolved_engine_url.format(**substitutions)

        # a list of engine_urls makes this a MultiConnector
        unresolved_urls = [
            "csv://my_path_x/data_{data_version}.csv",
            "csv://my_path_y/data_{data_version}.csv",
        ]
        multi_connect = Connect(engine_url=unresolved_urls)

        # engine_urls are read while the resolver is registered
        with connector_resolver.context(simple_resolver):
            resolved_engine_urls = [connector.engine_url for connector in multi_connect]

        expected_urls = ['csv://my_path_x/data_1234.csv', 'csv://my_path_y/data_1234.csv']
        self.assertEqual(expected_urls, resolved_engine_urls)
コード例 #17
0
ファイル: test_manifest.py プロジェクト: Aye-Aye-Dev/AyeAye
    def test_manifest_full_map(self):
        """
        ``full_map`` is keyed by manifest file; each value maps a mapping name to the list
        of engine_urls for that file.
        """
        expected = {
            "x.ndjson": {
                "bijection": ["json://x.ndjson"],
                "collapse_in": ["csv://results_summary.csv"],
                "fanout": ["csv://x.ndjson.csv", "ndjson://x.ndjson.ndjson"],
            },
            "y.ndjson": {
                "bijection": ["json://y.ndjson"],
                "collapse_in": ["csv://results_summary.csv"],
                "fanout": ["csv://y.ndjson.csv", "ndjson://y.ndjson.ndjson"],
            },
            "z.ndjson": {
                "bijection": ["json://z.ndjson"],
                "collapse_in": ["csv://results_summary.csv"],
                "fanout": ["csv://z.ndjson.csv", "ndjson://z.ndjson.ndjson"],
            },
        }

        manifest_dataset = Connect(engine_url=f"json://{TEST_DATA}/manifest_abcd.json")
        mapper = MagicMapper(manifest_dataset=manifest_dataset, field_name="more_files")

        self.assertEqual(expected, mapper.full_map)
コード例 #18
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
 class CheeseSales(Model):
     """Model with one dataset whose engine_url carries a ``{data_version}`` placeholder."""

     products = Connect(
         engine_url="csv://my_path_x/data_{data_version}.csv",
     )
コード例 #19
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
 class AnimalsSurvey(Model):
     """Model whose ``rodents`` engine_url comes from a call made on ``connector_resolver``."""

     rodents = Connect(
         engine_url=connector_resolver.my_survey.sample_data(rodent_type="mice"),
     )
コード例 #20
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
    def test_named_variables(self):
        """
        Within a resolver context that supplies a named variable, check the engine_url of a
        standalone connection.
        """
        # NOTE(review): the "*****" runs look like redacted credentials/placeholders in this
        # listing - TODO confirm the intended values against the project.
        with connector_resolver.context(env_secret_password="******"):
            database = Connect(engine_url="mysql://*****:*****@localhost/my_database")
            database.connect_standalone()

            resolved_engine_url = database.engine_url
            self.assertEqual('mysql://*****:*****@localhost/my_database', resolved_engine_url)
コード例 #21
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
        class LizardLocator(FakeModel):
            """Test model with one dataset whose engine_url has a ``{file_location}`` placeholder."""

            habitats = Connect(
                engine_url='csv://{file_location}/habitat.csv',
            )

            def get_the_important_engine_url(self):
                """Return the engine_url of the ``habitats`` dataset."""
                habitats_engine_url = self.habitats.engine_url
                return habitats_engine_url
コード例 #22
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
        class InsectSurvey(Model):
            """Test model whose ``ants`` engine_url comes from a call made on ``connector_resolver``."""

            ants = Connect(
                engine_url=connector_resolver.my_ants.all_the_files(ant_types="red"),
            )

            def build(self):
                """Check the engine_url seen at build time."""
                ants_engine_url = self.ants.engine_url
                assert ants_engine_url == "csv://red_ants.csv"
コード例 #23
0
 class AnimalsModel(AbstractFakeModel):
     """Fake model with a single csv dataset, ``animals``."""

     animals = Connect(
         engine_url="csv://" + EXAMPLE_CSV_PATH,
     )
コード例 #24
0
    def test_callable_engine_url(self):
        """``engine_url`` may be given as a callable that returns the engine_url string."""

        def pointlessly_deterministic_example_callable():
            return "fake://MyDataset"

        connect_from_callable = Connect(engine_url=pointlessly_deterministic_example_callable)

        first_record = connect_from_callable.data[0]
        self.assertEqual({"fake": "data"}, first_record, "Example data not found")
コード例 #25
0
        class QuatumSort(AbstractFakeModel):
            """Fake model that passes ``q_fact`` to its connection as ``quantum_factory``."""

            source = Connect(
                engine_url="fake://MyDataset",
                quantum_factory=q_fact,
            )

            def calculate_result(self):
                """Call the connection's ``quantum_factory`` with a fixed argument."""
                factory = self.source.quantum_factory
                return factory("quantum dynamics")
コード例 #26
0
 class QuatumSort(AbstractFakeModel):
     """Fake model that gives ``quantum_accelerator_module`` as a callable."""

     source = Connect(
         engine_url="fake://MyDataset",
         quantum_accelerator_module=simple_callable,
     )
コード例 #27
0
 class AnimalsModel(AbstractFakeModel):
     """Fake model whose ``animals`` connection starts with an empty list of engine_urls."""

     animals = Connect(
         engine_url=[],
     )
コード例 #28
0
 class FakeModel(AbstractFakeModel):
     """Fake model with a single fake dataset, ``insects``."""

     insects = Connect(
         engine_url="fake://bugsDB",
     )
コード例 #29
0
ファイル: test_resolvers.py プロジェクト: Aye-Aye-Dev/AyeAye
class FakeModel:
    """
    Plain class (no model base class) holding a ``Connect`` as a class attribute and an
    empty ``_connections`` dict on each instance.
    """

    insects = Connect(
        engine_url="fake://bugsDB",
    )

    def __init__(self):
        """Start with no connections recorded."""
        self._connections = dict()
コード例 #30
0
 def __init__(self):
     """Attach a csv ``Connect`` to the instance as ``animals``."""
     # Here the Connect is stored on the instance rather than declared on the class, so
     # it is just a variable and its descriptor methods aren't called.
     csv_engine_url = "csv://" + EXAMPLE_CSV_PATH
     self.animals = Connect(engine_url=csv_engine_url)