def _get_iterator(self, generator_asset, query_parameters=None, limit=None, offset=None, partition_id=None):
        batch_kwargs = None
        # First, we check if we have a configured asset
        if generator_asset in self._assets:
            asset_config = self._assets[generator_asset]
            try:
                if query_parameters is None:
                    query_parameters = {}
                table_name = Template(asset_config.table).substitute(query_parameters)
                schema_name = None
                if asset_config.schema is not None:
                    schema_name = Template(asset_config.schema).substitute(query_parameters)
            except KeyError:
                raise BatchKwargsError("Unable to generate batch kwargs for asset '" + generator_asset + "': "
                                       "missing template key",
                                       {"generator_asset": generator_asset,
                                        "table_template": asset_config.table,
                                        "schema_template": asset_config.schema}
                                       )
            batch_kwargs = SqlAlchemyDatasourceTableBatchKwargs(table=table_name, schema=schema_name)

        # If this is not a manually configured asset, we fall back to inspection of the database
        elif self.engine is not None and self.inspector is not None:
            split_generator_asset = generator_asset.split(".")
            if len(split_generator_asset) == 2:
                schema_name = split_generator_asset[0]
                table_name = split_generator_asset[1]
            elif len(split_generator_asset) == 1:
                schema_name = self.inspector.default_schema_name
                table_name = split_generator_asset[0]
            else:
                raise ValueError("Table name must be of shape '[SCHEMA.]TABLE'. Passed: " + split_generator_asset)
            tables = self.inspector.get_table_names(schema=schema_name)
            try:
                tables.extend(self.inspector.get_view_names(schema=schema_name))
            except NotImplementedError:
                # Not implemented by bigquery dialect
                pass

            if table_name in tables:
                batch_kwargs = SqlAlchemyDatasourceTableBatchKwargs(table=table_name, schema=schema_name)

        if batch_kwargs is not None:
            if partition_id is not None:
                logger.warning("table_generator cannot identify partitions; provided partition id will be recorded "
                               "only")
                batch_kwargs['partition_id'] = partition_id
            if limit is not None:
                batch_kwargs['limit'] = limit
            if offset is not None:
                batch_kwargs['offset'] = offset
            return iter([batch_kwargs])

        # Otherwise, we return None
        return
Ejemplo n.º 2
0
    def _get_iterator(self, generator_asset, **kwargs):
        # First, we check if we have a configured asset
        if generator_asset in self._assets:
            asset_config = self._assets[generator_asset]
            try:
                table_name = Template(asset_config.table).substitute(kwargs)
                schema_name = None
                if asset_config.schema is not None:
                    schema_name = Template(
                        asset_config.schema).substitute(kwargs)
            except KeyError:
                raise BatchKwargsError(
                    "Unable to generate batch kwargs for asset '" +
                    generator_asset + "': "
                    "missing template key", {
                        "generator_asset": generator_asset,
                        "table_template": asset_config.table,
                        "schema_template": asset_config.schema
                    })
            return iter([
                SqlAlchemyDatasourceTableBatchKwargs(table=table_name,
                                                     schema=schema_name)
            ])

        # If this is not a manually configured asset, we fall back to inspection of the database
        elif self.engine is not None and self.inspector is not None:
            split_generator_asset = generator_asset.split(".")
            if len(split_generator_asset) == 2:
                schema_name = split_generator_asset[0]
                table_name = split_generator_asset[1]
            elif len(split_generator_asset) == 1:
                schema_name = self.inspector.default_schema_name
                table_name = split_generator_asset[0]
            else:
                raise ValueError(
                    "Table name must be of shape '[SCHEMA.]TABLE'. Passed: " +
                    split_generator_asset)
            tables = self.inspector.get_table_names(schema=schema_name)
            tables.extend(self.inspector.get_view_names(schema=schema_name))
            if table_name in tables:
                return iter([
                    SqlAlchemyDatasourceTableBatchKwargs(
                        table=table_name,
                        schema=schema_name,
                    )
                ])
Ejemplo n.º 3
0
    def _get_iterator(
        self,
        data_asset_name,
        query_parameters=None,
        limit=None,
        offset=None,
        partition_id=None,
    ):
        batch_kwargs = None
        # First, we check if we have a configured asset
        if data_asset_name in self._assets:
            asset_config = self._assets[data_asset_name]
            try:
                if query_parameters is None:
                    query_parameters = {}
                table_name = Template(
                    asset_config.table).substitute(query_parameters)
                schema_name = None
                if asset_config.schema is not None:
                    schema_name = Template(
                        asset_config.schema).substitute(query_parameters)
            except KeyError:
                raise BatchKwargsError(
                    "Unable to generate batch kwargs for asset '" +
                    data_asset_name + "': "
                    "missing template key",
                    {
                        "data_asset_name": data_asset_name,
                        "table_template": asset_config.table,
                        "schema_template": asset_config.schema,
                    },
                )
            batch_kwargs = SqlAlchemyDatasourceTableBatchKwargs(
                table=table_name, schema=schema_name)

        # If this is not a manually configured asset, we fall back to inspection of the database
        elif self.engine is not None and self.inspector is not None:
            project_id = None
            schema_name = None
            split_data_asset_name = data_asset_name.split(".")
            if len(split_data_asset_name) == 2:
                schema_name = split_data_asset_name[0]
                if self.engine.dialect.name.lower() == "bigquery":
                    table_name = data_asset_name
                else:
                    table_name = split_data_asset_name[1]
            elif len(split_data_asset_name) == 1:
                schema_name = self.inspector.default_schema_name
                table_name = split_data_asset_name[0]

            elif (len(split_data_asset_name) == 3
                  and self.engine.dialect.name.lower() == "bigquery"):
                project_id = split_data_asset_name[0]
                schema_name = split_data_asset_name[1]
                table_name = data_asset_name
            else:
                shape = "[SCHEMA.]TABLE"
                if self.engine.dialect.name.lower() == "bigquery":
                    shape = f"[PROJECT_ID.]{shape}"

                raise ValueError(
                    "Table name must be of shape '{}'. Passed: {}".format(
                        shape, split_data_asset_name))

            try:
                has_table = self.inspector.has_table
            except AttributeError:
                has_table = self.engine.has_table

            if has_table(table_name, schema=schema_name):
                batch_kwargs = SqlAlchemyDatasourceTableBatchKwargs(
                    table=table_name, schema=schema_name)
            else:
                raise BatchKwargsError(
                    "TableBatchKwargsGenerator cannot access the following data:"
                    f"SCHEMA : {schema_name}"
                    f"TABLE : {table_name}",
                    {},
                )

        if batch_kwargs is not None:
            if partition_id is not None:
                logger.warning(
                    "table_generator cannot identify partitions; provided partition id will be recorded "
                    "only")
                batch_kwargs["partition_id"] = partition_id
            if limit is not None:
                batch_kwargs["limit"] = limit
            if offset is not None:
                batch_kwargs["offset"] = offset
            return iter([batch_kwargs])
        # Otherwise, we return None
        return