Example #1
0
    def _get_iterator(self,
                      generator_asset,
                      query_params=None,
                      limit=None,
                      offset=None,
                      partition_id=None):
        """Build a one-element iterator of batch kwargs for ``generator_asset``.

        Resolution happens in two steps:

        1. If ``generator_asset`` is a key of ``self._assets``, the configured
           ``table`` (and optional ``schema``) templates are filled in from
           ``query_params``.
        2. Otherwise, if both ``self.engine`` and ``self.inspector`` are set,
           ``generator_asset`` is parsed as ``[SCHEMA.]TABLE`` and looked up
           among the tables and views reported by the inspector.

        Args:
            generator_asset: Name of a configured asset, or a table name of
                the form ``TABLE`` or ``SCHEMA.TABLE``.
            query_params: Mapping used to substitute the asset's table and
                schema templates. ``None`` is treated as an empty mapping.
            limit: If not ``None``, stored under ``batch_kwargs['limit']``.
            offset: If not ``None``, stored under ``batch_kwargs['offset']``.
            partition_id: If not ``None``, stored under
                ``batch_kwargs['partition_id']`` after logging a warning;
                it is recorded only and does not select a partition.

        Returns:
            An iterator yielding a single batch kwargs object, or ``None``
            when the asset could not be resolved.

        Raises:
            BatchKwargsError: A configured asset's template references a key
                that is missing from ``query_params``.
            ValueError: ``generator_asset`` contains more than one ``.`` and
                therefore is not of the form ``[SCHEMA.]TABLE``.
        """
        batch_kwargs = None
        # First, we check if we have a configured asset
        if generator_asset in self._assets:
            asset_config = self._assets[generator_asset]
            if query_params is None:
                query_params = {}
            try:
                table_name = Template(
                    asset_config.table).substitute(query_params)
                schema_name = None
                if asset_config.schema is not None:
                    schema_name = Template(
                        asset_config.schema).substitute(query_params)
            except KeyError:
                raise BatchKwargsError(
                    "Unable to generate batch kwargs for asset '" +
                    generator_asset + "': "
                    "missing template key", {
                        "generator_asset": generator_asset,
                        "table_template": asset_config.table,
                        "schema_template": asset_config.schema
                    })
            batch_kwargs = SqlAlchemyDatasourceTableBatchKwargs(
                table=table_name, schema=schema_name)

        # If this is not a manually configured asset, we fall back to inspection of the database
        elif self.engine is not None and self.inspector is not None:
            split_generator_asset = generator_asset.split(".")
            if len(split_generator_asset) == 2:
                schema_name, table_name = split_generator_asset
            elif len(split_generator_asset) == 1:
                schema_name = self.inspector.default_schema_name
                table_name = split_generator_asset[0]
            else:
                # Report the original string: concatenating the split list
                # onto a str would raise TypeError and hide this error.
                raise ValueError(
                    "Table name must be of shape '[SCHEMA.]TABLE'. Passed: " +
                    generator_asset)
            # Copy before extending so a list the inspector may still hold a
            # reference to is never mutated.
            tables = list(self.inspector.get_table_names(schema=schema_name))
            try:
                tables.extend(
                    self.inspector.get_view_names(schema=schema_name))
            except NotImplementedError:
                # Not implemented by bigquery dialect
                pass

            if table_name in tables:
                batch_kwargs = SqlAlchemyDatasourceTableBatchKwargs(
                    table=table_name, schema=schema_name)

        if batch_kwargs is not None:
            if partition_id is not None:
                logger.warning(
                    "table_generator cannot identify partitions; provided partition id will be recorded "
                    "only")
                batch_kwargs['partition_id'] = partition_id
            if limit is not None:
                batch_kwargs['limit'] = limit
            if offset is not None:
                batch_kwargs['offset'] = offset
            return iter([batch_kwargs])

        # Otherwise, the asset could not be resolved
        return None
Example #2
0
 def get_available_partition_ids(self, generator_asset):
     """Always raise: this generator cannot enumerate partitions.

     Args:
         generator_asset: Name of the asset being queried; not used.

     Raises:
         BatchKwargsError: Unconditionally, with a message explaining that
             tables are referenced by name or as ``SCHEMA.TABLE`` instead.
     """
     # Each fragment ends with a space so the implicitly concatenated
     # message does not run words together ("may already", not "mayalready").
     raise BatchKwargsError(
         "TableGenerator cannot identify partitions, however any existing table may "
         "already be referenced by accessing a generator_asset with the name of the "
         "table or of the form SCHEMA.TABLE", {})