    @patch("superset.security.manager.g")  # mock `g` so the command runs as the admin user
    def test_export_dataset_command(self, mock_g):
        mock_g.user = security_manager.find_user("admin")

        example_db = get_example_database()
        example_dataset = _get_table_from_list_by_name("energy_usage",
                                                       example_db.tables)
        command = ExportDatasetsCommand([example_dataset.id])
        contents = dict(command.run())

        assert list(contents.keys()) == [
            "metadata.yaml",
            "datasets/examples/energy_usage.yaml",
            "databases/examples.yaml",
        ]

        metadata = yaml.safe_load(
            contents["datasets/examples/energy_usage.yaml"])

        # sort columns and metrics for deterministic comparison
        metadata["columns"] = sorted(metadata["columns"],
                                     key=itemgetter("column_name"))
        metadata["metrics"] = sorted(metadata["metrics"],
                                     key=itemgetter("metric_name"))

        # types are different depending on the backend
        type_map = {
            column.column_name: str(column.type)
            for column in example_dataset.columns
        }

        assert metadata == {
            "cache_timeout":
            None,
            "columns": [
                {
                    "column_name": "source",
                    "description": None,
                    "expression": "",
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": type_map["source"],
                    "verbose_name": None,
                    "extra": None,
                },
                {
                    "column_name": "target",
                    "description": None,
                    "expression": "",
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": type_map["target"],
                    "verbose_name": None,
                    "extra": None,
                },
                {
                    "column_name": "value",
                    "description": None,
                    "expression": "",
                    "filterable": True,
                    "groupby": True,
                    "is_active": True,
                    "is_dttm": False,
                    "python_date_format": None,
                    "type": type_map["value"],
                    "verbose_name": None,
                    "extra": None,
                },
            ],
            "database_uuid":
            str(example_db.uuid),
            "default_endpoint":
            None,
            "description":
            "Energy consumption",
            "extra":
            None,
            "fetch_values_predicate":
            None,
            "filter_select_enabled":
            False,
            "main_dttm_col":
            None,
            "metrics": [
                {
                    "d3format": None,
                    "description": None,
                    "expression": "COUNT(*)",
                    "extra": None,
                    "metric_name": "count",
                    "metric_type": "count",
                    "verbose_name": "COUNT(*)",
                    "warning_text": None,
                },
                {
                    "d3format": None,
                    "description": None,
                    "expression": "SUM(value)",
                    "extra": None,
                    "metric_name": "sum__value",
                    "metric_type": None,
                    "verbose_name": None,
                    "warning_text": None,
                },
            ],
            "offset":
            0,
            "params":
            None,
            "schema":
            get_example_default_schema(),
            "sql":
            None,
            "table_name":
            "energy_usage",
            "template_params":
            None,
            "uuid":
            str(example_dataset.uuid),
            "version":
            "1.0.0",
        }
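
# ExportDatasetsCommand.run() yields (file_name, file_content) tuples, which is
# why the test above can collect them with dict(). A minimal sketch of writing
# the same payload to disk outside a test (the target directory and the
# dataset id are illustrative assumptions, not part of the original code):
from pathlib import Path

def write_export(dataset_id: int, target: str = "export") -> None:
    # each file_name already carries its bundle path, e.g. datasets/<db>/<table>.yaml
    for file_name, file_content in ExportDatasetsCommand([dataset_id]).run():
        path = Path(target) / file_name
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(file_content)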
    @staticmethod
    def _export(model: Dashboard,
                export_related: bool = True) -> Iterator[Tuple[str, str]]:
        dashboard_slug = secure_filename(model.dashboard_title)
        file_name = f"dashboards/{dashboard_slug}_{model.id}.yaml"

        payload = model.export_to_dict(
            recursive=False,
            include_parent_ref=False,
            include_defaults=True,
            export_uuids=True,
        )
        # TODO (betodealmeida): move this logic to export_to_dict once this
        #  becomes the default export endpoint
        for key, new_name in JSON_KEYS.items():
            value: Optional[str] = payload.pop(key, None)
            if value:
                try:
                    payload[new_name] = json.loads(value)
                except (TypeError, json.decoder.JSONDecodeError):
                    logger.info("Unable to decode `%s` field: %s", key, value)
                    payload[new_name] = {}

        # Extract all native filter datasets and replace native
        # filter dataset references with uuid
        for native_filter in payload.get("metadata",
                                         {}).get("native_filter_configuration",
                                                 []):
            for target in native_filter.get("targets", []):
                dataset_id = target.pop("datasetId", None)
                if dataset_id is not None:
                    dataset = DatasetDAO.find_by_id(dataset_id)
                    if dataset:
                        target["datasetUuid"] = str(dataset.uuid)
                        if export_related:
                            yield from ExportDatasetsCommand([dataset_id]).run()

        # the mapping between dashboard -> charts is inferred from the position
        # attribute, so if it's not present we need to add a default config
        if not payload.get("position"):
            payload["position"] = get_default_position(model.dashboard_title)

        # if any charts are not referenced in the position, we need to add
        # them in a new row
        referenced_charts = find_chart_uuids(payload["position"])
        orphan_charts = {
            chart
            for chart in model.slices
            if str(chart.uuid) not in referenced_charts
        }

        if orphan_charts:
            payload["position"] = append_charts(payload["position"],
                                                orphan_charts)

        payload["version"] = EXPORT_VERSION

        file_content = yaml.safe_dump(payload, sort_keys=False)
        yield file_name, file_content

        if export_related:
            chart_ids = [chart.id for chart in model.slices]
            yield from ExportChartsCommand(chart_ids).run()
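
# _export above renames serialized JSON columns via a module-level JSON_KEYS
# mapping. A sketch of the shape it takes, with keys inferred from how the
# payload is consumed above (assumed for illustration, not copied verbatim):
JSON_KEYS = {
    "position_json": "position",  # dashboard layout, read by find_chart_uuids
    "css": "css",
    "json_metadata": "metadata",  # carries native_filter_configuration
}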

    @expose("/export/", methods=["GET"])
    @protect()
    @safe
    @statsd_metrics
    @rison(get_export_ids_schema)
    def export(self, **kwargs: Any) -> Response:
        """Export datasets
        ---
        get:
          description: >-
            Exports multiple datasets and downloads them as YAML files
          parameters:
          - in: query
            name: q
            content:
              application/json:
                schema:
                  $ref: '#/components/schemas/get_export_ids_schema'
          responses:
            200:
              description: Dataset export
              content:
                text/plain:
                  schema:
                    type: string
            400:
              $ref: '#/components/responses/400'
            401:
              $ref: '#/components/responses/401'
            404:
              $ref: '#/components/responses/404'
            500:
              $ref: '#/components/responses/500'
        """
        requested_ids = kwargs["rison"]

        if is_feature_enabled("VERSIONED_EXPORT"):
            timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
            root = f"dataset_export_{timestamp}"
            filename = f"{root}.zip"

            buf = BytesIO()
            with ZipFile(buf, "w") as bundle:
                try:
                    for file_name, file_content in ExportDatasetsCommand(
                            requested_ids).run():
                        with bundle.open(f"{root}/{file_name}", "w") as fp:
                            fp.write(file_content.encode())
                except DatasetNotFoundError:
                    return self.response_404()
            buf.seek(0)

            return send_file(
                buf,
                mimetype="application/zip",
                as_attachment=True,
                attachment_filename=filename,
            )

        query = self.datamodel.session.query(SqlaTable).filter(
            SqlaTable.id.in_(requested_ids))
        query = self._base_filters.apply_all(query)
        items = query.all()
        ids = [item.id for item in items]
        if len(ids) != len(requested_ids):
            return self.response_404()

        data = [t.export_to_dict() for t in items]
        return Response(
            yaml.safe_dump(data),
            headers=generate_download_headers("yaml"),
            mimetype="application/text",
        )
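
# The `q` parameter is Rison-encoded, so a list of dataset ids is written as
# !(...). A sketch of calling the endpoint with requests (host, ids, and the
# auth token are placeholders, not from the original code):
import requests

response = requests.get(
    "http://localhost:8088/api/v1/dataset/export/",
    params={"q": "!(1,2,3)"},  # rison for the id list [1, 2, 3]
    headers={"Authorization": "Bearer <token>"},
)
with open("dataset_export.zip", "wb") as fp:
    fp.write(response.content)  # a zip bundle when VERSIONED_EXPORT is enabled
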
def test_export(app_context: None, session: Session) -> None:
    """
    Test exporting a dataset.
    """
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.datasets.commands.export import ExportDatasetsCommand
    from superset.models.core import Database

    engine = session.get_bind()
    SqlaTable.metadata.create_all(engine)  # pylint: disable=no-member

    database = Database(database_name="my_database",
                        sqlalchemy_uri="sqlite://")
    session.add(database)
    session.flush()

    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="user_id", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(
            column_name="profit",
            type="INTEGER",
            expression="revenue-expenses",
            extra=json.dumps({"certified_by": "User"}),
        ),
    ]
    metrics = [
        SqlMetric(
            metric_name="cnt",
            expression="COUNT(*)",
            extra=json.dumps({"warning_markdown": None}),
        ),
    ]

    sqla_table = SqlaTable(
        table_name="my_table",
        columns=columns,
        metrics=metrics,
        main_dttm_col="ds",
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        schema="my_schema",
        sql=None,
        params=json.dumps({
            "remote_id": 64,
            "database_name": "examples",
            "import_time": 1606677834,
        }),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )

    export = list(
        ExportDatasetsCommand._export(sqla_table)  # pylint: disable=protected-access
    )
    assert export == [
        (
            "datasets/my_database/my_table.yaml",
            f"""table_name: my_table
main_dttm_col: ds
description: This is the description
default_endpoint: null
offset: -8
cache_timeout: 3600
schema: my_schema
sql: null
params:
  remote_id: 64
  database_name: examples
  import_time: 1606677834
template_params:
  answer: '42'
filter_select_enabled: 1
fetch_values_predicate: foo IN (1, 2)
extra:
  warning_markdown: '*WARNING*'
uuid: null
metrics:
- metric_name: cnt
  verbose_name: null
  metric_type: null
  expression: COUNT(*)
  description: null
  d3format: null
  extra:
    warning_markdown: null
  warning_text: null
columns:
- column_name: profit
  verbose_name: null
  is_dttm: null
  is_active: null
  type: INTEGER
  groupby: null
  filterable: null
  expression: revenue-expenses
  description: null
  python_date_format: null
  extra:
    certified_by: User
- column_name: ds
  verbose_name: null
  is_dttm: 1
  is_active: null
  type: TIMESTAMP
  groupby: null
  filterable: null
  expression: null
  description: null
  python_date_format: null
  extra: null
- column_name: user_id
  verbose_name: null
  is_dttm: null
  is_active: null
  type: INTEGER
  groupby: null
  filterable: null
  expression: null
  description: null
  python_date_format: null
  extra: null
- column_name: expenses
  verbose_name: null
  is_dttm: null
  is_active: null
  type: INTEGER
  groupby: null
  filterable: null
  expression: null
  description: null
  python_date_format: null
  extra: null
- column_name: revenue
  verbose_name: null
  is_dttm: null
  is_active: null
  type: INTEGER
  groupby: null
  filterable: null
  expression: null
  description: null
  python_date_format: null
  extra: null
version: 1.0.0
database_uuid: {database.uuid}
""",
        ),
        (
            "databases/my_database.yaml",
            f"""database_name: my_database
sqlalchemy_uri: sqlite://
cache_timeout: null
expose_in_sqllab: true
allow_run_async: false
allow_ctas: false
allow_cvas: false
allow_file_upload: false
extra:
  metadata_params: {{}}
  engine_params: {{}}
  metadata_cache_timeout: {{}}
  schemas_allowed_for_file_upload: []
uuid: {database.uuid}
version: 1.0.0
""",
        ),
    ]
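
# Because _export yields plain (file_name, yaml_text) tuples, the expected
# strings above can also be checked structurally rather than by exact string
# match, e.g. inside the same test (a sketch, not part of the original):
#
#     dataset_yaml = yaml.safe_load(export[0][1])
#     assert dataset_yaml["table_name"] == "my_table"
#     assert dataset_yaml["database_uuid"] == str(database.uuid)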