Ejemplo n.º 1
0
    def _create_table_in_superset(self, table_name: str, database: Database,
                                  schema) -> SqlaTable:
        """ Create the Table itself

        Keyword arguments:
        table_name -- the name of the table to create
        database -- the database object which will be used
        schema -- the schema of table

        Raises:
            TableCreationException:  1. If the Table object could not be created
                                     2. If the Table could not be created in the database
        """
        try:
            table = (db.session.query(SqlaTable).filter_by(
                table_name=table_name, schema=schema,
                database_id=database.id).one_or_none())
            if table:
                table.fetch_metadata()
            if not table:
                table = SqlaTable(table_name=table_name)
                table.database = database
                table.database_id = database.id
                table.user_id = g.user.id
                table.schema = schema
                table.fetch_metadata()
                db.session.add(table)
            db.session.commit()

            return table
        except Exception as e:
            raise TableCreationException(
                f"Table {table_name} could not be created.", e)
Ejemplo n.º 2
0
    def form_post(self, form):
        database = form.con.data
        schema_name = form.schema.data or ""

        if not self.is_schema_allowed(database, schema_name):
            message = _(
                'Database "{0}" Schema "{1}" is not allowed for csv uploads. '
                "Please contact Superset Admin".format(
                    database.database_name, schema_name
                )
            )
            flash(message, "danger")
            return redirect("/csvtodatabaseview/form")

        csv_file = form.csv_file.data
        form.csv_file.data.filename = secure_filename(form.csv_file.data.filename)
        csv_filename = form.csv_file.data.filename
        path = os.path.join(config["UPLOAD_FOLDER"], csv_filename)
        try:
            utils.ensure_path_exists(config["UPLOAD_FOLDER"])
            csv_file.save(path)
            table = SqlaTable(table_name=form.name.data)
            table.database = form.data.get("con")
            table.database_id = table.database.id
            table.database.db_engine_spec.create_table_from_csv(form, table)
        except Exception as e:
            try:
                os.remove(path)
            except OSError:
                pass
            message = (
                "Table name {} already exists. Please pick another".format(
                    form.name.data
                )
                if isinstance(e, IntegrityError)
                else str(e)
            )
            flash(message, "danger")
            stats_logger.incr("failed_csv_upload")
            return redirect("/csvtodatabaseview/form")

        os.remove(path)
        # Go back to welcome page / splash screen
        db_name = table.database.database_name
        message = _(
            'CSV file "{0}" uploaded to table "{1}" in '
            'database "{2}"'.format(csv_filename, form.name.data, db_name)
        )
        flash(message, "info")
        stats_logger.incr("successful_csv_upload")
        return redirect("/tablemodelview/list/")
Ejemplo n.º 3
0
    def form_post(  # pylint: disable=too-many-locals
        self, form: ColumnarToDatabaseForm
    ) -> Response:
        database = form.con.data
        columnar_table = Table(table=form.name.data, schema=form.schema.data)
        files = form.columnar_file.data
        file_type = {file.filename.split(".")[-1] for file in files}

        if file_type == {"zip"}:
            zipfile_ob = zipfile.ZipFile(  # pylint: disable=consider-using-with
                form.columnar_file.data[0]
            )  # pylint: disable=consider-using-with
            file_type = {filename.split(".")[-1] for filename in zipfile_ob.namelist()}
            files = [
                io.BytesIO((zipfile_ob.open(filename).read(), filename)[0])
                for filename in zipfile_ob.namelist()
            ]

        if len(file_type) > 1:
            message = _(
                "Multiple file extensions are not allowed for columnar uploads."
                " Please make sure all files are of the same extension.",
            )
            flash(message, "danger")
            return redirect("/columnartodatabaseview/form")

        read = pd.read_parquet
        kwargs = {
            "columns": form.usecols.data if form.usecols.data else None,
        }

        if not schema_allows_csv_upload(database, columnar_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for columnar uploads. "
                "Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=columnar_table.schema,
            )
            flash(message, "danger")
            return redirect("/columnartodatabaseview/form")

        if "." in columnar_table.table and columnar_table.schema:
            message = _(
                "You cannot specify a namespace both in the name of the table: "
                '"%(columnar_table.table)s" and in the schema field: '
                '"%(columnar_table.schema)s". Please remove one',
                table=columnar_table.table,
                schema=columnar_table.schema,
            )
            flash(message, "danger")
            return redirect("/columnartodatabaseview/form")

        try:
            chunks = [read(file, **kwargs) for file in files]
            df = pd.concat(chunks)

            database = (
                db.session.query(models.Database)
                .filter_by(id=form.data.get("con").data.get("id"))
                .one()
            )

            database.db_engine_spec.df_to_sql(
                database,
                columnar_table,
                df,
                to_sql_kwargs={
                    "chunksize": 1000,
                    "if_exists": form.if_exists.data,
                    "index": form.index.data,
                    "index_label": form.index_label.data,
                },
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a csv, presto will be a better option
            # to explore the table.
            expore_database = database
            explore_database_id = database.explore_database_id
            if explore_database_id:
                expore_database = (
                    db.session.query(models.Database)
                    .filter_by(id=explore_database_id)
                    .one_or_none()
                    or database
                )

            sqla_table = (
                db.session.query(SqlaTable)
                .filter_by(
                    table_name=columnar_table.table,
                    schema=columnar_table.schema,
                    database_id=expore_database.id,
                )
                .one_or_none()
            )

            if sqla_table:
                sqla_table.fetch_metadata()
            if not sqla_table:
                sqla_table = SqlaTable(table_name=columnar_table.table)
                sqla_table.database = expore_database
                sqla_table.database_id = database.id
                sqla_table.user_id = g.user.get_id()
                sqla_table.schema = columnar_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            message = _(
                'Unable to upload Columnar file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=[file.filename for file in form.columnar_file.data],
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_columnar_upload")
            return redirect("/columnartodatabaseview/form")

        # Go back to welcome page / splash screen
        message = _(
            'Columnar file "%(columnar_filename)s" uploaded to table "%(table_name)s" '
            'in database "%(db_name)s"',
            columnar_filename=[file.filename for file in form.columnar_file.data],
            table_name=str(columnar_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_columnar_upload")
        return redirect("/tablemodelview/list/")
Ejemplo n.º 4
0
    def form_post(self, form: ExcelToDatabaseForm) -> Response:
        database = form.con.data
        excel_table = Table(table=form.name.data, schema=form.schema.data)

        if not schema_allows_csv_upload(database, excel_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for excel uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=excel_table.schema,
            )
            flash(message, "danger")
            return redirect("/exceltodatabaseview/form")

        if "." in excel_table.table and excel_table.schema:
            message = _(
                "You cannot specify a namespace both in the name of the table: "
                '"%(excel_table.table)s" and in the schema field: '
                '"%(excel_table.schema)s". Please remove one',
                table=excel_table.table,
                schema=excel_table.schema,
            )
            flash(message, "danger")
            return redirect("/exceltodatabaseview/form")

        uploaded_tmp_file_path = tempfile.NamedTemporaryFile(  # pylint: disable=consider-using-with
            dir=app.config["UPLOAD_FOLDER"],
            suffix=os.path.splitext(form.excel_file.data.filename)[1].lower(),
            delete=False,
        ).name

        try:
            utils.ensure_path_exists(config["UPLOAD_FOLDER"])
            upload_stream_write(form.excel_file.data, uploaded_tmp_file_path)

            df = pd.read_excel(
                header=form.header.data if form.header.data else 0,
                index_col=form.index_col.data,
                io=form.excel_file.data,
                keep_default_na=not form.null_values.data,
                mangle_dupe_cols=form.mangle_dupe_cols.data,
                na_values=form.null_values.data if form.null_values.data else None,
                parse_dates=form.parse_dates.data,
                skiprows=form.skiprows.data,
                sheet_name=form.sheet_name.data if form.sheet_name.data else 0,
            )

            database = (
                db.session.query(models.Database)
                .filter_by(id=form.data.get("con").data.get("id"))
                .one()
            )

            database.db_engine_spec.df_to_sql(
                database,
                excel_table,
                df,
                to_sql_kwargs={
                    "chunksize": 1000,
                    "if_exists": form.if_exists.data,
                    "index": form.index.data,
                    "index_label": form.index_label.data,
                },
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a excel, presto will be a better option
            # to explore the table.
            expore_database = database
            explore_database_id = database.explore_database_id
            if explore_database_id:
                expore_database = (
                    db.session.query(models.Database)
                    .filter_by(id=explore_database_id)
                    .one_or_none()
                    or database
                )

            sqla_table = (
                db.session.query(SqlaTable)
                .filter_by(
                    table_name=excel_table.table,
                    schema=excel_table.schema,
                    database_id=expore_database.id,
                )
                .one_or_none()
            )

            if sqla_table:
                sqla_table.fetch_metadata()
            if not sqla_table:
                sqla_table = SqlaTable(table_name=excel_table.table)
                sqla_table.database = expore_database
                sqla_table.database_id = database.id
                sqla_table.user_id = g.user.get_id()
                sqla_table.schema = excel_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            message = _(
                'Unable to upload Excel file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=form.excel_file.data.filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_excel_upload")
            return redirect("/exceltodatabaseview/form")

        # Go back to welcome page / splash screen
        message = _(
            'Excel file "%(excel_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            excel_filename=form.excel_file.data.filename,
            table_name=str(excel_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_excel_upload")
        return redirect("/tablemodelview/list/")
Ejemplo n.º 5
0
    def form_post(self, form: CsvToDatabaseForm) -> Response:
        database = form.con.data
        csv_table = Table(table=form.name.data, schema=form.schema.data)

        if not schema_allows_csv_upload(database, csv_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for csv uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=csv_table.schema,
            )
            flash(message, "danger")
            return redirect("/csvtodatabaseview/form")

        if "." in csv_table.table and csv_table.schema:
            message = _(
                "You cannot specify a namespace both in the name of the table: "
                '"%(csv_table.table)s" and in the schema field: '
                '"%(csv_table.schema)s". Please remove one',
                table=csv_table.table,
                schema=csv_table.schema,
            )
            flash(message, "danger")
            return redirect("/csvtodatabaseview/form")

        try:
            df = pd.concat(
                pd.read_csv(
                    chunksize=1000,
                    encoding="utf-8",
                    filepath_or_buffer=form.csv_file.data,
                    header=form.header.data if form.header.data else 0,
                    index_col=form.index_col.data,
                    infer_datetime_format=form.infer_datetime_format.data,
                    iterator=True,
                    keep_default_na=not form.null_values.data,
                    mangle_dupe_cols=form.mangle_dupe_cols.data,
                    usecols=form.usecols.data if form.usecols.data else None,
                    na_values=form.null_values.data if form.null_values.data else None,
                    nrows=form.nrows.data,
                    parse_dates=form.parse_dates.data,
                    sep=form.sep.data,
                    skip_blank_lines=form.skip_blank_lines.data,
                    skipinitialspace=form.skipinitialspace.data,
                    skiprows=form.skiprows.data,
                )
            )

            database = (
                db.session.query(models.Database)
                .filter_by(id=form.data.get("con").data.get("id"))
                .one()
            )

            database.db_engine_spec.df_to_sql(
                database,
                csv_table,
                df,
                to_sql_kwargs={
                    "chunksize": 1000,
                    "if_exists": form.if_exists.data,
                    "index": form.index.data,
                    "index_label": form.index_label.data,
                },
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a csv, presto will be a better option
            # to explore the table.
            expore_database = database
            explore_database_id = database.explore_database_id
            if explore_database_id:
                expore_database = (
                    db.session.query(models.Database)
                    .filter_by(id=explore_database_id)
                    .one_or_none()
                    or database
                )

            sqla_table = (
                db.session.query(SqlaTable)
                .filter_by(
                    table_name=csv_table.table,
                    schema=csv_table.schema,
                    database_id=expore_database.id,
                )
                .one_or_none()
            )

            if sqla_table:
                sqla_table.fetch_metadata()
            if not sqla_table:
                sqla_table = SqlaTable(table_name=csv_table.table)
                sqla_table.database = expore_database
                sqla_table.database_id = database.id
                sqla_table.user_id = g.user.get_id()
                sqla_table.schema = csv_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            message = _(
                'Unable to upload CSV file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=form.csv_file.data.filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_csv_upload")
            return redirect("/csvtodatabaseview/form")

        # Go back to welcome page / splash screen
        message = _(
            'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            csv_filename=form.csv_file.data.filename,
            table_name=str(csv_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_csv_upload")
        return redirect("/tablemodelview/list/")
Ejemplo n.º 6
0
    def form_post(self, form):
        database = form.con.data
        schema_name = form.schema.data or ""

        if not schema_allows_csv_upload(database, schema_name):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for csv uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=schema_name,
            )
            flash(message, "danger")
            return redirect("/csvtodatabaseview/form")

        csv_file = form.csv_file.data
        form.csv_file.data.filename = secure_filename(
            form.csv_file.data.filename)
        csv_filename = form.csv_file.data.filename
        path = os.path.join(config["UPLOAD_FOLDER"], csv_filename)
        try:
            utils.ensure_path_exists(config["UPLOAD_FOLDER"])
            csv_file.save(path)
            table_name = form.name.data

            con = form.data.get("con")
            database = (db.session.query(
                models.Database).filter_by(id=con.data.get("id")).one())
            database.db_engine_spec.create_table_from_csv(form, database)

            table = (db.session.query(SqlaTable).filter_by(
                table_name=table_name,
                schema=form.schema.data,
                database_id=database.id,
            ).one_or_none())
            if table:
                table.fetch_metadata()
            if not table:
                table = SqlaTable(table_name=table_name)
                table.database = database
                table.database_id = database.id
                table.user_id = g.user.id
                table.schema = form.schema.data
                table.fetch_metadata()
                db.session.add(table)
            db.session.commit()
        except Exception as e:  # pylint: disable=broad-except
            db.session.rollback()
            try:
                os.remove(path)
            except OSError:
                pass
            message = _(
                'Unable to upload CSV file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=csv_filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(e),
            )

            flash(message, "danger")
            stats_logger.incr("failed_csv_upload")
            return redirect("/csvtodatabaseview/form")

        os.remove(path)
        # Go back to welcome page / splash screen
        message = _(
            'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            csv_filename=csv_filename,
            table_name=form.name.data,
            db_name=table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_csv_upload")
        return redirect("/tablemodelview/list/")
Ejemplo n.º 7
0
    def form_post(self, form: ExcelToDatabaseForm) -> Response:
        database = form.con.data
        excel_table = Table(table=form.name.data, schema=form.schema.data)

        if not schema_allows_csv_upload(database, excel_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for excel uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=excel_table.schema,
            )
            flash(message, "danger")
            return redirect("/exceltodatabaseview/form")

        if "." in excel_table.table and excel_table.schema:
            message = _(
                "You cannot specify a namespace both in the name of the table: "
                '"%(excel_table.table)s" and in the schema field: '
                '"%(excel_table.schema)s". Please remove one',
                table=excel_table.table,
                schema=excel_table.schema,
            )
            flash(message, "danger")
            return redirect("/exceltodatabaseview/form")

        uploaded_tmp_file_path = tempfile.NamedTemporaryFile(
            dir=app.config["UPLOAD_FOLDER"],
            suffix=os.path.splitext(form.excel_file.data.filename)[1].lower(),
            delete=False,
        ).name

        try:
            utils.ensure_path_exists(config["UPLOAD_FOLDER"])
            upload_stream_write(form.excel_file.data, uploaded_tmp_file_path)

            con = form.data.get("con")
            database = (db.session.query(
                models.Database).filter_by(id=con.data.get("id")).one())

            # some params are not supported by pandas.read_excel (e.g. chunksize).
            # More can be found here:
            # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
            excel_to_df_kwargs = {
                "header": form.header.data if form.header.data else 0,
                "index_col": form.index_col.data,
                "mangle_dupe_cols": form.mangle_dupe_cols.data,
                "skiprows": form.skiprows.data,
                "nrows": form.nrows.data,
                "sheet_name":
                form.sheet_name.data if form.sheet_name.data else 0,
                "parse_dates": form.parse_dates.data,
            }
            if form.null_values.data:
                excel_to_df_kwargs["na_values"] = form.null_values.data
                excel_to_df_kwargs["keep_default_na"] = False

            df_to_sql_kwargs = {
                "name": excel_table.table,
                "if_exists": form.if_exists.data,
                "index": form.index.data,
                "index_label": form.index_label.data,
                "chunksize": 1000,
            }
            database.db_engine_spec.create_table_from_excel(
                uploaded_tmp_file_path,
                excel_table,
                database,
                excel_to_df_kwargs,
                df_to_sql_kwargs,
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a excel, presto will be a better option
            # to explore the table.
            expore_database = database
            explore_database_id = database.explore_database_id
            if explore_database_id:
                expore_database = (db.session.query(models.Database).filter_by(
                    id=explore_database_id).one_or_none() or database)

            sqla_table = (db.session.query(SqlaTable).filter_by(
                table_name=excel_table.table,
                schema=excel_table.schema,
                database_id=expore_database.id,
            ).one_or_none())

            if sqla_table:
                sqla_table.fetch_metadata()
            if not sqla_table:
                sqla_table = SqlaTable(table_name=excel_table.table)
                sqla_table.database = expore_database
                sqla_table.database_id = database.id
                sqla_table.user_id = g.user.id
                sqla_table.schema = excel_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            try:
                os.remove(uploaded_tmp_file_path)
            except OSError:
                pass
            message = _(
                'Unable to upload Excel file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=form.excel_file.data.filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_excel_upload")
            return redirect("/exceltodatabaseview/form")

        os.remove(uploaded_tmp_file_path)
        # Go back to welcome page / splash screen
        message = _(
            'CSV file "%(excel_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            excel_filename=form.excel_file.data.filename,
            table_name=str(excel_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_excel_upload")
        return redirect("/tablemodelview/list/")
Ejemplo n.º 8
0
def test_update_physical_sqlatable(mocker: MockFixture, app_context: None,
                                   session: Session) -> None:
    """
    Test updating the table on a physical dataset.

    When updating the table on a physical dataset by pointing it somewhere else (change
    in database ID, schema, or table name) we should point the ``Dataset`` to an
    existing ``Table`` if possible, and create a new one otherwise.
    """
    # patch session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)
    mocker.patch("superset.datasets.dao.db.session", session)

    from superset.columns.models import Column
    from superset.connectors.sqla.models import SqlaTable, TableColumn
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table
    from superset.tables.schemas import TableSchema

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    columns = [
        TableColumn(column_name="a", type="INTEGER"),
    ]

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=[],
        database=Database(database_name="my_database",
                          sqlalchemy_uri="sqlite://"),
    )
    session.add(sqla_table)
    session.flush()

    # check that the table was created, and that the created dataset points to it
    table = session.query(Table).one()
    assert table.id == 1
    assert table.name == "old_dataset"
    assert table.schema is None
    assert table.database_id == 1

    dataset = session.query(Dataset).one()
    assert dataset.tables == [table]

    # point ``SqlaTable`` to a different database
    new_database = Database(database_name="my_other_database",
                            sqlalchemy_uri="sqlite://")
    session.add(new_database)
    session.flush()
    sqla_table.database = new_database
    session.flush()

    # ignore these keys when comparing results
    ignored_keys = {"created_on", "changed_on", "uuid"}

    # check that the old table still exists, and that the dataset points to the newly
    # created table (id=2) and column (id=2), on the new database (also id=2)
    table_schema = TableSchema()
    tables = [{
        k: v
        for k, v in table_schema.dump(table).items() if k not in ignored_keys
    } for table in session.query(Table).all()]
    assert tables == [
        {
            "created_by": None,
            "extra_json": "{}",
            "name": "old_dataset",
            "changed_by": None,
            "catalog": None,
            "columns": [1],
            "database": 1,
            "external_url": None,
            "schema": None,
            "id": 1,
            "is_managed_externally": False,
        },
        {
            "created_by": None,
            "extra_json": "{}",
            "name": "old_dataset",
            "changed_by": None,
            "catalog": None,
            "columns": [2],
            "database": 2,
            "external_url": None,
            "schema": None,
            "id": 2,
            "is_managed_externally": False,
        },
    ]

    # check that dataset now points to the new table
    assert dataset.tables[0].database_id == 2

    # point ``SqlaTable`` back
    sqla_table.database_id = 1
    session.flush()

    # check that dataset points to the original table
    assert dataset.tables[0].database_id == 1
Ejemplo n.º 9
0
    def form_post(self, form: CsvToDatabaseForm) -> FlaskResponse:
        database = form.con.data
        csv_table = Table(table=form.name.data, schema=form.schema.data)

        if not schema_allows_csv_upload(database, csv_table.schema):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for csv uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=csv_table.schema,
            )
            flash(message, "danger")
            return redirect("/csvtodatabaseview/form")

        if "." in csv_table.table and csv_table.schema:
            message = _(
                "You cannot specify a namespace both in the name of the table: "
                '"%(csv_table.table)s" and in the schema field: '
                '"%(csv_table.schema)s". Please remove one',
                table=csv_table.table,
                schema=csv_table.schema,
            )
            flash(message, "danger")
            return redirect("/csvtodatabaseview/form")

        uploaded_tmp_file_path = tempfile.NamedTemporaryFile(
            dir=app.config["UPLOAD_FOLDER"],
            suffix=os.path.splitext(form.csv_file.data.filename)[1].lower(),
            delete=False,
        ).name

        try:
            utils.ensure_path_exists(config["UPLOAD_FOLDER"])
            upload_stream_write(form.csv_file.data, uploaded_tmp_file_path)

            con = form.data.get("con")
            database = (db.session.query(
                models.Database).filter_by(id=con.data.get("id")).one())
            csv_to_df_kwargs = {
                "sep": form.sep.data,
                "header": form.header.data if form.header.data else 0,
                "index_col": form.index_col.data,
                "mangle_dupe_cols": form.mangle_dupe_cols.data,
                "skipinitialspace": form.skipinitialspace.data,
                "skiprows": form.skiprows.data,
                "nrows": form.nrows.data,
                "skip_blank_lines": form.skip_blank_lines.data,
                "parse_dates": form.parse_dates.data,
                "infer_datetime_format": form.infer_datetime_format.data,
                "chunksize": 1000,
            }
            df_to_sql_kwargs = {
                "name": csv_table.table,
                "if_exists": form.if_exists.data,
                "index": form.index.data,
                "index_label": form.index_label.data,
                "chunksize": 1000,
            }
            database.db_engine_spec.create_table_from_csv(
                uploaded_tmp_file_path,
                csv_table,
                database,
                csv_to_df_kwargs,
                df_to_sql_kwargs,
            )

            # Connect table to the database that should be used for exploration.
            # E.g. if hive was used to upload a csv, presto will be a better option
            # to explore the table.
            expore_database = database
            explore_database_id = database.get_extra().get(
                "explore_database_id", None)
            if explore_database_id:
                expore_database = (db.session.query(models.Database).filter_by(
                    id=explore_database_id).one_or_none() or database)

            sqla_table = (db.session.query(SqlaTable).filter_by(
                table_name=csv_table.table,
                schema=csv_table.schema,
                database_id=expore_database.id,
            ).one_or_none())

            if sqla_table:
                sqla_table.fetch_metadata()
            if not sqla_table:
                sqla_table = SqlaTable(table_name=csv_table.table)
                sqla_table.database = expore_database
                sqla_table.database_id = database.id
                sqla_table.user_id = g.user.id
                sqla_table.schema = csv_table.schema
                sqla_table.fetch_metadata()
                db.session.add(sqla_table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            try:
                os.remove(uploaded_tmp_file_path)
            except OSError:
                pass
            message = _(
                'Unable to upload CSV file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=form.csv_file.data.filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_csv_upload")
            return redirect("/csvtodatabaseview/form")

        os.remove(uploaded_tmp_file_path)
        # Go back to welcome page / splash screen
        message = _(
            'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            csv_filename=form.csv_file.data.filename,
            table_name=str(csv_table),
            db_name=sqla_table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_csv_upload")
        return redirect("/tablemodelview/list/")
Ejemplo n.º 10
0
    def form_post(self, form):
        database = form.con.data
        schema_name = form.schema.data or ""
        if not schema_allows_csv_upload(database, schema_name):
            message = _(
                'Database "%(database_name)s" schema "%(schema_name)s" '
                "is not allowed for csv uploads. Please contact your Superset Admin.",
                database_name=database.database_name,
                schema_name=schema_name,
            )
            flash(message, "danger")
            return redirect("/csvtodatabaseview/form")

        csv_filename = form.csv_file.data.filename
        extension = os.path.splitext(csv_filename)[1].lower()
        path = tempfile.NamedTemporaryFile(dir=app.config["UPLOAD_FOLDER"],
                                           suffix=extension,
                                           delete=False).name
        check_path = app.config["SAVE_FOLDER"] + str(
            os.path.splitext(form.csv_file.data.filename)[0]) + "/"
        save_path = check_path + form.csv_file.data.filename
        form.csv_file.data.filename = path

        try:
            utils.ensure_path_exists(config["UPLOAD_FOLDER"])
            upload_stream_write(form.csv_file.data, path)
            utils.ensure_path_exists(config["SAVE_FOLDER"])
            Path(check_path).mkdir(parents=True, exist_ok=True)
            shutil.copy(path, save_path)
            table_name = form.name.data

            con = form.data.get("con")
            database = (db.session.query(
                models.Database).filter_by(id=con.data.get("id")).one())
            database.db_engine_spec.create_table_from_csv(form, database)
            table = (db.session.query(SqlaTable).filter_by(
                table_name=table_name,
                schema=form.schema.data,
                database_id=database.id,
            ).one_or_none())
            if table:
                table.fetch_metadata()
            if not table:
                table = SqlaTable(table_name=table_name)
                table.database = database
                table.database_id = database.id
                table.user_id = g.user.id
                table.schema = form.schema.data
                table.fetch_metadata()
                db.session.add(table)
            db.session.commit()
        except Exception as ex:  # pylint: disable=broad-except
            db.session.rollback()
            try:
                os.remove(path)
            except OSError:
                pass
            message = _(
                'Unable to upload CSV file "%(filename)s" to table '
                '"%(table_name)s" in database "%(db_name)s". '
                "Error message: %(error_msg)s",
                filename=csv_filename,
                table_name=form.name.data,
                db_name=database.database_name,
                error_msg=str(ex),
            )

            flash(message, "danger")
            stats_logger.incr("failed_csv_upload")
            return redirect("/csvtodatabaseview/form")

        os.remove(path)
        # Go back to welcome page / splash screen
        message = _(
            'CSV file "%(csv_filename)s" uploaded to table "%(table_name)s" in '
            'database "%(db_name)s"',
            csv_filename=csv_filename,
            table_name=form.name.data,
            db_name=table.database.database_name,
        )
        flash(message, "info")
        stats_logger.incr("successful_csv_upload")
        return redirect("/tablemodelview/list/")
def test_update_physical_sqlatable_no_dataset(mocker: MockFixture,
                                              app_context: None,
                                              session: Session) -> None:
    """
    Test updating the table on a physical dataset that it creates
    a new dataset if one didn't already exist.

    When updating the table on a physical dataset by pointing it somewhere else (change
    in database ID, schema, or table name) we should point the ``Dataset`` to an
    existing ``Table`` if possible, and create a new one otherwise.
    """
    # patch session
    mocker.patch("superset.security.SupersetSecurityManager.get_session",
                 return_value=session)
    mocker.patch("superset.datasets.dao.db.session", session)

    from superset.columns.models import Column
    from superset.connectors.sqla.models import SqlaTable, TableColumn
    from superset.datasets.models import Dataset
    from superset.models.core import Database
    from superset.tables.models import Table
    from superset.tables.schemas import TableSchema

    engine = session.get_bind()
    Dataset.metadata.create_all(engine)  # pylint: disable=no-member

    columns = [
        TableColumn(column_name="a", type="INTEGER"),
    ]

    sqla_table = SqlaTable(
        table_name="old_dataset",
        columns=columns,
        metrics=[],
        database=Database(database_name="my_database",
                          sqlalchemy_uri="sqlite://"),
    )
    session.add(sqla_table)
    session.flush()

    # check that the table was created
    table = session.query(Table).one()
    assert table.id == 1

    dataset = session.query(Dataset).one()
    assert dataset.tables == [table]

    # point ``SqlaTable`` to a different database
    new_database = Database(database_name="my_other_database",
                            sqlalchemy_uri="sqlite://")
    session.add(new_database)
    session.flush()
    sqla_table.database = new_database
    session.flush()

    new_dataset = session.query(Dataset).one()

    # check that dataset now points to the new table
    assert new_dataset.tables[0].database_id == 2

    # point ``SqlaTable`` back
    sqla_table.database_id = 1
    session.flush()

    # check that dataset points to the original table
    assert new_dataset.tables[0].database_id == 1