Example No. 1
def check_validator(validator_type: str, config: str) -> None:
    if not AlertValidatorType.valid_type(validator_type):
        raise SupersetException(
            f"Error: {validator_type} is not a valid validator type.")

    config_dict = json.loads(config)

    if validator_type == AlertValidatorType.operator.value:

        if not (config_dict.get("op")
                and config_dict.get("threshold") is not None):
            raise SupersetException(
                "Error: Operator Validator needs specified operator and threshold "
                'values. Add "op" and "threshold" to config.')

        if config_dict["op"] not in OPERATOR_FUNCTIONS:
            raise SupersetException(
                f'Error: {config_dict["op"]} is an invalid operator type. Change '
                f'the "op" value in the config to one of '
                f'["<", "<=", ">", ">=", "==", "!="]')

        if not isinstance(config_dict["threshold"], (int, float)):
            raise SupersetException(
                f'Error: {config_dict["threshold"]} is an invalid threshold value.'
                f' Change the "threshold" value in the config.')
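
A minimal usage sketch for the check_validator shown above, assuming the function, SupersetException, and the "operator" validator type are importable from the Superset codebase; the config payloads are illustrative only.

# Hypothetical usage of check_validator from the example above.
import json

valid_config = json.dumps({"op": ">=", "threshold": 10.0})
check_validator("operator", valid_config)  # passes silently

try:
    # Missing "threshold" triggers the first validation error above.
    check_validator("operator", json.dumps({"op": ">="}))
except SupersetException as ex:
    print(ex)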
Example No. 2
    def pre_add(self, obj):
        try:
            recipients = get_email_address_list(obj.recipients)
            obj.recipients = ", ".join(recipients)
        except Exception:
            raise SupersetException("Invalid email list")

        obj.user = obj.user or g.user
        if not croniter.is_valid(obj.crontab):
            raise SupersetException("Invalid crontab format")
Example No. 3
    def pre_add(self, item: "EmailScheduleView") -> None:
        try:
            recipients = get_email_address_list(item.recipients)
            item.recipients = ", ".join(recipients)
        except Exception:
            raise SupersetException("Invalid email list")

        item.user = item.user or g.user
        if not croniter.is_valid(item.crontab):
            raise SupersetException("Invalid crontab format")
Example No. 4
 def pre_delete(self, obj):
     if obj.slices:
         raise SupersetException(
             Markup(
                 "Cannot delete a datasource that has slices attached to it. "
                 "Here's the list of associated charts: " +
                 ''.join([o.slice_link for o in obj.slices])))
Example No. 5
 def pre_delete(self, item: BaseDatasource) -> None:
     if item.slices:
         raise SupersetException(
             Markup(
                 "Cannot delete a datasource that has slices attached to it. "
                 "Here's the list of associated charts: " +
                 "".join([i.slice_name for i in item.slices])))
Example No. 6
def get_datasource_info(datasource_id: Optional[int],
                        datasource_type: Optional[str],
                        form_data: FormData) -> Tuple[int, Optional[str]]:
    """
    Compatibility layer for handling of datasource info

    datasource_id & datasource_type used to be passed in the URL
    directly; now they should come as part of the form_data.

    This function allows supporting both without duplicating code.

    :param datasource_id: The datasource ID
    :param datasource_type: The datasource type, i.e., 'druid' or 'table'
    :param form_data: The URL form data
    :returns: The datasource ID and type
    :raises SupersetException: If the datasource no longer exists
    """

    datasource = form_data.get("datasource", "")

    if "__" in datasource:
        datasource_id, datasource_type = datasource.split("__")
        # The case where the datasource has been deleted
        if datasource_id == "None":
            datasource_id = None

    if not datasource_id:
        raise SupersetException(
            "The datasource associated with this chart no longer exists")

    datasource_id = int(datasource_id)
    return datasource_id, datasource_type
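
A brief illustration of the fallback logic, assuming the get_datasource_info above is in scope; the form_data payload is hypothetical.

# Hypothetical call: the id/type embedded in form_data take precedence
# over the (absent) URL arguments.
form_data = {"datasource": "12__table"}
datasource_id, datasource_type = get_datasource_info(None, None, form_data)
assert (datasource_id, datasource_type) == (12, "table")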
Example No. 7
 def pre_delete(self, database: Database) -> None:  # pylint: disable=no-self-use
     if database.tables:
         raise SupersetException(
             Markup("Cannot delete a database that has tables attached. "
                    "Here's the list of associated tables: " +
                    ", ".join("{}".format(table)
                              for table in database.tables)))
Example No. 8
def validate_json(obj):
    if obj:
        try:
            json.loads(obj)
        except Exception as e:
            logger.error(f"JSON is not valid {e}")
            raise SupersetException("JSON is not valid")
Example No. 9
def validate_json(obj: Union[bytes, bytearray, str]) -> None:
    if obj:
        try:
            json.loads(obj)
        except Exception as ex:
            logger.error(f"JSON is not valid {ex}")
            raise SupersetException("JSON is not valid")
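
A short sketch of the behaviour, assuming the validate_json above is importable.

# Hypothetical calls against the validate_json shown above.
validate_json('{"key": "value"}')  # parses fine, returns None
validate_json("")                  # falsy input is not validated at all

try:
    validate_json("{not valid json}")
except SupersetException:
    pass  # raised after "JSON is not valid" is logged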
Example No. 10
 def pre_delete(self, obj):  # pylint: disable=no-self-use
     if obj.tables:
         raise SupersetException(
             Markup(
                 "Cannot delete a database that has tables attached. "
                 "Here's the list of associated tables: "
                 + ", ".join("{}".format(o) for o in obj.tables)
             )
         )
Example No. 11
    def get_extra_params(database: "Database") -> Dict[str, Any]:
        """
        For Druid, the path to a SSL certificate is placed in `connect_args`.

        :param database: database instance from which to extract extras
        :raises CertificateException: If certificate is not valid/unparseable
        :raises SupersetException: If database extra json payload is unparseable
        """
        try:
            extra = json.loads(database.extra or "{}")
        except json.JSONDecodeError as ex:
            raise SupersetException("Unable to parse database extras") from ex

        if database.server_cert:
            engine_params = extra.get("engine_params", {})
            connect_args = engine_params.get("connect_args", {})
            connect_args["scheme"] = "https"
            path = utils.create_ssl_cert_file(database.server_cert)
            connect_args["ssl_verify_cert"] = path
            engine_params["connect_args"] = connect_args
            extra["engine_params"] = engine_params
        return extra
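
For reference, a hedged sketch of the shape of the extras returned by this method when a server certificate is configured; the certificate path is a placeholder for whatever utils.create_ssl_cert_file produces.

# Illustrative shape of the extras returned when database.server_cert is set.
expected_extra = {
    "engine_params": {
        "connect_args": {
            "scheme": "https",
            "ssl_verify_cert": "/tmp/generated-server-cert.pem",  # placeholder path
        }
    }
}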
Example No. 12
 def pre_add(self, item: "SliceEmailScheduleView") -> None:
     if item.slice is None:
         raise SupersetException("Slice is mandatory")
     super(SliceEmailScheduleView, self).pre_add(item)
Example No. 13
    def create_table_from_csv(  # pylint: disable=too-many-arguments, too-many-locals
        cls,
        filename: str,
        table: Table,
        database: "Database",
        csv_to_df_kwargs: Dict[str, Any],
        df_to_sql_kwargs: Dict[str, Any],
    ) -> None:
        """Uploads a csv file and creates a superset datasource in Hive."""

        if_exists = df_to_sql_kwargs["if_exists"]
        if if_exists == "append":
            raise SupersetException("Append operation not currently supported")

        def convert_to_hive_type(col_type: str) -> str:
            """maps tableschema's types to hive types"""
            tableschema_to_hive_types = {
                "boolean": "BOOLEAN",
                "integer": "BIGINT",
                "number": "DOUBLE",
                "string": "STRING",
            }
            return tableschema_to_hive_types.get(col_type, "STRING")

        bucket_path = config["CSV_TO_HIVE_UPLOAD_S3_BUCKET"]

        if not bucket_path:
            logger.info("No upload bucket specified")
            raise Exception(
                "No upload bucket specified. You can specify one in the config file."
            )

        upload_prefix = config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
            database, g.user, table.schema)

        # Optional dependency
        from tableschema import (  # pylint: disable=import-error
            Table as TableSchemaTable, )

        hive_table_schema = TableSchemaTable(filename).infer()
        column_name_and_type = []
        for column_info in hive_table_schema["fields"]:
            column_name_and_type.append("`{}` {}".format(
                column_info["name"],
                convert_to_hive_type(column_info["type"])))
        schema_definition = ", ".join(column_name_and_type)

        # ensure table doesn't already exist
        if (if_exists == "fail" and not database.get_df(
                f"SHOW TABLES IN {table.schema} LIKE '{table.table}'").empty):
            raise SupersetException("Table already exists")

        engine = cls.get_engine(database)

        if if_exists == "replace":
            engine.execute(f"DROP TABLE IF EXISTS {str(table)}")

        # Optional dependency
        import boto3  # pylint: disable=import-error

        s3 = boto3.client("s3")
        location = os.path.join("s3a://", bucket_path, upload_prefix,
                                table.table)
        s3.upload_file(
            filename,
            bucket_path,
            os.path.join(upload_prefix, table.table,
                         os.path.basename(filename)),
        )
        sql = text(f"""CREATE TABLE {str(table)} ( {schema_definition} )
            ROW FORMAT DELIMITED FIELDS TERMINATED BY :delim
            STORED AS TEXTFILE LOCATION :location
            tblproperties ('skip.header.line.count'='1')""")
        engine = cls.get_engine(database)
        engine.execute(
            sql,
            delim=csv_to_df_kwargs["sep"].encode().decode("unicode_escape"),
            location=location,
        )
Example No. 14
 def pre_add(self, obj):
     if obj.slice is None:
         raise SupersetException("Slice is mandatory")
     super(SliceEmailScheduleView, self).pre_add(obj)
Example No. 15
 def pre_add(self, obj):
     if obj.dashboard is None:
         raise SupersetException("Dashboard is mandatory")
     super(DashboardEmailScheduleView, self).pre_add(obj)
Example No. 16
    def create_table_from_csv(  # pylint: disable=too-many-arguments, too-many-locals
        cls,
        filename: str,
        table: Table,
        database: "Database",
        csv_to_df_kwargs: Dict[str, Any],
        df_to_sql_kwargs: Dict[str, Any],
    ) -> None:
        """Uploads a csv file and creates a superset datasource in Hive."""
        if_exists = df_to_sql_kwargs["if_exists"]
        if if_exists == "append":
            raise SupersetException("Append operation not currently supported")

        def convert_to_hive_type(col_type: str) -> str:
            """maps tableschema's types to hive types"""
            tableschema_to_hive_types = {
                "boolean": "BOOLEAN",
                "integer": "BIGINT",
                "number": "DOUBLE",
                "string": "STRING",
            }
            return tableschema_to_hive_types.get(col_type, "STRING")

        upload_prefix = config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
            database, g.user, table.schema
        )

        # Optional dependency
        from tableschema import (  # pylint: disable=import-error
            Table as TableSchemaTable,
        )

        hive_table_schema = TableSchemaTable(filename).infer()
        column_name_and_type = []
        for column_info in hive_table_schema["fields"]:
            column_name_and_type.append(
                "`{}` {}".format(
                    column_info["name"], convert_to_hive_type(column_info["type"])
                )
            )
        schema_definition = ", ".join(column_name_and_type)

        # ensure table doesn't already exist
        if if_exists == "fail":
            if table.schema:
                table_exists = not database.get_df(
                    f"SHOW TABLES IN {table.schema} LIKE '{table.table}'"
                ).empty
            else:
                table_exists = not database.get_df(
                    f"SHOW TABLES LIKE '{table.table}'"
                ).empty
            if table_exists:
                raise SupersetException("Table already exists")

        engine = cls.get_engine(database)

        if if_exists == "replace":
            engine.execute(f"DROP TABLE IF EXISTS {str(table)}")
        location = upload_to_s3(filename, upload_prefix, table)
        sql, params = cls.get_create_table_stmt(
            table,
            schema_definition,
            location,
            csv_to_df_kwargs["sep"].encode().decode("unicode_escape"),
            int(csv_to_df_kwargs.get("header", 0)),
            csv_to_df_kwargs.get("na_values"),
        )
        engine = cls.get_engine(database)
        engine.execute(text(sql), **params)
Example No. 17
 def pre_add(self, item: "DashboardEmailScheduleView") -> None:
     if item.dashboard is None:
         raise SupersetException("Dashboard is mandatory")
     super(DashboardEmailScheduleView, self).pre_add(item)
Example No. 18
    def df_to_sql(
        cls,
        database: "Database",
        table: Table,
        df: pd.DataFrame,
        to_sql_kwargs: Dict[str, Any],
    ) -> None:
        """
        Upload data from a Pandas DataFrame to a database.

        The data is stored via the binary Parquet format, which is both less
        problematic and more performant than a text file. More specifically,
        storing a table as a CSV text file has severe limitations, including
        the fact that the Hive CSV SerDe does not support multiline fields.

        Note this method does not create metadata for the table.

        :param database: The database to upload the data to
        :param table: The table to upload the data to
        :param df: The dataframe with data to be uploaded
        :param to_sql_kwargs: The kwargs to be passed to the `pandas.DataFrame.to_sql` method
        """

        engine = cls.get_engine(database)

        if to_sql_kwargs["if_exists"] == "append":
            raise SupersetException("Append operation not currently supported")

        if to_sql_kwargs["if_exists"] == "fail":

            # Ensure table doesn't already exist.
            if table.schema:
                table_exists = not database.get_df(
                    f"SHOW TABLES IN {table.schema} LIKE '{table.table}'"
                ).empty
            else:
                table_exists = not database.get_df(
                    f"SHOW TABLES LIKE '{table.table}'").empty

            if table_exists:
                raise SupersetException("Table already exists")
        elif to_sql_kwargs["if_exists"] == "replace":
            engine.execute(f"DROP TABLE IF EXISTS {str(table)}")

        def _get_hive_type(dtype: np.dtype) -> str:
            hive_type_by_dtype = {
                np.dtype("bool"): "BOOLEAN",
                np.dtype("float64"): "DOUBLE",
                np.dtype("int64"): "BIGINT",
                np.dtype("object"): "STRING",
            }

            return hive_type_by_dtype.get(dtype, "STRING")

        schema_definition = ", ".join(f"`{name}` {_get_hive_type(dtype)}"
                                      for name, dtype in df.dtypes.items())

        with tempfile.NamedTemporaryFile(dir=config["UPLOAD_FOLDER"],
                                         suffix=".parquet") as file:
            pq.write_table(pa.Table.from_pandas(df), where=file.name)

            engine.execute(
                text(f"""
                    CREATE TABLE {str(table)} ({schema_definition})
                    STORED AS PARQUET
                    LOCATION :location
                    """),
                location=upload_to_s3(
                    filename=file.name,
                    upload_prefix=config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
                        database, g.user, table.schema),
                    table=table,
                ),
            )
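
A hedged sketch of invoking this method; the HiveEngineSpec class name, the database object, and the table value are assumptions drawn from the signature above, not from the excerpt.

import pandas as pd

# Hypothetical invocation; `database` is assumed to be a Hive-backed Database
# instance and `table` a Table value like the one in the signature above.
df = pd.DataFrame({"name": ["a", "b"], "value": [1, 2]})
HiveEngineSpec.df_to_sql(  # class name assumed for illustration
    database=database,
    table=table,
    df=df,
    to_sql_kwargs={"if_exists": "replace"},
)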
Example No. 19
def validate_json(obj):
    if obj:
        try:
            json.loads(obj)
        except Exception:
            raise SupersetException('JSON is not valid')