def pre_add(self, item: "EmailScheduleView") -> None: try: recipients = get_email_address_list(item.recipients) item.recipients = ", ".join(recipients) except Exception as ex: raise SupersetException("Invalid email list") from ex item.user = item.user or g.user if not croniter.is_valid(item.crontab): raise SupersetException("Invalid crontab format")
def pre_add(self, item):
    try:
        recipients = get_email_address_list(item.recipients)
        item.recipients = ", ".join(recipients)
    except Exception:
        raise SupersetException("Invalid email list")

    item.user = item.user or g.user
    if not croniter.is_valid(item.crontab):
        raise SupersetException("Invalid crontab format")
def pre_add(self, obj):
    try:
        recipients = get_email_address_list(obj.recipients)
        obj.recipients = ', '.join(recipients)
    except Exception:
        raise SupersetException('Invalid email list')

    obj.user = obj.user or g.user
    if not croniter.is_valid(obj.crontab):
        raise SupersetException('Invalid crontab format')
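# A quick illustration of the two checks performed by the pre_add variants above:
# the recipients string is normalized through get_email_address_list, and the
# crontab is validated with croniter. The expressions below are placeholders.
from croniter import croniter

assert croniter.is_valid("0 8 * * *")         # accepted: every day at 08:00
assert not croniter.is_valid("every monday")  # rejected -> "Invalid crontab format"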
def validate_json(obj):
    if obj:
        try:
            json.loads(obj)
        except Exception as e:
            logger.error(f"JSON is not valid {e}")
            raise SupersetException("JSON is not valid")
def pre_delete(self, obj):
    if obj.slices:
        raise SupersetException(
            Markup(
                'Cannot delete a datasource that has slices attached to it.'
                "Here's the list of associated charts: "
                + ''.join([o.slice_link for o in obj.slices])
            )
        )
def validate_json(obj: Union[bytes, bytearray, str]) -> None:
    if obj:
        try:
            json.loads(obj)
        except Exception as ex:
            logger.error(f"JSON is not valid {ex}")
            raise SupersetException("JSON is not valid")
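# A minimal usage sketch of validate_json above: the call is a no-op for valid JSON
# (and for empty/None input) and raises SupersetException otherwise. SupersetException
# is assumed to be importable from superset.exceptions.
from superset.exceptions import SupersetException

validate_json('{"color_scheme": "d3Category10"}')  # parses cleanly, returns None
validate_json(None)                                # falsy input is skipped entirely

try:
    validate_json("{not: valid json}")
except SupersetException:
    pass  # "JSON is not valid" is raised after the parse error is logged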
def pre_delete(self, item: BaseDatasource) -> None:
    if item.slices:
        raise SupersetException(
            Markup(
                "Cannot delete a datasource that has slices attached to it."
                "Here's the list of associated charts: "
                + "".join([i.slice_name for i in item.slices])
            )
        )
def pre_delete(self, database: Database) -> None:  # pylint: disable=no-self-use
    if database.tables:
        raise SupersetException(
            Markup(
                "Cannot delete a database that has tables attached. "
                "Here's the list of associated tables: "
                + ", ".join("{}".format(table) for table in database.tables)
            )
        )
def get_datasource_info(
    datasource_id: Optional[int],
    datasource_type: Optional[str],
    form_data: FormData,
) -> Tuple[int, Optional[str]]:
    """
    Compatibility layer for handling of datasource info.

    datasource_id & datasource_type used to be passed in the URL directory,
    now they should come as part of the form_data. This function allows
    supporting both without duplicating code.

    :param datasource_id: The datasource ID
    :param datasource_type: The datasource type, i.e., 'druid' or 'table'
    :param form_data: The URL form data
    :returns: The datasource ID and type
    :raises SupersetException: If the datasource no longer exists
    """
    datasource = form_data.get("datasource", "")

    if "__" in datasource:
        datasource_id, datasource_type = datasource.split("__")
        # The case where the datasource has been deleted
        if datasource_id == "None":
            datasource_id = None

    if not datasource_id:
        raise SupersetException(
            "The datasource associated with this chart no longer exists"
        )

    datasource_id = int(datasource_id)
    return datasource_id, datasource_type
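# A hedged sketch of how get_datasource_info resolves the two call styles described
# in its docstring. The ids below are illustrative only.
assert get_datasource_info(None, None, {"datasource": "12__table"}) == (12, "table")

# Legacy style: id/type passed explicitly, form_data carries no "datasource" key.
assert get_datasource_info(12, "table", {}) == (12, "table")

# Deleted datasource: form_data holds "None__table", so the id collapses to None and
# SupersetException("The datasource associated with this chart no longer exists") is raised.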
def pre_delete(self, obj):  # pylint: disable=no-self-use
    if obj.tables:
        raise SupersetException(
            Markup(
                "Cannot delete a database that has tables attached. "
                "Here's the list of associated tables: "
                + ", ".join("{}".format(o) for o in obj.tables)
            )
        )
def check_validator(validator_type: str, config: str) -> None:
    if not AlertValidatorType.valid_type(validator_type):
        raise SupersetException(
            f"Error: {validator_type} is not a valid validator type."
        )

    config_dict = json.loads(config)

    if validator_type == AlertValidatorType.operator.value:
        if not (config_dict.get("op") and config_dict.get("threshold")):
            raise SupersetException(
                "Error: Operator Validator needs specified operator and threshold "
                'values. Add "op" and "threshold" to config.'
            )

        if not config_dict["op"] in OPERATOR_FUNCTIONS.keys():
            raise SupersetException(
                f'Error: {config_dict["op"]} is an invalid operator type. Change '
                f'the "op" value in the config to one of '
                f'["<", "<=", ">", ">=", "==", "!="]'
            )

        if not isinstance(config_dict["threshold"], (int, float)):
            raise SupersetException(
                f'Error: {config_dict["threshold"]} is an invalid threshold value.'
                f' Change the "threshold" value in the config.'
            )
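# A minimal sketch of configs that check_validator above would accept or reject,
# assuming AlertValidatorType.operator.value == "operator" (as the enum name suggests)
# and that OPERATOR_FUNCTIONS is keyed by the comparison operators listed in the error text.
check_validator("operator", '{"op": ">=", "threshold": 100}')  # passes silently

try:
    check_validator("operator", '{"op": "between", "threshold": 100}')
except SupersetException:
    pass  # "between" is not one of ["<", "<=", ">", ">=", "==", "!="]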
def get_extra_params(database: "Database") -> Dict[str, Any]:
    """
    For Druid, the path to an SSL certificate is placed in `connect_args`.

    :param database: database instance from which to extract extras
    :raises CertificateException: If certificate is not valid/unparseable
    :raises SupersetException: If database extra json payload is unparseable
    """
    try:
        extra = json.loads(database.extra or "{}")
    except json.JSONDecodeError as ex:
        raise SupersetException("Unable to parse database extras") from ex

    if database.server_cert:
        engine_params = extra.get("engine_params", {})
        connect_args = engine_params.get("connect_args", {})
        connect_args["scheme"] = "https"
        path = utils.create_ssl_cert_file(database.server_cert)
        connect_args["ssl_verify_cert"] = path
        engine_params["connect_args"] = connect_args
        extra["engine_params"] = engine_params
    return extra
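# Illustrative sketch of the shape get_extra_params returns. The database object here
# is a stand-in (types.SimpleNamespace), not the real Database model.
import types

db = types.SimpleNamespace(
    extra='{"engine_params": {"connect_args": {"host": "druid-broker"}}}',
    server_cert=None,
)
assert get_extra_params(db)["engine_params"]["connect_args"] == {"host": "druid-broker"}
# With server_cert set, the same dict additionally gains "scheme": "https" and
# "ssl_verify_cert": <path written by utils.create_ssl_cert_file(...)>. A malformed
# `extra` payload raises SupersetException("Unable to parse database extras").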
def pre_add(self, item: "DashboardEmailScheduleView") -> None: if item.dashboard is None: raise SupersetException("Dashboard is mandatory") super(DashboardEmailScheduleView, self).pre_add(item)
def create_table_from_csv(  # pylint: disable=too-many-arguments, too-many-locals
    cls,
    filename: str,
    table: Table,
    database: "Database",
    csv_to_df_kwargs: Dict[str, Any],
    df_to_sql_kwargs: Dict[str, Any],
) -> None:
    """Uploads a csv file and creates a superset datasource in Hive."""
    if_exists = df_to_sql_kwargs["if_exists"]
    if if_exists == "append":
        raise SupersetException("Append operation not currently supported")

    def convert_to_hive_type(col_type: str) -> str:
        """maps tableschema's types to hive types"""
        tableschema_to_hive_types = {
            "boolean": "BOOLEAN",
            "integer": "BIGINT",
            "number": "DOUBLE",
            "string": "STRING",
        }
        return tableschema_to_hive_types.get(col_type, "STRING")

    bucket_path = config["CSV_TO_HIVE_UPLOAD_S3_BUCKET"]

    if not bucket_path:
        logger.info("No upload bucket specified")
        raise Exception(
            "No upload bucket specified. You can specify one in the config file."
        )

    upload_prefix = config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
        database, g.user, table.schema
    )

    # Optional dependency
    from tableschema import (  # pylint: disable=import-error
        Table as TableSchemaTable,
    )

    hive_table_schema = TableSchemaTable(filename).infer()
    column_name_and_type = []
    for column_info in hive_table_schema["fields"]:
        column_name_and_type.append(
            "`{}` {}".format(
                column_info["name"], convert_to_hive_type(column_info["type"])
            )
        )
    schema_definition = ", ".join(column_name_and_type)

    # ensure table doesn't already exist
    if (
        if_exists == "fail"
        and not database.get_df(
            f"SHOW TABLES IN {table.schema} LIKE '{table.table}'"
        ).empty
    ):
        raise SupersetException("Table already exists")

    engine = cls.get_engine(database)

    if if_exists == "replace":
        engine.execute(f"DROP TABLE IF EXISTS {str(table)}")

    # Optional dependency
    import boto3  # pylint: disable=import-error

    s3 = boto3.client("s3")
    location = os.path.join("s3a://", bucket_path, upload_prefix, table.table)
    s3.upload_file(
        filename,
        bucket_path,
        os.path.join(upload_prefix, table.table, os.path.basename(filename)),
    )
    sql = text(
        f"""CREATE TABLE {str(table)} ( {schema_definition} )
        ROW FORMAT DELIMITED FIELDS TERMINATED BY :delim
        STORED AS TEXTFILE LOCATION :location
        tblproperties ('skip.header.line.count'='1')"""
    )
    engine = cls.get_engine(database)
    engine.execute(
        sql,
        delim=csv_to_df_kwargs["sep"].encode().decode("unicode_escape"),
        location=location,
    )
def pre_add(self, obj):
    if obj.slice is None:
        raise SupersetException("Slice is mandatory")
    super(SliceEmailScheduleView, self).pre_add(obj)
def pre_add(self, obj):
    if obj.dashboard is None:
        raise SupersetException("Dashboard is mandatory")
    super(DashboardEmailScheduleView, self).pre_add(obj)
def create_table_from_csv(  # pylint: disable=too-many-arguments, too-many-locals
    cls,
    filename: str,
    table: Table,
    database: "Database",
    csv_to_df_kwargs: Dict[str, Any],
    df_to_sql_kwargs: Dict[str, Any],
) -> None:
    """Uploads a csv file and creates a superset datasource in Hive."""
    if_exists = df_to_sql_kwargs["if_exists"]
    if if_exists == "append":
        raise SupersetException("Append operation not currently supported")

    def convert_to_hive_type(col_type: str) -> str:
        """maps tableschema's types to hive types"""
        tableschema_to_hive_types = {
            "boolean": "BOOLEAN",
            "integer": "BIGINT",
            "number": "DOUBLE",
            "string": "STRING",
        }
        return tableschema_to_hive_types.get(col_type, "STRING")

    upload_prefix = config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
        database, g.user, table.schema
    )

    # Optional dependency
    from tableschema import (  # pylint: disable=import-error
        Table as TableSchemaTable,
    )

    hive_table_schema = TableSchemaTable(filename).infer()
    column_name_and_type = []
    for column_info in hive_table_schema["fields"]:
        column_name_and_type.append(
            "`{}` {}".format(
                column_info["name"], convert_to_hive_type(column_info["type"])
            )
        )
    schema_definition = ", ".join(column_name_and_type)

    # ensure table doesn't already exist
    if if_exists == "fail":
        if table.schema:
            table_exists = not database.get_df(
                f"SHOW TABLES IN {table.schema} LIKE '{table.table}'"
            ).empty
        else:
            table_exists = not database.get_df(
                f"SHOW TABLES LIKE '{table.table}'"
            ).empty
        if table_exists:
            raise SupersetException("Table already exists")

    engine = cls.get_engine(database)

    if if_exists == "replace":
        engine.execute(f"DROP TABLE IF EXISTS {str(table)}")

    location = upload_to_s3(filename, upload_prefix, table)
    sql, params = cls.get_create_table_stmt(
        table,
        schema_definition,
        location,
        csv_to_df_kwargs["sep"].encode().decode("unicode_escape"),
        int(csv_to_df_kwargs.get("header", 0)),
        csv_to_df_kwargs.get("na_values"),
    )
    engine = cls.get_engine(database)
    engine.execute(text(sql), **params)
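# A standalone sketch of the type-mapping step inside create_table_from_csv above:
# per-column types inferred by tableschema are mapped onto Hive column types, with
# STRING as the fallback, to build the backtick-quoted column list used in the
# CREATE TABLE statement. Column names below are made up for illustration.
tableschema_to_hive_types = {
    "boolean": "BOOLEAN",
    "integer": "BIGINT",
    "number": "DOUBLE",
    "string": "STRING",
}
inferred_fields = [("region", "string"), ("units", "integer"), ("revenue", "number")]
schema_definition = ", ".join(
    f"`{name}` {tableschema_to_hive_types.get(col_type, 'STRING')}"
    for name, col_type in inferred_fields
)
assert schema_definition == "`region` STRING, `units` BIGINT, `revenue` DOUBLE"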
def validate_json(obj):
    if obj:
        try:
            json.loads(obj)
        except Exception:
            raise SupersetException('JSON is not valid')
def df_to_sql(
    cls,
    database: "Database",
    table: Table,
    df: pd.DataFrame,
    to_sql_kwargs: Dict[str, Any],
) -> None:
    """
    Upload data from a Pandas DataFrame to a database.

    The data is stored via the binary Parquet format which is both less problematic
    and more performant than a text file. More specifically, storing a table as a
    CSV text file has severe limitations, including the fact that the Hive CSV
    SerDe does not support multiline fields.

    Note this method does not create metadata for the table.

    :param database: The database to upload the data to
    :param table: The table to upload the data to
    :param df: The dataframe with data to be uploaded
    :param to_sql_kwargs: The kwargs to be passed to the `pandas.DataFrame.to_sql` method
    """
    engine = cls.get_engine(database)

    if to_sql_kwargs["if_exists"] == "append":
        raise SupersetException("Append operation not currently supported")

    if to_sql_kwargs["if_exists"] == "fail":
        # Ensure table doesn't already exist.
        if table.schema:
            table_exists = not database.get_df(
                f"SHOW TABLES IN {table.schema} LIKE '{table.table}'"
            ).empty
        else:
            table_exists = not database.get_df(
                f"SHOW TABLES LIKE '{table.table}'"
            ).empty

        if table_exists:
            raise SupersetException("Table already exists")
    elif to_sql_kwargs["if_exists"] == "replace":
        engine.execute(f"DROP TABLE IF EXISTS {str(table)}")

    def _get_hive_type(dtype: np.dtype) -> str:
        hive_type_by_dtype = {
            np.dtype("bool"): "BOOLEAN",
            np.dtype("float64"): "DOUBLE",
            np.dtype("int64"): "BIGINT",
            np.dtype("object"): "STRING",
        }
        return hive_type_by_dtype.get(dtype, "STRING")

    schema_definition = ", ".join(
        f"`{name}` {_get_hive_type(dtype)}" for name, dtype in df.dtypes.items()
    )

    with tempfile.NamedTemporaryFile(
        dir=config["UPLOAD_FOLDER"], suffix=".parquet"
    ) as file:
        pq.write_table(pa.Table.from_pandas(df), where=file.name)

        engine.execute(
            text(
                f"""
                CREATE TABLE {str(table)} ({schema_definition})
                STORED AS PARQUET
                LOCATION :location
                """
            ),
            location=upload_to_s3(
                filename=file.name,
                upload_prefix=config["CSV_TO_HIVE_UPLOAD_DIRECTORY_FUNC"](
                    database, g.user, table.schema
                ),
                table=table,
            ),
        )
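# A small sketch of the dtype mapping used by df_to_sql above: pandas dtypes are
# translated to Hive column types before the Parquet-backed CREATE TABLE is issued.
# The DataFrame below is illustrative only.
import numpy as np
import pandas as pd

df = pd.DataFrame({"name": ["a", "b"], "hits": [1, 2], "score": [0.5, 0.7]})
hive_type_by_dtype = {
    np.dtype("bool"): "BOOLEAN",
    np.dtype("float64"): "DOUBLE",
    np.dtype("int64"): "BIGINT",
    np.dtype("object"): "STRING",
}
schema_definition = ", ".join(
    f"`{name}` {hive_type_by_dtype.get(dtype, 'STRING')}"
    for name, dtype in df.dtypes.items()
)
assert schema_definition == "`name` STRING, `hits` BIGINT, `score` DOUBLE"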
def pre_add(self, item: "SliceEmailScheduleView") -> None: if item.slice is None: raise SupersetException("Slice is mandatory") super(SliceEmailScheduleView, self).pre_add(item)