def test_exists_raises_error(self, mocker):
    delta_ds = DeltaTableDataSet(filepath="")
    mocker.patch.object(
        delta_ds,
        "_get_spark",
        side_effect=AnalysisException("Other Exception", []),
    )
    with pytest.raises(DataSetError, match="Other Exception"):
        delta_ds.exists()
def test_exists_raises_error(self, mocker):
    # exists should raise all errors except for
    # AnalysisExceptions clearly indicating a missing file
    spark_data_set = SparkDataSet(filepath="")
    mocker.patch.object(
        spark_data_set,
        "_get_spark",
        side_effect=AnalysisException("Other Exception", []),
    )
    with pytest.raises(DataSetError, match="Other Exception"):
        spark_data_set.exists()
def _convert_delta_exception(e: "JavaObject") -> Optional[CapturedException]:
    """
    Convert Delta's Scala concurrent exceptions to the corresponding Python exceptions.
    """
    s: str = e.toString()
    c: "JavaObject" = e.getCause()
    jvm: "JVMView" = SparkContext._jvm  # type: ignore[attr-defined]
    gw = SparkContext._gateway  # type: ignore[attr-defined]
    stacktrace = jvm.org.apache.spark.util.Utils.exceptionString(e)

    # Temporary workaround until Delta Lake is upgraded to Spark 3.3
    # Below three exception handling cases are copied from
    # https://github.com/apache/spark/blob/master/python/pyspark/sql/utils.py#L156
    if is_instance_of(gw, e, "org.apache.spark.sql.catalyst.parser.ParseException"):
        return ParseException(s.split(': ', 1)[1], stacktrace, c)
    # Order matters. ParseException inherits AnalysisException.
    if is_instance_of(gw, e, "org.apache.spark.sql.AnalysisException"):
        return AnalysisException(s.split(': ', 1)[1], stacktrace, c)
    if is_instance_of(gw, e, "java.lang.IllegalArgumentException"):
        return IllegalArgumentException(s.split(': ', 1)[1], stacktrace, c)

    if s.startswith('io.delta.exceptions.DeltaConcurrentModificationException: '):
        return DeltaConcurrentModificationException(s.split(': ', 1)[1], stacktrace, c)
    if s.startswith('io.delta.exceptions.ConcurrentWriteException: '):
        return ConcurrentWriteException(s.split(': ', 1)[1], stacktrace, c)
    if s.startswith('io.delta.exceptions.MetadataChangedException: '):
        return MetadataChangedException(s.split(': ', 1)[1], stacktrace, c)
    if s.startswith('io.delta.exceptions.ProtocolChangedException: '):
        return ProtocolChangedException(s.split(': ', 1)[1], stacktrace, c)
    if s.startswith('io.delta.exceptions.ConcurrentAppendException: '):
        return ConcurrentAppendException(s.split(': ', 1)[1], stacktrace, c)
    if s.startswith('io.delta.exceptions.ConcurrentDeleteReadException: '):
        return ConcurrentDeleteReadException(s.split(': ', 1)[1], stacktrace, c)
    if s.startswith('io.delta.exceptions.ConcurrentDeleteDeleteException: '):
        return ConcurrentDeleteDeleteException(s.split(': ', 1)[1], stacktrace, c)
    if s.startswith('io.delta.exceptions.ConcurrentTransactionException: '):
        return ConcurrentTransactionException(s.split(': ', 1)[1], stacktrace, c)
    return None
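# A minimal sketch of how _convert_delta_exception could be applied. The decorator below
# is hypothetical (not part of the original module): it catches Py4J errors raised by a
# JVM call and re-raises them as the converted Python exception when one is available.
import functools

from py4j.protocol import Py4JJavaError


def convert_delta_exceptions(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Py4JJavaError as err:
            converted = _convert_delta_exception(err.java_exception)
            if converted is not None:
                # Surface the Delta/Spark exception as its Python counterpart.
                raise converted from None
            raise
    return wrapper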
def get_data_frame_count_type_of_topic(data_frame: DataFrame) -> pb.DataFrame:
    """
    From all the data, take the TopicID and Question columns and, for each topic,
    count the number of different SubTopic/Question values.

    :param data_frame: generated with PySpark, containing all the data from the CSV file
    :return: data frame of the pandas package
    """
    try:
        data_frame = data_frame \
            .select("TopicID", "Question") \
            .distinct() \
            .groupBy("TopicID") \
            .count() \
            .sort("TopicID")
    except Py4JError:
        raise AnalysisException('One of the columns is incorrect')
    print("The following table represents the number of question types for each topic")
    data_frame.show()
    data_frame_pandas = data_frame.toPandas()
    return data_frame_pandas
def read_csv_with_data_frame(file_csv: str) -> DataFrame:
    """
    Read a CSV file as a data frame with Spark.

    :param file_csv: file name of the CSV
    :return: all the data of the file as a data frame
    """
    spark_session = SparkSession \
        .builder \
        .getOrCreate()
    logger = spark_session._jvm.org.apache.log4j
    logger.LogManager.getLogger("org").setLevel(logger.Level.WARN)
    try:
        data_frame = spark_session \
            .read \
            .format("csv") \
            .options(header='true', inferschema='true') \
            .load(file_csv)
    except Py4JError:
        raise AnalysisException('There is no CSV file at: ' + str(file_csv))
    return data_frame
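# A minimal usage sketch combining the two functions above. The file name "topics.csv"
# and its "TopicID"/"Question" columns are assumptions for illustration only.
if __name__ == "__main__":
    raw_df = read_csv_with_data_frame("topics.csv")
    counts_pandas = get_data_frame_count_type_of_topic(raw_df)
    print(counts_pandas.head())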
def faulty_get_spark():
    raise AnalysisException("Other Exception", [])
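# A sketch of how faulty_get_spark might be wired into a test. It assumes a
# SparkDataSet/DataSetError pair as in the tests above; the use of pytest's
# monkeypatch fixture here is an assumption for illustration, not the original wiring.
def test_exists_reraises_other_analysis_exception(self, monkeypatch):
    spark_data_set = SparkDataSet(filepath="")
    monkeypatch.setattr(spark_data_set, "_get_spark", faulty_get_spark)
    with pytest.raises(DataSetError, match="Other Exception"):
        spark_data_set.exists()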