Example #1
    def test_isDeltaTable(self) -> None:
        df = self.spark.createDataFrame([('a', 1), ('b', 2), ('c', 3)], ["key", "value"])
        df.write.format("parquet").save(self.tempFile)
        tempFile2 = self.tempFile + '_2'
        df.write.format("delta").save(tempFile2)
        self.assertEqual(DeltaTable.isDeltaTable(self.spark, self.tempFile), False)
        self.assertEqual(DeltaTable.isDeltaTable(self.spark, tempFile2), True)
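A hedged companion check, not part of the original test: isDeltaTable is also expected to return False for a path that holds no Delta table at all, which is why the later examples use it as a create-if-missing guard. The method and path name below are illustrative only.

    def test_isDeltaTable_missing_path(self) -> None:
        # Illustrative assumption: a path with no Delta log is reported as
        # not being a Delta table, so callers can safely create one there.
        missing_path = self.tempFile + '_missing'
        self.assertEqual(DeltaTable.isDeltaTable(self.spark, missing_path), False)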
Example #2
def createDeltaBackedState(tableName, overwrite=False):

    from delta.tables import DeltaTable
    import pyspark.sql.types as T

    db_location = "dbfs:/home/[email protected]/streamingWorkshop/db"
    db_table_name = "sw_db." + tableName
    checkpoint_location = db_location + "/checkpointTables/" + db_table_name

    delta_schema = (T.StructType([
        T.StructField("item_id", T.LongType()),
        T.StructField("timestamp", T.TimestampType()),
        T.StructField("sales", T.LongType())
    ]))

    # Create an empty Delta table if it does not exist. This is required for the MERGE to work in the first mini batch.
    if overwrite or not DeltaTable.isDeltaTable(
            spark, db_location + "/" + db_table_name):
        (spark.createDataFrame([], delta_schema)
         .write.mode("overwrite")
         .option("overwriteSchema", "true")
         .format("delta")
         .saveAsTable(db_table_name))
        spark.sql(
            f"ALTER TABLE {db_table_name} SET TBLPROPERTIES (delta.autoOptimize.optimizeWrite = true, delta.autoOptimize.autoCompact = false)"
        )
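A possible call site for the helper above; the table name is an assumption, not taken from the workshop code.

# Hypothetical usage: make sure the Delta-backed state table exists before
# the first streaming micro-batch tries to MERGE into it.
createDeltaBackedState("itemSalesState")

# Recreate it from scratch, e.g. after a schema change.
createDeltaBackedState("itemSalesState", overwrite=True)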
Example #3
    def _merge_into_table(self, df, destination_path, checkpoints_path, condition):
        """ Merges data from the given dataframe into the Delta table at the specified destination_path, based on the given condition.
            If no Delta table exists at the specified destination_path, a new Delta table is created and the data from the given dataframe is inserted.
            e.g. _merge_into_table(df_lookup, np_destination_path, source_path + '/_checkpoints/delta_np', "current.id_pseudonym = updates.id_pseudonym")
        """
        if DeltaTable.isDeltaTable(spark, destination_path):
            dt = DeltaTable.forPath(spark, destination_path)

            def upsert(batch_df, batchId):
                dt.alias("current").merge(
                    batch_df.alias("updates"), condition).whenMatchedUpdateAll(
                    ).whenNotMatchedInsertAll().execute()

            query = df.writeStream.format("delta").foreachBatch(
                upsert).outputMode("update").trigger(once=True).option(
                    "checkpointLocation", checkpoints_path)
        else:
            logger.info(
                f'Delta table does not yet exist at {destination_path} - creating one now and inserting initial data.'
            )
            query = df.writeStream.format("delta").outputMode(
                "append").trigger(once=True).option("checkpointLocation",
                                                    checkpoints_path)
        query = query.start(destination_path)
        # Block until the query terminates (via stop() or an error); a
        # StreamingQueryException is raised if an exception occurs.
        query.awaitTermination()
        logger.info(query.lastProgress)
Example #4
    def merge_write(logger, df_dict: Dict[str, DataFrame],
                    rules: Dict[str, str], output_path: str,
                    spark: SparkSession):
        """
        Write data if the dataset doesn't exist or merge it to the existing dataset
        Args:
            logger: Logger instance used to log events
            df_dict: Dictionary of the datasets with the structure {Name: Dataframe}
            rules: Matching rules used to merge
            output_path: Path to write the data
            spark: Spark instance

        Returns:

        """
        try:
            from delta.tables import DeltaTable
            for df_name, df in df_dict.items():
                file_path = path.join(output_path, df_name)
                if DeltaTable.isDeltaTable(spark, file_path):
                    delta_table = DeltaTable.forPath(spark, file_path)
                    delta_table.alias("old").merge(
                        df.alias("new"), rules.get(df_name)
                    ).whenMatchedUpdateAll().whenNotMatchedInsertAll()
                else:
                    df.write.format("delta").save(file_path)

        except Exception as e:
            logger.error(
                "Writing sanitized data couldn't be performed: {}\n{}".format(
                    e, traceback.format_exc()))
            raise e
        else:
            logger.info("Sanitized dataframes written in {} folder".format(
                output_path))
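A sketch of how merge_write might be invoked, assuming it is reachable as a plain function or static helper; the dataframe, rule, and output path are assumptions chosen to match the "old"/"new" aliases used in the merge above.

# Hypothetical call site: df_customers, logger, and spark are assumed to
# exist; rule keys must match the keys of df_dict.
merge_write(
    logger,
    df_dict={"customers": df_customers},
    rules={"customers": "old.customer_id = new.customer_id"},
    output_path="/mnt/sanitized",
    spark=spark,
)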
Example #5
def get_delta_table(
        spark: SparkSession,
        schema: StructType,
        delta_library_jar: str,
        delta_path: str):
    # load delta library jar, so we can use delta module
    spark.sparkContext.addPyFile(delta_library_jar)
    from delta.tables import DeltaTable

    # check existence of delta table
    if not DeltaTable.isDeltaTable(spark, delta_path):
        print(f">>> Delta table: {delta_path} is not initialized, performing initialization..")
        df = spark.createDataFrame([], schema=schema)
        df.write.format("delta").save(delta_path)

    return DeltaTable.forPath(spark, delta_path)
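A short usage sketch for get_delta_table; the schema, jar path, table path, and updates_df below are all assumptions made for illustration.

from pyspark.sql.types import LongType, StringType, StructField, StructType

# Hypothetical schema and locations.
schema = StructType([
    StructField("id", LongType()),
    StructField("value", StringType()),
])
events = get_delta_table(spark, schema, "/jars/delta-core.jar", "/data/events")

# The returned handle supports the usual DeltaTable operations, e.g. an upsert
# from an updates_df DataFrame assumed to exist:
(events.alias("t")
 .merge(updates_df.alias("u"), "t.id = u.id")
 .whenMatchedUpdateAll()
 .whenNotMatchedInsertAll()
 .execute())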