Example #1
def save_offsets(rdd):
    # Persist each partition's ending offset to ZooKeeper so a restart can
    # resume exactly where the last micro-batch finished.
    print("Saving offset | Exactly Once Semantics")
    zk = PipelineUtils.getZookeeperInstance()
    for offset in rdd.offsetRanges():
        path = f"/consumers/{offset.topic}/{offset.partition}"
        zk.ensure_path(path)
        zk.set(path, str(offset.untilOffset).encode())
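A minimal usage sketch (assumed, not from the source): wiring save_offsets into a Spark 2.x direct stream from the legacy pyspark.streaming.kafka module. The topic name and broker address are placeholders.

from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils

ssc = StreamingContext(PipelineUtils.getSpark().sparkContext, batchDuration=10)
stream = KafkaUtils.createDirectStream(
    ssc, ["flat_obs_orders"], {"metadata.broker.list": "localhost:9092"})
# Persist each micro-batch's offset ranges to ZooKeeper.
stream.foreachRDD(save_offsets)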
Example #2
def voidFlatObs(encounter_ids):
    try:
        db = PipelineUtils.getConfig()['storage']['db']
        encounter_ids = ','.join(map(str, encounter_ids))
        if db == "delta":
            # Remove any previously sunk rows for these encounters.
            deltaTable = DeltaUtils.getDeltaTable("flat_obs_orders")
            deltaTable.delete("encounter_id IN ({0})".format(encounter_ids))
        elif db == "cassandra":
            CassandraUtils.deleteFromCassandra("flat_obs_orders", encounter_ids)
    except Exception as e:
        print("An unexpected error occurred while voiding FlatObs records", e)
        raise
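A hypothetical usage (the voided and encounter_id column names are assumptions): collect the ids of voided records in a micro-batch and clear them before upserting.

voided_ids = microbatch.filter("voided = true") \
    .select("encounter_id").rdd.flatMap(lambda x: x).collect()
if voided_ids:
    voidFlatObs(voided_ids)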
Example #3
from pyspark.streaming.kafka import TopicAndPartition

def read_offsets(topics):
    try:
        zk = PipelineUtils.getZookeeperInstance()
        from_offsets = {}
        for topic in topics:
            for partition in zk.get_children(f'/consumers/{topic}'):
                topic_partition = TopicAndPartition(topic, int(partition))
                offset = int(zk.get(f'/consumers/{topic}/{partition}')[0])
                from_offsets[topic_partition] = offset
        print("Previous offset -->", from_offsets)
        return from_offsets
    except Exception as e:
        print("An unexpected error occurred while reading offset", e)
        # Fall back to an empty mapping so the stream starts from its defaults.
        return {}
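On restart, the recovered offsets can feed straight back into the direct stream (a sketch, assuming the same legacy API and placeholders as above):

from_offsets = read_offsets(["flat_obs_orders"])
stream = KafkaUtils.createDirectStream(
    ssc, ["flat_obs_orders"],
    {"metadata.broker.list": "localhost:9092"},
    fromOffsets=from_offsets)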
Example #4
def sinkFlatObs(microbatch, batchId):
    try:
        db = PipelineUtils.getConfig()['storage']['db']
        if db == "delta":
            # Restrict the merge to the patients present in this micro-batch so
            # Delta can prune files before matching on encounter_id.
            patient_id = microbatch.select("patient_id").rdd.flatMap(lambda x: x).collect()
            whereClause = "table.patient_id IN ({0}) AND table.encounter_id = updates.encounter_id"\
                .format(','.join(map(str, patient_id)))
            print(whereClause)
            DeltaUtils.upsertMicroBatchToDelta("flat_obs_orders",  # Delta table name
                                               microbatch,         # micro-batch DataFrame
                                               whereClause)        # merge condition
        elif db == "cassandra":
            CassandraUtils.sinkToCassandra(microbatch, "flat_obs_orders", mode="append")
    except Exception as e:
        print("An unexpected error occurred while sinking FlatObs microbatch", e)
        raise
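sinkFlatObs already has the (DataFrame, batchId) signature that Structured Streaming's foreachBatch expects, so one plausible wiring (the stream variable and checkpoint path are assumptions) is:

query = flat_obs_stream.writeStream \
    .foreachBatch(sinkFlatObs) \
    .option("checkpointLocation", "/tmp/checkpoints/flat_obs_orders") \
    .start()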
Example #6
from common.utils import PipelineUtils

# Initialise the Delta-enabled SparkSession before importing the Delta API.
PipelineUtils.getSpark()
from delta.tables import *  # ignore pylint error (wildcard import)

class DeltaUtils:
    @staticmethod
    def getDeltaTable(table):
        # Resolve the table's storage path from the pipeline config.
        deltaConfig = PipelineUtils.getConfig()['storage']
        path = deltaConfig['tables'][table]["path"]
        spark = PipelineUtils.getSpark()
        return DeltaTable.forPath(spark, path)

    # Static method for merging incremental updates into Delta tables.
    @staticmethod
    def upsertMicroBatchToDelta(tableName, microBatchOutputDF, whereClause="table.id = updates.id"):
        deltaTable = DeltaUtils.getDeltaTable(tableName)
        return deltaTable.alias("table").merge(microBatchOutputDF.alias("updates"), whereClause)\
            .whenMatchedUpdateAll()\
            .whenNotMatchedInsertAll()\
            .execute()
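A hypothetical call: merge one micro-batch into flat_obs_orders, matching rows on encounter_id (the match column mirrors sinkFlatObs above).

DeltaUtils.upsertMicroBatchToDelta(
    "flat_obs_orders",
    microbatch,
    whereClause="table.encounter_id = updates.encounter_id")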
Example #7
def sourceFromCassandra(table):
    # Read the given table from the 'elt' keyspace via the Spark Cassandra connector.
    return PipelineUtils.getSpark().read\
        .format("org.apache.spark.sql.cassandra")\
        .options(table=table, keyspace="elt")\
        .load()
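Assuming the session was started with the spark-cassandra-connector package on the classpath, reading back a sunk table is a one-liner:

flat_obs_df = sourceFromCassandra("flat_obs_orders")
flat_obs_df.printSchema()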