"unrestrictedCompanyStockPercent")) if has_column(inputDF, 'enrollmentData.enrollmentEvents.lastServiceTierChange'): # Data frame for enrollmentData enrollmentDF = inputDF.withColumn("lastServiceTierChange", f.explode("enrollmentData.enrollmentEvents.lastServiceTierChange")). \ withColumn("serviceTier_new", f.explode("enrollmentData.enrollmentEvents.serviceTier")). \ withColumn("enrollmentChannel", f.explode("enrollmentData.enrollmentEvents.enrollmentChannel")). \ withColumn("enrollmentReason", f.explode("enrollmentData.enrollmentEvents.enrollmentReason")) enrollmentDF.registerTempTable("enrollmentTable") enrollmentTFSQL = glue_context.sql( "select userId,planOwnerId,recordKeeperId,max(enrollmentReason) as enrollmentReason,max(enrollmentChannel) as enrollmentChannel from ( select userId,planOwnerId,recordKeeperId,enrollmentReason,enrollmentChannel,lastServiceTierChange,serviceTier_new, \ rank() over(partition by enrollmentReason ,enrollmentChannel order by lastServiceTierChange DESC) rnk \ from enrollmentTable where serviceTier_new ='ma' \ ) where rnk=1 group by userId,planOwnerId,recordKeeperId " ) else: noenrollmentDF = inputDF.select( "userId", "planOwnerId", "recordKeeperId").withColumn( "enrollmentReason", lit(None).cast(StringType())).withColumn( "enrollmentChannel", lit(None).cast(StringType())).dropDuplicates() noenrollmentDF.registerTempTable("enrollmentTable") enrollmentTFSQL = spark.sql( "select userId,planOwnerId,recordKeeperId,max(enrollmentReason) as enrollmentReason ,\ max(enrollmentChannel) as enrollmentChannel from enrollmentTable group by userId,planOwnerId,recordKeeperId"
df = df.repartition("submitteddatehour") dyf = DynamicFrame.fromDF(df, glueContext, "submitteddatehour-extracted") return (DynamicFrameCollection({"CustomTransform0": dyf}, glueContext)) ## @params: [JOB_NAME, SOURCE_BUCKET_URI, DESTINATION_BUCKET_URI] args = getResolvedOptions( sys.argv, ['JOB_NAME', 'SOURCE_BUCKET_URI', 'DESTINATION_BUCKET_URI']) sc = SparkContext() glueContext = GlueContext(sc) spark = glueContext.spark_session glueContext.sql("set spark.sql.parquet.mergeSchema=true") job = Job(glueContext) job.init(args['JOB_NAME'], args) ## @type: DataSource ## @args: [connection_type = "s3", format = "parquet", connection_options = {"paths": ["s3://te-load-test-analytics-submission-parquet/"], "recurse":True}, transformation_ctx = "DataSource0"] ## @return: DataSource0 ## @inputs: [] DataSource0 = glueContext.create_dynamic_frame.from_options( connection_type="s3", format="parquet", connection_options={ "paths": [f"{args['SOURCE_BUCKET_URI']}/"], "recurse": True }, transformation_ctx="DataSource0") ## @type: CustomCode