def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    logStatus = startLogStatus(s3filePath)
    dfCount = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath +
                     " --------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getClosedSchema(s3filePath), spark),
            spark, s3confPath)
        logging.info("validatedRecords.count().." + str(validatedRecords.count()))
        ticketToCloseDS = detailClosedColumns(validatedRecords, spark)
        logging.info("ticketToCloseDS.count().." + str(ticketToCloseDS.count()))
        esIndex = ticketToCloseDS \
            .withColumn("open", F.lit(Constants.OPEN_NO)) \
            .withColumn("file", F.lit(s3filePath)) \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id"))
        logging.info("Persisting ES index..")
        dfCount = esIndex.count()
        logging.info("indexDataFrame.count.." + str(dfCount))
        try:
            ElasticDsl.writeMappedESIndex(
                esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", s3confPath)
        except Exception as e:
            message = str(e)
            if "index_closed_exception" in message:
                # TODO saveToEs {partitioned} works fine but ends with exception ?¿
                logging.info("caught index_closed_exception: " + message)
            else:
                raise e
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCount
        logStatus.exception = ""
        logStatus.end_date = ""
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as e:
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = False
        logStatus.count = dfCount
        logStatus.exception = str(e)
        logStatus.end_date = ""
        logging.error("caught: " + str(e))
        raise e
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date,
                                          logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", s3confPath)
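# --- Illustrative sketch (not part of the original modules) --------------------------
# Every runJob in this section assumes a log-status helper module providing
# startLogStatus() and logESindexSchema(); its real implementation lives elsewhere in
# the repo and is not shown here. The sketch below is a hypothetical reconstruction of
# the shape the jobs appear to rely on: a mutable record with file/count/success/
# exception/start_date/end_date fields, plus a builder that returns a one-element list
# suitable for sqlContext.createDataFrame(). Field names are taken from the calls above;
# everything else is an assumption.
from datetime import datetime

from pyspark.sql import Row


class LogStatus:
    # Mutable per-batch status record; runJob mutates it and deep-copies it as needed.
    def __init__(self, file):
        self.file = file
        self.count = 0
        self.success = False
        self.exception = ""
        self.start_date = datetime.now().strftime("%Y%m%d%H%M%S")
        self.end_date = ""


def startLogStatus(s3filePath):
    # One status record per input file processed.
    return LogStatus(s3filePath)


def logESindexSchema(file, count, success, exception, start_date, end_date):
    # One Row per batch run; field names are assumed to match the copt-rod-log- mapping.
    return [Row(file=file, count=count, success=success, exception=exception,
                start_date=start_date, end_date=end_date)]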
def checkCount(indexName, fileName, dfCount, spark, conf):
    prefix = S3FilesDsl.readConfigJson(conf).elastic_env_index_prefix
    sqlContext = SQLContext(spark)
    logging.info('dfCount.. ' + str(dfCount))
    path = fileName.replace(':', '\\:').replace("/", "\\/")
    # Equivalent of the Scala: qResultDF = spark.esDF("${indexName}", "?q=file:\"" + path + "\"").select("ticket_id") ?
    qResultDF1 = sqlContext.read \
        .option("es.resource", prefix + indexName) \
        .option("es.query", "?q=file:\"" + path + "\"") \
        .format("org.elasticsearch.spark.sql") \
        .load()
    qResultDF = qResultDF1.select("ticket_id")
    qResultDF.cache()
    queryCount = qResultDF.count()
    qResultDF.unpersist()
    logging.info("queryCount.. " + str(queryCount))
    if dfCount != queryCount:
        alertDataFrame = sqlContext.createDataFrame(
            [(fileName, dfCount, queryCount, datetime.now().strftime("%Y%m%d%H%M%S"))],
            ["file", "expected_count", "result_count", "date"])
        ElasticDsl.writeESAlertsIndex(alertDataFrame, conf)
def validateTickets(s3filePath, tickets, spark, s3confPath):
    sqlContext = SQLContext(spark)
    corruptRecords = tickets.filter(tickets._corrupt_record.isNotNull() |
                                    tickets.ticket_id.isNull())
    corruptRecords.cache()
    corruptRecordsCount = corruptRecords.count()
    logging.info("corruptRecords.count.." + str(corruptRecordsCount))
    corruptRecords.unpersist()
    if corruptRecordsCount > 0:
        withS3path = corruptRecords.withColumn("file", F.lit(s3filePath))
        ElasticDsl.writeESCorruptRecordsIndex(withS3path, "copt-rod-corrupt-records-", s3confPath)
    validatedRecords = tickets.filter(tickets._corrupt_record.isNull() &
                                      tickets.ticket_id.isNotNull())
    validatedRecords.cache()
    logging.info("validatedRecords.count.." + str(validatedRecords.count()))
    validatedRecords.unpersist()
    return validatedRecords
def runJob(sparkSession, s3confPath, s3filePath): spark = sparkSession logStatus = startLogStatus(s3filePath) dfCount = 0 try: logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath + "--------------------------------------") validatedRecords = ValidationsDsl.validateTickets( s3filePath, S3FilesDsl.readFileSchema(s3filePath, getWISchema(s3filePath), spark), spark, s3confPath) indexAgentSmcCluster = getAgentSmcCluster(validatedRecords, s3confPath, spark) indexWithRelations = getRelations(indexAgentSmcCluster, s3confPath, spark) partitioned = indexWithRelations \ .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \ .withColumn("file", F.lit(s3filePath)) \ .withColumn("work_info_category", Utils.getWorkInfoCategory("work_info_notes")) dfCount = partitioned.count() logging.info("Persisting ES index..") logging.info("indexWorkInfoDataFrame.count().." + str(dfCount)) try: ElasticDsl.writeMappedESIndex( partitioned, "copt-rod-wif-{ticket_max_value_partition}", "instanceid", s3confPath) except Exception as e: message = str(e) if message.find("index_closed_exception"): raise e else: # TODO saveToEs {partitioned} works fine but ends with exception ?¿ logging.info("catched index_closed_exception: " + str(e)) AlertDsl.checkCount("copt-rod-wif*", s3filePath, dfCount, spark, s3confPath) logStatus = copy.deepcopy(logStatus) logStatus.success = True logStatus.count = dfCount logStatus.exception = "" logStatus.end_date = "" logging.info( "End batch Coptero ROD ----------------------------------------------------" ) except Exception as e: logStatus = copy.deepcopy(logStatus) logStatus.success = False logStatus.count = dfCount logStatus.exception = str(e) logStatus.end_date = "" logging.error("catched: " + str(e)) raise e finally: sqlContext = SQLContext(spark) logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S") logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success, logStatus.exception, logStatus.start_date, logStatus.end_date) logDataFrame = sqlContext.createDataFrame( copy.deepcopy(logStatus_data))
def runJob(sparkSession, s3confPath, s3filePath): spark = sparkSession conf = s3confPath logStatus = startLogStatus(s3filePath) dfCount = 0 try: logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath + "-------------------------------------") validatedRecords = ValidationsDsl.validateTickets( s3filePath, S3FilesDsl.readFileSchema(s3filePath, getPBISchema(s3filePath), spark), spark, conf) rodTicketDetailProblems = detailPBMColumns(validatedRecords, spark) esIndexPBM = RemedyDsl.buildESIndex("problems", rodTicketDetailProblems, s3confPath, s3filePath, spark) dfCount = esIndexPBM.count() logging.info("Persisting ES indexes..") logging.info("indexProblemDataFrame.count().." + str(dfCount)) try: ElasticDsl.writeMappedESIndex( esIndexPBM, "copt-rod-pbi-{ticket_max_value_partition}", "ticket_id", conf) except Exception as ex: e = str(ex) if e.find("index_closed_exception"): logging.info("catched index_closed_exception: " + e) else: raise ex AlertDsl.checkCount("copt-rod-pbi-*", s3filePath, dfCount, spark, conf) logStatus = copy.deepcopy(logStatus) logStatus.success = True logStatus.count = dfCount logStatus.exception = "" logStatus.end_date = "" logging.info( "End batch Coptero ROD ----------------------------------------------------" ) except Exception as ex: e = str(ex) logStatus.success = False logStatus.count = dfCount logStatus.exception = e logStatus.end_date = "" logging.info("catched: " + e) raise ex finally: sqlContext = SQLContext(spark) logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S") logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success, logStatus.exception, logStatus.start_date, logStatus.end_date) logDataFrame = sqlContext.createDataFrame( copy.deepcopy(logStatus_data)) ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def runJob(sparkSession, s3confPath, s3filePath): spark = sparkSession conf = s3confPath logStatus = startLogStatus(s3filePath) dfCount = 0 try: logging.info("Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------") validatedRecords = ValidationsDsl.validateTickets( s3filePath, S3FilesDsl.readFileSchema(s3filePath, getCRQSchema(s3filePath), spark), spark, conf) rodTicketDetailChanges = detailCHGColumns(validatedRecords) esIndexCHG = RemedyDsl.buildESIndex("changes", rodTicketDetailChanges, s3confPath, s3filePath, spark) logging.info("Persisting ES index..") dfCount = esIndexCHG.count() logging.info("indexDataFrame.count.." + str(dfCount)) try: ElasticDsl.writeMappedESIndex( esIndexCHG, "copt-rod-crq-{ticket_max_value_partition}", "ticket_id", conf) except Exception as e: ex = str(e) if ex.find("index_closed_exception"): raise e else: # TODO saveToEs {partitioned} works fine but ends with exception ?¿ logging.info("catched index_closed_exception: " + ex) AlertDsl.checkCount("copt-rod-crq-*", s3filePath, dfCount, spark, s3confPath) logStatus = copy.deepcopy(logStatus) logStatus.success = True logStatus.count = dfCount logStatus.exception = "" logStatus.end_date = "" logging.info( "End batch Coptero ROD ----------------------------------------------------" ) except Exception as e: ex = str(e) logStatus = copy.deepcopy(logStatus) logStatus.success = False logStatus.count = dfCount logStatus.exception = ex logStatus.end_date = "" logging.error("catched: " + ex) raise e finally: sqlContext = SQLContext(spark) logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S") logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success, logStatus.exception, logStatus.start_date, logStatus.end_date) logDataFrame = sqlContext.createDataFrame( copy.deepcopy(logStatus_data)) ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def runJob(sparkSession, s3confPath, s3filePath): spark = sparkSession conf = s3confPath logStatus = startLogStatus(s3filePath) dfCount = 0 try: logging.info("Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------") validatedRecords = ValidationsDsl.validateTickets( s3filePath, S3FilesDsl.readFileSchema(s3filePath, getIncidSchema(s3filePath), spark), spark, conf) logging.info("fileDetailHelpdesk.count().." + str(validatedRecords.count())) rodTicketDetailHelpdesk = TicketDetailHelpdesk.detailHPDColumns( validatedRecords) logging.info("rodTicketDetailHelpdesk.count.." + str(rodTicketDetailHelpdesk.count())) calendar = datetime.now().strftime("%Y%m%d%H%M%S") filtered = rodTicketDetailHelpdesk. \ filter((rodTicketDetailHelpdesk.status_id == "5") | (rodTicketDetailHelpdesk.status_id == "6")). \ filter(rodTicketDetailHelpdesk.last_modification_date < calendar) #TODO #valfileStatus: DataFrame = readFile(getAuxTablePath("TICKET_STATUS")) #val rodTicketStatus: Dataset[TicketStatus] = statusColumns(fileStatus) # rodTicketDetailHelpdesk #.join(rodTicketStatus, Seq("status_id"), "left") #.filter($"status_desc" == = "Closed" | | $"status_desc" == = "Cancelled") #.drop("status_desc") #filter($"last_modification_date" < new impleDateFormat("yyyyMMddHHmmss").format(calendar.getTime)) # val cisClosedDates = getCIsLastClosedDates(rodTicketDetailHelpdesk) esIndex = RemedyDsl.buildESIndex("helpdesk", filtered, s3confPath, s3filePath, spark) # TODO ? esIndex.as[IncidESIndex]with Option[String] = None logging.info("Persisting ES index..") #dfCount = esIndex.count() #logging.info("indexDataFrame.count.." + str(dfCount)) try: ElasticDsl.writeMappedESIndex( esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", conf) except Exception as e: message = str(e) if message.find("index_closed_exception"): raise e else: # TODO saveToEs {partitioned} works fine but ends with exception ?¿ logging.info("catched index_closed_exception: " + str(e)) removeClosedAgentSmc(esIndex, s3confPath, spark) AlertDsl.checkCount("copt-rod-closed-*", s3filePath, dfCount, spark, s3confPath) logStatus = copy.deepcopy(logStatus) logStatus.success = True logStatus.count = dfCount logStatus.exception = "" logStatus.end_date = "" logging.info( "End batch Coptero ROD ----------------------------------------------------" ) except Exception as e: logStatus = copy.deepcopy(logStatus) logStatus.success = False logStatus.count = dfCount logStatus.exception = str(e) logStatus.end_date = "" logging.error("catched: " + str(e)) raise e finally: sqlContext = SQLContext(spark) logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S") logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success, logStatus.exception, logStatus.start_date, logStatus.end_date) logDataFrame = sqlContext.createDataFrame( copy.deepcopy(logStatus_data)) logDataFrame.show(5) ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def runJob(sparkSession, s3confPath, s3filePath):
    spark = sparkSession
    conf = s3confPath
    logStatus = startLogStatus(s3filePath)
    dfCountRelation = 0
    dfCountIncid = 0
    try:
        logging.info("Start batch Coptero ROD for s3confPath: " + s3confPath +
                     " -------------------------------------")
        validatedRecords = ValidationsDsl.validateTickets(
            s3filePath,
            S3FilesDsl.readFileSchema(s3filePath, getRelationSchema(s3filePath), spark),
            spark, conf)
        rodTicketRelation1 = relationColumns(validatedRecords, spark)
        rodTicketRelation = rodTicketRelation1.withColumn(
            "relation_id",
            F.concat(rodTicketRelation1["ticket_id"], F.lit('-'),
                     rodTicketRelation1["related_ticket_id"]))
        esIndexRel = rodTicketRelation.select(
            'relation_id', 'ticket_id', 'ticket_type', 'related_ticket_id',
            'related_ticket_type', 'association_type', 'submit_date',
            'relation_summary', 'status', 'submitter', 'instanceid')
        partitioned = esIndexRel \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("file", F.lit(s3filePath))
        dfCountRelation = partitioned.count()
        logging.info("Persisting ES index..")
        logging.info("indexRelationDataFrame.count().." + str(dfCountRelation))
        try:
            ElasticDsl.writeMappedESIndex(
                partitioned, "copt-rod-rel-{ticket_max_value_partition}", "relation_id", conf)
        except Exception as ex:
            e = str(ex)
            if "index_closed_exception" in e:
                logging.info("caught index_closed_exception: " + e)
            else:
                raise ex
        AlertDsl.checkCount("copt-rod-rel-*", s3filePath, dfCountRelation, spark, conf)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCountRelation
        logStatus.exception = ""
        logStatus.end_date = ""
        persistRelations(esIndexRel, conf, spark)
        '''relationsDF = esIndexRel \
            .filter(esIndexRel.ticket_type == "Incident") \
            .groupBy("ticket_id") \
            .agg(F.collect_list("related_ticket_id").alias("relations")) \
            .withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")) \
            .withColumn("file", F.lit(s3filePath))
        # TODO writeMappedESIndex CRQ and PBI
        dfCountIncid = relationsDF.count()
        logging.info("Persisting ES index..")
        logging.info("relationsDF.count().." + str(dfCountIncid))
        try:
            ElasticDsl.writeMappedESIndex(
                relationsDF, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", conf)
        except Exception as ex:
            e = str(ex)
            if "index_closed_exception" in e:
                logging.info("caught index_closed_exception: " + e)
            else:
                raise ex
        AlertDsl.checkCount("copt-rod-closed-*", s3filePath, dfCountIncid, spark, conf)
        logStatus = copy.deepcopy(logStatus)
        logStatus.success = True
        logStatus.count = dfCountIncid
        logStatus.exception = ""
        logStatus.end_date = "" '''
        logging.info("End batch Coptero ROD ----------------------------------------------------")
    except Exception as ex:
        e = str(ex)
        logStatus.success = False
        logStatus.count = 0
        logStatus.exception = e
        logStatus.end_date = ""
        logging.error("caught: " + e)
        raise ex
    finally:
        sqlContext = SQLContext(spark)
        logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S")
        logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success,
                                          logStatus.exception, logStatus.start_date,
                                          logStatus.end_date)
        logDataFrame = sqlContext.createDataFrame(copy.deepcopy(logStatus_data))
        ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)
def runJob(sparkSession, s3confPath, s3filePath): spark = sparkSession conf = s3confPath logStatus = startLogStatus(s3filePath) dfCount = 0 try: logging.info("Start batch Coptero ROD for s3confPath:" + s3confPath + "--------------------------------------") validatedRecords = ValidationsDsl.validateTickets( s3filePath, S3FilesDsl.readFileSchema(s3filePath, getIncidSchema(s3filePath), spark), spark, conf) logging.info("fileDetailHelpdesk.count().." + str(validatedRecords.count())) rodTicketDetailHelpdesk = TicketDetailHelpdesk.detailHPDColumns( validatedRecords) logging.info("rodTicketDetailHelpdesk.count.." + str(rodTicketDetailHelpdesk.count())) # val cisClosedDates = getCIsLastClosedDates(rodTicketDetailHelpdesk) esIndex = RemedyDsl.buildESIndex("helpdesk", rodTicketDetailHelpdesk, s3confPath, s3filePath, spark) print("NUESTRO DATAFRAME") esIndex.show() print("VUESTRO DATAFRAME") # TODO ? esIndex.as[IncidESIndex]with Option[String] = None logging.info("Persisting ES index..") dfCount = esIndex.count() logging.info("indexDataFrame.count.." + str(dfCount)) try: ElasticDsl.writeMappedESIndex( esIndex, "copt-rod-closed-{ticket_max_value_partition}", "ticket_id", conf) except Exception as e: message = str(e) if message.find("index_closed_exception"): raise e else: # TODO saveToEs {partitioned} works fine but ends with exception ?¿ logging.info("catched index_closed_exception: " + str(e)) persistAgentSmc(esIndex, s3confPath, spark) AlertDsl.checkCount("copt-rod-closed-*", s3filePath, dfCount, spark, conf) logStatus = copy.deepcopy(logStatus) logStatus.success = True logStatus.count = dfCount logStatus.exception = "" logStatus.end_date = "" logging.info( "End batch Coptero ROD ----------------------------------------------------" ) except Exception as e: logStatus = copy.deepcopy(logStatus) logStatus.success = False logStatus.count = dfCount logStatus.exception = str(e) logStatus.end_date = "" logging.error("catched: " + str(e)) raise e finally: sqlContext = SQLContext(spark) logStatus.end_date = datetime.now().strftime("%Y%m%d%H%M%S") logStatus_data = logESindexSchema(logStatus.file, logStatus.count, logStatus.success, logStatus.exception, logStatus.start_date, logStatus.end_date) logDataFrame = sqlContext.createDataFrame( copy.deepcopy(logStatus_data)) ElasticDsl.writeESLogIndex(logDataFrame, "copt-rod-log-", conf)