Ejemplo n.º 1
0
def persistRelations(esIndexRel, s3confPath, spark):
    """Merge the ticket relations of ``esIndexRel`` into the relations parquet.

    The ``ticket_id`` / ``related_ticket_id`` pairs of ``esIndexRel`` are
    unioned with the pairs already stored at the configured
    ``rod_relations_parquet_path``, de-duplicated and written back to that
    same path as a single partition.

    If the parquet path does not exist yet (first execution of the job), it
    is created from the incoming relations before the merge.

    :param esIndexRel: DataFrame with at least ``ticket_id`` and
        ``related_ticket_id`` columns.
    :param s3confPath: location of the JSON configuration read through
        ``S3FilesDsl.readConfigJson``.
    :param spark: object handed to ``SQLContext`` to build the SQL context.
    :return: the value returned by the final ``parquet`` write call
        (PySpark's ``DataFrameWriter.parquet`` returns ``None``).
    :raises Exception: any error raised while reading the existing parquet
        whose message does not contain ``"Path does not exist"`` is re-raised.
    """
    sqlContext = SQLContext(spark)

    # Resolve the target path once, outside the try, so that a configuration
    # error is never mistaken for a missing parquet path.
    relationsPath = S3FilesDsl.readConfigJson(s3confPath).rod_relations_parquet_path

    relationsDF = esIndexRel.select("ticket_id", "related_ticket_id")
    total = None

    try:
        total = sqlContext.read.parquet(relationsPath)
    except Exception as e:
        # str.find() returns -1 (truthy) when the text is absent and 0 (falsy)
        # only when it is at the very start, so it cannot be used as a
        # boolean; a membership test expresses the intended check.
        if "Path does not exist" not in str(e):
            raise

        logging.info("catched Path does not exist (first job execution): " + str(e))

        # First execution: seed the parquet with the incoming relations.
        relationsDF \
            .repartition(1) \
            .write \
            .mode("overwrite") \
            .parquet(relationsPath)

        total = sqlContext.read.parquet(relationsPath)

    # NOTE(review): presumably cache() + count() materialize the stored data
    # before the path it was read from is overwritten below - confirm.
    total.cache()
    logging.info("total.count---------------------------------------------" + str(total.count()))

    result = total \
        .unionByName(relationsDF) \
        .distinct() \
        .repartition(1) \
        .write \
        .mode("overwrite") \
        .parquet(relationsPath)

    return result
Ejemplo n.º 2
0
def removeClosedAgentSmc(esIndex, s3confPath, spark):
    """Remove the tickets present in ``esIndex`` from the agent/SMC parquet.

    The parquet stored at the configured ``rod_agent_smc_parquet_path`` is
    read, every row whose ``ticket_id`` appears in ``esIndex`` is discarded
    (left join + null check on the right-hand key), and the remainder is
    written back to the same path as a single partition.

    :param esIndex: DataFrame holding the tickets to remove; must expose
        ``ticket_id``, ``assigned_agent``, ``smc_cluster`` and
        ``reported_source_id``.
    :param s3confPath: location of the JSON configuration read through
        ``S3FilesDsl.readConfigJson``.
    :param spark: object handed to ``SQLContext`` to build the SQL context.
    :return: the value returned by the ``parquet`` write call.
    """
    context = SQLContext(spark)
    closedTickets = esIndex.select(
        "ticket_id", "assigned_agent", "smc_cluster", "reported_source_id"
    ).distinct()

    # Only the key is needed to detect which stored rows must go away.
    closedIds = closedTickets.select("ticket_id")

    stored = context.read.parquet(
        S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)

    stored.cache()

    storedCount = stored.count()
    logging.info("totalWithoutClosed.count---------------------------------------------" + str(storedCount))

    # Rows without a match on the right side are the ones that stay.
    joined = stored.join(closedIds, ["ticket_id"], "left")
    remaining = joined.where(closedIds["ticket_id"].isNull())

    writer = remaining.repartition(1).write.mode("overwrite")
    result = writer.parquet(
        S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)

    remainingCount = remaining.count()
    logging.info(
        "totalWithoutClosed.count---------------------------------------------" + str(remainingCount))
    return result
Ejemplo n.º 3
0
 def writeESCorruptRecordsIndex(index, name, conf):
     """Send ``index`` to Elasticsearch through ``toElastic``.

     The target index name is the environment prefix read from the JSON
     configuration, followed by ``name`` and the current four-digit year.

     NOTE(review): this snippet appears truncated after the ``toElastic``
     call, so only the visible statements are documented here.

     :param index: data handed to ``toElastic`` (presumably a DataFrame -
         confirm against ``toElastic``).
     :param name: base name of the target index.
     :param conf: location of the JSON configuration read through
         ``S3FilesDsl.readConfigJson``.
     """
     prefix = S3FilesDsl.readConfigJson(conf).elastic_env_index_prefix
     # Connection settings passed to toElastic; node and port are hard-coded
     # and the user/password values appear masked in this source.
     config = {
         "elastic_nodes": "127.0.0.1",
         "elastic_port": "9200",
         "elastic_user": "******",
         "elastic_pass": "******"
     }
     # ``addId`` is not defined in this block; it must come from the
     # enclosing scope - TODO confirm what it refers to.
     toElastic(config, index, addId,
               prefix + name + datetime.now().strftime("%Y"))
     '''index.write.format(
Ejemplo n.º 4
0
def fullPersistAgentSmc(esIndex, s3confPath):
    """Overwrite the agent/SMC parquet with the contents of ``esIndex``.

    Only ``ticket_id``, ``assigned_agent``, ``smc_cluster`` and
    ``reported_source_id`` are kept; the data is written as a single
    partition to the configured ``rod_agent_smc_parquet_path``.

    :param esIndex: DataFrame exposing the four columns listed above.
    :param s3confPath: location of the JSON configuration read through
        ``S3FilesDsl.readConfigJson``.
    :return: the value returned by the ``parquet`` write call.
    """
    agentSmc = esIndex.select("ticket_id", "assigned_agent", "smc_cluster", "reported_source_id")

    writer = agentSmc.repartition(1).write.mode("overwrite")
    result = writer.parquet(
        S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)

    # The count is logged after the write, as a trace of how many rows were persisted.
    persistedRows = agentSmc.count()
    logging.info("total.count---------------------------------------------" + str(persistedRows))
    return result
Ejemplo n.º 5
0
def persistAgentSmc(esIndex, s3confPath, spark):
    """Upsert the agent/SMC rows of ``esIndex`` into the agent/SMC parquet.

    Rows already stored at the configured ``rod_agent_smc_parquet_path``
    whose ``ticket_id`` appears in ``esIndex`` are replaced by the incoming
    rows; all other stored rows are kept. The result is written back to the
    same path as a single partition.

    If the parquet path does not exist yet (first execution of the job), it
    is created from the incoming rows before the merge.

    :param esIndex: DataFrame exposing ``ticket_id``, ``assigned_agent``,
        ``smc_cluster`` and ``reported_source_id``.
    :param s3confPath: location of the JSON configuration read through
        ``S3FilesDsl.readConfigJson``.
    :param spark: object handed to ``SQLContext`` to build the SQL context.
    :return: the value returned by the final ``parquet`` write call
        (PySpark's ``DataFrameWriter.parquet`` returns ``None``).
    :raises Exception: any error raised while reading the existing parquet
        whose message does not contain ``"Path does not exist"`` is re-raised.
    """
    sqlContext = SQLContext(spark)

    # Resolve the target path once, outside the try, so that a configuration
    # error is never mistaken for a missing parquet path.
    agentSmcPath = S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path

    newOrUpdated = esIndex.select("ticket_id", "assigned_agent", "smc_cluster", "reported_source_id").distinct()
    auxOuterJoin = newOrUpdated.select("ticket_id")
    total = None

    try:
        total = sqlContext.read.parquet(agentSmcPath)
    except Exception as e:
        # str.find() returns -1 (truthy) when the text is absent and 0 (falsy)
        # only when it is at the very start, so it cannot be used as a
        # boolean; a membership test expresses the intended check.
        if "Path does not exist" not in str(e):
            raise

        logging.info("catched Path does not exist (first job execution): " + str(e))

        # First execution: seed the parquet with the incoming rows.
        newOrUpdated \
            .repartition(1) \
            .write \
            .mode("overwrite") \
            .parquet(agentSmcPath)

        total = sqlContext.read.parquet(agentSmcPath)

    # NOTE(review): presumably cache() + count() materialize the stored data
    # before the path it was read from is overwritten below - confirm.
    total.cache()

    logging.info("total.count---------------------------------------------" + str(total.count()))

    # Keep only the stored rows that have no incoming counterpart.
    totalWithoutNewOrUpdated = total \
        .join(auxOuterJoin, ["ticket_id"], "left") \
        .where(auxOuterJoin["ticket_id"].isNull())

    updatedToParquet = totalWithoutNewOrUpdated.unionByName(newOrUpdated)

    result = updatedToParquet \
        .repartition(1) \
        .write \
        .mode("overwrite") \
        .parquet(agentSmcPath)
    return result
Ejemplo n.º 6
0
def getRelations(esIndex, s3confPath, spark):
    """Add to ``esIndex`` the agents and SMC clusters of related tickets.

    Agent/SMC rows whose ``reported_source_id`` differs from ``"Vendor"`` are
    joined to the stored relations through ``related_ticket_id``; for every
    ``ticket_id`` the distinct ``assigned_agent`` and ``smc_cluster`` values
    are collected into ``assignee`` and ``smc``. Those sets are left-joined
    onto ``esIndex`` and combined with the ticket's own ``assigned_agent`` /
    ``smc_cluster`` through ``Utils.addToArray``, after which the two scalar
    columns are dropped.

    :param esIndex: DataFrame exposing ``ticket_id``, ``assigned_agent`` and
        ``smc_cluster``.
    :param s3confPath: location of the JSON configuration read through
        ``S3FilesDsl.readConfigJson``.
    :param spark: object handed to ``SQLContext`` to build the SQL context.
    :return: ``esIndex`` with ``assignee`` and ``smc`` columns and without
        ``assigned_agent`` / ``smc_cluster``.
    """
    context = SQLContext(spark)

    agentSmc = context.read.parquet(
        S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)

    # Rename the key so it does not collide with the relations' ticket_id.
    nonVendor = agentSmc.filter(agentSmc.reported_source_id != "Vendor")
    agents = nonVendor.withColumnRenamed("ticket_id", "agent_ticket_id")

    relations = context.read.parquet(
        S3FilesDsl.readConfigJson(s3confPath).rod_relations_parquet_path)

    matched = relations.join(
        agents, relations.related_ticket_id == agents.agent_ticket_id, "inner")
    relatedAgents = matched.groupBy("ticket_id").agg(
        F.collect_set("assigned_agent").alias("assignee"),
        F.collect_set("smc_cluster").alias("smc"))

    enriched = esIndex.join(relatedAgents, ["ticket_id"], "left")
    enriched = enriched.withColumn("assignee", Utils.addToArray("assigned_agent", "assignee"))
    enriched = enriched.withColumn("smc", Utils.addToArray("smc_cluster", "smc"))

    return enriched.drop("assigned_agent", "smc_cluster")
Ejemplo n.º 7
0
 def checkCount(indexName, fileName, dfCount, spark, conf):
     """Compare an expected row count with what Elasticsearch holds for a file.

     The index ``<prefix><indexName>`` is queried for documents whose ``file``
     field matches ``fileName``; when the number of hits differs from
     ``dfCount`` an alert row is written through
     ``ElasticDsl.writeESAlertsIndex``.

     :param indexName: index name, without the environment prefix.
     :param fileName: value expected in the ``file`` field of the documents.
     :param dfCount: expected number of documents.
     :param spark: object handed to ``SQLContext`` to build the SQL context.
     :param conf: location of the JSON configuration read through
         ``S3FilesDsl.readConfigJson``.
     """
     prefix = S3FilesDsl.readConfigJson(conf).elastic_env_index_prefix
     sqlContext = SQLContext(spark)
     logging.info('dfCount.. ' + str(dfCount))
     # ':' and '/' are escaped with a backslash before being embedded in the query.
     path = fileName.replace(':', '\\:').replace("/", "\\/")
     # The value is wrapped in double quotes, as in the Scala original
     #   spark.esDF("${indexName}", "?q=file:\"" + path + "\"").select("ticket_id")
     # The previous literal produced  ?q=file:''<path> ''  (doubled single
     # quotes and a stray trailing space) instead of a quoted phrase.
     qResultDF1 = sqlContext.read \
         .option("es.resource", prefix + indexName) \
         .option("es.query", '?q=file:"' + path + '"') \
         .format("org.elasticsearch.spark.sql") \
         .load()
     qResultDF = qResultDF1.select("ticket_id")
     qResultDF.cache()
     queryCount = qResultDF.count()
     qResultDF.unpersist()
     logging.info("queryCount.. " + str(queryCount))
     if dfCount != queryCount:
         # Record the mismatch together with a timestamp (yyyyMMddHHmmss).
         alertDataFrame = sqlContext.createDataFrame(
             [(fileName, dfCount, queryCount, datetime.now().strftime("%Y%m%d%H%M%S"))],
             ["file", "expected_count", "result_count", "date"])
         ElasticDsl.writeESAlertsIndex(alertDataFrame, conf)
Ejemplo n.º 8
0
    def buildESIndex(detailType, detail, s3confPath, s3filePath, spark):
        """Build the DataFrame to be indexed for one kind of ticket detail.

        The detail rows are joined with the master entities, the FAST
        admin-number and network parquets and the admin-number tags; a set of
        derived columns is added, and then enrichment specific to
        ``detailType`` is applied. Finally the ``*_desc`` master descriptions
        are renamed to ``*_id``.

        :param detailType: one of ``"helpdesk"``, ``"problems"`` or
            ``"changes"``.
        :param detail: DataFrame with the raw detail rows, passed to
            ``joinMasterEntities``.
        :param s3confPath: location of the JSON configuration read through
            ``S3FilesDsl.readConfigJson``.
        :param s3filePath: path of the source file; stored verbatim in the
            ``file`` column.
        :param spark: object handed to ``SQLContext`` and to the helper
            readers.
        :return: the enriched DataFrame with the renamed columns.
        :raises ValueError: if ``detailType`` is not one of the supported
            values.
        """
        sqlContext = SQLContext(spark)
        # TODO: use .conf instead of .json
        confJson = S3FilesDsl.readConfigJson(s3confPath)

        rodTicketANTags = AdminNumberTags.antagsColumns(
            S3FilesDsl.readFile(confJson.tags_admin_path, spark), spark)

        parquetPath = confJson.fast_parquet_path
        rodPostgreAdminNumber = sqlContext.read.parquet(parquetPath)

        logging.info("FAST joins..")
        networkFast = sqlContext.read.parquet(confJson.fast_network_parquet_path)

        logging.info("common joins..")

        # TODO: add the import of utils.constantes
        # TODO: check the parameters passed to the Utils methods
        common3 = joinMasterEntities(detail, spark)

        common2 = common3.join(rodPostgreAdminNumber, ["admin_number"], "left")

        common1 = Utils.fillEmptyFastColumns(common2)

        # Columns shared by every detail type.
        common = common1.join(networkFast, ["admin_number"], "left"). \
            withColumn("networkinfo", Utils.networkNestedObject("fast_customer", "fast_end_customer",
                                                                "router_interface_vendor_type_set")). \
            drop("router_interface_vendor_type_set"). \
            join(rodTicketANTags, ["admin_number"], "left"). \
            withColumn("open", F.when(common1.status_desc.isin(Constants.openStatus), Constants.OPEN_YES).
                       otherwise(F.when(common1.status_desc.isin(Constants.notOpenStatus), Constants.OPEN_NO).
                                 otherwise(Constants.EMPTY_STRING))). \
            withColumn("ticket_max_value_partition", Utils.getIndexPartition("ticket_id")). \
            withColumn("admin_number_escaped", Utils.urlWhitespaces("admin_number")). \
            withColumn("fast_max_resolution_time", Utils.validateNumeric("fast_max_resolution_time")). \
            withColumn("file", F.lit(s3filePath)). \
            fillna(Constants.EMPTY_STRING, ["assigned_agent"])

        if detailType == "helpdesk":
            rodTicketReportedSource = getReportedSource(spark)
            operationalManager = getOperationalManager(confJson.operational_path, spark)
            opTags = OperatingTags.operatingTagsColumns(S3FilesDsl.readFile(confJson.tags_operating_path, spark))
            customer = Customer.customerColumns(S3FilesDsl.readFile(confJson.customer_path, spark), spark)
            endCustomer = EndCustomer.endCustomerColumns(S3FilesDsl.readFile(confJson.end_customer_path, spark), spark)

            index1 = common \
                .join(rodTicketReportedSource, ["reported_source_id"], "left") \
                .drop("reported_source_id") \
                .join(operationalManager, ["operating_company_name", "operating_le"], "left") \
                .na.fill(Constants.EMPTY_STRING, ["operational_manager"]) \
                .join(opTags, ["operating_company_name", "operating_le"], "left") \
                .withColumn("tags", Utils.mergeArrays("tags", "operating_tags")) \
                .drop("operating_tags") \
                .join(customer, ["operating_company_name"], "left") \
                .fillna(Constants.EMPTY_STRING, ["customer_correct"]) \
                .join(endCustomer, ["operating_le"], "left") \
                .fillna(Constants.EMPTY_STRING, ["end_customer_correct"]) \
                .withColumn("end_customer_correct",
                            Utils.emptyEndCustomerCorrect("customer_correct", "end_customer_correct")) \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("end_user_country", Utils.kibanaCountry("end_user_country")) \
                .withColumn("smc_cluster", Utils.smcClusterFromGroup("assigned_support_group")) \
                .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name")) \
                .withColumn("product_categorization_all_tiers",
                            Utils.concat3Columns("product_categorization_tier_1", "product_categorization_tier_2",
                                                 "product_categorization_tier_3")) \
                .withColumn("closure_categorization_all_tiers",
                            Utils.concat3Columns("closure_categorization_tier_1", "closure_categorization_tier_2",
                                                 "closure_categorization_tier_3")) \
                .withColumn("operational_categorization_all_tiers",
                            Utils.concat3Columns("operational_categorization_tier_1",
                                                 "operational_categorization_tier_2",
                                                 "operational_categorization_tier_3")) \
                .withColumnRenamed("reported_source_desc", "reported_source_id")

            index = FastDsl.fastCircuitFields(index1, confJson, spark)

        elif detailType == "problems":
            index1 = common \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("ci_name_escaped", Utils.urlWhitespaces("ci_name"))
            index = FastDsl.fastCircuitFields(index1, confJson, spark)

        elif detailType == "changes":
            rodTicketReportedSource = getReportedSource(spark)
            index = common \
                .join(rodTicketReportedSource, ["reported_source_id"], "left") \
                .drop("reported_source_id") \
                .withColumn("ci_country", Utils.kibanaCountry("ci_country")) \
                .withColumn("company_country", Utils.kibanaCountry("company_country")) \
                .withColumnRenamed("reported_source_desc", "reported_source_id")

        else:
            # Without this guard an unknown detailType left ``index`` unbound
            # and failed below with an UnboundLocalError.
            raise ValueError("Unsupported detailType: " + str(detailType))

        # The user requested that the master-data descriptions be renamed as _id.
        indexRenamed = index \
            .withColumnRenamed("status_desc", "status_id") \
            .withColumnRenamed("substatus_desc", "substatus_id") \
            .withColumnRenamed("urgency_desc", "urgency_id") \
            .withColumnRenamed("priority_desc", "priority_id") \
            .withColumnRenamed("impact_desc", "impact_id")

        return indexRenamed
Ejemplo n.º 9
0
def getAgentSmcCluster(esIndex, s3confPath, spark):
    """Left-join the stored agent and SMC cluster of each ticket onto ``esIndex``.

    :param esIndex: DataFrame exposing a ``ticket_id`` column.
    :param s3confPath: location of the JSON configuration read through
        ``S3FilesDsl.readConfigJson``.
    :param spark: object handed to ``SQLContext`` to build the SQL context.
    :return: ``esIndex`` joined on ``ticket_id`` with the ``smc_cluster`` and
        ``assigned_agent`` columns of the agent/SMC parquet.
    """
    context = SQLContext(spark)

    agentSmc = context.read.parquet(
        S3FilesDsl.readConfigJson(s3confPath).rod_agent_smc_parquet_path)

    # Only the join key and the two columns of interest are brought in.
    lookup = agentSmc.select("ticket_id", "smc_cluster", "assigned_agent")
    return esIndex.join(lookup, ["ticket_id"], "left")
Ejemplo n.º 10
0
 def writeMappedESIndex(index, name, mapId, config):
     """Write ``index`` to Elasticsearch using ``mapId`` as the document id field.

     :param index: DataFrame to write.
     :param name: index name, without the environment prefix.
     :param mapId: value set for the ``es.mapping.id`` option.
     :param config: location of the JSON configuration read through
         ``S3FilesDsl.readConfigJson``.
     """
     envPrefix = S3FilesDsl.readConfigJson(config).elastic_env_index_prefix
     writer = index.write.format('org.elasticsearch.spark.sql')
     writer = writer.option('es.mapping.id', mapId)
     writer = writer.option('es.resource', envPrefix + name)
     writer.save()
Ejemplo n.º 11
0
 def writeESAlertsIndex(index, config):
     """Append ``index`` to the environment's ``copt-rod-alerts`` index.

     :param index: DataFrame with the alert rows to write.
     :param config: location of the JSON configuration read through
         ``S3FilesDsl.readConfigJson``.
     """
     envPrefix = S3FilesDsl.readConfigJson(config).elastic_env_index_prefix
     writer = index.write.format('org.elasticsearch.spark.sql')
     writer = writer.mode('append')
     writer = writer.option('es.write.operation', 'index')
     writer = writer.option('es.resource', envPrefix + 'copt-rod-alerts')
     writer.save()
Ejemplo n.º 12
0
 def writeESLogIndex(index, name, config):
     """Append ``index`` to a yearly Elasticsearch log index.

     The target index name is the environment prefix, followed by ``name``
     and the current four-digit year.

     :param index: DataFrame with the log rows to write.
     :param name: base name of the index, without prefix or year.
     :param config: location of the JSON configuration read through
         ``S3FilesDsl.readConfigJson``.
     """
     envPrefix = S3FilesDsl.readConfigJson(config).elastic_env_index_prefix
     writer = index.write.format('org.elasticsearch.spark.sql')
     writer = writer.mode('append')
     writer = writer.option('es.write.operation', 'index')
     writer = writer.option(
         'es.resource', envPrefix + name + datetime.now().strftime("%Y"))
     writer.save()