Code example #1
# Common imports assumed for all of the examples below. Helper functions such as
# get_area, get_valid_co_cust, get_cust_cluster, get_co_co_01, get_co_co_line,
# get_around_vfr, get_around_cust, get_plm_item, get_all_consume_level,
# except_grade, around_except_grade and write_hbase2, as well as the globals
# `spark` and `hbase`, are assumed to be defined elsewhere in the project.
from datetime import datetime as dt
import traceback as tb

from pyspark.sql import functions as f
from pyspark.sql.functions import col


def get_qty_vfr_except():
    print(f"{str(dt.now())}  Last-30-day retailer order volume inconsistent with the month's surrounding foot traffic")
    area_code = get_area(spark).dropDuplicates(["com_id", "sale_center_id"]).select("com_id", "sale_center_id", "city")
    co_cust = get_valid_co_cust(spark).select("cust_id", "com_id", "sale_center_id") \
        .join(area_code, ["com_id", "sale_center_id"])

    cust_cluster = get_cust_cluster(spark)

    # Retailer order volume over the last 30 days
    qty_sum = get_co_co_01(spark, [0, 30]).groupBy("cust_id") \
        .agg(f.sum("qty_sum").alias("qty_sum"))

    # Get the surrounding foot traffic
    around_vfr = get_around_vfr(spark)

    cols = {"value": "stream_avg_orders",
            "level1_code": "classify_level1_code",
            }
    vfr_avg_qty = qty_sum.join(around_vfr, "cust_id") \
        .withColumn(cols["value"], col("qty_sum") / col("avg_vfr")) \
        .join(cust_cluster, "cust_id") \
        .select("city", "cust_id", "cluster_index", cols["value"])

    except_cust = except_grade(vfr_avg_qty, cols, ["city", "cluster_index"], [3, 4, 5]) \
        .join(co_cust, "cust_id") \
        .withColumn(cols["level1_code"], f.lit("YJFL001"))

    values = [
                 "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                 "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                 "warning_level_code", "cust_id", "city", "sale_center_id"
             ] + list(cols.values())

    except_cust.foreachPartition(lambda x: write_hbase2(x, values, hbase))
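
except_grade itself is not part of these excerpts. Judging from its call sites (a metric column named in cols["value"], grouping keys such as ["city", "cluster_index"], grades [3, 4, 5]) and from the columns later written to HBase (avg_orders_plus3/minu3 through avg_orders_plus5/minu5, warning_level_code, plus a "mean" column that a later example renames), it presumably flags rows whose metric falls outside per-group mean ± k·stddev bands. The following is only a minimal sketch of that assumed behaviour, not the project's actual implementation:

from pyspark.sql import functions as f
from pyspark.sql.functions import col

def except_grade_sketch(df, cols, group_cols, grades):
    # Per-group mean and standard deviation of the metric under test.
    stats = df.groupBy(group_cols).agg(
        f.mean(cols["value"]).alias("mean"),
        f.stddev(cols["value"]).alias("std"))
    out = df.join(stats, group_cols)
    # mean ± k*std bands, mirroring the avg_orders_plus/minu columns above.
    for k in grades:
        out = out.withColumn(f"avg_orders_plus{k}", col("mean") + k * col("std")) \
                 .withColumn(f"avg_orders_minu{k}", col("mean") - k * col("std"))
    # Warning level = the widest band the value falls outside of.
    level = f.lit(None)
    for k in sorted(grades):
        outside = (col(cols["value"]) > col(f"avg_orders_plus{k}")) | \
                  (col(cols["value"]) < col(f"avg_orders_minu{k}"))
        level = f.when(outside, f.lit(str(k))).otherwise(level)
    return out.withColumn("warning_level_code", level) \
              .where(col("warning_level_code").isNotNull())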
Code example #2
def get_high_cons_except():
    try:
        cols = {"value": "high_order_cons_ratio",
                "level1_code": "classify_level1_code",
                }

        co_cust = get_valid_co_cust(spark).select("cust_id", "sale_center_id")

        cust_cluster = get_cust_cluster(spark)

        print(f"{str(dt.now())}  Retail store high-priced cigarette ratio / consumption level")
        # Consumption level
        consume_level_df = get_all_consume_level(spark).select("city", "cust_id", "consume_level")


        co_co_line = get_co_co_line(spark, scope=[0, 30]) \
                               .select("cust_id", "qty_ord", "price")


        # Total quantity of cigarettes ordered by each retailer
        item_num = co_co_line.groupBy("cust_id").agg(f.sum("qty_ord").alias("item_num"))

        # Quantity of high-priced cigarettes ordered by each retailer
        high_price_num = co_co_line.where(col("price") >= 500) \
            .groupBy("cust_id") \
            .agg(f.sum("qty_ord").alias("high_price_num"))
        # High-priced cigarette ratio for each retailer
        high_price_ratio = item_num.join(high_price_num, "cust_id") \
            .withColumn("high_price_ratio", col("high_price_num") / col("item_num")) \
            .select("cust_id", "high_price_ratio")

        # Ratio of each retailer's high-priced cigarette share to the surrounding consumption level
        high_consume_level = high_price_ratio.join(consume_level_df, "cust_id") \
            .withColumn(cols["value"], col("high_price_ratio") / col("consume_level")) \
            .join(cust_cluster, "cust_id") \
            .select("city", "cust_id", "cluster_index", cols["value"])


        result = except_grade(high_consume_level, cols, ["city", "cluster_index"], [3, 4, 5]) \
            .join(co_cust, "cust_id") \
            .withColumn(cols["level1_code"], f.lit("YJFL004"))

        values =[
              "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
              "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
              "warning_level_code", "cust_id", "city", "sale_center_id"
              ] + list(cols.values())
        result.foreachPartition(lambda x: write_hbase2(x, values, hbase))
    except Exception:
        tb.print_exc()
Code example #3
def get_same_vfr_grade_excpet():
    print(f"{str(dt.now())}  Last-30-day retailer order volume inconsistent with that of retail stores at the same foot-traffic level")
    # Get each retailer's city and sale_center_id
    area_code = get_area(spark).dropDuplicates(["com_id", "sale_center_id"]).select("com_id", "sale_center_id", "city")
    co_cust = get_valid_co_cust(spark).select("cust_id", "com_id", "sale_center_id") \
                                      .join(area_code, ["com_id", "sale_center_id"])

    cols = {"value": "retail_month_orders",
            "level1_code": "classify_level1_code",
            }
    # Each retailer's order volume over the last 30 days
    qty_sum = get_co_co_01(spark, [0, 30]).groupBy("cust_id") \
        .agg(f.sum("qty_sum").alias(cols["value"]))



    # Average foot traffic around each retailer
    around_vfr = get_around_vfr(spark).withColumnRenamed("cust_id", "cust_id0")
    # Retailer clustering results
    cust_cluster = get_cust_cluster(spark)


    around_vfr1 = around_vfr.withColumn("upper", col("avg_vfr") + col("avg_vfr") * 0.2) \
                            .withColumn("lower", col("avg_vfr") - col("avg_vfr") * 0.2) \
                            .withColumnRenamed("cust_id0", "cust_id1") \
                            .withColumnRenamed("city", "city1") \
                            .select("cust_id1", "city1", "upper", "lower")


    # Same-foot-traffic-level retailers = other retailers citywide whose monthly average foot traffic
    # lies within ±20% of this retailer's monthly average foot traffic
    # cust_id0: retailers at the same foot-traffic level as cust_id1
    same_vfr_grade = around_vfr1.join(around_vfr, (col("city1") == col("city")) & (col("avg_vfr") > col("lower")) & (
                col("avg_vfr") < col("upper"))) \
                  .select("cust_id1", "cust_id0")


    except_cust = around_except_grade(same_vfr_grade, qty_sum, cust_cluster, cols, grade=[3, 4, 5]) \
                                .join(co_cust, "cust_id") \
                                .withColumn(cols["level1_code"], f.lit("YJFL002"))
    values = [
                 "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                 "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                 "warning_level_code", "cust_id", "city", "sale_center_id"
             ] + list(cols.values())

    except_cust.foreachPartition(lambda x: write_hbase2(x, values, hbase))
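
around_except_grade, used here and in the later examples, is also not included in these excerpts. Its arguments suggest the same banding idea applied to each retailer's own comparison group: the (cust_id1, cust_id0) pairs define the peers, the second argument supplies the metric, and cust_cluster supplies cluster_index. A rough sketch under those assumptions (names and internals are illustrative, not the actual implementation):

from pyspark.sql import functions as f
from pyspark.sql.functions import col

def around_except_grade_sketch(pairs, metric_df, cust_cluster, cols, grade):
    value = cols["value"]
    # Peer metrics: for each focal retailer cust_id1, aggregate the metric of its peers cust_id0.
    peer_stats = pairs.join(metric_df.withColumnRenamed("cust_id", "cust_id0"), "cust_id0") \
        .groupBy("cust_id1") \
        .agg(f.mean(value).alias("mean"), f.stddev(value).alias("std"))
    # The focal retailer's own metric plus its cluster label.
    own = metric_df.join(cust_cluster, "cust_id").withColumnRenamed("cust_id", "cust_id1")
    out = own.join(peer_stats, "cust_id1").withColumnRenamed("cust_id1", "cust_id")
    # Same mean ± k*std bands and warning levels as in except_grade_sketch above.
    for k in grade:
        out = out.withColumn(f"avg_orders_plus{k}", col("mean") + k * col("std")) \
                 .withColumn(f"avg_orders_minu{k}", col("mean") - k * col("std"))
    level = f.lit(None)
    for k in sorted(grade):
        outside = (col(value) > col(f"avg_orders_plus{k}")) | (col(value) < col(f"avg_orders_minu{k}"))
        level = f.when(outside, f.lit(str(k))).otherwise(level)
    return out.withColumn("warning_level_code", level) \
              .where(col("warning_level_code").isNotNull())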
Code example #4
def get_grade_cons_except():
    print(f"{str(dt.now())} Last-30-day retailer grade inconsistent with the surrounding consumption level")
    try:
        consume_level = get_all_consume_level(spark) \
            .select("city", "cust_id", "consume_level")
        cust_cluster = get_cust_cluster(spark)

        # Only keep currently valid retailers; drop retailers that were valid in the past but are no longer valid
        valid_co_cust = get_valid_co_cust(spark).select("cust_id", "sale_center_id")

        # Retailer grade over the last 30 days; a retailer with several grades in the period is warned on each one
        cust_seg = spark.sql("select * from DB2_DB2INST1_CO_CUST") \
            .where(f.datediff(f.current_date(), col("dt")) <= 30) \
            .where(col("com_id").isin(["011114305", "011114306", "011114302"])) \
            .withColumn("cust_seg", f.regexp_replace("cust_seg", "ZZ", "31")) \
            .select("com_id", "cust_id", "cust_seg") \
            .join(valid_co_cust, "cust_id") \
            .dropDuplicates(["cust_id", "cust_seg"])

        cols = {"value": "retail_rim_cons",
                "level1_code": "classify_level1_code",
                }
        cust_seg_cons = cust_seg.join(consume_level, "cust_id") \
            .withColumnRenamed("consume_level", cols["value"]) \
            .join(cust_cluster, "cust_id") \
            .select("city", "cust_id", "cust_seg", "cluster_index", "sale_center_id", cols["value"])

        result = except_grade(cust_seg_cons, cols, ["city", "cust_seg", "cluster_index"], [3, 4, 5]) \
            .withColumnRenamed("cust_seg", "retail_grade") \
            .withColumnRenamed("mean", "city_grade_cons_avg") \
            .withColumn(cols["level1_code"], f.lit("YJFL003"))

        values = [
                 "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                 "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                 "warning_level_code", "cust_id", "city", "sale_center_id",
                 "retail_grade", "city_grade_cons_avg"
                 ] + list(cols.values())
        result.foreachPartition(lambda x: write_hbase2(x, values, hbase))
    except Exception:
        tb.print_exc()
Code example #5
def get_around_order_except():
    try:
        area_code = get_area(spark).dropDuplicates(["com_id", "sale_center_id"]).select("com_id", "sale_center_id","city")
        co_cust = get_valid_co_cust(spark).select("cust_id", "com_id", "sale_center_id") \
            .join(area_code, ["com_id", "sale_center_id"])

        # Order data for the last 30 days
        co_co_01 = get_co_co_01(spark, scope=[0, 30]) \
            .select("qty_sum", "amt_sum", "cust_id")

        # Each retailer's total order quantity and total order amount
        qty_amt_sum = co_co_01.groupBy("cust_id") \
                              .agg(f.sum("qty_sum").alias("order_sum"), f.sum("amt_sum").alias("amt_sum"))

        # For each retailer cust_id1, cust_id0 are the retailers in its vicinity (within 1 km)
        around_cust = get_around_cust(spark, 1).select("cust_id1", "cust_id0")
        # Retailer clustering results
        cust_cluster = get_cust_cluster(spark)

        print(f"{str(dt.now())}  Retailer's last-30-day average price per carton inconsistent with that of retailers within 1 km")
        try:
            cols = {"value": "retail_month_price",
                    "level1_code":"classify_level1_code",
                    }

            # Average price per carton
            avg_price = qty_amt_sum.withColumn(cols["value"], col("amt_sum") / col("order_sum")) \
                .select("cust_id", cols["value"])

            except_cust = around_except_grade(around_cust, avg_price, cust_cluster, cols, [3, 4, 5]) \
                .join(co_cust, "cust_id") \
                .withColumn(cols["level1_code"], f.lit("YJFL007"))

            values =[
                     "avg_orders_plus3","avg_orders_minu3","avg_orders_plus4",
                     "avg_orders_minu4","avg_orders_plus5","avg_orders_minu5",
                     "warning_level_code","cust_id", "city", "sale_center_id"
                    ]+list(cols.values())

            except_cust.foreachPartition(lambda x: write_hbase2(x, values, hbase))
        except Exception:
            tb.print_exc()

        print(f"{str(dt.now())} Retailer's last-30-day order amount inconsistent with that of retailers within 1 km")
        try:
            cols = {"value": "retail_month_order_book",
                    "level1_code": "classify_level1_code",
                    }

            qty_amt_sum = qty_amt_sum.withColumnRenamed("amt_sum", cols["value"])
            except_cust = around_except_grade(around_cust, qty_amt_sum, cust_cluster, cols, [3, 4, 5]) \
                .join(co_cust, "cust_id") \
                .withColumn(cols["level1_code"], f.lit("YJFL009"))

            values = [
                     "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                     "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                     "warning_level_code", "cust_id", "city", "sale_center_id"
                     ] + list(cols.values())
            except_cust.foreachPartition(lambda x: write_hbase2(x, values, hbase))
        except Exception:
            tb.print_exc()
    except Exception:
        tb.print_exc()
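
get_around_cust(spark, 1), which supplies the 1 km neighbour pairs used above, is not shown either. One plausible way to build such pairs, assuming a retailer coordinate table exists (the cust_location table and its longitude/latitude columns below are invented for illustration), is a self cross-join filtered by haversine distance:

from pyspark.sql import functions as f
from pyspark.sql.functions import col

def get_around_cust_sketch(spark, radius_km):
    # Assumed source of retailer coordinates; not a table named in the excerpts.
    pts = spark.table("cust_location").select("cust_id", "longitude", "latitude")
    a = pts.select(col("cust_id").alias("cust_id1"),
                   col("longitude").alias("lng1"), col("latitude").alias("lat1"))
    b = pts.select(col("cust_id").alias("cust_id0"),
                   col("longitude").alias("lng0"), col("latitude").alias("lat0"))
    pairs = a.crossJoin(b).where(col("cust_id1") != col("cust_id0"))
    # Haversine distance in kilometres (Earth radius ~ 6371 km).
    dlat = f.radians(col("lat0") - col("lat1"))
    dlng = f.radians(col("lng0") - col("lng1"))
    h = f.pow(f.sin(dlat / 2), 2) + \
        f.cos(f.radians(col("lat1"))) * f.cos(f.radians(col("lat0"))) * f.pow(f.sin(dlng / 2), 2)
    dist_km = f.asin(f.sqrt(h)) * 2 * 6371
    return pairs.where(dist_km <= radius_km).select("cust_id1", "cust_id0")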
Code example #6
def get_avg_cons_except():
    try:

        co_cust = get_valid_co_cust(spark).select("cust_id", "sale_center_id")

        consume_level_df = get_all_consume_level(spark).select("city", "cust_id", "consume_level")

        cust_cluster = get_cust_cluster(spark)


        # ----------------------- Get co_co_01
        co_co_01 = get_co_co_01(spark, scope=[0, 30]) \
                               .select("qty_sum", "amt_sum", "cust_id")

        # Each retailer's total order quantity and total order amount
        qty_amt_sum = co_co_01.groupBy("cust_id") \
                              .agg(f.sum("qty_sum").alias("order_sum"), f.sum("amt_sum").alias("amt_sum"))

        print(f"{str(dt.now())}  Retail store average order price per carton / consumption level")
        try:
            cols = {"value": "order_price_cons",
                    "level1_code": "classify_level1_code",
                    }
            # Each retailer's average order price per carton
            avg_price = qty_amt_sum.withColumn("avg_price", col("amt_sum") / col("order_sum")) \
                                  .select("cust_id", "avg_price")

            avg_consume_level = avg_price.join(consume_level_df, "cust_id") \
                .withColumn(cols["value"], col("avg_price") / col("consume_level")) \
                .join(cust_cluster, "cust_id") \
                .select("city", "cust_id", "cluster_index", cols["value"])



            result = except_grade(avg_consume_level, cols, ["city", "cluster_index"], [3, 4, 5]) \
                .join(co_cust, "cust_id") \
                .withColumn(cols["level1_code"], f.lit("YJFL006"))

            values = [
                     "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                     "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                     "warning_level_code", "cust_id", "city", "sale_center_id"
                     ] + list(cols.values())
            result.foreachPartition(lambda x: write_hbase2(x, values, hbase))
        except Exception:
            tb.print_exc()

        print(f"{str(dt.now())}  Retail store order amount / consumption level")
        try:
            cols = {"value": "order_book_cons",
                    "level1_code": "classify_level1_code",
                    }

            amt_cons_ratio = qty_amt_sum.join(consume_level_df, "cust_id") \
                .withColumn(cols["value"], col("amt_sum") / col("consume_level")) \
                .join(cust_cluster, "cust_id") \
                .select("city", "cust_id", "cluster_index", cols["value"])



            result = except_grade(amt_cons_ratio, cols, ["city", "cluster_index"], [3, 4, 5]) \
                .join(co_cust, "cust_id") \
                .withColumn(cols["level1_code"], f.lit("YJFL005"))

            values = [
                     "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                     "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                     "warning_level_code", "cust_id", "city", "sale_center_id"
                     ] + list(cols.values())
            result.foreachPartition(lambda x: write_hbase2(x, values, hbase))
        except Exception:
            tb.print_exc()
    except Exception:
        tb.print_exc()
Code example #7
def get_around_item_except():
    # Get each retailer's city and sale_center_id
    area_code = get_area(spark).dropDuplicates(["com_id", "sale_center_id"]).select("com_id", "sale_center_id", "city")
    co_cust = get_valid_co_cust(spark).select("cust_id", "com_id", "sale_center_id") \
        .join(area_code, ["com_id", "sale_center_id"])

    # Get order data for the last 30 days
    co_co_line = get_co_co_line(spark, scope=[0, 30]) \
        .select("cust_id", "item_id", "qty_ord", "price", "amt")

    # Get the retailers within 1 km of each retailer (cust_id1 -> surrounding cust_id0)
    around_cust = get_around_cust(spark, 1).select("cust_id1", "cust_id0")
    # Get clustering results
    cust_cluster = get_cust_cluster(spark)
    # Sigma grades shared by both warning blocks below
    grade = [3, 4, 5]

    print(f"{str(dt.now())}  Retailer's high-priced cigarette order share inconsistent with that of retailers within 1 km")
    try:
        cols = {"value": "retail_month_high",
                "level1_code": "classify_level1_code",
                }
        # Total order quantity
        qty_sum = co_co_line.groupBy("cust_id").agg(f.sum("qty_ord").alias("qty_sum"))
        # High-priced cigarette ratio
        high_ratio = co_co_line.where(col("price") >= 500) \
            .groupBy("cust_id") \
            .agg(f.sum("qty_ord").alias("high_qty_sum")) \
            .join(qty_sum, "cust_id") \
            .withColumn(cols["value"], col("high_qty_sum") / col("qty_sum"))

        result = around_except_grade(around_cust, high_ratio, cust_cluster, cols, grade) \
                                    .join(co_cust, "cust_id") \
                                    .withColumn(cols["level1_code"], f.lit("YJFL008"))

        values = [
                 "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                 "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                 "warning_level_code", "cust_id", "city", "sale_center_id"
                 ] + list(cols.values())
        result.foreachPartition(lambda x: write_hbase2(x, values, hbase))
    except Exception:
        tb.print_exc()


    # Retailer in/out-of-province amount ratio = in-province order amount / out-of-province order amount
    # Retailer in/out-of-province quantity ratio = in-province order quantity / out-of-province order quantity
    try:
        # Order amount ratio
        cols1 = {"value": "month_amount_ratio",
                 "level1_code": "classify_level1_code",
                 }
        # Order quantity ratio
        cols2 = {"value": "month_count_ratio",
                 "level1_code": "classify_level1_code",
                 }

        line_plm = get_plm_item(spark).select("item_id", "yieldly_type") \
            .join(co_co_line, "item_id")

        # In-province cigarettes ordered by each retailer: quantity and order amount
        in_prov = line_plm.where(col("yieldly_type") == "0") \
            .groupBy("cust_id") \
            .agg(f.sum("qty_ord").alias("in_prov_qty"), f.sum("amt").alias("in_prov_amt"))
        # Out-of-province cigarettes ordered by each retailer: quantity and order amount
        out_prov = line_plm.where(col("yieldly_type") == "1") \
            .groupBy("cust_id") \
            .agg(f.sum("qty_ord").alias("out_prov_qty"), f.sum("amt").alias("out_prov_amt"))


        # In/out-of-province order quantity ratio and order amount ratio
        in_out_prov_ratio = in_prov.join(out_prov, "cust_id") \
            .withColumn(cols2["value"], col("in_prov_qty") / col("out_prov_qty")) \
            .withColumn(cols1["value"], col("in_prov_amt") / col("out_prov_amt"))


        print(f"{str(dt.now())}  Retailer's in/out-of-province order amount ratio inconsistent with that of retailers within 1 km")
        # Amount-ratio anomalies
        result = around_except_grade(around_cust, in_out_prov_ratio, cust_cluster, cols1, grade) \
            .join(co_cust, "cust_id") \
            .withColumn(cols1["level1_code"], f.lit("YJFL010"))

        values = [
                 "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                 "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                 "warning_level_code", "cust_id", "city", "sale_center_id"
                 ] + list(cols1.values())
        result.foreachPartition(lambda x: write_hbase2(x, values, hbase))



        print(f"{str(dt.now())}  Retailer's in/out-of-province order quantity ratio inconsistent with that of retailers within 1 km")
        # Quantity-ratio anomalies
        result = around_except_grade(around_cust, in_out_prov_ratio, cust_cluster, cols2, grade) \
            .join(co_cust, "cust_id") \
            .withColumn(cols2["level1_code"], f.lit("YJFL011"))

        values = [
                 "avg_orders_plus3", "avg_orders_minu3", "avg_orders_plus4",
                 "avg_orders_minu4", "avg_orders_plus5", "avg_orders_minu5",
                 "warning_level_code", "cust_id", "city", "sale_center_id"
                 ] + list(cols2.values())
        result.foreachPartition(lambda x: write_hbase2(x, values, hbase))
    except Exception:
        tb.print_exc()