예제 #1
0
    def special_brand_loading(self):
        """Load the special-brand file into an id->name map and an alias list.

        Each usable line of ``self.special_brand_file`` holds exactly two
        tab-separated fields: ``brand_id`` and ``brand_name``. Blank lines,
        lines with any other field count, and repeated brand ids are skipped.

        Returns:
            A ``(special_brand_dict, special_brand_list)`` tuple.
            ``special_brand_dict`` maps each brand id to the value returned by
            ``tool.brand_clean`` for the lower-cased (possibly exchanged)
            name; ``special_brand_list`` holds the result of
            ``self.english_brand_extension`` for each kept brand, in file
            order.
        """
        exchange_map = tool.get_exchange_brand_pair()

        seen_ids = set()
        id_to_name = {}
        extended_names = []
        with open(self.special_brand_file, "r", encoding="utf-8") as src:
            for raw in src:
                row = raw.strip()
                if not row:
                    continue
                fields = row.split("\t")
                if len(fields) != 2:
                    continue
                brand_id, brand_name = (field.strip() for field in fields)
                # Only the first occurrence of a brand id is kept.
                if brand_id in seen_ids:
                    continue
                seen_ids.add(brand_id)
                # Swap in the exchange name (looked up with the original
                # casing) before lower-casing and cleaning.
                if brand_name in exchange_map:
                    brand_name = exchange_map[brand_name]
                brand_name = tool.brand_clean(brand_name.lower())
                extended_names.append(
                    self.english_brand_extension(brand_name))
                id_to_name[brand_id] = brand_name

        return id_to_name, extended_names
예제 #2
0
    def brand_recall_local_file(self):
        """Rewrite ``self.in_file`` into ``self.out_file`` with extended names.

        Each usable input line has exactly five tab-separated fields:
        ``brand_id, brand_name, cat1_name, cat2_name, cat3_name``. Blank lines
        and lines with another field count are skipped, as are brands whose
        name is in ``self.brand_del_dict`` or whose id is in
        ``self.del_brandID_dict``.

        For every kept line the brand name is replaced by its entry in
        ``self.brand_exchange_dict`` (when present), passed through
        ``tool.brand_clean`` and ``self.english_brand_extension``, and a
        six-field row ``brand_id, extended_name, original_name, cat1_name,
        cat2_name, cat3_name`` is emitted. Rows are joined with newlines
        (no trailing newline).

        Both files are handled as UTF-8 so the result does not depend on the
        platform's locale encoding.

        Raises:
            OSError: if either file cannot be opened.
        """
        ex_brand_dict = self.brand_exchange_dict
        r_lst = []
        with open(self.in_file, encoding="utf-8") as f1:
            for line in f1:
                line = line.strip()
                if not line:
                    continue
                lst1 = line.split("\t")
                if len(lst1) != 5:
                    continue
                b_id, ori_b_name, cat1_name, cat2_name, cat3_name = [
                    tmp.strip() for tmp in lst1
                ]
                if ori_b_name in self.brand_del_dict:
                    continue
                if b_id in self.del_brandID_dict:
                    continue
                if ori_b_name in ex_brand_dict:
                    b_name = ex_brand_dict[ori_b_name]
                else:
                    b_name = ori_b_name

                b_name = tool.brand_clean(b_name)
                ext_band_name = self.english_brand_extension(b_name)
                r_lst.append("\t".join([
                    b_id, ext_band_name, ori_b_name, cat1_name, cat2_name,
                    cat3_name
                ]))

        # Leaving the ``with`` block flushes and closes the file.
        with open(self.out_file, "w", encoding="utf-8") as f1:
            f1.write("\n".join(r_lst))
예제 #3
0
    def brand_recall(self, output_file="pdd_xjd_brand_recall_info.txt"):
        """Write one recall row per usable line of the standard-brand file.

        Each usable line of ``self.standard_brand_file`` has exactly five
        tab-separated fields: ``brand_id, brand_name, cat2_id, cat2, gmv``.
        Blank lines, lines with another field count, single-character brand
        names, and brands for which ``self.del_brand_id`` is truthy are
        skipped.

        Args:
            output_file: Path of the UTF-8 file to write. Rows are
                ``brand_id, original_name, extended_name, cat2_id, cat2, gmv``
                joined by tabs; all brand rows come first, followed by every
                non-empty string returned by
                ``self.mijia_special_brand_recall``.
        """
        exchange_map = tool.get_exchange_brand_pair()

        brand_rows = []
        mijia_rows = []
        with open(self.standard_brand_file, "r", encoding="utf-8") as src:
            for raw in src:
                row = raw.strip()
                if not row:
                    continue
                fields = row.split("\t")
                if len(fields) != 5:
                    continue
                brand_id, original_name, cat2_id, cat2, gmv = (
                    field.strip() for field in fields)
                if len(original_name) == 1 or self.del_brand_id(brand_id):
                    continue
                # The exchange lookup uses the original casing; lower-casing
                # happens only afterwards, right before cleaning.
                mapped_name = (exchange_map[original_name]
                               if original_name in exchange_map
                               else original_name)
                cleaned_name = tool.brand_clean(mapped_name.lower())
                extended_name = self.english_brand_extension(cleaned_name)
                brand_rows.append("\t".join(
                    (brand_id, original_name, extended_name, cat2_id, cat2,
                     gmv)))
                mijia_row = self.mijia_special_brand_recall(
                    brand_id, cat2_id, cat2)
                if mijia_row != "":
                    mijia_rows.append(mijia_row)

        with open(output_file, "w", encoding="utf-8") as dst:
            dst.write("\n".join(brand_rows + mijia_rows))
            dst.flush()
예제 #4
0
    def special_brand_dealing(self, lst1, ex_brand_dict):
        """Return the extended form of the brand name in a two-field record.

        Args:
            lst1: A two-item sequence ``(brand_id, brand_name)``; only the
                name is used.
            ex_brand_dict: Mapping consulted for a replacement brand name.

        Returns:
            The result of ``self.english_brand_extension`` applied to the
            lower-cased, cleaned (and possibly exchanged) brand name.
        """
        _, brand_name = lst1
        mapped_name = (ex_brand_dict[brand_name]
                       if brand_name in ex_brand_dict else brand_name)
        cleaned_name = tool.brand_clean(mapped_name.lower())
        return self.english_brand_extension(cleaned_name)
예제 #5
0
    def _brand_ext(self, ori_b_name):
        """Return the extended form of a brand name, or "" for a blank name.

        The name is first replaced by its entry in
        ``self.brand_exchange_dict`` when one exists, then stripped. If
        nothing is left, an empty string is returned; otherwise the name goes
        through ``tool.brand_clean`` and ``self.english_brand_extension``.
        """
        exchange_map = self.brand_exchange_dict
        candidate = (exchange_map[ori_b_name]
                     if ori_b_name in exchange_map else ori_b_name)
        candidate = candidate.strip()
        if not candidate:
            return ""
        return self.english_brand_extension(tool.brand_clean(candidate))
예제 #6
0
    def brand_recall(self, output_file="brand_recall_info.txt"):
        """Write standard-brand rows whose extended name contains a special alias.

        Each usable line of ``self.standard_brand_file`` has exactly five
        tab-separated fields: ``brand_id, brand_name, cat1_id, cat1, gmv``.
        For every such line the brand name is exchanged (via
        ``tool.get_exchange_brand_pair()``), cleaned and extended; the row is
        recalled when the lower-cased extended name contains any
        "/"-separated piece of a special brand name. The single-character
        alias "后" never triggers a recall.

        Args:
            output_file: Path of the UTF-8 file to write. It receives the
                distinct recalled rows (``brand_id, original_name,
                extended_name, cat1_id, cat1, gmv``, in first-seen order),
                followed by every non-empty string returned by
                ``self.mijia_special_brand_recall``.
        """
        ex_brand_dict = tool.get_exchange_brand_pair()
        # NOTE(review): the matching below treats this value as an iterable of
        # "/"-separated alias strings -- confirm special_brand_loading()
        # returns that shape in this class.
        special_brand_list = self.special_brand_loading()
        # Split the special names once instead of once per input line. Empty
        # pieces (e.g. from a trailing "/") are dropped because an empty
        # separator makes str.split raise ValueError.
        special_aliases = [
            alias
            for s_name in special_brand_list
            for alias in s_name.strip().split("/")
            if alias
        ]
        recall_brand_dict = {}  # used as an insertion-ordered set of rows
        mijia_lst = []
        idx = 0
        with open(self.standard_brand_file, "r", encoding="utf-8") as f1:
            for line in f1:
                line = line.strip()
                if not line:
                    continue
                lst1 = line.split("\t")
                if len(lst1) != 5:
                    continue
                b_id, ori_b_name, cat1_id, cat1, gmv = [
                    tmp.strip() for tmp in lst1
                ]
                mijia_str = self.mijia_special_brand_recall(
                    b_id, cat1_id, cat1)
                if mijia_str != "":
                    mijia_lst.append(mijia_str)
                if ori_b_name in ex_brand_dict:
                    b_name = ex_brand_dict[ori_b_name]
                else:
                    b_name = ori_b_name
                b_name = tool.brand_clean(b_name)
                ext_band_name = self.english_brand_extension(b_name)
                # Matching is case-insensitive on the brand side only; the
                # emitted row keeps the extended name's original casing.
                ext_band_name_lower = ext_band_name.lower()
                for alias in special_aliases:
                    idx += 1
                    # Progress trace for long runs.
                    if idx % 1000000 == 0:
                        print("idx: %s" % idx)
                    # Recalling on the single character "后" alone has a very
                    # high error rate, so that alias is ignored.
                    if alias == "后":
                        continue
                    if alias in ext_band_name_lower:
                        k = "\t".join([
                            b_id, ori_b_name, ext_band_name, cat1_id, cat1,
                            gmv
                        ])
                        recall_brand_dict[k] = ''
                        # The row does not depend on which alias matched, so
                        # further aliases cannot add anything for this line.
                        break
        r_lst = list(recall_brand_dict) + mijia_lst
        with open(output_file, "w", encoding="utf-8") as f1:
            f1.write("\n".join(r_lst))
예제 #7
0
    def brand_recall(self, output_file="brand_recall_info_tmp_.txt"):
        """Write recalled brand rows plus hard-coded extras to ``output_file``.

        Each usable line of ``self.standard_brand_file`` has exactly five
        tab-separated fields: ``brand_id, brand_name, cat1_id, cat1, gmv``.
        Lines whose brand id is rejected by ``self.del_brand_func`` are
        skipped. For the rest, the brand name is exchanged (via
        ``tool.get_exchange_brand_pair()``), cleaned, extended and
        lower-cased; the brand is recalled when that name contains any
        non-empty "/"-separated piece of an entry in
        ``self.special_brand_list``. A recalled brand yields its own
        category row and a forced row for category "100040", each unless
        ``self.brand_cat1_relation_del`` rejects that brand/category pair.

        Args:
            output_file: Path of the UTF-8 file to write. Rows appear in this
                order: distinct recalled rows (first-seen order), the
                Mijia/Redmi rows, the rows from ``self.brand_miss_cat1()``,
                then the hard-coded PLUS RAPIDE, LOORA PWD and ROYALAPOTHIC
                rows.
        """
        ex_brand_dict = tool.get_exchange_brand_pair()
        special_brand_list = self.special_brand_list
        recall_brand_dict = {}  # used as an insertion-ordered set of rows
        mijia_lst = []
        pr_list = []
        luolamima_list = []
        boshikou_list = []
        idx = 0
        with open(self.standard_brand_file, "r", encoding="utf-8") as f1:
            for line in f1:
                line = line.strip()
                if line == "":
                    continue
                lst1 = line.split("\t")
                if len(lst1) != 5:
                    continue
                b_id, ori_b_name, cat1_id, cat1, gmv = [
                    tmp.strip() for tmp in lst1
                ]
                if self.del_brand_func(b_id):
                    continue
                # Mijia extension.
                mijia_str = self.mijia_special_brand_recall(
                    b_id, cat1_id, cat1)
                if mijia_str != "":
                    mijia_lst.append(mijia_str)
                # Redmi extension (collected in the same list as Mijia rows).
                redmi_str = self.redmi_special_brand_recall(
                    b_id, cat1_id, cat1)
                if redmi_str != "":
                    mijia_lst.append(redmi_str)

                if ori_b_name in ex_brand_dict:
                    b_name = ex_brand_dict[ori_b_name]
                else:
                    b_name = ori_b_name
                b_name = tool.brand_clean(b_name)
                # Lower-cased once per line: both the containment test and
                # the emitted rows use the lower-case form.
                ext_band_name = self.english_brand_extension(b_name).lower()
                for s_name in special_brand_list:
                    for s_name_item in s_name.strip().split("/"):
                        s_name_item = s_name_item.strip()
                        if s_name_item == "":
                            continue
                        idx += 1
                        # Progress trace for long runs.
                        if idx % 1000000 == 0:
                            print("idx: %s" % idx)
                        if s_name_item not in ext_band_name:
                            continue
                        # Recalling on the single character "后" alone has a
                        # very high error rate, so that alias is ignored.
                        if s_name_item == "后":
                            continue
                        # Brand 99714433 is never recalled here (the original
                        # note cites its row under cat1 100012, household
                        # cleaning / paper products).
                        if b_id == '99714433':
                            continue
                        if not self.brand_cat1_relation_del(b_id, cat1_id):
                            k = "\t".join([
                                b_id, ori_b_name, ext_band_name, cat1_id,
                                cat1, gmv
                            ])
                            recall_brand_dict[k] = ''
                        # Force-add the second-hand goods level-1 category.
                        if not self.brand_cat1_relation_del(b_id, "100040"):
                            k_tmp = "\t".join([
                                b_id, ori_b_name, ext_band_name, "100040",
                                "二手商品", '0.0'
                            ])
                            recall_brand_dict[k_tmp] = ''

        # Hard-coded rows: (target list, brand id, brand name, alias source
        # passed to english_brand_extension, cat1 id, cat1 name). The gmv
        # column is always "0.0".
        pr_alias_src = "PLUS RAPIDE/GXG旗下品牌PR/GXG旗下PR/GXG旗下男装PR"
        forced_rows = (
            # Mijia: home appliances, home improvement / building materials.
            (mijia_lst, "10698337", "MJ/米家", "MJ/米家/小米米家",
             "100031", "家用电器"),
            (mijia_lst, "10698337", "MJ/米家", "MJ/米家/小米米家",
             "100034", "家装建材"),
            # PLUS RAPIDE: apparel & underwear, sports & outdoors, shoes.
            (pr_list, "7830374549", "PLUS RAPIDE", pr_alias_src,
             "100006", "服饰内衣"),
            (pr_list, "7830374549", "PLUS RAPIDE", pr_alias_src,
             "100008", "运动户外"),
            (pr_list, "7830374549", "PLUS RAPIDE", pr_alias_src,
             "100001", "鞋靴"),
            # LOORA PWD: apparel & underwear, shoes.
            (luolamima_list, "99714457", "LOORA PWD/罗拉密码",
             "LOORA PWD/罗拉密码", "100006", "服饰内衣"),
            (luolamima_list, "99714457", "LOORA PWD/罗拉密码",
             "LOORA PWD/罗拉密码", "100001", "鞋靴"),
            # ROYALAPOTHIC: beauty & skincare.
            (boshikou_list, "11015914", "ROYALAPOTHIC/泊诗蔻",
             "ROYALAPOTHIC/泊诗蔻", "100007", "美妆护肤"),
        )
        for target, f_id, f_name, f_alias_src, f_cat1_id, f_cat1 in forced_rows:
            target.append("\t".join([
                f_id, f_name,
                self.english_brand_extension(f_alias_src), f_cat1_id,
                f_cat1, "0.0"
            ]))

        # Brands that are missing a level-1 category.
        miss_lst = self.brand_miss_cat1()
        r_lst = (list(recall_brand_dict) + mijia_lst + miss_lst + pr_list +
                 luolamima_list + boshikou_list)
        with open(output_file, "w", encoding="utf-8") as f1:
            f1.write("\n".join(r_lst))