Ejemplo n.º 1
0
    def brand_reg(self, s_name, brand_lst, brand_id):
        """Collect the candidate brand names that occur inside ``s_name``.

        Args:
            s_name: Text that is searched; candidates are matched with the
                ``in`` operator.
            brand_lst: Iterable of candidate brand-name strings.
            brand_id: Identifier paired with every recognised name.

        Returns:
            A list of ``(recognised_name, brand_id)`` tuples, in the order the
            candidates appear in ``brand_lst``. Empty when nothing matches.
        """
        r_lst = []

        for ext_bname in brand_lst:
            # Every accepting path requires the candidate to be a substring of
            # s_name, so test that once and skip non-matching candidates
            # before calling any of the more expensive helpers.
            if ext_bname not in s_name:
                continue

            if tool.is_all_eng(ext_bname) and tool.is_own_eng(s_name):
                # Presumably: an English candidate inside text that itself
                # contains English -- confirm against the tool helpers. Such
                # matches are delegated to the dedicated recogniser, which
                # returns None to reject the candidate.
                reg_bname = self.english_brand_recognition(ext_bname, s_name)
                if reg_bname is None:
                    continue
            else:
                reg_bname = ext_bname

            r_lst.append((reg_bname, brand_id))

        # NOTE: a self.rule_opt(s_name, r_lst) post-processing step used to be
        # applied here; it is currently disabled and the raw matches are
        # returned.
        return r_lst
Ejemplo n.º 2
0
    def english_brand_extension(self, brand_name):
        """Expand a brand name into a "/"-separated string of recall variants.

        Processing steps:

        1. Parenthesised qualifier. Surrounding whitespace is stripped,
           full-width parentheses are treated like ASCII ones and closing
           parentheses are dropped. If exactly one "(" remains, the text
           after it is the qualifier: when ``tool.is_all_eng`` accepts it, it
           is kept as an extra alias in front of the main part, otherwise it
           is discarded (e.g. ``Hisense/海信(黑电)`` -> ``Hisense/海信``).
           With any other number of "(" the name is used as given.
        2. The name is split on "/".
        3. A single name yields itself, itself without whitespace and itself
           without dots (ASCII or full-width)::

               COLOR KEY   ->  COLOR KEY/COLORKEY
               A.O.史密斯   ->  A.O.史密斯/AO史密斯

        4. Several names are classified with ``tool.is_all_eng`` /
           ``tool.is_all_chinese``. Names of the first class are expanded as
           in step 3; every such variant is concatenated with every name of
           the second class in both orders, every pair of first-class names
           is concatenated in both orders, and the variants plus all
           remaining names are kept as well.

        Args:
            brand_name: Raw brand name string.

        Returns:
            The de-duplicated variants joined with "/", in first-seen order
            so that the same input always produces the same string.
        """
        def _dedupe(items):
            # Order-preserving de-duplication; a plain set() would make the
            # output order depend on string hashing and vary between runs.
            seen = set()
            unique = []
            for item in items:
                if item not in seen:
                    seen.add(item)
                    unique.append(item)
            return unique

        def _single_brand_ext(tmp_b_name):
            # Variant without any whitespace.
            no_space = re.sub(r"\s+", "", tmp_b_name)
            # Variant without dots. U+FF0E is the full-width full stop; it is
            # written as an escape so it cannot be folded into "." when the
            # source text is normalised.
            no_dot = tmp_b_name.replace(".", "").replace("\uff0e", "")
            return _dedupe([tmp_b_name, no_space, no_dot])

        # Example input: 10943455        Hisense/海信(黑电)
        # U+FF08 / U+FF09 are the full-width parentheses (escaped for the
        # same reason as above).
        tmp = (brand_name.strip()
               .replace("\uff08", "(")
               .replace(")", "")
               .replace("\uff09", ""))
        parts = tmp.split("(")
        if len(parts) == 2:
            main_part, qualifier = parts
            if tool.is_all_eng(qualifier):
                ok_brand_name = qualifier + "/" + main_part
            else:
                ok_brand_name = main_part
        else:
            # No qualifier, or more than one "(": keep the name as given.
            ok_brand_name = brand_name

        brand_lst = ok_brand_name.strip().split("/")
        if len(brand_lst) == 1:
            re_brand_lst = _single_brand_ext(brand_lst[0])
        else:
            en_brand_lst = []
            ch_brand_lst = []
            other_brand_lst = []
            for b in brand_lst:
                if tool.is_all_eng(b):
                    en_brand_lst.append(b)
                elif tool.is_all_chinese(b):
                    ch_brand_lst.append(b)
                else:
                    other_brand_lst.append(b)

            en_brand_ext_lst = []
            for en_name in en_brand_lst:
                en_brand_ext_lst += _single_brand_ext(en_name)

            mix_brand_lst = []
            for en_variant in en_brand_ext_lst:
                for ch_name in ch_brand_lst:
                    mix_brand_lst.append(en_variant + ch_name)
                    mix_brand_lst.append(ch_name + en_variant)

            # Every unordered pair of the original (unexpanded) first-class
            # names, concatenated in both orders.
            for i, first in enumerate(en_brand_lst):
                for second in en_brand_lst[i + 1:]:
                    mix_brand_lst.append(first + second)
                    mix_brand_lst.append(second + first)

            re_brand_lst = (mix_brand_lst + en_brand_ext_lst
                            + ch_brand_lst + other_brand_lst)

        return "/".join(_dedupe(re_brand_lst))
Ejemplo n.º 3
0
    def brand_info_loading(self):
        """Read the brand info file and build the brand lookup tables.

        ``self._brand_info_file`` is read as UTF-8, one tab-separated record
        per line with exactly six fields::

            brand_id, original brand name, brand name, cat1_id, cat1, gmv

        Blank lines, lines starting with ``#`` and lines with a different
        number of fields are skipped. For the scalar tables a later record
        with the same key overwrites an earlier one.

        Returns:
            An 8-tuple of dicts, in this order:

            * brand_idx_dict: ``{"<variant>|<flag>": [brand_id, ...]}`` for
              every accepted variant produced by ``tool.brand_dealing``;
              flag is ``"0"`` when ``tool.is_all_eng`` accepts the variant,
              else ``"1"``. Ids are unique, in first-seen order.
            * idx_ori_brand_dict: ``{brand_id: brand name}`` (third column,
              before the exchange mapping is applied).
            * name_ori_brand_dict: ``{brand_id: original brand name}``
              (second column, with one hard-coded override).
            * brand_cat1_dict: ``{brand_id: [cat1_id, ...]}`` for records
              whose cat1 is not ``"NULL"``.
            * cat1_brand_dict: ``{cat1_id: ["<brand_id>|<brand name>", ...]}``
              for the same records, most recently read first; the brand name
              is the one after the exchange mapping.
            * cat1_clean_brand_dict: ``{cat1_id: ["<variant>|<flag>", ...]}``
              in file order, including records whose cat1 is ``"NULL"``.
            * brand_gmv_dict: ``{brand_id: gmv}`` as a float rounded to three
              decimals.
            * cat1_dict: ``{cat1_id: cat1}``.

        Raises:
            OSError: If the brand info file cannot be opened.
            ValueError: If a record's gmv field is not a valid float.
        """
        cat1_brand_dict = {}
        cat1_clean_brand_dict = {}
        cat1_dict = {}
        brand_cat1_dict = {}
        brand_idx_dict = {}
        idx_ori_brand_dict = {}
        name_ori_brand_dict = {}
        brand_gmv_dict = {}

        with open(self._brand_info_file, "r", encoding="utf-8") as f1:
            for line in f1:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                fields = [field.strip() for field in line.split("\t")]
                if len(fields) != 6:
                    continue
                b_id, b_name_ori, b_name, cat1_id, cat1, gmv = fields

                idx_ori_brand_dict[b_id] = b_name
                # Hard-coded override of the original name for this one id.
                if b_id == '10698337':
                    name_ori_brand_dict[b_id] = 'Xiaomi/小米'
                else:
                    name_ori_brand_dict[b_id] = b_name_ori
                cat1_dict[cat1_id] = cat1

                # Configured replacement names take effect from here on.
                if b_name in self._exchange_brand_dict:
                    b_name = self._exchange_brand_dict[b_name]
                r_brand_set = tool.brand_dealing(b_name)

                brand_gmv_dict[b_id] = round(float(gmv), 3)

                for variant in r_brand_set:
                    # Drop variants that are too ambiguous to index:
                    # single characters, numbers, and names shorter than
                    # three characters that tool.is_all_eng accepts.
                    if len(variant) == 1:
                        continue
                    if tool.is_number(variant):
                        continue
                    is_eng = tool.is_all_eng(variant)
                    if is_eng and len(variant) < 3:
                        continue
                    # Variants explicitly configured for removal.
                    if variant in self._del_brand_dict:
                        continue

                    key = "%s|%s" % (variant, "0" if is_eng else "1")

                    ids = brand_idx_dict.setdefault(key, [])
                    if b_id not in ids:
                        ids.append(b_id)

                    cat1_clean_brand_dict.setdefault(cat1_id, []).append(key)

                if cat1 != "NULL":
                    brand_cat1_dict.setdefault(b_id, []).append(cat1_id)
                    # Newest entry goes to the front of the list.
                    cat1_brand_dict.setdefault(cat1_id, []).insert(
                        0, "%s|%s" % (b_id, b_name))

        return brand_idx_dict, idx_ori_brand_dict, name_ori_brand_dict,\
               brand_cat1_dict, cat1_brand_dict, \
               cat1_clean_brand_dict, brand_gmv_dict, cat1_dict
Ejemplo n.º 4
0
 def _is_all_eng(self, s1):
     """Return the result of ``tool.is_all_eng(s1)``.

     Thin instance-level wrapper around the shared helper. Presumably the
     helper reports whether ``s1`` consists only of English characters --
     confirm against ``tool.is_all_eng``.
     """
     return tool.is_all_eng(s1)