예제 #1
0
    def statistic(self, threshold=Parameters.mainSoldThreshold):
        """Aggregate sold totals per brand inside every submarket word.

        For each word of "submarketWords", the third field of every mapped
        item record is summed per brand.  From those sums the method derives
        the brand's share of the submarket total, a dense rank by descending
        sum (equal sums share a rank) and a rank -> sorted brand list lookup.
        The results are dumped as "statsSubBrandSoldNum",
        "statsSubBrandSoldShare", "statsSubBrandSoldRank" and
        "statsSubBrandSoldReRank".

        When ``Mode.statsLOCAL`` is truthy and all four result files can be
        loaded, nothing is recomputed.

        Args:
            threshold: Not used by this method.
        """
        if Mode.statsLOCAL:
            result_names = (
                "statsSubBrandSoldNum",
                "statsSubBrandSoldShare",
                "statsSubBrandSoldRank",
                "statsSubBrandSoldReRank",
            )
            try:
                # Only probing for existence; the loaded values are discarded.
                for result_name in result_names:
                    load(result_name)
            except FileNotFoundError:
                print(
                    "StatisticSubmarketBrandSoldMethod: Don't Have Local Result Files"
                )
            else:
                return

        words = load("submarketWords")
        submarket_total_sold_price = load("statsAllSubTotalSoldPrice")
        itemid_set = load("statsAllSubItemidSet")
        itemid_mapping = load("statsAllItemidMapping")

        submarket_brand_num = {word: dict() for word in words}
        submarket_brand_share = {word: dict() for word in words}
        submarket_brand_rank = {word: dict() for word in words}
        submarket_brand_rerank = {word: dict() for word in words}

        for word in words:
            try:
                itemids = itemid_set[word]
            except KeyError:
                # No item matched this word; its four entries stay empty.
                continue

            sold_by_brand = submarket_brand_num[word]
            for itemid in itemids:
                brand, _, total_sold_price = itemid_mapping[itemid]
                # `brand != brand` only holds for NaN-like values; falsy
                # brands (e.g. empty strings) are skipped as well.
                if brand != brand or not brand:
                    continue
                if brand in sold_by_brand:
                    sold_by_brand[brand] += total_sold_price
                else:
                    sold_by_brand[brand] = total_sold_price

            ordered = list(sold_by_brand.items())
            ordered.sort(key=lambda entry: entry[1], reverse=True)

            shares = submarket_brand_share[word]
            ranks = submarket_brand_rank[word]
            brands_by_rank = submarket_brand_rerank[word]

            # The rank only advances when the value changes.  Because the
            # comparison starts from 0, leading entries whose sum is 0 keep
            # rank 0.
            last_value, current_rank = 0, 0
            for brand, value in ordered:
                try:
                    shares[brand] = value / submarket_total_sold_price[word]
                except ZeroDivisionError:
                    shares[brand] = 0
                if last_value != value:
                    current_rank += 1
                    last_value = value
                ranks[brand] = current_rank
                tied_brands = brands_by_rank.setdefault(current_rank, list())
                tied_brands.append(brand)
                tied_brands.sort()

        dump(submarket_brand_num, "statsSubBrandSoldNum")
        dump(submarket_brand_share, "statsSubBrandSoldShare")
        dump(submarket_brand_rank, "statsSubBrandSoldRank")
        dump(submarket_brand_rerank, "statsSubBrandSoldReRank")
예제 #2
0
    def cut(self):
        """Extract hot words from item titles and from hot-search terms.

        The 'title' column of "factItem" and the 'hotwords' column of
        "hotWords" are each concatenated without a separator and passed to
        ``jieba.analyse.textrank``.  ``topK`` comes from
        ``self.threshold["title"]`` and ``self.threshold["hot search"]``
        respectively.  The de-duplicated keywords are dumped as
        "titleHotWords" and "searchHotWords".
        """
        item_frame = read("factItem")()
        title_text = "".join(item_frame['title'])
        title_words = set(
            jieba.analyse.textrank(title_text, topK=self.threshold["title"]))
        dump(title_words, "titleHotWords")

        search_frame = read("hotWords")()
        search_text = "".join(search_frame['hotwords'])
        search_words = set(
            jieba.analyse.textrank(search_text,
                                   topK=self.threshold["hot search"]))
        dump(search_words, "searchHotWords")
예제 #3
0
    def trans(self):
        """Build the pairwise word mapping from the kept word groups.

        Each entry of "smKeep" is treated as one group made of the key plus
        its words (assumes a mapping of key -> collection of strings, as
        implied by the ``.items()`` access -- confirm against the producer).
        Every unordered pair of distinct words in a group is emitted exactly
        once as ``(w1, w2)``, where ``w1`` is the shorter word and ties in
        length are broken by string order.  The set of pairs is dumped as
        "smMapping".
        """
        keep = load("smKeep")

        mapping = set()
        for key, words in keep.items():
            # Build a fresh group instead of adding the key to the loaded
            # collection, so the data returned by load() is left untouched.
            # Sorting by (length, text) puts every pair in canonical order,
            # so each pair is visited once rather than twice.
            group = sorted({key, *words}, key=lambda word: (len(word), word))
            for position, first in enumerate(group):
                for second in group[position + 1:]:
                    mapping.add((first, second))

        dump(mapping, "smMapping")
예제 #4
0
    def trans(self):
        """Merge the word pairs of "smMapping" into groups and dump them.

        Every pair starts as its own set.  Each pass folds a set into every
        already-accepted set it shares a word with; sets that overlap nothing
        are accepted unchanged.  Passes repeat until one produces a list
        equal to its input, and that list is dumped as "smKeep".
        """
        groups = [set(pair) for pair in load("smMapping")]

        while True:
            accepted = list()
            for candidate in groups:
                absorbed = False
                for existing in accepted:
                    if candidate & existing:
                        # In-place update: `existing` may also be an element
                        # of `groups`, exactly as in a `|=` merge.
                        existing.update(candidate)
                        absorbed = True
                if not absorbed:
                    accepted.append(candidate)
            if groups == accepted:
                break
            groups = accepted

        dump(groups, "smKeep")
예제 #5
0
    def erase(self):
        """Mark words that contain a term similar to the category name.

        The similar terms and their scores for ``Entrance().cidname`` are
        taken from the "history" cache when present.  Otherwise they are
        requested from ``WordSimilarity().process`` and written back to the
        cache.  A word is added to ``drop`` when it contains a similar term
        whose score exceeds ``self.threshold``.  The category name itself is
        always added.  The result is handed to ``super().dump``.
        """
        words, drop = super().load()
        try:
            history = load("history", FileBase.history)
        except FileNotFoundError:
            history = dict()

        cidname = Entrance().cidname
        if cidname not in history:
            print("request ...")
            sim_words, values = WordSimilarity().process(cidname)
            history[cidname] = [sim_words, values]
            dump(history, "history", repath=FileBase.history)
        else:
            sim_words, values = history[cidname]

        for word in words:
            for sim_word, sim in zip(sim_words, values):
                if sim_word not in word:
                    continue
                if sim > self.threshold:
                    drop.add(word)
        drop.add(cidname)
        super().dump(words, drop)
예제 #6
0
    def statistic(self):
        """Rank brands by their summed ``total_sold_price`` over all items.

        Rows of "factItem" whose brand is NaN are dropped and missing
        ``total_sold_price`` values count as 0.  Each brand then gets its
        summed sales (``sold_sum``), that sum divided by the "total" entry of
        "statsAllSubMacroCondition" (``sold_share``) and a dense rank by
        descending ``sold_sum`` (``rank``; equal sums share a rank).  The
        resulting frame is dumped as "statsTopSoldBrands".

        When ``Mode.statsLOCAL`` is truthy and that result can already be
        loaded, nothing is recomputed.
        """
        # The cache probe and the dump must use the same name; otherwise the
        # probe can never find the result this method wrote.
        result_name = "statsTopSoldBrands"

        if Mode.statsLOCAL:
            try:
                load(result_name)
            except FileNotFoundError:
                print(
                    "StatisticTopSoldBrandMethod: Don't Have Local Result Files"
                )
            else:
                return

        items = read("factItem")()
        # NaN never equals itself, so this keeps only rows with a brand.
        items = items[items["brand"] == items["brand"]]
        macro_condition = load("statsAllSubMacroCondition")

        def sum_sold(df):
            """Attach the group's sales sum and overall share to its rows."""
            sold_sum = df["total_sold_price"].sum()
            df["sold_sum"] = sold_sum
            try:
                df["sold_share"] = sold_sum / macro_condition["total"]
            except ZeroDivisionError:
                df["sold_share"] = 0
            return df

        items["total_sold_price"] = items["total_sold_price"].fillna(0)
        # Every row of a brand carries the same sums, so one row per brand
        # is kept before sorting.
        items = items.groupby(["brand"]).apply(sum_sold).drop_duplicates(
            ["brand"]).sort_values("sold_sum", ascending=False)
        items = items[["brand", "sold_sum", "sold_share"]]

        # The rank only advances when sold_sum changes between rows.
        prev, rank = 0, 0
        for k, v in items.iterrows():
            if prev != v["sold_sum"]:
                rank += 1
                prev = v["sold_sum"]
            items.at[k, "rank"] = rank
        dump(items, result_name)
예제 #7
0
 def merge(self):
     """Dump the union of "titleHotWords" and "searchHotWords" as "submarketWords"."""
     hot_words = load("titleHotWords") | load("searchHotWords")
     dump(hot_words, "submarketWords")
예제 #8
0
    def statistic(self):
        """Compute per-submarket and overall sales statistics for all items.

        Every row of "factItem" is matched against each word of
        "submarketWords" with ``word in title``.  For each matching word the
        item id, brand and seller are collected into sets and ``biz30day``
        and ``total_sold_price`` are summed.  The same figures are
        accumulated over all rows as the macro conditions, and each item id
        is mapped to ``(brand, biz30day, total_sold_price)``.  From the sums
        the method derives an average price (total / biz30day, rounded to 2
        digits, 0 on division by zero) and, for both ``biz30day`` and
        ``total_sold_price``, each word's share of the overall figure, a
        dense rank by descending value and a rank -> words lookup.  All
        results are dumped under "statsAll..." names.

        When ``Mode.statsLOCAL`` is truthy and every result file can be
        loaded, nothing is recomputed.
        """
        if Mode.statsLOCAL:
            cached_results = (
                "statsAllSubItemidSet",
                "statsAllSubBrandSet",
                "statsAllSubSellerSet",
                "statsAllSubBiz30day",
                "statsAllSubTotalSoldPrice",
                "statsAllSubSoldAverPrice",
                "statsAllSubMacroCondition",
                "statsAllItemidMapping",
                "statsAllSubBiz30dayShare",
                "statsAllSubBiz30dayRank",
                "statsAllSubBiz30dayReRank",
                "statsAllSubTotalSoldPriceShare",
                "statsAllSubTotalSoldPriceRank",
                "statsAllSubTotalSoldPriceReRank",
            )
            try:
                # Only probing for existence; the loaded values are discarded.
                for cached_name in cached_results:
                    load(cached_name)
            except FileNotFoundError:
                print(
                    "StatisticAllSubmarketMethod: Don't Have Local Result Files"
                )
            else:
                return

        def average(total, count):
            """Return total / count rounded to 2 digits, or 0 for a zero count."""
            try:
                return round(total / count, 2)
            except ZeroDivisionError:
                return 0

        def share_and_rank(amounts, overall):
            """Return (share, rank, rank -> keys) dicts for ``amounts``."""
            shares, ranks, keys_by_rank = dict(), dict(), dict()
            ordered = sorted(amounts.items(),
                             key=lambda entry: entry[1],
                             reverse=True)
            # The rank only advances when the value changes.  Because the
            # comparison starts from 0, leading zero values keep rank 0.
            last_value, current_rank = 0, 0
            for key, value in ordered:
                try:
                    shares[key] = value / overall
                except ZeroDivisionError:
                    shares[key] = 0
                if last_value != value:
                    current_rank += 1
                    last_value = value
                ranks[key] = current_rank
                keys_by_rank.setdefault(current_rank, list()).append(key)
            return shares, ranks, keys_by_rank

        words = load("submarketWords")
        items = read("factItem")()

        itemid_set, brand_set, seller_set = dict(), dict(), dict()
        biz30day = dict.fromkeys(words, 0)
        total_sold_price = dict.fromkeys(words, 0)
        macro_conditions = {"biz30day": 0, "total": 0}
        itemid_mapping = dict()

        for index, row in items.iterrows():
            if index % 100 == 0:
                print("process", index, "/", len(items))
            for word in words:
                if word not in row["title"]:
                    continue
                itemid_set.setdefault(word, set()).add(row["itemid"])
                brand_set.setdefault(word, set()).add(row["brand"])
                seller_set.setdefault(word, set()).add(row["sellernick"])
                biz30day[word] += row["biz30day"]
                total_sold_price[word] += row["total_sold_price"]

            macro_conditions.setdefault("itemid", set()).add(row["itemid"])
            macro_conditions.setdefault("brand", set()).add(row["brand"])
            macro_conditions.setdefault("seller",
                                        set()).add(row["sellernick"])
            macro_conditions["biz30day"] += row["biz30day"]
            macro_conditions["total"] += row["total_sold_price"]

            # A repeated item id is reported; the later row then overwrites
            # the earlier mapping entry.
            if row["itemid"] in itemid_mapping:
                print("Warning: Duplicate Itemid", row["itemid"])
            itemid_mapping[row["itemid"]] = (row["brand"], row["biz30day"],
                                             row["total_sold_price"])

        sold_price_aver = dict()
        for word in words:
            sold_price_aver[word] = average(total_sold_price[word],
                                            biz30day[word])

        macro_conditions["aver"] = average(macro_conditions["total"],
                                           macro_conditions["biz30day"])

        # The macro conditions are dumped on their own and are not merged
        # into the per-word dictionaries.
        dump(itemid_set, "statsAllSubItemidSet")
        dump(brand_set, "statsAllSubBrandSet")
        dump(seller_set, "statsAllSubSellerSet")
        dump(biz30day, "statsAllSubBiz30day")
        dump(total_sold_price, "statsAllSubTotalSoldPrice")
        dump(sold_price_aver, "statsAllSubSoldAverPrice")

        dump(macro_conditions, "statsAllSubMacroCondition")
        dump(itemid_mapping, "statsAllItemidMapping")

        biz30day_share, biz30day_rank, biz30day_rerank = share_and_rank(
            biz30day, macro_conditions["biz30day"])
        (total_sold_price_share, total_sold_price_rank,
         total_sold_price_rerank) = share_and_rank(total_sold_price,
                                                   macro_conditions["total"])

        dump(biz30day_share, "statsAllSubBiz30dayShare")
        dump(biz30day_rank, "statsAllSubBiz30dayRank")
        dump(biz30day_rerank, "statsAllSubBiz30dayReRank")
        dump(total_sold_price_share, "statsAllSubTotalSoldPriceShare")
        dump(total_sold_price_rank, "statsAllSubTotalSoldPriceRank")
        dump(total_sold_price_rerank, "statsAllSubTotalSoldPriceReRank")
예제 #9
0
 def dump(words):
     """Store ``words`` under the result name "submarketWords".

     NOTE(review): the call below goes to whatever ``dump`` resolves to at
     module scope.  Presumably that is a different function because this
     def sits inside a class body -- confirm.
     """
     dump(words, "submarketWords")
예제 #10
0
 def dump(words, keep, drop, mapping):
     """Store the four arguments under their fixed result names.

     ``words`` goes to "submarketWords", ``keep`` to "smKeep", ``drop`` to
     "smDrop" and ``mapping`` to "smMapping", in that order.

     NOTE(review): the inner ``dump`` is whatever that name resolves to at
     module scope.  Presumably that is a different function because this
     def sits inside a class body -- confirm.
     """
     outputs = (
         (words, "submarketWords"),
         (keep, "smKeep"),
         (drop, "smDrop"),
         (mapping, "smMapping"),
     )
     for payload, result_name in outputs:
         dump(payload, result_name)
예제 #11
0
 def dump(data, name):
     """Store ``data`` at the result path built from the entrance parameters.

     The path is ``FileBase.result`` formatted with the first two entries of
     ``Entrance().params`` (``pcid`` and ``cid``) and the given ``name``.

     NOTE(review): the third entry of ``Entrance().params`` is forwarded as
     the second positional argument of the inner ``dump``; its meaning is
     not visible here -- confirm it is intended.  The inner ``dump`` is
     whatever that name resolves to at module scope.
     """
     pcid, cid, third_param = Entrance().params
     target_path = FileBase.result.format(pcid=pcid, cid=cid, name=name)
     dump(data, third_param, repath=target_path)
예제 #12
0
 def dump(words, drops):
     """Store ``words`` as "submarketWords" and ``drops`` as "smDrop".

     NOTE(review): the inner ``dump`` is whatever that name resolves to at
     module scope.  Presumably that is a different function because this
     def sits inside a class body -- confirm.
     """
     for payload, result_name in ((words, "submarketWords"),
                                  (drops, "smDrop")):
         dump(payload, result_name)