Example #1
0
 def cut(self):
     """Make sure the hot-word results exist, producing them when missing.

     ``titleHotWords`` and ``searchHotWords`` are loaded only as an
     existence probe; the loaded values are discarded.  If either load
     raises ``FileNotFoundError`` the work is delegated to
     ``self.method().cut()``.  Any other exception propagates to the
     caller unchanged.
     """
     try:
         load("titleHotWords")
         load("searchHotWords")
     except FileNotFoundError:
         self.method().cut()
    def build(self):
        """Rank brands by summed ``total_sold_price`` and dump the table.

        Calls ``read("factItem")`` and invokes the result to obtain the item
        table, drops rows without a brand, sums ``total_sold_price`` per
        brand and derives each brand's share of
        ``statsAllSubMacroCondition["total"]``.  Brands are ordered by
        descending sum and numbered so that equal sums share a rank and the
        largest sum gets rank 1.  The ``brand`` / ``sold_sum`` /
        ``sold_share`` / ``rank`` table is dumped as ``statsTopSoldBrands``
        through the parent class's ``dump``.
        """
        items = read("factItem")()
        # A missing brand (NaN) never compares equal to itself, so this mask
        # keeps only the rows that actually carry a brand.
        items = items[items["brand"] == items["brand"]]
        macro_condition = load("statsAllSubMacroCondition")

        def sum_sold(df):
            """Attach the group's sold sum and market share to every row."""
            sold_sum = df["total_sold_price"].sum()
            df["sold_sum"] = sold_sum
            total = macro_condition["total"]
            # Test the divisor explicitly: dividing a NumPy scalar by zero
            # yields inf/nan with a warning instead of raising
            # ZeroDivisionError, so an except clause would never fire.
            df["sold_share"] = sold_sum / total if total else 0
            return df

        items["total_sold_price"] = items["total_sold_price"].fillna(0)
        items = items.groupby(["brand"]).apply(sum_sold).drop_duplicates(
            ["brand"]).sort_values("sold_sum", ascending=False)
        items = items[["brand", "sold_sum", "sold_share"]]

        # ``prev`` starts as None rather than 0 so that a leading sum of 0
        # still opens rank 1 instead of being left at rank 0.
        prev, rank = None, 0
        for k, v in items.iterrows():
            if prev is None or prev != v["sold_sum"]:
                rank += 1
                prev = v["sold_sum"]
            items.at[k, "rank"] = rank

        super().dump(items, "statsTopSoldBrands")
    def statistic(self):
        """Compute the top-sold-brand table and dump it as ``statsTopSoldBrands``.

        When ``Mode.statsLOCAL`` is truthy and a previously dumped
        ``statsTopSoldBrands`` result can be loaded, the method returns
        without recomputing.  Otherwise it sums ``total_sold_price`` per
        brand over the ``factItem`` table, derives each brand's share of
        ``statsAllSubMacroCondition["total"]``, numbers the brands by
        descending sum (equal sums share a rank, the largest sum gets
        rank 1) and dumps the result.
        """
        if Mode.statsLOCAL:
            try:
                # Probe the same key the result is dumped under below; the
                # former key "statsTopSoldBrand" never matched the dump.
                load("statsTopSoldBrands")
            except FileNotFoundError:
                print(
                    "StatisticTopSoldBrandMethod: Don't Have Local Result Files"
                )
            else:
                return

        items = read("factItem")()
        # A missing brand (NaN) never compares equal to itself, so this mask
        # keeps only the rows that actually carry a brand.
        items = items[items["brand"] == items["brand"]]
        macro_condition = load("statsAllSubMacroCondition")

        def sum_sold(df):
            """Attach the group's sold sum and market share to every row."""
            sold_sum = df["total_sold_price"].sum()
            df["sold_sum"] = sold_sum
            total = macro_condition["total"]
            # Test the divisor explicitly: dividing a NumPy scalar by zero
            # yields inf/nan with a warning instead of raising
            # ZeroDivisionError, so an except clause would never fire.
            df["sold_share"] = sold_sum / total if total else 0
            return df

        items["total_sold_price"] = items["total_sold_price"].fillna(0)
        items = items.groupby(["brand"]).apply(sum_sold).drop_duplicates(
            ["brand"]).sort_values("sold_sum", ascending=False)
        items = items[["brand", "sold_sum", "sold_share"]]

        # ``prev`` starts as None rather than 0 so that a leading sum of 0
        # still opens rank 1 instead of being left at rank 0.
        prev, rank = None, 0
        for k, v in items.iterrows():
            if prev is None or prev != v["sold_sum"]:
                rank += 1
                prev = v["sold_sum"]
            items.at[k, "rank"] = rank
        dump(items, "statsTopSoldBrands")
    def trans(self):
        """Expand the ``smKeep`` groups into a set of word pairs (``smMapping``).

        ``smKeep`` is read as a mapping of key -> set of words.  For every
        group (the words plus the key itself) each unordered pair of distinct
        words is emitted exactly once as a tuple ``(w1, w2)`` where ``w1`` is
        the shorter word, or the lexicographically smaller one when both have
        the same length.  The resulting set of tuples is dumped as
        ``smMapping``.
        """
        from itertools import combinations

        keep = load("smKeep")

        mapping = set()
        for key, words in keep.items():
            # Build a fresh set so the loaded group is not mutated in place.
            group = set(words) | {key}
            # Sorting by (length, word) puts the "smaller" word of any pair
            # first, so combinations() yields each pair already oriented.
            ordered = sorted(group, key=lambda word: (len(word), word))
            mapping.update(combinations(ordered, 2))

        dump(mapping, "smMapping")
    def trans(self):
        """Merge the ``smMapping`` pairs into groups and dump them as ``smKeep``.

        Every pair starts as its own set.  Passes are repeated in which each
        set is merged into all previously kept sets it overlaps with, or
        kept as a new group when it overlaps none, until a pass leaves the
        list unchanged.  The final list of sets is dumped as ``smKeep``.
        """
        groups = [set(pair) for pair in load("smMapping")]

        while True:
            merged = []
            for group in groups:
                # Collect every kept set sharing a word with this group; all
                # of them absorb the group (no early exit on the first hit).
                overlapping = [kept for kept in merged if kept & group]
                if overlapping:
                    for kept in overlapping:
                        kept |= group
                else:
                    merged.append(group)
            if groups == merged:
                # Nothing was merged during this pass: fix-point reached.
                break
            groups = merged

        dump(groups, "smKeep")
    def erase(self):
        """Mark words containing a sufficiently similar word for dropping.

        Loads ``words`` and ``drop`` through the parent class, obtains the
        similar words and their scores for ``Entrance().cidname`` (from the
        ``history`` store when present, otherwise from
        ``WordSimilarity().process`` with the result written back to the
        store), adds to ``drop`` every word that contains a similar word
        whose score exceeds ``self.threshold``, adds ``cidname`` itself, and
        dumps ``words`` and ``drop`` through the parent class.
        """
        words, drop = super().load()

        try:
            history = load("history", FileBase.history)
        except FileNotFoundError:
            history = {}

        cidname = Entrance().cidname
        if cidname not in history:
            # Not stored yet: query the similarity service and persist the answer.
            print("request ...")
            sim_words, values = WordSimilarity().process(cidname)
            history[cidname] = [sim_words, values]
            dump(history, "history", repath=FileBase.history)
        else:
            sim_words, values = history[cidname]

        scored = list(zip(sim_words, values))
        for word in words:
            hit = any(sim_word in word and sim > self.threshold
                      for sim_word, sim in scored)
            if hit:
                drop.add(word)

        drop.add(cidname)
        super().dump(words, drop)
 def merge(self):
     """Combine title and search hot words and dump them as ``submarketWords``.

     The two loaded objects are combined with the ``|`` operator
     (presumably a set union -- confirm against what ``load`` returns).
     """
     combined = load("titleHotWords") | load("searchHotWords")
     dump(combined, "submarketWords")
    def statistic(self, threshold=Parameters.mainSoldThreshold):
        """Compute per-submarket brand sales figures and dump them.

        For every submarket word, the sold price of its items is accumulated
        per brand (items whose brand is NaN or falsy are skipped).  Brands
        are then ordered by descending amount; equal amounts share a rank
        and the largest amount gets rank 1.  Four mappings keyed by word are
        dumped:

        * ``statsSubBrandSoldNum``    -- brand -> accumulated sold price
        * ``statsSubBrandSoldShare``  -- brand -> amount / submarket total
        * ``statsSubBrandSoldRank``   -- brand -> rank
        * ``statsSubBrandSoldReRank`` -- rank -> sorted list of brands

        When ``Mode.statsLOCAL`` is truthy and all four results can already
        be loaded, the method returns without recomputing.

        Args:
            threshold: Not used by this method; kept so existing callers
                that pass it continue to work.
        """
        if Mode.statsLOCAL:
            try:
                load("statsSubBrandSoldNum")
                load("statsSubBrandSoldShare")
                load("statsSubBrandSoldRank")
                load("statsSubBrandSoldReRank")
            except FileNotFoundError:
                print(
                    "StatisticSubmarketBrandSoldMethod: Don't Have Local Result Files"
                )
            else:
                return

        words = load("submarketWords")
        submarket_total_sold_price = load("statsAllSubTotalSoldPrice")
        itemid_set = load("statsAllSubItemidSet")
        itemid_mapping = load("statsAllItemidMapping")

        submarket_brand_num = {word: dict() for word in words}
        submarket_brand_share = {word: dict() for word in words}
        submarket_brand_rank = {word: dict() for word in words}
        submarket_brand_rerank = {word: dict() for word in words}

        for word in words:
            try:
                itemids = itemid_set[word]
            except KeyError:
                # The word matched no item; leave its four entries empty.
                continue

            brand_num = submarket_brand_num[word]
            for itemid in itemids:
                brand, _, total_sold_price = itemid_mapping[itemid]
                # ``brand != brand`` is only true for NaN.
                if brand != brand or not brand:
                    continue
                if brand in brand_num:
                    brand_num[brand] += total_sold_price
                else:
                    brand_num[brand] = total_sold_price

            brand_share = submarket_brand_share[word]
            brand_rank = submarket_brand_rank[word]
            brand_rerank = submarket_brand_rerank[word]

            ranked = sorted(brand_num.items(),
                            key=lambda x: x[1],
                            reverse=True)
            # ``prev`` starts as None rather than 0 so that a leading amount
            # of 0 still opens rank 1 instead of being left at rank 0.
            prev, rank = None, 0
            for brand, value in ranked:
                total = submarket_total_sold_price[word]
                # Explicit zero test: NumPy scalars divided by zero give
                # inf/nan instead of raising ZeroDivisionError.
                brand_share[brand] = value / total if total else 0
                if prev is None or prev != value:
                    rank += 1
                    prev = value
                brand_rank[brand] = rank
                brand_rerank.setdefault(rank, list()).append(brand)

            # Sort each rank's brands once, after all of them are collected.
            for brands in brand_rerank.values():
                brands.sort()

        dump(submarket_brand_num, "statsSubBrandSoldNum")
        dump(submarket_brand_share, "statsSubBrandSoldShare")
        dump(submarket_brand_rank, "statsSubBrandSoldRank")
        dump(submarket_brand_rerank, "statsSubBrandSoldReRank")
    def statistic(self):
        """Compute per-submarket and market-wide statistics and dump them.

        Every row of the ``factItem`` table is matched against each
        submarket word (a word matches when it is contained in the row's
        ``title``).  Per word the method collects the item ids, brands and
        sellers and sums ``biz30day`` and ``total_sold_price``; the same
        figures are collected over all rows under ``macro_conditions``.
        From these it derives the average sold price, and for both
        ``biz30day`` and ``total_sold_price`` each word's share of the
        market-wide total, its rank (equal values share a rank, the largest
        value gets rank 1) and the rank -> words mapping.  All results are
        dumped under their ``statsAll...`` names.

        When ``Mode.statsLOCAL`` is truthy and every result can already be
        loaded, the method returns without recomputing.
        """
        if Mode.statsLOCAL:
            try:
                for name in (
                        "statsAllSubItemidSet",
                        "statsAllSubBrandSet",
                        "statsAllSubSellerSet",
                        "statsAllSubBiz30day",
                        "statsAllSubTotalSoldPrice",
                        "statsAllSubSoldAverPrice",
                        "statsAllSubMacroCondition",
                        "statsAllItemidMapping",
                        "statsAllSubBiz30dayShare",
                        "statsAllSubBiz30dayRank",
                        "statsAllSubBiz30dayReRank",
                        "statsAllSubTotalSoldPriceShare",
                        "statsAllSubTotalSoldPriceRank",
                        "statsAllSubTotalSoldPriceReRank",
                ):
                    load(name)
            except FileNotFoundError:
                print(
                    "StatisticAllSubmarketMethod: Don't Have Local Result Files"
                )
            else:
                return

        def _ratio(numerator, denominator):
            """Return numerator / denominator, or 0 when the denominator is 0."""
            # Explicit zero test: NumPy scalars divided by zero give inf/nan
            # with a warning instead of raising ZeroDivisionError.
            return numerator / denominator if denominator else 0

        def _share_and_rank(values, total):
            """Return (share, rank, rerank) dicts for a word -> value mapping."""
            share, rank_of, rerank = dict(), dict(), dict()
            # ``prev`` starts as None rather than 0 so that a leading value
            # of 0 still opens rank 1 instead of being left at rank 0.
            prev, rank = None, 0
            ordered = sorted(values.items(), key=lambda x: x[1], reverse=True)
            for key, value in ordered:
                share[key] = _ratio(value, total)
                if prev is None or prev != value:
                    rank += 1
                    prev = value
                rank_of[key] = rank
                rerank.setdefault(rank, list()).append(key)
            return share, rank_of, rerank

        words = load("submarketWords")
        items = read("factItem")()

        itemid_set = dict()
        brand_set = dict()
        seller_set = dict()
        biz30day = {word: 0 for word in words}
        total_sold_price = {word: 0 for word in words}

        macro_conditions = dict()
        macro_conditions["biz30day"] = 0
        macro_conditions["total"] = 0

        itemid_mapping = dict()

        item_count = len(items)
        for k, v in items.iterrows():
            if k % 100 == 0:
                print("process", k, "/", item_count)

            # Read each field once per row instead of once per word.
            title = v["title"]
            itemid = v["itemid"]
            brand = v["brand"]
            seller = v["sellernick"]
            row_biz30day = v["biz30day"]
            row_sold_price = v["total_sold_price"]

            for word in words:
                if word in title:
                    itemid_set.setdefault(word, set()).add(itemid)
                    brand_set.setdefault(word, set()).add(brand)
                    seller_set.setdefault(word, set()).add(seller)
                    biz30day[word] += row_biz30day
                    total_sold_price[word] += row_sold_price

            macro_conditions.setdefault("itemid", set()).add(itemid)
            macro_conditions.setdefault("brand", set()).add(brand)
            macro_conditions.setdefault("seller", set()).add(seller)
            macro_conditions["biz30day"] += row_biz30day
            macro_conditions["total"] += row_sold_price

            if itemid in itemid_mapping:
                print("Warning: Duplicate Itemid", itemid)
            # On a duplicate id the later row overwrites the earlier one.
            itemid_mapping[itemid] = (brand, row_biz30day, row_sold_price)

        sold_price_aver = dict()
        for word in words:
            sold_price_aver[word] = round(
                _ratio(total_sold_price[word], biz30day[word]), 2)

        macro_conditions["aver"] = round(
            _ratio(macro_conditions["total"], macro_conditions["biz30day"]), 2)

        dump(itemid_set, "statsAllSubItemidSet")
        dump(brand_set, "statsAllSubBrandSet")
        dump(seller_set, "statsAllSubSellerSet")
        dump(biz30day, "statsAllSubBiz30day")
        dump(total_sold_price, "statsAllSubTotalSoldPrice")
        dump(sold_price_aver, "statsAllSubSoldAverPrice")

        dump(macro_conditions, "statsAllSubMacroCondition")
        dump(itemid_mapping, "statsAllItemidMapping")

        biz30day_share, biz30day_rank, biz30day_rerank = _share_and_rank(
            biz30day, macro_conditions["biz30day"])
        (total_sold_price_share, total_sold_price_rank,
         total_sold_price_rerank) = _share_and_rank(
             total_sold_price, macro_conditions["total"])

        dump(biz30day_share, "statsAllSubBiz30dayShare")
        dump(biz30day_rank, "statsAllSubBiz30dayRank")
        dump(biz30day_rerank, "statsAllSubBiz30dayReRank")
        dump(total_sold_price_share, "statsAllSubTotalSoldPriceShare")
        dump(total_sold_price_rank, "statsAllSubTotalSoldPriceRank")
        dump(total_sold_price_rerank, "statsAllSubTotalSoldPriceReRank")
 def load():
     """Return whatever is stored under ``submarketWords``.

     NOTE(review): takes no ``self``; presumably declared as a static
     method, in which case the inner ``load`` call resolves to the
     module-level loader rather than to this method -- confirm.
     """
     words = load("submarketWords")
     return words
Example #11
0
 def load():
     """Return the ``submarketWords``, ``smKeep``, ``smDrop`` and ``smMapping`` objects.

     The four objects are loaded in that order and returned as a tuple.

     NOTE(review): takes no ``self``; presumably declared as a static
     method, in which case the inner ``load`` calls resolve to the
     module-level loader rather than to this method -- confirm.
     """
     words = load("submarketWords")
     keep = load("smKeep")
     drop = load("smDrop")
     mapping = load("smMapping")
     return words, keep, drop, mapping
    def build(self):
        """Assemble the per-word ``submarketInfo`` dictionary and dump it.

        For every submarket word an entry is created.  Words absent from
        ``statsAllSubItemidSet`` only get ``"inTitle": False``.  All other
        words get ``"inTitle": True``, the previously computed set, sum,
        average, share and rank statistics, and -- separately for the "biz"
        and the "sold" brand rankings -- a ``"top ... brand"`` mapping
        (brand -> num / share / rank) and, when at least one brand exists, a
        ``"main ... brand"`` list.  The result is dumped as
        ``submarketInfo`` through the parent class's ``dump``.
        """

        def _fill_brands(entry, word, brand_num, brand_share, brand_rerank,
                         main_key, top_key, main_threshold, top_threshold):
            """Walk one brand ranking from rank 1 and fill the main/top keys."""
            rerank = brand_rerank[word]
            brand_count = sum(len(brands) for brands in rerank.values())

            # The "main" group is a fraction of all brands, but never
            # smaller than the "top" group.
            main_size = brand_count * self.threshold[main_threshold]
            top_size = self.threshold[top_threshold]
            if main_size < top_size:
                main_size = top_size

            top_brands = entry[top_key] = dict()
            rank, num = 0, 0
            while True:
                rank += 1
                try:
                    brands = rerank[rank]
                except KeyError:
                    # Fewer ranks than requested: stop at the last one.
                    break
                # ``num`` counts brands taken before this rank, so a whole
                # tie group is included when the limit is crossed inside it.
                if num < main_size:
                    entry.setdefault(main_key, list()).extend(brands)
                if num < top_size:
                    for brand in brands:
                        top_brands[brand] = {
                            "num": brand_num[word][brand],
                            "share": brand_share[word][brand],
                            "rank": rank,
                        }
                num += len(brands)
                if main_size <= num and top_size <= num:
                    break

        words = load("submarketWords")
        info = dict()

        itemid_set = load("statsAllSubItemidSet")
        brand_set = load("statsAllSubBrandSet")
        seller_set = load("statsAllSubSellerSet")
        biz30day = load("statsAllSubBiz30day")
        total_sold_price = load("statsAllSubTotalSoldPrice")
        sold_price_aver = load("statsAllSubSoldAverPrice")

        biz30day_share = load("statsAllSubBiz30dayShare")
        biz30day_rank = load("statsAllSubBiz30dayRank")
        total_sold_price_share = load("statsAllSubTotalSoldPriceShare")
        total_sold_price_rank = load("statsAllSubTotalSoldPriceRank")

        biz_brand_num = load("statsSubBrandBizNum")
        biz_brand_share = load("statsSubBrandBizShare")
        biz_brand_rerank = load("statsSubBrandBizReRank")

        sold_brand_num = load("statsSubBrandSoldNum")
        sold_brand_share = load("statsSubBrandSoldShare")
        sold_brand_rerank = load("statsSubBrandSoldReRank")

        for word in words:
            entry = info[word] = dict()
            if word not in itemid_set:
                entry["inTitle"] = False
                continue
            entry["inTitle"] = True

            entry["itemid set"] = itemid_set[word]
            entry["brand set"] = brand_set[word]
            entry["seller set"] = seller_set[word]
            entry["biz30day"] = biz30day[word]
            entry["total sold price"] = total_sold_price[word]
            entry["sold price aver"] = sold_price_aver[word]

            entry["biz30day share"] = biz30day_share[word]
            entry["biz30day rank"] = biz30day_rank[word]
            entry["total sold price share"] = total_sold_price_share[word]
            entry["total sold price rank"] = total_sold_price_rank[word]

            _fill_brands(entry, word,
                         biz_brand_num, biz_brand_share, biz_brand_rerank,
                         main_key="main biz brand",
                         top_key="top biz brand",
                         main_threshold="biz main",
                         top_threshold="biz top")
            _fill_brands(entry, word,
                         sold_brand_num, sold_brand_share, sold_brand_rerank,
                         main_key="main sold brand",
                         top_key="top sold brand",
                         main_threshold="sold main",
                         top_threshold="sold top")

        super().dump(info, "submarketInfo")
Example #13
0
 def load():
     """Return the ``submarketWords`` and ``smDrop`` objects as a pair.

     NOTE(review): takes no ``self``; presumably declared as a static
     method, in which case the inner ``load`` calls resolve to the
     module-level loader rather than to this method -- confirm.
     """
     words = load("submarketWords")
     drop = load("smDrop")
     return words, drop