def cut(self):
    """Make sure the hot-word results exist, producing them when missing.

    "titleHotWords" and "searchHotWords" are loaded purely as an existence
    check; the loaded values are discarded.  When ``load`` raises
    ``FileNotFoundError`` the work is delegated to ``self.method().cut()``.
    Any other exception propagates unchanged to the caller.
    """
    try:
        load("titleHotWords")
        load("searchHotWords")
    except FileNotFoundError:
        # At least one result is missing: rebuild through self.method().
        self.method().cut()
def build(self):
    """Rank brands by total sold price and store them as "statsTopSoldBrands".

    The item table comes from ``read("factItem")()``.  Rows without a brand
    are dropped, missing ``total_sold_price`` values count as 0, and for each
    brand the method computes:

    * ``sold_sum``   - sum of ``total_sold_price`` over the brand's items;
    * ``sold_share`` - ``sold_sum`` divided by the ``"total"`` entry of
      "statsAllSubMacroCondition" (0 when that total is zero);
    * ``rank``       - position in descending ``sold_sum`` order, where
      brands with equal sums share a rank.

    The resulting frame (columns brand, sold_sum, sold_share, rank) is
    written with ``super().dump``.
    """
    items = read("factItem")()
    # Keep only rows that have a brand.  The explicit copy makes the column
    # assignment below act on an independent frame rather than on a filtered
    # slice (which triggers pandas' SettingWithCopyWarning).
    items = items[items["brand"].notna()].copy()
    macro_condition = load("statsAllSubMacroCondition")

    def sum_sold(df):
        sold_sum = df["total_sold_price"].sum()
        df["sold_sum"] = sold_sum
        total = macro_condition["total"]
        # Test the divisor explicitly: dividing a NumPy scalar by zero gives
        # inf/nan instead of raising ZeroDivisionError, so an except clause
        # would never fire for numeric columns.
        df["sold_share"] = sold_sum / total if total else 0
        return df

    items["total_sold_price"] = items["total_sold_price"].fillna(0)
    items = (
        items.groupby(["brand"])
        .apply(sum_sold)
        .drop_duplicates(["brand"])  # one representative row per brand
        .sort_values("sold_sum", ascending=False)
    )
    items = items[["brand", "sold_sum", "sold_share"]]

    # Walk the sorted rows; the rank only advances when the sum changes.
    # Because ``prev`` starts at 0, leading brands whose sum is 0 keep rank 0.
    prev, rank = 0, 0
    for k, v in items.iterrows():
        if prev != v["sold_sum"]:
            rank += 1
            prev = v["sold_sum"]
        items.at[k, "rank"] = rank
    super().dump(items, "statsTopSoldBrands")
def statistic(self):
    """Rank brands by total sold price and dump them as "statsTopSoldBrands".

    When ``Mode.statsLOCAL`` is set and the result can already be loaded,
    the method returns without recomputing.  Otherwise the item table from
    ``read("factItem")()`` is reduced to one row per brand with:

    * ``sold_sum``   - sum of ``total_sold_price`` over the brand's items
      (missing prices count as 0);
    * ``sold_share`` - ``sold_sum`` divided by the ``"total"`` entry of
      "statsAllSubMacroCondition" (0 when that total is zero);
    * ``rank``       - position in descending ``sold_sum`` order, where
      brands with equal sums share a rank.
    """
    if Mode.statsLOCAL:
        try:
            # Probe the same name the result is dumped under at the end of
            # this method, so an existing result is actually detected.
            load("statsTopSoldBrands")
        except FileNotFoundError:
            print(
                "StatisticTopSoldBrandMethod: Don't Have Local Result Files"
            )
        else:
            return

    items = read("factItem")()
    # Keep only rows that have a brand.  The explicit copy makes the column
    # assignment below act on an independent frame rather than on a filtered
    # slice (which triggers pandas' SettingWithCopyWarning).
    items = items[items["brand"].notna()].copy()
    macro_condition = load("statsAllSubMacroCondition")

    def sum_sold(df):
        sold_sum = df["total_sold_price"].sum()
        df["sold_sum"] = sold_sum
        total = macro_condition["total"]
        # Test the divisor explicitly: dividing a NumPy scalar by zero gives
        # inf/nan instead of raising ZeroDivisionError, so an except clause
        # would never fire for numeric columns.
        df["sold_share"] = sold_sum / total if total else 0
        return df

    items["total_sold_price"] = items["total_sold_price"].fillna(0)
    items = (
        items.groupby(["brand"])
        .apply(sum_sold)
        .drop_duplicates(["brand"])  # one representative row per brand
        .sort_values("sold_sum", ascending=False)
    )
    items = items[["brand", "sold_sum", "sold_share"]]

    # Walk the sorted rows; the rank only advances when the sum changes.
    # Because ``prev`` starts at 0, leading brands whose sum is 0 keep rank 0.
    prev, rank = 0, 0
    for k, v in items.iterrows():
        if prev != v["sold_sum"]:
            rank += 1
            prev = v["sold_sum"]
        items.at[k, "rank"] = rank
    dump(items, "statsTopSoldBrands")
def trans(self):
    """Expand the "smKeep" groups into ordered word pairs ("smMapping").

    Every entry of "smKeep" maps a key to a set of words.  The key is added
    to its own set, and each unordered pair of distinct members is emitted
    as a tuple whose first element is the shorter word (for equal lengths,
    the one that compares smaller).  The set of all such tuples is dumped
    as "smMapping".
    """
    pairs = set()
    for key, words in load("smKeep").items():
        # The key is a member of its own group; the loaded set is extended
        # in place.
        words.add(key)
        # Sorting by (length, text) puts the members in exactly the order
        # required inside a pair, so every i < j combination is a valid pair.
        ordered = sorted(words, key=lambda word: (len(word), word))
        for position, first in enumerate(ordered):
            for second in ordered[position + 1:]:
                pairs.add((first, second))
    dump(pairs, "smMapping")
def trans(self):
    """Merge the word pairs of "smMapping" into groups stored as "smKeep".

    Each pair starts out as its own set.  Sets that share at least one word
    are repeatedly unioned until a full pass performs no merge; the
    resulting list of sets is dumped as "smKeep".
    """
    groups = [set(pair) for pair in load("smMapping")]
    while True:
        merged = []
        for candidate in groups:
            absorbed = False
            # No early exit: a candidate is unioned into every group it
            # overlaps, and those groups collapse together on a later pass.
            for existing in merged:
                if candidate & existing:
                    existing |= candidate  # in-place union on the shared set
                    absorbed = True
            if not absorbed:
                merged.append(candidate)
        if merged == groups:
            # A pass without any merge leaves the list unchanged: done.
            break
        groups = merged
    dump(groups, "smKeep")
def erase(self):
    """Mark words that contain a term similar to the category name as dropped.

    ``super().load()`` supplies the word collection and the drop set.  The
    similar terms and their scores for ``Entrance().cidname`` are taken from
    the "history" cache (located through ``FileBase.history``) when present;
    otherwise they are obtained from ``WordSimilarity().process`` and the
    cache is updated and dumped.  Every word containing a similar term whose
    score exceeds ``self.threshold`` is added to the drop set, as is the
    category name itself, and the result is written with ``super().dump``.
    """
    words, drop = super().load()

    try:
        history = load("history", FileBase.history)
    except FileNotFoundError:
        # No cache yet: start with an empty one.
        history = dict()

    cidname = Entrance().cidname
    if cidname not in history:
        print("request ...")
        sim_words, values = WordSimilarity().process(cidname)
        history[cidname] = [sim_words, values]
        dump(history, "history", repath=FileBase.history)
    else:
        sim_words, values = history[cidname]

    for word in words:
        matches_similar_term = any(
            sim_word in word and sim > self.threshold
            for sim_word, sim in zip(sim_words, values)
        )
        if matches_similar_term:
            drop.add(word)

    drop.add(cidname)
    super().dump(words, drop)
def merge(self):
    """Combine title and search hot words with ``|`` into "submarketWords"."""
    dump(load("titleHotWords") | load("searchHotWords"), "submarketWords")
def statistic(self, threshold=Parameters.mainSoldThreshold):
    """Compute per-submarket brand sales figures and dump them.

    When ``Mode.statsLOCAL`` is set and all four results can already be
    loaded, the method returns without recomputing.  Otherwise, for every
    submarket word it produces:

    * "statsSubBrandSoldNum"    - brand -> summed sold price of the word's items;
    * "statsSubBrandSoldShare"  - brand -> that sum divided by the word's
      entry in "statsAllSubTotalSoldPrice" (0 on ``ZeroDivisionError``);
    * "statsSubBrandSoldRank"   - brand -> rank by descending sum, where
      equal sums share a rank;
    * "statsSubBrandSoldReRank" - rank -> sorted list of brands at that rank.

    ``threshold`` is accepted but not used by this method.
    """
    if Mode.statsLOCAL:
        result_names = (
            "statsSubBrandSoldNum",
            "statsSubBrandSoldShare",
            "statsSubBrandSoldRank",
            "statsSubBrandSoldReRank",
        )
        try:
            # Existence probe only; the loaded values are discarded.
            for result_name in result_names:
                load(result_name)
        except FileNotFoundError:
            print(
                "StatisticSubmarketBrandSoldMethod: Don't Have Local Result Files"
            )
        else:
            return

    words = load("submarketWords")
    word_totals = load("statsAllSubTotalSoldPrice")
    itemid_set = load("statsAllSubItemidSet")
    itemid_mapping = load("statsAllItemidMapping")

    brand_num = {word: dict() for word in words}
    brand_share = {word: dict() for word in words}
    brand_rank = {word: dict() for word in words}
    brand_rerank = {word: dict() for word in words}

    for word in words:
        try:
            itemids = itemid_set[word]
        except KeyError:
            # No item recorded for this word: its four entries stay empty.
            continue

        sums = brand_num[word]
        for itemid in itemids:
            brand, _, sold_price = itemid_mapping[itemid]
            # ``brand != brand`` is true only for NaN; empty brands are
            # skipped as well.
            if brand != brand or not brand:
                continue
            if brand in sums:
                sums[brand] += sold_price
            else:
                sums[brand] = sold_price

        shares = brand_share[word]
        ranks = brand_rank[word]
        by_rank = brand_rerank[word]
        ordered = sorted(sums.items(), key=lambda pair: pair[1], reverse=True)

        # The rank only advances when the sum changes.  ``previous`` starts
        # at 0, so leading brands with a sum of 0 keep rank 0.
        previous, position = 0, 0
        for brand, value in ordered:
            try:
                shares[brand] = value / word_totals[word]
            except ZeroDivisionError:
                shares[brand] = 0
            if value != previous:
                position += 1
                previous = value
            ranks[brand] = position
            by_rank.setdefault(position, list()).append(brand)
            by_rank[position].sort()

    dump(brand_num, "statsSubBrandSoldNum")
    dump(brand_share, "statsSubBrandSoldShare")
    dump(brand_rank, "statsSubBrandSoldRank")
    dump(brand_rerank, "statsSubBrandSoldReRank")
def statistic(self):
    """Compute per-word and market-wide sales statistics and dump them.

    When ``Mode.statsLOCAL`` is set and every result listed below can
    already be loaded, the method returns without recomputing.

    For each submarket word, an item belongs to the word when the word
    occurs in the item's ``title``.  Per word the method collects the item
    ids, brands and sellers, sums ``biz30day`` and ``total_sold_price``,
    and derives the average price (total / biz30day, rounded to 2 places,
    0 on ``ZeroDivisionError``).  The same figures over all items are kept
    in the ``macro_conditions`` dict.  Finally shares of the market-wide
    totals and descending ranks (equal values share a rank) are computed
    for ``biz30day`` and ``total_sold_price``.
    """
    if Mode.statsLOCAL:
        # Loading is only an existence probe; the values are discarded.
        try:
            load("statsAllSubItemidSet")
            load("statsAllSubBrandSet")
            load("statsAllSubSellerSet")
            load("statsAllSubBiz30day")
            load("statsAllSubTotalSoldPrice")
            load("statsAllSubSoldAverPrice")
            load("statsAllSubMacroCondition")
            load("statsAllItemidMapping")
            load("statsAllSubBiz30dayShare")
            load("statsAllSubBiz30dayRank")
            load("statsAllSubBiz30dayReRank")
            load("statsAllSubTotalSoldPriceShare")
            load("statsAllSubTotalSoldPriceRank")
            load("statsAllSubTotalSoldPriceReRank")
        except FileNotFoundError:
            print(
                "StatisticAllSubmarketMethod: Don't Have Local Result Files"
            )
        else:
            # Every result could be loaded: nothing to recompute.
            return
    words = load("submarketWords")
    items = read("factItem")()
    # word -> set of item ids / brands / seller nicks; a word only gets a key
    # once at least one title contains it.
    itemid_set = dict()
    brand_set = dict()
    seller_set = dict()
    # Sums are pre-seeded with 0, so every word has an entry even without items.
    biz30day = {word: 0 for word in words}
    total_sold_price = {word: 0 for word in words}
    macro_conditions = dict()
    macro_conditions["biz30day"] = 0
    macro_conditions["total"] = 0
    # itemid -> (brand, biz30day, total_sold_price)
    itemid_mapping = dict()
    for k, v in items.iterrows():
        # Progress output every 100 rows; assumes an integer index - TODO confirm.
        if k % 100 == 0:
            print("process", k, "/", len(items))
        for word in words:
            if word in v["title"]:
                itemid_set.setdefault(word, set()).add(v["itemid"])
                brand_set.setdefault(word, set()).add(v["brand"])
                seller_set.setdefault(word, set()).add(v["sellernick"])
                biz30day[word] += v["biz30day"]
                total_sold_price[word] += v["total_sold_price"]
        # NOTE(review): the market-wide figures below are taken to be updated
        # once per item row, independent of any word match - confirm nesting.
        macro_conditions.setdefault("itemid", set()).add(v["itemid"])
        macro_conditions.setdefault("brand", set()).add(v["brand"])
        macro_conditions.setdefault("seller", set()).add(v["sellernick"])
        macro_conditions["biz30day"] += v["biz30day"]
        macro_conditions["total"] += v["total_sold_price"]
        # A repeated item id is reported, then its mapping entry is overwritten.
        if v["itemid"] in itemid_mapping.keys():
            print("Warning: Duplicate Itemid", v["itemid"])
        itemid_mapping[v["itemid"]] = (v["brand"], v["biz30day"],
                                       v["total_sold_price"])
    # Average sold price per word: total / biz30day, 0 when the division raises.
    sold_price_aver = dict()
    for word in words:
        try:
            sold_price_aver[word] = \
                round(total_sold_price[word] / biz30day[word], 2)
        except ZeroDivisionError:
            sold_price_aver[word] = 0
    try:
        macro_conditions["aver"] = \
            round(macro_conditions["total"] /
                  macro_conditions["biz30day"], 2)
    except ZeroDivisionError:
        macro_conditions["aver"] = 0
    # Disabled code: would store the market-wide figures inside the per-word
    # results under the key "macro conditions".
    # itemid_set["macro conditions"] = macro_conditions["itemid"]
    # brand_set["macro conditions"] = macro_conditions["brand"]
    # seller_set["macro conditions"] = macro_conditions["seller"]
    # biz30day["macro conditions"] = macro_conditions["biz30day"]
    # total_sold_price["macro conditions"] = macro_conditions["total"]
    # sold_price_aver["macro conditions"] = macro_conditions["aver"]
    dump(itemid_set, "statsAllSubItemidSet")
    dump(brand_set, "statsAllSubBrandSet")
    dump(seller_set, "statsAllSubSellerSet")
    dump(biz30day, "statsAllSubBiz30day")
    dump(total_sold_price, "statsAllSubTotalSoldPrice")
    dump(sold_price_aver, "statsAllSubSoldAverPrice")
    dump(macro_conditions, "statsAllSubMacroCondition")
    dump(itemid_mapping, "statsAllItemidMapping")
    biz30day_share = dict()
    biz30day_rank = dict()
    biz30day_rerank = dict()
    total_sold_price_share = dict()
    total_sold_price_rank = dict()
    total_sold_price_rerank = dict()
    # Rank words by biz30day, highest first.  The rank only advances when the
    # value changes; ``prev`` starts at 0, so leading words with value 0 keep
    # rank 0.
    items = sorted(biz30day.items(), key=lambda x: x[1], reverse=True)
    prev, rank = 0, 0
    for key, value in items:
        try:
            biz30day_share[key] = value / macro_conditions["biz30day"]
        except ZeroDivisionError:
            biz30day_share[key] = 0
        if prev != value:
            rank += 1
            prev = value
        biz30day_rank[key] = rank
        # Reverse index: rank -> words holding that rank.
        biz30day_rerank.setdefault(rank, list()).append(key)
    # Same ranking scheme, applied to the total sold price.
    items = sorted(total_sold_price.items(), key=lambda x: x[1], reverse=True)
    prev, rank = 0, 0
    for key, value in items:
        try:
            total_sold_price_share[key] = value / macro_conditions["total"]
        except ZeroDivisionError:
            total_sold_price_share[key] = 0
        if prev != value:
            rank += 1
            prev = value
        total_sold_price_rank[key] = rank
        total_sold_price_rerank.setdefault(rank, list()).append(key)
    dump(biz30day_share, "statsAllSubBiz30dayShare")
    dump(biz30day_rank, "statsAllSubBiz30dayRank")
    dump(biz30day_rerank, "statsAllSubBiz30dayReRank")
    dump(total_sold_price_share, "statsAllSubTotalSoldPriceShare")
    dump(total_sold_price_rank, "statsAllSubTotalSoldPriceRank")
    dump(total_sold_price_rerank, "statsAllSubTotalSoldPriceReRank")
def load():
    """Return the stored "submarketWords" object.

    NOTE(review): takes no ``self``; presumably a static method whose inner
    ``load`` call resolves to the module-level helper - confirm.
    """
    submarket_words = load("submarketWords")
    return submarket_words
def load():
    """Return ``(submarketWords, smKeep, smDrop, smMapping)`` as a tuple.

    The four stored objects are loaded in exactly that order.

    NOTE(review): takes no ``self``; presumably a static method whose inner
    ``load`` call resolves to the module-level helper - confirm.
    """
    names = ("submarketWords", "smKeep", "smDrop", "smMapping")
    return tuple(load(name) for name in names)
def build(self):
    """Assemble the per-word submarket summary and store it as "submarketInfo".

    For every submarket word an entry is created.  Words without an entry in
    "statsAllSubItemidSet" only get ``{"inTitle": False}``.  All other words
    get ``"inTitle": True`` plus their item/brand/seller sets, the biz30day
    and sold-price figures with share and rank, and - separately for the
    "biz" and "sold" brand statistics - two brand selections:

    * ``"main <kind> brand"``: brands collected rank by rank until at least
      ``max(brand count * threshold["<kind> main"], threshold["<kind> top"])``
      brands are covered (the key is absent when nothing was collected);
    * ``"top <kind> brand"``: brand -> ``{"num", "share", "rank"}``,
      collected rank by rank until ``threshold["<kind> top"]`` brands are
      covered.

    Brands sharing a rank are always taken together, so both selections may
    exceed their nominal size.  The summary is written with ``super().dump``.
    """
    words = load("submarketWords")
    info = dict()
    itemid_set = load("statsAllSubItemidSet")
    brand_set = load("statsAllSubBrandSet")
    seller_set = load("statsAllSubSellerSet")
    biz30day = load("statsAllSubBiz30day")
    total_sold_price = load("statsAllSubTotalSoldPrice")
    sold_price_aver = load("statsAllSubSoldAverPrice")
    biz30day_share = load("statsAllSubBiz30dayShare")
    biz30day_rank = load("statsAllSubBiz30dayRank")
    total_sold_price_share = load("statsAllSubTotalSoldPriceShare")
    total_sold_price_rank = load("statsAllSubTotalSoldPriceRank")
    biz_brand_num = load("statsSubBrandBizNum")
    biz_brand_share = load("statsSubBrandBizShare")
    biz_brand_rerank = load("statsSubBrandBizReRank")
    sold_brand_num = load("statsSubBrandSoldNum")
    sold_brand_share = load("statsSubBrandSoldShare")
    sold_brand_rerank = load("statsSubBrandSoldReRank")

    def collect_brands(word, kind, rerank, brand_num, brand_share):
        """Fill the "main"/"top" brand selections of ``info[word]`` for one kind."""
        entry = info[word]
        word_rerank = rerank[word]
        top_size = self.threshold["{} top".format(kind)]
        brand_count = sum(len(brands) for brands in word_rerank.values())
        main_size = brand_count * self.threshold["{} main".format(kind)]
        if main_size < top_size:
            # The main selection is never smaller than the top selection.
            main_size = top_size

        main_key = "main {} brand".format(kind)
        top = entry["top {} brand".format(kind)] = dict()
        rank, num = 0, 0
        while True:
            rank += 1
            try:
                brands = word_rerank[rank]
            except KeyError:
                # Ranks are consecutive from 1; a missing rank means every
                # available brand has been consumed.
                break
            # ``num`` counts brands taken from earlier ranks, so a whole rank
            # is included as long as the quota was not yet reached before it.
            if num < main_size:
                entry.setdefault(main_key, list()).extend(brands)
            if num < top_size:
                for brand in brands:
                    top[brand] = {
                        "num": brand_num[word][brand],
                        "share": brand_share[word][brand],
                        "rank": rank,
                    }
            num += len(brands)
            if main_size <= num and top_size <= num:
                break

    for word in words:
        if word not in itemid_set:
            # The word occurs in no item title: record that and move on.
            info[word] = {"inTitle": False}
            continue
        info[word] = {
            "inTitle": True,
            "itemid set": itemid_set[word],
            "brand set": brand_set[word],
            "seller set": seller_set[word],
            "biz30day": biz30day[word],
            "total sold price": total_sold_price[word],
            "sold price aver": sold_price_aver[word],
            "biz30day share": biz30day_share[word],
            "biz30day rank": biz30day_rank[word],
            "total sold price share": total_sold_price_share[word],
            "total sold price rank": total_sold_price_rank[word],
        }
        collect_brands(word, "biz", biz_brand_rerank, biz_brand_num,
                       biz_brand_share)
        collect_brands(word, "sold", sold_brand_rerank, sold_brand_num,
                       sold_brand_share)
    super().dump(info, "submarketInfo")
def load():
    """Return ``(submarketWords, smDrop)``, loaded in that order.

    NOTE(review): takes no ``self``; presumably a static method whose inner
    ``load`` call resolves to the module-level helper - confirm.
    """
    words = load("submarketWords")
    drop = load("smDrop")
    return words, drop