def test02():
    """Derive one numeric rating per (visitor, item) pair and print a sample.

    Loads the events through ``Events.readFromFile()``, gives every distinct
    (visitor, item, event) row a rating by event type (view=1, addtocart=2,
    transaction=3), keeps the highest rating of each visitor/item pair and
    prints the first 40 aggregated rows followed by the number of raw events
    and the number of aggregated pairs.
    """
    print("Test 02")

    eventsDF: DataFrame = Events.readFromFile()

    # Work on an explicit copy: the column selection is derived from eventsDF,
    # and adding the "rating" column to such a derived frame is what makes
    # pandas emit SettingWithCopyWarning.
    ratings4DF: DataFrame = eventsDF[[
        Events.COL_VISITOR_ID, Events.COL_ITEM_ID, Events.COL_EVENT
    ]].drop_duplicates().copy()

    # Rows whose event is not listed here are left without a rating (NaN).
    ratingOfEvent = {"view": 1, "addtocart": 2, "transaction": 3}
    for eventI, ratingI in ratingOfEvent.items():
        ratings4DF.loc[ratings4DF[Events.COL_EVENT] == eventI, "rating"] = ratingI

    # The strongest interaction of a visitor with an item wins.
    ratingsDF: DataFrame = ratings4DF[[
        Events.COL_VISITOR_ID, Events.COL_ITEM_ID, "rating"
    ]]
    ratingsDF = ratingsDF.groupby(
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID], as_index=False)["rating"].max()

    print(ratingsDF.head(40))
    print(len(eventsDF))
    print(len(ratingsDF))
def generateFileRR(numberOfItems: int, countOfRepetitions: int, behaviourID: str, uBehavDesc: UserBehaviourDescription):
    """Generate the behaviour table for ``behaviourID`` and write it to disk.

    Every (visitor, item) row of the events file is repeated once per value
    of ``range(countOfRepetitions)``; the behaviour column is then filled in
    by ``BehavioursRR.__generateGeneralBehaviourrRR`` and the result is
    written as a tab-separated file to ``BehavioursRR.getFile(behaviourID)``.

    Args:
        numberOfItems: forwarded to the behaviour generator.
        countOfRepetitions: number of repetition rows created per event.
        behaviourID: identifier used for the log line and the output file.
        uBehavDesc: forwarded to the behaviour generator.
    """
    # Both global random generators are seeded with the same fixed value.
    np.random.seed(42)
    random.seed(42)

    print("Generate Behaviour RR " + behaviourID)

    outputFile: str = BehavioursRR.getFile(behaviourID)

    allEventsDF: DataFrame = Events.readFromFile()
    pairsDF: DataFrame = allEventsDF[[
        Events.COL_VISITOR_ID, Events.COL_ITEM_ID
    ]].copy()

    # Each row gets the same range object; explode() below turns it into one
    # row per repetition number.
    repetitions = range(countOfRepetitions)
    pairsDF[BehavioursRR.COL_REPETITION] = [repetitions] * len(pairsDF)
    repeatedDF: DataFrame = pairsDF.explode(BehavioursRR.COL_REPETITION)

    repeatedDF[BehavioursRR.COL_BEHAVIOUR] = [None] * len(repeatedDF)

    # The old index becomes an 'index' column; it is present while the
    # generator runs and removed again before the file is written.
    repeatedDF = repeatedDF.reset_index()

    BehavioursRR.__generateGeneralBehaviourrRR(
        repeatedDF, numberOfItems, countOfRepetitions, uBehavDesc)

    del repeatedDF['index']

    repeatedDF.to_csv(outputFile, sep='\t', index=False)
def test04():
    """Train RecommenderVMContextKNN on filtered RetailRocket events and print a recommendation.

    The events are read with ``minEventCount=50``, the recommender is trained
    on them, updated with the single event at position 9000 and then asked
    for a recommendation for visitor 1093035.
    """
    print("Test 04")
    print("Running RecommenderVSKNN RR:")

    from datasets.retailrocket.events import Events  # class

    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    # Only the events are supplied; the other two frames are empty.
    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderVMContextKNN("test", {})

    print("train")
    recommender.train(HistoryDF("test"), rrDataset)

    # One-row frame built from the event stored at position 9000.
    singleEventDF: DataFrame = DataFrame([filteredEventsDF.iloc[9000]])
    print(singleEventDF)
    recommender.update(singleEventDF, {})

    result = recommender.recommend(1093035, 20, {})
    print("Recommendation:")
    print(result)

    print("================== END OF TEST 04 ======================\n\n\n\n\n")
def readDatasetsWithFilter(minEventCount:int):
    """Build the "rrDivAll" RetailRocket dataset from filtered events.

    Args:
        minEventCount: forwarded to ``Events.readFromFileWithFilter``.

    Returns:
        A ``DatasetRetailRocket`` holding the filtered events, the category
        tree and the item properties, each read from its file.
    """
    return DatasetRetailRocket(
        "rrDivAll",
        Events.readFromFileWithFilter(minEventCount=minEventCount),
        CategoryTree.readFromFile(),
        ItemProperties.readFromFile(),
    )
def readDatasets():
    """Build the "rrDivAll" RetailRocket dataset from the unfiltered files.

    Returns:
        A ``DatasetRetailRocket`` holding the events, the category tree and
        the item properties, each read from its file.
    """
    return DatasetRetailRocket(
        "rrDivAll",
        Events.readFromFile(),
        CategoryTree.readFromFile(),
        ItemProperties.readFromFile(),
    )
def test01():
    """Train RecommenderRepeatedPurchase on RetailRocket events and print a recommendation.

    The events are read with ``minEventCount=50``. After training, the
    recommender receives two add-to-cart updates of visitor 904351 (items
    119736 and 213834) and is then asked for a recommendation for that
    visitor, which is printed.
    """
    print("Test 01")
    print("Running RecommenderRepeatedPurchase RR:")

    from datasets.retailrocket.events import Events  # class

    userID: int = 904351

    def addToCartEventDF(itemID: int) -> DataFrame:
        """Return a one-row frame with an add-to-cart event of userID for itemID."""
        # NOTE(review): the last field holds the string "Nan" and is labelled
        # with Events.EVENT_TRANSACTION, exactly as in the original rows;
        # presumably the transaction-id column was meant - confirm.
        eventSer = pd.Series(
            [1433221523348, userID, Events.EVENT_ADDTOCART, itemID, "Nan"],
            index=[
                Events.COL_TIME_STAMP, Events.COL_VISITOR_ID, Events.COL_EVENT,
                Events.COL_ITEM_ID, Events.EVENT_TRANSACTION
            ])
        return pd.DataFrame([eventSer])

    eventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    dataset: ADataset = DatasetRetailRocket("test", eventsDF, DataFrame(), DataFrame())

    rec: ARecommender = RecommenderRepeatedPurchase("rRepeatedPurchase", {})
    rec.train(HistoryDF("test"), dataset)

    # Most frequently re-purchased items: 119736, 119736, 119736, 213834,
    # 119736, 227311, 382885, 119736, 213834, 119736, 432171, 183756, 119736,
    # 305675, 320130
    rec.update(addToCartEventDF(119736), {})
    rec.update(addToCartEventDF(213834), {})

    recommendationSer: Series = rec.recommend(userID, 20, {})
    print("Recommendation:")
    print(recommendationSer)
def test03():
    """Train RecommenderTheMostPopular on the Slantour events and print a recommendation.

    The recommendation is requested for user 1 with the allowed item IDs
    restricted to 0..999.
    """
    print("Test 03")
    print("Running RecommenderTheMostPopular ST:")

    from datasets.slantour.events import Events  # class

    stEventsDF: DataFrame = Events.readFromFile()

    # Only the events are supplied; the second frame is empty.
    stDataset: ADataset = DatasetST("test", stEventsDF, DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular("rTheMostPopular", {})
    recommender.train(HistoryDF("test"), stDataset)

    allowedItemIDs = list(range(0, 1000))
    arguments = {recommender.ARG_ALLOWED_ITEMIDS: allowedItemIDs}
    result = recommender.recommend(1, 20, arguments)

    print(result)
def test02():
    """Train RecommenderTheMostPopular on filtered RetailRocket events and print a recommendation.

    The events are read with ``minEventCount=50`` and the recommendation is
    requested for user 1.
    """
    print("Test 02")
    print("Running RecommenderTheMostPopular RR:")

    from datasets.retailrocket.events import Events  # class

    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    # Only the events are supplied; the other two frames are empty.
    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular("rTheMostPopular", {})
    recommender.train(HistoryDF("test"), rrDataset)

    result = recommender.recommend(1, 20, {})

    print("Recommendation:")
    print(result)
def test21():
    """Train RecommenderItemBasedKNN on the Slantour events and print a recommendation.

    After training, the recommender is updated with the single event at
    position 9000 and asked for a recommendation for user 3325463.
    """
    print("Test 21")
    print("Running RecommenderItemBasedKNN ST:")

    from datasets.slantour.events import Events  # class

    eventsDF: DataFrame = Events.readFromFile()

    # Only the events are supplied; the second frame is empty.
    dataset: ADataset = DatasetST("test", eventsDF, DataFrame())

    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test"), dataset)

    # One-row frame built from the event stored at position 9000.
    uDF: DataFrame = DataFrame([eventsDF.iloc[9000]])
    print(uDF)
    rec.update(uDF, {})

    r = rec.recommend(3325463, 20, {})
    print(r)

    # The banner used to say "TEST 05", a leftover from another test.
    print("================== END OF TEST 21 ======================\n\n\n\n\n")
def test01():
    """Print the time-ordered events of one hard-coded visitor/item pair.

    Reads all events, sorts them by time stamp, prints the visitor ID of the
    earliest event and then up to 100 events in which visitor 1043630
    interacted with item 349104.
    """
    print("Test 01")

    # Notes kept from the original author (their meaning is not evident from
    # this function). An earlier sample pair was userID 565892 /
    # currentItemID 168952 with:
    #   5919585 b11111100011100000000
    #   5947295 b11010110001101000010
    #   5980935 b11111000000010101000
    userID: int = 1043630
    currentItemID: int = 349104
    # Notes for the pair above:
    #   5952135 b10111101000000000000
    #   5962570 b11111100000100100000
    #   5980920 b11111011101100101000
    #   5980945 b11011100100110101100
    #   5995975 b11111011000010001010

    eventsDF: DataFrame = Events.readFromFile()
    eventsDF = eventsDF.sort_values(by=Events.COL_TIME_STAMP)

    # Visitor of the earliest event; read directly instead of converting the
    # whole column to a list first.
    print(eventsDF[Events.COL_VISITOR_ID].iloc[0])

    isUser = eventsDF[Events.COL_VISITOR_ID] == userID
    isItem = eventsDF[Events.COL_ITEM_ID] == currentItemID
    eventsDF = eventsDF.loc[isUser & isItem]

    print(eventsDF.head(100))
def test01():
    """Print descriptive statistics about visitors, cart additions and purchases.

    Reads all events via ``Events.readFromFile()``, sorts them by time stamp
    and prints counts of events, visitors, "addtocart" and "transaction"
    rows - first for all visitors, then for visitors with more than five
    events - together with several counts describing how often visitors add
    items to the cart. Two histograms of the number of distinct cart items
    per visitor are built with matplotlib; they are only displayed when the
    local ``DEBUG`` flag is True.
    """
    DEBUG: bool = False
    separator: str = "---------------------------------------------------------"

    def countOfEvent(df, eventName: str) -> int:
        """Return the number of rows of df whose event column equals eventName."""
        return len(df.loc[df[Events.COL_EVENT] == eventName])

    # First get Dataset Data
    eventsDF: DataFrame = Events.readFromFile()
    eventsDF = eventsDF.sort_values(Events.COL_TIME_STAMP)

    userIDs: List[int] = list(eventsDF[Events.COL_VISITOR_ID].unique())

    print("Number of all events: " + str(len(eventsDF)))
    print("Number of all usersIDs: " + str(len(userIDs)))
    print("Number of all addtocart: " + str(countOfEvent(eventsDF, "addtocart")))
    print("Number of all transaction: " + str(countOfEvent(eventsDF, "transaction")))
    print(separator)

    # How many of all users put the same item into the cart more than once.
    # NOTE: the rows counted are (visitor, item) pairs, so a visitor with
    # several such items is counted once per item.
    eventsACartDF: DataFrame = eventsDF.loc[eventsDF[Events.COL_EVENT] == "addtocart"]
    eventsACart2DF = eventsACartDF.groupby(
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID], as_index=False)[Events.COL_TIME_STAMP].count()
    eventsACart2DF = eventsACart2DF.loc[eventsACart2DF[Events.COL_TIME_STAMP] > 1]

    # How many of all users added 1..N distinct items.
    usersAndAddedItemsCountDF: DataFrame = eventsACartDF[
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID]].drop_duplicates().groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_ITEM_ID].count()
    plt.hist(list(usersAndAddedItemsCountDF[Events.COL_ITEM_ID]))
    plt.yscale('log')
    if DEBUG:
        plt.show()

    print("Number of all users: " + str(len(userIDs)))
    print("Number of all users 1< #itemI added: " + str(len(eventsACart2DF)))
    print(separator)

    # Event filtering - keep only the users that have more than 5 records.
    userIdAndTimestampDF: DataFrame = eventsDF.groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_TIME_STAMP].count()
    userIdAndTimestampSelDF: DataFrame = userIdAndTimestampDF.loc[
        userIdAndTimestampDF[Events.COL_TIME_STAMP] > 5]

    userIDsSel: List[int] = list(userIdAndTimestampSelDF[Events.COL_VISITOR_ID].unique())
    eventsSelDF: DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID].isin(userIDsSel)]

    print("Number of selected events: " + str(len(eventsSelDF)))
    print("Number of selected usersIDs: " + str(len(userIDsSel)))
    # This count covers the selected events only, so it is labelled
    # "selected" like its neighbours (it used to say "all").
    print("Number of selected addtocart: " + str(countOfEvent(eventsSelDF, "addtocart")))
    print("Number of selected transaction: " + str(countOfEvent(eventsSelDF, "transaction")))
    print(separator)

    # How many selected users put an item into the cart at least once.
    eventsSelACartDF: DataFrame = eventsSelDF.loc[eventsSelDF[Events.COL_EVENT] == "addtocart"]
    userIDSelsACart: List[int] = list(eventsSelACartDF[Events.COL_VISITOR_ID].unique())

    print("Number of selected users: " + str(len(userIDsSel)))
    print("Number of selected users who added something to cart: " + str(len(userIDSelsACart)))
    print(separator)

    # How many selected users have more than one / exactly one add-to-cart
    # event (the items need not be distinct).
    eventsSelACartDF_ = eventsSelACartDF.groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_TIME_STAMP].count()

    eventsSelACart2DF = eventsSelACartDF_.loc[eventsSelACartDF_[Events.COL_TIME_STAMP] > 1]
    userSelACart2IDs: List[int] = list(eventsSelACart2DF[Events.COL_VISITOR_ID])
    print("Number of selected users 1< #addItem: " + str(len(userSelACart2IDs)))

    eventsSelACart1DF = eventsSelACartDF_.loc[eventsSelACartDF_[Events.COL_TIME_STAMP] == 1]
    userSelACart1IDs: List[int] = list(eventsSelACart1DF[Events.COL_VISITOR_ID])
    print("Number of selected users 1= #addItem: " + str(len(userSelACart1IDs)))

    # How many selected users added one single item, any number of times.
    usersSelAndAddedItemsCountDF: DataFrame = eventsSelACartDF[
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID]].drop_duplicates().groupby(
        [Events.COL_VISITOR_ID], as_index=False)[Events.COL_ITEM_ID].count()
    plt.hist(list(usersSelAndAddedItemsCountDF[Events.COL_ITEM_ID]), bins=700)
    if DEBUG:
        plt.show()

    a = len(usersSelAndAddedItemsCountDF.loc[
        usersSelAndAddedItemsCountDF[Events.COL_ITEM_ID] == 1])
    print("Number of selected users who added only 1 unique itemI (possibly many times): " + str(a))
    print(separator)

    # Percentage of the users with more than one add-to-cart event that added
    # just one distinct item. Without any such user the share is undefined,
    # so NaN is printed instead of dividing by zero.
    if userSelACart2IDs:
        b = (a - len(userSelACart1IDs)) / len(userSelACart2IDs) * 100
    else:
        b = float("nan")
    print(str(b) + " %")

    # How many times did users buy the same thing. The frames below are
    # ad-hoc look-ups; this function neither prints nor returns them.
    eventsTransDF: DataFrame = eventsDF.loc[eventsDF[Events.COL_EVENT] == "transaction"]
    events2DF = eventsTransDF.groupby(
        [Events.COL_VISITOR_ID, Events.COL_ITEM_ID], as_index=False)[Events.COL_TIME_STAMP].count()
    events2DF = events2DF.loc[events2DF[Events.COL_TIME_STAMP] > 4]

    events152963DF: DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID] == 152963]
    events152963DF = events152963DF.loc[events152963DF[Events.COL_ITEM_ID] == 119736]

    events530559DF: DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID] == 530559]
    events530559DF = events530559DF.loc[events530559DF[Events.COL_ITEM_ID] == 119736]

    events76757DF: DataFrame = eventsDF.loc[eventsDF[Events.COL_VISITOR_ID] == 76757]
    events76757DF = events76757DF.loc[events76757DF[Events.COL_EVENT] == "transaction"]