def test03(): print("Test 03") print("Running RecommenderCosineCB ST:") dataset: DatasetST = DatasetST.readDatasets() args: dict = { RecommenderCosineCB.ARG_CB_DATA_PATH: Configuration.cbSTDataFileWithPathTFIDF, # RecommenderCosineCB.ARG_CB_DATA_PATH:Configuration.cbSTDataFileWithPathOHE, RecommenderCosineCB.ARG_USER_PROFILE_SIZE: 5, RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: "max", RecommenderCosineCB.ARG_USE_DIVERSITY: False, RecommenderCosineCB.ARG_ALLOWED_ITEMIDS: list(range(0, 1000)) } #True rec: ARecommender = RecommenderCosineCB("test", args) rec.train(HistoryDF("test"), dataset) #eventsDFDFUpdate:DataFrame = dataset.eventsDF.iloc[5003:5004] #print(eventsDFDFUpdate) #rec.update(eventsDFDFUpdate, args) # user with very outdated profile - no recent objects r: Series = rec.recommend(3500678, 10, args) #print(type(r)) print(r) args: dict = { RecommenderCosineCB.ARG_CB_DATA_PATH: Configuration.cbSTDataFileWithPathTFIDF, # RecommenderCosineCB.ARG_CB_DATA_PATH:Configuration.cbSTDataFileWithPathOHE, RecommenderCosineCB.ARG_USER_PROFILE_SIZE: 5, RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: "max", RecommenderCosineCB.ARG_USE_DIVERSITY: True, RecommenderCosineCB.ARG_MMR_LAMBDA: 0.5, RecommenderCosineCB.ARG_ALLOWED_ITEMIDS: list(range(0, 1000)) } #True rec: ARecommender = RecommenderCosineCB("test", args) rec.train(HistoryDF("test"), dataset) r: Series = rec.recommend(3500678, 10, args) print(r) """
def test01():
    """Run AggrDHondtDirectOptimizeThompsonSampling with three discount factors.

    The same method results, model and user are aggregated once for each of
    the discount factors "uniform", "DCG" and "PowerLaw"; the returned item
    IDs are printed after every run.
    """
    print("Test 01")

    # number of recommended items
    numberOfItems: int = 5

    methodsResultDict: dict = {
        "method1": pd.Series([0.9, 0.5, 0.4, 0.2], [1, 2, 3, 4], name="rating"),
        "method2": pd.Series([0.2, 0.9, 0.01], [3, 4, 5], name="rating"),
        "method3": pd.Series([0.01, 0.9, 0.9], [5, 6, 7], name="rating"),
    }

    # parameters of the methods, one row per method
    methodsParamsData: List[list] = [
        ['method1', 5, 10, 1, 1],
        ['method2', 5, 12, 1, 1],
        ['method3', 6, 13, 1, 1],
    ]
    modelDF: DataFrame = pd.DataFrame(
        methodsParamsData,
        columns=["methodID", "r", "n", "alpha0", "beta0"]).set_index("methodID")

    userID: int = 0

    for discountFactor in ("uniform", "DCG", "PowerLaw"):
        # a new aggregator (with a new history and selector) for every factor
        aggregator = AggrDHondtDirectOptimizeThompsonSampling(
            HistoryDF(""), {
                AggrDHondtDirectOptimizeThompsonSampling.ARG_SELECTOR:
                TheMostVotedItemSelector({}),
                AggrDHondtDirectOptimizeThompsonSampling.ARG_DISCOUNT_FACTOR:
                discountFactor,
            })
        selectedItemIDs: List[tuple] = aggregator.run(
            methodsResultDict, modelDF, userID, numberOfItems)
        print("itemIDs:" + str(selectedItemIDs))
def test01():
    """Run RecommenderTheMostPopular on the first 50000 MovieLens ratings.

    After training, one further rating is passed to update() as a click and
    recommendations of 50 items are printed for user 331 and for user 10000.
    """
    print("Test 01")
    print("Running RecommenderTheMostPopular ML:")

    allRatingsDF: DataFrame = Ratings.readFromFileMl1m()

    trainDataset: ADataset = DatasetML(
        "test", allRatingsDF.iloc[0:50000], pd.DataFrame(), pd.DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular("test", {})
    recommender.train(HistoryDF("test"), trainDataset)

    # a single rating from outside the training slice
    recommender.update(ARecommender.UPDT_CLICK, allRatingsDF.iloc[50003:50004])

    # user 331 first, then 10000 (testing of a non-existent user)
    for userID in (331, 10000):
        result: Series = recommender.recommend(userID, 50, {})
        print(type(result))
        print(result)
def test01():
    """Run AggrRandomRecsSwitching three times and print the selected items.

    Four method results are aggregated (the fourth one is empty) with
    "metoda1" configured as the main method. The aggregation is executed
    twice for user 101 and once for user 102.
    """
    print("Test 01")

    print("Running AggrRandomRecsSwitching:")

    # number of recommended items
    N = 3

    # method results, items=[1,2,4,5,6,7,8,12,32,64,77]
    methodsResultDict: dict = {
        "metoda1": pd.Series([0.2, 0.1, 0.3, 0.3, 0.1], [32, 2, 8, 1, 4], name="rating"),
        "metoda2": pd.Series([0.1, 0.1, 0.2, 0.3, 0.3], [1, 5, 32, 6, 7], name="rating"),
        "metoda3": pd.Series([0.3, 0.1, 0.2, 0.3, 0.1], [7, 2, 77, 64, 12], name="rating"),
        # explicit dtype: an empty Series without one triggers a pandas
        # deprecation warning (older versions) or becomes object-typed
        # (pandas 2), unlike the float ratings of the other methods
        "metoda4": pd.Series([], [], dtype="float64", name="rating"),
    }

    aggr: AggrRandomRecsSwitching = AggrRandomRecsSwitching(
        HistoryDF(""), {AggrRandomRecsSwitching.ARG_MAIN_METHOD: "metoda1"})

    # user 101 is asked twice, user 102 once
    for userID in (101, 101, 102):
        itemIDs: List[tuple] = aggr.runWithResponsibility(
            methodsResultDict, {}, userID, N)
        print(itemIDs)
def test01():
    """Run AggrBanditTS with a roulette-wheel selector and print the result.

    Three method results and a parameter table (columns r, n, alpha0, beta0,
    indexed by methodID) are passed to runWithResponsibility() together with
    the requested number of items.
    """
    print("Test 01")
    print("Running AggrBanditTSRun:")

    # number of recommended items
    numberOfItems = 120

    # method results, items=[1,2,4,5,6,7,8,12,32,64,77]
    methodsResultDict: dict = {
        "metoda1": pd.Series([0.2, 0.1, 0.3, 0.3, 0.1], [32, 2, 8, 1, 4], name="rating"),
        "metoda2": pd.Series([0.1, 0.1, 0.2, 0.3, 0.3], [1, 5, 32, 6, 7], name="rating"),
        "metoda3": pd.Series([0.3, 0.1, 0.2, 0.3, 0.1], [7, 2, 77, 64, 12], name="rating"),
    }

    # parameters of the methods, one row per method
    paramRows: List[list] = [
        ['metoda1', 5, 10, 1, 1],
        ['metoda2', 5, 12, 1, 1],
        ['metoda3', 6, 13, 1, 1],
    ]
    methodsParamsDF: DataFrame = pd.DataFrame(
        paramRows,
        columns=["methodID", "r", "n", "alpha0", "beta0"]).set_index("methodID")

    selector = RouletteWheelSelector({RouletteWheelSelector.ARG_EXPONENT: 1})
    aggregator: AggrBanditTS = AggrBanditTS(
        HistoryDF(""), {AggrBanditTS.ARG_SELECTOR: selector})

    selectedItemIDs: List[tuple] = aggregator.runWithResponsibility(
        methodsResultDict, methodsParamsDF, numberOfItems)
    print(selectedItemIDs)
def test01():
    """Call AggrD21.runWithResponsibility on two inputs and a negative list.

    The aggregator is configured with a rating threshold for negatives of
    1.0; the value returned by the call is not inspected.
    """
    print("Test 01")

    # result items=[21,3,4,5,6,7,8]
    inputItemIDsDict: dict = {
        "input1": pd.Series([1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [21, 3, 4, 6, 7, 8], name="rating"),
        "input2": pd.Series([1.0, 1.0, 1.0, 1.0, 1.0], [21, 2, 4, 5, 9], name="rating"),
        "negative": pd.Series([1.0, 1.0], [21, 2], name="rating"),
    }

    emptyModelDF: DataFrame = DataFrame()
    history: AHistory = HistoryDF("test")

    arguments: Dict[str, object] = {AggrD21.ARG_RATING_THRESHOLD_FOR_NEG: 1.0}

    aggregator = AggrD21(history, arguments)
    # user 1, at most 20 items; the same arguments are handed over again
    aggregator.runWithResponsibility(
        inputItemIDsDict, emptyModelDF, 1, 20, arguments)
def test01():
    """Run RecommenderItemBasedKNN on the first 50000 MovieLens ratings.

    The recommender is trained, asked for 50 items for the user of one
    held-out rating, updated with that rating and asked again. Finally a
    recommendation for a non-existent user is requested and printed.
    """
    print("Test 01")

    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 50k
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test01"), trainDataset)

    # get one rating for update
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50005:50006]
    updatedUserID = ratingsDFUpdate['userId'].iloc[0]

    # get recommendations:
    print("Recommendations before update")
    r: Series = rec.recommend(updatedUserID, 50, {})

    rec.update(ratingsDFUpdate, {})

    print("Recommendations after update")
    r: Series = rec.recommend(updatedUserID, 50, {})

    print("Test for non-existent user:")
    # NOTE(review): the original text between the message above and the end
    # banner was masked out ("******") and was not valid Python. The two
    # statements below are reconstructed from the non-existent-user checks of
    # the sibling tests (user 10000, 50 items) -- confirm against the
    # unmasked source.
    r: Series = rec.recommend(10000, 50, {})
    print(r)

    print("================== END OF TEST 01 ======================\n\n\n\n\n")
def test02():
    """Train RecommenderW2V on the filtered RetailRocket dataset and recommend.

    The dataset is read with minEventCount=50, the recommender is trained on
    all of it, updated with the event at position 900 and asked for 50 items
    for user 1093035.
    """
    print("Test 02")
    print("Running RecommenderW2V RR:")

    rrDataset: DatasetRetailRocket = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    eventsDF: DataFrame = rrDataset.eventsDF

    # arguments used both for construction and for recommend()
    w2vArgs: Dict[str, object] = {
        RecommenderW2V.ARG_ITERATIONS: 50000,
        RecommenderW2V.ARG_TRAIN_VARIANT: 'posneg',
        RecommenderW2V.ARG_USER_PROFILE_SIZE: -1,
        RecommenderW2V.ARG_USER_PROFILE_STRATEGY: 'weightedMean',
        RecommenderW2V.ARG_VECTOR_SIZE: 128,
        RecommenderW2V.ARG_WINDOW_SIZE: 5,
    }

    recommender: ARecommender = RecommenderW2V("RecommenderW2V", w2vArgs)
    # the whole dataset serves as the training dataset
    recommender.train(HistoryDF("test02"), rrDataset)

    # one-row frame made of a single event
    singleEventDF: DataFrame = DataFrame([eventsDF.iloc[900]])
    recommender.update(singleEventDF, {})

    result: Series = recommender.recommend(1093035, 50, w2vArgs)
    print("Recommendation:")
    print(result)
def test03():
    """Train RecommenderW2V on the ST dataset and print a recommendation.

    The recommender is trained on the complete dataset, updated with the
    event at position 9000 and asked for 50 items for user 2760420; allowed
    item IDs are restricted to 0..999.
    """
    print("Test 03")
    print("Running RecommenderW2V ST:")

    stDataset: DatasetST = DatasetST.readDatasets()
    eventsDF: DataFrame = stDataset.eventsDF

    # arguments shared by the constructor, update() and recommend()
    w2vArgs: Dict[str, object] = {
        RecommenderW2V.ARG_ITERATIONS: 50000,
        RecommenderW2V.ARG_TRAIN_VARIANT: 'all',
        RecommenderW2V.ARG_USER_PROFILE_SIZE: 5,
        RecommenderW2V.ARG_USER_PROFILE_STRATEGY: 'weightedMean',
        RecommenderW2V.ARG_VECTOR_SIZE: 32,
        RecommenderW2V.ARG_WINDOW_SIZE: 1,
        RecommenderW2V.ARG_ALLOWED_ITEMIDS: list(range(0, 1000)),
    }

    recommender: ARecommender = RecommenderW2V("RecommenderW2V", w2vArgs)
    # the whole dataset serves as the training dataset
    recommender.train(HistoryDF("test03"), stDataset)

    singleEventDF: DataFrame = DataFrame([eventsDF.iloc[9000]])
    print(singleEventDF)
    recommender.update(singleEventDF, w2vArgs)

    result: Series = recommender.recommend(2760420, 50, w2vArgs)
    print(result)
def evaluate(self, rDscr: RecommenderDescription):
    """Train the described recommender on half of the events and count hits.

    The events of self.dataset are split in the middle: the first half is
    used for training, the second half for testing. For every visitor found
    in the events, the number of distinct test items that also occur among
    the 20 recommended items is added to a counter, which is printed.

    :param rDscr: description from which the recommender and its arguments
        are exported
    """
    print("StaticEvaluation")

    recommender: ARecommender = rDscr.exportRecommender("test")
    recommenderArgs: dict = rDscr.getArguments()

    allEventsDF: DataFrame = self.dataset.eventsDF
    middle: int = int(len(allEventsDF) / 2)
    trainEventsDF: DataFrame = allEventsDF[0:middle]
    testEventsDF: DataFrame = allEventsDF[middle:]

    trainDataset = DatasetRetailRocket("rrTrain", trainEventsDF, DataFrame(),
                                       DataFrame())

    # visitors are taken from all events, not only from the test half
    visitorIDs: List[int] = list(allEventsDF[Events.COL_VISITOR_ID].unique())

    recommender.train(HistoryDF("test"), trainDataset)

    hitCount: int = 0
    for visitorID in visitorIDs:
        ownTestRows = testEventsDF.loc[
            testEventsDF[Events.COL_VISITOR_ID] == visitorID]
        testItemIDs: List[int] = list(ownTestRows[Events.COL_ITEM_ID].unique())

        recommendedIDs = recommender.recommend(visitorID, 20,
                                               recommenderArgs).keys()

        # number of test items that were also recommended
        hitCount += sum(1 for itemID in testItemIDs if itemID in recommendedIDs)

    print(" counter: " + str(hitCount))
def test01():
    """Build a one-method portfolio around RecommenderTheMostPopular and use it.

    The portfolio is exported from its description, trained on the ST
    dataset, updated with one click event and asked for a recommendation of
    20 aggregated items for user 1; the first element of the result is
    printed.
    """
    print("Test 01")

    recommenderDescr: RecommenderDescription = RecommenderDescription(
        RecommenderTheMostPopular, {})
    recommenderID: str = "TheMostPopular"
    portfolioDescr: Portfolio1MethDescription = Portfolio1MethDescription(
        recommenderID.title(), recommenderID, recommenderDescr)

    stDataset: ADataset = DatasetST.readDatasets()
    history: AHistory = HistoryDF("test")

    portfolio: APortfolio = portfolioDescr.exportPortfolio("jobID", history)
    emptyModel: DataFrame = DataFrame()
    portfolio.train(history, stDataset)

    # one click: user 1 on object 555
    clickDF: DataFrame = DataFrame(
        [[1, 555]], columns=[Events.COL_USER_ID, Events.COL_OBJECT_ID])
    portfolio.update(ARecommender.UPDT_CLICK, clickDF)

    recommendArgs = {APortfolio.ARG_NUMBER_OF_AGGR_ITEMS: 20}
    r, rp = portfolio.recommend(1, emptyModel, recommendArgs)
    print(r)
def test04():
    """Train RecommenderVMContextKNN on filtered RetailRocket events.

    Events are read with minEventCount=50; after training the recommender is
    updated with the event at position 9000 and 20 items are recommended for
    user 1093035.
    """
    print("Test 04")
    print("Running RecommenderVSKNN RR:")

    from datasets.retailrocket.events import Events  # class

    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderVMContextKNN("test", {})
    print("train")
    recommender.train(HistoryDF("test"), rrDataset)

    singleEventDF: DataFrame = DataFrame([filteredEventsDF.iloc[9000]])
    print(singleEventDF)
    recommender.update(singleEventDF, {})

    result = recommender.recommend(1093035, 20, {})
    print("Recommendation:")
    print(result)

    print("================== END OF TEST 04 ======================\n\n\n\n\n")
def test03():
    """Train RecommenderBPRMFImplicit on the filtered RetailRocket dataset.

    The dataset is read with minEventCount=50; the trained recommender is
    updated with the event at position 9000 and asked for 50 items for user
    1093035.
    """
    print("Test 03")
    print("Running Recommender BPRMF on RR:")

    rrDataset: DatasetRetailRocket = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    eventsDF: DataFrame = rrDataset.eventsDF

    bprmfArgs = {
        RecommenderBPRMFImplicit.ARG_FACTORS: 20,
        RecommenderBPRMFImplicit.ARG_ITERATIONS: 50,
        RecommenderBPRMFImplicit.ARG_LEARNINGRATE: 0.003,
        RecommenderBPRMFImplicit.ARG_REGULARIZATION: 0.003,
    }
    recommender: ARecommender = RecommenderBPRMFImplicit("test", bprmfArgs)
    # the whole dataset serves as the training dataset
    recommender.train(HistoryDF("test03"), rrDataset)

    singleEventDF: DataFrame = DataFrame([eventsDF.iloc[9000]])
    print(singleEventDF)
    recommender.update(singleEventDF, {})

    result: Series = recommender.recommend(1093035, 50, {})
    print("Recommendation:")
    print(result)
def test02():
    """Run RecommenderCosineCB on the RetailRocket dataset.

    The recommender is trained without diversity, updated with one event
    (position 5003) and asked for recommendations for user 863743 and for
    user 10000; type and content of each result are printed.
    """
    print("Test 02")

    print("Running RecommenderCosineCB RR:")

    # annotated with the class that actually produces the object
    # (the original annotation named DatasetST)
    dataset: DatasetRetailRocket = DatasetRetailRocket.readDatasets()

    args: dict = {
        RecommenderCosineCB.ARG_CB_DATA_PATH:
        Configuration.cbRRDataFileWithPathOHE,
        RecommenderCosineCB.ARG_USER_PROFILE_SIZE: 5,
        RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: "max",
        RecommenderCosineCB.ARG_USE_DIVERSITY: False,
    }
    rec: ARecommender = RecommenderCosineCB("test", args)

    rec.train(HistoryDF("test"), dataset)

    eventsDFDFUpdate: DataFrame = dataset.eventsDF.iloc[5003:5004]
    print(eventsDFDFUpdate)

    rec.update(eventsDFDFUpdate, args)

    # user with very outdated profile - no recent objects
    r: Series = rec.recommend(863743, 20, args)
    print(type(r))
    print(r)

    # testing of a non-existent user
    r: Series = rec.recommend(10000, 50, args)
    print(type(r))
    print(r)
def test02():
    """Train RecommenderBPRMFImplicit on MovieLens and recommend for user 23.

    Training uses the ratings at positions 0..799999 together with the users
    and items of the full dataset; 50 items are recommended and printed.
    """
    print("Test 02")
    print("Running Recommender BPRMF on ML:")

    mlDataset: DatasetML = DatasetML.readDatasets()

    # training part: ratings at positions 0..799999
    trainDataset: DatasetML = DatasetML(
        "test", mlDataset.ratingsDF.iloc[0:800000], mlDataset.usersDF,
        mlDataset.itemsDF)

    print(mlDataset.ratingsDF.iloc[655924:655926])

    bprmfArgs = {
        RecommenderBPRMFImplicit.ARG_FACTORS: 20,
        RecommenderBPRMFImplicit.ARG_ITERATIONS: 50,
        RecommenderBPRMFImplicit.ARG_LEARNINGRATE: 0.003,
        RecommenderBPRMFImplicit.ARG_REGULARIZATION: 0.003,
    }
    recommender: ARecommender = RecommenderBPRMFImplicit("test", bprmfArgs)
    recommender.train(HistoryDF("test02"), trainDataset)

    print("Recommendations before update")
    result: Series = recommender.recommend(23, 50, {})
    print(result)

    print("================== END OF TEST 02 ======================\n\n\n\n\n")
def test02():
    """Compare insertion time of HistorySQLite and HistoryDF.

    100 recommendations are inserted into each history and the elapsed time
    of both runs is printed, followed by the first element of
    getPreviousRecomOfUser(1) of each history.

    The intervals are measured with time.perf_counter(), the clock intended
    for measuring short durations. Note that the HistorySQLite interval also
    covers one isObjectClicked() query and its print.
    """
    print("Test 02")

    print("Running of comparing Dataframe vs. Database based History:")

    history1 = HistorySQLite("databse1")
    history2 = HistoryDF("databse2")

    start1 = time.perf_counter()
    for i in range(100):
        # userID, itemID, position, observation, clicked
        history1.insertRecommendation(1, i, 1, 0.5, False)
    # NOTE(review): placed after the loop; the collapsed original does not
    # show whether this query ran once or on every iteration -- confirm.
    isClicked: bool = history1.isObjectClicked(1, 1, 200)
    print("isClicked: " + str(isClicked))
    end1 = time.perf_counter()

    start2 = time.perf_counter()
    for i in range(100):
        # userID, itemID, position, observation, clicked
        history2.insertRecommendation(1, i, 1, 0.5, False)
    end2 = time.perf_counter()

    print()
    print("Time HistorySQLite: " + format(end1 - start1, '.5f') + " s")
    print("Time HistoryDF: " + format(end2 - start2, '.5f') + " s")

    print(history1.getPreviousRecomOfUser(1)[0])
    print(history2.getPreviousRecomOfUser(1)[0])
def test04():
    """Exercise RecommenderBPRMFImplicit on ST data with unknown users and items.

    After training on the whole dataset the recommender is updated with an
    existing event, then with events whose user ID and object IDs lie above
    the maxima found in the data. Recommendations of 50 items are printed
    for user 23 and for the newly introduced user.
    """
    print("Test 04")
    print("Running Recommender BPRMF on SL:")

    from datasets.slantour.events import Events  # class

    stDataset: DatasetST = DatasetST.readDatasets()
    eventsDF: DataFrame = stDataset.eventsDF

    maxUserID: int = eventsDF[Events.COL_USER_ID].max()
    print("uIDMax: " + str(maxUserID))
    maxItemID: int = eventsDF[Events.COL_OBJECT_ID].max()
    print("iIDMax: " + str(maxItemID))

    bprmfArgs = {
        RecommenderBPRMFImplicit.ARG_FACTORS: 20,
        RecommenderBPRMFImplicit.ARG_ITERATIONS: 50,
        RecommenderBPRMFImplicit.ARG_LEARNINGRATE: 0.003,
        RecommenderBPRMFImplicit.ARG_REGULARIZATION: 0.003,
    }
    recommender: ARecommender = RecommenderBPRMFImplicit("test", bprmfArgs)
    recommender.train(HistoryDF("test04"), stDataset)

    knownEventDF: DataFrame = DataFrame([eventsDF.iloc[9000]])
    print(knownEventDF)
    recommender.update(knownEventDF, {})

    # IDs just above the maxima present in the data
    newUserID: int = maxUserID + 1

    def printAndUpdate(userID: int, itemID: int) -> None:
        # one-row frame: empty frame first, then the row written through .loc
        eventDF: DataFrame = DataFrame(
            columns=[Events.COL_USER_ID, Events.COL_OBJECT_ID])
        eventDF.loc[0] = [userID, itemID]
        print(eventDF)
        recommender.update(eventDF, {})

    # update with unknown user and unknown item
    printAndUpdate(newUserID, maxItemID + 1)
    # update with unknown item
    printAndUpdate(newUserID, maxItemID + 2)

    result: Series = recommender.recommend(23, 50, {})
    print(result)
    print()

    # recommend for unknown user
    result = recommender.recommend(newUserID, 50, {})
    print(result)
def test01():
    """Run AggrDHontNegativeImplFeedback over a history with three records.

    Three identical recommendations of item 7 for user 0 are inserted into
    the history, the ignoring value of that pair is printed and the
    aggregation result for the three methods is printed.
    """
    print("Test 01")

    # number of recommended items
    numberOfItems = 120

    # created as in the original test; not used any further here
    uBehaviourDesc: UserBehaviourDescription = UserBehaviourDescription(
        observationalLinearProbabilityFnc, [0.1, 0.9])

    # method results, items=[1,2,4,5,6,7,8,12,32,64,77]
    methodsResultDict: dict = {
        "metoda1": pd.Series([0.2, 0.1, 0.3, 0.3, 0.1], [32, 2, 8, 1, 4], name="rating"),
        "metoda2": pd.Series([0.1, 0.1, 0.2, 0.3, 0.3], [1, 5, 32, 6, 7], name="rating"),
        "metoda3": pd.Series([0.3, 0.1, 0.2, 0.3, 0.1], [7, 2, 77, 64, 12], name="rating"),
    }

    # votes of the methods, indexed by methodID
    voteRows: List[list] = [['metoda1', 100], ['metoda2', 80], ['metoda3', 60]]
    methodsParamsDF: DataFrame = pd.DataFrame(
        voteRows, columns=["methodID", "votes"]).set_index("methodID")

    userID: int = 0
    itemID: int = 7

    history: AHistory = HistoryDF("test01")
    for _ in range(3):
        history.insertRecommendation(userID, itemID, 1, 0.9, True)
    history.print()

    ignoringValue: float = history.getIgnoringValue(userID, itemID, limit=3)
    print("IgnoringValue: " + str(ignoringValue))

    aggregator: AggrDHont = AggrDHontNegativeImplFeedback(
        history, {
            AggrDHontNegativeImplFeedback.ARG_SELECTORFNC:
            (AggrDHontNegativeImplFeedback.selectorOfTheMostVotedItem, []),
            AggrDHontNegativeImplFeedback.AGR_LENGTH_OF_HISTORY: 10,
            AggrDHontNegativeImplFeedback.AGR_BORDER_NEGATIVE_FEEDBACK: 1.0,
        })

    selectedItemIDs: List[tuple] = aggregator.runWithResponsibility(
        methodsResultDict, methodsParamsDF, userID, numberOfItems)
    print(selectedItemIDs)
def test02():
    """Try three entry points of PenalizationOfResultsByNegImpFeedbackUsingReduceRelevance.

    1. proportionalRelevanceReduction() on three method results for a user
       whose history holds three records of item 1,
    2. the static getPenaltyLinear2(),
    3. the static getPenaltyLinear().
    The outcome of each step is printed.
    """
    print("Test 02")

    methodsResultDict: dict = {
        "metoda1": pd.Series([0.2, 0.1, 0.3, 0.3, 0.1], [32, 2, 8, 1, 4], name="rating"),
        "metoda2": pd.Series([0.1, 0.1, 0.2, 0.3, 0.3], [1, 5, 32, 6, 7], name="rating"),
        "metoda3": pd.Series([0.3, 0.1, 0.2, 0.3, 0.1], [7, 2, 77, 64, 12], name="rating"),
    }
    print(methodsResultDict)
    print()

    userID: int = 0
    itemID: int = 1

    history: AHistory = HistoryDF("test01")
    for _ in range(3):
        history.insertRecommendation(userID, itemID, 0, 0.9, False)

    # --- step 1: reduction of the relevance of the method results ---------
    maxPenaltyValue: float = 1.2
    minPenaltyValue: float = 0.2
    lengthOfHistory: int = 5

    penalizer = PenalizationOfResultsByNegImpFeedbackUsingReduceRelevance(
        history, maxPenaltyValue, minPenaltyValue, lengthOfHistory)
    methodsResultDict = penalizer.proportionalRelevanceReduction(
        methodsResultDict, userID)
    print("methodsResultDict")
    print(methodsResultDict)

    # --- step 2: getPenaltyLinear2 with the same bounds, for i = 2 ---------
    linear2Value: float = (
        PenalizationOfResultsByNegImpFeedbackUsingReduceRelevance.getPenaltyLinear2(
            2, maxPenaltyValue, minPenaltyValue, lengthOfHistory))
    print("value: " + str(linear2Value))

    # --- step 3: getPenaltyLinear for a time difference of min + 0.25 ------
    minTimeDiff: float = 1.0
    maxTimeDiff: float = 1.5
    minPenalty: float = 0.0
    maxPenalty: float = 1.0
    timeDiff: float = minTimeDiff + 0.25

    linearValue: float = (
        PenalizationOfResultsByNegImpFeedbackUsingReduceRelevance.getPenaltyLinear(
            timeDiff, minTimeDiff, maxTimeDiff, minPenalty, maxPenalty))
    print("value: " + str(linearValue))
def test01():
    """Run AggrFuzzyDHondtINF with a PenalUsingReduceRelevance penalty tool.

    Three identical records of item 7 for user 0 are inserted into the
    history, which is then printed; the aggregator is configured with
    TheMostVotedItemSelector and the penalty tool, and its result is printed.
    """
    print("Test 01")

    # number of recommended items
    numberOfItems = 120

    # created as in the original test; not used any further here
    uBehaviourDesc: UserBehaviourDescription = UserBehaviourDescription(
        observationalLinearProbabilityFnc, [0.1, 0.9])

    # method results, items=[1,2,4,5,6,7,8,12,32,64,77]
    methodsResultDict: dict = {
        "metoda1": pd.Series([0.2, 0.1, 0.3, 0.3, 0.1], [32, 2, 8, 1, 4], name="rating"),
        "metoda2": pd.Series([0.1, 0.1, 0.2, 0.3, 0.3], [1, 5, 32, 6, 7], name="rating"),
        "metoda3": pd.Series([0.3, 0.1, 0.2, 0.3, 0.1], [7, 2, 77, 64, 12], name="rating"),
    }

    # votes of the methods, indexed by methodID
    voteRows: List[list] = [['metoda1', 100], ['metoda2', 80], ['metoda3', 60]]
    methodsParamsDF: DataFrame = pd.DataFrame(
        voteRows, columns=["methodID", "votes"]).set_index("methodID")

    userID: int = 0
    itemID: int = 7

    history: AHistory = HistoryDF("test01")
    for _ in range(3):
        history.insertRecommendation(userID, itemID, 1, True, None)
    history.print()

    penaltyTool: APenalization = PenalUsingReduceRelevance(
        penaltyLinear, [0.8, 0.2, 3], penaltyLinear, [1.0, 0.2, 3], 3)

    aggregator: AggrFuzzyDHondt = AggrFuzzyDHondtINF(
        history, {
            AggrFuzzyDHondtINF.ARG_SELECTOR: TheMostVotedItemSelector({}),
            AggrFuzzyDHondtINF.ARG_PENALTY_TOOL: penaltyTool,
        })

    selectedItemIDs: List[tuple] = aggregator.runWithResponsibility(
        methodsResultDict, methodsParamsDF, userID, numberOfItems)
    print(selectedItemIDs)
def test02():
    """Simulate a one-method portfolio on the filtered RetailRocket dataset.

    Six recommender descriptions are exported one after another; each
    overwrites the previous one, so the portfolio is built from the last
    pair (W2V). The portfolio is trained and then handed to a Simulator
    configured for SimulationRR.
    """
    print("Test 02")

    # every assignment replaces the previous one - only the final pair is used
    rDescr, recommenderID = (
        InputRecomRRDefinition.exportRDescTheMostPopular(),
        InputRecomRRDefinition.THE_MOST_POPULAR)
    rDescr, recommenderID = (
        InputRecomRRDefinition.exportRDescKNN(),
        InputRecomRRDefinition.KNN)
    rDescr, recommenderID = (
        InputRecomRRDefinition.exportRDescBPRMFIMPL(),
        InputRecomRRDefinition.BPRMFIMPL)
    rDescr, recommenderID = (
        InputRecomRRDefinition.exportRDescVMContextKNN(),
        InputRecomRRDefinition.VMC_KNN)
    rDescr, recommenderID = (
        InputRecomRRDefinition.exportRDescCosineCB(),
        InputRecomRRDefinition.COSINECB)
    rDescr, recommenderID = (
        InputRecomRRDefinition.exportRDescW2V(),
        InputRecomRRDefinition.W2V)

    portfolioDescr: Portfolio1MethDescription = Portfolio1MethDescription(
        recommenderID.title(), recommenderID, rDescr)

    rrDataset: ADataset = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)

    behavioursDF: DataFrame = BehavioursRR.readFromFileRR(
        BehavioursRR.getFile("static08"))

    history: AHistory = HistoryDF("test")
    portfolio: APortfolio = portfolioDescr.exportPortfolio("jobID", history)
    portfolio.train(history, rrDataset)

    simulationArgs: Dict[str, object] = {
        SimulationRR.ARG_WINDOW_SIZE: 50,
        SimulationRR.ARG_RECOM_REPETITION_COUNT: 1,
        SimulationRR.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
        SimulationRR.ARG_NUMBER_OF_AGGR_ITEMS:
        InputSimulatorDefinition.numberOfAggrItems,
        SimulationRR.ARG_DIV_DATASET_PERC_SIZE: 90,
        SimulationRR.ARG_HISTORY_LENGTH: 10,
    }

    # simulation of portfolio
    simulator: Simulator = Simulator("test", SimulationRR, simulationArgs,
                                     rrDataset, behavioursDF)
    simulator.simulate([portfolioDescr], [DataFrame()], [EToolDoNothing({})],
                       [HistoryHierDF("a")])
def test21():
    """Simulate the ContextDHondtINF aggregation portfolio on the ST dataset.

    Builds a roulette-wheel selector, a penalty tool and an EvalToolContext,
    wraps them into a Portfolio1AggrDescription and runs a SimulationST
    simulation with the linear0109 behaviour file.

    Relies on ``argsSimulationDict``, which is not defined inside this
    function.
    """
    print("Simulation: ST ContextDHondtINF")

    jobID: str = "Roulette1"

    selector = RouletteWheelSelector({RouletteWheelSelector.ARG_EXPONENT: 1})

    penaltyTool: APenalization = PenalizationToolDefinition.exportPenaltyToolOLin0802HLin1002(
        InputSimulatorDefinition.numberOfAggrItems)

    rIDs, rDescs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()

    # first read: only its events and serials are used (by the eval tool)
    dataset: ADataset = DatasetST.readDatasets()
    eventsDF = dataset.eventsDF
    serialsDF = dataset.serialsDF

    evalHistory: AHistory = HistoryDF("test01")

    # Init evalTool
    evalToolArgs = {
        EvalToolContext.ARG_ITEMS: serialsDF,  # ITEMS
        EvalToolContext.ARG_EVENTS: eventsDF,  # EVENTS (FOR CALCULATING HISTORY OF USER)
        EvalToolContext.ARG_DATASET: "st",  # WHAT DATASET ARE WE IN
        EvalToolContext.ARG_HISTORY: evalHistory,  # empty instance of AHistory is OK for ST dataset
    }
    evalTool: AEvalTool = EvalToolContext(evalToolArgs)

    aggrDescr = InputAggrDefinition.exportADescDContextHondtINF(
        selector, penaltyTool, evalTool)
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "ContextDHondtNIF" + jobID, rIDs, rDescs, aggrDescr)

    batchID: str = "stDiv90Ulinear0109R1"

    # second read: this instance is the one given to the simulator
    dataset = DatasetST.readDatasets()

    behaviourFile: str = BehavioursST.getFile(BehavioursST.BHVR_LINEAR0109)
    behavioursDF: DataFrame = BehavioursST.readFromFileST(behaviourFile)

    model: DataFrame = PModelDHondt(pDescr.getRecommendersIDs())
    print(model)

    # simulation of portfolio
    simulator: Simulator = Simulator(batchID, SimulationST, argsSimulationDict,
                                     dataset, behavioursDF)
    portfolioHistory = HistoryHierDF(pDescr.getPortfolioID())
    simulator.simulate([pDescr], [model], [evalTool], [portfolioHistory])
def test01():
    """Run RecommenderRepeatedPurchase on filtered RetailRocket events.

    The recommender is trained on events read with minEventCount=50, updated
    with two add-to-cart events of user 904351 (items 119736 and 213834) and
    asked for 20 items for that user.
    """
    print("Test 01")
    print("Running RecommenderRepeatedPurchase RR:")

    from datasets.retailrocket.events import Events  # class

    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    userID: int = 904351

    def addToCartFrame(itemID: int) -> DataFrame:
        # one-row frame holding an add-to-cart event of the tested user
        eventSer = pd.Series(
            [1433221523348, userID, Events.EVENT_ADDTOCART, itemID, "Nan"],
            index=[
                Events.COL_TIME_STAMP, Events.COL_VISITOR_ID, Events.COL_EVENT,
                Events.COL_ITEM_ID, Events.EVENT_TRANSACTION
            ])
        return pd.DataFrame([eventSer])

    # built as in the original test; not passed to the recommender
    trainDF = addToCartFrame(350688)

    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderRepeatedPurchase(
        "rRepeatedPurchase", {})
    recommender.train(HistoryDF("test"), rrDataset)

    # most frequently re-purchased items: 119736, 119736, 119736, 213834,
    # 119736, 227311, 382885, 119736, 213834, 119736, 432171, 183756, 119736,
    # 305675, 320130
    firstUpdateDF: DataFrame = addToCartFrame(119736)
    secondUpdateDF: DataFrame = addToCartFrame(213834)

    recommender.update(firstUpdateDF, {})
    recommender.update(secondUpdateDF, {})

    recommendationSer: Series = recommender.recommend(userID, 20, {})
    print("Recommendation:")
    print(recommendationSer)
def test01():
    """Train RecommenderBPRMFImplicit on MovieLens and update it rating by rating.

    Training uses the ratings at positions 0..499964. The following 3041
    ratings (positions 499965..503005) are then fed to update() one row at a
    time. Before and after the updates the number of non-zero entries of the
    matrix column addressed by the first update user, and that user's
    recommendation, are printed. Finally a non-existent user is queried.
    """
    print("Test 01")

    print("Running Recommender BPRMF on ML:")

    dataset: DatasetML = DatasetML.readDatasets()

    # training part: ratings at positions 0..499964 (just under 500k)
    trainDataset: DatasetML = DatasetML("test",
                                        dataset.ratingsDF.iloc[0:499965],
                                        dataset.usersDF, dataset.itemsDF)

    # train recommender
    rec: ARecommender = RecommenderBPRMFImplicit(
        "test", {
            RecommenderBPRMFImplicit.ARG_FACTORS: 20,
            RecommenderBPRMFImplicit.ARG_ITERATIONS: 50,
            RecommenderBPRMFImplicit.ARG_LEARNINGRATE: 0.003,
            RecommenderBPRMFImplicit.ARG_REGULARIZATION: 0.003
        })
    rec.train(HistoryDF("test01"), trainDataset)

    # ratings used for the updates (a slice of rows, not a single rating)
    ratingsDFUpdate: DataFrame = dataset.ratingsDF.iloc[499965:503006]
    firstUserID = ratingsDFUpdate['userId'].iloc[0]

    # get recommendations:
    print("Recommendations before update")
    print(rec._movieFeaturesMatrixLIL[:, firstUserID].getnnz())
    r: Series = rec.recommend(firstUserID, 50, {})
    print(r)

    # update() is called with one-row frames, in the order of the slice
    for i in range(ratingsDFUpdate.shape[0]):
        rUp = ratingsDFUpdate.iloc[i:i + 1, :]
        rec.update(rUp, {})

    print("Recommendations after update")
    print(rec._movieFeaturesMatrixLIL[:, firstUserID].getnnz())
    r: Series = rec.recommend(firstUserID, 50, {})
    print(r)

    print("Test for non-existent user:")
    # NOTE(review): the original text between the message above and the end
    # banner was masked out ("******") and was not valid Python. The two
    # statements below are reconstructed from the non-existent-user checks of
    # the sibling tests (user 10000, 50 items) -- confirm against the
    # unmasked source.
    r: Series = rec.recommend(10000, 50, {})
    print(r)

    print("================== END OF TEST 01 ======================\n\n\n\n\n")
def test03():
    """Train RecommenderVMContextKNN on the first 4000 ST events and recommend.

    The dataset's events are cut down to positions 0..3999 before training,
    the training time is printed, and recommendations restricted to item IDs
    0..999 are printed for four users (the last one being a non-existent
    user).
    """
    print("Test 03")
    print("Running RecommenderVSKNN ST:")

    stDataset: DatasetST = DatasetST.readDatasets()

    # events 4000..9999 are set aside; the dataset itself keeps only 0..3999
    heldOutEventsDF = stDataset.eventsDF.iloc[4000:10000]
    stDataset.eventsDF = stDataset.eventsDF.iloc[0:4000]

    # train recommender
    recommender: ARecommender = RecommenderVMContextKNN("test", {})
    trainStart = time.time()
    recommender.train(HistoryDF("test03"), stDataset)
    trainEnd = time.time()
    print("Time to train: " + str(trainEnd - trainStart))

    # A loop that passed the events one by one to
    # recommender.update(recommender.UPDT_CLICK, ...) was disabled in the
    # original test (it was kept inside a string literal) and stays disabled.

    # (userID, number of items); 10000 is testing of a non-existent user
    requests = ((3342336, 20), (2035310, 20), (3342341, 20), (10000, 50))
    for userID, numberOfItems in requests:
        result: Series = recommender.recommend(
            userID, numberOfItems,
            {recommender.ARG_ALLOWED_ITEMIDS: list(range(0, 1000))})
        print(type(result))
        print(result)

    print("================== END OF TEST 03 ======================\n\n\n\n\n")
def test03():
    """Train RecommenderItemBasedKNN without user 23 and movie 10, then add them.

    The ratings are sorted by timestamp; from the first 900000 of them all
    rows of user 23 and of movie 10 are removed before training. A single
    rating (user 23, movie 10) is then passed to update() and the
    recommendation for user 23 is printed twice.
    """
    print("Test 03")

    # userID: 23
    # currentItemID: 196
    # repetition: 0

    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    sortedRatingsDF: DataFrame = ratingsDF.sort_values(by=Ratings.COL_TIMESTAMP)

    filmsDF: DataFrame = Items.readFromFileMl1m()

    print(len(sortedRatingsDF))

    # first 900000 ratings, then without user 23, then without movie 10
    trainRatingsDF: DataFrame = sortedRatingsDF[0:900000]
    trainRatingsDF = trainRatingsDF[trainRatingsDF[Ratings.COL_USERID] != 23]
    trainRatingsDF = trainRatingsDF[trainRatingsDF[Ratings.COL_MOVIEID] != 10]

    print(trainRatingsDF.head(25))

    trainDataset: ADataset = DatasetML("test", trainRatingsDF, pd.DataFrame(),
                                       filmsDF)

    # train recommender
    recommender: ARecommender = RecommenderItemBasedKNN("test1", {})
    recommender.train(HistoryDF("test03"), trainDataset)

    # user 23 rates movie 10 with 4 at timestamp 10000
    ratingColumns = [
        Ratings.COL_USERID, Ratings.COL_MOVIEID, Ratings.COL_RATING,
        Ratings.COL_TIMESTAMP
    ]
    updateDF: DataFrame = pd.DataFrame([[23, 10, 4, 10000]],
                                       columns=ratingColumns)
    recommender.update(updateDF, {})

    result: Series = recommender.recommend(23, 10, {})
    print(result)
    print("\n")

    result = recommender.recommend(23, 10, {})
    print(result)

    print("================== END OF TEST 03 ======================\n\n\n\n\n")
def test03():
    """Train RecommenderTheMostPopular on ST events and print a recommendation.

    20 items are requested for user 1 with the allowed item IDs limited to
    0..999.
    """
    print("Test 03")
    print("Running RecommenderTheMostPopular ST:")

    from datasets.slantour.events import Events  # class

    stEventsDF: DataFrame = Events.readFromFile()
    stDataset: ADataset = DatasetST("test", stEventsDF, DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular("rTheMostPopular", {})
    recommender.train(HistoryDF("test"), stDataset)

    recommendArgs = {recommender.ARG_ALLOWED_ITEMIDS: list(range(0, 1000))}
    result = recommender.recommend(1, 20, recommendArgs)
    print(result)
def run(self, batchID: str, jobID: str):
    """Simulate one ContextDHondt portfolio job of a batch on the ST dataset.

    :param batchID: key into the batch parameters of self.datasetID; yields
        the dataset division size, the user behaviour and the repetition
    :param jobID: key into self.getParameters(); yields the selector and is
        appended to the batch name to form the portfolio ID
    """
    batchParameters = InputABatchDefinition().getBatchParameters(self.datasetID)
    divisionDatasetPercentualSize, uBehaviour, repetition = batchParameters[batchID]

    selector: ADHondtSelector = self.getParameters()[jobID]

    portfolioID: str = self.getBatchName() + jobID
    portfolioHistory: AHistory = HistoryHierDF(portfolioID)

    stDataset: ADataset = DatasetST.readDatasets()
    eventsDF = stDataset.eventsDF
    serialsDF = stDataset.serialsDF

    evalHistory: AHistory = HistoryDF("test01")

    # Init evalTool
    evalToolArgs = {
        EvalToolContext.ARG_ITEMS: serialsDF,  # ITEMS
        EvalToolContext.ARG_EVENTS: eventsDF,  # EVENTS (FOR CALCULATING HISTORY OF USER)
        EvalToolContext.ARG_DATASET: "st",  # WHAT DATASET ARE WE IN
        EvalToolContext.ARG_HISTORY: evalHistory,  # empty instance of AHistory is OK for ST dataset
    }
    evalTool: AEvalTool = EvalToolContext(evalToolArgs)

    rIDs, rDescs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()

    aggrDescr: AggregationDescription = InputAggrDefinition.exportADescDContextHondt(
        selector, evalTool)

    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        portfolioID, rIDs, rDescs, aggrDescr)

    model: DataFrame = PModelDHondt(pDescr.getRecommendersIDs())

    simulator: Simulator = InputSimulatorDefinition().exportSimulatorSlantour(
        batchID, divisionDatasetPercentualSize, uBehaviour, repetition)
    simulator.simulate([pDescr], [model], [evalTool], [portfolioHistory])
def test22():
    """Run RecommenderItemBasedKNN on a hand-made ST event table.

    Training data: user 1 visited objects 101-104, user 2 visited 101 and
    102. The recommender is then updated with (user 1, object 105) and
    (user 3, object 106) and asked for 10 items for user 2.

    The closing banner names this test (22); it previously announced the end
    of "TEST 06".
    """
    print("Test 22")

    print("Running RecommenderItemBasedKNN ST:")

    from datasets.slantour.events import Events  # class

    userID1: int = 1
    userID2: int = 2
    userID3: int = 3

    trainEventsDF: DataFrame = DataFrame(
        columns=[Events.COL_USER_ID, Events.COL_OBJECT_ID])

    # rows are appended one by one, exactly as the original test did
    trainRows = [
        [userID1, 101],
        [userID1, 102],
        [userID1, 103],
        [userID1, 104],
        [userID2, 101],
        [userID2, 102],
    ]
    for rowIndex, row in enumerate(trainRows):
        trainEventsDF.loc[rowIndex] = row
    print(trainEventsDF.head(10))

    trainDataset: ADataset = DatasetST("test", trainEventsDF, DataFrame())

    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test"), trainDataset)

    print("update 1:")
    updateEvents1DF: DataFrame = DataFrame(columns=trainEventsDF.columns)
    updateEvents1DF.loc[0] = [userID1, 105]
    print(updateEvents1DF.head())
    rec.update(updateEvents1DF, {})

    print("update 2:")
    updateEvents2DF: DataFrame = DataFrame(columns=trainEventsDF.columns)
    updateEvents2DF.loc[0] = [userID3, 106]
    print(updateEvents2DF.head())
    rec.update(updateEvents2DF, {})

    print("recommend:")
    r = rec.recommend(userID2, 10, {})
    print(r)

    print("================== END OF TEST 22 ======================\n\n\n\n\n")
def test02():
    """Train RecommenderTheMostPopular on filtered RetailRocket events.

    Events are read with minEventCount=50 and 20 items are recommended for
    user 1; the result is printed.
    """
    print("Test 02")
    print("Running RecommenderTheMostPopular RR:")

    from datasets.retailrocket.events import Events  # class

    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular("rTheMostPopular", {})
    recommender.train(HistoryDF("test"), rrDataset)

    result = recommender.recommend(1, 20, {})
    print("Recommendation:")
    print(result)