def evaluate(self, rDscr: RecommenderDescription):
    """Train the described recommender on half of the events and count hits.

    The rows of ``self.dataset.eventsDF`` are split by position into a first
    half (used for training) and a second half (used for testing).  For every
    visitor id found anywhere in the events, 20 items are requested from the
    recommender and the number of the visitor's distinct test-half items that
    appear among the recommendation keys is added to a running counter, which
    is printed at the end.

    Args:
        rDscr: description used to export the recommender and its arguments.
    """
    print("StaticEvaluation")

    recommender: ARecommender = rDscr.exportRecommender("test")
    recomArgs: dict = rDscr.getArguments()

    allEventsDF: DataFrame = self.dataset.eventsDF
    splitPoint: int = int(len(allEventsDF) / 2)
    firstHalfDF: DataFrame = allEventsDF[0:splitPoint]
    secondHalfDF: DataFrame = allEventsDF[splitPoint:]

    trainDataset = DatasetRetailRocket(
        "rrTrain", firstHalfDF, DataFrame(), DataFrame())

    # Visitor ids come from the whole event log, not only from the test half.
    visitorIDs: List[int] = list(allEventsDF[Events.COL_VISITOR_ID].unique())

    recommender.train(HistoryDF("test"), trainDataset)

    counter: int = 0
    for visitorID in visitorIDs:
        visitorRowsDF: DataFrame = secondHalfDF.loc[
            secondHalfDF[Events.COL_VISITOR_ID] == visitorID]
        testItemIDs: List[int] = list(
            visitorRowsDF[Events.COL_ITEM_ID].unique())
        recommendedIDs = recommender.recommend(visitorID, 20, recomArgs).keys()
        counter += sum(1 for itemID in testItemIDs if itemID in recommendedIDs)

    # NOTE(review): the source formatting was collapsed; the summary print is
    # assumed to sit after the loop rather than inside it - confirm.
    print(" counter: " + str(counter))
def exportSimulatorRetailRocket(self, batchID: str,
                                divisionDatasetPercentualSize: int,
                                uBehaviourID: str, repetition: int):
    """Create a ``Simulator`` over the filtered RetailRocket dataset.

    Args:
        batchID: identifier handed to the ``Simulator``.
        divisionDatasetPercentualSize: value stored under
            ``SimulationML.ARG_DIV_DATASET_PERC_SIZE``.
        uBehaviourID: behaviour identifier resolved to a file through
            ``BehavioursRR.getFile``.
        repetition: value stored under
            ``SimulationML.ARG_RECOM_REPETITION_COUNT``.

    Returns:
        The constructed ``Simulator``.
    """
    # NOTE(review): the argument keys are taken from SimulationML while the
    # simulation class passed below is SimulationRR - presumably the constants
    # are shared; verify.
    simulationArgs: dict = {
        SimulationML.ARG_WINDOW_SIZE: 5,
        SimulationML.ARG_RECOM_REPETITION_COUNT: repetition,
        SimulationML.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
        SimulationML.ARG_NUMBER_OF_AGGR_ITEMS: self.numberOfAggrItems,
        SimulationML.ARG_DIV_DATASET_PERC_SIZE: divisionDatasetPercentualSize,
        SimulationML.ARG_HISTORY_LENGTH: 10,
    }

    # Read the events dataset and the user-behaviour table.
    rrDataset: ADataset = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    behavioursDF: DataFrame = BehavioursRR.readFromFileRR(
        BehavioursRR.getFile(uBehaviourID))

    return Simulator(batchID, SimulationRR, simulationArgs, rrDataset,
                     behavioursDF)
def test03():
    """Train ``RecommenderBPRMFImplicit`` on RetailRocket, update it and print a recommendation."""
    print("Test 03")
    print("Running Recommender BPRMF on RR:")

    rrDataset: DatasetRetailRocket = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    allEventsDF: DataFrame = rrDataset.eventsDF

    # Build and train the recommender on the whole filtered dataset.
    recommenderArgs = {
        RecommenderBPRMFImplicit.ARG_FACTORS: 20,
        RecommenderBPRMFImplicit.ARG_ITERATIONS: 50,
        RecommenderBPRMFImplicit.ARG_LEARNINGRATE: 0.003,
        RecommenderBPRMFImplicit.ARG_REGULARIZATION: 0.003,
    }
    recommender: ARecommender = RecommenderBPRMFImplicit("test", recommenderArgs)
    recommender.train(HistoryDF("test03"), rrDataset)

    # Feed the event stored at row position 9000 as an incremental update.
    singleEventDF: DataFrame = DataFrame([allEventsDF.iloc[9000]])
    print(singleEventDF)
    recommender.update(singleEventDF, {})

    recommendation: Series = recommender.recommend(1093035, 50, {})
    print("Recommendation:")
    print(recommendation)
def test04():
    """Train ``RecommenderVMContextKNN`` on RetailRocket, update it and print a recommendation."""
    print("Test 04")
    print("Running RecommenderVSKNN RR:")

    from datasets.retailrocket.events import Events  # class

    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)
    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderVMContextKNN("test", {})
    print("train")
    recommender.train(HistoryDF("test"), rrDataset)

    # Feed the event stored at row position 9000 as an incremental update.
    singleEventDF: DataFrame = DataFrame([filteredEventsDF.iloc[9000]])
    print(singleEventDF)
    recommender.update(singleEventDF, {})

    result = recommender.recommend(1093035, 20, {})
    print("Recommendation:")
    print(result)

    print("================== END OF TEST 04 ======================\n\n\n\n\n")
def test02():
    """Train ``RecommenderW2V`` on RetailRocket, update it and print a recommendation."""
    print("Test 02")
    print("Running RecommenderW2V RR:")

    rrDataset: DatasetRetailRocket = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    allEventsDF: DataFrame = rrDataset.eventsDF

    # Recommender configuration; values are a mix of ints and strings.
    w2vArgs: Dict[str, object] = {
        RecommenderW2V.ARG_ITERATIONS: 50000,
        RecommenderW2V.ARG_TRAIN_VARIANT: 'posneg',
        RecommenderW2V.ARG_USER_PROFILE_SIZE: -1,
        RecommenderW2V.ARG_USER_PROFILE_STRATEGY: 'weightedMean',
        RecommenderW2V.ARG_VECTOR_SIZE: 128,
        RecommenderW2V.ARG_WINDOW_SIZE: 5,
    }
    recommender: ARecommender = RecommenderW2V("RecommenderW2V", w2vArgs)
    recommender.train(HistoryDF("test02"), rrDataset)

    # Feed the event stored at row position 900 as an incremental update.
    singleEventDF: DataFrame = DataFrame([allEventsDF.iloc[900]])
    recommender.update(singleEventDF, {})

    recommendation: Series = recommender.recommend(1093035, 50, w2vArgs)
    print("Recommendation:")
    print(recommendation)
def test02():
    """Train ``RecommenderBPRMF`` on RetailRocket and print recommendations for two users."""
    print("Test 02")
    print("Running Recommender BPRMF on RR:")

    batchID: str = "batchID"
    rrDataset: ADataset = DatasetRetailRocket.readDatasetsWithFilter(50)
    hierHistory: AHistory = HistoryHierDF(["aa"])

    bprmfArgs: Dict[str, object] = {
        RecommenderBPRMF.ARG_EPOCHS: 2,
        RecommenderBPRMF.ARG_FACTORS: 10,
        RecommenderBPRMF.ARG_LEARNINGRATE: 0.05,
        RecommenderBPRMF.ARG_UREGULARIZATION: 0.0025,
        RecommenderBPRMF.ARG_BREGULARIZATION: 0,
        RecommenderBPRMF.ARG_PIREGULARIZATION: 0.0025,
        RecommenderBPRMF.ARG_NIREGULARIZATION: 0.00025,
    }

    recommender: ARecommender = RecommenderBPRMF(batchID, bprmfArgs)
    recommender.train(hierHistory, rrDataset)

    numberOfItems: int = 20
    # Ask for recommendations for each probe user in turn.
    for userId in (1118731, 85734):
        print(recommender.recommend(userId, numberOfItems, bprmfArgs))
def test02():
    """Train ``RecommenderCosineCB`` on RetailRocket, update it and print two recommendations."""
    print("Test 02")
    print("Running RecommenderCosineCB RR:")

    rrDataset: DatasetRetailRocket = DatasetRetailRocket.readDatasets()

    cbArgs: dict = {
        RecommenderCosineCB.ARG_CB_DATA_PATH:
            Configuration.cbRRDataFileWithPathOHE,
        RecommenderCosineCB.ARG_USER_PROFILE_SIZE: 5,
        RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: "max",
        RecommenderCosineCB.ARG_USE_DIVERSITY: False,  # alternative: True
    }
    recommender: ARecommender = RecommenderCosineCB("test", cbArgs)
    recommender.train(HistoryDF("test"), rrDataset)

    # One-row slice of the events used as an incremental update.
    updateEventsDF: DataFrame = rrDataset.eventsDF.iloc[5003:5004]
    print(updateEventsDF)
    recommender.update(updateEventsDF, cbArgs)

    # user with very outdated profile - no recent objects
    outdatedUserRec: Series = recommender.recommend(863743, 20, cbArgs)
    print(type(outdatedUserRec))
    print(outdatedUserRec)

    # testing of a non-existent user
    unknownUserRec: Series = recommender.recommend(10000, 50, cbArgs)
    print(type(unknownUserRec))
    print(unknownUserRec)
def test02():
    """Simulate a single-method portfolio on RetailRocket.

    Every recommender description exporter is invoked in turn; only the last
    pair (W2V) is kept and wrapped into the portfolio that is trained and then
    simulated.
    """
    print("Test 02")

    # (exporter, recommender id) pairs, in the order they are tried.  The last
    # entry decides which recommender is actually used.
    candidates = [
        (InputRecomRRDefinition.exportRDescTheMostPopular,
         InputRecomRRDefinition.THE_MOST_POPULAR),
        (InputRecomRRDefinition.exportRDescKNN,
         InputRecomRRDefinition.KNN),
        (InputRecomRRDefinition.exportRDescBPRMFIMPL,
         InputRecomRRDefinition.BPRMFIMPL),
        (InputRecomRRDefinition.exportRDescVMContextKNN,
         InputRecomRRDefinition.VMC_KNN),
        (InputRecomRRDefinition.exportRDescCosineCB,
         InputRecomRRDefinition.COSINECB),
        (InputRecomRRDefinition.exportRDescW2V,
         InputRecomRRDefinition.W2V),
    ]
    rDescr: RecommenderDescription = None
    recommenderID: str = None
    for exportRDescr, candidateID in candidates:
        rDescr = exportRDescr()
        recommenderID = candidateID

    pDescr: Portfolio1MethDescription = Portfolio1MethDescription(
        recommenderID.title(), recommenderID, rDescr)

    rrDataset: ADataset = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    behavioursDF: DataFrame = BehavioursRR.readFromFileRR(
        BehavioursRR.getFile("static08"))

    history: AHistory = HistoryDF("test")
    portfolio: APortfolio = pDescr.exportPortfolio("jobID", history)
    portfolio.train(history, rrDataset)

    simulationArgs: Dict[str, object] = {
        SimulationRR.ARG_WINDOW_SIZE: 50,
        SimulationRR.ARG_RECOM_REPETITION_COUNT: 1,
        SimulationRR.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
        SimulationRR.ARG_NUMBER_OF_AGGR_ITEMS:
            InputSimulatorDefinition.numberOfAggrItems,
        SimulationRR.ARG_DIV_DATASET_PERC_SIZE: 90,
        SimulationRR.ARG_HISTORY_LENGTH: 10,
    }

    # simulation of portfolio
    simulator: Simulator = Simulator("test", SimulationRR, simulationArgs,
                                     rrDataset, behavioursDF)
    simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})],
                       [HistoryHierDF("a")])
def test01():
    """Train ``RecommenderRepeatedPurchase`` on RetailRocket, apply two add-to-cart updates and print a recommendation.

    The two updates are synthetic one-row event frames for user 904351,
    built by a local helper so the row layout is defined in one place.
    """
    print("Test 01")
    print("Running RecommenderRepeatedPurchase RR:")

    from datasets.retailrocket.events import Events  # class

    eventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)

    userID: int = 904351

    def addToCartEventDF(itemID: int) -> DataFrame:
        """Return a one-row frame with an add-to-cart event of ``userID`` on ``itemID``."""
        eventSer = pd.Series(
            [1433221523348, userID, Events.EVENT_ADDTOCART, itemID, "Nan"],
            index=[
                Events.COL_TIME_STAMP,
                Events.COL_VISITOR_ID,
                Events.COL_EVENT,
                Events.COL_ITEM_ID,
                # NOTE(review): an event-type constant is used as the label of
                # the last column; presumably a transaction-id column constant
                # was intended - verify against the Events class.
                Events.EVENT_TRANSACTION,
            ])
        return pd.DataFrame([eventSer])

    dataset: ADataset = DatasetRetailRocket("test", eventsDF, DataFrame(),
                                            DataFrame())

    rec: ARecommender = RecommenderRepeatedPurchase("rRepeatedPurchase", {})
    rec.train(HistoryDF("test"), dataset)

    # most frequently re-purchased items: 119736, 119736, 119736, 213834,
    # 119736, 227311, 382885, 119736, 213834, 119736, 432171, 183756, 119736,
    # 305675, 320130
    update1DF: DataFrame = addToCartEventDF(119736)
    update2DF: DataFrame = addToCartEventDF(213834)

    rec.update(update1DF, {})
    rec.update(update2DF, {})

    recommendationSer: Series = rec.recommend(userID, 20, {})
    print("Recommendation:")
    print(recommendationSer)
def test01():
    """Simulate a dynamic portfolio (TheMostPopular + fixed D'Hondt aggregation) on RetailRocket.

    Relies on ``argsSimulationDict`` being available from the enclosing
    module scope; it is not defined inside this function.
    """
    print("Simulation: RR Dynamic")

    lrClick: float = 0.03
    lrViewDivisor: float = 250

    # Job id encodes both learning-rate settings with the dots stripped.
    clickTag: str = str(lrClick).replace(".", "")
    divisorTag: str = str(lrViewDivisor).replace(".", "")
    jobID: str = "Fixed" + "Clk" + clickTag + "ViewDivisor" + divisorTag

    selector: ADHondtSelector = TheMostVotedItemSelector({})

    rIDs, rDescs = InputRecomRRDefinition.exportPairOfRecomIdsAndRecomDescrs()

    aggrDescr = InputAggrDefinition.exportADescDHondt(selector)
    p1AggrDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "FDHont" + jobID, rIDs, rDescs, aggrDescr)

    recommenderID: str = "TheMostPopular"
    rDescr: RecommenderDescription = RecommenderDescription(
        RecommenderTheMostPopular, {})

    pDescr: APortfolioDescription = PortfolioDynamicDescription(
        "Dynamic" + "FDHontPersStat" + jobID, recommenderID, rDescr,
        "FDHondt", p1AggrDescr)

    batchID: str = "rrDiv90Ulinear0109R1"
    rrDataset: DatasetRetailRocket = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    behavioursDF: DataFrame = BehavioursRR.readFromFileRR(
        BehavioursRR.getFile(BehavioursRR.BHVR_LINEAR0109))

    model: DataFrame = PModelDHondtPersonalisedStat(
        p1AggrDescr.getRecommendersIDs())

    # The view learning rate is the click learning rate scaled down.
    evalToolArgs = {
        EvalToolDHondt.ARG_LEARNING_RATE_CLICKS: lrClick,
        EvalToolDHondt.ARG_LEARNING_RATE_VIEWS: lrClick / lrViewDivisor,
    }
    eTool: AEvalTool = EvalToolDHondtPersonal(evalToolArgs)

    # simulation of portfolio
    simulator: Simulator = Simulator(batchID, SimulationRR,
                                     argsSimulationDict, rrDataset,
                                     behavioursDF)
    simulator.simulate([pDescr], [model], [eTool],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test11():
    """Simulate a single-method TheMostPopular portfolio on the unfiltered RetailRocket dataset.

    Relies on ``argsSimulationDict`` being available from the enclosing
    module scope; it is not defined inside this function.
    """
    print("Simulation: RR TheMostPopular")

    # NOTE(review): the recommender description comes from the ML definition
    # class although the dataset is RetailRocket - confirm this is intended.
    recommenderID = InputRecomMLDefinition.THE_MOST_POPULAR
    rDescr: RecommenderDescription = InputRecomMLDefinition.exportRDescTheMostPopular()
    pDescr: APortfolioDescription = Portfolio1MethDescription(
        recommenderID.title(), recommenderID, rDescr)

    batchID: str = "retailrocketDiv90Ulinear0109R1"
    rrDataset: DatasetRetailRocket = DatasetRetailRocket.readDatasets()
    behavioursDF: DataFrame = BehavioursRR.readFromFileRR(
        BehavioursRR.getFile(BehavioursRR.BHVR_LINEAR0109))

    # simulation of portfolio
    simulator: Simulator = Simulator(batchID, SimulationRR,
                                     argsSimulationDict, rrDataset,
                                     behavioursDF)
    simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test12():
    """Count recommendation hits of a recommender on a 90/10 RetailRocket split.

    The recommender is trained on the train part; for the first 500 distinct
    visitor ids of the test part, the recommended item ids are intersected
    with the items the visitor has in the test events and the intersection
    sizes are summed and printed.
    """
    print("Test 12")
    print("Running Recommender BPRMF on RR:")

    from datasets.retailrocket.events import Events  # class

    batchID: str = "batchID"

    trainDataset, testDataset = DatasetRetailRocket.readDatasetsWithFilter(
        50).divideDataset(90)
    testEventsDF = testDataset.eventsDF
    testUserIDs: ndarray = testEventsDF[Events.COL_VISITOR_ID].unique()

    history: AHistory = HistoryHierDF(["aa"])
    numberOfItems: int = 20

    # Both exporters are called; the second assignment is the one in effect.
    rd: RecommenderDescription = InputRecomRRDefinition.exportRDescBPRMFIMPL()
    rd = InputRecomRRDefinition.exportRDescKNN()

    recommender: ARecommender = rd.exportRecommender("aaa")
    recommenderArgs: Dict = rd.getArguments()
    recommender.train(history, trainDataset)

    numberOfHit: int = 0
    for userIdI in testUserIDs[0:500]:
        recI: Series = recommender.recommend(int(userIdI), numberOfItems,
                                             recommenderArgs)
        recommendedIDs = set(recI.keys())
        userRowsDF = testDataset.eventsDF.loc[
            testDataset.eventsDF[Events.COL_VISITOR_ID] == userIdI]
        windowItemIDs = set(userRowsDF[Events.COL_ITEM_ID].unique())
        numberOfHit += len(recommendedIDs & windowItemIDs)

    # NOTE(review): the source formatting was collapsed; the summary prints
    # are assumed to sit after the loop rather than inside it - confirm.
    print("")
    print("numberOfHit: " + str(numberOfHit))
def test02():
    """Train ``RecommenderTheMostPopular`` on RetailRocket and print a recommendation for user 1."""
    print("Test 02")
    print("Running RecommenderTheMostPopular RR:")

    from datasets.retailrocket.events import Events  # class

    filteredEventsDF: DataFrame = Events.readFromFileWithFilter(minEventCount=50)
    rrDataset: ADataset = DatasetRetailRocket(
        "test", filteredEventsDF, DataFrame(), DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular("rTheMostPopular", {})
    recommender.train(HistoryDF("test"), rrDataset)

    result = recommender.recommend(1, 20, {})
    print("Recommendation:")
    print(result)
def visualizationRR():
    """Plot a step histogram of visitor ids over the filtered RetailRocket events.

    The histogram uses one bin per distinct visitor id.
    """
    print("visualizationRR")

    from datasets.retailrocket.events import Events  # class

    # dataset reading
    rrDataset: ADataset = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)
    print(rrDataset.eventsDF.head())

    # One entry per event, so ids repeat; the distinct count sets the bins.
    visitorIDsPerEvent: List[int] = rrDataset.eventsDF[
        Events.COL_VISITOR_ID].tolist()
    distinctVisitorCount: int = len(list(set(visitorIDsPerEvent)))

    plt.hist(visitorIDsPerEvent, bins=distinctVisitorCount, range=None,
             fc='none', lw=1.5, histtype='step')
    plt.ticklabel_format(style='plain')
    plt.show()
def divideDataset(dataset: ADataset, behaviourDF: DataFrame,
                  divisionDatasetPercentualSize: int,
                  testDatasetPercentualSize: int, recomRepetitionCount: int):
    """Split a RetailRocket dataset by time and collect matching behaviour rows.

    Events are sorted by ``Events.COL_TIME_STAMP``.  The leading
    ``divisionDatasetPercentualSize`` percent of rows form the train dataset;
    the following ``testDatasetPercentualSize`` percent, restricted to rows
    whose ``Events.COL_EVENT`` equals ``"view"``, form the test events.

    Args:
        dataset: source dataset (events, category tree, item properties).
        behaviourDF: behaviour table; rows are picked from it by position.
        divisionDatasetPercentualSize: percentage of events used for training.
        testDatasetPercentualSize: percentage of events taken for testing.
        recomRepetitionCount: number of repetitions to build behaviour frames for.

    Returns:
        Tuple ``(trainDataset, testEventsDF, testRelevantEventsDF,
        testRepeatedBehaviourDict)``; the second and third items are the same
        DataFrame object, and the dictionary maps each repetition number to a
        behaviour DataFrame indexed like the test events.
    """
    sourceEventsDF: DataFrame = dataset.eventsDF
    categoryTreeDF: DataFrame = dataset.categoryTreeDF
    itemPropertiesDF: DataFrame = dataset.itemPropertiesDF

    # Chronological order; the original index labels are kept by the sort.
    chronologicalDF: DataFrame = sourceEventsDF.sort_values(
        by=Events.COL_TIME_STAMP)
    eventCount: int = chronologicalDF.shape[0]

    # create train Dataset
    trainSize: int = int(eventCount * divisionDatasetPercentualSize / 100)
    trainEventsDF: DataFrame = chronologicalDF[0:trainSize]
    datasetID: str = "rr" + "Div" + str(divisionDatasetPercentualSize)
    trainDataset: ADataset = DatasetRetailRocket(
        datasetID, trainEventsDF, categoryTreeDF, itemPropertiesDF)

    # create test Event DataFrame
    testSize: int = int(eventCount * testDatasetPercentualSize / 100)
    testWindowDF: DataFrame = chronologicalDF[trainSize:(trainSize + testSize)]
    isView = testWindowDF[Events.COL_EVENT] == "view"
    testEventsDF: DataFrame = testWindowDF.loc[isView]

    # create test relevant Event DataFrame
    testRelevantEventsDF: DataFrame = testEventsDF

    # create behaviour dictionary of DataFrame indexed by recomRepetition
    repetitionsPerEvent: int = behaviourDF[BehavioursRR.COL_REPETITION].max() + 1

    # Position in behaviourDF of repetition 0 for every test event, computed
    # from the event's index label.
    # NOTE(review): assumes behaviourDF holds repetitionsPerEvent consecutive
    # rows per event, ordered by event index label - confirm.
    basePositions: List[int] = [
        repetitionsPerEvent * labelI for labelI in testEventsDF.index]

    testRepeatedBehaviourDict: dict = {
        repetitionI: DataFrame(
            behaviourDF.take(
                [baseI + repetitionI for baseI in basePositions]
            ).values.tolist(),
            index=testEventsDF.index,
            columns=behaviourDF.keys())
        for repetitionI in range(recomRepetitionCount)
    }

    return (trainDataset, testEventsDF, testRelevantEventsDF,
            testRepeatedBehaviourDict)
def test01():
    """Build a hierarchical portfolio over TheMostPopular, train it on RetailRocket and print one recommendation."""
    print("Test 01")

    recommenderID: str = "TheMostPopular"
    pRDescr: RecommenderDescription = RecommenderDescription(
        RecommenderTheMostPopular, {})

    selectorFixed: ADHondtSelector = TheMostVotedItemSelector({})
    aDescDHont: AggregationDescription = \
        InputAggrDefinition.exportADescDHondtDirectOptimizeThompsonSampling(
            selectorFixed)

    # The exported pair is immediately replaced by the single recommender
    # defined above; the export call itself is still performed.
    rIDs, rDescs = InputRecomRRDefinition.exportPairOfRecomIdsAndRecomDescrs()
    rIDs = [recommenderID]
    rDescs = [pRDescr]

    p1AggrDescrID: str = "p1AggrDescrID"
    p1AggrDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        p1AggrDescrID, rIDs, rDescs, aDescDHont)

    # Two penalisation tools are exported; the second one is the one used.
    pProbTool: APenalization = \
        PenalizationToolDefinition.exportProbPenaltyToolOLin0802HLin1002(
            InputSimulatorDefinition.numberOfAggrItems)
    pProbTool = PenalizationToolDefinition.exportPenaltyToolOStat08HLin1002(
        InputSimulatorDefinition.numberOfAggrItems)

    aHierDescr: AggregationDescription = AggregationDescription(
        AggrD21, {AggrD21.ARG_RATING_THRESHOLD_FOR_NEG: 2.0})

    pHierDescr: PortfolioHierDescription = PortfolioHierDescription(
        "pHierDescr", recommenderID, pRDescr, p1AggrDescrID, p1AggrDescr,
        aHierDescr, pProbTool)

    userID: int = 1

    rrDataset: ADataset = DatasetRetailRocket.readDatasetsWithFilter(
        minEventCount=50)

    # Seed the history with three recommendation records for the user.
    history: AHistory = HistoryDF("test")
    for itemID, position in ((45, 1), (45, 2), (78, 3)):
        history.insertRecommendation(userID, itemID, position, False)

    portfolio: APortfolio = pHierDescr.exportPortfolio("test", history)

    portFolioModel: DataFrame = PModelDHondtBanditsVotes(
        p1AggrDescr.getRecommendersIDs())

    portfolio.train(history, rrDataset)

    recommendArgs = {
        APortfolio.ARG_NUMBER_OF_AGGR_ITEMS: 20,
        APortfolio.ARG_ITEM_ID: 1,
        APortfolio.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
        AggrD21.ARG_RATING_THRESHOLD_FOR_NEG: 0.5,
    }

    r, rp = portfolio.recommend(userID, portFolioModel, recommendArgs)
    print(r)
userIdI][Events.COL_ITEM_ID].unique()) recommendationI: List[int] = recom.recommend(userIdI, 20, args).keys() intersectionI: List[int] = [ value for value in itemIDs if value in recommendationI ] #print(" " + str(len(intersectionI))) counter += len(intersectionI) print(" counter: " + str(counter)) if __name__ == "__main__": os.chdir("..") dataset: ADataset = DatasetRetailRocket.readDatasetsWithFilter( minEventCount=50) rDscrTheMostPopular: RecommenderDescription = InputRecomRRDefinition.exportRDescTheMostPopular( ) rDscrItemBasedKNN: RecommenderDescription = InputRecomRRDefinition.exportRDescKNN( ) rDscrBPRMF: RecommenderDescription = InputRecomRRDefinition.exportRDescBPRMFIMPL( ) rDscrVMContextKNN: RecommenderDescription = InputRecomRRDefinition.exportRDescVMContextKNN( ) rDscrCosineCB: RecommenderDescription = InputRecomRRDefinition.exportRDescCosineCB( ) rDscrW2V: RecommenderDescription = InputRecomRRDefinition.exportRDescW2V() rDscrs: List[object] = [ rDscrTheMostPopular, rDscrItemBasedKNN, rDscrBPRMF, rDscrVMContextKNN,