def test01():
    """Smoke-run RecommenderTheMostPopular on a 50k-row slice of ML-1M ratings.

    Moves the working directory one level up, trains on rows 0..49999,
    applies a single-row update and prints the type and content of the
    recommendations for user 331 and for user 10000 (the latter is described
    in the original comment as a non-existent user).
    """
    print("Test 01")
    os.chdir("..")

    print("Running RecommenderTheMostPopular:")

    # Looked up but never used afterwards; kept so the Configuration
    # attribute access still happens exactly as before.
    tfidfDataPath = Configuration.cbDataFileWithPathTFIDF

    allRatings = Ratings.readFromFileMl1m()
    trainSlice = allRatings.iloc[0:50000]

    recommender = RecommenderTheMostPopular("test", {})
    recommender.train(pd.DataFrame(), trainSlice, pd.DataFrame(),
                      pd.DataFrame())

    updateSlice = allRatings.iloc[50003:50004]
    recommender.update(updateSlice)

    # First a user used throughout these tests, then the "non-existent" one.
    for userID in (331, 10000):
        result = recommender.recommend(userID, 50, {})
        print(type(result))
        print(result)
def readDatasets():
    """Load the ML-1M ratings, users and items and bundle them together.

    Returns:
        A DatasetML labelled "ml1mDivAll" built from the three frames
        (ratings, users, items — read in that order).
    """
    ratings = Ratings.readFromFileMl1m()
    users = Users.readFromFileMl1m()
    items = Items.readFromFileMl1m()

    dataset = DatasetML("ml1mDivAll", ratings, users, items)
    return dataset
def exportSimulatorML1M(batchID: str, divisionDatasetPercentualSize: int,
                        uBehaviourID: str, repetition: int):
    """Build a Simulator configured for a portfolio-to-user run on ML-1M.

    Args:
        batchID: identifier handed to the Simulator as its first argument.
        divisionDatasetPercentualSize: stored under ARG_DIV_DATASET_PERC_SIZE.
        uBehaviourID: behaviour identifier used to locate the behaviours file.
        repetition: stored under ARG_REPETITION_OF_RECOMMENDATION.

    Returns:
        The constructed Simulator instance.
    """
    simulationArgs = {
        SimulationPortfolioToUser.ARG_WINDOW_SIZE: 5,
        SimulationPortfolioToUser.ARG_REPETITION_OF_RECOMMENDATION: repetition,
        SimulationPortfolioToUser.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
        SimulationPortfolioToUser.ARG_NUMBER_OF_AGGR_ITEMS:
            InputSimulatorDefinition.numberOfAggrItems,
        SimulationPortfolioToUser.ARG_DIV_DATASET_PERC_SIZE:
            divisionDatasetPercentualSize,
    }

    # Input data: ratings, users, items, then the behaviours for this ID.
    ratings = Ratings.readFromFileMl1m()
    users = Users.readFromFileMl1m()
    items = Items.readFromFileMl1m()

    behavioursPath = Behaviours.getFile(uBehaviourID)
    behaviours = Behaviours.readFromFileMl1m(behavioursPath)

    return Simulator(batchID, SimulationPortfolioToUser, simulationArgs,
                     ratings, users, items, behaviours)
def test01():
    """Smoke-run RecommenderTheMostPopular through the DatasetML-based API.

    Trains on rows 0..49999 of the ML-1M ratings, sends one rating as an
    UPDT_CLICK update and prints the type and content of the recommendations
    for user 331 and for user 10000 (the latter is described in the original
    comment as a non-existent user).
    """
    print("Test 01")

    print("Running RecommenderTheMostPopular ML:")

    allRatings = Ratings.readFromFileMl1m()
    trainSlice = allRatings.iloc[0:50000]

    # Users and items frames are left empty for this recommender.
    dataset = DatasetML("test", trainSlice, pd.DataFrame(), pd.DataFrame())

    recommender = RecommenderTheMostPopular("test", {})
    recommender.train(HistoryDF("test"), dataset)

    updateSlice = allRatings.iloc[50003:50004]
    recommender.update(ARecommender.UPDT_CLICK, updateSlice)

    for userID in (331, 10000):
        result = recommender.recommend(userID, 50, {})
        print(type(result))
        print(result)
def test01():
    """Smoke-run RecommenderItemBasedKNN on a 50k-row slice of ML-1M ratings.

    Trains on rows 0..49999, then requests 50 recommendations for the user of
    one later rating both before and after feeding that rating to update().
    """
    print("Test 01")

    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 50k
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]

    # The users frame is passed empty; only ratings and items are supplied.
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(), filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test01"), trainDataset)

    # get one rating for update
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50005:50006]

    # get recommendations:
    print("Recommendations before update")
    # The result is assigned but not printed here.
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    rec.update(ratingsDFUpdate, {})

    print("Recommendations after update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # NOTE(review): the text between the two string literals below arrived
    # masked ("******") and is not valid Python as it stands. Presumably it
    # originally closed this print, queried a non-existent user and opened the
    # end-of-test print — restore it from version control before running.
    print("Test for non-existent user:"******"================== END OF TEST 01 ======================\n\n\n\n\n")
def test03():
    """Run RecommenderItemBasedKNN with user 23 and movie 10 held out.

    The ratings are sorted by timestamp, the first 900000 rows are taken and
    every row of user 23 or movie 10 is dropped before training. A single
    synthetic rating (user 23, movie 10, rating 4, timestamp 10000) is then
    fed to update() and ten recommendations for user 23 are printed twice.
    """
    print("Test 03")

    # Scenario noted in the original: userID 23, currentItemID 196,
    # repetition 0.
    print("Running RecommenderItemBasedKNN ML:")

    allRatings = Ratings.readFromFileMl1m()
    byTime = allRatings.sort_values(by=Ratings.COL_TIMESTAMP)

    films = Items.readFromFileMl1m()

    print(len(byTime))
    trainRatings = byTime[0:900000]
    trainRatings = trainRatings[trainRatings[Ratings.COL_USERID] != 23]
    trainRatings = trainRatings[trainRatings[Ratings.COL_MOVIEID] != 10]

    print(trainRatings.head(25))

    dataset = DatasetML("test", trainRatings, pd.DataFrame(), films)

    # Train the recommender on the filtered data.
    recommender = RecommenderItemBasedKNN("test1", {})
    recommender.train(HistoryDF("test03"), dataset)

    # One hand-made rating row for the held-out user/movie pair.
    ratingColumns = [
        Ratings.COL_USERID, Ratings.COL_MOVIEID, Ratings.COL_RATING,
        Ratings.COL_TIMESTAMP
    ]
    syntheticRating = pd.DataFrame([[23, 10, 4, 10000]],
                                   columns=ratingColumns)

    recommender.update(syntheticRating, {})

    firstResult = recommender.recommend(23, 10, {})
    print(firstResult)
    print("\n")

    secondResult = recommender.recommend(23, 10, {})
    print(secondResult)

    print("================== END OF TEST 03 ======================\n\n\n\n\n")
def generateFileMl1m(numberOfItems: int, countOfRepetitions: int,
                     behaviourID: str, uBehavDesc: UserBehaviourDescription):
    """Generate the behaviours file for ML-1M and write it as a TSV.

    One row is produced for every (rating, repetition) pair; the behaviour
    column is filled by the generator matching ``behaviourID``.

    Args:
        numberOfItems: forwarded unchanged to the behaviour generator.
        countOfRepetitions: number of repetition rows created per rating.
        behaviourID: selects the generator (linear0109, static08 or general)
            and, via ``Behaviours.getFile``, the output file.
        uBehavDesc: forwarded unchanged to the behaviour generator.

    NOTE(review): the ``Behaviours.__generate*`` helpers are only reachable
    through name mangling if this function is defined inside class
    ``Behaviours`` — confirm against the enclosing file.
    """
    # Fixed seeds so repeated runs write the same file.
    np.random.seed(42)
    random.seed(42)

    print("Generate Behaviour " + behaviourID)

    behaviourFile: str = Behaviours.getFile(behaviourID)

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsCopyDF: DataFrame = ratingsDF[[
        Ratings.COL_USERID, Ratings.COL_MOVIEID
    ]].copy()
    # Every row gets the same range object; explode() then expands it into
    # one row per repetition number.
    ratingsCopyDF[Behaviours.COL_REPETITION] = (
        [range(countOfRepetitions)] * len(ratingsCopyDF))

    behavioursDF: DataFrame = ratingsCopyDF.explode(
        Behaviours.COL_REPETITION)
    behavioursDF[Behaviours.COL_BEHAVIOUR] = [None] * len(behavioursDF)
    # The old index becomes a temporary 'index' column, removed before saving.
    behavioursDF.reset_index(inplace=True)

    # Compare by value: `is` tests object identity and would send an equal
    # but non-identical ID string to the general generator.
    if behaviourID == Behaviours.BHVR_LINEAR0109:
        Behaviours.__generateLinear0109BehaviourMl1m(
            behavioursDF, numberOfItems, countOfRepetitions, uBehavDesc)
    elif behaviourID == Behaviours.BHVR_STATIC08:
        Behaviours.__generateStatic08BehaviourMl1m(
            behavioursDF, numberOfItems, countOfRepetitions, uBehavDesc)
    else:
        Behaviours.__generateGeneralBehaviourMl1m(
            behavioursDF, numberOfItems, countOfRepetitions, uBehavDesc)

    print(behavioursDF.head(10))
    del behavioursDF['index']
    print(behavioursDF.head(10))

    behavioursDF.to_csv(behaviourFile, sep='\t', index=False)
def test01():
    """Smoke-run and time RecommenderVMContextKNN on 800k ML-1M ratings.

    Trains on rows 0..799999 and prints the training time, then requests 50
    recommendations for the user of one later rating before and after that
    rating is sent to update() as an UPDT_CLICK event.
    """
    print("Test 01")

    # NOTE(review): the banner names RecommenderItemBasedKNN although the
    # recommender constructed below is RecommenderVMContextKNN.
    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only the first 800k ratings
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:800000]

    # The users frame is passed empty; only ratings and items are supplied.
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(), filmsDF)

    # train recommender
    rec: ARecommender = RecommenderVMContextKNN("test", {})
    start = time.time()
    rec.train(HistoryDF("test01"), trainDataset)
    end = time.time()
    print("Time to train: " + str(end - start))

    # get one rating for update
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[800006:800007]

    # get recommendations:
    print("Recommendations before update")
    start = time.time()
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})
    end = time.time()
    # NOTE(review): this interval covers recommend(), not train(); the
    # printed label looks copy-pasted.
    print("Time to train: " + str(end - start))

    rec.update(ARecommender.UPDT_CLICK, ratingsDFUpdate)

    print("Recommendations after update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # NOTE(review): the text between the two string literals below arrived
    # masked ("******") and is not valid Python as it stands. Presumably it
    # originally closed this print, queried a non-existent user and opened the
    # end-of-test print — restore it from version control before running.
    print("Test for non-existent user:"******"================== END OF TEST 01 ======================\n\n\n\n\n")
def test01():
    """Smoke-run RecommenderCosineCB (DatasetML API) on 50k ML-1M ratings.

    The recommender is configured with the TF-IDF content data, a profile
    size of 5, the "max" profile strategy and diversity enabled. After one
    single-row update, the profile length of user 331 and 20 recommendations
    for users 331 and 10000 are printed.
    """
    print("Test 01")

    print("Running RecommenderCosineCB ML:")

    # Looked up but not used below (the arguments reference the TF-IDF file);
    # kept so the Configuration attribute access still happens.
    oheDataPath = Configuration.cbML1MDataFileWithPathOHE

    allRatings = Ratings.readFromFileMl1m()
    trainSlice = allRatings.iloc[0:50000]

    dataset = DatasetML("test", trainSlice, pd.DataFrame(), pd.DataFrame())

    recommenderArgs = {
        RecommenderCosineCB.ARG_CB_DATA_PATH:
            Configuration.cbML1MDataFileWithPathTFIDF,
        RecommenderCosineCB.ARG_USER_PROFILE_SIZE: 5,
        RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: "max",
        RecommenderCosineCB.ARG_USE_DIVERSITY: True,
    }
    recommender = RecommenderCosineCB("test", recommenderArgs)

    recommender.train(HistoryDF("test"), dataset)

    updateSlice = allRatings.iloc[50003:50004]
    recommender.update(updateSlice, recommenderArgs)

    print(len(recommender.userProfiles[331]))

    # The printed labels are taken over verbatim; both calls use the same
    # argument dictionary. User 10000 is described in the original comment
    # as non-existent.
    for label, userID in (("max", 331), ("mean", 10000)):
        print(label)
        result = recommender.recommend(userID, 20, recommenderArgs)
        print(type(result))
        print(result)
def test02():
    """Train RecommenderItemBasedKNN on the first 1M ratings and query user 1.

    Prints the 50 recommendations returned for user 1 followed by the
    end-of-test banner.
    """
    print("Test 02")

    print("Running RecommenderItemBasedKNN ML:")

    allRatings = Ratings.readFromFileMl1m()
    films = Items.readFromFileMl1m()

    trainSlice = allRatings.iloc[0:1000000]
    dataset = DatasetML("test", trainSlice, pd.DataFrame(), films)

    # Fit the model, then ask for a single user's recommendations.
    recommender = RecommenderItemBasedKNN("test", {})
    recommender.train(HistoryDF("test02"), dataset)

    result = recommender.recommend(1, 50, {})
    print(result)
    print("================== END OF TEST 02 ======================\n\n\n\n\n")
def test01():
    """Smoke-run RecommenderW2V (frame-based train API) on 50k ML-1M ratings.

    Moves the working directory one level up, trains the "posneg" variant on
    rows 0..49999, applies one single-row update, prints the profile length
    of user 331 and then three sets of 50 recommendations.
    """
    print("Test 01")
    os.chdir("..")

    print("Running RecommenderW2V:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]

    # Renamed from `id`, which shadowed the builtin id() inside this function.
    recommenderID: str = "ml1mDiv90"
    rec: ARecommender = RecommenderW2V(
        recommenderID, {RecommenderW2V.ARG_TRAIN_VARIANT: "posneg"})

    rec.train(HistoryDF("w2v"), ratingsDFTrain, pd.DataFrame(),
              pd.DataFrame())

    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50003:50004]
    rec.update(ratingsDFUpdate)

    print(len(rec.userProfiles[331]))

    # (user, profile strategy, printed label). The labels are reproduced
    # verbatim: the last run prints "mean" although it uses "window10".
    runs = (
        (331, "max", "max"),
        (331, "window10", "window10"),
        (10000, "window10", "mean"),
    )
    for userID, strategy, label in runs:
        r: Series = rec.recommend(
            userID, 50, {RecommenderW2V.ARG_USER_PROFILE_STRATEGY: strategy})
        print(label)
        print(type(r))
        print(r)
def test01():
    """Smoke-run RecommenderW2V (DatasetML API) on 50k ML-1M ratings.

    Trains with the argument set defined below, applies one single-row
    update, prints the profile length of user 331 and then 50
    recommendations for users 331 and 10000.
    """
    print("Test 01")

    print("Running RecommenderW2V ML:")

    allRatings = Ratings.readFromFileMl1m()
    trainSlice = allRatings.iloc[0:50000]

    dataset = DatasetML("ml1mDiv90", trainSlice, pd.DataFrame(),
                        pd.DataFrame())

    # Values are a mix of ints and strings.
    w2vArgs: dict = {
        RecommenderW2V.ARG_ITERATIONS: 50000,
        RecommenderW2V.ARG_TRAIN_VARIANT: 'positive',
        RecommenderW2V.ARG_USER_PROFILE_SIZE: -1,
        RecommenderW2V.ARG_USER_PROFILE_STRATEGY: 'weightedMean',
        RecommenderW2V.ARG_VECTOR_SIZE: 128,
        RecommenderW2V.ARG_WINDOW_SIZE: 5,
    }
    recommender = RecommenderW2V("RecommenderW2V", w2vArgs)

    recommender.train(HistoryDF("w2v"), dataset)

    updateSlice = allRatings.iloc[50003:50004]
    recommender.update(updateSlice, {})

    print(len(recommender.userProfiles[331]))

    # Labels are printed after each call and reproduced verbatim; both calls
    # pass the same arguments ('weightedMean' strategy).
    for userID, label in ((331, "max"), (10000, "mean")):
        result = recommender.recommend(userID, 50, w2vArgs)
        print(label)
        print(type(result))
        print(result)
def test01():
    """Smoke-run RecommenderCosineCB (frame-based train API) on 50k ratings.

    Moves the working directory one level up, trains with the OHE content
    data on rows 0..49999, applies one single-row update, prints the profile
    length of user 331 and then 50 recommendations with the "max" strategy
    for user 331 and the "mean" strategy for user 10000.
    """
    print("Test 01")
    os.chdir("..")

    print("Running RecommenderCosineCB:")

    # The TF-IDF path is read first and immediately replaced by the OHE path;
    # both lookups are kept so the same Configuration attributes are touched.
    contentDataPath = Configuration.cbDataFileWithPathTFIDF
    contentDataPath = Configuration.cbDataFileWithPathOHE

    allRatings = Ratings.readFromFileMl1m()
    trainSlice = allRatings.iloc[0:50000]

    recommender = RecommenderCosineCB(
        "test", {RecommenderCosineCB.ARG_CB_DATA_PATH: contentDataPath})

    recommender.train(pd.DataFrame(), trainSlice, pd.DataFrame(),
                      pd.DataFrame())

    updateSlice = allRatings.iloc[50003:50004]
    recommender.update(updateSlice)

    print(len(recommender.userProfiles[331]))

    # User 10000 is described in the original comment as non-existent.
    for strategy, userID in (("max", 331), ("mean", 10000)):
        print(strategy)
        result = recommender.recommend(
            userID, 50,
            {RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: strategy})
        print(type(result))
        print(result)
def test02(repetitions=1):
    """Drive AggrContextFuzzyDHondt with two recommenders for user 352.

    Trains an item-based KNN and a most-popular recommender on the first 50k
    ML-1M ratings, performs one initial aggregation with AggrFuzzyDHondt and
    then, for every rating of user 352, aggregates with the context-aware
    variant, simulates a click on a randomly chosen aggregated item and
    updates the KNN recommender with that rating.

    Args:
        repetitions: how many times the pass over the user's ratings is
            repeated.
    """
    # One function-scope import instead of re-importing inside the loop.
    import random

    N = 100

    # get dataset
    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsDFTrain = ratingsDF[:50000]

    trainDataset: ADataset = DatasetML("ml", ratingsDFTrain, usersDF, itemsDF)

    historyDF: AHistory = HistoryDF("test01")

    # train KNN
    rec1: ARecommender = RecommenderItemBasedKNN("run", {})
    rec1.train(HistoryDF("test01"), trainDataset)

    # train Most Popular
    rec2: ARecommender = RecommenderTheMostPopular("run", {})
    rec2.train(historyDF, trainDataset)

    # methods parameters: initial votes per method, indexed by method ID
    methodsParamsData: List[list] = [['ItembasedKNN', 0.4],
                                     ['MostPopular', 0.6]]
    methodsParamsDF: DataFrame = pd.DataFrame(methodsParamsData,
                                              columns=["methodID", "votes"])
    methodsParamsDF.set_index("methodID", inplace=True)

    userID = 352
    ratingsDFuserID = ratingsDF[ratingsDF['userId'] == userID]
    itemID = ratingsDFuserID.iloc[0]['movieId']

    # A fresh history replaces the one handed to rec2.train() above.
    historyDF: AHistory = HistoryDF("test01")
    historyDF.insertRecommendation(userID, itemID, 1, True, 10)

    r1: Series = rec1.recommend(userID, N, {})
    r2: Series = rec2.recommend(userID, N, {})

    methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2}
    evaluationDict: dict = {
        EvalToolContext.ARG_USER_ID: userID,
        EvalToolContext.ARG_RELEVANCE: methodsResultDict
    }
    evalToolDHondt = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF
    })
    aggr: AggrContextFuzzyDHondt = AggrContextFuzzyDHondt(
        historyDF, {
            AggrContextFuzzyDHondt.ARG_EVAL_TOOL: evalToolDHondt,
            AggrContextFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})
        })
    aggrInit: AggrFuzzyDHondt = AggrFuzzyDHondt(
        historyDF,
        {AggrFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})})

    # Initial aggregation with the non-contextual aggregator.
    l1 = aggrInit.runWithResponsibility(methodsResultDict, methodsParamsDF,
                                        userID, N)
    print("l1:" + str(l1))
    evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
    evalToolDHondt.click(l1, random.choice(l1)[0], methodsParamsDF,
                         evaluationDict)

    timestamp = 10
    # How often the clicked item was among each method's recommendations.
    r1c = 0
    r2c = 0
    for _ in range(repetitions):
        for index, row in ratingsDFuserID.iterrows():
            r1: Series = rec1.recommend(userID, N, {})
            r2: Series = rec2.recommend(userID, N, {})
            methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2}
            # Built anew each iteration, as in the original, in case the
            # aggregator modifies the dictionary it is given.
            evalDict = {"a": 1}

            historyDF.insertRecommendation(userID, row['movieId'], 1, True,
                                           timestamp)
            timestamp += 1

            l1 = aggr.runWithResponsibility(methodsResultDict,
                                            methodsParamsDF,
                                            userID,
                                            argumentsDict=evalDict,
                                            numberOfItems=N)
            randomItem = random.choice(l1)[0]
            if randomItem in r1.index:
                r1c += 1
            if randomItem in r2.index:
                r2c += 1
            evaluationDict: dict = {
                EvalToolContext.ARG_USER_ID: userID,
                EvalToolContext.ARG_RELEVANCE: methodsResultDict
            }
            print("votes Items: ", r1c)
            print("votes mostPopular ", r2c)
            evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
            evalToolDHondt.click(l1, randomItem, methodsParamsDF,
                                 evaluationDict)
            # Only the KNN recommender is updated; the original notes the
            # most-popular update as not implemented.
            rec1.update(ratingsDFuserID.loc[[index]], {})
            print(methodsParamsDF)
#file:str = "results/ml1mDiv90Ustatic06R1/portfModelTimeEvolution-ClusterFixedClk003ViewDivisor500NRFalse.txt" file: str = "../results/ml1mDiv90Ulinear0109R1/portfModelTimeEvolution-ClusterFixedClk003ViewDivisor500NRFalse.txt" modelDF: PModelDHondtPersonalised = PModelDHondtPersonalised.readModel( file, 38000) #print(modelDF.head()) userID = 11.0 modelOfUserI: DataFrame = modelDF.getModel(float('nan')) modelOfUserI: DataFrame = modelDF.getModel(userID) print(modelOfUserI.head(25)) print(list(modelDF.index)) ratingsDF: DataFrame = Ratings.readFromFileMl1m() #print(ratingsDF.head()) ratingsOfuser: DataFrame = ratingsDF[ratingsDF[Ratings.COL_USERID] == userID] itemIds: List[int] = ratingsOfuser[Ratings.COL_MOVIEID].tolist() #print(itemIds) itemsDF: DataFrame = Items.readFromFileMl1m() r = Items.countA(itemsDF, itemIds) print(r) #from matplotlib import pyplot as plt #from matplotlib import font_manager as fm # make a square figure and axes