def exportPairOfRecomIdsAndRecomDescrsCluster(cls):
    """Build parallel lists of cluster-recommender IDs and descriptions.

    One recommender is exported per genre returned by
    ``Items.getAllGenres()``. Its ID is ``"RecomCluster"`` followed by the
    genre name, and its description is produced by
    ``cls.exportRDescClusterBased`` called with the genre's position in
    that list.

    Returns:
        A ``(rIds, rDescs)`` tuple of two lists of equal length, both
        ordered like the genre list.
    """
    recom: str = "Recom" + "Cluster"

    # Fetch the genre list once; the loop bound and every iteration used to
    # call Items.getAllGenres() again.
    genres = Items.getAllGenres()

    rIds: List[str] = [recom + genreI for genreI in genres]
    rDescs: List[RecommenderDescription] = [
        cls.exportRDescClusterBased(gIdI) for gIdI in range(len(genres))
    ]
    return (rIds, rDescs)
def run(self, batchID:str, jobID:str):
    """Run one job of this batch as a single-portfolio simulation.

    Args:
        batchID: key into the batch-parameter mapping returned by
            ``InputABatchDefinition().getBatchParameters(self.datasetID)``;
            also forwarded to the simulator factory.
        jobID: key into ``self.getParameters()`` selecting the D'Hondt
            selector; also appended to the batch name to form the
            portfolio ID.
    """
    # Batch parameters arrive as a 3-item sequence.
    batchParams = InputABatchDefinition().getBatchParameters(self.datasetID)[batchID]
    divisionDatasetPercentualSize, uBehaviour, repetition = batchParams

    chosenSelector:ADHondtSelector = self.getParameters()[jobID]

    # The evaluation tool gets its own history object ("test01"); the
    # history handed to simulate() below is a separate instance.
    films:DataFrame = Items.readFromFileMl1m()
    people:DataFrame = Users.readFromFileMl1m()
    evalHistory:AHistory = HistoryHierDF("test01")
    evalArgs:dict = {
        EvalToolContext.ARG_USERS: people,
        EvalToolContext.ARG_ITEMS: films,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: evalHistory}
    evalTool:AEvalTool = EvalToolContext(evalArgs)

    recomIDs, recomDescrs = InputRecomMLDefinition.exportPairOfRecomIdsAndRecomDescrs()

    aggrDescr:AggregationDescription = InputAggrDefinition.exportADescContextFuzzyDHondtDirectOptimize(
        chosenSelector, evalTool)

    portfolioDescr:Portfolio1AggrDescription = Portfolio1AggrDescription(
        self.getBatchName() + jobID, recomIDs, recomDescrs, aggrDescr)

    portfolioModel = PModelDHondt(portfolioDescr.getRecommendersIDs())

    sim:Simulator = InputSimulatorDefinition().exportSimulatorML1M(
        batchID, divisionDatasetPercentualSize, uBehaviour, repetition)
    sim.simulate(
        [portfolioDescr],
        [portfolioModel],
        [evalTool],
        [HistoryHierDF(portfolioDescr.getPortfolioID())])
def exportSimulatorML1M(batchID: str, divisionDatasetPercentualSize: int, uBehaviourID: str, repetition: int):
    """Create a ``Simulator`` configured for ``SimulationPortfolioToUser``.

    Args:
        batchID: passed to ``Simulator`` as its first argument.
        divisionDatasetPercentualSize: stored under
            ``ARG_DIV_DATASET_PERC_SIZE`` in the simulation arguments.
        uBehaviourID: behaviour identifier resolved to a file through
            ``Behaviours.getFile``.
        repetition: stored under ``ARG_REPETITION_OF_RECOMMENDATION``.

    Returns:
        The constructed ``Simulator``.
    """
    simCls = SimulationPortfolioToUser

    # Window size (5) and number of recommended items (100) are fixed here.
    simulationArgs: dict = {}
    simulationArgs[simCls.ARG_WINDOW_SIZE] = 5
    simulationArgs[simCls.ARG_REPETITION_OF_RECOMMENDATION] = repetition
    simulationArgs[simCls.ARG_NUMBER_OF_RECOMM_ITEMS] = 100
    simulationArgs[simCls.ARG_NUMBER_OF_AGGR_ITEMS] = InputSimulatorDefinition.numberOfAggrItems
    simulationArgs[simCls.ARG_DIV_DATASET_PERC_SIZE] = divisionDatasetPercentualSize

    # Load the dataset tables.
    ratings: DataFrame = Ratings.readFromFileMl1m()
    users: DataFrame = Users.readFromFileMl1m()
    items: DataFrame = Items.readFromFileMl1m()

    # Load the behaviour table that belongs to the requested behaviour ID.
    behaviours: DataFrame = Behaviours.readFromFileMl1m(
        Behaviours.getFile(uBehaviourID))

    return Simulator(batchID, SimulationPortfolioToUser, simulationArgs,
                     ratings, users, items, behaviours)
def readDatasets():
    """Read ratings, users and items and wrap them in a ``DatasetML``.

    Returns:
        ``DatasetML`` constructed with the ID ``"ml1mDivAll"`` and the three
        tables, in the order ratings, users, items.
    """
    # Tuple elements are evaluated left to right: ratings, users, items.
    ratings, users, items = (
        Ratings.readFromFileMl1m(),
        Users.readFromFileMl1m(),
        Items.readFromFileMl1m(),
    )
    return DatasetML("ml1mDivAll", ratings, users, items)
def test01():
    """Manual smoke test of ``RecommenderItemBasedKNN``.

    Trains the recommender on the first 50 000 ratings, asks for 50
    recommendations for the user of one later rating, feeds that rating to
    ``update`` and asks again. The recommendation results are computed but
    not printed.
    """
    print("Test 01")
    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 50k
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(), filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test01"), trainDataset)

    # get one rating for update (row 50005, outside the training slice)
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50005:50006]

    # get recommendations:
    print("Recommendations before update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    rec.update(ratingsDFUpdate, {})

    print("Recommendations after update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # NOTE(review): the source text between the next two messages was
    # corrupted, which made the function unparsable. Presumably it called
    # rec.recommend() for an unknown user ID and printed the result.
    # TODO: restore that call from version control.
    print("Test for non-existent user:")
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
def test03():
    """Manual test: update ``RecommenderItemBasedKNN`` with an unseen user/item pair.

    The training data are the 900 000 oldest ratings with every rating of
    user 23 and every rating of movie 10 removed. The recommender is then
    updated with a single rating (user 23, movie 10) and asked twice for
    10 recommendations for user 23; both results are printed.
    """
    print("Test 03")

    # userID: 23
    # currentItemID: 196
    # repetition: 0

    print("Running RecommenderItemBasedKNN ML:")

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    chronological: DataFrame = allRatings.sort_values(by=Ratings.COL_TIMESTAMP)
    films: DataFrame = Items.readFromFileMl1m()
    print(len(chronological))

    # Oldest 900k ratings, without user 23 and without movie 10.
    oldest: DataFrame = chronological[0:900000]
    keepMask = ((oldest[Ratings.COL_USERID] != 23)
                & (oldest[Ratings.COL_MOVIEID] != 10))
    trainRatings: DataFrame = oldest[keepMask]
    print(trainRatings.head(25))

    trainDataset: ADataset = DatasetML("test", trainRatings, pd.DataFrame(), films)

    # train recommender
    knn: ARecommender = RecommenderItemBasedKNN("test1", {})
    knn.train(HistoryDF("test03"), trainDataset)

    # One new rating: user 23 rates movie 10 with 4 at timestamp 10000.
    ratingColumns = [
        Ratings.COL_USERID,
        Ratings.COL_MOVIEID,
        Ratings.COL_RATING,
        Ratings.COL_TIMESTAMP,
    ]
    newRating: DataFrame = pd.DataFrame([[23, 10, 4, 10000]], columns=ratingColumns)
    knn.update(newRating, {})

    print(knn.recommend(23, 10, {}))
    print("\n")
    print(knn.recommend(23, 10, {}))

    print("================== END OF TEST 03 ======================\n\n\n\n\n")
def test01():
    """Run one simulation of a ContextDHondtINF portfolio.

    Builds a roulette-wheel selector (exponent 1), a penalty tool, a
    context evaluation tool and a one-aggregator portfolio description,
    then simulates it with batch ID ``"ml1mDiv90Ulinear0109R1"``.
    ``argsSimulationDict`` is not defined here and must exist in the
    enclosing scope.
    """
    print("Simulation: ML ContextDHondtINF")

    jobID: str = "Roulette1"

    rouletteSelector = RouletteWheelSelector({RouletteWheelSelector.ARG_EXPONENT: 1})

    #pProbToolOLin0802HLin1002:APenalization = PenalizationToolDefinition.exportProbPenaltyToolOStat08HLin1002(
    #    InputSimulatorDefinition.numberOfAggrItems)
    penaltyTool: APenalization = PenalizationToolDefinition.exportPenaltyToolOLin0802HLin1002(
        InputSimulatorDefinition.numberOfAggrItems)

    films: DataFrame = Items.readFromFileMl1m()
    people: DataFrame = Users.readFromFileMl1m()

    # History used by the evaluation tool only; simulate() gets a fresh one.
    evalHistory: AHistory = HistoryDF("test01")

    evalArgs: dict = {
        EvalToolContext.ARG_USERS: people,
        EvalToolContext.ARG_ITEMS: films,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: evalHistory
    }
    evalTool: AEvalTool = EvalToolContext(evalArgs)

    recomIDs, recomDescrs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()

    aggrDescr = InputAggrDefinition.exportADescDContextHondtINF(
        rouletteSelector, penaltyTool, evalTool)
    portfolioDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "ContextDHondtINF" + jobID, recomIDs, recomDescrs, aggrDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behaviours: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    portfolioModel = PModelDHondt(portfolioDescr.getRecommendersIDs())

    # simulation of portfolio
    sim: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                               dataset, behaviours)
    sim.simulate(
        [portfolioDescr],
        [portfolioModel],
        [evalTool],
        [HistoryHierDF(portfolioDescr.getPortfolioID())])
def test01():
    """Manual smoke test and timing of ``RecommenderVMContextKNN``.

    Trains on the first 800 000 ratings and prints the training time, then
    asks for 50 recommendations for the user of one later rating and prints
    how long that took. Afterwards it feeds the rating to ``update`` as a
    click and asks again. The recommendation results are computed but not
    printed.
    """
    print("Test 01")
    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 800k (the old comment still said 50k)
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:800000]
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(), filmsDF)

    # train recommender
    rec: ARecommender = RecommenderVMContextKNN("test", {})
    start = time.time()
    rec.train(HistoryDF("test01"), trainDataset)
    end = time.time()
    print("Time to train: " + str(end - start))

    # get one rating for update (row 800006, outside the training slice)
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[800006:800007]

    # get recommendations:
    print("Recommendations before update")
    start = time.time()
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})
    end = time.time()
    # This interval measures recommend(), so the label no longer says "train".
    print("Time to recommend: " + str(end - start))

    rec.update(ARecommender.UPDT_CLICK, ratingsDFUpdate)

    print("Recommendations after update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # NOTE(review): the source text between the next two messages was
    # corrupted, which made the function unparsable. Presumably it called
    # rec.recommend() for an unknown user ID and printed the result.
    # TODO: restore that call from version control.
    print("Test for non-existent user:")
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
def train(self, history:AHistory, dataset: ADataset):
    """Store the training dataset and precompute its popularity table.

    The exact class of ``dataset`` (subclasses do not match) decides which
    attribute is filled:

    * ``DatasetML`` - ``_sortedAscRatings4CountDF`` from
      ``getTheMostPopularOfGenre`` for the genre at index
      ``self.recommenderNumericId`` of ``Items.getAllGenres()``;
    * ``DatasetRetailRocket`` - ``_sortedAsceventsTransCountDF`` from
      ``getTheMostSold()``;
    * ``DatasetST`` - ``_sortedTheMostCommon`` from ``getTheMostSold()``.

    Raises:
        ValueError: if ``history`` is not an ``AHistory``, if ``dataset``
            is not an ``ADataset``, or if the dataset's class is none of
            the three above (``self.trainDataset`` is already set then).
    """
    if not isinstance(history, AHistory):
        raise ValueError("Argument history isn't type AHistory.")
    if not isinstance(dataset, ADataset):
        raise ValueError("Argument dataset isn't type ADataset.")

    self.trainDataset = dataset

    datasetClass = type(dataset)

    if datasetClass is DatasetML:
        # Imported here, inside this branch only.
        from datasets.ml.items import Items #class
        genre = Items.getAllGenres()[self.recommenderNumericId]
        self._sortedAscRatings4CountDF = dataset.getTheMostPopularOfGenre(genre)
        return

    if datasetClass is DatasetRetailRocket:
        self._sortedAsceventsTransCountDF = dataset.getTheMostSold()
        return

    if datasetClass is DatasetST:
        self._sortedTheMostCommon = dataset.getTheMostSold()
        return

    raise ValueError("Argument dataset isn't of expected type.")
def test02():
    """Manual test: train ``RecommenderItemBasedKNN`` and print 50 recommendations for user 1.

    Training uses the first 1 000 000 rows of the ratings table and an
    empty users table.
    """
    print("Test 02")
    print("Running RecommenderItemBasedKNN ML:")

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    films: DataFrame = Items.readFromFileMl1m()

    trainRatings: DataFrame = allRatings.iloc[0:1000000]
    trainDataset: ADataset = DatasetML("test", trainRatings, pd.DataFrame(), films)

    # train recommender
    knn: ARecommender = RecommenderItemBasedKNN("test", {})
    knn.train(HistoryDF("test02"), trainDataset)

    print(knn.recommend(1, 50, {}))

    print("================== END OF TEST 02 ======================\n\n\n\n\n")
def test01():
    """Run one simulation of a ContextFuzzyDHondtDirectOptimize portfolio.

    Builds a roulette-wheel selector (exponent 1), a context evaluation
    tool and a one-aggregator portfolio description, then simulates it with
    batch ID ``"ml1mDiv90Ulinear0109R1"``. The history object given to the
    evaluation tool is also the one passed to ``simulate``.
    ``argsSimulationDict`` is not defined here and must exist in the
    enclosing scope.
    """
    print("Simulation: ML ContextFuzzyDHondtDirectOptimize")

    jobID: str = "Roulette1"

    rouletteSelector: ADHondtSelector = RouletteWheelSelector(
        {RouletteWheelSelector.ARG_EXPONENT: 1})

    films: DataFrame = Items.readFromFileMl1m()
    people: DataFrame = Users.readFromFileMl1m()

    sharedHistory: AHistory = HistoryHierDF("test01")

    evalArgs: dict = {
        EvalToolContext.ARG_USERS: people,
        EvalToolContext.ARG_ITEMS: films,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: sharedHistory
    }
    evalTool: AEvalTool = EvalToolContext(evalArgs)

    recomIDs, recomDescrs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()

    aggrDescr = InputAggrDefinition.exportADescContextFuzzyDHondtDirectOptimize(
        rouletteSelector, evalTool)
    portfolioDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "ContextFuzzyDHondtDirectOptimize" + jobID, recomIDs, recomDescrs,
        aggrDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behaviours: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    portfolioModel = PModelDHondt(portfolioDescr.getRecommendersIDs())

    # simulation of portfolio
    sim: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                               dataset, behaviours)
    sim.simulate([portfolioDescr], [portfolioModel], [evalTool], [sharedHistory])
def test02(repetitions=1):
    """Manual end-to-end run of AggrContextFuzzyDHondt with EvalToolContext.

    Trains an item-based KNN and a most-popular recommender on the first
    50 000 ratings, then replays the ratings of user 352: for each rating it
    aggregates both recommenders' outputs, picks a random item of the
    aggregated list as the "clicked" one, reports it to the evaluation tool
    and updates the KNN recommender with that rating.

    Args:
        repetitions: how many times the user's ratings are replayed.

    NOTE(review): the original indentation was lost; the loop nesting below
    is reconstructed (everything after the inner ``for`` uses ``index`` or
    ``row`` and is assumed to belong to it) - confirm against the original.
    """
    # Number of items requested from each recommender and from the aggregators.
    N = 100

    # get dataset
    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsDFTrain = ratingsDF[:50000]
    # Not used further in this function.
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50001:50100]

    trainDataset: ADataset = DatasetML("ml", ratingsDFTrain, usersDF, itemsDF)

    # This first history object is only passed to rec2.train(); the name is
    # rebound to a new HistoryDF further below.
    historyDF: AHistory = HistoryDF("test01")

    # train KNN
    rec1: ARecommender = RecommenderItemBasedKNN("run", {})
    rec1.train(HistoryDF("test01"), trainDataset)

    # train Most Popular
    rec2: ARecommender = RecommenderTheMostPopular("run", {})
    rec2.train(historyDF, trainDataset)

    # methods parameters: initial votes per method, indexed by method ID
    methodsParamsData: List[tuple] = [['ItembasedKNN', 0.4], ['MostPopular', 0.6]]
    methodsParamsDF: DataFrame = pd.DataFrame(methodsParamsData, columns=["methodID", "votes"])
    methodsParamsDF.set_index("methodID", inplace=True)

    userID = 352
    ratingsDFuserID = ratingsDF[ratingsDF['userId'] == userID]
    # Movie of the user's first rating row; seeds the new history below.
    itemID = ratingsDFuserID.iloc[0]['movieId']

    # Fresh history shared by the evaluation tool and both aggregators.
    historyDF: AHistory = HistoryDF("test01")
    historyDF.insertRecommendation(userID, itemID, 1, True, 10)

    r1: Series = rec1.recommend(userID, N, {})
    r2: Series = rec2.recommend(userID, N, {})
    methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2}
    evaluationDict: dict = {
        EvalToolContext.ARG_USER_ID: userID,
        EvalToolContext.ARG_RELEVANCE: methodsResultDict
    }
    evalToolDHondt = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF
    })

    # Context-aware aggregator, used inside the replay loop.
    aggr: AggrContextFuzzyDHondt = AggrContextFuzzyDHondt(
        historyDF, {
            AggrContextFuzzyDHondt.ARG_EVAL_TOOL: evalToolDHondt,
            AggrContextFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})
        })
    # Plain fuzzy D'Hondt aggregator, used only for the initial list below.
    aggrInit: AggrFuzzyDHondt = AggrFuzzyDHondt(
        historyDF,
        {AggrFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})})

    l1 = aggrInit.runWithResponsibility(methodsResultDict, methodsParamsDF, userID, N)
    import random
    print("l1:" + str(l1))

    # Initial feedback round: report the list as displayed, then a click on
    # the first element of a randomly chosen entry of l1.
    evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
    evalToolDHondt.click(l1, random.choice(l1)[0], methodsParamsDF, evaluationDict)

    timestamp = 10
    counter = 0
    # How often the randomly chosen item was in r1's / r2's index.
    r1c = 0
    r2c = 0
    for _ in range(repetitions):
        for index, row in ratingsDFuserID.iterrows():
            r1: Series = rec1.recommend(userID, N, {})
            r2: Series = rec2.recommend(userID, N, {})
            methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2}
            evalDict = {"a": 1}

            # Record the replayed rating in the history before aggregating.
            historyDF.insertRecommendation(userID, row['movieId'], 1, True, timestamp)
            timestamp += 1

            l1 = aggr.runWithResponsibility(methodsResultDict, methodsParamsDF, userID, argumentsDict=evalDict, numberOfItems=N)
            import random
            randomItem = random.choice(l1)[0]
            if randomItem in r1.index:
                r1c += 1
            if randomItem in r2.index:
                r2c += 1
            evaluationDict: dict = {
                EvalToolContext.ARG_USER_ID: userID,
                EvalToolContext.ARG_RELEVANCE: methodsResultDict
            }
            print("votes Items: ", r1c)
            print("votes mostPopular ", r2c)
            evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
            evalToolDHondt.click(l1, randomItem, methodsParamsDF, evaluationDict)

            # Double brackets keep the single row as a DataFrame.
            rec1.update(ratingsDFuserID.loc[[index]], {})
            # rec2.update(ratingsDFuserID.loc[index]) Not implemented
            #print("Counter = ", counter, "; All = ", len(ratingsDFuserID.iloc[800:]), "; Index: ", index)
            print(methodsParamsDF)
            counter += 1
def test01():
    """Manual test of ``EvalToolContext.click`` and ``EvalToolContext.displayed``.

    Uses hand-written results of three methods, a fixed list of item IDs
    with per-method responsibilities and a portfolio model with initial
    votes 100/80/60. The model and ``sumMethods(model)`` are printed after
    three clicks, during 100 ``displayed`` calls and after a final click, so
    the changes can be inspected by eye.

    NOTE(review): the original indentation was lost; the extent of the
    ``for`` loop body below is reconstructed - confirm against the original.
    """
    print("Test 01")

    #print("Running Two paralel History Databases:")

    # method results, items=[1,2,4,5,6,7,8,12,32,64,77]
    # Each Series maps item ID (index) to that method's rating of the item.
    methodsResultDict: dict = {
        "metoda1": pd.Series([0.2, 0.1, 0.3, 0.3, 0.1], [32, 2, 8, 1, 4], name="rating"),
        "metoda2": pd.Series([0.1, 0.1, 0.2, 0.3, 0.3], [1, 5, 32, 6, 7], name="rating"),
        "metoda3": pd.Series([0.3, 0.1, 0.2, 0.3, 0.1], [7, 2, 77, 64, 12], name="rating")
    }

    # Pairs of (item ID, {method ID: responsibility value}).
    rItemIDsWithResponsibility: List = [(7, {
        'metoda1': 0,
        'metoda2': 24.0,
        'metoda3': 18.0
    }), (1, {
        'metoda1': 30.0,
        'metoda2': 8.0,
        'metoda3': 0
    }), (32, {
        'metoda1': 20.0,
        'metoda2': 16.0,
        'metoda3': 0
    }), (8, {
        'metoda1': 30.0,
        'metoda2': 0,
        'metoda3': 0
    }), (6, {
        'metoda1': 0,
        'metoda2': 24.0,
        'metoda3': 0
    }), (64, {
        'metoda1': 0,
        'metoda2': 0,
        'metoda3': 18.0
    }), (2, {
        'metoda1': 10.0,
        'metoda2': 0,
        'metoda3': 6.0
    }), (77, {
        'metoda1': 0,
        'metoda2': 0,
        'metoda3': 12.0
    }), (4, {
        'metoda1': 10.0,
        'metoda2': 0,
        'metoda3': 0
    }), (5, {
        'metoda1': 0,
        'metoda2': 8.0,
        'metoda3': 0
    }), (12, {
        'metoda1': 0,
        'metoda2': 0,
        'metoda3': 6.0
    })]

    # methods parameters: initial votes per method, indexed by method ID
    portfolioModelData = [['metoda1', 100], ['metoda2', 80], ['metoda3', 60]]
    portfolioModelDF: DataFrame = pd.DataFrame(portfolioModelData, columns=["methodID", "votes"])
    portfolioModelDF.set_index("methodID", inplace=True)

    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()

    print("Definition:")
    print(portfolioModelDF)
    print(sumMethods(portfolioModelDF))
    print()

    userID = 1
    itemID = 2

    # Seed the history with four recommendations of the same item to the
    # same user; only the boolean flag of the last one differs.
    historyDF: AHistory = HistoryDF("test01")
    historyDF.insertRecommendation(userID, itemID, 1, True, 10)
    historyDF.insertRecommendation(userID, itemID, 1, True, 20)
    historyDF.insertRecommendation(userID, itemID, 1, True, 30)
    historyDF.insertRecommendation(userID, itemID, 1, False, 40)

    evaluationDict: dict = {
        EvalToolContext.ARG_USER_ID: userID,
        EvalToolContext.ARG_RELEVANCE: methodsResultDict
    }
    evalToolDHondt = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF
    })

    print("Clicked:")
    #print("rItemIDsWithResponsibility: " + str(rItemIDsWithResponsibility))
    # Three clicks: item 7, item 1, item 7 again.
    evalToolDHondt.click(rItemIDsWithResponsibility, 7, portfolioModelDF, evaluationDict)
    evalToolDHondt.click(rItemIDsWithResponsibility, 1, portfolioModelDF, evaluationDict)
    evalToolDHondt.click(rItemIDsWithResponsibility, 7, portfolioModelDF, evaluationDict)
    print(portfolioModelDF)
    print(sumMethods(portfolioModelDF))
    print()

    print("Displayed - start:")
    # Report the same list as displayed 100 times, printing the model each time.
    for i in range(100):
        print("rItemIDsWithResponsibility: " + str(rItemIDsWithResponsibility))
        evalToolDHondt.displayed(rItemIDsWithResponsibility, portfolioModelDF, evaluationDict)
        print(portfolioModelDF)
        print(sumMethods(portfolioModelDF))
        print()
    print(portfolioModelDF)
    print(sumMethods(portfolioModelDF))
    print("Displayed - end:")
    print()

    print("Clicked:")
    evalToolDHondt.click(rItemIDsWithResponsibility, 4, portfolioModelDF, evaluationDict)
    print(portfolioModelDF)
    print(sumMethods(portfolioModelDF))
    print()
modelOfUserI: DataFrame = modelDF.getModel(float('nan')) modelOfUserI: DataFrame = modelDF.getModel(userID) print(modelOfUserI.head(25)) print(list(modelDF.index)) ratingsDF: DataFrame = Ratings.readFromFileMl1m() #print(ratingsDF.head()) ratingsOfuser: DataFrame = ratingsDF[ratingsDF[Ratings.COL_USERID] == userID] itemIds: List[int] = ratingsOfuser[Ratings.COL_MOVIEID].tolist() #print(itemIds) itemsDF: DataFrame = Items.readFromFileMl1m() r = Items.countA(itemsDF, itemIds) print(r) #from matplotlib import pyplot as plt #from matplotlib import font_manager as fm # make a square figure and axes #fig = plt.figure(1, figsize=(6, 6), dpi=50) #ax = fig.add_axes([0.16, 0.16, 0.68, 0.68]) #plt.title("Scripting languages") #ax.title.set_fontsize(30) # vytvoření koláčového grafu #ax.pie(r.values(), labels=r.keys(), autopct='%1.1f%%', shadow=True)