class WebServer(object):
    """Front door of the recommender: wires the database, learners, ranker
    and recommendation engine together.

    ``start()`` must be called before any other method, because it creates
    the collaborating components the other methods rely on.
    """

    logging.basicConfig(level=logging.INFO)

    def __init__(self, configMap):
        """Create the database interface and read the serving configuration.

        Args:
            configMap: mapping providing ``'data_dir'`` (handed to
                ``DatabaseInterface``) and ``'numberToServe'``.
        """
        self.db = DatabaseInterface(configMap['data_dir'])
        # numberToServe: the number of items finally served to the users
        self.numberToServe = configMap['numberToServe']
        self.log = logging.getLogger(__name__)

    def start(self):
        """Start the database engine and build every collaborating component."""
        # each object here simulates the API calls through network
        # passing an object A to the constructor of B means A will communicate with B
        self.db.startEngine()
        self.ranker = Ranker(self.numberToServe, self.db)
        self.userAnalyzer = UserAnalyzer()
        self.modelStore = ModelStore()
        self.offlineLearner = OfflineLearner(self.db, self.modelStore)
        self.onlineLearner = OnlineLearner(self.db, self.modelStore)
        # when we start the webserver, we let the offline learner train the
        # models, so that recommendations can be given right after start()
        self.offlineLearner.trainModel()
        self.recEngine = RecEngine(
            self.userAnalyzer,
            self.modelStore,
            self.db.extract(DatabaseInterface.USER_ACTIVITY_KEY))

    def getAction(self, action):
        """Consume one user action.

        The online learner is trained on every action; the action is written
        to the database only when the user analyzer labels it "registered".

        Args:
            action: an ``Action`` instance.
        """
        assert (isinstance(action, Action))
        # taking the action from users
        self.onlineLearner.trainModel(action)
        # analyze action type, and save the registered user's action
        actionType = self.userAnalyzer.analyzeAction(action)
        if actionType == "registered":
            # Pass the action as a logger argument instead of pre-formatting
            # with "%": formatting is deferred until the record is emitted
            # and cannot fail when the action happens to be a tuple.
            self.log.info("Recording action %s", action)
            self.db.putAction(action)

    def provideRecommendation(self, request):
        """Return the ids of the recommended items for ``request``.

        Args:
            request: a ``Request`` instance.

        Returns:
            Whatever ``Ranker.rerank`` returns for the engine's
            recommendations (a list of item ids).
        """
        assert (isinstance(request, Request))
        # provide recommendations to user
        self.log.info("responding to request: %s", request)
        recommendations = self.recEngine.provideRecommendation(request)
        recsReranked = self.ranker.rerank(recommendations)
        return recsReranked  # a list of item ids

    def renderRecommendation(self, request):
        """Return the inventory entries of the recommended items.

        Args:
            request: a ``Request`` instance.

        Returns:
            The inventory rows selected by the recommended ids, ordered by
            index (id) rather than by rank.
        """
        assert (isinstance(request, Request))
        recsReranked = self.provideRecommendation(request)
        # for the purpose of testing, we sort the index, output item names
        # output is ordered by the id value
        return self.getFromInventory(recsReranked).sort_index()

    def increment(self):
        """Advance the whole system by one day and refresh the models."""
        self.log.info("incrementing the system, update the models")
        # increment the whole system by one day, trigger offline training
        self.offlineLearner.trainModel()
        self.modelStore.cleanOnlineModel()
        self.recEngine.resetCache()

    def getFromInventory(self, itemId):
        """Look up ``itemId`` (one label or a list of labels) in the inventory table."""
        return self.db.extract(DatabaseInterface.INVENTORY_KEY).loc[itemId]
class WebServer(object):
    """Coordinates the database, the learners, the ranker and the
    recommendation engine behind a single object.

    ``start()`` has to run first: it creates the components that every other
    method uses.
    """

    logging.basicConfig(level=logging.INFO)

    def __init__(self, configMap):
        """Build the database interface and remember how many items to serve.

        ``configMap`` must provide ``'data_dir'`` and ``'numberToServe'``.
        """
        data_dir = configMap['data_dir']
        self.db = DatabaseInterface(data_dir)
        # numberToServe: the number of items finally served to the users
        self.numberToServe = configMap['numberToServe']
        self.log = logging.getLogger(__name__)

    def start(self):
        """Start the database engine, create all components and run the first increment."""
        # Each object simulates an API call over the network; handing object
        # A to the constructor of B means the two will communicate.
        database = self.db
        database.startEngine()
        self.ranker = Ranker(self.numberToServe, database)
        self.user_analyzer = UserAnalyzer()
        self.model_store = ModelStore()
        self.online_learner = OnlineLearner(database, self.model_store)
        self.offline_learner = OfflineLearner(database, self.model_store)
        # increment() needs the model store and the offline learner, so it
        # can only run once both exist.
        self.increment()
        user_activity = database.connTable[DatabaseInterface.USER_ACTIVITY_KEY]
        self.rec_engine = RecEngine(
            self.user_analyzer, self.model_store, user_activity)

    def getAction(self, action):
        """Feed one ``Action`` to the online learner and store it if the
        analyzer labels it "registered"."""
        assert (isinstance(action, Action))
        # analyze user type
        label = self.user_analyzer.analyzeAction(action)
        self.online_learner.trainModel(action)
        if label != "registered":
            return
        self.log.info("Recording action %s", action)
        self.db.putAction(action)

    def provideRecommendation(self, request):
        """Return the reranked ids of the items recommended for ``request``."""
        assert (isinstance(request, Request))
        candidates = self.rec_engine.provideRecommendation(request)
        return self.ranker.rerank(candidates)

    def renderRecommendation(self, request):
        """Return the inventory entries of the recommended items, sorted by index."""
        assert (isinstance(request, Request))
        ranked_ids = self.provideRecommendation(request)
        inventory_rows = self.getFromInventory(ranked_ids)
        return inventory_rows.sort_index()

    def increment(self):
        """Move the system forward by one day: drop online models, retrain offline."""
        self.log.info("incrementing the system, update the models")
        # increment the whole system by one day, trigger offline training
        self.model_store.cleanOnlineModel()
        self.offline_learner.trainModel()

    def getFromInventory(self, itemId):
        """Select ``itemId`` from the inventory table by label."""
        inventory = self.db.extract(DatabaseInterface.INVENTORY_KEY)
        return inventory.loc[itemId]
class WebServer(object):
    """Ties the database, learners, ranker and recommendation engine together.

    Call ``start()`` before anything else; it creates the components the
    remaining methods depend on.
    """

    logging.basicConfig(level=logging.INFO)

    # configMap is in main
    def __init__(self, configMap):
        """Create the database interface and store the serving size.

        ``configMap`` must provide ``'data_dir'`` and ``'numberToServe'``.
        """
        data_dir = configMap['data_dir']
        self.db = DatabaseInterface(data_dir)
        self.numberToServe = configMap['numberToServe']
        self.log = logging.getLogger(__name__)

    # Tables have to be addressed through a key constant; the original
    # author was unsure why a direct string is not used.
    def start(self):
        """Initialize everything: engine, ranker, analyzer, model store, learners, engine."""
        database = self.db
        database.startEngine()
        self.ranker = Ranker(self.numberToServe, database)
        self.userAnalyzer = UserAnalyzer()
        self.modelStore = ModelStore()
        self.offlineLearner = OfflineLearner(database, self.modelStore)
        self.onlineLearner = OnlineLearner(database, self.modelStore)
        # Train right away so that recommendations can be given immediately
        # after start().
        self.offlineLearner.trainModel()
        # The user-activity table had to be extracted here.
        user_activity = database.extract(DatabaseInterface.USER_ACTIVITY_KEY)
        self.recEngine = RecEngine(
            self.userAnalyzer, self.modelStore, user_activity)

    def getAction(self, action):
        """Train the online learner on ``action`` and store it when the
        analyzer labels it "registered"."""
        assert (isinstance(action, Action))
        self.onlineLearner.trainModel(action)
        label = self.userAnalyzer.analyzeAction(action)
        if label != "registered":
            return
        self.db.putAction(action)

    def provideRec(self, request):
        """Return the reranked recommendations for ``request``."""
        assert (isinstance(request, Request))
        candidates = self.recEngine.provideRec(request)
        return self.ranker.rerank(candidates)

    def renderRec(self, request):
        """Return the inventory entries of the recommended items, sorted by index."""
        assert (isinstance(request, Request))
        ranked = self.provideRec(request)
        inventory = self.db.extract(DatabaseInterface.INVENTORY_KEY)
        return inventory.loc[ranked].sort_index()

    def increment(self):
        """Refresh the system: offline training, online-model cleanup, engine cache reset."""
        # offline, online, recengine (find the new most popular one)
        self.offlineLearner.trainModel()
        self.modelStore.cleanOnlineModel()
        self.recEngine.resetCache()

    def getFromInventory(self, itemId):
        """Select ``itemId`` from the inventory table by label."""
        inventory = self.db.extract(DatabaseInterface.INVENTORY_KEY)
        return inventory.loc[itemId]
def start(self):
    """Start the database engine, create all components and run the first increment."""
    # Each object simulates an API call over the network; handing object A
    # to the constructor of B means the two will communicate.
    database = self.db
    database.startEngine()
    self.ranker = Ranker(self.numberToServe, database)
    self.user_analyzer = UserAnalyzer()
    self.model_store = ModelStore()
    self.online_learner = OnlineLearner(database, self.model_store)
    self.offline_learner = OfflineLearner(database, self.model_store)
    # increment() is called only after the learners and the model store exist.
    self.increment()
    user_activity = database.connTable[DatabaseInterface.USER_ACTIVITY_KEY]
    self.rec_engine = RecEngine(
        self.user_analyzer, self.model_store, user_activity)
def start(self):
    """Start the database engine, build every component and train the offline models."""
    database = self.db
    database.startEngine()
    self.ranker = Ranker(self.numberToServe, database)
    self.userAnalyzer = UserAnalyzer()
    self.modelStore = ModelStore()
    self.offlineLearner = OfflineLearner(database, self.modelStore)
    self.onlineLearner = OnlineLearner(database, self.modelStore)
    # Train right away so that recommendations can be given immediately
    # after start().
    self.offlineLearner.trainModel()
    # The user-activity table had to be extracted here.
    user_activity = database.extract(DatabaseInterface.USER_ACTIVITY_KEY)
    self.recEngine = RecEngine(
        self.userAnalyzer, self.modelStore, user_activity)
def start(self):
    """Bring the server up: engine, ranker, analyzer, model store, learners, engine."""
    # Every object stands in for an API call over the network; giving
    # object A to the constructor of B means A and B will communicate.
    db = self.db
    db.startEngine()
    self.ranker = Ranker(self.numberToServe, db)
    self.userAnalyzer = UserAnalyzer()
    self.modelStore = ModelStore()
    self.offlineLearner = OfflineLearner(db, self.modelStore)
    self.onlineLearner = OnlineLearner(db, self.modelStore)
    # On start-up the offline learner trains the models so that
    # recommendations can be given as soon as start() has finished.
    self.offlineLearner.trainModel()
    activity_table = db.extract(DatabaseInterface.USER_ACTIVITY_KEY)
    self.recEngine = RecEngine(
        self.userAnalyzer, self.modelStore, activity_table)
def start(self):
    """Start the engine, create the collaborating components and run offline training."""
    # The objects below model API calls made over the network; an object A
    # passed to the constructor of B is one that B will communicate with.
    backend = self.db
    backend.startEngine()
    self.ranker = Ranker(self.numberToServe, backend)
    self.userAnalyzer = UserAnalyzer()
    self.modelStore = ModelStore()
    self.offlineLearner = OfflineLearner(backend, self.modelStore)
    self.onlineLearner = OnlineLearner(backend, self.modelStore)
    # The offline learner trains the models while the webserver starts, so
    # that recommendations are available once start() returns.
    self.offlineLearner.trainModel()
    activity = backend.extract(DatabaseInterface.USER_ACTIVITY_KEY)
    self.recEngine = RecEngine(self.userAnalyzer, self.modelStore, activity)
distances, indices = self.knnModel.kneighbors(userFeature) # indices are the nearest neighbors' index in the matrix, which is different from userId. return self.userIds[indices[0]] def provideRec(self, userId): # data is a tuple of (user feature, item feature) userIds = self.predict(self.userFeatureTable.loc[userId].as_matrix().reshape(1, -1)) # remove himself as a nearest neighbor userIds = np.array(list(set(userIds) - set([userId]))) # for all nearest neighbors, compute the the average score, sorted from large to small # then report the item ids return self.ratingsMat[userIds - 1].mean(axis=0).argsort()[::-1] + 1 if __name__ == "__main__": from DatabaseInterface import DatabaseInterface from Learners.OfflineLearner import OfflineLearner db = DatabaseInterface("../DATA") db.startEngine() history = db.extract("history") userFeatureTable = db.extract(DatabaseInterface.USER_FEATURE_KEY).loc[:, "age":] ratingsMat = OfflineLearner.transformToMat(history) model = KNNmodel() model.train(userFeatureTable, ratingsMat) print model.provideRec(97)[:20] print ratingsMat[96, model.provideRec(97) - 1][:20]
# now we have the filled matrix for matrix factorization self.log.info("Number of ratings added by content-based model: %s" % fillCount) self._CFSVD(ratingsMatFinal) def predict(self, userId): return self.all_user_predicted_ratings[userId - 1] def provideRec(self, userId): # data is a tuple of (user feature, item feature) # compute the the average score, sorted from large to small, then report the item ids return self.all_user_predicted_ratings[userId - 1].argsort()[::-1] + 1 if __name__ == "__main__": from DatabaseInterface import DatabaseInterface from Learners.OfflineLearner import OfflineLearner db = DatabaseInterface("../DATA") db.startEngine() history = db.extract("history") itemFeatureTable = db.extract( DatabaseInterface.ITEM_FEATURE_KEY).loc[:, "unknown":] ratingsMat = OfflineLearner.transformToMat(history) model = CFmodel() model.train(ratingsMat, itemFeatureTable) recs = model.provideRec(1) print(recs) print(ratingsMat[0, recs - 1])
class WebServer(object):
    """Single entry point that connects the database, the learners, the
    ranker and the recommendation engine.

    ``start()`` creates the collaborating components and must therefore be
    called before any other method.
    """

    logging.basicConfig(level=logging.INFO)

    def __init__(self, configMap):
        """Create the database interface and read the serving configuration.

        Args:
            configMap: mapping providing ``'data_dir'`` (handed to
                ``DatabaseInterface``) and ``'numberToServe'``.
        """
        self.db = DatabaseInterface(configMap['data_dir'])
        # numberToServe: the number of items finally served to the users
        self.numberToServe = configMap['numberToServe']
        self.log = logging.getLogger(__name__)

    def start(self):
        """Start the database engine and build every collaborating component."""
        # each object here simulates the API calls through network
        # passing an object A to the constructor of B means A will communicate with B
        self.db.startEngine()
        self.ranker = Ranker(self.numberToServe, self.db)
        self.userAnalyzer = UserAnalyzer()
        self.modelStore = ModelStore()
        self.offlineLearner = OfflineLearner(self.db, self.modelStore)
        self.onlineLearner = OnlineLearner(self.db, self.modelStore)
        # when we start the webserver, let offline learner train the models,
        # so that after the start(), we can start to give recommendation
        self.offlineLearner.trainModel()
        self.recEngine = RecEngine(
            self.userAnalyzer,
            self.modelStore,
            self.db.extract(DatabaseInterface.USER_ACTIVITY_KEY))

    def getAction(self, action):
        """Consume one user action.

        Every action trains the online learner; only actions that the user
        analyzer labels "registered" are logged and written to the database.

        Args:
            action: an ``Action`` instance.
        """
        assert (isinstance(action, Action))
        # taking the action from users
        self.onlineLearner.trainModel(action)
        # analyze action type, and save the registered user's action
        actionType = self.userAnalyzer.analyzeAction(action)
        if actionType == "registered":
            # The action is a logger argument rather than a "%"-formatted
            # string: formatting happens only if the record is emitted, and
            # a tuple-like action cannot break the format call.
            self.log.info("Recording action %s", action)
            self.db.putAction(action)

    def provideRecommendation(self, request):
        """Return the ids of the recommended items for ``request``.

        Args:
            request: a ``Request`` instance.

        Returns:
            The result of ``Ranker.rerank`` applied to the engine's
            recommendations (a list of item ids).
        """
        assert (isinstance(request, Request))
        # provide recommendations to user
        self.log.info("responding to request: %s", request)
        recommendations = self.recEngine.provideRecommendation(request)
        recsReranked = self.ranker.rerank(recommendations)
        return recsReranked  # a list of item ids

    def renderRecommendation(self, request):
        """Return the inventory entries of the recommended items.

        Args:
            request: a ``Request`` instance.

        Returns:
            The inventory rows selected by the recommended ids, ordered by
            index (id) rather than by rank.
        """
        assert (isinstance(request, Request))
        recsReranked = self.provideRecommendation(request)
        # for the purpose of testing, we sort the index, output item names
        # output is ordered by the id value
        return self.getFromInventory(recsReranked).sort_index()

    def increment(self):
        """Advance the whole system by one day and refresh the models."""
        self.log.info("incrementing the system, update the models")
        # increment the whole system by one day, trigger offline training
        self.offlineLearner.trainModel()
        self.modelStore.cleanOnlineModel()
        self.recEngine.resetCache()

    def getFromInventory(self, itemId):
        """Look up ``itemId`` (one label or a list of labels) in the inventory table."""
        return self.db.extract(DatabaseInterface.INVENTORY_KEY).loc[itemId]