def __init__(self, model_prefix='model', riter=10):
    """Load the OLDA model and prepare the shuffled user-topic vectors.

    Args:
        model_prefix: Prefix handed to ``OLDA.load_model``.
        riter: Number of ``ShuffleVectors`` instances to build; also stored
            as ``self.shuffle_iteration``.
    """
    self.olda = OLDA()
    self.olda.load_model(model_prefix)
    pool = self.olda.resource_pool
    self.examples = pool.examples
    self.shuffle_iteration = riter
    self.user_topic_vectors = self.report_theta()
    self.shufflled_user_topic_vectors = []
    self.user_items = []
    self.blocks = self.olda.make_block_list()
    # Build one ShuffleVectors instance per round over the same
    # user-topic vectors.
    shuffled = self.shufflled_user_topic_vectors
    shuffled.extend(
        ShuffleVectors(self.user_topic_vectors) for _ in range(riter)
    )
class Recommend:
    """Recommend items to a user from the items of users with similar topic vectors.

    The instance wraps an ``OLDA`` model (project type; presumably an online
    LDA implementation -- confirm against its definition) and several
    ``ShuffleVectors`` instances that are queried for similar users.
    """

    def __init__(self, model_prefix='model', riter=10):
        """Load the OLDA model and prepare the shuffled user-topic vectors.

        Args:
            model_prefix: Prefix handed to ``OLDA.load_model``.
            riter: Number of ``ShuffleVectors`` instances to build; also
                stored as ``self.shuffle_iteration``.
        """
        self.olda = OLDA()
        self.olda.load_model(model_prefix)
        self.examples = self.olda.resource_pool.examples
        self.shuffle_iteration = riter
        self.user_topic_vectors = self.report_theta()
        self.shufflled_user_topic_vectors = []
        self.user_items = []
        self.blocks = self.olda.make_block_list()
        # Build one ShuffleVectors instance per round.
        for _ in range(riter):
            self.shufflled_user_topic_vectors.append(
                ShuffleVectors(self.user_topic_vectors))

    def report_theta(self):
        """Read ``<olda.model_prefix>.theta`` into a list of float lists.

        Each line of the file is split on single spaces and every field is
        converted with ``float``.

        Returns:
            A list with one list of floats per line of the file.

        Raises:
            IOError/OSError: If the file cannot be opened.
            ValueError: If a field is not a valid float (e.g. a blank line).
        """
        theta = []
        # ``with`` guarantees the handle is closed even if parsing fails.
        with open(self.olda.model_prefix + '.theta', 'r') as theta_file:
            for line in theta_file:
                vals = line.rstrip().split(" ")
                # A real list (not a lazy ``map`` object) so that callers
                # can index and re-iterate the rows on Python 3 as well.
                theta.append([float(val) for val in vals])
        return theta

    def innerproduct(self, vec_one, vec_two):
        """Return how many elements of ``vec_one`` also occur in ``vec_two``.

        Duplicates in ``vec_one`` are counted once per occurrence.
        """
        return sum(1 for element in vec_one if element in vec_two)

    def get_recommend_items(self, line):
        """Return items recommended for the user described by ``line``.

        Args:
            line: A string of the form ``"<user id>\\t<words>"``; it must
                contain exactly one tab after trailing whitespace is removed.

        Returns:
            A list of items collected from similar users, excluding items
            the target user already has (in the freshly inferred example or
            in ``self.examples``) and items present in ``self.blocks``.

        Raises:
            ValueError: If ``line`` does not split into exactly two
                tab-separated fields or the user id is not an integer.
        """
        # The two-name unpack doubles as a format check on ``line``.
        target_uid_str, _words_str = line.rstrip().split("\t")
        target_uid = int(target_uid_str)

        uvecter = self.olda.inference(line, 1000)  # online learning
        wvecter = self.olda.new_resource_pool.examples[0]  # need rewrite...

        # Collect the items of every similar user that shares at least one
        # item with the target's new example.
        items = {}
        for uid in self.__get_similar_users(uvecter):
            titems = self.__get_items(uid)
            if self.innerproduct(wvecter, titems) < 1:
                continue
            for item in titems:
                items[item] = items.get(item, 0) + 1

        rt_items = []
        # Fetched at most once, and only when first needed, so a lookup
        # failure for ``target_uid`` surfaces exactly when it did before.
        target_items = None
        picked = 0
        for itm in items:
            if itm in wvecter:
                continue
            if target_items is None:
                target_items = self.__get_items(target_uid)
            if itm in target_items:
                continue
            # NOTE(review): ``> 20`` lets up to 21 items through; kept as is
            # because the intended limit cannot be confirmed from here.
            if picked > 20:
                break
            if itm not in self.blocks:
                rt_items.append(itm)
                picked += 1
        return rt_items

    def __get_similar_users(self, uvecter):
        """Return the de-duplicated users similar to ``uvecter``.

        Every one of the ``self.shuffle_iteration`` shuffled vector sets is
        queried via ``get_similar_vectors`` and the results are merged.
        """
        susers = []
        for i in range(self.shuffle_iteration):
            shuffled = self.shufflled_user_topic_vectors[i]
            susers.extend(shuffled.get_similar_vectors(uvecter))
        return list(set(susers))  # uniq

    def __get_items(self, user_id):
        """Return the ``features`` of the stored example for ``user_id``."""
        return self.examples[user_id].features