예제 #1
0
    def __init__(self, model_prefix='model', riter=10):
        """Load a saved OLDA model and prepare the shuffled vector sets.

        model_prefix -- prefix handed to ``OLDA.load_model``.
        riter        -- number of ``ShuffleVectors`` objects to build; also
                        kept on the instance as ``shuffle_iteration``.
        """
        # Plain bookkeeping attributes; none of these touch the model.
        self.shuffle_iteration = riter
        self.user_items = []
        self.shufflled_user_topic_vectors = []  # (sic) name is read elsewhere

        # Model-backed state. The model must be loaded before anything is
        # read from it.
        self.olda = OLDA()
        self.olda.load_model(model_prefix)
        self.examples = self.olda.resource_pool.examples
        self.user_topic_vectors = self.report_theta()
        self.blocks = self.olda.make_block_list()

        # Build the shuffled vectors: one ShuffleVectors per round, each
        # constructed from the same user/topic matrix.
        self.shufflled_user_topic_vectors.extend(
            ShuffleVectors(self.user_topic_vectors) for _ in range(riter)
        )
예제 #2
0
class Recommend:
    """Recommend items to a user from the items of topically similar users.

    The class wraps a loaded ``OLDA`` model. Per-user topic vectors are read
    from the model's ``.theta`` file and handed to ``riter`` ``ShuffleVectors``
    objects; those objects are later asked for the users similar to a newly
    inferred topic vector, and the items of those users become candidates.

    NOTE(review): ``OLDA`` and ``ShuffleVectors`` are defined outside this
    class; what their methods return is assumed from how the results are
    used here -- confirm against their definitions.
    """

    def __init__(self, model_prefix='model', riter=10):
        """Load the model stored under *model_prefix* and build the indexes.

        model_prefix -- prefix handed to ``OLDA.load_model``.
        riter        -- number of ``ShuffleVectors`` objects to build; also
                        kept on the instance as ``shuffle_iteration``.
        """
        self.olda = OLDA()
        self.olda.load_model(model_prefix)
        # Indexed by user id in __get_items; each entry exposes ``.features``.
        self.examples = self.olda.resource_pool.examples
        self.shuffle_iteration = riter
        self.user_topic_vectors = self.report_theta()
        self.user_items = []
        # Items found in this container are never recommended.
        self.blocks = self.olda.make_block_list()

        # Get the shuffled vectors: one ShuffleVectors per round, all built
        # from the same user/topic matrix. (The attribute name keeps its
        # original spelling so existing readers of it keep working.)
        self.shufflled_user_topic_vectors = [
            ShuffleVectors(self.user_topic_vectors) for _ in range(riter)
        ]

    def report_theta(self):
        """Read ``<model_prefix>.theta`` into a list of float lists.

        Every line of the file becomes one row; values are separated by a
        single space and trailing whitespace is ignored.

        Raises ValueError when a field is not a valid float (this includes
        blank lines), and the usual I/O error when the file cannot be opened.
        """
        path = self.olda.model_prefix + '.theta'
        # ``with`` closes the handle even if a row fails to parse.
        with open(path, 'r') as theta_file:
            # Build real lists (not lazy ``map`` objects) so that rows can be
            # indexed and iterated more than once on Python 3 as well.
            return [
                [float(value) for value in row.rstrip().split(" ")]
                for row in theta_file
            ]

    def innerproduct(self, vec_one, vec_two):
        """Return how many entries of *vec_one* also occur in *vec_two*.

        Duplicates in *vec_one* are counted once per occurrence. Despite the
        name this is an overlap count, not an arithmetic inner product.
        """
        return sum(1 for entry in vec_one if entry in vec_two)

    def get_recommend_items(self, line):
        """Return recommended items for the user described by *line*.

        *line* has the form ``"<user id>\\t<words>"``. The whole line is
        passed to the model for inference; only the user id is parsed here.

        Candidates are the items of similar users that share at least one
        entry with the input, excluding anything already in the input or
        already owned by the target user, and anything in ``self.blocks``.

        Raises ValueError when *line* does not contain exactly one tab or the
        user id is not an integer.
        """
        # The words part is consumed by the model through ``line`` itself;
        # the unpacking is kept to validate the two-field format.
        target_uid_str, _ = line.rstrip().split("\t")
        target_uid = int(target_uid_str)
        uvecter = self.olda.inference(line, 1000)   # online learning
        # Must be read after inference -- presumably inference() fills
        # new_resource_pool with the input example (original: "need rewrite").
        wvecter = self.olda.new_resource_pool.examples[0]
        # Users who have the same interest as the input user.
        sim_users = self.__get_similar_users(uvecter)

        # Tally the items of every similar user that overlaps with the input.
        items = {}
        for uid in sim_users:
            titems = self.__get_items(uid)
            if self.innerproduct(wvecter, titems) < 1:
                continue
            for item in titems:
                items[item] = items.get(item, 0) + 1

        # NOTE(review): the tallies are not used for ranking; candidates are
        # visited in the dict's own iteration order, as in the original code.
        rt_items = []
        considered = 0
        for itm in items:
            if (itm in wvecter) or (itm in self.__get_items(target_uid)):
                continue

            # Cap on examined candidates. Blocked items count towards it, and
            # ``> 20`` lets 21 candidates through -- both kept as they were.
            if considered > 20:
                break

            if itm not in self.blocks:
                rt_items.append(itm)

            considered += 1

        return rt_items

    def __get_similar_users(self, uvecter):
        """Return the distinct users reported as similar to *uvecter*.

        Every one of the first ``shuffle_iteration`` shuffled vector sets is
        queried and the answers are merged without duplicates.
        """
        users = set()
        for i in range(self.shuffle_iteration):
            users.update(
                self.shufflled_user_topic_vectors[i].get_similar_vectors(uvecter)
            )

        return list(users)  # uniq

    def __get_items(self, user_id):
        """Return the ``features`` of the stored example for *user_id*."""
        return self.examples[user_id].features