def __init__(self,
             data,
             train_ratio=0.99,
             ratio=0.1,
             pool_size=10,
             hash_count=8):
    """Run the parent constructor, then build and fit the LSH tables.

    :param data: forwarded unchanged to the parent constructor.
    :param train_ratio: forwarded unchanged to the parent constructor.
    :param ratio: forwarded unchanged to the parent constructor.
    :param pool_size: argument handed to every ``LSH`` constructor.
    :param hash_count: number of ``LSH`` instances to create; also stored
        on the instance as ``self.hash_count``.
    """
    super().__init__(data, train_ratio, ratio)

    self.hash_count = hash_count
    self.lsh_family = []
    for _ in range(hash_count):
        table = LSH(pool_size)
        self.lsh_family.append(table)
        # Fit each table to the column count of the training data.
        # NOTE(review): ``self.train_data`` is presumably populated by the
        # parent constructor -- confirm against the base class.
        table.fit(self.train_data.shape[1])
Beispiel #2
0
 def __init__(self,
              data,
              train_ratio=0.95,
              ratio=0.1,
              pool_size=10,
              hash_count=8):
     """Run the parent constructor, then build and fit the LSH tables.

     :param data: forwarded unchanged to the parent constructor.
     :param train_ratio: forwarded unchanged to the parent constructor.
     :param ratio: forwarded unchanged to the parent constructor.
     :param pool_size: argument handed to every ``LSH`` constructor.
     :param hash_count: number of ``LSH`` instances to create; also stored
         on the instance as ``self.hash_count``.
     """
     super().__init__(data, train_ratio, ratio)
     # Second axis of ``self.data`` is recorded as the service count.
     # NOTE(review): ``self.data`` and ``self.num_of_train`` are presumably
     # set by the parent constructor -- confirm against the base class.
     self.num_of_services = self.data.shape[1]
     self.hash_count = hash_count
     self.lsh_family = []
     for _ in range(hash_count):
         table = LSH(pool_size)
         self.lsh_family.append(table)
         # Original author's note: what is fitted here follows a different
         # convention from UCFRecommender (it uses ``self.num_of_train``).
         table.fit(self.num_of_train)
 def __init__(self,
              data,
              ratio=0.1,
              num_hash_functions=10,
              num_hash_tables=8):
     """Store the raw data, derive the erased copy and build the LSH tables.

     :param data: input data; kept untouched as ``self.raw_data``.
     :param ratio: NOTE(review): accepted but never read or stored in this
         constructor -- confirm whether ``randomErased`` was meant to
         receive it.
     :param num_hash_functions: argument handed to every ``LSH``
         constructor.
     :param num_hash_tables: number of ``LSH`` instances to create.
     """
     self.num_hash_tables = num_hash_tables
     self.num_hash_functions = num_hash_functions
     self.raw_data = data
     # Working copy produced by the class's own random-erase step; it must
     # run after the attributes above are in place.
     self.data = self.randomErased()
     # Build the family of LSH tables.
     self.lsh_family = []
     for _ in range(num_hash_tables):
         table = LSH(num_hash_functions)
         self.lsh_family.append(table)
         # Initialise the hash-function parameters from the column count
         # of the erased data.
         table.fit(self.data.shape[1])
Beispiel #4
0
    def __init__(self, data, num_of_functions=4, num_of_tables=8, seed=1):
        """Record the data dimensions and build one seeded LSH table per slot.

        :param data: two-dimensional array; its shape is unpacked as
            ``(num_of_users, num_of_services)``.
        :param num_of_functions: argument handed to every ``LSH``
            constructor.
        :param num_of_tables: number of ``LSH`` instances to create.
        :param seed: value passed to the first table's ``fit``; each
            following table receives the previous value plus one.
        """
        self.data = data
        self.num_of_users, self.num_of_services = data.shape
        # Build the LSH tables.
        self.num_of_tables = num_of_tables
        self.lsh_family = []

        table_seed = seed
        for _ in range(num_of_tables):
            table = LSH(num_of_functions)
            self.lsh_family.append(table)
            table.fit(self.num_of_users, table_seed)
            # Step the seed so that no two tables are fitted with the same one.
            table_seed += 1
Beispiel #5
0
    def __init__(self, data, num_of_functions=4, num_of_tables=8, seed=1):
        """Record the data dimensions and build one seeded LSH table per slot.

        :param data: two-dimensional array; its shape is unpacked as
            ``(num_of_users, num_of_services)``.
        :param num_of_functions: argument handed to every ``LSH``
            constructor.
        :param num_of_tables: number of ``LSH`` instances to create.
        :param seed: seed for the random function; passed to the first
            table's ``fit`` and incremented by one for each later table.
        """
        self.data = data
        self.num_of_users, self.num_of_services = data.shape
        # Build the LSH tables.
        self.num_of_tables = num_of_tables
        self.lsh_family = []

        table_seed = seed
        for _ in range(num_of_tables):
            table = LSH(num_of_functions)
            self.lsh_family.append(table)
            table.fit(self.num_of_users, table_seed)
            # Step the seed so that no two tables are fitted with the same one.
            table_seed += 1
    def __init__(self, data, num_of_functions=4, num_of_tables=8, seed=1, threshold=0):
        """Record the data dimensions and build one seeded LSH table per slot.

        :param data: test data; a two-dimensional array whose shape is
            unpacked as ``(num_of_users, num_of_services)``.
        :param num_of_functions: argument handed to every ``LSH``
            constructor.
        :param num_of_tables: number of ``LSH`` instances to create.
        :param seed: value passed to the first table's ``fit``; each
            following table receives the previous value plus one.
        :param threshold: threshold for finding similar users; the default
            of 0 is described by the original author as matching the
            traditional LSH method.
            NOTE(review): not read or stored anywhere in this constructor --
            confirm where it is meant to take effect.
        """
        self.data = data
        self.num_of_users, self.num_of_services = data.shape
        # Build the LSH tables.
        self.num_of_tables = num_of_tables
        self.lsh_family = []

        table_seed = seed
        for _ in range(num_of_tables):
            table = LSH(num_of_functions)
            self.lsh_family.append(table)
            table.fit(self.num_of_users, table_seed)
            # Step the seed so that no two tables are fitted with the same one.
            table_seed += 1
    def __init__(self, data, ratio=0.8,
                 pool_size=10, hash_count=8):
        """Prepare the rating data, pick a test user and build the LSH tables.

        :param data: rating data read from file, as a matrix (per the
            original author: one row per user, one column per item).
        :param ratio: share of the data to keep so that predictions can be
            evaluated; per the original author the rest is zeroed out. It is
            passed straight to ``self.sample``.
        :param pool_size: number of (binary) digits in the hash value each
            LSH returns, per the original author; handed to every ``LSH``
            constructor.
        :param hash_count: number of ``LSH`` instances to create.

        The test user index is not a parameter: it is drawn at random from
        the row indices of ``self.data`` and stored as ``self.user``.
        """
        self.data = data
        self.processed_data = np.copy(self.data)

        # The steps below stay in this order: ``sample`` and
        # ``remove_similar_users`` are defined elsewhere and may modify the
        # instance's data before it is read again.
        self.sample(ratio)

        row_count = self.data.shape[0]
        self.user = np.random.randint(0, row_count)
        self.remove_similar_users()

        self.hash_count = hash_count
        self.lsh_family = []
        for _ in range(hash_count):
            table = LSH(pool_size)
            self.lsh_family.append(table)
            # Original author's note: this is for looking up user
            # similarity; the table is fitted to the column count.
            table.fit(self.data.shape[1])