class PairwiseSampler(object):
    """Serve training rows paired with a randomly sampled unseen item id.

    Training rows come from ``Data().get_train_data`` and the per-user item
    collections from ``Dataset(data_name).get_user_movie``.  For every
    training row the third column is replaced by an item id that is not
    contained in that user's collection.

    Args:
        batch_size: Maximum number of rows per yielded batch.
        data_name: Dataset identifier forwarded to ``Data`` / ``Dataset``.
        num_neg: Stored on the instance; not used by this class.
    """

    def __init__(self, batch_size=512, data_name="ml_100k", num_neg=1):
        self.batch_size = batch_size
        self.data_name = data_name
        self.data = Data()
        self.num_neg = num_neg
        self.dataset = Dataset(data_name=self.data_name)

    def get_train_data(self):
        """Return the training rows with column 2 replaced by a sampled id.

        Column 0 of each row is used as the key into the mapping returned by
        ``Dataset.get_user_movie``.  Candidate ids are drawn uniformly from
        ``1 .. get_max_movie_id()`` and redrawn until one is found that is not
        in that user's collection.

        Returns:
            A new array with the same shape as the loader's training data.
            The loader's own array is left untouched.
        """
        user_movie = self.dataset.get_user_movie()
        num_item = self.dataset.get_max_movie_id()
        # Copy first: the sampled ids are written into column 2, and the
        # loader may hand out the same array object on every call.
        data_value = np.array(
            self.data.get_train_data(data_name=self.data_name), copy=True)
        for row in data_value:
            seen_items = user_movie[row[0]]
            # NOTE: this loops forever if a user's collection already
            # contains every id in 1..num_item.
            candidate = np.random.choice(num_item) + 1
            while candidate in seen_items:
                candidate = np.random.choice(num_item) + 1
            # ``row`` is a view, so this writes into ``data_value``.
            row[2] = candidate
        return data_value

    def get_train_batch(self):
        """Yield consecutive slices of freshly sampled training data.

        Sampling is redone on every call; the last batch may be shorter than
        ``batch_size``.
        """
        data_value = self.get_train_data()
        total = len(data_value)
        for start in range(0, total, self.batch_size):
            yield data_value[start:min(start + self.batch_size, total)]

    def get_test_batch(self):
        """Yield consecutive slices of the test data, in loader order."""
        data_value = self.data.get_test_data(data_name=self.data_name)
        total = len(data_value)
        for start in range(0, total, self.batch_size):
            yield data_value[start:min(start + self.batch_size, total)]

    def get_batch_number(self):
        """Return how many batches ``get_train_batch`` yields (ceiling)."""
        data_value = self.data.get_train_data(data_name=self.data_name)
        return (len(data_value) + self.batch_size - 1) // self.batch_size


class PointSampler(object):
    """Serve (user, item, label) rows with sampled negatives in batches.

    Training rows come from ``Data().get_train_data``; every row is followed
    by ``num_neg`` extra rows for the same user whose item id is drawn at
    random from ids not contained in that user's entry of
    ``Dataset.get_user_movie_for_train()``.

    Args:
        batch_size: Maximum number of rows per batch.
        data_name: Dataset identifier forwarded to ``Data`` / ``Dataset``.
        num_neg: Number of sampled rows appended after each training row.
    """

    def __init__(self, batch_size=512, data_name="ml_100k", num_neg=1):
        self.batch_size = batch_size
        self.data = Data()
        self.data_name = data_name
        self.num_neg = num_neg
        self.dataset = Dataset(data_name=self.data_name)

    def get_train_data(self, keep_label=False, value_for_negative=0.):
        """Build the training array including sampled negative rows.

        Args:
            keep_label: If true, keep column 2 of each training row as its
                label; otherwise the label is set to ``1``.
            value_for_negative: Label written on every sampled row.

        Returns:
            An array of shape ``(n * (1 + num_neg), 3)`` where ``n`` is the
            number of training rows; ``(0, 3)`` when there are none.  The
            dtype is whatever numpy infers from the mixed rows.

        Side effects:
            Stores the raw training data on ``self.data_value``.
        """
        self.data_value = self.data.get_train_data(data_name=self.data_name)
        user_movie_train = self.dataset.get_user_movie_for_train()
        num_item = self.dataset.get_max_movie_id()

        rows = []
        for user_item in self.data_value:
            user, item, label = user_item[0], user_item[1], user_item[2]
            rows.append([user, item, label if keep_label else 1])
            seen_items = user_movie_train[user]
            for _ in range(self.num_neg):
                # NOTE(review): draws from 0..num_item-1, whereas
                # PairwiseSampler draws from 1..num_item -- confirm which id
                # range the dataset actually uses.
                candidate = np.random.choice(num_item)
                while candidate in seen_items:
                    candidate = np.random.choice(num_item)
                rows.append([user, candidate, value_for_negative])

        # reshape keeps the (rows, 3) layout even when there are no rows, so
        # downstream slicing/indexing works on empty training data too.
        return np.array(rows).reshape(-1, 3)

    def get_train_batch(self, shuffle=False, keep_label=False,
                        value_for_negative=0.):
        """Yield consecutive slices of freshly sampled training data.

        Args:
            shuffle: If true, permute the rows (via ``random.shuffle``)
                before slicing.
            keep_label: Forwarded to ``get_train_data``.
            value_for_negative: Forwarded to ``get_train_data``.

        Side effects:
            Stores the (possibly shuffled) array on ``self.data_value_batch``.
        """
        self.data_value_batch = self.get_train_data(
            keep_label=keep_label, value_for_negative=value_for_negative)
        if shuffle:
            index = list(range(len(self.data_value_batch)))
            random.shuffle(index)
            self.data_value_batch = self.data_value_batch[index]
        total = len(self.data_value_batch)
        for start in range(0, total, self.batch_size):
            end = min(start + self.batch_size, total)
            yield self.data_value_batch[start:end]

    def get_test_batch(self):
        """Return one batch of ``batch_size`` test rows drawn at random.

        Row indices come from ``np.random.choice`` with its default settings
        (sampling with replacement), so a row may appear more than once.
        """
        # Pass data_name so the test split matches the configured dataset,
        # like every other loader call in this class.
        test_data = self.data.get_test_data(data_name=self.data_name)
        batch_index = np.random.choice(len(test_data), size=self.batch_size)
        return test_data[batch_index, :]

    def get_batch_number(self):
        """Return how many batches ``get_train_batch`` yields.

        Each training row expands to ``1 + num_neg`` rows, and the final
        batch may be partial, hence the ceiling division.
        """
        num_train = len(self.data.get_train_data(data_name=self.data_name))
        total_rows = num_train * (1 + self.num_neg)
        return (total_rows + self.batch_size - 1) // self.batch_size