def lookup_negative_items(self, negative_users, **kwargs): output = np.zeros(shape=negative_users.shape, dtype=rconst.ITEM_DTYPE) - 1 left_index = self.index_bounds[negative_users] right_index = self.index_bounds[negative_users + 1] - 1 num_positives = right_index - left_index + 1 num_negatives = self._num_items - num_positives neg_item_choice = stat_utils.very_slightly_biased_randint( num_negatives) # Shortcuts: # For points where the negative is greater than or equal to the tally before # the last positive point there is no need to bisect. Instead the item id # corresponding to the negative item choice is simply: # last_postive_index + 1 + (neg_choice - last_negative_tally) # Similarly, if the selection is less than the tally at the first positive # then the item_id is simply the selection. # # Because MovieLens organizes popular movies into low integers (which is # preserved through the preprocessing), the first shortcut is very # efficient, allowing ~60% of samples to bypass the bisection. For the same # reason, the second shortcut is rarely triggered (<0.02%) and is therefore # not worth implementing. use_shortcut = neg_item_choice >= self._total_negatives[right_index] output[use_shortcut] = ( self._sorted_train_pos_items[right_index] + 1 + (neg_item_choice - self._total_negatives[right_index]))[use_shortcut] if np.all(use_shortcut): # The bisection code is ill-posed when there are no elements. return output not_use_shortcut = np.logical_not(use_shortcut) left_index = left_index[not_use_shortcut] right_index = right_index[not_use_shortcut] neg_item_choice = neg_item_choice[not_use_shortcut] num_loops = np.max( np.ceil(np.log2(num_positives[not_use_shortcut])).astype(np.int32)) for i in range(num_loops): mid_index = (left_index + right_index) // 2 right_criteria = self._total_negatives[mid_index] > neg_item_choice left_criteria = np.logical_not(right_criteria) right_index[right_criteria] = mid_index[right_criteria] left_index[left_criteria] = mid_index[left_criteria] # Expected state after bisection pass: # The right index is the smallest index whose tally is greater than the # negative item choice index. assert np.all((right_index - left_index) <= 1) output[not_use_shortcut] = ( self._sorted_train_pos_items[right_index] - (self._total_negatives[right_index] - neg_item_choice)) assert np.all(output >= 0) return output
def lookup_negative_items(self, negative_users, **kwargs): output = np.zeros(shape=negative_users.shape, dtype=rconst.ITEM_DTYPE) - 1 left_index = self.index_bounds[negative_users] right_index = self.index_bounds[negative_users + 1] - 1 num_positives = right_index - left_index + 1 num_negatives = self._num_items - num_positives neg_item_choice = stat_utils.very_slightly_biased_randint(num_negatives) # Shortcuts: # For points where the negative is greater than or equal to the tally before # the last positive point there is no need to bisect. Instead the item id # corresponding to the negative item choice is simply: # last_postive_index + 1 + (neg_choice - last_negative_tally) # Similarly, if the selection is less than the tally at the first positive # then the item_id is simply the selection. # # Because MovieLens organizes popular movies into low integers (which is # preserved through the preprocessing), the first shortcut is very # efficient, allowing ~60% of samples to bypass the bisection. For the same # reason, the second shortcut is rarely triggered (<0.02%) and is therefore # not worth implementing. use_shortcut = neg_item_choice >= self._total_negatives[right_index] output[use_shortcut] = ( self._sorted_train_pos_items[right_index] + 1 + (neg_item_choice - self._total_negatives[right_index]) )[use_shortcut] if np.all(use_shortcut): # The bisection code is ill-posed when there are no elements. return output not_use_shortcut = np.logical_not(use_shortcut) left_index = left_index[not_use_shortcut] right_index = right_index[not_use_shortcut] neg_item_choice = neg_item_choice[not_use_shortcut] num_loops = np.max( np.ceil(np.log2(num_positives[not_use_shortcut])).astype(np.int32)) for i in range(num_loops): mid_index = (left_index + right_index) // 2 right_criteria = self._total_negatives[mid_index] > neg_item_choice left_criteria = np.logical_not(right_criteria) right_index[right_criteria] = mid_index[right_criteria] left_index[left_criteria] = mid_index[left_criteria] # Expected state after bisection pass: # The right index is the smallest index whose tally is greater than the # negative item choice index. assert np.all((right_index - left_index) <= 1) output[not_use_shortcut] = ( self._sorted_train_pos_items[right_index] - (self._total_negatives[right_index] - neg_item_choice) ) assert np.all(output >= 0) return output
def lookup_negative_items(self, negative_users, **kwargs): negative_item_choice = stat_utils.very_slightly_biased_randint( self._per_user_neg_count[negative_users]) return self._negative_table[negative_users, negative_item_choice]
def lookup_negative_items(self, negative_users, **kwargs): negative_item_choice = stat_utils.very_slightly_biased_randint( self._per_user_neg_count[negative_users]) return self._negative_table[negative_users, negative_item_choice]