def predict_for_scores(self, scores):
    r"""
        Turn object scores into binary discrete choice vectors.

        A binary discrete choice vector :math:`y` encodes the choice amongst the objects in :math:`Q`:
        :math:`y(k) = 1` means that the object :math:`x_k` is chosen and :math:`y(k) = 0` means it is not.
        The choice is discrete, i.e. :math:`\sum_{x_i \in Q} y(i) = 1`. For every instance the object
        with the highest score is selected.

        Parameters
        ----------
        scores : dict or numpy array
            Either a dictionary mapping the query set size to a numpy array of scores, or a single
            numpy array of scores, each array of size:
            (n_instances, n_objects)

        Returns
        -------
        Y : dict or numpy array
            Either a dictionary mapping the query set size to the predicted discrete choice vectors, or
            a single numpy array of predicted discrete choice vectors, of size:
            (n_instances, n_objects)
    """

    def _encode_best(score_matrix, n_objects):
        # Index of the top-scoring object per instance, handed to the label encoder.
        return convert_to_label_encoding(score_matrix.argmax(axis=1), n_objects)

    if not isinstance(scores, dict):
        # Single array: the number of objects is the size of the last axis.
        return _encode_best(scores, scores.shape[-1])

    # Dictionary input: the key is used as the number of objects of its array.
    return {size: _encode_best(matrix, size) for size, matrix in scores.items()}
def predict_for_scores(self, scores):
    """
        Predict the discrete choice for each set of objects from its scores.

        For every instance the index of the highest-scoring object is computed and passed to
        ``convert_to_label_encoding`` together with the number of objects.

        Parameters
        ----------
        scores : dict or numpy array
            Either a dictionary mapping the size of the choice set to a numpy array of scores, or a
            single numpy array of scores, each array of size:
            (n_instances, n_objects)

        Returns
        -------
        Y : dict or numpy array
            Either a dictionary mapping the size of the choice set to the encoded discrete choices, or
            a single numpy array of encoded discrete choices. The exact output shape is whatever
            ``convert_to_label_encoding`` produces (TODO confirm against that helper).
    """
    if isinstance(scores, dict):
        # The dictionary key doubles as the number of objects of its score array.
        return {
            size: convert_to_label_encoding(matrix.argmax(axis=1), size)
            for size, matrix in scores.items()
        }

    # Single array: the number of objects is the size of the last axis.
    winners = scores.argmax(axis=1)
    return convert_to_label_encoding(winners, scores.shape[-1])
def trivial_discrete_choice_problem():
    """
        Build a small, reproducible discrete choice problem.

        Features are drawn from a standard normal distribution with a fixed seed; the chosen object of
        each instance is the one with the largest linear utility under a random weight vector.

        Returns
        -------
        x : numpy array
            Object features of size (500, 5, 2)
        y_true : numpy array
            Chosen objects, encoded by ``convert_to_label_encoding`` for 5 objects
    """
    n_instances, n_objects, n_features = 500, 5, 2
    rng = np.random.RandomState(42)
    # Draw order matters for reproducibility: features first, then weights.
    x = rng.randn(n_instances, n_objects, n_features)
    w = rng.rand(n_features)
    utilities = np.dot(x, w)
    chosen = utilities.argmax(axis=1)
    return x, convert_to_label_encoding(chosen, n_objects)
def dataset_generator(n_instances, n_objects, seed, **kwargs):
    """
        Generate a critique-fit discrete choice dataset.

        For every movie a batch of object sets is built from the movies with the highest critique fit,
        one randomly picked low-fit movie and the movie itself; ``n_instances`` of these sets are then
        sampled. The object with the lowest critique-fit score in each set is the chosen one.

        NOTE(review): ``self`` and ``direction`` are not parameters; they are presumably taken from the
        enclosing scope, which is not visible here.

        Parameters
        ----------
        n_instances : int
            Number of object sets to return
        n_objects : int
            Number of objects in each set
        seed : int or RandomState
            Passed to ``check_random_state``
        kwargs :
            Accepted but not used

        Returns
        -------
        X : numpy array
            Features of the sampled object sets
        Y : numpy array
            Choices encoded by ``convert_to_label_encoding``
    """
    self.logger.info(
        'For instances {} objects {}, seed {}, direction {}'.format(
            n_instances, n_objects, seed, direction))
    random_state = check_random_state(seed)
    X = []
    scores = []
    # Number of object sets generated per movie, so that at least n_instances sets exist in total.
    length = (int(n_instances / self.n_movies) + 1)
    popular_tags = self.get_genre_tag_id()
    for i, feature in enumerate(self.movie_features):
        # Candidate tags: mid-range feature values for direction 1, values above 1/2 otherwise.
        if direction == 1:
            quartile_tags = np.where(
                np.logical_and(feature >= 1 / 3, feature < 2 / 3))[0]
        else:
            quartile_tags = np.where(feature > 1 / 2)[0]
        # Fall back to the genre tags when there are fewer candidates than sets to generate.
        if len(quartile_tags) < length:
            quartile_tags = popular_tags
        # One critiqued tag per generated set.
        tag_ids = random_state.choice(quartile_tags, size=length)
        # Similarity-matrix entries between movie i and every movie.
        distances = [
            self.similarity_matrix[get_key_for_indices(i, j)]
            for j in range(self.n_movies)
        ]
        # presumably one row per tag id and one column per movie; verify against critique_dist
        critique_d = critique_dist(feature, self.movie_features, tag_ids, direction=direction, relu=False)
        critique_fit = np.multiply(critique_d, distances)
        # Movie indices sorted by decreasing critique fit.
        orderings = np.argsort(critique_fit, axis=-1)[:, ::-1]
        minimum = np.zeros(length, dtype=int)
        for k, dist in enumerate(critique_fit):
            # Movies whose fit lies between the 0th and 5th percentile of this row.
            quartile = np.percentile(dist, [0, 5])
            last = np.where(
                np.logical_and((dist >= quartile[0]), (dist <= quartile[1])))[0]
            # Movie i is appended separately below, so it must not be picked here.
            if i in last:
                index = np.where(last == i)[0][0]
                last = np.delete(last, index)
            minimum[k] = random_state.choice(last, size=1)[0]
        # Each set: the (n_objects - 2) best-fitting movies, one low-fit movie and movie i itself.
        orderings = orderings[:, 0:n_objects - 2]
        orderings = np.append(orderings, minimum[:, None], axis=1)
        orderings = np.append(orderings, np.zeros(length, dtype=int)[:, None] + i, axis=1)
        # Shuffle every row in place so the object position carries no information.
        for o in orderings:
            random_state.shuffle(o)
        scores.extend(critique_fit[np.arange(length)[:, None], orderings])
        X.extend(self.movie_features[orderings])
    X = np.array(X)
    scores = np.array(scores)
    # Keep exactly n_instances distinct sets out of all generated ones.
    indices = random_state.choice(X.shape[0], n_instances, replace=False)
    X = X[indices, :, :]
    scores = scores[indices, :]
    # The object with the lowest critique-fit score is the chosen one.
    Y = scores.argmin(axis=1)
    Y = convert_to_label_encoding(Y, n_objects)
    return X, Y
def __init__(self, random_state=None, **kwargs):
    """
        Set up the reader for the discrete choice learning problem.

        After the parent initialiser has run, ``self.Y`` is replaced by the encoded choice: for each
        instance the position of the minimum of ``self.Y`` is taken and passed to
        ``convert_to_label_encoding``.

        Parameters
        ----------
        random_state : int, RandomState or None
            Passed to ``check_random_state``
        kwargs :
            Forwarded to the parent initialiser
    """
    super(SushiDiscreteChoiceDatasetReader, self).__init__(
        learning_problem=DISCRETE_CHOICE, **kwargs
    )
    self.random_state = check_random_state(random_state)
    # Position of the minimum per instance is the chosen object.
    chosen = np.argmin(self.Y, axis=1)
    n_objects = self.X.shape[1]
    self.Y = convert_to_label_encoding(chosen, n_objects)
def dataset_generator(n_instances, n_objects, seed, **kwargs):
    """
        Generate a critique-fit dataset and encode the highest-scoring object as the choice.

        NOTE(review): ``self`` and ``direction`` are not parameters; they are presumably taken from the
        enclosing scope, which is not visible here.

        Parameters
        ----------
        n_instances : int
            Number of object sets to generate
        n_objects : int
            Number of objects in each set
        seed : int
            Seed forwarded to the parent generator
        kwargs :
            Accepted but not used

        Returns
        -------
        X : features returned by the parent generator
        Y : choices encoded by ``convert_to_label_encoding``
    """
    parent = super(TagGenomeDiscreteChoiceDatasetReader, self)
    X, scores = parent.make_critique_fit_dataset(
        n_instances=n_instances,
        n_objects=n_objects,
        seed=seed,
        direction=direction,
    )
    best = scores.argmax(axis=1)
    return X, convert_to_label_encoding(best, n_objects)
def make_nearest_neighbour_dataset(self, n_instances, n_objects, seed, **kwargs):
    """
        Generate a nearest-neighbour dataset and encode the second most similar object as the choice.

        Parameters
        ----------
        n_instances : int
            Number of object sets to generate
        n_objects : int
            Number of objects in each set
        seed : int
            Seed forwarded to the parent generator
        kwargs :
            Accepted but not forwarded

        Returns
        -------
        X : features returned by the parent generator
        Y : choices encoded by ``convert_to_label_encoding``
    """
    parent = super(TagGenomeDiscreteChoiceDatasetReader, self)
    X, scores = parent.make_nearest_neighbour_dataset(
        n_instances=n_instances, n_objects=n_objects, seed=seed
    )
    # The higher the similarity, the lower the rank of the object. The ascending sort puts the
    # object with the second highest similarity in the second-to-last column.
    ascending = np.argsort(scores, axis=1)
    second_most_similar = ascending[:, -2]
    return X, convert_to_label_encoding(second_most_similar, n_objects)