コード例 #1
0
    def predict_for_scores(self, scores):
        r"""
            Predict discrete choices from the scores of the objects in each query set.

            Binary discrete choice vector :math:`y` represents the choices amongst the objects in :math:`Q`, such that
            :math:`y(k) = 1` represents that the object :math:`x_k` is chosen and :math:`y(k) = 0` represents
            it is not chosen. For the choice to be discrete :math:`\sum_{x_i \in Q} y(i) = 1`. For every query set
            the object with the highest score (argmax along axis 1) is the one chosen.

            Parameters
            ----------
            scores : dict or numpy array
                Dictionary with a mapping from query set size to numpy arrays
                or a single numpy array containing the scores of each object, of size:
                (n_instances, n_objects)


            Returns
            -------
            Y : dict or numpy array
                Dictionary with a mapping from query set size to numpy arrays
                or a single numpy array containing the predicted discrete choice vectors, i.e. the
                output of ``convert_to_label_encoding`` for the argmax indices (documented by the
                original author as being of size (n_instances, n_objects))
        """
        def _choose(object_scores, n_objects):
            # Index of the best-scoring object of every query set, then encoded
            # by the project helper.
            winners = object_scores.argmax(axis=1)
            return convert_to_label_encoding(winners, n_objects)

        if isinstance(scores, dict):
            # The dictionary key is used as the number of objects for its array.
            return {size: _choose(s, size) for size, s in scores.items()}
        # Single array: the number of objects is the length of the last axis.
        return _choose(scores, scores.shape[-1])
コード例 #2
0
    def predict_for_scores(self, scores):
        """ Predict the discrete choice for each set of objects from its scores.

        The chosen object of every row is the one with the highest score
        (argmax along axis 1); the resulting indices are then passed through
        ``convert_to_label_encoding``.

        Parameters
        ----------
        scores : dict or numpy array
            Dictionary with a mapping from size of the choice set to numpy arrays
            or a single numpy array containing the scores of each object, of size:
            (n_instances, n_objects)


        Returns
        -------
        Y : dict or numpy array
            Dictionary with a mapping from size of the choice set to numpy arrays
            or a single numpy array containing the encoded discrete choices.
            NOTE(review): the shape is whatever ``convert_to_label_encoding``
            returns; the previous docstring stated (n_instances, 1) -- confirm.
        """
        if not isinstance(scores, dict):
            # Single array: the last axis enumerates the objects of a choice set.
            n_objects = scores.shape[-1]
            best = scores.argmax(axis=1)
            return convert_to_label_encoding(best, n_objects)

        choices = {}
        for set_size, set_scores in scores.items():
            # The dictionary key doubles as the number of objects.
            best = set_scores.argmax(axis=1)
            choices[set_size] = convert_to_label_encoding(best, set_size)
        return choices
コード例 #3
0
def trivial_discrete_choice_problem():
    """Create a small, deterministic discrete choice problem.

    Draws 500 instances of 5 objects with 2 features each from a standard
    normal distribution (fixed seed 42), scores every object with a random
    linear weight vector and marks the highest-scoring object of each
    instance as the chosen one.

    Returns
    -------
    x : numpy array
        Object features of size (500, 5, 2)
    y_true : numpy array
        Chosen objects, encoded by ``convert_to_label_encoding``
    """
    n_instances, n_objects, n_features = 500, 5, 2
    rng = np.random.RandomState(42)
    # Draw order matters for reproducibility: features first, then weights.
    features = rng.randn(n_instances, n_objects, n_features)
    weights = rng.rand(n_features)
    utilities = features.dot(weights)
    chosen = utilities.argmax(axis=1)
    return features, convert_to_label_encoding(chosen, n_objects)
コード例 #4
0
        def dataset_generator(n_instances, n_objects, seed, **kwargs):
            """Generate a critique-fit discrete choice dataset from the movie features.

            For every movie, ``length`` query sets are built. Each set holds the
            ``n_objects - 2`` movies with the largest critique-fit value, one
            movie drawn from the lowest 5 percent of critique-fit values, and
            the movie itself. ``n_instances`` of these sets are then sampled
            without replacement and the object with the smallest critique-fit
            value in each set is taken as the choice.

            ``self`` and ``direction`` are read from the enclosing scope.

            Parameters
            ----------
            n_instances : int
                Number of query sets to return
            n_objects : int
                Number of objects in each query set
            seed : int or random state
                Passed to ``check_random_state``
            **kwargs
                Accepted but not used here

            Returns
            -------
            X : numpy array
                Features of the objects of the sampled query sets
            Y : numpy array
                Choices encoded by ``convert_to_label_encoding``
            """
            self.logger.info(
                'For instances {} objects {}, seed {}, direction {}'.format(
                    n_instances, n_objects, seed, direction))
            random_state = check_random_state(seed)
            X = []
            scores = []
            # Query sets generated per movie. The +1 keeps the total above
            # n_instances (assuming movie_features has n_movies rows), which the
            # sampling without replacement further down relies on.
            length = (int(n_instances / self.n_movies) + 1)
            popular_tags = self.get_genre_tag_id()
            for i, feature in enumerate(self.movie_features):
                # Candidate tags to critique on: mid-range feature values for
                # direction 1, values above one half otherwise.
                if direction == 1:
                    quartile_tags = np.where(
                        np.logical_and(feature >= 1 / 3, feature < 2 / 3))[0]
                else:
                    quartile_tags = np.where(feature > 1 / 2)[0]
                # Too few candidates: fall back to the genre tag ids.
                if len(quartile_tags) < length:
                    quartile_tags = popular_tags
                tag_ids = random_state.choice(quartile_tags, size=length)
                # Similarity-matrix entries between movie i and every movie j.
                distances = [
                    self.similarity_matrix[get_key_for_indices(i, j)]
                    for j in range(self.n_movies)
                ]
                critique_d = critique_dist(feature,
                                           self.movie_features,
                                           tag_ids,
                                           direction=direction,
                                           relu=False)
                # Element-wise product; used below as one row per sampled tag
                # and one column per movie -- presumably (length, n_movies),
                # TODO confirm against critique_dist.
                critique_fit = np.multiply(critique_d, distances)
                # Movie indices per row, sorted by decreasing critique fit.
                orderings = np.argsort(critique_fit, axis=-1)[:, ::-1]
                minimum = np.zeros(length, dtype=int)
                for k, dist in enumerate(critique_fit):
                    # Movies whose critique fit lies between the 0th and 5th
                    # percentile of this row.
                    quartile = np.percentile(dist, [0, 5])
                    last = np.where(
                        np.logical_and((dist >= quartile[0]),
                                       (dist <= quartile[1])))[0]
                    # Movie i is appended to every set below, so it must not
                    # also be drawn as the low-fit object.
                    if i in last:
                        index = np.where(last == i)[0][0]
                        last = np.delete(last, index)
                    minimum[k] = random_state.choice(last, size=1)[0]
                # Keep the n_objects - 2 best-fitting movies, then add the
                # low-fit movie and movie i itself.
                orderings = orderings[:, 0:n_objects - 2]
                orderings = np.append(orderings, minimum[:, None], axis=1)
                orderings = np.append(orderings,
                                      np.zeros(length, dtype=int)[:, None] + i,
                                      axis=1)
                # Shuffle each row in place so the position carries no signal.
                for o in orderings:
                    random_state.shuffle(o)
                # Gather, row by row, the critique fit of the selected movies.
                scores.extend(critique_fit[np.arange(length)[:, None],
                                           orderings])
                X.extend(self.movie_features[orderings])
            X = np.array(X)
            scores = np.array(scores)
            # Down-sample to exactly n_instances distinct query sets.
            indices = random_state.choice(X.shape[0],
                                          n_instances,
                                          replace=False)
            X = X[indices, :, :]
            scores = scores[indices, :]

            # The chosen object is the one with the smallest critique fit.
            Y = scores.argmin(axis=1)
            Y = convert_to_label_encoding(Y, n_objects)
            return X, Y
コード例 #5
0
 def __init__(self, random_state=None, **kwargs):
     """Set up the reader for the discrete choice learning problem.

     The parent initializer is called with ``learning_problem=DISCRETE_CHOICE``
     and is expected to populate ``self.X`` and ``self.Y``, both of which are
     read below. ``self.Y`` is then replaced by the encoded choice: the index
     of the minimum of each row of ``self.Y``.

     Parameters
     ----------
     random_state : int, RandomState instance or None
         Passed to ``check_random_state``
     **kwargs
         Forwarded to the parent initializer
     """
     super(SushiDiscreteChoiceDatasetReader, self).__init__(
         learning_problem=DISCRETE_CHOICE, **kwargs
     )
     self.random_state = check_random_state(random_state)
     # Index of the smallest value in every row of Y becomes the choice.
     chosen = np.argmin(self.Y, axis=1)
     n_objects = self.X.shape[1]
     self.Y = convert_to_label_encoding(chosen, n_objects)
コード例 #6
0
 def dataset_generator(n_instances, n_objects, seed, **kwargs):
     """Build a critique-fit dataset and turn its scores into discrete choices.

     Delegates to the parent class' ``make_critique_fit_dataset`` (using
     ``self`` and ``direction`` from the enclosing scope) and selects, for
     every instance, the object with the highest score.

     Returns
     -------
     X : object features as returned by the parent method
     Y : choices encoded by ``convert_to_label_encoding``
     """
     parent = super(TagGenomeDiscreteChoiceDatasetReader, self)
     features, fit_scores = parent.make_critique_fit_dataset(
         n_instances=n_instances,
         n_objects=n_objects,
         seed=seed,
         direction=direction)
     winners = fit_scores.argmax(axis=1)
     return features, convert_to_label_encoding(winners, n_objects)
コード例 #7
0
 def make_nearest_neighbour_dataset(self, n_instances, n_objects, seed,
                                    **kwargs):
     """Build a nearest-neighbour dataset with discrete choice labels.

     Delegates to the parent class' ``make_nearest_neighbour_dataset`` and
     chooses, for every instance, the object with the second-highest score.

     Returns
     -------
     X : object features as returned by the parent method
     Y : choices encoded by ``convert_to_label_encoding``
     """
     parent = super(TagGenomeDiscreteChoiceDatasetReader, self)
     features, similarity = parent.make_nearest_neighbour_dataset(
         n_instances=n_instances,
         n_objects=n_objects,
         seed=seed)
     # argsort is ascending, so column -2 holds the index of the object with
     # the second-highest score (per the original note, scores are
     # similarities, so this is the second most similar object).
     ascending = np.argsort(similarity, axis=1)
     runner_up = ascending[:, -2]
     return features, convert_to_label_encoding(runner_up, n_objects)