def predict_for_scores(self, scores, **kwargs):
    """
        Turn object scores into rankings for a collection of sets of objects.

        Parameters
        ----------
        scores : dict or numpy array
            Either a dictionary mapping each ranking size to a numpy array of scores, or a
            single numpy array holding the score of every object, of size:
            (n_instances, n_objects)
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        Y : dict or numpy array
            A dictionary with the same keys as ``scores`` whose values are the predicted
            rankings, or a single numpy array of predicted rankings of size:
            (n_instances, n_objects)
    """
    # A plain array is converted in one go.
    if not isinstance(scores, dict):
        return scores_to_rankings(scores)
    # One conversion per ranking size; keys are kept as they are.
    return {n: scores_to_rankings(score) for n, score in scores.items()}
def predict_for_scores(self, scores, **kwargs):
    """
        Predict rankings for the scores of a given collection of sets of objects (query sets).

        The permutation vector :math:`\\pi` represents the ranking amongst the objects in
        :math:`Q`, such that :math:`\\pi(k)` is the position of the :math:`k`-th object
        :math:`x_k`, and :math:`\\pi^{-1}(k)` is the index of the object on position :math:`k`.

        Parameters
        ----------
        scores : dict or numpy array
            Dictionary with a mapping from query set size to numpy arrays, or a single numpy
            array containing the scores of each object, of size:
            (n_instances, n_objects)
        **kwargs
            Accepted for interface compatibility; not used here.

        Returns
        -------
        Y : dict or numpy array
            Dictionary with a mapping from query set size to numpy arrays, or a single numpy
            array containing the predicted rankings, of size:
            (n_instances, n_objects)
    """
    if isinstance(scores, dict):
        # Convert every per-size score array independently.
        return {
            size: scores_to_rankings(size_scores)
            for size, size_scores in scores.items()
        }
    return scores_to_rankings(scores)
def make_nearest_neighbour_dataset(self, n_instances, n_objects, seed, **kwargs):
    """Create a nearest-neighbour dataset whose targets are rankings.

    Delegates to the parent class' ``make_nearest_neighbour_dataset`` and converts the
    scores it returns into rankings with ``scores_to_rankings``.

    Parameters
    ----------
    n_instances : forwarded unchanged to the parent implementation
    n_objects : forwarded unchanged to the parent implementation
    seed : forwarded unchanged to the parent implementation
    **kwargs
        Accepted but not forwarded.

    Returns
    -------
    X, Y : tuple
        The features produced by the parent implementation and the rankings derived
        from its scores.
    """
    features, similarity = super().make_nearest_neighbour_dataset(
        n_instances=n_instances, n_objects=n_objects, seed=seed
    )
    # Higher the similarity lower the rank of the object
    return features, scores_to_rankings(similarity)
def spearman_correlation_for_scores_scipy(y_true, s_pred):
    """Mean Spearman correlation between true rankings and rankings derived from scores.

    The predicted scores are first converted with ``scores_to_rankings``; then
    ``scipy.stats.spearmanr`` is evaluated row by row.

    Parameters
    ----------
    y_true : iterable of true rankings, one per instance
    s_pred : predicted scores, converted to rankings before comparison

    Returns
    -------
    The mean of the per-row correlation coefficients, ignoring NaN entries.
    """
    y_pred = scores_to_rankings(s_pred)
    # spearmanr returns (correlation, p-value); only the correlation is kept.
    correlations = [
        spearmanr(true_row, pred_row)[0]
        for true_row, pred_row in zip(y_true, y_pred)
    ]
    return np.nanmean(np.array(correlations))
def make_gp_transitive(self, n_instances=1000, n_objects=5, noise=0.0, n_features=100,
                       kernel_params=None, seed=42, **kwd):
    """Creates a nonlinear object ranking problem by sampling from a Gaussian process
    as the latent utility function.

    Note that this function needs to compute a kernel matrix of size
    (n_instances * n_objects) ** 2, which could allocate a large chunk of the memory.

    Parameters
    ----------
    n_instances : number of ranking instances to generate
    n_objects : number of objects per instance
    noise : scale of the normal noise added to the latent utilities
    n_features : number of features per object
    kernel_params : optional dict of keyword arguments for the ``Matern`` kernel;
        ``None`` means no arguments
    seed : seed handed to ``check_random_state``
    **kwd
        Accepted for interface compatibility; not used here.

    Returns
    -------
    X, Y : tuple
        ``X`` reshaped to (n_instances, n_objects, n_features) and the rankings obtained
        from the (n_instances, n_objects) utilities via ``scores_to_rankings``.
    """
    rng = check_random_state(seed=seed)
    if kernel_params is None:
        kernel_params = dict()
    total = n_instances * n_objects

    # The order of the three random draws below (features, latent sample, noise)
    # determines the generated data for a given seed.
    points = rng.rand(total, n_features)
    covariance = Matern(**kernel_params)(points)
    cholesky_factor = np.linalg.cholesky(covariance)
    latent = cholesky_factor.dot(rng.randn(total))
    perturbation = rng.normal(scale=noise, size=total)
    utilities = (latent + perturbation).reshape(n_instances, n_objects)

    X = points.reshape(n_instances, n_objects, n_features)
    return X, scores_to_rankings(utilities)
def dataset_generator(n_instances, n_objects, seed, **kwargs):
    """Generate a critique-fit dataset and return its targets as rankings.

    Calls ``make_critique_fit_dataset`` on the parent of
    ``TagGenomeObjectRankingDatasetReader`` and converts the returned scores with
    ``scores_to_rankings``.

    NOTE(review): ``self`` and ``direction`` are not parameters of this function; they
    are presumably captured from an enclosing scope that is not visible here - confirm.

    Parameters
    ----------
    n_instances : forwarded unchanged to the parent implementation
    n_objects : forwarded unchanged to the parent implementation
    seed : forwarded unchanged to the parent implementation
    **kwargs
        Accepted but not forwarded.

    Returns
    -------
    X, Y : tuple
        The features from the parent implementation and the rankings derived from its scores.
    """
    parent = super(TagGenomeObjectRankingDatasetReader, self)
    features, critique_scores = parent.make_critique_fit_dataset(
        n_instances=n_instances,
        n_objects=n_objects,
        seed=seed,
        direction=direction,
    )
    return features, scores_to_rankings(critique_scores)
def spearman_correlation_for_scores_np(y_true, s_pred):
    """Mean Spearman correlation between true rankings and rankings derived from scores.

    Each row is scored with the closed form ``1 - 6 * sum(d ** 2) / (n * (n ** 2 - 1))``,
    where ``d`` is the element-wise difference of the two rankings and ``n`` is taken
    from ``y_true.shape[1]``. Rows whose predicted ranking contains repeated values
    contribute NaN and are therefore ignored by the final ``nanmean``.

    Parameters
    ----------
    y_true : numpy array of true rankings; its second dimension is the number of objects
    s_pred : predicted scores, converted with ``scores_to_rankings`` before comparison

    Returns
    -------
    The NaN-ignoring mean of the per-row coefficients.
    """
    y_pred = scores_to_rankings(s_pred)
    n_objects = y_true.shape[1]
    denominator = n_objects * (n_objects**2 - 1)

    def _row_rho(true_row, pred_row):
        # The closed form is only applied when the predicted ranking has no ties.
        if len(np.unique(pred_row)) != len(pred_row):
            return np.nan
        return 1 - (6 * np.sum((true_row - pred_row)**2) / denominator)

    coefficients = [_row_rho(r1, r2) for r1, r2 in zip(y_true, y_pred)]
    return np.nanmean(np.array(coefficients))
def make_linear_transitive(self, n_instances=1000, n_objects=5, noise=0.0, n_features=100,
                           n_informative=10, seed=42, **kwd):
    """Create an object ranking problem from a ``make_regression`` sample.

    ``n_instances * n_objects`` regression samples are generated, reshaped into
    instances of ``n_objects`` objects each, and the regression targets are converted
    into rankings with ``scores_to_rankings``.

    Parameters
    ----------
    n_instances : number of ranking instances to generate
    n_objects : number of objects per instance
    noise : forwarded to ``make_regression`` as ``noise``
    n_features : number of features per object
    n_informative : forwarded to ``make_regression`` as ``n_informative``
    seed : seed handed to ``check_random_state``
    **kwd
        Accepted for interface compatibility; not used here.

    Returns
    -------
    X, Y : tuple
        ``X`` of shape (n_instances, n_objects, n_features) and the rankings derived
        from the (n_instances, n_objects) regression targets.
    """
    rng = check_random_state(seed=seed)
    total = n_instances * n_objects
    # coef=True makes make_regression return the coefficients too; they are not needed.
    features, targets, _ = make_regression(
        n_samples=total,
        n_features=n_features,
        n_informative=n_informative,
        coef=True,
        noise=noise,
        random_state=rng,
    )
    X = features.reshape(n_instances, n_objects, n_features)
    utilities = targets.reshape(n_instances, n_objects)
    return X, scores_to_rankings(utilities)
def make_similarity_based_dataset(self, datatype="train", seed=42):
    """Picks a random subset of objects, draws one of them as the query object and ranks
    the objects of the subset based on their similarity to that query. The query object
    itself is also included in the ordering.

    NOTE(review): earlier documentation called the reference object a "medoid"; the code
    draws it uniformly at random from the subset rather than computing a medoid.

    Parameters
    ----------
    datatype : str
        ``"train"`` or ``"test"``; selects which image features, instance count and
        similarity matrix file are used.
    seed : int
        Seed for the ``numpy.random.RandomState`` driving the subset and query draws.

    Returns
    -------
    X, Y : tuple
        ``X`` of shape (n_instances, n_objects, n_features), with every instance
        standardised separately by ``StandardScaler``, and the rankings obtained from
        the similarity scores via ``scores_to_rankings``.

    Raises
    ------
    ValueError
        If ``datatype`` is neither ``"train"`` nor ``"test"``.
    """
    random_state = np.random.RandomState(seed=seed)
    if datatype == "train":
        image_features = self.image_features_train
        n_instances = self.n_train_instances
        similarity_matrix_file = self.similarity_matrix_train_file
    elif datatype == "test":
        image_features = self.image_features_test
        n_instances = self.n_test_instances
        similarity_matrix_file = self.similarity_matrix_test_file
    else:
        # Without this branch an unknown value only surfaced later as an
        # UnboundLocalError on one of the names assigned above.
        raise ValueError(
            "datatype must be 'train' or 'test', got {!r}".format(datatype)
        )

    X = np.empty((n_instances, self.n_objects, self.n_features), dtype=float)
    similarity_scores = np.empty((n_instances, self.n_objects), dtype=float)
    similarity_matrix_lin_list = get_similarity_matrix(similarity_matrix_file)

    for i in range(n_instances):
        # Draw order (subset first, then query) fixes the data produced for a seed.
        subset = random_state.choice(
            image_features.shape[0], size=self.n_objects, replace=False
        )
        X[i] = image_features[subset]
        query = random_state.choice(self.n_objects, size=1)
        # subset[query] holds exactly one index, so the product pairs the query
        # object with every object of the subset (itself included).
        one_row = [
            similarity_matrix_lin_list[get_key_for_indices(query_idx, object_idx)]
            for query_idx, object_idx in product(subset[query], subset)
        ]
        similarity_scores[i] = np.array(one_row)

    Y = scores_to_rankings(similarity_scores)

    # Standardise the features of each instance independently.
    for i, x in enumerate(X):
        X[i] = StandardScaler().fit_transform(x)
    return X, Y
def predict(self, Xo, Xc, **kwargs):
    """Predict rankings for the given inputs.

    Scores are obtained from ``self.predict_scores(Xo, Xc, **kwargs)`` and converted
    into rankings with ``scores_to_rankings``.

    Parameters
    ----------
    Xo : first input, forwarded unchanged to ``predict_scores``
    Xc : second input, forwarded unchanged to ``predict_scores``
    **kwargs
        Forwarded unchanged to ``predict_scores``.

    Returns
    -------
    The rankings derived from the predicted scores.
    """
    return scores_to_rankings(self.predict_scores(Xo, Xc, **kwargs))
def predict_for_scores(self, scores, **kwargs):
    """Convert predicted scores into rankings, announcing the step via ``self.logger``.

    Parameters
    ----------
    scores : scores to convert with ``scores_to_rankings``
    **kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    The rankings derived from ``scores``.
    """
    # NOTE(review): ``self.logger`` is called directly as a function. If it is a
    # ``logging.Logger`` this call fails (loggers are not callable) and
    # ``self.logger.info(...)`` was probably intended - confirm against the class.
    self.logger('Predicting rankings')
    rankings = scores_to_rankings(scores)
    return rankings
def zero_one_accuracy_for_scores_np(y_true, s_pred):
    """Fraction of instances whose predicted ranking matches the true ranking exactly.

    Parameters
    ----------
    y_true : true rankings, compared element-wise against the predicted rankings
    s_pred : predicted scores, converted with ``scores_to_rankings`` before comparison

    Returns
    -------
    Number of rows in which every position agrees, divided by the number of predicted rows.
    """
    y_pred = scores_to_rankings(s_pred)
    # A row counts only if all of its positions are equal.
    exact_match = np.all(np.equal(y_true, y_pred), axis=1)
    n_rankings = y_pred.shape[0]
    return np.sum(exact_match) / n_rankings
def predict_for_scores(self, scores, **kwargs):
    """Convert predicted scores into rankings.

    Parameters
    ----------
    scores : scores to convert with ``scores_to_rankings``
    **kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    The rankings derived from ``scores``.
    """
    rankings = scores_to_rankings(scores)
    return rankings