def predict(self, X, **kwargs):
    """
    Rank the objects of every given set according to their predicted scores.

    Scores are obtained from ``self.predict_scores`` and then converted with
    ``scores_to_rankings``.

    Parameters
    ----------
    X : dict or numpy array
        Either a dictionary mapping ranking size to numpy arrays, or a single
        numpy array of size (n_instances, n_objects, n_features).
    **kwargs
        Passed through unchanged to ``self.predict_scores``.

    Returns
    -------
    Y : dict or numpy array
        Predicted rankings, either as a dictionary mapping ranking size to
        numpy arrays or as a single numpy array of size
        (n_instances, n_objects).
    """
    self.logger.debug('Predicting started')
    scores = self.predict_scores(X, **kwargs)
    self.logger.debug('Predicting scores complete')

    rankings = scores_to_rankings(scores)
    self.logger.debug('Predicting ranks complete')

    # The raw scores are not needed once the rankings exist.
    del scores
    return rankings
def make_similarity_based_dataset(self, n_instances, seed=42):
    """
    Build a ranking dataset from movie features and pairwise similarities.

    For every instance a random subset of ``self.n_objects`` movies is drawn
    without replacement and one member of the subset is picked at random as
    the query. The similarity between the query and every movie of the subset
    (the query itself included) is looked up in
    ``self.similarity_matrix_lin_list`` and the resulting scores are turned
    into rankings with ``scores_to_rankings``.

    Parameters
    ----------
    n_instances : int
        Number of instances (object sets) to generate.
    seed : int, optional
        Seed of the ``numpy.random.RandomState`` used for all sampling.

    Returns
    -------
    X : numpy array of shape (n_instances, self.n_objects, self.n_features)
        Movie features, standardized separately for every instance.
    rankings : output of ``scores_to_rankings``
        Rankings derived from the (n_instances, self.n_objects) similarity
        scores.
    """
    random_state = np.random.RandomState(seed=seed)
    X = np.empty((n_instances, self.n_objects, self.n_features), dtype=float)
    similarity_scores = np.empty((n_instances, self.n_objects), dtype=float)

    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` yields
    # the same array on old and new pandas versions alike.
    # The first four columns are skipped (presumably non-feature metadata --
    # TODO confirm against the movies data frame).
    movie_features = self.movies_df.values[:, 4:]

    for i in range(n_instances):
        subset = random_state.choice(movie_features.shape[0],
                                     size=self.n_objects, replace=False)
        X[i] = movie_features[subset]
        # ``query`` is a length-1 array, so ``subset[query]`` is too and the
        # product below pairs the single query movie with every subset member.
        query = random_state.choice(self.n_objects, size=1)
        # Distinct names keep the comprehension from shadowing the instance
        # index ``i`` used for the assignment right after it.
        one_row = [
            self.similarity_matrix_lin_list[get_key_for_indices(query_idx, movie_idx)]
            for query_idx, movie_idx in product(subset[query], subset)
        ]
        similarity_scores[i] = np.array(one_row)

    # Higher the similarity lower the rank of the object
    rankings = scores_to_rankings(similarity_scores)

    # Standardize the features of each instance independently.
    for i, x in enumerate(X):
        X[i] = StandardScaler().fit_transform(x)
    return X, rankings
def make_gp_transitive(self, n_instances=1000, n_objects=5, noise=0.0,
                       n_features=100, kernel_params=None, seed=42, **kwd):
    """Generate a nonlinear object ranking problem whose latent utility
    function is a sample from a Gaussian process with a Matern kernel.

    Be aware that a kernel matrix of size (n_instances * n_objects) ** 2 has
    to be computed, which may allocate a large chunk of memory.

    Parameters
    ----------
    n_instances : int
        Number of object sets to generate.
    n_objects : int
        Number of objects per set.
    noise : float
        Scale of the normal noise added to the latent utilities.
    n_features : int
        Number of features per object.
    kernel_params : dict or None
        Keyword arguments for ``Matern``; ``None`` means no arguments.
    seed : int
        Seed of the ``numpy.random.RandomState`` used for all sampling.
    **kwd
        Accepted but not used.

    Returns
    -------
    X : numpy array of shape (n_instances, n_objects, n_features)
    rankings : output of ``scores_to_rankings`` for the
        (n_instances, n_objects) utilities
    """
    rng = np.random.RandomState(seed=seed)
    params = dict() if kernel_params is None else kernel_params
    total = n_instances * n_objects

    features = rng.rand(total, n_features)
    kernel_matrix = Matern(**params)(features)
    cholesky_factor = np.linalg.cholesky(kernel_matrix)

    # Draw order matters for reproducibility: GP sample first, noise second.
    latent = cholesky_factor.dot(rng.randn(total))
    utilities = latent + rng.normal(scale=noise, size=total)

    rankings = scores_to_rankings(utilities.reshape(n_instances, n_objects))
    return features.reshape(n_instances, n_objects, n_features), rankings
def make_similarity_based_dataset(self, datatype='train', seed=42):
    """
    Build a ranking dataset from image features and pairwise similarities.

    For every instance a random subset of ``self.n_objects`` images is drawn
    without replacement and one member of the subset is picked at random as
    the query object. The objects are ranked by their similarity to that
    query, which is itself part of the ordering.
    NOTE(review): earlier documentation called the query a medoid, but this
    function selects it at random and computes no medoid.

    Parameters
    ----------
    datatype : {'train', 'test'}
        Selects which image features, instance count and similarity matrix
        file are used.
    seed : int, optional
        Seed of the ``numpy.random.RandomState`` used for all sampling.

    Returns
    -------
    X : numpy array of shape (n_instances, self.n_objects, self.n_features)
        Image features, standardized separately for every instance.
    rankings : output of ``scores_to_rankings``
        Rankings derived from the (n_instances, self.n_objects) similarity
        scores.

    Raises
    ------
    ValueError
        If ``datatype`` is neither ``'train'`` nor ``'test'``.
    """
    if datatype == 'train':
        image_features = self.image_features_train
        n_instances = self.n_train_instances
        similarity_matrix_file = self.similarity_matrix_train_file
    elif datatype == 'test':
        image_features = self.image_features_test
        n_instances = self.n_test_instances
        similarity_matrix_file = self.similarity_matrix_test_file
    else:
        # Fail early with a clear message instead of hitting an unbound
        # local variable further down.
        raise ValueError(
            "datatype must be 'train' or 'test', got {!r}".format(datatype))

    random_state = np.random.RandomState(seed=seed)
    X = np.empty((n_instances, self.n_objects, self.n_features), dtype=float)
    similarity_scores = np.empty((n_instances, self.n_objects), dtype=float)
    similarity_matrix_lin_list = initialize_similarity_matrix(similarity_matrix_file)

    for i in range(n_instances):
        subset = random_state.choice(image_features.shape[0],
                                     size=self.n_objects, replace=False)
        X[i] = image_features[subset]
        # ``query`` is a length-1 array, so ``subset[query]`` is too and the
        # product below pairs the single query image with every subset member.
        query = random_state.choice(self.n_objects, size=1)
        # Distinct names keep the comprehension from shadowing the instance
        # index ``i`` used for the assignment right after it.
        one_row = [
            similarity_matrix_lin_list[get_key_for_indices(query_idx, image_idx)]
            for query_idx, image_idx in product(subset[query], subset)
        ]
        similarity_scores[i] = np.array(one_row)

    rankings = scores_to_rankings(similarity_scores)

    # Standardize the features of each instance independently.
    for i, x in enumerate(X):
        X[i] = StandardScaler().fit_transform(x)
    return X, rankings
def predict(self, X, **kwargs):
    """
    Predict rankings, handling dictionary inputs separately.

    When ``X`` is a dict, ``self.predict_scores`` is called once and every
    entry of its result is converted with ``scores_to_rankings``; the
    rankings are returned in a dict under the same keys. Any other input is
    delegated to ``ObjectRanker.predict``.

    Parameters
    ----------
    X : dict or other input accepted by ``ObjectRanker.predict``
    **kwargs
        Passed through unchanged to the score prediction.

    Returns
    -------
    dict of rankings for dict input, otherwise whatever
    ``ObjectRanker.predict`` returns.
    """
    self.logger.info("Predicting ranks")
    if not isinstance(X, dict):
        return ObjectRanker.predict(self, X, **kwargs)

    scores_by_key = self.predict_scores(X, **kwargs)
    return {
        key: scores_to_rankings(scores)
        for key, scores in scores_by_key.items()
    }
def make_linear_transitive(self, n_instances=1000, n_objects=5, noise=0.0,
                           n_features=100, n_informative=10, seed=42, **kwd):
    """
    Generate an object ranking problem from a linear regression problem.

    ``make_regression`` produces ``n_instances * n_objects`` samples, which
    are grouped into sets of ``n_objects``; the regression targets of each
    set are converted to a ranking with ``scores_to_rankings``.

    Parameters
    ----------
    n_instances : int
        Number of object sets to generate.
    n_objects : int
        Number of objects per set.
    noise : float
        Forwarded to ``make_regression`` as ``noise``.
    n_features : int
        Number of features per object.
    n_informative : int
        Forwarded to ``make_regression`` as ``n_informative``.
    seed : int
        Seed of the ``numpy.random.RandomState`` handed to ``make_regression``.
    **kwd
        Accepted but not used.

    Returns
    -------
    X : numpy array of shape (n_instances, n_objects, n_features)
    rankings : output of ``scores_to_rankings`` for the
        (n_instances, n_objects) targets
    """
    rng = np.random.RandomState(seed=seed)
    # The coefficients returned because of ``coef=True`` are not used.
    features, targets, _ = make_regression(
        n_samples=n_instances * n_objects,
        n_features=n_features,
        n_informative=n_informative,
        coef=True,
        noise=noise,
        random_state=rng,
    )
    set_shape = (n_instances, n_objects)
    features = features.reshape(set_shape + (n_features,))
    rankings = scores_to_rankings(targets.reshape(set_shape))
    return features, rankings
def predict(self, X, **kwargs):
    """
    Predict label orderings for the given context vectors.

    Scores are obtained from ``self.predict_scores`` and then converted with
    ``scores_to_rankings``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
    **kwargs
        Passed through unchanged to ``self.predict_scores``.

    Returns
    -------
    Y : array-like, shape (n_samples, n_labels)
        Predicted orderings
    """
    self.logger.debug('Predicting started')
    scores = self.predict_scores(X, **kwargs)
    self.logger.debug('Predicting scores complete')

    orderings = scores_to_rankings(scores)
    self.logger.debug('Predicting ranks complete')

    # The raw scores are not needed once the orderings exist.
    del scores
    return orderings
def predict(self, Xo, Xc, **kwargs):
    """
    Predict rankings from the scores of ``self.predict_scores``.

    Parameters
    ----------
    Xo, Xc
        Both are forwarded, in this order, to ``self.predict_scores``
        (presumably object and context inputs -- TODO confirm).
    **kwargs
        Passed through unchanged to ``self.predict_scores``.

    Returns
    -------
    The output of ``scores_to_rankings`` applied to the predicted scores.
    """
    return scores_to_rankings(self.predict_scores(Xo, Xc, **kwargs))