def make_hv_dataset(
    self,
    n_instances=1000,
    n_objects=5,
    n_features=5,
    seed=42,
    cluster_spread=1.0,
    **kwd,
):
    """Create a choice dataset labelled by hypervolume contribution.

    Every instance is a set of ``n_objects`` points that are projected onto
    the unit sphere, mirrored into the non-positive orthant and then shifted
    by a per-instance center. The object with the largest exclusive
    hypervolume contribution (measured against that center as reference
    point) is the chosen one.

    Parameters
    ----------
    n_instances : int
        Number of object sets to generate.
    n_objects : int
        Number of objects in each set.
    n_features : int
        Dimensionality of each object.
    seed : int or RandomState
        Seed handed to ``check_random_state`` for drawing the raw objects.
    cluster_spread : float
        Upper bound on the distance of each instance's center from the
        origin.
    **kwd
        Accepted and ignored.

    Returns
    -------
    X : ndarray of shape (n_instances, n_objects, n_features)
        The shifted objects.
    Y
        ``convert_to_label_encoding`` applied to the per-instance index of
        the winning object (presumably a one-hot encoding; verify against
        that helper).

    Raises
    ------
    MissingExtraError
        If ``pygmo`` cannot be imported.
    """
    try:
        from pygmo import hypervolume
    except ImportError:
        from csrank.util import MissingExtraError

        raise MissingExtraError("pygmo", "data")

    def draw_center(dim, rng_seed, radius):
        # A random direction on the unit sphere, scaled by a uniform
        # fraction of ``radius``. The normal draw must precede the uniform
        # draw so the random stream is consumed in the same order.
        generator = check_random_state(rng_seed)
        direction = generator.randn(1, dim)
        fraction = generator.uniform(size=1)[:, None]
        direction = direction / np.linalg.norm(direction, axis=1, ord=2)[:, None]
        return (direction * (radius * fraction))[0]

    generator = check_random_state(seed=seed)
    X = generator.rand(n_instances, n_objects, n_features)
    # Scale each object to unit length, then mirror it into the
    # non-positive orthant.
    lengths = np.sqrt(np.power(X, 2).sum(axis=2, keepdims=True))
    X = -np.abs(X / lengths)

    winners = np.empty(n_instances, dtype=int)
    for idx in range(n_instances):
        # NOTE(review): the center is seeded with the instance index, so the
        # centers do not vary with ``seed`` -- confirm this is intended.
        center = draw_center(n_features, idx, cluster_spread)
        X[idx] += center
        contributions = hypervolume(X[idx]).contributions(center)
        winners[idx] = np.argmax(contributions)

    return X, convert_to_label_encoding(winners, n_objects)
def make_hv_dataset(self, n_instances=1000, n_objects=5, n_features=5, seed=42, **kwd):
    """Create a ranking dataset ordered by hypervolume contribution.

    Every instance is a set of ``n_objects`` points that are projected onto
    the unit sphere and mirrored into the non-positive orthant. Objects are
    ranked by their exclusive hypervolume contribution with respect to the
    origin as reference point.

    Parameters
    ----------
    n_instances : int
        Number of object sets to generate.
    n_objects : int
        Number of objects in each set.
    n_features : int
        Dimensionality of each object.
    seed : int or RandomState
        Seed handed to ``check_random_state``.
    **kwd
        Accepted and ignored.

    Returns
    -------
    X : ndarray of shape (n_instances, n_objects, n_features)
        The generated objects.
    Y : ndarray of int, shape (n_instances, n_objects)
        Rank position of every object; the object with the largest
        contribution receives rank 0.

    Raises
    ------
    MissingExtraError
        If ``pygmo`` cannot be imported.
    """
    try:
        from pygmo import hypervolume
    except ImportError:
        from csrank.util import MissingExtraError

        raise MissingExtraError("pygmo", "data")

    generator = check_random_state(seed=seed)
    X = generator.randn(n_instances, n_objects, n_features)
    # Scale each object to unit length, then mirror it into the
    # non-positive orthant.
    lengths = np.sqrt(np.power(X, 2).sum(axis=2, keepdims=True))
    X = -np.abs(X / lengths)

    origin = np.zeros(n_features)
    Y = np.empty((n_instances, n_objects), dtype=int)
    for idx in range(n_instances):
        contributions = hypervolume(X[idx]).contributions(origin)
        # Object indices from largest to smallest contribution ...
        descending = np.argsort(contributions)[::-1]
        # ... inverted into the rank position held by each object.
        Y[idx] = np.argsort(descending)
    return X, Y
import itertools as iter import sys import numpy as np try: import pandas as pd except ImportError: from csrank.util import MissingExtraError raise MissingExtraError("pandas", "data") from sklearn.metrics import f1_score from sklearn.preprocessing import StandardScaler def strongly_connected_components(graph): """ Find the strongly connected components in a graph using Tarjan's algorithm. # Taken from http://www.logarithmic.net/pfh-files/blog/01208083168/sort.py graph should be a dictionary mapping node names to lists of successor nodes. """ result = [] stack = [] low = {} def visit(node): if node in low:
import logging import os import numpy as np from csrank.dataset_reader.util import standardize_features from csrank.util import create_dir_recursively from csrank.util import print_dictionary from .dataset_reader import DatasetReader try: import h5py except ImportError: from csrank.util import MissingExtraError raise MissingExtraError("h5py", "data") logger = logging.getLogger(__name__) class LetorRankingDatasetReader(DatasetReader, metaclass=ABCMeta): def __init__(self, year=2007, fold_id=0, exclude_qf=False, **kwargs): super(LetorRankingDatasetReader, self).__init__(dataset_folder="letor", **kwargs) self.DATASET_FOLDER_2007 = "MQ{}".format(year) self.DATASET_FOLDER_2008 = "MQ{}".format(year) if year not in [2007, 2008]: raise ValueError("year must be either 2007 or 2008") self.year = year self.exclude_qf = exclude_qf
import csrank.numpy_util as npu import csrank.theano_util as ttu from csrank.util import print_dictionary from .discrete_choice import DiscreteObjectChooser from .likelihoods import create_weight_dictionary from .likelihoods import fit_pymc3_model from .likelihoods import likelihood_dict from .likelihoods import LogLikelihood try: import pymc3 as pm from pymc3.variational.callbacks import CheckParametersConvergence except ImportError: from csrank.util import MissingExtraError raise MissingExtraError("pymc3", "probabilistic") try: import theano from theano import tensor as tt except ImportError: from csrank.util import MissingExtraError raise MissingExtraError("theano", "probabilistic") logger = logging.getLogger(__name__) class PairedCombinatorialLogit(DiscreteObjectChooser, Learner): def __init__( self,
from sklearn.datasets.samples_generator import make_blobs from sklearn.gaussian_process.kernels import Matern from sklearn.utils import check_random_state from csrank.constants import OBJECT_RANKING from csrank.numpy_util import scores_to_rankings from ..synthetic_dataset_generator import SyntheticDatasetGenerator from ..util import create_pairwise_prob_matrix from ..util import quicksort try: from pygmo import hypervolume except ImportError: from csrank.util import MissingExtraError raise MissingExtraError("pygmo", "data") class ObjectRankingDatasetGenerator(SyntheticDatasetGenerator): def __init__(self, dataset_type="medoid", **kwargs): super(ObjectRankingDatasetGenerator, self).__init__(learning_problem=OBJECT_RANKING, **kwargs) dataset_function_options = { "linear": self.make_linear_transitive, "medoid": self.make_intransitive_medoids, "gp_transitive": self.make_gp_transitive, "gp_non_transitive": self.make_gp_non_transitive, "hyper_volume": self.make_hv_dataset, } if dataset_type not in dataset_function_options: raise ValueError(