Example no. 1
    def make_hv_dataset(
        self,
        n_instances=1000,
        n_objects=5,
        n_features=5,
        seed=42,
        cluster_spread=1.0,
        **kwd,
    ):
        try:
            from pygmo import hypervolume
        except ImportError:
            from csrank.util import MissingExtraError

            raise MissingExtraError("pygmo", "data")

        def sample_unit_ball(n_f=2, rng=None, radius=1.0):
            # Direction drawn uniformly on the unit sphere, then scaled by a
            # uniform draw in [0, radius], so samples concentrate near the centre.
            rng = check_random_state(rng)
            X = rng.randn(1, n_f)
            u = rng.uniform(size=1)[:, None]
            X /= np.linalg.norm(X, axis=1, ord=2)[:, None]
            X *= radius * u
            return X[0]

        random_state = check_random_state(seed=seed)
        X = random_state.rand(n_instances, n_objects, n_features)
        # Normalize each object onto the unit sphere and fold into the negative orthant
        X = -np.abs(X / np.sqrt(np.power(X, 2).sum(axis=2))[..., None])
        Y = np.empty(n_instances, dtype=int)
        for i in range(n_instances):
            center = sample_unit_ball(n_f=n_features, rng=i, radius=cluster_spread)
            X[i] = X[i] + center
            hv = hypervolume(X[i])
            cont = hv.contributions(center)
            Y[i] = np.argmax(cont)
        Y = convert_to_label_encoding(Y, n_objects)
        return X, Y
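Both this excerpt and the next assume check_random_state (sklearn.utils) is in scope; convert_to_label_encoding is csrank's label-encoding helper. A minimal standalone sketch of the pygmo calls the method builds on, with illustrative points and the origin as reference (pygmo assumes minimisation, so every point must dominate the reference):

import numpy as np
from pygmo import hypervolume

# Three illustrative points in the negative orthant.
points = np.array([[-0.8, -0.2], [-0.5, -0.5], [-0.2, -0.9]])
hv = hypervolume(points)
ref = np.zeros(2)  # the origin is dominated by all three points
print(hv.compute(ref))        # total dominated volume
print(hv.contributions(ref))  # exclusive contribution of each point
print(np.argmax(hv.contributions(ref)))  # index chosen as the label above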
Example no. 2
    def make_hv_dataset(self,
                        n_instances=1000,
                        n_objects=5,
                        n_features=5,
                        seed=42,
                        **kwd):
        try:
            from pygmo import hypervolume
        except ImportError:
            from csrank.util import MissingExtraError

            raise MissingExtraError("pygmo", "data")
        random_state = check_random_state(seed=seed)
        X = random_state.randn(n_instances, n_objects, n_features)
        # Normalize each object onto the unit sphere and fold into the negative orthant
        X = -np.abs(X / np.sqrt(np.power(X, 2).sum(axis=2))[..., None])
        Y = np.empty((n_instances, n_objects), dtype=int)
        reference = np.zeros(n_features)
        for i, x in enumerate(X):
            hv = hypervolume(x)
            cont = hv.contributions(reference)
            Y[i] = np.argsort(cont)[::-1].argsort()

        return X, Y
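The line Y[i] = np.argsort(cont)[::-1].argsort() turns contribution scores into ranks, with rank 0 for the largest contribution. A tiny illustration:

import numpy as np

cont = np.array([0.3, 0.9, 0.1, 0.5])
ranks = np.argsort(cont)[::-1].argsort()
print(ranks)  # [2 0 3 1]: the largest score gets rank 0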
Example no. 3
import itertools as iter  # note: this alias shadows the built-in iter()
import sys

import numpy as np

try:
    import pandas as pd
except ImportError:
    from csrank.util import MissingExtraError

    raise MissingExtraError("pandas", "data")

from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler


def strongly_connected_components(graph):
    """Find the strongly connected components in a graph using Tarjan's algorithm.

    Taken from http://www.logarithmic.net/pfh-files/blog/01208083168/sort.py
    ``graph`` should be a dictionary mapping node names to lists of successor nodes.
    """
    result = []
    stack = []
    low = {}

    def visit(node):
        if node in low:
            return
        num = len(low)
        low[node] = num
        stack_pos = len(stack)
        stack.append(node)
        for successor in graph[node]:
            visit(successor)
            low[node] = min(low[node], low[successor])
        if num == low[node]:
            component = tuple(stack[stack_pos:])
            del stack[stack_pos:]
            result.append(component)
            for item in component:
                low[item] = num

    for node in graph:
        visit(node)

    return result
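A quick check on a four-node graph whose only cycle is a <-> b; components come back in reverse topological order:

graph = {"a": ["b"], "b": ["a", "c"], "c": ["d"], "d": []}
print(strongly_connected_components(graph))
# [('d',), ('c',), ('a', 'b')]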
Example no. 4
import logging
import os
from abc import ABCMeta

import numpy as np

from csrank.dataset_reader.util import standardize_features
from csrank.util import create_dir_recursively
from csrank.util import print_dictionary
from .dataset_reader import DatasetReader

try:
    import h5py
except ImportError:
    from csrank.util import MissingExtraError

    raise MissingExtraError("h5py", "data")

logger = logging.getLogger(__name__)


class LetorRankingDatasetReader(DatasetReader, metaclass=ABCMeta):
    def __init__(self, year=2007, fold_id=0, exclude_qf=False, **kwargs):
        super(LetorRankingDatasetReader, self).__init__(dataset_folder="letor",
                                                        **kwargs)
        if year not in [2007, 2008]:
            raise ValueError("year must be either 2007 or 2008")
        self.DATASET_FOLDER_2007 = "MQ2007"
        self.DATASET_FOLDER_2008 = "MQ2008"
        self.year = year
        self.exclude_qf = exclude_qf
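The reader imports h5py, presumably to cache preprocessed folds on disk. A minimal sketch of the generic h5py write/read round trip (file name and dataset keys are illustrative, not the reader's actual layout):

import h5py
import numpy as np

X = np.random.rand(10, 5, 3)  # (instances, objects, features), illustrative
Y = np.random.randint(0, 5, size=10)
with h5py.File("letor_fold_0.h5", "w") as f:  # hypothetical cache file
    f.create_dataset("X", data=X)
    f.create_dataset("Y", data=Y)
with h5py.File("letor_fold_0.h5", "r") as f:
    X_cached, Y_cached = f["X"][()], f["Y"][()]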
Example no. 5
import logging

import csrank.numpy_util as npu
import csrank.theano_util as ttu
from csrank.learner import Learner
from csrank.util import print_dictionary
from .discrete_choice import DiscreteObjectChooser
from .likelihoods import create_weight_dictionary
from .likelihoods import fit_pymc3_model
from .likelihoods import likelihood_dict
from .likelihoods import LogLikelihood

try:
    import pymc3 as pm
    from pymc3.variational.callbacks import CheckParametersConvergence
except ImportError:
    from csrank.util import MissingExtraError

    raise MissingExtraError("pymc3", "probabilistic")

try:
    import theano
    from theano import tensor as tt
except ImportError:
    from csrank.util import MissingExtraError

    raise MissingExtraError("theano", "probabilistic")

logger = logging.getLogger(__name__)


class PairedCombinatorialLogit(DiscreteObjectChooser, Learner):
    def __init__(
        self,
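The guarded imports above follow csrank's optional-dependency pattern: import lazily and raise an error naming the missing extra. A standalone sketch of the same idea (package name and message are hypothetical, not csrank's actual API):

# Hypothetical package and message; only the try/except shape mirrors the code above.
try:
    import some_optional_package  # noqa: F401
except ImportError as err:
    raise ImportError(
        "some_optional_package is required for this feature; "
        "install the matching extra, e.g. `pip install csrank[probabilistic]`."
    ) from err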
Example no. 6
from sklearn.datasets import make_blobs
from sklearn.gaussian_process.kernels import Matern
from sklearn.utils import check_random_state

from csrank.constants import OBJECT_RANKING
from csrank.numpy_util import scores_to_rankings
from ..synthetic_dataset_generator import SyntheticDatasetGenerator
from ..util import create_pairwise_prob_matrix
from ..util import quicksort

try:
    from pygmo import hypervolume
except ImportError:
    from csrank.util import MissingExtraError

    raise MissingExtraError("pygmo", "data")


class ObjectRankingDatasetGenerator(SyntheticDatasetGenerator):
    def __init__(self, dataset_type="medoid", **kwargs):
        super(ObjectRankingDatasetGenerator,
              self).__init__(learning_problem=OBJECT_RANKING, **kwargs)
        dataset_function_options = {
            "linear": self.make_linear_transitive,
            "medoid": self.make_intransitive_medoids,
            "gp_transitive": self.make_gp_transitive,
            "gp_non_transitive": self.make_gp_non_transitive,
            "hyper_volume": self.make_hv_dataset,
        }
        if dataset_type not in dataset_function_options:
            raise ValueError(
                "dataset_type must be one of {}".format(
                    set(dataset_function_options.keys())
                )
            )
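The constructor dispatches on dataset_type through a dict of bound methods. A standalone sketch of that dispatch pattern with toy stand-ins for the generator functions:

options = {
    "linear": lambda n: list(range(n)),
    "medoid": lambda n: [0] * n,
}
choice = "medoid"
if choice not in options:
    raise ValueError("dataset_type must be one of {}".format(set(options)))
print(options[choice](3))  # [0, 0, 0]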