Example #1
    @classmethod
    def setUpClass(cls):

        set_seed(2017)

        # two 3x3 matrices of uniform random numbers shared by the tests
        cls.A = [[random.random() for e in range(3)] for x in range(3)]
        cls.B = [[random.random() for e in range(3)] for x in range(3)]

        # synthetic regression dataset, split 80/20, used to fit a 5-nearest-neighbour regressor
        cls.X, cls.y = regression(100, seed=1970)
        cls.X_train, cls.y_train, cls.X_test, cls.y_test = train_test_split(
            cls.X, cls.y, train_split=0.8, seed=1970)
        cls.regressor = KNNRegressor(n=5)
        cls.regressor.train(X=cls.X_train, y=cls.y_train)
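
For context, a k-nearest-neighbour regressor such as the KNNRegressor(n=5) trained above typically predicts the average target of the n closest training points. A minimal, self-contained sketch of that idea (illustrative only; the function and argument names here are made up and are not pyml's API):

import math

def knn_predict(X_train, y_train, x, k=5):
    # Euclidean distance from the query point to every training point
    distances = [math.dist(row, x) for row in X_train]
    # indices of the k closest training points
    nearest = sorted(range(len(distances)), key=distances.__getitem__)[:k]
    # prediction is the mean target value of those neighbours
    return sum(y_train[i] for i in nearest) / k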
Example #2
    def __init__(self,
                 k=3,
                 initialisation='Forgy',
                 max_iterations=100,
                 min_change=1,
                 seed=None,
                 norm='l1'):
        """
        KMeans implementation

        :type k: int
        :type initialisation: str
        :type max_iterations: int
        :type min_change: int
        :type seed: None or int
        :type norm: str or int

        :param k: number of clusters
        :param initialisation: method used to initialise the clusters (currently 'Forgy' or 'Random')
        :param max_iterations: maximum number of iterations
        :param min_change: minimum number of assignment changes per iteration required to keep iterating
        :param seed: sets the random seed
        :param norm: norm used to calculate the distance between each point and the centroids,
         e.g. 'l2' or 2 are both equivalent to using the Euclidean distance


        Example:
        --------

        >>> from pyml.cluster import KMeans
        >>> from pyml.datasets.random_data import gaussian
        >>> from pyml.preprocessing import train_test_split
        >>> datapoints, labels = gaussian(n=100, d=2, labels=3, sigma=0.1, seed=1970)
        >>> X_train, y_train, X_test, y_test = train_test_split(datapoints, labels, train_split=0.95, seed=1970)
        >>> kmeans = KMeans(k=3, max_iterations=1000, seed=1970)
        >>> _ = kmeans.train(X_train, y_train)
        >>> kmeans.iterations
        7
        >>> kmeans.centroids[1]
        [0.12801075816403754, 0.21926563270201577]

        """
        ClusterBase.__init__(self)

        if isinstance(norm, int) or norm in ['l1', 'l2']:
            self._norm = norm
        else:
            raise ValueError("Unknown norm.")

        self._seed = set_seed(seed)
        self._k = k
        self._max_iterations = max_iterations
        self._min_change = min_change

        if initialisation in ['Forgy', 'Random']:
            self._initialisation = initialisation
        else:
            raise ValueError("Unknown initialisation method.")
Example #3
    def __init__(self, learning_rate, epsilon, max_iterations, alpha,
                 fudge_factor, batch_size, method, seed, _type):
        """
        Inherits methods from BaseLearner
        """
        BaseLearner.__init__(self)
        Predictor.__init__(self)

        self._epsilon = epsilon
        self._max_iterations = max_iterations
        self._learning_rate = learning_rate

        self._alpha = alpha
        self._batch_size = batch_size

        if method in ['normal', 'nesterov', 'adagrad', 'adadelta', 'rmsprop']:
            self._method = method

        else:
            raise ValueError("Unknown GD method")

        self._seed = set_seed(seed)
        self._type = _type

        if self._method in ('adagrad', 'adadelta', 'rmsprop'):
            if fudge_factor == 0:
                warnings.warn(
                    "Fudge factor for {} optimisation is 0, it will be set to 10e-8 for your own "
                    "safety".format(self._method))
                fudge_factor = 10e-8

        if self._method == 'adadelta' and self._learning_rate != 1:
            warnings.warn(
                "Adadelta does not use a learning rate, setting this value to 1!"
            )
            self._learning_rate = 1

        self._fudge_factor = fudge_factor
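
The fudge factor warning above exists because adagrad-style methods divide by the square root of an accumulated squared gradient, which starts at zero. A short sketch of the standard Adagrad update showing where the fudge factor enters (the exact update rule used by pyml may differ):

def adagrad_step(theta, grad, cache, learning_rate=0.01, fudge_factor=10e-8):
    # accumulate the squared gradient for every parameter
    cache = [c + g ** 2 for c, g in zip(cache, grad)]
    # scale each step by the root of the accumulated squared gradient;
    # the fudge factor keeps the denominator away from zero on the first iterations
    theta = [t - learning_rate * g / (c ** 0.5 + fudge_factor)
             for t, g, c in zip(theta, grad, cache)]
    return theta, cache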
Example #4
    @classmethod
    def setUpClass(cls):

        set_seed(1970)

        cls.A = [[random.random() for e in range(8)] for x in range(10)]