@classmethod
def setUpClass(cls):
    set_seed(2017)
    cls.A = [[random.random() for e in range(3)] for x in range(3)]
    cls.B = [[random.random() for e in range(3)] for x in range(3)]
    cls.X, cls.y = regression(100, seed=1970)
    cls.X_train, cls.y_train, cls.X_test, cls.y_test = train_test_split(
        cls.X, cls.y, train_split=0.8, seed=1970)
    cls.regressor = KNNRegressor(n=5)
    cls.regressor.train(X=cls.X_train, y=cls.y_train)
def __init__(self, k=3, initialisation='Forgy', max_iterations=100,
             min_change=1, seed=None, norm='l1'):
    """
    KMeans implementation

    :type k: int
    :type initialisation: str
    :type max_iterations: int
    :type min_change: int
    :type seed: None or int
    :type norm: str or int

    :param k: number of clusters
    :param initialisation: method used to initialise the clusters
        (currently 'Forgy' or 'Random')
    :param max_iterations: maximum number of iterations
    :param min_change: minimum number of assignment changes per iteration
        required for the algorithm to continue
    :param seed: random seed
    :param norm: norm used to calculate the distance between each point
        and all centroids, e.g. 'l2' or 2 are equivalent to using the
        Euclidean distance

    Example:
    --------
    >>> from pyml.cluster import KMeans
    >>> from pyml.datasets.random_data import gaussian
    >>> from pyml.preprocessing import train_test_split
    >>> datapoints, labels = gaussian(n=100, d=2, labels=3, sigma=0.1, seed=1970)
    >>> X_train, y_train, X_test, y_test = train_test_split(datapoints, labels, train_split=0.95, seed=1970)
    >>> kmeans = KMeans(k=3, max_iterations=1000, seed=1970)
    >>> _ = kmeans.train(X_train, y_train)
    >>> kmeans.iterations
    7
    >>> kmeans.centroids[1]
    [0.12801075816403754, 0.21926563270201577]
    """

    ClusterBase.__init__(self)

    if isinstance(norm, int) or norm in ['l1', 'l2']:
        self._norm = norm
    else:
        raise ValueError("Unknown norm.")

    self._seed = set_seed(seed)
    self._k = k
    self._max_iterations = max_iterations
    self._min_change = min_change

    if initialisation in ['Forgy', 'Random']:
        self._initialisation = initialisation
    else:
        raise ValueError("Unknown initialisation method.")
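The `initialisation` argument above selects between two standard k-means seeding schemes: Forgy picks k data points as the initial centroids, while Random (random partition) assigns every point to a random cluster and averages each cluster. The sketch below is a standalone illustration of that difference, not pyml's actual internals; the helper names `forgy_init` and `random_partition_init` are made up for this example.

```python
import random

def forgy_init(X, k):
    # Forgy: pick k distinct data points and use them as the initial centroids
    return [list(point) for point in random.sample(X, k)]

def random_partition_init(X, k):
    # Random partition: assign every point to a random cluster,
    # then use each cluster's mean as its initial centroid
    assignments = [random.randrange(k) for _ in X]
    centroids = []
    for cluster in range(k):
        members = [x for x, a in zip(X, assignments) if a == cluster]
        if not members:                      # guard against an empty cluster
            members = [random.choice(X)]
        dim = len(members[0])
        centroids.append([sum(p[d] for p in members) / len(members)
                          for d in range(dim)])
    return centroids

points = [[random.random(), random.random()] for _ in range(100)]
print(forgy_init(points, 3))
print(random_partition_init(points, 3))
```

Forgy tends to spread the initial centroids out over the data, whereas random partition places them all near the overall mean, which is why the choice is exposed as a constructor argument.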
def __init__(self, learning_rate, epsilon, max_iterations, alpha,
             fudge_factor, batch_size, method, seed, _type):
    """
    Inherits methods from BaseLearner
    """

    BaseLearner.__init__(self)
    Predictor.__init__(self)

    self._epsilon = epsilon
    self._max_iterations = max_iterations
    self._learning_rate = learning_rate
    self._alpha = alpha
    self._batch_size = batch_size

    if method in ['normal', 'nesterov', 'adagrad', 'adadelta', 'rmsprop']:
        self._method = method
    else:
        raise ValueError("Unknown GD method")

    self._seed = set_seed(seed)
    self._type = _type

    if self._method in ['adagrad', 'adadelta', 'rmsprop']:
        if fudge_factor == 0:
            warnings.warn("Fudge factor for {} optimisation is 0, it will "
                          "be set to 10e-8 for your own "
                          "safety".format(self._method))
            fudge_factor = 10e-8

    if self._method == 'adadelta' and self._learning_rate != 1:
        warnings.warn("Adadelta does not use a learning rate, "
                      "setting this value to 1!")
        self._learning_rate = 1

    self._fudge_factor = fudge_factor
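The fudge-factor check above exists because adaptive methods such as AdaGrad and RMSProp divide each update by a running root of accumulated squared gradients, which starts at zero. The snippet below is a minimal, standalone sketch of an AdaGrad-style step showing where that constant sits in the denominator; it is not pyml's actual implementation, and `adagrad_step` is a hypothetical helper written only for illustration.

```python
import math

def adagrad_step(params, grads, cache, learning_rate=0.01, fudge_factor=10e-8):
    # Accumulate squared gradients per parameter and scale each update by the
    # running root-sum-of-squares; the fudge factor keeps the denominator nonzero.
    new_params = []
    for i, (p, g) in enumerate(zip(params, grads)):
        cache[i] += g ** 2
        new_params.append(p - learning_rate * g / (math.sqrt(cache[i]) + fudge_factor))
    return new_params

params = [0.5, -1.2]
cache = [0.0, 0.0]
for _ in range(3):
    grads = [2 * p for p in params]          # gradient of f(p) = p ** 2
    params = adagrad_step(params, grads, cache)
print(params)
```

With `fudge_factor=0` the very first step would divide by `sqrt(0)`, which is why the constructor refuses to leave it at zero for these methods.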
@classmethod
def setUpClass(cls):
    set_seed(1970)
    cls.A = [[random.random() for e in range(8)] for x in range(10)]