예제 #1
0
    def test_l1_l2(self):
        """Train a small dense network with ``l1_l2`` kernel regularisation.

        The network (Dense 10 -> sigmoid -> Dense 2 -> tanh -> Dense 2 ->
        softmax) is fitted for 9 epochs with stochastic gradient descent and
        the biases and weights of ``model.layers[-2]`` are compared with
        stored reference values.
        """
        # NOTE(review): the reference biases below are identical to the ones
        # used by test_l2, which trains for a different number of epochs with
        # a different regulariser -- confirm they were regenerated here.
        l1_strength = 0.001
        l2_strength = 0.001

        network = Sequential(verbose=1)

        # Each layer is built and added immediately, one after the other, so
        # the construction order stays exactly as recorded for the reference
        # values.
        first_dense = Dense(10,
                            kernel_regularizer=l1_l2(l1_strength, l2_strength),
                            input_dim=2,
                            seed=1)
        network.add(first_dense)
        network.add(Activation('sigmoid'))

        second_dense = Dense(2,
                             kernel_regularizer=l1_l2(l1_strength,
                                                      l2_strength),
                             seed=6)
        network.add(second_dense)
        network.add(Activation('tanh'))

        third_dense = Dense(2,
                            kernel_regularizer=l1_l2(l1_strength, l2_strength),
                            seed=6)
        network.add(third_dense)
        network.add(Activation('softmax'))

        optimizer = StochasticGradientDescent(learning_rate=0.05)
        network.compile(optimizer=optimizer, loss="cross_entropy")

        network.fit(self.X_train, self.y_train, epochs=9, batch_size=2)
        print(network.layers[-2].biases)
        print(network.layers[-2].weights)

        reference_biases = np.array([[-0.95917324, -0.32783731]],
                                    dtype=np.float64)
        biases_match = np.allclose(reference_biases,
                                   network.layers[-2].biases)
        self.assertTrue(biases_match)

        reference_weights = np.array(
            [[0.71132812, -2.20343103], [1.44723471, -2.40020303]],
            dtype=np.float64)
        weights_match = np.allclose(reference_weights,
                                    network.layers[-2].weights)
        self.assertTrue(weights_match)
예제 #2
0
    def test_l2(self):
        """Train a small dense network with ``l2`` kernel regularisation.

        The network (Dense 10 -> sigmoid -> Dense 2 -> tanh -> Dense 2 ->
        softmax) is fitted for 10 epochs with stochastic gradient descent and
        the biases and weights of ``model.layers[-2]`` are compared with
        stored reference values.
        """
        penalty = 0.001

        network = Sequential(verbose=1)

        # Layers are created and added strictly one after the other.
        first_dense = Dense(10,
                            kernel_regularizer=l2(penalty),
                            input_dim=2,
                            seed=1)
        network.add(first_dense)
        network.add(Activation('sigmoid'))

        second_dense = Dense(2, kernel_regularizer=l2(penalty), seed=6)
        network.add(second_dense)
        network.add(Activation('tanh'))

        third_dense = Dense(2, kernel_regularizer=l2(penalty), seed=6)
        network.add(third_dense)
        network.add(Activation('softmax'))

        optimizer = StochasticGradientDescent(learning_rate=0.05)
        network.compile(optimizer=optimizer, loss="cross_entropy")

        network.fit(self.X_train, self.y_train, epochs=10, batch_size=2)
        print(network.layers[-2].biases)
        print(network.layers[-2].weights)

        reference_biases = np.array([[-0.95917324, -0.32783731]],
                                    dtype=np.float64)
        biases_match = np.allclose(reference_biases,
                                   network.layers[-2].biases)
        self.assertTrue(biases_match)

        reference_weights = np.array(
            [[1.58872834, -1.65159914], [2.04547398, -1.63848661]],
            dtype=np.float64)
        weights_match = np.allclose(reference_weights,
                                    network.layers[-2].weights)
        self.assertTrue(weights_match)
예제 #3
0
    def test_predictions(self):
        """Check that the fs2ml DBSCAN labels agree with scikit-learn's."""
        # Reference clustering.
        reference = skl_DBSCAN(self.eps, self.min_points)
        reference.fit(self.X)

        # Clustering under test, built with the same two arguments.
        candidate = fs2ml_DBSCAN(self.eps, self.min_points)
        predicted = candidate.fit_predict(self.X)

        actual_labels = np.array(predicted, dtype=np.int64)
        expected_labels = np.array(reference.labels_, dtype=np.int64)
        self.assertTrue(np.allclose(actual_labels, expected_labels))
예제 #4
0
    def test_predictions(self):
        """Compare sorted fs2ml KMeans labels with sorted scikit-learn labels.

        Both label vectors are sorted before the comparison, so the check is
        on the multiset of label values rather than on the per-sample
        assignment.
        """
        reference = skl_KMeans(n_clusters=self.n_clusters, random_state=5)
        reference.fit(self.X)
        expected = sorted(np.array(reference.labels_, dtype=np.int64))

        candidate = fs2ml_KMeans(n_clusters=self.n_clusters, seed=5)
        predicted = candidate.fit_predict(self.X)
        actual = sorted(np.array(predicted, dtype=np.int64))

        self.assertTrue(np.allclose(actual, expected))
예제 #5
0
    def predict(self, X_test):
        """Predict a label for every test sample by k-nearest-neighbours voting.

        For each test sample the Euclidean distance to every stored training
        sample (``self.X``) is computed, the ``self.n_neighbors`` closest
        training samples are selected, and the most frequent label among them
        (taken from ``self.y``) becomes the prediction.

        Parameters
        ----------
        X_test : numpy.ndarray
            The testing features, one sample per row.

        Returns
        -------
        y_target : numpy.ndarray
            The class label corresponding to each testing feature, stored as
            ``numpy.int64``.
        """
        train_X = np.asarray(self.X)
        train_y = np.asarray(self.y)
        n_train = train_X.shape[0]

        y_target = np.zeros(len(X_test), dtype=np.int64)

        for i, x_test in enumerate(X_test):
            # Distance from this sample to every training sample in a single
            # vectorised call; flattening each difference keeps the result
            # equal to the per-sample Euclidean (Frobenius) norm.
            diff = (train_X - np.asarray(x_test)).reshape(n_train, -1)
            dist = np.linalg.norm(diff, axis=1)

            # Indices of the n_neighbors closest training samples.
            sorted_index = dist.argsort()[:self.n_neighbors]

            # Labels ("votes") of those neighbours.
            k_nearest_neighbor_votes = train_y[sorted_index]

            # The most common vote wins; on a tie the label seen first among
            # the nearest neighbours is kept.
            winner = Counter(k_nearest_neighbor_votes).most_common(1)[0][0]

            y_target[i] = winner

        return y_target
예제 #6
0
    def test_rbf_kernel(self):
        """Check projections and predictions of the RBF-kernel SVC.

        Two point sets are drawn with ``Distribution.radial_binary`` (using
        ``st``/``ed`` of 1/2 and 4/5), labelled +1 and -1, and used to fit
        ``svm.SVC(kernel='rbf', gamma=10)``. A smaller sample drawn with the
        same settings is then predicted and compared with stored reference
        projections and with its labels.
        """

        def make_dataset(pts):
            # Build features and +/-1 labels for both classes. Each label
            # vector is sized from its own feature block.
            positive = Distribution.radial_binary(pts=pts,
                                                  mean=[0, 0],
                                                  st=1,
                                                  ed=2,
                                                  seed=100)
            negative = Distribution.radial_binary(pts=pts,
                                                  mean=[0, 0],
                                                  st=4,
                                                  ed=5,
                                                  seed=100)

            positive_labels = np.ones(positive.shape[0])
            negative_labels = -np.ones(negative.shape[0])

            features = np.vstack((positive, negative))
            labels = np.hstack((positive_labels, negative_labels))
            return features, labels

        X_train, y_train = make_dataset(100)

        clf = svm.SVC(kernel='rbf', gamma=10)
        clf.fit(X_train, y_train)

        X_test, y_test = make_dataset(10)

        predictions, projections = clf.predict(X_test, return_projection=True)

        expected_projections = np.array([
            1.2630574, 1.3302442, 1.502788, 1.2003369, 1.4567516, 1.0555044,
            1.434326, 1.4227715, 1.1069533, 1.104987, -1.6992458, -1.5001097,
            -1.0005158, -1.8284273, -1.0863144, -2.238042, -1.2274336,
            -1.2235101, -2.1250129, -2.0870237
        ])

        self.assertTrue(np.allclose(projections, expected_projections))
        self.assertTrue(np.allclose(predictions, y_test))
예제 #7
0
    def test_linear_kernel(self):
        """Check projections and predictions of the linear-kernel SVC.

        Two point sets are drawn with ``Distribution.linear`` (means
        ``[8, 10]`` and ``[9, 5]``), labelled +1 and -1, and used to fit
        ``svm.SVC(kernel='linear')``. A smaller sample drawn with the same
        settings is then predicted and compared with stored reference
        projections and with its labels.
        """

        def make_dataset(pts):
            # Draw both classes and stack them with their +/-1 labels.
            positive = Distribution.linear(pts=pts,
                                           mean=[8, 10],
                                           covr=[[1.5, 1], [1, 1.5]],
                                           seed=100)
            negative = Distribution.linear(pts=pts,
                                           mean=[9, 5],
                                           covr=[[1.5, 1], [1, 1.5]],
                                           seed=100)

            positive_labels = np.ones(positive.shape[0])
            negative_labels = -np.ones(negative.shape[0])

            features = np.vstack((positive, negative))
            labels = np.hstack((positive_labels, negative_labels))
            return features, labels

        X_train, y_train = make_dataset(100)

        linear_svc = svm.SVC(kernel='linear')
        linear_svc.fit(X_train, y_train)

        X_test, y_test = make_dataset(10)

        predictions, projections = linear_svc.predict(X_test,
                                                      return_projection=True)

        reference_projections = np.array([
            5.2844825, 2.8846788, 3.898558, 2.4527097, 4.271367, 4.6425023,
            5.170607, 3.3408344, 5.3939104, 2.779106, -2.909471, -5.3092747,
            -4.2953954, -5.7412434, -3.9225864, -3.551451, -3.0233462,
            -4.853119, -2.8000426, -5.4148474
        ])
        self.assertTrue(np.allclose(projections, reference_projections))
        self.assertTrue(np.allclose(predictions, y_test))
예제 #8
0
    def __create_kernel_matrix(self, X):
        """Build the Gram (kernel) matrix of the training data.

        Entry ``(i, j)`` is ``self.kernel(X[i], X[j])`` for the first
        ``self.n`` samples of ``X``.
        Refer - https://en.wikipedia.org/wiki/Gramian_matrix

        Parameters
        ----------
        X : numpy.array
            The samples the kernel is evaluated on.

        Returns
        -------
        kernel_matrix : numpy.array
            The gram kernel matrix, of shape ``(self.n, self.n)``.

        """
        size = self.n

        # Evaluate the kernel row by row: i is the row, j the column.
        rows = []
        for i in range(size):
            row = []
            for j in range(size):
                row.append(self.kernel(X[i], X[j]))
            rows.append(row)

        return np.array(rows).reshape(size, size)
예제 #9
0
    def fit(self, X, y, multiplier_threshold=1e-5):
        """Fits the svc model on training data.

        When the labels are exactly ``{-1, 1}`` the dual problem is solved
        directly with ``cvxopt.solvers.qp`` under the box constraint
        ``0 <= alpha <= self.C``. Any other label set is handled
        one-vs-rest: one binary classifier per unique label is fitted and
        stored in ``self.classifiers``.

        Parameters
        ----------
        X : numpy.array
            The training features.
        y : numpy.array
            The training labels.
        multiplier_threshold : float
            The threshold for selecting lagrange multipliers. Samples whose
            multiplier is at least this value are kept as support vectors.

        Returns
        -------
        self : SVC
            The fitted classifier. ``self.classifiers`` holds ``[self]`` for
            a binary problem, or one fitted binary classifier per unique
            label for a multi class problem.

        Raises
        ------
        ValueError
            If ``y`` contains fewer than two distinct labels.
        """
        X = np.array(X)
        self.y = np.asarray(y)
        self.n = self.y.shape[0]

        self.uniques, self.ind = np.unique(self.y, return_index=True)
        self.n_classes = len(self.uniques)

        # A single label cannot be split one-vs-rest: every sub-problem would
        # again contain one label only and the recursion would never end.
        if self.n_classes < 2:
            raise ValueError(
                "SVC.fit needs at least two distinct labels in y, got %d" %
                self.n_classes)

        # Do multi class classification
        if sorted(self.uniques) != [-1, 1]:
            # Start from a clean list so refitting does not accumulate the
            # classifiers of an earlier fit.
            self.classifiers = []

            for label in self.uniques:
                # One-vs-rest target: +1 for the current label, -1 otherwise.
                y_i = np.where(self.y == label, 1, -1)

                # Copy the current initializer
                clf = SVC()
                clf.kernel = self.kernel
                clf.C = self.C

                self.classifiers.append(
                    clf.fit(X, y_i, multiplier_threshold))
            return self

        # create a gram matrix by taking the outer product of y
        gram_matrix_y = np.outer(self.y, self.y)
        K = self.__create_kernel_matrix(X)
        gram_matrix_xy = gram_matrix_y * K

        # Quadratic and linear terms handed to the QP solver.
        P = cvxopt.matrix(gram_matrix_xy)
        q = cvxopt.matrix(-np.ones(self.n))

        # Inequality constraints: -alpha <= 0 stacked on top of alpha <= C.
        G1 = cvxopt.spmatrix(-1.0, range(self.n), range(self.n))
        G2 = cvxopt.spmatrix(1, range(self.n), range(self.n))
        G = cvxopt.matrix([[G1, G2]])

        h1 = cvxopt.matrix(np.zeros(self.n))
        h2 = cvxopt.matrix(np.ones(self.n) * self.C)
        h = cvxopt.matrix([[h1, h2]])

        # Equality constraint: sum_i alpha_i * y_i == 0.
        A = cvxopt.matrix(self.y.astype(np.double)).trans()
        b = cvxopt.matrix(0.0)

        lagrange_multipliers = np.array(
            list(cvxopt.solvers.qp(P, q, G, h, A, b)['x']))

        # Positions of the multipliers that reach the threshold.
        is_support = np.greater_equal(lagrange_multipliers,
                                      multiplier_threshold)
        lagrange_multiplier_indices = list(is_support.nonzero()[0])

        self.support_vectors = X[lagrange_multiplier_indices]
        self.support_vectors_y = self.y[lagrange_multiplier_indices]
        self.support_lagrange_multipliers = lagrange_multipliers[
            lagrange_multiplier_indices]
        self.b = 0
        self.n_support_vectors = self.support_vectors.shape[0]

        # The bias is the mean, over all support vectors, of
        # y_i - sum_j alpha_j * y_j * K(x_i, x_j).
        for i in range(self.n_support_vectors):
            kernel_trick = K[[lagrange_multiplier_indices[i]],
                             lagrange_multiplier_indices]

            self.b += self.support_vectors_y[i] - np.sum(
                self.support_lagrange_multipliers * self.support_vectors_y *
                kernel_trick)

        self.b /= self.n_support_vectors

        self.classifiers = [self]
        return self
예제 #10
0
    def fit(self, X):
        """Fits the dbscan unsupervised clustering algorithm.

        Every sample ("villager") ends up with a cluster id ("clan") in
        ``self.clan``: ``-1`` for isolated villagers, otherwise a clan id
        counted up from ``0`` in the order the clans are founded. A villager
        that already belongs to a clan is never moved to a clan founded
        later.

        Parameters
        ----------
        X : numpy.ndarray
            The training features.

        Returns
        -------
        self
            The fitted estimator.
        """
        self.village = X
        self.n_houses = len(self.village)

        # When a villager is not assigned a clan his clan is None
        self.clan = np.array([None] * self.n_houses)
        current_clan_id = 0

        for villager_id in range(self.n_houses):
            # only villagers without a clan can found a new one
            if self.clan[villager_id] is not None:
                continue

            # get all his neighbors, fitting the criteria in __init__
            neighbor_ids = self.__get_neighbours(villager_id)

            # if he is an isolated villager he will be assigned -1 clan
            # AKA the isolated clan.
            if len(neighbor_ids) < self.min_neigh:
                self.clan[villager_id] = -1
                continue

            # else he founds a new clan; label him explicitly so the result
            # does not depend on the neighbour query returning himself.
            self.clan[villager_id] = current_clan_id

            # Only neighbors without a clan (None) or in the isolated clan
            # (-1) join; a neighbor that already belongs to an earlier clan
            # stays there. Recruits are kept in their own list so the list
            # returned by the neighbour query is left untouched.
            recruits = []
            for neighbor_id in neighbor_ids:
                membership = self.clan[neighbor_id]
                if membership is None or membership == -1:
                    self.clan[neighbor_id] = current_clan_id
                    recruits.append(neighbor_id)

            # these recruits will try to convince their neighbors to join
            # their clan; the list grows while it is walked, hence the
            # explicit cursor instead of iterating over it directly.
            cursor = 0
            while cursor < len(recruits):
                neighbors_neighbors_ids = self.__get_neighbours(
                    recruits[cursor])
                cursor += 1

                # recruits with too few neighbors are not allowed to recruit
                if len(neighbors_neighbors_ids) < self.min_neigh:
                    continue

                for neighbors_neighbor_id in neighbors_neighbors_ids:
                    # if they have not been allocated a clan before they
                    # also have the privilege to recruit more villagers.
                    if self.clan[neighbors_neighbor_id] is None:
                        recruits.append(neighbors_neighbor_id)
                        self.clan[neighbors_neighbor_id] = current_clan_id

                    # isolated ones have already been given the chance to
                    # recruit more members, but they are indeed isolated.
                    elif self.clan[neighbors_neighbor_id] == -1:
                        self.clan[neighbors_neighbor_id] = current_clan_id

            # When a new clan is formed we get a new clan ID.
            current_clan_id += 1
        return self
예제 #11
0
    def test_poly_kernel(self):
        """Check projections and predictions of the polynomial-kernel SVC.

        Four point sets are drawn with ``Distribution.linear``; the two with
        mean x-coordinate 8 are labelled +1 and the two with mean
        x-coordinate 15 are labelled -1. ``svm.SVC(kernel='polynomial',
        const=1, degree=2)`` is fitted on 50 points per set and evaluated on
        5 points per set against stored reference projections and the labels.
        """

        def make_dataset(pts):
            # Draw the four point sets in a fixed order and stack them into
            # one feature matrix with +/-1 labels.
            first = Distribution.linear(pts=pts,
                                        mean=[8, 20],
                                        covr=[[1.5, 1], [1, 2]],
                                        seed=100)
            second = Distribution.linear(pts=pts,
                                         mean=[8, 15],
                                         covr=[[1.5, -1], [-1, 2]],
                                         seed=100)
            third = Distribution.linear(pts=pts,
                                        mean=[15, 20],
                                        covr=[[1.5, 1], [1, 2]],
                                        seed=100)
            fourth = Distribution.linear(pts=pts,
                                         mean=[15, 15],
                                         covr=[[1.5, -1], [-1, 2]],
                                         seed=100)

            positive = np.vstack((first, second))
            negative = np.vstack((third, fourth))

            positive_labels = np.ones(positive.shape[0])
            negative_labels = -np.ones(negative.shape[0])

            features = np.vstack((positive, negative))
            labels = np.hstack((positive_labels, negative_labels))
            return features, labels

        X_train, y_train = make_dataset(50)

        clf = svm.SVC(kernel='polynomial', const=1, degree=2)
        clf.fit(X_train, y_train)

        X_test, y_test = make_dataset(5)

        predictions, projections = clf.predict(X_test, return_projection=True)

        # Reference values for the polynomial kernel only; an unused first
        # assignment that was overwritten straight away has been dropped.
        expected_projections = np.array([
            1.9282368, 4.1053743, 4.449601, 2.8149981, 3.337817, 1.5934888,
            4.237419, 3.699658, 3.8548565, 2.8402433, -6.7378554, -2.9163127,
            -2.5978136, -4.833237, -4.421687, -5.2333884, -2.2744238,
            -3.0598483, -2.4422958, -3.890006
        ])
        self.assertTrue(np.allclose(projections, expected_projections))
        self.assertTrue(np.allclose(predictions, y_test))
예제 #12
0
 def setUp(self):
     """Store a small two-class fixture in ``self.X`` and ``self.y``."""
     # Linearly separable data: the first seven points carry label 1,
     # the last seven carry label -1.
     positive_points = [[8.0, 7], [4, 10], [9, 7], [7, 10], [9, 6], [4, 8],
                        [10, 10]]
     negative_points = [[2, 7], [8, 3], [7, 5], [4, 4], [4, 6], [1, 3],
                        [2, 5]]

     positive_labels = [1] * len(positive_points)
     negative_labels = [-1] * len(negative_points)

     self.X = np.array(positive_points + negative_points)
     self.y = np.array(positive_labels + negative_labels)
예제 #13
0
    def test_multiclass(self):
        """Check the projections of the RBF SVC on a four-label problem.

        Four point sets are drawn with ``Distribution.radial_binary`` and
        labelled -1, 1, 2 and 3000. The classifier is fitted on them and then
        evaluated on a second draw made with the same settings; the returned
        projections are compared with stored reference values.
        """
        # (st, ed) arguments of the four point sets, in drawing order.
        ring_bounds = ((1, 2), (4, 5), (6, 7), (8, 9))
        ring_labels = (-1, 1, 2, 3000)

        def draw_rings():
            # One draw per (st, ed) pair, always in the same order.
            rings = []
            for st, ed in ring_bounds:
                rings.append(
                    Distribution.radial_binary(pts=10,
                                               mean=[0, 0],
                                               st=st,
                                               ed=ed,
                                               seed=100))
            return rings

        train_rings = draw_rings()
        train_targets = [
            label * np.ones(ring.shape[0])
            for label, ring in zip(ring_labels, train_rings)
        ]

        X_train = np.vstack(train_rings)
        y_train = np.hstack(train_targets)

        clf = svm.SVC(kernel='rbf', gamma=10)
        clf.fit(X_train, y_train)

        X_test = np.vstack(draw_rings())

        _, projections = clf.predict(X_test, return_projection=True)

        reference_projections = np.array([
            1.23564788, 1.15519477, 1.32441802, 1.04496554, 1.29740627, 0.,
            1.25561797, 1.22925452, 0., 1.11920321, 0.2991908, 0.23818634,
            0.55359011, 0.29655677, 0., 0.59992803, 0.52733203, 0.30456398,
            0.6027897, 0.33755249, 0., 0.04997651, 0.12099712, 0.12276944, 0.,
            0.19631702, 0.11836214, 0.06221966, 0.24539362, 0., 1.00000106,
            1.0000021, 1.00000092, 1.19952335, 1.00000283, 1.17741522,
            1.40596479, 1.60945299, 1.41534644, 1.27928235
        ])

        self.assertTrue(np.allclose(projections, reference_projections))