예제 #1
0
 def test_normalize_with_scaler(self):
     """Check Normalizer.normalize when an already fitted MinMaxScaler is given.

     The second element of the result must equal the scaler's own
     transform of the data, and the first element must equal the scaler
     that was passed in.
     """
     fitted = MinMaxScaler()
     fitted.fit(self.data)

     expected = fitted.transform(self.data)
     actual = Normalizer.normalize(self.data, fitted)[1]
     self.assertTrue(np.array_equal(expected, actual))

     returned_scaler = Normalizer.normalize(self.data, fitted)[0]
     self.assertEqual(fitted, returned_scaler)
예제 #2
0
 def test_standardize_with_scaler(self):
     """Check Normalizer.standardize when an already fitted StandardScaler is given.

     The second element of the result must equal the scaler's own
     transform of the data, and the first element must equal the scaler
     that was passed in.
     """
     fitted = StandardScaler()
     fitted.fit(self.data)

     expected = fitted.transform(self.data)
     actual = Normalizer.standardize(self.data, fitted)[1]
     self.assertTrue(np.array_equal(expected, actual))

     returned_scaler = Normalizer.standardize(self.data, fitted)[0]
     self.assertEqual(fitted, returned_scaler)
    def __get_training_testing_sets(self, feat_matrix, label_matrix,
                                    num_per_class):
        """
        Obtain training and testing features by random sampling

        For every row of `label_matrix` the columns whose label equals 1 are
        collected. `num_per_class` of them are drawn without replacement
        among the columns whose squared feature norm exceeds 1e-6 and become
        training samples; every other column of that row becomes a testing
        sample. Both feature sets are then passed through the normalizer
        selected by `self.normalizer`.

        Args:
            feat_matrix   (np.ndarray): input features
            label_matrix  (np.ndarray): label matrix for input features
            num_per_class        (int): number of training samples from each category

        Return:
            train_feats  (np.ndarray): training features
            train_labels (np.ndarray): label matrix for training features
            test_feats   (np.ndarray): testing features
            test_labels  (np.ndarray): label matrix for testing features

        Raises:
            ValueError: from np.random.choice when a class has fewer than
                `num_per_class` columns eligible for sampling
        """
        assert isinstance(feat_matrix, np.ndarray)
        assert isinstance(label_matrix, np.ndarray)
        # NumPy integer scalars are valid sample counts as well
        assert isinstance(num_per_class, (int, np.integer))

        num_class = label_matrix.shape[0]  # number of objects

        # Collect the per-class pieces and concatenate once at the end instead
        # of regrowing four arrays on every iteration. Each list is seeded
        # with an empty float64 block so the resulting dtype, and the result
        # for zero classes, match column-wise appending onto empty arrays.
        test_feat_parts = [np.empty((feat_matrix.shape[0], 0))]
        test_label_parts = [np.empty((label_matrix.shape[0], 0))]
        train_feat_parts = [np.empty((feat_matrix.shape[0], 0))]
        train_label_parts = [np.empty((label_matrix.shape[0], 0))]

        for classid in range(num_class):
            # columns labelled as belonging to the current class
            col_ids = np.flatnonzero(label_matrix[classid, :] == 1)
            # positions (within col_ids) of columns with non-negligible energy
            data_ids = np.flatnonzero(
                np.sum(feat_matrix[:, col_ids]**2, axis=0) > 1e-6)
            trainids = col_ids[np.random.choice(data_ids,
                                                num_per_class,
                                                replace=False)]
            # every remaining column of the class is used for testing
            testids = np.setdiff1d(col_ids, trainids)

            test_feat_parts.append(feat_matrix[:, testids])
            test_label_parts.append(label_matrix[:, testids])
            train_feat_parts.append(feat_matrix[:, trainids])
            train_label_parts.append(label_matrix[:, trainids])

        test_feats = np.concatenate(test_feat_parts, axis=1)
        test_labels = np.concatenate(test_label_parts, axis=1)
        train_feats = np.concatenate(train_feat_parts, axis=1)
        train_labels = np.concatenate(train_label_parts, axis=1)

        if self.normalizer in Normalizer.CHOICES[:4]:
            # sample normalization
            train_feats = Normalizer()(self.normalizer, data=train_feats.T).T
            test_feats = Normalizer()(self.normalizer, data=test_feats.T).T
        else:
            # feature scaling: the scaler returned for the training features
            # is handed over as fitted_scaler for the testing features
            scaler, train_feats = Normalizer()(self.normalizer,
                                               data=train_feats.T)
            train_feats = train_feats.T
            test_feats = Normalizer()(self.normalizer,
                                      data=test_feats.T,
                                      fitted_scaler=scaler)[1].T

        return train_feats, train_labels, test_feats, test_labels
예제 #4
0
 def test_get_normalizer_normalize_with_scaler(self):
     """Check get_normalizer(NORMALIZE) against Normalizer.normalize.

     Both calls receive the same fitted MinMaxScaler; the transformed
     data (index 1) and the returned scaler (index 0) must match.
     """
     scaler = MinMaxScaler()
     scaler.fit(self.data)
     self.assertTrue(
         np.array_equal(
             Normalizer.normalize(self.data, fitted_scaler=scaler)[1],
             Normalizer.get_normalizer(Normalizer.NORMALIZE,
                                       data=self.data,
                                       fitted_scaler=scaler)[1]))
     # assertEqual, not assertTrue: assertTrue would treat the second
     # argument as the failure message and never compare the scalers.
     self.assertEqual(
         Normalizer.normalize(self.data, fitted_scaler=scaler)[0],
         Normalizer.get_normalizer(Normalizer.NORMALIZE,
                                   data=self.data,
                                   fitted_scaler=scaler)[0])
예제 #5
0
 def test_get_normalizer_standardize_with_scaler(self):
     """Check get_normalizer(STANDARDIZE) against Normalizer.standardize.

     Both calls receive the same fitted StandardScaler; the transformed
     data (index 1) and the returned scaler (index 0) must match.
     """
     scaler = StandardScaler()
     scaler.fit(self.data)
     self.assertTrue(
         np.array_equal(
             Normalizer.standardize(self.data, fitted_scaler=scaler)[1],
             Normalizer.get_normalizer(Normalizer.STANDARDIZE,
                                       data=self.data,
                                       fitted_scaler=scaler)[1]))
     # assertEqual, not assertTrue: assertTrue would treat the second
     # argument as the failure message and never compare the scalers.
     self.assertEqual(
         Normalizer.standardize(self.data, fitted_scaler=scaler)[0],
         Normalizer.get_normalizer(Normalizer.STANDARDIZE,
                                   data=self.data,
                                   fitted_scaler=scaler)[0])
예제 #6
0
    def __init__(self, normalizer=Normalizer.NONE):
        """
        Load the training and testing datasets, normalize their codes and
        sort them.

        When `normalizer` is one of the first four Normalizer.CHOICES it is
        applied to each dataset independently; otherwise the scaler returned
        for the training codes is passed as `fitted_scaler` when processing
        the testing codes.

        Args:
            normalizer (Normalizer option): Normalization to apply
        """
        with open(settings.TRAINING_DATA_DIRECTORY_DATASET_PATH, 'r') as file_:
            self.training_data = json.load(file_)
            self.to_numpy(self.training_data)

            # True  -> sample normalization, False -> feature scaling
            per_sample = normalizer in Normalizer.CHOICES[:4]
            train_codes_t = self.training_data['codes'].T

            if per_sample:
                self.training_data['codes'] = Normalizer()(
                    normalizer, data=train_codes_t).T
            else:
                # keep the scaler so the testing data can reuse it below
                scaler, scaled_train = Normalizer()(normalizer,
                                                    data=train_codes_t)
                self.training_data['codes'] = scaled_train.T

            # sorting dataset
            sorted_codes, sorted_labels = self.sort_dataset(
                self.training_data['codes'], self.training_data['labels'])
            self.training_data['codes'] = sorted_codes
            self.training_data['labels'] = sorted_labels.astype(np.float64)

        with open(settings.TESTING_DATA_DIRECTORY_DATASET_PATH, 'r') as file_:
            self.testing_data = json.load(file_)
            self.to_numpy(self.testing_data)

            test_codes_t = self.testing_data['codes'].T

            if per_sample:
                self.testing_data['codes'] = Normalizer()(
                    normalizer, data=test_codes_t).T
            else:
                # only the transformed data (index 1) is needed here
                scaled_test = Normalizer()(normalizer,
                                           data=test_codes_t,
                                           fitted_scaler=scaler)[1]
                self.testing_data['codes'] = scaled_test.T

            # sorting dataset
            sorted_codes, sorted_labels = self.sort_dataset(
                self.testing_data['codes'], self.testing_data['labels'])
            self.testing_data['codes'] = sorted_codes
            self.testing_data['labels'] = sorted_labels.astype(np.float64)
예제 #7
0
 def test_get_normalizer_standardize(self):
     """get_normalizer(STANDARDIZE) must yield the same data as standardize."""
     expected = Normalizer.standardize(self.data)[1]
     actual = Normalizer.get_normalizer(Normalizer.STANDARDIZE,
                                        data=self.data)[1]
     self.assertTrue(np.array_equal(expected, actual))
예제 #8
0
 def test_get_normalizer_max_norm(self):
     """get_normalizer(MAX_NORM) must yield the same data as max_norm."""
     expected = Normalizer.max_norm(self.data)
     actual = Normalizer.get_normalizer(Normalizer.MAX_NORM,
                                        data=self.data)
     self.assertTrue(np.array_equal(expected, actual))
예제 #9
0
 def test_get_normalizer_l2_norm(self):
     """get_normalizer(L2_NORM) must yield the same data as l2_norm."""
     expected = Normalizer.l2_norm(self.data)
     actual = Normalizer.get_normalizer(Normalizer.L2_NORM, data=self.data)
     self.assertTrue(np.array_equal(expected, actual))
예제 #10
0
 def test_get_normalizer_none(self):
     """get_normalizer(NONE) must yield the same data as none."""
     expected = Normalizer.none(self.data)
     actual = Normalizer.get_normalizer(Normalizer.NONE, data=self.data)
     self.assertTrue(np.array_equal(expected, actual))
예제 #11
0
 def test_get_normalizer_normalize(self):
     """get_normalizer(NORMALIZE) must yield the same data as normalize."""
     expected = Normalizer.normalize(self.data)[1]
     actual = Normalizer.get_normalizer(Normalizer.NORMALIZE,
                                        data=self.data)[1]
     self.assertTrue(np.array_equal(expected, actual))
예제 #12
0
 def test_normalize(self):
     """normalize must match MinMaxScaler().fit_transform on the same data."""
     expected = MinMaxScaler().fit_transform(self.data)
     actual = Normalizer.normalize(self.data)[1]
     self.assertTrue(np.array_equal(expected, actual))
예제 #13
0
 def test_standardize(self):
     """standardize must match StandardScaler().fit_transform on the same data."""
     expected = StandardScaler().fit_transform(self.data)
     actual = Normalizer.standardize(self.data)[1]
     self.assertTrue(np.array_equal(expected, actual))
예제 #14
0
 def test_max_norm(self):
     """max_norm must match normalize(data, 'max') on the same data."""
     expected = normalize(self.data, 'max')
     actual = Normalizer.max_norm(self.data)
     self.assertTrue(np.array_equal(expected, actual))
예제 #15
0
 def test_l2_norm(self):
     """l2_norm must match normalize(data, 'l2') on the same data."""
     expected = normalize(self.data, 'l2')
     actual = Normalizer.l2_norm(self.data)
     self.assertTrue(np.array_equal(expected, actual))
예제 #16
0
 def test_none(self):
     """Normalizer.none must return data equal to its input."""
     untouched = Normalizer.none(self.data)
     self.assertTrue(np.array_equal(self.data, untouched))
예제 #17
0
 def test_functor(self):
     """Calling a Normalizer instance with L1_NORM must match l1_norm."""
     expected = Normalizer.l1_norm(self.data)
     actual = Normalizer()(Normalizer.L1_NORM, data=self.data)
     self.assertTrue(np.array_equal(expected, actual))