Esempio n. 1
0
 def test_transform_matrix_plain(self):
     """
     Check that a Scaler with mean and std both disabled changes nothing.

     Runs 100 rounds. Each round builds a constant-valued matrix whose two
     dimensions are drawn independently from 1..99 and verifies that the
     transformed output equals the input, once with the default axis and
     once with ``axis=1``.
     """
     for _ in range(100):
         # Keep the draw order (rows, columns, fill value) stable so a
         # seeded run produces the same matrices as before.
         n_rows = int(99 * np.random.random() + 1)
         n_cols = int(99 * np.random.random() + 1)
         fill_value = np.random.random()
         matrix = np.full((n_rows, n_cols), fill_value)

         for extra_kwargs in ({}, {'axis': 1}):
             scaler = Scaler(False, False, **extra_kwargs)
             transformed = scaler.transform(matrix)
             self.assertEqual(transformed.tolist(), matrix.tolist())
Esempio n. 2
0
    def test_transform_matrix_mean(self):
        """
        Check mean centering of 3x4 and 3x3 matrices against fixed results.

        Each matrix is centered with ``axis=0`` and then with ``axis=1``
        (std scaling disabled) and compared element by element with
        hand-computed values using ``assertAlmostEqual``.
        """

        def center(data, axis):
            # Mean centering only; std scaling stays off for this test.
            scaler = Scaler(calc_mean=True, calc_std=False, axis=axis)
            return scaler.transform(data)

        def check(actual, expected):
            # Walk every (row, column) cell of the 2-D output.
            for row, col in np.ndindex(actual.shape[0], actual.shape[1]):
                self.assertAlmostEqual(actual[row, col], expected[row][col])

        # --- integer-valued 3x4 matrix ---
        matrix = np.array([[21, 6, 9, 0], [9, 4, 1, 2], [6, 5, 8, 1]],
                          dtype=np.float64)

        expected = np.array([[9., 1., 3., -1.], [-3., -1., -5., 1.],
                             [-6., 0., 2., 0.]])
        check(center(matrix, 0), expected)

        expected = np.array([[12., -3., 0., -9.], [5., 0., -3., -2.],
                             [1., 0., 3., -4.]])
        check(center(matrix, 1), expected)

        # --- fractional 3x3 matrix ---
        matrix = np.array([[0.5, -0.9, 0.12], [7.1, 9.5, 2], [2.36, 1, 1]],
                          dtype=np.float64)

        expected = np.array([[-2.82, -4.1, -.92], [3.78, 6.3, 0.96],
                             [-0.96, -2.2, -0.04]])
        check(center(matrix, 0), expected)

        # The expected values below are given to four decimals, so the
        # output is rounded to match before comparing.
        expected = np.array([[0.5933, -.8067, .2133], [0.9, 3.3, -4.2],
                             [0.9067, -.4533, -.4533]])
        check(center(matrix, 1).round(4), expected)
Esempio n. 3
0
 def test_transform_size_plain(self):
     """
     Test that a transform with no scaling keeps the number of elements.

     Runs 100 rounds. Each round builds a constant-valued matrix whose two
     dimensions are drawn from 1..99, picks the axis at random (0 or 1),
     and checks that the output of a Scaler with mean and std disabled has
     the same ``size`` as the input.
     """
     for _ in range(100):
         size_x = int(99 * np.random.random() + 1)
         size_y = int(99 * np.random.random() + 1)
         # randint's upper bound is exclusive: (0, 2) yields 0 or 1, whereas
         # the former (0, 1) could only ever yield 0 and never tested axis=1.
         dim = np.random.randint(0, 2)
         matrix = np.full((size_x, size_y), np.random.random())
         transformed = Scaler(False, False, inplace=False,
                              axis=dim).transform(matrix)
         self.assertEqual(matrix.size, transformed.size)
Esempio n. 4
0
    def test_transform_vec_plain(self):
        """
        Check that a Scaler with mean and std disabled leaves vectors as-is.

        Runs 100 rounds. Each round draws a length from 1..99 and builds a
        constant-valued "horizontal" (1 x n) and "vertical" (n x 1) vector;
        both must come back unchanged with the default axis and with
        ``axis=1``.
        """
        for _ in range(100):
            length = int(99 * np.random.random() + 1)

            # Horizontal first, then vertical; each gets its own fill value.
            for shape in ((1, length), (length, 1)):
                vector = np.full(shape, np.random.random())

                for extra_kwargs in ({}, {'axis': 1}):
                    scaler = Scaler(False, False, **extra_kwargs)
                    transformed = scaler.transform(vector)
                    self.assertEqual(transformed.tolist(), vector.tolist())
Esempio n. 5
0
    def test_transform_vec_mean(self):
        """
        Test vectors with mean centering.

        Covers two groups of cases, all with std scaling disabled:

        * constant vectors (zeros and ones, 1x5 and 5x1), which must come
          back as exact zeros for the default axis and for ``axis=1``;
        * small hand-computed vectors, compared element by element with
          ``assertAlmostEqual``.
        """

        def assert_all_almost_equal(actual, expected):
            # unittest's assertAlmostEqual cannot compare nested lists: it
            # passes only on exact equality and otherwise raises TypeError
            # on ``list - list``. Compare cell by cell instead.
            actual = np.asarray(actual, dtype=np.float64)
            expected = np.asarray(expected, dtype=np.float64)
            self.assertEqual(actual.shape, expected.shape)
            for index in np.ndindex(*expected.shape):
                self.assertAlmostEqual(actual[index], expected[index])

        # Constant vectors: centering zeros or ones always yields zeros.
        for make_vector in (np.zeros, np.ones):
            for shape in ((1, 5), (5, 1)):
                for extra_kwargs in ({}, {'axis': 1}):
                    scaler = Scaler(calc_std=False, calc_mean=True,
                                    **extra_kwargs)
                    self.assertEqual(
                        scaler.transform(make_vector(shape)).tolist(),
                        np.zeros(shape).tolist())

        # (extra Scaler kwargs, input rows, expected rows)
        cases = [
            ({}, [[0, 5, 10]], [[0, 0, 0]]),
            ({'axis': 1}, [[0, 5, 10]], [[-5, 0, 5]]),
            ({'axis': 1}, [[0], [5], [10]], [[0], [0], [0]]),
            ({'axis': 1}, [[1, 0, 3.5]], [[-.5, -1.5, 2]]),
            ({}, [[1, 0, 3.5]], [[0, 0, 0]]),
            ({'axis': 1}, [[1], [0], [3.5]], [[0], [0], [0]]),
            ({'axis': 0}, [[1], [0], [3.5]], [[-.5], [-1.5], [2]]),
        ]
        for extra_kwargs, rows, expected in cases:
            scaler = Scaler(calc_std=False, calc_mean=True, **extra_kwargs)
            centered = scaler.transform(np.array(rows))
            assert_all_almost_equal(centered, expected)
def _selected_column_groups(groups):
    """Collect the non-empty 'Dropdown' values nested inside 'Div' items."""
    column_groups = []
    for item in groups:
        if item['type'] != 'Div':
            continue
        for child in item['props']['children']:
            if child['type'] == 'Dropdown' and child['props']['value']:
                column_groups.append(child['props']['value'])
    return column_groups


def _average_column_groups(data, column_groups):
    """Build a frame on data's index with one row-wise mean column per group."""
    grouped = pd.DataFrame(index=data.index)
    for group in column_groups:
        # Each group is a list of column names; the new column is named
        # after its members and holds their per-row mean.
        grouped[', '.join(group)] = data[group].mean(axis=1)
    return grouped


def preprocess_data(pagination_settings, tab, _n_clicks_preprocessing,
                    _n_clicks_group, dimension, mean, std, groups):
    """
    Normalize the loaded data and return one page of it for display.

    Nothing is computed unless ``tab`` is ``"pre"``. On that tab the data in
    ``app.context.data`` is optionally reduced to per-group column means,
    passed through ``generate`` and a ``Scaler``, and stored as
    ``app.context.normalized_data``. The chosen settings and the scaler are
    stored on ``app.context`` as well (``axis``, ``calc_mean``, ``calc_std``,
    ``scaler``).

    Args:
        pagination_settings: mapping with ``'current_page'`` and
            ``'page_size'`` entries selecting the rows to return.
        tab: name of the active tab; only ``"pre"`` triggers processing.
        _n_clicks_preprocessing: not used in the body.
        _n_clicks_group: not used in the body.
        dimension: value convertible with ``int``; stored as
            ``app.context.axis``. The Scaler gets ``axis=0`` when it is 1 and
            ``axis=1`` otherwise.
        mean: the string ``"False"`` disables mean centering; any other
            value enables it.
        std: the string ``"False"`` disables std scaling; any other value
            enables it.
        groups: optional sequence of component dicts (``'type'``/``'props'``)
            whose nested Dropdown values are lists of column names to
            average together. Falsy means "use the data ungrouped".

    Returns:
        A ``(page_data, page_columns, errors)`` tuple: the page rows as a
        list of dicts, the column descriptors (``{'name', 'id'}``), and a
        list of error messages. The first two are empty lists when the tab
        is not ``"pre"`` or the normalized data is empty.
    """
    errors = []

    if tab != "pre":
        return [], [], errors

    if groups:
        grouped_data = _average_column_groups(
            app.context.data, _selected_column_groups(groups))
    else:
        grouped_data = app.context.data

    app.context.axis = int(dimension)
    app.context.calc_mean = mean != "False"
    app.context.calc_std = std != "False"

    matrix = generate(grouped_data)

    # The UI dimension and the Scaler axis are inverted relative to each
    # other: dimension 1 maps to axis 0, everything else to axis 1.
    app.context.scaler = Scaler(calc_mean=app.context.calc_mean,
                                calc_std=app.context.calc_std,
                                axis=(0 if app.context.axis == 1 else 1))

    normalized = app.context.scaler.transform(matrix.values)
    app.context.normalized_data = pd.DataFrame(normalized,
                                               index=matrix.index,
                                               columns=matrix.columns)
    normalized_data = app.context.normalized_data

    if normalized_data.empty:
        # Report the problem instead of silently dropping the message.
        errors.append('Preprocessing: No data loaded.')
        return [], [], errors

    for t in get_invalid_data(normalized_data):
        errors.append('Invalid values in: {}'.format(t))

    page_size = pagination_settings['page_size']
    first_row = pagination_settings['current_page'] * page_size
    # 'records' is the full name of the orient previously abbreviated as
    # 'rows'; newer pandas versions reject the abbreviation.
    page_data = normalized_data.iloc[
        first_row:first_row + page_size].to_dict('records')

    page_columns = [{'name': i, 'id': i} for i in normalized_data.columns]

    return page_data, page_columns, errors