def test_transform_matrix_plain(self):
    """
    Test matrix with random size and values with no transformation.

    One hundred constant-valued matrices with between 1 and 99 rows and
    columns are passed through a ``Scaler`` whose two leading flags are
    both ``False``, once with the default axis and once with ``axis=1``.
    The output must equal the input exactly.
    """
    for _ in range(100):
        rows = int(99 * np.random.random() + 1)
        cols = int(99 * np.random.random() + 1)
        matrix = np.full((rows, cols), np.random.random())

        for axis_kwargs in ({}, {"axis": 1}):
            transformed = Scaler(False, False, **axis_kwargs).transform(matrix)
            # matrix.tolist() is evaluated after the transform on purpose, so
            # the comparison sees the matrix in its post-transform state.
            self.assertEqual(transformed.tolist(), matrix.tolist())
def test_transform_matrix_mean(self):
    """
    Test matrix with mean centering.

    Two fixed matrices are centred with ``axis=0`` and ``axis=1`` and each
    resulting element is compared against hand-computed values. The
    expected values correspond to subtracting the column means for
    ``axis=0`` and the row means for ``axis=1``.
    """

    def check(actual, expected):
        # Element-wise comparison using unittest's default tolerance.
        for i, row in enumerate(actual):
            for j, value in enumerate(row):
                self.assertAlmostEqual(value, expected[i][j])

    whole_numbers = np.array(
        [[21, 6, 9, 0], [9, 4, 1, 2], [6, 5, 8, 1]], dtype=np.float64)

    centred = Scaler(calc_mean=True, calc_std=False,
                     axis=0).transform(whole_numbers)
    check(centred, np.array([[9., 1., 3., -1.],
                             [-3., -1., -5., 1.],
                             [-6., 0., 2., 0.]]))

    centred = Scaler(calc_mean=True, calc_std=False,
                     axis=1).transform(whole_numbers)
    check(centred, np.array([[12., -3., 0., -9.],
                             [5., 0., -3., -2.],
                             [1., 0., 3., -4.]]))

    fractions = np.array(
        [[0.5, -0.9, 0.12], [7.1, 9.5, 2], [2.36, 1, 1]], dtype=np.float64)

    centred = Scaler(calc_mean=True, calc_std=False,
                     axis=0).transform(fractions)
    check(centred, np.array([[-2.82, -4.1, -.92],
                             [3.78, 6.3, 0.96],
                             [-0.96, -2.2, -0.04]]))

    # The row means are not finite decimals here, so the output is rounded
    # to four places before being compared with four-place expectations.
    centred = Scaler(calc_mean=True, calc_std=False,
                     axis=1).transform(fractions).round(4)
    check(centred, np.array([[0.5933, -.8067, .2133],
                             [0.9, 3.3, -4.2],
                             [0.9067, -.4533, -.4533]]))
def test_transform_size_plain(self):
    """
    Test vectors and matrix with random size and values for size.

    One hundred constant-valued matrices with between 1 and 99 rows and
    columns are passed through a non-transforming, non-inplace ``Scaler``
    along a randomly chosen axis (0 or 1). The number of elements must be
    unchanged.
    """
    for _ in range(100):
        size_x = int(99 * np.random.random() + 1)
        size_y = int(99 * np.random.random() + 1)
        # randint's upper bound is exclusive: (0, 1) would always yield 0 and
        # leave axis=1 untested, so (0, 2) is required to draw both axes.
        dim = int(np.random.randint(0, 2))
        matrix = np.full((size_x, size_y), np.random.random())
        transformed = Scaler(False, False, inplace=False,
                             axis=dim).transform(matrix)
        self.assertEqual(matrix.size, transformed.size)
def test_transform_vec_plain(self):
    """
    Test vectors with random size and values with no transformation.

    For one hundred random lengths between 1 and 99, a constant-valued
    "horizontal" (1 x size) and "vertical" (size x 1) vector are passed
    through a ``Scaler`` whose two leading flags are both ``False``, with
    the default axis and with ``axis=1``. The output must equal the input.
    """
    for _ in range(100):
        size = int(99 * np.random.random() + 1)

        # First the "horizontal" vector, then the "vertical" one.
        for shape in ((1, size), (size, 1)):
            vector = np.full(shape, np.random.random())
            for axis_kwargs in ({}, {"axis": 1}):
                transformed = Scaler(False, False,
                                     **axis_kwargs).transform(vector)
                self.assertEqual(transformed.tolist(), vector.tolist())
def test_transform_vec_mean(self):
    """
    Test vectors with mean centering.

    Covers constant vectors (zeros and ones, both orientations), which must
    be mapped exactly to zeros, and a handful of small non-constant vectors
    whose centred values are compared element-wise with a tolerance.
    """

    def center(data, **axis_kwargs):
        # Mean-centering only; the axis is left at its default unless given.
        return Scaler(calc_std=False, calc_mean=True,
                      **axis_kwargs).transform(data)

    def assert_all_almost_equal(actual, expected):
        # assertAlmostEqual cannot compare lists or arrays: it only succeeds
        # on exact equality and raises TypeError otherwise. Compare shapes
        # first, then each pair of elements with the default tolerance.
        actual = np.asarray(actual, dtype=np.float64)
        expected = np.asarray(expected, dtype=np.float64)
        self.assertEqual(actual.shape, expected.shape)
        for got, want in zip(actual.ravel(), expected.ravel()):
            self.assertAlmostEqual(got, want)

    # Constant vectors: 1x5 and 5x1, zeros then ones, both axis settings.
    for fill in (np.zeros, np.ones):
        for shape in ((1, 5), (5, 1)):
            for axis_kwargs in ({}, {"axis": 1}):
                self.assertEqual(
                    center(fill(shape), **axis_kwargs).tolist(),
                    np.zeros(shape).tolist())

    # (input, Scaler axis arguments, expected centred output)
    cases = [
        ([[0, 5, 10]], {}, [[0, 0, 0]]),
        ([[0, 5, 10]], {"axis": 1}, [[-5, 0, 5]]),
        ([[0], [5], [10]], {"axis": 1}, [[0], [0], [0]]),
        ([[1, 0, 3.5]], {"axis": 1}, [[-.5, -1.5, 2]]),
        ([[1, 0, 3.5]], {}, [[0, 0, 0]]),
        ([[1], [0], [3.5]], {"axis": 1}, [[0], [0], [0]]),
        ([[1], [0], [3.5]], {"axis": 0}, [[-.5], [-1.5], [2]]),
    ]
    for data, axis_kwargs, expected in cases:
        assert_all_almost_equal(
            center(np.array(data), **axis_kwargs), expected)
def _extract_column_groups(groups):
    """Collect the selected values of every Dropdown nested in a Div entry."""
    return [
        child['props']['value']
        for item in groups
        if item['type'] == 'Div'
        for child in item['props']['children']
        if child['type'] == 'Dropdown' and child['props']['value']
    ]


def preprocess_data(pagination_settings, tab, _n_clicks_preprocessing,
                    _n_clicks_group, dimension, mean, std, groups):
    """
    Group, normalise and paginate the application data for the "pre" tab.

    The selected settings and the resulting scaler / normalised frame are
    stored on ``app.context`` as a side effect.

    :param pagination_settings: mapping with ``'current_page'`` and
        ``'page_size'`` entries used to slice the normalised data.
    :param tab: name of the active tab; work is only done for ``'pre'``.
    :param _n_clicks_preprocessing: unused (presumably a callback trigger).
    :param _n_clicks_group: unused (presumably a callback trigger).
    :param dimension: value convertible with ``int()``; stored as
        ``app.context.axis``.
    :param mean: the string ``"False"`` disables mean calculation, any
        other value enables it.
    :param std: the string ``"False"`` disables std calculation, any other
        value enables it.
    :param groups: optional list of component dicts (``'type'`` /
        ``'props'``) describing column groups; when falsy the data is used
        ungrouped.
    :return: ``(page_data, page_columns, errors)`` on success, otherwise
        three empty lists.
    """
    # Bound unconditionally so that every branch below may append to it;
    # previously some paths reached ``errors.append`` with it unbound.
    errors = []

    if tab in ["pre"]:
        if groups:
            # Each group becomes one column holding the row-wise mean of
            # its member columns, labelled with the joined column names.
            grouped_data = pd.DataFrame(index=app.context.data.index)
            for group in _extract_column_groups(groups):
                grouped_data[', '.join(group)] = (
                    app.context.data[group].mean(axis=1))
        else:
            grouped_data = app.context.data

        app.context.axis = int(dimension)
        app.context.calc_mean = mean != "False"
        app.context.calc_std = std != "False"

        matrix = generate(grouped_data)
        # The Scaler receives the opposite axis of the stored dimension.
        app.context.scaler = Scaler(calc_mean=app.context.calc_mean,
                                    calc_std=app.context.calc_std,
                                    axis=(0 if app.context.axis == 1 else 1))
        normalized = app.context.scaler.transform(matrix.values)
        app.context.normalized_data = pd.DataFrame(normalized,
                                                   index=matrix.index,
                                                   columns=matrix.columns)

        if not app.context.normalized_data.empty:
            for t in get_invalid_data(app.context.normalized_data):
                errors.append('Invalid values in: {}'.format(t))

            page_size = pagination_settings['page_size']
            start = pagination_settings['current_page'] * page_size
            # 'records' is the full orient name; the old 'rows' spelling
            # relied on abbreviation matching that newer pandas rejects.
            page_data = app.context.normalized_data.iloc[
                start:start + page_size].to_dict('records')
            page_columns = [{
                'name': i,
                'id': i
            } for i in app.context.normalized_data.columns]
            return page_data, page_columns, errors

    errors.append('Preprocessing: No data loaded.')
    return [], [], []  # errors