예제 #1
0
    def test_preprocessor_encode_drop(self):
        """
        Check the DROP encoding strategy: columns declared categorical are removed from the
        transformed output, and asking for the inverse transformation raises
        NotImplementedError.
        """
        dataset = new_labels(random_seed=0)

        # The first half of the columns is declared categorical
        num_columns = 10
        categorical = list(range(num_columns // 2))
        expected_columns = num_columns - len(categorical)

        preprocessor = StandardPreprocessor(
            categorical=categorical,
            normalization=None,
            encoding=EncodingStrategy.DROP,
            threshold=-1,
        )

        for _ in range(10):
            X, y = dataset.input_fn()

            # Both features and labels come out of the dataset as 1D
            for batch in (X, y):
                self.assertEqual(1, len(shape_of_array(batch)))

            # Repeat the labels once per column to build the preprocessor input
            labels_2d = [y for _ in range(num_columns)]
            preprocessor.fit(labels_2d)
            y_ = preprocessor.transform(labels_2d)
            self.assertEqual(2, y_.ndim)
            self.assertEqual(expected_columns, y_.shape[0])

            # The reverse transformation is expected to fail for this strategy
            with self.assertRaises(NotImplementedError):
                preprocessor.inverse_transform(y_)
예제 #2
0
    def test_preprocessor_encode_onehot(self):
        """
        Check the ONEHOT encoding strategy: labels are expanded into a 2D uint8 output whose
        first dimension matches the number of unique labels, and the inverse transformation
        recovers the original labels.
        """
        dataset = new_labels(random_seed=0)
        preprocessor = StandardPreprocessor(
            categorical=[0],
            normalization=None,
            encoding=EncodingStrategy.ONEHOT,
            threshold=-1,
        )

        for _ in range(10):
            X, y = dataset.input_fn()

            # Both features and labels come out of the dataset as 1D
            for batch in (X, y):
                self.assertEqual(1, len(shape_of_array(batch)))

            # Encode the labels and inspect the shape and dtype of the result
            preprocessor.fit(y)
            encoded = preprocessor.transform(y)
            encoded_shape = shape_of_array(encoded)
            self.assertEqual(2, len(encoded_shape))
            self.assertEqual(len(unique(y)), shape_of_array(encoded)[0])
            self.assertEqual(DTYPE_UINT8[0], encoded.dtype)

            # Decoding must give back the original 1D labels
            decoded = preprocessor.inverse_transform(encoded)
            self.assertEqual(1, len(shape_of_array(decoded)))
            self.assertListEqual(y.tolist(), decoded.tolist())
예제 #3
0
    def test_preprocessor_normalization(self):
        """
        Check that a preprocessor with a continuous column centers scaled-up features and
        targets around zero without collapsing their extremes to zero.
        """
        dataset = new_line(random_seed=0)
        target_preprocessor = StandardPreprocessor(continuous=[0], threshold=-1)
        feature_preprocessor = StandardPreprocessor(continuous=[0], threshold=-1)

        for _ in range(8):
            X, y = dataset.input_fn()

            # Both features and targets come out of the dataset as 1D
            self.assertEqual(1, len(shape_of_array(X)))
            self.assertEqual(1, len(shape_of_array(y)))

            # Shift and stretch the data so normalization has real work to do
            X = 100 * (X - 0.5)
            y = 100 * (y - 0.5)
            self.assertGreaterEqual(X.max() - X.min(), 1)
            self.assertGreaterEqual(y.max() - y.min(), 1)

            # Features: output stays 1D, mean rounds to zero, extremes are non-zero
            feature_preprocessor.fit(X)
            X_ = feature_preprocessor.transform(X)
            self.assertEqual(1, X_.ndim)
            self.assertEqual(round(X_.mean()), 0, X_.mean())
            self.assertNotAlmostEqual(X_.min(), 0)
            self.assertNotAlmostEqual(X_.max(), 0)

            # Targets: mean rounds to zero, extremes are non-zero
            target_preprocessor.fit(y)
            y_ = target_preprocessor.transform(y)
            self.assertEqual(round(y_.mean()), 0, y_.mean())
            self.assertNotAlmostEqual(y_.min(), 0)
            self.assertNotAlmostEqual(y_.max(), 0)
예제 #4
0
파일: base.py 프로젝트: owahltinez/coconuts
    def input_to_tensor(self, arr, flatten: bool = True) -> Tensor:
        """
        Build a float tensor out of a column-first n-dim input array.

        All features must share the same per-sample shape. Inputs with at most 2 dimensions
        are transposed so that samples come first. The tensor is moved to the GPU when
        `self._is_cuda` is set. When `flatten` is requested, the learner is not a
        `HighDimensionalMixin` and the input has more than 2 dimensions, the tensor is passed
        through `self._flattener`.
        """
        # Every feature must agree with the first one on its per-sample shape
        expected_shape = shape_of_array(arr[0])[1:]
        for feature in arr[1:]:
            feature_shape = shape_of_array(feature)[1:]
            assert expected_shape == feature_shape, (
                "Consistent shape required for all input features. "
                f"Found {feature_shape}, expected {expected_shape}, input {shape_of_array(arr)}."
            )

        # Matrix multiplication wants sample-first data, so columnar input is transposed back
        # (one day an NN framework may accept the columnar layout directly...)
        data = numpy.asarray(arr, dtype=DTYPE_FLOAT[0])
        if data.ndim <= 2:
            data = numpy.transpose(data)

        tensor = from_numpy(data).float()
        if self._is_cuda:
            tensor = tensor.cuda()

        if not flatten:
            return tensor
        if isinstance(self, HighDimensionalMixin):
            return tensor
        if data.ndim > 2:
            # TODO: this should be done at the base library instead of by PyTorch
            tensor = self._flattener(tensor)
        return tensor
예제 #5
0
 def test_get_shape_of_objects_ndarray(self):
     """
     An object-dtype ndarray holding equally sized arrays reports the full nested shape
     through `shape_of_array`, which differs from its own `.shape`.
     """
     shape = (10, 2)
     rows, cols = shape

     # Build a 1D object array and store one small array in each slot
     arr = numpy.empty(rows, dtype=object)
     for idx in range(rows):
         arr[idx] = numpy.array([*range(cols)])

     self.assertEqual(shape, shape_of_array(arr))
     self.assertNotEqual(shape, arr.shape)
예제 #6
0
    def test_concat_arrays(self):
        """
        Concatenating several arrays keeps the per-sample shape of the inputs and yields as
        many samples as all inputs combined. Exercised with 1D arrays, 2D arrays, one-hot
        vectors and images.
        """
        array_count = 4
        sample_size = 128

        def check_concat(arrays):
            # Shared assertions: per-sample shape is kept, sample count is the expected total
            merged = concat_arrays(*arrays)
            self.assertEqual(
                shape_of_array(arrays[0])[1:],
                shape_of_array(merged)[1:])
            self.assertEqual(len(merged), sample_size * array_count)

        # 1D arrays
        check_concat(
            [generate_array_ints(n=sample_size) for _ in range(array_count)])

        # 2D arrays (2 columns)
        check_concat([
            generate_array_ints(n=sample_size * 2).reshape(-1, 2)
            for _ in range(array_count)
        ])

        # N-D arrays (array of vectors)
        check_concat(generate_onehot_matrix(n=sample_size, ndim=array_count))

        # N-D arrays (array of images)
        check_concat(
            [generate_images(n=sample_size) for _ in range(array_count)])
예제 #7
0
    def input_to_tensor(self, arr: numpy.ndarray, flatten: bool = False):
        """
        Convert a batch of RGB images to a tensor using the parent implementation.

        Each image is converted to a float array, cropped with `crop(height=224, width=224)`
        and passed through `normalize` before the batch is handed to
        `super().input_to_tensor`.

        Args:
            arr: batch of images; every sample must have 3 dimensions, the first of size 3.
            flatten: must be falsy, this learner only works with high dimensional input.

        Returns:
            Whatever `super().input_to_tensor` returns for the transformed batch.

        Raises:
            AssertionError: if `flatten` is truthy or the per-sample shape is not 3-dimensional
                with a leading dimension of size 3.
        """
        assert not flatten, "Input for this learner must be high dimensional"

        # This learner ONLY accepts RGB images, since most models are trained with that
        input_shape = shape_of_array(arr)[1:]
        assert len(input_shape) == 3 and input_shape[0] == 3, (
            "%s supports only 3x224x224 features (2D matrix + RGB channels), found %r"
            % (self.__class__.__name__, input_shape))

        # Apply transformations that put input images in the expected format
        # https://github.com/pytorch/examples/blob/42e5b996718797e45c46a25c55b031e6768f8440/imagenet/main.py#L89-L101
        arr_transformed = []
        for img in arr:
            # `numpy.float` was only an alias of the builtin `float` and no longer exists in
            # NumPy >= 1.24, so the builtin is used directly. `astype` keeps the explicit copy.
            img = numpy.array(img).astype(float)
            img = normalize(crop(img, height=224, width=224))
            arr_transformed.append(img)

        # Use super's implementation to convert the numpy array to a tensor
        return super().input_to_tensor(arr_transformed, flatten=False)
예제 #8
0
 def test_get_shape_of_ndarray(self):
     """`shape_of_array` reports the same shape an ndarray was allocated with."""
     candidate_shapes = ((10, ), (10, 10), (10, 10, 10), (20, 10, 10))
     for expected in candidate_shapes:
         self.assertEqual(expected, shape_of_array(numpy.empty(expected)))
예제 #9
0
 def test_get_shape_of_list_2d(self):
     """`shape_of_array` reports (rows, columns) for a list of equally sized lists."""
     shape = (10, 2)
     rows, cols = shape

     # Build the nested list row by row, each row being [0, ..., cols - 1]
     arr = []
     for _ in range(rows):
         arr.append([*range(cols)])

     self.assertEqual(shape, shape_of_array(arr))
예제 #10
0
 def test_get_shape_of_list_1d(self):
     """`shape_of_array` reports a single-element shape for a flat list."""
     shape = (10, )
     (length, ) = shape
     arr = [*range(length)]
     self.assertEqual(shape, shape_of_array(arr))