Python Data.preprocess 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: preprocessing

클래스/타입: Data

메소드/함수: preprocess

hotexamples.com에서의 예제들: 3

Python Data.preprocess - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 preprocessing.Data.preprocess에 대한 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기/숨기기

Data(24)

load_data(5)

create_data(4)

preprocess(3)

fill_missing_data(2)

selectFeature(2)

LLR(1)

clear_memory(1)

featurewise_center(1)

featurewise_std_normalization(1)

get_batch(1)

label_making(1)

load(1)

split_data(1)

예제 #1

파일 보기

파일: Test.py 프로젝트: tico2303/AI

def TestFeatureSelection():
    """Print ``Data.selectFeature`` results for growing sets of column indices.

    Builds a ``Data`` object from ``cs_170_small80.txt``, preprocesses it,
    prints the shape and type of the preprocessed result, and then calls
    ``selectFeature`` once per column with the index lists ``[0]``,
    ``[0, 1]``, ... up to every column, printing each return value.

    Nothing is asserted; the output is meant to be inspected by hand.
    """
    data = Data("cs_170_small80.txt")
    dat = data.preprocess()
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3; the former print statements were a SyntaxError on 3.
    print(dat.shape)
    # Two spaces after the colon: one from the original literal plus the
    # separator the Python 2 print statement inserted between its items.
    print("dat type:  %s" % (type(dat),))
    col_nums = dat.shape[1]
    feature_indices = []
    for d in range(col_nums):
        # The same list grows by one index per iteration, so each call
        # receives all column indices from 0 through d.
        feature_indices.append(d)
        print(data.selectFeature(dat, feature_indices))

예제 #2

파일 보기

파일: Test.py 프로젝트: tico2303/AI

def TestDistance():
    """Print one leave-one-out split and the distance computed on it.

    Builds a ``Data`` object from ``testData.txt``, creates a
    ``LeaveOneOutValidator`` from the raw ``data.data``, preprocesses the
    data, and asks the validator for the split at index 0. The two parts of
    the split and ``KnearestNeighbor.distance(train, test)`` are printed.

    Nothing is asserted; the output is meant to be inspected by hand.
    """
    data = Data("testData.txt")
    # The validator is constructed from the raw data before preprocess()
    # runs; this ordering is kept from the original.
    validator = LeaveOneOutValidator(data.data, KnearestNeighbor)
    d = data.preprocess()
    # NOTE(review): presumably index 0 selects the held-out row - confirm
    # against LeaveOneOutValidator.leaveOneOut.
    test, train = validator.leaveOneOut(d, 0)
    knn = KnearestNeighbor()
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3; the former print statements were a SyntaxError on 3.
    # The Python 2 print statement put a separator space after "test: " and
    # "distance: " but none after "train: \n" (the item ends in a newline);
    # the format strings below reproduce exactly that spacing.
    print("test:  %s" % (test,))
    print("train: \n%s" % (train,))

    print("distance:  %s" % (knn.distance(train, test),))
    print("\n\n")

예제 #3

파일 보기

파일: test_preprocessing.py 프로젝트: datadonK23/Thoughtful_DL

class PreprocessingTests(unittest.TestCase):
    """Tests for ``Data.load``, ``preprocess``, ``split_data`` and
    ``train_dev_test``."""

    def setUp(self):
        """Create a fresh ``Data`` instance for every test."""
        self.data = Data()

    def test_load(self):
        """
        After load() the dataset exists and is a tuple with two entries
        """
        self.data.load()
        loaded = self.data._dataset

        self.assertIsNotNone(loaded, "loaded no data")
        self.assertEqual(tuple, type(loaded), "loaded no tuple")
        self.assertEqual(2, len(loaded), "loaded tuple has false length")

    def test_preprocess(self):
        """
        After preprocess() the data arrays hold only 0./1. as float64 and
        the label arrays are float32 ndarrays
        """
        self.data.load()
        self.data.preprocess()
        dataset = self.data._dataset
        # (position in the dataset, name used in the failure messages)
        splits = ((0, "train"), (1, "test"))

        # data arrays: unique values first, then dtype
        binary_values = [0., 1.]
        for position, split in splits:
            np.testing.assert_array_equal(
                binary_values,
                np.unique(dataset[position][0]),
                "false one-hot-encoding of %s_data" % split)
        for position, split in splits:
            self.assertEqual(
                "float64",
                dataset[position][0].dtype,
                "wrong type of %s_data values" % split)

        # label arrays: container type first, then dtype
        for position, split in splits:
            self.assertEqual(
                np.ndarray,
                type(dataset[position][1]),
                "wrong type of %s_labels set" % split)
        for position, split in splits:
            self.assertEqual(
                "float32",
                dataset[position][1].dtype,
                "wrong type of %s_labels values" % split)

    def test_split_data(self):
        """
        After split_data() the dataset has three splits of two entries
        each, none of which is None
        """
        self.data.load()
        self.data.preprocess()
        self.data.split_data()
        dataset = self.data._dataset
        split_names = ("train", "dev", "test")

        # number of splits and entries per split
        self.assertEqual(3, len(dataset), "wrong number of splits")
        for position, split in enumerate(split_names):
            self.assertEqual(
                2,
                len(dataset[position]),
                "wrong number of %s splits" % split)

        # every data/labels entry is present
        for position, split in enumerate(split_names):
            for slot, part in enumerate(("data", "labels")):
                self.assertIsNotNone(
                    dataset[position][slot],
                    "%s_%s is None" % (split, part))

    def test_train_dev_test(self):
        """
        train_dev_test yields three (data, labels) pairs of ndarrays with
        the expected shapes
        """
        # NOTE(review): no load/preprocess/split_data call happens here;
        # presumably train_dev_test prepares the data itself - confirm.
        train_pair, dev_pair, test_pair = self.data.train_dev_test
        train_data, train_labels = train_pair
        dev_data, dev_labels = dev_pair
        test_data, test_labels = test_pair

        # type
        named_arrays = (
            ("train_data", train_data),
            ("train_labels", train_labels),
            ("dev_data", dev_data),
            ("dev_labels", dev_labels),
            ("test_data", test_data),
            ("test_labels", test_labels),
        )
        for label, array in named_arrays:
            self.assertEqual(np.ndarray, type(array),
                             "wrong type of %s" % label)

        # shape
        expected_shapes = (
            ((15000, 10000), train_data, "train_data has wrong shape"),
            ((15000, ), train_labels, "train_labels have wrong shape"),
            ((10000, 10000), dev_data, "dev_data has wrong shape"),
            ((10000, ), dev_labels, "dev_labels have wrong shape"),
            ((25000, 10000), test_data, "test_data has wrong shape"),
            ((25000, ), test_labels, "test_labels have wrong shape"),
        )
        for shape, array, message in expected_shapes:
            self.assertEqual(shape, array.shape, message)