예제 #1
0
파일: Test.py 프로젝트: tico2303/AI
def TestFeatureSelection():
	"""Print ``Data.selectFeature`` results for a growing list of feature indices.

	Loads "cs_170_small80.txt" through ``Data``, preprocesses it, prints the
	shape and type of the preprocessed object, and then, for every index
	``0 .. shape[1] - 1``, appends that index to a running list and prints
	``data.selectFeature(dat, feature_indices)`` for the list so far.

	Returns nothing; all results are written to standard output.
	"""
	data = Data("cs_170_small80.txt")
	dat = data.preprocess()
	# Single-argument, parenthesized prints behave the same under the
	# Python 2 print statement and the Python 3 print function.
	print(dat.shape)
	# The Python 2 statement ``print "dat type: ", type(dat)`` emitted a
	# separator space after the label; it is spelled out here so the output
	# stays byte-identical.
	print("dat type:  %s" % (type(dat),))
	feature_indices = []
	for column in range(dat.shape[1]):
		# The list grows by one index per iteration, so each call sees the
		# indices 0..column inclusive.
		feature_indices.append(column)
		print(data.selectFeature(dat, feature_indices))
예제 #2
0
파일: Test.py 프로젝트: tico2303/AI
def TestDistance():
	"""Print one leave-one-out split of "testData.txt" and a KNN distance on it.

	Builds a ``LeaveOneOutValidator`` from the raw ``data.data`` and the
	``KnearestNeighbor`` class, preprocesses the data, asks the validator for
	``leaveOneOut(d, 0)`` (presumably the split that holds out item 0 --
	confirm against the validator), and prints the two parts followed by
	``knn.distance(train, test)``.

	Returns nothing; all results are written to standard output.
	"""
	data = Data("testData.txt")
	validator = LeaveOneOutValidator(data.data, KnearestNeighbor)
	d = data.preprocess()
	test, train = validator.leaveOneOut(d, 0)
	knn = KnearestNeighbor()
	# The prints below use a single pre-formatted argument so they emit the
	# same bytes under the Python 2 print statement and the Python 3 print
	# function. Python 2 wrote a separator space after "test: " and
	# "distance: ", but none after "train: \n" because that label ends in a
	# newline; the format strings reproduce exactly that.
	print("test:  %s" % (test,))
	print("train: \n%s" % (train,))

	print("distance:  %s" % (knn.distance(train, test),))
	print("\n\n")
class PreprocessingTests(unittest.TestCase):
    """Checks on ``Data``: load, preprocess, split_data and train_dev_test.

    Each test starts from a fresh ``Data()`` instance and inspects either its
    ``_dataset`` attribute or its ``train_dev_test`` attribute.
    """

    def setUp(self):
        """Create the ``Data`` object exercised by every test."""
        self.data = Data()

    def _part(self, split, member):
        """Return ``self.data._dataset[split][member]``."""
        return self.data._dataset[split][member]

    def test_load(self):
        """After ``load()``, ``_dataset`` exists and is a 2-element tuple."""
        self.data.load()

        self.assertIsNotNone(self.data._dataset, "loaded no data")
        self.assertEqual(tuple, type(self.data._dataset), "loaded no tuple")
        self.assertEqual(2, len(self.data._dataset),
                         "loaded tuple has false length")

    def test_preprocess(self):
        """After ``preprocess()``, check data encoding and label types."""
        self.data.load()
        self.data.preprocess()

        # Member 0 of each split is checked as data: its unique values must
        # be exactly 0. and 1., and its dtype must be float64.
        one_hot_checks = (
            (0, "false one-hot-encoding of train_data"),
            (1, "false one-hot-encoding of test_data"),
        )
        for split, message in one_hot_checks:
            np.testing.assert_array_equal(
                [0., 1.], np.unique(self._part(split, 0)), message)

        data_dtype_checks = (
            (0, "wrong type of train_data values"),
            (1, "wrong type of test_data values"),
        )
        for split, message in data_dtype_checks:
            self.assertEqual("float64", self._part(split, 0).dtype, message)

        # Member 1 of each split is checked as labels: an ndarray of float32.
        label_type_checks = (
            (0, "wrong type of train_labels set"),
            (1, "wrong type of test_labels set"),
        )
        for split, message in label_type_checks:
            self.assertEqual(np.ndarray, type(self._part(split, 1)), message)

        label_dtype_checks = (
            (0, "wrong type of train_labels values"),
            (1, "wrong type of test_labels values"),
        )
        for split, message in label_dtype_checks:
            self.assertEqual("float32", self._part(split, 1).dtype, message)

    def test_split_data(self):
        """After ``split_data()``, ``_dataset`` holds three non-empty pairs."""
        self.data.load()
        self.data.preprocess()
        self.data.split_data()

        # Three splits, each of which must contain exactly two members.
        self.assertEqual(3, len(self.data._dataset), "wrong number of splits")
        pair_checks = (
            (0, "wrong number of train splits"),
            (1, "wrong number of dev splits"),
            (2, "wrong number of test splits"),
        )
        for split, message in pair_checks:
            self.assertEqual(2, len(self.data._dataset[split]), message)

        # No member of any split may be None.
        existence_checks = (
            (0, 0, "train_data is None"),
            (0, 1, "train_labels is None"),
            (1, 0, "dev_data is None"),
            (1, 1, "dev_labels is None"),
            (2, 0, "test_data is None"),
            (2, 1, "test_labels is None"),
        )
        for split, member, message in existence_checks:
            self.assertIsNotNone(self._part(split, member), message)

    def test_train_dev_test(self):
        """``train_dev_test`` yields three ndarray pairs of the expected shapes."""
        train_split, dev_split, test_split = self.data.train_dev_test
        train_data, train_labels = train_split
        dev_data, dev_labels = dev_split
        test_data, test_labels = test_split

        # Every member must be exactly an ndarray.
        type_checks = (
            (train_data, "wrong type of train_data"),
            (train_labels, "wrong type of train_labels"),
            (dev_data, "wrong type of dev_data"),
            (dev_labels, "wrong type of dev_labels"),
            (test_data, "wrong type of test_data"),
            (test_labels, "wrong type of test_labels"),
        )
        for array, message in type_checks:
            self.assertEqual(np.ndarray, type(array), message)

        # Expected shapes are hard-coded for the dataset the tests run on.
        shape_checks = (
            ((15000, 10000), train_data, "train_data has wrong shape"),
            ((15000, ), train_labels, "train_labels have wrong shape"),
            ((10000, 10000), dev_data, "dev_data has wrong shape"),
            ((10000, ), dev_labels, "dev_labels have wrong shape"),
            ((25000, 10000), test_data, "test_data has wrong shape"),
            ((25000, ), test_labels, "test_labels have wrong shape"),
        )
        for expected, array, message in shape_checks:
            self.assertEqual(expected, array.shape, message)