def testMultitypeArraylikeToMatrix(self):
    """
    Check conversion of a nested list that mixes floats and ints.

    The converted object must be a numpy ndarray of doubles with 4 rows and
    3 columns holding the same values as the input, and the returned
    dimension info must have three entries, all equal to 0.
    """
    rows = [[0.01, 0.02, 3],
            [0.04, 0.05, 6],
            [0.07, 0.08, 9],
            [0.10, 0.11, 12]]

    matrix, _, dims = to_matrix_with_info(rows, np.double)

    self.assertIsInstance(matrix, np.ndarray)
    self.assertEqual(matrix.dtype, np.dtype(np.double))
    self.assertEqual(matrix.shape[0], 4)
    self.assertEqual(matrix.shape[1], 3)

    # Every input element must survive the conversion unchanged.
    for r, row in enumerate(rows):
        for c, expected in enumerate(row):
            self.assertEqual(expected, matrix[r, c])

    # One entry per column, each equal to 0 (presumably "not categorical";
    # confirm against to_matrix_with_info).
    self.assertEqual(dims.shape[0], 3)
    for k in range(3):
        self.assertEqual(dims[k], 0)
def testArraylikeToMatrix(self):
    """
    Check conversion of a nested list that contains only floats.

    The converted object must be a numpy ndarray of doubles with 4 rows and
    3 columns holding the same values as the input, and the returned
    dimension info must have three entries, all equal to 0.
    """
    rows = [[0.01, 0.02, 0.03],
            [0.04, 0.05, 0.06],
            [0.07, 0.08, 0.09],
            [0.10, 0.11, 0.12]]

    matrix, _, dims = to_matrix_with_info(rows, np.double)

    self.assertIsInstance(matrix, np.ndarray)
    self.assertEqual(matrix.dtype, np.dtype(np.double))
    self.assertEqual(matrix.shape[0], 4)
    self.assertEqual(matrix.shape[1], 3)

    # Element-by-element comparison against the original nested list.
    for r, row in enumerate(rows):
        for c, expected in enumerate(row):
            self.assertEqual(expected, matrix[r, c])

    self.assertEqual(dims.shape[0], 3)
    for k in range(3):
        self.assertEqual(dims[k], 0)
def testPandasMixedToMatrix(self):
    """
    Check conversion of a DataFrame with one integer and one double column.

    The result must be a 50x2 numpy ndarray of doubles whose columns match
    the DataFrame columns 'a' and 'b', and the dimension info must have two
    entries, both equal to 0.
    """
    frame = pd.DataFrame({'a': range(50)})
    frame['b'] = np.random.randn(50, 1)

    # The integer column may be 32- or 64-bit wide; accept either.
    accepted_int_dtypes = (np.dtype('int32'), np.dtype('int64'))
    self.assertTrue(frame['a'].dtype in accepted_int_dtypes)
    self.assertEqual(frame['b'].dtype, np.dtype(np.double))

    matrix, _, dims = to_matrix_with_info(frame, np.double)

    self.assertIsInstance(matrix, np.ndarray)
    self.assertEqual(matrix.dtype, np.dtype(np.double))
    self.assertEqual(matrix.shape[0], 50)
    self.assertEqual(matrix.shape[1], 2)

    # Column i of the matrix must equal the i-th DataFrame column.
    for col_idx, col_name in enumerate('ab'):
        for row_idx in range(50):
            self.assertEqual(frame[col_name][row_idx],
                             matrix[row_idx, col_idx])

    self.assertEqual(dims.shape[0], 2)
    for k in range(2):
        self.assertEqual(dims[k], 0)
def testPandasIntToMatrix(self):
    """
    Test that a DataFrame holding ints is converted to a numpy matrix.

    The result must be a 5x1 numpy ndarray whose values equal the original
    integers, and the dimension info must have exactly one entry equal to 0.
    """
    d = pd.DataFrame({'a': range(5)})

    m, _, dims = to_matrix_with_info(d, np.double)

    self.assertIsInstance(m, np.ndarray)
    self.assertEqual(m.shape[0], 5)
    self.assertEqual(m.shape[1], 1)
    for i in range(5):
        # Compare the scalar element rather than a one-element row array.
        self.assertEqual(m[i, 0], i)

    # assertTrue(x, 1) would treat 1 as the failure message and pass for any
    # non-zero length; assertEqual actually checks the length.
    self.assertEqual(dims.shape[0], 1)
    self.assertEqual(dims[0], 0)
def testCategoricalOnly(self):
    """
    Check conversion of a DataFrame whose only column is categorical.

    The result must be a 4x1 numpy ndarray of doubles, the single dimension
    info entry must equal 1, equal categories must map to equal values, and
    distinct categories must map to distinct values.
    """
    frame = pd.DataFrame({"A": ["a", "b", "c", "a"]})
    # Re-type the string column as a pandas categorical.
    frame["A"] = frame["A"].astype('category')

    matrix, _, dims = to_matrix_with_info(frame, np.double)

    self.assertIsInstance(matrix, np.ndarray)
    self.assertEqual(matrix.dtype, np.dtype(np.double))

    self.assertEqual(dims.shape[0], 1)
    self.assertEqual(dims[0], 1)

    self.assertEqual(matrix.shape[0], 4)
    self.assertEqual(matrix.shape[1], 1)

    # Rows 0 and 3 are both "a"; rows 0, 1 and 2 are pairwise different.
    self.assertEqual(matrix[0], matrix[3])
    self.assertNotEqual(matrix[0], matrix[1])
    self.assertNotEqual(matrix[1], matrix[2])
    self.assertNotEqual(matrix[0], matrix[2])
def testNumpyToMatrix(self):
    """
    Check that a numpy double matrix is passed through without a copy.

    The returned object must be a numpy ndarray of doubles that reports the
    same `__array_interface__['data']` entry as the input, and the dimension
    info must have five entries, all equal to 0.
    """
    source = np.random.randn(100, 5)

    converted, _, dims = to_matrix_with_info(source, np.double)

    self.assertIsInstance(converted, np.ndarray)
    self.assertEqual(converted.dtype, np.dtype(np.double))

    # Identical 'data' entries mean both arrays refer to the same buffer.
    self.assertEqual(source.__array_interface__['data'],
                     converted.__array_interface__['data'])

    self.assertEqual(dims.shape[0], 5)
    for k in range(5):
        self.assertEqual(dims[k], 0)
def testPandasToMatrix(self):
    """
    Test that a simple pandas numeric matrix can be turned into a numpy
    ndarray.

    The result must be a 100x4 numpy ndarray of doubles whose columns match
    the DataFrame columns 'a'..'d', and the dimension info must have exactly
    four entries, all equal to 0.
    """
    colnames = list('abcd')
    d = pd.DataFrame(np.random.randn(100, 4), columns=colnames)

    m, _, dims = to_matrix_with_info(d, np.double)

    self.assertIsInstance(m, np.ndarray)
    self.assertEqual(m.shape[0], 100)
    self.assertEqual(m.shape[1], 4)
    self.assertEqual(m.dtype, np.dtype(np.double))

    # Column i of the matrix must equal the i-th DataFrame column.
    for i, name in enumerate(colnames):
        for j in range(m.shape[0]):
            self.assertEqual(m[j, i], d[name][j])

    # assertTrue(x, 4) would treat 4 as the failure message and pass for any
    # non-zero length; assertEqual actually checks the length.
    self.assertEqual(dims.shape[0], 4)
    for k in range(4):
        self.assertEqual(dims[k], 0)
def testPandasMixedToMatrix(self):
    """
    Test that a matrix with one int and one double feature are transformed
    correctly.

    The result must be a 50x2 numpy ndarray of doubles whose columns match
    the DataFrame columns 'a' and 'b', and the dimension info must have two
    entries, both equal to 0.
    """
    d = pd.DataFrame({'a': range(50)})
    d['b'] = np.random.randn(50, 1)

    # Comparing against the builtin `int` is platform-dependent (it can map
    # to int32 while pandas produces int64), so accept either integer width.
    self.assertTrue(d['a'].dtype in (np.dtype('int32'), np.dtype('int64')))
    self.assertEqual(d['b'].dtype, np.dtype(np.double))

    m, _, dims = to_matrix_with_info(d, np.double)

    self.assertIsInstance(m, np.ndarray)
    self.assertEqual(m.dtype, np.dtype(np.double))
    self.assertEqual(m.shape[0], 50)
    self.assertEqual(m.shape[1], 2)

    # Column i of the matrix must equal the i-th DataFrame column.
    colNames = list('ab')
    for i in range(2):
        for j in range(50):
            self.assertEqual(d[colNames[i]][j], m[j, i])

    self.assertEqual(dims.shape[0], 2)
    self.assertEqual(dims[0], 0)
    self.assertEqual(dims[1], 0)