def testMask(self):
    # Expects rows whose mask entry is True to be hidden from asArrayDict()
    # and getArray(), while getArrayNoMask() and len() keep reporting every
    # row. The same expectations are checked on a slice and after an in-place
    # edit of the mask.
    npDataFrame = NumpyDataFrame(dict(a=range(5), b='a b c d e'.split(' ')))
    npDataFrame.mask = [False, False, True, False, True]

    self.assertListsOrDicts(
        npDataFrame.asArrayDict(),
        OrderedDict([('a', np.array([0, 1, 3])),
                     ('b', np.array(['a', 'b', 'd']))]))
    self.assertListsOrDicts(npDataFrame.getArrayNoMask('a'),
                            np.array([0, 1, 2, 3, 4]))
    self.assertListsOrDicts(npDataFrame.getArray('a'), np.array([0, 1, 3]))
    # assertEqual is used instead of the deprecated assertEquals alias, which
    # no longer exists in recent Python versions.
    self.assertEqual(len(npDataFrame), 5)

    # A slice of the frame is expected to carry the matching slice of the mask.
    slicedDataFrame = npDataFrame[1:4]
    self.assertListsOrDicts(slicedDataFrame.mask,
                            np.array([False, True, False]))
    self.assertListsOrDicts(
        slicedDataFrame.asArrayDict(),
        OrderedDict([('a', np.array([1, 3])),
                     ('b', np.array(['b', 'd']))]))
    self.assertListsOrDicts(slicedDataFrame.getArrayNoMask('a'),
                            np.array([1, 2, 3]))
    self.assertListsOrDicts(slicedDataFrame.getArray('a'), np.array([1, 3]))
    self.assertEqual(len(slicedDataFrame), 3)

    # Writing into the mask in place is expected to hide rows 0 and 1 as well,
    # leaving only row 3 visible.
    npDataFrame.mask[0:2] = True
    self.assertListsOrDicts(
        npDataFrame.asArrayDict(),
        OrderedDict([('a', np.array([3])),
                     ('b', np.array(['d']))]))
    self.assertListsOrDicts(npDataFrame.getArrayNoMask('a'),
                            np.array([0, 1, 2, 3, 4]))
    self.assertListsOrDicts(npDataFrame.getArray('a'), np.array([3]))
    self.assertEqual(len(npDataFrame), 5)
def _initDataFrame(self):
    """Create and populate the NumpyDataFrame used by this object.

    The result of ``_initializeRandAlgArrays()`` is handed to
    ``_addConcatenatedTrackBinArraysToDataFrame()``; whatever that call
    returns is then passed on to ``_addMandatoryArraysToDataFrame()``.
    When ``self._needsMask`` is truthy, an all-False boolean mask with
    ``len(frame)`` entries is attached before the frame is returned.
    """
    frame = NumpyDataFrame()
    trackBinArrayDicts = self._initializeRandAlgArrays()
    lengths = self._addConcatenatedTrackBinArraysToDataFrame(
        frame, trackBinArrayDicts)
    self._addMandatoryArraysToDataFrame(frame, lengths)

    if not self._needsMask:
        return frame

    # TODO: is len(frame) safe? Can a member array be multidimensional?
    frame.mask = np.zeros(len(frame), dtype=bool)
    return frame
def testMaskSorting(self):
    # Expects sort() to reorder the mask together with the rows: the rows
    # hidden before sorting (a == 2 and a == 4) must stay hidden afterwards.
    frame = NumpyDataFrame(dict(a=range(5), b='e d c b a'.split(' ')))
    frame.mask = [False, False, True, False, True]
    frame.sort(['b'])

    visibleAfterSort = np.array([3, 1, 0])
    self.assertListsOrDicts(frame.getArray('a'), visibleAfterSort)

    # With every row unmasked, the complete sorted column becomes visible.
    frame.mask = [False] * 5
    everythingAfterSort = np.array([4, 3, 2, 1, 0])
    self.assertListsOrDicts(frame.getArray('a'), everythingAfterSort)
def testInitMask(self):
    # Exercises the different ways of supplying a mask: through the
    # constructor, through attribute assignment (list, numpy array, ints),
    # with a wrong length, and resetting it to None.
    explicitMask = np.array([False, False, True, False, True])
    # range(5) is expected to be read as booleans: 0 -> False, rest -> True.
    maskFromInts = np.array([False, True, True, True, True])

    # Mask passed to the constructor.
    frame = NumpyDataFrame(dict(a=range(5)),
                           mask=[False, False, True, False, True])
    self.assertListsOrDicts(frame.mask, explicitMask)

    frame = NumpyDataFrame(dict(a=range(5)), mask=range(5))
    self.assertListsOrDicts(frame.mask, maskFromInts)

    # Without a mask argument the attribute is expected to be None.
    frame = NumpyDataFrame(dict(a=range(5)))
    self.assertIsNone(frame.mask)

    # Mask assigned after construction.
    frame.mask = [False, False, True, False, True]
    self.assertListsOrDicts(frame.mask, explicitMask)

    frame.mask = np.array([False, False, True, False, True])
    self.assertListsOrDicts(frame.mask, explicitMask)

    frame.mask = range(5)
    self.assertListsOrDicts(frame.mask, maskFromInts)

    # A mask of the wrong length (4 entries for 5 rows) must be rejected.
    self.assertRaises(ValueError, frame.__setattr__, 'mask', range(4))

    # Assigning None removes the mask again.
    frame.mask = None
    self.assertIsNone(frame.mask)
def _createTestDataFrame():
    """Return a new NumpyDataFrame with three five-element test arrays.

    The arrays are added in the order 'floats', 'strs', 'tuples'.
    """
    columns = (
        ('floats', list(map(float, range(5)))),
        ('strs', 'ab cd efg hi j'.split(' ')),
        ('tuples', [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]),
    )
    frame = NumpyDataFrame()
    for columnName, values in columns:
        frame.addArray(columnName, values)
    return frame
def testArrayAddRemove(self):
    # Covers addArray()/removeArray(): key order, contents and length after
    # each change, plus the errors expected for a wrong-length array and for
    # removing a key that does not exist.
    npDataFrame = NumpyDataFrame(dict(a=range(5)))

    npDataFrame.addArray('b', list('abcde'))
    self.assertTrue(npDataFrame.hasArray('b'))
    # assertEqual is used instead of the deprecated assertEquals alias, which
    # no longer exists in recent Python versions.
    self.assertEqual(npDataFrame.arrayKeys(), ['a', 'b'])
    self.assertListsOrDicts(
        npDataFrame.asArrayDict(),
        OrderedDict([('a', np.array(range(5))),
                     ('b', np.array(['a', 'b', 'c', 'd', 'e']))]))
    self.assertEqual(len(npDataFrame), 5)

    # An array with 4 entries does not fit the 5-row frame and must not be
    # registered.
    self.assertRaises(ValueError, npDataFrame.addArray, 'c', range(4))
    self.assertFalse(npDataFrame.hasArray('c'))

    # An array of 2-tuples is accepted; keys are expected in insertion order
    # ('aa' after 'b'), not alphabetical order.
    npDataFrame.addArray('aa', [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)])
    self.assertTrue(npDataFrame.hasArray('aa'))
    self.assertEqual(npDataFrame.arrayKeys(), ['a', 'b', 'aa'])
    self.assertListsOrDicts(
        npDataFrame.getArray('aa'),
        np.array([(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]))
    self.assertEqual(len(npDataFrame), 5)

    # Removing an unknown key raises; removing 'a' keeps the other arrays and
    # the frame length.
    self.assertRaises(KeyError, npDataFrame.removeArray, 'd')
    npDataFrame.removeArray('a')
    self.assertFalse(npDataFrame.hasArray('a'))
    self.assertEqual(npDataFrame.arrayKeys(), ['b', 'aa'])
    self.assertListsOrDicts(
        npDataFrame.asArrayDict(),
        OrderedDict([
            ('b', np.array(['a', 'b', 'c', 'd', 'e'])),
            ('aa', np.array([(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)])),
        ]))
    self.assertEqual(len(npDataFrame), 5)
def testSort(self):
    # Sorts identical fresh frames by different key lists and compares every
    # column with the expected row order. In the expected values, rows that
    # tie on the sort key keep their input order.
    # Each case: (sort keys, expected 'a', expected 'b', expected 'c').
    sortCases = [
        (['c'], [2, 3, 2], [1, 1, 3], ['a', 'b', 'c']),
        (['a'], [2, 2, 3], [3, 1, 1], ['c', 'a', 'b']),
        (['b'], [3, 2, 2], [1, 1, 3], ['b', 'a', 'c']),
        (['b', 'a'], [2, 3, 2], [1, 1, 3], ['a', 'b', 'c']),
    ]
    for sortKeys, expectedA, expectedB, expectedC in sortCases:
        frame = NumpyDataFrame(
            dict(a=[2, 3, 2], b=[3, 1, 1], c=['c', 'b', 'a']))
        frame.sort(order=sortKeys)
        self.assertListsOrDicts(frame.getArray('a'), np.array(expectedA))
        self.assertListsOrDicts(frame.getArray('b'), np.array(expectedB))
        self.assertListsOrDicts(frame.getArray('c'), np.array(expectedC))

    # Invalid orders are checked on the frame left over from the last case:
    # an empty key list and a list naming an unknown array ('d').
    self.assertRaises(AssertionError, frame.sort, order=[])
    self.assertRaises(AssertionError, frame.sort, order=['a', 'd'])
def testInit(self):
    # Covers construction: an empty frame, a frame built from a dict of
    # equal-length sequences, and the inputs the constructor must reject.
    npDataFrame = NumpyDataFrame()
    # assertEqual is used instead of the deprecated assertEquals alias, which
    # no longer exists in recent Python versions.
    self.assertEqual(npDataFrame.asArrayDict(), {})
    self.assertFalse(npDataFrame.hasArray('a'))
    self.assertEqual(npDataFrame.arrayKeys(), [])
    self.assertEqual(len(npDataFrame), 0)

    npDataFrame = NumpyDataFrame(dict(a=range(5)))
    self.assertListsOrDicts(npDataFrame.asArrayDict(),
                            OrderedDict([('a', np.array(range(5)))]))
    self.assertTrue(npDataFrame.hasArray('a'))
    self.assertEqual(npDataFrame.arrayKeys(), ['a'])
    self.assertListsOrDicts(npDataFrame.getArray('a'), np.array(range(5)))
    self.assertEqual(len(npDataFrame), 5)

    # Arrays of different lengths, and a scalar in place of a sequence, are
    # both expected to raise ValueError.
    self.assertRaises(ValueError, NumpyDataFrame,
                      dict(a=range(5), b=range(4)))
    self.assertRaises(ValueError, NumpyDataFrame, dict(a=1))