Code Example #1
File: lme4.py  Project: open-cogsci/python-datamatrix
def glmer_series(dm, formula, family, winlen=1):

    col = formula.split()[0]
    depth = dm[col].depth
    rm = None
    for i in range(0, depth, winlen):
        wm = dm[:]
        wm[col] = series.reduce_(
            series.window(wm[col], start=i, end=i + winlen))
        lm = glmer(wm, formula, family=family)
        print('Sample %d' % i)
        print(lm)
        if rm is None:
            rm = DataMatrix(length=len(lm))
            rm.effect = list(lm.effect)
            rm.p = SeriesColumn(depth=depth)
            rm.z = SeriesColumn(depth=depth)
            rm.est = SeriesColumn(depth=depth)
            rm.se = SeriesColumn(depth=depth)
        for lmrow, rmrow in zip(lm, rm):
            rmrow.p[i:i + winlen] = lmrow.p
            rmrow.z[i:i + winlen] = lmrow.z
            rmrow.est[i:i + winlen] = lmrow.est
            rmrow.se[i:i + winlen] = lmrow.se
    return rm
Code Example #2
def from_json(s):
    """
	desc: |
		*Requires json_tricks*

		Creates a DataMatrix from a `json` string.

	arguments:
		s:
			desc:	A json string.
			type:	str

	returns:
		desc:	A DataMatrix.
		type:	DataMatrix.
	"""

    import json_tricks

    d = json_tricks.loads(s)
    dm = DataMatrix(length=len(d['rowid']))
    for name, (coltype, seq) in d['columns'].items():
        if coltype == '_SeriesColumn':
            dm[name] = SeriesColumn(depth=seq.shape[1])
            dm[name]._seq = seq
        else:
            dm[name] = globals()[coltype]
            dm[name]._seq = seq
    return dm
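
A minimal round-trip sketch of how this might be used, assuming the same module also exposes a matching to_json() and that the json_tricks package is installed (the column names below are illustrative):

from datamatrix import DataMatrix, convert

dm = DataMatrix(length=2)
dm.word = 'cat', 'dog'
dm.rt = 250, 310
s = convert.to_json(dm)       # serialize the DataMatrix to a json string
dm2 = convert.from_json(s)    # rebuild an equivalent DataMatrix from it
print(dm2)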
Code Example #3
def test_seriescolumn():

    dm1 = DataMatrix(length=2)
    dm1.col1 = SeriesColumn(2)
    dm1.col1 = 1, 2
    dm1.col_shared = SeriesColumn(2)
    dm1.col_shared = 3, 4
    dm2 = DataMatrix(length=2)
    dm2.col2 = SeriesColumn(2)
    dm2.col2 = 5, 6
    dm2.col_shared = SeriesColumn(2)
    dm2.col_shared = 7, 8
    dm3 = dm1 << dm2
    check_series(dm3.col1,
                 [[1, 1], [2, 2], [np.nan, np.nan], [np.nan, np.nan]])
    check_series(dm3.col_shared, [[3, 3], [4, 4], [7, 7], [8, 8]])
    check_series(dm3.col2,
                 [[np.nan, np.nan], [np.nan, np.nan], [5, 5], [6, 6]])
    dm3.i = [4, 0, 2, 1]
    dm4 = dm3.i <= 2
    dm5 = (dm3.i <= 2) | (dm3.i >= 3)
    check_integrity(dm1)
    check_integrity(dm2)
    check_integrity(dm3)
    check_integrity(dm4)
    check_integrity(dm5)
Code Example #4
def test_concatenate():

    dm = DataMatrix(length=1)
    dm.s1 = SeriesColumn(depth=3)
    dm.s1[:] = 1, 2, 3
    dm.s2 = SeriesColumn(depth=3)
    dm.s2[:] = 3, 2, 1
    dm.s = series.concatenate(dm.s1, dm.s2)
    check_series(dm.s, [[1, 2, 3, 3, 2, 1]])
Code Example #5
def test_normalize_time():

    dm = DataMatrix(length=2)
    dm.s = SeriesColumn(depth=2)
    dm.s[0] = 1, 2
    dm.s[1] = np.nan, 3
    dm.t = SeriesColumn(depth=2)
    dm.t[0] = 0, 3
    dm.t[1] = 1, 2
    dm.n = series.normalize_time(dm.s, dm.t)
    check_series(dm.n, [[1, np.nan, np.nan, 2], [np.nan, np.nan, 3, np.nan]])
Code Example #6
def test_baseline():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=3)
    dm.series[0] = range(3)
    dm.series[1] = range(1, 4)
    dm.baseline = SeriesColumn(depth=3)
    dm.baseline[0] = range(1, 4)
    dm.baseline[1] = range(3)
    dm.norm = series.baseline(dm.series, dm.baseline)
    check_series(dm.norm, [[-2, -1, 0], [0, 1, 2]])
    check_integrity(dm)
Code Example #7
def generatedata(effectsize=EFFECTSIZE,
                 blinksinbaseline=BLINKSINBASELINE,
                 **kwargs):

    dm = DataMatrix(length=TRACES)
    dm.c = IntColumn
    dm.c[:TRACES // 2] = 1
    dm.c[TRACES // 2:] = 2
    dm.y = SeriesColumn(depth=TRACELEN)
    dm.y.setallrows(a)
    dm.y += np.random.randint(NOISERANGE[0], NOISERANGE[1], TRACES)
    dm.y[TRACES // 2:] += np.linspace(0, effectsize, TRACELEN)
    # Introduce blinks
    for i, row in enumerate(dm):
        blinklen = np.random.randint(BLINKLEN[0], BLINKLEN[1], BLINKS)
        if i < blinksinbaseline:
            blinkstart = np.array([1])
        else:
            blinkstart = np.random.randint(BASELINE[1], TRACELEN, BLINKS)
        blinkend = blinkstart + blinklen
        for start, end in zip(blinkstart, blinkend):
            end = min(TRACELEN - 1, end)
            if end - start < 2 * BLINKMARGIN:
                continue
            # Fill the blink interior with low-valued noise first, then add a
            # closing ramp at the blink onset and an opening ramp at the offset
            row.y[start:end] = np.random.randint(0, 100, end - start)
            row.y[start:start+BLINKMARGIN] = \
             np.linspace(row.y[start-1], 0, BLINKMARGIN)
            row.y[end-BLINKMARGIN:end] = \
             np.linspace(0, row.y[end], BLINKMARGIN)
    return dm
Code Example #8
def test_reduce_():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=3)
    dm.series[0] = 1, 2, 3
    dm.series[1] = 2, 3, 4
    dm.col = series.reduce_(dm.series)
    check_col(dm.col, [2, 3])
    check_integrity(dm)
Code Example #9
def test_window():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=4)
    dm.series[0] = 0, 1, 1, 0
    dm.series[1] = 0, 2, 2, 0
    dm.window = series.window(dm.series, 1, 3)
    check_series(dm.window, [[1, 1], [2, 2]])
    check_integrity(dm)
Code Example #10
def test_interpolate():

    dm = DataMatrix(length=3)
    dm.s = SeriesColumn(depth=4)
    dm.s = 1, 2, 3, 4
    dm.s[0] = np.nan
    dm.s[1, 0] = np.nan
    dm.s[1, 2] = np.nan
    dm.i = series.interpolate(dm.s)
    check_series(dm.i, [[np.nan] * 4, [2, 2, 3, 4], [1, 2, 3, 4]])
Code Example #11
def test_lock():

    dm = DataMatrix(length=2)
    dm.s = SeriesColumn(depth=3)
    dm.s[0] = 1, 2, 3
    dm.s[1] = -1, -2, -3
    dm.l, zero_point = series.lock(dm.s, [-1, 1])
    assert zero_point == 1
    check_series(dm.l,
                 [[np.nan, np.nan, 1, 2, 3], [-1, -2, -3, np.nan, np.nan]])
Code Example #12
def test_seriescolumn():

	dm = DataMatrix(length=2)
	dm.col = SeriesColumn(depth=2)
	dm.col[0] = 1, 2
	dm.col[1] = 3, 4
	dm.col += 1
	check_series(dm.col, [[2,3], [4,5]])
	dm.col += 1, 2
	check_series(dm.col, [[3,4], [6,7]])
	dm.col -= 1
	check_series(dm.col, [[2,3], [5,6]])
	dm.col -= 1, 2
	check_series(dm.col, [[1,2], [3,4]])
	dm.col *= 2
	check_series(dm.col, [[2,4], [6,8]])
	dm.col *= 1.5, 3
	check_series(dm.col, [[3,6], [18,24]])
	dm.col /= 3
	check_series(dm.col, [[1,2], [6,8]])
	dm.col /= 1, 2
	check_series(dm.col, [[1,2], [3,4]])
	dm.col //= 1.5, 2.5
	check_series(dm.col, [[0,1], [1,1]])
	dm.col += np.array([
		[0,0],
		[10, 10]
		])
	check_series(dm.col, [[0,1], [11,11]])
	# Right-side operations
	dm.col[0] = 1, 2
	dm.col[1] = 3, 4
	dm.col = 1 + dm.col
	check_series(dm.col, [[2,3], [4,5]])
	dm.col = (1, 2) + dm.col
	check_series(dm.col, [[3,4], [6,7]])
	dm.col = 1 - dm.col
	check_series(dm.col, [[-2,-3], [-5,-6]])
	dm.col = (1, 2) - dm.col
	check_series(dm.col, [[3, 4], [7, 8]])
	dm.col = 2 * dm.col
	check_series(dm.col, [[6, 8], [14, 16]])
	dm.col = (1.5, 3) * dm.col
	check_series(dm.col, [[9, 12], [42, 48]])
	dm.col = 3 / dm.col
	check_series(dm.col, [[1./3, 1./4], [3./42, 1./16]])
	dm.col = (1, 2) / dm.col
	check_series(dm.col, [[3, 4], [28, 32]])
	dm.col = (1.5, 2.5) // dm.col
	check_series(dm.col, [[0, 0], [0, 0]])
	dm.col = np.array([
		[0, 0],
		[10, 10]
		]) + dm.col
	check_series(dm.col, [[0, 0], [10, 10]])
Code Example #13
def test_downsample():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=10)
    dm.series[0] = range(10)
    dm.series[1] = [0, 1] * 5
    dm.d3 = series.downsample(dm.series, 3)
    dm.d5 = series.downsample(dm.series, 5)
    check_series(dm.d3, [[1, 4, 7], [1. / 3, 2. / 3, 1. / 3]])
    check_series(dm.d5, [[2, 7], [.4, .6]])
    check_integrity(dm)
Code Example #14
def test_smooth():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=6)
    dm.series[0] = range(6)
    dm.series[1] = [0, 1, 2] * 2
    dm.s = series.smooth(dm.series, winlen=3, wintype='flat')
    check_series(
        dm.s,
        [[2. / 3, 1, 2, 3, 4, 4 + 1. / 3], [2. / 3, 1, 1, 1, 1, 1 + 1. / 3]])
    check_integrity(dm)
Code Example #15
def test_threshold():

    dm = DataMatrix(length=2)
    dm.series = SeriesColumn(depth=4)
    dm.series[0] = range(4)
    dm.series[1] = range(1, 5)
    dm.t1 = series.threshold(dm.series, lambda v: v > 1)
    dm.t2 = series.threshold(dm.series, lambda v: v > 1 and v < 3)
    dm.t3 = series.threshold(dm.series, lambda v: v < 3, min_length=3)
    check_series(dm.t1, [[0, 0, 1, 1], [0, 1, 1, 1]])
    check_series(dm.t2, [[0, 0, 1, 0], [0, 1, 0, 0]])
    check_series(dm.t3, [[1, 1, 1, 0], [0, 0, 0, 0]])
    check_integrity(dm)
Code Example #16
def test_endlock():

	dm = DataMatrix(length=4)
	dm.series = SeriesColumn(depth=3)
	dm.series[0] = 1, 2, 3
	dm.series[1] = 1, np.nan, 3
	dm.series[2] = 1, 2, np.nan
	dm.series[3] = np.nan, 2, np.nan
	dm.series = series.endlock(dm.series)
	check_series(dm.series, [
		[1,2,3],
		[1,np.nan,3],
		[np.nan,1,2],
		[np.nan,np.nan,2],
		])
Code Example #17
def test_seriescolumn():

    dm = DataMatrix(length=3)
    dm.col = SeriesColumn(depth=3)
    dm.col[0] = [1, 2, 3]
    dm.col[1] = [3, 3, 3]
    dm.col[2] = [4, 4, 4]
    assert all(dm.col.mean == [8. / 3, 9. / 3, 10 / 3.])
    assert all(dm.col.median == [3, 3, 3])
    assert all(dm.col.max == [4, 4, 4])
    assert all(dm.col.min == [1, 2, 3])
    assert all(dm.col.std == [
        np.std([4, 3, 1], ddof=1),
        np.std([4, 3, 2], ddof=1),
        np.std([4, 3, 3], ddof=1)
    ])
Code Example #18
File: operations.py  Project: whwqs/python-datamatrix
def _best_fitting_col_type(col):
    """
	visible: False

	desc:
		Determines the best fitting type for a column.
	"""

    from fastnumbers import isreal, isintlike

    if isinstance(col, _SeriesColumn):
        return SeriesColumn(depth=col.depth)
    if isinstance(col, (FloatColumn, IntColumn)):
        return type(col)
    if not all(isreal(val, allow_inf=True, allow_nan=True) for val in col):
        return MixedColumn
    if not all(isintlike(val) for val in col):
        return FloatColumn
    return IntColumn
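
The three rules are easiest to see on a small example. A hedged sketch, assuming ops.auto_type() is the public wrapper that applies _best_fitting_col_type to every column, and that fastnumbers is installed:

from datamatrix import DataMatrix, operations as ops

dm = DataMatrix(length=3)
dm.i = 1, 2, 3        # only int-like values      -> IntColumn
dm.f = 1, 2.5, 3      # real but not all int-like -> FloatColumn
dm.m = 1, 'x', 3      # not all real              -> MixedColumn
dm = ops.auto_type(dm)
print([type(col).__name__ for name, col in dm.columns])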
Code Example #19
def test_replace():

    dm = DataMatrix(length=3)
    dm.a = 0, 1, 2
    dm.c = FloatColumn
    dm.c = np.nan, 1, 2
    dm.s = SeriesColumn(depth=3)
    dm.s[0] = 0, 1, 2
    dm.s[1] = np.nan, 1, 2
    dm.s[2] = np.nan, 1, 2
    dm.a = ops.replace(dm.a, {0: 100, 2: 200})
    dm.c = ops.replace(dm.c, {np.nan: 100, 2: np.nan})
    dm.s = ops.replace(dm.s, {np.nan: 100, 2: np.nan})
    check_col(dm.a, [100, 1, 200])
    check_col(dm.c, [100, 1, np.nan])
    check_series(dm.s, [
        [0, 1, np.nan],
        [100, 1, np.nan],
        [100, 1, np.nan],
    ])
Code Example #20
	def end_phase(self, l):

		self.trialdm['t_offset_%s' % self.current_phase] = l[1]
		for i, (tracelabel, prefix, trace) in enumerate([
				(u'pupil', u'ptrace_', self.ptrace),
				(u'xcoor', u'xtrace_', self.xtrace),
				(u'ycoor', u'ytrace_', self.ytrace),
				(u'time', u'ttrace_', self.ttrace),
				(None, u'fixxlist_', self.fixxlist),
				(None, u'fixylist_', self.fixylist),
				(None, u'fixstlist_', self.fixstlist),
				(None, u'fixetlist_', self.fixetlist),
				]):
			trace = np.array(trace)
			if tracelabel is not None and self._traceprocessor is not None:
				trace = self._traceprocessor(tracelabel, trace)
			if self._maxtracelen is not None \
				and len(trace) > self._maxtracelen:
					warnings.warn(u'Trace %s is too long (%d samples)' \
						% (self.current_phase, len(trace)))
					trace = trace[:self._maxtracelen]
			colname = prefix + self.current_phase
			self.trialdm[colname] = SeriesColumn(
				len(trace), defaultnan=True)
			self.trialdm[colname][0] = trace
			# Start the time trace at 0
			if len(trace) and prefix in (u'ttrace_', u'fixstlist_',
					u'fixetlist_'):
				self.trialdm[colname][0] -= self._t_onset
		# DEBUG CODE
		# 	from matplotlib import pyplot as plt
		# 	plt.subplot(4,2,i+1)
		# 	plt.title(colname)
		# 	plt.plot(_trace, color='blue')
		# 	xdata = np.linspace(0, len(_trace)-1, len(trace))
		# 	plt.plot(xdata, trace, color='red')
		# plt.show()
		self.current_phase = None
Code Example #21
    def _set_col(self, name, value):
        """
        visible: False

        desc:
            Sets columns in various formats. Is used by __setitem__ and
            __setattr__.
        """

        # Check if this is a valid column name
        if isinstance(name, bytes):
            name = safe_decode(name)
        if not isinstance(name, str):
            raise TypeError(u'Column names should be str, not %s' % type(name))
        # Create a new column by column type:
        # dm[name] = IntColumn
        # dm[name] = float
        if isinstance(value, type):
            if value == int:
                from datamatrix import IntColumn
                value = IntColumn
            elif value == float:
                from datamatrix import FloatColumn
                value = FloatColumn
            if issubclass(value, BaseColumn):
                self._cols[name] = value(self)
                return
        # Create a new column by type, kwdict tuple
        if (isinstance(value, tuple) and len(value) == 2
                and isinstance(value[0], type)
                and issubclass(value[0], BaseColumn)):
            cls, kwdict = value
            self._cols[name] = cls(self, **kwdict)
            return
        # Create new column by existing column
        if isinstance(value, BaseColumn):
            # If the column belongs to the same datamatrix we simply insert it
            # under a new name.
            if value._datamatrix is self:
                self._cols[name] = value
                return
            # If the column belongs to another datamatrix, we create a new
            # column of the same type
            if len(value) != len(self):
                raise ValueError(
                    u'Column should have the same length as the DataMatrix')
            self._cols[name] = value._empty_col(datamatrix=self)
        if name not in self:
            # Create a new SeriesColumn by assigning a 2D ndarray, but only if the
            # column doesn't exist yet
            if np is not None and isinstance(value, np.ndarray) and \
                    len(value.shape) == 2:
                if value.shape[0] == len(self):
                    depth = value.shape[1]
                elif value.shape[1] == len(self):
                    depth = value.shape[0]
                    value = np.swapaxes(value, 0, 1)
                else:
                    raise ValueError(
                        'Invalid shape for SeriesColumn: {}'.format(
                            value.shape))
                from datamatrix import SeriesColumn
                self[name] = SeriesColumn(depth=depth)
            else:
                self._cols[name] = self._default_col_type(self)
        self._cols[name][:] = value
        self._mutate()
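
The branches above cover several assignment styles. A short illustrative sketch of each form (the matrix and column names are made up, not part of the original file):

import numpy as np
from datamatrix import DataMatrix, IntColumn, SeriesColumn

dm = DataMatrix(length=2)
dm.a = IntColumn                # new column from a column type
dm.b = float                    # the Python float type maps to FloatColumn
dm.c = SeriesColumn(depth=3)    # handled by the (type, kwdict) tuple branch
dm.d = dm.a                     # existing column from the same DataMatrix
dm.e = np.zeros((2, 4))         # a 2D array becomes a SeriesColumn of depth 4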
Code Example #22
def group(dm, by):

    """
    desc: |
        *Requires numpy*

        Groups the DataMatrix by unique values in a set of grouping columns.
        Grouped columns are stored as SeriesColumns. The columns that are
        grouped should contain numeric values. The order in which groups appear
        in the grouped DataMatrix is unpredictable.

        __Example:__

        %--
        python: |
         from datamatrix import DataMatrix, operations as ops

         dm = DataMatrix(length=4)
         dm.A = 'x', 'x', 'y', 'y'
         dm.B = 0, 1, 2, 3
         print('Original:')
         print(dm)
         dm = ops.group(dm, by=dm.A)
         print('Grouped by A:')
         print(dm)
        --%

    arguments:
        dm:
            desc:	The DataMatrix to group.
            type:	DataMatrix
        by:
            desc:	A column or list of columns to group by.
            type:	[BaseColumn, list]

    returns:
        desc:	A grouped DataMatrix.
        type:	DataMatrix
    """

    bycol = MixedColumn(datamatrix=dm)
    bynames = []
    if by is not None:
        if isinstance(by, BaseColumn):
            bynames = [by.name]
            by = [by]
        for col in by:
            if col._datamatrix is not dm:
                raise ValueError(u'By-columns are from a different DataMatrix')
            bycol += col
            bynames += [col.name]
    bycol_hashed = IntColumn(datamatrix=dm)
    bycol_hashed[:] = [hash(key) for key in bycol]
    keys = bycol_hashed.unique
    groupcols = [
        (name, col) for name, col in dm.columns if name not in bynames
    ]
    nogroupcols = [(name, col) for name, col in dm.columns if name in bynames]
    cm = DataMatrix(length=len(keys))
    for name, col in groupcols:
        if isinstance(col, _SeriesColumn):
            warn(u'Failed to create series for SeriesColumn %s' % name)
            continue
        cm[name] = SeriesColumn(depth=0)
    for name, col in nogroupcols:
        cm[name] = col.__class__

    for i, key in enumerate(keys):
        dm_ = bycol_hashed == int(key)
        for name, col in groupcols:
            if isinstance(col, _SeriesColumn):
                continue
            if cm[name].depth < len(dm_[name]):
                cm[name].defaultnan = True
                cm[name].depth = len(dm_[name])
                cm[name].defaultnan = False
            try:
                cm[name][i, :len(dm_[name])] = dm_[name]
            except ValueError:
                warn(u'Failed to create series for MixedColumn %s' % name)
        for name, col in nogroupcols:
            cm[name][i] = dm_[name][0]
    return cm
Code Example #23
def test_seriescolumn():

    _test_copying(SeriesColumn(depth=1))
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    # Set all rows to a single value
    dm.col = 1
    check_series(dm.col, [[1, 1, 1], [1, 1, 1]])
    # Set rows to different single values
    dm.col = 2, 3
    check_series(dm.col, [[2, 2, 2], [3, 3, 3]])
    # Set one row to a single value
    dm.col[0] = 4
    check_series(dm.col, [[4, 4, 4], [3, 3, 3]])
    # Set one row to different single values
    dm.col[1] = 5, 6, 7
    check_series(dm.col, [[4, 4, 4], [5, 6, 7]])
    # Set all rows to different single values
    dm.col.setallrows([8, 9, 10])
    check_series(dm.col, [[8, 9, 10], [8, 9, 10]])
    # Set the first value in all rows
    dm.col[:, 0] = 1
    check_series(dm.col, [[1, 9, 10], [1, 9, 10]])
    # Set all values in the first row
    dm.col[0, :] = 2
    check_series(dm.col, [[2, 2, 2], [1, 9, 10]])
    # Set all values
    dm.col[:, :] = 3
    check_series(dm.col, [[3, 3, 3], [3, 3, 3]])
    # Test shortening and lengthening
    dm.length = 0
    check_series(dm.col, [])
    dm.length = 3
    dm.col = 1, 2, 3
    dm.col.depth = 1
    check_series(dm.col, [[1], [2], [3]])
    dm.col.depth = 3
    check_series(dm.col, [[1, NAN, NAN], [2, NAN, NAN], [3, NAN, NAN]])
    check_integrity(dm)
    # Test row-value vs. full-depth assignment, and changing depth
    dm = DataMatrix(length=2)
    dm.col = SeriesColumn(depth=3)
    dm.col = 1, 2
    check_series(dm.col, [[1, 1, 1], [2, 2, 2]])
    dm.col = 3, 4, 5
    check_series(dm.col, [[3, 4, 5]] * 2)
    dm.col.depth = 2
    dm.col[:] = 1, 2
    check_series(dm.col, [[1, 1], [2, 2]])
    dm.col[:, :] = 3, 4
    check_series(dm.col, [[3, 4], [3, 4]])
    # Check if series return right type
    dm = DataMatrix(length=4)
    dm.col = SeriesColumn(depth=5)
    dm.col = [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20]]
    # (int, int) -> float
    val = dm.col[2, 2]
    eq_(val, 13)
    eq_(type(val), float)
    # (int) -> array
    val = dm.col[2]
    ok_(all(val == np.array([11, 12, 13, 14, 15])))
    eq_(type(val), np.ndarray)
    # (int, slice) -> array
    val = dm.col[2, 1:-1]
    ok_(all(val == np.array([12, 13, 14])))
    eq_(type(val), np.ndarray)
    # (int, (int, int)) -> array
    val = dm.col[2, (1, 3)]
    ok_(all(val == np.array([12, 14])))
    eq_(type(val), np.ndarray)
    # (slice) -> SeriesColumn
    val = dm.col[1:-1]
    check_series(val, [
        [6, 7, 8, 9, 10],
        [11, 12, 13, 14, 15],
    ])
    # (slice, int) -> FloatColumn
    val = dm.col[1:-1, 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 13])
    # ((int, int), int) -> FloatColumn
    val = dm.col[(1, 3), 2]
    ok_(isinstance(val, FloatColumn))
    check_col(val, [8, 18])
    # (slice, slice) -> SeriesColumn
    val = dm.col[1:-1, 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [12, 13, 14],
    ])
    # ((int, int), slice) -> SeriesColumn
    val = dm.col[(1, 3), 1:-1]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 8, 9],
        [17, 18, 19],
    ])
    # ((int, int), (int, int)) -> SeriesColumn
    val = dm.col[(1, 3), (1, 3)]
    ok_(isinstance(val, _SeriesColumn))
    check_series(val, [
        [7, 9],
        [17, 19],
    ])
Code Example #24
def group(dm, by=None):

	"""
	desc: |
		*Requires numpy*

		Groups the DataMatrix by unique values in a set of grouping columns.
		Grouped columns are stored as SeriesColumns. The columns that are
		grouped should contain numeric values.

		For example:

		A B
		---
		x 0
		x 1
		y 2
		y 3

		>>> group(dm, by=[dm.a])

		Gives:

		A B
		---
		x [0, 1]
		y [2, 3]

	arguments:
		dm:
			desc:	The DataMatrix to group.
			type:	DataMatrix

	keywords:
		by:
			desc:	A list of columns to group by.
			type:	[list, None]

	returns:
		desc:	A grouped DataMatrix.
		type:	DataMatrix
	"""

	import numpy as np

	# Treat by=None as an empty list, so the membership tests below also work
	# when no grouping columns are given
	if by is None:
		by = []
	bycol = MixedColumn(datamatrix=dm)
	for col in by:
		if col._datamatrix is not dm:
			raise ValueError(u'By-columns are from a different DataMatrix')
		bycol += col
	keys = bycol.unique
	groupcols = [(name, col) for name, col in dm.columns if col not in by]
	nogroupcols = [(name, col) for name, col in dm.columns if col in by]
	cm = DataMatrix(length=len(keys))
	for name, col in groupcols:
		if isinstance(col, _SeriesColumn):
			warn(u'Failed to create series for SeriesColumn %s' % name)
			continue
		cm[name] = SeriesColumn(depth=0)
	for name, col in nogroupcols:
		cm[name] = col.__class__

	for i, key in enumerate(keys):
		dm_ = bycol == key
		for name, col in groupcols:
			if isinstance(col, _SeriesColumn):
				continue
			if cm[name].depth < len(dm_[name]):
				cm[name].defaultnan = True
				cm[name].depth = len(dm_[name])
				cm[name].defaultnan = False
			try:
				cm[name][i,:len(dm_[name])] = dm_[name]
			except ValueError:
				warn(u'Failed to create series for MixedColumn %s' % name)
		for name, col in nogroupcols:
			cm[name][i] = dm_[name][0]
	return cm