Esempio n. 1
0
    def _columns(cls, o, n_cols=0):
        """ Generate one RowProxy per column described in o.meta['schema'].

        The first schema entry is the header row and must equal MPRowsFile.SCHEMA_TEMPLATE;
        each following entry describes one column. When more columns are requested than the
        schema currently holds, placeholder entries named 'col<N>' are appended to the
        schema list in place.

        Args:
            o (any having .meta dict attr and .n_cols attr): object whose schema is wrapped.
            n_cols (int, optional): lower bound on the number of columns to generate. The
                actual count is the largest of n_cols, o.n_cols and the number of
                non-header schema entries.

        Generates:
            RowProxy: a fresh proxy per column, keyed by the header row.

        """
        schema = o.meta['schema']

        # The header row must always be present and must match the template.
        assert len(schema) >= 1
        assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE, (o.meta['schema'][0], MPRowsFile.SCHEMA_TEMPLATE)

        # Columns of the data table are stored as rows of the schema (headers) table.
        total = max(n_cols, o.n_cols, len(schema) - 1)

        for pos in range(1, total + 1):
            # A new proxy is built on every pass instead of reusing one: callers may collect
            # the generated values into a list, and a shared proxy would make every element
            # show the last column.
            proxy = RowProxy(schema[0])
            try:
                entry = schema[pos]
            except IndexError:
                # No schema entry yet for this column: append a placeholder whose position
                # and name are filled in and whose remaining fields are None.
                filler = [None] * (len(schema[0]) - 2)
                schema.append([pos, 'col{}'.format(pos)] + filler)
                entry = schema[pos]

            yield proxy.set_row(entry)

        # Generating columns must never disturb the header row.
        assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE
Esempio n. 2
0
    def _columns(cls, o, n_cols=0):
        """ Wrap each column entry of o.meta['schema'] in a RowProxy and generate the proxies.

        Entry 0 of the schema is the header row (expected to equal
        MPRowsFile.SCHEMA_TEMPLATE); entries 1..N describe the columns. If the requested
        column count exceeds the entries available, the schema list is extended in place
        with placeholder entries named 'col<N>'.

        Args:
            o (any having .meta dict attr and .n_cols attr): source of the schema.
            n_cols (int, optional): minimum number of columns to generate; the largest of
                n_cols, o.n_cols and the count of non-header schema entries is used.

        Generates:
            RowProxy: column wrapped with a newly created RowProxy

        """
        schema_rows = o.meta['schema']

        # There should always be a header row, and it should match the template.
        assert len(schema_rows) >= 1
        assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE, (
            o.meta['schema'][0], MPRowsFile.SCHEMA_TEMPLATE)

        # A column of the data table corresponds to a row of the schema table.
        column_count = max(n_cols, o.n_cols, len(schema_rows) - 1)

        for col_pos in range(1, column_count + 1):
            # One proxy per column rather than a shared singleton, so that a caller turning
            # the output into a list does not end up with every element showing the last column.
            column_proxy = RowProxy(schema_rows[0])
            try:
                column_row = schema_rows[col_pos]
            except IndexError:
                # Missing entry: add a placeholder with the position and a generated name
                # set, padded with None up to the header width.
                padding = [None] * (len(schema_rows[0]) - 2)
                schema_rows.append([col_pos, 'col{}'.format(col_pos)] + padding)
                column_row = schema_rows[col_pos]

            yield column_proxy.set_row(column_row)

        # The header row must still be intact once all columns were generated.
        assert o.meta['schema'][0] == MPRowsFile.SCHEMA_TEMPLATE
Esempio n. 3
0
    def select(self, predicate=None, headers=None):
        """ Select rows from the reader using a predicate and a list of headers to return a subset of elements.

        Args:
            predicate (callable, optional): if defined, a callable that is called for each row, and
                if it returns true, the row is included in the output.
            headers (list, optional): if defined, a list or tuple of header names to return from each row

        Returns:
            iterable: lazy iterable of results. Without headers the items are the rows produced by
                iterating over self; with headers each item is a RowProxy over the requested
                headers only.

        Raises:
            KeyError: while iterating, if a requested header is not present in a row.

        WARNING: This routine works from the reader iterator, which returns RowProxy objects. RowProxy
            objects are reused, so if you construct a list directly from the output from
            this method, the list will have multiple copies of a single RowProxy,
            which will have as an inner row the last result row. The same holds when headers
            are given, because a single RowProxy is reused for all of the reduced rows. If you will
            be directly constructing a list, extract the inner row or convert each RowProxy
            to a dict while iterating:

            [r.dict for r in s.datafile.select(lambda r: r.stusab == 'CA')]

        """

        rows = iter(self)

        if predicate is not None:
            # Always use the lazy filter. The builtin filter builds a list on Python 2, which
            # would collect many references to the one reused RowProxy before any of them is
            # read, so every selected row would show the values of the last row.
            rows = six.moves.filter(predicate, rows)

        if headers:
            names = tuple(headers)
            rp = RowProxy(headers)

            def getter(r):
                # Build the reduced row explicitly so that a single requested header still
                # produces a 1-tuple; operator.itemgetter with one key returns the bare value.
                d = r.dict
                return rp.set_row(tuple(d[name] for name in names))

            rows = six.moves.map(getter, rows)

        return rows
Esempio n. 4
0
    def select(self, predicate=None, headers=None):
        """ Select rows from the reader using a predicate and a list of headers to return a subset of elements.

        Args:
            predicate (callable, optional): if defined, a callable that is called for each row, and
                if it returns true, the row is included in the output.
            headers (list, optional): if defined, a list or tuple of header names to return from each row

        Returns:
            iterable: lazy iterable of results. Without headers the items are the rows produced by
                iterating over self; with headers each item is a RowProxy over the requested
                headers only.

        Raises:
            KeyError: while iterating, if a requested header is not present in a row.

        WARNING: This routine works from the reader iterator, which returns RowProxy objects. RowProxy
            objects are reused, so if you construct a list directly from the output from
            this method, the list will have multiple copies of a single RowProxy,
            which will have as an inner row the last result row. The same holds when headers
            are given, because a single RowProxy is reused for all of the reduced rows. If you will
            be directly constructing a list, extract the inner row or convert each RowProxy
            to a dict while iterating:

            [r.dict for r in s.datafile.select(lambda r: r.stusab == 'CA')]

        """

        rows = iter(self)

        if predicate is not None:
            # Always use the lazy filter. The builtin filter builds a list on Python 2, which
            # would collect many references to the one reused RowProxy before any of them is
            # read, so every selected row would show the values of the last row.
            rows = six.moves.filter(predicate, rows)

        if headers:
            names = tuple(headers)
            rp = RowProxy(headers)

            def getter(r):
                # Build the reduced row explicitly so that a single requested header still
                # produces a 1-tuple; operator.itemgetter with one key returns the bare value.
                d = r.dict
                return rp.set_row(tuple(d[name] for name in names))

            rows = six.moves.map(getter, rows)

        return rows
Esempio n. 5
0
    def __iter__(self):
        """ Iterator for reading rows as RowProxy objects

        Reads every row of the partition's 'rows' table, deserializes each column value and
        yields the row wrapped in a RowProxy keyed by self.headers. If the 'rows' table does
        not exist, nothing is yielded. self._in_iteration is True while rows are being
        produced and is reset to False when the generator finishes or is closed; self.pos is
        incremented each time the generator is resumed after yielding a row.

        Yields:
            RowProxy: the same proxy object on every iteration, re-pointed at the current row.

        WARNING: This routine generates RowProxy objects. RowProxy objects
            are reused, so if you construct a list directly from the output from this method,
            the list will have multiple copies of a single RowProxy, which will
            have as an inner row the last result row. If you will be directly constructing
            a list, use a getter that extracts the inner row, or which converted the RowProxy
            to a dict.

        """
        rp = RowProxy(self.headers)
        try:
            if 'rows' not in self._h5_file.root.partition:
                # rows table was not created, so there is nothing to yield. End the generator
                # with a plain return: raising StopIteration inside a generator body is
                # converted into RuntimeError under PEP 479 (the default since Python 3.7).
                return
            self._in_iteration = True
            table = self._h5_file.root.partition.rows
            for row in table.iterrows():
                r = [_deserialize(row[c]) for c in table.colnames]
                yield rp.set_row(r)
                self.pos += 1
        finally:
            # Runs on normal exhaustion, on the early return above, and when the consumer
            # closes the generator before it is exhausted.
            self._in_iteration = False
Esempio n. 6
0
    def __iter__(self):
        """ Iterator for reading rows as RowProxy objects

        Reads every row of the partition's 'rows' table, deserializes each column value and
        yields the row wrapped in a RowProxy keyed by self.headers. If the 'rows' table does
        not exist, nothing is yielded. self._in_iteration is True while rows are being
        produced and is reset to False when the generator finishes or is closed; self.pos is
        incremented each time the generator is resumed after yielding a row.

        Yields:
            RowProxy: the same proxy object on every iteration, re-pointed at the current row.

        WARNING: This routine generates RowProxy objects. RowProxy objects
            are reused, so if you construct a list directly from the output from this method,
            the list will have multiple copies of a single RowProxy, which will
            have as an inner row the last result row. If you will be directly constructing
            a list, use a getter that extracts the inner row, or which converted the RowProxy
            to a dict.

        """
        rp = RowProxy(self.headers)
        try:
            if 'rows' not in self._h5_file.root.partition:
                # rows table was not created, so there is nothing to yield. End the generator
                # with a plain return: raising StopIteration inside a generator body is
                # converted into RuntimeError under PEP 479 (the default since Python 3.7).
                return
            self._in_iteration = True
            table = self._h5_file.root.partition.rows
            for row in table.iterrows():
                r = [_deserialize(row[c]) for c in table.colnames]
                yield rp.set_row(r)
                self.pos += 1
        finally:
            # Runs on normal exhaustion, on the early return above, and when the consumer
            # closes the generator before it is exhausted.
            self._in_iteration = False