Beispiel #1
0
    def test_basic(self):

        srs = RecordSet(simpleRecordSet)

        pivot = Pivot(srs)

        # Pivot converts a group or records into one record per group
        self.assertEqual([[record._tuple for record in group]
                          for group in pivot],
                         [[((1, 2, 3, 4),
                            (0, 1, 0, 1))], [((5, 6),
                                              (0, 1))], [((7, 8, 9),
                                                          (0, 1, 0))]])

        srs.extend(simpleAddition)

        # adding data means the transform consumes the new data when checked
        self.assertEqual(
            [[record._tuple for record in group] for group in pivot],
            [[((1, 2, 3, 4),
               (0, 1, 0, 1))], [((5, 6), (0, 1))], [((7, 8, 9), (0, 1, 0))],
             [((11, 12, 13), (1, 0, 1))], [((14, 15, 16), (0, 1, 0))]])
Beispiel #2
0
    def test_basic(self):

        srs = RecordSet(simpleRecordSet)

        scanner = ElementScanner(srs, 'a')

        # Scanners are like generators...
        self.assertEqual([v for v in scanner], [1, 2, 3, 4, 5, 6, 7, 8, 9])

        # ... and will exhaust when fully consumed
        self.assertEqual([v for v in scanner], [])

        srs.extend(simpleAddition)

        # adding data means the scanner consumes the new data
        self.assertEqual([v for v in scanner], [11, 12, 13, 14, 15, 16])

        # resetting the scanner means it will replay the whole dataset
        scanner.reset()

        self.assertEqual([v for v in scanner],
                         [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16])
Beispiel #3
0
class Collation(Transform):
    """combine the source recordsets into one recordset.
    The new record type will have all the target fields
      along with the key (and optionally a coalescing) fields.
      
    Each
    """

    __slots__ = (
        '_key_field',
        '_collation_field',
        '_target_fields',
        '_scanner_coverage',
    )

    ScanClass = RecordScanner

    def __init__(self,
                 sources,
                 key_field,
                 target_fields=None,
                 collation_field=None,
                 *args,
                 **kwargs):
        super(Collation, self).__init__(*args, **kwargs)
        self.sources = tuple(sources)

        self._key_field = key_field
        self._collation_field = collation_field
        self._target_fields = target_fields

        self._resolveSources()

    def _resolveSources(self):

        rawSources = [
            source.results if isinstance(source, Composable) else source
            for source in self.sources
        ]

        scanner_coverage = {}
        scanners = []
        preset_targets = set(self._target_fields or [])
        covered_fields = set()

        # Gather all the fields
        for source in rawSources:
            covered_fields = set()
            for field in source._RecordType._fields:
                if field in (
                        self._key_field,
                        self._collation_field,
                ):
                    continue

                if not preset_targets or field in preset_targets:
                    covered_fields.add(field)

            # skip nops
            if not covered_fields:
                continue

            scanner = RecordScanner(source)
            scanner_coverage[scanner] = covered_fields
            scanners.append(scanner)

        all_covered_fields = set()
        for covered_fields in scanner_coverage.values():
            all_covered_fields.update(covered_fields)

        if preset_targets:
            assert preset_targets == all_covered_fields, 'Sources do not cover the target fields: given: %r -- covered: %r' % (
                preset_targets, covered_fields)
            target_fields = tuple(self._target_fields)
        else:
            target_fields = tuple(field for field in all_covered_fields)

        self._target_fields = target_fields

        self._scanner_coverage = scanner_coverage

        self.scanners = tuple(scanners)
        self._resultset = RecordSet(
            recordType=((self._key_field, ) + self._target_fields +
                        ((self._collation_field, ) or tuple())))

    def transform(self):

        scanners = set(self.scanners)

        def get_next(scanner, remaining=scanners):
            try:
                entry = next(scanner)
                return entry
            except StopIteration:
                remaining.remove(scanner)
                return None

        # initial conditions
        if self._resultset:
            cursor_values = dict(
                (field, value)
                for field, value in zip(self._resultset._RecordType._fields,
                                        self._resultset._groups[-1][-1]))
        else:
            cursor_values = dict(
                (field, None) for field in self._resultset._RecordType._fields
            )  # initialize to None to ensure _some_ value for all non-key/group fields

        cursor_value_heap = []
        for scanner in frozenset(scanners):
            entry = get_next(scanner)
            if entry is not None:
                cursor_value_heap.append(
                    (entry[self._key_field], entry,
                     scanner))  # include scanner for replacement lookup later
        heapify(cursor_value_heap)

        # generate results
        merged = []

        while cursor_value_heap:

            key_value, entry, scanner = heappop(cursor_value_heap)

            for field in self._scanner_coverage[scanner]:
                cursor_values[field] = entry[field]

            cursor_values[self._key_field] = entry[self._key_field]

            if self._collation_field:
                group_value = entry[self._collation_field]

                # when grouping for merge, assume group final value is most recent by key sort value
                if cursor_values[
                        self.
                        _collation_field] == group_value and group_value is not None:
                    merged[-1] = self._resultset._RecordType(cursor_values)
                else:
                    cursor_values[self._collation_field] = group_value
                    merged.append(self._resultset._RecordType(cursor_values))
            else:
                merged.append(self._resultset._RecordType(cursor_values))

            entry = get_next(scanner)
            if entry is not None:
                heappush(cursor_value_heap,
                         (entry[self._key_field], entry, scanner))

        if merged:
            self._resultset.extend([[v for v in merged]])