Exemple #1
0
	def test_basic(self):
		"""Regroup two record sets of equal length and check columns and groups."""
		source_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
		source_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
		target_e = [(1, 2, 3), (4,), (5, 6, 7, 8, 9)]
		target_f = [(0, 1, 0), (1,), (0, 1, 0, 1, 0)]

		# Source record set, filled one group at a time.
		rss = RecordSet(recordType='ab')
		for values_a, values_b in zip(source_a, source_b):
			rss.append(row for row in zip(values_a, values_b))

		# Target record set, filled the same way.
		rst = RecordSet(recordType='ef')
		for values_e, values_f in zip(target_e, target_f):
			rst.append(row for row in zip(values_e, values_f))

		regroup = Regroup(rss, rst)

		# The result carries the target's column names.
		self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

		expected_groups = [
			[(1, 0), (2, 1), (3, 0), (4, 1)],
			[(5, 0), (6, 1)],
			[(7, 0), (8, 1), (9, 0)],
		]
		actual_groups = [[record._tuple for record in group]
		                 for group in regroup]
		self.assertEqual(actual_groups, expected_groups)
Exemple #2
0
    def test_basic(self):
        """Merge two record sets that share column 'b' and check the output."""
        first_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
        first_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
        second_c = [(9, 8, 7, 6, 5), (4, 3, 2), (1, )]
        second_b = [(1, 0, 1, 0, 1), (0, 1, 0), (1, )]

        first = RecordSet(recordType='ab')
        for values_a, values_b in zip(first_a, first_b):
            first.append(row for row in zip(values_a, values_b))

        second = RecordSet(recordType='cb')
        for values_c, values_b in zip(second_c, second_b):
            second.append(row for row in zip(values_c, values_b))

        merge = Merge([first, second])

        # Fields keep the order in which they first appear across the inputs.
        self.assertEqual(merge.results._RecordType._fields, ('a', 'b', 'c'))

        # The shared 'b' value asserted here is the second set's (1), not the
        # first set's (0).
        first_record = merge.results._groups[0][0]
        self.assertEqual(first_record._tuple, (1, 1, 9))

        # Everything comes out as a single group.
        expected = [[
            (1, 1, 9), (2, 0, 8), (3, 1, 7),
            (4, 0, 6), (5, 1, 5), (6, 0, 4),
            (7, 1, 3), (8, 0, 2), (9, 1, 1),
        ]]
        actual = [[record._tuple for record in group] for group in merge]
        self.assertEqual(actual, expected)
Exemple #3
0
    def __init__(self, source, target, *args, **kwargs):
        """Set up a Regroup over `source` and `target`.

        Both inputs are kept as the sources; the result set is created with
        the target's record type. Extra arguments go to the parent classes.
        """
        # Give the mixins their turn first.
        super(Regroup, self).__init__(*args, **kwargs)

        self.sources = (source, target)
        result_type = target._RecordType
        self._resultset = RecordSet(recordType=result_type)
        self._generateScanners()
Exemple #4
0
    def __init__(self, source, *args, **kwargs):
        """Set up a Pivot over a single `source`.

        The result set reuses the source's record type, and one scanner of
        `self.ScanClass` is attached to the source. Extra arguments go to the
        parent classes.
        """
        # Give the mixins their turn first.
        super(Pivot, self).__init__(*args, **kwargs)

        self.sources = (source, )
        result_type = source._RecordType
        self._resultset = RecordSet(recordType=result_type)
        source_scanner = self.ScanClass(self.sources[0])
        self.scanners = (source_scanner, )
    def test_basic(self):
        """Exercise replay, anchor and reset on a ReplayingElementScanner."""
        original_values = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        added_values = [11, 12, 13, 14, 15, 16]

        srs = RecordSet(simpleRecordSet)
        scanner = ReplayingElementScanner(srs, 'a')

        def drain():
            # Collect everything the scanner currently emits.
            return [value for value in scanner]

        # A full pass yields every element of column 'a'...
        self.assertEqual(drain(), original_values)

        # ... and a second pass yields them again: no exhaustion.
        self.assertEqual(drain(), original_values)

        # Stop part-way through using the sentinel helper...
        partial = [value for value in sentinel(scanner, 4)]
        self.assertEqual(partial, [1, 2, 3])
        # ... drop an anchor at that point...
        scanner.anchor()
        # ... and the next pass picks up from there.
        self.assertEqual(drain(), [5, 6, 7, 8, 9])

        # An anchor placed after a complete pass leaves nothing to emit.
        scanner.anchor()
        self.assertEqual(drain(), [])

        # Once the source grows...
        srs.extend(simpleAddition)

        # ... only the new elements are emitted.
        self.assertEqual(drain(), added_values)

        # After a reset the whole dataset is replayed from the start.
        scanner.reset()

        self.assertEqual(drain(), original_values + added_values)
Exemple #6
0
    def test_basic(self):
        """Aggregate evaluates lazily and keeps its result in a single group."""
        # Kept as a lambda with arguments named a and b, as in the original.
        difference = lambda a, b: sum(a) - sum(b)

        srs = RecordSet(simpleRecordSet)
        c = Aggregate([srs], difference, 'c')

        def evaluated():
            # Reading `results` is what triggers the calculation.
            return [[record.c for record in group]
                    for group in c.results.groups]

        # Nothing has been computed before the results are requested.
        self.assertEqual(c._resultset._groups, [])

        # First evaluation.
        self.assertEqual(evaluated(), [[41]])

        srs.extend(simpleAddition)

        # New source data alone does not touch the stored result...
        self.assertEqual(len(c._resultset._groups), 1)

        # ... but the next evaluation reflects it, still as one group.
        self.assertEqual(evaluated(), [[119]])

        # Column slicing gives the same value.
        sliced = [tuple(group) for group in c.results['c', :]]
        self.assertEqual(sliced, [(119, )])
Exemple #7
0
    def test_basic(self):
        """GroupScanner emits each group once, then only newly added groups."""
        srs = RecordSet(simpleRecordSet)
        scanner = GroupScanner(srs, 'a')

        def drain():
            # Collect the tuples of every group the scanner currently emits.
            return [[record._tuple for record in group] for group in scanner]

        # The first pass yields all three groups...
        initial_groups = [
            [(1, 0), (2, 1), (3, 0), (4, 1)],
            [(5, 0), (6, 1)],
            [(7, 0), (8, 1), (9, 0)],
        ]
        self.assertEqual(drain(), initial_groups)

        # ... after which the scanner is exhausted.
        self.assertEqual([group for group in scanner], [])

        srs.extend(simpleAddition)

        # Extending the source makes just the new groups available.
        added_groups = [
            [(11, 1), (12, 0), (13, 1)],
            [(14, 0), (15, 1), (16, 0)],
        ]
        self.assertEqual(drain(), added_groups)

        # A reset replays every group from the beginning.
        scanner.reset()

        group_sizes = [len(group) for group in scanner]
        self.assertEqual(group_sizes, [4, 2, 3, 3, 3])
Exemple #8
0
    def test_basic(self):
        """Sweep evaluates lazily and adds one result group per update."""
        # Kept as a lambda with arguments named a and b, as in the original.
        add = lambda a, b: a + b

        srs = RecordSet(simpleRecordSet)
        c = Sweep([srs], add, 'c')

        def evaluated():
            # Reading `results` is what triggers the calculation.
            return [[record.c for record in group]
                    for group in c.results.groups]

        first_update = [1, 3, 3, 5, 5, 7, 7, 9, 9]
        second_update = [12, 12, 14, 14, 16, 16]

        # Nothing has been computed before the results are requested.
        self.assertEqual(c._resultset._groups, [])

        # First evaluation: all source records land in one group.
        self.assertEqual(evaluated(), [first_update])

        srs.extend(simpleAddition)

        # New source data alone does not touch the stored result...
        self.assertEqual(len(c._resultset._groups), 1)

        # ... but the next evaluation appends it as ONE further group; the
        # source's own grouping is not kept.
        self.assertEqual(evaluated(), [first_update, second_update])

        # Column slicing gives the same values, group by group.
        sliced = [tuple(group) for group in c.results['c', :]]
        self.assertEqual(sliced, [tuple(first_update), tuple(second_update)])
Exemple #9
0
    def test_basic(self):
        """Window evaluates lazily and adds one result group per update."""
        # Kept as a lambda with arguments named a and b, as in the original.
        difference = lambda a, b: sum(a) - sum(b)

        srs = RecordSet(simpleRecordSet)
        c = Window([srs], difference, 'c')

        def evaluated():
            # Reading `results` is what triggers the calculation.
            return [[record.c for record in group]
                    for group in c.results.groups]

        first_update = [8, 10, 23]
        second_update = [34, 44]

        # Nothing has been computed before the results are requested.
        self.assertEqual(c._resultset._groups, [])

        # First evaluation: one value per source group.
        self.assertEqual(evaluated(), [first_update])

        srs.extend(simpleAddition)

        # New source data alone does not touch the stored result...
        self.assertEqual(len(c._resultset._groups), 1)

        # ... but the next evaluation appends it as ONE further group.
        self.assertEqual(evaluated(), [first_update, second_update])

        # Column slicing gives the same values, group by group.
        sliced = [tuple(group) for group in c.results['c', :]]
        self.assertEqual(sliced, [tuple(first_update), tuple(second_update)])
Exemple #10
0
    def _resolveSources(self):
        """Choose one scanner per distinct field, preferring later sources.

        When a field is offered by several sources, the scanner is built on
        the last source that has it. The resulting field order follows each
        field's first appearance across the sources.
        """
        rawSources = []
        for candidate in self.sources:
            if isinstance(candidate, Composable):
                candidate = candidate.results
            rawSources.append(candidate)

        # Every field of every source, in source order, duplicates included.
        allFields = [
            field for source in rawSources
            for field in source._RecordType._fields
        ]

        # Index of each field's first appearance; used for the final ordering.
        firstSeen = {}
        for position, field in enumerate(allFields):
            firstSeen.setdefault(field, position)

        # Walk the sources last-to-first and claim each field the first time
        # it is met, stopping as soon as nothing is left to claim.
        unclaimed = set(allFields)
        claimed = []
        for source in reversed(rawSources):
            for field in source._RecordType._fields:
                if field in unclaimed:
                    unclaimed.discard(field)
                    claimed.append((field, self.ScanClass(source, field)))
                if not unclaimed:
                    break
            if not unclaimed:
                break

        # Later sources win the claim, but fields are presented in the order
        #   the earlier sources introduced them.
        claimed.sort(key=lambda pair: firstSeen[pair[0]])

        self.scanners = tuple(scanner for _, scanner in claimed)
        self._resultset = RecordSet(recordType=genRecordType(
            name for name, _ in claimed))
Exemple #11
0
    def __init__(self, source, key_field, *args, **kwargs):
        """Set up a Cluster over `source`, remembering `key_field`.

        The result set reuses the source's record type, and one scanner of
        `self.ScanClass` is attached to the source. Extra arguments go to the
        parent classes.
        """
        # Give the mixins their turn first.
        super(Cluster, self).__init__(*args, **kwargs)

        self._key_field = key_field
        self.sources = (source, )
        result_type = source._RecordType
        self._resultset = RecordSet(recordType=result_type)
        source_scanner = self.ScanClass(source)
        self.scanners = (source_scanner, )
Exemple #12
0
	def test_misalignment_2(self):
		"""Regroup when the target holds more records than the source."""
		# Source: a single group of four records.
		source_a = [(1, 2, 3, 4)]
		source_b = [(0, 1, 0, 1)]
		rss = RecordSet(recordType='ab')
		for values_a, values_b in zip(source_a, source_b):
			rss.append(row for row in zip(values_a, values_b))

		# Target: longer than the source, ten records in four groups.
		target_e = [(1, 2, 3), (4,), (5, 6, 7), (8, 9, 10)]
		target_f = [(0, 1, 0), (1,), (0, 1, 0), (1, 0, 10)]
		rst = RecordSet(recordType='ef')
		for values_e, values_f in zip(target_e, target_f):
			rst.append(row for row in zip(values_e, values_f))

		regroup = Regroup(rss, rst)

		def regrouped():
			# Collect the tuples of every group the transform emits.
			return [[record._tuple for record in group] for group in regroup]

		# The result carries the target's column names.
		self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

		# With one source group, exactly one group is produced.
		first_group = [(1, 0), (2, 1), (3, 0), (4, 1)]
		self.assertEqual(regrouped(), [first_group])

		# Give the source two further groups...
		rss.extend([
			((5, 0), (6, 1)),
			((7, 0), (8, 1), (9, 0)),
		])

		# ... and two further groups come out. The last target record is
		#   still left out: no source group exists for it to be mapped into.
		self.assertEqual(
			regrouped(),
			[first_group,
			 [(5, 0), (6, 1)],
			 [(7, 0), (8, 1), (9, 0)]])
Exemple #13
0
 def __init__(self, source, lag=1, *args, **kwargs):
     """Set up a LagBucket over `source` with the given `lag` (default 1).

     The result set reuses the source's record type, one scanner of
     `self.ScanClass` is attached to the source, and the list of lagged
     records starts out empty. Extra arguments go to the parent classes.
     """
     # Give the mixins their turn first.
     super(LagBucket, self).__init__(*args, **kwargs)

     self._lag = lag
     self.sources = (source,)
     result_type = source._RecordType
     self._resultset = RecordSet(recordType=result_type)
     source_scanner = self.ScanClass(source)
     self.scanners = (source_scanner,)
     self._lagRecords = []
    def test_basic(self):
        """Exercise replay, anchor and reset on a ReplayingGroupScanner."""
        group_one = [(1, 0), (2, 1), (3, 0), (4, 1)]
        group_two = [(5, 0), (6, 1)]
        group_three = [(7, 0), (8, 1), (9, 0)]
        added_groups = [
            [(11, 1), (12, 0), (13, 1)],
            [(14, 0), (15, 1), (16, 0)],
        ]

        srs = RecordSet(simpleRecordSet)
        scanner = ReplayingGroupScanner(srs)

        def drain():
            # Collect the tuples of every group the scanner currently emits.
            return [[record._tuple for record in group] for group in scanner]

        # A full pass yields every group...
        self.assertEqual(drain(), [group_one, group_two, group_three])

        # ... and a second pass yields them again: no exhaustion.
        self.assertEqual(drain(), [group_one, group_two, group_three])

        # Take one group with next(), then anchor behind it.
        head = [record._tuple for record in next(scanner)]
        self.assertEqual(head, group_one)
        scanner.anchor()

        self.assertEqual(drain(), [group_two, group_three])

        scanner.reset()

        # Anchor from inside a loop; the guard holds on the first pass, so
        # the loop stops after a single group.
        for index, _group in enumerate(scanner):
            if index < 2:
                scanner.anchor()
                break

        self.assertEqual(drain(), [group_two, group_three])

        # An anchor placed after a complete pass leaves nothing to emit.
        scanner.anchor()
        self.assertEqual([group for group in scanner], [])

        # Once the source grows...
        srs.extend(simpleAddition)

        # ... only the new groups are emitted.
        self.assertEqual(drain(), added_groups)

        # After a reset the whole dataset is replayed from the start.
        scanner.reset()

        self.assertEqual(drain(),
                         [group_one, group_two, group_three] + added_groups)
Exemple #15
0
	def test_misalignment_1(self):
		"""Regroup when the target holds fewer records than the source."""
		# Source: nine records in three groups.
		source_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
		source_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
		rss = RecordSet(recordType='ab')
		for values_a, values_b in zip(source_a, source_b):
			rss.append(row for row in zip(values_a, values_b))

		# Target: shorter than the source, only seven records.
		target_e = [(1, 2, 3), (4,), (5, 6, 7)]
		target_f = [(0, 1, 0), (1,), (0, 1, 0)]
		rst = RecordSet(recordType='ef')
		for values_e, values_f in zip(target_e, target_f):
			rst.append(row for row in zip(values_e, values_f))

		regroup = Regroup(rss, rst)

		def regrouped():
			# Collect the tuples of every group the transform emits.
			return [[record._tuple for record in group] for group in regroup]

		# The result carries the target's column names.
		self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))

		# Seven records are available, so the third source group cannot be
		#   completed and is left out.
		complete_groups = [
			[(1, 0), (2, 1), (3, 0), (4, 1)],
			[(5, 0), (6, 1)],
		]
		self.assertEqual(regrouped(), complete_groups)

		# Two more target records are enough to complete the third group.
		rst.append([(8, 1), (9, 0)])

		self.assertEqual(regrouped(),
		                 complete_groups + [[(7, 0), (8, 1), (9, 0)]])
Exemple #16
0
    def _resolveSources(self):
        """Work out which target fields each source covers and build scanners.

        Each source (or its `results`, for a Composable) gets a RecordScanner
        if it provides at least one usable field. A field is usable when it
        is neither the key field nor the collation field and, if target
        fields were preset, is one of them.

        Sets `_target_fields`, `_scanner_coverage`, `scanners` and
        `_resultset` on the instance.

        Raises:
            AssertionError: target fields were preset and the sources do not
                cover exactly that set.
        """
        rawSources = [
            source.results if isinstance(source, Composable) else source
            for source in self.sources
        ]

        scanner_coverage = {}
        scanners = []
        preset_targets = set(self._target_fields or [])
        reserved_fields = (self._key_field, self._collation_field)

        # Record, per source, the fields it contributes.
        for source in rawSources:
            covered_fields = set()
            for field in source._RecordType._fields:
                if field in reserved_fields:
                    continue

                if not preset_targets or field in preset_targets:
                    covered_fields.add(field)

            # A source that contributes nothing gets no scanner.
            if not covered_fields:
                continue

            scanner = RecordScanner(source)
            scanner_coverage[scanner] = covered_fields
            scanners.append(scanner)

        all_covered_fields = set()
        for covered_fields in scanner_coverage.values():
            all_covered_fields.update(covered_fields)

        if preset_targets:
            # Report the union of covered fields. The per-source loop variable
            # would only show whichever source happened to be visited last.
            assert preset_targets == all_covered_fields, (
                'Sources do not cover the target fields: '
                'given: %r -- covered: %r' % (preset_targets,
                                              all_covered_fields))
            target_fields = tuple(self._target_fields)
        else:
            target_fields = tuple(field for field in all_covered_fields)

        self._target_fields = target_fields

        self._scanner_coverage = scanner_coverage

        self.scanners = tuple(scanners)
        # NOTE(review): a one-element tuple is always truthy, so the
        #   `or tuple()` fallback never applies and the collation field is
        #   always appended, even when it is None. Confirm whether a missing
        #   collation field should drop this column instead.
        self._resultset = RecordSet(
            recordType=((self._key_field, ) + self._target_fields +
                        ((self._collation_field, ) or tuple())))
    def test_basic(self):
        """Exercise replay, anchor and reset on a ReplayingRecordScanner."""
        original_records = [(1, 0), (2, 1), (3, 0), (4, 1), (5, 0), (6, 1),
                            (7, 0), (8, 1), (9, 0)]
        added_records = [(11, 1), (12, 0), (13, 1), (14, 0), (15, 1),
                         (16, 0)]

        srs = RecordSet(simpleRecordSet)
        scanner = ReplayingRecordScanner(srs)

        def drain():
            # Collect the tuple of every record the scanner currently emits.
            return [record._tuple for record in scanner]

        # A full pass yields every record...
        self.assertEqual(drain(), original_records)

        # ... and a second pass yields them again: no exhaustion.
        self.assertEqual(drain(), original_records)

        # Walk part of the way in and anchor from inside the loop...
        for index, _record in enumerate(scanner):
            if index >= 3:
                scanner.anchor()
                break
        # ... anchor once more after leaving it...
        scanner.anchor()
        # ... and the next pass continues from the anchor.
        self.assertEqual(drain(), original_records[4:])

        # An anchor placed after a complete pass leaves nothing to emit.
        scanner.anchor()
        self.assertEqual(drain(), [])

        # Once the source grows...
        srs.extend(simpleAddition)

        # ... only the new records are emitted.
        self.assertEqual(drain(), added_records)

        # After a reset the whole dataset is replayed from the start.
        scanner.reset()

        self.assertEqual(drain(), original_records + added_records)
    def test_basic(self):
        """Exercise replay, anchor and reset on a ReplayingChunkScanner."""
        original_chunks = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
        added_chunks = [(11, 12, 13), (14, 15, 16)]

        srs = RecordSet(simpleRecordSet)
        scanner = ReplayingChunkScanner(srs, 'a')

        def drain():
            # Collect every chunk the scanner currently emits.
            return [chunk for chunk in scanner]

        # A full pass yields one chunk of column 'a' per group...
        self.assertEqual(drain(), original_chunks)

        # ... and a second pass yields them again: no exhaustion.
        self.assertEqual(drain(), original_chunks)

        # Take one chunk with next(), then anchor behind it.
        self.assertEqual(next(scanner), (1, 2, 3, 4))
        scanner.anchor()

        self.assertEqual(drain(), [(5, 6), (7, 8, 9)])

        scanner.reset()

        # Anchor from inside a loop; the guard holds on the first pass, so
        # the loop stops after a single chunk.
        for index, _chunk in enumerate(scanner):
            if index < 2:
                scanner.anchor()
                break

        self.assertEqual(drain(), [(5, 6), (7, 8, 9)])

        # An anchor placed after a complete pass leaves nothing to emit.
        scanner.anchor()
        self.assertEqual(drain(), [])

        # Once the source grows...
        srs.extend(simpleAddition)

        # ... only the new chunks are emitted.
        self.assertEqual(drain(), added_chunks)

        # After a reset the whole dataset is replayed from the start.
        scanner.reset()

        self.assertEqual(drain(), original_chunks + added_chunks)
Exemple #19
0
    def __init__(self,
                 sources,
                 renamed_fields,
                 key_fields=tuple(),
                 *args,
                 **kwargs):
        """Set up a Feed and register each of the given `sources`.

        The result set's record type is `renamed_fields` followed by
        `key_fields`. Sources, scanners and source keys start out empty;
        every entry of `sources` (if any) is then passed to `add_source`.
        Extra arguments go to the parent classes.
        """
        super(Feed, self).__init__(*args, **kwargs)

        result_fields = tuple(renamed_fields) + tuple(key_fields)
        self._resultset = RecordSet(recordType=result_fields)

        self.sources = tuple()
        self.scanners = tuple()
        self._key_fields = key_fields
        self._source_keys = tuple()

        # A falsy `sources` (None, empty) registers nothing.
        for source in (sources or ()):
            self.add_source(source)
Exemple #20
0
    def __init__(self,
                 sources,
                 function,
                 outputLabels,
                 mapInputs=None,
                 *args,
                 **kwargs):
        """Set up a calculation over `sources`.

        Args:
            sources: iterable of inputs; stored as a tuple.
            function: a callable, or a string that is wrapped in
                `Expression`.
            outputLabels: labels used to generate the result record type.
            mapInputs: optional mapping stored as `_mapInputs`. Defaults to
                a new empty dict for each instance.
            *args, **kwargs: passed on to the parent classes.
        """
        # Initialize mixins
        super(Calculation, self).__init__(*args, **kwargs)

        self._resultset = RecordSet(recordType=genRecordType(outputLabels))
        self.subscribe(self._resultset)
        self.sources = tuple(sources)

        if isinstance(function, (str, unicode)):
            self.function = Expression(function)
        else:
            self.function = function

        # A `{}` default would be created once and shared by every instance
        # that relies on it, so build a fresh dict here instead.
        self._mapInputs = {} if mapInputs is None else mapInputs
        self._resolveSources()
Exemple #21
0
    def test_basic(self):
        """ElementScanner emits each element once, then only new elements."""
        original_values = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        added_values = [11, 12, 13, 14, 15, 16]

        srs = RecordSet(simpleRecordSet)
        scanner = ElementScanner(srs, 'a')

        def drain():
            # Collect everything the scanner currently emits.
            return [value for value in scanner]

        # The first pass yields every element of column 'a'...
        self.assertEqual(drain(), original_values)

        # ... after which the scanner is exhausted.
        self.assertEqual(drain(), [])

        srs.extend(simpleAddition)

        # Extending the source makes just the new elements available.
        self.assertEqual(drain(), added_values)

        # A reset replays the whole dataset from the beginning.
        scanner.reset()

        self.assertEqual(drain(), original_values + added_values)
Exemple #22
0
    def test_basic(self):
        """Pivot turns each source group into a single record of columns."""
        srs = RecordSet(simpleRecordSet)
        pivot = Pivot(srs)

        def pivoted():
            # Collect the tuples of every group the transform emits.
            return [[record._tuple for record in group] for group in pivot]

        # Each group of records becomes one record per group, holding the
        # group's values column by column.
        initial = [
            [((1, 2, 3, 4), (0, 1, 0, 1))],
            [((5, 6), (0, 1))],
            [((7, 8, 9), (0, 1, 0))],
        ]
        self.assertEqual(pivoted(), initial)

        srs.extend(simpleAddition)

        # The next check also includes the groups added to the source.
        added = [
            [((11, 12, 13), (1, 0, 1))],
            [((14, 15, 16), (0, 1, 0))],
        ]
        self.assertEqual(pivoted(), initial + added)
Exemple #23
0
from ligature.recordset import RecordSet


def genData(columns, rows, start=0):
    """Return a generator of `rows` items of consecutive integers.

    `columns` is either a column count or a sized collection whose length is
    used as the count. With one column each item is a bare integer;
    otherwise each item is a tuple of `columns` consecutive integers.
    Numbering begins at `start`.
    """
    width = columns if isinstance(columns, int) else len(columns)
    row_starts = range(start, start + rows * width, width)
    if width == 1:
        return (first for first in row_starts)
    return (tuple(range(first, first + width)) for first in row_starts)


# Shared fixture data. Each list holds one tuple per group; a1/b1 are the
# 'a' and 'b' column values of the base record set.
a1 = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
b1 = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]

# Column values for the two extra groups used to extend a record set.
a2 = [(11, 12, 13), (14, 15, 16)]
b2 = [(1, 0, 1), (0, 1, 0)]

# Base record set: one append per group, pairing up the a/b values.
simpleRecordSet = RecordSet(recordType='ab')
for g in zip(a1, b1):
    simpleRecordSet.append(v for v in zip(*g))

# Additional record set, built the same way from a2/b2.
simpleAddition = RecordSet(recordType='ab')
for g in zip(a2, b2):
    simpleAddition.append(v for v in zip(*g))