def test_basic(self):
    """Regroup reshapes the target's columns into the source's grouping."""
    src_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
    src_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
    tgt_c = [(1, 2, 3), (4, ), (5, 6, 7, 8, 9)]
    tgt_d = [(0, 1, 0), (1, ), (0, 1, 0, 1, 0)]
    # Build the source recordset
    source = RecordSet(recordType='ab')
    for columns in zip(src_a, src_b):
        source.append(record for record in zip(*columns))
    # Build the target recordset
    target = RecordSet(recordType='ef')
    for columns in zip(tgt_c, tgt_d):
        target.append(record for record in zip(*columns))
    regroup = Regroup(source, target)
    # The result carries the same columns as the target...
    self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))
    # ... grouped the way the source is grouped (4, 2, 3 records)
    self.assertEqual(
        [[record._tuple for record in group] for group in regroup],
        [[(1, 0), (2, 1), (3, 0), (4, 1)],
         [(5, 0), (6, 1)],
         [(7, 0), (8, 1), (9, 0)]])
def test_basic(self):
    """Merge joins sources into records carrying the union of columns."""
    col_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
    col_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
    col_c = [(9, 8, 7, 6, 5), (4, 3, 2), (1, )]
    col_d = [(1, 0, 1, 0, 1), (0, 1, 0), (1, )]
    rs1 = RecordSet(recordType='ab')
    for columns in zip(col_a, col_b):
        rs1.append(record for record in zip(*columns))
    rs2 = RecordSet(recordType='cb')
    for columns in zip(col_c, col_d):
        rs2.append(record for record in zip(*columns))
    merge = Merge([rs1, rs2])
    # The merged result exposes the union of both sources' columns
    self.assertEqual(merge.results._RecordType._fields, ('a', 'b', 'c'))
    # Spot-check the very first merged record
    self.assertEqual(merge.results._groups[0][0]._tuple, (1, 1, 9))
    # Note the 'b' values come from rs2 (the later source), and the
    # merged output is a single group
    self.assertEqual(
        [[record._tuple for record in group] for group in merge],
        [[(1, 1, 9), (2, 0, 8), (3, 1, 7), (4, 0, 6), (5, 1, 5),
          (6, 0, 4), (7, 1, 3), (8, 0, 2), (9, 1, 1)]])
def __init__(self, source, target, *args, **kwargs):
    """Map `source` records onto the grouping of `target`.

    The result set borrows its record type from the target.
    """
    # Let the mixins initialize first
    super(Regroup, self).__init__(*args, **kwargs)
    self._resultset = RecordSet(recordType=target._RecordType)
    self.sources = (source, target)
    self._generateScanners()
def __init__(self, source, *args, **kwargs):
    """Pivot a single source; results reuse the source's record type."""
    # Let the mixins initialize first
    super(Pivot, self).__init__(*args, **kwargs)
    self.sources = (source, )
    self.scanners = (self.ScanClass(self.sources[0]), )
    self._resultset = RecordSet(recordType=source._RecordType)
def test_basic(self):
    """ReplayingElementScanner re-emits values until anchored past them."""
    srs = RecordSet(simpleRecordSet)
    scanner = ReplayingElementScanner(srs, 'a')
    full_run = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    # Replaying scanners iterate like generators...
    self.assertEqual(list(scanner), full_run)
    # ... but do NOT exhaust once fully consumed
    self.assertEqual(list(scanner), full_run)
    # Partial iteration...
    self.assertEqual(list(sentinel(scanner, 4)), [1, 2, 3])
    # ... followed by an anchor...
    scanner.anchor()
    # ... resumes iteration from the anchor point
    self.assertEqual(list(scanner), [5, 6, 7, 8, 9])
    # Anchoring once iteration is spent leaves nothing to emit
    scanner.anchor()
    self.assertEqual(list(scanner), [])
    # New data appended to the source...
    srs.extend(simpleAddition)
    # ... is picked up by the scanner
    self.assertEqual(list(scanner), [11, 12, 13, 14, 15, 16])
    # A reset replays the entire dataset from the top
    scanner.reset()
    self.assertEqual(list(scanner),
                     [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16])
def test_basic(self):
    """Aggregate lazily reduces whole columns down to a single value."""
    function = lambda a, b: sum(a) - sum(b)
    srs = RecordSet(simpleRecordSet)
    c = Aggregate([srs], function, 'c')
    # Nothing is computed until the results are requested
    self.assertEqual(c._resultset._groups, [])
    # On evaluation the aggregate appears
    self.assertEqual([[v.c for v in group] for group in c.results.groups],
                     [[41]])
    srs.extend(simpleAddition)
    # Extending a source does not eagerly recompute
    self.assertEqual(len(c._resultset._groups), 1)
    # Re-evaluating folds the new data in; results remain one group
    self.assertEqual([[v.c for v in group] for group in c.results.groups],
                     [[119]])
    # Column slicing works on the results as well
    self.assertEqual([tuple(group) for group in c.results['c', :]],
                     [(119, )])
def test_basic(self):
    """GroupScanner walks a recordset one group at a time, then exhausts."""
    srs = RecordSet(simpleRecordSet)
    scanner = GroupScanner(srs, 'a')
    # Scanners iterate like generators...
    self.assertEqual(
        [[record._tuple for record in group] for group in scanner],
        [[(1, 0), (2, 1), (3, 0), (4, 1)],
         [(5, 0), (6, 1)],
         [(7, 0), (8, 1), (9, 0)]])
    # ... and are exhausted once fully consumed
    self.assertEqual(list(scanner), [])
    srs.extend(simpleAddition)
    # Newly appended data is consumed on the next pass
    self.assertEqual(
        [[record._tuple for record in group] for group in scanner],
        [[(11, 1), (12, 0), (13, 1)],
         [(14, 0), (15, 1), (16, 0)]])
    # A reset replays the whole dataset
    scanner.reset()
    self.assertEqual([len(group) for group in scanner], [4, 2, 3, 3, 3])
def test_basic(self):
    """Sweep lazily applies a per-record function across its sources."""
    function = lambda a, b: a + b
    srs = RecordSet(simpleRecordSet)
    c = Sweep([srs], function, 'c')
    # Nothing is computed until the results are requested
    self.assertEqual(c._resultset._groups, [])
    # On evaluation, every record has been swept
    self.assertEqual([[v.c for v in group] for group in c.results.groups],
                     [[1, 3, 3, 5, 5, 7, 7, 9, 9]])
    srs.extend(simpleAddition)
    # Extending a source does not eagerly recompute
    self.assertEqual(len(c._resultset._groups), 1)
    # Re-evaluation applies ONE update - source groups are not maintained
    self.assertEqual(
        [[v.c for v in group] for group in c.results.groups],
        [[1, 3, 3, 5, 5, 7, 7, 9, 9], [12, 12, 14, 14, 16, 16]])
    # Column slicing works on the results as well
    self.assertEqual([tuple(group) for group in c.results['c', :]],
                     [(1, 3, 3, 5, 5, 7, 7, 9, 9),
                      (12, 12, 14, 14, 16, 16)])
def test_basic(self):
    """Window lazily applies a per-group function across its sources."""
    function = lambda a, b: sum(a) - sum(b)
    srs = RecordSet(simpleRecordSet)
    c = Window([srs], function, 'c')
    # Nothing is computed until the results are requested
    self.assertEqual(c._resultset._groups, [])
    # On evaluation, one value per source group appears
    self.assertEqual([[v.c for v in group] for group in c.results.groups],
                     [[8, 10, 23]])
    srs.extend(simpleAddition)
    # Extending a source does not eagerly recompute
    self.assertEqual(len(c._resultset._groups), 1)
    # Re-evaluation applies ONE update - source groups are not maintained
    self.assertEqual([[v.c for v in group] for group in c.results.groups],
                     [[8, 10, 23], [34, 44]])
    # Column slicing works on the results as well
    self.assertEqual([tuple(group) for group in c.results['c', :]],
                     [(8, 10, 23), (34, 44)])
def _resolveSources(self):
    """Sources may overlap: if so, only take the latter."""
    rawSources = [
        source.results if isinstance(source, Composable) else source
        for source in self.sources
    ]
    # Every field, in order of appearance (duplicates included)
    allFields = [field
                 for source in rawSources
                 for field in source._RecordType._fields]
    pending = set(allFields)
    chosen = []
    # Walk the sources back-to-front so later sources win each field
    for source in reversed(rawSources):
        if not pending:
            break
        for field in source._RecordType._fields:
            if field not in pending:
                continue
            pending.remove(field)
            chosen.append((field, self.ScanClass(source, field)))
    # While we want to prioritize later sources, the fields should
    # likely keep the same order, starting with the earlier sources.
    # see https://stackoverflow.com/a/12814719/1943640
    chosen.sort(key=lambda entry: allFields.index(entry[0]))
    self.scanners = tuple(scanner for field, scanner in chosen)
    self._resultset = RecordSet(
        recordType=genRecordType(field for field, scanner in chosen))
def __init__(self, source, key_field, *args, **kwargs):
    """Cluster records from `source` using `key_field` as the key."""
    # Let the mixins initialize first
    super(Cluster, self).__init__(*args, **kwargs)
    self.sources = (source, )
    self.scanners = (self.ScanClass(source), )
    self._key_field = key_field
    self._resultset = RecordSet(recordType=source._RecordType)
def test_misalignment_2(self):
    """A source shorter than the target only fills the groups it has."""
    # Source: a single group
    src_a = [(1, 2, 3, 4)]
    src_b = [(0, 1, 0, 1)]
    source = RecordSet(recordType='ab')
    for columns in zip(src_a, src_b):
        source.append(record for record in zip(*columns))
    # Target: longer - four groups
    tgt_e = [(1, 2, 3), (4, ), (5, 6, 7), (8, 9, 10)]
    tgt_f = [(0, 1, 0), (1, ), (0, 1, 0), (1, 0, 10)]
    target = RecordSet(recordType='ef')
    for columns in zip(tgt_e, tgt_f):
        target.append(record for record in zip(*columns))
    regroup = Regroup(source, target)
    # The result carries the same columns as the target
    self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))
    # The source has only one group, so that alone gets mapped
    self.assertEqual(
        [[record._tuple for record in group] for group in regroup],
        [[(1, 0), (2, 1), (3, 0), (4, 1)]])
    # Adding two more source groups...
    source.extend([((5, 0), (6, 1)), ((7, 0), (8, 1), (9, 0))])
    # ... allows two more groups through.  The final target records are
    # still omitted because no source group maps onto them.
    self.assertEqual(
        [[record._tuple for record in group] for group in regroup],
        [[(1, 0), (2, 1), (3, 0), (4, 1)],
         [(5, 0), (6, 1)],
         [(7, 0), (8, 1), (9, 0)]])
def __init__(self, source, lag=1, *args, **kwargs):
    """Transform over `source` holding records back by `lag`."""
    # Let the mixins initialize first
    super(LagBucket, self).__init__(*args, **kwargs)
    self.sources = (source, )
    self.scanners = (self.ScanClass(source), )
    self._resultset = RecordSet(recordType=source._RecordType)
    self._lag = lag
    self._lagRecords = []  # buffer of records not yet released
def test_basic(self):
    """ReplayingGroupScanner re-emits whole groups until anchored."""
    srs = RecordSet(simpleRecordSet)
    scanner = ReplayingGroupScanner(srs)
    all_groups = [[(1, 0), (2, 1), (3, 0), (4, 1)],
                  [(5, 0), (6, 1)],
                  [(7, 0), (8, 1), (9, 0)]]
    # Replaying scanners iterate like generators...
    self.assertEqual([[r._tuple for r in g] for g in scanner], all_groups)
    # ... but do NOT exhaust once fully consumed
    self.assertEqual([[r._tuple for r in g] for g in scanner], all_groups)
    # Anchoring right after a next() call...
    self.assertEqual([r._tuple for r in next(scanner)],
                     [(1, 0), (2, 1), (3, 0), (4, 1)])
    scanner.anchor()
    # ... resumes after the consumed group
    self.assertEqual([[r._tuple for r in g] for g in scanner],
                     [[(5, 0), (6, 1)], [(7, 0), (8, 1), (9, 0)]])
    scanner.reset()
    # Partial iteration with an anchor inside the loop
    for index, group in enumerate(scanner):
        if index < 2:
            scanner.anchor()
            break
    self.assertEqual([[r._tuple for r in g] for g in scanner],
                     [[(5, 0), (6, 1)], [(7, 0), (8, 1), (9, 0)]])
    # Anchoring once iteration is spent leaves nothing to emit
    scanner.anchor()
    self.assertEqual(list(scanner), [])
    # New data appended to the source...
    srs.extend(simpleAddition)
    # ... is picked up by the scanner
    self.assertEqual([[r._tuple for r in g] for g in scanner],
                     [[(11, 1), (12, 0), (13, 1)],
                      [(14, 0), (15, 1), (16, 0)]])
    # A reset replays the entire dataset
    scanner.reset()
    self.assertEqual([[r._tuple for r in g] for g in scanner],
                     all_groups + [[(11, 1), (12, 0), (13, 1)],
                                   [(14, 0), (15, 1), (16, 0)]])
def test_misalignment_1(self):
    """A target shorter than the source drops the incomplete final group."""
    src_a = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
    src_b = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
    source = RecordSet(recordType='ab')
    for columns in zip(src_a, src_b):
        source.append(record for record in zip(*columns))
    # Target: shorter - only seven records
    tgt_e = [(1, 2, 3), (4, ), (5, 6, 7)]
    tgt_f = [(0, 1, 0), (1, ), (0, 1, 0)]
    target = RecordSet(recordType='ef')
    for columns in zip(tgt_e, tgt_f):
        target.append(record for record in zip(*columns))
    regroup = Regroup(source, target)
    # The result carries the same columns as the target
    self.assertEqual(regroup.results._RecordType._fields, ('e', 'f'))
    # Up to 7 records can be grouped.  The final source group must be
    # incomplete, and is omitted.
    self.assertEqual(
        [[record._tuple for record in group] for group in regroup],
        [[(1, 0), (2, 1), (3, 0), (4, 1)],
         [(5, 0), (6, 1)]])
    # Appending two more target records gives the target enough to
    # complete the last source group
    target.append([(8, 1), (9, 0)])
    self.assertEqual(
        [[record._tuple for record in group] for group in regroup],
        [[(1, 0), (2, 1), (3, 0), (4, 1)],
         [(5, 0), (6, 1)],
         [(7, 0), (8, 1), (9, 0)]])
def _resolveSources(self):
    """Build record scanners for the sources and derive the target fields.

    Each source contributes the fields it covers, excluding the key and
    collation fields.  When `_target_fields` is preset, the sources must
    cover exactly those fields; otherwise the covered fields become the
    targets.  Populates `_target_fields`, `_scanner_coverage`, `scanners`
    and `_resultset`.
    """
    rawSources = [
        source.results if isinstance(source, Composable) else source
        for source in self.sources
    ]
    scanner_coverage = {}
    scanners = []
    preset_targets = set(self._target_fields or [])
    # Work out which fields each source covers
    for source in rawSources:
        covered_fields = set()
        for field in source._RecordType._fields:
            if field in (
                self._key_field,
                self._collation_field,
            ):
                continue
            if not preset_targets or field in preset_targets:
                covered_fields.add(field)
        # skip nops
        if not covered_fields:
            continue
        scanner = RecordScanner(source)
        scanner_coverage[scanner] = covered_fields
        scanners.append(scanner)
    all_covered_fields = set()
    for covered_fields in scanner_coverage.values():
        all_covered_fields.update(covered_fields)
    if preset_targets:
        # BUG FIX: the failure message previously interpolated the loop
        # variable `covered_fields` (the LAST source's coverage) instead
        # of the full coverage.
        assert preset_targets == all_covered_fields, (
            'Sources do not cover the target fields: given: %r -- covered: %r'
            % (preset_targets, all_covered_fields))
        target_fields = tuple(self._target_fields)
    else:
        # Derive targets deterministically, in first-seen source order
        # (iterating the bare set would give an arbitrary field order).
        ordered = []
        for source in rawSources:
            for field in source._RecordType._fields:
                if field in all_covered_fields and field not in ordered:
                    ordered.append(field)
        target_fields = tuple(ordered)
    self._target_fields = target_fields
    self._scanner_coverage = scanner_coverage
    self.scanners = tuple(scanners)
    # BUG FIX: `(x, ) or tuple()` always evaluates to `(x, )` because a
    # one-tuple is truthy; include the collation field only when one is
    # actually set, so a None collation field never leaks into the
    # record type.
    collation = ((self._collation_field, )
                 if self._collation_field else tuple())
    self._resultset = RecordSet(
        recordType=((self._key_field, ) + self._target_fields + collation))
def test_basic(self):
    """ReplayingRecordScanner re-emits records until anchored past them."""
    srs = RecordSet(simpleRecordSet)
    scanner = ReplayingRecordScanner(srs)
    all_records = [(1, 0), (2, 1), (3, 0), (4, 1), (5, 0),
                   (6, 1), (7, 0), (8, 1), (9, 0)]
    # Replaying scanners iterate like generators...
    self.assertEqual([r._tuple for r in scanner], all_records)
    # ... but do NOT exhaust once fully consumed
    self.assertEqual([r._tuple for r in scanner], all_records)
    # Partial iteration...
    for index, record in enumerate(scanner):
        if index >= 3:
            scanner.anchor()  # anchor in the iteration
            break
    # ... and an anchor...
    scanner.anchor()
    # ... resumes iteration from the anchor point
    self.assertEqual([r._tuple for r in scanner],
                     [(5, 0), (6, 1), (7, 0), (8, 1), (9, 0)])
    # Anchoring once iteration is spent leaves nothing to emit
    scanner.anchor()
    self.assertEqual([r._tuple for r in scanner], [])
    # New data appended to the source...
    srs.extend(simpleAddition)
    # ... is picked up by the scanner
    self.assertEqual([r._tuple for r in scanner],
                     [(11, 1), (12, 0), (13, 1), (14, 0), (15, 1), (16, 0)])
    # A reset replays the entire dataset
    scanner.reset()
    self.assertEqual([r._tuple for r in scanner],
                     all_records + [(11, 1), (12, 0), (13, 1),
                                    (14, 0), (15, 1), (16, 0)])
def test_basic(self):
    """ReplayingChunkScanner re-emits column chunks until anchored."""
    srs = RecordSet(simpleRecordSet)
    scanner = ReplayingChunkScanner(srs, 'a')
    all_chunks = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
    # Replaying scanners iterate like generators...
    self.assertEqual(list(scanner), all_chunks)
    # ... but do NOT exhaust once fully consumed
    self.assertEqual(list(scanner), all_chunks)
    # Anchoring right after a next() call...
    self.assertEqual(next(scanner), (1, 2, 3, 4))
    scanner.anchor()
    # ... resumes after the consumed chunk
    self.assertEqual(list(scanner), [(5, 6), (7, 8, 9)])
    scanner.reset()
    # Partial iteration with an anchor inside the loop
    for index, chunk in enumerate(scanner):
        if index < 2:
            scanner.anchor()
            break
    self.assertEqual(list(scanner), [(5, 6), (7, 8, 9)])
    # Anchoring once iteration is spent leaves nothing to emit
    scanner.anchor()
    self.assertEqual(list(scanner), [])
    # New data appended to the source...
    srs.extend(simpleAddition)
    # ... is picked up by the scanner
    self.assertEqual(list(scanner), [(11, 12, 13), (14, 15, 16)])
    # A reset replays the entire dataset
    scanner.reset()
    self.assertEqual(list(scanner),
                     all_chunks + [(11, 12, 13), (14, 15, 16)])
def __init__(self, sources, renamed_fields, key_fields=tuple(),
             *args, **kwargs):
    """Feed exposing `renamed_fields` (plus any `key_fields`) as columns."""
    super(Feed, self).__init__(*args, **kwargs)
    self._resultset = RecordSet(
        recordType=tuple(renamed_fields) + tuple(key_fields))
    self._key_fields = key_fields
    # Sources are registered one at a time through add_source()
    self.sources = tuple()
    self.scanners = tuple()
    self._source_keys = tuple()
    for source in (sources or tuple()):
        self.add_source(source)
def __init__(self, sources, function, outputLabels, mapInputs=None,
             *args, **kwargs):
    """Calculation over `sources` producing columns named `outputLabels`.

    `function` is either a callable or a string, the latter being
    compiled into an Expression.  `mapInputs` presumably remaps source
    field names to function inputs - confirm against _resolveSources.
    """
    # Initialize mixins
    super(Calculation, self).__init__(*args, **kwargs)
    self._resultset = RecordSet(recordType=genRecordType(outputLabels))
    self.subscribe(self._resultset)
    self.sources = tuple(sources)
    if isinstance(function, (str, unicode)):
        self.function = Expression(function)
    else:
        self.function = function
    # BUG FIX: the default used to be a mutable `{}` shared across every
    # call; use a None sentinel and a fresh dict per instance so one
    # instance's mappings can never leak into another.
    self._mapInputs = {} if mapInputs is None else mapInputs
    self._resolveSources()
def test_basic(self):
    """ElementScanner walks one column's values, then exhausts."""
    srs = RecordSet(simpleRecordSet)
    scanner = ElementScanner(srs, 'a')
    # Scanners iterate like generators...
    self.assertEqual(list(scanner), [1, 2, 3, 4, 5, 6, 7, 8, 9])
    # ... and are exhausted once fully consumed
    self.assertEqual(list(scanner), [])
    srs.extend(simpleAddition)
    # Newly appended data is consumed on the next pass
    self.assertEqual(list(scanner), [11, 12, 13, 14, 15, 16])
    # A reset replays the whole dataset
    scanner.reset()
    self.assertEqual(list(scanner),
                     [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16])
def test_basic(self):
    """Pivot collapses each group of records into a single record."""
    srs = RecordSet(simpleRecordSet)
    pivot = Pivot(srs)
    # Each group becomes one record of column tuples
    self.assertEqual(
        [[record._tuple for record in group] for group in pivot],
        [[((1, 2, 3, 4), (0, 1, 0, 1))],
         [((5, 6), (0, 1))],
         [((7, 8, 9), (0, 1, 0))]])
    srs.extend(simpleAddition)
    # New source data is consumed the next time the transform is checked
    self.assertEqual(
        [[record._tuple for record in group] for group in pivot],
        [[((1, 2, 3, 4), (0, 1, 0, 1))],
         [((5, 6), (0, 1))],
         [((7, 8, 9), (0, 1, 0))],
         [((11, 12, 13), (1, 0, 1))],
         [((14, 15, 16), (0, 1, 0))]])
from ligature.recordset import RecordSet


def genData(columns, rows, start=0):
    """Generate `rows` records of `columns` sequential integers.

    `columns` may be an int or a sized collection (only its length is
    used).  With a single column, bare ints are yielded; otherwise each
    record is a tuple of consecutive ints.
    """
    if not isinstance(columns, int):
        columns = len(columns)
    stop = start + rows * columns
    starts = range(start, stop, columns)
    if columns == 1:
        return (i for i in starts)
    return (tuple(range(i, i + columns)) for i in starts)


# Shared fixture columns: 'a' carries values, 'b' carries 0/1 flags
a1 = [(1, 2, 3, 4), (5, 6), (7, 8, 9)]
b1 = [(0, 1, 0, 1), (0, 1), (0, 1, 0)]
a2 = [(11, 12, 13), (14, 15, 16)]
b2 = [(1, 0, 1), (0, 1, 0)]

# A three-group recordset used as the baseline by the tests
simpleRecordSet = RecordSet(recordType='ab')
for columns in zip(a1, b1):
    simpleRecordSet.append(record for record in zip(*columns))

# Two extra groups the tests append to exercise incremental updates
simpleAddition = RecordSet(recordType='ab')
for columns in zip(a2, b2):
    simpleAddition.append(record for record in zip(*columns))