def _test_graphExecutionDriver(self, mode):
    """
    Checks that the execution of DROPs can be driven externally when
    required, rather than always being triggered internally by the DROPs
    themselves.

    `mode` is the execution mode given to the first DROP of the graph;
    expectations are attached to the EXTERNAL and DROP modes only.
    """
    source = InMemoryDROP('a', 'a', executionMode=mode, expectedSize=1)
    app = SumupContainerChecksum('b', 'b')
    sink = InMemoryDROP('c', 'c')
    source.addConsumer(app)
    sink.addProducer(app)

    # When driven externally the write is not expected to propagate, so
    # there is nothing to wait for
    externallyDriven = (mode == ExecutionMode.EXTERNAL)
    with DROPWaiterCtx(self, [] if externallyDriven else [sink]):
        source.write('1')

    if externallyDriven:
        # The application hasn't been triggered yet
        self.assertEqual(sink.status, DROPStates.INITIALIZED)
        self.assertEqual(app.status, DROPStates.INITIALIZED)
        self.assertEqual(app.execStatus, AppDROPStates.NOT_RUN)

        # Drive the application by hand, telling it its input is complete
        with DROPWaiterCtx(self, [sink]):
            app.dropCompleted('a', DROPStates.COMPLETED)
        self.assertEqual(sink.status, DROPStates.COMPLETED)
    elif mode == ExecutionMode.DROP:
        # The application already ran by itself
        self.assertEqual(sink.status, DROPStates.COMPLETED)
def _test_graphExecutionDriver(self, mode):
    """
    A small test to check that DROPs executions can be driven externally if
    required, and not always internally by themselves.

    `mode` is the execution mode given to DROP "a"; only the EXTERNAL and
    DROP modes have expectations attached to them.
    """
    a = InMemoryDROP("a", "a", executionMode=mode, expectedSize=1)
    b = SumupContainerChecksum("b", "b")
    c = InMemoryDROP("c", "c")
    a.addConsumer(b)
    c.addProducer(b)

    # Write and check
    dropsToWaitFor = [] if mode == ExecutionMode.EXTERNAL else [c]
    with DROPWaiterCtx(self, dropsToWaitFor):
        a.write("1")

    # NOTE: assertEqual is used instead of the deprecated assertEquals alias
    if mode == ExecutionMode.EXTERNAL:
        # b hasn't been triggered
        self.assertEqual(c.status, DROPStates.INITIALIZED)
        self.assertEqual(b.status, DROPStates.INITIALIZED)
        self.assertEqual(b.execStatus, AppDROPStates.NOT_RUN)

        # Now let b consume a
        with DROPWaiterCtx(self, [c]):
            b.dropCompleted("a", DROPStates.COMPLETED)
        self.assertEqual(c.status, DROPStates.COMPLETED)
    elif mode == ExecutionMode.DROP:
        # b is already done
        self.assertEqual(c.status, DROPStates.COMPLETED)
def test_app_multiple_outputs(self):
    """
    A small method that tests that the AppDROPs writing to two different
    DROPs outputs works

    The graph constructed by this example looks as follow:

                            |--> E
    A --> B --> C --> D --|
                            |--> F

    Here B and D are an AppDROPs, with D writing to two DROPs
    outputs (E and F) and reading from C. C, in turn, is written by B, which
    in turns reads the data from A
    """

    # This is used as "B"
    class NumberWriterApp(BarrierAppDROP):
        def run(self):
            inputDrop = self.inputs[0]
            output = self.outputs[0]
            howMany = int(droputils.allDropContents(inputDrop))
            # range() instead of the Python 2-only xrange()
            for i in range(howMany):
                output.write(str(i) + " ")

    # This is used as "D"
    class OddAndEvenContainerApp(BarrierAppDROP):
        def run(self):
            inputDrop = self.inputs[0]
            outputs = self.outputs

            # Even numbers go to the first output, odd ones to the second
            numbers = droputils.allDropContents(inputDrop).strip().split()
            for n in numbers:
                outputs[int(n) % 2].write(n + " ")

    # Create DROPs (C gets its own identifiers instead of reusing A's)
    a = InMemoryDROP("oid:A", "uid:A")
    b = NumberWriterApp("oid:B", "uid:B")
    c = InMemoryDROP("oid:C", "uid:C")
    d = OddAndEvenContainerApp("oid:D", "uid:D")
    e = InMemoryDROP("oid:E", "uid:E")
    f = InMemoryDROP("oid:F", "uid:F")

    # Wire them together
    a.addConsumer(b)
    b.addOutput(c)
    c.addConsumer(d)
    d.addOutput(e)
    d.addOutput(f)

    # Start the execution
    with DROPWaiterCtx(self, [e, f]):
        a.write("20")
        a.setCompleted()

    # Check the final results are correct; F is waited for above, so it is
    # checked here along with the rest of the graph
    for drop in [a, b, c, d, e, f]:
        self.assertEqual(drop.status, DROPStates.COMPLETED, "%r is not yet COMPLETED" % (drop))
    self.assertEqual("0 2 4 6 8 10 12 14 16 18", droputils.allDropContents(e).strip())
    self.assertEqual("1 3 5 7 9 11 13 15 17 19", droputils.allDropContents(f).strip())
def test_app_multiple_outputs(self):
    """
    A small method that tests that the AppDROPs writing to two different
    DROPs outputs works

    The graph constructed by this example looks as follow:

                            |--> E
    A --> B --> C --> D --|
                            |--> F

    Here B and D are an AppDROPs, with D writing to two DROPs
    outputs (E and F) and reading from C. C, in turn, is written by B, which
    in turns reads the data from A
    """

    # This is used as "B"
    class NumberWriterApp(BarrierAppDROP):
        def run(self):
            inputDrop = self.inputs[0]
            output = self.outputs[0]
            howMany = int(droputils.allDropContents(inputDrop))
            for i in range(howMany):
                output.write(str(i) + " ")

    # This is used as "D"
    class OddAndEvenContainerApp(BarrierAppDROP):
        def run(self):
            inputDrop = self.inputs[0]
            outputs = self.outputs

            # Even numbers go to the first output, odd ones to the second
            numbers = droputils.allDropContents(inputDrop).strip().split()
            for n in numbers:
                outputs[int(n) % 2].write(n + six.b(" "))

    # Create DROPs (C gets its own identifiers instead of reusing A's)
    a = InMemoryDROP('oid:A', 'uid:A')
    b = NumberWriterApp('oid:B', 'uid:B')
    c = InMemoryDROP('oid:C', 'uid:C')
    d = OddAndEvenContainerApp('oid:D', 'uid:D')
    e = InMemoryDROP('oid:E', 'uid:E')
    f = InMemoryDROP('oid:F', 'uid:F')

    # Wire them together
    a.addConsumer(b)
    b.addOutput(c)
    c.addConsumer(d)
    d.addOutput(e)
    d.addOutput(f)

    # Start the execution
    with DROPWaiterCtx(self, [e, f]):
        a.write('20')
        a.setCompleted()

    # Check the final results are correct; F is waited for above, so it is
    # checked here along with the rest of the graph
    for drop in [a, b, c, d, e, f]:
        self.assertEqual(drop.status, DROPStates.COMPLETED, "%r is not yet COMPLETED" % (drop))
    self.assertEqual(six.b("0 2 4 6 8 10 12 14 16 18"), droputils.allDropContents(e).strip())
    self.assertEqual(six.b("1 3 5 7 9 11 13 15 17 19"), droputils.allDropContents(f).strip())
def test_stateMachine(self):
    """
    A simple test to check that some transitions are invalid
    """

    # Nice and easy
    drop = InMemoryDROP("a", "a")
    self.assertEqual(drop.status, DROPStates.INITIALIZED)
    drop.write("a")
    self.assertEqual(drop.status, DROPStates.WRITING)
    drop.setCompleted()
    self.assertEqual(drop.status, DROPStates.COMPLETED)

    # Try to overwrite the DROP's checksum and size
    self.assertRaises(Exception, setattr, drop, "checksum", 0)
    self.assertRaises(Exception, setattr, drop, "size", 0)

    # Try to write on a DROP that is already COMPLETED
    self.assertRaises(Exception, drop.write, "")

    # Invalid reading on a DROP that isn't COMPLETED yet
    drop = InMemoryDROP("a", "a")
    self.assertRaises(Exception, drop.open)
    self.assertRaises(Exception, drop.read, 1)
    self.assertRaises(Exception, drop.close, 1)

    # Invalid file descriptors used to read/close
    drop.setCompleted()
    fd = drop.open()
    otherFd = random.SystemRandom().randint(0, 1000)
    self.assertNotEqual(fd, otherFd)
    self.assertRaises(Exception, drop.read, otherFd)
    self.assertRaises(Exception, drop.close, otherFd)

    # but using the correct one should be OK
    drop.read(fd)
    self.assertTrue(drop.isBeingRead())
    drop.close(fd)

    # Expire it, then try to set it as COMPLETED again
    drop.status = DROPStates.EXPIRED
    self.assertRaises(Exception, drop.setCompleted)
def test_stateMachine(self):
    """
    Walks a DROP through its states and verifies that a number of
    operations and transitions are rejected along the way
    """

    # The happy path: INITIALIZED -> WRITING -> COMPLETED
    completed = InMemoryDROP('a', 'a')
    self.assertEqual(completed.status, DROPStates.INITIALIZED)
    completed.write('a')
    self.assertEqual(completed.status, DROPStates.WRITING)
    completed.setCompleted()
    self.assertEqual(completed.status, DROPStates.COMPLETED)

    # Neither the checksum nor the size can be overwritten by hand
    for attribute in ('checksum', 'size'):
        self.assertRaises(Exception, setattr, completed, attribute, 0)

    # Writing into an already COMPLETED DROP is rejected
    self.assertRaises(Exception, completed.write, '')

    # A DROP that isn't COMPLETED yet cannot be opened, read or closed
    fresh = InMemoryDROP('a', 'a')
    self.assertRaises(Exception, fresh.open)
    for operation in (fresh.read, fresh.close):
        self.assertRaises(Exception, operation, 1)

    # Once COMPLETED, reading/closing only works with the right descriptor
    fresh.setCompleted()
    goodFd = fresh.open()
    badFd = random.SystemRandom().randint(0, 1000)
    self.assertNotEqual(goodFd, badFd)
    for operation in (fresh.read, fresh.close):
        self.assertRaises(Exception, operation, badFd)

    # ... which should work without problems
    fresh.read(goodFd)
    self.assertTrue(fresh.isBeingRead())
    fresh.close(goodFd)

    # An EXPIRED DROP cannot be moved back to COMPLETED
    fresh.status = DROPStates.EXPIRED
    self.assertRaises(Exception, fresh.setCompleted)
def test_objectAsNormalAndStreamingInput(self):
    """
    A test that checks that a DROP can act as normal and streaming input of
    different AppDROPs at the same time. We use the following graph:

    A --|--> B --> D
        |--> C --> E

    Here B uses A as a streaming input, while C uses it as a normal
    input
    """

    class LastCharWriterApp(AppDROP):
        def initialize(self, **kwargs):
            super(LastCharWriterApp, self).initialize(**kwargs)
            self._lastChar = None

        def dataWritten(self, uid, data):
            # Forward only the last character of each chunk that is streamed in
            self.execStatus = AppDROPStates.RUNNING
            outputDrop = self.outputs[0]
            self._lastChar = data[-1]
            outputDrop.write(self._lastChar)

        def dropCompleted(self, uid, status):
            self.execStatus = AppDROPStates.FINISHED
            self._notifyAppIsFinished()

    a = InMemoryDROP("a", "a")
    b = LastCharWriterApp("b", "b")
    c = SumupContainerChecksum("c", "c")
    d = InMemoryDROP("d", "d")
    e = InMemoryDROP("e", "e")
    a.addStreamingConsumer(b)
    a.addConsumer(c)
    b.addOutput(d)
    c.addOutput(e)

    # Consumer cannot be normal and streaming at the same time
    self.assertRaises(Exception, a.addConsumer, b)
    self.assertRaises(Exception, a.addStreamingConsumer, c)

    # Write a little, then check the consumers
    def checkDropStates(aStatus, dStatus, eStatus, lastChar):
        self.assertEqual(aStatus, a.status)
        self.assertEqual(dStatus, d.status)
        self.assertEqual(eStatus, e.status)
        self.assertEqual(lastChar, b._lastChar)

    checkDropStates(DROPStates.INITIALIZED, DROPStates.INITIALIZED, DROPStates.INITIALIZED, None)
    a.write("abcde")
    checkDropStates(DROPStates.WRITING, DROPStates.WRITING, DROPStates.INITIALIZED, "e")
    a.write("fghij")
    checkDropStates(DROPStates.WRITING, DROPStates.WRITING, DROPStates.INITIALIZED, "j")
    a.write("k")
    with DROPWaiterCtx(self, [d, e]):
        a.setCompleted()
    checkDropStates(DROPStates.COMPLETED, DROPStates.COMPLETED, DROPStates.COMPLETED, "k")

    # D received the last character of each of the three writes
    self.assertEqual("ejk", droputils.allDropContents(d))
def test_objectAsNormalAndStreamingInput(self):
    """
    Verifies that a single DROP can simultaneously be the streaming input
    of one AppDROP and the normal input of another one. The graph used is:

    A --|--> B --> D
        |--> C --> E

    where A is a streaming input for B and a normal input for C
    """

    class LastCharWriterApp(AppDROP):
        def initialize(self, **kwargs):
            super(LastCharWriterApp, self).initialize(**kwargs)
            self._lastByte = None

        def dataWritten(self, uid, data):
            # Remember the last byte of the incoming chunk and pass it on
            self.execStatus = AppDROPStates.RUNNING
            self._lastByte = six.indexbytes(data, -1)
            self.outputs[0].write(self._lastByte)

        def dropCompleted(self, uid, status):
            self.execStatus = AppDROPStates.FINISHED
            self._notifyAppIsFinished()

    a = InMemoryDROP('a', 'a')
    b = LastCharWriterApp('b', 'b')
    c = SumupContainerChecksum('c', 'c')
    d = InMemoryDROP('d', 'd')
    e = InMemoryDROP('e', 'e')
    a.addStreamingConsumer(b)
    a.addConsumer(c)
    b.addOutput(d)
    c.addOutput(e)

    # The same consumer cannot be registered both as normal and streaming
    self.assertRaises(Exception, a.addConsumer, b)
    self.assertRaises(Exception, a.addStreamingConsumer, c)

    def checkDropStates(aStatus, dStatus, eStatus, lastByte):
        # Statuses of A, D and E first, then the last byte seen by B (if any)
        for expected, drop in ((aStatus, a), (dStatus, d), (eStatus, e)):
            self.assertEqual(expected, drop.status)
        if lastByte is None:
            return
        self.assertEqual(six.b(lastByte), six.int2byte(b._lastByte))

    initialized = DROPStates.INITIALIZED
    writing = DROPStates.WRITING
    completed = DROPStates.COMPLETED

    # Write a little at a time, checking the consumers after each step
    checkDropStates(initialized, initialized, initialized, None)
    a.write('abcde')
    checkDropStates(writing, writing, initialized, 'e')
    a.write('fghij')
    checkDropStates(writing, writing, initialized, 'j')
    a.write('k')
    with DROPWaiterCtx(self, [d, e]):
        a.setCompleted()
    checkDropStates(completed, completed, completed, 'k')

    self.assertEqual(six.b('ejk'), droputils.allDropContents(d))
def branch_failure(self, tooManyFailures):
    """
    Using the container data object to implement a join/barrier dataflow.

    A1, A2 and A3 are InMemoryDROPs
    B1, B2 and B3 are SumupContainerChecksum
    C1, C2 and C3 are InMemoryDROPs
    D is a SumupContainerChecksum
    E is a InMemoryDROP

    --> A1 --> B1 --> C1 --|
    --> A2 --> B2 --> C2 --|--> D --> E
    --> A3 --> B3 --> C3 --|

    Upon writing all A* DROPs, the execution of B* DROPs should be
    triggered, after which "C" will transition to COMPLETE. Once all "C"s
    have moved to COMPLETED "D"'s execution will also be triggered, and
    finally E will hold the sum of the checksums of the "C"s that completed.

    A3 is always set to ERROR; if `tooManyFailures` is true A2 is set to
    ERROR as well, in which case D and E are expected to end up in ERROR
    instead of COMPLETED.
    """

    # create file data objects
    a1 = InMemoryDROP("oid:A1", "uid:A1")
    a2 = InMemoryDROP("oid:A2", "uid:A2")
    a3 = InMemoryDROP("oid:A3", "uid:A3")

    # CRC Result DROPs, storing the result in memory
    b1 = SumupContainerChecksum("oid:B1", "uid:B1")
    b2 = SumupContainerChecksum("oid:B2", "uid:B2")
    b3 = SumupContainerChecksum("oid:B3", "uid:B3")
    c1 = InMemoryDROP("oid:C1", "uid:C1")
    c2 = InMemoryDROP("oid:C2", "uid:C2")
    c3 = InMemoryDROP("oid:C3", "uid:C3")

    # The final DROP that sums up the CRCs from the container DROP
    d = SumupContainerChecksum("oid:D", "uid:D", input_error_threshold=33)
    e = InMemoryDROP("oid:E", "uid:E")

    # Wire together
    dropAList = [a1, a2, a3]
    dropBList = [b1, b2, b3]
    dropCList = [c1, c2, c3]
    for dropA, dropB in zip(dropAList, dropBList):
        dropA.addConsumer(dropB)
    for dropB, dropC in zip(dropBList, dropCList):
        dropB.addOutput(dropC)
    for dropC in dropCList:
        dropC.addConsumer(d)
    d.addOutput(e)

    # Write data into the initial "A" DROPs, which should trigger
    # the whole chain explained above
    # NOTE: ideally the A DROPs would be written in parallel
    with DROPWaiterCtx(self, e):
        a1.write(" ")
        a1.setCompleted()
        if tooManyFailures:
            a2.setError()
        else:
            a2.write(" ")
            a2.setCompleted()
        a3.setError()

    if tooManyFailures:
        completedDrops = dropAList[0:1] + dropBList[0:1] + dropCList[0:1]
        errorDrops = dropAList[1:] + dropBList[1:] + dropCList[1:] + [d, e]
    else:
        completedDrops = dropAList[0:2] + dropBList[0:2] + dropCList[0:2] + [d, e]
        errorDrops = dropAList[2:] + dropBList[2:] + dropCList[2:]

    for drop in completedDrops:
        self.assertEqual(drop.status, DROPStates.COMPLETED)
    for drop in errorDrops:
        self.assertEqual(drop.status, DROPStates.ERROR)

    # The results we want to compare
    # (only in case that at least two branches executed)
    if not tooManyFailures:
        sum_crc = c1.checksum + c2.checksum
        dropEData = int(droputils.allDropContents(e))
        self.assertNotEqual(sum_crc, 0)
        self.assertEqual(sum_crc, dropEData)
def test_simple_chain(self):
    """
    Simple test that creates a pipeline-like chain of commands.
    In this case we simulate a pipeline that does this, holding
    each intermediate result in memory:

    cat someFile | grep 'a' | sort | rev
    """

    class GrepResult(BarrierAppDROP):
        def initialize(self, **kwargs):
            super(GrepResult, self).initialize(**kwargs)
            self._substring = kwargs["substring"]

        def run(self):
            # Copy only the lines containing the configured substring
            drop = self.inputs[0]
            output = self.outputs[0]
            allLines = StringIO(droputils.allDropContents(drop)).readlines()
            for line in allLines:
                if self._substring in line:
                    output.write(line)

    class SortResult(BarrierAppDROP):
        def run(self):
            drop = self.inputs[0]
            output = self.outputs[0]
            sortedLines = StringIO(droputils.allDropContents(drop)).readlines()
            sortedLines.sort()
            for line in sortedLines:
                output.write(line)

    class RevResult(BarrierAppDROP):
        def run(self):
            # Reverse each word, keeping spaces and newlines in place
            drop = self.inputs[0]
            output = self.outputs[0]
            allLines = StringIO(droputils.allDropContents(drop)).readlines()
            for line in allLines:
                buf = ""
                for c in line:
                    if c == " " or c == "\n":
                        output.write(buf[::-1])
                        output.write(c)
                        buf = ""
                    else:
                        buf += c

    a = InMemoryDROP("oid:A", "uid:A")
    b = GrepResult("oid:B", "uid:B", substring="a")
    c = InMemoryDROP("oid:C", "uid:C")
    d = SortResult("oid:D", "uid:D")
    e = InMemoryDROP("oid:E", "uid:E")
    f = RevResult("oid:F", "uid:F")
    g = InMemoryDROP("oid:G", "uid:G")

    a.addConsumer(b)
    b.addOutput(c)
    c.addConsumer(d)
    d.addOutput(e)
    e.addConsumer(f)
    f.addOutput(g)

    # Initial write
    contents = "first line\nwe have an a here\nand another one\nnoone knows me"
    cResExpected = "we have an a here\nand another one\n"
    eResExpected = "and another one\nwe have an a here\n"
    gResExpected = "dna rehtona eno\new evah na a ereh\n"

    with DROPWaiterCtx(self, g):
        a.write(contents)
        a.setCompleted()

    # Get intermediate and final results and compare.
    # An explicit loop is used because map() is lazy on Python 3, which
    # would leave the assertions unexecuted.
    expectedAndDrops = zip([cResExpected, eResExpected, gResExpected], [c, e, g])
    for expected, drop in expectedAndDrops:
        self.assertEqual(expected, droputils.allDropContents(drop))
def branch_failure(self, tooManyFailures):
    """
    Exercises a join/barrier dataflow in which some of the branches fail.

    The graph is made of three parallel branches joined by a final
    application:

    --> A1 --> B1 --> C1 --|
    --> A2 --> B2 --> C2 --|--> D --> E
    --> A3 --> B3 --> C3 --|

    The A*, C* and E DROPs are InMemoryDROPs, while the B* DROPs and D are
    SumupContainerChecksum applications. A1 is always written and completed
    and A3 is always moved to ERROR; A2 is moved to ERROR when
    `tooManyFailures` is true, and written and completed otherwise. The test
    then checks which DROPs ended up COMPLETED and which in ERROR and, when
    D is expected to have run, that E holds the sum of the checksums of C1
    and C2.
    """

    # The data DROPs at the beginning of each branch
    dropAList = [
        InMemoryDROP('oid:A1', 'uid:A1'),
        InMemoryDROP('oid:A2', 'uid:A2'),
        InMemoryDROP('oid:A3', 'uid:A3'),
    ]
    a1, a2, a3 = dropAList

    # The checksum applications and the DROPs holding their results
    dropBList = [
        SumupContainerChecksum('oid:B1', 'uid:B1'),
        SumupContainerChecksum('oid:B2', 'uid:B2'),
        SumupContainerChecksum('oid:B3', 'uid:B3'),
    ]
    dropCList = [
        InMemoryDROP('oid:C1', 'uid:C1'),
        InMemoryDROP('oid:C2', 'uid:C2'),
        InMemoryDROP('oid:C3', 'uid:C3'),
    ]
    c1, c2 = dropCList[0], dropCList[1]

    # The application joining the three branches, and its output
    d = SumupContainerChecksum('oid:D', 'uid:D', input_error_threshold=33)
    e = InMemoryDROP('oid:E', 'uid:E')

    # Connect everything
    for idx, dropA in enumerate(dropAList):
        dropA.addConsumer(dropBList[idx])
    for idx, dropB in enumerate(dropBList):
        dropB.addOutput(dropCList[idx])
    for dropC in dropCList:
        dropC.addConsumer(d)
    d.addOutput(e)

    # Feed (or fail) the initial DROPs and wait until E reaches a final state
    with DROPWaiterCtx(self, e):
        a1.write(' ')
        a1.setCompleted()
        if not tooManyFailures:
            a2.write(' ')
            a2.setCompleted()
        else:
            a2.setError()
        a3.setError()

    # The first `okBranches` branches succeed, the remaining ones fail; D
    # and E follow the successful branches unless there were too many failures
    okBranches = 1 if tooManyFailures else 2
    completedDrops = dropAList[:okBranches] + dropBList[:okBranches] + dropCList[:okBranches]
    errorDrops = dropAList[okBranches:] + dropBList[okBranches:] + dropCList[okBranches:]
    if tooManyFailures:
        errorDrops = errorDrops + [d, e]
    else:
        completedDrops = completedDrops + [d, e]

    for drop in completedDrops:
        self.assertEqual(drop.status, DROPStates.COMPLETED)
    for drop in errorDrops:
        self.assertEqual(drop.status, DROPStates.ERROR)

    # Results can only be compared if at least two branches executed
    if tooManyFailures:
        return
    sum_crc = c1.checksum + c2.checksum
    dropEData = int(droputils.allDropContents(e))
    self.assertNotEqual(sum_crc, 0)
    self.assertEqual(sum_crc, dropEData)
def test_simple_chain(self):
    """
    Simple test that creates a pipeline-like chain of commands.
    In this case we simulate a pipeline that does this, holding
    each intermediate result in memory:

    cat someFile | grep 'a' | sort | rev
    """

    class GrepResult(BarrierAppDROP):
        def initialize(self, **kwargs):
            super(GrepResult, self).initialize(**kwargs)
            self._substring = six.b(kwargs['substring'])

        def run(self):
            # Copy only the lines containing the configured substring
            drop = self.inputs[0]
            output = self.outputs[0]
            allLines = BytesIO(droputils.allDropContents(drop)).readlines()
            for line in allLines:
                if self._substring in line:
                    output.write(line)

    class SortResult(BarrierAppDROP):
        def run(self):
            drop = self.inputs[0]
            output = self.outputs[0]
            sortedLines = BytesIO(droputils.allDropContents(drop)).readlines()
            sortedLines.sort()
            for line in sortedLines:
                output.write(line)

    class RevResult(BarrierAppDROP):
        def run(self):
            # Reverse each word, keeping spaces and newlines in place.
            # six.iterbytes yields integers on both Python 2 and 3, whereas
            # plain iteration over bytes yields 1-char strings on Python 2
            # and integers on Python 3.
            drop = self.inputs[0]
            output = self.outputs[0]
            separators = (ord(' '), ord('\n'))
            allbytes = droputils.allDropContents(drop)
            buf = bytearray()
            for c in six.iterbytes(allbytes):
                if c in separators:
                    output.write(bytes(buf[::-1]))
                    output.write(six.int2byte(c))
                    buf = bytearray()
                else:
                    buf.append(c)

    a = InMemoryDROP('oid:A', 'uid:A')
    b = GrepResult('oid:B', 'uid:B', substring="a")
    c = InMemoryDROP('oid:C', 'uid:C')
    d = SortResult('oid:D', 'uid:D')
    e = InMemoryDROP('oid:E', 'uid:E')
    f = RevResult('oid:F', 'uid:F')
    g = InMemoryDROP('oid:G', 'uid:G')

    a.addConsumer(b)
    b.addOutput(c)
    c.addConsumer(d)
    d.addOutput(e)
    e.addConsumer(f)
    f.addOutput(g)

    # Initial write
    contents = "first line\nwe have an a here\nand another one\nnoone knows me"

    # The applications above produce bytes, so the expectations are bytes too
    cResExpected = six.b("we have an a here\nand another one\n")
    eResExpected = six.b("and another one\nwe have an a here\n")
    gResExpected = six.b("dna rehtona eno\new evah na a ereh\n")

    with DROPWaiterCtx(self, g):
        a.write(contents)
        a.setCompleted()

    # Get intermediate and final results and compare.
    # An explicit loop is used because map() is lazy on Python 3, which
    # would leave the assertions unexecuted.
    expectedAndDrops = zip([cResExpected, eResExpected, gResExpected], [c, e, g])
    for expected, drop in expectedAndDrops:
        self.assertEqual(expected, droputils.allDropContents(drop))