def test_simpleCopy(self):
    """
    Copy one file DROP into another through a dockerized ``cp``.

    A DockerApp running ``cp %i0 %o0`` in an ``ubuntu:14.04`` image is
    placed between two FileDROPs. After the source is written and
    completed, the test verifies that the target DROP holds the same
    bytes and that the target file belongs to the user running this
    process.
    """
    source = FileDROP("a", "a")
    copier = DockerApp("b", "b", image="ubuntu:14.04", command="cp %i0 %o0")
    target = FileDROP("c", "c")
    copier.addInput(source)
    copier.addOutput(target)

    # Fresh random bytes on each run, so a stale target file cannot
    # make the comparison pass by accident
    payload = os.urandom(10)
    with DROPWaiterCtx(self, target, 100):
        source.write(payload)
        source.setCompleted()
    self.assertEqual(payload, droputils.allDropContents(target))

    # The file must be owned by us, not by root
    self.assertEqual(os.getuid(), os.stat(target.path).st_uid)
def test_clientServer(self):
    """
    Run a client and a server container that talk to each other.

    The server writes whatever it receives into its output DROP; the
    client sends it the contents of the shared input DROP. The graph
    looks like this:

    A --|--> B(client) --|--> D
        |--> C(server) --|

    B connects to C, so C has to be started first in order for B to
    learn C's IP address and connect successfully. Only C really writes
    into D, but B is also registered as a producer of D so that D waits
    for both applications to finish before proceeding.
    """
    source = FileDROP('a', 'a')
    client = DockerApp('b', 'b', image='ubuntu:14.04', command='cat %i0 > /dev/tcp/%containerIp[c]%/8000')
    server = DockerApp('c', 'c', image='ubuntu:14.04', command='nc -l 8000 > %o0')
    sink = FileDROP('d', 'd')

    # Client first, then server: same wiring order for both apps
    for app in (client, server):
        app.addInput(source)
        app.addOutput(sink)

    # The client registers its interest in the server container
    client.handleInterest(server)

    payload = os.urandom(10)
    with DROPWaiterCtx(self, sink, 100):
        source.write(payload)
        source.setCompleted()

    self.assertEqual(payload, droputils.allDropContents(sink))
def test_plasma(self):
    """
    Push a measurement set through plasma and back onto disk.

    The reference archive ``./data/test_ms.tar.gz`` is unpacked under
    ``/tmp/`` and the following chain is executed::

        FileDROP(a) -> MSPlasmaWriter(b) -> PlasmaDROP(c)
                    -> MSPlasmaReader(d) -> FileDROP(e)

    The copy written by the reader is compared against the original
    with ``self.compare_ms``. Finally the object id embedded in the
    plasma DROP's ``dataURL`` is used to fetch the object directly from
    the plasma store at ``/tmp/plasma``.
    """
    # Imported locally so this test does not depend on a module-level import
    import binascii

    in_file = '/tmp/test.ms'
    out_file = '/tmp/copy.ms'

    with tarfile.open('./data/test_ms.tar.gz', 'r') as ref:
        ref.extractall('/tmp/')

    a = FileDROP('a', 'a', filepath=in_file)
    b = MSPlasmaWriter('b', 'b')
    c = PlasmaDROP('c', 'c')
    d = MSPlasmaReader('d', 'd')
    e = FileDROP('e', 'e', filepath=out_file)

    b.addInput(a)
    b.addOutput(c)
    d.addInput(c)
    d.addOutput(e)

    # Check the MS DATA content is the same as original
    with droputils.DROPWaiterCtx(self, e, 5):
        a.setCompleted()

    self.compare_ms(in_file, out_file)

    # Check we can go from dataURL to plasma ID. The part after '//' is
    # hex text; str.decode("hex") only exists on Python 2, whereas
    # binascii.unhexlify performs the same conversion on Python 2 and 3.
    client = plasma.connect("/tmp/plasma")
    object_id = binascii.unhexlify(c.dataURL.split('//')[1])
    client.get(plasma.ObjectID(object_id))
def test_file_reproducibility(self):
    """
    Compare a completed FileDROP against a NullDROP across levels.

    At RERUN, REPEAT and RECOMPUTE the two merkleroots are expected to
    be equal. At REPRODUCE and the three REPLICATE_* levels they are
    expected to differ, and ``generate_merkle_data()`` on the FileDROP
    must report the hash of the written data (plus the COMPLETED status
    for the REPLICATE_* levels).
    """
    from dlg.common.reproducibility.reproducibility import common_hash

    payload = b'Helloworld'
    expected_hash = common_hash(payload)

    file_drop = FileDROP('a', 'a')
    file_drop.write(payload)
    file_drop.reproducibility_level = ReproducibilityFlags.RERUN
    file_drop.setCompleted()

    null_drop = NullDROP('b', 'b')
    null_drop.reproducibility_level = ReproducibilityFlags.RERUN
    null_drop.setCompleted()
    self.assertEqual(file_drop.merkleroot, null_drop.merkleroot)

    # Levels at which the file drop must stay indistinguishable from
    # the null drop
    for level in (ReproducibilityFlags.REPEAT, ReproducibilityFlags.RECOMPUTE):
        file_drop.reproducibility_level = level
        self.assertEqual(file_drop.merkleroot, null_drop.merkleroot)

    file_drop.reproducibility_level = ReproducibilityFlags.REPRODUCE
    self.assertNotEqual(file_drop.merkleroot, null_drop.merkleroot)
    self.assertEqual(file_drop.generate_merkle_data(), {'data_hash': expected_hash})

    # All replicate levels share the same expected merkle data
    replicate_levels = (
        ReproducibilityFlags.REPLICATE_SCI,
        ReproducibilityFlags.REPLICATE_COMP,
        ReproducibilityFlags.REPLICATE_TOTAL,
    )
    for level in replicate_levels:
        file_drop.reproducibility_level = level
        self.assertNotEqual(file_drop.merkleroot, null_drop.merkleroot)
        self.assertEqual(
            file_drop.generate_merkle_data(),
            {'data_hash': expected_hash, 'status': DROPStates.COMPLETED},
        )
def test_no_write_to_file_drop(self):
    """
    A FileDROP may be completed without anything being written to it.

    The untouched FileDROP feeds a SleepAndCopyApp whose output is an
    InMemoryDROP; once the graph has run, that output must be empty.
    """
    empty_file = FileDROP("a", "a")
    app = SleepAndCopyApp("b", "b")
    result = InMemoryDROP("c", "c")

    empty_file.addConsumer(app)
    app.addOutput(result)

    # Complete the source straight away, with no preceding write()
    with DROPWaiterCtx(self, result):
        empty_file.setCompleted()

    contents = droputils.allDropContents(result)
    self.assertEqual(contents, b"")
def test_basic_run(self):
    """
    Run a CImagerDrop between an input and an output FileDROP.

    The input path is read from the ``INPUT_MS`` environment variable
    and falls back to ``/tmp/output/aa01.ms``. The test only waits for
    the output DROP; it makes no assertion about what was produced.
    """
    default_ms = '/tmp/output/aa01.ms'
    ms_path = os.environ.get('INPUT_MS', default_ms)

    ms_drop = FileDROP('1', '1', filepath=ms_path)
    imager = CImagerDrop('2', '2')
    image_drop = FileDROP('3', '3', filepath='image_aa01')

    imager.addInput(ms_drop)
    imager.addOutput(image_drop)

    # Completing the input is what sets the graph in motion
    with DROPWaiterCtx(self, image_drop, 10000):
        ms_drop.setCompleted()
def assertFiles(delete_parent_directory, parentDirExists, tempDir=None):
    """
    Write, complete and delete a FileDROP, then check its directory.

    :param delete_parent_directory: forwarded unchanged to the FileDROP
        constructor.
    :param parentDirExists: whether ``tempDir`` is expected to still be
        a directory after the DROP has been deleted.
    :param tempDir: directory holding the DROP's file; a new temporary
        directory is created when this is not given.

    ``self`` is taken from the enclosing scope.
    """
    if not tempDir:
        tempDir = tempfile.mkdtemp()

    drop = FileDROP(
        'a',
        'a',
        dirname=tempDir,
        delete_parent_directory=delete_parent_directory,
    )
    drop.write(b' ')
    drop.setCompleted()

    # Before deletion both the file and its directory must be present
    self.assertTrue(drop.exists())
    self.assertTrue(os.path.isdir(tempDir))

    drop.delete()
    self.assertFalse(drop.exists())

    dir_still_there = os.path.isdir(tempDir)
    self.assertEqual(parentDirExists, dir_still_there)

    # Clean up ourselves when the directory was expected to survive
    if parentDirExists:
        shutil.rmtree(tempDir)
def test_file_reproducibility(self):
    """
    Compare a completed FileDROP against a NullDROP across levels.

    After each change of reproducibility level the FileDROP is
    committed again before it is inspected. At RERUN, REPEAT and
    RECOMPUTE the two merkleroots are expected to be equal. At
    REPRODUCE and the three REPLICATE_* levels they are expected to
    differ, and ``generate_merkle_data()`` must report the hash of the
    written data (plus the COMPLETED status for the REPLICATE_* levels).
    """
    from dlg.common.reproducibility.reproducibility import common_hash

    payload = b"Helloworld"
    expected_hash = common_hash(payload)

    file_drop = FileDROP("a", "a")
    file_drop.write(payload)
    file_drop.reproducibility_level = ReproducibilityFlags.RERUN
    file_drop.setCompleted()

    null_drop = NullDROP("b", "b")
    null_drop.reproducibility_level = ReproducibilityFlags.RERUN
    null_drop.setCompleted()
    self.assertEqual(file_drop.merkleroot, null_drop.merkleroot)

    def recommit_at(level):
        # Switch the file drop to the given level and commit it again
        file_drop.reproducibility_level = level
        file_drop.commit()

    recommit_at(ReproducibilityFlags.REPEAT)
    self.assertEqual(file_drop.merkleroot, null_drop.merkleroot)

    recommit_at(ReproducibilityFlags.RECOMPUTE)
    self.assertEqual(file_drop.merkleroot, null_drop.merkleroot)

    recommit_at(ReproducibilityFlags.REPRODUCE)
    self.assertNotEqual(file_drop.merkleroot, null_drop.merkleroot)
    self.assertEqual(file_drop.generate_merkle_data(), {"data_hash": expected_hash})

    # The three replicate levels are checked against the same merkle data
    for level in (
        ReproducibilityFlags.REPLICATE_SCI,
        ReproducibilityFlags.REPLICATE_COMP,
        ReproducibilityFlags.REPLICATE_TOTAL,
    ):
        recommit_at(level)
        self.assertNotEqual(file_drop.merkleroot, null_drop.merkleroot)
        self.assertEqual(
            file_drop.generate_merkle_data(),
            {"data_hash": expected_hash, "status": DROPStates.COMPLETED},
        )
def test_echo(self):
    """
    Copy one file DROP into another with a BashShellApp running ``cp``.

    Verifies that the target DROP ends up with the bytes written to the
    source, and that the target file is owned by the user running this
    process.
    """
    source = FileDROP('a', 'a')
    shell_app = BashShellApp('b', 'b', command='cp %i0 %o0')
    target = FileDROP('c', 'c')
    shell_app.addInput(source)
    shell_app.addOutput(target)

    # New random bytes on each run so different contents are checked
    # every time
    payload = os.urandom(10)
    with DROPWaiterCtx(self, target, 100):
        source.write(payload)
        source.setCompleted()

    copied = droputils.allDropContents(target)
    self.assertEqual(payload, copied)

    # The file must be owned by us, not by root
    self.assertEqual(os.getuid(), os.stat(target.path).st_uid)
def test_dropWroteFromOutside(self):
    """
    Complete a DROP whose data was written without going through it.

    The file backing the DROP is written directly with ``open``. The
    DROP still has to be moved to COMPLETED afterwards, and reading
    from it must yield the bytes that were written. Its checksum and
    size must be available, and the size must not be settable by hand
    once the DROP has calculated it.
    """

    # Write, but not through the DROP
    a = FileDROP("A", "A")
    filename = a.path
    msg = b"a message"
    with open(filename, "wb") as f:
        f.write(msg)
    a.setCompleted()

    # Read from the DROP
    self.assertEqual(msg, droputils.allDropContents(a))
    self.assertIsNotNone(a.checksum)
    self.assertEqual(9, a.size)

    # The drop now calculates the size thus we can't set it anymore.
    # Passing ``a.size`` as the callable to assertRaises would evaluate
    # the attribute and then try to call the resulting value, which
    # raises regardless of whether the size can be set. Assign to the
    # attribute instead so the check covers what it claims to cover.
    with self.assertRaises(Exception):
        a.size = len(msg)
def test_agg_and_rep(self):
    """
    Chain a FitsImageAggregator into a FitsImageReplicator.

    Four FileDROPs under ``/tmp/output/`` are the aggregator's inputs
    and ``summit.fits`` is its output. That output feeds a replicator
    configured with ``copies=4``, which writes
    ``summit_replication.fits``. The test completes the four inputs and
    waits on the replicator; it makes no assertion about the files
    produced.
    """
    out_dir = '/tmp/output/'

    # aggregate
    agg = FitsImageAggregator('0', '0', freq_step=1000.0)
    input_specs = (
        ('1', 'image_eor01.restored.fits'),
        ('2', 'image_eor02.restored.fits'),
        ('3', 'image_eor03.restored.fits'),
        ('4', 'image_eor04.restored.fits'),
    )
    inputs = [
        FileDROP(uid, uid, filepath=fits_name, dirname=out_dir)
        for uid, fits_name in input_specs
    ]
    for fits_drop in inputs:
        agg.addInput(fits_drop)

    output = FileDROP('10', '10', filepath='summit.fits', dirname=out_dir)
    agg.addOutput(output)

    # replicate
    rep = FitsImageReplicator('11', '11', copies=4)
    rep_output = FileDROP('12', '12', filepath='summit_replication.fits', dirname=out_dir)
    rep.addInput(output)
    rep.addOutput(rep_output)

    # Completing every input sets the chain in motion
    with droputils.DROPWaiterCtx(self, rep, 1000):
        for fits_drop in inputs:
            fits_drop.setCompleted()