def test_expireAfterUse(self):
    """
    Simple test for the expireAfterUse flag. Two DROPs are created with
    different values, and after they are used we check whether their data
    is still there or not
    """
    with dlm.DataLifecycleManager(checkPeriod=0.5, cleanupPeriod=2) as manager:
        # 'a' expires as soon as all its consumers are done; 'b' does not
        a = DirectoryContainer('a', 'a', precious=False, expireAfterUse=True, dirname=tempfile.mkdtemp())
        b = DirectoryContainer('b', 'b', precious=False, expireAfterUse=False, dirname=tempfile.mkdtemp())
        c = BarrierAppDROP('c', 'c')
        d = BarrierAppDROP('d', 'd')
        a.addConsumer(c)
        a.addConsumer(d)
        b.addConsumer(c)
        b.addConsumer(d)

        # Register each of the four DROPs exactly once. The previous version
        # added 'b' twice and never added 'd'.
        for drop in (a, b, c, d):
            manager.addDrop(drop)

        # Make sure all consumers are done
        with DROPWaiterCtx(self, [c,d], 1):
            a.setCompleted()
            b.setCompleted()

        # Both directories should be there, but after cleanup A's shouldn't
        # be there anymore
        self.assertTrue(a.exists())
        self.assertTrue(b.exists())
        time.sleep(2)
        self.assertFalse(a.exists())
        self.assertTrue(b.exists())

        # 'b' is never expired by the manager, so remove its data by hand
        b.delete()
def test_to_first_split(self):
    """
    Builds and triggers the graph up to the first frequency split.

    A mock S3 DROP feeds a docker app that copies it into a measurement set
    directory. For every frequency group of the first split a CASA docker
    app consumes the measurement set and writes a file, which a second
    docker app copies into its own mock S3 DROP. A final barrier takes the
    measurement set and all the S3 outputs as inputs, and the test waits on
    it after completing the initial S3 DROP.
    """
    # Stage 1: S3 -> measurement set
    source_s3 = MockS3DROP(
        self.get_oid('s3'),
        uuid.uuid4(),
        bucket='mock',
        key='key123',
        profile_name='aws-profile',
    )
    fetch_app = DockerApp(
        self.get_oid('app'),
        uuid.uuid4(),
        image='mock:latest',
        command='copy_from_s3.sh %iDataURL0 /dfms_root/%o0',
        user='******',
    )
    ms_dir = DirectoryContainer(
        self.get_oid('dir'),
        uuid.uuid4(),
        dirname=TestChiles02._temp,
    )
    fetch_app.addInput(source_s3)
    fetch_app.addOutput(ms_dir)

    # Stage 2: one CASA run plus one upload per frequency group of the
    # first split
    first_split = make_groups_of_frequencies(FREQUENCY_GROUPS, 5)[0]
    uploaded = []
    for group in first_split:
        casa_app = DockerApp(
            self.get_oid('app'),
            uuid.uuid4(),
            image='mock:latest',
            command='casa_py.sh /dfms_root/%i0 /dfms_root/%o0 {0} {1}'.format(group[0], group[1]),
            user='******',
        )
        casa_result = FileDROP(
            self.get_oid('file'),
            uuid.uuid4(),
            dirname=TestChiles02._temp,
        )
        upload_app = DockerApp(
            self.get_oid('app'),
            uuid.uuid4(),
            image='mock:latest',
            command='copy_to_s3.sh /dfms_root/%i0 %oDataURL0',
            user='******',
        )
        target_s3 = MockS3DROP(
            self.get_oid('s3'),
            uuid.uuid4(),
            bucket='mock',
            key='{0}_{1}/key123'.format(group[0], group[1]),
            profile_name='aws-profile',
        )

        casa_app.addInput(ms_dir)
        casa_app.addOutput(casa_result)
        upload_app.addInput(casa_result)
        upload_app.addOutput(target_s3)
        uploaded.append(target_s3)

    # Stage 3: the barrier depends on the measurement set first, then on
    # every uploaded result, in creation order
    barrier = BarrierAppDROP(self.get_oid('barrier'), uuid.uuid4())
    for upstream in [ms_dir] + uploaded:
        barrier.addInput(upstream)

    # Kick off the graph and wait for the barrier
    with droputils.DROPWaiterCtx(self, barrier, 50000):
        source_s3.setCompleted()
def _createGraph(self):
    """
    Creates the following graph of DROPs:

    A |--> B ----> D --> G --> I --|
      |--> C -|--> E --------------|-> H --> J
              |--> F

    B, C, G and H are AppDOs. The names have been given in breadth-first
    order (although H has a dependency on I)

    Returns the ten DROPs as a tuple, in alphabetical order (a to j).
    """
    # Nodes, created in alphabetical order; the uid is the same as the oid
    a = InMemoryDROP('a', 'a')
    b, c = [BarrierAppDROP(name, name) for name in 'bc']
    d, e, f = [InMemoryDROP(name, name) for name in 'def']
    g, h = [BarrierAppDROP(name, name) for name in 'gh']
    i, j = [InMemoryDROP(name, name) for name in 'ij']

    # Edges, applied strictly in the listed order: data DROPs gain consumers,
    # application DROPs gain outputs
    edges = (
        (a.addConsumer, b),
        (a.addConsumer, c),
        (b.addOutput, d),
        (c.addOutput, e),
        (c.addOutput, f),
        (d.addConsumer, g),
        (e.addConsumer, h),
        (g.addOutput, i),
        (i.addConsumer, h),
        (h.addOutput, j),
    )
    for connect, target in edges:
        connect(target)

    return a, b, c, d, e, f, g, h, i, j
def initialize(self, **kwargs):
    """
    Runs the BarrierAppDROP initialization and then stores the optional
    `remoteUser`, `pkeyPath` and `timeout` arguments on the instance. Each
    of them defaults to None when not given.
    """
    BarrierAppDROP.initialize(self, **kwargs)

    read_arg = self._getArg
    self._remoteUser = read_arg(kwargs, 'remoteUser', None)
    self._pkeyPath = read_arg(kwargs, 'pkeyPath', None)
    self._timeout = read_arg(kwargs, 'timeout', None)
def initialize(self, **kwargs):
    """
    Runs the BarrierAppDROP initialization and then stores the mandatory
    `command` argument on the instance.

    Raises an Exception if no command (or an empty one) was given.
    """
    BarrierAppDROP.initialize(self, **kwargs)

    self._command = self._getArg(kwargs, 'command', None)
    if self._command:
        return

    # Nothing to execute: refuse to create the application
    raise Exception('No command specified, cannot create BashShellApp')
def initialize(self, **kwargs):
    """
    Runs the BarrierAppDROP initialization and then sets the instance
    attribute `i` to 0.
    """
    BarrierAppDROP.initialize(self, **kwargs)
    self.i = 0
def initialize(self, **kwargs):
    """
    Runs the BarrierAppDROP initialization, reads and validates the
    docker-related arguments of this application, and makes sure the
    requested image is available through the docker client, pulling it
    if it is not.

    Recognised keyword arguments:

     * image: the docker image to use (mandatory). A warning is logged if it
       does not include a tag.
     * command: the command to run (mandatory).
     * user: the user used to run the process in the container (default:
       None).
     * ensureUserAndSwitch: whether the command is wrapped so the user is
       created if missing and switched to (default: True only when no
       `user` was given).
     * removeContainer: whether the container is removed afterwards
       (default: True).
     * additionalBindings: list of extra volume bindings, each given either
       as "path" (same path on host and container) or as
       "host_path:container_path" (default: no extra bindings).

    Raises an Exception if `image` or `command` are missing, and a
    ValueError if the host path of an additional binding doesn't exist.
    """

    BarrierAppDROP.initialize(self, **kwargs)

    self._image = self._getArg(kwargs, "image", None)
    if not self._image:
        raise Exception("No docker image specified, cannot create DockerApp")

    if ":" not in self._image:
        logger.warning("%r: Image %s is too generic since it doesn't specify a tag", self, self._image)

    self._command = self._getArg(kwargs, "command", None)
    if not self._command:
        raise Exception("No command specified, cannot create DockerApp")

    # The user used to run the process in the docker container.
    # By default docker containers run as root.
    # NOTE(review): the original comment was cut off mid-sentence here;
    # presumably the concern is the ownership of whatever the process writes
    # to the filesystem -- confirm and complete.
    self._user = self._getArg(kwargs, "user", None)

    # In some cases we want to make sure the command in the container runs
    # as a certain user, so we wrap up the command line in a small script
    # that will create the user if missing and switch to it
    self._ensureUserAndSwitch = self._getArg(kwargs, "ensureUserAndSwitch", self._user is None)

    # By default containers are removed from the filesystem, but people
    # might want to preserve them.
    # TODO: This might be something that the data lifecycle manager could
    #       handle, but for the time being we do it here
    self._removeContainer = self._getArg(kwargs, "removeContainer", True)

    # Additional volume bindings can be specified for existing files/dirs
    # on the host system. Each one is either "path" or
    # "host_path:container_path"; more than one colon is not supported.
    self._additionalBindings = {}
    for binding in self._getArg(kwargs, "additionalBindings", []):
        if ":" not in binding:
            host_path = container_path = binding
        else:
            host_path, container_path = binding.split(":")
        if not os.path.exists(host_path):
            raise ValueError("Path %s doesn't exist, cannot use as additional volume binding" % (host_path,))
        self._additionalBindings[host_path] = container_path

    logger.info("%r with image '%s' and command '%s' created", self, self._image, self._command)

    # Check if we have the image; otherwise pull it.
    extra_kwargs = self._kwargs_from_env()
    c = AutoVersionClient(**extra_kwargs)

    # Untagged images may be reported with a null (or absent) "RepoTags"
    # entry, so treat those as having no tags instead of failing on them
    found = any(self._image in (img.get("RepoTags") or ()) for img in c.images())

    if not found:
        logger.debug("Image '%s' not found, pulling it", self._image)
        start = time.time()
        c.pull(self._image)
        end = time.time()
        logger.debug("Took %.2f [s] to pull image '%s'", end - start, self._image)
    else:
        logger.debug("Image '%s' found, no need to pull it", self._image)

    # Runtime state, not known until the container is actually created
    self._containerIp = None
    self._containerId = None
    self._waiters = []