@defer.inlineCallbacks
def test_trigger_controlled_step_killing_worker_in_between(self):
    stepcontroller = BuildStepController()
    yield self.setupConfig(
        masterConfig(num_concurrent=1, extra_steps=[stepcontroller.step]),
        startWorker=False)
    d = self.doForceBuild()

    builds = []
    while len(builds) != 2:
        builds = yield self.master.data.get(("builds",))
        yield util.asyncSleep(.1)
    while not stepcontroller.running:
        yield util.asyncSleep(.1)

    worker = self.master.workers.workers['hyper0']
    worker.client.remove_container(worker.instance['Id'], v=True, force=True)

    # wait until the build is retried
    while len(builds) == 2:
        builds = yield self.master.data.get(("builds",))
        yield util.asyncSleep(.1)

    stepcontroller.auto_finish_step(SUCCESS)
    yield d

    builds = yield self.master.data.get(("builds",))
    self.assertEqual(len(builds), 5)
    # the first two builds were retried
    self.assertEqual(builds[0]['results'], RETRY)
    self.assertEqual(builds[1]['results'], RETRY)
    self.assertEqual(builds[2]['results'], SUCCESS)
    self.assertEqual(builds[3]['results'], SUCCESS)
    self.assertEqual(builds[4]['results'], SUCCESS)

@defer.inlineCallbacks
def setUpRequest(self, args, options=True, activate=True):
    self.changeHook = change_hook.ChangeHookResource(
        dialects={'poller': options}, master=mock.Mock())
    self.request = FakeRequest(args=args)
    self.request.uri = "/change_hook/poller"
    self.request.method = "GET"

    master = self.request.site.master
    master.change_svc = ChangeManager()
    master.change_svc.setServiceParent(master)
    self.changesrc = self.Subclass("example", 21)
    self.changesrc.setServiceParent(master.change_svc)
    if activate:
        self.changesrc.activate()

    self.otherpoller = self.Subclass("otherpoller", 22)
    self.otherpoller.setServiceParent(master.change_svc)
    if activate:
        self.otherpoller.activate()

    anotherchangesrc = base.ChangeSource(name='notapoller')
    anotherchangesrc.setName("notapoller")
    anotherchangesrc.setServiceParent(master.change_svc)

    yield self.request.test_render(self.changeHook)
    yield util.asyncSleep(0)

@defer.inlineCallbacks
def run(self):
    def dCheck(d):
        if not isinstance(d, defer.Deferred):
            raise AssertionError("expected Deferred")
        return d

    # don't complete immediately, or synchronously
    yield util.asyncSleep(0)
    lo = TestLogObserver()
    self.addLogObserver('testlog', lo)
    log = yield dCheck(self.addLog('testlog'))
    yield dCheck(log.addStdout(u'stdout\n'))
    yield dCheck(self.addCompleteLog(
        'obs', 'Observer saw %r' % (map(unicode, lo.observed),)))
    yield dCheck(self.addHTMLLog('foo.html', '<head>\n'))
    yield dCheck(self.addURL('linkie', 'http://foo'))

    cmd = remotecommand.RemoteCommand('fake', {})
    cmd.useLog(log)
    stdio = yield dCheck(self.addLog('stdio'))
    cmd.useLog(stdio)
    yield dCheck(cmd.addStdout(u'stdio\n'))
    yield dCheck(cmd.addStderr(u'stderr\n'))
    yield dCheck(cmd.addHeader(u'hdr\n'))
    yield dCheck(cmd.addToLog('testlog', 'fromcmd\n'))

    yield dCheck(log.finish())
    defer.returnValue(results.SUCCESS)

@defer.inlineCallbacks
def setUpRequest(self, args, options=True, activate=True):
    self.request = FakeRequest(args=args)
    self.request.uri = b"/change_hook/poller"
    self.request.method = b"GET"
    www = self.request.site.master.www
    self.master = master = self.request.site.master = fakemaster.make_master(
        testcase=self, wantData=True)
    master.www = www
    yield self.master.startService()

    self.changeHook = change_hook.ChangeHookResource(
        dialects={'poller': options}, master=master)

    master.change_svc = ChangeManager()
    yield master.change_svc.setServiceParent(master)
    self.changesrc = self.Subclass(21, name='example')
    yield self.changesrc.setServiceParent(master.change_svc)

    self.otherpoller = self.Subclass(22, name="otherpoller")
    yield self.otherpoller.setServiceParent(master.change_svc)

    anotherchangesrc = base.ChangeSource(name='notapoller')
    anotherchangesrc.setName(u"notapoller")
    yield anotherchangesrc.setServiceParent(master.change_svc)

    yield self.request.test_render(self.changeHook)
    yield util.asyncSleep(0.1)

@defer.inlineCallbacks
def do_polling(self):
    yield self.post('/deleteWebhook')
    offset = 0
    kwargs = {'json': {'timeout': self.poll_timeout}}
    logme = True
    while self._polling_continue:
        if offset:
            kwargs['json']['offset'] = offset
        try:
            res = yield self.http_client.post('/getUpdates',
                                              timeout=self.poll_timeout + 2,
                                              **kwargs)
            ans = yield res.json()
            if not ans.get('ok'):
                raise ValueError("[{}] {}".format(res.code,
                                                  ans.get('description')))
            updates = ans.get('result')
        except AssertionError as err:
            # let assertions from tests propagate
            raise err
        except Exception as err:
            msg = ("ERROR: cannot send Telegram request /getUpdates "
                   "(will try again): {}").format(err)
            if logme:
                self.log(msg)
                logme = False
            yield asyncSleep(self.retry_delay)
        else:
            logme = True
            if updates:
                offset = max(update['update_id'] for update in updates) + 1
                for update in updates:
                    yield self.process_update(update)
    self._polling_finished_notifier.notify(None)

@defer.inlineCallbacks
def cleanShutdown(self, quickMode=False, stopReactor=True, _reactor=reactor):
    """Shut down the entire process, once all currently-running builds are
    complete.

    quickMode will mark all builds as retry (except the ones that were
    triggered)
    """
    if self.shuttingDown:
        return
    log.msg("Initiating clean shutdown")
    self.shuttingDown = True
    # first, stop the distributor; this will finish any ongoing scheduling
    # operations before firing
    yield self.brd.disownServiceParent()

    # Double check that we're still supposed to be shutting down
    # The shutdown may have been cancelled!
    while self.shuttingDown:
        if quickMode:
            for builder in self.builders.values():
                # As we stop the builds, builder.building might change
                # during the loop, so we need to copy the list
                for build in list(builder.building):
                    # if the build is waited for, then this is a sub-build,
                    # so there is no need to retry it
                    if sum(br.waitedFor for br in build.requests):
                        results = CANCELLED
                    else:
                        results = RETRY
                    is_building = build.workerforbuilder.state == States.BUILDING
                    build.stopBuild("Master Shutdown", results)
                    if not is_building:
                        # if it is not building, then it must be a latent
                        # worker which is substantiating. Cancel it.
                        build.workerforbuilder.worker.insubstantiate()

        # then wait for all builds to finish
        dl = []
        for builder in self.builders.values():
            for build in builder.building:
                dl.append(build.waitUntilFinished())
        if len(dl) == 0:
            log.msg("No running jobs, starting shutdown immediately")
        else:
            log.msg("Waiting for %i build(s) to finish" % len(dl))
            yield defer.DeferredList(dl)

        # Check that there really aren't any running builds
        n = 0
        for builder in self.builders.values():
            n += len(builder.building)
        if n > 0:
            log.msg("Not shutting down, builder %s has %i builds running"
                    % (builder, n))
            log.msg("Trying shutdown sequence again")
            yield util.asyncSleep(1)
        else:
            if stopReactor and self.shuttingDown:
                log.msg("Stopping reactor")
                _reactor.stop()
            break

    if not self.shuttingDown:
        yield self.brd.setServiceParent(self)

@defer.inlineCallbacks
def setUpRequest(self, args, options=True, activate=True):
    self.request = FakeRequest(args=args)
    self.request.uri = "/change_hook/poller"
    self.request.method = "GET"
    www = self.request.site.master.www
    self.master = master = self.request.site.master = fakemaster.make_master(
        testcase=self, wantData=True)
    master.www = www
    yield self.master.startService()

    self.changeHook = change_hook.ChangeHookResource(
        dialects={'poller': options}, master=master)

    master.change_svc = ChangeManager()
    yield master.change_svc.setServiceParent(master)
    self.changesrc = self.Subclass("example", 21)
    yield self.changesrc.setServiceParent(master.change_svc)

    self.otherpoller = self.Subclass("otherpoller", 22)
    yield self.otherpoller.setServiceParent(master.change_svc)

    anotherchangesrc = base.ChangeSource(name='notapoller')
    anotherchangesrc.setName(u"notapoller")
    yield anotherchangesrc.setServiceParent(master.change_svc)

    yield self.request.test_render(self.changeHook)
    yield util.asyncSleep(0.1)

@defer.inlineCallbacks
def setUpRequest(self, args, options=True, activate=True):
    self.request = FakeRequest(args=args)
    self.request.uri = "/change_hook/poller"
    self.request.method = "GET"
    master = self.request.site.master
    self.changeHook = change_hook.ChangeHookResource(
        dialects={'poller': options}, master=master)

    master.change_svc = ChangeManager()
    master.change_svc.setServiceParent(master)
    self.changesrc = self.Subclass("example", 21)
    self.changesrc.setServiceParent(master.change_svc)
    if activate:
        self.changesrc.activate()

    self.otherpoller = self.Subclass("otherpoller", 22)
    self.otherpoller.setServiceParent(master.change_svc)
    if activate:
        self.otherpoller.activate()

    anotherchangesrc = base.ChangeSource(name='notapoller')
    anotherchangesrc.setName("notapoller")
    anotherchangesrc.setServiceParent(master.change_svc)

    yield self.request.test_render(self.changeHook)
    yield util.asyncSleep(0.1)

@defer.inlineCallbacks
def stop_instance(self, fast=False):
    if self.instance is None:
        # be gentle. Something may just be trying to alert us that an
        # instance never attached, and it's because, somehow, we never
        # started.
        return

    log.msg('{} {}: Stopping instance {}...'.format(
        self.__class__.__name__, self.workername, self.instance["Id"]))
    result = yield self.client.post(
        "/server/{}/stop".format(self.instance["uuid"]),
        json={"stop_server": {"stop_type": "hard", "timeout": "1"}})
    if result.code // 100 != 2:
        reason = yield result.content()
        reason = '{} {} failed to stop instance {} ({}): {}'.format(
            self.__class__.__name__, self.workername, self.instance["Id"],
            self._state(), reason.decode())
        self.instance = None
        raise Exception(reason)

    while (yield self._state()) not in ["stopped", "absent"]:
        yield util.asyncSleep(1, reactor=self.master.reactor)

    # destroy it
    result = yield self.client.delete("/server/{}?storages=1".format(
        self.instance["uuid"]))
    if result.code // 100 != 2:
        reason = yield result.content()
        reason = '{} {} failed to delete instance {} ({}): {}'.format(
            self.__class__.__name__, self.workername, self.instance["Id"],
            self._state(), reason.decode())
        self.instance = None
        raise Exception(reason)

@defer.inlineCallbacks
def on_new_step(_, data):
    if data['name'] == 'sleep':
        # wait until the step really starts
        yield asyncSleep(1)
        brs = yield self.master.data.get(('buildrequests',))
        brid = brs[-1]['buildrequestid']
        self.master.data.control('cancel',
                                 {'reason': 'cancelled by test'},
                                 ('buildrequests', brid))

@defer.inlineCallbacks
def stopService(self):
    # the worker might be insubstantiating from buildWaitTimeout
    while self.insubstantiating:
        yield asyncSleep(0.1)
    if self.conn is not None or self._substantiation_notifier:
        yield self._soft_disconnect()
    self._clearBuildWaitTimer()
    res = yield AbstractWorker.stopService(self)
    defer.returnValue(res)

def test_sleep(self):
    clock = task.Clock()
    self.patch(reactor, 'callLater', clock.callLater)
    d = util.asyncSleep(2)
    self.assertFalse(d.called)
    clock.advance(1)
    self.assertFalse(d.called)
    clock.advance(1)
    self.assertTrue(d.called)

@defer.inlineCallbacks
def test_trigger_controlled_step(self):
    stepcontroller = BuildStepController()
    yield self.setupConfig(
        masterConfig(num_concurrent=1, extra_steps=[stepcontroller.step]),
        startWorker=False)
    d = self.doForceBuild()

    builds = []
    while len(builds) != 2:
        builds = yield self.master.data.get(("builds",))
        yield util.asyncSleep(.1)
    while not stepcontroller.running:
        yield util.asyncSleep(.1)

    stepcontroller.finish_step(SUCCESS)
    yield d
    builds = yield self.master.data.get(("builds",))
    for b in builds:
        self.assertEqual(b['results'], SUCCESS)

@defer.inlineCallbacks
def run(self):
    if self.worker.worker_system == "nt":
        sleep = "waitfor SomethingThatIsNeverHappening /t 100 >nul 2>&1"
    else:
        sleep = ["sleep", "100"]
    d = self.runShellSequence([util.ShellArg(sleep)])
    yield asyncSleep(1)
    self.interrupt("just testing")
    res = yield d
    defer.returnValue(res)

@defer.inlineCallbacks
def run(self):
    if self.worker.worker_system == "nt":
        sleep = "waitfor SomethingThatIsNeverHappening /t 100 >nul 2>&1"
    else:
        sleep = ["sleep", "100"]
    d = self.runShellSequence([util.ShellArg(sleep)])
    yield asyncSleep(1)
    self.interrupt("just testing")
    res = yield d
    return res

@defer.inlineCallbacks
def waitForPodDeletion(self, namespace, name, timeout):
    t1 = time.time()
    url = f'/api/v1/namespaces/{namespace}/pods/{name}/status'
    while True:
        if time.time() - t1 > timeout:
            raise TimeoutError(
                f"Did not see pod {name} terminate after {timeout}s")
        res = yield self.get(url)
        res_json = yield res.json()
        if res.code == 404:
            break  # 404 means the pod has terminated
        if res.code != 200:
            raise KubeError(res_json)
        yield asyncSleep(1)
    return res_json

@defer.inlineCallbacks
def _finished(self, failure=None):
    self.active = False
    # the rc is sent asynchronously and there is a chance it is still in the
    # callback queue when finished is received; we have to work around this
    # on the master side because the worker might be older
    timeout = 10
    while self.rc is None and timeout > 0:
        yield util.asyncSleep(.1)
        timeout -= 1

    try:
        yield self.remoteComplete(failure)
        # this fires the original deferred we returned from .run()
        self.deferred.callback(self)
    except Exception as e:
        self.deferred.errback(e)

@defer.inlineCallbacks
def waitForPodDeletion(self, namespace, name, timeout):
    t1 = time.time()
    url = '/api/v1/namespaces/{namespace}/pods/{name}/status'.format(
        namespace=namespace, name=name)
    while True:
        if time.time() - t1 > timeout:
            raise TimeoutError(
                "Did not see pod {name} terminate after {timeout}s".format(
                    name=name, timeout=timeout))
        res = yield self.get(url)
        res_json = yield res.json()
        if res.code == 404:
            break  # 404 means the pod has terminated
        if res.code != 200:
            raise KubeError(res_json)
        yield asyncSleep(1)
    defer.returnValue(res_json)

@defer.inlineCallbacks
def _walkOverScheduledAnnotatedSteps(self):
    debuglog(">>> AnnotatedCommand::_walkOverScheduledAnnotatedSteps: started")
    while self.annotated_steps or not self._annotated_finished:
        if self.annotated_steps:
            yield self._execStep(self.annotated_steps[0],
                                 done=self._annotated_finished)
            last_step = self.annotated_steps.pop(0)
            if last_step.results == results.EXCEPTION:
                raise Exception("Annotated step exception")
        if not self._annotated_finished:
            yield asyncSleep(.1)
    debuglog(">>> AnnotatedCommand::_walkOverScheduledAnnotatedSteps: finished")

@defer.inlineCallbacks
def run(self):
    # Save previously collected log lines.
    yield self._flushLogs()
    while not self._request_finish:
        # Sleep for .1 second to let more logs arrive.
        yield asyncSleep(.1)
        if self._loglines:
            yield self._flushLogs()
    # and flush one last time.
    if self._loglines:
        yield self._flushLogs()
    debuglog("AnnotatedBuildStep::run() exiting '%s' step: "
             "stepid=%s, buildid=%s, results=%s"
             % (self.name, self.stepid, self.build.buildid, self.results))
    return self.results

@defer.inlineCallbacks
def _walkOverScheduledAnnotatedSteps(self):
    debuglog(">>> AnnotatedCommand::_walkOverScheduledAnnotatedSteps: started")
    while self.annotated_steps or not self._annotated_finished:
        if self.annotated_steps:
            if self._annotated_finished:
                # Reset the status of all unprocessed annotated steps to
                # the common annotate status.
                self.annotated_steps[0].updateStatus(self.annotate_status)
            yield self._execStep(self.annotated_steps[0])
            last_step = self.annotated_steps.pop(0)
            if last_step.results == results.EXCEPTION:
                raise Exception("Annotated step exception")
        yield asyncSleep(.1)
    debuglog(">>> AnnotatedCommand::_walkOverScheduledAnnotatedSteps: finished")

@defer.inlineCallbacks
def _finished(self, failure=None):
    self.active = False
    # the rc is sent asynchronously and there is a chance it is still in the
    # callback queue when finished is received; we have to work around this
    # on the master side because the worker might be older
    timeout = 10
    while self.rc is None and timeout > 0:
        yield util.asyncSleep(.1)
        timeout -= 1

    # call .remoteComplete. If it raises an exception, or returns the
    # Failure that we gave it, our self.deferred will be errbacked. If
    # it does not (either it ate the Failure or the step finished
    # normally and it didn't raise a new exception), self.deferred will
    # be callbacked.
    d = defer.maybeDeferred(self.remoteComplete, failure)
    # arrange for the callback to get this RemoteCommand instance
    # instead of just None
    d.addCallback(lambda r: self)
    # this fires the original deferred we returned from .run(),
    # with self as the result, or a failure
    d.addBoth(self.deferred.callback)

@defer.inlineCallbacks
def post(self, path, **kwargs):
    logme = True
    while True:
        try:
            res = yield self.http_client.post(path, **kwargs)
        except AssertionError as err:
            # just for tests
            raise err
        except Exception as err:
            msg = ("ERROR: problem sending Telegram request {} "
                   "(will try again): {}").format(path, err)
            if logme:
                self.log(msg)
                logme = False
            yield asyncSleep(self.retry_delay)
        else:
            ans = yield res.json()
            if not ans.get('ok'):
                self.log("ERROR: cannot send Telegram request {}: "
                         "[{}] {}".format(path, res.code,
                                          ans.get('description')))
                return None
            return ans.get('result', True)

@defer.inlineCallbacks
def _finished(self, failure=None):
    # _finished may be called concurrently by a message from the worker and
    # by an interruption due to a lost connection.
    if not self.active:
        return
    self.active = False
    # the rc is sent asynchronously and there is a chance it is still in the
    # callback queue when finished is received; we have to work around this
    # on the master side because the worker might be older
    if not self._is_conn_test_fake:
        timeout = 10
        while self.rc is None and timeout > 0:
            yield util.asyncSleep(.1)
            timeout -= 1

    try:
        yield self.remoteComplete(failure)
        # this fires the original deferred we returned from .run()
        self.deferred.callback(self)
    except Exception as e:
        self.deferred.errback(e)

@defer.inlineCallbacks
def action():
    times.append(round(self.reactor.seconds(), 1))
    yield asyncSleep(0.5, reactor=self.reactor)

@defer.inlineCallbacks
def start_instance(self, build):
    if self.instance is not None:
        raise ValueError('instance active')

    # convert image to UUID
    image, hostconfig = yield build.render([self.image, self.hostconfig])
    image_uuid = yield self._resolve_image(image)
    if image_uuid is None:
        log.msg("{} {}: Instance creation failed: Cannot find template {}".format(
            self.__class__.__name__, self.workername, image))
        raise LatentWorkerFailedToSubstantiate(self.getContainerName(),
                                               'resolving image')

    # compose json
    req = {
        "server": {
            "zone": hostconfig.get('zone', DEFAULT_ZONE),
            "title": self.getContainerName(),
            "hostname": hostconfig.get('hostname', self.name),
            "user_data": hostconfig.get('user_data', ""),
            "login_user": {
                "username": "******",
                "ssh_keys": {
                    "ssh_key": hostconfig.get('ssh_keys', []),
                },
            },
            "password_delivery": "none",
            "storage_devices": {
                "storage_device": [{
                    "action": "clone",
                    "storage": image_uuid,
                    "title": self.getContainerName(),
                    "size": hostconfig.get("os_disk_size", DEFAULT_OS_DISK_SIZE),
                    "tier": "maxiops",
                }],
            },
        }
    }
    req["server"]["plan"] = hostconfig.get("plan", DEFAULT_PLAN)
    if req["server"]["plan"] == "custom":
        req["server"]["core_number"] = hostconfig.get("core_number",
                                                      DEFAULT_CORE_NUMBER)
        req["server"]["memory_amount"] = hostconfig.get("memory_amount",
                                                        DEFAULT_MEMORY_AMOUNT)

    # request instance
    result = yield self.client.post("/server", json=req)
    if result.code // 100 != 2:
        reason = yield result.content()
        log.msg("{} {}: Instance creation failed: {} {}".format(
            self.__class__.__name__, self.workername, result.code, reason))
        self.failed_to_start(req['server']['hostname'], 'starting')

    instance = yield result.json()
    self.instance = instance["server"]
    self.instance["Id"] = self.instance["uuid"].split("-")[-1]

    # wait until the server is actually up
    while (yield self._state()) not in ["started"]:
        yield util.asyncSleep(1, reactor=self.master.reactor)
    result = yield self.client.get("/server/{}".format(self.instance["uuid"]))
    instance = yield result.json()

    log.msg("{} {}: Instance {} created (root password {})".format(
        self.__class__.__name__, self.workername, self.instance["Id"],
        self.instance['password']))

    # include the root password as a worker property
    self.properties.setProperty("root_password", self.instance['password'],
                                "Worker")
    return [self.instance["Id"], image]

@defer.inlineCallbacks
def inlineCallbacks2():
    yield util.asyncSleep(1, self.reactor)
    self.calls += 1

@defer.inlineCallbacks
def waitFor(fn):
    while True:
        res = yield fn()
        if res:
            defer.returnValue(res)
        yield util.asyncSleep(.01)

@defer.inlineCallbacks
def wait_on_failure(self):
    seconds = self.calculate_wait_on_failure_seconds()
    yield asyncSleep(seconds, reactor=self.reactor)

@defer.inlineCallbacks
def waitFor(fn):
    while True:
        res = yield fn()
        if res:
            return res
        yield util.asyncSleep(.01)

@defer.inlineCallbacks
def cleanShutdown(self, quickMode=False, stopReactor=True):
    """Shut down the entire process, once all currently-running builds are
    complete.

    quickMode will mark all builds as retry (except the ones that were
    triggered)
    """
    if self.shuttingDown:
        return
    log.msg("Initiating clean shutdown")
    self.shuttingDown = True
    # first, stop the distributor; this will finish any ongoing scheduling
    # operations before firing
    yield self.brd.disownServiceParent()

    # Double check that we're still supposed to be shutting down
    # The shutdown may have been cancelled!
    while self.shuttingDown:
        if quickMode:
            for builder in self.builders.values():
                # As we stop the builds, builder.building might change
                # during the loop, so we need to copy the list
                for build in list(builder.building):
                    # if the build is waited for, then this is a sub-build,
                    # so there is no need to retry it
                    if sum(br.waitedFor for br in build.requests):
                        results = CANCELLED
                    else:
                        results = RETRY
                    is_building = build.workerforbuilder.state == States.BUILDING
                    build.stopBuild("Master Shutdown", results)
                    if not is_building:
                        # if it is not building, then it must be a latent
                        # worker which is substantiating. Cancel it.
                        build.workerforbuilder.worker.insubstantiate()

        # then wait for all builds to finish
        dl = []
        for builder in self.builders.values():
            for build in builder.building:
                # the build may be waiting for a ping to the worker to
                # succeed, which may never happen if the connection to the
                # worker was broken without the TCP connection being severed
                build.workerforbuilder.abortPingIfAny()
                dl.append(build.waitUntilFinished())
        if not dl:
            log.msg("No running jobs, starting shutdown immediately")
        else:
            log.msg(f"Waiting for {len(dl)} build(s) to finish")
            yield defer.DeferredList(dl)

        # Check that there really aren't any running builds
        n = 0
        for builder in self.builders.values():
            if builder.building:
                num_builds = len(builder.building)
                log.msg(f"Builder {builder} has {num_builds} builds running")
                n += num_builds
        if n > 0:
            log.msg(f"Not shutting down, there are {n} builds running")
            log.msg("Trying shutdown sequence again")
            yield util.asyncSleep(1)
        else:
            if stopReactor and self.shuttingDown:
                log.msg("Stopping reactor")
                self.master.reactor.stop()
            break

    if not self.shuttingDown:
        yield self.brd.setServiceParent(self)