def test_version_closed(self):
    cache = AgentCache()
    value = "test too"
    version = 200
    with pytest.raises(Exception):
        cache.cache_value("test", value, version=version)
        assert value == cache.find("test", version=version)
def test_resource(self):
    cache = AgentCache()
    value = "test too"
    resource = Id("test::Resource", "test", "key", "test", 100).get_instance()
    cache.cache_value("test", value, resource=resource)
    assert value == cache.find("test", resource=resource)
def test_version(self):
    cache = AgentCache()
    value = "test too"
    version = 200
    cache.open_version(version)
    cache.cache_value("test", value, version=version)
    assert value == cache.find("test", version=version)
def test_resource_fail(my_resource):
    cache = AgentCache()
    value = "test too"
    resource = Id("test::Resource", "test", "key", "test", 100).get_instance()
    cache.cache_value("test", value, resource=resource)
    with pytest.raises(KeyError):
        assert value == cache.find("test")
def test_resource_and_version():
    cache = AgentCache()
    value = "test too"
    resource = Id("test::Resource", "test", "key", "test", 100).get_instance()
    version = 200
    cache.open_version(version)
    cache.cache_value("test", value, resource=resource, version=version)
    assert value == cache.find("test", resource=resource, version=version)
def get_io(cache: AgentCache, uri: str, version: int):
    """
    Get an IO instance for the given uri and version
    """
    if cache is None:
        io = _get_io_instance(uri)
    else:
        try:
            io = cache.find(uri, version=version)
        except KeyError:
            io = _get_io_instance(uri)
            cache.cache_value(uri, io, version=version, call_on_delete=lambda x: x.close())

    return io
def test_version_fail(self):
    cache = AgentCache()
    value = "test too"
    version = 200
    cache.open_version(version)
    cache.cache_value("test", value, version=version)
    with pytest.raises(KeyError):
        assert value == cache.find("test")
def test_decorator(self):
    xcache = AgentCache()

    class DT(object):
        def __init__(self, cache):
            self.cache = cache
            self.count = 0
            self.c2 = 0

        @cache
        def test_method(self):
            self.count += 1
            return "x"

        @cache
        def test_method_2(self, version):
            self.count += 1
            return "x2"

        @cache(cacheNone=False)
        def test_method_3(self):
            self.c2 += 1
            if self.c2 < 2:
                return None
            else:
                return "X"

    test = DT(xcache)
    assert "x" == test.test_method()
    assert "x" == test.test_method()
    assert "x" == test.test_method()
    assert 1 == test.count

    xcache.open_version(1)
    xcache.open_version(2)
    assert "x2" == test.test_method_2(version=1)
    assert "x2" == test.test_method_2(version=1)
    assert 2 == test.count
    assert "x2" == test.test_method_2(version=2)
    assert 3 == test.count

    xcache.close_version(1)
    xcache.open_version(1)
    assert "x2" == test.test_method_2(version=1)
    assert "x2" == test.test_method_2(version=1)
    assert 4 == test.count

    assert None is test.test_method_3()
    assert 1 == test.c2
    assert "X" == test.test_method_3()
    assert 2 == test.c2
    assert "X" == test.test_method_3()
    assert 2 == test.c2
def get_io(cache: AgentCache, uri: str, version: int) -> "IOBase":
    """
    Get an IO instance for the given uri and version
    """
    if cache is None:
        io = _get_io_instance(uri)
    else:
        io = cache.get_or_else(uri, lambda version: _get_io_instance(uri),
                               call_on_delete=lambda x: x.close(), version=version)

    return io
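# Illustrative sketch, not part of the module above: the per-version caching pattern that
# get_io relies on. One instance is created per (key, version), later calls reuse it, and
# call_on_delete runs when the version is closed. _DummyIO and the "local:" key are made-up
# names for this example; only AgentCache calls shown in the tests above are used.
def _example_io_caching():
    class _DummyIO(object):
        def __init__(self):
            self.closed = False

        def close(self):
            self.closed = True

    cache = AgentCache()
    version = 1
    cache.open_version(version)

    # first call creates and caches, second call returns the cached instance
    io1 = cache.get_or_else("local:", lambda version: _DummyIO(), version=version,
                            call_on_delete=lambda x: x.close())
    io2 = cache.get_or_else("local:", lambda version: _DummyIO(), version=version,
                            call_on_delete=lambda x: x.close())
    assert io1 is io2

    # closing the version evicts the entry and triggers call_on_delete
    cache.close_version(version)
    assert io1.closed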
def test_multi_threaded():
    class Spy(object):
        def __init__(self):
            self.created = 0
            self.deleted = 0
            self.lock = Lock()

        def create(self):
            with self.lock:
                self.created += 1
                return self

        def delete(self):
            self.deleted += 1

    cache = AgentCache()
    version = 200
    cache.open_version(version)

    alpha = Spy()
    beta = Spy()

    alpha.lock.acquire()

    t1 = Thread(target=lambda: cache.get_or_else("test", lambda version: alpha.create(), version=version,
                                                 call_on_delete=lambda x: x.delete()))
    t2 = Thread(target=lambda: cache.get_or_else("test", lambda version: beta.create(), version=version,
                                                 call_on_delete=lambda x: x.delete()))

    t1.start()
    t2.start()

    alpha.lock.release()

    t1.join()
    t2.join()

    assert alpha.created + beta.created == 1
    assert alpha.deleted == 0
    assert beta.deleted == 0

    cache.close_version(version)
    assert alpha.created + beta.created == 1
    assert alpha.deleted == alpha.created
    assert beta.deleted == beta.created
def test_timout(self):
    cache = AgentCache()
    value = "test too"
    cache.cache_value("test", value, timeout=0.1)
    cache.cache_value("test2", value)

    assert value == cache.find("test")

    sleep(1)

    try:
        assert value == cache.find("test")
        raise AssertionError("Should get exception")
    except KeyError:
        pass

    assert value == cache.find("test2")
def test_context_manager():
    cache = AgentCache()
    value = "test too"
    version = 200
    with cache.manager(version):
        cache.cache_value("test", value, version=version)
        cache.cache_value("test0", value, version=version)
        cache.cache_value("test4", value, version=version)

        resource = Id("test::Resource", "test", "key", "test", 100).get_instance()
        cache.cache_value("testx", value, resource=resource)

        assert value == cache.find("test", version=version)
        assert value == cache.find("testx", resource=resource)

    # the version is closed when the manager exits: resource-bound entries remain,
    # version-bound entries are gone
    assert value == cache.find("testx", resource=resource)
    with pytest.raises(KeyError):
        assert value == cache.find("test", version=version)
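# Illustrative sketch, not part of the test module above: as the test suggests, cache.manager(version)
# appears to be equivalent to an explicit open_version/close_version pair guarded by try/finally.
# The key and value literals are made up for this example.
def _example_manager_equivalence():
    cache = AgentCache()
    version = 5

    # explicit lifecycle
    cache.open_version(version)
    try:
        cache.cache_value("key", "value", version=version)
    finally:
        cache.close_version(version)

    # the same lifecycle written with the context manager
    with cache.manager(version):
        cache.cache_value("key", "value", version=version)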
def test_version_close():
    cache = AgentCache()
    value = "test too"
    version = 200
    cache.open_version(version)
    cache.cache_value("test", value, version=version)
    cache.cache_value("test0", value, version=version)
    cache.cache_value("test4", value, version=version)

    resource = Id("test::Resource", "test", "key", "test", 100).get_instance()
    cache.cache_value("testx", value, resource=resource)

    assert value == cache.find("test", version=version)
    assert value == cache.find("testx", resource=resource)

    cache.close_version(version)
    # resource-bound entries survive closing the version, version-bound entries do not
    assert value == cache.find("testx", resource=resource)
    with pytest.raises(KeyError):
        cache.find("test", version=version)
def _execute(self, ctx: handler.HandlerContext, events: dict, cache: AgentCache) -> (bool, bool):
    """
    :param ctx The context to use during execution of this deploy
    :param events Possible events that are available for this resource
    :param cache The cache instance to use
    :return (success, send_event) Return whether the execution was successful and whether a change
            event should be sent to provides of this resource.
    """
    ctx.debug("Start deploy %(deploy_id)s of resource %(resource_id)s",
              deploy_id=self.gid, resource_id=self.resource_id)

    provider = None
    try:
        provider = handler.Commander.get_provider(cache, self.scheduler.agent, self.resource)
        provider.set_cache(cache)
    except Exception:
        if provider is not None:
            provider.close()
        cache.close_version(self.resource.id.version)
        ctx.set_status(const.ResourceState.unavailable)
        ctx.exception("Unable to find a handler for %(resource_id)s", resource_id=str(self.resource.id))
        return False, False

    yield self.scheduler.agent.thread_pool.submit(provider.execute, ctx, self.resource)

    send_event = (hasattr(self.resource, "send_event") and self.resource.send_event)
    if ctx.status is not const.ResourceState.deployed:
        provider.close()
        cache.close_version(self.resource.id.version)
        return False, send_event

    if len(events) > 0 and provider.can_process_events():
        ctx.info("Sending events to %(resource_id)s because of modified dependencies",
                 resource_id=str(self.resource.id))
        yield self.scheduler.agent.thread_pool.submit(provider.process_events, ctx, self.resource, events)

    provider.close()
    cache.close_version(self.resource_id.version)
    return True, send_event
def test_version_and_timout(self):
    cache = AgentCache()
    version = 200
    cache.open_version(version)
    value = "test too"
    cache.cache_value("test", value, version=version, timeout=0.3)
    cache.cache_value("testx", value)

    assert value == cache.find("test", version=version)
    assert value == cache.find("testx")

    cache.close_version(version)
    assert value == cache.find("testx")
    sleep(1)
    assert value == cache.find("testx")
    with pytest.raises(KeyError):
        cache.find("test", version=version)
def test_get_or_else_none():
    called = []

    def creator(param, resource, version):
        called.append("x")
        return param

    class Sequencer(object):
        def __init__(self, sequence):
            self.seq = sequence
            self.count = 0

        def __call__(self, **kwargs):
            out = self.seq[self.count]
            self.count += 1
            return out

    cache = AgentCache()
    value = "test too"
    resource = Id("test::Resource", "test", "key", "test", 100).get_instance()
    version = 100
    cache.open_version(version)

    assert None is cache.get_or_else("test", creator, resource=resource, version=version, cache_none=False, param=None)
    assert len(called) == 1
    assert None is cache.get_or_else("test", creator, resource=resource, version=version, cache_none=False, param=None)
    assert len(called) == 2
    assert value == cache.get_or_else("test", creator, resource=resource, version=version, cache_none=False, param=value)
    assert value == cache.get_or_else("test", creator, resource=resource, version=version, cache_none=False, param=value)
    assert len(called) == 3

    seq = Sequencer([None, None, "A"])
    assert None is cache.get_or_else("testx", seq, resource=resource, version=version, cache_none=False)
    assert seq.count == 1
    assert None is cache.get_or_else("testx", seq, resource=resource, version=version, cache_none=False)
    assert seq.count == 2
    assert "A" == cache.get_or_else("testx", seq, resource=resource, version=version, cache_none=False)
    assert seq.count == 3
    assert "A" == cache.get_or_else("testx", seq, resource=resource, version=version, cache_none=False)
    assert seq.count == 3
    assert "A" == cache.get_or_else("testx", seq, resource=resource, version=version, cache_none=False)
    assert seq.count == 3
def test_get_or_else(self):
    called = []

    def creator(param, resource, version):
        called.append("x")
        return param

    cache = AgentCache()
    value = "test too"
    value2 = "test too x"
    resource = Id("test::Resource", "test", "key", "test", 100).get_instance()
    resourcev2 = Id("test::Resource", "test", "key", "test", 200).get_instance()
    assert 200 == resourcev2.id.version
    version = 200
    cache.open_version(version)

    assert value == cache.get_or_else("test", creator, resource=resource, version=version, param=value)
    assert value == cache.get_or_else("test", creator, resource=resource, version=version, param=value)
    assert len(called) == 1
    assert value == cache.get_or_else("test", creator, resource=resourcev2, version=version, param=value)
    assert len(called) == 1
    assert value2 == cache.get_or_else("test", creator, resource=resource, version=version, param=value2)
def test_base_fail(self):
    cache = AgentCache()
    value = "test too"
    with pytest.raises(KeyError):
        assert value == cache.find("test")
def test_base(self):
    cache = AgentCache()
    value = "test too"
    cache.cache_value("test", value)
    assert value == cache.find("test")
class AgentInstance(object):
    def __init__(self, process, name: str, uri: str):
        self.process = process
        self.name = name
        self._uri = uri

        # the lock for changing the current ongoing deployment
        self.critical_ratelimiter = locks.Semaphore(1)
        # lock for dryrun tasks
        self.dryrunlock = locks.Semaphore(1)

        # multi threading control
        self.thread_pool = ThreadPoolExecutor(process.poolsize)
        self.ratelimiter = locks.Semaphore(process.poolsize)

        self._env_id = process._env_id
        self.sessionid = process.sessionid

        # init
        self._cache = AgentCache()
        self._nq = ResourceScheduler(self, self.process.environment, name, self._cache, ratelimiter=self.ratelimiter)
        self._enabled = None

        # do regular deploys
        self._deploy_interval = cfg.agent_interval.get()
        self._splay_interval = cfg.agent_splay.get()
        self._splay_value = random.randint(0, self._splay_interval)

        self._getting_resources = False
        self._get_resource_timeout = 0

    @property
    def environment(self):
        return self.process.environment

    def get_client(self):
        return self.process._client

    @property
    def uri(self):
        return self._uri

    def is_enabled(self):
        return self._enabled is not None

    def add_future(self, future):
        self.process.add_future(future)

    def unpause(self):
        if self._enabled is not None:
            return 200, "already running"

        LOGGER.info("Agent assuming primary role for %s" % self.name)

        @gen.coroutine
        def action():
            yield self.get_latest_version_for_agent()

        self._enabled = action
        self.process._sched.add_action(action, self._deploy_interval, self._splay_value)
        return 200, "unpaused"

    def pause(self):
        if self._enabled is None:
            return 200, "already paused"

        LOGGER.info("Agent lost primary role for %s" % self.name)

        token = self._enabled
        self.process._sched.remove(token)
        self._enabled = None
        return 200, "paused"

    def notify_ready(self, resourceid, send_events, state, change, changes):
        self._nq.notify_ready(resourceid, send_events, state, change, changes)

    def _can_get_resources(self):
        if self._getting_resources:
            LOGGER.info("%s Attempting to get resource while get is in progress", self.name)
            return False
        if time.time() < self._get_resource_timeout:
            LOGGER.info("%s Attempting to get resources during backoff %g seconds left, last download took %d seconds",
                        self.name, self._get_resource_timeout - time.time(), self._get_resource_duration)
            return False
        return True

    @gen.coroutine
    def get_latest_version_for_agent(self):
        """
        Get the latest version for the given agent (this is also how we are notified)
        """
        if not self._can_get_resources():
            return

        with (yield self.critical_ratelimiter.acquire()):
            if not self._can_get_resources():
                return

            LOGGER.debug("Getting latest resources for %s" % self.name)
            self._getting_resources = True
            start = time.time()
            try:
                result = yield self.get_client().get_resources_for_agent(tid=self._env_id, agent=self.name)
            finally:
                self._getting_resources = False

            end = time.time()
            self._get_resource_duration = end - start
            self._get_resource_timeout = GET_RESOURCE_BACKOFF * self._get_resource_duration + end

            if result.code == 404:
                LOGGER.info("No released configuration model version available for agent %s", self.name)
            elif result.code != 200:
                LOGGER.warning("Got an error while pulling resources for agent %s. %s", self.name, result.result)
            else:
                restypes = set([res["resource_type"] for res in result.result["resources"]])

                resources = []
                yield self.process._ensure_code(self._env_id, result.result["version"], restypes)
                try:
                    undeployable = {}
                    for res in result.result["resources"]:
                        state = const.ResourceState[res["status"]]
                        if state in const.UNDEPLOYABLE_STATES:
                            undeployable[res["id"]] = state

                        data = res["attributes"]
                        data["id"] = res["id"]
                        resource = Resource.deserialize(data)
                        resources.append(resource)
                        LOGGER.debug("Received update for %s", resource.id)
                except TypeError:
                    LOGGER.exception("Failed to receive update")

                if len(resources) > 0:
                    self._nq.reload(resources, undeployable)

    @gen.coroutine
    def dryrun(self, dry_run_id, version):
        self.add_future(self.do_run_dryrun(version, dry_run_id))
        return 200

    @gen.coroutine
    def do_run_dryrun(self, version, dry_run_id):
        with (yield self.dryrunlock.acquire()):
            with (yield self.ratelimiter.acquire()):
                result = yield self.get_client().get_resources_for_agent(tid=self._env_id, agent=self.name, version=version)
                if result.code == 404:
                    LOGGER.warning("Version %s does not exist, can not run dryrun", version)
                    return
                elif result.code != 200:
                    LOGGER.warning("Got an error while pulling resources for agent %s and version %s", self.name, version)
                    return

                resources = result.result["resources"]

                restypes = set([res["resource_type"] for res in resources])
                # TODO: handle different versions for dryrun and deploy!
                yield self.process._ensure_code(self._env_id, version, restypes)

                self._cache.open_version(version)

                for res in resources:
                    ctx = handler.HandlerContext(res, True)
                    started = datetime.datetime.now()
                    provider = None
                    try:
                        if const.ResourceState[res["status"]] in const.UNDEPLOYABLE_STATES:
                            ctx.exception("Skipping %(resource_id)s because in undeployable state %(status)s",
                                          resource_id=res["id"], status=res["status"])
                            yield self.get_client().dryrun_update(tid=self._env_id, id=dry_run_id,
                                                                  resource=res["id"], changes={})
                            continue

                        data = res["attributes"]
                        data["id"] = res["id"]
                        resource = Resource.deserialize(data)
                        LOGGER.debug("Running dryrun for %s", resource.id)

                        try:
                            provider = handler.Commander.get_provider(self._cache, self, resource)
                            provider.set_cache(self._cache)
                        except Exception as e:
                            ctx.exception("Unable to find a handler for %(resource_id)s (exception: %(exception)s)",
                                          resource_id=str(resource.id), exception=str(e))
                            yield self.get_client().dryrun_update(tid=self._env_id, id=dry_run_id,
                                                                  resource=res["id"], changes={})
                        else:
                            yield self.thread_pool.submit(provider.execute, ctx, resource, dry_run=True)
                            yield self.get_client().dryrun_update(tid=self._env_id, id=dry_run_id,
                                                                  resource=res["id"], changes=ctx.changes)
                    except TypeError:
                        ctx.exception("Unable to process resource for dryrun.")
                    finally:
                        if provider is not None:
                            provider.close()

                        finished = datetime.datetime.now()
                        self.get_client().resource_action_update(tid=self._env_id, resource_ids=[res["id"]],
                                                                 action_id=ctx.action_id,
                                                                 action=const.ResourceAction.dryrun,
                                                                 started=started, finished=finished,
                                                                 messages=ctx.logs, status=const.ResourceState.dry)

                self._cache.close_version(version)

    @gen.coroutine
    def do_restore(self, restore_id, snapshot_id, resources):
        with (yield self.ratelimiter.acquire()):
            LOGGER.info("Start a restore %s", restore_id)

            yield self.process._ensure_code(self._env_id, resources[0][1]["model"],
                                            [res[1]["resource_type"] for res in resources])

            version = resources[0][1]["model"]
            self._cache.open_version(version)

            for restore, resource in resources:
                start = datetime.datetime.now()
                provider = None
                try:
                    data = resource["attributes"]
                    data["id"] = resource["id"]
                    resource_obj = Resource.deserialize(data)
                    provider = handler.Commander.get_provider(self._cache, self, resource_obj)
                    provider.set_cache(self._cache)

                    if not hasattr(resource_obj, "allow_restore") or not resource_obj.allow_restore:
                        yield self.get_client().update_restore(
                            tid=self._env_id, id=restore_id, resource_id=str(resource_obj.id), start=start,
                            stop=datetime.datetime.now(), success=False, error=False,
                            msg="Resource %s does not allow restore" % resource["id"])
                        continue

                    try:
                        yield self.thread_pool.submit(provider.restore, resource_obj, restore["content_hash"])
                        yield self.get_client().update_restore(
                            tid=self._env_id, id=restore_id, resource_id=str(resource_obj.id), success=True,
                            error=False, start=start, stop=datetime.datetime.now(), msg="")
                    except NotImplementedError:
                        yield self.get_client().update_restore(
                            tid=self._env_id, id=restore_id, resource_id=str(resource_obj.id), success=False,
                            error=False, start=start, stop=datetime.datetime.now(),
                            msg="The handler for resource %s does not support restores" % resource["id"])
                except Exception:
                    LOGGER.exception("Unable to find a handler for %s", resource["id"])
                    yield self.get_client().update_restore(
                        tid=self._env_id, id=restore_id, resource_id=resource_obj.id.resource_str(), success=False,
                        error=False, start=start, stop=datetime.datetime.now(),
                        msg="Unable to find a handler to restore a snapshot of resource %s" % resource["id"])
                finally:
                    if provider is not None:
                        provider.close()

            self._cache.close_version(version)

            return 200

    @gen.coroutine
    def do_snapshot(self, snapshot_id, resources):
        with (yield self.ratelimiter.acquire()):
            LOGGER.info("Start snapshot %s", snapshot_id)

            yield self.process._ensure_code(self._env_id, resources[0]["model"],
                                            [res["resource_type"] for res in resources])

            version = resources[0]["model"]
            self._cache.open_version(version)

            for resource in resources:
                start = datetime.datetime.now()
                provider = None
                try:
                    data = resource["attributes"]
                    data["id"] = resource["id"]
                    resource_obj = Resource.deserialize(data)
                    provider = handler.Commander.get_provider(self._cache, self, resource_obj)
                    provider.set_cache(self._cache)

                    if not hasattr(resource_obj, "allow_snapshot") or not resource_obj.allow_snapshot:
                        yield self.get_client().update_snapshot(
                            tid=self._env_id, id=snapshot_id, resource_id=resource_obj.id.resource_str(),
                            snapshot_data="", start=start, stop=datetime.datetime.now(), size=0,
                            success=False, error=False,
                            msg="Resource %s does not allow snapshots" % resource["id"])
                        continue

                    try:
                        result = yield self.thread_pool.submit(provider.snapshot, resource_obj)
                        if result is not None:
                            sha1sum = hashlib.sha1()
                            sha1sum.update(result)
                            content_id = sha1sum.hexdigest()
                            yield self.get_client().upload_file(id=content_id,
                                                                content=base64.b64encode(result).decode("ascii"))

                            yield self.get_client().update_snapshot(
                                tid=self._env_id, id=snapshot_id, resource_id=resource_obj.id.resource_str(),
                                snapshot_data=content_id, start=start, stop=datetime.datetime.now(),
                                size=len(result), success=True, error=False, msg="")
                        else:
                            raise Exception("Snapshot returned no data")
                    except NotImplementedError:
                        yield self.get_client().update_snapshot(
                            tid=self._env_id, id=snapshot_id, error=False,
                            resource_id=resource_obj.id.resource_str(), snapshot_data="", start=start,
                            stop=datetime.datetime.now(), size=0, success=False,
                            msg="The handler for resource %s does not support snapshots" % resource["id"])
                    except Exception:
                        LOGGER.exception("An exception occurred while creating the snapshot of %s", resource["id"])
                        yield self.get_client().update_snapshot(
                            tid=self._env_id, id=snapshot_id, snapshot_data="",
                            resource_id=resource_obj.id.resource_str(), error=True, start=start,
                            stop=datetime.datetime.now(), size=0, success=False,
                            msg="The handler for resource %s does not support snapshots" % resource["id"])
                except Exception:
                    LOGGER.exception("Unable to find a handler for %s", resource["id"])
                    yield self.get_client().update_snapshot(
                        tid=self._env_id, id=snapshot_id, snapshot_data="",
                        resource_id=resource_obj.id.resource_str(), error=False, start=start,
                        stop=datetime.datetime.now(), size=0, success=False,
                        msg="Unable to find a handler for %s" % resource["id"])
                finally:
                    if provider is not None:
                        provider.close()

            self._cache.close_version(version)

            return 200

    @gen.coroutine
    def get_facts(self, resource):
        with (yield self.ratelimiter.acquire()):
            yield self.process._ensure_code(self._env_id, resource["model"], [resource["resource_type"]])
            ctx = handler.HandlerContext(resource)

            provider = None
            try:
                data = resource["attributes"]
                data["id"] = resource["id"]
                resource_obj = Resource.deserialize(data)
                version = resource_obj.id.version

                try:
                    if const.ResourceState[resource["status"]] in const.UNDEPLOYABLE_STATES:
                        LOGGER.exception("Skipping %s because in undeployable state %s",
                                         resource["id"], resource["status"])
                        return 200

                    self._cache.open_version(version)
                    provider = handler.Commander.get_provider(self._cache, self, resource_obj)
                    provider.set_cache(self._cache)

                    result = yield self.thread_pool.submit(provider.check_facts, ctx, resource_obj)
                    parameters = [{"id": name, "value": value, "resource_id": resource_obj.id.resource_str(),
                                   "source": "fact"} for name, value in result.items()]
                    yield self.get_client().set_parameters(tid=self._env_id, parameters=parameters)
                except Exception:
                    LOGGER.exception("Unable to retrieve fact")
                finally:
                    self._cache.close_version(version)
            except Exception:
                LOGGER.exception("Unable to find a handler for %s", resource["id"])
                return 500
            finally:
                if provider is not None:
                    provider.close()

            return 200
def test_decorator():
    class Closeable:
        def __init__(self):
            self.closed = False

        def close(self):
            self.closed = True

    my_closable = Closeable()
    my_closable_2 = Closeable()

    xcache = AgentCache()

    class DT(object):
        def __init__(self, cache):
            self.cache = cache
            self.count = 0
            self.c2 = 0

        @cache()
        def test_method(self):
            self.count += 1
            return "x"

        @cache
        def test_method_2(self, version):
            self.count += 1
            return "x2"

        @cache(cacheNone=False)
        def test_method_3(self):
            self.c2 += 1
            if self.c2 < 2:
                return None
            else:
                return "X"

        @cache(call_on_delete=lambda x: x.close())
        def test_close(self, version):
            self.count += 1
            return my_closable

        @cache(call_on_delete=lambda x: x.close())
        def test_close_2(self):
            self.count += 1
            return my_closable_2

    test = DT(xcache)

    xcache.open_version(3)
    test.test_close(version=3)
    test.test_close_2()
    xcache.close()
    assert my_closable.closed
    assert my_closable_2.closed

    test.count = 0
    my_closable.closed = False

    assert "x" == test.test_method()
    assert "x" == test.test_method()
    assert "x" == test.test_method()
    assert 1 == test.count

    xcache.open_version(1)
    xcache.open_version(2)
    assert "x2" == test.test_method_2(version=1)
    assert "x2" == test.test_method_2(version=1)
    assert 2 == test.count
    assert "x2" == test.test_method_2(version=2)
    assert 3 == test.count

    xcache.close_version(1)
    xcache.open_version(1)
    assert "x2" == test.test_method_2(version=1)
    assert "x2" == test.test_method_2(version=1)
    assert 4 == test.count

    assert None is test.test_method_3()
    assert 1 == test.c2
    assert "X" == test.test_method_3()
    assert 2 == test.c2
    assert "X" == test.test_method_3()
    assert 2 == test.c2

    test.count = 0
    xcache.open_version(3)
    test.test_close(version=3)
    assert test.count == 1
    test.test_close(version=3)
    assert test.count == 1
    assert not my_closable.closed
    xcache.close_version(3)
    assert my_closable.closed
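# Illustrative sketch, not part of the test module above: the per-version lifecycle the @cache
# decorator is typically combined with, mirroring how AgentInstance brackets handler work with
# open_version/close_version. ConnectionLike and _ExampleHandler are made-up names; only the
# decorator behaviour demonstrated by test_decorator above is assumed.
def _example_decorator_lifecycle():
    class ConnectionLike(object):
        def __init__(self):
            self.closed = False

        def close(self):
            self.closed = True

    xcache = AgentCache()

    class _ExampleHandler(object):
        def __init__(self, cache):
            # the decorator resolves the AgentCache through self.cache
            self.cache = cache

        @cache(call_on_delete=lambda c: c.close())
        def get_connection(self, version):
            return ConnectionLike()

    handler_instance = _ExampleHandler(xcache)
    version = 42
    xcache.open_version(version)

    conn = handler_instance.get_connection(version=version)
    # the second call within the same version returns the cached instance
    assert conn is handler_instance.get_connection(version=version)

    # closing the version evicts the entry and call_on_delete closes the connection
    xcache.close_version(version)
    assert conn.closed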