def create(self, requirements, allocationInfo):
    """Create a new allocation for ``requirements`` and record it.

    The request is reported and broadcast, the image labels named by the
    requirements are verified, hosts are chosen through
    ``priority.Priority`` and wrapped in an ``allocation.Allocation``.

    Raises:
        priority.OutOfResourcesError: re-raised after broadcasting a
            "noResources" rejection.
        Exception: anything raised while constructing the allocation is
            re-raised after the chosen hosts were put back in the free
            pool and an "unknown" rejection was broadcast.
    """
    self._reportRequest(requirements, allocationInfo)
    self._broadcaster.allocationRequested(requirements, allocationInfo)
    assert globallock.assertLocked()
    self._cleanup()
    labels = [requirement['imageLabel'] for requirement in requirements.values()]
    self._verifyLabelsExistsInOsmosis(labels)
    try:
        chooser = priority.Priority(
            requirements=requirements,
            allocationInfo=allocationInfo,
            freePool=self._freePool,
            allocations=self._allocations,
            hosts=self._hosts)
    except priority.OutOfResourcesError:
        if "uuid" in allocationInfo:
            logging.error("Allocation request [%(uuid)s] - rejected due to lack of resources",
                          dict(uuid=allocationInfo["uuid"]))
        self._broadcaster.allocationRejected(reason="noResources")
        raise
    allocated = chooser.allocated()
    try:
        newAllocation = allocation.Allocation(
            index=self._index,
            requirements=requirements,
            allocationInfo=allocationInfo,
            allocated=allocated,
            broadcaster=self._broadcaster,
            freePool=self._freePool,
            hosts=self._hosts)
    except Exception:
        self._reportAllocationFailed(allocationInfo)
        # Hand every chosen host back before propagating the failure.
        for host in allocated.values():
            self._freePool.put(host)
        self._broadcaster.allocationRejected(reason="unknown")
        raise
    self._allocations.append(newAllocation)
    self._index += 1
    self._reportAllocationGranted(newAllocation, allocationInfo)
    return newAllocation
def register(self, id, checkInCallback, doneCallback, progressCallback, failureCallback):
    """Start listening on ``id`` and remember the callbacks given for it.

    Asserts that the global lock is held and that ``id`` has not been
    registered before.
    """
    assert globallock.assertLocked()
    assert id not in self._registered
    self._server.listenOnID(id)
    callbacks = {
        "checkInCallback": checkInCallback,
        "doneCallback": doneCallback,
        "progressCallback": progressCallback,
        "failureCallback": failureCallback,
    }
    self._registered[id] = callbacks
def byIndex(self, index):
    """Return the first allocation whose ``index()`` equals ``index``.

    ``self._cleanup()`` is run before searching.

    Raises:
        IndexError: when no allocation has that index.
    """
    assert globallock.assertLocked()
    self._cleanup()
    found = next(
        (candidate for candidate in self._allocations if candidate.index() == index),
        None)
    if found is None:
        raise IndexError("No such allocation")
    return found
def _inauguratorDone(self):
    """Handle the inaugurator reporting that it has finished.

    Must be called in STATE_INAUGURATION_LABEL_PROVIDED (asserted).  Resets
    the slow reclaim counter and, when a state change callback is set,
    configures local boot for the host's primary MAC address and moves to
    STATE_INAUGURATION_DONE.
    """
    assert globallock.assertLocked()
    assert self._state == STATE_INAUGURATION_LABEL_PROVIDED
    self._slowReclaimCounter = 0
    if self._stateChangeCallback is None:
        return
    # NOTE(review): the state change is assumed to be guarded by the
    # callback check together with the tftpboot call - confirm.
    macAddress = self._hostImplementation.primaryMACAddress()
    self._tftpboot.configureForLocalBoot(macAddress)
    self._changeState(STATE_INAUGURATION_DONE)
def create(self, requirements, allocationInfo):
    """Create a new allocation for ``requirements`` and record it.

    Logs and broadcasts the request, verifies the image labels named by
    the requirements, chooses hosts through ``priority.Priority`` and
    wraps them in an ``allocation.Allocation``.  If constructing the
    allocation raises, the chosen hosts are put back in the free pool and
    the exception is re-raised.
    """
    logging.info("Allocation requested: '%(requirements)s' '%(allocationInfo)s'",
                 dict(requirements=requirements, allocationInfo=allocationInfo))
    self._broadcaster.allocationRequested(requirements, allocationInfo)
    assert globallock.assertLocked()
    self._cleanup()
    labels = [requirement['imageLabel'] for requirement in requirements.values()]
    self._verifyLabelsExistsInOsmosis(labels)
    chooser = priority.Priority(
        requirements=requirements,
        allocationInfo=allocationInfo,
        freePool=self._freePool,
        allocations=self._allocations)
    allocated = chooser.allocated()
    try:
        newAllocation = allocation.Allocation(
            index=self._index,
            requirements=requirements,
            allocationInfo=allocationInfo,
            allocated=allocated,
            broadcaster=self._broadcaster,
            freePool=self._freePool,
            hosts=self._hosts)
    except:
        # Deliberately catches everything: the hosts must be returned
        # whatever went wrong, and the error is re-raised right after.
        logging.error("Creating allocation fails, freeing up all allocated hosts")
        for host in allocated.values():
            self._freePool.put(host)
        raise
    self._allocations.append(newAllocation)
    self._index += 1
    grantedIDs = {
        name: stateMachine.hostImplementation().id()
        for name, stateMachine in newAllocation.allocated().iteritems()}
    logging.info("Allocation granted: %(allocated)s", dict(allocated=grantedIDs))
    return newAllocation
def byIndex(self, index):
    """Look up an allocation by the value its ``index()`` returns.

    Runs ``self._cleanup()`` first, then returns the first match.

    Raises:
        IndexError: when none of the allocations matches.
    """
    assert globallock.assertLocked()
    self._cleanup()
    matching = (entry for entry in self._allocations if entry.index() == index)
    result = next(matching, None)
    if result is None:
        raise IndexError("No such allocation")
    return result
def _nextTimeout(self):
    """Return the seconds left until the first timer in ``self._timers``.

    Returns None when there are no timers; a timer whose time has already
    passed yields 0 rather than a negative number.
    """
    assert globallock.assertLocked()
    if not self._timers:
        return None
    remaining = self._timers[0].when - time.time()
    # Clamp overdue timers to "fire immediately".
    return max(remaining, 0)
def destroy(self):
    """Tear this host down.

    Unregisters the host's IP address from the inaugurator, moves to
    STATE_DESTROYED, clears the destroy callback (asserted to be set) and
    finally destroys the host implementation.
    """
    assert globallock.assertLocked()
    hostID = self._hostImplementation.id()
    logging.info("destroying host %(host)s", dict(host=hostID))
    ipAddress = self._hostImplementation.ipAddress()
    self._inaugurate.unregister(ipAddress)
    self._changeState(STATE_DESTROYED)
    assert self._destroyCallback is not None
    self._destroyCallback = None
    self._hostImplementation.destroy()
def enqueue(self, label, sizeGB, callback):
    """Queue an image build request.

    When the builder is busy, ``callback`` is first invoked with
    ``None`` and a "waiting in queue" message.  The request is queued as a
    ``(label, sizeGB, callback)`` tuple in either case.
    """
    assert globallock.assertLocked()
    if self._busy:
        callback(None, "Image builder still busy with previous tasks, waiting in queue")
    request = (label, sizeGB, callback)
    self._queue.put(request)
def unassign(self):
    """Drop the state change callback and reclaim the host if needed.

    The callback must currently be set (asserted).  A host that was given
    a label, or that finished inauguration, is soft reclaimed.
    """
    assert globallock.assertLocked()
    assert self._stateChangeCallback is not None
    self._stateChangeCallback = None
    inaugurationStates = (STATE_INAUGURATION_LABEL_PROVIDED, STATE_INAUGURATION_DONE)
    if self._state in inaugurationStates:
        self._softReclaim()
def _nextTimeout(self):
    """Compute how long to wait for the first timer in ``self._timers``.

    Returns:
        None when no timer is pending, otherwise the number of seconds
        until ``self._timers[0].when``, never below 0.
    """
    assert globallock.assertLocked()
    if not self._timers:
        return None
    secondsLeft = self._timers[0].when - time.time()
    return max(secondsLeft, 0)
def _inauguratorDone(self):
    """React to the inaugurator signalling completion.

    Asserts the state is STATE_INAUGURATION_LABEL_PROVIDED and zeroes the
    slow reclaim counter.  With a state change callback set, local boot is
    configured for the primary MAC address and the state becomes
    STATE_INAUGURATION_DONE.
    """
    assert globallock.assertLocked()
    assert self._state == STATE_INAUGURATION_LABEL_PROVIDED
    self._slowReclaimCounter = 0
    hasOwner = self._stateChangeCallback is not None
    if hasOwner:
        # NOTE(review): both statements are assumed to belong to this
        # branch; the collapsed source does not show it - confirm.
        primaryMAC = self._hostImplementation.primaryMACAddress()
        self._tftpboot.configureForLocalBoot(primaryMAC)
        self._changeState(STATE_INAUGURATION_DONE)
def _timeout(self):
    """Handle a timer expiring for this host.

    A timeout during cold or soft reclamation triggers a cold reclaim;
    in any other state a soft reclaim is attempted.
    """
    assert globallock.assertLocked()
    logging.warning("Timeout for host %(id)s at state %(state)s",
                    dict(id=self._hostImplementation.id(), state=self._state))
    if self._state not in (STATE_COLD_RECLAMATION, STATE_SOFT_RECLAMATION):
        self._softReclaim()
        return
    self._coldReclaim()
def destroy(self):
    """Destroy this host state machine and its host implementation.

    Steps, in order: unregister the IP address from the inaugurator,
    change state to STATE_DESTROYED, clear the destroy callback (which is
    asserted to be set) and destroy the host implementation.
    """
    assert globallock.assertLocked()
    implementation = self._hostImplementation
    logging.info("destroying host %(host)s", dict(host=implementation.id()))
    self._inaugurate.unregister(implementation.ipAddress())
    self._changeState(STATE_DESTROYED)
    assert self._destroyCallback is not None
    self._destroyCallback = None
    self._hostImplementation.destroy()
def softReclaimFailed(self):
    """Fall back to cold reclamation after a failed soft reclamation.

    Only valid in STATE_SOFT_RECLAMATION or STATE_DESTROYED (asserted).
    For a destroyed host the failure is logged and ignored.
    """
    assert globallock.assertLocked()
    assert self._state in [STATE_SOFT_RECLAMATION, STATE_DESTROYED]
    if self._state == STATE_SOFT_RECLAMATION:
        logging.warning(
            "Soft reclaimation for host %(id)s failed, reverting to cold reclaimation. Previous"
            " label=%(previousLabel)s",
            dict(id=self._hostImplementation.id(), previousLabel=self._imageLabel))
        self._coldReclaim()
    else:
        logging.warning("Ignoring soft reclamation failure, node already destroyed")
def _softReclaimFailed(self):
    """Fall back to cold reclamation after a failed soft reclamation.

    Only valid in STATE_QUICK_RECLAIMATION_IN_PROGRESS or STATE_DESTROYED
    (asserted).  For a destroyed host the failure is logged and ignored.
    """
    assert globallock.assertLocked()
    assert self._state in [STATE_QUICK_RECLAIMATION_IN_PROGRESS, STATE_DESTROYED]
    if self._state == STATE_QUICK_RECLAIMATION_IN_PROGRESS:
        details = dict(id=self._hostImplementation.id(), state=self._state)
        logging.warning(
            "Soft reclaimation for host %(id)s failed, reverting to cold reclaimation",
            details)
        self._coldReclaim()
    else:
        logging.warning("Ignoring soft reclamation failure, node already destroyed")
def _inauguratorDone(self):
    """Handle the inaugurator reporting that it has finished.

    A message arriving in any state other than
    STATE_INAUGURATION_LABEL_PROVIDED is logged as an error and ignored.
    Otherwise the slow reclaim counter is reset and, when a state change
    callback is set, local boot is configured for the primary MAC address
    and the state becomes STATE_INAUGURATION_DONE.
    """
    assert globallock.assertLocked()
    if self._state != STATE_INAUGURATION_LABEL_PROVIDED:
        logging.error('Got an inauguration-done message for %(server)s in state %(state)s, ignoring.',
                      dict(server=self._hostImplementation.id(), state=self._state))
        return
    self._slowReclaimCounter = 0
    if self._stateChangeCallback is None:
        return
    # NOTE(review): the state change is assumed to be guarded by the
    # callback check together with the tftpboot call - confirm.
    macAddress = self._hostImplementation.primaryMACAddress()
    self._tftpboot.configureForLocalBoot(macAddress)
    self._changeState(STATE_INAUGURATION_DONE)
def assign(self, stateChangeCallback, imageLabel, imageHint):
    """Attach a state change callback and the image to inaugurate.

    Asserts that no callback is attached yet, that the new one is not
    None and that the host is neither inaugurated nor mid-inauguration.
    A host that has already checked in is given its label right away.
    """
    assert globallock.assertLocked()
    assert self._stateChangeCallback is None
    assert stateChangeCallback is not None
    busyStates = [STATE_INAUGURATION_DONE, STATE_INAUGURATION_LABEL_PROVIDED]
    assert self._state not in busyStates
    self._stateChangeCallback = stateChangeCallback
    self._imageLabel = imageLabel
    self._imageHint = imageHint
    if self._state == STATE_CHECKED_IN:
        self._provideLabel()
def _runOne(self):
    """Run the first timer in ``self._timers`` if its time has come.

    Does nothing when there are no timers or the first one is not yet due.
    An exception raised by the timer's callback is logged and swallowed.
    """
    assert globallock.assertLocked()
    if len(self._timers) == 0 or self._timers[0].when > time.time():
        return
    due = self._timers.pop(0)
    try:
        due.callback()
    except:
        # Catch-all on purpose: a failing callback is logged, not propagated.
        logging.exception("Timer '%(callback)s' raised", dict(callback=due.callback))
def _runOne(self):
    """Pop and invoke the head of ``self._timers`` once it is due.

    Returns without doing anything if the list is empty or the head's
    ``when`` is still in the future.  Whatever the callback raises is
    logged with its traceback and not propagated.
    """
    assert globallock.assertLocked()
    nothingPending = len(self._timers) == 0
    if nothingPending or self._timers[0].when > time.time():
        return
    expired = self._timers.pop(0)
    try:
        expired.callback()
    except:
        # Intentionally broad so one bad callback cannot escape.
        logging.exception("Timer '%(callback)s' raised", dict(callback=expired.callback))
def create(self, requirements):
    """Build an ``allocation.Allocation`` for ``requirements`` and record it.

    Runs ``self._cleanup()`` first.  The allocation receives the current
    index, which is then incremented.

    Returns:
        The newly created allocation.
    """
    assert globallock.assertLocked()
    self._cleanup()
    newAllocation = allocation.Allocation(
        index=self._index,
        requirements=requirements,
        broadcaster=self._broadcaster,
        buildImageThread=self._buildImageThread,
        imageStore=self._imageStore,
        allVMs=self._allVMs)
    self._allocations.append(newAllocation)
    self._index += 1
    return newAllocation
def assign(self, stateChangeCallback, imageLabel, imageHint):
    """Hand this host to an owner and remember which image it wants.

    Preconditions (asserted): no callback is currently set, the supplied
    callback is not None, and the state is neither
    STATE_INAUGURATION_DONE nor STATE_INAUGURATION_LABEL_PROVIDED.
    If the host is in STATE_CHECKED_IN the label is provided immediately.
    """
    assert globallock.assertLocked()
    assert self._stateChangeCallback is None
    assert stateChangeCallback is not None
    forbidden = [STATE_INAUGURATION_DONE, STATE_INAUGURATION_LABEL_PROVIDED]
    assert self._state not in forbidden
    self._stateChangeCallback = stateChangeCallback
    self._imageLabel = imageLabel
    self._imageHint = imageHint
    alreadyCheckedIn = self._state == STATE_CHECKED_IN
    if alreadyCheckedIn:
        self._provideLabel()
def _inauguratorCheckedIn(self):
    """Handle the inaugurator checking in.

    Provides the label when a state change callback is set, otherwise
    moves to STATE_CHECKED_IN.
    """
    assert globallock.assertLocked()
    # A check-in outside of a reclamation state is only logged, not
    # asserted, so processing continues regardless.
    reclamationStates = [STATE_COLD_RECLAMATION, STATE_SOFT_RECLAMATION]
    if self._state not in reclamationStates:
        logging.error("expected reclamation state, found %(state)s", dict(state=self._state))
    if self._stateChangeCallback is None:
        self._changeState(STATE_CHECKED_IN)
    else:
        self._provideLabel()
def _softReclaimFailed(self):
    """Revert to cold reclamation when the soft reclamation failed.

    The state must be STATE_QUICK_RECLAIMATION_IN_PROGRESS or
    STATE_DESTROYED (asserted); in the latter case the failure is only
    logged.
    """
    assert globallock.assertLocked()
    allowed = [STATE_QUICK_RECLAIMATION_IN_PROGRESS, STATE_DESTROYED]
    assert self._state in allowed
    stillReclaiming = self._state == STATE_QUICK_RECLAIMATION_IN_PROGRESS
    if not stillReclaiming:
        logging.warning("Ignoring soft reclamation failure, node already destroyed")
        return
    hostID = self._hostImplementation.id()
    logging.warning(
        "Soft reclaimation for host %(id)s failed, reverting to cold reclaimation",
        dict(id=hostID, state=self._state))
    self._coldReclaim()
def _buildImageThreadCallback(self, complete, message):
    """Receive progress from the image build thread.

    ``complete`` is None for an informational ``message`` (which is
    forwarded to the broadcaster), true when an image was built, and false
    when the build failed, in which case the allocation dies.  Once no
    more images are awaited the VMs are created.
    """
    assert globallock.assertLocked()
    if complete is None:
        self._broadcaster.allocationProviderMessage(self._index, message)
        return
    if not complete:
        self._die("unable to build image")
        return
    self._waitingForImages -= 1
    progress = ("QCOW2 built successfully. Waiting for %d more "
                "qcows to be built" % self._waitingForImages)
    self._broadcaster.allocationProviderMessage(self._index, progress)
    if self._waitingForImages == 0:
        self._createVMs()
def _buildImageThreadCallback(self, complete, message):
    """Callback through which image build results are delivered.

    Args:
        complete: None for a plain status ``message``; a true value when
            an image finished building; a false value on failure.
        message: text forwarded to the broadcaster when ``complete`` is
            None.
    """
    assert globallock.assertLocked()
    if complete is None:
        # Pure status update: pass it on and stop.
        self._broadcaster.allocationProviderMessage(self._index, message)
    elif complete:
        self._waitingForImages -= 1
        remaining = self._waitingForImages
        self._broadcaster.allocationProviderMessage(
            self._index,
            "QCOW2 built successfully. Waiting for %d more "
            "qcows to be built" % remaining)
        if self._waitingForImages == 0:
            self._createVMs()
    else:
        self._die("unable to build image")
def _inauguratorCheckedIn(self):
    """Handle the inaugurator checking in.

    Provides the label when a state change callback is set, otherwise
    moves to STATE_CHECKED_IN.
    """
    assert globallock.assertLocked()
    # Checking in outside of a reclamation state is reported but tolerated
    # (this used to be an assertion).
    expected = [STATE_SLOW_RECLAIMATION_IN_PROGRESS, STATE_QUICK_RECLAIMATION_IN_PROGRESS]
    if self._state not in expected:
        logging.error("expected reclamation state, found %(state)s", dict(state=self._state))
    if self._stateChangeCallback is None:
        self._changeState(STATE_CHECKED_IN)
    else:
        self._provideLabel()
def _timeout(self):
    """Handle a timer expiring for this host, depending on its state.

    Cold and soft reclamation timeouts both lead to a cold reclaim; a
    timeout after the label was provided leads to a soft reclaim; any
    other state is logged as an error and nothing else is done.
    """
    assert globallock.assertLocked()
    hostID = self._hostImplementation.id()
    if self._state == STATE_COLD_RECLAMATION:
        logging.warning("Timeout for host %(hostID)s in cold reclamation", dict(hostID=hostID))
        self._coldReclaim()
        return
    if self._state == STATE_SOFT_RECLAMATION:
        logging.warning("Timeout for host %(hostID)s in soft reclamation", dict(hostID=hostID))
        self._coldReclaim()
        return
    if self._state == STATE_INAUGURATION_LABEL_PROVIDED:
        logging.warning("Timeout for host %(hostID)s while inaugurating. Soft reclaiming...",
                        dict(hostID=hostID))
        self._softReclaim()
        return
    logging.error("Timeout for host %(hostID)s in an invalid state: %(state)s",
                  dict(state=self._state, hostID=hostID))
def _buildImageThreadCallback(self, complete, message):
    """Receive progress from the image build thread.

    Args:
        complete: None for an informational ``message`` (forwarded to the
            broadcaster); a true value when an image was built; a false
            value when the build failed, in which case the allocation
            dies.
        message: text forwarded to the broadcaster when ``complete`` is
            None.

    When the last awaited image is built, the VMs are created only if the
    allocation is still alive (``self.dead()`` returns None); otherwise
    the event is just logged.
    """
    assert globallock.assertLocked()
    if complete is None:
        self._broadcaster.allocationProviderMessage(self._index, message)
        return
    if not complete:
        self._die("unable to build image")
        return
    self._waitingForImages -= 1
    self._broadcaster.allocationProviderMessage(
        self._index,
        "QCOW2 built successfully. Waiting for %d more "
        "qcows to be built" % self._waitingForImages)
    if self._waitingForImages != 0:
        return
    isAlive = self.dead() is None
    if isAlive:
        logging.info("All labels required for the allocation were built. Starting VMs...")
        self._createVMs()
    else:
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling and emits the same record.
        logging.warning("An build-image job that belongs to a dead allocation has just finished")
def _inauguratorFailed(self, message):
    """Handle a failure report from the inaugurator.

    Args:
        message: either a dict with "text" and "code" keys and an optional
            "isCritical" key, or (legacy format) the failure text itself.

    A dict without "isCritical" is considered critical when its code is
    one of the SSD/HDD "device not found" failure codes.  Critical
    failures destroy the host; all others go through
    ``self._handleInaugurationFailure()``.
    """
    assert globallock.assertLocked()
    hostID = self._hostImplementation.id()
    isCritical = False
    if isinstance(message, dict):
        text = message["text"]
        code = message["code"]
        if "isCritical" in message:
            isCritical = message["isCritical"]
        else:
            isCritical = code in (talktoserver.TalkToServer.FAILURE_CODE_SSD_DEVICE_NOT_FOUND,
                                  talktoserver.TalkToServer.FAILURE_CODE_HDD_DEVICE_NOT_FOUND)
    else:
        # Backward compatibility: the message is the failure text itself.
        # Any non-dict value is accepted (not only ``str``), so that e.g.
        # a unicode message no longer leaves ``text``/``code`` unbound.
        text = message
        code = None
    logging.error("Inaugurator of '%(hostID)s' failed: '%(text)s', code: %(code)s",
                  dict(hostID=hostID, text=text, code=code))
    if isCritical:
        logging.warning("The inauguration failure is critical, destroing the host...")
        self.destroy(reason=text)
    else:
        self._handleInaugurationFailure()
def create(self, requirements, allocationInfo):
    """Create a new allocation for ``requirements`` and record it.

    Logs the request, verifies the image labels named by the
    requirements, chooses hosts through ``priority.Priority`` and wraps
    them in an ``allocation.Allocation``.  If constructing the allocation
    raises, the chosen hosts are put back in the free pool and the
    exception is re-raised.
    """
    logging.info(
        "Allocation requested: '%(requirements)s' '%(allocationInfo)s'",
        dict(requirements=requirements, allocationInfo=allocationInfo))
    assert globallock.assertLocked()
    self._cleanup()
    labels = [requirement['imageLabel'] for requirement in requirements.values()]
    self._verifyLabelsExistsInOsmosis(labels)
    chooser = priority.Priority(
        requirements=requirements,
        allocationInfo=allocationInfo,
        freePool=self._freePool,
        allocations=self._allocations)
    allocated = chooser.allocated()
    try:
        newAllocation = allocation.Allocation(
            index=self._index,
            requirements=requirements,
            allocationInfo=allocationInfo,
            allocated=allocated,
            broadcaster=self._broadcaster,
            freePool=self._freePool)
    except:
        # Deliberately catches everything: the hosts must be returned
        # whatever went wrong, and the error is re-raised right after.
        logging.error("Creating allocation fails, freeing up all allocated hosts")
        for host in allocated.values():
            self._freePool.put(host)
        raise
    self._allocations.append(newAllocation)
    self._index += 1
    grantedIDs = {
        name: stateMachine.hostImplementation().id()
        for name, stateMachine in newAllocation.allocated().iteritems()}
    logging.info("Allocation granted: %(allocated)s", dict(allocated=grantedIDs))
    return newAllocation
def _hostSelfDestructed(self, hostStateMachine):
    """Forget a host that destroyed itself.

    The host is destroyed in ``self._hosts`` and then removed from the
    pool.
    """
    assert globallock.assertLocked()
    destroyed = hostStateMachine
    self._hosts.destroy(destroyed)
    self._pool.remove(destroyed)
def cancelAllByTag(self, tag):
    """Drop every timer whose tag is the very same object as ``tag``.

    Tags are compared by identity, not equality.  The event is set
    afterwards.
    """
    assert globallock.assertLocked()
    remaining = [pending for pending in self._timers if pending.tag is not tag]
    self._timers = remaining
    self._event.set()
def scheduleAt(self, when, callback, tag):
    """Add a timer that calls ``callback`` at time ``when``.

    The timer list is kept sorted by ``when``; the event is set after the
    timer was added.
    """
    assert globallock.assertLocked()
    newTimer = _Timer(when=when, callback=callback, tag=tag)
    self._timers.append(newTimer)
    self._timers.sort(key=lambda pending: pending.when)
    self._event.set()
def state(self):
    """Return the current state; the global lock must be held."""
    assert globallock.assertLocked()
    current = self._state
    return current
def unassign(self):
    """Detach the current owner from this host.

    Clears the state change callback (asserted to be set) and soft
    reclaims the host when it is in STATE_INAUGURATION_LABEL_PROVIDED or
    STATE_INAUGURATION_DONE.
    """
    assert globallock.assertLocked()
    assert self._stateChangeCallback is not None
    self._stateChangeCallback = None
    needsReclaim = self._state in (STATE_INAUGURATION_LABEL_PROVIDED, STATE_INAUGURATION_DONE)
    if needsReclaim:
        self._softReclaim()
def _timeout(self):
    """Handle a timer expiring: log it and cold reclaim the host."""
    assert globallock.assertLocked()
    details = dict(id=self._hostImplementation.id(), state=self._state)
    logging.warning("Timeout for host %(id)s at state %(state)s", details)
    self._coldReclaim()
def all(self):
    """Yield the host state machines in the pool, one by one.

    This is a generator, so the lock assertion runs when iteration
    starts, not when ``all()`` is called.
    """
    assert globallock.assertLocked()
    for pooled in self._pool:
        yield pooled
def enqueue(self, label, sizeGB, callback):
    """Put a ``(label, sizeGB, callback)`` build request on the queue.

    If the builder is currently busy the caller is told so first, through
    ``callback(None, <message>)``.
    """
    assert globallock.assertLocked()
    if self._busy:
        notice = "Image builder still busy with previous tasks, waiting in queue"
        callback(None, notice)
    self._queue.put((label, sizeGB, callback))
def takeOut(self, hostStateMachine):
    """Remove ``hostStateMachine`` from the pool."""
    assert globallock.assertLocked()
    pool = self._pool
    pool.remove(hostStateMachine)
def unregister(self, ipAddress):
    """Forget the callbacks registered for ``ipAddress``.

    The address must currently be registered (asserted).
    """
    assert globallock.assertLocked()
    registry = self._registered
    assert ipAddress in registry
    del registry[ipAddress]
def all(self):
    """Run ``self._cleanup()`` and return the allocations list itself.

    The returned object is ``self._allocations``, not a copy.
    """
    assert globallock.assertLocked()
    self._cleanup()
    current = self._allocations
    return current
def _filename(self, imageLabel, sizeGB):
    """Return the qcow2 path for an image label and size.

    The file is named ``<imageLabel>____<sizeGB>GB.qcow2`` and lives under
    ``config.IMAGE_STORE_DIRECTORY``.
    """
    assert globallock.assertLocked()
    directory = config.IMAGE_STORE_DIRECTORY
    basename = "%s____%dGB.qcow2" % (imageLabel, sizeGB)
    return os.path.join(directory, basename)
def unregister(self, ipAddress):
    """Remove the registration entry of ``ipAddress``.

    Asserts the global lock is held and that the address is registered.
    """
    assert globallock.assertLocked()
    known = ipAddress in self._registered
    assert known
    del self._registered[ipAddress]
def register(self, ipAddress, checkInCallback, doneCallback):
    """Remember the check-in and done callbacks for ``ipAddress``.

    The address must not be registered already (asserted).
    """
    assert globallock.assertLocked()
    assert ipAddress not in self._registered
    callbacks = {
        "checkInCallback": checkInCallback,
        "doneCallback": doneCallback,
    }
    self._registered[ipAddress] = callbacks
def scheduleAt(self, when, callback, tag):
    """Schedule ``callback`` for time ``when`` under the given ``tag``.

    Appends a ``_Timer``, re-sorts the timers by their ``when`` and sets
    the event.
    """
    assert globallock.assertLocked()
    timers = self._timers
    timers.append(_Timer(when=when, callback=callback, tag=tag))
    timers.sort(key=lambda entry: entry.when)
    self._event.set()
def cancelAllByTag(self, tag):
    """Cancel all timers carrying ``tag`` (matched by object identity).

    The timer list is replaced by a new list without those timers, and
    the event is set.
    """
    assert globallock.assertLocked()
    kept = []
    for candidate in self._timers:
        if candidate.tag is not tag:
            kept.append(candidate)
    self._timers = kept
    self._event.set()
def all(self):
    """Return ``self._allocations`` after running ``self._cleanup()``.

    No copy is made; callers receive the internal list object.
    """
    assert globallock.assertLocked()
    self._cleanup()
    allocations = self._allocations
    return allocations
def put(self, hostStateMachine):
    """Add a host to the pool and notify the put listeners.

    The host's destroy callback is pointed at
    ``self._hostSelfDestructed`` before the listeners are called.
    """
    assert globallock.assertLocked()
    self._pool.append(hostStateMachine)
    hostStateMachine.setDestroyCallback(self._hostSelfDestructed)
    for notify in self._putListeners:
        notify()
def unregister(self, id):
    """Forget the callbacks of ``id`` and stop listening on it.

    ``id`` must currently be registered (asserted).
    """
    assert globallock.assertLocked()
    registry = self._registered
    assert id in registry
    del registry[id]
    self._server.stopListeningOnID(id)
def register(self, ipAddress, checkInCallback, doneCallback):
    """Register the two inauguration callbacks of ``ipAddress``.

    Asserts the global lock is held and that the address is new.
    """
    assert globallock.assertLocked()
    alreadyKnown = ipAddress in self._registered
    assert not alreadyKnown
    entry = {"checkInCallback": checkInCallback, "doneCallback": doneCallback}
    self._registered[ipAddress] = entry