def manage(self):
    """Manage machine states that depend on state changes at the 1and1 cloud site.

    Run once per cycle. The method

    * fetches the machines currently listed at 1and1,
    * walks over every machine of this site in the machine registry and
      performs the status transition that matches the 1and1 power state,
    * registers every 1and1 machine that is not yet in the registry, and
    * writes the current machine counts to the log and the JSON log.

    :return: None
    """
    try:
        oao_machines = self.getOneAndOneMachines()
    except Exception:
        # without the machine list from the site nothing can be managed this cycle
        return

    # loop over all machines in machine registry
    for mid in self.mr.getMachines(self.siteName):
        machine = self.mr.machines[mid]

        # remove the corresponding machine from the 1and1 machine list;
        # a machine that is unknown at 1and1 is dropped from the registry
        try:
            oao_machine = oao_machines.pop(machine[self.reg_site_server_id])
        except KeyError:
            self.mr.removeMachine(mid)
            continue

        # check for status which is handled by integration adapter
        if machine[self.mr.regStatus] in [self.mr.statusIntegrating]:
            continue

        oao_state = oao_machine[self.status][self.state]

        # manage machine in status booting
        if machine[self.mr.regStatus] == self.mr.statusBooting:
            # if the 1and1 machine is powered off
            if oao_state == self.state_powered_off:
                # check if a private network is assigned, if not then assign the right network
                if self.network not in oao_machine:
                    try:
                        self.assignPrivateNetwork(mid=mid)
                    except Exception:
                        # stop processing registry machines for this cycle
                        break
                # if the private network is assigned to the 1and1 machine, add it to the machine registry
                elif self.reg_site_server_network not in machine:
                    # NOTE(review): this reads the network from the registry entry itself;
                    # presumably `oao_machine[self.network]` was intended - confirm the
                    # expected value shape before changing it.
                    machine[self.reg_site_server_network] = machine[self.network]
                # if everything is done, start the machine
                else:
                    try:
                        self.modifyMachineStatus(mid=mid, action=self.command_power_on)
                    except Exception:
                        break
                    machine[self.reg_site_server_status] = self.state_powering_on
            # if the 1and1 machine is powered on, update the ip address, state and the condor name;
            # at the end update the machine status in the machine registry
            elif oao_state == self.state_powered_on:
                machine[self.reg_site_server_status] = self.state_powered_on
                machine[self.reg_site_server_ip] = oao_machine[self.ips][0][self.ip]
                machine[self.reg_site_server_condor_name] = self.getCondorName(mid=mid)
                self.mr.updateMachineStatus(mid=mid, newStatus=self.mr.statusUp)

        # manage machine in status working or pending disintegration
        elif machine[self.mr.regStatus] in (self.mr.statusWorking,
                                            self.mr.statusPendingDisintegration):
            # if the 1and1 machine is powered on and the current time lies between
            # the configured end time and start time, move the machine to disintegrating
            if oao_state == self.state_powered_on:
                start_time = datetime.datetime.strptime(self.getConfig(self.configTimeStart), "%H:%M").time()
                stop_time = datetime.datetime.strptime(self.getConfig(self.configTimeEnd), "%H:%M").time()
                current_time = datetime.datetime.now().time()
                if stop_time < current_time < start_time:
                    self.mr.updateMachineStatus(mid=mid, newStatus=self.mr.statusDisintegrating)
            # if the 1and1 machine is powering off or powered off, move it to disintegrating
            elif oao_state in [self.state_powering_off, self.state_powered_off]:
                machine[self.reg_site_server_status] = oao_state
                self.mr.updateMachineStatus(mid=mid, newStatus=self.mr.statusDisintegrating)

        # manage machine in status disintegrating
        elif machine[self.mr.regStatus] == self.mr.statusDisintegrating:
            # if the machine is still powered on, shut it off
            if oao_state == self.state_powered_on:
                try:
                    self.modifyMachineStatus(mid=mid, action=self.command_power_off)
                except Exception:
                    break
                machine[self.reg_site_server_status] = self.state_powering_off

        # manage machine in status disintegrated
        elif machine[self.mr.regStatus] == self.mr.statusDisintegrated:
            # if the 1and1 machine is powered off, set it to status down
            if oao_state == self.state_powered_off:
                machine[self.reg_site_server_status] = self.state_powered_off
                self.mr.updateMachineStatus(mid=mid, newStatus=self.mr.statusDown)

        # manage machine in status down
        elif machine[self.mr.regStatus] == self.mr.statusDown:
            # if the 1and1 machine is powered off and the delete option is enabled, delete the 1and1 machine
            if oao_state == self.state_powered_off:
                if self.getConfig(self.configDelete) is True:
                    try:
                        self.modifyMachineStatus(mid=mid, action=self.command_delete)
                    except Exception:
                        break
                    machine[self.reg_site_server_status] = self.state_deleting

    # add all machines remaining in machine list from 1&1
    # (after a `break` above this list may still contain machines that are registered,
    # hence the check against the ids already known to the registry)
    known_ids = {entry[self.reg_site_server_id]
                 for entry in self.mr.getMachines(self.siteName).values()}
    for oao_id, oao_info in oao_machines.items():
        # check if machine is already in machine registry
        if oao_id in known_ids:
            continue

        # create new machine in machine registry
        mid = self.mr.newMachine()
        new_machine = self.mr.machines[mid]

        # set some machine specific entries in machine registry
        new_machine[self.mr.regSite] = self.siteName
        new_machine[self.mr.regSiteType] = self.siteType
        # first configured machine type; list() keeps this working on Python 3 dict views
        new_machine[self.mr.regMachineType] = list(self.getConfig(self.configMachines).keys())[0]
        new_machine[self.reg_site_server_name] = oao_info[self.name]
        new_machine[self.reg_site_server_id] = oao_info[self.id]
        new_machine[self.reg_site_server_status] = oao_info[self.status][self.state]
        new_machine[self.reg_site_server_datacenter] = oao_info[self.datacenter][self.id]
        new_machine[self.reg_site_server_network] = \
            self.getIDs(key=self.datacenter, value=oao_info[self.datacenter][self.id])[2]
        new_machine[self.reg_site_server_condor_name] = ""
        self.mr.updateMachineStatus(mid, self.mr.statusBooting)
        known_ids.add(oao_info[self.id])

    # add current amounts of machines to Json log file
    self.logger.info("Current machines running at %s: %d" %
                     (self.siteName,
                      self.runningMachinesCount[list(self.getConfig(self.configMachines).keys())[0]]))
    json_log = JsonLog()
    json_log.addItem(self.siteName, "machines_requested",
                     int(len(self.getSiteMachines(status=self.mr.statusBooting)) +
                         len(self.getSiteMachines(status=self.mr.statusUp)) +
                         len(self.getSiteMachines(status=self.mr.statusIntegrating))))
    json_log.addItem(self.siteName, "condor_nodes",
                     len(self.getSiteMachines(status=self.mr.statusWorking)))
    json_log.addItem(self.siteName, "condor_nodes_draining",
                     len(self.getSiteMachines(status=self.mr.statusPendingDisintegration)))
def manage(self):
    """Managing machine states, run once per cycle

    This function takes care of the machine status and manages state changes:
        booting        -> up
        disintegrating -> disintegrated

    It uses machine states in OpenStack and the machine registry machine states
    to trigger state changes.

    :return: None
    """
    nova_machines = self.__getNovaMachines()

    # Look for each machine in machine registry and perform necessary status change(s).
    #
    # In the process we delete each handled machine from nova_machines.
    # As a result, nova_machines then contains a list of MISSING machines.
    #
    # -> Add these machines to the machines registry.
    #    This can happen, if (somehow) machines boot up at OpenStack without being requested...
    for mid in self.mr.getMachines(self.siteName):
        # machine not listed in OpenStack -> remove from machine registry
        if mid not in nova_machines:
            self.mr.removeMachine(mid)
            continue

        nova_state = nova_machines[mid][self.reg_site_server_status]

        # check if condor name is set; default it to the machine id
        # (must be stored on the machine's own registry entry, not on the registry itself)
        if self.reg_site_server_condor_name not in self.mr.machines[mid]:
            self.mr.machines[mid][self.reg_site_server_condor_name] = mid

        # if machine is in error (or shutoff) state, move it to disintegrating
        if nova_state in [self.reg_site_server_status_error,
                          self.reg_site_server_status_shutoff]:
            self.mr.machines[mid][self.reg_site_server_status] = \
                self.reg_site_server_status_error
            self.mr.updateMachineStatus(mid, self.mr.statusDisintegrating)

        # re-read the registry status, it may just have been changed above
        reg_status = self.mr.machines[mid][self.mr.regStatus]

        # status handled by Integration Adapter
        if reg_status in [self.mr.statusIntegrating,
                          self.mr.statusWorking,
                          self.mr.statusPendingDisintegration]:
            del nova_machines[mid]

        # if status = down, machine was terminated at OpenStack -> remove from machine registry
        elif reg_status == self.mr.statusDown:
            self.mr.removeMachine(mid)
            continue

        # check if machine could be started correctly
        elif reg_status == self.mr.statusBooting:
            # they started correctly when OpenStack state changes to active
            if nova_state == self.reg_site_server_status_active:
                self.mr.updateMachineStatus(mid, self.mr.statusUp)
                self.mr.machines[mid][self.reg_site_server_status] = nova_state
            if mid in nova_machines:
                del nova_machines[mid]

        # check if machine is disintegrating
        elif reg_status == self.mr.statusDisintegrating:
            # check if machine is in status active (OpenStack status), if so, send stop command
            if nova_state == self.reg_site_server_status_active:
                self.__openstackStopMachine(mid)
            # if machine is in status shutoff (OpenStack), update to disintegrated
            if nova_state == self.reg_site_server_status_shutoff:
                self.mr.updateMachineStatus(mid, self.mr.statusDisintegrated)
            if mid in nova_machines:
                del nova_machines[mid]

    # add running nova machines and information to machine registry
    for mid in nova_machines:
        if mid not in self.mr.getMachines(self.siteName):
            new = self.mr.newMachine(mid)
            self.mr.machines[new][self.mr.regSite] = self.siteName
            self.mr.machines[new][self.mr.regSiteType] = self.siteType
            # TODO: handle different machine types
            self.mr.machines[new][self.mr.regMachineType] = self._machineType
            self.mr.machines[new][self.reg_site_server_id] = \
                nova_machines[mid][self.reg_site_server_id]
            self.mr.machines[new][self.reg_site_server_status] = \
                nova_machines[mid][self.reg_site_server_status]
            self.mr.machines[new][self.reg_site_server_name] = mid
            self.mr.machines[new][self.reg_site_server_condor_name] = mid

            # address the new entry through the id returned by newMachine()
            if nova_machines[mid][self.reg_site_server_status] == self.reg_site_server_status_error:
                self.mr.updateMachineStatus(new, self.mr.statusDisintegrating)
            else:
                self.mr.updateMachineStatus(new, self.mr.statusWorking)

    if self.getConfig(self.configUseTime):
        self.__openstackTimeDepStopMachine()

    ###
    # Write Json log file:
    # requested machines, nodes, draining nodes.
    ###
    # list() keeps the "first configured machine type" lookup working on Python 3 dict views
    first_machine_type = list(self.getConfig(self.configMachines).keys())[0]
    self.logger.info("Current machines running at %s: %d" %
                     (self.siteName, self.runningMachinesCount[first_machine_type]))
    json_log = JsonLog()
    json_log.addItem(self.siteName, "machines_requested",
                     int(len(self.getSiteMachines(status=self.mr.statusBooting)) +
                         len(self.getSiteMachines(status=self.mr.statusUp)) +
                         len(self.getSiteMachines(status=self.mr.statusIntegrating))))
    json_log.addItem(self.siteName, "condor_nodes",
                     len(self.getSiteMachines(status=self.mr.statusWorking)))
    json_log.addItem(self.siteName, "condor_nodes_draining",
                     len(self.getSiteMachines(status=self.mr.statusPendingDisintegration)))
def manage(self):
    """Manage machine states that depend on state changes at the EC2 cloud site.

    Run once per cycle. The method

    * fetches the instance status information from EC2,
    * walks over every machine of this site in the machine registry and
      performs the matching status transition, collecting machines that have
      to be stopped or terminated,
    * issues the stop/terminate requests,
    * registers every remaining EC2 instance that is not a configured service
      machine, and
    * writes the current machine counts to the log and the JSON log.

    :return: None
    """
    # get machines from EC2
    ec2_machines_status, ec2_machines_list = self.getEC2Machines()
    machines_to_stop = list()
    machines_to_terminate = list()

    # if something fails while receiving response from EC2 a type "None" will be returned
    if ec2_machines_status is None:
        return

    for mid in self.mr.getMachines(self.siteName):
        machine = self.mr.machines[mid]
        reg_status = machine[self.mr.regStatus]

        # check for status which is handled by integration adapter
        if reg_status in [self.mr.statusUp,
                          self.mr.statusIntegrating,
                          self.mr.statusWorking,
                          self.mr.statusPendingDisintegration]:
            # the instance may no longer be reported by EC2; a plain `del` would
            # raise KeyError and abort the whole management cycle
            ec2_machines_status.pop(machine[self.reg_site_server_id], None)

        # down
        # if the machine is down in the machine registry and no longer listed on the EC2 cloud,
        # remove it from the machine registry and clean up
        elif reg_status == self.mr.statusDown:
            if machine[self.reg_site_server_id] not in ec2_machines_list:
                self.mr.removeMachine(mid)
                self.cleanupEC2()
                continue

        # disintegrated: terminate once the instance is no longer reported in the status list
        elif reg_status == self.mr.statusDisintegrated:
            if machine[self.reg_site_server_id] not in ec2_machines_status:
                machines_to_terminate.append(mid)
                self.mr.updateMachineStatus(mid, self.mr.statusDown)
            else:
                del ec2_machines_status[machine[self.reg_site_server_id]]

        # disintegrating: schedule the instance to be stopped while it is still reported
        elif reg_status == self.mr.statusDisintegrating:
            if machine[self.reg_site_server_id] in ec2_machines_status:
                machines_to_stop.append(mid)
                del ec2_machines_status[machine[self.reg_site_server_id]]

        # TODO: use the status transition from up to integrating here instead of the one used in
        #       integration adapter.onEvent

        # booting -> up
        # check if machine status booting
        elif reg_status == self.mr.statusBooting:
            if machine[self.reg_site_server_id] in ec2_machines_status:
                self.mr.updateMachineStatus(mid, self.mr.statusUp)
            else:
                continue

            instance_status = \
                ec2_machines_status[machine[self.reg_site_server_id]][self.ec2_instance_status]["Status"]
            if instance_status == "initializing":
                pass
            elif instance_status == "ok":
                # NOTE(review): the machine was already moved to "up" above, so this second
                # update looks redundant - kept to preserve the existing behaviour.
                self.mr.updateMachineStatus(mid, self.mr.statusUp)
            del ec2_machines_status[machine[self.reg_site_server_id]]

    self.terminateEC2Machine(self.stop, machines_to_stop)
    self.terminateEC2Machine(self.terminate, machines_to_terminate)

    # add all machines remaining in machine list from EC2
    for instance_id in ec2_machines_status:
        # if machine is listed in the service machine section, skip it!
        if instance_id not in self.getConfig(self.configServiceIDs):
            # create new machine in machine registry
            mid = self.mr.newMachine()
            self.mr.machines[mid][self.mr.regSite] = self.siteName
            self.mr.machines[mid][self.mr.regSiteType] = self.siteType
            self.mr.machines[mid][self.mr.regMachineType] = self.ec2  # machineType
            self.mr.machines[mid][self.reg_site_server_id] = instance_id
            self.mr.machines[mid][self.reg_site_server_condor_name] = instance_id
            self.mr.updateMachineStatus(mid, self.mr.statusBooting)

    # add current amounts of machines to Json log file
    self.logger.info("Current machines running at %s: %d" %
                     (self.siteName, self.runningMachinesCount[self._machineType]))
    json_log = JsonLog()
    json_log.addItem(self.siteName, "machines_requested",
                     int(len(self.getSiteMachines(status=self.mr.statusBooting)) +
                         len(self.getSiteMachines(status=self.mr.statusUp)) +
                         len(self.getSiteMachines(status=self.mr.statusIntegrating))))
    json_log.addItem(self.siteName, "condor_nodes",
                     len(self.getSiteMachines(status=self.mr.statusWorking)))
    json_log.addItem(self.siteName, "condor_nodes_draining",
                     len(self.getSiteMachines(status=self.mr.statusPendingDisintegration)))
def manage(self):
    """Managing machine states, run once per cycle

    This function takes care of the machine status and manages state changes:
        booting        -> up
        disintegrating -> disintegrated

    It uses machine states in OpenStack and the machine registry machine states
    to trigger state changes.

    :return: None
    """
    nova_machines = self.__getNovaMachines()

    # Look for each machine in machine registry and perform necessary status change(s).
    #
    # In the process we delete each handled machine from nova_machines.
    # As a result, nova_machines then contains a list of MISSING machines.
    #
    # -> Add these machines to the machines registry.
    #    This can happen, if (somehow) machines boot up at OpenStack without being requested...
    for mid in self.mr.getMachines(self.siteName):
        # machine not listed in OpenStack -> remove from machine registry
        if mid not in nova_machines:
            self.mr.removeMachine(mid)
            continue

        nova_state = nova_machines[mid][self.reg_site_server_status]

        # check if condor name is set; default it to the machine id
        # (must be stored on the machine's own registry entry, not on the registry itself)
        if self.reg_site_server_condor_name not in self.mr.machines[mid]:
            self.mr.machines[mid][self.reg_site_server_condor_name] = mid

        # if machine is in error (or shutoff) state, move it to disintegrating
        if nova_state in [self.reg_site_server_status_error,
                          self.reg_site_server_status_shutoff]:
            self.mr.machines[mid][self.reg_site_server_status] = \
                self.reg_site_server_status_error
            self.mr.updateMachineStatus(mid, self.mr.statusDisintegrating)

        # re-read the registry status, it may just have been changed above
        reg_status = self.mr.machines[mid][self.mr.regStatus]

        # status handled by Integration Adapter
        if reg_status in [self.mr.statusIntegrating,
                          self.mr.statusWorking,
                          self.mr.statusPendingDisintegration]:
            del nova_machines[mid]

        # if status = down, machine was terminated at OpenStack -> remove from machine registry
        elif reg_status == self.mr.statusDown:
            self.mr.removeMachine(mid)
            continue

        # check if machine could be started correctly
        elif reg_status == self.mr.statusBooting:
            # they started correctly when OpenStack state changes to active
            if nova_state == self.reg_site_server_status_active:
                self.mr.updateMachineStatus(mid, self.mr.statusUp)
                self.mr.machines[mid][self.reg_site_server_status] = nova_state
            if mid in nova_machines:
                del nova_machines[mid]

        # check if machine is disintegrating
        elif reg_status == self.mr.statusDisintegrating:
            # check if machine is in status active (OpenStack status), if so, send stop command
            if nova_state == self.reg_site_server_status_active:
                self.__openstackStopMachine(mid)
            # if machine is in status shutoff (OpenStack), update to disintegrated
            if nova_state == self.reg_site_server_status_shutoff:
                self.mr.updateMachineStatus(mid, self.mr.statusDisintegrated)
            if mid in nova_machines:
                del nova_machines[mid]

    # add running nova machines and information to machine registry
    for mid in nova_machines:
        if mid not in self.mr.getMachines(self.siteName):
            new = self.mr.newMachine(mid)
            self.mr.machines[new][self.mr.regSite] = self.siteName
            self.mr.machines[new][self.mr.regSiteType] = self.siteType
            # TODO: handle different machine types
            self.mr.machines[new][self.mr.regMachineType] = self._machineType
            self.mr.machines[new][self.reg_site_server_id] = \
                nova_machines[mid][self.reg_site_server_id]
            self.mr.machines[new][self.reg_site_server_status] = \
                nova_machines[mid][self.reg_site_server_status]
            self.mr.machines[new][self.reg_site_server_name] = mid
            self.mr.machines[new][self.reg_site_server_condor_name] = mid

            # address the new entry through the id returned by newMachine()
            if nova_machines[mid][self.reg_site_server_status] == self.reg_site_server_status_error:
                self.mr.updateMachineStatus(new, self.mr.statusDisintegrating)
            else:
                self.mr.updateMachineStatus(new, self.mr.statusWorking)

    if self.getConfig(self.configUseTime):
        self.__openstackTimeDepStopMachine()

    ###
    # Write Json log file:
    # requested machines, nodes, draining nodes.
    ###
    # list() keeps the "first configured machine type" lookup working on Python 3 dict views
    first_machine_type = list(self.getConfig(self.configMachines).keys())[0]
    self.logger.info("Current machines running at %s: %d" %
                     (self.siteName, self.runningMachinesCount[first_machine_type]))
    json_log = JsonLog()
    json_log.addItem(self.siteName, "machines_requested",
                     int(len(self.getSiteMachines(status=self.mr.statusBooting)) +
                         len(self.getSiteMachines(status=self.mr.statusUp)) +
                         len(self.getSiteMachines(status=self.mr.statusIntegrating))))
    json_log.addItem(self.siteName, "condor_nodes",
                     len(self.getSiteMachines(status=self.mr.statusWorking)))
    json_log.addItem(self.siteName, "condor_nodes_draining",
                     len(self.getSiteMachines(status=self.mr.statusPendingDisintegration)))