def _get_vms_net_io():
    dom_recs = self.get_dom_records(nocache)
    vif_metrics_recs = self.server.xenapi.VIF_metrics.get_all_records()
    log.debug("[API]", self.node.get_hostname(), "vif_metrics_recs=", vif_metrics_recs)

    vifs_doms_metrics = dict()
    for dom_rec in dom_recs.values():
        if dom_rec['power_state'] == "Halted":
            continue  # Discard non instantiated vm

        vifs_metrics = list()
        for vif in dom_rec['VIFs']:
            vifs_metrics.append({
                'Rx': int(float(vif_metrics_recs[vif]['io_total_read_kbs']) * 1024),
                'Tx': int(float(vif_metrics_recs[vif]['io_total_write_kbs']) * 1024)
            })
        vifs_doms_metrics[dom_rec['name_label']] = vifs_metrics

    return vifs_doms_metrics
def __init__(self, hostname):
    """Instantiate a Node object.

    This constructor opens SSH and XenAPI connections to the node.
    If the node is not online, this will fail with an uncaught exception
    from paramiko or XenAPI.
    """
    log.info("Connecting to", hostname, "...")
    self.hostname = hostname

    # Open SSH channel (localhost uses popen2)
    if not self.is_local_node():
        self.ssh = paramiko.SSHClient()
        self.ssh.load_system_host_keys()
        #self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.ssh.connect(hostname, 22, 'root', timeout=2)

    # Open Xen-API Session
    if self.is_local_node():
        # Use unix socket on localhost
        self.server = XenAPI.Session("httpu:///var/run/xend/xen-api.sock")
        log.debug("[API]", "Using unix socket.")
    else:
        self.server = XenAPI.Session("http://" + hostname + ":9363")
        log.debug("[API]", "Using tcp socket.")
    self.server.login_with_password("root", "")

    # Prepare connection with legacy API
    self.__legacy_server = None

    # Prepare metrics
    self.__metrics = None

    # Prepare cache
    self._cache = datacache.DataCache()
    self._last_refresh = 0
def check_autostart(self):
    """Perform a sanity check of the autostart links."""
    log.info("Checking autostart links on", self.get_hostname(), "...")
    safe = True

    # Get all autostart links on the node
    links = [link.strip() for link in self.run("ls /etc/xen/auto/").readlines()]
    log.debug("[NODE]", self.hostname, "links=", links)

    # Get all running VMs
    running_vms = [vm.name for vm in self.get_vms()]
    log.debug("[NODE]", self.hostname, "running_vms=", running_vms)

    # Compute autostart links without a running VM
    link_without_vm = list(Set(links) - Set(running_vms))
    if len(link_without_vm):
        log.info(" ** WARNING : Found autostart link without running VM :\n\t",
                 "\n\t".join(link_without_vm))
        safe = False

    # Compute running VMs without an autostart link
    vm_without_link = list(Set(running_vms) - Set(links))
    if len(vm_without_link):
        log.info(" ** WARNING : Found running VM without autostart link :\n\t",
                 "\n\t".join(vm_without_link))
        safe = False

    return safe
def getFromCache(self, limit, offset=0, **kwargs):
    """
    Pull the requested data from cache if it exists there, otherwise pull
    the data from db.

    Returns a tuple of (the data list, bool indicating if the end of the
    collection stream was reached).
    """
    t0 = time.time()
    t1 = t0

    if offset == 0:
        self._clearCacheForKeyParams(**kwargs)

    data = []
    while len(data) < limit:
        curOffset = ((offset + len(data)) // self._blockSize) * self._blockSize
        key = self._generateKey(curOffset, **kwargs)
        try:
            newData = self._cache[key]
        except KeyError:
            newData = self._updateCache(offset=curOffset, **kwargs)
        except Exception:
            logs.error('Error retrieving data from memcached. Is memcached running?')
            newData = self._updateCache(curOffset, **kwargs)

        start = (offset + len(data)) % self._blockSize
        data += newData[start:start + limit]

        if len(newData) < self._blockSize:
            break

    logs.debug('Time for getFromCache: %s' % (time.time() - t1))
    t1 = time.time()
    logs.debug('total returned: %s' % len(data))

    return data[:limit], len(data) < limit
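# Worked example of the block arithmetic used by getFromCache above, with a
# hypothetical block size of 10: a request at offset=25 maps to the cache
# block starting at 20, skipping the first 5 items of that block.
_blockSize = 10
offset, fetched = 25, 0
curOffset = ((offset + fetched) // _blockSize) * _blockSize
start = (offset + fetched) % _blockSize
assert (curOffset, start) == (20, 5)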
def check_cfg(self):
    """Perform a check on configuration files.

    Return False if a file is missing somewhere.
    """
    log.info("Checking synchronization of configuration files...")
    safe = True

    # Get a dict with the config files of each node
    nodes_cfg = dict()
    for node in self.get_nodes():
        nodes_cfg[node.get_hostname()] = node.get_possible_vm_names()
    log.debug("nodes_cfg=", nodes_cfg)

    # Compare the file lists of each node
    missing = dict()
    for node in nodes_cfg.keys():
        for cfg in nodes_cfg.values():
            missing.setdefault(node, []).extend(list(Set(cfg) - Set(nodes_cfg[node])))

    # Show missing files without duplicates
    for node in missing.keys():
        if missing[node]:
            log.info(" ** WARNING : Missing configuration files on %s : %s"
                     % (node, ", ".join(list(Set(missing[node])))))
            safe = False

    return safe
def get_efficient_solution(self):
    """Get a better solution at the minimal cost.

    Return the chosen solution, or None if there's no solution.
    """
    layer = 1  # Layer 0 is filled with the initial solution
    while layer <= core.cfg["LB_MAX_MIGRATION"]:
        # Create the current layer's solutions from the previous layer
        for previous_solution in self.solutions[layer - 1]:
            self.create_layer(previous_solution, layer)

        try:
            if not len(self.solutions[layer]) > 0:
                return None
        except KeyError:
            return None  # No more solutions, giving up.

        # Get the best solution of this layer
        best_solution = self.solutions[layer][0]

        # Compare the initial solution to the best solution of this layer
        if best_solution.score < self.root.score:
            log.debug(" [LB]", "Found", best_solution)

            # Compute the gain (in percent) of this solution
            gain = ((self.root.score - best_solution.score) * 100) / self.root.score
            if gain >= core.cfg["LB_MIN_GAIN"]:
                log.debug(" [LB]", "Pickup this one, migration plan:", best_solution.path)
                return best_solution

        layer += 1  # No better solution found in this layer, going a step further.

    return None  # No better solution found at all, giving up.
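# Worked example of the gain computation above, with assumed scores: if the
# initial placement scores 80 and a candidate solution scores 60, the gain is
# 25%, so the candidate is picked whenever LB_MIN_GAIN <= 25.
root_score, best_score = 80.0, 60.0
gain = ((root_score - best_score) * 100) / root_score
assert gain == 25.0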
def delete(self, key):
    """Delete the value associated with the key."""
    try:
        del self._data[key]
        log.debug("[CAH]", "DEL", key)
    except KeyError:
        pass
def _get_host_record():
    host_uuid = self.server.xenapi.session.get_this_host(self.server.getSession())
    host_record = self.server.xenapi.host.get_record(host_uuid)
    log.debug("[API]", self.node.get_hostname(), "host_record=", host_record)
    return host_record
def decrement(self, name, sample_rate=1):
    logs.debug("[%s-%s:%d] decrement: %s" %
               (self, self.statsd.addr[0], self.statsd.addr[1], name))
    if 0 == sample_rate:
        return
    return self.statsd.decrement(name, sample_rate)
def check_bridges(self):
    """Perform a check on bridges configurations.

    Return False if a bridge is missing somewhere.
    """
    log.info("Checking bridges configurations...")
    safe = True

    # Get a dict with the bridges of each node
    nodes_bridges = dict()
    for node in self.get_nodes():
        nodes_bridges[node.get_hostname()] = node.get_bridges()
    log.debug("nodes_bridges=", nodes_bridges)

    # Compare the bridge lists of each node
    missing = dict()
    for node in nodes_bridges.keys():
        for bridges in nodes_bridges.values():
            missing.setdefault(node, []).extend(list(Set(bridges) - Set(nodes_bridges[node])))

    # Show missing bridges without duplicates
    for node in missing.keys():
        if missing[node]:
            log.info(" ** WARNING : Missing bridges on %s : %s"
                     % (node, ", ".join(list(Set(missing[node])))))
            safe = False

    return safe
def start(self):
    logs.debug("Begin: %s" % self)
    self.expiration = datetime.datetime.utcnow() + \
        datetime.timedelta(seconds=self._userSessionLength)
    logs.debug("Expiration: %s" % self.expiration)

    """ Define possible actions the user can take, including wait time """
    actions = {}
    weights = {}

    # View the user's profile
    def _viewInbox():
        return self.viewTastemakerInbox()
    actions['inbox'] = _viewInbox
    weights['inbox'] = 20

    assert len(actions) == len(weights)

    """ Run the actions """
    while datetime.datetime.utcnow() < self.expiration:
        try:
            return self._runAction(actions, weights)
        except RootException:
            continue
def removeBlock(self, friendship):
    userId = friendship.user_id
    friendId = friendship.friend_id
    logs.debug("Remove Block: %s -> %s" % (userId, friendId))
    return self.block_collection.removeBlock(userId=userId, friendId=friendId)
def viewProfile(self, userId, fromAddFriends=False):
    logs.debug("%sView Profile (%s)" % ((' ' * 2 * len(self._stack)), userId))

    # Initial API Calls
    user = _get_users_show(userId)
    userStamps = _get_stamps_collection(scope='user', userId=userId, token=self.token)

    """ Define possible actions the user can take, including wait time """
    actions = {}
    weights = {}

    # Go back
    def _pass():
        time.sleep(random.randint(4, 12) * self._userWaitSpeed)
        raise DoneException("Done!")
    actions['pass'] = _pass
    weights['pass'] = 50

    # View stamp
    def _viewStamp():
        time.sleep(random.randint(4, 12) * self._userWaitSpeed)
        return self.viewStampDetail(stamp=userStamps[0])
    actions['stamp'] = _viewStamp
    weights['stamp'] = 10

    assert len(actions) == len(weights)

    """ Run the actions """
    return self._runAction(actions, weights)
def searchAllSource(self, query, timeout=None):
    if query.kinds is not None and len(query.kinds) > 0 \
            and len(self.kinds.intersection(query.kinds)) == 0:
        logs.debug('Skipping %s (kinds: %s)' % (self.sourceName, query.kinds))
        return self.emptySource

    logs.debug('Searching %s...' % self.sourceName)

    def gen():
        try:
            raw_results = []

            def getFactualSearch(q, useLocation=False):
                if useLocation and q.coordinates is not None:
                    results = self.__factual.search(q.query_string, coordinates=q.coordinates)
                else:
                    results = self.__factual.search(q.query_string)
                for result in results:
                    raw_results.append(result)

            if query.coordinates is not None:
                pool = Pool(2)
                pool.spawn(getFactualSearch, query, False)
                pool.spawn(getFactualSearch, query, True)
                pool.join(timeout=timeout)
            else:
                # getFactualSearch appends into raw_results in place and
                # returns None, so its return value must not be assigned
                # over the raw_results list.
                getFactualSearch(query)

            if raw_results is not None:
                for result in raw_results:
                    yield FactualPlace(data=result)
        except GeneratorExit:
            pass

    return generatorSource(gen(), constructor=FactualSearchAll)
def _try_ping_db(self, node):
    cmd_template = "mongo --quiet %s:27017/admin --eval 'printjson(%s);'"

    # ensure that the server and replica set are both responding and healthy
    mongo_cmds = ["db.serverStatus()", "rs.status()"]

    for mongo_cmd in mongo_cmds:
        dns = node.private_ip_address if is_ec2 else node.public_dns_name
        cmd = cmd_template % (dns, mongo_cmd)
        retries = 0

        while retries < 5:
            logs.debug(cmd)
            utils.log(cmd)
            ret = utils.shell(cmd)
            if 0 == ret[1]:
                break
            retries += 1
            time.sleep(retries * retries)

        if 0 != ret[1]:
            error = "unable to reach db server at '%s.%s'" % (node.stack, node.name)
            raise MonitorException(error, detail=ret[0], email=True, sms=True)

        if re.match('.*"ok"[ \t]*:[ \t]*1.*', ret[0], re.DOTALL) is None:
            error = "db server '%s.%s' returned invalid status for cmd '%s'" \
                    % (node.stack, node.name, mongo_cmd)
            raise MonitorException(error, detail=ret[0], email=True, sms=True)
def enrichEntity(self, entity, decorations, max_iterations=None, timestamp=None):
    """(might be named enrichedEntityWithSources)

    enrichEntity takes an entity schema object (defined in api/Schemas.py) and an output dict
    of decorations that is opaque to this class - only group objects and sources have an
    understanding of the decorations format. The group method syncDecorations() handles all
    propagation of source-local decorations to the output decoration dict.

    Returns a bool value indicating whether the entity was enriched.
    """
    self.setNow(timestamp)
    max_iterations = max_iterations or self.__default_max_iterations
    modified_total = False

    logs.debug("Begin enrichment: %s (%s)" % (entity.title, entity.entity_id))

    # We will loop through all sources multiple times, because as data is enriched, previously
    # unresolvable sources may become resolvable and can enrich in turn. If no fields are
    # modified by any source in a given iteration, then there's no reason to loop again.
    for i in range(max_iterations):
        modified = False

        for source in self.__sources:
            if entity.kind not in source.kinds:
                continue

            if entity.types and source.types and not set(entity.types).intersection(source.types):
                continue

            groups = source.getGroups(entity)
            targetGroups = set()
            for group in groups:
                if self.shouldEnrich(group, source.sourceName, entity):
                    targetGroups.add(group)
            if not targetGroups:
                continue

            # We have groups that are eligible for enrichment. We'll modify a deep-copy of the entity
            copy = buildEntity(entity.dataExport())

            # timestamps is passed down to the source. If the source enriches a group, a mapping
            # is added from the group name to the time it was enriched (now, essentially). When
            # the data we get from an external source is identical to what we already have,
            # presence of the group in this map is the only way we can tell that we received
            # fresh data.
            # TODO: This is a dictionary for legacy reasons, it should really be a set.
            timestamps = {}
            localDecorations = {}  # opaque decorations, for group object based extensions (i.e. Menus)

            logs.debug("Enriching with '%s' for groups %s" % (source.sourceName, sorted(targetGroups)))

            groupObjs = [self.getGroup(group) for group in targetGroups]
            try:
                enriched = source.enrichEntity(copy, groupObjs, self, localDecorations, timestamps)
                if enriched:
                    for groupObj in groupObjs:
                        fieldsChanged = groupObj.syncFields(copy, entity)
                        decorationsChanged = groupObj.syncDecorations(localDecorations, decorations)
                        if fieldsChanged or groupObj.groupName in timestamps or decorationsChanged:
                            groupObj.setTimestamp(entity, self.now)
                            groupObj.setSource(entity, source.sourceName)
                            modified = True
            except Exception as e:
                report()

        if not modified:
            break

        modified_total |= modified

    return modified_total
def __init__(self, vmname, id=-1, ram=None, vcpu=None):
    """Instantiate a VM object, with the optional ram and vcpu metrics."""
    self.name = vmname
    self.id = id
    self.__ram = ram
    self.__vcpu = vcpu
    self.config = dict()
    self.metrics = None
    self.devices = dict()

    try:
        try:
            execfile("%s/%s" % (core.cfg['VMCONF_DIR'], vmname), dict(), self.config)
        except IOError:
            execfile("%s/%s.cfg" % (core.cfg['VMCONF_DIR'], vmname), dict(), self.config)
    except IOError:
        if not core.cfg['QUIET']:
            log.warn("Missing configuration file: %s" % (vmname))

    log.debug("[VM]", vmname, self.config)

    # Get devices from config file
    try:
        for disk in self.config['disk']:
            try:
                self.devices[self.diskre.search(disk).group(1)] = self.diskre.search(disk).group(2)
            except:
                if not core.cfg['QUIET']:
                    log.warn("Bad disk input for %s: %s" % (self.name, disk))
    except KeyError:
        pass
def sendEmails(self, noop=False):
    logs.info("Submitting emails to %s users" % len(self._emailQueue))

    # Apply rate limit
    limit = 8

    ses = boto.connect_ses(keys.aws.AWS_ACCESS_KEY_ID, keys.aws.AWS_SECRET_KEY)

    for emailAddress, emailQueue in self._emailQueue.iteritems():
        if IS_PROD or emailAddress in self._adminEmails:
            count = 0
            emailQueue.reverse()

            for email in emailQueue:
                count += 1
                if count > limit:
                    logs.debug("Limit exceeded for email '%s'" % emailAddress)
                    break

                try:
                    logs.debug("Send email: %s" % (email))
                    if not noop:
                        ses.send_email(email.sender, email.title, email.body,
                                       emailAddress, format='html')
                except Exception as e:
                    logs.warning("Email failed: %s" % email)
                    logs.warning(utils.getFormattedException())

    logs.info("Success!")
def log(s=""): s = _formatLog(s) + "\n" logs.debug(s) sys.stderr.write(s) sys.stdout.flush() sys.stderr.flush()
def get_dom0_nr_cpus(self):
    """Return the number (int) of VCPU for the Domain-0."""
    dom0_record = self.server.xenapi.VM.get_record('00000000-0000-0000-0000-000000000000')
    log.debug("[API]", self.node.get_hostname(), "dom0_record=", dom0_record)
    return int(dom0_record['VCPUs_max'])
def entityGenerator():
    id_set = set()
    try:
        for query in token_queries:
            tokenSearchQuery = formatSearchQuery(query)
            if not tokenSearchQuery:
                continue
            mongo_query = {
                'sources.tombstone_id': {'$exists': False},
                'sources.user_generated_id': {'$exists': False},
                '$and': tokenSearchQuery,
            }
            mongo_query.update(kwargs)
            nemesis_ids = None
            if query_obj.source == 'stamped' and query_obj.key:
                mongo_query['_id'] = {'$lt': ObjectId(query_obj.key)}
                nemesis_ids = query_obj.entity.sources.nemesis_ids
            matches = self.__id_query(mongo_query)
            for match in matches:
                match_id = str(match['_id'])
                if nemesis_ids and match_id in nemesis_ids:
                    continue
                if match_id not in id_set:
                    id_set.add(match_id)
                    yield match_id
    except GeneratorExit:
        pass
    logs.debug('Consumed %d results from query: %s' % (len(id_set), id_set))
def addToken(self, token):
    logs.debug("Token: %s" % token.token_id)

    document = self._convertToMongo(token)
    document = self._addMongoDocument(document)
    token = self._convertFromMongo(document)

    return token
def is_vm_started(self, vmname):
    """Return True if the specified vm is started on this node."""
    vm = self.server.xenapi.VM.get_by_name_label(vmname)
    log.debug("[API]", self.hostname, "vm=", vm)
    try:
        return self.server.xenapi.VM.get_power_state(vm[0]) != "Halted"
    except IndexError:
        return False
def removeFriendship(self, friendship):
    userId = friendship.user_id
    friendId = friendship.friend_id
    logs.debug("Remove Friendship: %s -> %s" % (userId, friendId))
    self.friends_collection.removeFriend(userId=userId, friendId=friendId)
    self.followers_collection.removeFollower(userId=friendId, followerId=userId)
    return True
def checkFriendship(self, friendship):
    userId = friendship.user_id
    friendId = friendship.friend_id
    logs.debug("Check Friendship: %s -> %s" % (userId, friendId))
    status = self.friends_collection.checkFriend(userId=userId,
                                                 friendId=friendId)
    return status
def checkBlock(self, friendship):
    userId = friendship.user_id
    friendId = friendship.friend_id
    logs.debug("Check Block: %s -> %s" % (userId, friendId))
    status = self.block_collection.checkBlock(userId=userId, friendId=friendId)
    return status
def search(self, category, text, timeout=SEARCH_TIMEOUT, limit=10, coords=None):
    if not isinstance(text, unicode):
        text = text.decode('utf-8')

    if category not in Constants.categories:
        raise Exception("unrecognized category: (%s)" % category)

    start = datetime.datetime.now()
    results = {}
    times = {}
    pool = utils.LoggingThreadPool(len(self.__categories_to_sources_and_priorities))

    def termWaiting():
        logs.debug('in termWaiting')
        try:
            return self.__terminateWaiting(pool, datetime.datetime.now(), category, results)
        except Exception:
            logs.report()
        logs.debug('done with termWaiting')

    logs.debug("SHOULD_DISABLE_TIMEOUT IS " + str(shouldDisableTimeout))
    if not shouldDisableTimeout:
        logTimingData('SPAWNING TERMINATE WAITING')
        #pool.spawn(self.__terminateWaiting, pool, datetime.datetime.now(), category, results)
        pool.spawn(termWaiting)

    for (source, priority) in self.__categories_to_sources_and_priorities[category]:
        # TODO: Handing the exact same timeout down to the inner call is probably wrong
        # because we end up in this situation where outer pools and inner pools are using
        # the same timeout and possibly the outer pool will nix the whole thing before the
        # inner pool cancels out, which is what we'd prefer so that it's handled more
        # gracefully.
        pool.spawn(self.__searchSource, source, category, text, results, times,
                   timeout=timeout, coords=coords)

    logTimingData("TIME CHECK ISSUED ALL QUERIES AT " + str(datetime.datetime.now()))
    pool.join()
    logTimingData("TIME CHECK GOT ALL RESPONSES AT " + str(datetime.datetime.now()))
    logTimingData('TIMES: ' + (', '.join(['%s took %s' % (source.sourceName, str(times[source]))
                                          for source in times])))

    for source in self.__all_sources:
        if source in results and results[source]:
            logSourceResultsData("\nRESULTS FROM SOURCE " + source.sourceName +
                                 " TIME ELAPSED: " + str(times[source]) + "\n\n")
            for result in results[source]:
                logSourceResultsData(utils.normalize(repr(result)))

    beforeDeduping = datetime.datetime.now()
    dedupedResults = SearchResultDeduper().dedupeResults(category, results.values())
    afterDeduping = datetime.datetime.now()

    logTimingData("DEDUPING TOOK " + str(afterDeduping - beforeDeduping))
    logTimingData("TIME CHECK DONE AT:" + str(datetime.datetime.now()))
    logTimingData("ELAPSED:" + str(afterDeduping - start))

    logClusterData("\n\nDEDUPED RESULTS\n\n")
    for dedupedResult in dedupedResults[:limit]:
        logClusterData("\n\n%s\n\n" % str(dedupedResult))

    return dedupedResults[:limit]
def script_parts(self):
    if not hasattr(self, '_script_parts'):
        try:
            self._script_parts = shell.split_command(self.script)
        except Exception:
            logs.debug(
                u"Can't split command script {} because:\n {}".format(
                    self, sys.exc_info()))
            self._script_parts = None
    return self._script_parts
def _is_cache_locked(self):
    try:
        mtime = os.stat(self.lock).st_mtime
        if (int(time.time()) - mtime > self.timeout):
            log.debug("[PCH]", "LOCK TIMEOUT", self.lock)
            raise TimeoutException()
        else:
            return True
    except OSError:
        return False
def get_legacy_server(self):
    """Return the legacy API socket."""
    if self.__legacy_server is None:
        if self.is_local_node():
            self.__legacy_server = ServerProxy("httpu:///var/run/xend/xmlrpc.sock")
            log.debug("[Legacy-API]", "Using unix socket.")
        else:
            self.__legacy_server = ServerProxy("http://" + self.hostname + ":8006")
            log.debug("[Legacy-API]", "Using tcp socket.")
    return self.__legacy_server
def _get_ram_infos():
    host_metrics_record = self.server.xenapi.host_metrics.get_record(
        self.get_host_record(nocache)["metrics"])
    log.debug("[API]", self.node.get_hostname(), "host_metrics_record=", host_metrics_record)

    total = int(host_metrics_record["memory_total"]) / 1024 / 1024
    free = int(host_metrics_record["memory_free"]) / 1024 / 1024

    ram_infos = {'total': total, 'free': free, 'used': total - free}
    return ram_infos
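# Sanity check of the unit conversion above: XenAPI reports host memory in
# bytes, so dividing twice by 1024 yields MB. For a 2 GiB host:
assert 2147483648 / 1024 / 1024 == 2048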
def check(self):
    """Perform a sanity check of the cluster.

    Return a corresponding exit code (0=success, 0!=error).
    """
    log.info("Checking for duplicate VM...")
    safe = True

    # Get the cluster-wide VM list
    vm_by_node = dict()
    for node in self.get_nodes():
        vm_by_node[node.get_hostname()] = node.get_vms()
    log.debug("vm_by_node=", vm_by_node)

    # Invert key/value of the dict
    node_by_vm = dict()
    for node, vms in vm_by_node.items():
        for vm in vms:
            try:
                node_by_vm[vm.name].append(node)
            except KeyError:
                node_by_vm[vm.name] = [node]
    log.debug("node_by_vm=", node_by_vm)

    # Check duplicate VM
    for vm, nodes in node_by_vm.items():
        if len(nodes) > 1:
            log.info(" ** WARNING :", vm, "is running on", " and ".join(nodes))
            safe = False

    # Check bridges
    if not self.check_bridges():
        safe = False

    # Check synchronization of configuration files
    if not self.check_cfg():
        safe = False

    # Check existence of used logical volumes
    if not self.get_local_node().check_missing_lvs():
        safe = False

    # Other checks
    for node in self.get_nodes():
        # Check (non)activation of LVs
        if not node.check_activated_lvs():
            safe = False
        # Check autostart links
        if not node.check_autostart():
            safe = False

    return safe
def parseFileUpload(schema, request, fileName='image', **kwargs):
    ### Parse Request
    try:
        if request.method != 'POST':
            raise

        rawData = request.POST

        # Build the dict because django sucks
        data = {}
        for k, v in rawData.iteritems():
            data[k] = v

        # Extract file
        if fileName in request.FILES:
            f = request.FILES[fileName]
            max_size = 1048576  # 1 MB
            if f.size > max_size:
                msg = "Uploaded file is too large (%s) (max size is %d)" % (f.size, max_size)
                logs.warning(msg)
                raise Exception(msg)
            data[fileName] = f.read()
            logs.attachment(fileName, f.size)

        data.pop('oauth_token', None)
        data.pop('client_id', None)
        data.pop('client_secret', None)

        logData = data.copy()
        obfuscate = kwargs.pop('obfuscate', [])
        obfuscate.append('password')
        for item in obfuscate:
            if item in logData:
                logData[item] = '*****'
        if fileName in logData:
            logData[fileName] = 'FILE (SIZE: %s)' % f.size
        logs.form(logData)

        if schema == None:
            if len(data) > 0:
                raise
            return

        schema.dataImport(data)
        schema.validate()
        logs.debug("Parsed request data")

        return schema
    except Exception as e:
        msg = u"Unable to parse form (%s)" % e
        logs.warning(msg)
        utils.printException()
        raise e
def add(self, key, lifetime, value):
    """Add a new value to the cache.

    - 'key' could be any hashable object, but a meaningful string is better,
    - 'lifetime' is in seconds,
    - 'value' could be an object of any type.
    """
    self._data[key] = {
        'expire': int(time.time()) + lifetime,
        'value': value
    }
    log.debug("[CAH]", "ADD", key, lifetime, value)
def run(self, old_cmd):
    """Runs command from rule for passed command.

    :type old_cmd: Command

    """
    if self.side_effect:
        compatibility_call(self.side_effect, old_cmd, self.script)
    # This depends on correct setting of PYTHONIOENCODING by the alias:
    logs.debug(u'PYTHONIOENCODING: {}'.format(
        os.environ.get('PYTHONIOENCODING', '!!not-set!!')))
    print(self.script)
def check_activated_lvs(self):
    """Perform a sanity check of the LVM activation on this node.

    Return False if there are any inconsistencies.
    """
    log.info("Checking LV activation on", self.get_hostname(), "...")
    safe = True

    # Get all active LVs on the node
    regex = re.compile('.{4}a.')
    active_lvs = list()
    for line in self.run("lvs -o vg_name,name,attr --noheading").readlines():
        (vg, lv, attr) = line.strip().split()
        if regex.search(attr) != None:
            active_lvs.append("/dev/" + vg + "/" + lv)

    # Get all LVs used by VMs
    used_lvs = list()
    for vm in self.get_possible_vm_names():
        used_lvs.extend(VM(vm).get_lvs())

    # Compute the intersection of the two lists (active and used LVs)
    active_and_used_lvs = list(Set(active_lvs) & Set(used_lvs))
    log.debug("[NODE]", self.hostname, "active_and_used_lvs=", active_and_used_lvs)

    # Get all LVs of running VMs
    running_lvs = [lv for vm in self.get_vms() for lv in vm.get_lvs()]
    log.debug("[NODE]", self.hostname, "running_lvs=", running_lvs)

    # Compute activated LVs without a running VM
    lvs_without_vm = list(Set(active_and_used_lvs) - Set(running_lvs))
    if len(lvs_without_vm):
        log.info(" ** WARNING : Found activated LV without running VM :\n\t",
                 "\n\t".join(lvs_without_vm))
        safe = False

    # Compute running VMs without activated LVs
    vm_without_lvs = list(Set(running_lvs) - Set(active_and_used_lvs))
    if len(vm_without_lvs):
        log.info(" ** WARNING : Found running VM without activated LV :\n\t",
                 "\n\t".join(vm_without_lvs))
        safe = False

    return safe
def _get_vms_cpu_usage():
    cpu = dict()

    # Get domains' infos
    doms = self.node.legacy_server.xend.domains(True)
    log.debug("[Legacy-API]", self.node.get_hostname(), "doms=", doms)

    # Timestamp used to compute CPU percentage
    timestamp = time.time()

    # Initialize the result with 0 for every VM, because the legacy API
    # does not report paused VMs.
    for vm in self.node.get_vms(nocache):
        # 5s of cache is ok, this func is designed to be run every 60s
        cpu[vm.name] = 0

    for dom in doms:
        dom_info = main.parse_doms_info(dom)
        try:
            # String version with one digit after the dot. See
            # http://stackoverflow.com/questions/56820/round-in-python-doesnt-seem-to-be-rounding-properly
            # for reasons.
            #cpu[dom_info['name']] = "%.1f" % round(
            #    (dom_info['cpu_time'] - self.cpu_cache[dom_info['name']]) * 100
            #    / (timestamp - self.cpu_cache['timestamp']), 1)
            cpu[dom_info['name']] = (
                dom_info['cpu_time'] - self.cpu_cache[dom_info['name']]
            ) * 100 / (timestamp - self.cpu_cache['timestamp'])
        except KeyError:
            # First call: return zero values
            cpu[dom_info['name']] = 0
        except ZeroDivisionError:
            cpu[dom_info['name']] = 0

        # In case of reboot, remove negative values
        if cpu[dom_info['name']] < 0:
            cpu[dom_info['name']] = 0

        # Update cpu_cache with the new value
        self.cpu_cache[dom_info['name']] = dom_info['cpu_time']

    # Update timestamp
    self.cpu_cache['timestamp'] = timestamp

    return cpu
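# Worked example of the CPU percentage above, with assumed numbers: if a
# domain's cumulative cpu_time grew from 120.0s to 123.0s over a 60s sampling
# window, it used about 5% of one CPU.
assert (123.0 - 120.0) * 100 / 60.0 == 5.0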
def _get_vms_names():
    vms_names = list()
    dom_recs = self.server.xenapi.VM.get_all_records()
    log.debug("[API]", self.hostname, "dom_recs=", dom_recs)

    for dom_rec in dom_recs.values():
        if dom_rec['name_label'] == "Domain-0":
            continue  # Discard Dom0
        if dom_rec['name_label'].startswith("migrating-"):
            continue  # Discard migration temporary vm
        # power_state could be: Halted, Paused, Running, Suspended, Crashed, Unknown
        if dom_rec['power_state'] == "Halted":
            continue  # Discard non instantiated vm
        vms_names.append(dom_rec['name_label'])

    return vms_names
def get(self, key):
    """Return the cached value of the specified key.

    If the value is outdated, it will be deleted from the cache and a
    CacheExpiredException will be raised.
    If the key is unknown, a CacheMissingException will be raised.
    """
    try:
        data = self._data[key]
    except KeyError:
        log.debug("[CAH]", "MISS", key)
        raise CacheMissingException(key)

    if data['expire'] <= int(time.time()):
        self.delete(key)
        raise CacheExpiredException(key)

    log.debug("[CAH]", "HIT", key, data['value'])
    return data['value']
def get_best_solution(self):
    """Get the best solution whatever the cost.

    WARNING: this is really time-consuming !

    Return the chosen solution, or None if there's no solution.
    """
    # Set initial solution
    best_solution = self.root

    # Loop to find all solutions
    # Layer 0 is filled with the initial solution
    for layer in range(1, core.cfg['LB_MAX_MIGRATION']):
        # Create the current layer's solutions from the previous layer
        for previous_solution in self.solutions[layer - 1]:
            self.create_layer(previous_solution, layer)

        # Give up if there are no more solutions
        try:
            if not len(self.solutions[layer]) > 0:
                break
        except KeyError:
            break

        # Get the best solution of this layer
        if self.solutions[layer][0].score < best_solution.score:
            best_solution = self.solutions[layer][0]

    # Compare the initial solution to the best solution
    if best_solution.score < self.root.score:
        log.debug(" [LB]", "Found", best_solution)

        # Compute the gain (in percent) of this solution
        gain = ((self.root.score - best_solution.score) * 100) / self.root.score
        if gain >= core.cfg['LB_MIN_GAIN']:
            log.debug(" [LB]", "Pickup this one, migration plan:", best_solution.path)
            return best_solution

    return None  # No better solution found at all, giving up.
@classmethod
def from_raw_script(cls, raw_script):
    """Creates instance of `Command` from a list of script parts.

    :type raw_script: [basestring]
    :rtype: Command
    :raises: EmptyCommand

    """
    script = cls._prepare_script(raw_script)
    if not script:
        raise EmptyCommand

    env = dict(os.environ)
    env.update(settings.env)

    with logs.debug_time(u'Call: {}; with env: {};'.format(script, env)):
        result = Popen(script, shell=True, stdin=PIPE,
                       stdout=PIPE, stderr=PIPE, env=env)
        if cls._wait_output(result):
            stdout = result.stdout.read().decode('utf-8')
            stderr = result.stderr.read().decode('utf-8')
            logs.debug(u'Received stdout: {}'.format(stdout))
            logs.debug(u'Received stderr: {}'.format(stderr))
            return cls(script, stdout, stderr)
        else:
            logs.debug(u'Execution timed out!')
            return cls(script, None, None)
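# Usage sketch, assuming this is a classmethod of a `Command` class and that
# `raw_script` is the argv-style list passed in by the shell alias:
#
#   command = Command.from_raw_script(['git', 'brnch'])
#   # command.stdout/command.stderr hold the captured output,
#   # or None if _wait_output() reported a timeout.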
def set_metrics(self, vm_metrics, node_metrics):
    """Initialize the loadbalancer with metrics information.

    Example of metrics' dicts:
        vm_metrics = {
            'vm1': {'cpu': 10, 'ram': 1024},
            'vm2': {'cpu': 23, 'ram': 512},
            'vm3': {'cpu': 0, 'ram': 128},
        }
        node_metrics = {
            'node1': {'ram': 1024},
            'node2': {'ram': 2048},
        }
    """
    self.vm_metrics = vm_metrics
    self.node_metrics = node_metrics

    # Finalize initialization
    self.root.compute_score(self.vm_metrics)

    log.debug(" [LB]", "vm_metrics=", vm_metrics)
    log.debug(" [LB]", "node_metrics=", node_metrics)
    log.debug(" [LB]", "current_state=", self.root)
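# Usage sketch with the example dicts from the docstring; the loadbalancer
# instance `lb` and its constructor are assumed, since only set_metrics() and
# the solution getters appear in this excerpt:
#
#   lb.set_metrics(
#       {'vm1': {'cpu': 10, 'ram': 1024}, 'vm2': {'cpu': 23, 'ram': 512}},
#       {'node1': {'ram': 1024}, 'node2': {'ram': 2048}},
#   )
#   solution = lb.get_efficient_solution()  # None if no worthwhile plan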
def clear(self):
    """Clear the cache: erase all data."""
    self._data.clear()
    log.debug("[CAH]", "Cleared")
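# Usage sketch for the cache API above (add/get/delete/clear), assuming these
# methods live on datacache.DataCache as the Node constructor suggests:
#
#   cache = datacache.DataCache()
#   cache.add("dom_recs", 5, {"uuid": "..."})  # keep for 5 seconds
#   try:
#       recs = cache.get("dom_recs")
#   except (CacheMissingException, CacheExpiredException):
#       recs = None  # cold or stale: caller refetches and re-adds
#   cache.delete("dom_recs")
#   cache.clear()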
class PersistentCache(object):

    def __init__(self, file, ttl=60, timeout=15):
        """This class is a decorator that saves the return value of functions
        even after the interpreter dies.

        The cache is stored in a file, so it can be shared between many
        instances of the same script running in parallel. A lock is used to
        prevent simultaneous writes to this cache. Values returned by the
        decorated function are serialized with cPickle.

        Parameters:
          - file : filename of the cache. Should be in a writable path.
          - ttl : lifetime of the cached data (in seconds).
          - timeout : maximum time (in seconds) to wait for the release of
            the lock. If exceeded, the lock is deleted and the function is
            called to feed the cache.

        Example of usage:
        >>> @PersistentCache("/some/were/myfilecache", ttl=5, timeout=10)
        >>> def myfunc(param):
        >>>     return param

        >>> myfunc("some parameters")   # Will call myfunc and feed the cache
        >>> myfunc("some parameters")   # Will not call myfunc but read the cache

        Be careful, there is a (small) bug:
        >>> myfunc("other parameters")  # Will hit the cache and return the
                                        # value from the previous parameters
        """
        assert type(file) == str, "'file' should be a string."
        self.file = file
        self.lock = file + ".lock"

        assert type(ttl) == int, "'ttl' should be an integer."
        self.ttl = ttl

        assert type(timeout) == int, "'timeout' should be an integer."
        self.timeout = timeout

    def _read_cache(self):
        try:
            f = open(self.file, 'r')
            try:
                data = pickle.load(f)
            except Exception, e:
                # Pickle can raise more than 7 exceptions in case of a bad
                # input file, so we have to catch all of them and re-raise
                # a single exception.
                raise pickle.PickleError(e)
            f.close()

            mtime = os.stat(self.file).st_mtime
            if (int(time.time()) - mtime > self.ttl):
                log.debug("[PCH]", "EXPIRED", self.file)
                raise InvalidCacheException("Cache expired")
            else:
                log.debug("[PCH]", "HIT", self.file)
                return data
        except (IOError, OSError, pickle.PickleError):
            log.debug("[PCH]", "MISS", self.file)
            raise InvalidCacheException("Missing or bad cache file")
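# Hedged sketch of the decorator protocol PersistentCache implies; the real
# __call__ is not part of this excerpt, and _write_cache stands in as a
# hypothetical helper for the locked cPickle write path:
#
#   def __call__(self, func):
#       def wrapper(*args, **kwargs):
#           try:
#               return self._read_cache()
#           except InvalidCacheException:
#               value = func(*args, **kwargs)
#               self._write_cache(value)  # hypothetical helper
#               return value
#       return wrapper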
def _get_dom_records():
    dom_recs = self.server.xenapi.VM.get_all_records()
    log.debug("[API]", self.node.get_hostname(), "dom_recs=", dom_recs)
    return dom_recs
def run(self, cmd):
    """Execute command on this node via SSH (or via shell if this is the local node)."""
    # Doesn't work with LVM commands
    #   if(self.is_local_node()):
    #       p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    #       msg = p.stderr.read()
    #       if(len(msg) > 0):
    #           raise ClusterNodeError(self.hostname, ClusterNodeError.SHELL_ERROR, msg)
    #       return p.stdout
    #   else:

    # Deadlock bug if cmd's output is bigger than 65k
    #   if(self.is_local_node()):
    #       if core.cfg['DEBUG']: print "DEBUG SHELL: " + self.get_hostname() + " -> " + cmd
    #       stdout, stdin, stderr = popen2.popen3(cmd, 9300000)
    #       msg = stderr.read()
    #       if(len(msg) > 0):
    #           raise ClusterNodeError(self.hostname, ClusterNodeError.SHELL_ERROR, msg)

    if core.cfg['PATH']:
        cmd = core.cfg['PATH'] + "/" + cmd

    if self.is_local_node():
        log.debug("[SHL]", self.hostname, "->", cmd)

        # Create buffers
        stdout = StringIO.StringIO()
        stderr = StringIO.StringIO()

        proc = popen2.Popen3(cmd, True)  # Run cmd

        # Load output into the buffers and rewind them
        try:
            stdout.write(proc.fromchild.read())
        except IOError:
            pass  # Discard broken pipe for backgrounded commands
        stdout.seek(0)

        try:
            stderr.write(proc.childerr.read())
        except IOError:
            pass
        stderr.seek(0)

        exitcode = proc.wait()
        if exitcode != 0:
            msg = stderr.read()
            raise ShellError(self.hostname, msg, exitcode >> 8)
    else:
        log.debug("[SSH]", self.hostname, "->", cmd)
        stdin, stdout, stderr = self.ssh.exec_command(cmd)
        # Lock bug workaround: check the exit status before trying to read stderr,
        # because sometimes, when stdout is big (maybe >65k ?), stderr.read() hangs
        # on a thread's deadlock if stderr is read before stdout...
        exitcode = stderr.channel.recv_exit_status()
        if exitcode != 0:
            stderr.channel.settimeout(3)
            try:
                msg = stderr.read()
                raise SSHError(self.hostname, msg, exitcode)
            except socket.timeout:
                raise SSHError(self.hostname, "Timeout reading stderr !", exitcode)

    return stdout
def isdirty(f, depth, max_changed, already_checked,
            is_checked=state.File.is_checked,
            set_checked=state.File.set_checked_save):
    if f.id in already_checked:
        raise state.CyclicDependencyError()
    # make a copy of the list, so upon returning, our parent's copy
    # is unaffected
    already_checked = list(already_checked) + [f.id]

    if vars.DEBUG >= 1:
        debug('%s?%s\n' % (depth, f.nicename()))

    if f.failed_runid:
        debug('%s-- DIRTY (failed last time)\n' % depth)
        return DIRTY
    if f.changed_runid == None:
        debug('%s-- DIRTY (never built)\n' % depth)
        return DIRTY
    if f.changed_runid > max_changed:
        debug('%s-- DIRTY (built)\n' % depth)
        return DIRTY  # has been built more recently than parent
    if is_checked(f):
        if vars.DEBUG >= 1:
            debug('%s-- CLEAN (checked)\n' % depth)
        return CLEAN  # has already been checked during this session
    if not f.stamp:
        debug('%s-- DIRTY (no stamp)\n' % depth)
        return DIRTY

    newstamp = f.read_stamp()
    if f.stamp != newstamp:
        if newstamp == state.STAMP_MISSING:
            debug('%s-- DIRTY (missing)\n' % depth)
            if f.stamp and f.is_generated:
                # previously was stamped and generated, but suddenly missing.
                # We can safely forget that it is/was a target; if someone
                # does redo-ifchange on it and it doesn't exist, we'll mark
                # it a target again, but if someone creates it by hand,
                # it'll be a source. This should reduce false alarms when
                # files change from targets to sources as a project evolves.
                debug('%s converted target -> source\n' % depth)
                f.is_generated = False
                #f.update_stamp()
                f.save()
        else:
            debug('%s-- DIRTY (mtime)\n' % depth)
        if f.csum:
            return [f]
        else:
            return DIRTY

    must_build = []
    for mode, f2 in f.deps():
        dirty = CLEAN
        if mode == 'c':
            if os.path.exists(os.path.join(vars.BASE, f2.name)):
                debug('%s-- DIRTY (created)\n' % depth)
                dirty = DIRTY
        elif mode == 'm':
            sub = isdirty(f2, depth=depth + '  ',
                          max_changed=max(f.changed_runid, f.checked_runid),
                          already_checked=already_checked,
                          is_checked=is_checked, set_checked=set_checked)
            if sub:
                debug('%s-- DIRTY (sub)\n' % depth)
                dirty = sub
        else:
            assert (mode in ('c', 'm'))
        if not f.csum:
            # f is a "normal" target: dirty f2 means f is instantly dirty
            if dirty == DIRTY:
                # f2 is definitely dirty, so f definitely needs to
                # redo.
                return DIRTY
            elif isinstance(dirty, list):
                # our child f2 might be dirty, but it's not sure yet. It's
                # given us a list of targets we have to redo in order to
                # be sure.
                must_build += dirty
        else:
            # f is "checksummable": dirty f2 means f needs to redo,
            # but f might turn out to be clean after that (ie. our parent
            # might not be dirty).
            if dirty == DIRTY:
                # f2 is definitely dirty, so f definitely needs to
                # redo. However, after that, f might turn out to be
                # unchanged.
                return [f]
            elif isinstance(dirty, list):
                # our child f2 might be dirty, but it's not sure yet. It's
                # given us a list of targets we have to redo in order to
                # be sure.
                must_build += dirty

    if must_build:
        # f is *maybe* dirty because at least one of its children is maybe
        # dirty. must_build has accumulated a list of "topmost" uncertain
        # objects in the tree. If we build all those, we can then
        # redo-ifchange f and it won't have any uncertainty next time.
        return must_build

    debug('%s-- CLEAN\n' % (depth,))

    # if we get here, it's because the target is clean
    if f.is_override:
        state.warn_override(f.name)
    set_checked(f)

    return CLEAN