def processComputerPartition(self, computer_partition): """ Process a Computer Partition, depending on its state """ computer_partition_id = computer_partition.getId() # Sanity checks before processing # Those values should not be None or empty string or any falsy value if not computer_partition_id: raise ValueError('Computer Partition id is empty.') # Check if we defined explicit list of partitions to process. # If so, if current partition not in this list, skip. if len(self.computer_partition_filter_list) > 0 and \ (computer_partition_id not in self.computer_partition_filter_list): return self.logger.info('Processing Computer Partition %s.' % computer_partition_id) instance_path = os.path.join(self.instance_root, computer_partition_id) # Try to get partition timestamp (last modification date) timestamp_path = os.path.join( instance_path, COMPUTER_PARTITION_TIMESTAMP_FILENAME ) parameter_dict = computer_partition.getInstanceParameterDict() if 'timestamp' in parameter_dict: timestamp = parameter_dict['timestamp'] else: timestamp = None try: software_url = computer_partition.getSoftwareRelease().getURI() except NotFoundError: # Problem with instance: SR URI not set. # Try to process it anyway, it may need to be deleted. software_url = None try: software_path = os.path.join(self.software_root, md5digest(software_url)) except TypeError: # Problem with instance: SR URI not set. # Try to process it anyway, it may need to be deleted. software_path = None periodicity = self.maximum_periodicity if software_path: periodicity_path = os.path.join(software_path, 'periodicity') if os.path.exists(periodicity_path): try: periodicity = int(open(periodicity_path).read()) except ValueError: os.remove(periodicity_path) self.logger.exception('') # Check if timestamp from server is more recent than local one. # If not: it's not worth processing this partition (nothing has # changed). if (computer_partition_id not in self.computer_partition_filter_list and not self.develop and os.path.exists(timestamp_path)): old_timestamp = open(timestamp_path).read() last_runtime = int(os.path.getmtime(timestamp_path)) if timestamp: try: if periodicity == 0: os.remove(timestamp_path) elif int(timestamp) <= int(old_timestamp): if computer_partition.getState() != COMPUTER_PARTITION_STARTED_STATE: return # Check periodicity, i.e if periodicity is one day, partition # should be processed at least every day. # Only do it for "started" instances if int(time.time()) <= (last_runtime + periodicity) or periodicity < 0: self.logger.info('Partition already up-to-date, skipping.') return else: # Periodicity forced processing this partition. Removing # the timestamp file in case it fails. os.remove(timestamp_path) except ValueError: os.remove(timestamp_path) self.logger.exception('') self.logger.info(' Software URL: %s' % software_url) self.logger.info(' Software path: %s' % software_path) self.logger.info(' Instance path: %s' % instance_path) local_partition = Partition( software_path=software_path, instance_path=instance_path, supervisord_partition_configuration_path=os.path.join( self.supervisord_configuration_directory, '%s.conf' % computer_partition_id), supervisord_socket=self.supervisord_socket, computer_partition=computer_partition, computer_id=self.computer_id, partition_id=computer_partition_id, server_url=self.master_url, software_release_url=software_url, certificate_repository_path=self.certificate_repository_path, buildout=self.buildout, logger=self.logger) computer_partition_state = computer_partition.getState() if computer_partition_state == COMPUTER_PARTITION_STARTED_STATE: local_partition.install() computer_partition.available() local_partition.start() self._checkPromises(computer_partition) computer_partition.started() elif computer_partition_state == COMPUTER_PARTITION_STOPPED_STATE: try: local_partition.install() computer_partition.available() finally: # Instance has to be stopped even if buildout/reporting is wrong. local_partition.stop() computer_partition.stopped() elif computer_partition_state == COMPUTER_PARTITION_DESTROYED_STATE: local_partition.stop() try: computer_partition.stopped() except (SystemExit, KeyboardInterrupt): computer_partition.error(traceback.format_exc(), logger=self.logger) raise except Exception: pass else: error_string = "Computer Partition %r has unsupported state: %s" % \ (computer_partition_id, computer_partition_state) computer_partition.error(error_string, logger=self.logger) raise NotImplementedError(error_string) # If partition has been successfully processed, write timestamp if timestamp: timestamp_path = os.path.join( instance_path, COMPUTER_PARTITION_TIMESTAMP_FILENAME ) open(timestamp_path, 'w').write(timestamp)
def agregateAndSendUsage(self): """Will agregate usage from each Computer Partition. """ # Prepares environment self.checkEnvironmentAndCreateStructure() self._launchSupervisord() slap_computer_usage = self.slap.registerComputer(self.computer_id) computer_partition_usage_list = [] self.logger.info('Aggregating and sending usage reports...') #We retrieve XSD models try: computer_consumption_model = \ pkg_resources.resource_string( 'slapos.slap', 'doc/computer_consumption.xsd') except IOError: computer_consumption_model = \ pkg_resources.resource_string( __name__, '../../../../slapos/slap/doc/computer_consumption.xsd') try: partition_consumption_model = \ pkg_resources.resource_string( 'slapos.slap', 'doc/partition_consumption.xsd') except IOError: partition_consumption_model = \ pkg_resources.resource_string( __name__, '../../../../slapos/slap/doc/partition_consumption.xsd') clean_run = True # Loop over the different computer partitions computer_partition_list = self.FilterComputerPartitionList( slap_computer_usage.getComputerPartitionList()) for computer_partition in computer_partition_list: try: computer_partition_id = computer_partition.getId() #We want to execute all the script in the report folder instance_path = os.path.join(self.instance_root, computer_partition.getId()) report_path = os.path.join(instance_path, 'etc', 'report') if os.path.isdir(report_path): script_list_to_run = os.listdir(report_path) else: script_list_to_run = [] #We now generate the pseudorandom name for the xml file # and we add it in the invocation_list f = tempfile.NamedTemporaryFile() name_xml = '%s.%s' % ('slapreport', os.path.basename(f.name)) path_to_slapreport = os.path.join(instance_path, 'var', 'xml_report', name_xml) failed_script_list = [] for script in script_list_to_run: invocation_list = [] invocation_list.append(os.path.join(instance_path, 'etc', 'report', script)) #We add the xml_file name to the invocation_list #f = tempfile.NamedTemporaryFile() #name_xml = '%s.%s' % ('slapreport', os.path.basename(f.name)) #path_to_slapreport = os.path.join(instance_path, 'var', name_xml) invocation_list.append(path_to_slapreport) #Dropping privileges uid, gid = None, None stat_info = os.stat(instance_path) #stat sys call to get statistics informations uid = stat_info.st_uid gid = stat_info.st_gid process_handler = SlapPopen(invocation_list, preexec_fn=lambda: dropPrivileges(uid, gid, logger=self.logger), cwd=os.path.join(instance_path, 'etc', 'report'), env=None, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, logger=self.logger) if process_handler.returncode is None: process_handler.kill() if process_handler.returncode != 0: clean_run = False failed_script_list.append("Script %r failed." % script) self.logger.warning('Failed to run %r' % invocation_list) if len(failed_script_list): computer_partition.error('\n'.join(failed_script_list), logger=self.logger) # Whatever happens, don't stop processing other instances except Exception: self.logger.exception('Cannot run usage script(s) for %r:' % computer_partition.getId()) #Now we loop through the different computer partitions to report report_usage_issue_cp_list = [] for computer_partition in computer_partition_list: try: filename_delete_list = [] computer_partition_id = computer_partition.getId() instance_path = os.path.join(self.instance_root, computer_partition_id) dir_reports = os.path.join(instance_path, 'var', 'xml_report') #The directory xml_report contain a number of files equal #to the number of software instance running inside the same partition if os.path.isdir(dir_reports): filename_list = os.listdir(dir_reports) else: filename_list = [] #self.logger.debug('name List %s' % filename_list) for filename in filename_list: file_path = os.path.join(dir_reports, filename) if os.path.exists(file_path): usage = open(file_path, 'r').read() #We check the validity of xml content of each reports if not self.validateXML(usage, partition_consumption_model): self.logger.info('WARNING: The XML file %s generated by slapreport is ' 'not valid - This report is left as is at %s where you can ' 'inspect what went wrong ' % (filename, dir_reports)) # Warn the SlapOS Master that a partition generates corrupted xml # report else: computer_partition_usage = self.slap.registerComputerPartition( self.computer_id, computer_partition_id) computer_partition_usage.setUsage(usage) computer_partition_usage_list.append(computer_partition_usage) filename_delete_list.append(filename) else: self.logger.debug('Usage report %r not found, ignored' % file_path) #After sending the aggregated file we remove all the valid xml reports for filename in filename_delete_list: os.remove(os.path.join(dir_reports, filename)) # Whatever happens, don't stop processing other instances except Exception: self.logger.exception('Cannot run usage script(s) for %r:' % computer_partition.getId()) for computer_partition_usage in computer_partition_usage_list: self.logger.info('computer_partition_usage_list: %s - %s' % (computer_partition_usage.usage, computer_partition_usage.getId())) #If there is, at least, one report if computer_partition_usage_list != []: try: #We generate the final XML report with asXML method computer_consumption = self.asXML(computer_partition_usage_list) self.logger.info('Final xml report: %s' % computer_consumption) #We test the XML report before sending it if self.validateXML(computer_consumption, computer_consumption_model): self.logger.info('XML file generated by asXML is valid') slap_computer_usage.reportUsage(computer_consumption) else: self.logger.info('XML file generated by asXML is not valid !') raise ValueError('XML file generated by asXML is not valid !') except Exception: issue = "Cannot report usage for %r: %s" % ( computer_partition.getId(), traceback.format_exc()) self.logger.info(issue) computer_partition.error(issue, logger=self.logger) report_usage_issue_cp_list.append(computer_partition_id) for computer_partition in computer_partition_list: if computer_partition.getState() == COMPUTER_PARTITION_DESTROYED_STATE: try: computer_partition_id = computer_partition.getId() try: software_url = computer_partition.getSoftwareRelease().getURI() software_path = os.path.join(self.software_root, md5digest(software_url)) except (NotFoundError, TypeError): software_url = None software_path = None local_partition = Partition( software_path=software_path, instance_path=os.path.join(self.instance_root, computer_partition.getId()), supervisord_partition_configuration_path=os.path.join( self.supervisord_configuration_directory, '%s.conf' % computer_partition_id), supervisord_socket=self.supervisord_socket, computer_partition=computer_partition, computer_id=self.computer_id, partition_id=computer_partition_id, server_url=self.master_url, software_release_url=software_url, certificate_repository_path=self.certificate_repository_path, buildout=self.buildout, logger=self.logger) local_partition.stop() try: computer_partition.stopped() except (SystemExit, KeyboardInterrupt): computer_partition.error(traceback.format_exc(), logger=self.logger) raise except Exception: pass if computer_partition.getId() in report_usage_issue_cp_list: self.logger.info('Ignoring destruction of %r, as no report usage was sent' % computer_partition.getId()) continue local_partition.destroy() except (SystemExit, KeyboardInterrupt): computer_partition.error(traceback.format_exc(), logger=self.logger) raise except Exception: clean_run = False self.logger.exception('') exc = traceback.format_exc() computer_partition.error(exc, logger=self.logger) try: computer_partition.destroyed() except NotFoundError: self.logger.debug('Ignored slap error while trying to inform about ' 'destroying not fully configured Computer Partition %r' % computer_partition.getId()) except ServerError as server_error: self.logger.debug('Ignored server error while trying to inform about ' 'destroying Computer Partition %r. Error is:\n%r' % (computer_partition.getId(), server_error.args[0])) self.logger.info('Finished usage reports.') # Return success value if not clean_run: return SLAPGRID_FAIL return SLAPGRID_SUCCESS
def processComputerPartition(self, computer_partition): """ Process a Computer Partition, depending on its state """ computer_partition_id = computer_partition.getId() # Sanity checks before processing # Those values should not be None or empty string or any falsy value if not computer_partition_id: raise ValueError('Computer Partition id is empty.') # Check if we defined explicit list of partitions to process. # If so, if current partition not in this list, skip. if len(self.computer_partition_filter_list) > 0 and \ (computer_partition_id not in self.computer_partition_filter_list): return self.logger.debug('Check if %s requires processing...' % computer_partition_id) instance_path = os.path.join(self.instance_root, computer_partition_id) # Try to get partition timestamp (last modification date) timestamp_path = os.path.join( instance_path, COMPUTER_PARTITION_TIMESTAMP_FILENAME ) parameter_dict = computer_partition.getInstanceParameterDict() if 'timestamp' in parameter_dict: timestamp = parameter_dict['timestamp'] else: timestamp = None try: software_url = computer_partition.getSoftwareRelease().getURI() except NotFoundError: # Problem with instance: SR URI not set. # Try to process it anyway, it may need to be deleted. software_url = None try: software_path = os.path.join(self.software_root, md5digest(software_url)) except TypeError: # Problem with instance: SR URI not set. # Try to process it anyway, it may need to be deleted. software_path = None periodicity = self.maximum_periodicity if software_path: periodicity_path = os.path.join(software_path, 'periodicity') if os.path.exists(periodicity_path): try: periodicity = int(open(periodicity_path).read()) except ValueError: os.remove(periodicity_path) self.logger.exception('') # Check if timestamp from server is more recent than local one. # If not: it's not worth processing this partition (nothing has # changed). if (computer_partition_id not in self.computer_partition_filter_list and not self.develop and os.path.exists(timestamp_path)): old_timestamp = open(timestamp_path).read() last_runtime = int(os.path.getmtime(timestamp_path)) if timestamp: try: if periodicity == 0: os.remove(timestamp_path) elif int(timestamp) <= int(old_timestamp): # Check periodicity, i.e if periodicity is one day, partition # should be processed at least every day. if int(time.time()) <= (last_runtime + periodicity) or periodicity < 0: self.logger.debug('Partition already up-to-date, skipping.') return else: # Periodicity forced processing this partition. Removing # the timestamp file in case it fails. os.remove(timestamp_path) except ValueError: os.remove(timestamp_path) self.logger.exception('') # Include Partition Logging log_folder_path = "%s/.slapgrid/log" % instance_path mkdir_p(log_folder_path) partition_file_handler = logging.FileHandler( filename="%s/instance.log" % (log_folder_path) ) stat_info = os.stat(instance_path) chownDirectory("%s/.slapgrid" % instance_path, uid=stat_info.st_uid, gid=stat_info.st_gid) formatter = logging.Formatter( '[%(asctime)s] %(levelname)-8s %(name)s %(message)s') partition_file_handler.setFormatter(formatter) self.logger.addHandler(partition_file_handler) try: self.logger.info('Processing Computer Partition %s.' % computer_partition_id) self.logger.info(' Software URL: %s' % software_url) self.logger.info(' Software path: %s' % software_path) self.logger.info(' Instance path: %s' % instance_path) filter_dict = getattr(computer_partition, '_filter_dict', None) if filter_dict: retention_delay = filter_dict.get('retention_delay', '0') else: retention_delay = '0' local_partition = Partition( software_path=software_path, instance_path=instance_path, supervisord_partition_configuration_path=os.path.join( _getSupervisordConfigurationDirectory(self.instance_root), '%s.conf' % computer_partition_id), supervisord_socket=self.supervisord_socket, computer_partition=computer_partition, computer_id=self.computer_id, partition_id=computer_partition_id, server_url=self.master_url, software_release_url=software_url, certificate_repository_path=self.certificate_repository_path, buildout=self.buildout, logger=self.logger, retention_delay=retention_delay, instance_min_free_space=self.instance_min_free_space, instance_storage_home=self.instance_storage_home, ipv4_global_network=self.ipv4_global_network, ) computer_partition_state = computer_partition.getState() # XXX this line breaks 37 tests # self.logger.info(' Instance type: %s' % computer_partition.getType()) self.logger.info(' Instance status: %s' % computer_partition_state) if computer_partition_state == COMPUTER_PARTITION_STARTED_STATE: local_partition.install() computer_partition.available() local_partition.start() self._checkPromises(computer_partition) computer_partition.started() elif computer_partition_state == COMPUTER_PARTITION_STOPPED_STATE: try: # We want to process the partition, even if stopped, because it should # propagate the state to children if any. local_partition.install() computer_partition.available() finally: # Instance has to be stopped even if buildout/reporting is wrong. local_partition.stop() computer_partition.stopped() elif computer_partition_state == COMPUTER_PARTITION_DESTROYED_STATE: local_partition.stop() try: computer_partition.stopped() except (SystemExit, KeyboardInterrupt): computer_partition.error(traceback.format_exc(), logger=self.logger) raise except Exception: pass else: error_string = "Computer Partition %r has unsupported state: %s" % \ (computer_partition_id, computer_partition_state) computer_partition.error(error_string, logger=self.logger) raise NotImplementedError(error_string) finally: self.logger.removeHandler(partition_file_handler) # If partition has been successfully processed, write timestamp if timestamp: open(timestamp_path, 'w').write(timestamp)