def __init__(self, config, log, process_group_pid_set=None,
             slapproxy_log=None):
    """Start a fresh slapproxy and declare one computer partition to it.

    Steps, in order: delete the existing proxy database, spawn the proxy,
    wait 10 seconds, supply the software profile, create the partition
    directory and send the computer configuration to the proxy.

    Args:
        config: mapping read for the keys ``proxy_database``,
            ``slapproxy_binary``, ``slapos_config``, ``master_url``,
            ``custom_profile_path``, ``computer_id``,
            ``partition_reference``, ``instance_root``, ``software_root``,
            ``ipv4_address`` and ``ipv6_address``.
        log: stored on the instance as ``self.log``; not called here.
        process_group_pid_set: optional set to which the proxy PID is
            added. When None the PID is not recorded anywhere.
        slapproxy_log: optional path of a file receiving the proxy's stdout
            and stderr (opened in ``'w'`` mode, hence truncated).
    """
    self.log = log
    self.config = config
    # By erasing everything, we make sure that we are able to "update"
    # existing profiles. This is quite dirty way to do updates...
    if os.path.exists(config['proxy_database']):
        os.unlink(config['proxy_database'])

    # preexec_fn=os.setsid makes the child call setsid() before exec.
    kwargs = dict(close_fds=True, preexec_fn=os.setsid)
    slapproxy_log_fp = None
    if slapproxy_log is not None:
        slapproxy_log_fp = open(slapproxy_log, 'w')
        kwargs['stdout'] = slapproxy_log_fp
        kwargs['stderr'] = slapproxy_log_fp
    try:
        proxy = subprocess.Popen(
            [config['slapproxy_binary'], config['slapos_config']], **kwargs)
    finally:
        # The child owns its own copy of the descriptor once spawned; the
        # parent's file object is no longer needed and must not leak.
        if slapproxy_log_fp is not None:
            slapproxy_log_fp.close()
    if process_group_pid_set is not None:
        process_group_pid_set.add(proxy.pid)

    # XXX: dirty, giving some time for proxy to being able to accept
    # connections
    time.sleep(10)
    slap = slapos.slap.slap()
    slap.initializeConnection(config['master_url'])

    # register software profile
    self.software_profile = config['custom_profile_path']
    slap.registerSupply().supply(
        self.software_profile,
        computer_guid=config['computer_id'])
    computer = slap.registerComputer(config['computer_id'])

    # create partition and configure computer
    partition_reference = config['partition_reference']
    partition_path = os.path.join(config['instance_root'],
                                  partition_reference)
    if not os.path.exists(partition_path):
        os.mkdir(partition_path)
    # 0o750 is accepted by Python 2.6+ and Python 3 (0750 is Python 2 only).
    os.chmod(partition_path, 0o750)
    computer.updateConfiguration(xml_marshaller.xml_marshaller.dumps({
        'address': config['ipv4_address'],
        'instance_root': config['instance_root'],
        'netmask': '255.255.255.255',
        'partition_list': [{
            'address_list': [
                {'addr': config['ipv4_address'],
                 'netmask': '255.255.255.255'},
                {'addr': config['ipv6_address'],
                 'netmask': 'ffff:ffff:ffff::'},
            ],
            'path': partition_path,
            'reference': partition_reference,
            'tap': {'name': partition_reference},
        }],
        'reference': config['computer_id'],
        'software_root': config['software_root'],
    }))
def _initializeSlapOSConnection(self):
    """
    Open the connection to slapos and return the handles built on it.

    ``initializeConnection`` is attempted until the slap object carries a
    ``_hateoas_navigator`` attribute that is not None. Each failed attempt
    is logged and followed by a 30 second pause; at most 101 attempts are
    made. ValueError is raised when the navigator is still missing.

    Returns the tuple ``(slap, supply, order)``.
    """
    connection = slapos.slap.slap()

    def navigator_missing():
        return getattr(connection, '_hateoas_navigator', None) is None

    failed_attempts = 0
    while failed_attempts <= 100:
        connection.initializeConnection(
            self.slapos_url,
            self.key_path,
            self.cert_path,
            timeout=120,
            slapgrid_rest_uri=self.slapos_api_rest_url)
        if not navigator_missing():
            break
        # wait until _hateoas_navigator is loaded.
        failed_attempts += 1
        logger.info(
            "Fail to load _hateoas_navigator waiting a bit and retry.")
        time.sleep(30)

    if navigator_missing():
        raise ValueError("Fail to load _hateoas_navigator")

    supply_handle = connection.registerSupply()
    open_order_handle = connection.registerOpenOrder()
    return connection, supply_handle, open_order_handle
def updateProxy(config):
    """
    Configure Slapos Node computer and partitions.
    Send current Software Release to Slapproxy for compilation and deployment.

    Starts the proxy, creates ``config['instance_root']`` when missing,
    supplies the current software release profile, then creates
    ``config['partition_amount']`` partition directories named
    ``slappart<N>`` (mode 0o750) and sends the whole computer description
    to the proxy in a single ``updateConfiguration`` call.

    Args:
        config: mapping read for the keys ``instance_root``, ``master_url``,
            ``computer_id``, ``ipv4_address``, ``ipv6_address``,
            ``software_root`` and ``partition_amount`` (converted with
            ``int``); it is also passed to ``startProxy`` and
            ``getCurrentSoftwareReleaseProfile``.

    Returns:
        True, always.
    """
    startProxy(config)
    if not os.path.exists(config['instance_root']):
        os.mkdir(config['instance_root'])
    slap = slapos.slap.slap()
    profile = getCurrentSoftwareReleaseProfile(config)
    slap.initializeConnection(config['master_url'])
    slap.registerSupply().supply(profile, computer_guid=config['computer_id'])
    computer = slap.registerComputer(config['computer_id'])
    prefix = 'slappart'
    slap_config = {
        'address': config['ipv4_address'],
        'instance_root': config['instance_root'],
        'netmask': '255.255.255.255',
        'partition_list': [],
        'reference': config['computer_id'],
        'software_root': config['software_root'],
    }

    # range() and the 0o750 literal are valid on both Python 2.6+ and
    # Python 3, unlike the xrange()/0750 spellings.
    for i in range(int(config['partition_amount'])):
        partition_reference = '%s%s' % (prefix, i)
        partition_path = os.path.join(config['instance_root'],
                                      partition_reference)
        if not os.path.exists(partition_path):
            os.mkdir(partition_path)
        os.chmod(partition_path, 0o750)
        slap_config['partition_list'].append({
            'address_list': [
                {
                    'addr': config['ipv4_address'],
                    'netmask': '255.255.255.255',
                },
                {
                    'addr': config['ipv6_address'],
                    'netmask': 'ffff:ffff:ffff::',
                },
            ],
            'path': partition_path,
            'reference': partition_reference,
            'tap': {'name': partition_reference},
        })
    computer.updateConfiguration(
        xml_marshaller.xml_marshaller.dumps(slap_config))
    return True
def main():
    """
    Run one pass of the Slap Test Agent described by a configuration file.

    Note: This code does not test as much as it monitors.
    The goal is to regularly try to build & instantiate a software release
    on several machines, to monitor vifib stability and SR stability as time
    passes (and things once available online become unavailable).
    Part of this function could be reused to make an actual test bot, testing
    only when actual changes are committed to a software release, to look for
    regressions.

    Note: This code does not connect to any instantiated service, it relies on
    the presence of a promise section to make instantiation fail until promise
    is happy.

    Command line: ``--pidfile/-p``, ``--log/-l``, ``--verbose/-v`` and the
    positional ``configuration_file``. Every configuration section except
    ``agent`` describes one test; the ``agent`` section provides
    ``node_title``, ``test_title``, ``project_title`` and ``report_url``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pidfile', '-p', help='pidfile preventing parallel '
        'execution.')
    parser.add_argument('--log', '-l', help='Log file path.')
    parser.add_argument('--verbose', '-v', help='Be verbose.',
        action='store_true')
    parser.add_argument('configuration_file', type=argparse.FileType(),
        help='Slap Test Agent configuration file.')
    # Just to keep strong references to AutoSTemp instances
    key_file_dict = {}
    def asFilenamePair(key, cert):
        """
        Return the ``.name`` of the AutoSTemp objects holding key and cert.

        Pairs are cached in key_file_dict under the stripped certificate, so
        a certificate seen twice reuses the first pair.
        """
        # Note: python's ssl support only supports fetching key & cert data
        # from on-disk files. This is why we need to "convert" direct data
        # into file paths, using temporary files.
        cert = cert.strip()
        try:
            temp_key, temp_cert = key_file_dict[cert]
        except KeyError:
            temp_key = AutoSTemp(key.strip())
            temp_cert = AutoSTemp(cert)
            key_file_dict[cert] = (temp_key, temp_cert)
        return temp_key.name, temp_cert.name
    args = parser.parse_args()

    # Logging always goes to stdout, and additionally to --log when given.
    log = args.log
    formatter = logging.Formatter('%(asctime)s %(message)s')
    logger = logging.getLogger()
    if args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logger.setLevel(log_level)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    if log:
        handler = logging.FileHandler(log)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        # Second, read-only handle on the same log, positioned at its end
        # (whence=2); it is handed to test_result.addWatch below.
        # NOTE(review): this handle is never explicitly closed here.
        log_file = open(log)
        log_file.seek(0, 2)

    pidfile = args.pidfile
    if pidfile:
        setRunning(pidfile)
    try:
        # section name -> dict of that section's options, in file order.
        section_dict = collections.OrderedDict()
        configuration = ConfigParser.SafeConfigParser()
        configuration.readfp(args.configuration_file)
        for section in configuration.sections():
            if section == 'agent':
                continue
            section_dict[section] = section_entry_dict = dict(
                configuration.items(section))
            # These options are JSON-encoded in the configuration file and
            # are replaced in place by their decoded value.
            for key in ('request_kw', 'max_install_duration',
                        'max_destroy_duration', 'max_request_duration',
                        'max_uninstall_duration', 'computer_list'
                    ):
                if key in section_entry_dict:
                    try:
                        if isinstance(section_entry_dict[key], str) or \
                              isinstance(section_entry_dict[key], unicode):
                            section_entry_dict[key] = json.loads(
                                section_entry_dict[key])
                    except Exception as exc:
                        logger.error("Fail to load %s on %s" % (key, section_entry_dict))
                        raise
            if 'key' in section_entry_dict:
                # Inline key/cert data is replaced by temporary file paths.
                key_file, cert_file = asFilenamePair(section_entry_dict['key'],
                    section_entry_dict['cert'])
                section_entry_dict['key'] = key_file
                section_entry_dict['cert'] = cert_file
            if "computer_list" in section_entry_dict:
                # One computer of the list is picked at random for this run.
                section_entry_dict["target_computer"] = \
                    random.choice(section_entry_dict["computer_list"])
        agent_parameter_dict = dict(configuration.items('agent'))
        # XXX: should node title be auto-generated by installation recipe ?
        # For example, using computer guid.
        node_title = agent_parameter_dict['node_title']
        test_title = agent_parameter_dict['test_title']
        project_title = agent_parameter_dict['project_title']
        task_distribution_tool = TaskDistributionTool(agent_parameter_dict[
            'report_url'])
        # (master_url, key path) -> (supply, order, rpc), so one connection
        # is reused by every test targeting the same master with the same key.
        master_slap_connection_dict = {}
        test_result = task_distribution_tool.createTestResult(
            revision='',
            test_name_list=section_dict.keys(),
            node_title=node_title,
            allow_restart=True,
            test_title=test_title,
            project_title=project_title,
        )
        test_result.watcher_period = 300
        if log:
            test_result.addWatch(log, log_file, max_history_bytes=10000)
        # NOTE(review): this assertion runs after test_result has already
        # been used above, so it cannot guard those uses.
        assert test_result is not None
        test_mapping = TestMap(section_dict)
        logger.info("Running %s tests in parallel." % \
            len(test_mapping.getComputerList()))

        # Names of the tests already started during this run.
        ran_test_set = set()
        # test name -> (test_line, tester, target_computer) for tests in
        # progress.
        running_test_dict = {}
        more_tests = True
        logger.info('Starting Test Agent run %s ' % node_title)
        while True:
            # Start tests until there are as many running tests as computers
            # known to test_mapping, or until no more tests are available.
            while len(running_test_dict) < len(test_mapping.getComputerList())\
                    and more_tests:
                test_mapping.cleanUp()
                target_computer = test_mapping.getNextComputer([computer \
                    for _, _, computer in running_test_dict.itervalues()])

                test_line = test_result.start(
                    exclude_list= list(ran_test_set) + \
                        list(test_mapping.getExcludeList(target_computer)))

                logger.info("Test Line: %s " % test_line)
                logger.info("Ran Test Set: %s " % ran_test_set)
                logger.info("Running test dict: %s " % running_test_dict)
                logger.info("Target Computer: %s " % target_computer)

                if test_line is None:
                    # Nothing left to start on this computer: drop it, and
                    # stop asking once no computer remains.
                    test_mapping.dropComputer(target_computer)
                    if len(test_mapping.getComputerList()) == 0:
                        more_tests = False
                    continue
                test_name = test_line.name
                try:
                    section_entry_dict = section_dict[test_name]
                except KeyError:
                    # We don't know how to execute this test. Assume it doesn't
                    # exist anymore, and fail it in result.
                    test_line.stop(stderr='This test does not exist on test '
                        'node %s' % (node_title, ))
                    continue
                master_url = section_entry_dict['master_url']
                master_slap_connection_key = (master_url,
                    section_entry_dict.get('key'))
                try:
                    supply, order, rpc = master_slap_connection_dict[
                        master_slap_connection_key]
                except KeyError:
                    # First test for this (master, key): build the slap
                    # handles and the XML-RPC proxy, then cache them.
                    key = section_entry_dict.get('key')
                    cert = section_entry_dict.get('cert')
                    slap = slapos.slap.slap()
                    slap.initializeConnection(master_url, key, cert)
                    supply = slap.registerSupply()
                    order = slap.registerOpenOrder()
                    assert master_url.startswith('https:')
                    rpc = xmlrpclib.ServerProxy(master_url, allow_none=True,
                        transport=x509Transport(
                            {'key_file': key, 'cert_file': cert}))
                    master_slap_connection_dict[
                        master_slap_connection_key] = (supply, order, rpc)
                # The tester name embeds the test name, the node title and
                # the current UTC time.
                tester = SoftwareReleaseTester(
                    test_name + '_' + node_title + time.strftime(
                        '_%Y/%m/%d_%H:%M:%S_+0000', time.gmtime()),
                    logger,
                    rpc,
                    supply,
                    order,
                    section_entry_dict['url'],
                    section_entry_dict['target_computer'],
                    section_entry_dict['max_install_duration'],
                    section_entry_dict['max_uninstall_duration'],
                    section_entry_dict.get('request_kw'),
                    section_entry_dict.get('max_request_duration'),
                    section_entry_dict.get('max_destroy_duration'),
                )
                ran_test_set.add(test_name)
                running_test_dict[test_name] = (test_line, tester, target_computer)
            if not running_test_dict:
                # Nothing is running and nothing could be started: done.
                break

            now = time.time()
            # Synchronise refreshes on watcher period, so it doesn't report a
            # stalled test node where we are actually still sleeping.
            # Change test_result.watcher_period outside this loop if you wish
            # to change sleep duration.
            next_deadline = now + test_result.watcher_period

            # NOTE(review): entries are deleted from running_test_dict inside
            # this loop; that relies on items() returning a list (Python 2
            # behaviour) - confirm before porting.
            for section, (test_line, tester, target_computer) in running_test_dict.items():
                logger.info('Checking %s: %r...', section, tester)
                try:
                    deadline = tester.tic(now)
                except Exception:
                    # The step raised: report one error with the traceback,
                    # forget the test and try to clean up after it.
                    logger.exception('Test execution fail for %s' % (section))
                    test_line.stop(
                        test_count=1,
                        error_count=1,
                        failure_count=0,
                        skip_count=0,
                        stderr=traceback.format_exc(),
                    )
                    del running_test_dict[section]
                    try:
                        tester.teardown()
                    except slapos.slap.NotFoundError:
                        # This exception is ignored because we cannot
                        # Teardown if SR URL do not exist.
                        logger.exception('Fail and not found')
                        pass
                    except Exception:
                        logger.exception('teardown failed, human '
                            'assistance needed for cleanup')
                        raise

                else:
                    logger.info('%r', tester)
                    if deadline is None:
                        # tic() returned no deadline: the test is reported as
                        # finished without error, then torn down.
                        # TODO: report how long each step took.
                        logger.info('Test execution finished for %s' % (section))
                        test_line.stop(
                            test_count=1,
                            error_count=0,
                            failure_count=0,
                            skip_count=0,
                        )
                        del running_test_dict[section]
                        try:
                            tester.teardown()
                        except slapos.slap.NotFoundError:
                            # This exception is ignored because we cannot
                            # Teardown if SR URL do not exist.
                            logger.exception('Fail and not found')
                            pass
                        except Exception:
                            logger.exception('teardown failed, human '
                                'assistance needed for cleanup')
                            raise

                    else:
                        # Wake up no later than the earliest tester deadline.
                        next_deadline = min(deadline, next_deadline)
            if running_test_dict:
                to_sleep = next_deadline - time.time()
                if to_sleep > 0:
                    logger.info('Sleeping %is...', to_sleep)
                    time.sleep(to_sleep)
                if not test_result.isAlive():
                    # NOTE(review): testers are torn down here but stay in
                    # running_test_dict, so the outer loop keeps polling
                    # them - confirm this is intended.
                    for _, tester, computer_id in running_test_dict.itervalues():
                        tester.teardown()
    finally:
        if pidfile:
            setFinished(pidfile)
        # Help interpreter get rid of AutoSTemp instances.
        key_file_dict.clear()
def initializeSlapOSControler(self, slapproxy_log=None, process_manager=None,
                              reset_software=False, software_path_list=None):
    """(Re)start the slapproxy and declare softwares and partitions to it.

    Steps, in order: write the slapos configuration file from the
    ``template/slapos.cfg.in`` resource, create the software and instance
    folders, delete the proxy database, spawn the proxy, wait 10 seconds,
    supply every path of ``software_path_list``, optionally reset the
    software folder, recreate the instance root and declare
    ``MAX_PARTIONS`` partitions (one ``updateConfiguration`` call each).

    Args:
        slapproxy_log: optional path of a file receiving the proxy's stdout
            and stderr (opened in ``'w'`` mode, hence truncated).
        process_manager: object whose ``process_pid_set`` receives the proxy
            PID. When None the PID is not recorded anywhere.
        reset_software: when true, ``self._resetSoftware()`` is called.
        software_path_list: iterable of software paths to supply. Leaving it
            to None makes the supply step fail, hence ValueError.

    Raises:
        ValueError: when connecting to the proxy, supplying a software or
            registering the computer raises any ``Exception``.
    """
    self.process_manager = process_manager
    self.software_path_list = software_path_list
    self.log('SlapOSControler, initialize, reset_software: %r' %
             reset_software)
    config = self.config
    slapos_config_dict = self.config.copy()
    slapos_config_dict.update(software_root=self.software_root,
                              instance_root=self.instance_root,
                              proxy_database=self.proxy_database)
    # Context manager so the configuration is flushed and closed before the
    # proxy, which reads it, is started.
    with open(self.slapos_config, 'w') as slapos_config_file:
        slapos_config_file.write(
            pkg_resources.resource_string(
                'erp5.util.testnode',
                'template/slapos.cfg.in') % slapos_config_dict)
    createFolder(self.software_root)
    createFolder(self.instance_root)
    # By erasing everything, we make sure that we are able to "update"
    # existing profiles. This is quite dirty way to do updates...
    if os.path.exists(self.proxy_database):
        os.unlink(self.proxy_database)

    # preexec_fn=os.setsid makes the child call setsid() before exec.
    kwargs = dict(close_fds=True, preexec_fn=os.setsid)
    slapproxy_log_fp = None
    if slapproxy_log is not None:
        slapproxy_log_fp = open(slapproxy_log, 'w')
        kwargs['stdout'] = slapproxy_log_fp
        kwargs['stderr'] = slapproxy_log_fp
    try:
        proxy = subprocess.Popen(
            [config['slapproxy_binary'], self.slapos_config], **kwargs)
    finally:
        # The child owns its own copy of the descriptor once spawned; the
        # parent's file object is no longer needed and must not leak.
        if slapproxy_log_fp is not None:
            slapproxy_log_fp.close()
    if process_manager is not None:
        process_manager.process_pid_set.add(proxy.pid)

    # XXX: dirty, giving some time for proxy to being able to accept
    # connections
    time.sleep(10)
    try:
        slap = slapos.slap.slap()
        self.slap = slap
        self.slap.initializeConnection(config['master_url'])
        # register software profile
        for path in self.software_path_list:
            slap.registerSupply().supply(
                path,
                computer_guid=config['computer_id'])
        computer = slap.registerComputer(config['computer_id'])
    except Exception:
        # "except Exception" rather than a bare except, so that
        # KeyboardInterrupt and SystemExit are not turned into ValueError.
        self.log("SlapOSControler.initializeSlapOSControler, "
                 "exception in registerSupply", exc_info=sys.exc_info())
        raise ValueError("Unable to initializeSlapOSControler")

    # Reset all previously generated software if needed
    if reset_software:
        self._resetSoftware()

    instance_root = self.instance_root
    if os.path.exists(instance_root):
        # delete old partitions which may exist in order to not get their
        # data (ex. MySQL db content) from previous testnode's runs.
        # In order to be able to change partition naming scheme, do this at
        # instance_root level (such change happened already, causing
        # problems).
        shutil.rmtree(instance_root)
    if not os.path.exists(instance_root):
        os.mkdir(instance_root)

    for i in range(0, MAX_PARTIONS):
        # create partition and configure computer
        # XXX: at the moment all partitions do share same virtual interface
        # address; this is not a problem as usually all services are on
        # different ports
        partition_reference = '%s-%s' % (config['partition_reference'], i)
        partition_path = os.path.join(instance_root, partition_reference)
        if not os.path.exists(partition_path):
            os.mkdir(partition_path)
        # 0o750 is accepted by Python 2.6+ and Python 3 (0750 is Python 2
        # only).
        os.chmod(partition_path, 0o750)
        computer.updateConfiguration(xml_marshaller.xml_marshaller.dumps({
            'address': config['ipv4_address'],
            'instance_root': instance_root,
            'netmask': '255.255.255.255',
            'partition_list': [{
                'address_list': [
                    {
                        'addr': config['ipv4_address'],
                        'netmask': '255.255.255.255',
                    },
                    {
                        'addr': config['ipv6_address'],
                        'netmask': 'ffff:ffff:ffff::',
                    },
                ],
                'path': partition_path,
                'reference': partition_reference,
                'tap': {'name': partition_reference},
            }],
            'reference': config['computer_id'],
            'software_root': self.software_root,
        }))
def initializeSlapOSControler(self, slapproxy_log=None, process_manager=None,
                              reset_software=False, software_path_list=None):
    """(Re)start the slapproxy and declare softwares and partitions to it.

    Steps, in order: write the slapos configuration file from the
    ``template/slapos.cfg.in`` resource, create the software and instance
    folders, delete the proxy database, spawn the proxy, wait 10 seconds,
    supply every path of ``software_path_list``, optionally reset the
    software folder, recreate the instance root and declare
    ``MAX_PARTIONS`` partitions (one ``updateConfiguration`` call each).

    Args:
        slapproxy_log: optional path of a file receiving the proxy's stdout
            and stderr (opened in ``'w'`` mode, hence truncated).
        process_manager: object whose ``process_pid_set`` receives the proxy
            PID. When None the PID is not recorded anywhere.
        reset_software: when true, ``self._resetSoftware()`` is called.
        software_path_list: iterable of software paths to supply. Leaving it
            to None makes the supply step fail, hence ValueError.

    Raises:
        ValueError: when connecting to the proxy, supplying a software or
            registering the computer raises any ``Exception``.
    """
    self.process_manager = process_manager
    self.software_path_list = software_path_list
    self.log('SlapOSControler, initialize, reset_software: %r' %
             reset_software)
    config = self.config
    slapos_config_dict = self.config.copy()
    slapos_config_dict.update(software_root=self.software_root,
                              instance_root=self.instance_root,
                              proxy_database=self.proxy_database)
    # Context manager so the configuration is flushed and closed before the
    # proxy, which reads it, is started.
    with open(self.slapos_config, 'w') as slapos_config_file:
        slapos_config_file.write(
            pkg_resources.resource_string(
                'erp5.util.testnode',
                'template/slapos.cfg.in') % slapos_config_dict)
    createFolder(self.software_root)
    createFolder(self.instance_root)
    # By erasing everything, we make sure that we are able to "update"
    # existing profiles. This is quite dirty way to do updates...
    if os.path.exists(self.proxy_database):
        os.unlink(self.proxy_database)

    # preexec_fn=os.setsid makes the child call setsid() before exec.
    kwargs = dict(close_fds=True, preexec_fn=os.setsid)
    slapproxy_log_fp = None
    if slapproxy_log is not None:
        slapproxy_log_fp = open(slapproxy_log, 'w')
        kwargs['stdout'] = slapproxy_log_fp
        kwargs['stderr'] = slapproxy_log_fp
    try:
        proxy = subprocess.Popen(
            [config['slapproxy_binary'], self.slapos_config], **kwargs)
    finally:
        # The child owns its own copy of the descriptor once spawned; the
        # parent's file object is no longer needed and must not leak.
        if slapproxy_log_fp is not None:
            slapproxy_log_fp.close()
    if process_manager is not None:
        process_manager.process_pid_set.add(proxy.pid)

    # XXX: dirty, giving some time for proxy to being able to accept
    # connections
    time.sleep(10)
    try:
        slap = slapos.slap.slap()
        self.slap = slap
        self.slap.initializeConnection(config['master_url'])
        # register software profile
        for path in self.software_path_list:
            slap.registerSupply().supply(
                path,
                computer_guid=config['computer_id'])
        computer = slap.registerComputer(config['computer_id'])
    except Exception:
        # "except Exception" rather than a bare except, so that
        # KeyboardInterrupt and SystemExit are not turned into ValueError.
        self.log("SlapOSControler.initializeSlapOSControler, "
                 "exception in registerSupply", exc_info=sys.exc_info())
        raise ValueError("Unable to initializeSlapOSControler")

    # Reset all previously generated software if needed
    if reset_software:
        self._resetSoftware()

    instance_root = self.instance_root
    if os.path.exists(instance_root):
        # delete old partitions which may exist in order to not get their
        # data (ex. MySQL db content) from previous testnode's runs.
        # In order to be able to change partition naming scheme, do this at
        # instance_root level (such change happened already, causing
        # problems).
        shutil.rmtree(instance_root)
    if not os.path.exists(instance_root):
        os.mkdir(instance_root)

    for i in range(0, MAX_PARTIONS):
        # create partition and configure computer
        # XXX: at the moment all partitions do share same virtual interface
        # address; this is not a problem as usually all services are on
        # different ports
        partition_reference = '%s-%s' % (config['partition_reference'], i)
        partition_path = os.path.join(instance_root, partition_reference)
        if not os.path.exists(partition_path):
            os.mkdir(partition_path)
        # 0o750 is accepted by Python 2.6+ and Python 3 (0750 is Python 2
        # only).
        os.chmod(partition_path, 0o750)
        computer.updateConfiguration(xml_marshaller.xml_marshaller.dumps({
            'address': config['ipv4_address'],
            'instance_root': instance_root,
            'netmask': '255.255.255.255',
            'partition_list': [{
                'address_list': [
                    {
                        'addr': config['ipv4_address'],
                        'netmask': '255.255.255.255',
                    },
                    {
                        'addr': config['ipv6_address'],
                        'netmask': 'ffff:ffff:ffff::',
                    },
                ],
                'path': partition_path,
                'reference': partition_reference,
                'tap': {'name': partition_reference},
            }],
            'reference': config['computer_id'],
            'software_root': self.software_root,
        }))
def initializeSlapOSControler(self, slapproxy_log=None, process_manager=None,
                              reset_software=False, software_path_list=None):
    """(Re)start the slapos proxy and declare softwares and partitions to it.

    Steps, in order: write the slapos configuration file from the
    ``template/slapos.cfg.in`` resource, delete the proxy database, spawn
    ``slapos proxy start``, poll the proxy once per second until it answers
    (at most 60 attempts), supply every path of ``software_path_list``,
    reset or create the software folder, recreate the instance root, create
    ``MAX_PARTITIONS`` partition directories and send the whole computer
    description in a single ``updateConfiguration`` call.

    Args:
        slapproxy_log: optional path of a file receiving the proxy's stdout
            and stderr (opened in ``'w'`` mode, hence truncated).
        process_manager: object whose ``process_pid_set`` receives the proxy
            PID. When None the PID is not recorded anywhere.
        reset_software: when true ``self._resetSoftware()`` is called,
            otherwise ``createFolder(self.software_root)``.
        software_path_list: iterable of software paths to supply. Leaving it
            to None makes the supply step fail, hence ValueError.

    Raises:
        slapos.slap.ConnectionError: when the proxy still refuses
            connections on the 60th attempt.
        ValueError: when supplying a software raises any ``Exception``.
    """
    self.process_manager = process_manager
    self.software_path_list = software_path_list
    logger.debug('SlapOSControler, initialize, reset_software: %r',
                 reset_software)
    config = self.config
    slapos_config_dict = config.copy()
    slapos_config_dict.update(software_root=self.software_root,
                              instance_root=self.instance_root,
                              proxy_database=self.proxy_database,
                              shared_part_list='\n '.join(
                                  self.shared_part_list))
    with open(self.slapos_config, 'w') as f:
        f.write(
            pkg_resources.resource_string(
                'erp5.util.testnode',
                'template/slapos.cfg.in').decode() % slapos_config_dict)
    # By erasing everything, we make sure that we are able to "update"
    # existing profiles. This is quite dirty way to do updates...
    if os.path.exists(self.proxy_database):
        os.unlink(self.proxy_database)

    # preexec_fn=os.setsid makes the child call setsid() before exec.
    kwargs = dict(close_fds=True, preexec_fn=os.setsid)
    slapproxy_log_fp = None
    if slapproxy_log is not None:
        slapproxy_log_fp = open(slapproxy_log, 'w')
        kwargs['stdout'] = slapproxy_log_fp
        kwargs['stderr'] = slapproxy_log_fp
    try:
        proxy = subprocess.Popen([
            config['slapos_binary'], 'proxy', 'start', '--cfg',
            self.slapos_config
        ], **kwargs)
    finally:
        # The child owns its own copy of the descriptor once spawned; the
        # parent's file object is no longer needed and must not leak.
        if slapproxy_log_fp is not None:
            slapproxy_log_fp.close()
    if process_manager is not None:
        process_manager.process_pid_set.add(proxy.pid)

    slap = self.slap = slapos.slap.slap()
    # Wait for proxy to accept connections
    retries = 0
    while True:
        time.sleep(1)
        try:
            slap.initializeConnection(config['master_url'])
            computer = slap.registerComputer(config['computer_id'])
            # Call a method to ensure connection to master can be
            # established
            computer.getComputerPartitionList()
        except slapos.slap.ConnectionError as e:
            retries += 1
            if retries >= 60:
                raise
            logger.debug("Proxy still not started %s, retrying", e)
        else:
            break

    try:
        # register software profile
        for path in self.software_path_list:
            slap.registerSupply().supply(
                path,
                computer_guid=config['computer_id'])
    except Exception:
        logger.exception("SlapOSControler.initializeSlapOSControler")
        raise ValueError("Unable to registerSupply")

    # Reset all previously generated software if needed
    if reset_software:
        self._resetSoftware()
    else:
        createFolder(self.software_root)

    instance_root = self.instance_root
    # Delete any existing partition in order to not get its data (ex.
    # MySQL DB content) from previous runs. To support changes of partition
    # naming scheme (which already happened), do this at instance_root level.
    createFolder(instance_root, True)

    partition_list = []
    for i in range(MAX_PARTITIONS):
        # create partition and configure computer
        # XXX: at the moment all partitions do share same virtual interface
        # address; this is not a problem as usually all services are on
        # different ports
        partition_reference = '%s-%s' % (config['partition_reference'], i)
        partition_path = os.path.join(instance_root, partition_reference)
        if not os.path.exists(partition_path):
            os.mkdir(partition_path)
        os.chmod(partition_path, 0o750)
        partition_list.append({
            'address_list': [
                {
                    'addr': config['ipv4_address'],
                    'netmask': '255.255.255.255',
                },
                {
                    'addr': config['ipv6_address'],
                    'netmask': 'ffff:ffff:ffff::',
                },
            ],
            'path': partition_path,
            'reference': partition_reference,
            'tap': {'name': partition_reference},
        })

    computer.updateConfiguration(
        xml_marshaller.xml_marshaller.dumps({
            'address': config['ipv4_address'],
            'instance_root': instance_root,
            'netmask': '255.255.255.255',
            'partition_list': partition_list,
            'reference': config['computer_id'],
            'software_root': self.software_root,
        }))
class SlapOSControler(object):
    """Helper driving SlapOS for a test node.

    Two independent groups of methods live here:

    * ``createSlaposConfigurationFileAccount``, ``supply``, ``request`` and
      the ``*Instance`` helpers talk to a SlapOS master through the
      ``client`` module, using an account configuration file;
    * ``initializeSlapOSControler`` starts a local ``slapos proxy`` and
      declares softwares and partitions to it.

    ``instance_config`` maps an instance reference to the parameters of its
    last ``request`` call, so state changes can replay them.
    """

    def __init__(self, working_directory, config):
        """Derive every working path from ``working_directory``.

        Nothing is created on disk here.
        """
        self.config = config
        self.software_root = os.path.join(working_directory, 'soft')
        self.instance_root = os.path.join(working_directory, 'inst')
        self.slapos_config = os.path.join(working_directory, 'slapos.cfg')
        self.proxy_database = os.path.join(working_directory, 'proxy.db')
        self.instance_config = {}

    # TODO: implement a method to get all instance related the slapOS account
    # and deleting all old instances (based on creation date or name etc...)

    def _getConfigurationFilePath(self):
        """Return the account configuration file path, or raise ValueError.

        ``configuration_file_path`` only exists once
        ``createSlaposConfigurationFileAccount`` has run; a missing attribute
        is reported the same way as a missing file.
        """
        path = getattr(self, 'configuration_file_path', None)
        if path is None or not os.path.exists(path):
            raise ValueError("Configuration file not found.")
        return path

    def createSlaposConfigurationFileAccount(self, key, certificate,
                                             slapos_url, config):
        """Write the key, certificate and ``slapos.cfg`` of a SlapOS account.

        The three files go to ``<config['slapos_directory']>/slapos_account``
        and the configuration path is remembered for ``supply``/``request``.

        Returns:
            The tuple (key path, certificate path, configuration file path).
        """
        # Create "slapos_account" directory in the "slapos_directory"
        slapos_account_directory = os.path.join(config['slapos_directory'],
                                                "slapos_account")
        createFolder(slapos_account_directory)
        # Create slapos-account files
        slapos_account_key_path = os.path.join(slapos_account_directory,
                                               "key")
        slapos_account_certificate_path = os.path.join(
            slapos_account_directory, "certificate")
        configuration_file_path = os.path.join(slapos_account_directory,
                                               "slapos.cfg")
        configuration_file_value = (
            "[slapos]\nmaster_url = %s\n"
            "[slapconsole]\ncert_file = %s\nkey_file = %s" % (
                slapos_url,
                slapos_account_certificate_path,
                slapos_account_key_path))
        with open(slapos_account_key_path, "w") as f:
            f.write(key)
        with open(slapos_account_certificate_path, "w") as f:
            f.write(certificate)
        with open(configuration_file_path, "w") as f:
            f.write(configuration_file_value)
        self.configuration_file_path = configuration_file_path
        return (slapos_account_key_path, slapos_account_certificate_path,
                configuration_file_path)

    def supply(self, software_url, computer_id, state="available"):
        """
        Request the installation of a software release on a specific node
        Ex :
          my_controler.supply('kvm.cfg', 'COMP-726')

        Raises ValueError when the account configuration file is missing or
        when the supply call itself fails.
        """
        logger.debug('SlapOSControler : supply')
        configuration_file_path = self._getConfigurationFilePath()
        parser = argparse.ArgumentParser()
        parser.add_argument("configuration_file")
        parser.add_argument("software_url")
        parser.add_argument("node")
        args = parser.parse_args(
            [configuration_file_path, software_url, computer_id])
        config = client.Config()
        config.setConfig(args, args.configuration_file)
        try:
            local = client.init(config)
            local['supply'](software_url, computer_guid=computer_id,
                            state=state)
            logger.debug('SlapOSControler: supply %s %s %s', software_url,
                         computer_id, state)
        except Exception:
            logger.exception("SlapOSControler.supply")
            raise ValueError("Unable to supply (or remove)")

    def request(self, reference, software_url, software_type=None,
                software_configuration=None, computer_guid=None,
                state='started'):
        """
        configuration_file_path (slapos acount)
        reference : instance title
        software_url : software path/url
        software_type : scalability
        software_configuration : dict { "_" : "{'toto' : 'titi'}" }

        Ex :
          my_controler._request('Instance16h34Ben',
                                'kvm.cfg', 'cluster',
                                { "_" : "{'toto' : 'titi'}" } )

        The parameters are recorded in ``instance_config[reference]`` before
        the request is sent; the entry is removed again once a 'destroyed'
        request succeeded. Raises ValueError when the account configuration
        file is missing or when the request itself fails.
        """
        logger.debug('SlapOSControler : request-->SlapOSMaster')
        self.instance_config[reference] = {
            'software_type': software_type,
            'software_configuration': software_configuration,
            'computer_guid': computer_guid,
            'software_url': software_url,
            'requested_state': state,
            'partition': None,
        }

        filter_kw = None
        if computer_guid is not None:
            filter_kw = {"computer_guid": computer_guid}

        configuration_file_path = self._getConfigurationFilePath()
        parser = argparse.ArgumentParser()
        parser.add_argument("configuration_file")
        args = parser.parse_args([configuration_file_path])
        config = client.Config()
        config.setConfig(args, args.configuration_file)
        try:
            local = client.init(config)
            partition = local['request'](
                software_release=software_url,
                partition_reference=reference,
                partition_parameter_kw=software_configuration,
                software_type=software_type,
                filter_kw=filter_kw,
                state=state)
            self.instance_config[reference]['partition'] = partition
            if state == 'destroyed':
                del self.instance_config[reference]
            elif state == 'started':
                logger.debug('Instance started with configuration: %s',
                             software_configuration)
        except Exception:
            logger.exception("SlapOSControler.request")
            raise ValueError("Unable to do this request")

    def _requestSpecificState(self, reference, state):
        """Replay the recorded request of ``reference`` with a new state.

        Raises KeyError when ``reference`` was never requested.
        """
        recorded = self.instance_config[reference]
        self.request(reference,
                     recorded['software_url'],
                     recorded['software_type'],
                     recorded['software_configuration'],
                     recorded['computer_guid'],
                     state=state)

    def destroyInstance(self, reference):
        """Request the 'destroyed' state; ValueError on any failure."""
        logger.debug('SlapOSControler : delete instance')
        try:
            self._requestSpecificState(reference, 'destroyed')
        except Exception:
            raise ValueError(
                "Can't delete instance %r (instance not created?)" %
                reference)

    def stopInstance(self, reference):
        """Request the 'stopped' state; ValueError on any failure."""
        logger.debug('SlapOSControler : stop instance')
        try:
            self._requestSpecificState(reference, 'stopped')
        except Exception:
            raise ValueError(
                "Can't stop instance %r (instance not created?)" % reference)

    def startInstance(self, reference):
        """Request the 'started' state; ValueError on any failure."""
        logger.debug('SlapOSControler : start instance')
        try:
            self._requestSpecificState(reference, 'started')
        except Exception:
            raise ValueError(
                "Can't start instance %r (instance not created?)" %
                reference)

    def updateInstanceXML(self, reference, software_configuration):
        """
        Update the XML configuration of an instance
        # Request same instance with different parameters.

        Raises ValueError on any failure.
        """
        logger.debug('SlapOSControler : updateInstanceXML will request same'
                     ' instance with new XML configuration...')
        try:
            recorded = self.instance_config[reference]
            self.request(reference,
                         recorded['software_url'],
                         recorded['software_type'],
                         software_configuration,
                         recorded['computer_guid'],
                         state='started')
        except Exception:
            raise ValueError("Can't update instance '%s' (may not exist?)" %
                             reference)

    def _resetSoftware(self):
        """Recreate the software root through ``createFolder(path, True)``."""
        logger.info('SlapOSControler: GOING TO RESET ALL SOFTWARE : %r',
                    self.software_root)
        createFolder(self.software_root, True)

    def initializeSlapOSControler(self, slapproxy_log=None,
                                  process_manager=None, reset_software=False,
                                  software_path_list=None):
        """(Re)start the slapos proxy and declare softwares and partitions.

        Steps, in order: write the slapos configuration file from the
        ``template/slapos.cfg.in`` resource, delete the proxy database, spawn
        ``slapos proxy start``, poll the proxy once per second until it
        answers (at most 60 attempts), supply every path of
        ``software_path_list``, reset or create the software folder, recreate
        the instance root and declare ``MAX_PARTITIONS`` partitions (one
        ``updateConfiguration`` call each).

        Args:
            slapproxy_log: optional path of a file receiving the proxy's
                stdout and stderr (opened in ``'w'`` mode, hence truncated).
            process_manager: object whose ``process_pid_set`` receives the
                proxy PID. When None the PID is not recorded anywhere.
            reset_software: when true ``self._resetSoftware()`` is called,
                otherwise ``createFolder(self.software_root)``.
            software_path_list: iterable of software paths to supply.
                Leaving it to None makes the supply step fail, hence
                ValueError.

        Raises:
            slapos.slap.ConnectionError: when the proxy still refuses
                connections on the 60th attempt.
            ValueError: when supplying a software raises any ``Exception``.
        """
        self.process_manager = process_manager
        self.software_path_list = software_path_list
        logger.debug('SlapOSControler, initialize, reset_software: %r',
                     reset_software)
        config = self.config
        slapos_config_dict = config.copy()
        slapos_config_dict.update(software_root=self.software_root,
                                  instance_root=self.instance_root,
                                  proxy_database=self.proxy_database)
        template = pkg_resources.resource_string('erp5.util.testnode',
                                                 'template/slapos.cfg.in')
        if not isinstance(template, str):
            # resource_string returns bytes; on Python 3 that is not str and
            # must be decoded before being written to a text-mode file.
            template = template.decode()
        with open(self.slapos_config, 'w') as f:
            f.write(template % slapos_config_dict)
        # By erasing everything, we make sure that we are able to "update"
        # existing profiles. This is quite dirty way to do updates...
        if os.path.exists(self.proxy_database):
            os.unlink(self.proxy_database)

        # preexec_fn=os.setsid makes the child call setsid() before exec.
        kwargs = dict(close_fds=True, preexec_fn=os.setsid)
        slapproxy_log_fp = None
        if slapproxy_log is not None:
            slapproxy_log_fp = open(slapproxy_log, 'w')
            kwargs['stdout'] = slapproxy_log_fp
            kwargs['stderr'] = slapproxy_log_fp
        try:
            proxy = subprocess.Popen([
                config['slapos_binary'], 'proxy', 'start', '--cfg',
                self.slapos_config
            ], **kwargs)
        finally:
            # The child owns its own copy of the descriptor once spawned;
            # the parent's file object is no longer needed and must not leak.
            if slapproxy_log_fp is not None:
                slapproxy_log_fp.close()
        if process_manager is not None:
            process_manager.process_pid_set.add(proxy.pid)

        slap = self.slap = slapos.slap.slap()
        # Wait for proxy to accept connections
        retries = 0
        while True:
            time.sleep(1)
            try:
                slap.initializeConnection(config['master_url'])
                computer = slap.registerComputer(config['computer_id'])
                # Call a method to ensure connection to master can be
                # established
                computer.getComputerPartitionList()
            except slapos.slap.ConnectionError as e:
                retries += 1
                if retries >= 60:
                    raise
                logger.debug("Proxy still not started %s, retrying", e)
            else:
                break

        try:
            # register software profile
            for path in self.software_path_list:
                slap.registerSupply().supply(
                    path,
                    computer_guid=config['computer_id'])
        except Exception:
            logger.exception("SlapOSControler.initializeSlapOSControler")
            raise ValueError("Unable to registerSupply")

        # Reset all previously generated software if needed
        if reset_software:
            self._resetSoftware()
        else:
            createFolder(self.software_root)

        instance_root = self.instance_root
        # Delete any existing partition in order to not get its data (ex.
        # MySQL DB content) from previous runs. To support changes of
        # partition naming scheme (which already happened), do this at
        # instance_root level.
        createFolder(instance_root, True)

        for i in range(MAX_PARTITIONS):
            # create partition and configure computer
            # XXX: at the moment all partitions do share same virtual
            # interface address; this is not a problem as usually all
            # services are on different ports
            partition_reference = '%s-%s' % (config['partition_reference'],
                                             i)
            partition_path = os.path.join(instance_root, partition_reference)
            if not os.path.exists(partition_path):
                os.mkdir(partition_path)
            # 0o750 is accepted by Python 2.6+ and Python 3.
            os.chmod(partition_path, 0o750)
            computer.updateConfiguration(
                xml_marshaller.xml_marshaller.dumps({
                    'address': config['ipv4_address'],
                    'instance_root': instance_root,
                    'netmask': '255.255.255.255',
                    'partition_list': [{
                        'address_list': [
                            {
                                'addr': config['ipv4_address'],
                                'netmask': '255.255.255.255',
                            },
                            {
                                'addr': config['ipv6_address'],
                                'netmask': 'ffff:ffff:ffff::',
                            },
                        ],
                        'path': partition_path,
                        'reference': partition_reference,
                        'tap': {'name': partition_reference},
                    }],
                    'reference': config['computer_id'],
                    'software_root': self.software_root,
                }))
def __init__(self, config, log, slapproxy_log=None, process_manager=None,
             reset_software=False):
    """Start a fresh slapproxy and declare the test partitions to it.

    The existing proxy database is deleted, a slapproxy process is spawned
    in its own session, the software profile is supplied to it, and
    MAX_PARTIONS partition directories are recreated under the instance
    root and declared through ``computer.updateConfiguration``.

    Arguments:
    config -- mapping; the keys read here are proxy_database,
      slapproxy_binary, slapos_config, master_url, custom_profile_path,
      computer_id, software_root, instance_root, partition_reference,
      ipv4_address and ipv6_address.
    log -- callable taking a single message string.
    slapproxy_log -- optional path; when given, the proxy's stdout and
      stderr are redirected to that file (truncated on open).
    process_manager -- object whose ``process_pid_set`` receives the proxy
      pid. When None, the pid is not registered anywhere.
    reset_software -- when true, the software root is wiped and recreated.
    """
    log('SlapOSControler, initialize, reset_software: %r' % reset_software)
    self.log = log
    self.config = config
    self.process_manager = process_manager

    # By erasing everything, we make sure that we are able to "update"
    # existing profiles. This is a quite dirty way to do updates...
    if os.path.exists(config['proxy_database']):
        os.unlink(config['proxy_database'])

    kwargs = dict(close_fds=True, preexec_fn=os.setsid)
    slapproxy_log_fp = None
    if slapproxy_log is not None:
        slapproxy_log_fp = open(slapproxy_log, 'w')
        kwargs['stdout'] = slapproxy_log_fp
        kwargs['stderr'] = slapproxy_log_fp
    try:
        proxy = subprocess.Popen(
            [config['slapproxy_binary'], config['slapos_config']], **kwargs)
    finally:
        # The child works on its own copy of the descriptor, so the
        # parent's handle can be released as soon as the spawn attempt is
        # over, whether it succeeded or not.
        if slapproxy_log_fp is not None:
            slapproxy_log_fp.close()
    # The signature allows process_manager=None; honour it instead of
    # failing with AttributeError once the proxy is already running.
    if process_manager is not None:
        process_manager.process_pid_set.add(proxy.pid)

    # XXX: dirty, giving some time for the proxy to be able to accept
    # connections
    time.sleep(10)
    slap = slapos.slap.slap()
    slap.initializeConnection(config['master_url'])

    # register software profile
    self.software_profile = config['custom_profile_path']
    slap.registerSupply().supply(
        self.software_profile,
        computer_guid=config['computer_id'])
    computer = slap.registerComputer(config['computer_id'])

    # Reset all previously generated software if needed
    if reset_software:
        software_root = config['software_root']
        log('SlapOSControler : GOING TO RESET ALL SOFTWARE')
        if os.path.exists(software_root):
            shutil.rmtree(software_root)
        os.mkdir(software_root)
        os.chmod(software_root, 0o750)

    instance_root = config['instance_root']
    if os.path.exists(instance_root):
        # Delete old partitions which may exist in order to not get their
        # data (ex. MySQL db content) from previous testnode runs.
        # In order to be able to change the partition naming scheme, do this
        # at instance_root level (such a change happened already, causing
        # problems).
        shutil.rmtree(instance_root)
    os.mkdir(instance_root)

    for i in range(MAX_PARTIONS):
        # create partition and configure computer
        # XXX: at the moment all partitions do share the same virtual
        # interface address; this is not a problem as usually all services
        # are on different ports
        partition_reference = '%s-%s' % (config['partition_reference'], i)
        partition_path = os.path.join(instance_root, partition_reference)
        os.mkdir(partition_path)
        os.chmod(partition_path, 0o750)
        computer.updateConfiguration(xml_marshaller.xml_marshaller.dumps({
            'address': config['ipv4_address'],
            'instance_root': instance_root,
            'netmask': '255.255.255.255',
            'partition_list': [{
                'address_list': [
                    {'addr': config['ipv4_address'],
                     'netmask': '255.255.255.255'},
                    {'addr': config['ipv6_address'],
                     'netmask': 'ffff:ffff:ffff::'},
                ],
                'path': partition_path,
                'reference': partition_reference,
                'tap': {'name': partition_reference},
            }],
            'reference': config['computer_id'],
            'software_root': config['software_root'],
        }))
def main():
    """
    Note: This code does not test as much as it monitors.
    The goal is to regularly try to build & instantiate a software release
    on several machines, to monitor vifib stability and SR stability as time
    passes (and things once available online become unavailable).
    Part of this function could be reused to make an actual test bot, testing
    only when actual changes are committed to a software release, to look for
    regressions.

    Note: This code does not connect to any instantiated service, it relies on
    the presence of a promise section to make instantiation fail until promise
    is happy.

    Flow: parse the command line, then loop forever: ask the task
    distributor for test suites, create a test result per suite, connect to
    the SlapOS master, and drive one SoftwareReleaseTester per test until
    no test is left. The loop sleeps 300 seconds between rounds. The pidfile
    (when given) is released and the temporary key/certificate files are
    dropped on exit.

    Raises ValueError when the slap connection never exposes
    ``_hateoas_navigator``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pidfile', '-p', help='pidfile preventing parallel '
                        'execution.')
    parser.add_argument('--log', '-l', help='Log file path.')
    parser.add_argument('--verbose', '-v', help='Be verbose.',
                        action='store_true')
    parser.add_argument('configuration_file', type=argparse.FileType(),
                        help='Slap Test Agent configuration file.')
    key_file_dict = {}
    args = parser.parse_args()
    log = args.log
    logger, log_file = getLogger(log, args.verbose)
    configuration = ConfigParser.SafeConfigParser()
    configuration.readfp(args.configuration_file)
    pidfile = args.pidfile
    if pidfile:
        setRunning(logger=logger, pidfile=pidfile)
    try:
        while True:
            # The returned value is not used here; the call is kept because
            # it may validate or update the configuration
            # (NOTE(review): confirm against loadConfiguration).
            loadConfiguration(configuration, logger)
            agent_parameter_dict = dict(configuration.items('agent'))
            task_distributor = TaskDistributor(
                agent_parameter_dict['report_url'], logger=logger)
            task_distributor.subscribeNode(
                node_title=agent_parameter_dict['node_title'],
                computer_guid="None")
            test_suite_data = task_distributor.startTestSuite(
                node_title=agent_parameter_dict['node_title'],
                computer_guid="None")
            if isinstance(test_suite_data, str):
                # Backward compatibility
                test_suite_data = json.loads(test_suite_data)
            slap_account_key = task_distributor.getSlaposAccountKey()
            slap_certificate = task_distributor.getSlaposAccountCertificate()
            master_url = task_distributor.getSlaposUrl()
            key_file_dict = {}

            def asFilenamePair(key, cert):
                # Note: python's ssl support only supports fetching key &
                # cert data from on-disk files. This is why we need to
                # "convert" direct data into file paths, using temporary
                # files.
                cert = cert.strip()
                try:
                    temp_key, temp_cert = key_file_dict[cert]
                except KeyError:
                    temp_key = AutoSTemp(key.strip())
                    temp_cert = AutoSTemp(cert)
                    key_file_dict[cert] = (temp_key, temp_cert)
                return temp_key.name, temp_cert.name

            key_file, cert_file = asFilenamePair(slap_account_key,
                                                 slap_certificate)
            process_manager = ProcessManager(logger.info)
            for test_suite in test_suite_data:
                full_revision_list = getAndUpdateFullRevisionList(
                    test_suite, agent_parameter_dict["working_directory"],
                    logger, process_manager)
                unit_test_dict = task_distributor.generateConfiguration(
                    test_suite['test_suite_title'])
                if not full_revision_list:
                    # We don't watch git revision but we periodically
                    # run the test, once a day.
                    full_revision_list = [
                        "day=%s" % time.strftime('%Y/%m/%d', time.gmtime())]
                if isinstance(unit_test_dict, str):
                    # Backward compatibility
                    unit_test_dict = json.loads(unit_test_dict)
                test_result = task_distributor.createTestResult(
                    revision=','.join(full_revision_list),
                    test_name_list=unit_test_dict.keys(),
                    node_title=agent_parameter_dict['node_title'],
                    allow_restart=False,
                    test_title=test_suite['test_suite_title'],
                    project_title=agent_parameter_dict['project_title'],
                )
                if test_result is None:
                    # We already have a test result
                    logger.info(
                        'Skiping test for %s, result already available (%s)' %
                        (test_suite['test_suite_title'],
                         ','.join(full_revision_list)))
                    continue
                test_result.watcher_period = 120
                if log_file is not None:
                    test_result.addWatch(log, log_file,
                                         max_history_bytes=10000)
                logger.info("Starting to run for %s" % test_result)
                test_mapping = TestMap(unit_test_dict)
                logger.info("Running %s tests in parallel." %
                            len(test_mapping.getGroupList()))
                assert master_url.startswith('https:')

                slap = slapos.slap.slap()
                retry = 0
                while True:
                    if retry > 100:
                        break
                    # wait until _hateoas_navigator is loaded.
                    slap.initializeConnection(
                        master_url, key_file, cert_file, timeout=120)
                    if getattr(slap, '_hateoas_navigator', None) is None:
                        # Count the failed attempt, otherwise the bound
                        # above is never reached and this loops forever.
                        retry += 1
                        logger.info("Fail to load _hateoas_navigator waiting a bit and retry.")
                        time.sleep(30)
                    else:
                        break
                if getattr(slap, '_hateoas_navigator', None) is None:
                    raise ValueError("Fail to load _hateoas_navigator")
                supply = slap.registerSupply()
                order = slap.registerOpenOrder()

                running_test_dict = {}
                logger.info('Starting Test Agent run %s ' %
                            agent_parameter_dict['node_title'])
                while True:
                    # Get up to parallel_task_count tasks to execute
                    while len(running_test_dict) < len(test_mapping.getGroupList())\
                            and (len(test_mapping.getGroupList()) > 0):
                        test_mapping.cleanEmptyGroup()
                        # Select an unused computer to run the test.
                        group = test_mapping.getNextGroup(
                            ignore_list=[
                                running_group for _, _, running_group
                                in running_test_dict.values()])
                        # Select a test
                        test_line = test_result.start(
                            exclude_list=list(
                                test_mapping.getExcludeList(group)))
                        logger.info("Test Line: %s " % test_line)
                        logger.info("Ran Test Set: %s " %
                                    test_mapping.ran_test_set)
                        logger.info("Running test dict: %s " %
                                    running_test_dict)
                        logger.info("Group: %s " % group)
                        if test_line is None:
                            logger.info(
                                "Removing Group (empty test line): %s " % group)
                            test_mapping.dropGroup(group)
                            continue
                        test_name = test_line.name
                        try:
                            section_entry_dict = unit_test_dict[test_name]
                        except KeyError:
                            # We don't know how to execute this test. Assume
                            # it doesn't exist anymore, and fail it in
                            # result.
                            test_line.stop(
                                stderr='This test does not exist on test '
                                'node %s' % (
                                    agent_parameter_dict['node_title'], ))
                            continue
                        general_timeout = agent_parameter_dict.get(
                            'timeout', 3600)
                        tester = SoftwareReleaseTester(
                            test_name + time.strftime(
                                '_%Y/%m/%d_%H:%M:%S_+0000', time.gmtime()),
                            logger,
                            slap,
                            order,
                            supply,
                            section_entry_dict['url'],
                            section_entry_dict.get('supply_computer'),
                            section_entry_dict.get('request_kw'),
                            agent_parameter_dict.get('software_timeout',
                                                     general_timeout),
                            agent_parameter_dict.get('instance_timeout',
                                                     general_timeout)
                        )
                        test_mapping.addRanTest(test_name)
                        running_test_dict[test_name] = (
                            test_line, tester, group)

                    if not running_test_dict:
                        logger.info('No more tests to run...')
                        break

                    now = time.time()
                    # Synchronise refreshes on watcher period, so it doesn't
                    # report a stalled test node where we are actually still
                    # sleeping.
                    # Change test_result.watcher_period outside this loop if
                    # you wish to change sleep duration.
                    next_deadline = now + test_result.watcher_period
                    # Iterate over a snapshot: entries are deleted from
                    # running_test_dict inside the loop.
                    for section, (test_line, tester, group) in \
                            list(running_test_dict.items()):
                        logger.info('Checking %s: %r...', section, tester)
                        try:
                            deadline = tester.tic(now)
                        except ConnectionError:
                            logger.exception(
                                'Test execution ConnectionError for %s' %
                                (section))
                            deadline = next_deadline
                        except Exception:
                            logger.exception(
                                'Test execution fail for %s' % (section))
                            test_line.stop(
                                test_count=1,
                                error_count=1,
                                failure_count=0,
                                skip_count=0,
                                command=tester.getInfo(),
                                stdout=tester.getFormatedLastMessage(),
                                stderr=traceback.format_exc())
                            del running_test_dict[section]
                            try:
                                tester.teardown()
                            except slapos.slap.NotFoundError:
                                # This exception is ignored because we
                                # cannot teardown if the SR URL does not
                                # exist.
                                logger.exception('Fail and not found')
                            except Exception:
                                logger.exception('teardown failed, human assistance needed for cleanup')
                                raise
                        else:
                            logger.info('%r' % tester)
                            if deadline is None:
                                # TODO: report how long each step took.
                                logger.info(
                                    'Test execution finished for %s' %
                                    (section))
                                test_line.stop(
                                    test_count=1,
                                    error_count=0,
                                    failure_count=0,
                                    skip_count=0,
                                    command=tester.getInfo(),
                                    stdout=tester.getFormatedLastMessage())
                                del running_test_dict[section]
                                # tester.teardown() is deliberately not
                                # called on success (it was already
                                # commented out in the original code).
                            else:
                                next_deadline = min(deadline, next_deadline)

                    if running_test_dict:
                        to_sleep = next_deadline - time.time()
                        if to_sleep > 0:
                            logger.info('Sleeping %is...', to_sleep)
                            time.sleep(to_sleep)
                        if not test_result.isAlive():
                            for _, tester, _ in running_test_dict.values():
                                tester.teardown()
            time.sleep(300)
    finally:
        if pidfile:
            setFinished(pidfile)
        key_file_dict.clear()
def run(args):
    """Build the configured software release each time its revision changes.

    ``args[0]`` is the configuration mapping. The keys read here are
    environment, working_directory, profile_path, proxy_database,
    slapproxy_binary, slapos_config, master_url, computer_id, repository,
    git_binary, branch, test_suite_master_url, suite_name, software_root,
    slapgrid_environment and slapgrid_software_binary.

    A slapproxy is started and the profile supplied to it. The function then
    loops forever: it synchronises the git clone, and when the revision
    differs from the one recorded in ``revision.txt`` it wipes the software
    root, runs slapgrid-software with output captured in ``slapgrid.out``
    and reports the outcome. It sleeps 600 seconds between iterations and
    never returns normally.
    """
    config = args[0]
    for k, v in config['environment'].items():
        os.environ[k] = v

    last_revision_file = os.path.join(config['working_directory'],
                                      'revision.txt')
    # Forget the last tested revision so the first iteration always builds.
    if os.path.exists(last_revision_file):
        os.unlink(last_revision_file)
    # fetch repository from git
    repository_clone = os.path.join(config['working_directory'], 'repository')
    profile_path = os.path.join(repository_clone, config['profile_path'])

    if os.path.exists(config['proxy_database']):
        os.unlink(config['proxy_database'])
    proxy = subprocess.Popen(
        [config['slapproxy_binary'], config['slapos_config']],
        close_fds=True, preexec_fn=os.setsid)
    process_group_pid_list.append(proxy.pid)

    slap = slapos.slap.slap()
    slap.initializeConnection(config['master_url'])
    # The proxy may not accept connections yet: retry until supply works.
    while True:
        try:
            slap.registerSupply().supply(
                profile_path, computer_guid=config['computer_id'])
        except socket.error:
            time.sleep(1)
        else:
            break

    while True:
        info_list = []
        while True:
            try:
                if os.path.exists(repository_clone):
                    # A clone of another remote is useless: start over.
                    if getCurrentFetchRemote(config, repository_clone) \
                            != config['repository']:
                        shutil.rmtree(repository_clone)
                if not os.path.exists(repository_clone):
                    subprocess.check_call(
                        [config['git_binary'], 'clone',
                         config['repository'], repository_clone])
                # switch to branch
                branch = getCurrentBranchName(config, repository_clone)
                if branch != config['branch']:
                    subprocess.check_call(
                        [config['git_binary'], 'checkout', '--force',
                         '--track', '-b', config['branch'],
                         'origin/' + config['branch']],
                        cwd=repository_clone)
                subprocess.check_call(
                    [config['git_binary'], 'reset', '--hard', '@{upstream}'],
                    cwd=repository_clone)
            except Exception:
                print('Retrying git in 60s')
                time.sleep(60)
            else:
                break

        info_list.append('Tested repository: %s' % config['repository'])
        info_list.append('Machine identification: %s' % getMachineIdString())
        erp5_report = ERP5TestReportHandler(
            config['test_suite_master_url'],
            '@'.join([config['suite_name'], branch]))

        last_revision = ''
        if os.path.exists(last_revision_file):
            with open(last_revision_file) as revision_fp:
                last_revision = revision_fp.read().strip()
        revision = getRevision(config, repository_clone)
        with open(last_revision_file, 'w') as revision_fp:
            revision_fp.write(revision)

        if revision != last_revision:
            print('Running for revision %r' % revision)
            while True:
                try:
                    erp5_report.reportStart()
                except Exception:
                    print('Retrying in 5s')
                    time.sleep(5)
                else:
                    break

            if os.path.exists(config['software_root']):
                shutil.rmtree(config['software_root'])
            os.mkdir(config['software_root'])

            out_file = os.path.join(config['working_directory'],
                                    'slapgrid.out')
            if os.path.exists(out_file):
                os.unlink(out_file)
            slapgrid_environment = os.environ.copy()
            slapgrid_environment.update(config['slapgrid_environment'])
            info_list.append(
                'Slapgrid environment: %r' % config['slapgrid_environment'])
            # The context manager closes the log even when spawning fails.
            with open(out_file, 'w') as out:
                begin = time.time()
                slapgrid = subprocess.Popen(
                    [config['slapgrid_software_binary'], '-vc',
                     config['slapos_config']],
                    close_fds=True, preexec_fn=os.setsid,
                    stdout=out, stderr=subprocess.STDOUT,
                    env=slapgrid_environment)
                process_group_pid_list.append(slapgrid.pid)
                slapgrid.communicate()

            while True:
                try:
                    erp5_report.reportFinished(
                        out_file, revision, slapgrid.returncode == 0,
                        time.time() - begin, '\n'.join(info_list))
                except Exception:
                    print('Retrying in 5s')
                    time.sleep(5)
                else:
                    break

        print('Sleeping for 600s')
        time.sleep(600)
def _terminateSupervisord(pid_file):
    """Best-effort SIGTERM to the process whose pid is stored in pid_file.

    A missing file, unreadable content or a failing kill is ignored.
    """
    try:
        if os.path.exists(pid_file):
            with open(pid_file) as pid_fp:
                pid = int(pid_fp.read().strip())
            os.kill(pid, signal.SIGTERM)
    except Exception:
        pass


def run(args):
    """Install and instantiate the software release, then run a buildbot slave.

    ``args[0]`` is the configuration mapping. The keys read here are
    environment, instance_root, proxy_database, slapproxy_binary,
    slapos_config, master_url, profile_url, computer_id,
    slapgrid_software_binary, partition_reference, ipv4_address,
    ipv6_address, software_root, instance_dict, slapgrid_partition_binary,
    bin_directory, bot_environment, buildbot_binary, working_directory,
    buildbot_host, slave_name and slave_password.

    Phase 1 starts a slapproxy, retries slapgrid-software every 600 seconds
    until it succeeds, declares one partition, requests the instance and
    checks that ``bin/runUnitTest`` exists in the partition. The proxy is
    terminated at the end of this phase whatever the outcome.

    Phase 2 creates and starts the buildbot slave, then sleeps until
    interrupted. Buildbot and supervisord are stopped on the way out.

    Raises ValueError when instantiation fails, when runUnitTest is missing
    or when a buildbot command returns non-zero.
    """
    config = args[0]
    for k, v in config['environment'].items():
        os.environ[k] = v
    proxy = None
    slapgrid = None
    supervisord_pid_file = os.path.join(config['instance_root'], 'var', 'run',
                                        'supervisord.pid')
    if os.path.exists(config['proxy_database']):
        os.unlink(config['proxy_database'])
    try:
        proxy = subprocess.Popen(
            [config['slapproxy_binary'], config['slapos_config']],
            close_fds=True, preexec_fn=os.setsid)
        process_group_pid_list.append(proxy.pid)
        slap = slapos.slap.slap()
        slap.initializeConnection(config['master_url'])
        # The proxy may not accept connections yet: retry until supply works.
        while True:
            try:
                slap.registerSupply().supply(
                    config['profile_url'],
                    computer_guid=config['computer_id'])
            except socket.error:
                time.sleep(1)
            else:
                break
        while True:
            slapgrid = subprocess.Popen(
                [config['slapgrid_software_binary'], '-vc',
                 config['slapos_config']],
                close_fds=True, preexec_fn=os.setsid)
            process_group_pid_list.append(slapgrid.pid)
            slapgrid.wait()
            if slapgrid.returncode == 0:
                print('Software installed properly')
                break
            print('Problem with software installation, trying again')
            time.sleep(600)
        computer = slap.registerComputer(config['computer_id'])
        partition_reference = config['partition_reference']
        partition_path = os.path.join(config['instance_root'],
                                      partition_reference)
        if not os.path.exists(partition_path):
            os.mkdir(partition_path)
        # NOTE(review): the mode is enforced even on a pre-existing
        # partition directory; confirm this matches the intended nesting.
        os.chmod(partition_path, 0o750)
        computer.updateConfiguration(xml_marshaller.dumps({
            'address': config['ipv4_address'],
            'instance_root': config['instance_root'],
            'netmask': '255.255.255.255',
            'partition_list': [{
                'address_list': [
                    {'addr': config['ipv4_address'],
                     'netmask': '255.255.255.255'},
                    {'addr': config['ipv6_address'],
                     'netmask': 'ffff:ffff:ffff::'},
                ],
                'path': partition_path,
                'reference': partition_reference,
                'tap': {'name': partition_reference},
            }],
            'reference': config['computer_id'],
            'software_root': config['software_root'],
        }))
        slap.registerOpenOrder().request(
            config['profile_url'],
            partition_reference='testing partition',
            partition_parameter_kw=config['instance_dict'])
        slapgrid = subprocess.Popen(
            [config['slapgrid_partition_binary'], '-vc',
             config['slapos_config']],
            close_fds=True, preexec_fn=os.setsid)
        slapgrid.wait()
        if slapgrid.returncode != 0:
            raise ValueError('Slapgrid instance failed')
        runUnitTest = os.path.join(partition_path, 'bin', 'runUnitTest')
        if not os.path.exists(runUnitTest):
            raise ValueError('No %r provided' % runUnitTest)
    except:
        # Cleanup happens in a helper so that errors it swallows cannot
        # replace the exception re-raised below (a Python 2 pitfall when
        # try/except blocks are nested in the same frame).
        _terminateSupervisord(supervisord_pid_file)
        raise
    finally:
        # Nice way to kill *everything* generated by run process -- process
        # groups work only on POSIX compliant systems.
        # NOTE(review): the original comment said exceptions are swallowed
        # during this cleanup phase, but errors from killpg/unlink below do
        # propagate.
        if proxy is not None:
            os.killpg(proxy.pid, signal.SIGTERM)
            if os.path.exists(config['proxy_database']):
                os.unlink(config['proxy_database'])
        if slapgrid is not None and slapgrid.returncode is None:
            os.killpg(slapgrid.pid, signal.SIGTERM)
    try:
        bot_env = os.environ.copy()
        bot_env['PATH'] = ':'.join(
            [config['bin_directory']] + bot_env['PATH'].split(':'))
        for assignment in config['bot_environment'].split():
            # Split on the first '=' only, so values may contain '='.
            k, v = assignment.split('=', 1)
            bot_env[k] = v
        if subprocess.call(
                [config['buildbot_binary'], 'create-slave', '-f',
                 config['working_directory'], config['buildbot_host'],
                 config['slave_name'], config['slave_password']]) != 0:
            raise ValueError('Buildbot call failed')
        process_command_list.append(
            [config['buildbot_binary'], 'stop', config['working_directory']])
        new_tac = os.path.join(config['working_directory'],
                               'buildbot.tac.new')
        if os.path.exists(new_tac):
            # Use the freshly generated configuration instead of the old one.
            tac = os.path.join(config['working_directory'], 'buildbot.tac')
            if os.path.exists(tac):
                os.unlink(tac)
            os.rename(new_tac, tac)
        if subprocess.call(
                [config['buildbot_binary'], 'start',
                 config['working_directory']], env=bot_env) != 0:
            raise ValueError('Issue during starting buildbot')
        # Stay alive until interrupted; buildbot runs in the background.
        while True:
            time.sleep(3600)
    finally:
        try:
            subprocess.call(
                [config['buildbot_binary'], 'stop',
                 config['working_directory']])
        except Exception:
            pass
        _terminateSupervisord(supervisord_pid_file)