Example #1
0
    def take_action(self, args):
        """Run the slapgrid method named by ``self.method_name``.

        The options are the merge of ``args`` and the fetched configuration.
        Unless the ``root_check`` option evaluates to false, the root user
        check is performed first.  Missing parameters and files are then
        checked, ``random_delay`` is called and a slapgrid object is created.

        When a pidfile is available (option ``pidfile``, falling back on
        ``self.default_pidfile``), the call is surrounded by ``setRunning``
        and ``setFinished``.

        Returns whatever the invoked slapgrid method returns.
        """
        options = merged_options(args, self.fetch_config(args))

        # The root check is enabled by default ('True').
        # XXX document this feature.
        root_check = options.get('root_check', 'True')
        if string_to_boolean(root_check.lower()):
            check_root_user(self)

        check_missing_parameters(options)
        check_missing_files(options)

        random_delay(options, logger=self.app.log)

        grid = create_slapgrid_object(options, logger=self.app.log)

        pidfile = options.get('pidfile') or self.default_pidfile
        if not pidfile:
            # No pidfile bookkeeping requested: just run the method.
            return getattr(grid, self.method_name)()

        setRunning(logger=self.app.log, pidfile=pidfile)
        try:
            return getattr(grid, self.method_name)()
        finally:
            # Always release the pidfile, even when the method raises.
            setFinished(pidfile)
Example #2
0
def realRun(argument_tuple, method):
  """Call ``method`` on the slapgrid object built from ``argument_tuple``.

  ``argument_tuple`` is unpacked into
  ``parseArgumentTupleAndReturnSlapgridObject``, which yields the slapgrid
  object and a pidfile.  When the pidfile is set, the call is surrounded by
  ``setRunning`` and ``setFinished``.

  Returns whatever the invoked method returns.
  """
  grid, pidfile = parseArgumentTupleAndReturnSlapgridObject(*argument_tuple)
  if not pidfile:
    # No pidfile bookkeeping requested: just run the method.
    return getattr(grid, method)()

  setRunning(logger=grid.logger, pidfile=pidfile)
  try:
    return getattr(grid, method)()
  finally:
    # Always release the pidfile, even when the method raises.
    setFinished(pidfile)
Example #3
0
    def take_action(self, args):
        """Run the slapgrid method named by ``self.method_name``.

        The options are the merge of ``args`` and the fetched configuration.
        Missing parameters and files are checked, ``random_delay`` is called
        and a slapgrid object is created.

        When a pidfile is available (option ``pidfile``, falling back on
        ``self.default_pidfile``), the call is surrounded by ``setRunning``
        and ``setFinished``.

        Returns whatever the invoked slapgrid method returns.
        """
        options = merged_options(args, self.fetch_config(args))

        check_missing_parameters(options)
        check_missing_files(options)

        random_delay(options, logger=self.app.log)

        grid = create_slapgrid_object(options, logger=self.app.log)

        pidfile = options.get('pidfile') or self.default_pidfile
        if not pidfile:
            # No pidfile bookkeeping requested: just run the method.
            return getattr(grid, self.method_name)()

        setRunning(logger=self.app.log, pidfile=pidfile)
        try:
            return getattr(grid, self.method_name)()
        finally:
            # Always release the pidfile, even when the method raises.
            setFinished(pidfile)
Example #4
0
def main():
    """
    Command-line entry point of the Slap Test Agent.

    Note: This code does not test as much as it monitors.
    The goal is to regularly try to build & instantiate a software release
    on several machines, to monitor vifib stability and SR stability as time
    passes (and things once available online become unavailable).
    Part of this function could be reused to make an actual test bot, testing
    only when actual changes are committed to a software release, to look for
    regressions.

    Note: This code does not connect to any instantiated service, it relies on
    the presence of a promise section to make instantiation fail until promise
    is happy.

    Command line: ``[--pidfile PATH] [--log PATH] [--verbose]
    configuration_file``.

    Every configuration section except 'agent' describes one test.  The
    'agent' section must provide 'node_title', 'test_title',
    'project_title' and 'report_url'.  Each test is driven by a
    SoftwareReleaseTester which is polled with tic() until it returns None
    (success) or raises (error); the outcome is reported on the test line.

    Raises: re-raises any exception hit while loading the configuration or
    while tearing a tester down (other than slapos.slap.NotFoundError).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--pidfile', '-p', help='pidfile preventing parallel '
        'execution.')
    parser.add_argument('--log', '-l', help='Log file path.')
    parser.add_argument('--verbose', '-v', help='Be verbose.',
        action='store_true')
    parser.add_argument('configuration_file', type=argparse.FileType(),
        help='Slap Test Agent configuration file.')
    # Just to keep strong references to AutoSTemp instances
    key_file_dict = {}
    def asFilenamePair(key, cert):
        # Note: python's ssl support only supports fetching key & cert data
        # from on-disk files. This is why we need to "convert" direct data
        # into file paths, using temporary files.
        cert = cert.strip()
        try:
            temp_key, temp_cert = key_file_dict[cert]
        except KeyError:
            temp_key = AutoSTemp(key.strip())
            temp_cert = AutoSTemp(cert)
            key_file_dict[cert] = (temp_key, temp_cert)
        return temp_key.name, temp_cert.name
    args = parser.parse_args()

    log = args.log
    formatter = logging.Formatter('%(asctime)s %(message)s')
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    if log:
        handler = logging.FileHandler(log)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        # Separate read handle on the log file, positioned at its end; it is
        # handed to test_result.addWatch() below.
        # NOTE(review): this handle is never closed explicitly; presumably
        # the watcher reads from it until the process exits - confirm.
        log_file = open(log)
        log_file.seek(0, 2)

    def teardownTester(tester):
        # Tear down a tester whose test line has been stopped.  A missing
        # software release is logged and ignored; any other failure is
        # logged and propagated.
        try:
            tester.teardown()
        except slapos.slap.NotFoundError:
            # This exception is ignored because we cannot
            # Teardown if SR URL do not exist.
            logger.exception('Fail and not found')
        except Exception:
            logger.exception('teardown failed, human '
                'assistance needed for cleanup')
            raise

    pidfile = args.pidfile
    if pidfile:
        setRunning(pidfile)
    try:
        # Load every test section (everything but 'agent'), keeping the
        # order of the configuration file.
        section_dict = collections.OrderedDict()
        configuration = ConfigParser.SafeConfigParser()
        configuration.readfp(args.configuration_file)
        for section in configuration.sections():
            if section == 'agent':
                continue
            section_dict[section] = section_entry_dict = dict(
                configuration.items(section))
            # These options are JSON-encoded in the configuration file.
            for key in ('request_kw', 'max_install_duration',
                        'max_destroy_duration', 'max_request_duration',
                        'max_uninstall_duration', 'computer_list'):
                if key not in section_entry_dict:
                    continue
                value = section_entry_dict[key]
                if isinstance(value, (str, unicode)):
                    try:
                        section_entry_dict[key] = json.loads(value)
                    except Exception:
                        logger.error("Fail to load %s on %s",
                                     key, section_entry_dict)
                        raise
            if 'key' in section_entry_dict:
                # Replace inline key/cert data by temporary file paths.
                key_file, cert_file = asFilenamePair(section_entry_dict['key'],
                    section_entry_dict['cert'])
                section_entry_dict['key'] = key_file
                section_entry_dict['cert'] = cert_file
            if "computer_list" in section_entry_dict:
                section_entry_dict["target_computer"] = \
                          random.choice(section_entry_dict["computer_list"])
        agent_parameter_dict = dict(configuration.items('agent'))
        # XXX: should node title be auto-generated by installation recipe ?
        # For example, using computer guid.
        node_title = agent_parameter_dict['node_title']
        test_title = agent_parameter_dict['test_title']
        project_title = agent_parameter_dict['project_title']
        task_distribution_tool = TaskDistributionTool(agent_parameter_dict[
            'report_url'])
        # (master_url, key) -> (supply, order, rpc), so that each master is
        # only connected to once.
        master_slap_connection_dict = {}
        test_result = task_distribution_tool.createTestResult(
            revision='',
            test_name_list=section_dict.keys(),
            node_title=node_title,
            allow_restart=True,
            test_title=test_title,
            project_title=project_title,
        )
        # Check before the first attribute access, otherwise the check can
        # never fire.
        assert test_result is not None
        test_result.watcher_period = 300
        if log:
            test_result.addWatch(log, log_file, max_history_bytes=10000)
        test_mapping = TestMap(section_dict)
        logger.info("Running %s tests in parallel.",
                    len(test_mapping.getComputerList()))

        ran_test_set = set()
        # test name -> (test_line, tester, target_computer)
        running_test_dict = {}
        more_tests = True
        logger.info('Starting Test Agent run %s ', node_title)
        while True:
            # Get up to parallel_task_count tasks to execute
            while len(running_test_dict) < len(test_mapping.getComputerList())\
                    and more_tests:
                test_mapping.cleanUp()
                target_computer = test_mapping.getNextComputer([computer \
                        for _, _, computer in running_test_dict.itervalues()])

                test_line = test_result.start(
                    exclude_list=list(ran_test_set) + \
                           list(test_mapping.getExcludeList(target_computer)))

                logger.info("Test Line: %s ", test_line)
                logger.info("Ran Test Set: %s ", ran_test_set)
                logger.info("Running test dict: %s ", running_test_dict)
                logger.info("Target Computer: %s ", target_computer)
                if test_line is None:
                    # Nothing left to start for this computer.
                    test_mapping.dropComputer(target_computer)
                    if len(test_mapping.getComputerList()) == 0:
                        more_tests = False
                    continue
                test_name = test_line.name
                try:
                    section_entry_dict = section_dict[test_name]
                except KeyError:
                    # We don't know how to execute this test. Assume it doesn't
                    # exist anymore, and fail it in result.
                    test_line.stop(stderr='This test does not exist on test '
                        'node %s' % (node_title, ))
                    continue
                master_url = section_entry_dict['master_url']
                master_slap_connection_key = (master_url,
                    section_entry_dict.get('key'))
                try:
                    supply, order, rpc = master_slap_connection_dict[
                        master_slap_connection_key]
                except KeyError:
                    key = section_entry_dict.get('key')
                    cert = section_entry_dict.get('cert')
                    slap = slapos.slap.slap()
                    slap.initializeConnection(master_url, key, cert)
                    supply = slap.registerSupply()
                    order = slap.registerOpenOrder()
                    assert master_url.startswith('https:')
                    rpc = xmlrpclib.ServerProxy(master_url, allow_none=True,
                        transport=x509Transport(
                            {'key_file': key, 'cert_file': cert}))
                    master_slap_connection_dict[
                        master_slap_connection_key] = (supply, order, rpc)
                tester = SoftwareReleaseTester(
                    test_name + '_' + node_title + time.strftime(
                        '_%Y/%m/%d_%H:%M:%S_+0000', time.gmtime()),
                    logger,
                    rpc,
                    supply,
                    order,
                    section_entry_dict['url'],
                    section_entry_dict['target_computer'],
                    section_entry_dict['max_install_duration'],
                    section_entry_dict['max_uninstall_duration'],
                    section_entry_dict.get('request_kw'),
                    section_entry_dict.get('max_request_duration'),
                    section_entry_dict.get('max_destroy_duration'),
                )
                ran_test_set.add(test_name)
                running_test_dict[test_name] = (test_line, tester, target_computer)
            if not running_test_dict:
                break

            now = time.time()
            # Synchronise refreshes on watcher period, so it doesn't report a
            # stalled test node where we are actually still sleeping.
            # Change test_result.watcher_period outside this loop if you wish
            # to change sleep duration.
            next_deadline = now + test_result.watcher_period
            # Iterate over a snapshot: finished and failed tests are removed
            # from running_test_dict inside the loop.
            for section, (test_line, tester, _computer) in \
                    list(running_test_dict.items()):
                logger.info('Checking %s: %r...', section, tester)
                try:
                    deadline = tester.tic(now)
                except Exception:
                    logger.exception('Test execution fail for  %s', section)
                    test_line.stop(
                        test_count=1,
                        error_count=1,
                        failure_count=0,
                        skip_count=0,
                        stderr=traceback.format_exc(),
                    )
                    del running_test_dict[section]
                    teardownTester(tester)
                    continue
                logger.info('%r', tester)
                if deadline is not None:
                    # Still running: wake up no later than its deadline.
                    next_deadline = min(deadline, next_deadline)
                    continue
                # TODO: report how long each step took.
                logger.info('Test execution finished for  %s', section)
                test_line.stop(
                    test_count=1,
                    error_count=0,
                    failure_count=0,
                    skip_count=0,
                )
                del running_test_dict[section]
                teardownTester(tester)
            if running_test_dict:
                to_sleep = next_deadline - time.time()
                if to_sleep > 0:
                    logger.info('Sleeping %is...', to_sleep)
                    time.sleep(to_sleep)
                if not test_result.isAlive():
                    # NOTE(review): testers stay in running_test_dict after
                    # this teardown and are polled again on the next pass -
                    # confirm this is intended.
                    for _, tester, _computer in running_test_dict.itervalues():
                        tester.teardown()
    finally:
        if pidfile:
            setFinished(pidfile)
        # Help interpreter get rid of AutoSTemp instances.
        key_file_dict.clear()
Example #5
0
def main():
  """
  Command-line entry point of the Slap Test Agent.

  Note: This code does not test as much as it monitors.
  The goal is to regularly try to build & instantiate a software release
  on several machines, to monitor vifib stability and SR stability as time
  passes (and things once available online become unavailable).
  Part of this function could be reused to make an actual test bot, testing
  only when actual changes are committed to a software release, to look for
  regressions.

  Note: This code does not connect to any instantiated service, it relies on
  the presence of a promise section to make instantiation fail until promise
  is happy.

  Command line: ``[--pidfile PATH] [--log PATH] [--verbose]
  configuration_file``.

  Runs forever: each round asks the task distributor for the test suites of
  this node, runs every unit test of every suite through a
  SoftwareReleaseTester, reports the outcome on the test line, then sleeps
  300 seconds before the next round.

  Raises: ValueError when the slap connection never exposes
  ``_hateoas_navigator``; re-raises any exception (other than
  slapos.slap.NotFoundError) hit while tearing down a failed tester.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--pidfile', '-p', help='pidfile preventing parallel '
      'execution.')
  parser.add_argument('--log', '-l', help='Log file path.')
  parser.add_argument('--verbose', '-v', help='Be verbose.',
      action='store_true')
  parser.add_argument('configuration_file', type=argparse.FileType(),
      help='Slap Test Agent configuration file.')
  # Keeps strong references to AutoSTemp instances. Defined here as well so
  # that the final clean-up works even when the first round fails early.
  key_file_dict = {}
  args = parser.parse_args()

  log = args.log

  logger, log_file = getLogger(log, args.verbose)

  configuration = ConfigParser.SafeConfigParser()
  configuration.readfp(args.configuration_file)

  pidfile = args.pidfile
  if pidfile:
    setRunning(logger=logger, pidfile=pidfile)
  try:
    while True:

      # The returned value is not used below; the call is kept as-is.
      section_dict = loadConfiguration(configuration, logger)

      agent_parameter_dict = dict(configuration.items('agent'))

      task_distributor = TaskDistributor(agent_parameter_dict['report_url'],
                                         logger=logger)

      task_distributor.subscribeNode(
          node_title=agent_parameter_dict['node_title'],
          computer_guid="None")

      test_suite_data = task_distributor.startTestSuite(
          node_title=agent_parameter_dict['node_title'],
          computer_guid="None")

      if isinstance(test_suite_data, str):
        # Backward compatibility
        test_suite_data = json.loads(test_suite_data)

      slap_account_key = task_distributor.getSlaposAccountKey()
      slap_certificate = task_distributor.getSlaposAccountCertificate()
      master_url = task_distributor.getSlaposUrl()

      # Fresh cache for this round; rebinding drops the references held on
      # the previous round's AutoSTemp instances.
      key_file_dict = {}
      def asFilenamePair(key, cert):
          # Note: python's ssl support only supports fetching key & cert data
          # from on-disk files. This is why we need to "convert" direct data
          # into file paths, using temporary files.
          cert = cert.strip()
          try:
              temp_key, temp_cert = key_file_dict[cert]
          except KeyError:
              temp_key = AutoSTemp(key.strip())
              temp_cert = AutoSTemp(cert)
              key_file_dict[cert] = (temp_key, temp_cert)
          return temp_key.name, temp_cert.name

      key_file, cert_file = asFilenamePair(slap_account_key,
        slap_certificate)

      process_manager = ProcessManager(logger.info)

      for test_suite in test_suite_data:

        full_revision_list = getAndUpdateFullRevisionList(test_suite,
            agent_parameter_dict["working_directory"], logger, process_manager)
        unit_test_dict = task_distributor.generateConfiguration(
          test_suite['test_suite_title'])

        if not full_revision_list:
          # We don't watch git revision but we periodically
          # run the test, once a day.
          full_revision_list = ["day=%s" % time.strftime('%Y/%m/%d', time.gmtime())]

        if isinstance(unit_test_dict, str):
          # Backward compatibility
          unit_test_dict = json.loads(unit_test_dict)

        test_result = task_distributor.createTestResult(
          revision=','.join(full_revision_list),
          test_name_list=unit_test_dict.keys(),
          node_title=agent_parameter_dict['node_title'],
          allow_restart=False,
          test_title=test_suite['test_suite_title'],
          project_title=agent_parameter_dict['project_title'],
        )
        if test_result is None:
          # We already have a test result
          logger.info('Skiping test for %s, result already available (%s)' %
            (test_suite['test_suite_title'], ','.join(full_revision_list)))
          continue

        test_result.watcher_period = 120

        if log_file is not None:
          test_result.addWatch(log, log_file, max_history_bytes=10000)

        logger.info("Starting to run for %s", test_result)

        test_mapping = TestMap(unit_test_dict)
        logger.info("Running %s tests in parallel.",
                    len(test_mapping.getGroupList()))

        assert master_url.startswith('https:')
        slap = slapos.slap.slap()
        # Wait until _hateoas_navigator is loaded. The attempt counter must
        # be incremented on every failure, otherwise the limit below is
        # never reached and an unreachable master blocks the agent forever.
        retry = 0
        while retry <= 100:
          slap.initializeConnection(
            master_url, key_file, cert_file, timeout=120)

          if getattr(slap, '_hateoas_navigator', None) is not None:
            break
          logger.info("Fail to load _hateoas_navigator waiting a bit and retry.")
          time.sleep(30)
          retry += 1

        if getattr(slap, '_hateoas_navigator', None) is None:
          raise ValueError("Fail to load _hateoas_navigator")

        supply = slap.registerSupply()
        order = slap.registerOpenOrder()

        # test name -> (test_line, tester, group)
        running_test_dict = {}

        logger.info('Starting Test Agent run %s ', agent_parameter_dict['node_title'])
        while True:
          # Get up to parallel_task_count tasks to execute
          while len(running_test_dict) < len(test_mapping.getGroupList())\
                and (len(test_mapping.getGroupList()) > 0):

            test_mapping.cleanEmptyGroup()

            # Select an unused computer to run the test.
            group = test_mapping.getNextGroup(
              ignore_list = [group for _, _, group in \
                           running_test_dict.itervalues()])

            # Select a test
            test_line = test_result.start(
                exclude_list=list(test_mapping.getExcludeList(group)))

            logger.info("Test Line: %s ", test_line)
            logger.info("Ran Test Set: %s ", test_mapping.ran_test_set)
            logger.info("Running test dict: %s ", running_test_dict)
            logger.info("Group: %s ", group)

            if test_line is None:
              logger.info("Removing Group (empty test line): %s ", group)
              test_mapping.dropGroup(group)
              continue

            test_name = test_line.name
            try:
              section_entry_dict = unit_test_dict[test_name]
            except KeyError:
              # We don't know how to execute this test. Assume it doesn't
              # exist anymore, and fail it in result.
              test_line.stop(stderr='This test does not exist on test '
                  'node %s' % (agent_parameter_dict['node_title'], ))
              continue

            # 'timeout' is the fallback for both more specific timeouts.
            general_timeout = agent_parameter_dict.get('timeout', 3600)
            tester = SoftwareReleaseTester(
                test_name + time.strftime('_%Y/%m/%d_%H:%M:%S_+0000', time.gmtime()),
                logger,
                slap,
                order,
                supply,
                section_entry_dict['url'],
                section_entry_dict.get('supply_computer'),
                section_entry_dict.get('request_kw'),
                agent_parameter_dict.get('software_timeout', general_timeout),
                agent_parameter_dict.get('instance_timeout', general_timeout)
            )
            test_mapping.addRanTest(test_name)
            running_test_dict[test_name] = (test_line, tester, group)

          if not running_test_dict:
            logger.info('No more tests to run...')
            break

          now = time.time()
          # Synchronise refreshes on watcher period, so it doesn't report a
          # stalled test node where we are actually still sleeping.
          # Change test_result.watcher_period outside this loop if you wish
          # to change sleep duration.
          next_deadline = now + test_result.watcher_period

          # Iterate over a snapshot: finished and failed tests are removed
          # from running_test_dict inside the loop.
          for section, (test_line, tester, group) in \
              list(running_test_dict.items()):
            logger.info('Checking %s: %r...', section, tester)
            try:
              deadline = tester.tic(now)
            except ConnectionError:
              # The test stays in running_test_dict and is polled again on
              # the next pass.
              logger.exception('Test execution ConnectionError for  %s', section)
              continue
            except Exception:
              logger.exception('Test execution fail for  %s', section)
              test_line.stop(test_count=1, error_count=1, failure_count=0,
                   skip_count=0, command=tester.getInfo(),
                   stdout=tester.getFormatedLastMessage(),
                   stderr=traceback.format_exc())

              del running_test_dict[section]
              try:
                tester.teardown()
              except slapos.slap.NotFoundError:
                # This exception is ignored because we cannot
                # Teardown if SR URL do not exist.
                logger.exception('Fail and not found')
              except Exception:
                logger.exception('teardown failed, human assistance needed for cleanup')
                raise
              continue

            logger.info('%r', tester)
            if deadline is not None:
              # Still running: wake up no later than its deadline.
              next_deadline = min(deadline, next_deadline)
              continue

            # TODO: report how long each step took.
            logger.info('Test execution finished for  %s', section)
            test_line.stop(test_count=1, error_count=0, failure_count=0,
                      skip_count=0, command=tester.getInfo(),
                      stdout=tester.getFormatedLastMessage())

            # NOTE: tester.teardown() is deliberately not called on success
            # (the call is disabled in this version of the agent).
            del running_test_dict[section]

          if running_test_dict:
            to_sleep = next_deadline - time.time()
            if to_sleep > 0:
              logger.info('Sleeping %is...', to_sleep)
              time.sleep(to_sleep)
            if not test_result.isAlive():
              # NOTE(review): testers stay in running_test_dict after this
              # teardown and are polled again on the next pass - confirm
              # this is intended.
              for _, tester, _group in running_test_dict.itervalues():
                tester.teardown()

      # Pause between two rounds.
      time.sleep(300)
  finally:
    if pidfile:
        setFinished(pidfile)
    # Help interpreter get rid of AutoSTemp instances.
    key_file_dict.clear()