Exemple #1
0
def launchSupervisord(instance_root, logger,
                      supervisord_additional_argument_list=None):
  configuration_file = _getSupervisordConfigurationFilePath(instance_root)
  socket = _getSupervisordSocketPath(instance_root)
  if os.path.exists(socket):
    trynum = 1
    while trynum < 6:
      try:
        supervisor = getSupervisorRPC(socket)
        status = supervisor.getState()
      except xmlrpclib.Fault as e:
        if e.faultCode == 6 and e.faultString == 'SHUTDOWN_STATE':
          logger.info('Supervisor in shutdown procedure, will check again later.')
          trynum += 1
          time.sleep(2 * trynum)
      except Exception:
        # In case if there is problem with connection, assume that supervisord
        # is not running and try to run it
        break
      else:
        if status['statename'] == 'RUNNING' and status['statecode'] == 1:
          logger.debug('Supervisord already running.')
          _updateWatchdog(socket)
          return
        elif status['statename'] == 'SHUTDOWN_STATE' and status['statecode'] == 6:
          logger.info('Supervisor in shutdown procedure, will check again later.')
          trynum += 1
          time.sleep(2 * trynum)
        else:
          log_message = 'Unknown supervisord state %r. Will try to start.' % status
          logger.warning(log_message)
          break

  supervisord_argument_list = ['-c', configuration_file]
  if supervisord_additional_argument_list is not None:
    supervisord_argument_list.extend(supervisord_additional_argument_list)

  logger.info("Launching supervisord with clean environment.")
  # Extract python binary to prevent shebang size limit
  invocation_list = ["supervisord", '-c']
  invocation_list.append(
      "import sys ; sys.path=" + str(sys.path) + " ; " +
      "import supervisor.supervisord ; " +
      "sys.argv[1:1]=" + str(supervisord_argument_list) + " ; " +
      "supervisor.supervisord.main()")
  supervisord_popen = SlapPopen(invocation_list,
                                env={},
                                executable=sys.executable,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                logger=logger)

  result = supervisord_popen.communicate()[0]
  if supervisord_popen.returncode:
    logger.warning('Supervisord unknown problem: %s' % result)
    return

  try:
    default_timeout = socketlib.getdefaulttimeout()
    current_timeout = 1
    trynum = 1
    while trynum < 6:
      try:
        socketlib.setdefaulttimeout(current_timeout)
        supervisor = getSupervisorRPC(socket)
        status = supervisor.getState()
        if status['statename'] == 'RUNNING' and status['statecode'] == 1:
          return
        logger.warning('Wrong status name %(statename)r and code '
          '%(statecode)r, trying again' % status)
        trynum += 1
      except Exception:
        current_timeout = 5 * trynum
        trynum += 1
      else:
        logger.info('Supervisord started correctly in try %s.' % trynum)
        return
    logger.warning('Issue while checking supervisord.')
  finally:
    socketlib.setdefaulttimeout(default_timeout)
Exemple #2
0
  def agregateAndSendUsage(self):
    """Will agregate usage from each Computer Partition.
    """
    # Prepares environment
    self.checkEnvironmentAndCreateStructure()
    self._launchSupervisord()

    slap_computer_usage = self.slap.registerComputer(self.computer_id)
    computer_partition_usage_list = []
    self.logger.info('Aggregating and sending usage reports...')

    #We retrieve XSD models
    try:
      computer_consumption_model = \
        pkg_resources.resource_string(
          'slapos.slap',
          'doc/computer_consumption.xsd')
    except IOError:
      computer_consumption_model = \
        pkg_resources.resource_string(
          __name__,
          '../../../../slapos/slap/doc/computer_consumption.xsd')

    try:
      partition_consumption_model = \
        pkg_resources.resource_string(
          'slapos.slap',
          'doc/partition_consumption.xsd')
    except IOError:
      partition_consumption_model = \
        pkg_resources.resource_string(
          __name__,
          '../../../../slapos/slap/doc/partition_consumption.xsd')

    clean_run = True
    # Loop over the different computer partitions
    computer_partition_list = self.FilterComputerPartitionList(
       slap_computer_usage.getComputerPartitionList())

    for computer_partition in computer_partition_list:
      try:
        computer_partition_id = computer_partition.getId()

        #We want to execute all the script in the report folder
        instance_path = os.path.join(self.instance_root,
            computer_partition.getId())
        report_path = os.path.join(instance_path, 'etc', 'report')
        if os.path.isdir(report_path):
          script_list_to_run = os.listdir(report_path)
        else:
          script_list_to_run = []

        #We now generate the pseudorandom name for the xml file
        # and we add it in the invocation_list
        f = tempfile.NamedTemporaryFile()
        name_xml = '%s.%s' % ('slapreport', os.path.basename(f.name))
        path_to_slapreport = os.path.join(instance_path, 'var', 'xml_report',
            name_xml)

        failed_script_list = []
        for script in script_list_to_run:
          invocation_list = []
          invocation_list.append(os.path.join(instance_path, 'etc', 'report',
            script))
          #We add the xml_file name to the invocation_list
          #f = tempfile.NamedTemporaryFile()
          #name_xml = '%s.%s' % ('slapreport', os.path.basename(f.name))
          #path_to_slapreport = os.path.join(instance_path, 'var', name_xml)

          invocation_list.append(path_to_slapreport)
          #Dropping privileges
          uid, gid = None, None
          stat_info = os.stat(instance_path)
          #stat sys call to get statistics informations
          uid = stat_info.st_uid
          gid = stat_info.st_gid
          process_handler = SlapPopen(invocation_list,
                                      preexec_fn=lambda: dropPrivileges(uid, gid, logger=self.logger),
                                      cwd=os.path.join(instance_path, 'etc', 'report'),
                                      env=None,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      logger=self.logger)
          if process_handler.returncode is None:
            process_handler.kill()
          if process_handler.returncode != 0:
            clean_run = False
            failed_script_list.append("Script %r failed." % script)
            self.logger.warning('Failed to run %r' % invocation_list)
          if len(failed_script_list):
            computer_partition.error('\n'.join(failed_script_list), logger=self.logger)
      # Whatever happens, don't stop processing other instances
      except Exception:
        self.logger.exception('Cannot run usage script(s) for %r:' %
                                  computer_partition.getId())

    #Now we loop through the different computer partitions to report
    report_usage_issue_cp_list = []
    for computer_partition in computer_partition_list:
      try:
        filename_delete_list = []
        computer_partition_id = computer_partition.getId()
        instance_path = os.path.join(self.instance_root, computer_partition_id)
        dir_reports = os.path.join(instance_path, 'var', 'xml_report')
        #The directory xml_report contain a number of files equal
        #to the number of software instance running inside the same partition
        if os.path.isdir(dir_reports):
          filename_list = os.listdir(dir_reports)
        else:
          filename_list = []
        #self.logger.debug('name List %s' % filename_list)

        for filename in filename_list:

          file_path = os.path.join(dir_reports, filename)
          if os.path.exists(file_path):
            usage = open(file_path, 'r').read()

            #We check the validity of xml content of each reports
            if not self.validateXML(usage, partition_consumption_model):
              self.logger.info('WARNING: The XML file %s generated by slapreport is '
                               'not valid - This report is left as is at %s where you can '
                               'inspect what went wrong ' % (filename, dir_reports))
              # Warn the SlapOS Master that a partition generates corrupted xml
              # report
            else:
              computer_partition_usage = self.slap.registerComputerPartition(
                  self.computer_id, computer_partition_id)
              computer_partition_usage.setUsage(usage)
              computer_partition_usage_list.append(computer_partition_usage)
              filename_delete_list.append(filename)
          else:
            self.logger.debug('Usage report %r not found, ignored' % file_path)

        #After sending the aggregated file we remove all the valid xml reports
        for filename in filename_delete_list:
          os.remove(os.path.join(dir_reports, filename))

      # Whatever happens, don't stop processing other instances
      except Exception:
        self.logger.exception('Cannot run usage script(s) for %r:' %
                                computer_partition.getId())

    for computer_partition_usage in computer_partition_usage_list:
      self.logger.info('computer_partition_usage_list: %s - %s' %
                       (computer_partition_usage.usage, computer_partition_usage.getId()))

    #If there is, at least, one report
    if computer_partition_usage_list != []:
      try:
        #We generate the final XML report with asXML method
        computer_consumption = self.asXML(computer_partition_usage_list)

        self.logger.info('Final xml report: %s' % computer_consumption)

        #We test the XML report before sending it
        if self.validateXML(computer_consumption, computer_consumption_model):
          self.logger.info('XML file generated by asXML is valid')
          slap_computer_usage.reportUsage(computer_consumption)
        else:
          self.logger.info('XML file generated by asXML is not valid !')
          raise ValueError('XML file generated by asXML is not valid !')
      except Exception:
        issue = "Cannot report usage for %r: %s" % (
            computer_partition.getId(),
            traceback.format_exc())
        self.logger.info(issue)
        computer_partition.error(issue, logger=self.logger)
        report_usage_issue_cp_list.append(computer_partition_id)

    for computer_partition in computer_partition_list:
      if computer_partition.getState() == COMPUTER_PARTITION_DESTROYED_STATE:
        try:
          computer_partition_id = computer_partition.getId()
          try:
            software_url = computer_partition.getSoftwareRelease().getURI()
            software_path = os.path.join(self.software_root, md5digest(software_url))
          except (NotFoundError, TypeError):
            software_url = None
            software_path = None

          local_partition = Partition(
            software_path=software_path,
            instance_path=os.path.join(self.instance_root,
                computer_partition.getId()),
            supervisord_partition_configuration_path=os.path.join(
              self.supervisord_configuration_directory, '%s.conf' %
              computer_partition_id),
            supervisord_socket=self.supervisord_socket,
            computer_partition=computer_partition,
            computer_id=self.computer_id,
            partition_id=computer_partition_id,
            server_url=self.master_url,
            software_release_url=software_url,
            certificate_repository_path=self.certificate_repository_path,
            buildout=self.buildout,
            logger=self.logger)
          local_partition.stop()
          try:
            computer_partition.stopped()
          except (SystemExit, KeyboardInterrupt):
            computer_partition.error(traceback.format_exc(), logger=self.logger)
            raise
          except Exception:
            pass
          if computer_partition.getId() in report_usage_issue_cp_list:
            self.logger.info('Ignoring destruction of %r, as no report usage was sent' %
                                computer_partition.getId())
            continue
          local_partition.destroy()
        except (SystemExit, KeyboardInterrupt):
          computer_partition.error(traceback.format_exc(), logger=self.logger)
          raise
        except Exception:
          clean_run = False
          self.logger.exception('')
          exc = traceback.format_exc()
          computer_partition.error(exc, logger=self.logger)
        try:
          computer_partition.destroyed()
        except NotFoundError:
          self.logger.debug('Ignored slap error while trying to inform about '
                            'destroying not fully configured Computer Partition %r' %
                                computer_partition.getId())
        except ServerError as server_error:
          self.logger.debug('Ignored server error while trying to inform about '
                            'destroying Computer Partition %r. Error is:\n%r' %
                                (computer_partition.getId(), server_error.args[0]))

    self.logger.info('Finished usage reports.')

    # Return success value
    if not clean_run:
      return SLAPGRID_FAIL
    return SLAPGRID_SUCCESS