Example #1
0
    def _checkPromises(self, computer_partition):
        """Run every promise of a Computer Partition, failing on the first broken one.

        Each entry of ``<instance_root>/<partition id>/etc/promise`` is
        started with the partition directory as working directory and an
        empty environment.  The uid/gid owning the partition directory are
        handed to ``dropPrivileges`` in the child before the promise runs.

        A promise is given at most ``self.promise_timeout`` seconds; the
        method moves on as soon as the promise exits instead of always
        waiting for the whole timeout.

        Raises:
            Slapgrid.PromiseError: a promise is still running once the
                timeout has elapsed, or it exited with a non-zero status.
            OSError: the partition directory cannot be stat'ed.
        """
        self.logger.info("Checking promises...")
        instance_path = os.path.join(self.instance_root, computer_partition.getId())

        # Owner of the partition directory, passed to dropPrivileges() below.
        stat_info = os.stat(instance_path)
        uid = stat_info.st_uid
        gid = stat_info.st_gid

        promise_present = False
        promise_dir = os.path.join(instance_path, "etc", "promise")
        # isdir() is already False for a missing path, no exists() needed.
        if os.path.isdir(promise_dir):
            for promise in os.listdir(promise_dir):
                promise_present = True
                command = [os.path.join(promise_dir, promise)]
                self.logger.info("Checking promise %r.", promise)

                process_handler = SlapPopen(
                    command,
                    preexec_fn=lambda: dropPrivileges(uid, gid),
                    cwd=instance_path,
                    env={},
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)

                # Poll until the promise exits or the deadline is reached,
                # so that a fast promise does not cost the full timeout.
                deadline = time.time() + self.promise_timeout
                while process_handler.poll() is None:
                    remaining = deadline - time.time()
                    if remaining <= 0:
                        break
                    time.sleep(min(remaining, 0.1))

                returncode = process_handler.poll()
                if returncode is None:
                    process_handler.terminate()
                    raise Slapgrid.PromiseError("The promise %r timed out" % promise)
                if returncode != 0:
                    stderr = process_handler.communicate()[1]
                    # With stderr=PIPE a silent failure yields an empty
                    # string, not None, so test for emptiness.
                    if not stderr:
                        stderr = "No error output from %r." % promise
                    else:
                        stderr = "Promise %r:" % promise + stderr
                    raise Slapgrid.PromiseError(stderr)

        if not promise_present:
            self.logger.info("No promise.")
Example #2
0
  def agregateAndSendUsage(self):
    """Run report scripts, send the aggregated usage and destroy partitions.

    Steps, in order:
      1. For every Computer Partition, run each script found in
         <partition>/etc/report, passing it the path of the XML report to
         produce under <partition>/var/xml_report.
      2. Collect every file of <partition>/var/xml_report that validates
         against partition_consumption.xsd; invalid files are left in place.
      3. Build the computer-wide report with self.asXML, validate it against
         computer_consumption.xsd and send it with reportUsage.
      4. Stop and destroy every partition whose state is "destroyed", except
         those whose usage was part of a report that could not be sent.

    Returns False if a report script exited with a non-zero status or if
    stopping a destroyed partition raised; True otherwise.
    """
    slap_computer_usage = self.slap.registerComputer(self.computer_id)
    computer_partition_usage_list = []
    logger = logging.getLogger('UsageReporting')
    logger.info("Aggregating and sending usage reports...")

    #We retrieve XSD models, falling back to a path relative to this module
    try:
      computer_consumption_model = pkg_resources.resource_string(
        'slapos.slap', 'doc/computer_consumption.xsd')
    except IOError:
      computer_consumption_model = pkg_resources.resource_string(
        __name__, '../../../../slapos/slap/doc/computer_consumption.xsd')

    try:
      partition_consumption_model = pkg_resources.resource_string(
        'slapos.slap', 'doc/partition_consumption.xsd')
    except IOError:
      partition_consumption_model = pkg_resources.resource_string(
        __name__, '../../../../slapos/slap/doc/partition_consumption.xsd')

    clean_run = True
    #We loop on the different computer partitions
    for computer_partition in slap_computer_usage.getComputerPartitionList():
      #We want execute all the script in the report folder
      instance_path = os.path.join(self.instance_root,
          computer_partition.getId())
      report_path = os.path.join(instance_path, 'etc', 'report')
      if os.path.isdir(report_path):
        script_list_to_run = os.listdir(report_path)
      else:
        script_list_to_run = []

      #We generate the pseudorandom name for the xml file. The temporary
      #file is only used for its name, so it is closed straight away.
      f = tempfile.NamedTemporaryFile()
      try:
        name_xml = '%s.%s' % ('slapreport', os.path.basename(f.name))
      finally:
        f.close()
      path_to_slapreport = os.path.join(instance_path, 'var', 'xml_report',
          name_xml)

      failed_script_list = []
      for script in script_list_to_run:
        #The script receives the path of the xml report as only argument
        invocation_list = [os.path.join(report_path, script),
          path_to_slapreport]
        #Owner of the partition directory, passed to dropPrivileges()
        stat_info = os.stat(instance_path)
        uid = stat_info.st_uid
        gid = stat_info.st_gid
        kw = dict()
        if not self.console:
          kw.update(stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        process_handler = SlapPopen(invocation_list,
          preexec_fn=lambda: dropPrivileges(uid, gid),
          cwd=report_path,
          env=None, **kw)
        result = process_handler.communicate()[0]
        if self.console:
          #Output was not captured, it went to the console
          result = 'Please consult messages above'
        if process_handler.returncode is None:
          process_handler.kill()
        if process_handler.returncode != 0:
          clean_run = False
          failed_script_list.append("Script %r failed with %s." % (script, result))
          logger.warning("Failed to run %r, the result was. \n%s" %
            (invocation_list, result))
      #Report the failures once per partition, after all scripts have run,
      #instead of re-sending the growing list after every script
      if failed_script_list:
        computer_partition.error('\n'.join(failed_script_list))

    #Now we loop through the different computer partitions to get reports
    report_usage_issue_cp_list = []
    #Partitions, by id, having at least one usage in the aggregated report
    reported_partition_dict = {}
    for computer_partition in slap_computer_usage.getComputerPartitionList():
      filename_delete_list = []
      computer_partition_id = computer_partition.getId()
      instance_path = os.path.join(self.instance_root, computer_partition_id)
      dir_reports = os.path.join(instance_path, 'var', 'xml_report')
      #The directory xml_report contain a number of files equal
      #to the number of software instance running inside the same partition
      if os.path.isdir(dir_reports):
        filename_list = os.listdir(dir_reports)
      else:
        filename_list = []

      for filename in filename_list:
        file_path = os.path.join(dir_reports, filename)
        if not os.path.exists(file_path):
          logger.debug("Usage report %r not found, ignored" % file_path)
          continue

        with open(file_path, 'r') as usage_file:
          usage = usage_file.read()

        #We check the validity of xml content of each reports
        if not self.validateXML(usage, partition_consumption_model):
          logger.info('WARNING: The XML file %s generated by slapreport is not valid - ' \
                          'This report is left as is at %s where you can inspect what went wrong ' % (filename, dir_reports))
          continue

        computer_partition_usage = self.slap.registerComputerPartition(
                self.computer_id, computer_partition_id)
        computer_partition_usage.setUsage(usage)
        computer_partition_usage_list.append(computer_partition_usage)
        filename_delete_list.append(filename)
        reported_partition_dict[computer_partition_id] = computer_partition

      #The valid xml reports are removed once they have been read.
      #NOTE(review): this happens before the aggregated report is sent, so
      #a failed send below loses these reports - confirm this is intended.
      for filename in filename_delete_list:
        os.remove(os.path.join(dir_reports, filename))

    for computer_partition_usage in computer_partition_usage_list:
      logger.info('computer_partition_usage_list : %s - %s' % \
        (computer_partition_usage.usage, computer_partition_usage.getId()))

    #If there is, at least, one report
    if computer_partition_usage_list:
      try:
        #We generate the final XML report with asXML method
        computer_consumption = self.asXML(computer_partition_usage_list)

        logger.info('Final xml report : %s' % computer_consumption)

        #We test the XML report before sending it
        if self.validateXML(computer_consumption, computer_consumption_model):
          logger.info('XML file generated by asXML is valid')
          slap_computer_usage.reportUsage(computer_consumption)
        else:
          logger.info('XML file generated by asXML is not valid !')
          #A real exception class: raising a plain string is itself an error
          raise ValueError('XML file generated by asXML is not valid !')
      except Exception:
        exception = traceback.format_exc()
        #The report is computer-wide: every partition that contributed a
        #usage is affected, not only the last one iterated above
        for computer_partition_id in sorted(reported_partition_dict):
          issue = "Cannot report usage for %r: %s" % (computer_partition_id,
            exception)
          logger.info(issue)
          report_usage_issue_cp_list.append(computer_partition_id)
          reported_partition_dict[computer_partition_id].error(issue)

    for computer_partition in slap_computer_usage.getComputerPartitionList():
      computer_partition_id = computer_partition.getId()
      try:
        software_url = computer_partition.getSoftwareRelease().getURI()
      except NotFoundError:
        software_url = None
      software_path = os.path.join(self.software_root,
            getSoftwareUrlHash(software_url))
      local_partition = Partition(
        software_path=software_path,
        instance_path=os.path.join(self.instance_root,
            computer_partition.getId()),
        supervisord_partition_configuration_path=os.path.join(
          self.supervisord_configuration_directory, '%s.conf' %
          computer_partition_id),
        supervisord_socket=self.supervisord_socket,
        computer_partition=computer_partition,
        computer_id=self.computer_id,
        partition_id=computer_partition_id,
        server_url=self.master_url,
        software_release_url=software_url,
        certificate_repository_path=self.certificate_repository_path,
        console=self.console, buildout=self.buildout
        )
      if computer_partition.getState() == "destroyed":
        try:
          local_partition.stop()
          try:
            computer_partition.stopped()
          except (SystemExit, KeyboardInterrupt):
            exception = traceback.format_exc()
            computer_partition.error(exception)
            raise
          except Exception:
            #Best effort: failing to notify "stopped" must not prevent
            #the destruction below
            pass
        except (SystemExit, KeyboardInterrupt):
          exception = traceback.format_exc()
          computer_partition.error(exception)
          raise
        except Exception:
          clean_run = False
          exception = traceback.format_exc()
          computer_partition.error(exception)
          logger.error(exception)
        #Keep the partition as long as its usage could not be reported
        if computer_partition.getId() in report_usage_issue_cp_list:
          logger.info('Ignoring destruction of %r, as not report usage was '
            'sent' % computer_partition.getId())
          continue
        local_partition.destroy()
        try:
          computer_partition.destroyed()
        except slap.NotFoundError:
          logger.debug('Ignored slap error while trying to inform about '
              'destroying not fully configured Computer Partition %r' %
                  computer_partition.getId())

    logger.info("Finished usage reports...")
    return clean_run
Example #3
0
  def processComputerPartitionList(self):
    """Will start supervisord and process each Computer Partition.

    For every partition returned by self.getComputerPartitionList():
      - "started": install, report available, start, report started;
      - "stopped": install, report available, stop, report stopped;
      - "destroyed": stop only (a failure to report "stopped" is ignored);
      - any other state is reported as an error.
    Then every script of <partition>/etc/promise is run and given at most
    self.promise_timeout seconds. A promise that times out or exits with a
    non-zero status is reported through computer_partition.error().

    Exceptions other than SystemExit and KeyboardInterrupt raised while
    processing a partition are logged and reported, then processing goes on
    with the promises and the next partition.

    Returns True if no partition raised and every promise succeeded, False
    otherwise.
    """
    logger = logging.getLogger('ComputerPartitionProcessing')
    logger.info("Processing computer partitions...")
    # Prepares environment
    self.checkEnvironmentAndCreateStructure()
    self._launchSupervisord()
    # Process Computer Partitions
    clean_run = True
    for computer_partition in self.getComputerPartitionList():
      computer_partition_id = computer_partition.getId()
      try:
        software_url = computer_partition.getSoftwareRelease().getURI()
      except NotFoundError:
        software_url = None
      software_path = os.path.join(self.software_root,
            getSoftwareUrlHash(software_url))
      local_partition = Partition(
        software_path=software_path,
        instance_path=os.path.join(self.instance_root,
            computer_partition.getId()),
        supervisord_partition_configuration_path=os.path.join(
          self.supervisord_configuration_directory, '%s.conf' %
          computer_partition_id),
        supervisord_socket=self.supervisord_socket,
        computer_partition=computer_partition,
        computer_id=self.computer_id,
        partition_id=computer_partition_id,
        server_url=self.master_url,
        software_release_url=software_url,
        certificate_repository_path=self.certificate_repository_path,
        console=self.console, buildout=self.buildout)
      # There are no conditions to try to instanciate partition
      try:
        computer_partition_state = computer_partition.getState()
        if computer_partition_state == "started":
          local_partition.install()
          computer_partition.available()
          local_partition.start()
          computer_partition.started()
        elif computer_partition_state == "stopped":
          local_partition.install()
          computer_partition.available()
          local_partition.stop()
          computer_partition.stopped()
        elif computer_partition_state == "destroyed":
          # Stop, but safely
          try:
            local_partition.stop()
            try:
              computer_partition.stopped()
            except (SystemExit, KeyboardInterrupt):
              exception = traceback.format_exc()
              computer_partition.error(exception)
              raise
            except Exception:
              # Best effort: failing to notify "stopped" is not an error
              # for a partition that is about to be destroyed.
              pass
          except (SystemExit, KeyboardInterrupt):
            exception = traceback.format_exc()
            computer_partition.error(exception)
            raise
          except Exception:
            clean_run = False
            exception = traceback.format_exc()
            logger.error(exception)
            computer_partition.error(exception)
        else:
          error_string = "Computer Partition %r has unsupported state: %s" % \
            (computer_partition_id, computer_partition_state)
          computer_partition.error(error_string)
          raise NotImplementedError(error_string)
      except (SystemExit, KeyboardInterrupt):
        exception = traceback.format_exc()
        computer_partition.error(exception)
        raise
      except Exception:
        clean_run = False
        exception = traceback.format_exc()
        logger.error(exception)
        computer_partition.error(exception)

      # Promises

      instance_path = os.path.join(self.instance_root,
          computer_partition.getId())

      # Get the list of promises
      promise_dir = os.path.join(instance_path, 'etc', 'promise')
      if os.path.isdir(promise_dir):
        # Owner of the partition directory, passed to dropPrivileges().
        # Only stat'ed when there are promises to run, so that a missing
        # partition directory does not abort the remaining partitions.
        stat_info = os.stat(instance_path)
        uid = stat_info.st_uid
        gid = stat_info.st_gid

        # Check whether every promise is kept
        for promise in os.listdir(promise_dir):
          command = os.path.join(promise_dir, promise)

          kw = dict()
          if not self.console:
            kw.update(stdout=subprocess.PIPE, stderr=subprocess.PIPE)

          process_handler = SlapPopen(command,
            preexec_fn=lambda: dropPrivileges(uid, gid),
            cwd=instance_path,
            env=None, **kw)

          # Poll until the promise exits or the deadline is reached, so
          # that a fast promise does not cost the full timeout.
          deadline = time.time() + self.promise_timeout
          while process_handler.poll() is None:
            remaining = deadline - time.time()
            if remaining <= 0:
              break
            time.sleep(min(remaining, 0.1))

          returncode = process_handler.poll()
          if returncode is None:
            process_handler.kill()
            computer_partition.error("The promise %r timed out" % promise)
            clean_run = False
          elif returncode != 0:
            stderr = process_handler.communicate()[1]
            # stderr is None in console mode (not captured) and an empty
            # string when the promise printed nothing: use the fallback
            # message in both cases rather than reporting an empty error.
            if not stderr:
              stderr = 'No error output from %r.' % promise
            computer_partition.error(stderr)
            clean_run = False


    logger.info("Finished computer partitions...")
    return clean_run
          trynum += 1
          time.sleep(2 * trynum)
        else:
          log_message = 'Unknown supervisord state %r. Will try to start.' % status
          logger.warning(log_message)
          break

  logger.info("Launching supervisord with clean environment.")
  # Extract python binary to prevent shebang size limit
  invocation_list = ["supervisord", '-c']
  invocation_list.append("import sys ; sys.path=" + str(sys.path) + " ; import "
      "supervisor.supervisord ; sys.argv[1:1]=['-c','" +
      configuration_file +
      "'] ; supervisor.supervisord.main()")
  supervisord_popen = SlapPopen(invocation_list,
      env={},
      executable=sys.executable, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
  result = supervisord_popen.communicate()[0]
  if supervisord_popen.returncode == 0:
    log_message = 'Supervisord command invoked with: %s' % result
    logger.info(log_message)
    try:
      default_timeout = socketlib.getdefaulttimeout()
      current_timeout = 1
      trynum = 1
      while trynum < 6:
        try:
          socketlib.setdefaulttimeout(current_timeout)
          status = supervisor.getState()
          if status['statename'] == 'RUNNING' and status['statecode'] == 1:
            return