Ejemplo n.º 1
0
def main(args):
  """Runs the bot in headless mode and returns the process exit code.

  Args:
    args: command line arguments, parsed with optparse. Any positional
        argument left over is handed to run_bot() as an error string.

  Returns:
    1 if SINGLETON could not be acquired, otherwise the value returned by
    run_bot().
  """
  # Subcommands look at SWARMING_HEADLESS to know that they are running in a
  # headless (non-interactive) mode.
  os.environ['SWARMING_HEADLESS'] = '1'

  # The parser is only kept so the unit test can use --help to quit the
  # process.
  option_parser = optparse.OptionParser(
      description=sys.modules[__name__].__doc__)
  leftover = option_parser.parse_args(args)[1]

  # Only one process with a bot in this directory may run on this host at
  # once. launchd is a bit too 'restart-happy' and can start 2 bots
  # concurrently, but this was observed on linux too.
  acquired = SINGLETON.acquire()
  if not acquired:
    print >> sys.stderr, 'Found a previous bot, %d exiting.' % os.getpid()
    return 1

  # Hand both the stdout and the stderr log files to the log rolling helpers.
  logs_dir = os.path.join(os.path.dirname(THIS_FILE), 'logs')
  for suffix in ('out', 'err'):
    path = os.path.join(logs_dir, 'bot_std%s.log' % suffix)
    os_utilities.roll_log(path)
    os_utilities.trim_rolled_log(path)

  error = 'Unexpected arguments: %s' % leftover if leftover else None
  try:
    return run_bot(error)
  finally:
    # The shutdown hook is called whether run_bot() returned or raised.
    shutdown_bot = bot.Bot(
        None, None, None, None, os.path.dirname(THIS_FILE), None)
    call_hook(shutdown_bot, 'on_bot_shutdown')
Ejemplo n.º 2
0
def main(args):
    """Runs the bot in headless mode and returns the process exit code.

    Args:
      args: command line arguments, parsed with optparse. Any positional
          argument left over is handed to run_bot() as an error string.

    Returns:
      1 if SINGLETON could not be acquired, otherwise the value returned by
      run_bot().
    """
    subprocess42.inhibit_os_error_reporting()
    # Subcommands look at SWARMING_HEADLESS to know that they are running in
    # a headless (non-interactive) mode.
    os.environ['SWARMING_HEADLESS'] = '1'

    # The parser is only kept so the unit test can use --help to quit the
    # process.
    option_parser = optparse.OptionParser(
        description=sys.modules[__name__].__doc__)
    leftover = option_parser.parse_args(args)[1]

    # Only one process with a bot in this directory may run on this host at
    # once.
    if not SINGLETON.acquire():
        pid = os.getpid()
        if sys.platform != 'darwin':
            print >> sys.stderr, 'Found a previous bot, %d exiting.' % pid
            return 1
        # On OSX the host is restarted as a workaround instead of just
        # exiting.
        msg = ('Found a previous bot, %d rebooting as a workaround for '
               'https://crbug.com/569610.') % pid
        print >> sys.stderr, msg
        os_utilities.restart(msg)
        return 1

    # Hand both the stdout and the stderr log files to the log rolling
    # helpers.
    base_dir = os.path.dirname(THIS_FILE)
    logs_dir = os.path.join(base_dir, 'logs')
    for suffix in ('out', 'err'):
        path = os.path.join(logs_dir, 'bot_std%s.log' % suffix)
        os_utilities.roll_log(path)
        os_utilities.trim_rolled_log(path)

    error = 'Unexpected arguments: %s' % leftover if leftover else None
    try:
        return run_bot(error)
    finally:
        # The shutdown hook is called whether run_bot() returned or raised.
        shutdown_bot = bot.Bot(None, None, None, None, base_dir, None)
        call_hook(shutdown_bot, 'on_bot_shutdown')
        logging.info('main() returning')
Ejemplo n.º 3
0
def main(args):
  """Runs the bot in headless mode and returns the process exit code.

  Args:
    args: command line arguments, parsed with optparse. Any positional
        argument left over is handed to run_bot() as an error string.

  Returns:
    1 if SINGLETON could not be acquired, otherwise the value returned by
    run_bot().
  """
  # Subcommands look at SWARMING_HEADLESS to know that they are running in a
  # headless (non-interactive) mode.
  os.environ['SWARMING_HEADLESS'] = '1'

  # The parser is only kept so the unit test can use --help to quit the
  # process.
  option_parser = optparse.OptionParser(
      description=sys.modules[__name__].__doc__)
  _, extra_args = option_parser.parse_args(args)

  # Only one process with a bot in this directory may run on this host at
  # once.
  if not SINGLETON.acquire():
    pid = os.getpid()
    if sys.platform != 'darwin':
      print >> sys.stderr, 'Found a previous bot, %d exiting.' % pid
      return 1
    # On OSX the host is restarted as a workaround instead of just exiting.
    msg = (
        'Found a previous bot, %d rebooting as a workaround for '
        'https://crbug.com/569610.') % pid
    print >> sys.stderr, msg
    os_utilities.restart(msg)
    return 1

  # Hand both the stdout and the stderr log files to the log rolling helpers.
  logs_dir = os.path.join(os.path.dirname(THIS_FILE), 'logs')
  for stream_name in ('out', 'err'):
    stream_log = os.path.join(logs_dir, 'bot_std%s.log' % stream_name)
    os_utilities.roll_log(stream_log)
    os_utilities.trim_rolled_log(stream_log)

  if extra_args:
    error = 'Unexpected arguments: %s' % extra_args
  else:
    error = None
  try:
    return run_bot(error)
  finally:
    # The shutdown hook is called whether run_bot() returned or raised.
    shutdown_bot = bot.Bot(
        None, None, None, os.path.dirname(THIS_FILE), None)
    call_hook(shutdown_bot, 'on_bot_shutdown')
    logging.info('main() returning')
Ejemplo n.º 4
0
def run_manifest(botobj, manifest, start):
    """Defers to task_runner.py.

    Writes the manifest into a fresh work directory, runs task_runner in a
    child process, then reports the outcome through the 'on_after_task' hook
    and deletes the work directory.

    Args:
      botobj: bot object; its base_dir, server, state and post_error() are
          used.
      manifest: dict describing the task. 'task_id', 'hard_timeout',
          'grace_period', 'command', 'io_timeout' and 'dimensions' are read;
          'host' is optional and defaults to botobj.server.
      start: forwarded to task_runner as --start so the time taken to poll the
          task is included in the cost.

    Returns:
      True if the task succeeded, False otherwise.

    Raises:
      ValueError: if the manifest is empty.
    """
    # Ensure the manifest is valid. This can throw a json decoding error. Also
    # raise if it is empty.
    if not manifest:
        raise ValueError('Empty manifest')

    # Necessary to signal an internal_failure. This occurs when task_runner
    # fails to execute the command. It is important to note that this data is
    # extracted before any I/O is done, like writing the manifest to disk.
    task_id = manifest['task_id']
    hard_timeout = manifest['hard_timeout'] or None
    # Default the grace period to 30s here, this doesn't affect the grace
    # period for the actual task.
    grace_period = manifest['grace_period'] or 30
    if hard_timeout:
        # One for the child process, one for run_isolated, one for task_runner.
        # The manifest may carry no grace period at all; treat that as 0 rather
        # than failing on the multiplication before any error can be reported.
        hard_timeout += 3 * (manifest['grace_period'] or 0)
        # For isolated task, download time is not counted for hard timeout so
        # add more time.
        if not manifest['command']:
            hard_timeout += manifest['io_timeout'] or 600

    url = manifest.get('host', botobj.server)
    task_dimensions = manifest['dimensions']
    task_result = {}

    failure = False
    internal_failure = False
    msg = None
    work_dir = os.path.join(botobj.base_dir, 'work')
    try:
        try:
            if os.path.isdir(work_dir):
                file_path.rmtree(work_dir)
        except OSError:
            # If a previous task created an undeleteable file/directory inside
            # 'work', make sure that following tasks are not affected. This is
            # done by working around the undeleteable directory by creating a
            # temporary directory instead. This is not normal behavior. The bot
            # will report a failure on start.
            work_dir = tempfile.mkdtemp(dir=botobj.base_dir, prefix='work')
        else:
            os.makedirs(work_dir)

        env = os.environ.copy()
        # Windows in particular does not tolerate unicode strings in
        # environment variables.
        env['SWARMING_TASK_ID'] = task_id.encode('ascii')

        task_in_file = os.path.join(work_dir, 'task_runner_in.json')
        with open(task_in_file, 'wb') as f:
            f.write(json.dumps(manifest))
        call_hook(botobj, 'on_before_task')
        task_result_file = os.path.join(work_dir, 'task_runner_out.json')
        if os.path.exists(task_result_file):
            os.remove(task_result_file)
        command = [
            sys.executable,
            THIS_FILE,
            'task_runner',
            '--swarming-server',
            url,
            '--in-file',
            task_in_file,
            '--out-file',
            task_result_file,
            '--cost-usd-hour',
            str(botobj.state.get('cost_usd_hour') or 0.),
            # Include the time taken to poll the task in the cost.
            '--start',
            str(start),
            '--min-free-space',
            str(get_min_free_space()),
        ]
        logging.debug('Running command: %s', command)
        # Put the output file into the current working directory, which should
        # be the one containing swarming_bot.zip.
        log_path = os.path.join(botobj.base_dir, 'logs',
                                'task_runner_stdout.log')
        os_utilities.roll_log(log_path)
        os_utilities.trim_rolled_log(log_path)
        with open(log_path, 'a+b') as f:
            proc = subprocess42.Popen(command,
                                      detached=True,
                                      cwd=botobj.base_dir,
                                      env=env,
                                      stdin=subprocess42.PIPE,
                                      stdout=f,
                                      stderr=subprocess42.STDOUT,
                                      close_fds=sys.platform != 'win32')
            try:
                proc.wait(hard_timeout)
            except subprocess42.TimeoutExpired:
                # That's the last ditch effort; as task_runner should have
                # completed a while ago and had enforced the timeout itself (or
                # run_isolated for hard_timeout for isolated task).
                logging.error('Sending SIGTERM to task_runner')
                proc.terminate()
                internal_failure = True
                msg = 'task_runner hung'
                try:
                    proc.wait(grace_period)
                except subprocess42.TimeoutExpired:
                    logging.error('Sending SIGKILL to task_runner')
                    proc.kill()
                proc.wait()
                # The finally clause below still posts the error and runs the
                # hook.
                return False

        logging.info('task_runner exit: %d', proc.returncode)
        if os.path.exists(task_result_file):
            with open(task_result_file, 'rb') as fd:
                task_result = json.load(fd)

        if proc.returncode:
            msg = 'Execution failed: internal error (%d).' % proc.returncode
            internal_failure = True
        elif not task_result:
            logging.warning('task_runner failed to write metadata')
            msg = 'Execution failed: internal error (no metadata).'
            internal_failure = True
        elif task_result[u'must_signal_internal_failure']:
            msg = ('Execution failed: %s' %
                   task_result[u'must_signal_internal_failure'])
            internal_failure = True

        # A non-zero exit code of the task itself is a plain task failure.
        failure = bool(task_result.get('exit_code')) if task_result else False
        return not internal_failure and not failure
    except Exception as e:
        # Failures include IOError when writing if the disk is full, OSError if
        # swarming_bot.zip doesn't exist anymore, etc.
        logging.exception('run_manifest failed')
        msg = 'Internal exception occured: %s\n%s' % (
            e, traceback.format_exc()[-2048:])
        internal_failure = True
        # Be explicit that the task did not succeed instead of falling off the
        # end of the function and returning None.
        return False
    finally:
        if internal_failure:
            post_error_task(botobj, msg, task_id)
        call_hook(botobj, 'on_after_task', failure, internal_failure,
                  task_dimensions, task_result)
        if os.path.isdir(work_dir):
            try:
                file_path.rmtree(work_dir)
            except Exception as e:
                botobj.post_error('Failed to delete work directory %s: %s' %
                                  (work_dir, e))
Ejemplo n.º 5
0
def run_manifest(botobj, manifest, start):
  """Defers to task_runner.py.

  Writes the manifest into a fresh work directory, runs task_runner in a child
  process, then reports the outcome through the 'on_after_task' hook and
  deletes the work directory.

  Args:
    botobj: bot object; its base_dir, remote.url, state and post_error() are
        used.
    manifest: dict describing the task. 'task_id', 'hard_timeout',
        'grace_period', 'command', 'io_timeout' and 'dimensions' are read;
        'host' is optional and defaults to botobj.remote.url.
    start: forwarded to task_runner as --start so the time taken to poll the
        task is included in the cost.

  Returns:
    True if the task succeeded, False otherwise.

  Raises:
    ValueError: if the manifest is empty.
  """
  # Ensure the manifest is valid. This can throw a json decoding error. Also
  # raise if it is empty.
  if not manifest:
    raise ValueError('Empty manifest')

  # Necessary to signal an internal_failure. This occurs when task_runner fails
  # to execute the command. It is important to note that this data is extracted
  # before any I/O is done, like writing the manifest to disk.
  task_id = manifest['task_id']
  hard_timeout = manifest['hard_timeout'] or None
  # Default the grace period to 30s here, this doesn't affect the grace period
  # for the actual task.
  grace_period = manifest['grace_period'] or 30
  if hard_timeout:
    # One for the child process, one for run_isolated, one for task_runner.
    # The manifest may carry no grace period at all; treat that as 0 rather
    # than failing on the multiplication before any error can be reported.
    hard_timeout += 3 * (manifest['grace_period'] or 0)
    # For isolated task, download time is not counted for hard timeout so add
    # more time.
    if not manifest['command']:
      hard_timeout += manifest['io_timeout'] or 600

  url = manifest.get('host', botobj.remote.url)
  task_dimensions = manifest['dimensions']
  task_result = {}

  failure = False
  internal_failure = False
  msg = None
  work_dir = os.path.join(botobj.base_dir, 'work')
  try:
    try:
      if os.path.isdir(work_dir):
        file_path.rmtree(work_dir)
    except OSError:
      # If a previous task created an undeleteable file/directory inside 'work',
      # make sure that following tasks are not affected. This is done by working
      # around the undeleteable directory by creating a temporary directory
      # instead. This is not normal behavior. The bot will report a failure on
      # start.
      work_dir = tempfile.mkdtemp(dir=botobj.base_dir, prefix='work')
    else:
      os.makedirs(work_dir)

    env = os.environ.copy()
    # Windows in particular does not tolerate unicode strings in environment
    # variables.
    env['SWARMING_TASK_ID'] = task_id.encode('ascii')

    task_in_file = os.path.join(work_dir, 'task_runner_in.json')
    with open(task_in_file, 'wb') as f:
      f.write(json.dumps(manifest))
    call_hook(botobj, 'on_before_task')
    task_result_file = os.path.join(work_dir, 'task_runner_out.json')
    if os.path.exists(task_result_file):
      os.remove(task_result_file)
    command = [
      sys.executable, THIS_FILE, 'task_runner',
      '--swarming-server', url,
      '--in-file', task_in_file,
      '--out-file', task_result_file,
      '--cost-usd-hour', str(botobj.state.get('cost_usd_hour') or 0.),
      # Include the time taken to poll the task in the cost.
      '--start', str(start),
    ]
    logging.debug('Running command: %s', command)
    # Put the output file into the current working directory, which should be
    # the one containing swarming_bot.zip.
    log_path = os.path.join(botobj.base_dir, 'logs', 'task_runner_stdout.log')
    os_utilities.roll_log(log_path)
    os_utilities.trim_rolled_log(log_path)
    with open(log_path, 'a+b') as f:
      proc = subprocess42.Popen(
          command,
          detached=True,
          cwd=botobj.base_dir,
          env=env,
          stdout=f,
          stderr=subprocess42.STDOUT)
      try:
        proc.wait(hard_timeout)
      except subprocess42.TimeoutExpired:
        # That's the last ditch effort; as task_runner should have completed a
        # while ago and had enforced the timeout itself (or run_isolated for
        # hard_timeout for isolated task).
        logging.error('Sending SIGTERM to task_runner')
        proc.terminate()
        internal_failure = True
        msg = 'task_runner hung'
        try:
          proc.wait(grace_period)
        except subprocess42.TimeoutExpired:
          logging.error('Sending SIGKILL to task_runner')
          proc.kill()
        proc.wait()
        # The finally clause below still posts the error and runs the hook.
        return False

    logging.info('task_runner exit: %d', proc.returncode)
    if os.path.exists(task_result_file):
      with open(task_result_file, 'rb') as fd:
        task_result = json.load(fd)

    if proc.returncode:
      msg = 'Execution failed: internal error (%d).' % proc.returncode
      internal_failure = True
    elif not task_result:
      logging.warning('task_runner failed to write metadata')
      msg = 'Execution failed: internal error (no metadata).'
      internal_failure = True
    elif task_result[u'must_signal_internal_failure']:
      msg = (
        'Execution failed: %s' % task_result[u'must_signal_internal_failure'])
      internal_failure = True

    # A non-zero exit code of the task itself is a plain task failure.
    failure = bool(task_result.get('exit_code')) if task_result else False
    return not internal_failure and not failure
  except Exception as e:
    # Failures include IOError when writing if the disk is full, OSError if
    # swarming_bot.zip doesn't exist anymore, etc.
    logging.exception('run_manifest failed')
    msg = 'Internal exception occured: %s\n%s' % (
        e, traceback.format_exc()[-2048:])
    internal_failure = True
    # Be explicit that the task did not succeed instead of falling off the end
    # of the function and returning None.
    return False
  finally:
    if internal_failure:
      post_error_task(botobj, msg, task_id)
    call_hook(
        botobj, 'on_after_task', failure, internal_failure, task_dimensions,
        task_result)
    if os.path.isdir(work_dir):
      try:
        file_path.rmtree(work_dir)
      except Exception as e:
        botobj.post_error('Failed to delete work directory: %s' % e)