def trigger_by_manifest(swarming, manifest):
  """Given a task manifest, triggers it for execution on swarming.

  Args:
    swarming: URL of a swarming service.
    manifest: instance of Manifest.

  Returns:
    tuple(Task id, priority) on success. tuple(None, None) on failure.
  """
  logging.info('Triggering: %s', manifest.task_name)
  manifest_text = manifest.to_json()
  result = net.url_read(swarming + '/test', data={'request': manifest_text})
  if not result:
    on_error.report('Failed to trigger task %s' % manifest.task_name)
    return None, None
  try:
    data = json.loads(result)
  except (ValueError, TypeError):
    msg = '\n'.join((
        'Failed to trigger task %s' % manifest.task_name,
        'Manifest: %s' % manifest_text,
        'Bad response: %s' % result))
    on_error.report(msg)
    return None, None
  if not data:
    return None, None
  return data['test_keys'][0]['test_key'], data['priority']
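Note the calling convention above: net.url_read() returns the response body as
a string, or None on failure, so callers must check for None before parsing.
A minimal sketch of consuming the (task id, priority) pair; trigger_or_die is
a hypothetical wrapper, not part of the original code:

import sys

def trigger_or_die(swarming_url, manifest):
  # Wraps trigger_by_manifest() above; exits on the (None, None) failure pair.
  task_id, priority = trigger_by_manifest(swarming_url, manifest)
  if task_id is None:
    sys.exit(1)  # the failure was already reported via on_error.report()
  return task_id, priority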
Example #2
def trigger_by_manifest(swarming, manifest):
    """Given a task manifest, triggers it for execution on swarming.

    Args:
      swarming: URL of a swarming service.
      manifest: instance of Manifest.

    Returns:
      tuple(Task id, priority) on success. tuple(None, None) on failure.
    """
    logging.info('Triggering: %s', manifest.task_name)
    manifest_text = manifest.to_json()
    result = net.url_read(swarming + '/test', data={'request': manifest_text})
    if not result:
        tools.report_error('Failed to trigger task %s' % manifest.task_name)
        return None, None
    try:
        data = json.loads(result)
    except (ValueError, TypeError) as e:
        msg = '\n'.join(('Failed to trigger task %s' % manifest.task_name,
                         'Manifest: %s' % manifest_text,
                         'Bad response: %s' % result, str(e)))
        tools.report_error(msg)
        return None, None
    if not data:
        return None, None
    return data['test_keys'][0]['test_key'], data['priority']
Example #3
def retrieve_results(base_url, test_key, timeout, should_stop):
  """Retrieves results for a single test_key."""
  assert isinstance(timeout, float), timeout
  params = [('r', test_key)]
  result_url = '%s/get_result?%s' % (base_url, urllib.urlencode(params))
  start = now()
  while True:
    if timeout and (now() - start) >= timeout:
      logging.error('retrieve_results(%s) timed out', base_url)
      return {}
    # Do retries ourselves.
    response = net.url_read(result_url, retry_404=False, retry_50x=False)
    if response is None:
      # Aggressively poll for results. Do not use retry_404 so
      # should_stop is polled more often.
      remaining = min(5, timeout - (now() - start)) if timeout else 5
      if remaining > 0:
        if should_stop.get():
          return {}
        net.sleep_before_retry(1, remaining)
    else:
      try:
        data = json.loads(response) or {}
      except (ValueError, TypeError):
        logging.warning(
            'Received corrupted data for test_key %s. Retrying.', test_key)
      else:
        if data['output']:
          return data
    if should_stop.get():
      return {}
Example #4
def retrieve_results(base_url, task_key, timeout, should_stop, output_collector):
    """Retrieves results for a single task_key.

    Returns a dict with results on success or None on failure or timeout.
    """
    assert isinstance(timeout, float), timeout
    params = [("r", task_key)]
    result_url = "%s/get_result?%s" % (base_url, urllib.urlencode(params))
    started = now()
    deadline = started + timeout if timeout else None
    attempt = 0

    while not should_stop.is_set():
        attempt += 1

        # Waiting for too long -> give up.
        current_time = now()
        if deadline and current_time >= deadline:
            logging.error("retrieve_results(%s) timed out on attempt %d", base_url, attempt)
            return None

        # Do not spin too fast. Spin faster at the beginning though.
        # Start with 1 sec delay and for each 30 sec of waiting add another second
        # of delay, until hitting 15 sec ceiling.
        if attempt > 1:
            max_delay = min(15, 1 + (current_time - started) / 30.0)
            delay = min(max_delay, deadline - current_time) if deadline else max_delay
            if delay > 0:
                logging.debug("Waiting %.1f sec before retrying", delay)
                should_stop.wait(delay)
                if should_stop.is_set():
                    return None

        # Disable internal retries in net.url_read, since we are doing retries
        # ourselves. Do not use retry_404 so should_stop is polled more often.
        response = net.url_read(result_url, retry_404=False, retry_50x=False)

        # Request failed. Try again.
        if response is None:
            continue

        # Got some response, ensure it is JSON dict, retry if not.
        try:
            result = json.loads(response) or {}
            if not isinstance(result, dict):
                raise ValueError()
        except (ValueError, TypeError):
            logging.warning("Received corrupted or invalid data for task_key %s, retrying: %r", task_key, response)
            continue

        # Swarming server uses non-empty 'output' value as a flag that task has
        # finished. How to wait for tasks that produce no output is a mystery.
        if result.get("output"):
            # Record the result, try to fetch attached output files (if any).
            if output_collector:
                # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
                output_collector.process_shard_result(result)
            return result
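The backoff comment above ('start with 1 sec delay and for each 30 sec of
waiting add another second of delay, until hitting 15 sec ceiling') is easier
to see in isolation. A self-contained sketch of the same formula:

def poll_delay(elapsed_secs):
  """Backoff used above: 1s base, +1s per 30s of waiting, capped at 15s."""
  return min(15, 1 + elapsed_secs / 30.0)

assert poll_delay(0) == 1.0
assert poll_delay(60) == 3.0
assert poll_delay(600) == 15  # the ceiling is reached after about 7 minutes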
Example #5
def download_data(root_dir, files):
    """Downloads and expands the zip files enumerated in the test run data."""
    for data_url, _ in files:
        logging.info('Downloading: %s', data_url)
        content = net.url_read(data_url)
        if content is None:
            raise Exception('Failed to download %s' % data_url)
        with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
            zip_file.extractall(root_dir)
Example #6
def download_data(root_dir, files):
  """Downloads and expands the zip files enumerated in the test run data."""
  for data_url, _ in files:
    logging.info('Downloading: %s', data_url)
    content = net.url_read(data_url)
    if content is None:
      raise Exception('Failed to download %s' % data_url)
    with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
      zip_file.extractall(root_dir)
Example #7
def calculate_version(url):
  """Retrieves the swarm_bot code and returns the SHA-1 for it."""
  # Cannot use url_open() since zipfile requires .seek().
  archive = zipfile.ZipFile(StringIO.StringIO(net.url_read(url)))
  # See
  # https://code.google.com/p/swarming/source/browse/services/swarming/common/bot_archive.py
  d = hashlib.sha1()
  for f in archive.namelist():
    d.update(archive.read(f))
  return d.hexdigest()
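Note that calculate_version() hashes members in namelist() order, i.e. the
order entries appear in the archive, so two zips with the same contents but a
different member order produce different digests. A standalone sketch that
sorts the names to make the digest order-independent (an alternative, not what
the code above does):

import hashlib
import zipfile
import StringIO  # Python 2, matching the examples above

def sha1_of_zip_members(blob):
  """Returns the SHA-1 over the contents of all zip members, in name order."""
  archive = zipfile.ZipFile(StringIO.StringIO(blob))
  d = hashlib.sha1()
  for name in sorted(archive.namelist()):
    d.update(archive.read(name))
  return d.hexdigest()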
Example #8
    def test_url_read(self):
        # Successfully reads the data.
        self.mock(net, "url_open", lambda url, **_kwargs: net.HttpResponse.get_fake_response("111", url))
        self.assertEqual(net.url_read("https://fake_url.com/test"), "111")

        # Respects url_open connection errors.
        self.mock(net, "url_open", lambda _url, **_kwargs: None)
        self.assertIsNone(net.url_read("https://fake_url.com/test"))

        # Respects read timeout errors.
        def timeouting_http_response(url):
            def read_mock(_size=None):
                raise net.TimeoutError()

            response = net.HttpResponse.get_fake_response("", url)
            self.mock(response, "read", read_mock)
            return response

        self.mock(net, "url_open", lambda url, **_kwargs: timeouting_http_response(url))
        self.assertIsNone(net.url_read("https://fake_url.com/test"))
Example #9
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that were run on the bot. The downloaded
  data will be in a subdirectory named 'work' of the current working
  directory.
  """
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify exactly one task id.')

  url = options.swarming + '/swarming/api/v1/client/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  if not os.path.isdir('work'):
    os.mkdir('work')

  swarming_host = urlparse.urlparse(options.swarming).netloc
  properties = request['properties']
  for data_url, _ in properties['data']:
    assert data_url.startswith('https://'), data_url
    data_host = urlparse.urlparse(data_url).netloc
    if data_host != swarming_host:
      auth.ensure_logged_in('https://' + data_host)

    content = net.url_read(data_url)
    if content is None:
      print >> sys.stderr, 'Failed to download %s' % data_url
      return 1
    with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
      zip_file.extractall('work')

  env = None
  if properties['env']:
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    env.update(
        (k.encode('utf-8'), v.encode('utf-8'))
        for k, v in properties['env'].iteritems())

  exit_code = 0
  for cmd in properties['commands']:
    try:
      c = subprocess.call(cmd, env=env, cwd='work')
    except OSError as e:
      print >> sys.stderr, 'Failed to run: %s' % ' '.join(cmd)
      print >> sys.stderr, str(e)
      c = 1
    if not exit_code:
      exit_code = c
  return exit_code
Example #10
  def url_read(self, resource, **kwargs):
    url = self.url + resource
    if kwargs.get('data') is None:
      # No XSRF token for GET.
      return net.url_read(url, **kwargs)

    if self.need_refresh():
      self.refresh_token()
    resp = self._url_read_post(url, **kwargs)
    if resp is None:
      raise Error('Failed to connect to %s; %s' % (url, self.expiration))
    return resp
Example #11
  def test_url_read(self):
    # Successfully reads the data.
    self.mock(net, 'url_open',
        lambda url, **_kwargs: net_utils.make_fake_response('111', url))
    self.assertEqual(net.url_read('https://fake_url.com/test'), '111')

    # Respects url_open connection errors.
    self.mock(net, 'url_open', lambda _url, **_kwargs: None)
    self.assertIsNone(net.url_read('https://fake_url.com/test'))

    # Respects read timeout errors.
    def timeouting_http_response(url):
      def read_mock(_size=None):
        raise net.TimeoutError()
      response = net_utils.make_fake_response('', url)
      self.mock(response, 'read', read_mock)
      return response

    self.mock(net, 'url_open',
        lambda url, **_kwargs: timeouting_http_response(url))
    self.assertIsNone(net.url_read('https://fake_url.com/test'))
Example #12
  def test_url_read(self):
    # Successfully reads the data.
    self.mock(net, 'url_open',
        lambda url, **_kwargs: net_utils.make_fake_response('111', url))
    self.assertEqual(net.url_read('https://fake_url.com/test'), '111')

    # Respects url_open connection errors.
    self.mock(net, 'url_open', lambda _url, **_kwargs: None)
    self.assertIsNone(net.url_read('https://fake_url.com/test'))

    # Respects read timeout errors.
    def timeouting_http_response(url):
      def read_mock(_size=None):
        raise net.TimeoutError()
      response = net_utils.make_fake_response('', url)
      self.mock(response, 'read', read_mock)
      return response

    self.mock(net, 'url_open',
        lambda url, **_kwargs: timeouting_http_response(url))
    self.assertIsNone(net.url_read('https://fake_url.com/test'))
Example #13
def process_manifest(
    swarming, isolate_server, namespace, isolated_hash, task_name, shards,
    dimensions, env, working_dir, verbose, profile, priority, algo):
  """Processes the manifest file and send off the swarming task request."""
  try:
    manifest = Manifest(
        isolate_server=isolate_server,
        namespace=namespace,
        isolated_hash=isolated_hash,
        task_name=task_name,
        shards=shards,
        dimensions=dimensions,
        env=env,
        working_dir=working_dir,
        verbose=verbose,
        profile=profile,
        priority=priority,
        algo=algo)
  except ValueError as e:
    tools.report_error('Unable to process %s: %s' % (task_name, e))
    return 1

  chromium_setup(manifest)

  logging.info('Zipping up files...')
  if not zip_and_upload(manifest):
    return 1

  logging.info('Server: %s', swarming)
  logging.info('Task name: %s', task_name)
  trigger_url = swarming + '/test'
  manifest_text = manifest.to_json()
  result = net.url_read(trigger_url, data={'request': manifest_text})
  if not result:
    tools.report_error(
        'Failed to trigger task %s\n%s' % (task_name, trigger_url))
    return 1
  try:
    json.loads(result)
  except (ValueError, TypeError) as e:
    msg = '\n'.join((
        'Failed to trigger task %s' % task_name,
        'Manifest: %s' % manifest_text,
        'Bad response: %s' % result,
        str(e)))
    tools.report_error(msg)
    return 1
  return 0
Example #14
def process_manifest(swarming, isolate_server, namespace, isolated_hash,
                     task_name, extra_args, shards, dimensions, env,
                     working_dir, deadline, verbose, profile, priority):
    """Processes the manifest file and send off the swarming task request."""
    try:
        manifest = Manifest(isolate_server=isolate_server,
                            namespace=namespace,
                            isolated_hash=isolated_hash,
                            task_name=task_name,
                            extra_args=extra_args,
                            shards=shards,
                            dimensions=dimensions,
                            env=env,
                            working_dir=working_dir,
                            deadline=deadline,
                            verbose=verbose,
                            profile=profile,
                            priority=priority)
    except ValueError as e:
        tools.report_error('Unable to process %s: %s' % (task_name, e))
        return 1

    chromium_setup(manifest)

    logging.info('Zipping up files...')
    if not zip_and_upload(manifest):
        return 1

    logging.info('Server: %s', swarming)
    logging.info('Task name: %s', task_name)
    trigger_url = swarming + '/test'
    manifest_text = manifest.to_json()
    result = net.url_read(trigger_url, data={'request': manifest_text})
    if not result:
        tools.report_error('Failed to trigger task %s\n%s' %
                           (task_name, trigger_url))
        return 1
    try:
        json.loads(result)
    except (ValueError, TypeError) as e:
        msg = '\n'.join(('Failed to trigger task %s' % task_name,
                         'Manifest: %s' % manifest_text,
                         'Bad response: %s' % result, str(e)))
        tools.report_error(msg)
        return 1
    return 0
Example #15
    def url_read(self, resource, **kwargs):
        url = self.url + resource
        if kwargs.get('data') is None:
            # No XSRF token for GET.
            return net.url_read(url, **kwargs)

        if not self.token:
            self.token = self.refresh_token()
        resp = self._url_read_post(url, **kwargs)
        if resp is None:
            # This includes 403 because the XSRF token expired. Renew the token.
            # TODO(maruel): It'd be great if it were transparent.
            self.refresh_token()
            resp = self._url_read_post(url, **kwargs)
        if resp is None:
            raise Error('Failed to connect to %s' % url)
        return resp
Example #16
  def url_read(self, resource, **kwargs):
    url = self.url + resource
    if kwargs.get('data') is None:
      # No XSRF token for GET.
      return net.url_read(url, **kwargs)

    if not self.token:
      self.token = self.refresh_token()
    resp = self._url_read_post(url, **kwargs)
    if resp is None:
      # This includes 403 because the XSRF token expired. Renew the token.
      # TODO(maruel): It'd be great if it were transparent.
      self.refresh_token()
      resp = self._url_read_post(url, **kwargs)
    if resp is None:
      raise Error('Failed to connect to %s' % url)
    return resp
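The two url_read wrappers above implement a retry-once pattern: a failed POST
may simply mean the XSRF token expired, so the token is refreshed and the
request retried a single time before giving up. The pattern in isolation, with
placeholder callables:

def post_with_token_refresh(do_post, refresh_token):
  """do_post returns a response or None; refresh_token renews the XSRF token.

  Both arguments are placeholder callables, not names from the code above.
  """
  resp = do_post()
  if resp is None:
    # Possibly an expired token (e.g. a 403): renew and retry exactly once.
    refresh_token()
    resp = do_post()
  return resp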
Example #17
def get_task_keys(swarm_base_url, task_name):
    """Returns the Swarming task key for each shards of task_name."""
    key_data = urllib.urlencode([("name", task_name)])
    url = "%s/get_matching_test_cases?%s" % (swarm_base_url, key_data)

    for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
        result = net.url_read(url, retry_404=True)
        if result is None:
            raise Failure("Error: Unable to find any task with the name, %s, on swarming server" % task_name)

        # TODO(maruel): Compare exact string.
        if "No matching" in result:
            logging.warning("Unable to find any task with the name, %s, on swarming " "server" % task_name)
            continue
        return json.loads(result)

    raise Failure("Error: Unable to find any task with the name, %s, on swarming server" % task_name)
Example #18
def CMDis_fine(_args):
  """Just reports that the code doesn't throw.

  That ensures that the bot has minimal viability before transferring control
  to it. For now, it just imports bot_main and sends a ping request to the
  server, but later it'll check the config, etc.
  """
  # pylint: disable=unused-variable
  from bot_code import bot_main
  from config import bot_config

  resp = net.url_read(bot_main.get_config()['server'] +
                      '/swarming/api/v1/bot/server_ping')
  if resp is None:
    logging.error('No response from server_ping')
    return 1
  return 0
Example #19
def calculate_version(url):
  """Retrieves the swarm_bot code and returns the SHA-1 for it."""
  # Cannot use url_open() since zipfile requires .seek().
  archive = zipfile.ZipFile(StringIO.StringIO(net.url_read(url)))
  # See
  # https://code.google.com/p/swarming/source/browse/src/common/version.py?repo=swarming-server
  files = (
    'slave_machine.py',
    'swarm_bot/local_test_runner.py',
    'common/__init__.py',
    'common/swarm_constants.py',
    'common/version.py',
    'common/test_request_message.py',
    'common/url_helper.py',
  )
  d = hashlib.sha1()
  for f in files:
    d.update(archive.read(f))
  return d.hexdigest()
Example #20
def calculate_version(url):
    """Retrieves the swarm_bot code and returns the SHA-1 for it."""
    # Cannot use url_open() since zipfile requires .seek().
    archive = zipfile.ZipFile(StringIO.StringIO(net.url_read(url)))
    # See
    # https://code.google.com/p/swarming/source/browse/src/common/version.py?repo=swarming-server
    files = (
        'slave_machine.py',
        'swarm_bot/local_test_runner.py',
        'common/__init__.py',
        'common/swarm_constants.py',
        'common/version.py',
        'common/test_request_message.py',
        'common/url_helper.py',
    )
    d = hashlib.sha1()
    for f in files:
        d.update(archive.read(f))
    return d.hexdigest()
Example #21
def get_test_keys(swarm_base_url, test_name):
  """Returns the Swarm test key for each shards of test_name."""
  key_data = urllib.urlencode([('name', test_name)])
  url = '%s/get_matching_test_cases?%s' % (swarm_base_url, key_data)

  for _ in net.retry_loop(max_attempts=net.URL_OPEN_MAX_ATTEMPTS):
    result = net.url_read(url, retry_404=True)
    if result is None:
      raise Failure(
          'Error: Unable to find any tests with the name, %s, on swarm server'
          % test_name)

    # TODO(maruel): Compare exact string.
    if 'No matching' in result:
      logging.warning('Unable to find any tests with the name, %s, on swarm '
                      'server' % test_name)
      continue
    return json.loads(result)

  raise Failure(
      'Error: Unable to find any tests with the name, %s, on swarm server'
      % test_name)
Example #22
  def _do_push(self, push_state, content):
    """Uploads isolated file to the URL.

    Used only for storing files, not for API calls. Can be overridden in
    subclasses.

    Args:
      url: URL to upload the data to.
      push_state: an _IsolateServicePushState instance
      item: the original Item to be uploaded
      content: an iterable that yields 'str' chunks.
    """
    # A cheesy way to avoid memcpy of a (possibly huge) file, until streaming
    # upload support is implemented.
    if isinstance(content, list) and len(content) == 1:
      content = content[0]
    else:
      content = ''.join(content)

    # DB upload
    if not push_state.finalize_url:
      url = '%s/%s' % (self._base_url, push_state.upload_url)
      content = base64.b64encode(content)
      data = {
          'upload_ticket': push_state.preupload_status['upload_ticket'],
          'content': content,
      }
      response = net.url_read_json(url=url, data=data)
      return response is not None and response['ok']

    # upload to GS
    url = push_state.upload_url
    response = net.url_read(
        content_type='application/octet-stream',
        data=content,
        method='PUT',
        headers={'Cache-Control': 'public, max-age=31536000'},
        url=url)
    return response is not None
Example #23
  def _url_read_post(self, url, **kwargs):
    headers = (kwargs.pop('headers', None) or {}).copy()
    headers['X-XSRF-Token'] = self.token
    return net.url_read(url, headers=headers, **kwargs)
Example #24
  def _url_read_post(self, url, **kwargs):
    headers = (kwargs.pop('headers', None) or {}).copy()
    headers['X-XSRF-Token'] = self.token
    return net.url_read(url, headers=headers, **kwargs)
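Both _url_read_post helpers copy the caller's headers before injecting the
token, so the caller's dict is never mutated. A tiny sketch of that
copy-then-inject step; with_xsrf_token is a hypothetical helper:

def with_xsrf_token(headers, token):
  # Returns a copy of headers with the XSRF token added; never mutates the
  # caller's dict.
  headers = (headers or {}).copy()
  headers['X-XSRF-Token'] = token
  return headers

assert with_xsrf_token(None, 'tok') == {'X-XSRF-Token': 'tok'}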
Example #25
def retrieve_results(base_url, shard_index, task_key, timeout, should_stop,
                     output_collector):
    """Retrieves results for a single task_key.

    Returns:
      <result dict> on success.
      None on failure.
    """
    assert isinstance(timeout, float), timeout
    params = [('r', task_key)]
    result_url = '%s/get_result?%s' % (base_url, urllib.urlencode(params))
    started = now()
    deadline = started + timeout if timeout else None
    attempt = 0

    while not should_stop.is_set():
        attempt += 1

        # Waiting for too long -> give up.
        current_time = now()
        if deadline and current_time >= deadline:
            logging.error('retrieve_results(%s) timed out on attempt %d',
                          base_url, attempt)
            return None

        # Do not spin too fast. Spin faster at the beginning though.
        # Start with 1 sec delay and for each 30 sec of waiting add another second
        # of delay, until hitting 15 sec ceiling.
        if attempt > 1:
            max_delay = min(15, 1 + (current_time - started) / 30.0)
            delay = min(max_delay, deadline -
                        current_time) if deadline else max_delay
            if delay > 0:
                logging.debug('Waiting %.1f sec before retrying', delay)
                should_stop.wait(delay)
                if should_stop.is_set():
                    return None

        # Disable internal retries in net.url_read, since we are doing retries
        # ourselves. Do not use retry_404 so should_stop is polled more often.
        response = net.url_read(result_url, retry_404=False, retry_50x=False)

        # Request failed. Try again.
        if response is None:
            continue

        # Got some response, ensure it is JSON dict, retry if not.
        try:
            result = json.loads(response) or {}
            if not isinstance(result, dict):
                raise ValueError()
        except (ValueError, TypeError):
            logging.warning(
                'Received corrupted or invalid data for task_key %s, retrying: %r',
                task_key, response)
            continue

        # Swarming server uses non-empty 'output' value as a flag that task has
        # finished. How to wait for tasks that produce no output is a mystery.
        if result.get('output'):
            # Record the result, try to fetch attached output files (if any).
            if output_collector:
                # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
                output_collector.process_shard_result(shard_index, result)
            return result
Example #26
def process_manifest(
    swarming,
    isolate_server,
    namespace,
    isolated_hash,
    task_name,
    extra_args,
    shards,
    dimensions,
    env,
    working_dir,
    deadline,
    verbose,
    profile,
    priority,
):
    """Processes the manifest file and send off the swarming task request."""
    try:
        manifest = Manifest(
            isolate_server=isolate_server,
            namespace=namespace,
            isolated_hash=isolated_hash,
            task_name=task_name,
            extra_args=extra_args,
            shards=shards,
            dimensions=dimensions,
            env=env,
            working_dir=working_dir,
            deadline=deadline,
            verbose=verbose,
            profile=profile,
            priority=priority,
        )
    except ValueError as e:
        tools.report_error("Unable to process %s: %s" % (task_name, e))
        return 1

    chromium_setup(manifest)

    logging.info("Zipping up files...")
    if not zip_and_upload(manifest):
        return 1

    logging.info("Server: %s", swarming)
    logging.info("Task name: %s", task_name)
    trigger_url = swarming + "/test"
    manifest_text = manifest.to_json()
    result = net.url_read(trigger_url, data={"request": manifest_text})
    if not result:
        tools.report_error("Failed to trigger task %s\n%s" % (task_name, trigger_url))
        return 1
    try:
        json.loads(result)
    except (ValueError, TypeError) as e:
        msg = "\n".join(
            (
                "Failed to trigger task %s" % task_name,
                "Manifest: %s" % manifest_text,
                "Bad response: %s" % result,
                str(e),
            )
        )
        tools.report_error(msg)
        return 1
    return 0
Example #27
def calculate_version(url):
  """Retrieves the swarm_bot code and returns the SHA-1 for it."""
  # Cannot use url_open() since zipfile requires .seek().
  return generate_version(StringIO.StringIO(net.url_read(url)))
Example #28
def calculate_version(url):
  """Retrieves the swarm_bot code and returns the SHA-1 for it."""
  # Cannot use url_open() since zipfile requires .seek().
  return generate_version(StringIO.StringIO(net.url_read(url)))
Example #29
  def _run(self):
    """Polls the server and fake execution."""
    try:
      self._progress.update_item('%d alive' % self._index, bots=1)
      while True:
        if self._kill_event.is_set():
          return
        data = {'attributes': json.dumps(self._attributes)}
        request = net.url_open(self._swarming + '/poll_for_test', data=data)
        if request is None:
          self._events.put('poll_for_test_empty')
          continue
        start = time.time()
        try:
          manifest = json.load(request)
        except ValueError:
          self._progress.update_item('Failed to poll')
          self._events.put('poll_for_test_invalid')
          continue

        commands = [c['function'] for c in manifest.get('commands', [])]
        if not commands:
          # Nothing to run.
          self._events.put('sleep')
          time.sleep(manifest['come_back'])
          continue

        if commands == ['UpdateSlave']:
          # Calculate the proper SHA-1 and loop again.
          # This could happen if the Swarming server is upgraded while this
          # script runs.
          self._attributes['version'] = calculate_version(
              manifest['commands'][0]['args'])
          self._events.put('update_slave')
          continue

        if commands != ['RunManifest']:
          self._progress.update_item(
              'Unexpected RPC call %s\n%s' % (commands, manifest))
          self._events.put('unknown_rpc')
          break

        store_cmd = manifest['commands'][0]
        if not isinstance(store_cmd['args'], unicode):
          self._progress.update_item('Unexpected RPC manifest\n%s' % manifest)
          self._events.put('unknown_args')
          break

        result_url = manifest['result_url']
        test_run = json.loads(store_cmd['args'])
        if result_url != test_run['result_url']:
          self._progress.update_item(
              'Unexpected result url: %s != %s' %
              (result_url, test_run['result_url']))
          self._events.put('invalid_result_url')
          break
        ping_url = test_run['ping_url']
        ping_delay = test_run['ping_delay']
        self._progress.update_item('%d processing' % self._index, processing=1)

        # Fake activity and send pings as requested.
        while True:
          remaining = max(0, (start + self._duration) - time.time())
          if remaining > ping_delay:
            # Include empty data to ensure the request is a POST request.
            result = net.url_read(ping_url, data={})
            assert result == 'Success.', result
            remaining = max(0, (start + self._duration) - time.time())
          if not remaining:
            break
          time.sleep(remaining)

        # In the old API, r=<task_id>&id=<bot_id> is passed as the url.
        data = {
          'o': TASK_OUTPUT,
          'x': '0',
        }
        result = net.url_read(manifest['result_url'], data=data)
        self._progress.update_item(
            '%d processed' % self._index, processing=-1, processed=1)
        if not result:
          self._events.put('result_url_fail')
        else:
          assert result == 'Successfully update the runner results.', result
          self._events.put(time.time() - start)
    finally:
      try:
        # Unregister itself. Otherwise the server will have tons of fake slaves
        # that the admin will have to remove manually.
        response = net.url_open(
            self._swarming + '/delete_machine_stats',
            data=[('r', self._bot_id)])
        if not response:
          self._events.put('failed_unregister')
        else:
          response.read()
      finally:
        self._progress.update_item('%d quit' % self._index, bots=-1)
Example #30
def run_bot(arg_error):
  """Runs the bot until it reboots or self-update or a signal is received.

  When a signal is received, simply exit.
  """
  quit_bit = threading.Event()
  def handler(sig, _):
    logging.info('Got signal %s', sig)
    quit_bit.set()

  # TODO(maruel): Set quit_bit when stdin is closed on Windows.

  with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
    config = get_config()
    try:
      # First thing is to get an arbitrary url. This also ensures the network is
      # up and running, which is necessary before trying to get the FQDN below.
      resp = net.url_read(config['server'] + '/swarming/api/v1/bot/server_ping')
      if resp is None:
        logging.error('No response from server_ping')
    except Exception as e:
      # url_read() already traps pretty much every exception. This except
      # clause is kept there "just in case".
      logging.exception('server_ping threw')

    if quit_bit.is_set():
      logging.info('Early quit 1')
      return 0

    # If this fails, there's hardly anything that can be done, the bot can't
    # even get to the point to be able to self-update.
    botobj = get_bot()
    resp = net.url_read_json(
        botobj.server + '/swarming/api/v1/bot/handshake',
        data=botobj._attributes)
    if not resp:
      logging.error('Failed to contact for handshake')
    else:
      logging.info('Connected to %s', resp.get('server_version'))
      if resp.get('bot_version') != botobj._attributes['version']:
        logging.warning(
            'Found out we\'ll need to update: server said %s; we\'re %s',
            resp.get('bot_version'), botobj._attributes['version'])

    if arg_error:
      botobj.post_error('Bootstrapping error: %s' % arg_error)

    if quit_bit.is_set():
      logging.info('Early quit 2')
      return 0

    clean_isolated_cache(botobj)

    call_hook(botobj, 'on_bot_startup')

    if quit_bit.is_set():
      logging.info('Early quit 3')
      return 0

    # This environment variable is accessible to the tasks executed by this bot.
    os.environ['SWARMING_BOT_ID'] = botobj.id.encode('utf-8')

    # Remove the 'work' directory if present, as not removing it may cause the
    # bot to stay quarantined and not be able to get out of this state.
    work_dir = os.path.join(botobj.base_dir, 'work')
    try:
      if os.path.isdir(work_dir):
        file_path.rmtree(work_dir)
    except Exception as e:
      botobj.post_error('Failed to remove work: %s' % e)

    consecutive_sleeps = 0
    while not quit_bit.is_set():
      try:
        botobj.update_dimensions(get_dimensions(botobj))
        botobj.update_state(get_state(botobj, consecutive_sleeps))
        did_something = poll_server(botobj, quit_bit)
        if did_something:
          consecutive_sleeps = 0
        else:
          consecutive_sleeps += 1
      except Exception as e:
        logging.exception('poll_server failed')
        msg = '%s\n%s' % (e, traceback.format_exc()[-2048:])
        botobj.post_error(msg)
        consecutive_sleeps = 0
    logging.info('Quitting')

  # Tell the server we are going away.
  botobj.post_event('bot_shutdown', 'Signal was received')
  botobj.cancel_all_timers()
  return 0
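run_bot() converts OS signals into a threading.Event so the main loop can exit
promptly. A stdlib-only sketch of that quit-bit pattern, without the
subprocess42 helper used above:

import signal
import threading

quit_bit = threading.Event()

def _handler(sig, _frame):
  quit_bit.set()

signal.signal(signal.SIGTERM, _handler)

# Main loop: one unit of work per iteration; wait() doubles as an
# interruptible sleep, so a signal ends the loop within a second.
while not quit_bit.is_set():
  quit_bit.wait(1)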
Example #31
  def _run(self):
    try:
      self._progress.update_item('%d alive' % self._index, bots=1)
      while True:
        if self._kill_event.is_set():
          return
        data = {'attributes': json.dumps(self._attributes)}
        request = net.url_open(self._swarming + '/poll_for_test', data=data)
        if request is None:
          self._events.put('poll_for_test_empty')
          continue
        start = time.time()
        try:
          manifest = json.load(request)
        except ValueError:
          self._progress.update_item('Failed to poll')
          self._events.put('poll_for_test_invalid')
          continue

        commands = [c['function'] for c in manifest.get('commands', [])]
        if not commands:
          # Nothing to run.
          self._events.put('sleep')
          time.sleep(manifest['come_back'])
          continue

        if commands == ['UpdateSlave']:
          # Calculate the proper SHA-1 and loop again.
          # This could happen if the Swarming server is upgraded while this
          # script runs.
          self._attributes['version'] = calculate_version(
              manifest['commands'][0]['args'])
          self._events.put('update_slave')
          continue

        if commands != ['StoreFiles', 'RunCommands']:
          self._progress.update_item(
              'Unexpected RPC call %s\n%s' % (commands, manifest))
          self._events.put('unknown_rpc')
          break

        # The normal way Swarming works is that it 'stores' a test_run.swarm
        # file and then defers control to swarm_bot/local_test_runner.py.
        store_cmd = manifest['commands'][0]
        assert len(store_cmd['args']) == 1, store_cmd['args']
        filepath, filename, test_run_content = store_cmd['args'][0]
        assert filepath == ''
        assert filename == 'test_run.swarm'
        assert 'local_test_runner.py' in manifest['commands'][1]['args'][0], (
            manifest['commands'][1])
        result_url = manifest['result_url']
        test_run = json.loads(test_run_content)
        assert result_url == test_run['result_url']
        ping_url = test_run['ping_url']
        ping_delay = test_run['ping_delay']
        self._progress.update_item('%d processing' % self._index, processing=1)

        # Fake activity and send pings as requested.
        while True:
          remaining = max(0, (start + self._duration) - time.time())
          if remaining > ping_delay:
            # Include empty data to ensure the request is a POST request.
            result = net.url_read(ping_url, data={})
            assert result == 'Success.', result
            remaining = max(0, (start + self._duration) - time.time())
          if not remaining:
            break
          time.sleep(remaining)

        data = {
          'c': test_run['configuration']['config_name'],
          'n': test_run['test_run_name'],
          'o': False,
          'result_output': TASK_OUTPUT,
          's': True,
          'x': '0',
        }
        result = net.url_read(manifest['result_url'], data=data)
        self._progress.update_item(
            '%d processed' % self._index, processing=-1, processed=1)
        if not result:
          self._events.put('result_url_fail')
        else:
          assert result == 'Successfully update the runner results.', result
          self._events.put(time.time() - start)
    finally:
      try:
        # Unregister itself. Otherwise the server will have tons of fake slaves
        # that the admin will have to remove manually.
        response = net.url_open(
            self._swarming + '/delete_machine_stats',
            data=[('r', self._machine_id)])
        if not response:
          self._events.put('failed_unregister')
        else:
          response.read()
      finally:
        self._progress.update_item('%d quit' % self._index, bots=-1)
Example #32
  def ping(self):
    """Unlike all other methods, this one isn't authenticated."""
    resp = net.url_read(self._server + '/swarming/api/v1/bot/server_ping')
    if resp is None:
      logging.error('No response from server_ping')
Example #33
def process_manifest(
    file_hash_or_isolated, test_name, shards, test_filter, slave_os,
    working_dir, isolate_server, swarming, verbose, profile, priority, algo):
  """Process the manifest file and send off the swarm test request.

  Optionally archives an .isolated file.
  """
  if file_hash_or_isolated.endswith('.isolated'):
    file_hash = archive(
        file_hash_or_isolated, isolate_server, slave_os, algo, verbose)
    if not file_hash:
      print >> sys.stderr, 'Archival failure %s' % file_hash_or_isolated
      return 1
  elif isolateserver.is_valid_hash(file_hash_or_isolated, algo):
    file_hash = file_hash_or_isolated
  else:
    print >> sys.stderr, 'Invalid hash %s' % file_hash_or_isolated
    return 1

  try:
    manifest = Manifest(
        file_hash,
        test_name,
        shards,
        test_filter,
        PLATFORM_MAPPING_SWARMING[slave_os],
        working_dir,
        isolate_server,
        verbose,
        profile,
        priority,
        algo)
  except ValueError as e:
    print >> sys.stderr, 'Unable to process %s: %s' % (test_name, e)
    return 1

  chromium_setup(manifest)

  # Zip up relevant files.
  print('Zipping up files...')
  if not manifest.zip_and_upload():
    return 1

  # Send test requests off to swarm.
  print('Sending test requests to swarm.')
  print('Server: %s' % swarming)
  print('Job name: %s' % test_name)
  test_url = swarming + '/test'
  manifest_text = manifest.to_json()
  result = net.url_read(test_url, data={'request': manifest_text})
  if not result:
    print >> sys.stderr, 'Failed to send test for %s\n%s' % (
        test_name, test_url)
    return 1
  try:
    json.loads(result)
  except (ValueError, TypeError) as e:
    print >> sys.stderr, 'Failed to send test for %s' % test_name
    print >> sys.stderr, 'Manifest: %s' % manifest_text
    print >> sys.stderr, 'Bad response: %s' % result
    print >> sys.stderr, str(e)
    return 1
  return 0
Example #34
def trigger_task(swarming_url, dimensions, sleep_time, output_size, progress,
                 unique, timeout, index):
    """Triggers a Swarming job and collects results.

    Returns the total amount of time to run a task remotely, including all the
    overhead.
    """
    name = 'load-test-%d-%s' % (index, unique)
    start = time.time()

    logging.info('trigger')
    manifest = swarming.Manifest(isolate_server='http://localhost:1',
                                 namespace='dummy-isolate',
                                 isolated_hash=1,
                                 task_name=name,
                                 extra_args=[],
                                 env={},
                                 dimensions=dimensions,
                                 deadline=int(timeout - TIMEOUT_OVERHEAD),
                                 verbose=False,
                                 profile=False,
                                 priority=100)
    cmd = [
        'python', '-c',
        'import time; print(\'1\'*%s); time.sleep(%d); print(\'Back\')' %
        (output_size, sleep_time)
    ]
    manifest.add_task('echo stuff', cmd)
    data = {'request': manifest.to_json()}
    response = net.url_read(swarming_url + '/test', data=data)
    if response is None:
        # Failed to trigger. Return a failure.
        return 'failed_trigger'

    result = json.loads(response)
    # Old API uses a hardcoded config name. New API doesn't have the concept of
    # a config name, so it uses the task name. Ignore this detail.
    test_keys = []
    for key in result['test_keys']:
        key.pop('config_name')
        test_keys.append(key.pop('test_key'))
        assert re.match('[0-9a-f]+', test_keys[-1]), test_keys
    expected = {
        u'priority': 100,
        u'test_case_name': unicode(name),
        u'test_keys': [{
            u'num_instances': 1,
            u'instance_index': 0,
        }],
    }
    assert result == expected, '\n%s\n%s' % (result, expected)

    progress.update_item('%5d' % index, processing=1)
    try:
        logging.info('collect')
        new_test_keys = swarming.get_task_keys(swarming_url, name)
        if not new_test_keys:
            return 'no_test_keys'
        assert test_keys == new_test_keys, (test_keys, new_test_keys)
        out = [
            output for _index, output in swarming.yield_results(
                swarming_url, test_keys, timeout, None, False, None, False,
                True)
        ]
        if not out:
            return 'no_result'
        for item in out:
            item.pop('machine_tag')
            item.pop('machine_id')
            # TODO(maruel): Assert output even when run on a real bot.
            _out_actual = item.pop('output')
            # assert out_actual == swarming_load_test_bot.TASK_OUTPUT, out_actual
        expected = [{
            u'config_instance_index': 0,
            u'exit_codes': u'0',
            u'num_config_instances': 1,
        }]
        assert out == expected, '\n%s\n%s' % (out, expected)
        return time.time() - start
    finally:
        progress.update_item('%5d - done' % index, processing=-1, processed=1)
Example #35
  def _run(self):
    """Polls the server and fake execution."""
    try:
      self._progress.update_item('%d alive' % self._index, bots=1)
      while True:
        if self._kill_event.is_set():
          return
        data = {'attributes': json.dumps(self._attributes)}
        request = net.url_read(self._swarming + '/poll_for_test', data=data)
        if request is None:
          self._events.put('poll_for_test_empty')
          continue
        start = time.time()
        try:
          manifest = json.loads(request)
        except ValueError:
          self._progress.update_item('Failed to poll')
          self._events.put('poll_for_test_invalid')
          continue

        commands = [c['function'] for c in manifest.get('commands', [])]
        if not commands:
          # Nothing to run.
          self._events.put('sleep')
          time.sleep(manifest['come_back'])
          continue

        if commands == ['UpdateSlave']:
          # Calculate the proper SHA-1 and loop again.
          # This could happen if the Swarming server is upgraded while this
          # script runs.
          self._attributes['version'] = calculate_version(
              manifest['commands'][0]['args'])
          self._events.put('update_slave')
          continue

        if commands != ['RunManifest']:
          self._progress.update_item(
              'Unexpected RPC call %s\n%s' % (commands, manifest))
          self._events.put('unknown_rpc')
          break

        store_cmd = manifest['commands'][0]
        if not isinstance(store_cmd['args'], unicode):
          self._progress.update_item('Unexpected RPC manifest\n%s' % manifest)
          self._events.put('unknown_args')
          break

        result_url = manifest['result_url']
        test_run = json.loads(store_cmd['args'])
        if result_url != test_run['result_url']:
          self._progress.update_item(
              'Unexpected result url: %s != %s' %
              (result_url, test_run['result_url']))
          self._events.put('invalid_result_url')
          break
        ping_url = test_run['ping_url']
        ping_delay = test_run['ping_delay']
        self._progress.update_item('%d processing' % self._index, processing=1)

        # Fake activity and send pings as requested.
        while True:
          remaining = max(0, (start + self._duration) - time.time())
          if remaining > ping_delay:
            # Include empty data to ensure the request is a POST request.
            result = net.url_read(ping_url, data={})
            assert result == 'Success.', result
            remaining = max(0, (start + self._duration) - time.time())
          if not remaining:
            break
          time.sleep(remaining)

        # In the old API, r=<task_id>&id=<bot_id> is passed as the url.
        data = {
          'o': TASK_OUTPUT,
          'x': '0',
        }
        result = net.url_read(manifest['result_url'], data=data)
        self._progress.update_item(
            '%d processed' % self._index, processing=-1, processed=1)
        if not result:
          self._events.put('result_url_fail')
        else:
          assert result == 'Successfully update the runner results.', result
          self._events.put(time.time() - start)
    finally:
      try:
        # Unregister itself. Otherwise the server will have tons of fake bots
        # that the admin will have to remove manually.
        response = net.url_read(
            self._swarming + '/delete_machine_stats',
            data=[('r', self._bot_id)])
        if response is None:
          self._events.put('failed_unregister')
      finally:
        self._progress.update_item('%d quit' % self._index, bots=-1)
Example #36
def run_bot(arg_error):
    """Runs the bot until it reboots or self-update or a signal is received.

  When a signal is received, simply exit.
  """
    quit_bit = threading.Event()

    def handler(sig, _):
        logging.info('Got signal %s', sig)
        quit_bit.set()

    # TODO(maruel): Set quit_bit when stdin is closed on Windows.

    with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        config = get_config()
        try:
            # First thing is to get an arbitrary url. This also ensures the network is
            # up and running, which is necessary before trying to get the FQDN below.
            resp = net.url_read(config['server'] +
                                '/swarming/api/v1/bot/server_ping')
            if resp is None:
                logging.error('No response from server_ping')
        except Exception as e:
            # url_read() already traps pretty much every exception. This except
            # clause is kept there "just in case".
            logging.exception('server_ping threw')

        if quit_bit.is_set():
            logging.info('Early quit 1')
            return 0

        # If this fails, there's hardly anything that can be done, the bot can't
        # even get to the point to be able to self-update.
        botobj = get_bot()
        resp = net.url_read_json(botobj.server +
                                 '/swarming/api/v1/bot/handshake',
                                 data=botobj._attributes)
        if not resp:
            logging.error('Failed to contact for handshake')
        else:
            logging.info('Connected to %s', resp.get('server_version'))
            if resp.get('bot_version') != botobj._attributes['version']:
                logging.warning(
                    'Found out we\'ll need to update: server said %s; we\'re %s',
                    resp.get('bot_version'), botobj._attributes['version'])

        if arg_error:
            botobj.post_error('Bootstrapping error: %s' % arg_error)

        if quit_bit.is_set():
            logging.info('Early quit 2')
            return 0

        clean_isolated_cache(botobj)

        call_hook(botobj, 'on_bot_startup')

        if quit_bit.is_set():
            logging.info('Early quit 3')
            return 0

        # This environment variable is accessible to the tasks executed by this bot.
        os.environ['SWARMING_BOT_ID'] = botobj.id.encode('utf-8')

        # Remove the 'work' directory if present, as not removing it may cause the
        # bot to stay quarantined and not be able to get out of this state.
        work_dir = os.path.join(botobj.base_dir, 'work')
        try:
            if os.path.isdir(work_dir):
                file_path.rmtree(work_dir)
        except Exception as e:
            botobj.post_error('Failed to remove work: %s' % e)

        consecutive_sleeps = 0
        while not quit_bit.is_set():
            try:
                botobj.update_dimensions(get_dimensions(botobj))
                botobj.update_state(get_state(botobj, consecutive_sleeps))
                did_something = poll_server(botobj, quit_bit)
                if did_something:
                    consecutive_sleeps = 0
                else:
                    consecutive_sleeps += 1
            except Exception as e:
                logging.exception('poll_server failed')
                msg = '%s\n%s' % (e, traceback.format_exc()[-2048:])
                botobj.post_error(msg)
                consecutive_sleeps = 0
        logging.info('Quitting')

    # Tell the server we are going away.
    botobj.post_event('bot_shutdown', 'Signal was received')
    botobj.cancel_all_timers()
    return 0