Example #1
 def post_event(self, event_type, message):
     """Posts an event to the server."""
     data = self._attributes.copy()
     data['event'] = event_type
     data['message'] = message
     net.url_read_json(self.server + '/swarming/api/v1/bot/event',
                       data=data)
Example #2
def retrieve_results(
    base_url, shard_index, task_id, timeout, should_stop, output_collector):
  """Retrieves results for a single task ID.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  result_url = '%s/_ah/api/swarming/v1/task/%s/result' % (base_url, task_id)
  output_url = '%s/_ah/api/swarming/v1/task/%s/stdout' % (base_url, task_id)
  started = now()
  deadline = started + timeout if timeout else None
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      max_delay = min(15, 1 + (current_time - started) / 30.0)
      delay = min(max_delay, deadline - current_time) if deadline else max_delay
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    # Disable internal retries in net.url_read_json, since we are doing retries
    # ourselves.
    # TODO(maruel): We'd need to know if it's a 404 and not retry at all.
    # TODO(maruel): Sadly, we currently have to poll here. Use hanging HTTP
    # request on GAE v2.
    result = net.url_read_json(result_url, retry_50x=False)
    if not result:
      continue

    if result['state'] in State.STATES_NOT_RUNNING:
      # TODO(maruel): Not always fetch stdout?
      out = net.url_read_json(output_url)
      result['output'] = out.get('output') if out else out
      # Record the result, try to fetch attached output files (if any).
      if output_collector:
        # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
        output_collector.process_shard_result(shard_index, result)
      if result.get('internal_failure'):
        logging.error('Internal error!')
      elif result['state'] == 'BOT_DIED':
        logging.error('Bot died!')
      return result
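A minimal sketch of the delay schedule described in the comments above, factored out as a standalone helper; the name _poll_delay is made up and not part of the codebase:

def _poll_delay(elapsed, remaining=None):
  """Returns how long to sleep before the next poll attempt.

  Starts at 1 sec, gains one extra second for every 30 sec already spent
  waiting, caps at 15 sec, and never sleeps past the deadline when one is set.
  """
  max_delay = min(15, 1 + elapsed / 30.0)
  return min(max_delay, remaining) if remaining is not None else max_delay

# e.g. _poll_delay(0) == 1.0, _poll_delay(60) == 3.0, _poll_delay(600) == 15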
Example #3
def poll_server(botobj, quit_bit):
    """Polls the server to run one loop.

  Returns True if it executed some action, False if the server asked the bot
  to sleep.
  """
    # Access to a protected member _XXX of a client class - pylint: disable=W0212
    start = time.time()
    resp = net.url_read_json(botobj.server + '/swarming/api/v1/bot/poll',
                             data=botobj._attributes)
    if not resp:
        return False
    logging.debug('Server response:\n%s', resp)

    cmd = resp['cmd']
    if cmd == 'sleep':
        quit_bit.wait(resp['duration'])
        return False

    if cmd == 'terminate':
        quit_bit.set()
        # This is similar to post_update() in task_runner.py.
        params = {
            'cost_usd': 0,
            'duration': 0,
            'exit_code': 0,
            'hard_timeout': False,
            'id': botobj.id,
            'io_timeout': False,
            'output': '',
            'output_chunk_start': 0,
            'task_id': resp['task_id'],
        }
        net.url_read_json(
            botobj.server +
            '/swarming/api/v1/bot/task_update/%s' % resp['task_id'],
            data=params)
        return False

    if cmd == 'run':
        if run_manifest(botobj, resp['manifest'], start):
            # Completed a task successfully so update swarming_bot.zip if necessary.
            update_lkgbc(botobj)
        # TODO(maruel): Handle the case where quit_bit.is_set() happens here. This
        # is concerning as this means a signal (often SIGTERM) was received while
        # running the task. Make sure the host is properly restarting.
    elif cmd == 'update':
        update_bot(botobj, resp['version'])
    elif cmd == 'restart':
        if _in_load_test_mode():
            logging.warning('Would have restarted: %s' % resp['message'])
        else:
            botobj.restart(resp['message'])
    else:
        raise ValueError('Unexpected command: %s\n%s' % (cmd, resp))

    return True
Example #4
def poll_server(botobj, quit_bit):
  """Polls the server to run one loop.

  Returns True if it executed some action, False if the server asked the bot
  to sleep.
  """
  # Access to a protected member _XXX of a client class - pylint: disable=W0212
  start = time.time()
  resp = net.url_read_json(
     botobj.server + '/swarming/api/v1/bot/poll', data=botobj._attributes)
  if not resp:
    return False
  logging.debug('Server response:\n%s', resp)

  cmd = resp['cmd']
  if cmd == 'sleep':
    quit_bit.wait(resp['duration'])
    return False

  if cmd == 'terminate':
    quit_bit.set()
    # This is similar to post_update() in task_runner.py.
    params = {
      'cost_usd': 0,
      'duration': 0,
      'exit_code': 0,
      'hard_timeout': False,
      'id': botobj.id,
      'io_timeout': False,
      'output': '',
      'output_chunk_start': 0,
      'task_id': resp['task_id'],
    }
    net.url_read_json(
        botobj.server + '/swarming/api/v1/bot/task_update/%s' % resp['task_id'],
        data=params)
    return False

  if cmd == 'run':
    if run_manifest(botobj, resp['manifest'], start):
      # Completed a task successfully so update swarming_bot.zip if necessary.
      update_lkgbc(botobj)
    # TODO(maruel): Handle the case where quit_bit.is_set() happens here. This
    # is concerning as this means a signal (often SIGTERM) was received while
    # running the task. Make sure the host is properly restarting.
  elif cmd == 'update':
    update_bot(botobj, resp['version'])
  elif cmd == 'restart':
    if _in_load_test_mode():
      logging.warning('Would have restarted: %s' % resp['message'])
    else:
      botobj.restart(resp['message'])
  else:
    raise ValueError('Unexpected command: %s\n%s' % (cmd, resp))

  return True
Example #5
def swarming_trigger(swarming, raw_request, xsrf_token):
  """Triggers a request on the Swarming server and returns the json data.

  It's the low-level function.

  Returns:
    {
      'request': {
        'created_ts': u'2010-01-02 03:04:05',
        'name': ..
      },
      'task_id': '12300',
    }
  """
  logging.info('Triggering: %s', raw_request['name'])

  headers = {'X-XSRF-Token': xsrf_token}
  result = net.url_read_json(
      swarming + '/swarming/api/v1/client/request',
      data=raw_request,
      headers=headers)
  if not result:
    on_error.report('Failed to trigger task %s' % raw_request['name'])
    return None
  return result
Example #6
def CMDbot_delete(parser, args):
  """Forcibly deletes bots from the Swarming server."""
  parser.add_option(
      '-f', '--force', action='store_true',
      help='Do not prompt for confirmation')
  options, args = parser.parse_args(args)
  if not args:
    parser.error('Please specify the bots to delete')

  bots = sorted(args)
  if not options.force:
    print('Delete the following bots?')
    for bot in bots:
      print('  %s' % bot)
    if raw_input('Continue? [y/N] ') not in ('y', 'Y'):
      print('Goodbye.')
      return 1

  result = 0
  for bot in bots:
    url = '%s/_ah/api/swarming/v1/bot/%s/delete' % (options.swarming, bot)
    if net.url_read_json(url, data={}, method='POST') is None:
      print('Deleting %s failed. Probably already gone' % bot)
      result = 1
  return result
Example #7
def post_update(swarming_server, params, exit_code, stdout, output_chunk_start):
  """Posts task update to task_update.

  Arguments:
    swarming_server: Base URL to Swarming server.
    params: Default JSON parameters for the POST.
    exit_code: Process exit code, only when a command completed.
    stdout: Incremental output since last call, if any.
    output_chunk_start: Total number of stdout bytes previously sent, for
        coherency with the server.
  """
  params = params.copy()
  if exit_code is not None:
    params['exit_code'] = exit_code
  if stdout:
    # The output_chunk_start is used by the server to make sure that the stdout
    # chunks are processed and saved in the DB in order.
    params['output'] = base64.b64encode(stdout)
    params['output_chunk_start'] = output_chunk_start
  # TODO(maruel): Support early cancellation.
  # https://code.google.com/p/swarming/issues/detail?id=62
  resp = net.url_read_json(
      swarming_server+'/swarming/api/v1/bot/task_update/%s' % params['task_id'],
      data=params)
  logging.debug('post_update() = %s', resp)
  if not resp or resp.get('error'):
    # Abandon it. This will force a process exit.
    raise ValueError(resp.get('error') if resp else 'Failed to contact server')
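As a hedged illustration of the incremental protocol described above, a caller grows output_chunk_start by the number of bytes already sent so the server can stitch the stdout chunks together in order; swarming_server and params are assumed to come from the task assignment:

sent = 0
for chunk in ('hello\n', 'world\n'):
  post_update(swarming_server, params, None, chunk, sent)
  sent += len(chunk)
# Once the command completes, report the exit code with no new output.
post_update(swarming_server, params, 0, None, sent)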
Example #8
def CMDbots(parser, args):
    """Returns information about the bots connected to the Swarming server."""
    add_filter_options(parser)
    parser.filter_group.add_option(
        "--dead-only", action="store_true", help="Only print dead bots, useful to reap them and reimage broken bots"
    )
    parser.filter_group.add_option("-k", "--keep-dead", action="store_true", help="Do not filter out dead bots")
    parser.filter_group.add_option("-b", "--bare", action="store_true", help="Do not print out dimensions")
    options, args = parser.parse_args(args)

    if options.keep_dead and options.dead_only:
        parser.error("Use only one of --keep-dead and --dead-only")

    bots = []
    cursor = None
    limit = 250
    # Iterate via cursors.
    base_url = options.swarming + "/_ah/api/swarming/v1/bots/list?limit=%d" % limit
    while True:
        url = base_url
        if cursor:
            url += "&cursor=%s" % urllib.quote(cursor)
        data = net.url_read_json(url)
        if data is None:
            print >> sys.stderr, "Failed to access %s" % options.swarming
            return 1
        bots.extend(data["items"])
        cursor = data.get("cursor")
        if not cursor:
            break

    for bot in natsort.natsorted(bots, key=lambda x: x["bot_id"]):
        if options.dead_only:
            if not bot.get("is_dead"):
                continue
        elif not options.keep_dead and bot.get("is_dead"):
            continue

        # If the user requested to filter on dimensions, ensure the bot has all the
        # dimensions requested.
        dimensions = {i["key"]: i["value"] for i in bot["dimensions"]}
        for key, value in options.dimensions:
            if key not in dimensions:
                break
            # A bot can have multiple values for a key, for example,
            # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
            # be accepted.
            if isinstance(dimensions[key], list):
                if value not in dimensions[key]:
                    break
            else:
                if value != dimensions[key]:
                    break
        else:
            print bot["bot_id"]
            if not options.bare:
                print "  %s" % json.dumps(dimensions, sort_keys=True)
                if bot.get("task_id"):
                    print "  task: %s" % bot["task_id"]
    return 0
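The cursor loop above is a recurring pattern for paginated listing endpoints. A generic sketch of it, assuming the same net and urllib imports as the snippet above; the helper name is made up:

def _read_all_items(base_url, limit=250):
  """Follows the 'cursor' field until the server stops returning one."""
  items = []
  cursor = None
  while True:
    url = base_url + '?limit=%d' % limit
    if cursor:
      url += '&cursor=%s' % urllib.quote(cursor)
    data = net.url_read_json(url)
    if data is None:
      return None  # Network or server error; the caller decides how to report.
    items.extend(data.get('items', []))
    cursor = data.get('cursor')
    if not cursor:
      return items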
Example #9
 def _url_read_json(self, url_path, data=None):
     """Does POST (if data is not None) or GET request to a JSON endpoint."""
     return net.url_read_json(self._server + url_path,
                              data=data,
                              headers=self.get_headers(include_auth=True),
                              timeout=NET_CONNECTION_TIMEOUT_SEC,
                              follow_redirects=False)
Example #10
  def _do_fetch(self, url, digest, offset):
    """Fetches isolated data from the URL.

    Used only for fetching files, not for API calls. Can be overridden in
    subclasses.

    Args:
      url: URL to fetch the data from; may return an HTTP redirect.
      digest: hash digest of the isolated content to fetch.
      offset: byte offset inside the file to start fetching from.

    Returns:
      net.HttpResponse compatible object, with 'read' and 'get_header' calls.
    """
    assert isinstance(offset, int)
    data = {
        'digest': digest.encode('utf-8'),
        'namespace': self._namespace_dict,
        'offset': offset,
    }
    # TODO(maruel): url + '?' + urllib.urlencode(data) once a HTTP GET endpoint
    # is added.
    return net.url_read_json(
        url=url,
        data=data,
        read_timeout=DOWNLOAD_READ_TIMEOUT)
Example #11
def CMDreproduce(parser, args):
    """Runs a task locally that was triggered on the server.

  This runs locally the same commands that were run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.
  """
    options, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error("Must specify exactly one task id.")

    url = options.swarming + "/_ah/api/swarming/v1/task/%s/request" % args[0]
    request = net.url_read_json(url)
    if not request:
        print >> sys.stderr, "Failed to retrieve request data for the task"
        return 1

    if not os.path.isdir("work"):
        os.mkdir("work")

    properties = request["properties"]
    env = None
    if properties["env"]:
        env = os.environ.copy()
        logging.info("env: %r", properties["env"])
        env.update((i["key"].encode("utf-8"), i["value"].encode("utf-8")) for i in properties["env"])

    try:
        return subprocess.call(properties["command"], env=env, cwd="work")
    except OSError as e:
        print >> sys.stderr, "Failed to run: %s" % " ".join(properties["command"])
        print >> sys.stderr, str(e)
        return 1
Example #12
def post_error_task(botobj, error, task_id):
    """Posts given error as failure cause for the task.

  This is used in case of an internal code error; it causes the task to become
  BOT_DIED.

  Arguments:
    botobj: A bot.Bot instance.
    error: String representing the problem.
    task_id: Task that had an internal error. When the Swarming server sends
        commands to a bot, even though they could be completely wrong, the
        server assumes the job is running. Thus this function acts as the
        exception handler for incoming commands from the Swarming server. If for
        any reason the local test runner script cannot be run successfully,
        this function is invoked.
  """
    logging.error('Error: %s', error)
    data = {
        'id': botobj.id,
        'message': error,
        'task_id': task_id,
    }
    return net.url_read_json(botobj.server +
                             '/swarming/api/v1/bot/task_error/%s' % task_id,
                             data=data)
Example #13
 def refresh_token(self):
     """Returns a fresh token. Necessary as the token may expire after an hour.
 """
     url = self.url + self.token_resource
     resp = net.url_read_json(url,
                              headers={'X-XSRF-Token-Request': '1'},
                              data=self.xsrf_request_params)
     if resp is None:
         raise Error('Failed to connect to %s' % url)
     self.token = resp['xsrf_token']
     return self.token
Example #14
def CMDput_bot_config(parser, args):
    """Uploads a new version of bot_config.py."""
    options, args = parser.parse_args(args)
    if len(args) != 1:
        parser.error("Must specify file to upload")
    url = options.swarming + "/_ah/api/swarming/v1/server/put_bot_config"
    with open(args[0], "rb") as f:
        content = f.read().decode("utf-8")
    data = net.url_read_json(url, data={"content": content})
    print data
    return 0
Example #15
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that were run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.
  """
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify exactly one task id.')

  url = options.swarming + '/swarming/api/v1/client/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  if not os.path.isdir('work'):
    os.mkdir('work')

  swarming_host = urlparse.urlparse(options.swarming).netloc
  properties = request['properties']
  for data_url, _ in properties['data']:
    assert data_url.startswith('https://'), data_url
    data_host = urlparse.urlparse(data_url).netloc
    if data_host != swarming_host:
      auth.ensure_logged_in('https://' + data_host)

    content = net.url_read(data_url)
    if content is None:
      print >> sys.stderr, 'Failed to download %s' % data_url
      return 1
    with zipfile.ZipFile(StringIO.StringIO(content)) as zip_file:
      zip_file.extractall('work')

  env = None
  if properties['env']:
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    env.update(
        (k.encode('utf-8'), v.encode('utf-8'))
        for k, v in properties['env'].iteritems())

  exit_code = 0
  for cmd in properties['commands']:
    try:
      c = subprocess.call(cmd, env=env, cwd='work')
    except OSError as e:
      print >> sys.stderr, 'Failed to run: %s' % ' '.join(cmd)
      print >> sys.stderr, str(e)
      c = 1
    if not exit_code:
      exit_code = c
  return exit_code
Example #16
  def url_read_json(self, resource, **kwargs):
    url = self.url + resource
    if kwargs.get('data') is None:
      # No XSRF token required for GET.
      return net.url_read_json(url, **kwargs)

    if self.need_refresh():
      self.refresh_token()
    resp = self._url_read_json_post(url, **kwargs)
    if resp is None:
      raise Error('Failed to connect to %s; %s' % (url, self.expiration))
    return resp
Example #17
def endpoints_api_discovery_apis(host):
  """Uses Cloud Endpoints' API Discovery Service to returns metadata about all
  the APIs exposed by a host.

  https://developers.google.com/discovery/v1/reference/apis/list
  """
  data = net.url_read_json(host + '/_ah/api/discovery/v1/apis')
  if data is None:
    raise APIError('Failed to discover APIs on %s' % host)
  out = {}
  for api in data['items']:
    if api['id'] == 'discovery:v1':
      continue
    # URL is of the following form:
    # url = host + (
    #   '/_ah/api/discovery/v1/apis/%s/%s/rest' % (api['id'], api['version'])
    api_data = net.url_read_json(api['discoveryRestUrl'])
    if api_data is None:
      raise APIError('Failed to discover %s on %s' % (api['id'], host))
    out[api['id']] = api_data
  return out
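A possible way to call the function above; purely illustrative, the host is a placeholder and 'version' is whatever each discovery document reports:

apis = endpoints_api_discovery_apis('https://example.appspot.com')
for api_id in sorted(apis):
  logging.info('%s (version %s)', api_id, apis[api_id].get('version'))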
Example #18
def swarming_handshake(swarming):
  """Initiates the connection to the Swarming server."""
  headers = {'X-XSRF-Token-Request': '1'}
  response = net.url_read_json(
      swarming + '/swarming/api/v1/client/handshake',
      headers=headers,
      data={})
  if not response:
    logging.error('Failed to handshake with server')
    return None
  logging.info('Connected to server version: %s', response['server_version'])
  return response['xsrf_token']
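Read together with swarming_trigger() in Example #5, the token returned by the handshake is forwarded as the X-XSRF-Token header on the trigger request. A hedged sketch of that flow; swarming and raw_request are assumed to be provided by the caller:

xsrf_token = swarming_handshake(swarming)
if xsrf_token:
  result = swarming_trigger(swarming, raw_request, xsrf_token)
  if result:
    logging.info('Triggered task %s', result['task_id'])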
Example #19
 def refresh_token(self):
   """Returns a fresh token. Necessary as the token may expire after an hour.
   """
   url = self.url + self.token_resource
   resp = net.url_read_json(
       url,
       headers={'X-XSRF-Token-Request': '1'},
       data=self.xsrf_request_params)
   if resp is None:
     raise Error('Failed to connect to %s' % url)
   self.token = resp['xsrf_token']
   return self.token
Example #20
def CMDput_bot_config(parser, args):
  """Uploads a new version of bot_config.py."""
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify file to upload')
  url = options.swarming + '/_ah/api/swarming/v1/server/put_bot_config'
  path = unicode(os.path.abspath(args[0]))
  with fs.open(path, 'rb') as f:
    content = f.read().decode('utf-8')
  data = net.url_read_json(url, data={'content': content})
  print data
  return 0
Example #21
    def _do_push(self, push_state, content):
        """Uploads isolated file to the URL.

    Used only for storing files, not for API calls. Can be overridden in
    subclasses.

    Args:
      push_state: an _IsolateServerPushState instance.
      content: an iterable that yields 'str' chunks.
    """
        # A cheesy way to avoid memcpy of (possibly huge) file, until streaming
        # upload support is implemented.
        if isinstance(content, list) and len(content) == 1:
            content = content[0]
        else:
            content = b''.join(content)

        # DB upload
        if not push_state.finalize_url:
            url = '%s/%s' % (self.server_ref.url, push_state.upload_url)
            content = base64.b64encode(content)
            data = {
                'upload_ticket': push_state.preupload_status['upload_ticket'],
                'content': six.ensure_str(content),
            }
            response = net.url_read_json(url=url, data=data)
            return response is not None and response.get('ok')

        # upload to GS
        url = push_state.upload_url
        response = net.url_open(
            content_type='application/octet-stream',
            data=content,
            method='PUT',
            headers={'Cache-Control': 'public, max-age=31536000'},
            url=url)
        if not response:
            return False
        try:
            response.read()
        except net.TimeoutError:
            return False

        # Integrity check of uploaded file.
        # https://cloud.google.com/storage/docs/xml-api/reference-headers#xgooghash
        goog_hash = response.headers.get('x-goog-hash')
        assert goog_hash, response.headers
        md5_x_goog_hash = 'md5=' + six.ensure_str(
            base64.b64encode(hashlib.md5(content).digest()))
        return md5_x_goog_hash in goog_hash
Example #22
def resolve_version(cipd_server, package_name, version, timeout=None):
  """Resolves a package instance version (e.g. a tag) to an instance id."""
  url = '%s/_ah/api/repo/v1/instance/resolve?%s' % (
      cipd_server,
      urllib.urlencode({
        'package_name': package_name,
        'version': version,
      }))
  res = net.url_read_json(url, timeout=timeout)
  _check_response(res, 'Could not resolve version %s:%s', package_name, version)
  instance_id = res.get('instance_id')
  if not instance_id:
    raise Error('Invalid resolveVersion response: no instance id')
  return instance_id
Example #23
def CMDquery(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  parser.filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  parser.filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  parser.filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  auth.ensure_logged_in(options.swarming)
  data = net.url_read_json(options.swarming + '/swarming/api/v1/bots')
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1
  for machine in natsort.natsorted(data['machines'], key=lambda x: x['id']):
    if options.dead_only:
      if not machine['is_dead']:
        continue
    elif not options.keep_dead and machine['is_dead']:
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = machine['dimensions']
    for key, value in options.dimensions:
      if key not in dimensions:
        break
      # A bot can have multiple values for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      if isinstance(dimensions[key], list):
        if value not in dimensions[key]:
          break
      else:
        if value != dimensions[key]:
          break
    else:
      print machine['id']
      if not options.bare:
        print '  %s' % json.dumps(dimensions, sort_keys=True)
  return 0
Example #24
    def push(self, item, push_state, content=None):
        assert isinstance(item, Item)
        assert item.digest is not None
        assert item.size is not None
        assert isinstance(push_state, _IsolateServerPushState)
        assert not push_state.finalized

        # Default to item.content().
        content = item.content() if content is None else content
        logging.info('Push state size: %d', push_state.size)
        guard_memory_use(self, content, push_state.size)

        try:
            # This push operation may be a retry after failed finalization call below,
            # no need to reupload contents in that case.
            if not push_state.uploaded:
                # PUT file to |upload_url|.
                success = self._do_push(push_state, content)
                if not success:
                    raise IOError(
                        'Failed to upload file with hash %s to URL %s' %
                        (item.digest, push_state.upload_url))
                push_state.uploaded = True
            else:
                logging.info(
                    'A file %s already uploaded, retrying finalization only',
                    item.digest)

            # Optionally notify the server that it's done.
            if push_state.finalize_url:
                # TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
                # send it to isolated server. That way isolate server can verify that
                # the data safely reached Google Storage (GS provides MD5 and CRC32C of
                # stored files).
                # TODO(maruel): Fix the server to accept properly data={} so
                # url_read_json() can be used.
                response = net.url_read_json(
                    url='%s/%s' % (self._base_url, push_state.finalize_url),
                    data={
                        'upload_ticket':
                        push_state.preupload_status['upload_ticket'],
                    })
                if not response or not response['ok']:
                    raise IOError('Failed to finalize file with hash %s.' %
                                  item.digest)
            push_state.finalized = True
        finally:
            with self._lock:
                self._memory_use -= push_state.size
Example #25
 def refresh_token(self):
   """Returns a fresh token. Necessary as the token may expire after an hour.
   """
   url = self.url + self.token_resource
   resp = net.url_read_json(
       url,
       headers={'X-XSRF-Token-Request': '1'},
       data=self.xsrf_request_params)
   if resp is None:
     raise Error('Failed to connect to %s' % url)
   self.token = resp['xsrf_token']
   if resp.get('expiration_sec'):
     exp = resp['expiration_sec']
     exp -= min(round(exp * 0.1), 600)
     self.expiration = _utcnow() + datetime.timedelta(seconds=exp)
   return self.token
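A short worked example of the safety margin computed above; the numbers are illustrative:

exp = 3600                          # Server says the token lasts 3600 sec.
exp -= min(round(exp * 0.1), 600)   # Margin is min(360.0, 600) == 360.0.
# exp is now 3240.0, so the client refreshes well before the real expiry.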
Example #26
    def url_read_json(self, resource, **kwargs):
        url = self.url + resource
        if kwargs.get('data') is None:
            # No XSRF token required for GET.
            return net.url_read_json(url, **kwargs)

        if not self.token:
            self.token = self.refresh_token()
        resp = self._url_read_json_post(url, **kwargs)
        if resp is None:
            logging.error('Forcibly refreshing; %s, %s', url, kwargs)
            # This includes 403 because the XSRF token expired. Renew the token.
            # TODO(maruel): It'd be great if it were transparent.
            self.refresh_token()
            resp = self._url_read_json_post(url, **kwargs)
        if resp is None:
            raise Error('Failed to connect to %s' % url)
        return resp
Example #27
def get_client_fetch_url(service_url, package_name, instance_id, timeout=None):
  """Returns a fetch URL of CIPD client binary contents.

  Raises:
    Error if cannot retrieve fetch URL.
  """
  # Fetch the URL of the binary from CIPD backend.
  url = '%s/_ah/api/repo/v1/client?%s' % (service_url, urllib.urlencode({
    'package_name': package_name,
    'instance_id': instance_id,
  }))
  res = net.url_read_json(url, timeout=timeout)
  _check_response(
      res, 'Could not fetch CIPD client %s:%s', package_name, instance_id)
  fetch_url = res.get('client_binary', {}).get('fetch_url')
  if not fetch_url:
    raise Error('Invalid fetchClientBinary response: no fetch_url')
  return fetch_url
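Chained with resolve_version() from Example #22, these two calls turn a symbolic version into a concrete download URL. A hedged sketch with a made-up server URL and package name:

cipd_server = 'https://example-cipd.appspot.com'
package = 'infra/tools/cipd/linux-amd64'
instance_id = resolve_version(cipd_server, package, 'latest')
fetch_url = get_client_fetch_url(cipd_server, package, instance_id)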
Example #28
  def _server_capabilities(self):
    """Gets server details.

    Returns:
      Server capabilities dictionary as returned by /server_details endpoint.
    """
    # TODO(maruel): Make this request much earlier asynchronously while the
    # files are being enumerated.

    # TODO(vadimsh): Put |namespace| in the URL so that server can apply
    # namespace-level ACLs to this call.

    with self._lock:
      if self._server_caps is None:
        self._server_caps = net.url_read_json(
            url='%s/_ah/api/isolateservice/v1/server_details' % self._base_url,
            data={})
      return self._server_caps
Example #29
  def url_read_json(self, resource, **kwargs):
    url = self.url + resource
    if kwargs.get('data') is None:
      # No XSRF token required for GET.
      return net.url_read_json(url, **kwargs)

    if not self.token:
      self.token = self.refresh_token()
    resp = self._url_read_json_post(url, **kwargs)
    if resp is None:
      logging.error('Forcibly refreshing; %s, %s', url, kwargs)
      # This includes 403 because the XSRF token expired. Renew the token.
      # TODO(maruel): It'd be great if it were transparent.
      self.refresh_token()
      resp = self._url_read_json_post(url, **kwargs)
    if resp is None:
      raise Error('Failed to connect to %s' % url)
    return resp
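A hedged usage sketch of this wrapper: a GET (no data) bypasses the XSRF machinery, while a POST refreshes the token and retries once before raising Error. The client instance, endpoint paths and payload below are illustrative:

# GET: data is None, so no XSRF token is attached.
details = client.url_read_json('/_ah/api/swarming/v1/server/details')
# POST: data is set, so the token is fetched or renewed as needed.
reply = client.url_read_json('/_ah/api/swarming/v1/tasks/new', data=request)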
Example #30
def CMDterminate(parser, args):
  """Tells a bot to gracefully shut itself down as soon as it can.

  This is done by completing the current task, if any, and then exiting the bot
  process.
  """
  parser.add_option(
      '--wait', action='store_true', help='Wait for the bot to terminate')
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Please provide the bot id')
  url = options.swarming + '/_ah/api/swarming/v1/bot/%s/terminate' % args[0]
  request = net.url_read_json(url, data={})
  if not request:
    print >> sys.stderr, 'Failed to ask for termination'
    return 1
  if options.wait:
    return collect(
        options.swarming, [request['task_id']], 0., False, False, None, None)
  return 0
Example #31
    def contains(self, items):
        # Ensure all items were initialized with 'prepare' call. Storage does that.
        assert all(i.digest is not None and i.size is not None for i in items)

        # Request body is a json encoded list of dicts.
        body = {
            'items': [{
                'digest': item.digest,
                'is_isolated': bool(item.high_priority),
                'size': item.size,
            } for item in items],
            'namespace':
            self._namespace_dict,
        }

        query_url = '%s/api/isolateservice/v1/preupload' % self._base_url

        # Response body is a list of push_urls (or null if file is already present).
        response = None
        try:
            response = net.url_read_json(url=query_url, data=body)
            if response is None:
                raise isolated_format.MappingError(
                    'Failed to execute preupload query')
        except ValueError as err:
            raise isolated_format.MappingError(
                'Invalid response from server: %s, body is %s' %
                (err, response))

        # Pick Items that are missing, attach _PushState to them.
        missing_items = {}
        for preupload_status in response.get('items', []):
            assert 'upload_ticket' in preupload_status, (
                preupload_status,
                '/preupload did not generate an upload ticket')
            index = int(preupload_status['index'])
            missing_items[items[index]] = _IsolateServerPushState(
                preupload_status, items[index].size)
        logging.info('Queried %d files, %d cache hit', len(items),
                     len(items) - len(missing_items))
        return missing_items
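Read together with push() from Example #24, the intended upload flow is: ask the server which items are missing via contains(), then push only those, reusing the push_state handed back by /preupload. A hedged outline; storage_api and items are assumed to exist:

missing = storage_api.contains(items)
for item, push_state in missing.items():
  storage_api.push(item, push_state)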
Example #32
def swarming_trigger(swarming, raw_request):
    """Triggers a request on the Swarming server and returns the json data.

  It's the low-level function.

  Returns:
    {
      'request': {
        'created_ts': u'2010-01-02 03:04:05',
        'name': ..
      },
      'task_id': '12300',
    }
  """
    logging.info("Triggering: %s", raw_request["name"])

    result = net.url_read_json(swarming + "/_ah/api/swarming/v1/tasks/new", data=raw_request)
    if not result:
        on_error.report("Failed to trigger task %s" % raw_request["name"])
        return None
    return result
Example #33
  def _do_push(self, push_state, content):
    """Uploads isolated file to the URL.

    Used only for storing files, not for API calls. Can be overridden in
    subclasses.

    Args:
      push_state: an _IsolateServerPushState instance.
      content: an iterable that yields 'str' chunks.
    """
    # A cheesy way to avoid memcpy of (possibly huge) file, until streaming
    # upload support is implemented.
    if isinstance(content, list) and len(content) == 1:
      content = content[0]
    else:
      content = ''.join(content)

    # DB upload
    if not push_state.finalize_url:
      url = '%s/%s' % (self._base_url, push_state.upload_url)
      content = base64.b64encode(content)
      data = {
          'upload_ticket': push_state.preupload_status['upload_ticket'],
          'content': content,
      }
      response = net.url_read_json(url=url, data=data)
      return response is not None and response['ok']

    # upload to GS
    url = push_state.upload_url
    response = net.url_read(
        content_type='application/octet-stream',
        data=content,
        method='PUT',
        headers={'Cache-Control': 'public, max-age=31536000'},
        url=url)
    return response is not None
Example #34
def CMDbot_delete(parser, args):
    """Forcibly deletes bots from the Swarming server."""
    parser.add_option("-f", "--force", action="store_true", help="Do not prompt for confirmation")
    options, args = parser.parse_args(args)
    if not args:
        parser.error("Please specific bots to delete")

    bots = sorted(args)
    if not options.force:
        print ("Delete the following bots?")
        for bot in bots:
            print ("  %s" % bot)
        if raw_input("Continue? [y/N] ") not in ("y", "Y"):
            print ("Goodbye.")
            return 1

    result = 0
    for bot in bots:
        url = "%s/_ah/api/swarming/v1/bot/%s/delete" % (options.swarming, bot)
        if net.url_read_json(url, data={}, method="POST") is None:
            print ("Deleting %s failed. Probably already gone" % bot)
            result = 1
    return result
Example #35
def post_error_task(botobj, error, task_id):
  """Posts given error as failure cause for the task.

  This is used in case of an internal code error; it causes the task to become
  BOT_DIED.

  Arguments:
    botobj: A bot.Bot instance.
    error: String representing the problem.
    task_id: Task that had an internal error. When the Swarming server sends
        commands to a bot, even though they could be completely wrong, the
        server assumes the job is running. Thus this function acts as the
        exception handler for incoming commands from the Swarming server. If for
        any reason the local test runner script cannot be run successfully,
        this function is invoked.
  """
  logging.error('Error: %s', error)
  data = {
    'id': botobj.id,
    'message': error,
    'task_id': task_id,
  }
  return net.url_read_json(
      botobj.server + '/swarming/api/v1/bot/task_error/%s' % task_id, data=data)
Example #36
def swarming_trigger(swarming, raw_request):
  """Triggers a request on the Swarming server and returns the json data.

  It's the low-level function.

  Returns:
    {
      'request': {
        'created_ts': u'2010-01-02 03:04:05',
        'name': ..
      },
      'task_id': '12300',
    }
  """
  logging.info('Triggering: %s', raw_request['name'])

  result = net.url_read_json(
      swarming + '/_ah/api/swarming/v1/tasks/new', data=raw_request)
  if not result:
    on_error.report('Failed to trigger task %s' % raw_request['name'])
    return None
  if result.get('error'):
    # The reply is an error.
    msg = 'Failed to trigger task %s' % raw_request['name']
    if result['error'].get('errors'):
      for err in result['error']['errors']:
        if err.get('message'):
          msg += '\nMessage: %s' % err['message']
        if err.get('debugInfo'):
          msg += '\nDebug info:\n%s' % err['debugInfo']
    elif result['error'].get('message'):
      msg += '\nMessage: %s' % result['error']['message']

    on_error.report(msg)
    return None
  return result
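For reference, the error branch above expects a reply shaped roughly like the following; the field values are made up:

example_error_reply = {
  'error': {
    'errors': [
      {'message': 'Invalid dimensions', 'debugInfo': 'stack trace ...'},
    ],
    'message': 'Bad Request',
  },
}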
Example #37
def CMDquery(parser, args):
  """Returns raw JSON information via an URL endpoint. Use 'query-list' to
  gather the list of API methods from the server.

  Examples:
    Listing all bots:
      swarming.py query -S server-url.com bots/list

    Listing last 10 tasks on a specific bot named 'swarm1':
      swarming.py query -S server-url.com --limit 10 bot/swarm1/tasks

    Listing last 10 tasks with tags os:Ubuntu-12.04 and pool:Chrome. Note that
    quoting is important!:
      swarming.py query -S server-url.com --limit 10 \\
          'tasks/list?tags=os:Ubuntu-12.04&tags=pool:Chrome'
  """
  CHUNK_SIZE = 250

  parser.add_option(
      '-L', '--limit', type='int', default=200,
      help='Limit to enforce on limitless items (like number of tasks); '
           'default=%default')
  parser.add_option(
      '--json', help='Path to JSON output file (otherwise prints to stdout)')
  parser.add_option(
      '--progress', action='store_true',
      help='Prints a dot at each request to show progress')
  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error(
        'Must specify only the method name, optionally followed by properly '
        'escaped query args.')
  base_url = options.swarming + '/_ah/api/swarming/v1/' + args[0]
  url = base_url
  if options.limit:
    # Best guess; change if this does not work out.
    merge_char = '&' if '?' in url else '?'
    url += '%slimit=%d' % (merge_char, min(CHUNK_SIZE, options.limit))
  data = net.url_read_json(url)
  if data is None:
    # TODO(maruel): Do basic diagnostic.
    print >> sys.stderr, 'Failed to access %s' % url
    return 1

  # Some items support cursors. Try to get automatically if cursors are needed
  # by looking at the 'cursor' items.
  while (
      data.get('cursor') and
      (not options.limit or len(data['items']) < options.limit)):
    merge_char = '&' if '?' in base_url else '?'
    url = base_url + '%scursor=%s' % (merge_char, urllib.quote(data['cursor']))
    if options.limit:
      url += '&limit=%d' % min(CHUNK_SIZE, options.limit - len(data['items']))
    if options.progress:
      sys.stdout.write('.')
      sys.stdout.flush()
    new = net.url_read_json(url)
    if new is None:
      if options.progress:
        print('')
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    data['items'].extend(new.get('items', []))
    data['cursor'] = new.get('cursor')

  if options.progress:
    print('')
  if options.limit and len(data.get('items', [])) > options.limit:
    data['items'] = data['items'][:options.limit]
  data.pop('cursor', None)

  if options.json:
    options.json = unicode(os.path.abspath(options.json))
    tools.write_json(options.json, data, True)
  else:
    try:
      tools.write_json(sys.stdout, data, False)
      sys.stdout.write('\n')
    except IOError:
      pass
  return 0
Example #38
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that were run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.

  You can pass additional arguments to the target command by adding them
  after --.
  """
  options, args = parser.parse_args(args)
  extra_args = []
  if not args:
    parser.error('Must specify exactly one task id.')
  if len(args) > 1:
    if args[1] == '--':
      if len(args) > 2:
        extra_args = args[2:]
    else:
      extra_args = args[1:]

  url = options.swarming + '/_ah/api/swarming/v1/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  workdir = unicode(os.path.abspath('work'))
  if not fs.isdir(workdir):
    fs.mkdir(workdir)

  properties = request['properties']
  env = None
  if properties.get('env'):
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    for i in properties['env']:
      key = i['key'].encode('utf-8')
      if not i['value']:
        env.pop(key, None)
      else:
        env[key] = i['value'].encode('utf-8')

  if properties.get('inputs_ref'):
    # Create the tree.
    with isolateserver.get_storage(
          properties['inputs_ref']['isolatedserver'],
          properties['inputs_ref']['namespace']) as storage:
      bundle = isolateserver.fetch_isolated(
          properties['inputs_ref']['isolated'],
          storage,
          isolateserver.MemoryCache(file_mode_mask=0700),
          workdir,
          False)
      command = bundle.command
      if bundle.relative_cwd:
        workdir = os.path.join(workdir, bundle.relative_cwd)
  else:
    command = properties['command']
  try:
    return subprocess.call(command + extra_args, env=env, cwd=workdir)
  except OSError as e:
    print >> sys.stderr, 'Failed to run: %s' % ' '.join(command)
    print >> sys.stderr, str(e)
    return 1
Example #39
def run_bot(arg_error):
  """Runs the bot until it reboots or self-update or a signal is received.

  When a signal is received, simply exit.
  """
  quit_bit = threading.Event()
  def handler(sig, _):
    logging.info('Got signal %s', sig)
    quit_bit.set()

  # TODO(maruel): Set quit_bit when stdin is closed on Windows.

  with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
    config = get_config()
    try:
      # First thing is to get an arbitrary url. This also ensures the network is
      # up and running, which is necessary before trying to get the FQDN below.
      resp = net.url_read(config['server'] + '/swarming/api/v1/bot/server_ping')
      if resp is None:
        logging.error('No response from server_ping')
    except Exception as e:
      # url_read() already traps pretty much every exception. This except
      # clause is kept there "just in case".
      logging.exception('server_ping threw')

    if quit_bit.is_set():
      logging.info('Early quit 1')
      return 0

    # If this fails, there's hardly anything that can be done, the bot can't
    # even get to the point to be able to self-update.
    botobj = get_bot()
    resp = net.url_read_json(
        botobj.server + '/swarming/api/v1/bot/handshake',
        data=botobj._attributes)
    if not resp:
      logging.error('Failed to contact for handshake')
    else:
      logging.info('Connected to %s', resp.get('server_version'))
      if resp.get('bot_version') != botobj._attributes['version']:
        logging.warning(
            'Found out we\'ll need to update: server said %s; we\'re %s',
            resp.get('bot_version'), botobj._attributes['version'])

    if arg_error:
      botobj.post_error('Bootstrapping error: %s' % arg_error)

    if quit_bit.is_set():
      logging.info('Early quit 2')
      return 0

    clean_isolated_cache(botobj)

    call_hook(botobj, 'on_bot_startup')

    if quit_bit.is_set():
      logging.info('Early quit 3')
      return 0

    # This environment variable is accessible to the tasks executed by this bot.
    os.environ['SWARMING_BOT_ID'] = botobj.id.encode('utf-8')

    # Remove the 'work' directory if present, as not removing it may cause the
    # bot to stay quarantined and not be able to get out of this state.
    work_dir = os.path.join(botobj.base_dir, 'work')
    try:
      if os.path.isdir(work_dir):
        file_path.rmtree(work_dir)
    except Exception as e:
      botobj.post_error('Failed to remove work: %s' % e)

    consecutive_sleeps = 0
    while not quit_bit.is_set():
      try:
        botobj.update_dimensions(get_dimensions(botobj))
        botobj.update_state(get_state(botobj, consecutive_sleeps))
        did_something = poll_server(botobj, quit_bit)
        if did_something:
          consecutive_sleeps = 0
        else:
          consecutive_sleeps += 1
      except Exception as e:
        logging.exception('poll_server failed')
        msg = '%s\n%s' % (e, traceback.format_exc()[-2048:])
        botobj.post_error(msg)
        consecutive_sleeps = 0
    logging.info('Quitting')

  # Tell the server we are going away.
  botobj.post_event('bot_shutdown', 'Signal was received')
  botobj.cancel_all_timers()
  return 0
Example #40
def run_bot(arg_error):
    """Runs the bot until it reboots or self-update or a signal is received.

  When a signal is received, simply exit.
  """
    quit_bit = threading.Event()

    def handler(sig, _):
        logging.info('Got signal %s', sig)
        quit_bit.set()

    # TODO(maruel): Set quit_bit when stdin is closed on Windows.

    with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        config = get_config()
        try:
            # First thing is to get an arbitrary url. This also ensures the network is
            # up and running, which is necessary before trying to get the FQDN below.
            resp = net.url_read(config['server'] +
                                '/swarming/api/v1/bot/server_ping')
            if resp is None:
                logging.error('No response from server_ping')
        except Exception as e:
            # url_read() already traps pretty much every exception. This except
            # clause is kept there "just in case".
            logging.exception('server_ping threw')

        if quit_bit.is_set():
            logging.info('Early quit 1')
            return 0

        # If this fails, there's hardly anything that can be done, the bot can't
        # even get to the point to be able to self-update.
        botobj = get_bot()
        resp = net.url_read_json(botobj.server +
                                 '/swarming/api/v1/bot/handshake',
                                 data=botobj._attributes)
        if not resp:
            logging.error('Failed to contact for handshake')
        else:
            logging.info('Connected to %s', resp.get('server_version'))
            if resp.get('bot_version') != botobj._attributes['version']:
                logging.warning(
                    'Found out we\'ll need to update: server said %s; we\'re %s',
                    resp.get('bot_version'), botobj._attributes['version'])

        if arg_error:
            botobj.post_error('Bootstrapping error: %s' % arg_error)

        if quit_bit.is_set():
            logging.info('Early quit 2')
            return 0

        clean_isolated_cache(botobj)

        call_hook(botobj, 'on_bot_startup')

        if quit_bit.is_set():
            logging.info('Early quit 3')
            return 0

        # This environment variable is accessible to the tasks executed by this bot.
        os.environ['SWARMING_BOT_ID'] = botobj.id.encode('utf-8')

        # Remove the 'work' directory if present, as not removing it may cause the
        # bot to stay quarantined and not be able to get out of this state.
        work_dir = os.path.join(botobj.base_dir, 'work')
        try:
            if os.path.isdir(work_dir):
                file_path.rmtree(work_dir)
        except Exception as e:
            botobj.post_error('Failed to remove work: %s' % e)

        consecutive_sleeps = 0
        while not quit_bit.is_set():
            try:
                botobj.update_dimensions(get_dimensions(botobj))
                botobj.update_state(get_state(botobj, consecutive_sleeps))
                did_something = poll_server(botobj, quit_bit)
                if did_something:
                    consecutive_sleeps = 0
                else:
                    consecutive_sleeps += 1
            except Exception as e:
                logging.exception('poll_server failed')
                msg = '%s\n%s' % (e, traceback.format_exc()[-2048:])
                botobj.post_error(msg)
                consecutive_sleeps = 0
        logging.info('Quitting')

    # Tell the server we are going away.
    botobj.post_event('bot_shutdown', 'Signal was received')
    botobj.cancel_all_timers()
    return 0
Example #41
def CMDquery(parser, args):
  """Returns raw JSON information via an URL endpoint. Use 'list' to gather the
  list of valid values from the server.

  Examples:
    Printing the list of known URLs:
      swarming.py query -S https://server-url list

    Listing last 50 tasks on a specific bot named 'swarm1'
      swarming.py query -S https://server-url --limit 50 bot/swarm1/tasks
  """
  CHUNK_SIZE = 250

  parser.add_option(
      '-L', '--limit', type='int', default=200,
      help='Limit to enforce on limitless items (like number of tasks); '
           'default=%default')
  parser.add_option(
      '--json', help='Path to JSON output file (otherwise prints to stdout)')
  (options, args) = parser.parse_args(args)
  if len(args) != 1:
    parser.error('Must specify only one resource name.')

  base_url = options.swarming + '/swarming/api/v1/client/' + args[0]
  url = base_url
  if options.limit:
    # Best guess; change if this does not work out.
    merge_char = '&' if '?' in url else '?'
    url += '%slimit=%d' % (merge_char, min(CHUNK_SIZE, options.limit))
  data = net.url_read_json(url)
  if data is None:
    print >> sys.stderr, 'Failed to access %s' % options.swarming
    return 1

  # Some items support cursors. Try to get automatically if cursors are needed
  # by looking at the 'cursor' items.
  while (
      data.get('cursor') and
      (not options.limit or len(data['items']) < options.limit)):
    merge_char = '&' if '?' in base_url else '?'
    url = base_url + '%scursor=%s' % (merge_char, urllib.quote(data['cursor']))
    if options.limit:
      url += '&limit=%d' % min(CHUNK_SIZE, options.limit - len(data['items']))
    new = net.url_read_json(url)
    if new is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    data['items'].extend(new['items'])
    data['cursor'] = new['cursor']

  if options.limit and len(data.get('items', [])) > options.limit:
    data['items'] = data['items'][:options.limit]
  data.pop('cursor', None)

  if options.json:
    with open(options.json, 'w') as f:
      json.dump(data, f)
  else:
    try:
      json.dump(data, sys.stdout, indent=2, sort_keys=True)
      sys.stdout.write('\n')
    except IOError:
      pass
  return 0
Example #42
 def _url_read_json_post(self, url, **kwargs):
     headers = (kwargs.pop('headers', None) or {}).copy()
     headers['X-XSRF-Token'] = self.token
     return net.url_read_json(url, headers=headers, **kwargs)
Example #43
def CMDbots(parser, args):
  """Returns information about the bots connected to the Swarming server."""
  add_filter_options(parser)
  parser.filter_group.add_option(
      '--dead-only', action='store_true',
      help='Only print dead bots, useful to reap them and reimage broken bots')
  parser.filter_group.add_option(
      '-k', '--keep-dead', action='store_true',
      help='Do not filter out dead bots')
  parser.filter_group.add_option(
      '-b', '--bare', action='store_true',
      help='Do not print out dimensions')
  options, args = parser.parse_args(args)

  if options.keep_dead and options.dead_only:
    parser.error('Use only one of --keep-dead and --dead-only')

  bots = []
  cursor = None
  limit = 250
  # Iterate via cursors.
  base_url = (
      options.swarming + '/_ah/api/swarming/v1/bots/list?limit=%d' % limit)
  while True:
    url = base_url
    if cursor:
      url += '&cursor=%s' % urllib.quote(cursor)
    data = net.url_read_json(url)
    if data is None:
      print >> sys.stderr, 'Failed to access %s' % options.swarming
      return 1
    bots.extend(data['items'])
    cursor = data.get('cursor')
    if not cursor:
      break

  for bot in natsort.natsorted(bots, key=lambda x: x['bot_id']):
    if options.dead_only:
      if not bot.get('is_dead'):
        continue
    elif not options.keep_dead and bot.get('is_dead'):
      continue

    # If the user requested to filter on dimensions, ensure the bot has all the
    # dimensions requested.
    dimensions = {i['key']: i.get('value') for i in bot['dimensions']}
    for key, value in options.dimensions:
      if key not in dimensions:
        break
      # A bot can have multiple values for a key, for example,
      # {'os': ['Windows', 'Windows-6.1']}, so that --dimension os=Windows will
      # be accepted.
      if isinstance(dimensions[key], list):
        if value not in dimensions[key]:
          break
      else:
        if value != dimensions[key]:
          break
    else:
      print bot['bot_id']
      if not options.bare:
        print '  %s' % json.dumps(dimensions, sort_keys=True)
        if bot.get('task_id'):
          print '  task: %s' % bot['task_id']
  return 0
Example #44
def retrieve_results(
    base_url, shard_index, task_key, timeout, should_stop, output_collector):
  """Retrieves results for a single task_key.

  Returns:
    <result dict> on success.
    None on failure.
  """
  assert isinstance(timeout, float), timeout
  params = [('r', task_key)]
  result_url = '%s/get_result?%s' % (base_url, urllib.urlencode(params))
  started = now()
  deadline = started + timeout if timeout else None
  attempt = 0

  while not should_stop.is_set():
    attempt += 1

    # Waiting for too long -> give up.
    current_time = now()
    if deadline and current_time >= deadline:
      logging.error('retrieve_results(%s) timed out on attempt %d',
          base_url, attempt)
      return None

    # Do not spin too fast. Spin faster at the beginning though.
    # Start with 1 sec delay and for each 30 sec of waiting add another second
    # of delay, until hitting 15 sec ceiling.
    if attempt > 1:
      max_delay = min(15, 1 + (current_time - started) / 30.0)
      delay = min(max_delay, deadline - current_time) if deadline else max_delay
      if delay > 0:
        logging.debug('Waiting %.1f sec before retrying', delay)
        should_stop.wait(delay)
        if should_stop.is_set():
          return None

    result = None
    try:
      # Disable internal retries in net.url_read, since we are doing retries
      # ourselves. Do not use retry_404 so should_stop is polled more often.
      result = net.url_read_json(result_url, retry_404=False, retry_50x=False)

      # Request failed. Try again.
      if result is None:
        continue

      if not isinstance(result, dict):
        raise ValueError()
    except (ValueError, TypeError):
      logging.warning(
          'Received corrupted or invalid data for task_key %s, retrying: %r',
          task_key, result)
      continue

    # Swarming server uses non-empty 'output' value as a flag that task has
    # finished. How to wait for tasks that produce no output is a mystery.
    if result.get('output'):
      # Record the result, try to fetch attached output files (if any).
      if output_collector:
        # TODO(vadimsh): Respect |should_stop| and |deadline| when fetching.
        output_collector.process_shard_result(shard_index, result)
      return result
Example #45
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that were run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.

  You can pass additional arguments to the target command by adding them
  after --.
  """
  parser.add_option(
      '--output-dir', metavar='DIR', default='',
      help='Directory that will have results stored into')
  options, args = parser.parse_args(args)
  extra_args = []
  if not args:
    parser.error('Must specify exactly one task id.')
  if len(args) > 1:
    if args[1] == '--':
      if len(args) > 2:
        extra_args = args[2:]
    else:
      extra_args = args[1:]

  url = options.swarming + '/_ah/api/swarming/v1/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  workdir = unicode(os.path.abspath('work'))
  if fs.isdir(workdir):
    parser.error('Please delete the directory \'work\' first')
  fs.mkdir(workdir)

  properties = request['properties']
  env = None
  if properties.get('env'):
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    for i in properties['env']:
      key = i['key'].encode('utf-8')
      if not i['value']:
        env.pop(key, None)
      else:
        env[key] = i['value'].encode('utf-8')

  if properties.get('inputs_ref'):
    # Create the tree.
    with isolateserver.get_storage(
          properties['inputs_ref']['isolatedserver'],
          properties['inputs_ref']['namespace']) as storage:
      bundle = isolateserver.fetch_isolated(
          properties['inputs_ref']['isolated'],
          storage,
          isolateserver.MemoryCache(file_mode_mask=0700),
          workdir)
      command = bundle.command
      if bundle.relative_cwd:
        workdir = os.path.join(workdir, bundle.relative_cwd)
      command.extend(properties.get('extra_args') or [])
    # https://github.com/luci/luci-py/blob/master/appengine/swarming/doc/Magic-Values.md
    new_command = run_isolated.process_command(command, options.output_dir)
    if not options.output_dir and new_command != command:
      parser.error('The task has outputs, you must use --output-dir')
    command = new_command
  else:
    command = properties['command']
  try:
    return subprocess.call(command + extra_args, env=env, cwd=workdir)
  except OSError as e:
    print >> sys.stderr, 'Failed to run: %s' % ' '.join(command)
    print >> sys.stderr, str(e)
    return 1
Example #46
 def post_event(self, event_type, message):
   """Posts an event to the server."""
   data = self._attributes.copy()
   data['event'] = event_type
   data['message'] = message
   net.url_read_json(self.server + '/swarming/api/v1/bot/event', data=data)