Example #1
def _mint_jwt_based_token(scopes, service_account_key):
    """Creates new access token given service account private key."""
    # For more info see:
    # * https://developers.google.com/accounts/docs/OAuth2ServiceAccount.

    # JWT header.
    header_b64 = _b64_encode(
        utils.encode_to_json({
            'alg': 'RS256',
            'kid': service_account_key.private_key_id,
            'typ': 'JWT',
        }))

    # JWT claimset.
    now = int(utils.time_time())
    claimset_b64 = _b64_encode(
        utils.encode_to_json({
            'aud': 'https://www.googleapis.com/oauth2/v3/token',
            'exp': now + 3600,
            'iat': now,
            'iss': service_account_key.client_email,
            'scope': ' '.join(scopes),
        }))

    # Sign <header>.<claimset> with account's private key.
    signature_b64 = _b64_encode(
        _rsa_sign('%s.%s' % (header_b64, claimset_b64),
                  service_account_key.private_key))

    # URL encoded body of a token request.
    request_body = urllib.urlencode({
        'grant_type':
        'urn:ietf:params:oauth:grant-type:jwt-bearer',
        'assertion':
        '%s.%s.%s' % (header_b64, claimset_b64, signature_b64),
    })

    # Grab the token (with retries).
    for _ in xrange(0, 5):
        response = urlfetch.fetch(
            url='https://www.googleapis.com/oauth2/v3/token',
            payload=request_body,
            method='POST',
            headers={'Content-Type': 'application/x-www-form-urlencoded'},
            follow_redirects=False,
            deadline=10,
            validate_certificate=True)
        if response.status_code == 200:
            token = json.loads(response.content)
            return {
                'access_token': str(token['access_token']),
                'exp_ts': utils.time_time() + token['expires_in'],
            }
        logging.error('Failed to fetch access token (HTTP %d)\n%s',
                      response.status_code, response.content)

    # All retries have failed, give up.
    raise AccessTokenError(
        'Failed to fetch access token from /oauth2/v3/token')
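Note: _b64_encode is not shown anywhere in this listing; for JWTs the header, claimset, and signature segments are conventionally URL-safe base64 with the trailing '=' padding stripped (RFC 7515). A minimal sketch of such a helper under that assumption:

import base64

def _b64_encode(data):
    # URL-safe base64 without trailing '=' padding, as JWT segments expect.
    return base64.urlsafe_b64encode(data).rstrip('=')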
Example #2
def _mint_jwt_based_token(scopes, service_account_key):
  """Creates new access token given service account private key."""
  # For more info see:
  # * https://developers.google.com/accounts/docs/OAuth2ServiceAccount.

  # JWT header.
  header_b64 = _b64_encode(utils.encode_to_json({
    'alg': 'RS256',
    'kid': service_account_key.private_key_id,
    'typ': 'JWT',
  }))

  # JWT claimset.
  now = int(utils.time_time())
  claimset_b64 = _b64_encode(utils.encode_to_json({
    'aud': 'https://www.googleapis.com/oauth2/v3/token',
    'exp': now + 3600,
    'iat': now,
    'iss': service_account_key.client_email,
    'scope': ' '.join(scopes),
  }))

  # Sign <header>.<claimset> with account's private key.
  signature_b64 = _b64_encode(_rsa_sign(
      '%s.%s' % (header_b64, claimset_b64), service_account_key.private_key))

  # URL encoded body of a token request.
  request_body = urllib.urlencode({
    'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer',
    'assertion': '%s.%s.%s' % (header_b64, claimset_b64, signature_b64),
  })

  # Grab the token (with retries).
  for _ in xrange(0, 5):
    response = urlfetch.fetch(
        url='https://www.googleapis.com/oauth2/v3/token',
        payload=request_body,
        method='POST',
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        follow_redirects=False,
        deadline=10,
        validate_certificate=True)
    if response.status_code == 200:
      token = json.loads(response.content)
      return {
        'access_token': str(token['access_token']),
        'exp_ts': utils.time_time() + token['expires_in'],
      }
    logging.error(
        'Failed to fetch access token (HTTP %d)\n%s',
        response.status_code, response.content)

  # All retries have failed, give up.
  raise AccessTokenError('Failed to fetch access token from /oauth2/v3/token')
Example #3
 def sign_claimset_async(self, claimset):
   # Prepare JWT header and claimset as base 64.
   header_b64 = _b64_encode(utils.encode_to_json({
     'alg': 'RS256',
     'kid': self._key.private_key_id,
     'typ': 'JWT',
   }))
   claimset_b64 = _b64_encode(utils.encode_to_json(claimset))
   # Sign <header>.<claimset> with account's private key.
   signature_b64 = _b64_encode(self._rsa_sign(
       '%s.%s' % (header_b64, claimset_b64), self._key.private_key))
   jwt = '%s.%s.%s' % (header_b64, claimset_b64, signature_b64)
   _log_jwt(self.email, 'local', jwt)
   raise ndb.Return(jwt)
Example #4
    def test_start(self, enqueue_tasks):
        # create a build a day for 3 days
        proc = {'name': 'foo', 'payload': 'bar'}
        self.post({
            'proc': proc,
        })

        # Expect a segment for each day.
        seg_path_prefix = bulkproc.PATH_PREFIX + 'segment/'
        self.assertEqual(enqueue_tasks.call_count, 24)
        all_tasks = []
        for (queue_name, tasks), _ in enqueue_tasks.call_args_list:
            self.assertEqual(queue_name, 'bulkproc')
            all_tasks.extend(tasks)
        self.assertEqual(len(all_tasks), 2165)
        self.assertEqual(
            all_tasks[0],
            (
                None,
                seg_path_prefix + 'seg:0-percent:0',
                utils.encode_to_json(
                    {
                        'job_id': 'taskname',
                        'iteration': 0,
                        'seg_index': 0,
                        'seg_start': 8991624996803575808,
                        'seg_end': 8991647646045175807,
                        'started_ts': utils.datetime_to_timestamp(self.now),
                        'proc': proc,
                    }),
            ),
        )
        self.assertEqual(
            all_tasks[1],
            (
                None,
                seg_path_prefix + 'seg:1-percent:0',
                utils.encode_to_json(
                    {
                        'job_id': 'taskname',
                        'iteration': 0,
                        'seg_index': 1,
                        'seg_start': 8991647646045175808,
                        'seg_end': 8991670295286775807,
                        'started_ts': utils.datetime_to_timestamp(self.now),
                        'proc': proc,
                    }),
            ),
        )
Example #5
def request_to_task_to_run_key(request):
    """Returns the ndb.Key for a TaskToRun from a TaskRequest."""
    assert isinstance(request, task_request.TaskRequest), request
    dimensions_json = utils.encode_to_json(request.properties.dimensions)
    return ndb.Key(TaskToRun,
                   _hash_dimensions(dimensions_json),
                   parent=request.key)
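Note: _hash_dimensions is also not included in these results. Example #42 below uses its output directly as the TaskToRun key's integer id and mentions a roughly 2**-31 chance of collisions, which points at a 32-bit hash of the JSON-encoded dimensions. A purely hypothetical sketch along those lines (the production helper may use a different hash function):

import hashlib

def _hash_dimensions(dimensions_json):
  # Hypothetical: fold the JSON-encoded dimensions into a 32-bit integer
  # usable as an ndb.Key integer id.
  return int(hashlib.md5(dimensions_json).hexdigest()[:8], 16)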
Example #6
    def set_from_run_result(self, run_result, request):
        """Copies all the relevant properties from a TaskRunResult into this
    TaskResultSummary.

    If the task completed, succeeded and is idempotent, self.properties_hash is
    set.
    """
        assert isinstance(run_result, TaskRunResult), run_result
        for property_name in _TaskResultCommon._properties_fixed():
            setattr(self, property_name, getattr(run_result, property_name))
        # Include explicit support for 'state' and 'try_number'. TaskRunResult.state
        # is a ComputedProperty so it can't be copied as-is, and try_number is a
        # generated property.
        # pylint: disable=W0201
        self.state = run_result.state
        self.try_number = run_result.try_number

        while len(self.costs_usd) < run_result.try_number:
            self.costs_usd.append(0.)
        self.costs_usd[run_result.try_number - 1] = run_result.cost_usd

        if (self.state == State.COMPLETED and not self.failure
                and not self.internal_failure and request.properties.idempotent
                and not self.deduped_from):
            # Signal the results are valid and can be reused.
            phash = request.properties_hash
            if phash is None:
                # TODO(iannucci): Remove this 24 hours after addition.
                # this was triggered on the older schema where properties_hash was
                # dynamically calculated on every use, and so its value in the db is
                # None. Recalculate it here to smooth the transition.
                phash = request.HASHING_ALGO(
                    utils.encode_to_json(request.properties)).digest()
            self.properties_hash = phash
            assert self.properties_hash
Example #7
 def test_dimensions_search_sizing_7_4(self):
     # Likely maximum permitted; 7 keys of 4 items each.
     dimensions = {str(k): ["01234567890123456789" * i for i in xrange(1, 4)] for k in xrange(7)}
     items = tuple(
         sorted(task_to_run._hash_dimensions(utils.encode_to_json(i)) for i in task_to_run._powerset(dimensions))
     )
     self.assertEqual(16384, len(items))
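Note: the expected counts in these sizing tests follow one pattern: _powerset appears to give every dimension key (number of values + 1) choices, i.e. the key is either absent or set to exactly one of its values. A quick check of that assumption against the three assertions in this listing:

# 7 keys with 3 values each (this test), 14 single-valued keys (Example #12),
# and 1 key with 19 values (Example #24):
assert (3 + 1) ** 7 == 16384
assert (1 + 1) ** 14 == 16384
assert (19 + 1) ** 1 == 20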
Example #8
def _assert_task_props(properties, expiration_ts):
    """Asserts a TaskDimensions for a specific TaskProperties.

  Implementation of assert_task().
  """
    # TODO(maruel): Make it a tasklet.
    dimensions_hash = hash_dimensions(properties.dimensions)
    task_dims_key = _get_task_dims_key(dimensions_hash, properties.dimensions)
    obj = task_dims_key.get()
    if obj:
        # Reduce the check to be 5~10 minutes earlier to help reduce an attack of
        # task queues when there's a strong on-going load of tasks happening. This
        # jitter is essentially removed from _ADVANCE window.
        jitter = datetime.timedelta(seconds=random.randint(5 * 60, 10 * 60))
        valid_until_ts = expiration_ts - jitter
        s = obj.match_request(properties.dimensions)
        if s:
            if s.valid_until_ts >= valid_until_ts:
                # Cache hit. It is important to reconfirm the dimensions because a hash
                # can be conflicting.
                logging.debug('assert_task(%d): hit', dimensions_hash)
                return
            else:
                logging.info(
                    'assert_task(%d): set.valid_until_ts(%s) < expected(%s); '
                    'triggering rebuild-task-cache', dimensions_hash,
                    s.valid_until_ts, valid_until_ts)
        else:
            logging.info(
                'assert_task(%d): failed to match the dimensions; triggering '
                'rebuild-task-cache', dimensions_hash)
    else:
        logging.info(
            'assert_task(%d): new request kind; triggering rebuild-task-cache',
            dimensions_hash)

    data = {
        u'dimensions': properties.dimensions,
        u'dimensions_hash': str(dimensions_hash),
        u'valid_until_ts': expiration_ts + _ADVANCE,
    }
    payload = utils.encode_to_json(data)

    # If this task specifies an 'id' value, update the cache inline since we know
    # there's only one bot that can run it, so it won't take long. This permits
    # tasks like 'terminate' tasks to execute faster.
    if properties.dimensions.get(u'id'):
        rebuild_task_cache(payload)
        return

    # We can't use the request ID since the request was not stored yet, so embed
    # all the necessary information.
    url = '/internal/taskqueue/rebuild-task-cache'
    if not utils.enqueue_task(
            url, queue_name='rebuild-task-cache', payload=payload):
        logging.error('Failed to enqueue TaskDimensions update %x',
                      dimensions_hash)
        # Technically we'd want to raise an endpoints.InternalServerErrorException.
        # Raising anything that is not TypeError or ValueError is fine.
        raise Error('Failed to trigger task queue; please try again')
Example #9
def fetch_json_async(hostname, path, payload=None, headers=None, **kwargs):
  """Sends JSON request to Gerrit, parses prefixed JSON response.

  See 'fetch' for the list of arguments.

  Returns:
    Deserialized response body on success.
    None on 404 response.

  Raises:
    net.Error on communication errors.
  """
  headers = (headers or {}).copy()
  headers['Accept'] = 'application/json'
  if payload is not None:
    headers['Content-Type'] = 'application/json; charset=utf-8'
  content = yield fetch_async(
      hostname=hostname,
      path=path,
      payload=utils.encode_to_json(payload) if payload is not None else None,
      headers=headers,
      **kwargs)
  if content is None:
    raise ndb.Return(None)
  if not content.startswith(RESPONSE_PREFIX):
    msg = (
        'Unexpected response format. Expected prefix %s. Received: %s' %
        (RESPONSE_PREFIX, content))
    raise net.Error(msg, status_code=200, response=content)
  raise ndb.Return(json.loads(content[len(RESPONSE_PREFIX):]))
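Note: RESPONSE_PREFIX is not defined in this snippet. Gerrit guards its JSON responses against XSSI with a magic prefix, and Example #26 below strips the same five leading characters, so the constant is presumably:

# Gerrit's anti-XSSI prefix; it must be stripped before json.loads().
RESPONSE_PREFIX = ")]}'\n"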
Example #10
def _maybe_pubsub_notify_via_tq(result_summary, request):
    """Examines result_summary and enqueues a task to send PubSub message.

  Must be called within a transaction.

  Raises CommitError on errors (to abort the transaction).
  """
    assert ndb.in_transaction()
    assert isinstance(result_summary, task_result.TaskResultSummary), result_summary
    assert isinstance(request, task_request.TaskRequest), request
    if result_summary.state in task_result.State.STATES_NOT_RUNNING and request.pubsub_topic:
        task_id = task_pack.pack_result_summary_key(result_summary.key)
        ok = utils.enqueue_task(
            url="/internal/taskqueue/pubsub/%s" % task_id,
            queue_name="pubsub",
            transactional=True,
            payload=utils.encode_to_json(
                {
                    "task_id": task_id,
                    "topic": request.pubsub_topic,
                    "auth_token": request.pubsub_auth_token,
                    "userdata": request.pubsub_userdata,
                }
            ),
        )
        if not ok:
            raise datastore_utils.CommitError("Failed to enqueue task queue task")
Example #11
def _maybe_pubsub_notify_via_tq(result_summary, request):
    """Examines result_summary and enqueues a task to send PubSub message.

  Must be called within a transaction.

  Raises CommitError on errors (to abort the transaction).
  """
    assert ndb.in_transaction()
    assert isinstance(result_summary,
                      task_result.TaskResultSummary), result_summary
    assert isinstance(request, task_request.TaskRequest), request
    if (result_summary.state in task_result.State.STATES_NOT_RUNNING
            and request.pubsub_topic):
        task_id = task_pack.pack_result_summary_key(result_summary.key)
        ok = utils.enqueue_task(url='/internal/taskqueue/pubsub/%s' % task_id,
                                queue_name='pubsub',
                                transactional=True,
                                payload=utils.encode_to_json({
                                    'task_id':
                                    task_id,
                                    'topic':
                                    request.pubsub_topic,
                                    'auth_token':
                                    request.pubsub_auth_token,
                                    'userdata':
                                    request.pubsub_userdata,
                                }))
        if not ok:
            raise datastore_utils.CommitError(
                'Failed to enqueue task queue task')
Example #12
 def test_dimensions_search_sizing_14_1(self):
     dimensions = {str(k): '01234567890123456789' for k in xrange(14)}
     items = tuple(
         sorted(
             task_to_run._hash_dimensions(utils.encode_to_json(i))
             for i in task_to_run._powerset(dimensions)))
     self.assertEqual(16384, len(items))
Example #13
    def post(self):
        # Do not run for more than 9 minutes. Exceeding 10min hard limit causes 500.
        time_to_stop = time.time() + 9 * 60

        data = json.loads(self.request.body)
        start = utils.parse_datetime(data['start'])
        end = utils.parse_datetime(data['end'])
        logging.info('Deleting between %s and %s', start, end)

        triggered = 0
        total = 0
        q = model.ContentEntry.query(model.ContentEntry.expiration_ts >= start,
                                     model.ContentEntry.expiration_ts < end)
        cursor = None
        more = True
        while more and time.time() < time_to_stop:
            # Since this query does not fetch the ContentEntry entities themselves,
            # we cannot easily compute the size of the data deleted.
            keys, cursor, more = q.fetch_page(500,
                                              start_cursor=cursor,
                                              keys_only=True)
            if not keys:
                break
            total += len(keys)
            data = utils.encode_to_json([k.string_id() for k in keys])
            if utils.enqueue_task('/internal/taskqueue/cleanup/expired',
                                  'cleanup-expired',
                                  payload=data):
                triggered += 1
            else:
                logging.warning('Failed to trigger task')
        logging.info('Triggered %d tasks for %d entries', triggered, total)
Example #14
 def post(self, payload, headers=None):
     headers = headers or {}
     headers['X-AppEngine-QueueName'] = 'backfill-tag-index'
     headers['X-AppEngine-TaskName'] = 'taskname'
     return self.test_app.post(self.task_url + 'rest',
                               utils.encode_to_json(payload),
                               headers=headers)
Example #15
 def json(self):
   return utils.encode_to_json({
       'cursor': self.cursor.urlsafe() if self.cursor else None,
       'task_start': self.task_start,
       'task_count': self.task_count,
       'count': self.count,
   })
Example #16
    def test_segment_full(self, enqueue_tasks):
        ndb.put_multi([
            model.Build(id=i,
                        bucket='chromium',
                        tags=['buildset:%d' % (i % 3)])
            for i in xrange(50, 52)
        ])
        self.post({
            'action': 'segment',
            'tag': 'buildset',
            'seg_index': 0,
            'seg_start': 50,
            'seg_end': 60,
            'started_ts': utils.datetime_to_timestamp(self.now),
        })

        self.assertEqual(enqueue_tasks.call_count, 1)
        enqueue_tasks.assert_called_with('backfill-tag-index', [(
            None,
            self.task_url + 'tag:buildset-flush',
            utils.encode_to_json({
                'action': 'flush',
                'tag': 'buildset',
                'new_entries': {
                    '0': [['chromium', 51]],
                    '2': [['chromium', 50]],
                },
            }),
        )])
Example #17
def fetch_json_async(hostname, path, payload=None, headers=None, **kwargs):
    """Sends JSON request to Gerrit, parses prefixed JSON response.

  See 'fetch' for the list of arguments.

  Returns:
    Deserialized response body on success.
    None on 404 response.

  Raises:
    net.Error on communication errors.
  """
    headers = (headers or {}).copy()
    headers['Accept'] = 'application/json'
    if payload is not None:
        headers['Content-Type'] = 'application/json; charset=utf-8'
    content = yield fetch_async(
        hostname=hostname,
        path=path,
        payload=utils.encode_to_json(payload) if payload is not None else None,
        headers=headers,
        **kwargs)
    if content is None:
        raise ndb.Return(None)
    if not content.startswith(RESPONSE_PREFIX):
        msg = ('Unexpected response format. Expected prefix %s. Received: %s' %
               (RESPONSE_PREFIX, content))
        raise net.Error(msg, status_code=200, response=content)
    raise ndb.Return(json.loads(content[len(RESPONSE_PREFIX):]))
Example #18
    def post(self):
        body = self.parse_body()
        version = body.get('v')
        # Do not enforce version for now, just assert it is present.
        if not version:
            self.abort(400, 'Missing version')

        report = body.get('r')
        if not report:
            self.abort(400, 'Missing report')

        kwargs = dict(
            (k, report[k]) for k in on_error.VALID_ERROR_KEYS if report.get(k))
        report_id = on_error.log_request(self.request,
                                         add_params=False,
                                         **kwargs)
        self.response.headers[
            'Content-Type'] = 'application/json; charset=utf-8'
        body = {
            'id':
            report_id,
            'url':
            '%s/restricted/ereporter2/errors/%d' %
            (self.request.host_url, report_id),
        }
        self.response.write(utils.encode_to_json(body))
Example #19
def _multipart_payload(body, content_type, metadata):
    """Generates a body for multipart/related upload request to GCS.

  Such request encodes both file body and its metadata.
  See https://cloud.google.com/storage/docs/json_api/v1/how-tos/multipart-upload

  Args:
    body: raw object body to upload.
    content_type: its content type.
    metadata: dict with GCS metadata (e.g. ACLs) to put into the request.

  Returns:
    (Blob with the request, random boundary string).
  """
    parts = [
        ('application/json; charset=UTF-8', utils.encode_to_json(metadata)),
        (content_type, body),
    ]

    boundary = _multipart_payload_boundary()

    buf = StringIO.StringIO()
    for ct, payload in parts:
        assert boundary not in payload
        buf.write('--%s\r\n' % boundary)
        buf.write('Content-Type: %s\r\n' % ct)
        buf.write('\r\n')
        buf.write(payload)
        buf.write('\r\n')
    buf.write('--%s--\r\n' % boundary)

    return buf.getvalue(), boundary
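Note: the boundary returned above is what lets the caller build the Content-Type header of the actual upload request. A hedged usage sketch against the GCS JSON API multipart endpoint (bucket name, object name, and contents are placeholders, and a real request would also need an OAuth2 Authorization header):

contents = 'example object body'  # placeholder
body, boundary = _multipart_payload(
    contents, 'application/octet-stream', {'name': 'some-object'})
response = urlfetch.fetch(
    url=('https://www.googleapis.com/upload/storage/v1/b/some-bucket/o'
         '?uploadType=multipart'),
    payload=body,
    method='POST',
    headers={'Content-Type': 'multipart/related; boundary=%s' % boundary},
    follow_redirects=False,
    deadline=10,
    validate_certificate=True)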
Example #20
def reclaim_machine(machine_key, reclamation_ts):
    """Attempts to reclaim the given machine.

  Args:
    machine_key: ndb.Key for a model.CatalogMachineEntry instance.
    reclamation_ts: datetime.datetime instance indicating when the machine was
      reclaimed.

  Returns:
    True if the machine was reclaimed, else False.
  """
    machine = machine_key.get()
    if not machine:
        logging.warning('CatalogMachineEntry not found: %s', machine_key)
        return

    logging.info('Attempting to reclaim CatalogMachineEntry:\n%s', machine)

    if machine.lease_expiration_ts is None:
        # This can reasonably happen if e.g. the lease was voluntarily given up.
        logging.warning('CatalogMachineEntry no longer leased:\n%s', machine)
        return False

    if reclamation_ts < machine.lease_expiration_ts:
        # This can reasonably happen if e.g. the lease duration was extended.
        logging.warning('CatalogMachineEntry no longer overdue:\n%s', machine)
        return False

    logging.info('Reclaiming CatalogMachineEntry:\n%s', machine)
    lease = models.LeaseRequest.get_by_id(machine.lease_id)
    hostname = lease.response.hostname
    lease.response.hostname = None

    params = {
        'hostname': hostname,
        'machine_key': machine.key.urlsafe(),
        'machine_subscription': machine.pubsub_subscription,
        'machine_subscription_project': machine.pubsub_subscription_project,
        'machine_topic': machine.pubsub_topic,
        'machine_topic_project': machine.pubsub_topic_project,
        'policies': protojson.encode_message(machine.policies),
        'request_json': protojson.encode_message(lease.request),
        'response_json': protojson.encode_message(lease.response),
    }
    backend_attributes = {}
    for attribute in machine.policies.backend_attributes:
        backend_attributes[attribute.key] = attribute.value
    params['backend_attributes'] = utils.encode_to_json(backend_attributes)
    if lease.request.pubsub_topic:
        params['lessee_project'] = lease.request.pubsub_project
        params['lessee_topic'] = lease.request.pubsub_topic
    if not utils.enqueue_task(
            '/internal/queues/reclaim-machine',
            'reclaim-machine',
            params=params,
            transactional=True,
    ):
        raise TaskEnqueuingError('reclaim-machine')
    return True
Example #21
def init_new_request(request, allow_high_priority):
  """Initializes a new TaskRequest but doesn't store it.

  Fills up some values and does minimal checks.

  If parent_task_id is set, properties for the parent are used:
  - priority: defaults to parent.priority - 1
  - user: overridden by parent.user

  """
  assert request.__class__ is TaskRequest, request
  if request.parent_task_id:
    run_result_key = task_pack.unpack_run_result_key(request.parent_task_id)
    result_summary_key = task_pack.run_result_key_to_result_summary_key(
        run_result_key)
    request_key = task_pack.result_summary_key_to_request_key(
        result_summary_key)
    parent = request_key.get()
    if not parent:
      raise ValueError('parent_task_id is not a valid task')
    request.priority = max(min(request.priority, parent.priority - 1), 0)
    # Drop the previous user.
    request.user = parent.user

  # If the priority is below 100, make sure the user has right to do so.
  if request.priority < 100 and not allow_high_priority:
    # Special case for terminate request.
    if not request.properties.is_terminate:
      # Silently drop the priority of normal users.
      request.priority = 100

  request.authenticated = auth.get_current_identity()
  if (not request.properties.is_terminate and
      request.properties.grace_period_secs is None):
    request.properties.grace_period_secs = 30
  if request.properties.idempotent is None:
    request.properties.idempotent = False

  request.service_account = 'none'
  if request.service_account_token and request.service_account_token != 'none':
    if request.service_account_token == 'bot':
      request.service_account = 'bot'
    else:
      # TODO(vadimsh): Check the token signature, verify it can be used by the
      # current user, extract service account email.
      raise auth.AuthorizationError('service_account_token is not implemented')

  request.tags.append('priority:%s' % request.priority)
  request.tags.append('user:%s' % request.user)
  request.tags.append('service_account:%s' % request.service_account)
  for key, value in request.properties.dimensions.iteritems():
    request.tags.append('%s:%s' % (key, value))
  request.tags = sorted(set(request.tags))

  if request.properties.idempotent:
    request.properties_hash = request.HASHING_ALGO(
      utils.encode_to_json(request.properties)).digest()
  else:
    request.properties_hash = None
Example #22
def _mark_task_as_active(extras, tasks_active):
    task_id = extras.get('task_id')
    if not task_id:
        # Crudely zap out the retries for now.
        # https://code.google.com/p/swarming/issues/detail?id=108
        task_id = extras['run_id'][:-2] + '00'
    dimensions_json = utils.encode_to_json(extras['dimensions'])
    tasks_active.setdefault(dimensions_json, set()).add(task_id)
Example #23
def _mark_task_as_active(extras, tasks_active):
  task_id = extras.get('task_id')
  if not task_id:
    # Crudely zap out the retries for now.
    # https://code.google.com/p/swarming/issues/detail?id=108
    task_id = extras['run_id'][:-2] + '00'
  dimensions_json = utils.encode_to_json(extras['dimensions'])
  tasks_active.setdefault(dimensions_json, set()).add(task_id)
Example #24
 def test_dimensions_search_sizing_1_20(self):
   # Multi-value dimensions must *always* be preferred to split variables. They
   # are much quicker to search.
   dimensions = {'0': ['01234567890123456789' * i for i in xrange(1, 20)]}
   items = tuple(sorted(
       task_to_run._hash_dimensions(utils.encode_to_json(i))
       for i in task_to_run._powerset(dimensions)))
   self.assertEqual(20, len(items))
Example #25
 def test_dimensions_search_sizing_1_20(self):
   # Multi-value dimensions must *always* be preferred to split variables. They
   # are much quicker to search.
   dimensions = {'0': ['01234567890123456789' * i for i in xrange(1, 20)]}
   items = tuple(sorted(
       task_to_run._hash_dimensions(utils.encode_to_json(i))
       for i in task_to_run._powerset(dimensions)))
   self.assertEqual(20, len(items))
Example #26
def _authenticated_request_async(url, method='GET', payload=None, params=None):
  """Sends an authenticated JSON API request, returns deserialized response.

  Raises:
    DelegationTokenCreationError if request failed or response is malformed.
    DelegationAuthorizationError on HTTP 401 or 403 response from auth service.
  """
  scope = 'https://www.googleapis.com/auth/userinfo.email'
  access_token = service_account.get_access_token(scope)[0]
  headers = {
    'Accept': 'application/json; charset=utf-8',
    'Authorization': 'Bearer %s' % access_token,
  }

  if payload is not None:
    assert method in ('CREATE', 'POST', 'PUT'), method
    headers['Content-Type'] = 'application/json; charset=utf-8'
    payload = utils.encode_to_json(payload)

  if utils.is_local_dev_server():
    protocols = ('http://', 'https://')
  else:
    protocols = ('https://',)
  assert url.startswith(protocols) and '?' not in url, url
  if params:
    url += '?' + urllib.urlencode(params)

  try:
    res = yield _urlfetch_async(
        url=url,
        payload=payload,
        method=method,
        headers=headers,
        follow_redirects=False,
        deadline=10,
        validate_certificate=True)
  except (apiproxy_errors.DeadlineExceededError, urlfetch.Error) as e:
    raise DelegationTokenCreationError(str(e))

  if res.status_code in (401, 403):
    logging.error('Token server HTTP %d: %s', res.status_code, res.content)
    raise DelegationAuthorizationError(
        'HTTP %d: %s' % (res.status_code, res.content))

  if res.status_code >= 300:
    logging.error('Token server HTTP %d: %s', res.status_code, res.content)
    raise DelegationTokenCreationError(
        'HTTP %d: %s' % (res.status_code, res.content))

  try:
    content = res.content
    if content.startswith(")]}'\n"):
      content = content[5:]
    json_res = json.loads(content)
  except ValueError as e:
    raise DelegationTokenCreationError('Bad JSON response: %s' % e)
  raise ndb.Return(json_res)
Example #27
 def post(self, payload, headers=None):
     assert self.path_suffix
     headers = headers or {}
     headers['X-AppEngine-QueueName'] = bulkproc.QUEUE_NAME
     headers['X-AppEngine-TaskName'] = 'taskname'
     task_url = bulkproc.PATH_PREFIX + self.path_suffix
     return self.test_app.post(task_url,
                               utils.encode_to_json(payload),
                               headers=headers)
Example #28
 def test_dimensions_search_sizing_7_4(self):
   # Likely maximum permitted; 7 keys of 4 items each.
   dimensions = {
     str(k): ['01234567890123456789' * i for i in xrange(1, 4)]
     for k in xrange(7)
   }
   items = tuple(sorted(
       task_to_run._hash_dimensions(utils.encode_to_json(i))
       for i in task_to_run._powerset(dimensions)))
   self.assertEqual(16384, len(items))
Example #29
 def sign_claimset_async(self, claimset):
   # https://cloud.google.com/iam/reference/rest/v1/projects.serviceAccounts/signJwt
   iam_token, _ = yield self._iam_token_factory()
   response = yield _call_async(
       url='https://iam.googleapis.com/v1/projects/-/serviceAccounts/'
           '%s:signJwt' % self._email,
       payload=utils.encode_to_json({
         'payload': utils.encode_to_json(claimset),  # yep, JSON in JSON
       }),
       method='POST',
       headers={
         'Accept': 'application/json',
         'Authorization': 'Bearer %s' % iam_token,
         'Content-Type': 'application/json; charset=utf-8',
       })
   # 'signedJwt' is base64-encoded string, convert it from unicode to str.
   jwt = response['signedJwt'].encode('ascii')
   _log_jwt(self.email, 'remote', jwt)
   raise ndb.Return(jwt)
Example #30
def json_request_async(
    url,
    method='GET',
    payload=None,
    params=None,
    headers=None,
    scopes=None,
    service_account_key=None,
    delegation_token=None,
    deadline=None,
    max_attempts=None):
  """Sends a JSON REST API request, returns deserialized response.

  Retries the request on transient errors for up to |max_attempts| times.

  Args:
    url: url to send the request to.
    method: HTTP method to use, e.g. GET, POST, PUT.
    payload: object to be serialized to JSON and put in the request body.
    params: dict with query GET parameters (i.e. ?key=value&key=value).
    headers: additional request headers.
    scopes: OAuth2 scopes for the access token (skip auth if None).
    service_account_key: auth.ServiceAccountKey with credentials.
    delegation_token: delegation token returned by auth.delegate.
    deadline: deadline for a single attempt.
    max_attempts: how many times to retry on errors.

  Returns:
    Deserialized JSON response.

  Raises:
    NotFoundError on 404 response.
    AuthError on 401 or 403 response.
    Error on any other non-transient error.
  """
  if payload is not None:
    headers = (headers or {}).copy()
    headers['Content-Type'] = 'application/json; charset=utf-8'
    payload = utils.encode_to_json(payload)
  response = yield request_async(
      url=url,
      method=method,
      payload=payload,
      params=params,
      headers=headers,
      scopes=scopes,
      service_account_key=service_account_key,
      delegation_token=delegation_token,
      deadline=deadline,
      max_attempts=max_attempts)
  try:
    response = json.loads(response)
  except ValueError as e:
    raise Error('Bad JSON response: %s' % e, None, response)
  raise ndb.Return(response)
Example #31
  def properties_hash(self):
    """Calculates the hash for this entity IFF the task is idempotent.

    It uniquely identifies the TaskProperties instance to permit deduplication
    by the task scheduler. It is None if the task is not idempotent.

    Returns:
      Hash as a compact byte str.
    """
    if not self.idempotent:
      return None
    return self.HASHING_ALGO(utils.encode_to_json(self)).digest()
Example #32
    def test_build_steps_keys_only(self, enqueue_tasks):
        build_steps = [
            model.BuildSteps(parent=ndb.Key(model.Build, i),
                             step_container_bytes='') for i in xrange(50, 60)
        ]
        ndb.put_multi(build_steps)

        def processor(results, payload):
            # Take 5
            page = list(itertools.islice(results, 5))
            self.assertEqual(page, [b.key for b in build_steps[:5]])
            self.assertEqual(payload, 'bar')

        self.proc = {
            'entity_kind': 'BuildSteps',
            'func': processor,
            'keys_only': True,
        }

        self.post({
            'job_id': 'jobid',
            'iteration': 0,
            'seg_index': 0,
            'seg_start': 50,
            'seg_end': 59,
            'started_ts': utils.datetime_to_timestamp(self.now),
            'proc': {
                'name': 'foo',
                'payload': 'bar'
            },
        })

        expected_next_payload = {
            'job_id': 'jobid',
            'iteration': 1,
            'seg_index': 0,
            'seg_start': 50,
            'seg_end': 59,
            'start_from': 55,
            'started_ts': utils.datetime_to_timestamp(self.now),
            'proc': {
                'name': 'foo',
                'payload': 'bar'
            },
        }
        enqueue_tasks.assert_called_with(
            'bulkproc',
            [(
                'jobid-0-1',
                bulkproc.PATH_PREFIX + 'segment/seg:0-percent:50',
                utils.encode_to_json(expected_next_payload),
            )],
        )
Example #33
def _pack_entry(**kwargs):
  """Packs an entry so it can be logged as a statistic in the logs.

  Specifically process 'dimensions' if present to remove the key 'hostname'.
  """
  assert kwargs['action'] in _VALID_ACTIONS, kwargs
  dimensions = kwargs.get('dimensions')
  if dimensions:
    kwargs['dimensions'] = {
      k: v for k, v in dimensions.iteritems() if k != 'hostname'
    }
  packed = {_KEY_MAPPING[k]: v for k, v in kwargs.iteritems()}
  return utils.encode_to_json(packed)
Example #34
def _pack_entry(**kwargs):
  """Packs an entry so it can be logged as a statistic in the logs.

  Specifically process 'dimensions' if present to remove the key 'hostname'.
  """
  assert kwargs['action'] in _VALID_ACTIONS, kwargs
  dimensions = kwargs.get('dimensions')
  if dimensions:
    kwargs['dimensions'] = {
      k: v for k, v in dimensions.iteritems() if k != 'hostname'
    }
  packed = {_KEY_MAPPING[k]: v for k, v in kwargs.iteritems()}
  return utils.encode_to_json(packed)
Example #35
 def backfill_tag_index(self, request):
     """Backfills TagIndex entites from builds."""
     if request.shards <= 0:
         raise endpoints.BadRequestException('shards must be positive')
     enqueue_task(
         'backfill-tag-index',
         ('/internal/task/buildbucket/backfill-tag-index/tag:%s-start' %
          request.tag),
         utils.encode_to_json({
             'action': 'start',
             'tag': request.tag,
             'shards': request.shards,
         }))
     return message_types.VoidMessage()
Example #36
def create_subscription(machine_key):
    """Creates a Cloud Pub/Sub subscription for machine communication.

  Args:
    machine_key: ndb.Key for the machine whose subscription should be created.
  """
    machine = machine_key.get()
    logging.info('Attempting to subscribe CatalogMachineEntry:\n%s', machine)

    if not machine:
        logging.warning('CatalogMachineEntry no longer exists: %s',
                        machine_key)
        return

    if machine.state != models.CatalogMachineEntryStates.NEW:
        logging.warning('CatalogMachineEntry no longer new:\n%s', machine)
        return

    if machine.pubsub_subscription:
        logging.info('CatalogMachineEntry already subscribed:\n%s', machine)
        return

    machine.pubsub_subscription = 'subscription-%s' % machine.key.id()
    machine.pubsub_topic = 'topic-%s' % machine.key.id()

    params = {
        'backend_project': machine.policies.backend_project,
        'backend_topic': machine.policies.backend_topic,
        'hostname': machine.dimensions.hostname,
        'machine_id': machine.key.id(),
        'machine_service_account': machine.policies.machine_service_account,
        'machine_subscription': machine.pubsub_subscription,
        'machine_subscription_project': machine.pubsub_subscription_project,
        'machine_topic': machine.pubsub_topic,
        'machine_topic_project': machine.pubsub_topic_project,
    }
    backend_attributes = {}
    for attribute in machine.policies.backend_attributes:
        backend_attributes[attribute.key] = attribute.value
    params['backend_attributes'] = utils.encode_to_json(backend_attributes)
    if utils.enqueue_task(
            '/internal/queues/subscribe-machine',
            'subscribe-machine',
            params=params,
            transactional=True,
    ):
        machine.state = models.CatalogMachineEntryStates.SUBSCRIBING
        machine.put()
    else:
        raise TaskEnqueuingError('subscribe-machine')
Example #37
def create_subscription(machine_key):
  """Creates a Cloud Pub/Sub subscription for machine communication.

  Args:
    machine_key: ndb.Key for the machine whose subscription should be created.
  """
  machine = machine_key.get()
  logging.info('Attempting to subscribe CatalogMachineEntry:\n%s', machine)

  if not machine:
    logging.warning('CatalogMachineEntry no longer exists: %s', machine_key)
    return

  if machine.state != models.CatalogMachineEntryStates.NEW:
    logging.warning('CatalogMachineEntry no longer new:\n%s', machine)
    return

  if machine.pubsub_subscription:
    logging.info('CatalogMachineEntry already subscribed:\n%s', machine)
    return

  machine.pubsub_subscription = 'subscription-%s' % machine.key.id()
  machine.pubsub_topic = 'topic-%s' % machine.key.id()

  params = {
      'backend_project': machine.policies.backend_project,
      'backend_topic': machine.policies.backend_topic,
      'hostname': machine.dimensions.hostname,
      'machine_id': machine.key.id(),
      'machine_service_account': machine.policies.machine_service_account,
      'machine_subscription': machine.pubsub_subscription,
      'machine_subscription_project': machine.pubsub_subscription_project,
      'machine_topic': machine.pubsub_topic,
      'machine_topic_project': machine.pubsub_topic_project,
  }
  backend_attributes = {}
  for attribute in machine.policies.backend_attributes:
    backend_attributes[attribute.key] = attribute.value
  params['backend_attributes'] = utils.encode_to_json(backend_attributes)
  if utils.enqueue_task(
      '/internal/queues/subscribe-machine',
      'subscribe-machine',
      params=params,
      transactional=True,
  ):
    machine.state = models.CatalogMachineEntryStates.SUBSCRIBING
    machine.put()
  else:
    raise TaskEnqueuingError('subscribe-machine')
Example #38
 def test_backfill_tag_index(self, enqueue_task):
     auth.bootstrap_group(auth.ADMIN_GROUP, [auth.Anonymous])
     req = {
         'tag': 'buildset',
         'shards': '64',
     }
     self.call_api('backfill_tag_index', req, status=(200, 204))
     enqueue_task.assert_called_once_with(
         'backfill-tag-index',
         '/internal/task/buildbucket/backfill-tag-index/tag:buildset-start',
         utils.encode_to_json({
             'action': 'start',
             'tag': 'buildset',
             'shards': 64,
         }))
Example #39
def _pubsub_notify(task_id, topic, auth_token, userdata):
    """Sends PubSub notification about task completion.

  Raises pubsub.TransientError on transient errors. Fatal errors are logged, but
  not retried.
  """
    logging.debug(
        'Sending PubSub notify to "%s" (with userdata "%s") about ' 'completion of "%s"', topic, userdata, task_id
    )
    msg = {"task_id": task_id}
    if userdata:
        msg["userdata"] = userdata
    try:
        pubsub.publish(
            topic=topic,
            message=utils.encode_to_json(msg),
            attributes={"auth_token": auth_token} if auth_token else None,
        )
    except pubsub.Error:
        logging.exception("Fatal error when sending PubSub notification")
Example #40
  def post(self):
    body = self.parse_body()
    version = body.get('v')
    # Do not enforce version for now, just assert it is present.
    if not version:
      self.abort(400, 'Missing version')

    report = body.get('r')
    if not report:
      self.abort(400, 'Missing report')

    kwargs = dict(
        (k, report[k]) for k in on_error.VALID_ERROR_KEYS if report.get(k))
    report_id = on_error.log_request(self.request, add_params=False, **kwargs)
    self.response.headers['Content-Type'] = 'application/json; charset=utf-8'
    body = {
      'id': report_id,
      'url':
          '%s/restricted/ereporter2/errors/%d' %
          (self.request.host_url, report_id),
    }
    self.response.write(utils.encode_to_json(body))
Example #41
def _parse_line(line, values, bots_active, bots_inactive, tasks_active):
  """Updates a Snapshot instance with a processed statistics line if relevant.

  This function is a big switch case, so while it is long and will get longer,
  it is relatively easy to read, as long as the keys are kept sorted!
  """
  try:
    try:
      extras = _unpack_entry(line)
      action = extras.pop('action')
    except (KeyError, ValueError):
      data = line.split('; ')
      action = data.pop(0)
      extras = dict(i.split('=', 1) for i in data)

    # Preemptively reduce copy-paste.
    d = None
    if 'dimensions' in extras and action not in ('bot_active', 'bot_inactive'):
      # Skip bot_active because we don't want complex dimensions to be created
      # implicitly.
      dimensions_json = utils.encode_to_json(extras['dimensions'])
      d = values.get_dimensions(dimensions_json)
    u = None
    if 'user' in extras:
      u = values.get_user(extras['user'])

    # Please keep 'action == 'foo' conditions sorted!
    if action == 'bot_active':
      if sorted(extras) != ['bot_id', 'dimensions']:
        raise ValueError(','.join(sorted(extras)))

      bots_active[extras['bot_id']] = extras['dimensions']
      return True

    if action == 'bot_inactive':
      if sorted(extras) != ['bot_id', 'dimensions']:
        raise ValueError(','.join(sorted(extras)))

      bots_inactive[extras.get('bot_id') or 'unknown'] = extras['dimensions']
      return True

    if action == 'run_bot_died':
      _assert_list(extras, ['bot_id', 'dimensions', 'run_id', 'user'])
      d.tasks_bot_died += 1
      u.tasks_bot_died += 1
      return True

    if action == 'run_completed':
      _assert_list(
          extras, ['bot_id', 'dimensions', 'run_id', 'runtime_ms', 'user'])
      _mark_bot_and_task_as_active(extras, bots_active, tasks_active)
      d.tasks_completed += 1
      d.tasks_total_runtime_secs += _ms_to_secs(extras['runtime_ms'])
      u.tasks_completed += 1
      u.tasks_total_runtime_secs += _ms_to_secs(extras['runtime_ms'])
      return True

    if action == 'run_started':
      _assert_list(
          extras, ['bot_id', 'dimensions', 'pending_ms', 'run_id', 'user'])
      _mark_bot_and_task_as_active(extras, bots_active, tasks_active)
      d.tasks_started += 1
      d.tasks_pending_secs += _ms_to_secs(extras['pending_ms'])
      u.tasks_started += 1
      u.tasks_pending_secs += _ms_to_secs(extras['pending_ms'])
      return True

    if action == 'run_updated':
      _assert_list(extras, ['bot_id', 'dimensions', 'run_id'])
      _mark_bot_and_task_as_active(extras, bots_active, tasks_active)
      return True

    # TODO(maruel): Ignore task_completed for now, since it is a duplicate of
    # run_completed.
    if action == 'task_completed':
      _assert_list(extras, ['dimensions', 'pending_ms', 'task_id', 'user'])
      # TODO(maruel): Add pending_ms as the total latency to run tasks, versus
      # the amount of time that was spent actually running the task. This gives
      # the infrastructure wall-clock time overhead.
      #d.tasks_completed += 1
      #u.tasks_completed += 1
      return True

    if action == 'task_enqueued':
      _assert_list(extras, ['dimensions', 'task_id', 'user'])
      _mark_task_as_active(extras, tasks_active)
      d.tasks_enqueued += 1
      u.tasks_enqueued += 1
      return True

    if action == 'task_request_expired':
      _assert_list(extras, ['dimensions', 'task_id', 'user'])
      d.tasks_request_expired += 1
      u.tasks_request_expired += 1
      return True

    logging.error('Unknown stats action\n%s', line)
    return False
  except (TypeError, ValueError) as e:
    logging.error('Failed to parse stats line\n%s\n%s', line, e)
    return False
Example #42
def yield_next_available_task_to_dispatch(bot_dimensions, deadline):
  """Yields next available (TaskRequest, TaskToRun) in decreasing order of
  priority.

  Once the caller determines the task is suitable to execute, it must use
  reap_task_to_run(task.key) to mark that it is not to be scheduled anymore.

  Performance is the top most priority here.

  Arguments:
  - bot_dimensions: dimensions (as a dict) defined by the bot that can be
      matched.
  - deadline: UTC timestamp (as an int) that the bot must be able to
      complete the task by. None if there is no such deadline.
  """
  # List of all the valid dimensions hashed.
  accepted_dimensions_hash = frozenset(
      _hash_dimensions(utils.encode_to_json(i))
      for i in _powerset(bot_dimensions))
  now = utils.utcnow()
  broken = 0
  cache_lookup = 0
  expired = 0
  hash_mismatch = 0
  ignored = 0
  no_queue = 0
  real_mismatch = 0
  too_long = 0
  total = 0
  # Be very aggressive in fetching as many items as possible. Note
  # that we use the default ndb.EVENTUAL_CONSISTENCY so stale items may be
  # returned. It's handled specifically.
  # - 100/200 gives 2s~40s of query time for 1275 items.
  # - 250/500 gives 2s~50s of query time for 1275 items.
  # - 50/500 gives 3s~20s of query time for 1275 items. (Slower but less
  #   variance). Spikes in 20s~40s are rarer.
  # The problems here are:
  # - Outliers, some shards are simply slower at executing the query.
  # - Median time, which we should optimize.
  # - Abusing batching will slow down this query.
  #
  # TODO(maruel): Measure query performance with stats_framework!!
  # TODO(maruel): Use fetch_page_async() + ndb.get_multi_async() +
  # memcache.get_multi_async() to do pipelined processing. Should greatly reduce
  # the effect of latency on the total duration of this function. I also suspect
  # using ndb.get_multi() will return fresher objects than what is returned by
  # the query.
  opts = ndb.QueryOptions(batch_size=50, prefetch_size=500, keys_only=True)
  try:
    # Interestingly, the filter on .queue_number>0 is required otherwise all the
    # None items are returned first.
    q = TaskToRun.query(default_options=opts).order(
        TaskToRun.queue_number).filter(TaskToRun.queue_number > 0)
    for task_key in q:
      duration = (utils.utcnow() - now).total_seconds()
      if duration > 40.:
        # Stop searching after too long, since the odds of the request blowing
        # up right after succeeding in reaping a task is not worth the dangling
        # task request that will stay in limbo until the cron job reaps it and
        # retry it. The current handlers are given 60s to complete. By using
        # 40s, it gives 20s to complete the reaping and complete the HTTP
        # request.
        return

      total += 1
      # Verify TaskToRun is what is expected. Play defensive here.
      try:
        validate_to_run_key(task_key)
      except ValueError as e:
        logging.error(str(e))
        broken += 1
        continue

      # integer_id() == dimensions_hash.
      if task_key.integer_id() not in accepted_dimensions_hash:
        hash_mismatch += 1
        continue

      # Do this after the basic weeding out but before fetching TaskRequest.
      if _lookup_cache_is_taken(task_key):
        cache_lookup += 1
        continue

      # Ok, it's now worth taking a real look at the entity.
      task = task_key.get(use_cache=False)

      # DB operations are slow, double check memcache again.
      if _lookup_cache_is_taken(task_key):
        cache_lookup += 1
        continue

      # It is possible for the index to be inconsistent since it is not executed
      # in a transaction, no problem.
      if not task.queue_number:
        no_queue += 1
        continue

      # It expired. A cron job will cancel it eventually. Since 'now' is saved
      # before the query, a task may still be reaped even if it technically
      # expired while the (very slow) query was running. This is on purpose so
      # slow queries do not cause exaggerated expirations.
      if task.expiration_ts < now:
        expired += 1
        continue

      # The hash may have conflicts. Ensure the dimensions actually match by
      # verifying the TaskRequest. There's a probability of 2**-31 of conflicts,
      # which is low enough for our purpose. The reason use_cache=False is
      # otherwise it'll create a buffer bloat.
      request = task.request_key.get(use_cache=False)
      if not match_dimensions(request.properties.dimensions, bot_dimensions):
        real_mismatch += 1
        continue

      # If the bot has a deadline, don't allow it to reap the task unless it can
      # be completed before the deadline. We have to assume the task takes the
      # theoretical maximum amount of time possible, which is governed by
      # execution_timeout_secs. An isolated task's download phase is not subject
      # to this limit, so we need to add io_timeout_secs. When a task is
      # signalled that it's about to be killed, it receives a grace period as
      # well. grace_period_secs is given by run_isolated to the task execution
      # process, by task_runner to run_isolated, and by bot_main to the
      # task_runner. Lastly, add a few seconds to account for any overhead.
      if deadline is not None:
        if not request.properties.execution_timeout_secs:
          # Task never times out, so it cannot be accepted.
          too_long += 1
          continue
        max_task_time = (utils.time_time() +
                         request.properties.execution_timeout_secs +
                         (request.properties.io_timeout_secs or 600) +
                         3 * (request.properties.grace_period_secs or 30) +
                         10)
        if deadline <= max_task_time:
          too_long += 1
          continue

      # It's a valid task! Note that in the meantime, another bot may have
      # reaped it.
      yield request, task
      ignored += 1
  finally:
    duration = (utils.utcnow() - now).total_seconds()
    logging.info(
        '%d/%s in %5.2fs: %d total, %d exp %d no_queue, %d hash mismatch, '
        '%d cache negative, %d dimensions mismatch, %d ignored, %d broken, '
        '%d not executable by deadline (UTC %s)',
        opts.batch_size,
        opts.prefetch_size,
        duration,
        total,
        expired,
        no_queue,
        hash_mismatch,
        cache_lookup,
        real_mismatch,
        ignored,
        broken,
        too_long,
        deadline)
Example #43
 def send_json(self, body, http_code=200):
   """Serializes |body| into JSON and sends it as a response."""
   self.response.set_status(http_code)
   self.response.headers['Content-Type'] = 'application/json; charset=utf-8'
   self.response.write(utils.encode_to_json(body))
Example #44
 def _to_base_type(self, value):
   """Makes it deterministic compared to ndb.JsonProperty._to_base_type()."""
   return utils.encode_to_json(value)
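Note: utils.encode_to_json itself does not appear in these results, but the docstring above makes the intent clear: deterministic serialization. A minimal sketch of such an encoder, assuming sorted keys and compact separators (the real utility likely also handles extra types such as datetimes):

import json

def encode_to_json(data):
  # Deterministic, compact JSON: with sorted keys, equal dicts always
  # serialize to the same string, so the output is safe to hash or compare
  # (see Example #31 and Example #45).
  return json.dumps(data, sort_keys=True, separators=(',', ':'))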
Example #45
def _safe_cmp(a, b):
  # cmp(datetime.datetime.utcnow(), None) throws TypeError. Workaround.
  return cmp(utils.encode_to_json(a), utils.encode_to_json(b))
Example #46
 def test_dimensions_search_sizing_14_1(self):
   dimensions = {str(k): '01234567890123456789' for k in xrange(14)}
   items = tuple(sorted(
       task_to_run._hash_dimensions(utils.encode_to_json(i))
       for i in task_to_run._powerset(dimensions)))
   self.assertEqual(16384, len(items))
Example #47
def request_to_task_to_run_key(request):
  """Returns the ndb.Key for a TaskToRun from a TaskRequest."""
  assert isinstance(request, task_request.TaskRequest), request
  dimensions_json = utils.encode_to_json(request.properties.dimensions)
  return ndb.Key(
      TaskToRun, _hash_dimensions(dimensions_json), parent=request.key)
Example #48
def _hash_dimensions(dimensions):
  return task_to_run._hash_dimensions(utils.encode_to_json(dimensions))
Example #49
def reclaim_machine(machine_key, reclamation_ts):
  """Attempts to reclaim the given machine.

  Args:
    machine_key: ndb.Key for a model.CatalogMachineEntry instance.
    reclamation_ts: datetime.datetime instance indicating when the machine was
      reclaimed.

  Returns:
    True if the machine was reclaimed, else False.
  """
  machine = machine_key.get()
  logging.info('Attempting to reclaim CatalogMachineEntry:\n%s', machine)

  if machine.lease_expiration_ts is None:
    # This can reasonably happen if e.g. the lease was voluntarily given up.
    logging.warning('CatalogMachineEntry no longer leased:\n%s', machine)
    return False

  if reclamation_ts < machine.lease_expiration_ts:
    # This can reasonably happen if e.g. the lease duration was extended.
    logging.warning('CatalogMachineEntry no longer overdue:\n%s', machine)
    return False

  logging.info('Reclaiming CatalogMachineEntry:\n%s', machine)
  lease = models.LeaseRequest.get_by_id(machine.lease_id)
  hostname = lease.response.hostname
  lease.machine_id = None
  lease.response.hostname = None
  machine.lease_id = None
  machine.lease_expiration_ts = None

  policy = machine.policies.on_reclamation
  if policy == rpc_messages.MachineReclamationPolicy.DELETE:
    logging.info('Executing MachineReclamationPolicy: DELETE')
    lease.put()
    machine.key.delete()
  else:
    if policy == rpc_messages.MachineReclamationPolicy.MAKE_AVAILABLE:
      logging.info('Executing MachineReclamationPolicy: MAKE_AVAILABLE')
      machine.state = models.CatalogMachineEntryStates.AVAILABLE
    else:
      if policy != rpc_messages.MachineReclamationPolicy.RECLAIM:
        # Something is awry. Log an error, but still reclaim the machine.
        # Fall back on the RECLAIM policy because it notifies the backend and
        # prevents the machine from being leased out again, but keeps it in
        # the Catalog in case we want to examine it further.
        logging.error(
            'Unexpected MachineReclamationPolicy: %s\nDefaulting to RECLAIM',
            policy,
        )
      else:
        logging.info('Executing MachineReclamationPolicy: RECLAIM')
      machine.state = models.CatalogMachineEntryStates.RECLAIMED
    ndb.put_multi([lease, machine])

  params = {
      'hostname': hostname,
      'policies': protojson.encode_message(machine.policies),
      'request_json': protojson.encode_message(lease.request),
      'response_json': protojson.encode_message(lease.response),
  }
  backend_attributes = {}
  for attribute in machine.policies.backend_attributes:
    backend_attributes[attribute.key] = attribute.value
  params['backend_attributes'] = utils.encode_to_json(backend_attributes)
  if lease.request.pubsub_topic:
    params['lessee_project'] = lease.request.pubsub_project
    params['lessee_topic'] = lease.request.pubsub_topic
  if not utils.enqueue_task(
      '/internal/queues/reclaim-machine',
      'reclaim-machine',
      params=params,
      transactional=True,
  ):
    raise TaskEnqueuingError('reclaim-machine')
  return True
Example #50
def create_instance_group(name, dimensions, policies, instances):
  """Stores an InstanceGroup and Instance entities in the datastore.

  Also attempts to catalog each running Instance in the Machine Provider.

  Operates on two root entities: model.Instance and model.InstanceGroup.

  Args:
    name: Name of this instance group.
    dimensions: machine_provider.Dimensions describing members of this instance
      group.
    policies: machine_provider.Policies governing members of this instance
      group.
    instances: Return value of gce.get_managed_instances listing instances in
      this instance group.
  """
  instance_map = {}
  instances_to_catalog = []

  for instance_name, instance in instances.iteritems():
    logging.info('Processing instance: %s', instance_name)
    instance_key = models.Instance.generate_key(instance_name)
    instance_map[instance_name] = models.Instance(
        key=instance_key,
        group=name,
        name=instance_name,
        state=models.InstanceStates.UNCATALOGED,
    )
    if instance['instanceStatus'] == 'RUNNING':
      existing_instance = instance_key.get()
      if existing_instance:
        if existing_instance.state == models.InstanceStates.UNCATALOGED:
          logging.info('Attempting to catalog instance: %s', instance_name)
          instances_to_catalog.append(instance_name)
        else:
          logging.info('Skipping already cataloged instance: %s', instance_name)
          instance_map[instance_name].state = existing_instance.state
    else:
      logging.warning(
          'Instance not running: %s\ncurrentAction: %s\ninstanceStatus: %s',
          instance_name,
          instance['currentAction'],
          instance['instanceStatus'],
      )

  if instances_to_catalog:
    # Above we defaulted each instance to UNCATALOGED. Here, try to enqueue a
    # task to catalog them in the Machine Provider, setting CATALOGED if
    # successful.
    if utils.enqueue_task(
        '/internal/queues/catalog-instance-group',
        'catalog-instance-group',
        params={
            'dimensions': utils.encode_to_json(dimensions),
            'instances': utils.encode_to_json(instances_to_catalog),
            'policies': utils.encode_to_json(policies),
        },
        transactional=True,
    ):
      for instance_name in instances_to_catalog:
        instance_map[instance_name].state = models.InstanceStates.CATALOGED
  else:
    logging.info('Nothing to catalog')

  ndb.put_multi(instance_map.values())
  models.InstanceGroup.create_and_put(
      name, dimensions, policies, sorted(instance_map.keys()))