def post(self):
    """Record a 'bot_connected' event and reply with version/config info.

    The processed request (``self._process()``) supplies the bot id,
    dimensions, state, version and quarantine message that are stored in the
    event. The response carries the bot code version, the server version and
    the bot group config version and dimensions.
    """
    res = self._process()

    # Store the connection as a bot event; the quarantine message doubles as
    # the event message.
    bot_management.bot_event(
        event_type='bot_connected',
        bot_id=res.bot_id,
        external_ip=self.request.remote_addr,
        authenticated_as=auth.get_peer_identity().to_bytes(),
        dimensions=res.dimensions,
        state=res.state,
        version=res.version,
        quarantined=bool(res.quarantined_msg),
        task_id='',
        task_name=None,
        message=res.quarantined_msg,
    )

    bot_version = bot_code.get_bot_version(self.request.host_url)
    server_version = utils.get_app_version()
    group_cfg_version = res.bot_group_cfg.version
    # Let the bot know its server-side dimensions (from bots.cfg file).
    group_cfg = {'dimensions': res.bot_group_cfg.dimensions}

    self.send_response({
        'bot_version': bot_version,
        'server_version': server_version,
        'bot_group_cfg_version': group_cfg_version,
        'bot_group_cfg': group_cfg,
    })
def post(self):
    """Record an event reported by a bot.

    The event type is read from the processed request and must be one of
    ``self.ALLOWED_EVENTS``; otherwise the request is rejected with HTTP 400.
    A 'bot_error' event is additionally forwarded to ereporter2.
    """
    res = self._process()
    payload = res.request

    event = payload.get('event')
    if event not in self.ALLOWED_EVENTS:
        logging.error('Unexpected event type')
        # NOTE(review): abort_with_error presumably raises, ending the
        # request here -- confirm against the handler base class.
        self.abort_with_error(400, error='Unsupported event type')
    message = payload.get('message')

    # Persist the event as a BotEvent entity so that it shows up on the bot's
    # page.
    bot_management.bot_event(
        event_type=event,
        bot_id=res.bot_id,
        external_ip=self.request.remote_addr,
        authenticated_as=auth.get_peer_identity().to_bytes(),
        dimensions=res.dimensions,
        state=res.state,
        version=res.version,
        quarantined=bool(res.quarantined_msg),
        maintenance_msg=res.maintenance_msg,
        task_id=None,
        task_name=None,
        message=message,
    )

    if event == 'bot_error':
        # Also send this to ereporter2 so it is listed in the server's hourly
        # ereporter2 report. THIS IS NOISY, so it should be reserved for
        # issues requiring action. The report has no context, hence the bot's
        # URL is included again; repeating the bot id also makes messages
        # bucket by bot.
        hostname = app_identity.get_default_version_hostname()
        line = '%s\n\nhttps://%s/restricted/bot/%s' % (
            message, hostname, res.bot_id)
        ereporter2.log_request(self.request, source='bot', message=line)

    self.send_response({})
def post(self):
    """Record a 'bot_connected' event and reply with version/config info.

    Besides the bot code version (first element of what
    ``bot_code.get_bot_version`` returns), the server version and the bot
    group config version and dimensions, the response also carries the
    group's bot config script content under 'bot_config' when the group
    defines one.
    """
    res = self._process()

    # Store the connection as a bot event; the quarantine message doubles as
    # the event message.
    bot_management.bot_event(
        event_type='bot_connected',
        bot_id=res.bot_id,
        external_ip=self.request.remote_addr,
        authenticated_as=auth.get_peer_identity().to_bytes(),
        dimensions=res.dimensions,
        state=res.state,
        version=res.version,
        quarantined=bool(res.quarantined_msg),
        maintenance_msg=res.maintenance_msg,
        task_id='',
        task_name=None,
        message=res.quarantined_msg,
    )

    bot_version = bot_code.get_bot_version(self.request.host_url)[0]
    server_version = utils.get_app_version()
    data = {
        'bot_version': bot_version,
        'server_version': server_version,
        'bot_group_cfg_version': res.bot_group_cfg.version,
        # Let the bot know its server-side dimensions (from bots.cfg file).
        'bot_group_cfg': {'dimensions': res.bot_group_cfg.dimensions},
    }

    # Only groups that define a custom bot config script get it injected.
    if res.bot_group_cfg.bot_config_script_content:
        logging.info(
            'Injecting %s: %d bytes',
            res.bot_group_cfg.bot_config_script,
            len(res.bot_group_cfg.bot_config_script_content))
        data['bot_config'] = res.bot_group_cfg.bot_config_script_content

    self.send_response(data)
def validate_bot_id_and_fetch_config(bot_id):
    """Check that the bot's credentials match its self-reported ID.

    Meant to be called while handling a bot API request. The bot group
    config for `bot_id` (from bots.cfg) decides which credential check is
    applied: a LUCI machine token, a specific service account, or an IP
    whitelist alone. When an IP whitelist is configured it is enforced in
    addition to whichever credential check ran.

    Raises:
        auth.AuthorizationError: the bot ID is not in the config, the
            credentials do not match, the group defines no auth method, or
            the peer IP is not whitelisted.

    Returns:
        The bot group config for this bot (BotGroupConfig tuple), as defined
        in bots.cfg.
    """
    cfg = bot_groups_config.get_bot_group_config(bot_id)
    if not cfg:
        logging.error(
            'bot_auth: unknown bot_id, not in the config\nbot_id: "%s"',
            bot_id)
        raise auth.AuthorizationError('Unknown bot ID, not in config')

    peer = auth.get_peer_identity()

    if cfg.require_luci_machine_token:
        token_ok = _is_valid_ident_for_bot(peer, bot_id)
        if not token_ok:
            logging.error(
                "bot_auth: bot ID doesn't match the machine token used\n"
                'bot_id: "%s", peer_ident: "%s"',
                bot_id, peer.to_bytes())
            raise auth.AuthorizationError(
                "Bot ID doesn't match the token used")
    elif cfg.require_service_account:
        wanted = auth.Identity(
            auth.IDENTITY_USER, cfg.require_service_account)
        if peer != wanted:
            logging.error(
                'bot_auth: bot is not using expected service account\n'
                'bot_id: "%s", expected_id: "%s", peer_ident: "%s"',
                bot_id, wanted.to_bytes(), peer.to_bytes())
            raise auth.AuthorizationError(
                'bot is not using expected service account')
    elif not cfg.ip_whitelist:
        # Validated configs should never reach this branch.
        logging.error(
            'bot_auth: invalid bot group config, no auth method defined\n'
            'bot_id: "%s"',
            bot_id)
        raise auth.AuthorizationError('Invalid bot group config')

    # The IP whitelist, when present, applies on top of the credentials.
    if cfg.ip_whitelist:
        peer_ip = auth.get_peer_ip()
        if not auth.is_in_ip_whitelist(cfg.ip_whitelist, peer_ip):
            logging.error(
                'bot_auth: bot IP is not whitelisted\n'
                'bot_id: "%s", peer_ip: "%s", ip_whitelist: "%s"',
                bot_id, ipaddr.ip_to_string(peer_ip), cfg.ip_whitelist)
            raise auth.AuthorizationError('Not IP whitelisted')

    return cfg
def _is_allowed_to_schedule(pool_cfg):
    """Return True if the current caller may schedule tasks in the pool.

    Access is granted when the caller is listed in
    `pool_cfg.scheduling_users`, is a member of one of
    `pool_cfg.scheduling_groups`, or acts through a delegatee listed in
    `pool_cfg.trusted_delegatees` whose delegation token carries at least
    one of that delegatee's required tags.
    """
    caller_id = auth.get_current_identity()

    # Direct listing in the pool config.
    if caller_id in pool_cfg.scheduling_users:
        logging.info(
            'Caller "%s" is allowed to schedule tasks in the pool "%s" by being '
            'specified directly in the pool config',
            caller_id.to_bytes(), pool_cfg.name)
        return True

    # Membership in one of the configured groups.
    for group_name in pool_cfg.scheduling_groups:
        if not auth.is_group_member(group_name, caller_id):
            continue
        logging.info(
            'Caller "%s" is allowed to schedule tasks in the pool "%s" by being '
            'referenced via the group "%s" in the pool config',
            caller_id.to_bytes(), pool_cfg.name, group_name)
        return True

    # Without a delegation token there is nothing left to check.
    delegation_token = auth.get_delegation_token()
    if not delegation_token:
        return False

    # Log details of the delegation to make debugging easier.
    peer_id = auth.get_peer_identity()
    token_tags = set(delegation_token.tags or [])
    logging.info(
        'Using delegation, delegatee is "%s", delegation tags are %s',
        peer_id.to_bytes(), sorted(str(tag) for tag in token_tags))

    # The delegatee itself has to be known to the pool config.
    trusted_delegatee = pool_cfg.trusted_delegatees.get(peer_id)
    if not trusted_delegatee:
        logging.warning('The delegatee "%s" is unknown', peer_id.to_bytes())
        return False

    # At least one of the required tags must be present in the token.
    matched_tags = token_tags & trusted_delegatee.required_delegation_tags
    if not matched_tags:
        logging.warning(
            'Expecting any of %s tags, got %s, forbidding the call',
            sorted(
                str(tag)
                for tag in trusted_delegatee.required_delegation_tags),
            sorted(str(tag) for tag in token_tags))
        return False

    logging.info(
        'Caller "%s" is allowed to schedule tasks in the pool "%s" by acting '
        'through a trusted delegatee "%s" that set the delegation tags %s',
        caller_id.to_bytes(), pool_cfg.name, peer_id.to_bytes(),
        sorted(str(tag) for tag in matched_tags))
    return True
def decorated(svc, *args, **kwargs):
    """Call the wrapped `fn`, translating known exceptions.

    An `errors.Error` is returned as a `response_message_class` instance
    with its `error` field set. An `auth.AuthorizationError` is logged with
    the peer identity and remote address, then re-raised as
    `endpoints.ForbiddenException`.
    """
    try:
        return fn(svc, *args, **kwargs)
    except errors.Error as ex:
        # The response message type must be able to carry an error payload.
        assert hasattr(response_message_class, 'error')
        error_message = exception_to_error_message(ex)
        return response_message_class(error=error_message)
    except auth.AuthorizationError as ex:
        reason = ex.message
        peer = auth.get_peer_identity().to_bytes()
        remote_ip = svc.request_state.remote_address
        logging.warning(
            'Authorization error.\n%s\nPeer: %s\nIP: %s',
            reason, peer, remote_ip)
        raise endpoints.ForbiddenException(ex.message)
def decorated(svc, *args, **kwargs):
    """Call the wrapped `fn`, translating known exceptions.

    An `errors.Error` is returned as a `response_message_class` instance
    with its `error` field set. An `auth.AuthorizationError` is logged with
    the peer identity and remote address, then re-raised as
    `endpoints.ForbiddenException`.
    """
    try:
        return fn(svc, *args, **kwargs)
    except errors.Error as ex:
        # The response message type must be able to carry an error payload.
        assert hasattr(response_message_class, 'error')
        error_message = exception_to_error_message(ex)
        return response_message_class(error=error_message)
    except auth.AuthorizationError as ex:
        reason = ex.message
        peer = auth.get_peer_identity().to_bytes()
        remote_ip = svc.request_state.remote_address
        logging.warning(
            'Authorization error.\n%s\nPeer: %s\nIP: %s',
            reason, peer, remote_ip)
        raise endpoints.ForbiddenException(ex.message)
def bot_event(event_type, task_id=None, task_name=None):
    """Record a bot event for the request currently being handled.

    Everything except the event type and the task id/name is taken from
    names bound in the enclosing scope (`res`, `self`, `quarantined`).
    """
    event_fields = {
        'event_type': event_type,
        'bot_id': res.bot_id,
        'external_ip': self.request.remote_addr,
        'authenticated_as': auth.get_peer_identity().to_bytes(),
        'dimensions': res.dimensions,
        'state': res.state,
        'version': res.version,
        'quarantined': quarantined,
        'task_id': task_id,
        'task_name': task_name,
        'message': res.quarantined_msg,
    }
    bot_management.bot_event(**event_fields)
def post(self):
    """Create, register and return a signed delegation token.

    The request body is converted to a subtoken, defaults are filled in from
    the caller's identity, the ACL is checked, the subtoken is registered to
    obtain its ID, and the sealed token is returned with HTTP 201.

    Raises:
        auth.AuthorizationError: the call itself is made with an active
            delegation token, or `check_can_create_token` rejects it.
    """
    # Delegation tokens must not be used for this particular call: delegating
    # the creation of delegation tokens is too deep. Redelegation is meant to
    # be a separate explicit API call that takes the existing token in the
    # request body rather than in headers.
    if auth.get_current_identity() != auth.get_peer_identity():
        raise auth.AuthorizationError(
            'This API call must not be used with active delegation token')

    # Turn the request body into a proto (with validation) and pull out the
    # optional intent string.
    try:
        body = self.parse_body()
        subtoken = subtoken_from_jsonish(body)
        intent = body.get('intent') or ''
        if not isinstance(intent, basestring):
            raise TypeError('"intent" must be string')
    except (TypeError, ValueError) as exc:
        # NOTE(review): abort_with_error presumably raises; otherwise
        # `subtoken` would be unbound below -- confirm.
        self.abort_with_error(400, text=str(exc))

    # Fill in defaults.
    assert not subtoken.requestor_identity
    user_id = auth.get_current_identity().to_bytes()
    subtoken.requestor_identity = user_id
    if not subtoken.delegated_identity:
        subtoken.delegated_identity = user_id
    subtoken.creation_time = int(utils.time_time())
    if not subtoken.validity_duration:
        subtoken.validity_duration = DEF_VALIDITY_DURATION_SEC
    if '*' in subtoken.services:
        subtoken.services[:] = get_default_allowed_services(user_id)

    # ACL check; raises auth.AuthorizationError on errors.
    rule = check_can_create_token(user_id, subtoken)

    # Register the token in the datastore, which generates its ID.
    peer_ip = auth.get_peer_ip()
    subtoken.subtoken_id = register_subtoken(subtoken, rule, intent, peer_ip)

    # Seal (sign) and serialize the token.
    try:
        sealed = delegation.seal_token(subtoken)
        token = delegation.serialize_token(sealed)
    except delegation.BadTokenError as exc:
        # This happens if the resulting token is too large.
        self.abort_with_error(400, text=str(exc))

    response = {
        'delegation_token': token,
        'subtoken_id': str(subtoken.subtoken_id),
        'validity_duration': subtoken.validity_duration,
    }
    self.send_response(response=response, http_code=201)
def post(self, task_id=None):
    """Handle a task error reported by a bot.

    Authenticates the bot, records a 'task_error' bot event, reports the
    failure to ereporter2, rejects requests with unexpected keys (HTTP 400)
    and finally asks the scheduler to kill the task.

    The `task_id` argument is ignored; the value from the request body is
    used instead.
    """
    request = self.parse_body()
    bot_id = request.get('id')
    task_id = request.get('task_id', '')
    message = request.get('message', 'unknown')

    # The machine type is only known when a stored bot info entity exists.
    bot_info = bot_management.get_info_key(bot_id).get()
    machine_type = bot_info.machine_type if bot_info else None

    # Make sure the bot's self-reported ID matches the authentication token.
    # Raises auth.AuthorizationError if not.
    bot_auth.validate_bot_id_and_fetch_config(bot_id, machine_type)

    bot_management.bot_event(
        event_type='task_error',
        bot_id=bot_id,
        external_ip=self.request.remote_addr,
        authenticated_as=auth.get_peer_identity().to_bytes(),
        dimensions=None,
        state=None,
        version=None,
        quarantined=None,
        maintenance_msg=None,
        task_id=task_id,
        task_name=None,
        message=message,
    )

    report_format = (
        'Bot: https://%s/restricted/bot/%s\n'
        'Task failed: https://%s/user/task/%s\n'
        '%s')
    line = report_format % (
        app_identity.get_default_version_hostname(), bot_id,
        app_identity.get_default_version_hostname(), task_id,
        message)
    ereporter2.log_request(self.request, source='bot', message=line)

    # Key validation happens only after the event and report are recorded.
    msg = log_unexpected_keys(
        self.EXPECTED_KEYS, request, self.request, 'bot', 'keys')
    if msg:
        self.abort_with_error(400, error=msg)

    run_result_key = task_pack.unpack_run_result_key(task_id)
    msg = task_scheduler.bot_kill_task(run_result_key, bot_id)
    if msg:
        logging.error(msg)
        self.abort_with_error(400, error=msg)

    self.send_response({})
def post(self):
    """Create and return a signed delegation token.

    The request body is converted to a subtoken, defaults are filled in from
    the caller's identity, the ACL is checked and the sealed token is
    returned with HTTP 201.

    Raises:
        auth.AuthorizationError: the call itself is made with an active
            delegation token, or `check_can_create_token` rejects it.
    """
    # Delegation tokens must not be used for this particular call: delegating
    # the creation of delegation tokens is too deep. Redelegation is meant to
    # be a separate explicit API call that takes the existing token in the
    # request body rather than in headers.
    if auth.get_current_identity() != auth.get_peer_identity():
        raise auth.AuthorizationError(
            'This API call must not be used with active delegation token')

    # Turn the request body into a proto (with validation).
    try:
        body = self.parse_body()
        subtoken = subtoken_from_jsonish(body)
    except (TypeError, ValueError) as exc:
        # NOTE(review): abort_with_error presumably raises; otherwise
        # `subtoken` would be unbound below -- confirm.
        self.abort_with_error(400, text=str(exc))

    # Fill in defaults.
    assert not subtoken.impersonator_id
    user_id = auth.get_current_identity().to_bytes()
    if not subtoken.issuer_id:
        subtoken.issuer_id = user_id
    # A caller issuing a token for somebody else is the impersonator.
    if subtoken.issuer_id != user_id:
        subtoken.impersonator_id = user_id
    subtoken.creation_time = int(utils.time_time())
    if not subtoken.validity_duration:
        subtoken.validity_duration = DEF_VALIDITY_DURATION_SEC
    if not subtoken.services or '*' in subtoken.services:
        subtoken.services[:] = get_default_allowed_services(user_id)

    # ACL check; raises auth.AuthorizationError on errors.
    check_can_create_token(user_id, subtoken)

    # Wrap the subtoken in a list, seal (sign) it and serialize it.
    try:
        subtoken_list = delegation_pb2.SubtokenList(subtokens=[subtoken])
        sealed = delegation.seal_token(subtoken_list)
        token = delegation.serialize_token(sealed)
    except delegation.BadTokenError as exc:
        # This happens if the resulting token is too large.
        self.abort_with_error(400, text=str(exc))

    response = {
        'delegation_token': token,
        'validity_duration': subtoken.validity_duration,
    }
    self.send_response(response=response, http_code=201)
def post(self):
    """Create and return a signed delegation token.

    The request body is converted to a subtoken, defaults are filled in from
    the caller's identity, the ACL is checked and the sealed token is
    returned with HTTP 201.

    Raises:
        auth.AuthorizationError: the call itself is made with an active
            delegation token, or `check_can_create_token` rejects it.
    """
    # Delegation tokens must not be used for this particular call: delegating
    # the creation of delegation tokens is too deep. Redelegation is meant to
    # be a separate explicit API call that takes the existing token in the
    # request body rather than in headers.
    if auth.get_current_identity() != auth.get_peer_identity():
        raise auth.AuthorizationError(
            'This API call must not be used with active delegation token')

    # Turn the request body into a proto (with validation).
    try:
        body = self.parse_body()
        subtoken = subtoken_from_jsonish(body)
    except (TypeError, ValueError) as exc:
        # NOTE(review): abort_with_error presumably raises; otherwise
        # `subtoken` would be unbound below -- confirm.
        self.abort_with_error(400, text=str(exc))

    # Fill in defaults.
    assert not subtoken.impersonator_id
    user_id = auth.get_current_identity().to_bytes()
    if not subtoken.issuer_id:
        subtoken.issuer_id = user_id
    # A caller issuing a token for somebody else is the impersonator.
    if subtoken.issuer_id != user_id:
        subtoken.impersonator_id = user_id
    subtoken.creation_time = int(utils.time_time())
    if not subtoken.validity_duration:
        subtoken.validity_duration = DEF_VALIDITY_DURATION_SEC
    if not subtoken.services or '*' in subtoken.services:
        subtoken.services[:] = get_default_allowed_services(user_id)

    # ACL check; raises auth.AuthorizationError on errors.
    check_can_create_token(user_id, subtoken)

    # Wrap the subtoken in a list, seal (sign) it and serialize it.
    try:
        subtoken_list = delegation_pb2.SubtokenList(subtokens=[subtoken])
        sealed = delegation.seal_token(subtoken_list)
        token = delegation.serialize_token(sealed)
    except delegation.BadTokenError as exc:
        # This happens if the resulting token is too large.
        self.abort_with_error(400, text=str(exc))

    response = {
        'delegation_token': token,
        'validity_duration': subtoken.validity_duration,
    }
    self.send_response(response=response, http_code=201)
def file_size(size):
    """Report the size of a file fetched from GCS by a whitelisted client.

    The peer IP is compared against each entry of the client monitoring
    config; the first entry whose IP whitelist contains it gets the size
    added to the bytes-requested metric. Clients that match no entry are not
    monitored and nothing is recorded.

    Args:
        size: Size of the file in bytes.
    """
    peer_ip = auth.get_peer_ip()
    for client_cfg in config.settings().client_monitoring_config:
        if not auth.is_in_ip_whitelist(client_cfg.ip_whitelist, peer_ip):
            continue
        metric_fields = {
            'client_name': client_cfg.label,
            'client_email': auth.get_peer_identity().to_bytes(),
            'download_source': 'GCS',
        }
        _bytes_requested.increment_by(size, fields=metric_fields)
        # Only the first matching config entry is counted.
        return
def post(self, task_id=None):
    """Apply a task update sent by a bot and record the matching bot event.

    The request body must contain 'id' and 'task_id' and may contain only the
    keys in `self.ACCEPTED_KEYS`. After authenticating the bot, the optional
    fields (output chunk, exit code, timeouts, stats, CIPD pins, ...) are
    decoded and handed to `task_scheduler.bot_update_task`. The response
    tells the bot whether it must stop, i.e. whether the task state is
    KILLED.

    The `task_id` argument is ignored; the value from the request body is
    used instead.
    """
    # Unlike handshake and poll, we do not accept invalid keys here. This code
    # path is much more strict.
    request = self.parse_body()
    msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS, self.REQUIRED_KEYS,
                                     request, self.request, 'bot', 'keys')
    if msg:
        self.abort_with_error(400, error=msg)

    bot_id = request['id']
    task_id = request['task_id']

    # The machine type is only known when a stored bot info entity exists.
    machine_type = None
    bot_info = bot_management.get_info_key(bot_id).get()
    if bot_info:
        machine_type = bot_info.machine_type

    # Make sure bot self-reported ID matches the authentication token. Raises
    # auth.AuthorizationError if not.
    bot_auth.validate_bot_id_and_fetch_config(bot_id, machine_type)

    bot_overhead = request.get('bot_overhead')
    cipd_pins = request.get('cipd_pins')
    cipd_stats = request.get('cipd_stats')
    cost_usd = request.get('cost_usd', 0)
    duration = request.get('duration')
    exit_code = request.get('exit_code')
    hard_timeout = request.get('hard_timeout')
    io_timeout = request.get('io_timeout')
    isolated_stats = request.get('isolated_stats')
    output = request.get('output')
    output_chunk_start = request.get('output_chunk_start')
    outputs_ref = request.get('outputs_ref')

    # Stats are attached to a PerformanceStats object that is only created
    # when bot_overhead is present, so stats without it are rejected.
    if (isolated_stats or cipd_stats) and bot_overhead is None:
        ereporter2.log_request(request=self.request,
                               source='server',
                               category='task_failure',
                               message='Failed to update task: %s' % task_id)
        self.abort_with_error(
            400,
            error=
            'isolated_stats and cipd_stats require bot_overhead to be set'
            '\nbot_overhead: %s\nisolate_stats: %s' %
            (bot_overhead, isolated_stats))

    run_result_key = task_pack.unpack_run_result_key(task_id)
    performance_stats = None
    if bot_overhead is not None:
        performance_stats = task_result.PerformanceStats(
            bot_overhead=bot_overhead)
        if isolated_stats:
            download = isolated_stats.get('download') or {}
            upload = isolated_stats.get('upload') or {}

            def unpack_base64(d, k):
                # Returns the base64-decoded value of d[k], or None when the
                # key is missing or its value is empty.
                x = d.get(k)
                if x:
                    return base64.b64decode(x)

            performance_stats.isolated_download = task_result.OperationStats(
                duration=download.get('duration'),
                initial_number_items=download.get('initial_number_items'),
                initial_size=download.get('initial_size'),
                items_cold=unpack_base64(download, 'items_cold'),
                items_hot=unpack_base64(download, 'items_hot'))
            performance_stats.isolated_upload = task_result.OperationStats(
                duration=upload.get('duration'),
                items_cold=unpack_base64(upload, 'items_cold'),
                items_hot=unpack_base64(upload, 'items_hot'))
        if cipd_stats:
            performance_stats.package_installation = task_result.OperationStats(
                duration=cipd_stats.get('duration'))

    # The output chunk arrives base64-encoded.
    if output is not None:
        try:
            output = base64.b64decode(output)
        except UnicodeEncodeError as e:
            # Fall back to an ASCII encoding with replacement characters.
            logging.error('Failed to decode output\n%s\n%r', e, output)
            output = output.encode('ascii', 'replace')
        except TypeError as e:
            # Save the output as-is instead. The error will be logged in ereporter2
            # and returning a HTTP 500 would only force the bot to stay in a retry
            # loop.
            logging.error('Failed to decode output\n%s\n%r', e, output)

    # Convert the plain dicts from the request into their model types.
    if outputs_ref:
        outputs_ref = task_request.FilesRef(**outputs_ref)

    if cipd_pins:
        cipd_pins = task_result.CipdPins(
            client_package=task_request.CipdPackage(
                **cipd_pins['client_package']),
            packages=[
                task_request.CipdPackage(**args)
                for args in cipd_pins['packages']
            ])

    try:
        state = task_scheduler.bot_update_task(
            run_result_key=run_result_key,
            bot_id=bot_id,
            output=output,
            output_chunk_start=output_chunk_start,
            exit_code=exit_code,
            duration=duration,
            hard_timeout=hard_timeout,
            io_timeout=io_timeout,
            cost_usd=cost_usd,
            outputs_ref=outputs_ref,
            cipd_pins=cipd_pins,
            performance_stats=performance_stats)
        # A falsy state means the update was not applied; HTTP 500 asks the
        # bot to retry.
        if not state:
            logging.info('Failed to update, please retry')
            self.abort_with_error(500, error='Failed to update, please retry')

        # Map the resulting task state to the bot event type to record.
        if state in (task_result.State.COMPLETED, task_result.State.TIMED_OUT):
            action = 'task_completed'
        elif state == task_result.State.KILLED:
            action = 'task_killed'
        else:
            assert state in (task_result.State.BOT_DIED,
                             task_result.State.RUNNING), state
            action = 'task_update'
        bot_management.bot_event(
            event_type=action,
            bot_id=bot_id,
            external_ip=self.request.remote_addr,
            authenticated_as=auth.get_peer_identity().to_bytes(),
            dimensions=None,
            state=None,
            version=None,
            quarantined=None,
            maintenance_msg=None,
            task_id=task_id,
            task_name=None)
    except ValueError as e:
        # A ValueError is reported to ereporter2 and answered with HTTP 400.
        ereporter2.log_request(request=self.request,
                               source='server',
                               category='task_failure',
                               message='Failed to update task: %s' % e)
        self.abort_with_error(400, error=str(e))
    except webob.exc.HTTPException:
        # Let HTTP exceptions through instead of converting them to a 500 in
        # the generic handler below.
        raise
    except Exception as e:
        logging.exception('Internal error: %s', e)
        self.abort_with_error(500, error=str(e))
    self.send_response({
        'must_stop': state == task_result.State.KILLED,
        'ok': True
    })
def _cmd_run(self, request, secret_bytes, run_result, bot_id, bot_group_cfg):
    """Send the bot a 'run' command with the manifest of the task to execute.

    The manifest is built from the properties of the task slice currently
    selected by `run_result.current_task_slice`, plus the bot id, the peer
    identity and the service accounts to use. Optional sections (CIPD input,
    isolated input, secret bytes) are None when the task does not define
    them.
    """
    logging.info('Run: %s', request.task_id)
    props = request.task_slice(run_result.current_task_slice).properties

    authenticated_as = auth.get_peer_identity().to_bytes()
    caches = [cache.to_dict() for cache in props.caches]

    cipd_input = None
    if props.cipd_input:
        cipd_input = {
            'client_package': props.cipd_input.client_package.to_dict(),
            'packages': [pkg.to_dict() for pkg in props.cipd_input.packages],
            'server': props.cipd_input.server,
        }

    encoded_secret = None
    if secret_bytes:
        encoded_secret = secret_bytes.secret_bytes.encode('base64')

    isolated = None
    if props.inputs_ref:
        isolated = {
            'input': props.inputs_ref.isolated,
            'namespace': props.inputs_ref.namespace,
            'server': props.inputs_ref.isolatedserver,
        }

    # Each account is 'none', 'bot' or an email. The bot interprets 'none'
    # and 'bot' locally; for anything else it uses the /oauth_token API
    # endpoint to grab tokens through the server.
    service_accounts = {
        'system': {
            'service_account': bot_group_cfg.system_service_account or 'none',
        },
        'task': {
            'service_account': request.service_account,
        },
    }

    manifest = {
        'bot_id': bot_id,
        'bot_authenticated_as': authenticated_as,
        'caches': caches,
        'cipd_input': cipd_input,
        'command': props.command,
        'dimensions': props.dimensions,
        'env': props.env,
        'env_prefixes': props.env_prefixes,
        'extra_args': props.extra_args,
        'grace_period': props.grace_period_secs,
        'hard_timeout': props.execution_timeout_secs,
        'host': utils.get_versioned_hosturl(),
        'io_timeout': props.io_timeout_secs,
        'secret_bytes': encoded_secret,
        'isolated': isolated,
        'outputs': props.outputs,
        'relative_cwd': props.relative_cwd,
        'service_accounts': service_accounts,
        'task_id': task_pack.pack_run_result_key(run_result.key),
    }
    out = {'cmd': 'run', 'manifest': manifest}
    self.send_response(utils.to_json_encodable(out))
def validate_bot_id_and_fetch_config(bot_id):
    """Check that the bot's credentials match its self-reported ID.

    Meant to be called while handling a bot API request. The bot group
    config (from bots.cfg) for the primary hostname extracted from `bot_id`
    lists the auth methods to try; they are attempted in order and the first
    one that succeeds authorizes the bot.

    Raises:
        auth.AuthorizationError: the bot ID is not in the config, the group
            has no auth methods configured, or every auth method failed.

    Returns:
        The bot group config for this bot (BotGroupConfig tuple), as defined
        in bots.cfg.
    """
    bot_id = _extract_primary_hostname(bot_id)
    cfg = bot_groups_config.get_bot_group_config(bot_id)
    if not cfg:
        logging.error(
            'bot_auth: unknown bot_id, not in the config\nbot_id: "%s"',
            bot_id)
        raise auth.AuthorizationError('Unknown bot ID, not in config')

    # Validated configs should always define at least one auth method.
    if not cfg.auth:
        logging.error('bot_auth: no auth configured in bots.cfg')
        raise auth.AuthorizationError('No auth configured in bots.cfg')

    ip = auth.get_peer_ip()
    peer_ident = auth.get_peer_identity()

    # One error per failed auth method.
    auth_errs = []
    # Log lines held back until it is known that every method failed; they
    # are dropped when some method succeeds.
    deferred = []

    # Try the methods one by one until the first success. During a migration
    # between methods it can matter that a method was skipped, so methods
    # flagged with `log_if_failed` log at 'error' level right away. Logs of
    # the other methods are buffered and emitted only if all methods fail.
    for method in cfg.auth:
        err, details = _check_bot_auth(method, bot_id, peer_ident, ip)
        if not err:
            logging.debug('Using auth method: %s', method)
            return cfg
        auth_errs.append(err)
        if not method.log_if_failed:
            deferred.extend([
                'Auth method failed: %s' % (err, ),
                'Failed auth method: %s' % (method, ),
            ])
            deferred.extend(details)
            continue
        logging.error('Preferred auth method failed: %s', err)
        logging.error('Failed auth method: %s', method)
        for detail in details:
            logging.error('%s', detail)

    # Every method failed, so the buffered logs are needed to investigate.
    for line in deferred:
        logging.error('%s', line)

    # Usually only one auth method is configured; report its error directly
    # to keep the message less confusing.
    if len(auth_errs) == 1:
        raise auth.AuthorizationError(auth_errs[0])
    raise auth.AuthorizationError(
        'All auth methods failed: %s' % '; '.join(auth_errs))