def do_verify(self):
    """ Perform verify using a random generated challenge

    A random challenge is sent to the agent's ``/keys/verify`` endpoint.
    On a 200 response the ``hmac`` value from the response body is compared
    with ``crypto.do_hmac(self.K, challenge)`` and the outcome is logged.
    Any other status is logged and the request is repeated after the
    configured ``tenant.retry_interval``.

    Raises:
        Exception: whatever the request raised, unless a 503/504 response
            is on hand, in which case the request is retried up to
            ``tenant.max_retries`` times before calling ``sys.exit()``.
    """
    challenge = TPM_Utilities.random_password(20)
    numtries = 0
    # Bound up front so the except handler below can always inspect it; if the
    # very first request raises, no response object exists yet.
    response = None
    while True:
        try:
            cloudagent_base_url = f'{self.agent_ip}:{self.agent_port}'
            client = RequestsClient(cloudagent_base_url, tls_enabled=False)
            response = client.get(
                f'/keys/verify?challenge={challenge}',
                cert=self.cert,
                verify=False
            )
        except Exception:
            # Only a 503/504 answer counts as "agent not reachable yet".
            if response is not None and response.status_code in (503, 504):
                numtries += 1
                maxr = config.getint('tenant', 'max_retries')
                if numtries >= maxr:
                    logger.error(
                        f"Cannot establish connection to agent on {self.agent_ip} with port {self.agent_port}"
                    )
                    sys.exit()
                retry_interval = config.getfloat('tenant', 'retry_interval')
                logger.info(
                    f"Verifier connection to agent at {self.agent_ip} refused {numtries}/{maxr} times, trying again in {retry_interval} seconds..."
                )
                time.sleep(retry_interval)
                continue
            # Re-raise the original error with its traceback intact.
            raise

        response_body = response.json()
        if response.status_code == 200:
            if "results" not in response_body or 'hmac' not in response_body['results']:
                logger.critical(
                    f"Error: unexpected http response body from Cloud Agent: {response.status_code}"
                )
                break
            mac = response_body['results']['hmac']

            # HMAC of the same challenge computed locally with self.K
            ex_mac = crypto.do_hmac(self.K, challenge)

            if mac == ex_mac:
                logger.info("Key derivation successful")
            else:
                logger.error("Key derivation failed")
        else:
            keylime_logging.log_http_response(logger, logging.ERROR, response_body)
            retry_interval = config.getfloat('tenant', 'retry_interval')
            logger.warning(
                f"Key derivation not yet complete...trying again in {retry_interval} seconds...Ctrl-C to stop"
            )
            time.sleep(retry_interval)
            continue
        break
def worker_webhook(tosend, url):
    """POST a revocation message to a webhook, retrying on failure.

    `tosend` is sent as the JSON body to `url`. A 200 or 202 status ends the
    loop; any other status, or a ``requests`` exception, is logged at debug
    level and followed by a sleep whose length comes from
    ``retry.retry_time``. The number of attempts is
    ``cloud_verifier.max_retries``.
    """
    base_interval = config.getfloat('cloud_verifier', 'retry_interval')
    use_backoff = config.getboolean('cloud_verifier', 'exponential_backoff')
    http = requests.session()
    logger.info("Sending revocation event via webhook...")

    max_attempts = config.getint('cloud_verifier', 'max_retries')
    for attempt in range(max_attempts):
        # delay to apply if this attempt does not succeed
        delay = retry.retry_time(use_backoff, base_interval, attempt, logger)
        try:
            reply = http.post(url, json=tosend, timeout=5)
        except requests.exceptions.RequestException as err:
            logger.debug(
                "Unable to publish revocation message %d times via webhook, "
                "trying again in %d seconds: %s",
                attempt, delay, err)
        else:
            if reply.status_code in [200, 202]:
                break
            logger.debug(
                "Unable to publish revocation message %d times via webhook, "
                "trying again in %d seconds. "
                "Server returned status code: %s",
                attempt, delay, reply.status_code)

        time.sleep(delay)
def worker(tosend):
    """Publish a revocation message on a ZeroMQ PUB socket.

    Connects to ``ipc:///tmp/keylime.verifier.ipc``, sends `tosend`
    serialized as JSON, and closes the socket. A failed send is logged at
    debug level and retried after ``cloud_verifier.retry_interval`` seconds,
    for at most ``cloud_verifier.max_retries`` attempts.
    """
    zmq_context = zmq.Context()
    publisher = zmq_context.socket(zmq.PUB)
    publisher.connect("ipc:///tmp/keylime.verifier.ipc")
    # pause 0.2 s so the connect can complete before publishing
    time.sleep(0.2)

    # publish over 0mq
    logger.info("Sending revocation event to listening nodes...")
    max_attempts = config.getint('cloud_verifier', 'max_retries')
    attempt = 0
    while attempt < max_attempts:
        try:
            publisher.send_string(json.dumps(tosend))
        except Exception as err:
            logger.debug("Unable to publish revocation message %d times, trying again in %f seconds: %s" % (
                attempt, config.getfloat('cloud_verifier', 'retry_interval'), err))
            time.sleep(config.getfloat('cloud_verifier', 'retry_interval'))
        else:
            break
        attempt += 1

    publisher.close()
def __run(self, cmd, expectedcode=tpm_abstract.AbstractTPM.EXIT_SUCESS, raiseOnError=True, lock=True, outputpaths=None):
    """Execute a TPM command through ``cmd_exec.run`` and return its result dict.

    Args:
        cmd: the command handed to ``cmd_exec.run``.
        expectedcode: return code regarded as success.
        raiseOnError: when true, a return code other than `expectedcode`
            raises instead of being returned.
        lock: when true, the command runs while holding ``self.tpmutilLock``.
        outputpaths: a single path string or a list of paths (or None),
            forwarded to ``cmd_exec.run``.

    Returns:
        The dict produced by ``cmd_exec.run`` (its ``code`` and ``retout``
        entries are read here), or the value from ``_stub_command`` when TPM
        stubbing is enabled and it yields a truthy result.

    Raises:
        Exception: if the final return code differs from `expectedcode`
            and `raiseOnError` is true.
    """
    env = _get_cmd_env()

    # Backwards compat: a lone path given as a string is wrapped in a list
    if isinstance(outputpaths, str):
        outputpaths = [outputpaths]

    # Stubbed TPM: answer from canned values when one is available
    fprt = tpm1.__fingerprint(cmd)
    if config.STUB_TPM and config.TPM_CANNED_VALUES is not None:
        canned = _stub_command(fprt, lock, outputpaths)
        if canned:
            return canned

    # raiseOnError is always False here; the return code is checked below
    run_kwargs = dict(
        cmd=cmd,
        expectedcode=expectedcode,
        raiseOnError=False,
        outputpaths=outputpaths,
        env=env,
    )

    io_failures = 0
    while True:
        if lock:
            with self.tpmutilLock:
                result = cmd_exec.run(**run_kwargs)
        else:
            result = cmd_exec.run(**run_kwargs)
        code = result['code']
        retout = result['retout']

        # anything other than a TPM I/O error ends the retry loop
        if code != tpm_abstract.AbstractTPM.TPM_IO_ERR:
            break

        io_failures += 1
        maxr = config.getint('cloud_agent', 'max_retries')
        if io_failures >= maxr:
            logger.error("TPM appears to be in use by another application. Keylime is incompatible with other TPM TSS applications like trousers/tpm-tools. Please uninstall or disable.")
            break
        pause = config.getfloat('cloud_agent', 'retry_interval')
        logger.info("Failed to call TPM %d/%d times, trying again in %f seconds..." % (io_failures, maxr, pause))
        time.sleep(pause)

    # Fail loudly when the caller asked for it and the command did not succeed
    if raiseOnError and code != expectedcode:
        raise Exception("Command: %s returned %d, expected %d, output %s" % (cmd, code, expectedcode, retout))

    # Metric output
    if lock or self.tpmutilLock.locked():
        _output_metrics(fprt, cmd, result, outputpaths)

    return result
def worker_webhook(tosend, url):
    """POST a revocation message to a webhook, retrying on failure.

    `tosend` is sent as the JSON body to `url`. A 200 or 202 status ends the
    loop; any other status, or a ``requests`` exception, is logged at debug
    level and followed by a sleep of ``cloud_verifier.retry_interval``
    seconds. The number of attempts is ``cloud_verifier.max_retries``.
    """
    retry_interval = config.getfloat('cloud_verifier', 'retry_interval')
    session = requests.session()
    logger.info("Sending revocation event via webhook...")
    for i in range(config.getint('cloud_verifier', 'max_retries')):
        try:
            # Without a timeout an unresponsive endpoint would block this
            # worker indefinitely and the retry loop would never advance.
            response = session.post(url, json=tosend, timeout=5)
            if response.status_code in [200, 202]:
                break

            logger.debug(
                "Unable to publish revocation message %d times via webhook, "
                "trying again in %s seconds. "
                "Server returned status code: %s",
                i, retry_interval, response.status_code)
        except requests.exceptions.RequestException as e:
            logger.debug(
                "Unable to publish revocation message %d times via webhook, "
                "trying again in %s seconds: %s ",
                i, retry_interval, e)

        time.sleep(retry_interval)
def worker(tosend):
    """Publish a revocation message on a ZeroMQ PUB socket.

    Connects to the IPC endpoint built from ``_SOCKET_PATH``, sends `tosend`
    serialized as JSON, and closes the socket. A failed send is logged at
    debug level and retried after the delay returned by
    ``retry.retry_time``, for at most ``cloud_verifier.max_retries``
    attempts.
    """
    zmq_context = zmq.Context()
    publisher = zmq_context.socket(zmq.PUB)
    publisher.connect(f"ipc://{_SOCKET_PATH}")
    # pause 0.2 s so the connect can complete before publishing
    time.sleep(0.2)

    # publish over 0mq
    logger.info("Sending revocation event to listening nodes...")
    max_attempts = config.getint('cloud_verifier', 'max_retries')
    attempt = 0
    while attempt < max_attempts:
        try:
            publisher.send_string(json.dumps(tosend))
        except Exception as err:
            # retry settings are read only when a send actually fails
            base_interval = config.getfloat('cloud_verifier', 'retry_interval')
            use_backoff = config.getboolean(
                'cloud_verifier', 'exponential_backoff')
            delay = retry.retry_time(use_backoff, base_interval, attempt, logger)
            logger.debug(
                "Unable to publish revocation message %d times, trying again in %f seconds: %s",
                attempt, delay, err)
            time.sleep(delay)
        else:
            break
        attempt += 1

    publisher.close()
async def process_agent(agent, new_operational_state):
    """Advance one agent through the verifier's polling state machine.

    The agent's current ``operational_state`` together with
    `new_operational_state` selects the action: request a quote, provide V,
    schedule a later quote, retry after a communication failure, or stop
    polling. Agent state is written back to the database before a
    transition is acted on.

    Args:
        agent: agent record as a dict; any other object is first converted
            with ``_from_db_obj``.
        new_operational_state: the state the caller wants to move to.

    Every exception raised inside is logged and swallowed, so callers never
    see one.
    """
    # Convert to dict if the agent arg is a db object
    if not isinstance(agent, dict):
        agent = _from_db_obj(agent)

    session = get_session()
    try:
        main_agent_operational_state = agent['operational_state']
        try:
            stored_agent = session.query(VerfierMain).filter_by(
                agent_id=str(agent['agent_id'])).first()
        except SQLAlchemyError as e:
            logger.error('SQLAlchemy Error: %s', e)
            # Without the stored row none of the checks below can run; stop
            # here instead of tripping over an unbound name.
            return

        if stored_agent is None:
            # first() yields None when no row matches the agent id
            logger.error("Agent %s not found in the database, stopping processing",
                         agent['agent_id'])
            return

        # if the user terminated this agent
        if stored_agent.operational_state == states.TERMINATED:
            logger.warning("Agent %s terminated by user.", agent['agent_id'])
            if agent['pending_event'] is not None:
                tornado.ioloop.IOLoop.current().remove_timeout(
                    agent['pending_event'])
            session.query(VerfierMain).filter_by(
                agent_id=agent['agent_id']).delete()
            session.commit()
            return

        # if the user tells us to stop polling because the tenant quote check failed
        if stored_agent.operational_state == states.TENANT_FAILED:
            logger.warning("Agent %s has failed tenant quote. Stopping polling", agent['agent_id'])
            if agent['pending_event'] is not None:
                tornado.ioloop.IOLoop.current().remove_timeout(
                    agent['pending_event'])
            return

        # If failed during processing, log regardless and drop it on the floor
        # The administration application (tenant) can GET the status and act accordingly (delete/retry/etc).
        if new_operational_state in (states.FAILED, states.INVALID_QUOTE):
            agent['operational_state'] = new_operational_state

            # issue notification for invalid quotes
            if new_operational_state == states.INVALID_QUOTE:
                cloud_verifier_common.notify_error(agent)

            if agent['pending_event'] is not None:
                tornado.ioloop.IOLoop.current().remove_timeout(
                    agent['pending_event'])
            # strip keys that are not database columns before the update
            for key in exclude_db:
                if key in agent:
                    del agent[key]
            session.query(VerfierMain).filter_by(
                agent_id=agent['agent_id']).update(agent)
            session.commit()

            logger.warning("Agent %s failed, stopping polling", agent['agent_id'])
            return

        # propagate all state, but remove non-DB keys first (using exclude_db)
        try:
            agent_db = dict(agent)
            for key in exclude_db:
                if key in agent_db:
                    del agent_db[key]

            session.query(VerfierMain).filter_by(
                agent_id=agent_db['agent_id']).update(agent_db)
            session.commit()
        except SQLAlchemyError as e:
            logger.error('SQLAlchemy Error: %s', e)

        # if new, get a quote
        if (main_agent_operational_state == states.START and
                new_operational_state == states.GET_QUOTE):
            agent['num_retries'] = 0
            agent['operational_state'] = states.GET_QUOTE
            await invoke_get_quote(agent, True)
            return

        if (main_agent_operational_state == states.GET_QUOTE and
                new_operational_state == states.PROVIDE_V):
            agent['num_retries'] = 0
            agent['operational_state'] = states.PROVIDE_V
            await invoke_provide_v(agent)
            return

        if (main_agent_operational_state in (states.PROVIDE_V, states.GET_QUOTE) and
                new_operational_state == states.GET_QUOTE):
            agent['num_retries'] = 0
            interval = config.getfloat('cloud_verifier', 'quote_interval')
            agent['operational_state'] = states.GET_QUOTE
            if interval == 0:
                await invoke_get_quote(agent, False)
            else:
                logger.debug("Setting up callback to check again in %f seconds", interval)
                # set up a call back to check again
                cb = functools.partial(invoke_get_quote, agent, False)
                pending = tornado.ioloop.IOLoop.current().call_later(interval, cb)
                agent['pending_event'] = pending
            return

        maxr = config.getint('cloud_verifier', 'max_retries')
        retry = config.getfloat('cloud_verifier', 'retry_interval')
        if (main_agent_operational_state == states.GET_QUOTE and
                new_operational_state == states.GET_QUOTE_RETRY):
            if agent['num_retries'] >= maxr:
                logger.warning("Agent %s was not reachable for quote in %d tries, setting state to FAILED", agent['agent_id'], maxr)
                if agent['first_verified']:  # only notify on previously good agents
                    cloud_verifier_common.notify_error(
                        agent, msgtype='comm_error')
                else:
                    logger.debug("Communication error for new agent. No notification will be sent")
                await process_agent(agent, states.FAILED)
            else:
                agent['operational_state'] = states.GET_QUOTE
                cb = functools.partial(invoke_get_quote, agent, True)
                agent['num_retries'] += 1
                logger.info("Connection to %s refused after %d/%d tries, trying again in %f seconds", agent['ip'], agent['num_retries'], maxr, retry)
                tornado.ioloop.IOLoop.current().call_later(retry, cb)
            return

        if (main_agent_operational_state == states.PROVIDE_V and
                new_operational_state == states.PROVIDE_V_RETRY):
            if agent['num_retries'] >= maxr:
                logger.warning("Agent %s was not reachable to provide v in %d tries, setting state to FAILED", agent['agent_id'], maxr)
                cloud_verifier_common.notify_error(
                    agent, msgtype='comm_error')
                await process_agent(agent, states.FAILED)
            else:
                agent['operational_state'] = states.PROVIDE_V
                cb = functools.partial(invoke_provide_v, agent)
                agent['num_retries'] += 1
                logger.info("Connection to %s refused after %d/%d tries, trying again in %f seconds", agent['ip'], agent['num_retries'], maxr, retry)
                tornado.ioloop.IOLoop.current().call_later(retry, cb)
            return
        raise Exception("nothing should ever fall out of this!")

    except Exception as e:
        logger.error("Polling thread error: %s", e)
        logger.exception(e)
def do_quote(self):
    """ Perform TPM quote by GET towards Agent

    Requests an identity quote from the agent with a fresh nonce, checks
    that the reported hash, encryption and signing algorithms are accepted
    by the tenant configuration, validates the quote, then encrypts
    ``self.U`` with the agent's public key and POSTs it to ``/keys/ukey``.

    Raises:
        UserError: on an unexpected agent response, an unaccepted
            algorithm, an invalid quote, or a failed POST of the
            encrypted U. ``self.do_cvstop()`` is called before any
            exception from the validation/POST phase propagates.
    """
    self.nonce = TPM_Utilities.random_password(20)

    numtries = 0
    response = None
    # Note: We need a specific retry handler (perhaps in common), no point having localised unless we have too.
    while True:
        try:
            params = '/quotes/identity?nonce=%s' % (self.nonce)
            cloudagent_base_url = f'{self.agent_ip}:{self.agent_port}'
            client = RequestsClient(cloudagent_base_url, tls_enabled=False)
            response = client.get(
                params,
                cert=self.cert
            )
            response_body = response.json()

        except Exception:
            # response is still None when the request itself failed; only
            # a 503/504 answer is treated as retryable.
            if response is not None and response.status_code in (503, 504):
                numtries += 1
                maxr = config.getint('tenant', 'max_retries')
                if numtries >= maxr:
                    logger.error("Tenant cannot establish connection to agent on %s with port %s", self.agent_ip, self.agent_port)
                    sys.exit()
                retry_interval = config.getfloat('tenant', 'retry_interval')
                logger.info("Tenant connection to agent at %s refused %s/%s times, trying again in %s seconds...",
                            self.agent_ip, numtries, maxr, retry_interval)
                time.sleep(retry_interval)
                continue
            # Re-raise the original error with its traceback intact.
            raise
        break

    try:
        if response is not None and response.status_code != 200:
            raise UserError(
                "Status command response: %d Unexpected response from Cloud Agent." % response.status_code)

        if "results" not in response_body:
            raise UserError(
                "Error: unexpected http response body from Cloud Agent: %s" % str(response.status_code))

        quote = response_body["results"]["quote"]
        logger.debug("Agent_quote received quote: %s", quote)

        public_key = response_body["results"]["pubkey"]
        logger.debug("Agent_quote received public key: %s", public_key)

        # Ensure hash_alg is in accept_tpm_hash_algs list
        hash_alg = response_body["results"]["hash_alg"]
        logger.debug("Agent_quote received hash algorithm: %s", hash_alg)
        if not algorithms.is_accepted(hash_alg, config.get('tenant', 'accept_tpm_hash_algs').split(',')):
            raise UserError(
                "TPM Quote is using an unaccepted hash algorithm: %s" % hash_alg)

        # Ensure enc_alg is in accept_tpm_encryption_algs list
        enc_alg = response_body["results"]["enc_alg"]
        logger.debug("Agent_quote received encryption algorithm: %s", enc_alg)
        if not algorithms.is_accepted(enc_alg, config.get('tenant', 'accept_tpm_encryption_algs').split(',')):
            raise UserError(
                "TPM Quote is using an unaccepted encryption algorithm: %s" % enc_alg)

        # Ensure sign_alg is in accept_tpm_signing_algs list
        sign_alg = response_body["results"]["sign_alg"]
        logger.debug("Agent_quote received signing algorithm: %s", sign_alg)
        if not algorithms.is_accepted(sign_alg, config.get('tenant', 'accept_tpm_signing_algs').split(',')):
            raise UserError(
                "TPM Quote is using an unaccepted signing algorithm: %s" % sign_alg)

        if not self.validate_tpm_quote(public_key, quote, hash_alg):
            raise UserError(
                "TPM Quote from cloud agent is invalid for nonce: %s" % self.nonce)

        logger.info("Quote from %s validated", self.agent_ip)

        # encrypt U with the public key
        encrypted_U = crypto.rsa_encrypt(
            crypto.rsa_import_pubkey(public_key), self.U)

        b64_encrypted_u = base64.b64encode(encrypted_U)
        logger.debug("b64_encrypted_u: %s", b64_encrypted_u.decode('utf-8'))
        data = {
            'encrypted_key': b64_encrypted_u,
            'auth_tag': self.auth_tag
        }

        if self.payload is not None:
            data['payload'] = self.payload

        u_json_message = json.dumps(data)

        # post encrypted U back to CloudAgent
        params = '/keys/ukey'
        cloudagent_base_url = f'{self.agent_ip}:{self.agent_port}'

        post_ukey = RequestsClient(cloudagent_base_url, tls_enabled=False)
        response = post_ukey.post(
            params,
            data=u_json_message
        )

        if response.status_code == 503:
            logger.error("Cannot connect to Agent at %s with Port %s. Connection refused.", self.agent_ip, self.agent_port)
            sys.exit()
        elif response.status_code == 504:
            logger.error("Verifier at %s with Port %s timed out.", self.verifier_ip, self.verifier_port)
            sys.exit()

        if response.status_code != 200:
            # NOTE(review): response_body still holds the body of the quote
            # GET, not of this POST - confirm that is what should be logged.
            keylime_logging.log_http_response(
                logger, logging.ERROR, response_body)
            raise UserError(
                "Posting of Encrypted U to the Cloud Agent failed with response code %d" % response.status_code)

    except Exception:
        # Stop the verifier side before propagating the failure.
        self.do_cvstop()
        raise